Diffstat (limited to 'drivers/gpu/drm/nouveau')
33 files changed, 986 insertions, 146 deletions
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index c88776d1e784..3b5757aed9c8 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -28,6 +28,7 @@ config DRM_NOUVEAU select THERMAL if ACPI && X86 select ACPI_VIDEO if ACPI && X86 select SND_HDA_COMPONENT if SND_HDA_CORE + select PM_DEVFREQ if ARCH_TEGRA help Choose this option for open-source NVIDIA support. diff --git a/drivers/gpu/drm/nouveau/include/nvfw/hs.h b/drivers/gpu/drm/nouveau/include/nvfw/hs.h index 8b58b668fc0c..c78ab11ec3ac 100644 --- a/drivers/gpu/drm/nouveau/include/nvfw/hs.h +++ b/drivers/gpu/drm/nouveau/include/nvfw/hs.h @@ -52,7 +52,9 @@ struct nvfw_hs_load_header_v2 { struct { u32 offset; u32 size; - } app[]; + u32 data_offset; + u32 data_size; + } app[] __counted_by(num_apps); }; const struct nvfw_hs_load_header_v2 *nvfw_hs_load_header_v2(struct nvkm_subdev *, const void *); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h index 22f74fc88cd7..57bc542780bb 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h @@ -9,6 +9,8 @@ struct nvkm_device_tegra { struct nvkm_device device; struct platform_device *pdev; + void __iomem *regs; + struct reset_control *rst; struct clk *clk; struct clk *clk_ref; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h index d5d8877064a7..6a09d397c651 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h @@ -134,4 +134,5 @@ int gf100_clk_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct int gk104_clk_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_clk **); int gk20a_clk_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_clk **); int gm20b_clk_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_clk **); +int gp10b_clk_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_clk **); #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index d59fd12268b9..6c26beeb427f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -57,7 +57,7 @@ nouveau_bo(struct ttm_buffer_object *bo) static inline void nouveau_bo_fini(struct nouveau_bo *bo) { - ttm_bo_put(&bo->bo); + ttm_bo_fini(&bo->bo); } extern struct ttm_device_funcs nouveau_bo_driver; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 805d0a87aa54..00515623a2cc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -30,6 +30,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_client_event.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_dumb_buffers.h> #include <drm/drm_fourcc.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_probe_helper.h> @@ -764,7 +765,7 @@ nouveau_display_suspend(struct drm_device *dev, bool runtime) { struct nouveau_display *disp = nouveau_display(dev); - drm_client_dev_suspend(dev, false); + drm_client_dev_suspend(dev); if (drm_drv_uses_atomic_modeset(dev)) { if (!runtime) { @@ -795,7 +796,7 @@ nouveau_display_resume(struct drm_device *dev, bool runtime) } } - drm_client_dev_resume(dev, false); + drm_client_dev_resume(dev); } int @@ -807,9 +808,9 @@ nouveau_display_dumb_create(struct drm_file *file_priv, 
struct drm_device *dev, uint32_t domain; int ret; - args->pitch = roundup(args->width * (args->bpp / 8), 256); - args->size = args->pitch * args->height; - args->size = roundup(args->size, PAGE_SIZE); + ret = drm_mode_size_dumb(dev, args, SZ_256, 0); + if (ret) + return ret; /* Use VRAM if there is any ; otherwise fallback to system memory */ if (nouveau_drm(dev)->client.device.info.ram_size != 0) diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index ca4932a150e3..58071652679d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -50,6 +50,7 @@ */ #define DMEM_CHUNK_SIZE (2UL << 20) #define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT) +#define NR_CHUNKS (128) enum nouveau_aper { NOUVEAU_APER_VIRT, @@ -83,9 +84,15 @@ struct nouveau_dmem { struct list_head chunks; struct mutex mutex; struct page *free_pages; + struct folio *free_folios; spinlock_t lock; }; +struct nouveau_dmem_dma_info { + dma_addr_t dma_addr; + size_t size; +}; + static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page) { return container_of(page_pgmap(page), struct nouveau_dmem_chunk, @@ -108,14 +115,20 @@ unsigned long nouveau_dmem_page_addr(struct page *page) return chunk->bo->offset + off; } -static void nouveau_dmem_page_free(struct page *page) +static void nouveau_dmem_folio_free(struct folio *folio) { + struct page *page = &folio->page; struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page); struct nouveau_dmem *dmem = chunk->drm->dmem; spin_lock(&dmem->lock); - page->zone_device_data = dmem->free_pages; - dmem->free_pages = page; + if (folio_order(folio)) { + page->zone_device_data = dmem->free_folios; + dmem->free_folios = folio; + } else { + page->zone_device_data = dmem->free_pages; + dmem->free_pages = page; + } WARN_ON(!chunk->callocated); chunk->callocated--; @@ -139,20 +152,28 @@ static void nouveau_dmem_fence_done(struct nouveau_fence **fence) } } -static int nouveau_dmem_copy_one(struct nouveau_drm *drm, struct page *spage, - struct page *dpage, dma_addr_t *dma_addr) +static int nouveau_dmem_copy_folio(struct nouveau_drm *drm, + struct folio *sfolio, struct folio *dfolio, + struct nouveau_dmem_dma_info *dma_info) { struct device *dev = drm->dev->dev; + struct page *dpage = folio_page(dfolio, 0); + struct page *spage = folio_page(sfolio, 0); - lock_page(dpage); + folio_lock(dfolio); - *dma_addr = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, *dma_addr)) + dma_info->dma_addr = dma_map_page(dev, dpage, 0, page_size(dpage), + DMA_BIDIRECTIONAL); + dma_info->size = page_size(dpage); + if (dma_mapping_error(dev, dma_info->dma_addr)) return -EIO; - if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr, - NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage))) { - dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (drm->dmem->migrate.copy_func(drm, folio_nr_pages(sfolio), + NOUVEAU_APER_HOST, dma_info->dma_addr, + NOUVEAU_APER_VRAM, + nouveau_dmem_page_addr(spage))) { + dma_unmap_page(dev, dma_info->dma_addr, page_size(dpage), + DMA_BIDIRECTIONAL); return -EIO; } @@ -165,21 +186,48 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) struct nouveau_dmem *dmem = drm->dmem; struct nouveau_fence *fence; struct nouveau_svmm *svmm; - struct page *spage, *dpage; - unsigned long src = 0, dst = 0; - dma_addr_t dma_addr = 0; + struct page *dpage; vm_fault_t ret = 0; + int err; struct migrate_vma args = { .vma = 
vmf->vma, - .start = vmf->address, - .end = vmf->address + PAGE_SIZE, - .src = &src, - .dst = &dst, .pgmap_owner = drm->dev, .fault_page = vmf->page, - .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE, + .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | + MIGRATE_VMA_SELECT_COMPOUND, + .src = NULL, + .dst = NULL, }; + unsigned int order, nr; + struct folio *sfolio, *dfolio; + struct nouveau_dmem_dma_info dma_info; + + sfolio = page_folio(vmf->page); + order = folio_order(sfolio); + nr = 1 << order; + + /* + * Handle partial unmap faults, where the folio is large, but + * the pmd is split. + */ + if (vmf->pte) { + order = 0; + nr = 1; + } + + if (order) + args.flags |= MIGRATE_VMA_SELECT_COMPOUND; + args.start = ALIGN_DOWN(vmf->address, (PAGE_SIZE << order)); + args.vma = vmf->vma; + args.end = args.start + (PAGE_SIZE << order); + args.src = kcalloc(nr, sizeof(*args.src), GFP_KERNEL); + args.dst = kcalloc(nr, sizeof(*args.dst), GFP_KERNEL); + + if (!args.src || !args.dst) { + ret = VM_FAULT_OOM; + goto err; + } /* * FIXME what we really want is to find some heuristic to migrate more * than just one page on CPU fault. When such fault happens it is very @@ -190,22 +238,28 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) if (!args.cpages) return 0; - spage = migrate_pfn_to_page(src); - if (!spage || !(src & MIGRATE_PFN_MIGRATE)) - goto done; - - dpage = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vmf->vma, vmf->address); - if (!dpage) + if (order) + dpage = folio_page(vma_alloc_folio(GFP_HIGHUSER | __GFP_ZERO, + order, vmf->vma, vmf->address), 0); + else + dpage = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vmf->vma, + vmf->address); + if (!dpage) { + ret = VM_FAULT_OOM; goto done; + } - dst = migrate_pfn(page_to_pfn(dpage)); + args.dst[0] = migrate_pfn(page_to_pfn(dpage)); + if (order) + args.dst[0] |= MIGRATE_PFN_COMPOUND; + dfolio = page_folio(dpage); - svmm = spage->zone_device_data; + svmm = folio_zone_device_data(sfolio); mutex_lock(&svmm->mutex); nouveau_svmm_invalidate(svmm, args.start, args.end); - ret = nouveau_dmem_copy_one(drm, spage, dpage, &dma_addr); + err = nouveau_dmem_copy_folio(drm, sfolio, dfolio, &dma_info); mutex_unlock(&svmm->mutex); - if (ret) { + if (err) { ret = VM_FAULT_SIGBUS; goto done; } @@ -213,25 +267,40 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) nouveau_fence_new(&fence, dmem->migrate.chan); migrate_vma_pages(&args); nouveau_dmem_fence_done(&fence); - dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page(drm->dev->dev, dma_info.dma_addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); done: migrate_vma_finalize(&args); +err: + kfree(args.src); + kfree(args.dst); return ret; } +static void nouveau_dmem_folio_split(struct folio *head, struct folio *tail) +{ + if (tail == NULL) + return; + tail->pgmap = head->pgmap; + tail->mapping = head->mapping; + folio_set_zone_device_data(tail, folio_zone_device_data(head)); +} + static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = { - .page_free = nouveau_dmem_page_free, + .folio_free = nouveau_dmem_folio_free, .migrate_to_ram = nouveau_dmem_migrate_to_ram, + .folio_split = nouveau_dmem_folio_split, }; static int -nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage) +nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage, + bool is_large) { struct nouveau_dmem_chunk *chunk; struct resource *res; struct page *page; void *ptr; - unsigned long i, pfn_first; + unsigned long i, pfn_first, pfn; int ret; chunk = kzalloc(sizeof(*chunk), 
GFP_KERNEL); @@ -241,7 +310,7 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage) } /* Allocate unused physical address space for device private pages. */ - res = request_free_mem_region(&iomem_resource, DMEM_CHUNK_SIZE, + res = request_free_mem_region(&iomem_resource, DMEM_CHUNK_SIZE * NR_CHUNKS, "nouveau_dmem"); if (IS_ERR(res)) { ret = PTR_ERR(res); @@ -274,16 +343,40 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage) pfn_first = chunk->pagemap.range.start >> PAGE_SHIFT; page = pfn_to_page(pfn_first); spin_lock(&drm->dmem->lock); - for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) { - page->zone_device_data = drm->dmem->free_pages; - drm->dmem->free_pages = page; + + pfn = pfn_first; + for (i = 0; i < NR_CHUNKS; i++) { + int j; + + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) || !is_large) { + for (j = 0; j < DMEM_CHUNK_NPAGES - 1; j++, pfn++) { + page = pfn_to_page(pfn); + page->zone_device_data = drm->dmem->free_pages; + drm->dmem->free_pages = page; + } + } else { + page = pfn_to_page(pfn); + page->zone_device_data = drm->dmem->free_folios; + drm->dmem->free_folios = page_folio(page); + pfn += DMEM_CHUNK_NPAGES; + } + } + + /* Move to next page */ + if (is_large) { + *ppage = &drm->dmem->free_folios->page; + drm->dmem->free_folios = (*ppage)->zone_device_data; + } else { + *ppage = drm->dmem->free_pages; + drm->dmem->free_pages = (*ppage)->zone_device_data; } - *ppage = page; + chunk->callocated++; spin_unlock(&drm->dmem->lock); - NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", - DMEM_CHUNK_SIZE >> 20); + NV_INFO(drm, "DMEM: registered %ldMB of %sdevice memory %lx %lx\n", + NR_CHUNKS * DMEM_CHUNK_SIZE >> 20, is_large ? "THP " : "", pfn_first, + nouveau_dmem_page_addr(page)); return 0; @@ -298,27 +391,41 @@ out: } static struct page * -nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm) +nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm, bool is_large) { struct nouveau_dmem_chunk *chunk; struct page *page = NULL; + struct folio *folio = NULL; int ret; + unsigned int order = 0; spin_lock(&drm->dmem->lock); - if (drm->dmem->free_pages) { + if (is_large && drm->dmem->free_folios) { + folio = drm->dmem->free_folios; + page = &folio->page; + drm->dmem->free_folios = page->zone_device_data; + chunk = nouveau_page_to_chunk(&folio->page); + chunk->callocated++; + spin_unlock(&drm->dmem->lock); + order = ilog2(DMEM_CHUNK_NPAGES); + } else if (!is_large && drm->dmem->free_pages) { page = drm->dmem->free_pages; drm->dmem->free_pages = page->zone_device_data; chunk = nouveau_page_to_chunk(page); chunk->callocated++; spin_unlock(&drm->dmem->lock); + folio = page_folio(page); } else { spin_unlock(&drm->dmem->lock); - ret = nouveau_dmem_chunk_alloc(drm, &page); + ret = nouveau_dmem_chunk_alloc(drm, &page, is_large); if (ret) return NULL; + folio = page_folio(page); + if (is_large) + order = ilog2(DMEM_CHUNK_NPAGES); } - zone_device_page_init(page); + zone_device_folio_init(folio, order); return page; } @@ -369,12 +476,12 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) { unsigned long i, npages = range_len(&chunk->pagemap.range) >> PAGE_SHIFT; unsigned long *src_pfns, *dst_pfns; - dma_addr_t *dma_addrs; + struct nouveau_dmem_dma_info *dma_info; struct nouveau_fence *fence; src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL); dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL); - dma_addrs = kvcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL | __GFP_NOFAIL); + dma_info = 
kvcalloc(npages, sizeof(*dma_info), GFP_KERNEL | __GFP_NOFAIL); migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT, npages); @@ -382,17 +489,28 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) for (i = 0; i < npages; i++) { if (src_pfns[i] & MIGRATE_PFN_MIGRATE) { struct page *dpage; + struct folio *folio = page_folio( + migrate_pfn_to_page(src_pfns[i])); + unsigned int order = folio_order(folio); + + if (src_pfns[i] & MIGRATE_PFN_COMPOUND) { + dpage = folio_page( + folio_alloc( + GFP_HIGHUSER_MOVABLE, order), 0); + } else { + /* + * _GFP_NOFAIL because the GPU is going away and there + * is nothing sensible we can do if we can't copy the + * data back. + */ + dpage = alloc_page(GFP_HIGHUSER | __GFP_NOFAIL); + } - /* - * _GFP_NOFAIL because the GPU is going away and there - * is nothing sensible we can do if we can't copy the - * data back. - */ - dpage = alloc_page(GFP_HIGHUSER | __GFP_NOFAIL); dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)); - nouveau_dmem_copy_one(chunk->drm, - migrate_pfn_to_page(src_pfns[i]), dpage, - &dma_addrs[i]); + nouveau_dmem_copy_folio(chunk->drm, + page_folio(migrate_pfn_to_page(src_pfns[i])), + page_folio(dpage), + &dma_info[i]); } } @@ -403,8 +521,9 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) kvfree(src_pfns); kvfree(dst_pfns); for (i = 0; i < npages; i++) - dma_unmap_page(chunk->drm->dev->dev, dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL); - kvfree(dma_addrs); + dma_unmap_page(chunk->drm->dev->dev, dma_info[i].dma_addr, + dma_info[i].size, DMA_BIDIRECTIONAL); + kvfree(dma_info); } void @@ -607,31 +726,36 @@ nouveau_dmem_init(struct nouveau_drm *drm) static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm, struct nouveau_svmm *svmm, unsigned long src, - dma_addr_t *dma_addr, u64 *pfn) + struct nouveau_dmem_dma_info *dma_info, u64 *pfn) { struct device *dev = drm->dev->dev; struct page *dpage, *spage; unsigned long paddr; + bool is_large = false; + unsigned long mpfn; spage = migrate_pfn_to_page(src); if (!(src & MIGRATE_PFN_MIGRATE)) goto out; - dpage = nouveau_dmem_page_alloc_locked(drm); + is_large = src & MIGRATE_PFN_COMPOUND; + dpage = nouveau_dmem_page_alloc_locked(drm, is_large); if (!dpage) goto out; paddr = nouveau_dmem_page_addr(dpage); if (spage) { - *dma_addr = dma_map_page(dev, spage, 0, page_size(spage), + dma_info->dma_addr = dma_map_page(dev, spage, 0, page_size(spage), DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, *dma_addr)) + dma_info->size = page_size(spage); + if (dma_mapping_error(dev, dma_info->dma_addr)) goto out_free_page; - if (drm->dmem->migrate.copy_func(drm, 1, - NOUVEAU_APER_VRAM, paddr, NOUVEAU_APER_HOST, *dma_addr)) + if (drm->dmem->migrate.copy_func(drm, folio_nr_pages(page_folio(spage)), + NOUVEAU_APER_VRAM, paddr, NOUVEAU_APER_HOST, + dma_info->dma_addr)) goto out_dma_unmap; } else { - *dma_addr = DMA_MAPPING_ERROR; + dma_info->dma_addr = DMA_MAPPING_ERROR; if (drm->dmem->migrate.clear_func(drm, page_size(dpage), NOUVEAU_APER_VRAM, paddr)) goto out_free_page; @@ -642,10 +766,13 @@ static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm, ((paddr >> PAGE_SHIFT) << NVIF_VMM_PFNMAP_V0_ADDR_SHIFT); if (src & MIGRATE_PFN_WRITE) *pfn |= NVIF_VMM_PFNMAP_V0_W; - return migrate_pfn(page_to_pfn(dpage)); + mpfn = migrate_pfn(page_to_pfn(dpage)); + if (folio_order(page_folio(dpage))) + mpfn |= MIGRATE_PFN_COMPOUND; + return mpfn; out_dma_unmap: - dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_info->dma_addr, 
PAGE_SIZE, DMA_BIDIRECTIONAL); out_free_page: nouveau_dmem_page_free_locked(drm, dpage); out: @@ -655,27 +782,38 @@ out: static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm, struct nouveau_svmm *svmm, struct migrate_vma *args, - dma_addr_t *dma_addrs, u64 *pfns) + struct nouveau_dmem_dma_info *dma_info, u64 *pfns) { struct nouveau_fence *fence; unsigned long addr = args->start, nr_dma = 0, i; + unsigned long order = 0; + + for (i = 0; addr < args->end; ) { + struct folio *folio; - for (i = 0; addr < args->end; i++) { args->dst[i] = nouveau_dmem_migrate_copy_one(drm, svmm, - args->src[i], dma_addrs + nr_dma, pfns + i); - if (!dma_mapping_error(drm->dev->dev, dma_addrs[nr_dma])) + args->src[i], dma_info + nr_dma, pfns + i); + if (!args->dst[i]) { + i++; + addr += PAGE_SIZE; + continue; + } + if (!dma_mapping_error(drm->dev->dev, dma_info[nr_dma].dma_addr)) nr_dma++; - addr += PAGE_SIZE; + folio = page_folio(migrate_pfn_to_page(args->dst[i])); + order = folio_order(folio); + i += 1 << order; + addr += (1 << order) * PAGE_SIZE; } nouveau_fence_new(&fence, drm->dmem->migrate.chan); migrate_vma_pages(args); nouveau_dmem_fence_done(&fence); - nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i); + nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i, order); while (nr_dma--) { - dma_unmap_page(drm->dev->dev, dma_addrs[nr_dma], PAGE_SIZE, - DMA_BIDIRECTIONAL); + dma_unmap_page(drm->dev->dev, dma_info[nr_dma].dma_addr, + dma_info[nr_dma].size, DMA_BIDIRECTIONAL); } migrate_vma_finalize(args); } @@ -688,20 +826,27 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, unsigned long end) { unsigned long npages = (end - start) >> PAGE_SHIFT; - unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages); - dma_addr_t *dma_addrs; + unsigned long max = npages; struct migrate_vma args = { .vma = vma, .start = start, .pgmap_owner = drm->dev, - .flags = MIGRATE_VMA_SELECT_SYSTEM, + .flags = MIGRATE_VMA_SELECT_SYSTEM + | MIGRATE_VMA_SELECT_COMPOUND, }; unsigned long i; u64 *pfns; int ret = -ENOMEM; + struct nouveau_dmem_dma_info *dma_info; - if (drm->dmem == NULL) - return -ENODEV; + if (drm->dmem == NULL) { + ret = -ENODEV; + goto out; + } + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + if (max > (unsigned long)HPAGE_PMD_NR) + max = (unsigned long)HPAGE_PMD_NR; args.src = kcalloc(max, sizeof(*args.src), GFP_KERNEL); if (!args.src) @@ -710,8 +855,8 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, if (!args.dst) goto out_free_src; - dma_addrs = kmalloc_array(max, sizeof(*dma_addrs), GFP_KERNEL); - if (!dma_addrs) + dma_info = kmalloc_array(max, sizeof(*dma_info), GFP_KERNEL); + if (!dma_info) goto out_free_dst; pfns = nouveau_pfns_alloc(max); @@ -729,7 +874,7 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, goto out_free_pfns; if (args.cpages) - nouveau_dmem_migrate_chunk(drm, svmm, &args, dma_addrs, + nouveau_dmem_migrate_chunk(drm, svmm, &args, dma_info, pfns); args.start = args.end; } @@ -738,7 +883,7 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, out_free_pfns: nouveau_pfns_free(pfns); out_free_dma: - kfree(dma_addrs); + kfree(dma_info); out_free_dst: kfree(args.dst); out_free_src: diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 55abc510067b..0e409414f44d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -10,7 +10,7 @@ #define DRIVER_MAJOR 1 #define DRIVER_MINOR 4 -#define DRIVER_PATCHLEVEL 0 +#define DRIVER_PATCHLEVEL 1 /* * 1.1.1: @@ -35,6 +35,8 @@ * programs that get 
directly linked with NVKM. * 1.3.1: * - implemented limited ABI16/NVIF interop + * 1.4.1: + * - add variable page sizes and compression for Turing+ */ #include <linux/notifier.h> @@ -49,6 +51,7 @@ #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> +#include <drm/drm_print.h> #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_placement.h> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 690e10fbf0bd..395d92ab6271 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -87,7 +87,7 @@ nouveau_gem_object_del(struct drm_gem_object *gem) return; } - ttm_bo_put(&nvbo->bo); + ttm_bo_fini(&nvbo->bo); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c index 8d5853deeee4..9fd351273236 100644 --- a/drivers/gpu/drm/nouveau/nouveau_platform.c +++ b/drivers/gpu/drm/nouveau/nouveau_platform.c @@ -21,6 +21,8 @@ */ #include "nouveau_platform.h" +#include <nvkm/subdev/clk/gk20a_devfreq.h> + static int nouveau_platform_probe(struct platform_device *pdev) { const struct nvkm_device_tegra_func *func; @@ -40,6 +42,21 @@ static void nouveau_platform_remove(struct platform_device *pdev) nouveau_drm_device_remove(drm); } +#ifdef CONFIG_PM_SLEEP +static int nouveau_platform_suspend(struct device *dev) +{ + return gk20a_devfreq_suspend(dev); +} + +static int nouveau_platform_resume(struct device *dev) +{ + return gk20a_devfreq_resume(dev); +} + +static SIMPLE_DEV_PM_OPS(nouveau_pm_ops, nouveau_platform_suspend, + nouveau_platform_resume); +#endif + #if IS_ENABLED(CONFIG_OF) static const struct nvkm_device_tegra_func gk20a_platform_data = { .iommu_bit = 34, @@ -81,6 +98,9 @@ struct platform_driver nouveau_platform_driver = { .driver = { .name = "nouveau", .of_match_table = of_match_ptr(nouveau_platform_match), +#ifdef CONFIG_PM_SLEEP + .pm = &nouveau_pm_ops, +#endif }, .probe = nouveau_platform_probe, .remove = nouveau_platform_remove, diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 6fa387da0637..b8a3378154d5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -921,12 +921,14 @@ nouveau_pfns_free(u64 *pfns) void nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm, - unsigned long addr, u64 *pfns, unsigned long npages) + unsigned long addr, u64 *pfns, unsigned long npages, + unsigned int page_shift) { struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns); args->p.addr = addr; - args->p.size = npages << PAGE_SHIFT; + args->p.size = npages << page_shift; + args->p.page = page_shift; mutex_lock(&svmm->mutex); diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.h b/drivers/gpu/drm/nouveau/nouveau_svm.h index e7d63d7f0c2d..3fd78662f17e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.h +++ b/drivers/gpu/drm/nouveau/nouveau_svm.h @@ -33,7 +33,8 @@ void nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit); u64 *nouveau_pfns_alloc(unsigned long npages); void nouveau_pfns_free(u64 *pfns); void nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm, - unsigned long addr, u64 *pfns, unsigned long npages); + unsigned long addr, u64 *pfns, unsigned long npages, + unsigned int page_shift); #else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ static inline void nouveau_svm_init(struct nouveau_drm *drm) {} static inline void nouveau_svm_fini(struct nouveau_drm *drm) {} 
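The nouveau_svm.c/.h hunks above thread a page_shift parameter through nouveau_pfns_map(), so the PFN-map arguments are sized as npages << page_shift instead of always assuming PAGE_SHIFT. A stand-alone sketch of that size arithmetic, assuming a 4 KiB base page and a 2 MiB large page; the function name is illustrative and not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* Mirrors args->p.size = npages << page_shift from nouveau_pfns_map() above.
 * A shift of 12 describes 4 KiB entries; 21 describes one 2 MiB (PMD) entry.
 */
static uint64_t pfnmap_size(uint64_t npages, unsigned int page_shift)
{
	return npages << page_shift;
}

int main(void)
{
	/* Both cover the same 2 MiB of virtual address space. */
	printf("512 x 4 KiB = %llu bytes\n", (unsigned long long)pfnmap_size(512, 12));
	printf("  1 x 2 MiB = %llu bytes\n", (unsigned long long)pfnmap_size(1, 21));
	return 0;
}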
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 7d2436e5d50d..0a55babdf667 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -302,8 +302,10 @@ nouveau_ttm_init(struct nouveau_drm *drm) ret = ttm_device_init(&drm->ttm.bdev, &nouveau_bo_driver, drm->dev->dev, dev->anon_inode->i_mapping, dev->vma_offset_manager, - drm_need_swiotlb(drm->client.mmu.dmabits), - drm->client.mmu.dmabits <= 32); + (drm_need_swiotlb(drm->client.mmu.dmabits) ? + TTM_ALLOCATION_POOL_USE_DMA_ALLOC : 0) | + (drm->client.mmu.dmabits <= 32 ? + TTM_ALLOCATION_POOL_USE_DMA32 : 0)); if (ret) { NV_ERROR(drm, "error initialising bo driver, %d\n", ret); return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 79eefdfd08a2..f10809115c56 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -107,34 +107,34 @@ nouveau_uvmm_vmm_sparse_unref(struct nouveau_uvmm *uvmm, static int nouveau_uvmm_vmm_get(struct nouveau_uvmm *uvmm, - u64 addr, u64 range) + u64 addr, u64 range, u8 page_shift) { struct nvif_vmm *vmm = &uvmm->vmm.vmm; - return nvif_vmm_raw_get(vmm, addr, range, PAGE_SHIFT); + return nvif_vmm_raw_get(vmm, addr, range, page_shift); } static int nouveau_uvmm_vmm_put(struct nouveau_uvmm *uvmm, - u64 addr, u64 range) + u64 addr, u64 range, u8 page_shift) { struct nvif_vmm *vmm = &uvmm->vmm.vmm; - return nvif_vmm_raw_put(vmm, addr, range, PAGE_SHIFT); + return nvif_vmm_raw_put(vmm, addr, range, page_shift); } static int nouveau_uvmm_vmm_unmap(struct nouveau_uvmm *uvmm, - u64 addr, u64 range, bool sparse) + u64 addr, u64 range, u8 page_shift, bool sparse) { struct nvif_vmm *vmm = &uvmm->vmm.vmm; - return nvif_vmm_raw_unmap(vmm, addr, range, PAGE_SHIFT, sparse); + return nvif_vmm_raw_unmap(vmm, addr, range, page_shift, sparse); } static int nouveau_uvmm_vmm_map(struct nouveau_uvmm *uvmm, - u64 addr, u64 range, + u64 addr, u64 range, u8 page_shift, u64 bo_offset, u8 kind, struct nouveau_mem *mem) { @@ -163,7 +163,7 @@ nouveau_uvmm_vmm_map(struct nouveau_uvmm *uvmm, return -ENOSYS; } - return nvif_vmm_raw_map(vmm, addr, range, PAGE_SHIFT, + return nvif_vmm_raw_map(vmm, addr, range, page_shift, &args, argc, &mem->mem, bo_offset); } @@ -182,8 +182,9 @@ nouveau_uvma_vmm_put(struct nouveau_uvma *uvma) { u64 addr = uvma->va.va.addr; u64 range = uvma->va.va.range; + u8 page_shift = uvma->page_shift; - return nouveau_uvmm_vmm_put(to_uvmm(uvma), addr, range); + return nouveau_uvmm_vmm_put(to_uvmm(uvma), addr, range, page_shift); } static int @@ -193,9 +194,11 @@ nouveau_uvma_map(struct nouveau_uvma *uvma, u64 addr = uvma->va.va.addr; u64 offset = uvma->va.gem.offset; u64 range = uvma->va.va.range; + u8 page_shift = uvma->page_shift; return nouveau_uvmm_vmm_map(to_uvmm(uvma), addr, range, - offset, uvma->kind, mem); + page_shift, offset, uvma->kind, + mem); } static int @@ -203,12 +206,13 @@ nouveau_uvma_unmap(struct nouveau_uvma *uvma) { u64 addr = uvma->va.va.addr; u64 range = uvma->va.va.range; + u8 page_shift = uvma->page_shift; bool sparse = !!uvma->region; if (drm_gpuva_invalidated(&uvma->va)) return 0; - return nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, sparse); + return nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, page_shift, sparse); } static int @@ -450,6 +454,62 @@ op_unmap_prepare_unwind(struct drm_gpuva *va) drm_gpuva_insert(va->vm, va); } +static bool +op_map_aligned_to_page_shift(const struct drm_gpuva_op_map *op, u8 
page_shift) +{ + u64 non_page_bits = (1ULL << page_shift) - 1; + + return (op->va.addr & non_page_bits) == 0 && + (op->va.range & non_page_bits) == 0 && + (op->gem.offset & non_page_bits) == 0; +} + +static u8 +select_page_shift(struct nouveau_uvmm *uvmm, struct drm_gpuva_op_map *op) +{ + struct nouveau_bo *nvbo = nouveau_gem_object(op->gem.obj); + + /* nouveau_bo_fixup_align() guarantees that the page size will be aligned + * for most cases, but it can't handle cases where userspace allocates with + * a size and then binds with a smaller granularity. So in order to avoid + * breaking old userspace, we need to ensure that the VA is actually + * aligned before using it, and if it isn't, then we downgrade to the first + * granularity that will fit, which is optimal from a correctness and + * performance perspective. + */ + if (op_map_aligned_to_page_shift(op, nvbo->page)) + return nvbo->page; + + struct nouveau_mem *mem = nouveau_mem(nvbo->bo.resource); + struct nvif_vmm *vmm = &uvmm->vmm.vmm; + int i; + + /* If the given granularity doesn't fit, let's find one that will fit. */ + for (i = 0; i < vmm->page_nr; i++) { + /* Ignore anything that is bigger or identical to the BO preference. */ + if (vmm->page[i].shift >= nvbo->page) + continue; + + /* Skip incompatible domains. */ + if ((mem->mem.type & NVIF_MEM_VRAM) && !vmm->page[i].vram) + continue; + if ((mem->mem.type & NVIF_MEM_HOST) && + (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT)) + continue; + + /* If it fits, return the proposed shift. */ + if (op_map_aligned_to_page_shift(op, vmm->page[i].shift)) + return vmm->page[i].shift; + } + + /* If we get here then nothing can reconcile the requirements. This should never + * happen. + */ + drm_WARN_ONCE(op->gem.obj->dev, 1, "Could not find an appropriate page size.\n"); + + return PAGE_SHIFT; +} + static void nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, struct nouveau_uvma_prealloc *new, @@ -501,7 +561,8 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, if (vmm_get_range) nouveau_uvmm_vmm_put(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, + select_page_shift(uvmm, &op->map)); break; } case DRM_GPUVA_OP_REMAP: { @@ -528,6 +589,7 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, u64 ustart = va->va.addr; u64 urange = va->va.range; u64 uend = ustart + urange; + u8 page_shift = uvma_from_va(va)->page_shift; /* Nothing to do for mappings we merge with. 
*/ if (uend == vmm_get_start || @@ -538,7 +600,8 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, u64 vmm_get_range = ustart - vmm_get_start; nouveau_uvmm_vmm_put(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, + page_shift); } vmm_get_start = uend; break; @@ -592,6 +655,7 @@ op_map_prepare(struct nouveau_uvmm *uvmm, uvma->region = args->region; uvma->kind = args->kind; + uvma->page_shift = select_page_shift(uvmm, op); drm_gpuva_map(&uvmm->base, &uvma->va, op); @@ -633,7 +697,8 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, if (vmm_get_range) { ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, + new->map->page_shift); if (ret) { op_map_prepare_unwind(new->map); goto unwind; @@ -689,6 +754,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, u64 ustart = va->va.addr; u64 urange = va->va.range; u64 uend = ustart + urange; + u8 page_shift = uvma_from_va(va)->page_shift; op_unmap_prepare(u); @@ -704,7 +770,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, u64 vmm_get_range = ustart - vmm_get_start; ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, page_shift); if (ret) { op_unmap_prepare_unwind(va); goto unwind; @@ -799,10 +865,11 @@ op_unmap_range(struct drm_gpuva_op_unmap *u, u64 addr, u64 range) { struct nouveau_uvma *uvma = uvma_from_va(u->va); + u8 page_shift = uvma->page_shift; bool sparse = !!uvma->region; if (!drm_gpuva_invalidated(u->va)) - nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, sparse); + nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, page_shift, sparse); } static void @@ -882,6 +949,7 @@ nouveau_uvmm_sm_cleanup(struct nouveau_uvmm *uvmm, struct drm_gpuva_op_map *n = r->next; struct drm_gpuva *va = r->unmap->va; struct nouveau_uvma *uvma = uvma_from_va(va); + u8 page_shift = uvma->page_shift; if (unmap) { u64 addr = va->va.addr; @@ -893,7 +961,7 @@ nouveau_uvmm_sm_cleanup(struct nouveau_uvmm *uvmm, if (n) end = n->va.addr; - nouveau_uvmm_vmm_put(uvmm, addr, end - addr); + nouveau_uvmm_vmm_put(uvmm, addr, end - addr, page_shift); } nouveau_uvma_gem_put(uvma); diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.h b/drivers/gpu/drm/nouveau/nouveau_uvmm.h index 9d3c348581eb..51925711ae90 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h @@ -33,6 +33,7 @@ struct nouveau_uvma { struct nouveau_uvma_region *region; u8 kind; + u8 page_shift; }; #define uvmm_from_gpuvm(x) container_of((x), struct nouveau_uvmm, base) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 3375a59ebf1a..2517b65d8faa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2280,6 +2280,7 @@ nv13b_chipset = { .acr = { 0x00000001, gp10b_acr_new }, .bar = { 0x00000001, gm20b_bar_new }, .bus = { 0x00000001, gf100_bus_new }, + .clk = { 0x00000001, gp10b_clk_new }, .fault = { 0x00000001, gp10b_fault_new }, .fb = { 0x00000001, gp10b_fb_new }, .fuse = { 0x00000001, gm107_fuse_new }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c index 114e50ca1827..03aa6f09ec89 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c @@ -259,6 +259,10 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, tdev->func = func; tdev->pdev = pdev; + tdev->regs = devm_platform_ioremap_resource(pdev, 0); + if 
(IS_ERR(tdev->regs)) + return PTR_ERR(tdev->regs); + if (func->require_vdd) { tdev->vdd = devm_regulator_get(&pdev->dev, "vdd"); if (IS_ERR(tdev->vdd)) { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild index dcecd499d8df..be8f3283ee16 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild @@ -10,6 +10,8 @@ nvkm-y += nvkm/subdev/clk/gf100.o nvkm-y += nvkm/subdev/clk/gk104.o nvkm-y += nvkm/subdev/clk/gk20a.o nvkm-y += nvkm/subdev/clk/gm20b.o +nvkm-y += nvkm/subdev/clk/gp10b.o +nvkm-$(CONFIG_PM_DEVFREQ) += nvkm/subdev/clk/gk20a_devfreq.o nvkm-y += nvkm/subdev/clk/pllnv04.o nvkm-y += nvkm/subdev/clk/pllgt215.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c index d573fb0917fc..65f5d0f1f3bf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c @@ -23,6 +23,7 @@ * */ #include "priv.h" +#include "gk20a_devfreq.h" #include "gk20a.h" #include <core/tegra.h> @@ -589,6 +590,10 @@ gk20a_clk_init(struct nvkm_clk *base) return ret; } + ret = gk20a_devfreq_init(base, &clk->devfreq); + if (ret) + return ret; + return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h index 286413ff4a9e..ea5b0bab4cce 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h @@ -118,6 +118,7 @@ struct gk20a_clk { const struct gk20a_clk_pllg_params *params; struct gk20a_pll pll; u32 parent_rate; + struct gk20a_devfreq *devfreq; u32 (*div_to_pl)(u32); u32 (*pl_to_div)(u32); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c new file mode 100644 index 000000000000..41003cbcdbfa --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: MIT +#include <linux/clk.h> +#include <linux/math64.h> +#include <linux/platform_device.h> +#include <linux/pm_opp.h> + +#include <drm/drm_managed.h> + +#include <subdev/clk.h> + +#include "nouveau_drv.h" +#include "nouveau_chan.h" +#include "priv.h" +#include "gk20a_devfreq.h" +#include "gk20a.h" +#include "gp10b.h" + +#define PMU_BUSY_CYCLES_NORM_MAX 1000U + +#define PWR_PMU_IDLE_COUNTER_TOTAL 0U +#define PWR_PMU_IDLE_COUNTER_BUSY 4U + +#define PWR_PMU_IDLE_COUNT_REG_OFFSET 0x0010A508U +#define PWR_PMU_IDLE_COUNT_REG_SIZE 16U +#define PWR_PMU_IDLE_COUNT_MASK 0x7FFFFFFFU +#define PWR_PMU_IDLE_COUNT_RESET_VALUE (0x1U << 31U) + +#define PWR_PMU_IDLE_INTR_REG_OFFSET 0x0010A9E8U +#define PWR_PMU_IDLE_INTR_ENABLE_VALUE 0U + +#define PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET 0x0010A9ECU +#define PWR_PMU_IDLE_INTR_STATUS_MASK 0x00000001U +#define PWR_PMU_IDLE_INTR_STATUS_RESET_VALUE 0x1U + +#define PWR_PMU_IDLE_THRESHOLD_REG_OFFSET 0x0010A8A0U +#define PWR_PMU_IDLE_THRESHOLD_REG_SIZE 4U +#define PWR_PMU_IDLE_THRESHOLD_MAX_VALUE 0x7FFFFFFFU + +#define PWR_PMU_IDLE_CTRL_REG_OFFSET 0x0010A50CU +#define PWR_PMU_IDLE_CTRL_REG_SIZE 16U +#define PWR_PMU_IDLE_CTRL_VALUE_MASK 0x3U +#define PWR_PMU_IDLE_CTRL_VALUE_BUSY 0x2U +#define PWR_PMU_IDLE_CTRL_VALUE_ALWAYS 0x3U +#define PWR_PMU_IDLE_CTRL_FILTER_MASK (0x1U << 2) +#define PWR_PMU_IDLE_CTRL_FILTER_DISABLED 0x0U + +#define PWR_PMU_IDLE_MASK_REG_OFFSET 0x0010A504U +#define PWR_PMU_IDLE_MASK_REG_SIZE 16U +#define PWM_PMU_IDLE_MASK_GR_ENABLED 0x1U +#define 
PWM_PMU_IDLE_MASK_CE_2_ENABLED 0x200000U + +/** + * struct gk20a_devfreq - Device frequency management + */ +struct gk20a_devfreq { + /** @devfreq: devfreq device. */ + struct devfreq *devfreq; + + /** @regs: Device registers. */ + void __iomem *regs; + + /** @gov_data: Governor data. */ + struct devfreq_simple_ondemand_data gov_data; + + /** @busy_time: Busy time. */ + ktime_t busy_time; + + /** @total_time: Total time. */ + ktime_t total_time; + + /** @time_last_update: Last update time. */ + ktime_t time_last_update; +}; + +static struct gk20a_devfreq *dev_to_gk20a_devfreq(struct device *dev) +{ + struct nouveau_drm *drm = dev_get_drvdata(dev); + struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0); + struct nvkm_clk *base = nvkm_clk(subdev); + + switch (drm->nvkm->chipset) { + case 0x13b: return gp10b_clk(base)->devfreq; break; + default: return gk20a_clk(base)->devfreq; break; + } +} + +static void gk20a_pmu_init_perfmon_counter(struct gk20a_devfreq *gdevfreq) +{ + u32 data; + + // Set pmu idle intr status bit on total counter overflow + writel(PWR_PMU_IDLE_INTR_ENABLE_VALUE, + gdevfreq->regs + PWR_PMU_IDLE_INTR_REG_OFFSET); + + writel(PWR_PMU_IDLE_THRESHOLD_MAX_VALUE, + gdevfreq->regs + PWR_PMU_IDLE_THRESHOLD_REG_OFFSET + + (PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_THRESHOLD_REG_SIZE)); + + // Setup counter for total cycles + data = readl(gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET + + (PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_CTRL_REG_SIZE)); + data &= ~(PWR_PMU_IDLE_CTRL_VALUE_MASK | PWR_PMU_IDLE_CTRL_FILTER_MASK); + data |= PWR_PMU_IDLE_CTRL_VALUE_ALWAYS | PWR_PMU_IDLE_CTRL_FILTER_DISABLED; + writel(data, gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET + + (PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_CTRL_REG_SIZE)); + + // Setup counter for busy cycles + writel(PWM_PMU_IDLE_MASK_GR_ENABLED | PWM_PMU_IDLE_MASK_CE_2_ENABLED, + gdevfreq->regs + PWR_PMU_IDLE_MASK_REG_OFFSET + + (PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_MASK_REG_SIZE)); + + data = readl(gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET + + (PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_CTRL_REG_SIZE)); + data &= ~(PWR_PMU_IDLE_CTRL_VALUE_MASK | PWR_PMU_IDLE_CTRL_FILTER_MASK); + data |= PWR_PMU_IDLE_CTRL_VALUE_BUSY | PWR_PMU_IDLE_CTRL_FILTER_DISABLED; + writel(data, gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET + + (PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_CTRL_REG_SIZE)); +} + +static u32 gk20a_pmu_read_idle_counter(struct gk20a_devfreq *gdevfreq, u32 counter_id) +{ + u32 ret; + + ret = readl(gdevfreq->regs + PWR_PMU_IDLE_COUNT_REG_OFFSET + + (counter_id * PWR_PMU_IDLE_COUNT_REG_SIZE)); + + return ret & PWR_PMU_IDLE_COUNT_MASK; +} + +static void gk20a_pmu_reset_idle_counter(struct gk20a_devfreq *gdevfreq, u32 counter_id) +{ + writel(PWR_PMU_IDLE_COUNT_RESET_VALUE, gdevfreq->regs + PWR_PMU_IDLE_COUNT_REG_OFFSET + + (counter_id * PWR_PMU_IDLE_COUNT_REG_SIZE)); +} + +static u32 gk20a_pmu_read_idle_intr_status(struct gk20a_devfreq *gdevfreq) +{ + u32 ret; + + ret = readl(gdevfreq->regs + PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET); + + return ret & PWR_PMU_IDLE_INTR_STATUS_MASK; +} + +static void gk20a_pmu_clear_idle_intr_status(struct gk20a_devfreq *gdevfreq) +{ + writel(PWR_PMU_IDLE_INTR_STATUS_RESET_VALUE, + gdevfreq->regs + PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET); +} + +static void gk20a_devfreq_update_utilization(struct gk20a_devfreq *gdevfreq) +{ + ktime_t now, last; + u64 busy_cycles, total_cycles; + u32 norm, intr_status; + + now = ktime_get(); + last = gdevfreq->time_last_update; + gdevfreq->total_time = 
ktime_us_delta(now, last); + + busy_cycles = gk20a_pmu_read_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY); + total_cycles = gk20a_pmu_read_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL); + intr_status = gk20a_pmu_read_idle_intr_status(gdevfreq); + + gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY); + gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL); + + if (intr_status != 0UL) { + norm = PMU_BUSY_CYCLES_NORM_MAX; + gk20a_pmu_clear_idle_intr_status(gdevfreq); + } else if (total_cycles == 0ULL || busy_cycles > total_cycles) { + norm = PMU_BUSY_CYCLES_NORM_MAX; + } else { + norm = (u32)div64_u64(busy_cycles * PMU_BUSY_CYCLES_NORM_MAX, + total_cycles); + } + + gdevfreq->busy_time = div_u64(gdevfreq->total_time * norm, PMU_BUSY_CYCLES_NORM_MAX); + gdevfreq->time_last_update = now; +} + +static int gk20a_devfreq_target(struct device *dev, unsigned long *freq, + u32 flags) +{ + struct nouveau_drm *drm = dev_get_drvdata(dev); + struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0); + struct nvkm_clk *base = nvkm_clk(subdev); + struct nvkm_pstate *pstates = base->func->pstates; + int nr_pstates = base->func->nr_pstates; + int i, ret; + + for (i = 0; i < nr_pstates - 1; i++) + if (pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV >= *freq) + break; + + ret = nvkm_clk_ustate(base, pstates[i].pstate, 0); + ret |= nvkm_clk_ustate(base, pstates[i].pstate, 1); + if (ret) { + nvkm_error(subdev, "cannot update clock\n"); + return ret; + } + + *freq = pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV; + + return 0; +} + +static int gk20a_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) +{ + struct nouveau_drm *drm = dev_get_drvdata(dev); + struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0); + struct nvkm_clk *base = nvkm_clk(subdev); + + *freq = nvkm_clk_read(base, nv_clk_src_gpc) * GK20A_CLK_GPC_MDIV; + + return 0; +} + +static void gk20a_devfreq_reset(struct gk20a_devfreq *gdevfreq) +{ + gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY); + gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL); + gk20a_pmu_clear_idle_intr_status(gdevfreq); + + gdevfreq->busy_time = 0; + gdevfreq->total_time = 0; + gdevfreq->time_last_update = ktime_get(); +} + +static int gk20a_devfreq_get_dev_status(struct device *dev, + struct devfreq_dev_status *status) +{ + struct nouveau_drm *drm = dev_get_drvdata(dev); + struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev); + + gk20a_devfreq_get_cur_freq(dev, &status->current_frequency); + + gk20a_devfreq_update_utilization(gdevfreq); + + status->busy_time = ktime_to_ns(gdevfreq->busy_time); + status->total_time = ktime_to_ns(gdevfreq->total_time); + + gk20a_devfreq_reset(gdevfreq); + + NV_DEBUG(drm, "busy %lu total %lu %lu %% freq %lu MHz\n", + status->busy_time, status->total_time, + status->busy_time / (status->total_time / 100), + status->current_frequency / 1000 / 1000); + + return 0; +} + +static struct devfreq_dev_profile gk20a_devfreq_profile = { + .timer = DEVFREQ_TIMER_DELAYED, + .polling_ms = 50, + .target = gk20a_devfreq_target, + .get_cur_freq = gk20a_devfreq_get_cur_freq, + .get_dev_status = gk20a_devfreq_get_dev_status, +}; + +int gk20a_devfreq_init(struct nvkm_clk *base, struct gk20a_devfreq **gdevfreq) +{ + struct nvkm_device *device = base->subdev.device; + struct nouveau_drm *drm = dev_get_drvdata(device->dev); + struct nvkm_device_tegra *tdev = device->func->tegra(device); + struct nvkm_pstate *pstates 
= base->func->pstates; + int nr_pstates = base->func->nr_pstates; + struct gk20a_devfreq *new_gdevfreq; + int i; + + new_gdevfreq = drmm_kzalloc(drm->dev, sizeof(struct gk20a_devfreq), GFP_KERNEL); + if (!new_gdevfreq) + return -ENOMEM; + + new_gdevfreq->regs = tdev->regs; + + for (i = 0; i < nr_pstates; i++) + dev_pm_opp_add(base->subdev.device->dev, + pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV, 0); + + gk20a_pmu_init_perfmon_counter(new_gdevfreq); + gk20a_devfreq_reset(new_gdevfreq); + + gk20a_devfreq_profile.initial_freq = + nvkm_clk_read(base, nv_clk_src_gpc) * GK20A_CLK_GPC_MDIV; + + new_gdevfreq->gov_data.upthreshold = 45; + new_gdevfreq->gov_data.downdifferential = 5; + + new_gdevfreq->devfreq = devm_devfreq_add_device(device->dev, + &gk20a_devfreq_profile, + DEVFREQ_GOV_SIMPLE_ONDEMAND, + &new_gdevfreq->gov_data); + if (IS_ERR(new_gdevfreq->devfreq)) + return PTR_ERR(new_gdevfreq->devfreq); + + *gdevfreq = new_gdevfreq; + + return 0; +} + +int gk20a_devfreq_resume(struct device *dev) +{ + struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev); + + if (!gdevfreq || !gdevfreq->devfreq) + return 0; + + return devfreq_resume_device(gdevfreq->devfreq); +} + +int gk20a_devfreq_suspend(struct device *dev) +{ + struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev); + + if (!gdevfreq || !gdevfreq->devfreq) + return 0; + + return devfreq_suspend_device(gdevfreq->devfreq); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.h b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.h new file mode 100644 index 000000000000..5b7ca8a7a5cd --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef __GK20A_DEVFREQ_H__ +#define __GK20A_DEVFREQ_H__ + +#include <linux/devfreq.h> + +struct gk20a_devfreq; + +#if defined(CONFIG_PM_DEVFREQ) +int gk20a_devfreq_init(struct nvkm_clk *base, struct gk20a_devfreq **devfreq); + +int gk20a_devfreq_resume(struct device *dev); +int gk20a_devfreq_suspend(struct device *dev); +#else +static inline int gk20a_devfreq_init(struct nvkm_clk *base, struct gk20a_devfreq **devfreq) +{ + return 0; +} + +static inline int gk20a_devfreq_resume(struct device dev) { return 0; } +static inline int gk20a_devfreq_suspend(struct device *dev) { return 0; } +#endif /* CONFIG_PM_DEVFREQ */ + +#endif /* __GK20A_DEVFREQ_H__ */ diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c index 7c33542f651b..fa8ca53acbd1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c @@ -27,6 +27,7 @@ #include <core/tegra.h> #include "priv.h" +#include "gk20a_devfreq.h" #include "gk20a.h" #define GPCPLL_CFG_SYNC_MODE BIT(2) @@ -869,6 +870,10 @@ gm20b_clk_init(struct nvkm_clk *base) return ret; } + ret = gk20a_devfreq_init(base, &clk->devfreq); + if (ret) + return ret; + return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.c new file mode 100644 index 000000000000..492b62c0ee96 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: MIT +#include <subdev/clk.h> +#include <subdev/timer.h> +#include <core/device.h> +#include <core/tegra.h> + +#include "priv.h" +#include "gk20a_devfreq.h" +#include "gk20a.h" +#include "gp10b.h" + +static int +gp10b_clk_init(struct nvkm_clk *base) +{ + struct gp10b_clk *clk = gp10b_clk(base); + struct nvkm_subdev 
*subdev = &clk->base.subdev; + int ret; + + /* Start with the highest frequency, matching the BPMP default */ + base->func->calc(base, &base->func->pstates[base->func->nr_pstates - 1].base); + ret = base->func->prog(base); + if (ret) { + nvkm_error(subdev, "cannot initialize clock\n"); + return ret; + } + + ret = gk20a_devfreq_init(base, &clk->devfreq); + if (ret) + return ret; + + return 0; +} + +static int +gp10b_clk_read(struct nvkm_clk *base, enum nv_clk_src src) +{ + struct gp10b_clk *clk = gp10b_clk(base); + struct nvkm_subdev *subdev = &clk->base.subdev; + + switch (src) { + case nv_clk_src_gpc: + return clk_get_rate(clk->clk) / GK20A_CLK_GPC_MDIV; + default: + nvkm_error(subdev, "invalid clock source %d\n", src); + return -EINVAL; + } +} + +static int +gp10b_clk_calc(struct nvkm_clk *base, struct nvkm_cstate *cstate) +{ + struct gp10b_clk *clk = gp10b_clk(base); + u32 target_rate = cstate->domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV; + + clk->new_rate = clk_round_rate(clk->clk, target_rate) / GK20A_CLK_GPC_MDIV; + + return 0; +} + +static int +gp10b_clk_prog(struct nvkm_clk *base) +{ + struct gp10b_clk *clk = gp10b_clk(base); + int ret; + + ret = clk_set_rate(clk->clk, clk->new_rate * GK20A_CLK_GPC_MDIV); + if (ret < 0) + return ret; + + clk->rate = clk_get_rate(clk->clk) / GK20A_CLK_GPC_MDIV; + + return 0; +} + +static struct nvkm_pstate +gp10b_pstates[] = { + { + .base = { + .domain[nv_clk_src_gpc] = 114750, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 216750, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 318750, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 420750, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 522750, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 624750, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 726750, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 828750, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 930750, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 1032750, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 1134750, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 1236750, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 1300500, + }, + }, +}; + +static const struct nvkm_clk_func +gp10b_clk = { + .init = gp10b_clk_init, + .read = gp10b_clk_read, + .calc = gp10b_clk_calc, + .prog = gp10b_clk_prog, + .tidy = gk20a_clk_tidy, + .pstates = gp10b_pstates, + .nr_pstates = ARRAY_SIZE(gp10b_pstates), + .domains = { + { nv_clk_src_gpc, 0xff, 0, "core", GK20A_CLK_GPC_MDIV }, + { nv_clk_src_max } + } +}; + +int +gp10b_clk_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, + struct nvkm_clk **pclk) +{ + struct nvkm_device_tegra *tdev = device->func->tegra(device); + const struct nvkm_clk_func *func = &gp10b_clk; + struct gp10b_clk *clk; + int ret, i; + + clk = kzalloc(sizeof(*clk), GFP_KERNEL); + if (!clk) + return -ENOMEM; + *pclk = &clk->base; + clk->clk = tdev->clk; + + /* Finish initializing the pstates */ + for (i = 0; i < func->nr_pstates; i++) { + INIT_LIST_HEAD(&func->pstates[i].list); + func->pstates[i].pstate = i + 1; + } + + ret = nvkm_clk_ctor(func, device, type, inst, true, &clk->base); + if (ret) + return ret; + + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.h b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.h new file mode 100644 index 000000000000..178e3bcdbbf7 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef __NVKM_CLK_GP10B_H__ +#define 
__NVKM_CLK_GP10B_H__ + +struct gp10b_clk { + /* currently applied parameters */ + struct nvkm_clk base; + struct gk20a_devfreq *devfreq; + struct clk *clk; + u32 rate; + + /* new parameters to apply */ + u32 new_rate; +}; + +#define gp10b_clk(p) container_of((p), struct gp10b_clk, base) + +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c index 8a286a9349ac..7ce1b65e2c1c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c @@ -279,7 +279,7 @@ nvkm_fb_ctor(const struct nvkm_fb_func *func, struct nvkm_device *device, mutex_init(&fb->tags.mutex); if (func->sysmem.flush_page_init) { - fb->sysmem.flush_page = alloc_page(GFP_KERNEL | __GFP_ZERO); + fb->sysmem.flush_page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); if (!fb->sysmem.flush_page) return -ENOMEM; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb100.c index 1c78c8853617..170776cc82fb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb100.c @@ -15,6 +15,9 @@ gb100_fb_sysmem_flush_page_init(struct nvkm_fb *fb) const u32 hshub = DRF_LO(NV_PFB_HSHUB0); struct nvkm_device *device = fb->subdev.device; + // Ensure that the address is within hardware limits + WARN_ON(fb->sysmem.flush_page_addr > DMA_BIT_MASK(52)); + nvkm_wr32(device, hshub + NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_HI, addr_hi); nvkm_wr32(device, hshub + NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_LO, addr_lo); nvkm_wr32(device, hshub + NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_HI, addr_hi); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb202.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb202.c index 848505026d02..a21bf19e1041 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb202.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb202.c @@ -13,6 +13,9 @@ gb202_fb_sysmem_flush_page_init(struct nvkm_fb *fb) struct nvkm_device *device = fb->subdev.device; const u64 addr = fb->sysmem.flush_page_addr; + // Ensure that the address is within hardware limits + WARN_ON(fb->sysmem.flush_page_addr > DMA_BIT_MASK(52)); + nvkm_wr32(device, NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI, upper_32_bits(addr)); nvkm_wr32(device, NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO, lower_32_bits(addr)); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c index 07db9b397ac1..64281a09fb39 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c @@ -80,6 +80,9 @@ gf100_fb_init_page(struct nvkm_fb *fb) void gf100_fb_sysmem_flush_page_init(struct nvkm_fb *fb) { + // Ensure that the address can actually fit in the register + WARN_ON(fb->sysmem.flush_page_addr > DMA_BIT_MASK(40)); + nvkm_wr32(fb->subdev.device, 0x100c10, fb->sysmem.flush_page_addr >> 8); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gh100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gh100.c index 2d8c51f882d5..8c9394048f25 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gh100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gh100.c @@ -13,6 +13,9 @@ gh100_fb_sysmem_flush_page_init(struct nvkm_fb *fb) const u64 addr = fb->sysmem.flush_page_addr >> NV_PFB_NISO_FLUSH_SYSMEM_ADDR_SHIFT; struct nvkm_device *device = fb->subdev.device; + // Ensure that the address is within hardware limits + WARN_ON(fb->sysmem.flush_page_addr > DMA_BIT_MASK(52)); + nvkm_wr32(device, NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI, 
upper_32_bits(addr)); nvkm_wr32(device, NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_LO, lower_32_bits(addr)); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c index a6efbd913c13..076d968b7297 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c @@ -214,6 +214,9 @@ nv50_fb_tags(struct nvkm_fb *base) static void nv50_fb_sysmem_flush_page_init(struct nvkm_fb *fb) { + // Ensure that the address can actually fit in the register + WARN_ON(fb->sysmem.flush_page_addr > DMA_BIT_MASK(40)); + nvkm_wr32(fb->subdev.device, 0x100c08, fb->sysmem.flush_page_addr >> 8); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c index 851fd847a2a9..ed15a4475181 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -21,9 +21,7 @@ */ #include "vmm.h" -#include <core/client.h> #include <subdev/fb.h> -#include <subdev/ltc.h> #include <subdev/timer.h> #include <engine/gr.h> @@ -111,13 +109,33 @@ gp100_vmm_pgt_pfn(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, nvkm_done(pt->memory); } +static inline u64 +gp100_vmm_comptag_nr(u64 size) +{ + return size >> 16; /* One comptag per 64KiB VRAM. */ +} + +static inline u64 +gp100_vmm_pte_comptagline_base(u64 addr) +{ + /* RM allocates enough comptags for all of VRAM, so use a 1:1 mapping. */ + return (1 + gp100_vmm_comptag_nr(addr)) << 36; /* NV_MMU_VER2_PTE_COMPTAGLINE */ +} + +static inline u64 +gp100_vmm_pte_comptagline_incr(u32 page_size) +{ + return gp100_vmm_comptag_nr(page_size) << 36; /* NV_MMU_VER2_PTE_COMPTAGLINE */ +} + static inline void gp100_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) { u64 data = (addr >> 4) | map->type; - map->type += ptes * map->ctag; + if (map->ctag) + data |= gp100_vmm_pte_comptagline_base(addr); while (ptes--) { VMM_WO064(pt, vmm, ptei++ * 8, data); @@ -142,7 +160,6 @@ gp100_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, while (ptes--) { const u64 data = (*map->dma++ >> 4) | map->type; VMM_WO064(pt, vmm, ptei++ * 8, data); - map->type += map->ctag; } nvkm_done(pt->memory); return; @@ -200,7 +217,8 @@ gp100_vmm_pd0_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, { u64 data = (addr >> 4) | map->type; - map->type += ptes * map->ctag; + if (map->ctag) + data |= gp100_vmm_pte_comptagline_base(addr); while (ptes--) { VMM_WO128(pt, vmm, ptei++ * 0x10, data, 0ULL); @@ -411,8 +429,6 @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, struct gp100_vmm_map_vn vn; struct gp100_vmm_map_v0 v0; } *args = argv; - struct nvkm_device *device = vmm->mmu->subdev.device; - struct nvkm_memory *memory = map->memory; u8 kind, kind_inv, priv, ro, vol; int kindn, aper, ret = -ENOSYS; const u8 *kindm; @@ -449,29 +465,24 @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, return -EINVAL; } + /* Handle compression. 
*/ if (kindm[kind] != kind) { - u64 tags = nvkm_memory_size(memory) >> 16; - if (aper != 0 || !(page->type & NVKM_VMM_PAGE_COMP)) { - VMM_DEBUG(vmm, "comp %d %02x", aper, page->type); - return -EINVAL; - } - - if (!map->no_comp) { - ret = nvkm_memory_tags_get(memory, device, tags, - nvkm_ltc_tags_clear, - &map->tags); - if (ret) { - VMM_DEBUG(vmm, "comp %d", ret); - return ret; + struct nvkm_device *device = vmm->mmu->subdev.device; + + /* Compression is only supported when using GSP-RM, as + * PMU firmware is required in order to initialise the + * compbit backing store. + */ + if (nvkm_gsp_rm(device->gsp)) { + /* Turing GPUs require PTE_COMPTAGLINE to be filled, + * in addition to specifying a compressed kind. + */ + if (device->card_type < GA100) { + map->ctag = gp100_vmm_pte_comptagline_incr(1 << map->page->shift); + map->next |= map->ctag; } - } - - if (!map->no_comp && map->tags->mn) { - tags = map->tags->mn->offset + (map->offset >> 16); - map->ctag |= ((1ULL << page->shift) >> 16) << 36; - map->type |= tags << 36; - map->next |= map->ctag; } else { + /* Revert to non-compressed kind. */ kind = kindm[kind]; } } @@ -592,8 +603,8 @@ gp100_vmm = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, { 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx }, - { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SVxC }, - { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SVxC }, + { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SVxx }, + { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SVxx }, { 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SVHx }, {} } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c index e081239afe58..5791d134962b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c @@ -34,8 +34,8 @@ gp10b_vmm = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, { 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx }, - { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SxHC }, - { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SxHC }, + { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SxHx }, + { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SxHx }, { 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SxHx }, {} } |
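The vmmgp100.c hunk above drops the nvkm_memory_tags_get() allocation and instead derives the PTE COMPTAGLINE field directly from the VRAM address, one comptag per 64 KiB, placed at bit 36 of the PTE. A stand-alone sketch of that encoding; the constant names are assumptions made for the example, only the 16 and 36 shifts come from the patch:

#include <stdint.h>
#include <stdio.h>

#define COMPTAG_SHIFT	16	/* one comptag per 64 KiB of VRAM */
#define COMPTAGLINE_LSB	36	/* PTE COMPTAGLINE field position */

/* Restates gp100_vmm_pte_comptagline_base(): 1:1 address-to-tag mapping. */
static uint64_t comptagline_base(uint64_t vram_addr)
{
	return (1 + (vram_addr >> COMPTAG_SHIFT)) << COMPTAGLINE_LSB;
}

/* Restates gp100_vmm_pte_comptagline_incr(): tags consumed per mapped page. */
static uint64_t comptagline_incr(uint32_t page_size)
{
	return (uint64_t)(page_size >> COMPTAG_SHIFT) << COMPTAGLINE_LSB;
}

int main(void)
{
	/* A 2 MiB large page at VRAM offset 1 GiB spans 32 comptags. */
	printf("base %#llx incr %#llx\n",
	       (unsigned long long)comptagline_base(1ULL << 30),
	       (unsigned long long)comptagline_incr(2U << 20));
	return 0;
}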

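The gk20a_devfreq.c code added above converts the PMU busy/total idle counters into the busy_time and total_time consumed by devfreq's simple_ondemand governor, clamping to fully busy on counter overflow. A stand-alone restatement of that normalisation, assuming microsecond times; the function name is illustrative, and the 1000 matches PMU_BUSY_CYCLES_NORM_MAX in the patch:

#include <stdint.h>
#include <stdio.h>

#define BUSY_NORM_MAX 1000U	/* PMU_BUSY_CYCLES_NORM_MAX above */

/* Scale elapsed wall-clock time by the busy/total counter ratio, treating
 * overflow or inconsistent readings as fully busy, as
 * gk20a_devfreq_update_utilization() does.
 */
static uint64_t busy_time_from_counters(uint64_t busy_cycles, uint64_t total_cycles,
					uint64_t total_time_us, int counter_overflowed)
{
	uint32_t norm;

	if (counter_overflowed || total_cycles == 0 || busy_cycles > total_cycles)
		norm = BUSY_NORM_MAX;
	else
		norm = (uint32_t)(busy_cycles * BUSY_NORM_MAX / total_cycles);

	return total_time_us * norm / BUSY_NORM_MAX;
}

int main(void)
{
	/* 250 busy cycles out of 1000 over a 50 ms window -> 12.5 ms busy. */
	printf("%llu us busy\n",
	       (unsigned long long)busy_time_from_counters(250, 1000, 50000, 0));
	return 0;
}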