From f1f6f48a338cdab96efef712dbef6b1e279583e2 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 25 Apr 2023 14:13:36 -0400 Subject: drm/amdgpu: Set GTT size equal to TTM mem limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the helper function in TTM to get TTM mem limit and set GTT size to be equal to TTL mem limit. Signed-off-by: Mukul Joshi Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 2cd081cbf706..f61f07575f63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1803,26 +1803,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); - /* Compute GTT size, either based on 1/2 the size of RAM size - * or whatever the user passed on module init */ - if (amdgpu_gtt_size == -1) { - struct sysinfo si; - - si_meminfo(&si); - /* Certain GL unit tests for large textures can cause problems - * with the OOM killer since there is no way to link this memory - * to a process. This was originally mitigated (but not necessarily - * eliminated) by limiting the GTT size. The problem is this limit - * is often too low for many modern games so just make the limit 1/2 - * of system memory which aligns with TTM. The OOM accounting needs - * to be addressed, but we shouldn't prevent common 3D applications - * from being usable just to potentially mitigate that corner case. - */ - gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - (u64)si.totalram * si.mem_unit / 2); - } else { + /* Compute GTT size, either based on TTM limit + * or whatever the user passed on module init. + */ + if (amdgpu_gtt_size == -1) + gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT; + else gtt_size = (uint64_t)amdgpu_gtt_size << 20; - } /* Initialize GTT memory pool */ r = amdgpu_gtt_mgr_init(adev, gtt_size); -- cgit From 2f77b9a242a2e01822efc80c8b63eaa31df0f8b4 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 9 May 2022 21:45:50 -0400 Subject: drm/amdkfd: Update MQD management on multi XCC setup Update MQD management for both HIQ and user-mode compute queues on a multi XCC setup. MQDs needs to be allocated, initialized, loaded and destroyed for each XCC in the KFD node. v2: squash in fix "drm/amdkfd: Fix SDMA+HIQ HQD allocation on GFX9.4.3" Signed-off-by: Mukul Joshi Signed-off-by: Amber Lin Tested-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 51 +++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index f61f07575f63..6bbe3b89aef5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -800,6 +800,41 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev, sg_free_table(ttm->sg); } +/* + * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ... + * MQDn+CtrlStackn where n is the number of XCCs per partition. + * pages_per_xcc is the size of one MQD+CtrlStack. The first page is MQD + * and uses memory type default, UC. The rest of pages_per_xcc are + * Ctrl stack and modify their memory type to NC. + */ +static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev, + struct ttm_tt *ttm, uint64_t flags) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + uint64_t total_pages = ttm->num_pages; + int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp); + uint64_t page_idx, pages_per_xcc = total_pages / num_xcc; + int i; + uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | + AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); + + for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) { + /* MQD page: use default flags */ + amdgpu_gart_bind(adev, + gtt->offset + (page_idx << PAGE_SHIFT), + 1, >t->ttm.dma_address[page_idx], flags); + /* + * Ctrl pages - modify the memory type to NC (ctrl_flags) from + * the second page of the BO onward. + */ + amdgpu_gart_bind(adev, + gtt->offset + ((page_idx + 1) << PAGE_SHIFT), + pages_per_xcc - 1, + >t->ttm.dma_address[page_idx + 1], + ctrl_flags); + } +} + static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, struct ttm_buffer_object *tbo, uint64_t flags) @@ -812,21 +847,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, flags |= AMDGPU_PTE_TMZ; if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) { - uint64_t page_idx = 1; - - amdgpu_gart_bind(adev, gtt->offset, page_idx, - gtt->ttm.dma_address, flags); - - /* The memory type of the first page defaults to UC. Now - * modify the memory type to NC from the second page of - * the BO onward. - */ - flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; - flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); - - amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT), - ttm->num_pages - page_idx, - &(gtt->ttm.dma_address[page_idx]), flags); + amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags); } else { amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, gtt->ttm.dma_address, flags); -- cgit From 228ce176434b0f61451019065393040d58e1668d Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Fri, 27 Jan 2023 21:57:00 -0500 Subject: drm/amdgpu: Handle VRAM dependencies on GFXIP9.4.3 [For 1P NPS1 mode driver bringup] Changes required to initialize the amdgpu driver with frontdoor firmware loading and discovery=2 with the native mode SBIOS that enables CPU GPU unified interleaved memory. sudo modprobe amdgpu discovery=2 Once PSP TMR region is reported via the ACPI interface, the dependency on the ip_discovery.bin will be removed. Choice of where to allocate driver table is given to each IP version. In general, both GTT and VRAM domains will be considered. If one of the tables has a strict restriction for VRAM domain, then only VRAM domain is considered. Reviewed-by: Felix Kuehling (lijo: Modified the handling for SMU Tables) Signed-off-by: Lijo Lazar Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 89 ++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 36 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 6bbe3b89aef5..bc11ae56bba5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1708,15 +1708,20 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; } - ret = amdgpu_bo_create_kernel_at(adev, - adev->gmc.real_vram_size - adev->mman.discovery_tmr_size, - adev->mman.discovery_tmr_size, - &adev->mman.discovery_memory, - NULL); - if (ret) { - DRM_ERROR("alloc tmr failed(%d)!\n", ret); - amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); - return ret; + if (!adev->gmc.is_app_apu) { + ret = amdgpu_bo_create_kernel_at(adev, + adev->gmc.real_vram_size - + adev->mman.discovery_tmr_size, + adev->mman.discovery_tmr_size, + &adev->mman.discovery_memory, + NULL); + if (ret) { + DRM_ERROR("alloc tmr failed(%d)!\n", ret); + amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); + return ret; + } + } else { + DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n"); } return 0; @@ -1765,10 +1770,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base, adev->gmc.visible_vram_size); - else + else if (!adev->gmc.is_app_apu) #endif adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, adev->gmc.visible_vram_size); + else + DRM_DEBUG_DRIVER("No need to ioremap when real vram size is 0\n"); #endif /* @@ -1803,23 +1810,32 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) * This is used for VGA emulation and pre-OS scanout buffers to * avoid display artifacts while transitioning between pre-OS * and driver. */ - r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size, - &adev->mman.stolen_vga_memory, - NULL); - if (r) - return r; - r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, - adev->mman.stolen_extended_size, - &adev->mman.stolen_extended_memory, - NULL); - if (r) - return r; - r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset, - adev->mman.stolen_reserved_size, - &adev->mman.stolen_reserved_memory, - NULL); - if (r) - return r; + if (!adev->gmc.is_app_apu) { + r = amdgpu_bo_create_kernel_at(adev, 0, + adev->mman.stolen_vga_size, + &adev->mman.stolen_vga_memory, + NULL); + if (r) + return r; + + r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, + adev->mman.stolen_extended_size, + &adev->mman.stolen_extended_memory, + NULL); + + if (r) + return r; + + r = amdgpu_bo_create_kernel_at(adev, + adev->mman.stolen_reserved_offset, + adev->mman.stolen_reserved_size, + &adev->mman.stolen_reserved_memory, + NULL); + if (r) + return r; + } else { + DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n"); + } DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); @@ -1866,7 +1882,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_ERROR("Failed initializing oa heap.\n"); return r; } - if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mman.sdma_access_bo, NULL, @@ -1887,13 +1902,15 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_ttm_training_reserve_vram_fini(adev); /* return the stolen vga memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); - amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); - /* return the IP Discovery TMR memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); - if (adev->mman.stolen_reserved_size) - amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, - NULL, NULL); + if (!adev->gmc.is_app_apu) { + amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); + /* return the IP Discovery TMR memory back to VRAM */ + amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); + if (adev->mman.stolen_reserved_size) + amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, + NULL, NULL); + } amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr); amdgpu_ttm_fw_reserve_vram_fini(adev); @@ -1935,7 +1952,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) int r; if (!adev->mman.initialized || amdgpu_in_reset(adev) || - adev->mman.buffer_funcs_enabled == enable) + adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu) return; if (enable) { -- cgit From db3b5cb64a9ca301d14ed027e470834316720e42 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 24 Feb 2023 18:01:38 +0530 Subject: drm/amdgpu: Use apt name for FW reserved region Use the generic term fw_reserved_memory for FW reserve region. This region may also hold discovery TMR in addition to other reserve regions. This region size could be larger than discovery tmr size, hence don't change the discovery tmr size based on this. Signed-off-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 36 +++++++++++++++++---------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index bc11ae56bba5..c8a2d030f226 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1644,14 +1644,15 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) return 0; } -static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev) +static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev, + uint32_t reserve_size) { struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; memset(ctx, 0, sizeof(*ctx)); ctx->c2p_train_data_offset = - ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M); + ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M); ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); ctx->train_data_size = @@ -1669,9 +1670,10 @@ static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev) */ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) { - int ret; struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; bool mem_train_support = false; + uint32_t reserve_size = 0; + int ret; if (!amdgpu_sriov_vf(adev)) { if (amdgpu_atomfirmware_mem_training_supported(adev)) @@ -1687,14 +1689,15 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) * Otherwise, fallback to legacy approach to check and reserve tmr block for ip * discovery data and G6 memory training data respectively */ - adev->mman.discovery_tmr_size = - amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); - if (!adev->mman.discovery_tmr_size) - adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET; + if (adev->bios) + reserve_size = + amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); + if (!reserve_size) + reserve_size = DISCOVERY_TMR_OFFSET; if (mem_train_support) { /* reserve vram for mem train according to TMR location */ - amdgpu_ttm_training_data_block_init(adev); + amdgpu_ttm_training_data_block_init(adev, reserve_size); ret = amdgpu_bo_create_kernel_at(adev, ctx->c2p_train_data_offset, ctx->train_data_size, @@ -1709,15 +1712,13 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) } if (!adev->gmc.is_app_apu) { - ret = amdgpu_bo_create_kernel_at(adev, - adev->gmc.real_vram_size - - adev->mman.discovery_tmr_size, - adev->mman.discovery_tmr_size, - &adev->mman.discovery_memory, - NULL); + ret = amdgpu_bo_create_kernel_at( + adev, adev->gmc.real_vram_size - reserve_size, + reserve_size, &adev->mman.fw_reserved_memory, NULL); if (ret) { DRM_ERROR("alloc tmr failed(%d)!\n", ret); - amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, + NULL, NULL); return ret; } } else { @@ -1905,8 +1906,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) if (!adev->gmc.is_app_apu) { amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); - /* return the IP Discovery TMR memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); + /* return the FW reserved memory back to VRAM */ + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, + NULL); if (adev->mman.stolen_reserved_size) amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, NULL, NULL); -- cgit From 1e03322cfef9b83aa87ea0a508588f9f05a47dfc Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 27 Feb 2023 11:16:09 -0500 Subject: drm/amdgpu: Set TTM pools for memory partitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For native mode only, create TTM pool for each memory partition to store the NUMA node id, then the TTM pool will be selected using memory partition id to allocate memory from the correct partition. Acked-by: Christian König (rajneesh: changed need_swiotlb and need_dma32 to false for pool init) Reviewed-by: Felix Kuehling Acked-and-tested-by: Mukul Joshi Signed-off-by: Philip Yang Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 61 +++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index c8a2d030f226..7674109810b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -631,6 +631,7 @@ struct amdgpu_ttm_tt { struct task_struct *usertask; uint32_t userflags; bool bound; + int32_t pool_id; }; #define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm) @@ -1059,6 +1060,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, return NULL; } gtt->gobj = &bo->base; + gtt->pool_id = NUMA_NO_NODE; if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) caching = ttm_write_combined; @@ -1085,6 +1087,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); + struct ttm_pool *pool; pgoff_t i; int ret; @@ -1099,7 +1102,11 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) return 0; - ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx); + if (adev->mman.ttm_pools && gtt->pool_id >= 0) + pool = &adev->mman.ttm_pools[gtt->pool_id]; + else + pool = &adev->mman.bdev.pool; + ret = ttm_pool_alloc(pool, ttm, ctx); if (ret) return ret; @@ -1120,6 +1127,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, { struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); struct amdgpu_device *adev; + struct ttm_pool *pool; pgoff_t i; amdgpu_ttm_backend_unbind(bdev, ttm); @@ -1138,7 +1146,13 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, ttm->pages[i]->mapping = NULL; adev = amdgpu_ttm_adev(bdev); - return ttm_pool_free(&adev->mman.bdev.pool, ttm); + + if (adev->mman.ttm_pools && gtt->pool_id >= 0) + pool = &adev->mman.ttm_pools[gtt->pool_id]; + else + pool = &adev->mman.bdev.pool; + + return ttm_pool_free(pool, ttm); } /** @@ -1728,6 +1742,41 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) return 0; } +static int amdgpu_ttm_pools_init(struct amdgpu_device *adev) +{ + int i; + + if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions) + return 0; + + adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions, + sizeof(*adev->mman.ttm_pools), + GFP_KERNEL); + if (!adev->mman.ttm_pools) + return -ENOMEM; + + for (i = 0; i < adev->gmc.num_mem_partitions; i++) { + ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev, + adev->gmc.mem_partitions[i].numa.node, + false, false); + } + return 0; +} + +static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev) +{ + int i; + + if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools) + return; + + for (i = 0; i < adev->gmc.num_mem_partitions; i++) + ttm_pool_fini(&adev->mman.ttm_pools[i]); + + kfree(adev->mman.ttm_pools); + adev->mman.ttm_pools = NULL; +} + /* * amdgpu_ttm_init - Init the memory management (ttm) as well as various * gtt/vram related fields. @@ -1754,6 +1803,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_ERROR("failed initializing buffer object driver(%d).\n", r); return r; } + + r = amdgpu_ttm_pools_init(adev); + if (r) { + DRM_ERROR("failed to init ttm pools(%d).\n", r); + return r; + } adev->mman.initialized = true; /* Initialize VRAM pool with all of VRAM divided into pages */ @@ -1901,6 +1956,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) if (!adev->mman.initialized) return; + amdgpu_ttm_pools_fini(adev); + amdgpu_ttm_training_reserve_vram_fini(adev); /* return the stolen vga memory back to VRAM */ if (!adev->gmc.is_app_apu) { -- cgit From fcfefd85f18a0004c7c7b499f0701fd2c76d4c68 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Mon, 27 Feb 2023 20:08:29 -0500 Subject: drm/amdkfd: Native mode memory partition support For native mode, after amdgpu_bo is created on CPU domain, then call amdgpu_ttm_tt_set_mem_pool to select the TTM pool using bo->mem_id. ttm_bo_validate will allocate the memory to the correct memory partition before mapping to GPUs. Reviewed-by: Felix Kuehling Acked-and-tested-by: Mukul Joshi Signed-off-by: Philip Yang Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 7674109810b0..3933432daaac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1155,6 +1155,24 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, return ttm_pool_free(pool, ttm); } +/** + * amdgpu_ttm_tt_set_mem_pool - Set the TTM memory pool for the TTM BO + * @tbo: The ttm_buffer_object that backs the VRAM bo + * @mem_id: to select the initialized ttm pool corresponding to the memory partition + */ +int amdgpu_ttm_tt_set_mem_pool(struct ttm_buffer_object *tbo, int mem_id) +{ + struct ttm_tt *ttm = tbo->ttm; + struct amdgpu_ttm_tt *gtt; + + if (!ttm && !ttm_tt_is_populated(ttm)) + return -EINVAL; + + gtt = ttm_to_amdgpu_ttm_tt(ttm); + gtt->pool_id = mem_id; + return 0; +} + /** * amdgpu_ttm_tt_get_userptr - Return the userptr GTT ttm_tt for the current * task -- cgit From a0ba127960982b8827ba8b410c272ec8f3ee7e6a Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 3 Mar 2023 18:03:00 +0530 Subject: drm/amdgpu: Fix unmapping of aperture When aperture size is zero, there is no mapping done. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 3933432daaac..09d1a98bd11e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1844,12 +1844,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base, adev->gmc.visible_vram_size); - else if (!adev->gmc.is_app_apu) + else if (adev->gmc.is_app_apu) + DRM_DEBUG_DRIVER( + "No need to ioremap when real vram size is 0\n"); + else #endif adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, adev->gmc.visible_vram_size); - else - DRM_DEBUG_DRIVER("No need to ioremap when real vram size is 0\n"); #endif /* -- cgit From 53c5692e7a3c8e8eed3ec6b876a3c982d217a5d7 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 26 Jan 2023 18:50:09 -0500 Subject: drm/amdkfd: Alloc memory of GPU support memory partition For dGPU mode VRAM allocation, create amdgpu_bo from amdgpu_vm->mem_id, to alloc from the correct memory range. For APU mode VRAM allocation, set alloc domain to GTT, and set bp->mem_id_plus1 from amdgpu_vm->mem_id + 1 to create amdgpu_bo, to allocate system memory from correct NUMA node. For GTT allocation, use mem_id -1 to allocate system memory from any NUMA nodes. Remove amdgpu_ttm_tt_set_mem_pool, to avoid the confusion that memory maybe allocated from different mem_id. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 09d1a98bd11e..129c593cb2bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1060,7 +1060,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, return NULL; } gtt->gobj = &bo->base; - gtt->pool_id = NUMA_NO_NODE; + gtt->pool_id = abo->mem_id; if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) caching = ttm_write_combined; @@ -1155,24 +1155,6 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, return ttm_pool_free(pool, ttm); } -/** - * amdgpu_ttm_tt_set_mem_pool - Set the TTM memory pool for the TTM BO - * @tbo: The ttm_buffer_object that backs the VRAM bo - * @mem_id: to select the initialized ttm pool corresponding to the memory partition - */ -int amdgpu_ttm_tt_set_mem_pool(struct ttm_buffer_object *tbo, int mem_id) -{ - struct ttm_tt *ttm = tbo->ttm; - struct amdgpu_ttm_tt *gtt; - - if (!ttm && !ttm_tt_is_populated(ttm)) - return -EINVAL; - - gtt = ttm_to_amdgpu_ttm_tt(ttm); - gtt->pool_id = mem_id; - return 0; -} - /** * amdgpu_ttm_tt_get_userptr - Return the userptr GTT ttm_tt for the current * task -- cgit From 3ebfd221c1a83e5f0edadb87d173d8fd93d1d125 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 8 Mar 2023 11:57:00 -0500 Subject: drm/amdkfd: Store xcp partition id to amdgpu bo For memory accounting per compute partition and export drm amdgpu bo and then import to KFD, we need the xcp id to account the memory usage or find the KFD node of the original amdgpu bo to create the KFD bo on the correct adev KFD node. Set xcp_id_plus1 of amdgpu_bo_param to create bo and store xcp_id to amddgpu bo. Add helper macro to get the mem_id from adev and xcp_id. v2: squash in fix ("drm/amdgpu: Fix BO creation failure on GFX 9.4.3 dGPU") Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 129c593cb2bd..23101c82519a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1051,6 +1051,7 @@ static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev, static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_ttm_tt *gtt; enum ttm_caching caching; @@ -1060,7 +1061,10 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, return NULL; } gtt->gobj = &bo->base; - gtt->pool_id = abo->mem_id; + if (adev->gmc.mem_partitions && abo->xcp_id >= 0) + gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id); + else + gtt->pool_id = abo->xcp_id; if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) caching = ttm_write_combined; -- cgit From 45b3a914d40e63d2c9e3a3e02fb2014be975b9b0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 16 May 2023 17:16:30 -0400 Subject: drm/amdgpu/gmc9: fix 64 bit division in partition code Rework logic or use do_div() to avoid problems on 32 bit. v2: add a missing case for XCP macro v3: fix out of bounds array access v4: fix xcp handling harder Acked-by: Guchun Chen (v1) Reviewed-by: Mukul Joshi (v3) Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 23101c82519a..902773ce41b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -814,11 +814,14 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev, struct amdgpu_ttm_tt *gtt = (void *)ttm; uint64_t total_pages = ttm->num_pages; int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp); - uint64_t page_idx, pages_per_xcc = total_pages / num_xcc; + uint64_t page_idx, pages_per_xcc; int i; uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); + pages_per_xcc = total_pages; + do_div(pages_per_xcc, num_xcc); + for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) { /* MQD page: use default flags */ amdgpu_gart_bind(adev, -- cgit From 01c3f464743b64e6e65cb9bad951458986819a42 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 17 May 2023 20:10:48 +0530 Subject: drm/amd/amdgpu: Fix errors & warnings in amdgpu_ttm.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix below checkpatch insisted error & warnings: ERROR: Macros with complex values should be enclosed in parentheses WARNING: Prefer 'unsigned int' to bare use of 'unsigned' WARNING: braces {} are not necessary for single statement blocks WARNING: Block comments use a trailing */ on a separate line WARNING: Missing a blank line after declarations Cc: Alex Deucher Cc: Christian König Signed-off-by: Srinivasan Shanmugam Acked-by: Luben Tuikov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 902773ce41b5..b8adcebb0d89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -65,7 +65,7 @@ MODULE_IMPORT_NS(DMA_BUF); -#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128 +#define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128) static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, struct ttm_tt *ttm, @@ -184,11 +184,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, struct ttm_resource *mem, struct amdgpu_res_cursor *mm_cur, - unsigned window, struct amdgpu_ring *ring, + unsigned int window, struct amdgpu_ring *ring, bool tmz, uint64_t *size, uint64_t *addr) { struct amdgpu_device *adev = ring->adev; - unsigned offset, num_pages, num_dw, num_bytes; + unsigned int offset, num_pages, num_dw, num_bytes; uint64_t src_addr, dst_addr; struct amdgpu_job *job; void *cpu_addr; @@ -1060,9 +1060,9 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, enum ttm_caching caching; gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL); - if (gtt == NULL) { + if (!gtt) return NULL; - } + gtt->gobj = &bo->base; if (adev->gmc.mem_partitions && abo->xcp_id >= 0) gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id); @@ -1847,9 +1847,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) *place on the VRAM, so reserve it early. */ r = amdgpu_ttm_fw_reserve_vram_init(adev); - if (r) { + if (r) return r; - } /* *The reserved vram for driver must be pinned to the specified @@ -1873,7 +1872,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* allocate memory as required for VGA * This is used for VGA emulation and pre-OS scanout buffers to * avoid display artifacts while transitioning between pre-OS - * and driver. */ + * and driver. + */ if (!adev->gmc.is_app_apu) { r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size, @@ -1902,7 +1902,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } DRM_INFO("amdgpu: %uM of VRAM memory ready\n", - (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); + (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024))); /* Compute GTT size, either based on TTM limit * or whatever the user passed on module init. @@ -1919,7 +1919,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } DRM_INFO("amdgpu: %uM of GTT memory ready.\n", - (unsigned)(gtt_size / (1024 * 1024))); + (unsigned int)(gtt_size / (1024 * 1024))); /* Initialize preemptible memory pool */ r = amdgpu_preempt_mgr_init(adev); @@ -1961,6 +1961,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) void amdgpu_ttm_fini(struct amdgpu_device *adev) { int idx; + if (!adev->mman.initialized) return; @@ -2089,10 +2090,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, bool vm_needs_flush, bool tmz) { struct amdgpu_device *adev = ring->adev; - unsigned num_loops, num_dw; + unsigned int num_loops, num_dw; struct amdgpu_job *job; uint32_t max_bytes; - unsigned i; + unsigned int i; int r; if (!direct_submit && !ring->sched.ready) { -- cgit From 109b4d8cfe4279da1cbcbcd99ae54cb2b2aee521 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Mon, 15 May 2023 09:34:28 +0800 Subject: drm/amdgpu: remove unnecessary (void*) conversions No need cast (void*) to (struct amdgpu_device *). Signed-off-by: Su Hui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b8adcebb0d89..509b26e9d0c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -2257,7 +2257,7 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type) static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused) { - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; + struct amdgpu_device *adev = m->private; return ttm_pool_debugfs(&adev->mman.bdev.pool, m); } -- cgit From 73ade646c545feda7c5df9b9c78c5d011ce76463 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 May 2023 15:51:38 +0200 Subject: drm/amdgpu: stop including swiotlb.h amdgpu does not need swiotlb.h, so stop including it. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 509b26e9d0c4..0abad5f89421 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include -- cgit From c3aaca43fb07ce05f3a3bd85288eb3d500469be5 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Wed, 17 May 2023 15:53:50 -0400 Subject: drm/amdgpu: Add a low priority scheduler for VRAM clearing Add a low priority DRM scheduler for VRAM clearing instead of using the exisiting high priority scheduler. Use the high priority scheduler for migrations and evictions. Signed-off-by: Mukul Joshi Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 37 +++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0abad5f89421..473eeac1f03b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -383,7 +383,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { struct dma_fence *wipe_fence = NULL; - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence); + r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence, + false); if (r) { goto error; } else if (wipe_fence) { @@ -2036,8 +2037,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) r); return; } + + r = drm_sched_entity_init(&adev->mman.delayed, + DRM_SCHED_PRIORITY_NORMAL, &sched, + 1, NULL); + if (r) { + DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", + r); + goto error_free_entity; + } } else { drm_sched_entity_destroy(&adev->mman.entity); + drm_sched_entity_destroy(&adev->mman.delayed); dma_fence_put(man->move); man->move = NULL; } @@ -2049,6 +2060,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) size = adev->gmc.visible_vram_size; man->size = size; adev->mman.buffer_funcs_enabled = enable; + + return; + +error_free_entity: + drm_sched_entity_destroy(&adev->mman.entity); } static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, @@ -2056,14 +2072,16 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, unsigned int num_dw, struct dma_resv *resv, bool vm_needs_flush, - struct amdgpu_job **job) + struct amdgpu_job **job, + bool delayed) { enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED; int r; - - r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, + struct drm_sched_entity *entity = delayed ? &adev->mman.delayed : + &adev->mman.entity; + r = amdgpu_job_alloc_with_ib(adev, entity, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4, pool, job); if (r) @@ -2104,7 +2122,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw, - resv, vm_needs_flush, &job); + resv, vm_needs_flush, &job, false); if (r) return r; @@ -2140,7 +2158,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, uint64_t dst_addr, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, - bool vm_needs_flush) + bool vm_needs_flush, bool delayed) { struct amdgpu_device *adev = ring->adev; unsigned int num_loops, num_dw; @@ -2153,7 +2171,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8); r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush, - &job); + &job, delayed); if (r) return r; @@ -2176,7 +2194,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, - struct dma_fence **f) + struct dma_fence **f, + bool delayed) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; @@ -2205,7 +2224,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, goto error; r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv, - &next, true); + &next, true, delayed); if (r) goto error; -- cgit From 9535a86a4072babc37dc6bdadae52bdbb88166f5 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang Date: Wed, 17 May 2023 14:15:05 +0800 Subject: drm/amdgpu: bypass bios dependent operations Since bios reading does not work currently so just bypass all operations related to bios v2: hardcode the vram info for APP_APU case (hawking) v3: correct the vram_width with channel number * channel size (lijo) Signed-off-by: Shiwu Zhang Reviewed-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 473eeac1f03b..d2d0d27f9053 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1696,7 +1696,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) uint32_t reserve_size = 0; int ret; - if (!amdgpu_sriov_vf(adev)) { + if (adev->bios && !amdgpu_sriov_vf(adev)) { if (amdgpu_atomfirmware_mem_training_supported(adev)) mem_train_support = true; else @@ -1713,7 +1713,10 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) if (adev->bios) reserve_size = amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); - if (!reserve_size) + + if (!adev->bios && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + reserve_size = max(reserve_size, (uint32_t)280 << 20); + else if (!reserve_size) reserve_size = DISCOVERY_TMR_OFFSET; if (mem_train_support) { -- cgit