From 8e6d0b699635ecc5bad8629b8da685621aa416ca Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Wed, 8 Sep 2021 22:12:55 -0400 Subject: drm/amdgpu: Get atomicOps info from Host for sriov setup The AtomicOp Requester Enable bit is reserved in VFs and the PF value applies to all associated VFs. so guest driver can not directly enable the atomicOps for VF, it depends on PF to enable it. In current design, amdgpu driver will get the enabled atomicOps bits through private pf2vf data Signed-off-by: shaoyunl Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 41c6b3aacd37..aefa96bcbbab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3530,17 +3530,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); - /* enable PCIE atomic ops */ - r = pci_enable_atomic_ops_to_root(adev->pdev, - PCI_EXP_DEVCAP2_ATOMIC_COMP32 | - PCI_EXP_DEVCAP2_ATOMIC_COMP64); - if (r) { - adev->have_atomics_support = false; - DRM_INFO("PCIE atomic ops is not supported\n"); - } else { - adev->have_atomics_support = true; - } - amdgpu_device_get_pcie_info(adev); if (amdgpu_mcbp) @@ -3563,6 +3552,19 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + /* enable PCIE atomic ops */ + if (amdgpu_sriov_vf(adev)) + adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *) + adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags == + (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); + else + adev->have_atomics_support = + !pci_enable_atomic_ops_to_root(adev->pdev, + 
PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64); + if (!adev->have_atomics_support) + dev_info(adev->dev, "PCIE atomic ops is not supported\n"); + /* doorbell bar mapping and doorbell index init*/ amdgpu_device_doorbell_init(adev); -- cgit From 9cec53c18a3170c7e5673c414da56aeecee94832 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 7 Sep 2021 11:32:22 -0400 Subject: drm/amdgpu: move iommu_resume before ip init/resume Separate iommu_resume from kfd_resume, and move it before other amdgpu ip init/resume. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277 Signed-off-by: James Zhu Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index aefa96bcbbab..620c7b773b15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2394,6 +2394,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; + r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + goto init_failed; + r = amdgpu_device_ip_hw_init_phase1(adev); if (r) goto init_failed; @@ -3148,6 +3152,10 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) { int r; + r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + return r; + r = amdgpu_device_ip_resume_phase1(adev); if (r) return r; @@ -4603,6 +4611,10 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, dev_warn(tmp_adev->dev, "asic atom init failed!"); } else { dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); + r = amdgpu_amdkfd_resume_iommu(tmp_adev); + if (r) + goto out; + r = amdgpu_device_ip_resume_phase1(tmp_adev); if (r) goto out; -- cgit From 6effad8abe0ba4db3d9c58ed585127858a990f35 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Sat, 18 Sep 2021 13:43:41 
+0800 Subject: drm/amdgpu: move amdgpu_virt_release_full_gpu to fini_early stage adev->rmmio is set to be NULL in amdgpu_device_unmap_mmio to prevent access after pci_remove, however, in SRIOV case, amdgpu_virt_release_full_gpu will still use adev->rmmio for access after amdgpu_device_unmap_mmio. The patch is to move such SRIOV calling earlier to fini_early stage. Fixes: 07775fc13878 ("drm/amdgpu: Unmap all MMIO mappings") Cc: Andrey Grodzovsky Signed-off-by: Leslie Shi Signed-off-by: Guchun Chen Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 620c7b773b15..48089dc0180b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2745,6 +2745,11 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } + if (amdgpu_sriov_vf(adev)) { + if (amdgpu_virt_release_full_gpu(adev, false)) + DRM_ERROR("failed to release exclusive mode on fini\n"); + } + return 0; } @@ -2805,10 +2810,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) amdgpu_ras_fini(adev); - if (amdgpu_sriov_vf(adev)) - if (amdgpu_virt_release_full_gpu(adev, false)) - DRM_ERROR("failed to release exclusive mode on fini\n"); - return 0; } -- cgit From 894c6890a23c2a91493c845d05d6ffbb148efa6d Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 24 Aug 2021 16:15:48 -0400 Subject: drm/amdgpu: drm/amdgpu: Handle IOMMU enabled case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handle all DMA IOMMU group related dependencies before the group is removed and we try to access it after free. 
v2: Move the actual handling function to TTM Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 48089dc0180b..ec63496412db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3872,6 +3872,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_device_ip_fini_early(adev); + ttm_device_clear_dma_mappings(&adev->mman.bdev); + amdgpu_gart_dummy_page_fini(adev); amdgpu_device_unmap_mmio(adev); -- cgit From 3f68c01be9a2227de1e190317fe34a6fb835a094 Mon Sep 17 00:00:00 2001 From: Zhan Liu Date: Sat, 25 Sep 2021 00:01:48 -0700 Subject: drm/amd/display: add cyan_skillfish display support [Why] add display related cyan_skillfish files in. makefile controlled by CONFIG_DRM_AMD_DC_DCN201 flag. 
v2: squash in clang fixes from Harry, Nathan v3: squash in missing CONFIG_DRM_AMD_DC check (Alex) Signed-off-by: Charlene Liu Signed-off-by: Zhan Liu Reviewed-by: Charlene Liu Acked-by: Jun Lei Acked-by: Harry Wentland Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ec63496412db..097819c40db7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3241,6 +3241,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_RENOIR: + case CHIP_CYAN_SKILLFISH: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: -- cgit From 63352b7f98fdc33a896576a1ad1b8814c2bf3a5a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 26 Jul 2021 16:46:56 -0400 Subject: drm/amdgpu: Use IP discovery to drive setting IP blocks by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drive the asic setup from the IP discovery table rather than hardcoded settings based on asic type. 
Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 097819c40db7..fd58e75f24d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2164,8 +2164,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) return r; break; default: - /* FIXME: not supported yet */ - return -EINVAL; + r = amdgpu_discovery_set_ip_blocks(adev); + if (r) + return r; + break; } amdgpu_amdkfd_device_probe(adev); -- cgit From 524cf3ab85f5a9e574f5fe6797223d70a76704ca Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 26 Jul 2021 16:49:21 -0400 Subject: drm/amdgpu: drive nav10 from the IP discovery table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than hardcoding based on asic_type, use the IP discovery table to configure the driver. Only tested on Navi10 so far. 
Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fd58e75f24d2..bfd7becdcf19 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2142,7 +2142,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (r) return r; break; - case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: -- cgit From 75aa18415a4c56d1aacc07cac00f813fdd5d8799 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Jul 2021 11:16:12 -0400 Subject: drm/amdgpu: drive all navi asics from the IP discovery table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than hardcoding based on asic_type, use the IP discovery table to configure the driver. 
v2: rebase Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bfd7becdcf19..274ef4a514a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2142,26 +2142,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (r) return r; break; - case CHIP_NAVI14: - case CHIP_NAVI12: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_BEIGE_GOBY: - case CHIP_VANGOGH: - case CHIP_YELLOW_CARP: - case CHIP_CYAN_SKILLFISH: - if (adev->asic_type == CHIP_VANGOGH) - adev->family = AMDGPU_FAMILY_VGH; - else if (adev->asic_type == CHIP_YELLOW_CARP) - adev->family = AMDGPU_FAMILY_YC; - else - adev->family = AMDGPU_FAMILY_NV; - - r = nv_set_ip_blocks(adev); - if (r) - return r; - break; default: r = amdgpu_discovery_set_ip_blocks(adev); if (r) -- cgit From 9878844094703fbae1c3b301c9bb71253a30efe7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 30 Jul 2021 15:50:38 -0400 Subject: drm/amdgpu: drive all vega asics from the IP discovery table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than hardcoding based on asic_type, use the IP discovery table to configure the driver. 
Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 274ef4a514a2..480708dd2d73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2126,22 +2126,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (r) return r; break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - case CHIP_ARCTURUS: - case CHIP_RENOIR: - case CHIP_ALDEBARAN: - if (adev->flags & AMD_IS_APU) - adev->family = AMDGPU_FAMILY_RV; - else - adev->family = AMDGPU_FAMILY_AI; - - r = soc15_set_ip_blocks(adev); - if (r) - return r; - break; default: r = amdgpu_discovery_set_ip_blocks(adev); if (r) -- cgit From f7f12b25823c0dce1165b390522d29f99c4585b4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 3 Aug 2021 17:39:01 -0400 Subject: drm/amdgpu: default to true in amdgpu_device_asic_has_dc_support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are not going to support any new chips with the old non-DC code so make it the default. 
Reviewed-by: Harry Wentland Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 480708dd2d73..31c6d44208b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3214,13 +3214,15 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) case CHIP_VANGOGH: case CHIP_YELLOW_CARP: #endif + default: return amdgpu_dc != 0; -#endif +#else default: if (amdgpu_dc > 0) DRM_INFO_ONCE("Display Core has been requested via kernel parameter " "but isn't supported by ASIC, ignoring\n"); return false; +#endif } } -- cgit From 3ae695d691749aba2a15b2b0a3b2c6117d4bd247 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 9 Aug 2021 17:26:21 -0400 Subject: drm/amdgpu: add new asic_type for IP discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new asic type for asics where we don't have an explicit entry in the PCI ID list. We don't need an asic type for these asics, other than something higher than the existing ones, so just use this for all new asics. 
Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 31c6d44208b5..9cecdf063111 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -125,6 +125,7 @@ const char *amdgpu_asic_name[] = { "DIMGREY_CAVEFISH", "BEIGE_GOBY", "YELLOW_CARP", + "IP DISCOVERY", "LAST", }; -- cgit From 286826d7d976e7646b09149d9bc2899d74ff962b Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 28 Sep 2021 15:42:35 +0800 Subject: drm/amdgpu: init iommu after amdkfd device init This patch is to fix clinfo failure in Raven/Picasso: Number of platforms: 1 Platform Profile: FULL_PROFILE Platform Version: OpenCL 2.2 AMD-APP (3364.0) Platform Name: AMD Accelerated Parallel Processing Platform Vendor: Advanced Micro Devices, Inc. 
Platform Extensions: cl_khr_icd cl_amd_event_callback Platform Name: AMD Accelerated Parallel Processing Number of devices: 0 Signed-off-by: Yifan Zhang Reviewed-by: James Zhu Tested-by: James Zhu Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9cecdf063111..221520f375d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2360,10 +2360,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; - r = amdgpu_amdkfd_resume_iommu(adev); - if (r) - goto init_failed; - r = amdgpu_device_ip_hw_init_phase1(adev); if (r) goto init_failed; @@ -2402,6 +2398,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (!adev->gmc.xgmi.pending_reset) amdgpu_amdkfd_device_init(adev); + r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + goto init_failed; + amdgpu_fru_get_product_info(adev); init_failed: -- cgit From c8365dbda056578eebe164bf110816b1a39b4b7f Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 30 Sep 2021 11:22:51 +0200 Subject: drm/amdgpu: revert "Add autodump debugfs node for gpu reset v8" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 728e7e0cd61899208e924472b9e641dbeb0775c4. Further discussion reveals that this feature is severely broken and needs to be reverted ASAP. GPU reset can never be delayed by userspace even for debugging or otherwise we can run into in kernel deadlocks. 
Signed-off-by: Christian König Acked-by: Alex Deucher Acked-by: Nirmoy Das Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 221520f375d4..0207b25c2e6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4440,10 +4440,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, if (reset_context->reset_req_dev == adev) job = reset_context->job; - /* no need to dump if device is not in good state during probe period */ - if (!adev->gmc.xgmi.pending_reset) - amdgpu_debugfs_wait_dump(adev); - if (amdgpu_sriov_vf(adev)) { /* stop the data exchange thread */ amdgpu_virt_fini_data_exchange(adev); -- cgit From 127aedf979579c3a638de37cc0288139f879585a Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 30 Sep 2021 11:59:14 +0200 Subject: drm/amdgpu: print warning and taint kernel if lockup timeout is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that we notice this in error reports. 
Signed-off-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0207b25c2e6e..b5f72724d449 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3324,6 +3324,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) continue; } else if (timeout < 0) { timeout = MAX_SCHEDULE_TIMEOUT; + dev_warn(adev->dev, "lockup timeout disabled"); + add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); } else { timeout = msecs_to_jiffies(timeout); } -- cgit From e17e27f9bdba274b404454072302cf5ea2282e5d Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Fri, 1 Oct 2021 09:48:50 +0800 Subject: drm/amdgpu: handle the case of pci_channel_io_frozen only in amdgpu_pci_resume In current code, when a PCI error state pci_channel_io_normal is detected, it will report PCI_ERS_RESULT_CAN_RECOVER status to PCI driver, and PCI driver will continue the execution of PCI resume callback report_resume by pci_walk_bridge, and the callback will go into amdgpu_pci_resume finally, where write lock is released unconditionally without acquiring such lock first. In this case, a deadlock will happen when other threads start to acquire the read lock. To fix this, add a member in amdgpu_device structure to cache pci_channel_state, and only continue the execution in amdgpu_pci_resume when it's pci_channel_io_frozen. 
Fixes: c9a6b82f45e2 ("drm/amdgpu: Implement DPC recovery") Suggested-by: Andrey Grodzovsky Signed-off-by: Guchun Chen Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b5f72724d449..cda04918fd55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5371,6 +5371,8 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta return PCI_ERS_RESULT_DISCONNECT; } + adev->pci_channel_state = state; + switch (state) { case pci_channel_io_normal: return PCI_ERS_RESULT_CAN_RECOVER; @@ -5513,6 +5515,10 @@ void amdgpu_pci_resume(struct pci_dev *pdev) DRM_INFO("PCI error: resume callback!!\n"); + /* Only continue execution for the case of pci_channel_io_frozen */ + if (adev->pci_channel_state != pci_channel_io_frozen) + return; + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; -- cgit From 58144d283712c9e80e528e001af6ac5aeee71af2 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 6 Oct 2021 17:55:00 +0200 Subject: drm/amdgpu: unify BO evicting method in amdgpu_ttm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify BO evicting functionality for possible memory types in amdgpu_ttm.c. 
Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index cda04918fd55..5ea36c1951f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3885,6 +3885,25 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) } +/** + * amdgpu_device_evict_resources - evict device resources + * @adev: amdgpu device object + * + * Evicts all ttm device resources(vram BOs, gart table) from the lru list + * of the vram memory type. Mainly used for evicting device resources + * at suspend time. + * + */ +static void amdgpu_device_evict_resources(struct amdgpu_device *adev) +{ + /* No need to evict vram on APUs for suspend to ram */ + if (adev->in_s3 && (adev->flags & AMD_IS_APU)) + return; + + if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM)) + DRM_WARN("evicting device resources failed\n"); + +} /* * Suspend & resume. @@ -3925,17 +3944,16 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (!adev->in_s0ix) amdgpu_amdkfd_suspend(adev, adev->in_runpm); - /* evict vram memory */ - amdgpu_bo_evict_vram(adev); + /* First evict vram memory */ + amdgpu_device_evict_resources(adev); amdgpu_fence_driver_hw_fini(adev); amdgpu_device_ip_suspend_phase2(adev); - /* evict remaining vram memory - * This second call to evict vram is to evict the gart page table - * using the CPU. + /* This second call to evict device resources is to evict + * the gart page table using the CPU. 
*/ - amdgpu_bo_evict_vram(adev); + amdgpu_device_evict_resources(adev); return 0; } -- cgit From c58a863b1ccf638feb52cf3d9c756a9f578a57ae Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Fri, 8 Oct 2021 13:21:45 -0400 Subject: drm/amdgpu: use adev_to_drm for consistency when accessing drm_device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit adev_to_drm is used everywhere, so improve recent changes when accessing drm_device pointer from amdgpu_device. Signed-off-by: Guchun Chen Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5ea36c1951f3..239e71174855 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -306,7 +306,7 @@ void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos, uint64_t last; int idx; - if (!drm_dev_enter(&adev->ddev, &idx)) + if (!drm_dev_enter(adev_to_drm(adev), &idx)) return; BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4)); -- cgit