From a6dcf9a7ccfed57abd44c24cc505b559281d44b9 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sat, 11 Mar 2023 17:36:11 +0800 Subject: drm/amdgpu: Move umc ras block init to gmc ras sw_init Initialize umc ras block only when umc ip block supports ras. Driver queries ras capabilities after early_init, ras block init needs to be moved to sw_init. Signed-off-by: Hawking Zhang Reviewed-by: Stanley Yang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 0305b660cd17..f1773abd5e1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -351,7 +351,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint16_t pasid, uint64_t timestamp); void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid); -int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev); +int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev); int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev); -- cgit From 057e335c71361063e173381cecf2e8487ec8b552 Mon Sep 17 00:00:00 2001 From: Yifan Zha Date: Mon, 6 Mar 2023 14:54:05 +0800 Subject: drm/amdgpu: Init MMVM_CONTEXTS_DISABLE in gmc11 golden setting under SRIOV [Why] If disable the mmhub vm contexts(set MMVM_CONTEXTS_DISABLE to 0xffff), driver loading failed on vf due to fence fallback timer expired on all rings. FLR cannot reset MMVM_CONTEXTS_DISABLE. So this vf can not be recovered anymore unless trigger a whole gpu reset. [How] Under SRIOV, init MMVM_CONTEXTS_DISABLE in gmc11 golden register setting. Signed-off-by: Yifan Zha Reviewed-by: Horace Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index f1773abd5e1a..232523e3e270 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -104,6 +104,8 @@ struct amdgpu_vmhub { uint32_t vm_cntx_cntl_vm_fault; uint32_t vm_l2_bank_select_reserved_cid2; + uint32_t vm_contexts_disable; + const struct amdgpu_vmhub_funcs *vmhub_funcs; }; -- cgit From dd299441654fd8209056c7985ddf2373ebaba6ed Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 11 Apr 2023 16:32:38 -0400 Subject: drm/amdgpu: Rework retry fault removal Rework retry fault removal from the software filter by storing an expired timestamp for a fault that is being removed. When a new fault comes, and it matches an entry in the sw filter, it will be added as a new fault only when its timestamp is greater than the timestamp expiry of the fault in the sw filter. This helps in avoiding stale faults being added back into the filter and preventing legitimate faults from being handled. Suggested-by: Felix Kuehling Signed-off-by: Mukul Joshi Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 232523e3e270..6d105d7fb98b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -70,6 +70,7 @@ struct amdgpu_gmc_fault { uint64_t timestamp:48; uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER; atomic64_t key; + uint64_t timestamp_expiry:48; }; /* -- cgit