summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPhilip Yang <Philip.Yang@amd.com>2025-02-25 10:04:06 -0500
committerAlex Deucher <alexander.deucher@amd.com>2025-02-27 16:50:04 -0500
commitee3ed100663d2ae8280bd5fc63b3e2c13c689cce (patch)
tree3b26c329fef267c1f5225272503a23c558cf3859
parentf9234217d0167f43cc0cdccc39353c37de1d028d (diff)
drm/amdkfd: Remove kfd_process_hw_exception worker
With GPU reset-domain worker implemented, KFD hw_exception worker is not needed any more, just call amdgpu_amdkfd_gpu_reset directly from kfd_hws_hang. Suggested-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Philip Yang <Philip.Yang@amd.com> Reviewed-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h1
2 files changed, 1 insertions, 11 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 94b1ac8a4735..91e4988dc1e3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -66,7 +66,6 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm,
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
struct queue *q, const uint32_t *restore_sdma_id);
-static void kfd_process_hw_exception(struct work_struct *work);
static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
@@ -170,7 +169,7 @@ static void kfd_hws_hang(struct device_queue_manager *dqm)
/*
* Issue a GPU reset if HWS is unresponsive
*/
- schedule_work(&dqm->hw_exception_work);
+ amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}
static int convert_to_mes_queue_type(int queue_type)
@@ -1740,7 +1739,6 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
dqm->active_cp_queue_count = 0;
dqm->gws_queue_count = 0;
dqm->active_runlist = false;
- INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
dqm->trap_debug_vmid = 0;
init_sdma_bitmaps(dqm);
@@ -3080,13 +3078,6 @@ int kfd_evict_process_device(struct kfd_process_device *pdd)
return ret;
}
-static void kfd_process_hw_exception(struct work_struct *work)
-{
- struct device_queue_manager *dqm = container_of(work,
- struct device_queue_manager, hw_exception_work);
- amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
-}
-
int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 09ab36f8e8c6..7146e227e2c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -269,7 +269,6 @@ struct device_queue_manager {
/* hw exception */
bool is_hws_hang;
bool is_resetting;
- struct work_struct hw_exception_work;
struct kfd_mem_obj hiq_sdma_mqd;
bool sched_running;
bool sched_halt;