diff options
Diffstat (limited to 'drivers/accel/ivpu/ivpu_pm.c')
-rw-r--r-- | drivers/accel/ivpu/ivpu_pm.c | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index ac0e22454596..ea30db181cd7 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -34,6 +34,7 @@ module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644); MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default"); #define PM_RESCHEDULE_LIMIT 5 +#define PM_TDR_HEARTBEAT_LIMIT 30 static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) { @@ -44,6 +45,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) ivpu_fw_log_reset(vdev); ivpu_fw_load(vdev); fw->entry_point = fw->cold_boot_entry_point; + fw->last_heartbeat = 0; } static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev) @@ -189,7 +191,24 @@ static void ivpu_job_timeout_work(struct work_struct *work) { struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work); struct ivpu_device *vdev = pm->vdev; + u64 heartbeat; + if (ivpu_jsm_get_heartbeat(vdev, 0, &heartbeat) || heartbeat <= vdev->fw->last_heartbeat) { + ivpu_err(vdev, "Job timeout detected, heartbeat not progressed\n"); + goto recovery; + } + + if (atomic_fetch_inc(&vdev->job_timeout_counter) > PM_TDR_HEARTBEAT_LIMIT) { + ivpu_err(vdev, "Job timeout detected, heartbeat limit exceeded\n"); + goto recovery; + } + + vdev->fw->last_heartbeat = heartbeat; + ivpu_start_job_timeout_detection(vdev); + return; + +recovery: + atomic_set(&vdev->job_timeout_counter, 0); ivpu_pm_trigger_recovery(vdev, "TDR"); } @@ -204,6 +223,7 @@ void ivpu_start_job_timeout_detection(struct ivpu_device *vdev) void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev) { cancel_delayed_work_sync(&vdev->pm->job_timeout_work); + atomic_set(&vdev->job_timeout_counter, 0); } int ivpu_pm_suspend_cb(struct device *dev) |