diff options
| -rw-r--r-- | drivers/gpu/drm/xe/xe_gpu_scheduler.c | 13 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_gpu_scheduler.h | 1 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_guc_submit.c | 35 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_guc_submit.h | 2 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_sriov_vf.c | 46 |
5 files changed, 97 insertions, 0 deletions
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index 869b43a4151d..455ccaf17314 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -101,6 +101,19 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) cancel_work_sync(&sched->work_process_msg); } +/** + * xe_sched_submission_stop_async - Stop further runs of submission tasks on a scheduler. + * @sched: the &xe_gpu_scheduler struct instance + * + * This call disables further runs of scheduling work queue. It does not wait + * for any in-progress runs to finish, only makes sure no further runs happen + * afterwards. + */ +void xe_sched_submission_stop_async(struct xe_gpu_scheduler *sched) +{ + drm_sched_wqueue_stop(&sched->base); +} + void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched) { drm_sched_resume_timeout(&sched->base, sched->base.timeout); diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 308061f0cf37..e548b2aed95a 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -21,6 +21,7 @@ void xe_sched_fini(struct xe_gpu_scheduler *sched); void xe_sched_submission_start(struct xe_gpu_scheduler *sched); void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); +void xe_sched_submission_stop_async(struct xe_gpu_scheduler *sched); void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 679ae229cc82..ebc137be4de2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1836,6 +1836,19 @@ void xe_guc_submit_stop(struct xe_guc *guc) } +/** + * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC. 
+ * @guc: the &xe_guc struct instance whose scheduler is to be disabled + */ +void xe_guc_submit_pause(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + xe_sched_submission_stop_async(&q->guc->sched); +} + static void guc_exec_queue_start(struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched = &q->guc->sched; @@ -1876,6 +1889,28 @@ int xe_guc_submit_start(struct xe_guc *guc) return 0; } +static void guc_exec_queue_unpause(struct xe_exec_queue *q) +{ + struct xe_gpu_scheduler *sched = &q->guc->sched; + + xe_sched_submission_start(sched); +} + +/** + * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC. + * @guc: the &xe_guc struct instance whose scheduler is to be enabled + */ +void xe_guc_submit_unpause(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + guc_exec_queue_unpause(q); + + wake_up_all(&guc->ct.wq); +} + static struct xe_exec_queue * g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) { diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 8f64e799283b..ff44500f3da2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -18,6 +18,8 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc); void xe_guc_submit_reset_wait(struct xe_guc *guc); void xe_guc_submit_stop(struct xe_guc *guc); int xe_guc_submit_start(struct xe_guc *guc); +void xe_guc_submit_pause(struct xe_guc *guc); +void xe_guc_submit_unpause(struct xe_guc *guc); void xe_guc_submit_wedge(struct xe_guc *guc); int xe_guc_read_stopped(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index 26e243c28994..c66b17da1ce7 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -11,6 +11,8 @@ #include "xe_gt_sriov_printk.h" #include 
"xe_gt_sriov_vf.h" #include "xe_guc_ct.h" +#include "xe_guc_submit.h" +#include "xe_irq.h" #include "xe_pm.h" #include "xe_sriov.h" #include "xe_sriov_printk.h" @@ -147,6 +149,48 @@ void xe_sriov_vf_init_early(struct xe_device *xe) xe_sriov_info(xe, "migration not supported by this module version\n"); } +/** + * vf_post_migration_shutdown - Stop the driver activities after VF migration. + * @xe: the &xe_device struct instance + * + * After this VM is migrated and assigned to a new VF, it is running on a new + * hardware, and therefore many hardware-dependent states and related structures + * require fixups. Without fixups, the hardware cannot do any work, and therefore + * all GPU pipelines are stalled. + * Stop some of kernel activities to make the fixup process faster. + */ +static void vf_post_migration_shutdown(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + + for_each_gt(gt, xe, id) + xe_guc_submit_pause(&gt->uc.guc); +} + +/** + * vf_post_migration_kickstart - Re-start the driver activities under new hardware. + * @xe: the &xe_device struct instance + * + * After we have finished with all post-migration fixups, restart the driver + * activities to continue feeding the GPU with workloads. + */ +static void vf_post_migration_kickstart(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + + /* + * Make sure interrupts on the new HW are properly set. The GuC IRQ + * must be working at this point, since the recovery has already started, + * but the rest was not enabled using the procedure from spec.
+ */ + xe_irq_resume(xe); + + for_each_gt(gt, xe, id) + xe_guc_submit_unpause(&gt->uc.guc); + } + static bool gt_vf_post_migration_needed(struct xe_gt *gt) { return test_bit(gt->info.id, &gt_to_xe(gt)->sriov.vf.migration.gt_flags); @@ -230,6 +274,7 @@ static void vf_post_migration_recovery(struct xe_device *xe) drm_dbg(&xe->drm, "migration recovery in progress\n"); xe_pm_runtime_get(xe); + vf_post_migration_shutdown(xe); if (!vf_migration_supported(xe)) { xe_sriov_err(xe, "migration not supported by this module version\n"); @@ -247,6 +292,7 @@ static void vf_post_migration_recovery(struct xe_device *xe) set_bit(id, &fixed_gts); } + vf_post_migration_kickstart(xe); err = vf_post_migration_notify_resfix_done(xe, fixed_gts); if (err) goto fail; |
