summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler.c13
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler.h1
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c35
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.h2
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.c46
5 files changed, 97 insertions, 0 deletions
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
index 869b43a4151d..455ccaf17314 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
@@ -101,6 +101,19 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched)
cancel_work_sync(&sched->work_process_msg);
}
+/**
+ * xe_sched_submission_stop_async - Stop further runs of submission tasks on a scheduler.
+ * @sched: the &xe_gpu_scheduler struct instance
+ *
+ * This call disables further runs of scheduling work queue. It does not wait
+ * for any in-progress runs to finish, only makes sure no further runs happen
+ * afterwards.
+ */
+void xe_sched_submission_stop_async(struct xe_gpu_scheduler *sched)
+{
+ drm_sched_wqueue_stop(&sched->base);
+}
+
void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched)
{
drm_sched_resume_timeout(&sched->base, sched->base.timeout);
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
index 308061f0cf37..e548b2aed95a 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -21,6 +21,7 @@ void xe_sched_fini(struct xe_gpu_scheduler *sched);
void xe_sched_submission_start(struct xe_gpu_scheduler *sched);
void xe_sched_submission_stop(struct xe_gpu_scheduler *sched);
+void xe_sched_submission_stop_async(struct xe_gpu_scheduler *sched);
void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 679ae229cc82..ebc137be4de2 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1836,6 +1836,19 @@ void xe_guc_submit_stop(struct xe_guc *guc)
}
+/**
+ * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
+ * @guc: the &xe_guc struct instance whose scheduler is to be disabled
+ */
+void xe_guc_submit_pause(struct xe_guc *guc)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+ xe_sched_submission_stop_async(&q->guc->sched);
+}
+
static void guc_exec_queue_start(struct xe_exec_queue *q)
{
struct xe_gpu_scheduler *sched = &q->guc->sched;
@@ -1876,6 +1889,28 @@ int xe_guc_submit_start(struct xe_guc *guc)
return 0;
}
+static void guc_exec_queue_unpause(struct xe_exec_queue *q)
+{
+ struct xe_gpu_scheduler *sched = &q->guc->sched;
+
+ xe_sched_submission_start(sched);
+}
+
+/**
+ * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
+ * @guc: the &xe_guc struct instance whose scheduler is to be enabled
+ */
+void xe_guc_submit_unpause(struct xe_guc *guc)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+ guc_exec_queue_unpause(q);
+
+ wake_up_all(&guc->ct.wq);
+}
+
static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index 8f64e799283b..ff44500f3da2 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -18,6 +18,8 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc);
void xe_guc_submit_reset_wait(struct xe_guc *guc);
void xe_guc_submit_stop(struct xe_guc *guc);
int xe_guc_submit_start(struct xe_guc *guc);
+void xe_guc_submit_pause(struct xe_guc *guc);
+void xe_guc_submit_unpause(struct xe_guc *guc);
void xe_guc_submit_wedge(struct xe_guc *guc);
int xe_guc_read_stopped(struct xe_guc *guc);
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
index 26e243c28994..c66b17da1ce7 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -11,6 +11,8 @@
#include "xe_gt_sriov_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc_ct.h"
+#include "xe_guc_submit.h"
+#include "xe_irq.h"
#include "xe_pm.h"
#include "xe_sriov.h"
#include "xe_sriov_printk.h"
@@ -147,6 +149,48 @@ void xe_sriov_vf_init_early(struct xe_device *xe)
xe_sriov_info(xe, "migration not supported by this module version\n");
}
+/**
+ * vf_post_migration_shutdown - Stop the driver activities after VF migration.
+ * @xe: the &xe_device struct instance
+ *
+ * After this VM is migrated and assigned to a new VF, it is running on a new
+ * hardware, and therefore many hardware-dependent states and related structures
+ * require fixups. Without fixups, the hardware cannot do any work, and therefore
+ * all GPU pipelines are stalled.
+ * Stop some of kernel activities to make the fixup process faster.
+ */
+static void vf_post_migration_shutdown(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+
+ for_each_gt(gt, xe, id)
+ xe_guc_submit_pause(&gt->uc.guc);
+}
+
+/**
+ * vf_post_migration_kickstart - Re-start the driver activities under new hardware.
+ * @xe: the &xe_device struct instance
+ *
+ * After we have finished with all post-migration fixups, restart the driver
+ * activities to continue feeding the GPU with workloads.
+ */
+static void vf_post_migration_kickstart(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+
+ /*
+ * Make sure interrupts on the new HW are properly set. The GuC IRQ
+ * must be working at this point, since the recovery did started,
+ * but the rest was not enabled using the procedure from spec.
+ */
+ xe_irq_resume(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_guc_submit_unpause(&gt->uc.guc);
+}
+
static bool gt_vf_post_migration_needed(struct xe_gt *gt)
{
return test_bit(gt->info.id, &gt_to_xe(gt)->sriov.vf.migration.gt_flags);
@@ -230,6 +274,7 @@ static void vf_post_migration_recovery(struct xe_device *xe)
drm_dbg(&xe->drm, "migration recovery in progress\n");
xe_pm_runtime_get(xe);
+ vf_post_migration_shutdown(xe);
if (!vf_migration_supported(xe)) {
xe_sriov_err(xe, "migration not supported by this module version\n");
@@ -247,6 +292,7 @@ static void vf_post_migration_recovery(struct xe_device *xe)
set_bit(id, &fixed_gts);
}
+ vf_post_migration_kickstart(xe);
err = vf_post_migration_notify_resfix_done(xe, fixed_gts);
if (err)
goto fail;