summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLucas De Marchi <lucas.demarchi@intel.com>2025-01-02 16:11:10 -0800
committerLucas De Marchi <lucas.demarchi@intel.com>2025-01-03 12:43:01 -0800
commit5001ef3af8f2c972d6fd9c5221a8457556f8bea6 (patch)
tree5a0da24e193ca648f5934d3e86b0e88a47f7e8eb
parent88fca61ba5e2ecd0552b9dea2500a16da12d0106 (diff)
drm/xe: Fix tlb invalidation when wedging
If GuC fails to load, the driver wedges, but in the process it tries to do stuff that may not be initialized yet. This moves the xe_gt_tlb_invalidation_init() to be done earlier: as its own doc says, it's a software-only initialization and should had been named with the _early() suffix. Move it to be called by xe_gt_init_early(), so the locks and seqno are initialized, avoiding a NULL ptr deref when wedging: xe 0000:03:00.0: [drm] *ERROR* GT0: load failed: status: Reset = 0, BootROM = 0x50, UKernel = 0x00, MIA = 0x00, Auth = 0x01 xe 0000:03:00.0: [drm] *ERROR* GT0: firmware signature verification failed xe 0000:03:00.0: [drm] *ERROR* CRITICAL: Xe has declared device 0000:03:00.0 as wedged. ... BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 9 UID: 0 PID: 3908 Comm: modprobe Tainted: G U W 6.13.0-rc4-xe+ #3 Tainted: [U]=USER, [W]=WARN Hardware name: Intel Corporation Alder Lake Client Platform/AlderLake-S ADP-S DDR5 UDIMM CRB, BIOS ADLSFWI1.R00.3275.A00.2207010640 07/01/2022 RIP: 0010:xe_gt_tlb_invalidation_reset+0x75/0x110 [xe] This can be easily triggered by poking the GuC binary to force a signature failure. There will still be an extra message, xe 0000:03:00.0: [drm] *ERROR* GT0: GuC mmio request 0x4100: no reply 0x4100 but that's better than a NULL ptr deref. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/3956 Fixes: 7dbe8af13c18 ("drm/xe: Wedge the entire device") Reviewed-by: Matthew Brost <matthew.brost@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20250103001111.331684-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c8
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c4
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h3
3 files changed, 8 insertions, 7 deletions
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 41ab7fbebc19..26e64530ada2 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -387,6 +387,10 @@ int xe_gt_init_early(struct xe_gt *gt)
xe_force_wake_init_gt(gt, gt_to_fw(gt));
spin_lock_init(&gt->global_invl_lock);
+ err = xe_gt_tlb_invalidation_init_early(gt);
+ if (err)
+ return err;
+
return 0;
}
@@ -588,10 +592,6 @@ int xe_gt_init(struct xe_gt *gt)
xe_hw_fence_irq_init(&gt->fence_irq[i]);
}
- err = xe_gt_tlb_invalidation_init(gt);
- if (err)
- return err;
-
err = xe_gt_pagefault_init(gt);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 665927b80e9e..257b500e1703 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -106,7 +106,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
}
/**
- * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state
+ * xe_gt_tlb_invalidation_init_early - Initialize GT TLB invalidation state
* @gt: graphics tile
*
* Initialize GT TLB invalidation state, purely software initialization, should
@@ -114,7 +114,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
*
* Return: 0 on success, negative error code on error.
*/
-int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
+int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt)
{
gt->tlb_invalidation.seqno = 1;
INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index 00b1c6c01e8d..672acfcdf0d7 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -14,7 +14,8 @@ struct xe_gt;
struct xe_guc;
struct xe_vma;
-int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt);
+
void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,