From 5529df92b8e8cbb4b14a226665888f74648260ad Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 25 Mar 2025 15:47:10 -0700 Subject: drm/xe/bmg: Add one additional PCI ID One additional BMG PCI ID has been added to the spec; make sure our driver recognizes devices with this ID properly. Bspec: 68090 Cc: stable@vger.kernel.org # v6.12+ Reviewed-by: Clint Taylor Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250325224709.4073080-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper (cherry picked from commit cca9734ebe55f6af11ce8d57ca1afdc4d158c808) Signed-off-by: Lucas De Marchi --- include/drm/intel/pciids.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 4736ea525048..d212848d07f3 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -850,6 +850,7 @@ MACRO__(0xE20C, ## __VA_ARGS__), \ MACRO__(0xE20D, ## __VA_ARGS__), \ MACRO__(0xE210, ## __VA_ARGS__), \ + MACRO__(0xE211, ## __VA_ARGS__), \ MACRO__(0xE212, ## __VA_ARGS__), \ MACRO__(0xE215, ## __VA_ARGS__), \ MACRO__(0xE216, ## __VA_ARGS__) -- cgit From 1d8c0557927e6d69cb6341b7792f882412cfccff Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 26 Mar 2025 09:05:48 +0100 Subject: drm/xe/svm: Fix a potential bo UAF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If drm_gpusvm_migrate_to_devmem() succeeds, if a cpu access happens to the range the bo may be freed before xe_bo_unlock(), causing a UAF. Since the reference is transferred, use xe_svm_devmem_release() to release the reference on drm_gpusvm_migrate_to_devmem() failure, and hold a local reference to protect the UAF. Fixes: 2f118c949160 ("drm/xe: Add SVM VRAM migration") Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250326080551.40201-3-thomas.hellstrom@linux.intel.com (cherry picked from commit c9db07cab766b665c8fa1184649cef452f448dc8) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 3e829c87d7b4..f8c128524d9f 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -696,11 +696,14 @@ retry: list_for_each_entry(block, blocks, link) block->private = vr; + xe_bo_get(bo); err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base, &bo->devmem_allocation, ctx); - xe_bo_unlock(bo); if (err) - xe_bo_put(bo); /* Creation ref */ + xe_svm_devmem_release(&bo->devmem_allocation); + + xe_bo_unlock(bo); + xe_bo_put(bo); unlock: mmap_read_unlock(mm); -- cgit From 7bcfeddb36b77f9fe3b010bb0b282b7618420bba Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 26 Mar 2025 16:16:34 +0100 Subject: drm/xe: Fix an out-of-bounds shift when invalidating TLB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the size of the range invalidated is larger than rounddown_pow_of_two(ULONG_MAX), The function macro roundup_pow_of_two(length) will hit an out-of-bounds shift [1]. Use a full TLB invalidation for such cases. v2: - Use a define for the range size limit over which we use a full TLB invalidation. (Lucas) - Use a better calculation of the limit. [1]: [ 39.202421] ------------[ cut here ]------------ [ 39.202657] UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13 [ 39.202673] shift exponent 64 is too large for 64-bit type 'long unsigned int' [ 39.202688] CPU: 8 UID: 0 PID: 3129 Comm: xe_exec_system_ Tainted: G U 6.14.0+ #10 [ 39.202690] Tainted: [U]=USER [ 39.202690] Hardware name: ASUS System Product Name/PRIME B560M-A AC, BIOS 2001 02/01/2023 [ 39.202691] Call Trace: [ 39.202692] [ 39.202695] dump_stack_lvl+0x6e/0xa0 [ 39.202699] ubsan_epilogue+0x5/0x30 [ 39.202701] __ubsan_handle_shift_out_of_bounds.cold+0x61/0xe6 [ 39.202705] xe_gt_tlb_invalidation_range.cold+0x1d/0x3a [xe] [ 39.202800] ? find_held_lock+0x2b/0x80 [ 39.202803] ? mark_held_locks+0x40/0x70 [ 39.202806] xe_svm_invalidate+0x459/0x700 [xe] [ 39.202897] drm_gpusvm_notifier_invalidate+0x4d/0x70 [drm_gpusvm] [ 39.202900] __mmu_notifier_release+0x1f5/0x270 [ 39.202905] exit_mmap+0x40e/0x450 [ 39.202912] __mmput+0x45/0x110 [ 39.202914] exit_mm+0xc5/0x130 [ 39.202916] do_exit+0x21c/0x500 [ 39.202918] ? lockdep_hardirqs_on_prepare+0xdb/0x190 [ 39.202920] do_group_exit+0x36/0xa0 [ 39.202922] get_signal+0x8f8/0x900 [ 39.202926] arch_do_signal_or_restart+0x35/0x100 [ 39.202930] syscall_exit_to_user_mode+0x1fc/0x290 [ 39.202932] do_syscall_64+0xa1/0x180 [ 39.202934] ? do_user_addr_fault+0x59f/0x8a0 [ 39.202937] ? lock_release+0xd2/0x2a0 [ 39.202939] ? do_user_addr_fault+0x5a9/0x8a0 [ 39.202942] ? trace_hardirqs_off+0x4b/0xc0 [ 39.202944] ? clear_bhb_loop+0x25/0x80 [ 39.202946] ? clear_bhb_loop+0x25/0x80 [ 39.202947] ? clear_bhb_loop+0x25/0x80 [ 39.202950] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 39.202952] RIP: 0033:0x7fa945e543e1 [ 39.202961] Code: Unable to access opcode bytes at 0x7fa945e543b7. [ 39.202962] RSP: 002b:00007ffca8fb4170 EFLAGS: 00000293 [ 39.202963] RAX: 000000000000003d RBX: 0000000000000000 RCX: 00007fa945e543e3 [ 39.202964] RDX: 0000000000000000 RSI: 00007ffca8fb41ac RDI: 00000000ffffffff [ 39.202964] RBP: 00007ffca8fb4190 R08: 0000000000000000 R09: 00007fa945f600a0 [ 39.202965] R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000000000 [ 39.202966] R13: 00007fa9460dd310 R14: 00007ffca8fb41ac R15: 0000000000000000 [ 39.202970] [ 39.202970] ---[ end trace ]--- Fixes: 332dd0116c82 ("drm/xe: Add range based TLB invalidations") Cc: Matthew Brost Cc: Rodrigo Vivi Cc: # v6.8+ Signed-off-by: Thomas Hellström Reviewed-by: Lucas De Marchi #v1 Link: https://lore.kernel.org/r/20250326151634.36916-1-thomas.hellstrom@linux.intel.com (cherry picked from commit b88f48f86500bc0b44b4f73ac66d500a40d320ad) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 03072e094991..084cbdeba8ea 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -322,6 +322,13 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) return 0; } +/* + * Ensure that roundup_pow_of_two(length) doesn't overflow. + * Note that roundup_pow_of_two() operates on unsigned long, + * not on u64. + */ +#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) + /** * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an * address range @@ -346,6 +353,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_device *xe = gt_to_xe(gt); #define MAX_TLB_INVALIDATION_LEN 7 u32 action[MAX_TLB_INVALIDATION_LEN]; + u64 length = end - start; int len = 0; xe_gt_assert(gt, fence); @@ -358,11 +366,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ - if (!xe->info.has_range_tlb_invalidation) { + if (!xe->info.has_range_tlb_invalidation || + length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); } else { u64 orig_start = start; - u64 length = end - start; u64 align; if (length < SZ_4K) -- cgit From 262de94a3a7ef23c326534b3d9483602b7af841e Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Thu, 27 Mar 2025 11:56:04 -0700 Subject: drm/xe: Ensure fixed_slice_mode gets set after ccs_mode change The RCU_MODE_FIXED_SLICE_CCS_MODE setting is not getting invoked in the gt reset path after the ccs_mode setting by the user. Add it to engine register update list (in hw_engine_setup_default_state()) which ensures it gets set in the gt reset and engine reset paths. v2: Add register update to engine list to ensure it gets updated after engine reset also. Fixes: 0d97ecce16bd ("drm/xe: Enable Fixed CCS mode setting") Cc: stable@vger.kernel.org Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matt Roper Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250327185604.18230-1-niranjana.vishwanathapura@intel.com (cherry picked from commit 12468e519f98e4d93370712e3607fab61df9dae9) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 8c05fd30b7df..93241fd0a4ba 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -389,12 +389,6 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) blit_cctl_val, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - /* Use Fixed slice CCS mode */ - { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), - XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), - XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, - RCU_MODE_FIXED_SLICE_CCS_MODE)) - }, /* Disable WMTP if HW doesn't support it */ { XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"), XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)), @@ -461,6 +455,12 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + /* Use Fixed slice CCS mode */ + { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), + XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), + XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, + RCU_MODE_FIXED_SLICE_CCS_MODE)) + }, }; xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr); -- cgit From 00e0ae4f1f872800413c819f8a2a909dc29cdc35 Mon Sep 17 00:00:00 2001 From: Julia Filipchuk Date: Tue, 25 Mar 2025 15:43:05 -0700 Subject: drm/xe/xe3lpg: Apply Wa_14022293748, Wa_22019794406 Extend Wa_14022293748, Wa_22019794406 to Xe3_LPG Signed-off-by: Julia Filipchuk Reviewed-by: Tejas Upadhyay Signed-off-by: John Harrison Link: https://lore.kernel.org/r/20250325224310.1455499-1-julia.filipchuk@intel.com (cherry picked from commit 32af900f2c6b1846fd3ede8ad36dd180d7e4ae70) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_wa_oob.rules | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 0c738af24f7c..9b9e176992a8 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -32,8 +32,10 @@ GRAPHICS_VERSION(3001) 14022293748 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) + GRAPHICS_VERSION_RANGE(3000, 3001) 22019794406 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) + GRAPHICS_VERSION_RANGE(3000, 3001) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) -- cgit From 20659d3150f1a2a258a173fe011013178ff2a197 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 11 Mar 2025 11:29:15 -0700 Subject: drm/xe: Use local fence in error path of xe_migrate_clear The intent of the error path in xe_migrate_clear is to wait on locally generated fence and then return. The code is waiting on m->fence which could be the local fence but this is only stable under the job mutex leading to a possible UAF. Fix code to wait on local fence. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250311182915.3606291-1-matthew.brost@intel.com (cherry picked from commit 762b7e95362170b3e13a8704f38d5e47eca4ba74) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index df4282c71bf0..a83bebef3780 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1177,7 +1177,7 @@ err: err_sync: /* Sync partial copies if any. FIXME: job_mutex? */ if (fence) { - dma_fence_wait(m->fence, false); + dma_fence_wait(fence, false); dma_fence_put(fence); } -- cgit From a5c71fd5b69b9da77e5e0b268e69e256932ba49c Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 27 Mar 2025 17:56:47 +0530 Subject: drm/xe/hw_engine: define sysfs_ops on all directories Sysfs_ops needs to be defined on all directories which can have attr files with set/get method. Add sysfs_ops to even those directories which is currently empty but would have attr files with set/get method in future. Leave .default with default sysfs_ops as it will never have setter method. V2(Himal/Rodrigo): - use single sysfs_ops for all dir and attr with set/get - add default ops as ./default does not need runtime pm at all Fixes: 3f0e14651ab0 ("drm/xe: Runtime PM wake on every sysfs call") Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20250327122647.886637-1-tejas.upadhyay@intel.com Signed-off-by: Tejas Upadhyay (cherry picked from commit 40780b9760b561e093508d07b8b9b06c94ab201e) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 108 +++++++++++++------------- 1 file changed, 52 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index b53e8d2accdb..a440442b4d72 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -32,14 +32,61 @@ bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max) return timeout >= min && timeout <= max; } -static void kobj_xe_hw_engine_release(struct kobject *kobj) +static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) { kfree(kobj); } +static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct xe_device *xe = kobj_to_xe(kobj); + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->show) { + xe_pm_runtime_get(xe); + ret = kattr->show(kobj, kattr, buf); + xe_pm_runtime_put(xe); + } + + return ret; +} + +static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t count) +{ + struct xe_device *xe = kobj_to_xe(kobj); + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->store) { + xe_pm_runtime_get(xe); + ret = kattr->store(kobj, kattr, buf, count); + xe_pm_runtime_put(xe); + } + + return ret; +} + +static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = { + .show = xe_hw_engine_class_sysfs_attr_show, + .store = xe_hw_engine_class_sysfs_attr_store, +}; + static const struct kobj_type kobj_xe_hw_engine_type = { - .release = kobj_xe_hw_engine_release, - .sysfs_ops = &kobj_sysfs_ops + .release = xe_hw_engine_sysfs_kobj_release, + .sysfs_ops = &xe_hw_engine_class_sysfs_ops, +}; + +static const struct kobj_type kobj_xe_hw_engine_type_def = { + .release = xe_hw_engine_sysfs_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, }; static ssize_t job_timeout_max_store(struct kobject *kobj, @@ -543,7 +590,7 @@ static int xe_add_hw_engine_class_defaults(struct xe_device *xe, if (!kobj) return -ENOMEM; - kobject_init(kobj, &kobj_xe_hw_engine_type); + kobject_init(kobj, &kobj_xe_hw_engine_type_def); err = kobject_add(kobj, parent, "%s", ".defaults"); if (err) goto err_object; @@ -559,57 +606,6 @@ err_object: return err; } -static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) -{ - kfree(kobj); -} - -static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct xe_device *xe = kobj_to_xe(kobj); - struct kobj_attribute *kattr; - ssize_t ret = -EIO; - - kattr = container_of(attr, struct kobj_attribute, attr); - if (kattr->show) { - xe_pm_runtime_get(xe); - ret = kattr->show(kobj, kattr, buf); - xe_pm_runtime_put(xe); - } - - return ret; -} - -static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj, - struct attribute *attr, - const char *buf, - size_t count) -{ - struct xe_device *xe = kobj_to_xe(kobj); - struct kobj_attribute *kattr; - ssize_t ret = -EIO; - - kattr = container_of(attr, struct kobj_attribute, attr); - if (kattr->store) { - xe_pm_runtime_get(xe); - ret = kattr->store(kobj, kattr, buf, count); - xe_pm_runtime_put(xe); - } - - return ret; -} - -static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = { - .show = xe_hw_engine_class_sysfs_attr_show, - .store = xe_hw_engine_class_sysfs_attr_store, -}; - -static const struct kobj_type xe_hw_engine_sysfs_kobj_type = { - .release = xe_hw_engine_sysfs_kobj_release, - .sysfs_ops = &xe_hw_engine_class_sysfs_ops, -}; static void hw_engine_class_sysfs_fini(void *arg) { @@ -640,7 +636,7 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) if (!kobj) return -ENOMEM; - kobject_init(kobj, &xe_hw_engine_sysfs_kobj_type); + kobject_init(kobj, &kobj_xe_hw_engine_type); err = kobject_add(kobj, gt->sysfs, "engines"); if (err) -- cgit From dac2d70bb23f247034dd2e5c1abc6689c684b793 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Mar 2025 22:06:02 +0100 Subject: drm/xe: avoid plain 64-bit division Building the xe driver for i386 results in a link time warning: x86_64-linux-ld: drivers/gpu/drm/xe/xe_migrate.o: in function `xe_migrate_vram': xe_migrate.c:(.text+0x1e15): undefined reference to `__udivdi3' Avoid this by using DIV_U64_ROUND_UP() instead of DIV_ROUND_UP(). The driver is unlikely to be used on 32=bit hardware, so the extra cost here is not too important. Fixes: 9c44fd5f6e8a ("drm/xe: Add migrate layer functions for SVM support") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250324210612.2927194-1-arnd@kernel.org Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi (cherry picked from commit c9092257506af4985c085103714c403812a5bdcb) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_migrate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index a83bebef3780..5a3e89022c38 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1547,7 +1547,7 @@ void xe_migrate_wait(struct xe_migrate *m) static u32 pte_update_cmd_size(u64 size) { u32 num_dword; - u64 entries = DIV_ROUND_UP(size, XE_PAGE_SIZE); + u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE); XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER); /* @@ -1558,7 +1558,7 @@ static u32 pte_update_cmd_size(u64 size) * 2 dword for the page table's physical location * 2*n dword for value of pte to fill (each pte entry is 2 dwords) */ - num_dword = (1 + 2) * DIV_ROUND_UP(entries, 0x1ff); + num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff); num_dword += entries * 2; return num_dword; -- cgit From e775278cd75f24a2758c28558c4e41b36c935740 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sun, 30 Mar 2025 12:59:23 -0400 Subject: drm/xe: Invalidate L3 read-only cachelines for geometry streams too Historically, the Vertex Fetcher unit has not been an L3 client. That meant that, when a buffer containing vertex data was written to, it was necessary to issue a PIPE_CONTROL::VF Cache Invalidate to invalidate any VF L2 cachelines associated with that buffer, so the new value would be properly read from memory. Since Tigerlake and later, VERTEX_BUFFER_STATE and 3DSTATE_INDEX_BUFFER have included an "L3 Bypass Enable" bit which userspace drivers can set to request that the vertex fetcher unit snoop L3. However, unlike most true L3 clients, the "VF Cache Invalidate" bit continues to only invalidate the VF L2 cache - and not any associated L3 lines. To handle that, PIPE_CONTROL has a new "L3 Read Only Cache Invalidation Bit", which according to the docs, "controls the invalidation of the Geometry streams cached in L3 cache at the top of the pipe." In other words, the vertex and index buffer data that gets cached in L3 when "L3 Bypass Disable" is set. Mesa always sets L3 Bypass Disable so that the VF unit snoops L3, and whenever it issues a VF Cache Invalidate, it also issues a L3 Read Only Cache Invalidate so that both L2 and L3 vertex data is invalidated. xe is issuing VF cache invalidates too (which handles cases like CPU writes to a buffer between GPU batches). Because userspace may enable L3 snooping, it needs to issue an L3 Read Only Cache Invalidate as well. Fixes significant flickering in Firefox on Meteorlake, which was writing to vertex buffers via the CPU between batches; the missing L3 Read Only invalidates were causing the vertex fetcher to read stale data from L3. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4460 Fixes: 6ef3bb60557d ("drm/xe: enable lite restore") Cc: stable@vger.kernel.org # v6.13+ Signed-off-by: Kenneth Graunke Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250330165923.56410-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 61672806b579dd5a150a042ec9383be2bbc2ae7e) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/instructions/xe_gpu_commands.h | 1 + drivers/gpu/drm/xe/xe_ring_ops.c | 13 +++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h index a255946b6f77..8cfcd3360896 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h @@ -41,6 +41,7 @@ #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) +#define PIPE_CONTROL0_L3_READ_ONLY_CACHE_INVALIDATE BIT(10) /* gen12 */ #define PIPE_CONTROL0_HDC_PIPELINE_FLUSH BIT(9) /* gen12 */ #define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 917fc16de866..a7582b097ae6 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -137,7 +137,8 @@ emit_pipe_control(u32 *dw, int i, u32 bit_group_0, u32 bit_group_1, u32 offset, static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, int i) { - u32 flags = PIPE_CONTROL_CS_STALL | + u32 flags0 = 0; + u32 flags1 = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_COMMAND_CACHE_INVALIDATE | PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | @@ -148,11 +149,15 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, PIPE_CONTROL_STORE_DATA_INDEX; if (invalidate_tlb) - flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags1 |= PIPE_CONTROL_TLB_INVALIDATE; - flags &= ~mask_flags; + flags1 &= ~mask_flags; - return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0); + if (flags1 & PIPE_CONTROL_VF_CACHE_INVALIDATE) + flags0 |= PIPE_CONTROL0_L3_READ_ONLY_CACHE_INVALIDATE; + + return emit_pipe_control(dw, i, flags0, flags1, + LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0); } static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, -- cgit From 88ecb66b9956a14577d513a6c8c28bb2e7989703 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 28 Mar 2025 14:17:52 -0400 Subject: drm/xe: Restore EIO errno return when GuC PC start fails Commit b4b05e53b550 ("drm/xe/guc_pc: Retry and wait longer for GuC PC start"), leads to the following Smatch static checker warning: drivers/gpu/drm/xe/xe_guc_pc.c:1073 xe_guc_pc_start() warn: missing error code here? '_dev_err()' failed. 'ret' = '0' Fixes: c605acb53f44 ("drm/xe/guc_pc: Retry and wait longer for GuC PC start") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/intel-xe/1454a5f1-ee18-4df1-a6b2-a4a3dddcd1cb@stanley.mountain/ Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250328181752.26677-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 3f2bdccbccdcb53b0d316474eafff2e3462a51ad) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_pc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 85215313976c..43b1192ba61c 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -1070,6 +1070,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, SLPC_RESET_EXTENDED_TIMEOUT_MS)) { xe_gt_err(gt, "GuC PC Start failed: Dynamic GT frequency control and GT sleep states are now disabled.\n"); + ret = -EIO; goto out; } -- cgit