63 files changed, 534 insertions, 216 deletions
diff --git a/drivers/gpu/drm/xe/display/xe_panic.c b/drivers/gpu/drm/xe/display/xe_panic.c
index f32b23338331..df663286092a 100644
--- a/drivers/gpu/drm/xe/display/xe_panic.c
+++ b/drivers/gpu/drm/xe/display/xe_panic.c
@@ -8,20 +8,23 @@
 #include "intel_fb.h"
 #include "intel_panic.h"
 #include "xe_bo.h"
+#include "xe_res_cursor.h"
 
 struct intel_panic {
-	struct page **pages;
+	struct xe_res_cursor res;
+	struct iosys_map vmap;
+
 	int page;
-	void *vaddr;
 };
 
 static void xe_panic_kunmap(struct intel_panic *panic)
 {
-	if (panic->vaddr) {
-		drm_clflush_virt_range(panic->vaddr, PAGE_SIZE);
-		kunmap_local(panic->vaddr);
-		panic->vaddr = NULL;
+	if (!panic->vmap.is_iomem && iosys_map_is_set(&panic->vmap)) {
+		drm_clflush_virt_range(panic->vmap.vaddr, PAGE_SIZE);
+		kunmap_local(panic->vmap.vaddr);
 	}
+	iosys_map_clear(&panic->vmap);
+	panic->page = -1;
 }
 
 /*
@@ -46,15 +49,29 @@ static void xe_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int
 	new_page = offset >> PAGE_SHIFT;
 	offset = offset % PAGE_SIZE;
 	if (new_page != panic->page) {
-		xe_panic_kunmap(panic);
+		if (xe_bo_is_vram(bo)) {
+			/* Display is always mapped on root tile */
+			struct xe_vram_region *vram = xe_bo_device(bo)->mem.vram;
+
+			if (panic->page < 0 || new_page < panic->page) {
+				xe_res_first(bo->ttm.resource, new_page * PAGE_SIZE,
+					     bo->ttm.base.size - new_page * PAGE_SIZE, &panic->res);
+			} else {
+				xe_res_next(&panic->res, PAGE_SIZE * (new_page - panic->page));
+			}
+			iosys_map_set_vaddr_iomem(&panic->vmap,
+						  vram->mapping + panic->res.start);
+		} else {
+			xe_panic_kunmap(panic);
+			iosys_map_set_vaddr(&panic->vmap,
+					    ttm_bo_kmap_try_from_panic(&bo->ttm,
+								       new_page));
+		}
 		panic->page = new_page;
-		panic->vaddr = ttm_bo_kmap_try_from_panic(&bo->ttm,
-							  panic->page);
-	}
-	if (panic->vaddr) {
-		u32 *pix = panic->vaddr + offset;
-		*pix = color;
 	}
+
+	if (iosys_map_is_set(&panic->vmap))
+		iosys_map_wr(&panic->vmap, offset, u32, color);
 }
 
 struct intel_panic *intel_panic_alloc(void)
@@ -68,6 +85,12 @@ struct intel_panic *intel_panic_alloc(void)
 
 int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb)
 {
+	struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private;
+	struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base));
+
+	if (xe_bo_is_vram(bo) && !xe_bo_is_visible_vram(bo))
+		return -ENODEV;
+
 	panic->page = -1;
 	sb->set_pixel = xe_panic_page_set_pixel;
 	return 0;
@@ -76,5 +99,4 @@ int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb)
 void intel_panic_finish(struct intel_panic *panic)
 {
 	xe_panic_kunmap(panic);
-	panic->page = -1;
 }
diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
index 8cfcd3360896..5d41ca297447 100644
--- a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
@@ -31,6 +31,12 @@
 #define   XY_FAST_COPY_BLT_D1_DST_TILE4	REG_BIT(30)
 #define   XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK	GENMASK(23, 20)
 
+#define MEM_COPY_CMD (2 << 29 | 0x5a << 22 | 0x8)
+#define   MEM_COPY_PAGE_COPY_MODE REG_BIT(19)
+#define   MEM_COPY_MATRIX_COPY REG_BIT(17)
+#define   MEM_COPY_SRC_MOCS_INDEX_MASK	GENMASK(31, 28)
+#define   MEM_COPY_DST_MOCS_INDEX_MASK	GENMASK(6, 3)
+
 #define	PVC_MEM_SET_CMD		(2 << 29 | 0x5b << 22)
 #define   PVC_MEM_SET_CMD_LEN_DW	7
 #define   PVC_MEM_SET_MATRIX		REG_BIT(17)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 228de47c0f3f..a895a8e801a9 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -37,6 +37,12 @@
 #define GMD_ID					XE_REG(0xd8c)
 #define   GMD_ID_ARCH_MASK			REG_GENMASK(31, 22)
 #define   GMD_ID_RELEASE_MASK			REG_GENMASK(21, 14)
+/*
+ * Spec defines these bits as "Reserved", but then makes them assume some
+ * meaning that depends on the ARCH. To avoid any confusion, call them
+ * SUBIP_FLAG_MASK.
+ */
+#define   GMD_ID_SUBIP_FLAG_MASK		REG_GENMASK(13, 6)
 #define   GMD_ID_REVID				REG_GENMASK(5, 0)
 
 #define FORCEWAKE_ACK_GSC			XE_REG(0xdf8)
@@ -168,6 +174,7 @@
 
 #define XEHP_SLICE_COMMON_ECO_CHICKEN1		XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED)
 #define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE	REG_BIT(14)
+#define   FAST_CLEAR_VALIGN_FIX			REG_BIT(13)
 
 #define XE2LPM_CCCHKNREG1			XE_REG(0x82a8)
 
@@ -544,6 +551,9 @@
 #define SARB_CHICKEN1				XE_REG_MCR(0xe90c)
 #define   COMP_CKN_IN				REG_GENMASK(30, 29)
 
+#define MAIN_GAMCTRL_MODE			XE_REG(0xef00)
+#define   MAIN_GAMCTRL_QUEUE_SELECT		REG_BIT(0)
+
 #define RCU_MODE				XE_REG(0x14800, XE_REG_OPTION_MASKED)
 #define   RCU_MODE_FIXED_SLICE_CCS_MODE		REG_BIT(1)
 #define   RCU_MODE_CCS_ENABLE			REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c
index 37b344df2dc3..4d10a7e2b570 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c
@@ -44,21 +44,27 @@ static void check_media_ip(struct kunit *test)
 	KUNIT_ASSERT_EQ(test, mask, 0);
 }
 
-static void check_platform_gt_count(struct kunit *test)
+static void check_platform_desc(struct kunit *test)
 {
 	const struct pci_device_id *pci = test->param_value;
 	const struct xe_device_desc *desc =
 		(const struct xe_device_desc *)pci->driver_data;
-	int max_gt = desc->max_gt_per_tile;
 
-	KUNIT_ASSERT_GT(test, max_gt, 0);
-	KUNIT_ASSERT_LE(test, max_gt, XE_MAX_GT_PER_TILE);
+	KUNIT_EXPECT_GT(test, desc->dma_mask_size, 0);
+
+	KUNIT_EXPECT_GT(test, (unsigned int)desc->max_gt_per_tile, 0);
+	KUNIT_EXPECT_LE(test, (unsigned int)desc->max_gt_per_tile, XE_MAX_GT_PER_TILE);
+
+	KUNIT_EXPECT_GT(test, desc->va_bits, 0);
+	KUNIT_EXPECT_LE(test, desc->va_bits, 64);
+
+	KUNIT_EXPECT_GT(test, desc->vm_max_level, 0);
 }
 
 static struct kunit_case xe_pci_tests[] = {
 	KUNIT_CASE_PARAM(check_graphics_ip, xe_pci_graphics_ip_gen_param),
 	KUNIT_CASE_PARAM(check_media_ip, xe_pci_media_ip_gen_param),
-	KUNIT_CASE_PARAM(check_platform_gt_count, xe_pci_id_gen_param),
+	KUNIT_CASE_PARAM(check_platform_desc, xe_pci_id_gen_param),
 	{}
 };
 
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 7b6502081873..b0bd31d14bb9 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -610,6 +610,23 @@ static bool xe_ttm_resource_visible(struct ttm_resource *mem)
 	return vres->used_visible_size == mem->size;
 }
 
+/**
+ * xe_bo_is_visible_vram - check if BO is placed entirely in visible VRAM.
+ * @bo: The BO, expected to be currently placed in VRAM
+ *
+ * Check whether the BO's current resource lies entirely in memory visible from
+ * the CPU. Warns and returns false if the BO is not placed in VRAM at all.
+ *
+ * Returns: true if the BO is entirely visible, false otherwise.
+ */
+bool xe_bo_is_visible_vram(struct xe_bo *bo)
+{
+	if (drm_WARN_ON(bo->ttm.base.dev, !xe_bo_is_vram(bo)))
+		return false;
+
+	return xe_ttm_resource_visible(bo->ttm.resource);
+}
+
 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
 				 struct ttm_resource *mem)
 {
@@ -1635,7 +1652,7 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
 	if (!mem_type_is_vram(ttm_bo->resource->mem_type))
 		return -EIO;
 
-	if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) {
+	if (!xe_bo_is_visible_vram(bo) || len >= SZ_16K) {
 		struct xe_migrate *migrate =
 			mem_type_to_migrate(xe, ttm_bo->resource->mem_type);
 
@@ -2105,7 +2122,7 @@ void xe_bo_free(struct xe_bo *bo)
  * if the function should allocate a new one.
  * @tile: The tile to select for migration of this bo, and the tile used for
  * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
- * @resv: Pointer to a locked shared reservation object to use fo this bo,
+ * @resv: Pointer to a locked shared reservation object to use for this bo,
  * or NULL for the xe_bo to use its own.
  * @bulk: The bulk move to use for LRU bumping, or NULL for external bos.
  * @size: The storage size to use for the bo.
@@ -2259,6 +2276,12 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
 	struct ttm_place *place = bo->placements;
 	u32 vram_flag, vram_stolen_flags;
 
+	/*
+	 * Fixed placement in the GGTT is not supported on a VF: to allow it, post-migration
+	 * fixups would have to select a new fixed offset and shift the page ranges for it.
+	 */
+	xe_assert(xe, !IS_SRIOV_VF(xe) || !(bo->flags & XE_BO_FLAG_GGTT));
+
 	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
 		return -EINVAL;
 
@@ -2629,7 +2652,7 @@ struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
  * @size: The storage size to use for the bo.
  * @type: The TTM buffer object type.
  * @flags: XE_BO_FLAG_ flags.
- * @intr: Whether to execut any waits for backing store interruptible.
+ * @intr: Whether to execute any waits for backing store interruptible.
  *
  * Create a pinned and mapped bo. The bo will be external and not associated
  * with a VM.
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 353d607d301d..911d5b90461a 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -274,6 +274,7 @@ int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size);
 
 bool mem_type_is_vram(u32 mem_type);
 bool xe_bo_is_vram(struct xe_bo *bo);
+bool xe_bo_is_visible_vram(struct xe_bo *bo);
 bool xe_bo_is_stolen(struct xe_bo *bo);
 bool xe_bo_is_stolen_devmem(struct xe_bo *bo);
 bool xe_bo_is_vm_bound(struct xe_bo *bo);
diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h
index 25a884c64bf1..401e7dd26ef3 100644
--- a/drivers/gpu/drm/xe/xe_bo_doc.h
+++ b/drivers/gpu/drm/xe/xe_bo_doc.h
@@ -12,7 +12,7 @@
  * BO management
  * =============
  *
- * TTM manages (placement, eviction, etc...) all BOs in XE.
+ * TTM manages (placement, eviction, etc...) all BOs in Xe.
  *
  * BO creation
  * ===========
@@ -29,7 +29,7 @@
  * a kernel BO (e.g. engine state, memory for page tables, etc...). These BOs
  * are typically mapped in the GGTT (any kernel BOs aside memory for page tables
  * are in the GGTT), are pinned (can't move or be evicted at runtime), have a
- * vmap (XE can access the memory via xe_map layer) and have contiguous physical
+ * vmap (Xe can access the memory via xe_map layer) and have contiguous physical
  * memory.
  *
  * More details of why kernel BOs are pinned and contiguous below.
@@ -40,7 +40,7 @@
  * A user BO is created via the DRM_IOCTL_XE_GEM_CREATE IOCTL. Once it is
  * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user
  * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All
- * user BOs are evictable and user BOs are never pinned by XE. The allocation of
+ * user BOs are evictable and user BOs are never pinned by Xe. The allocation of
  * the backing store can be deferred from creation time until first use which is
  * either mmap, bind, or pagefault.
  *
@@ -84,7 +84,7 @@
  * ====================
  *
  * All eviction (or in other words, moving a BO from one memory location to
- * another) is routed through TTM with a callback into XE.
+ * another) is routed through TTM with a callback into Xe.
  *
  * Runtime eviction
  * ----------------
diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c
index c1419a270fa4..9f6251b1008b 100644
--- a/drivers/gpu/drm/xe/xe_configfs.c
+++ b/drivers/gpu/drm/xe/xe_configfs.c
@@ -27,7 +27,7 @@
  * Overview
  * ========
  *
- * Configfs is a filesystem-based manager of kernel objects. XE KMD registers a
+ * Configfs is a filesystem-based manager of kernel objects. Xe KMD registers a
  * configfs subsystem called ``xe`` that creates a directory in the mounted
  * configfs directory. The user can create devices under this directory and
  * configure them as necessary. See Documentation/filesystems/configfs.rst for
@@ -301,7 +301,6 @@ struct engine_info {
 /* Some helpful macros to aid on the sizing of buffer allocation when parsing */
 #define MAX_ENGINE_CLASS_CHARS 5
 #define MAX_ENGINE_INSTANCE_CHARS 2
-#define MAX_GT_TYPE_CHARS 7
 
 static const struct engine_info engine_info[] = {
 	{ .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK, .engine_class = XE_ENGINE_CLASS_RENDER },
@@ -313,7 +312,7 @@ static const struct engine_info engine_info[] = {
 };
 
 static const struct {
-	const char name[MAX_GT_TYPE_CHARS + 1];
+	const char *name;
 	enum xe_gt_type type;
 } gt_types[] = {
 	{ .name = "primary", .type = XE_GT_TYPE_MAIN },
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 5f6a412b571c..47f5391ad8e9 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -1217,7 +1217,7 @@ static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
  *
  *   /sys/bus/pci/devices/<device>/survivability_mode
  *
- * - Admin/userpsace consumer can use firmware flashing tools like fwupd to flash
+ * - Admin/userspace consumer can use firmware flashing tools like fwupd to flash
  *   firmware and restore device to normal operation.
  */
 
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 9e3666a226da..dc17f63f9353 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -222,7 +222,7 @@ struct xe_tile {
 };
 
 /**
- * struct xe_device - Top level struct of XE device
+ * struct xe_device - Top level struct of Xe device
  */
 struct xe_device {
 	/** @drm: drm device */
@@ -245,9 +245,9 @@ struct xe_device {
 		u32 media_verx100;
 		/** @info.mem_region_mask: mask of valid memory regions */
 		u32 mem_region_mask;
-		/** @info.platform: XE platform enum */
+		/** @info.platform: Xe platform enum */
 		enum xe_platform platform;
-		/** @info.subplatform: XE subplatform enum */
+		/** @info.subplatform: Xe subplatform enum */
 		enum xe_subplatform subplatform;
 		/** @info.devid: device ID */
 		u16 devid;
@@ -300,6 +300,8 @@ struct xe_device {
 		 * pcode mailbox commands.
 		 */
 		u8 has_mbx_power_limits:1;
+		/** @info.has_mem_copy_instr: Device supports MEM_COPY instruction */
+		u8 has_mem_copy_instr:1;
 		/** @info.has_pxp: Device has PXP support */
 		u8 has_pxp:1;
 		/** @info.has_range_tlb_inval: Has range based TLB invalidations */
@@ -659,7 +661,7 @@ struct xe_device {
 };
 
 /**
- * struct xe_file - file handle for XE driver
+ * struct xe_file - file handle for Xe driver
  */
 struct xe_file {
 	/** @xe: xe DEVICE **/
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 0dc27476832b..521467d976f7 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -33,7 +33,7 @@
  * - Binding at exec time
  * - Flow controlling the ring at exec time
  *
- * In XE we avoid all of this complication by not allowing a BO list to be
+ * In Xe we avoid all of this complication by not allowing a BO list to be
  * passed into an exec, using the dma-buf implicit sync uAPI, have binds as
  * separate operations, and using the DRM scheduler to flow control the ring.
  * Let's deep dive on each of these.
diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h
index 899fbbcb3ea9..12d6e2367455 100644
--- a/drivers/gpu/drm/xe/xe_force_wake_types.h
+++ b/drivers/gpu/drm/xe/xe_force_wake_types.h
@@ -52,7 +52,7 @@ enum xe_force_wake_domains {
 };
 
 /**
- * struct xe_force_wake_domain - XE force wake domains
+ * struct xe_force_wake_domain - Xe force wake domains
  */
 struct xe_force_wake_domain {
 	/** @id: domain force wake id */
@@ -70,7 +70,7 @@ struct xe_force_wake_domain {
 };
 
 /**
- * struct xe_force_wake - XE force wake
+ * struct xe_force_wake - Xe force wake
  */
 struct xe_force_wake {
 	/** @gt: back pointers to GT */
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 40680f0c49a1..20d226d90c50 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -312,6 +312,9 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
 		ggtt->pt_ops = &xelp_pt_ops;
 
 	ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM);
+	if (!ggtt->wq)
+		return -ENOMEM;
+
 	__xe_ggtt_init_early(ggtt, xe_wopcm_size(xe));
 
 	err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt);
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index d8e94fb8b9bd..89808b33d0a8 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -818,17 +818,19 @@ static int gt_reset(struct xe_gt *gt)
 	unsigned int fw_ref;
 	int err;
 
-	if (xe_device_wedged(gt_to_xe(gt)))
-		return -ECANCELED;
+	if (xe_device_wedged(gt_to_xe(gt))) {
+		err = -ECANCELED;
+		goto err_pm_put;
+	}
 
 	/* We only support GT resets with GuC submission */
-	if (!xe_device_uc_enabled(gt_to_xe(gt)))
-		return -ENODEV;
+	if (!xe_device_uc_enabled(gt_to_xe(gt))) {
+		err = -ENODEV;
+		goto err_pm_put;
+	}
 
 	xe_gt_info(gt, "reset started\n");
 
-	xe_pm_runtime_get(gt_to_xe(gt));
-
 	if (xe_fault_inject_gt_reset()) {
 		err = -ECANCELED;
 		goto err_fail;
@@ -875,6 +877,7 @@ err_fail:
 	xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
 
 	xe_device_declare_wedged(gt_to_xe(gt));
+err_pm_put:
 	xe_pm_runtime_put(gt_to_xe(gt));
 
 	return err;
@@ -896,7 +899,9 @@ void xe_gt_reset_async(struct xe_gt *gt)
 		return;
 
 	xe_gt_info(gt, "reset queued\n");
-	queue_work(gt->ordered_wq, &gt->reset.worker);
+	xe_pm_runtime_get_noresume(gt_to_xe(gt));
+	if (!queue_work(gt->ordered_wq, &gt->reset.worker))
+		xe_pm_runtime_put(gt_to_xe(gt));
 }
 
 void xe_gt_suspend_prepare(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index 701349251bbc..e88f113226bc 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -36,7 +36,7 @@
  * - act_freq: The actual resolved frequency decided by PCODE.
  * - cur_freq: The current one requested by GuC PC to the PCODE.
  * - rpn_freq: The Render Performance (RP) N level, which is the minimal one.
- * - rpa_freq: The Render Performance (RP) A level, which is the achiveable one.
+ * - rpa_freq: The Render Performance (RP) A level, which is the achievable one.
  *   Calculated by PCODE at runtime based on multiple running conditions
  * - rpe_freq: The Render Performance (RP) E level, which is the efficient one.
  *   Calculated by PCODE at runtime based on multiple running conditions
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 81ecd9382635..164010860664 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -268,13 +268,14 @@ static const struct xe_mmio_range xe3p_xpc_gam_grp1_steering_table[] = {
 	{},
 };
 
-static const struct xe_mmio_range xe3p_xpc_psmi_grp19_steering_table[] = {
-	{ 0x00B500, 0x00B5FF },
+static const struct xe_mmio_range xe3p_xpc_node_steering_table[] = {
+	{ 0x00B000, 0x00B0FF },
+	{ 0x00D880, 0x00D8FF },
 	{},
 };
 
 static const struct xe_mmio_range xe3p_xpc_instance0_steering_table[] = {
-	{ 0x00B600, 0x00B6FF },		/* PSMI0 */
+	{ 0x00B500, 0x00B6FF },		/* PSMI */
 	{ 0x00C800, 0x00CFFF },		/* GAMCTRL */
 	{ 0x00F000, 0x00F0FF },		/* GAMCTRL */
 	{},
@@ -282,9 +283,22 @@ static const struct xe_mmio_range xe3p_xpc_instance0_steering_table[] = {
 
 static void init_steering_l3bank(struct xe_gt *gt)
 {
+	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_mmio *mmio = &gt->mmio;
 
-	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+	if (GRAPHICS_VER(xe) >= 35) {
+		unsigned int first_bank = xe_l3_bank_mask_ffs(gt->fuse_topo.l3_bank_mask);
+		const int banks_per_node = 4;
+		unsigned int node = first_bank / banks_per_node;
+
+		/* L3BANK ranges place node in grpID, bank in instanceid */
+		gt->steering[L3BANK].group_target = node;
+		gt->steering[L3BANK].instance_target = first_bank % banks_per_node;
+
+		/* NODE ranges split the node across grpid and instanceid */
+		gt->steering[NODE].group_target = node >> 1;
+		gt->steering[NODE].instance_target = node & 1;
+	} else if (GRAPHICS_VERx100(xe) >= 1270) {
 		u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
 						xe_mmio_read32(mmio, MIRROR_FUSE3));
 		u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK,
@@ -297,7 +311,7 @@ static void init_steering_l3bank(struct xe_gt *gt)
 		gt->steering[L3BANK].group_target = __ffs(mslice_mask);
 		gt->steering[L3BANK].instance_target =
 			bank_mask & BIT(0) ? 0 : 2;
-	} else if (gt_to_xe(gt)->info.platform == XE_DG2) {
+	} else if (xe->info.platform == XE_DG2) {
 		u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
 						xe_mmio_read32(mmio, MIRROR_FUSE3));
 		u32 bank = __ffs(mslice_mask) * 8;
@@ -452,12 +466,6 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt)
 	gt->steering[SQIDI_PSMI].instance_target = select & 0x1;
 }
 
-static void init_steering_psmi(struct xe_gt *gt)
-{
-	gt->steering[PSMI19].group_target = 19;
-	gt->steering[PSMI19].instance_target = 0;
-}
-
 static void init_steering_gam1(struct xe_gt *gt)
 {
 	gt->steering[GAM1].group_target = 1;
@@ -469,12 +477,12 @@ static const struct {
 	void (*init)(struct xe_gt *gt);
 } xe_steering_types[] = {
 	[L3BANK] =	{ "L3BANK",	init_steering_l3bank },
+	[NODE] =	{ "NODE",	NULL }, /* initialized by l3bank init */
 	[MSLICE] =	{ "MSLICE",	init_steering_mslice },
 	[LNCF] =	{ "LNCF",	NULL }, /* initialized by mslice init */
 	[DSS] =		{ "DSS / XeCore", init_steering_dss },
 	[OADDRM] =	{ "OADDRM / GPMXMT", init_steering_oaddrm },
 	[SQIDI_PSMI] =  { "SQIDI_PSMI", init_steering_sqidi_psmi },
-	[PSMI19] =	{ "PSMI[19]",	init_steering_psmi },
 	[GAM1] =	{ "GAMWKRS / STLB / GAMREQSTRM", init_steering_gam1 },
 	[INSTANCE0] =	{ "INSTANCE 0",	NULL },
 	[IMPLICIT_STEERING] = { "IMPLICIT", NULL },
@@ -524,7 +532,8 @@ void xe_gt_mcr_init_early(struct xe_gt *gt)
 			gt->steering[DSS].ranges = xe3p_xpc_xecore_steering_table;
 			gt->steering[GAM1].ranges = xe3p_xpc_gam_grp1_steering_table;
 			gt->steering[INSTANCE0].ranges = xe3p_xpc_instance0_steering_table;
-			gt->steering[PSMI19].ranges = xe3p_xpc_psmi_grp19_steering_table;
+			gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table;
+			gt->steering[NODE].ranges = xe3p_xpc_node_steering_table;
 		} else if (GRAPHICS_VER(xe) >= 20) {
 			gt->steering[DSS].ranges = xe2lpg_dss_steering_table;
 			gt->steering[SQIDI_PSMI].ranges = xe2lpg_sqidi_psmi_steering_table;
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index c4dda87b47cc..0714c758b9c1 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -158,39 +158,19 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
 	xe_gt_sriov_pf_service_update(gt);
 }
 
-static u32 pf_get_vf_regs_stride(struct xe_device *xe)
-{
-	return GRAPHICS_VERx100(xe) > 1200 ? 0x400 : 0x1000;
-}
-
-static struct xe_reg xe_reg_vf_to_pf(struct xe_reg vf_reg, unsigned int vfid, u32 stride)
-{
-	struct xe_reg pf_reg = vf_reg;
-
-	pf_reg.vf = 0;
-	pf_reg.addr += stride * vfid;
-
-	return pf_reg;
-}
-
 static void pf_clear_vf_scratch_regs(struct xe_gt *gt, unsigned int vfid)
 {
-	u32 stride = pf_get_vf_regs_stride(gt_to_xe(gt));
-	struct xe_reg scratch;
-	int n, count;
+	struct xe_mmio mmio;
+	int n;
+
+	xe_mmio_init_vf_view(&mmio, &gt->mmio, vfid);
 
 	if (xe_gt_is_media_type(gt)) {
-		count = MED_VF_SW_FLAG_COUNT;
-		for (n = 0; n < count; n++) {
-			scratch = xe_reg_vf_to_pf(MED_VF_SW_FLAG(n), vfid, stride);
-			xe_mmio_write32(&gt->mmio, scratch, 0);
-		}
+		for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++)
+			xe_mmio_write32(&mmio, MED_VF_SW_FLAG(n), 0);
 	} else {
-		count = VF_SW_FLAG_COUNT;
-		for (n = 0; n < count; n++) {
-			scratch = xe_reg_vf_to_pf(VF_SW_FLAG(n), vfid, stride);
-			xe_mmio_write32(&gt->mmio, scratch, 0);
-		}
+		for (n = 0; n < VF_SW_FLAG_COUNT; n++)
+			xe_mmio_write32(&mmio, VF_SW_FLAG(n), 0);
 	}
 }
 
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
index 2e6bd3d1fe1d..9de05db1f090 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
@@ -997,6 +997,8 @@ static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE);
 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START);
+
+		xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid);
 	}
 }
 
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 46518e629ba3..4c73a077d314 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -31,7 +31,6 @@
 #include "xe_lrc.h"
 #include "xe_memirq.h"
 #include "xe_mmio.h"
-#include "xe_pm.h"
 #include "xe_sriov.h"
 #include "xe_sriov_vf.h"
 #include "xe_sriov_vf_ccs.h"
@@ -739,7 +738,7 @@ static void vf_start_migration_recovery(struct xe_gt *gt)
 		gt->sriov.vf.migration.recovery_queued = true;
 		WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, true);
 		WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, true);
-		smp_wmb();	/* Ensure above writes visable before wake */
+		smp_wmb();	/* Ensure above writes visible before wake */
 
 		xe_guc_ct_wake_waiters(&gt->uc.guc.ct);
 
@@ -1218,7 +1217,6 @@ static void vf_post_migration_recovery(struct xe_gt *gt)
 
 	xe_gt_sriov_dbg(gt, "migration recovery in progress\n");
 
-	xe_pm_runtime_get(xe);
 	retry = vf_post_migration_shutdown(gt);
 	if (retry)
 		goto queue;
@@ -1241,12 +1239,10 @@ static void vf_post_migration_recovery(struct xe_gt *gt)
 
 	vf_post_migration_kickstart(gt);
 
-	xe_pm_runtime_put(xe);
 	xe_gt_sriov_notice(gt, "migration recovery ended\n");
 	return;
 fail:
 	vf_post_migration_abort(gt);
-	xe_pm_runtime_put(xe);
 	xe_gt_sriov_err(gt, "migration recovery failed (%pe)\n", ERR_PTR(err));
 	xe_device_declare_wedged(xe);
 	return;
@@ -1254,7 +1250,6 @@ fail:
 queue:
 	xe_gt_sriov_info(gt, "Re-queuing migration recovery\n");
 	queue_work(gt->ordered_wq, &gt->sriov.vf.migration.worker);
-	xe_pm_runtime_put(xe);
 }
 
 static void migration_worker_func(struct work_struct *w)
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 1e0516ba7422..bd5260221d8d 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -309,6 +309,13 @@ xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
 	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
 }
 
+/* Index of the first set bit in @mask; XE_MAX_L3_BANK_MASK_BITS if no bit is set. */
+unsigned int
+xe_l3_bank_mask_ffs(const xe_l3_bank_mask_t mask)
+{
+	return find_first_bit(mask, XE_MAX_L3_BANK_MASK_BITS);
+}
+
 /**
  * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
  * @gt: GT to check
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index 3ff40f44bf2a..162d603c9b81 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -40,6 +40,8 @@ xe_gt_topology_mask_last_dss(const xe_dss_mask_t mask)
 
 unsigned int
 xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum);
+unsigned int
+xe_l3_bank_mask_ffs(const xe_l3_bank_mask_t mask);
 
 bool
 xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index d93faa1eedef..0b525643a048 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -66,6 +66,7 @@ struct xe_mmio_range {
  */
 enum xe_steering_type {
 	L3BANK,
+	NODE,
 	MSLICE,
 	LNCF,
 	DSS,
@@ -73,14 +74,6 @@ enum xe_steering_type {
 	SQIDI_PSMI,
 
 	/*
-	 * The bspec lists multiple ranges as "PSMI," but the different
-	 * ranges with that label have different grpid steering values so we
-	 * treat them independently in code.  Note that the ranges with grpid=0
-	 * are included in the INSTANCE0 group above.
-	 */
-	PSMI19,
-
-	/*
 	 * Although most GAM ranges must be steered to (0,0) and thus use the
 	 * INSTANCE0 type farther down, some platforms have special rules
 	 * for specific subtypes that require steering to (1,0) instead.
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index d94490979adc..ecc3e091b89e 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -91,6 +91,9 @@ static u32 guc_ctl_feature_flags(struct xe_guc *guc)
 	if (xe_configfs_get_psmi_enabled(to_pci_dev(xe->drm.dev)))
 		flags |= GUC_CTL_ENABLE_PSMI_LOGGING;
 
+	if (xe_guc_using_main_gamctrl_queues(guc))
+		flags |= GUC_CTL_MAIN_GAMCTRL_QUEUES;
+
 	return flags;
 }
 
@@ -1255,8 +1258,13 @@ int xe_guc_min_load_for_hwconfig(struct xe_guc *guc)
 
 int xe_guc_upload(struct xe_guc *guc)
 {
+	struct xe_gt *gt = guc_to_gt(guc);
+
 	xe_guc_ads_populate(&guc->ads);
 
+	if (xe_guc_using_main_gamctrl_queues(guc))
+		xe_mmio_write32(&gt->mmio, MAIN_GAMCTRL_MODE, MAIN_GAMCTRL_QUEUE_SELECT);
+
 	return __xe_guc_upload(guc);
 }
 
@@ -1657,6 +1665,44 @@ void xe_guc_declare_wedged(struct xe_guc *guc)
 	xe_guc_submit_wedge(guc);
 }
 
+/**
+ * xe_guc_using_main_gamctrl_queues() - Detect which reporting queues to use.
+ * @guc: The GuC object
+ *
+ * For Xe3p and beyond, we want to program the hardware to use the
+ * "Main GAMCTRL queue" rather than the legacy queue before we upload
+ * the GuC firmware.  This will allow the GuC to use a new set of
+ * registers for pagefault handling and avoid some unnecessary
+ * complications with MCR register range handling.
+ *
+ * Return: true if the new main GAMCTRL queues can be used, false otherwise.
+ */
+bool xe_guc_using_main_gamctrl_queues(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+
+	/*
+	 * For the Xe3p media GT (35), the GuC and the CS subunits may still be
+	 * Xe3, which lacks Main GAMCTRL support. Reserved bits of the GMD_ID
+	 * register indicate the IP version of the subunits.
+	 */
+	if (xe_gt_is_media_type(gt) && MEDIA_VER(gt_to_xe(gt)) == 35) {
+		u32 val = xe_mmio_read32(&gt->mmio, GMD_ID);
+		u32 subip = REG_FIELD_GET(GMD_ID_SUBIP_FLAG_MASK, val);
+
+		if (!subip)
+			return true;
+
+		xe_gt_WARN(gt, subip != 1,
+			   "GMD_ID has unknown value in the SUBIP_FLAG field - 0x%x\n",
+			   subip);
+
+		return false;
+	}
+
+	return GT_VER(gt) >= 35;
+}
+
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
 #include "tests/xe_guc_g2g_test.c"
 #endif
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index 1cca05967e62..e2d4c5f44ae3 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -52,6 +52,7 @@ void xe_guc_stop_prepare(struct xe_guc *guc);
 void xe_guc_stop(struct xe_guc *guc);
 int xe_guc_start(struct xe_guc *guc);
 void xe_guc_declare_wedged(struct xe_guc *guc);
+bool xe_guc_using_main_gamctrl_queues(struct xe_guc *guc);
 
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
 int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *payload, u32 len);
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 22ac2a8b74c8..bcb85a1bf26d 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -820,16 +820,20 @@ static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
 static void guc_um_init_params(struct xe_guc_ads *ads)
 {
 	u32 um_queue_offset = guc_ads_um_queues_offset(ads);
+	struct xe_guc *guc = ads_to_guc(ads);
 	u64 base_dpa;
 	u32 base_ggtt;
+	bool with_dpa;
 	int i;
 
+	with_dpa = !xe_guc_using_main_gamctrl_queues(guc);
+
 	base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset;
 	base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset;
 
 	for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) {
 		ads_blob_write(ads, um_init_params.queue_params[i].base_dpa,
-			       base_dpa + (i * GUC_UM_QUEUE_SIZE));
+			       with_dpa ? (base_dpa + (i * GUC_UM_QUEUE_SIZE)) : 0);
 		ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address,
 			       base_ggtt + (i * GUC_UM_QUEUE_SIZE));
 		ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes,
diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h
index 70c132458ac3..48a8e092023f 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h
@@ -14,7 +14,7 @@ struct xe_bo;
  * struct xe_guc_ads - GuC additional data structures (ADS)
  */
 struct xe_guc_ads {
-	/** @bo: XE BO for GuC ads blob */
+	/** @bo: Xe BO for GuC ads blob */
 	struct xe_bo *bo;
 	/** @golden_lrc_size: golden LRC size */
 	size_t golden_lrc_size;
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index 8b03b50313d9..09d7ff1ef42a 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -126,7 +126,7 @@ struct xe_fast_req_fence {
  * for the H2G and G2H requests sent and received through the buffers.
  */
 struct xe_guc_ct {
-	/** @bo: XE BO for CT */
+	/** @bo: Xe BO for CT */
 	struct xe_bo *bo;
 	/** @lock: protects everything in CT layer */
 	struct mutex lock;
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 50c4c2406132..c90dd266e9cf 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -113,6 +113,7 @@ struct guc_update_exec_queue_policy {
 #define   GUC_CTL_ENABLE_SLPC		BIT(2)
 #define   GUC_CTL_ENABLE_LITE_RESTORE	BIT(4)
 #define   GUC_CTL_ENABLE_PSMI_LOGGING	BIT(7)
+#define   GUC_CTL_MAIN_GAMCTRL_QUEUES	BIT(9)
 #define   GUC_CTL_DISABLE_SCHEDULER	BIT(14)
 
 #define GUC_CTL_DEBUG			3
diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
index b3d5c72ac752..02851b924aa4 100644
--- a/drivers/gpu/drm/xe/xe_guc_log_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
@@ -44,7 +44,7 @@ struct xe_guc_log_snapshot {
 struct xe_guc_log {
 	/** @level: GuC log level */
 	u32 level;
-	/** @bo: XE BO for GuC log */
+	/** @bo: Xe BO for GuC log */
 	struct xe_bo *bo;
 	/** @stats: logging related stats */
 	struct {
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 0ef67d3523a7..d4ffdb71ef3d 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1920,7 +1920,7 @@ static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
 }
 
 /*
- * All of these functions are an abstraction layer which other parts of XE can
+ * All of these functions are an abstraction layer which other parts of Xe can
  * use to trap into the GuC backend. All of these functions, aside from init,
  * really shouldn't do much other than trap into the DRM scheduler which
  * synchronizes these operations.
diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
index 6bf2103602f8..a80175c7c478 100644
--- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
@@ -207,7 +207,7 @@ static const struct xe_tlb_inval_ops guc_tlb_inval_ops = {
  * @guc: GuC object
  * @tlb_inval: TLB invalidation client
  *
- * Inititialize GuC TLB invalidation by setting back pointer in TLB invalidation
+ * Initialize GuC TLB invalidation by setting back pointer in TLB invalidation
  * client to the GuC and setting GuC backend ops.
  */
 void xe_guc_tlb_inval_init_early(struct xe_guc *guc,
diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h
index f62e0c8b67ab..c44777125691 100644
--- a/drivers/gpu/drm/xe/xe_map.h
+++ b/drivers/gpu/drm/xe/xe_map.h
@@ -14,9 +14,9 @@
  * DOC: Map layer
  *
  * All access to any memory shared with a device (both sysmem and vram) in the
- * XE driver should go through this layer (xe_map). This layer is built on top
+ * Xe driver should go through this layer (xe_map). This layer is built on top
  * of :ref:`driver-api/device-io:Generalizing Access to System and I/O Memory`
- * and with extra hooks into the XE driver that allows adding asserts to memory
+ * and with extra hooks into the Xe driver that allows adding asserts to memory
  * accesses (e.g. for blocking runtime_pm D3Cold on Discrete Graphics).
  */
 
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 3112c966c67d..56a5804726e9 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -699,9 +699,9 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
 }
 
 #define EMIT_COPY_DW 10
-static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
-		      u64 src_ofs, u64 dst_ofs, unsigned int size,
-		      unsigned int pitch)
+static void emit_xy_fast_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+			      u64 dst_ofs, unsigned int size,
+			      unsigned int pitch)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 	u32 mocs = 0;
@@ -730,6 +730,61 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
 	bb->cs[bb->len++] = upper_32_bits(src_ofs);
 }
 
+#define PAGE_COPY_MODE_PS SZ_256 /* hw uses 256 bytes as the page-size */
+static void emit_mem_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+			  u64 dst_ofs, unsigned int size, unsigned int pitch)
+{
+	u32 mode, copy_type, width;
+
+	xe_gt_assert(gt, IS_ALIGNED(size, pitch));
+	xe_gt_assert(gt, pitch <= U16_MAX);
+	xe_gt_assert(gt, pitch);
+	xe_gt_assert(gt, size);
+
+	if (IS_ALIGNED(size, PAGE_COPY_MODE_PS) &&
+	    IS_ALIGNED(lower_32_bits(src_ofs), PAGE_COPY_MODE_PS) &&
+	    IS_ALIGNED(lower_32_bits(dst_ofs), PAGE_COPY_MODE_PS)) {
+		mode = MEM_COPY_PAGE_COPY_MODE;
+		copy_type = 0; /* linear copy */
+		width = size / PAGE_COPY_MODE_PS; /* counted in 256-byte hw pages */
+	} else if (pitch > 1) {
+		xe_gt_assert(gt, size / pitch <= U16_MAX);
+		mode = 0; /* BYTE_COPY */
+		copy_type = MEM_COPY_MATRIX_COPY;
+		width = pitch; /* bytes per row; size / pitch rows are emitted below */
+	} else {
+		mode = 0; /* BYTE_COPY */
+		copy_type = 0; /* linear copy */
+		width = size; /* whole transfer as one run of bytes */
+	}
+
+	xe_gt_assert(gt, width <= U16_MAX);
+
+	bb->cs[bb->len++] = MEM_COPY_CMD | mode | copy_type; /* first of 10 dwords (EMIT_COPY_DW) */
+	bb->cs[bb->len++] = width - 1; /* width, rows and pitches are written as value - 1 */
+	bb->cs[bb->len++] = size / pitch - 1; /* ignored by hw for page-copy/linear above */
+	bb->cs[bb->len++] = pitch - 1; /* NOTE(review): presumably src pitch - confirm */
+	bb->cs[bb->len++] = pitch - 1; /* NOTE(review): presumably dst pitch - confirm */
+	bb->cs[bb->len++] = lower_32_bits(src_ofs);
+	bb->cs[bb->len++] = upper_32_bits(src_ofs);
+	bb->cs[bb->len++] = lower_32_bits(dst_ofs);
+	bb->cs[bb->len++] = upper_32_bits(dst_ofs);
+	bb->cs[bb->len++] = FIELD_PREP(MEM_COPY_SRC_MOCS_INDEX_MASK, gt->mocs.uc_index) |
+			    FIELD_PREP(MEM_COPY_DST_MOCS_INDEX_MASK, gt->mocs.uc_index);
+}
+
+static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
+		      u64 src_ofs, u64 dst_ofs, unsigned int size,
+		      unsigned int pitch)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (xe->info.has_mem_copy_instr) /* per-platform flag copied from xe_device_desc */
+		emit_mem_copy(gt, bb, src_ofs, dst_ofs, size, pitch);
+	else /* no MEM_COPY on this platform: same arguments go to the XY fast-copy emitter */
+		emit_xy_fast_copy(gt, bb, src_ofs, dst_ofs, size, pitch);
+}
+
 static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm)
 {
 	return usm ? m->usm_batch_base_ofs : m->batch_base_ofs;
@@ -847,7 +902,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 				&ccs_it);
 
 	while (size) {
-		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+		u32 batch_size = 1; /* MI_BATCH_BUFFER_END */
 		struct xe_sched_job *job;
 		struct xe_bb *bb;
 		u32 flush_flags = 0;
@@ -1312,7 +1367,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 
 		/* Calculate final sizes and batch size.. */
 		pte_flags = clear_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0;
-		batch_size = 2 +
+		batch_size = 1 +
 			pte_update_size(m, pte_flags, src, &src_it,
 					&clear_L0, &clear_L0_ofs, &clear_L0_pt,
 					clear_bo_data ? emit_clear_cmd_len(gt) : 0, 0,
@@ -1798,11 +1853,15 @@ static void build_pt_update_batch_sram(struct xe_migrate *m,
 	u32 ptes;
 	int i = 0;
 
+	xe_tile_assert(m->tile, PAGE_ALIGNED(size));
+
 	ptes = DIV_ROUND_UP(size, gpu_page_size);
 	while (ptes) {
 		u32 chunk = min(MAX_PTE_PER_SDI, ptes);
 
-		chunk = ALIGN_DOWN(chunk, PAGE_SIZE / XE_PAGE_SIZE);
+		if (!level)
+			chunk = ALIGN_DOWN(chunk, PAGE_SIZE / XE_PAGE_SIZE);
+
 		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
 		bb->cs[bb->len++] = pt_offset;
 		bb->cs[bb->len++] = 0;
@@ -1811,12 +1870,13 @@ static void build_pt_update_batch_sram(struct xe_migrate *m,
 		ptes -= chunk;
 
 		while (chunk--) {
-			u64 addr = sram_addr[i].addr & ~(gpu_page_size - 1);
-			u64 pte, orig_addr = addr;
+			u64 addr = sram_addr[i].addr;
+			u64 pte;
 
 			xe_tile_assert(m->tile, sram_addr[i].proto ==
 				       DRM_INTERCONNECT_SYSTEM);
 			xe_tile_assert(m->tile, addr);
+			xe_tile_assert(m->tile, PAGE_ALIGNED(addr));
 
 again:
 			pte = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
@@ -1827,7 +1887,7 @@ again:
 
 			if (gpu_page_size < PAGE_SIZE) {
 				addr += XE_PAGE_SIZE;
-				if (orig_addr + PAGE_SIZE != addr) {
+				if (!PAGE_ALIGNED(addr)) {
 					chunk--;
 					goto again;
 				}
@@ -1860,6 +1920,25 @@ enum xe_migrate_copy_dir {
 #define XE_CACHELINE_BYTES	64ull
 #define XE_CACHELINE_MASK	(XE_CACHELINE_BYTES - 1)
 
+static u32 xe_migrate_copy_pitch(struct xe_device *xe, u32 len)
+{
+	u32 pitch; /* largest of PAGE_SIZE, 4K, 256, 4, 1 that @len is a multiple of */
+
+	if (IS_ALIGNED(len, PAGE_SIZE))
+		pitch = PAGE_SIZE;
+	else if (IS_ALIGNED(len, SZ_4K))
+		pitch = SZ_4K;
+	else if (IS_ALIGNED(len, SZ_256))
+		pitch = SZ_256;
+	else if (IS_ALIGNED(len, 4))
+		pitch = 4;
+	else
+		pitch = 1; /* @len is not a multiple of 4: fall back to byte granularity */
+
+	xe_assert(xe, pitch > 1 || xe->info.has_mem_copy_instr); /* pitch 1 needs has_mem_copy_instr */
+	return pitch;
+}
+
 static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
 					 unsigned long len,
 					 unsigned long sram_offset,
@@ -1871,25 +1950,25 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
 	struct xe_device *xe = gt_to_xe(gt);
 	bool use_usm_batch = xe->info.has_usm;
 	struct dma_fence *fence = NULL;
-	u32 batch_size = 2;
+	u32 batch_size = 1;
 	u64 src_L0_ofs, dst_L0_ofs;
 	struct xe_sched_job *job;
 	struct xe_bb *bb;
 	u32 update_idx, pt_slot = 0;
 	unsigned long npages = DIV_ROUND_UP(len + sram_offset, PAGE_SIZE);
-	unsigned int pitch = len >= PAGE_SIZE && !(len & ~PAGE_MASK) ?
-		PAGE_SIZE : 4;
+	unsigned int pitch = xe_migrate_copy_pitch(xe, len);
 	int err;
 	unsigned long i, j;
 	bool use_pde = xe_migrate_vram_use_pde(sram_addr, len + sram_offset);
 
-	if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) ||
-			(sram_offset | vram_addr) & XE_CACHELINE_MASK))
+	if (!xe->info.has_mem_copy_instr &&
+	    drm_WARN_ON(&xe->drm,
+			(!IS_ALIGNED(len, pitch)) || (sram_offset | vram_addr) & XE_CACHELINE_MASK))
 		return ERR_PTR(-EOPNOTSUPP);
 
 	xe_assert(xe, npages * PAGE_SIZE <= MAX_PREEMPTDISABLE_TRANSFER);
 
-	batch_size += pte_update_cmd_size(len);
+	batch_size += pte_update_cmd_size(npages << PAGE_SHIFT);
 	batch_size += EMIT_COPY_DW;
 
 	bb = xe_bb_new(gt, batch_size, use_usm_batch);
@@ -1918,10 +1997,10 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
 
 	if (use_pde)
 		build_pt_update_batch_sram(m, bb, m->large_page_copy_pdes,
-					   sram_addr, len + sram_offset, 1);
+					   sram_addr, npages << PAGE_SHIFT, 1);
 	else
 		build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE,
-					   sram_addr, len + sram_offset, 0);
+					   sram_addr, npages << PAGE_SHIFT, 0);
 
 	if (dir == XE_MIGRATE_COPY_TO_VRAM) {
 		if (use_pde)
@@ -1981,7 +2060,7 @@ err:
  *
  * Copy from an array dma addresses to a VRAM device physical address
  *
- * Return: dma fence for migrate to signal completion on succees, ERR_PTR on
+ * Return: dma fence for migrate to signal completion on success, ERR_PTR on
  * failure
  */
 struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
@@ -2002,7 +2081,7 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
  *
  * Copy from a VRAM device physical address to an array dma addresses
  *
- * Return: dma fence for migrate to signal completion on succees, ERR_PTR on
+ * Return: dma fence for migrate to signal completion on success, ERR_PTR on
  * failure
  */
 struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
@@ -2103,8 +2182,10 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
 	xe_bo_assert_held(bo);
 
 	/* Use bounce buffer for small access and unaligned access */
-	if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) ||
-	    !IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) {
+	if (!xe->info.has_mem_copy_instr &&
+	    (!IS_ALIGNED(len, 4) ||
+	     !IS_ALIGNED(page_offset, XE_CACHELINE_BYTES) ||
+	     !IS_ALIGNED(offset, XE_CACHELINE_BYTES))) {
 		int buf_offset = 0;
 		void *bounce;
 		int err;
@@ -2166,6 +2247,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
 		u64 vram_addr = vram_region_gpu_offset(bo->ttm.resource) +
 			cursor.start;
 		int current_bytes;
+		u32 pitch;
 
 		if (cursor.size > MAX_PREEMPTDISABLE_TRANSFER)
 			current_bytes = min_t(int, bytes_left,
@@ -2173,13 +2255,13 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
 		else
 			current_bytes = min_t(int, bytes_left, cursor.size);
 
-		if (current_bytes & ~PAGE_MASK) {
-			int pitch = 4;
-
+		pitch = xe_migrate_copy_pitch(xe, current_bytes);
+		if (xe->info.has_mem_copy_instr)
+			current_bytes = min_t(int, current_bytes, U16_MAX * pitch);
+		else
 			current_bytes = min_t(int, current_bytes,
 					      round_down(S16_MAX * pitch,
 							 XE_CACHELINE_BYTES));
-		}
 
 		__fence = xe_migrate_vram(m, current_bytes,
 					  (unsigned long)buf & ~PAGE_MASK,
diff --git a/drivers/gpu/drm/xe/xe_migrate_doc.h b/drivers/gpu/drm/xe/xe_migrate_doc.h
index 63c7d67b5b62..c082bc0b7068 100644
--- a/drivers/gpu/drm/xe/xe_migrate_doc.h
+++ b/drivers/gpu/drm/xe/xe_migrate_doc.h
@@ -9,7 +9,7 @@
 /**
  * DOC: Migrate Layer
  *
- * The XE migrate layer is used generate jobs which can copy memory (eviction),
+ * The Xe migrate layer is used to generate jobs which can copy memory (eviction),
  * clear memory, or program tables (binds). This layer exists in every GT, has
  * a migrate engine, and uses a special VM for all generated jobs.
  *
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index ef6f3ea573a2..350dca1f0925 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -379,3 +379,32 @@ int xe_mmio_wait32_not(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 va
 {
 	return __xe_mmio_wait32(mmio, reg, mask, val, timeout_us, out_val, atomic, false);
 }
+
+#ifdef CONFIG_PCI_IOV
+static size_t vf_regs_stride(struct xe_device *xe)
+{
+	return GRAPHICS_VERx100(xe) > 1200 ? 0x400 : 0x1000; /* scaled by vfid by the caller */
+}
+
+/**
+ * xe_mmio_init_vf_view() - Initialize an MMIO instance for accesses like the VF
+ * @mmio: the target &xe_mmio to initialize as VF's view
+ * @base: the source &xe_mmio to initialize from (asserted not to be a VF GT view)
+ * @vfid: the VF identifier (asserted to be non-zero)
+ */
+void xe_mmio_init_vf_view(struct xe_mmio *mmio, const struct xe_mmio *base, unsigned int vfid)
+{
+	struct xe_tile *tile = base->tile;
+	struct xe_device *xe = tile->xe;
+	size_t offset = vf_regs_stride(xe) * vfid;
+
+	xe_assert(xe, IS_SRIOV_PF(xe));
+	xe_assert(xe, vfid);
+	xe_assert(xe, !base->sriov_vf_gt);
+	xe_assert(xe, base->regs_size > offset); /* the shifted window must not be empty */
+
+	*mmio = *base; /* inherit every field from @base ... */
+	mmio->regs += offset; /* ... then advance the start by this VF's offset */
+	mmio->regs_size -= offset; /* shrink by the same amount so the end stays put */
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index c151ba569003..15362789ab99 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -42,4 +42,8 @@ static inline struct xe_mmio *xe_root_tile_mmio(struct xe_device *xe)
 	return &xe->tiles[0].mmio;
 }
 
+#ifdef CONFIG_PCI_IOV
+void xe_mmio_init_vf_view(struct xe_mmio *mmio, const struct xe_mmio *base, unsigned int vfid);
+#endif
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index e8ec4114302e..6613d3b48a84 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -568,6 +568,23 @@ static const struct xe_mocs_ops xe2_mocs_ops = {
 	.dump = xe2_mocs_dump,
 };
 
+/*
+ * Note that the "L3" and "L4" register fields actually control the L2 and L3
+ * caches respectively on this platform.
+ */
+static const struct xe_mocs_entry xe3p_xpc_mocs_table[] = {
+	/* Defer to PAT */
+	MOCS_ENTRY(0, XE2_L3_0_WB | L4_3_UC, 0),
+	/* UC */
+	MOCS_ENTRY(1, IG_PAT | XE2_L3_3_UC | L4_3_UC, 0),
+	/* L2 */
+	MOCS_ENTRY(2, IG_PAT | XE2_L3_0_WB | L4_3_UC, 0),
+	/* L3 */
+	MOCS_ENTRY(3, IG_PAT | XE2_L3_3_UC | L4_0_WB, 0),
+	/* L2 + L3 */
+	MOCS_ENTRY(4, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0),
+};
+
 static unsigned int get_mocs_settings(struct xe_device *xe,
 				      struct xe_mocs_info *info)
 {
@@ -576,6 +593,15 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 	memset(info, 0, sizeof(struct xe_mocs_info));
 
 	switch (xe->info.platform) {
+	case XE_CRESCENTISLAND:
+		info->ops = &xe2_mocs_ops;
+		info->table_size = ARRAY_SIZE(xe3p_xpc_mocs_table);
+		info->table = xe3p_xpc_mocs_table;
+		info->num_mocs_regs = XE2_NUM_MOCS_ENTRIES;
+		info->uc_index = 1;
+		info->wb_index = 4;
+		info->unused_entries_index = 4;
+		break;
 	case XE_NOVALAKE_S:
 	case XE_PANTHERLAKE:
 	case XE_LUNARLAKE:
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index c326430e75b5..6e59642e7820 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -342,6 +342,7 @@ static const struct xe_device_desc lnl_desc = {
 	.has_display = true,
 	.has_flat_ccs = 1,
 	.has_pxp = true,
+	.has_mem_copy_instr = true,
 	.max_gt_per_tile = 2,
 	.needs_scratch = true,
 	.va_bits = 48,
@@ -362,6 +363,7 @@ static const struct xe_device_desc bmg_desc = {
 	.has_heci_cscfi = 1,
 	.has_late_bind = true,
 	.has_sriov = true,
+	.has_mem_copy_instr = true,
 	.max_gt_per_tile = 2,
 	.needs_scratch = true,
 	.subplatforms = (const struct xe_subplatform_desc[]) {
@@ -378,6 +380,7 @@ static const struct xe_device_desc ptl_desc = {
 	.has_display = true,
 	.has_flat_ccs = 1,
 	.has_sriov = true,
+	.has_mem_copy_instr = true,
 	.max_gt_per_tile = 2,
 	.needs_scratch = true,
 	.needs_shared_vf_gt_wq = true,
@@ -390,12 +393,27 @@ static const struct xe_device_desc nvls_desc = {
 	.dma_mask_size = 46,
 	.has_display = true,
 	.has_flat_ccs = 1,
+	.has_mem_copy_instr = true,
 	.max_gt_per_tile = 2,
 	.require_force_probe = true,
 	.va_bits = 48,
 	.vm_max_level = 4,
 };
 
+static const struct xe_device_desc cri_desc = {
+	DGFX_FEATURES,
+	PLATFORM(CRESCENTISLAND),
+	.dma_mask_size = 52,
+	.has_display = false,
+	.has_flat_ccs = false,
+	.has_mbx_power_limits = true,
+	.has_sriov = true,
+	.max_gt_per_tile = 2,
+	.require_force_probe = true,
+	.va_bits = 57,
+	.vm_max_level = 4,
+};
+
 #undef PLATFORM
 __diag_pop();
 
@@ -423,6 +441,7 @@ static const struct pci_device_id pciidlist[] = {
 	INTEL_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc),
 	INTEL_PTL_IDS(INTEL_VGA_DEVICE, &ptl_desc),
 	INTEL_NVLS_IDS(INTEL_VGA_DEVICE, &nvls_desc),
+	INTEL_CRI_IDS(INTEL_PCI_DEVICE, &cri_desc),
 	{ }
 };
 MODULE_DEVICE_TABLE(pci, pciidlist);
@@ -655,6 +674,7 @@ static int xe_info_init_early(struct xe_device *xe,
 	xe->info.has_pxp = desc->has_pxp;
 	xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) &&
 		desc->has_sriov;
+	xe->info.has_mem_copy_instr = desc->has_mem_copy_instr;
 	xe->info.skip_guc_pc = desc->skip_guc_pc;
 	xe->info.skip_mtcfg = desc->skip_mtcfg;
 	xe->info.skip_pcode = desc->skip_pcode;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index a4451bdc79fb..9892c063a9c5 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -46,6 +46,7 @@ struct xe_device_desc {
 	u8 has_late_bind:1;
 	u8 has_llc:1;
 	u8 has_mbx_power_limits:1;
+	u8 has_mem_copy_instr:1;
 	u8 has_pxp:1;
 	u8 has_sriov:1;
 	u8 needs_scratch:1;
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
index 78286285c249..f516dbddfd88 100644
--- a/drivers/gpu/drm/xe/xe_platform_types.h
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -25,6 +25,7 @@ enum xe_platform {
 	XE_BATTLEMAGE,
 	XE_PANTHERLAKE,
 	XE_NOVALAKE_S,
+	XE_CRESCENTISLAND,
 };
 
 enum xe_subplatform {
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 53507e09f7bc..7b089e6fb63f 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -102,7 +102,7 @@ static void xe_pm_block_end_signalling(void)
 /**
  * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend
  *
- * Annotation to use where the code might block or sieze to make
+ * Annotation to use where the code might block or seize to make
  * progress pending resume completion.
  */
 void xe_pm_might_block_on_suspend(void)
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
index 312c3372a49f..ac125c697a41 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h
+++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
@@ -12,7 +12,7 @@
 struct xe_exec_queue;
 
 /**
- * struct xe_preempt_fence - XE preempt fence
+ * struct xe_preempt_fence - Xe preempt fence
  *
  * hardware and triggers a callback once the xe_engine is complete.
  */
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index d22fd1ccc0ba..7c5bca78c8bf 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -715,7 +715,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		.vm = vm,
 		.tile = tile,
 		.curs = &curs,
-		.va_curs_start = range ? range->base.itree.start :
+		.va_curs_start = range ? xe_svm_range_start(range) :
 			xe_vma_start(vma),
 		.vma = vma,
 		.wupd.entries = entries,
@@ -734,7 +734,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		}
 		if (xe_svm_range_has_dma_mapping(range)) {
 			xe_res_first_dma(range->base.pages.dma_addr, 0,
-					 range->base.itree.last + 1 - range->base.itree.start,
+					 xe_svm_range_size(range),
 					 &curs);
 			xe_svm_range_debug(range, "BIND PREPARE - MIXED");
 		} else {
@@ -778,8 +778,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 
 walk_pt:
 	ret = xe_pt_walk_range(&pt->base, pt->level,
-			       range ? range->base.itree.start : xe_vma_start(vma),
-			       range ? range->base.itree.last + 1 : xe_vma_end(vma),
+			       range ? xe_svm_range_start(range) : xe_vma_start(vma),
+			       range ? xe_svm_range_end(range) : xe_vma_end(vma),
 			       &xe_walk.base);
 
 	*num_entries = xe_walk.wupd.num_used_entries;
@@ -975,8 +975,8 @@ bool xe_pt_zap_ptes_range(struct xe_tile *tile, struct xe_vm *vm,
 	if (!(pt_mask & BIT(tile->id)))
 		return false;
 
-	(void)xe_pt_walk_shared(&pt->base, pt->level, range->base.itree.start,
-				range->base.itree.last + 1, &xe_walk.base);
+	(void)xe_pt_walk_shared(&pt->base, pt->level, xe_svm_range_start(range),
+				xe_svm_range_end(range), &xe_walk.base);
 
 	return xe_walk.needs_invalidate;
 }
@@ -1661,8 +1661,8 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile,
 				       struct xe_svm_range *range,
 				       struct xe_vm_pgtable_update *entries)
 {
-	u64 start = range ? range->base.itree.start : xe_vma_start(vma);
-	u64 end = range ? range->base.itree.last + 1 : xe_vma_end(vma);
+	u64 start = range ? xe_svm_range_start(range) : xe_vma_start(vma);
+	u64 end = range ? xe_svm_range_end(range) : xe_vma_end(vma);
 	struct xe_pt_stage_unbind_walk xe_walk = {
 		.base = {
 			.ops = &xe_pt_stage_unbind_ops,
@@ -1872,7 +1872,7 @@ static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile,
 
 	vm_dbg(&xe_vma_vm(vma)->xe->drm,
 	       "Preparing bind, with range [%lx...%lx)\n",
-	       range->base.itree.start, range->base.itree.last);
+	       xe_svm_range_start(range), xe_svm_range_end(range) - 1);
 
 	pt_op->vma = NULL;
 	pt_op->bind = true;
@@ -1887,8 +1887,8 @@ static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile,
 					pt_op->num_entries, true);
 
 		xe_pt_update_ops_rfence_interval(pt_update_ops,
-						 range->base.itree.start,
-						 range->base.itree.last + 1);
+						 xe_svm_range_start(range),
+						 xe_svm_range_end(range));
 		++pt_update_ops->current_op;
 		pt_update_ops->needs_svm_lock = true;
 
@@ -1983,7 +1983,7 @@ static int unbind_range_prepare(struct xe_vm *vm,
 
 	vm_dbg(&vm->xe->drm,
 	       "Preparing unbind, with range [%lx...%lx)\n",
-	       range->base.itree.start, range->base.itree.last);
+	       xe_svm_range_start(range), xe_svm_range_end(range) - 1);
 
 	pt_op->vma = XE_INVALID_VMA;
 	pt_op->bind = false;
@@ -1994,8 +1994,8 @@ static int unbind_range_prepare(struct xe_vm *vm,
 
 	xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
 				pt_op->num_entries, false);
-	xe_pt_update_ops_rfence_interval(pt_update_ops, range->base.itree.start,
-					 range->base.itree.last + 1);
+	xe_pt_update_ops_rfence_interval(pt_update_ops, xe_svm_range_start(range),
+					 xe_svm_range_end(range));
 	++pt_update_ops->current_op;
 	pt_update_ops->needs_svm_lock = true;
 	pt_update_ops->needs_invalidation |= xe_vm_has_scratch(vm) ||
diff --git a/drivers/gpu/drm/xe/xe_range_fence.h b/drivers/gpu/drm/xe/xe_range_fence.h
index edd58b34f5c0..4934729dd904 100644
--- a/drivers/gpu/drm/xe/xe_range_fence.h
+++ b/drivers/gpu/drm/xe/xe_range_fence.h
@@ -13,13 +13,13 @@
 struct xe_range_fence_tree;
 struct xe_range_fence;
 
-/** struct xe_range_fence_ops - XE range fence ops */
+/** struct xe_range_fence_ops - Xe range fence ops */
 struct xe_range_fence_ops {
 	/** @free: free range fence op */
 	void (*free)(struct xe_range_fence *rfence);
 };
 
-/** struct xe_range_fence - XE range fence (address conflict tracking) */
+/** struct xe_range_fence - Xe range fence (address conflict tracking) */
 struct xe_range_fence {
 	/** @rb: RB tree node inserted into interval tree */
 	struct rb_node rb;
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index d21bf8f26964..6ae4cc6a3802 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -160,11 +160,11 @@ err_free:
 }
 
 /**
- * xe_sched_job_destroy - Destroy XE schedule job
- * @ref: reference to XE schedule job
+ * xe_sched_job_destroy - Destroy Xe schedule job
+ * @ref: reference to Xe schedule job
  *
  * Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup
- * base DRM schedule job, and free memory for XE schedule job.
+ * base DRM schedule job, and free memory for Xe schedule job.
  */
 void xe_sched_job_destroy(struct kref *ref)
 {
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
index 3dc72c5c1f13..b467131b6d5f 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.h
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -23,10 +23,10 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
 void xe_sched_job_destroy(struct kref *ref);
 
 /**
- * xe_sched_job_get - get reference to XE schedule job
- * @job: XE schedule job object
+ * xe_sched_job_get - get reference to Xe schedule job
+ * @job: Xe schedule job object
  *
- * Increment XE schedule job's reference count
+ * Increment Xe schedule job's reference count
  */
 static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job)
 {
@@ -35,10 +35,10 @@ static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job)
 }
 
 /**
- * xe_sched_job_put - put reference to XE schedule job
- * @job: XE schedule job object
+ * xe_sched_job_put - put reference to Xe schedule job
+ * @job: Xe schedule job object
  *
- * Decrement XE schedule job's reference count, call xe_sched_job_destroy when
+ * Decrement Xe schedule job's reference count, call xe_sched_job_destroy when
  * reference count == 0.
  */
 static inline void xe_sched_job_put(struct xe_sched_job *job)
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
index 13e7a12b03ad..d26612abb4ca 100644
--- a/drivers/gpu/drm/xe/xe_sched_job_types.h
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -32,7 +32,7 @@ struct xe_job_ptrs {
 };
 
 /**
- * struct xe_sched_job - XE schedule job (batch buffer tracking)
+ * struct xe_sched_job - Xe schedule job (batch buffer tracking)
  */
 struct xe_sched_job {
 	/** @drm: base DRM scheduler job */
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
index 911d5720917b..39c829daa97c 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -130,10 +130,15 @@
 bool xe_sriov_vf_migration_supported(struct xe_device *xe)
 {
 	xe_assert(xe, IS_SRIOV_VF(xe));
-	return xe->sriov.vf.migration.enabled;
+	return !xe->sriov.vf.migration.disabled;
 }
 
-static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...)
+/**
+ * xe_sriov_vf_migration_disable - Turn off VF migration with given log message.
+ * @xe: the &xe_device instance.
+ * @fmt: printf-style format string for the log message, followed by its arguments.
+ */
+void xe_sriov_vf_migration_disable(struct xe_device *xe, const char *fmt, ...)
 {
 	struct va_format vaf;
 	va_list va_args;
@@ -146,7 +151,7 @@ static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...)
 	xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf);
 	va_end(va_args);
 
-	xe->sriov.vf.migration.enabled = false;
+	xe->sriov.vf.migration.disabled = true;
 }
 
 static void vf_migration_init_early(struct xe_device *xe)
@@ -156,25 +161,12 @@ static void vf_migration_init_early(struct xe_device *xe)
 	 * supported at production quality.
 	 */
 	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
-		return vf_disable_migration(xe,
-					    "experimental feature not available on production builds");
-
-	if (GRAPHICS_VER(xe) < 20)
-		return vf_disable_migration(xe, "requires gfx version >= 20, but only %u found",
-					    GRAPHICS_VER(xe));
+		return xe_sriov_vf_migration_disable(xe,
+				"experimental feature not available on production builds");
 
-	if (!IS_DGFX(xe)) {
-		struct xe_uc_fw_version guc_version;
+	if (!xe_device_has_memirq(xe))
+		return xe_sriov_vf_migration_disable(xe, "requires memory-based IRQ support");
 
-		xe_gt_sriov_vf_guc_versions(xe_device_get_gt(xe, 0), NULL, &guc_version);
-		if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0))
-			return vf_disable_migration(xe,
-						    "CCS migration requires GuC ABI >= 1.23 but only %u.%u found",
-						    guc_version.major, guc_version.minor);
-	}
-
-	xe->sriov.vf.migration.enabled = true;
-	xe_sriov_dbg(xe, "migration support enabled\n");
 }
 
 /**
@@ -196,12 +188,7 @@ void xe_sriov_vf_init_early(struct xe_device *xe)
  */
 int xe_sriov_vf_init_late(struct xe_device *xe)
 {
-	int err = 0;
-
-	if (xe_sriov_vf_migration_supported(xe))
-		err = xe_sriov_vf_ccs_init(xe);
-
-	return err;
+	return xe_sriov_vf_ccs_init(xe);
 }
 
 static int sa_info_vf_ccs(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h
index 4df95266b261..e967d4166a43 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.h
@@ -14,6 +14,7 @@ struct xe_device;
 void xe_sriov_vf_init_early(struct xe_device *xe);
 int xe_sriov_vf_init_late(struct xe_device *xe);
 bool xe_sriov_vf_migration_supported(struct xe_device *xe);
+void xe_sriov_vf_migration_disable(struct xe_device *xe, const char *fmt, ...);
 void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
index 790249801364..797a4b866226 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
@@ -10,6 +10,8 @@
 #include "xe_device.h"
 #include "xe_exec_queue.h"
 #include "xe_exec_queue_types.h"
+#include "xe_gt_sriov_vf.h"
+#include "xe_guc.h"
 #include "xe_guc_submit.h"
 #include "xe_lrc.h"
 #include "xe_migrate.h"
@@ -260,6 +262,45 @@ int xe_sriov_vf_ccs_register_context(struct xe_device *xe)
 	return err;
 }
 
+/*
+ * Check whether GuC requires CCS copy BBs for VF migration.
+ * @xe: the &xe_device instance.
+ *
+ * Only non-DGFX platforms with flat CCS need VF KMD to keep CCS copy BBs and linked LRCAs.
+ *
+ * Return: true if VF driver must participate in the CCS migration, false otherwise.
+ */
+static bool vf_migration_ccs_bb_needed(struct xe_device *xe)
+{
+	xe_assert(xe, IS_SRIOV_VF(xe));
+
+	return !IS_DGFX(xe) && xe_device_has_flat_ccs(xe);
+}
+
+/*
+ * Check that the GuC FW supports CCS BBs, disabling VF migration if it does not.
+ * @xe: the &xe_device instance.
+ *
+ * Performs late disable of the VF migration feature when the GuC ABI is older than 1.23.
+ *
+ * Return: true if VF migration with CCS BBs is supported, false otherwise.
+ */
+static bool vf_migration_ccs_bb_support_check(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	struct xe_uc_fw_version guc_version;
+
+	xe_gt_sriov_vf_guc_versions(gt, NULL, &guc_version);
+	if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) {
+		xe_sriov_vf_migration_disable(xe,
+					      "CCS migration requires GuC ABI >= 1.23 but only %u.%u found",
+					      guc_version.major, guc_version.minor);
+		return false;
+	}
+
+	return true;
+}
+
 static void xe_sriov_vf_ccs_fini(void *arg)
 {
 	struct xe_sriov_vf_ccs_ctx *ctx = arg;
@@ -292,9 +333,10 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
 	int err;
 
 	xe_assert(xe, IS_SRIOV_VF(xe));
-	xe_assert(xe, xe_sriov_vf_migration_supported(xe));
 
-	if (IS_DGFX(xe) || !xe_device_has_flat_ccs(xe))
+	if (!xe_sriov_vf_migration_supported(xe) ||
+	    !vf_migration_ccs_bb_needed(xe) ||
+	    !vf_migration_ccs_bb_support_check(xe))
 		return 0;
 
 	for_each_ccs_rw_ctx(ctx_id) {
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h
index 6a0fd0f5463e..d5f72d667817 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h
@@ -34,10 +34,10 @@ struct xe_device_vf {
 	/** @migration: VF Migration state data */
 	struct {
 		/**
-		 * @migration.enabled: flag indicating if migration support
-		 * was enabled or not due to missing prerequisites
+		 * @migration.disabled: flag indicating if migration support
+		 * was turned off due to missing prerequisites
 		 */
-		bool enabled;
+		bool disabled;
 	} migration;
 
 	/** @ccs: VF CCS state data */
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 129e7818565c..13af589715a7 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -633,7 +633,7 @@ err_out:
 
 	/*
 	 * XXX: We can't derive the GT here (or anywhere in this functions, but
-	 * compute always uses the primary GT so accumlate stats on the likely
+	 * compute always uses the primary GT so accumulate stats on the likely
 	 * GT of the fault.
 	 */
 	if (gt)
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.h b/drivers/gpu/drm/xe/xe_tlb_inval.h
index 554634dfd4e2..05614915463a 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval.h
+++ b/drivers/gpu/drm/xe/xe_tlb_inval.h
@@ -33,7 +33,7 @@ void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval,
  * xe_tlb_inval_fence_wait() - TLB invalidiation fence wait
  * @fence: TLB invalidation fence to wait on
  *
- * Wait on a TLB invalidiation fence until it signals, non interruptable
+ * Wait on a TLB invalidation fence until it signals, non interruptible
  */
 static inline void
 xe_tlb_inval_fence_wait(struct xe_tlb_inval_fence *fence)
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index e368b2a36bac..1bddecfb723a 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -106,7 +106,7 @@ static u64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 
 	stolen_size = tile_size - mgr->stolen_base;
 
-	xe_assert(xe, stolen_size > wopcm_size);
+	xe_assert(xe, stolen_size >= wopcm_size);
 	stolen_size -= wopcm_size;
 
 	/* Verify usage fits in the actual resource available */
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
index 1144f9232ebb..a71e14818ec2 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -10,7 +10,7 @@
 #include <drm/ttm/ttm_device.h>
 
 /**
- * struct xe_ttm_vram_mgr - XE TTM VRAM manager
+ * struct xe_ttm_vram_mgr - Xe TTM VRAM manager
  *
  * Manages placement of TTM resource in VRAM.
  */
@@ -32,7 +32,7 @@ struct xe_ttm_vram_mgr {
 };
 
 /**
- * struct xe_ttm_vram_mgr_resource - XE TTM VRAM resource
+ * struct xe_ttm_vram_mgr_resource - Xe TTM VRAM resource
  */
 struct xe_ttm_vram_mgr_resource {
 	/** @base: Base TTM resource */
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
index 77a1dcf8b4ed..2ebe8c9db6ce 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -62,7 +62,7 @@ enum xe_uc_fw_type {
 };
 
 /**
- * struct xe_uc_fw_version - Version for XE micro controller firmware
+ * struct xe_uc_fw_version - Version for Xe micro controller firmware
  */
 struct xe_uc_fw_version {
 	/** @branch: branch version of the FW (not always available) */
@@ -84,7 +84,7 @@ enum xe_uc_fw_version_types {
 };
 
 /**
- * struct xe_uc_fw - XE micro controller firmware
+ * struct xe_uc_fw - Xe micro controller firmware
  */
 struct xe_uc_fw {
 	/** @type: type uC firmware */
@@ -112,7 +112,7 @@ struct xe_uc_fw {
 	/** @size: size of uC firmware including css header */
 	size_t size;
 
-	/** @bo: XE BO for uC firmware */
+	/** @bo: Xe BO for uC firmware */
 	struct xe_bo *bo;
 
 	/** @has_gsc_headers: whether the FW image starts with GSC headers */
diff --git a/drivers/gpu/drm/xe/xe_uc_types.h b/drivers/gpu/drm/xe/xe_uc_types.h
index 9924e4484866..1708379dc834 100644
--- a/drivers/gpu/drm/xe/xe_uc_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_types.h
@@ -12,7 +12,7 @@
 #include "xe_wopcm_types.h"
 
 /**
- * struct xe_uc - XE micro controllers
+ * struct xe_uc - Xe micro controllers
  */
 struct xe_uc {
 	/** @guc: Graphics micro controller */
diff --git a/drivers/gpu/drm/xe/xe_validation.h b/drivers/gpu/drm/xe/xe_validation.h
index fec331d791e7..1ef181c90434 100644
--- a/drivers/gpu/drm/xe/xe_validation.h
+++ b/drivers/gpu/drm/xe/xe_validation.h
@@ -108,7 +108,7 @@ struct xe_val_flags {
  * @request_exclusive: Whether to lock exclusively (write mode) the next time
  * the domain lock is locked.
  * @exec_flags: The drm_exec flags used for drm_exec (re-)initialization.
- * @nr: The drm_exec nr parameter used for drm_exec (re-)initializaiton.
+ * @nr: The drm_exec nr parameter used for drm_exec (re-)initialization.
  */
 struct xe_validation_ctx {
 	struct drm_exec *exec;
@@ -137,7 +137,7 @@ bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret);
  * @_ret: The current error value possibly holding -ENOMEM
  *
  * Use this in way similar to drm_exec_retry_on_contention().
- * If @_ret contains -ENOMEM the tranaction is restarted once in a way that
+ * If @_ret contains -ENOMEM the transaction is restarted once in a way that
  * blocks other transactions and allows exhastive eviction. If the transaction
  * was already restarted once, Just return the -ENOMEM. May also set
  * _ret to -EINTR if not retrying and waits are interruptible.
@@ -180,7 +180,7 @@ static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T)
  * @_val: The xe_validation_device.
  * @_exec: The struct drm_exec object
  * @_flags: Flags for the xe_validation_ctx initialization.
- * @_ret: Return in / out parameter. May be set by this macro. Typicall 0 when called.
+ * @_ret: Return in / out parameter. May be set by this macro. Typically 0 when called.
  *
  * This macro is will initiate a drm_exec transaction with additional support for
  * exhaustive eviction.
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 10d77666a425..00f3520dec38 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -824,7 +824,7 @@ xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
  *
  * (re)bind SVM range setting up GPU page tables for the range.
  *
- * Return: dma fence for rebind to signal completion on succees, ERR_PTR on
+ * Return: dma fence for rebind to signal completion on success, ERR_PTR on
  * failure
  */
 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
@@ -907,7 +907,7 @@ xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
  *
  * Unbind SVM range removing the GPU page tables for the range.
  *
- * Return: dma fence for unbind to signal completion on succees, ERR_PTR on
+ * Return: dma fence for unbind to signal completion on success, ERR_PTR on
  * failure
  */
 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
@@ -1291,7 +1291,7 @@ static u16 pde_pat_index(struct xe_bo *bo)
 	 * selection of options. The user PAT index is only for encoding leaf
 	 * nodes, where we have use of more bits to do the encoding. The
 	 * non-leaf nodes are instead under driver control so the chosen index
-	 * here should be distict from the user PAT index. Also the
+	 * here should be distinct from the user PAT index. Also the
 	 * corresponding coherency of the PAT index should be tied to the
 	 * allocation type of the page table (or at least we should pick
 	 * something which is always safe).
@@ -4172,7 +4172,7 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
 
 /**
  * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
- * @xe: Pointer to the XE device structure
+ * @xe: Pointer to the Xe device structure
  * @vma: Pointer to the virtual memory area (VMA) structure
  * @is_atomic: In pagefault path and atomic operation
  *
@@ -4319,7 +4319,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm,
 			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
 		} else if (__op->op == DRM_GPUVA_OP_MAP) {
 			vma = op->map.vma;
-			/* In case of madvise call, MAP will always be follwed by REMAP.
+			/* In case of madvise call, MAP will always be followed by REMAP.
 			 * Therefore temp_attr will always have sane values, making it safe to
 			 * copy them to new vma.
 			 */
diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h
index 1030ce214032..02e5288373c9 100644
--- a/drivers/gpu/drm/xe/xe_vm_doc.h
+++ b/drivers/gpu/drm/xe/xe_vm_doc.h
@@ -7,7 +7,7 @@
 #define _XE_VM_DOC_H_
 
 /**
- * DOC: XE VM (user address space)
+ * DOC: Xe VM (user address space)
  *
  * VM creation
  * ===========
@@ -202,13 +202,13 @@
  * User pointers are user allocated memory (malloc'd, mmap'd, etc..) for which the
  * user wants to create a GPU mapping. Typically in other DRM drivers a dummy BO
  * was created and then a binding was created. We bypass creating a dummy BO in
- * XE and simply create a binding directly from the userptr.
+ * Xe and simply create a binding directly from the userptr.
  *
  * Invalidation
  * ------------
  *
  * Since this a core kernel managed memory the kernel can move this memory
- * whenever it wants. We register an invalidation MMU notifier to alert XE when
+ * whenever it wants. We register an invalidation MMU notifier to alert Xe when
  * a user pointer is about to move. The invalidation notifier needs to block
  * until all pending users (jobs or compute mode engines) of the userptr are
  * idle to ensure no faults. This done by waiting on all of VM's dma-resv slots.
@@ -419,7 +419,7 @@
  * =======
  *
  * VM locking protects all of the core data paths (bind operations, execs,
- * evictions, and compute mode rebind worker) in XE.
+ * evictions, and compute mode rebind worker) in Xe.
  *
  * Locks
  * -----
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index d6e2a0fdd4b3..830ed7b05c27 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -52,7 +52,7 @@ struct xe_vm_pgtable_update_op;
  * struct xe_vma_mem_attr - memory attributes associated with vma
  */
 struct xe_vma_mem_attr {
-	/** @preferred_loc: perferred memory_location */
+	/** @preferred_loc: preferred memory_location */
 	struct {
 		/** @preferred_loc.migration_policy: Pages migration policy */
 		u32 migration_policy;
@@ -338,7 +338,7 @@ struct xe_vm {
 	u64 tlb_flush_seqno;
 	/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
 	bool batch_invalidate_tlb;
-	/** @xef: XE file handle for tracking this VM's drm client */
+	/** @xef: Xe file handle for tracking this VM's drm client */
 	struct xe_file *xef;
 };
 
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index b6dcd9827354..ec638b431131 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -916,6 +916,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE))
 	},
+	{ XE_RTP_NAME("14024681466"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1, FAST_CLEAR_VALIGN_FIX))
+	},
 };
 
 static __maybe_unused const struct xe_rtp_entry oob_was[] = {
diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h
index 9f095a99d6c9..6e53fb4cdd37 100644
--- a/include/drm/intel/pciids.h
+++ b/include/drm/intel/pciids.h
@@ -893,4 +893,8 @@
 	MACRO__(0xD744, ## __VA_ARGS__), \
 	MACRO__(0xD745, ## __VA_ARGS__)
 
+/* CRI */
+#define INTEL_CRI_IDS(MACRO__, ...) \
+	MACRO__(0x674C, ## __VA_ARGS__)
+
 #endif /* __PCIIDS_H__ */