summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/gpu/xe/index.rst1
-rw-r--r--Documentation/gpu/xe/xe_devcoredump.rst14
-rw-r--r--drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h7
-rw-r--r--drivers/gpu/drm/i915/display/intel_tc.c39
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_vm.c40
-rw-r--r--drivers/gpu/drm/xe/Makefile5
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_abi.h20
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h38
-rw-r--r--drivers/gpu/drm/xe/abi/guc_klvs_abi.h14
-rw-r--r--drivers/gpu/drm/xe/display/ext/i915_irq.c13
-rw-r--r--drivers/gpu/drm/xe/display/intel_bo.c25
-rw-r--r--drivers/gpu/drm/xe/display/xe_fb_pin.c12
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h2
-rw-r--r--drivers/gpu/drm/xe/regs/xe_oa_regs.h9
-rw-r--r--drivers/gpu/drm/xe/regs/xe_pmt.h19
-rw-r--r--drivers/gpu/drm/xe/tests/xe_bo.c7
-rw-r--r--drivers/gpu/drm/xe/tests/xe_migrate.c17
-rw-r--r--drivers/gpu/drm/xe/xe_assert.h8
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c182
-rw-r--r--drivers/gpu/drm/xe/xe_bo.h33
-rw-r--r--drivers/gpu/drm/xe/xe_bo_evict.c14
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h5
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.c119
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.h7
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump_types.h10
-rw-r--r--drivers/gpu/drm/xe/xe_device.c8
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h57
-rw-r--r--drivers/gpu/drm/xe/xe_drm_client.c80
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c7
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.c35
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler.h10
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_proxy.c47
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c4
-rw-r--r--drivers/gpu/drm/xe/xe_gt_printk.h31
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c78
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h3
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c5
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c25
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.c63
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.h2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats.h8
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats_types.h15
-rw-r--r--drivers/gpu/drm/xe/xe_gt_throttle.c2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c21
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_types.h4
-rw-r--r--drivers/gpu/drm/xe/xe_guc.c320
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.c11
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture.c33
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture.h6
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c32
-rw-r--r--drivers/gpu/drm/xe/xe_guc_fwif.h1
-rw-r--r--drivers/gpu/drm/xe/xe_guc_klv_helpers.c2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c151
-rw-r--r--drivers/gpu/drm/xe/xe_guc_types.h10
-rw-r--r--drivers/gpu/drm/xe/xe_heci_gsc.c8
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine.c11
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine.h4
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_irq.c37
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c29
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.h4
-rw-r--r--drivers/gpu/drm/xe/xe_macros.h12
-rw-r--r--drivers/gpu/drm/xe/xe_memirq.c26
-rw-r--r--drivers/gpu/drm/xe/xe_module.c2
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c55
-rw-r--r--drivers/gpu/drm/xe/xe_oa_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c3
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c6
-rw-r--r--drivers/gpu/drm/xe/xe_query.c4
-rw-r--r--drivers/gpu/drm/xe/xe_reg_sr.c84
-rw-r--r--drivers/gpu/drm/xe/xe_reg_sr_types.h6
-rw-r--r--drivers/gpu/drm/xe/xe_reg_whitelist.c37
-rw-r--r--drivers/gpu/drm/xe/xe_sriov.c4
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_helpers.h2
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_types.h17
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.c263
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.h14
-rw-r--r--drivers/gpu/drm/xe/xe_trace.h11
-rw-r--r--drivers/gpu/drm/xe/xe_trace_bo.h5
-rw-r--r--drivers/gpu/drm/xe/xe_trace_lrc.c9
-rw-r--r--drivers/gpu/drm/xe/xe_trace_lrc.h52
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr.c53
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c32
-rw-r--r--drivers/gpu/drm/xe/xe_vm_doc.h22
-rw-r--r--drivers/gpu/drm/xe/xe_vsec.c233
-rw-r--r--drivers/gpu/drm/xe/xe_vsec.h11
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c6
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules1
-rw-r--r--include/drm/intel/xe_pciids.h235
-rw-r--r--include/drm/ttm/ttm_bo.h2
-rw-r--r--include/uapi/drm/xe_drm.h9
94 files changed, 2461 insertions, 588 deletions
diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst
index 3f07aa3b5432..92cfb25e64d3 100644
--- a/Documentation/gpu/xe/index.rst
+++ b/Documentation/gpu/xe/index.rst
@@ -23,4 +23,5 @@ DG2, etc is provided to prototype the driver.
xe_firmware
xe_tile
xe_debugging
+ xe_devcoredump
xe-drm-usage-stats.rst
diff --git a/Documentation/gpu/xe/xe_devcoredump.rst b/Documentation/gpu/xe/xe_devcoredump.rst
new file mode 100644
index 000000000000..ae4ec0e34dc0
--- /dev/null
+++ b/Documentation/gpu/xe/xe_devcoredump.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+==================
+Xe Device Coredump
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_devcoredump.c
+ :doc: Xe device coredump
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_devcoredump.c
+ :internal:
diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h
index f0e5c196eae4..c685479c9756 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h
@@ -200,6 +200,13 @@
#define XELPDP_SSC_ENABLE_PLLA REG_BIT(1)
#define XELPDP_SSC_ENABLE_PLLB REG_BIT(0)
+#define TCSS_DISP_MAILBOX_IN_CMD _MMIO(0x161300)
+#define TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY REG_BIT(31)
+#define TCSS_DISP_MAILBOX_IN_CMD_CMD_MASK REG_GENMASK(7, 0)
+#define TCSS_DISP_MAILBOX_IN_CMD_DATA(val) REG_FIELD_PREP(TCSS_DISP_MAILBOX_IN_CMD_CMD_MASK, val)
+
+#define TCSS_DISP_MAILBOX_IN_DATA _MMIO(0x161304)
+
/* C10 Vendor Registers */
#define PHY_C10_VDR_PLL(idx) (0xC00 + (idx))
#define C10_PLL0_FRACEN REG_BIT8(4)
diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c
index b16c4d2d4077..33b8f2772076 100644
--- a/drivers/gpu/drm/i915/display/intel_tc.c
+++ b/drivers/gpu/drm/i915/display/intel_tc.c
@@ -1013,21 +1013,52 @@ xelpdp_tc_phy_wait_for_tcss_power(struct intel_tc_port *tc, bool enabled)
return true;
}
+/*
+ * Gfx driver WA 14020908590 for PTL tcss_rxdetect_clkswb_req/ack
+ * handshake violation when pwwreq= 0->1 during TC7/10 entry
+ */
+static void xelpdp_tc_power_request_wa(struct intel_display *display, bool enable)
+{
+ /* check if mailbox is running busy */
+ if (intel_de_wait_for_clear(display, TCSS_DISP_MAILBOX_IN_CMD,
+ TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY, 10)) {
+ drm_dbg_kms(display->drm,
+ "Timeout waiting for TCSS mailbox run/busy bit to clear\n");
+ return;
+ }
+
+ intel_de_write(display, TCSS_DISP_MAILBOX_IN_DATA, enable ? 1 : 0);
+ intel_de_write(display, TCSS_DISP_MAILBOX_IN_CMD,
+ TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY |
+ TCSS_DISP_MAILBOX_IN_CMD_DATA(0x1));
+
+ /* wait to clear mailbox running busy bit before continuing */
+ if (intel_de_wait_for_clear(display, TCSS_DISP_MAILBOX_IN_CMD,
+ TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY, 10)) {
+ drm_dbg_kms(display->drm,
+ "Timeout after writing data to mailbox. Mailbox run/busy bit did not clear\n");
+ return;
+ }
+}
+
static void __xelpdp_tc_phy_enable_tcss_power(struct intel_tc_port *tc, bool enable)
{
- struct drm_i915_private *i915 = tc_to_i915(tc);
+ struct intel_display *display = to_intel_display(tc->dig_port);
enum port port = tc->dig_port->base.port;
- i915_reg_t reg = XELPDP_PORT_BUF_CTL1(i915, port);
+ i915_reg_t reg = XELPDP_PORT_BUF_CTL1(display, port);
u32 val;
assert_tc_cold_blocked(tc);
- val = intel_de_read(i915, reg);
+ if (DISPLAY_VER(display) == 30)
+ xelpdp_tc_power_request_wa(display, enable);
+
+ val = intel_de_read(display, reg);
if (enable)
val |= XELPDP_TCSS_POWER_REQUEST;
else
val &= ~XELPDP_TCSS_POWER_REQUEST;
- intel_de_write(i915, reg, val);
+ intel_de_write(display, reg, val);
}
static bool xelpdp_tc_phy_enable_tcss_power(struct intel_tc_port *tc, bool enable)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 0cf91d23f25c..a194db83421d 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -405,13 +405,25 @@ static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
return len;
}
-int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
- void *buf, int len, int write)
+/**
+ * ttm_bo_access - Helper to access a buffer object
+ *
+ * @bo: ttm buffer object
+ * @offset: access offset into buffer object
+ * @buf: pointer to caller memory to read into or write from
+ * @len: length of access
+ * @write: write access
+ *
+ * Utility function to access a buffer object. Useful when buffer object cannot
+ * be easily mapped (non-contiguous, non-visible, etc...). Should not directly
+ * be exported to user space via a peak / poke interface.
+ *
+ * Returns:
+ * @len if successful, negative error code on failure.
+ */
+int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset,
+ void *buf, int len, int write)
{
- struct ttm_buffer_object *bo = vma->vm_private_data;
- unsigned long offset = (addr) - vma->vm_start +
- ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node))
- << PAGE_SHIFT);
int ret;
if (len < 1 || (offset + len) > bo->base.size)
@@ -429,8 +441,8 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
break;
default:
if (bo->bdev->funcs->access_memory)
- ret = bo->bdev->funcs->access_memory(
- bo, offset, buf, len, write);
+ ret = bo->bdev->funcs->access_memory
+ (bo, offset, buf, len, write);
else
ret = -EIO;
}
@@ -439,6 +451,18 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
return ret;
}
+EXPORT_SYMBOL(ttm_bo_access);
+
+int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
+ void *buf, int len, int write)
+{
+ struct ttm_buffer_object *bo = vma->vm_private_data;
+ unsigned long offset = (addr) - vma->vm_start +
+ ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node))
+ << PAGE_SHIFT);
+
+ return ttm_bo_access(bo, offset, buf, len, write);
+}
EXPORT_SYMBOL(ttm_bo_vm_access);
static const struct vm_operations_struct ttm_bo_vm_ops = {
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index bc7a04ce69fd..7730e0596299 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -101,6 +101,7 @@ xe-y += xe_bb.o \
xe_trace.o \
xe_trace_bo.o \
xe_trace_guc.o \
+ xe_trace_lrc.o \
xe_ttm_sys_mgr.o \
xe_ttm_stolen_mgr.o \
xe_ttm_vram_mgr.o \
@@ -110,6 +111,7 @@ xe-y += xe_bb.o \
xe_vm.o \
xe_vram.o \
xe_vram_freq.o \
+ xe_vsec.o \
xe_wait_user_fence.o \
xe_wa.o \
xe_wopcm.o
@@ -124,7 +126,8 @@ xe-y += \
xe_gt_sriov_vf.o \
xe_guc_relay.o \
xe_memirq.o \
- xe_sriov.o
+ xe_sriov.o \
+ xe_sriov_vf.o
xe-$(CONFIG_PCI_IOV) += \
xe_gt_sriov_pf.o \
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
index b54fe40fc5a9..fee385532fb0 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -134,6 +134,8 @@ enum xe_guc_action {
XE_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
XE_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
XE_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
+ XE_GUC_ACTION_REGISTER_G2G = 0x4507,
+ XE_GUC_ACTION_DEREGISTER_G2G = 0x4508,
XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
@@ -218,4 +220,22 @@ enum xe_guc_tlb_inval_mode {
XE_GUC_TLB_INVAL_MODE_LITE = 0x1,
};
+/*
+ * GuC to GuC communication (de-)registration fields:
+ */
+enum xe_guc_g2g_type {
+ XE_G2G_TYPE_IN = 0x0,
+ XE_G2G_TYPE_OUT,
+ XE_G2G_TYPE_LIMIT,
+};
+
+#define XE_G2G_REGISTER_DEVICE REG_GENMASK(16, 16)
+#define XE_G2G_REGISTER_TILE REG_GENMASK(15, 12)
+#define XE_G2G_REGISTER_TYPE REG_GENMASK(11, 8)
+#define XE_G2G_REGISTER_SIZE REG_GENMASK(7, 0)
+
+#define XE_G2G_DEREGISTER_DEVICE REG_GENMASK(16, 16)
+#define XE_G2G_DEREGISTER_TILE REG_GENMASK(15, 12)
+#define XE_G2G_DEREGISTER_TYPE REG_GENMASK(11, 8)
+
#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
index b6a1852749dd..0b28659d94e9 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
@@ -502,6 +502,44 @@
#define VF2GUC_VF_RESET_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0
/**
+ * DOC: VF2GUC_NOTIFY_RESFIX_DONE
+ *
+ * This action is used by VF to notify the GuC that the VF KMD has completed
+ * post-migration recovery steps.
+ *
+ * This message must be sent as `MMIO HXG Message`_.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 = MBZ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE` = 0x5508 |
+ * +---+-------+--------------------------------------------------------------+
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | DATA0 = MBZ |
+ * +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE 0x5508u
+
+#define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_LEN GUC_HXG_REQUEST_MSG_MIN_LEN
+#define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
+
+#define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/**
* DOC: VF2GUC_QUERY_SINGLE_KLV
*
* This action is used by VF to query value of the single KLV data.
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index 37606cf8cc5e..7dcb118e3d9f 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -291,6 +291,14 @@ enum {
*
* :0: (default)
* :1-65535: number of contexts (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_SCHED_PRIORITY` : 0x8A0C
+ * This config controls VF’s scheduling priority.
+ *
+ * :0: LOW = schedule VF only if it has active work (default)
+ * :1: NORMAL = schedule VF always, irrespective of whether it has work or not
+ * :2: HIGH = schedule VF in the next time-slice after current active
+ * time-slice completes if it has active work
*/
#define GUC_KLV_VF_CFG_GGTT_START_KEY 0x0001
@@ -343,6 +351,12 @@ enum {
#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY 0x8a0b
#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN 1u
+#define GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY 0x8a0c
+#define GUC_KLV_VF_CFG_SCHED_PRIORITY_LEN 1u
+#define GUC_SCHED_PRIORITY_LOW 0u
+#define GUC_SCHED_PRIORITY_NORMAL 1u
+#define GUC_SCHED_PRIORITY_HIGH 2u
+
/*
* Workaround keys:
*/
diff --git a/drivers/gpu/drm/xe/display/ext/i915_irq.c b/drivers/gpu/drm/xe/display/ext/i915_irq.c
index a7dbc6554d69..ac4cda2d81c7 100644
--- a/drivers/gpu/drm/xe/display/ext/i915_irq.c
+++ b/drivers/gpu/drm/xe/display/ext/i915_irq.c
@@ -53,18 +53,7 @@ void gen2_irq_init(struct intel_uncore *uncore, struct i915_irq_regs regs,
bool intel_irqs_enabled(struct xe_device *xe)
{
- /*
- * XXX: i915 has a racy handling of the irq.enabled, since it doesn't
- * lock its transitions. Because of that, the irq.enabled sometimes
- * is not read with the irq.lock in place.
- * However, the most critical cases like vblank and page flips are
- * properly using the locks.
- * We cannot take the lock in here or run any kind of assert because
- * of i915 inconsistency.
- * But at this point the xe irq is better protected against races,
- * although the full solution would be protecting the i915 side.
- */
- return xe->irq.enabled;
+ return atomic_read(&xe->irq.enabled);
}
void intel_synchronize_irq(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c
index 9f54fad0f1c0..b463f5bd4eed 100644
--- a/drivers/gpu/drm/xe/display/intel_bo.c
+++ b/drivers/gpu/drm/xe/display/intel_bo.c
@@ -40,31 +40,8 @@ int intel_bo_fb_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
int intel_bo_read_from_page(struct drm_gem_object *obj, u64 offset, void *dst, int size)
{
struct xe_bo *bo = gem_to_xe_bo(obj);
- struct ttm_bo_kmap_obj map;
- void *src;
- bool is_iomem;
- int ret;
- ret = xe_bo_lock(bo, true);
- if (ret)
- return ret;
-
- ret = ttm_bo_kmap(&bo->ttm, offset >> PAGE_SHIFT, 1, &map);
- if (ret)
- goto out_unlock;
-
- offset &= ~PAGE_MASK;
- src = ttm_kmap_obj_virtual(&map, &is_iomem);
- src += offset;
- if (is_iomem)
- memcpy_fromio(dst, (void __iomem *)src, size);
- else
- memcpy(dst, src, size);
-
- ttm_bo_kunmap(&map);
-out_unlock:
- xe_bo_unlock(bo);
- return ret;
+ return xe_bo_read(bo, offset, dst, size);
}
struct intel_frontbuffer *intel_bo_get_frontbuffer(struct drm_gem_object *obj)
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index 761510ae0690..9fa51b84737c 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -161,7 +161,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
}
vma->dpt = dpt;
- vma->node = dpt->ggtt_node;
+ vma->node = dpt->ggtt_node[tile0->id];
return 0;
}
@@ -213,8 +213,8 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
align = max_t(u32, align, SZ_64K);
- if (bo->ggtt_node && view->type == I915_GTT_VIEW_NORMAL) {
- vma->node = bo->ggtt_node;
+ if (bo->ggtt_node[ggtt->tile->id] && view->type == I915_GTT_VIEW_NORMAL) {
+ vma->node = bo->ggtt_node[ggtt->tile->id];
} else if (view->type == I915_GTT_VIEW_NORMAL) {
u32 x, size = bo->ttm.base.size;
@@ -345,10 +345,12 @@ err:
static void __xe_unpin_fb_vma(struct i915_vma *vma)
{
+ u8 tile_id = vma->node->ggtt->tile->id;
+
if (vma->dpt)
xe_bo_unpin_map_no_vm(vma->dpt);
- else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node) ||
- vma->bo->ggtt_node->base.start != vma->node->base.start)
+ else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node[tile_id]) ||
+ vma->bo->ggtt_node[tile_id]->base.start != vma->node->base.start)
xe_ggtt_node_remove(vma->node, false);
ttm_bo_reserve(&vma->bo->ttm, false, false, NULL);
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 0c9e4b2fafab..162f18e975da 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -445,6 +445,8 @@
#define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED)
#define ENABLE_SMALLPL REG_BIT(15)
+#define SMP_WAIT_FETCH_MERGING_COUNTER REG_GENMASK(11, 10)
+#define SMP_FORCE_128B_OVERFETCH REG_FIELD_PREP(SMP_WAIT_FETCH_MERGING_COUNTER, 1)
#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9)
#define SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5)
#define INDIRECT_STATE_BASE_ADDR_OVERRIDE REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
index a9b0091cb7ee..a49561e9f3c3 100644
--- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
@@ -41,14 +41,6 @@
#define OAG_OABUFFER XE_REG(0xdb08)
#define OABUFFER_SIZE_MASK REG_GENMASK(5, 3)
-#define OABUFFER_SIZE_128K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0)
-#define OABUFFER_SIZE_256K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1)
-#define OABUFFER_SIZE_512K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2)
-#define OABUFFER_SIZE_1M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3)
-#define OABUFFER_SIZE_2M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4)
-#define OABUFFER_SIZE_4M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5)
-#define OABUFFER_SIZE_8M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6)
-#define OABUFFER_SIZE_16M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7)
#define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */
#define OAG_OACONTROL XE_REG(0xdaf4)
@@ -63,6 +55,7 @@
#define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED)
#define OAG_OA_DEBUG_DISABLE_MMIO_TRG REG_BIT(14)
#define OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL REG_BIT(13)
+#define OAG_OA_DEBUG_BUF_SIZE_SELECT REG_BIT(12)
#define OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL REG_BIT(8)
#define OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL REG_BIT(7)
#define OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6)
diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h
new file mode 100644
index 000000000000..f45abcd96ba8
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_pmt.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+#ifndef _XE_PMT_H_
+#define _XE_PMT_H_
+
+#define SOC_BASE 0x280000
+
+#define BMG_PMT_BASE_OFFSET 0xDB000
+#define BMG_DISCOVERY_OFFSET (SOC_BASE + BMG_PMT_BASE_OFFSET)
+
+#define BMG_TELEMETRY_BASE_OFFSET 0xE0000
+#define BMG_TELEMETRY_OFFSET (SOC_BASE + BMG_TELEMETRY_BASE_OFFSET)
+
+#define SG_REMAP_INDEX1 XE_REG(SOC_BASE + 0x08)
+#define SG_REMAP_BITS REG_GENMASK(31, 24)
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 3e0ae40ebbd2..c9ec7a313c6b 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -49,6 +49,13 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
KUNIT_FAIL(test, "Failed to submit bo clear.\n");
return PTR_ERR(fence);
}
+
+ if (dma_fence_wait_timeout(fence, false, 5 * HZ) <= 0) {
+ dma_fence_put(fence);
+ KUNIT_FAIL(test, "Timeout while clearing bo.\n");
+ return -ETIME;
+ }
+
dma_fence_put(fence);
}
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 1a192a2a941b..d5fe0ea889ad 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -83,7 +83,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
bo->size,
ttm_bo_type_kernel,
region |
- XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ XE_BO_FLAG_NEEDS_CPU_ACCESS |
+ XE_BO_FLAG_PINNED);
if (IS_ERR(remote)) {
KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n",
str, remote);
@@ -224,8 +225,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_PINNED);
if (IS_ERR(tiny)) {
- KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
- PTR_ERR(pt));
+ KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n",
+ PTR_ERR(tiny));
goto free_pt;
}
@@ -642,7 +643,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
DRM_XE_GEM_CPU_CACHING_WC,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_NEEDS_CPU_ACCESS |
+ XE_BO_FLAG_PINNED);
if (IS_ERR(sys_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
@@ -666,7 +669,8 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
DRM_XE_GEM_CPU_CACHING_WC,
- bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS |
+ XE_BO_FLAG_PINNED);
if (IS_ERR(ccs_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
@@ -690,7 +694,8 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
DRM_XE_GEM_CPU_CACHING_WC,
- bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS |
+ XE_BO_FLAG_PINNED);
if (IS_ERR(vram_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
PTR_ERR(vram_bo));
diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h
index 04d6b95c6d87..68fe70ce2be3 100644
--- a/drivers/gpu/drm/xe/xe_assert.h
+++ b/drivers/gpu/drm/xe/xe_assert.h
@@ -14,7 +14,7 @@
#include "xe_step.h"
/**
- * DOC: Xe ASSERTs
+ * DOC: Xe Asserts
*
* While Xe driver aims to be simpler than legacy i915 driver it is still
* complex enough that some changes introduced while adding new functionality
@@ -103,7 +103,7 @@
* (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions
* or as a condition.
*
- * See `Xe ASSERTs`_ for general usage guidelines.
+ * See `Xe Asserts`_ for general usage guidelines.
*/
#define xe_assert(xe, condition) xe_assert_msg((xe), condition, "")
#define xe_assert_msg(xe, condition, msg, arg...) ({ \
@@ -138,7 +138,7 @@
* (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions
* or as a condition.
*
- * See `Xe ASSERTs`_ for general usage guidelines.
+ * See `Xe Asserts`_ for general usage guidelines.
*/
#define xe_tile_assert(tile, condition) xe_tile_assert_msg((tile), condition, "")
#define xe_tile_assert_msg(tile, condition, msg, arg...) ({ \
@@ -162,7 +162,7 @@
* (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions
* or as a condition.
*
- * See `Xe ASSERTs`_ for general usage guidelines.
+ * See `Xe Asserts`_ for general usage guidelines.
*/
#define xe_gt_assert(gt, condition) xe_gt_assert_msg((gt), condition, "")
#define xe_gt_assert_msg(gt, condition, msg, arg...) ({ \
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index ae6b337cdc54..283cd0294570 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -162,6 +162,15 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
}
}
+static bool force_contiguous(u32 bo_flags)
+{
+ /*
+ * For eviction / restore on suspend / resume objects pinned in VRAM
+ * must be contiguous, also only contiguous BOs support xe_bo_vmap.
+ */
+ return bo_flags & (XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
+}
+
static void add_vram(struct xe_device *xe, struct xe_bo *bo,
struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
{
@@ -175,12 +184,7 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
xe_assert(xe, vram && vram->usable_size);
io_size = vram->io_size;
- /*
- * For eviction / restore on suspend / resume objects
- * pinned in VRAM must be contiguous
- */
- if (bo_flags & (XE_BO_FLAG_PINNED |
- XE_BO_FLAG_GGTT))
+ if (force_contiguous(bo_flags))
place.flags |= TTM_PL_FLAG_CONTIGUOUS;
if (io_size < vram->usable_size) {
@@ -212,8 +216,7 @@ static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
bo->placements[*c] = (struct ttm_place) {
.mem_type = XE_PL_STOLEN,
- .flags = bo_flags & (XE_BO_FLAG_PINNED |
- XE_BO_FLAG_GGTT) ?
+ .flags = force_contiguous(bo_flags) ?
TTM_PL_FLAG_CONTIGUOUS : 0,
};
*c += 1;
@@ -442,6 +445,14 @@ static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
kfree(tt);
}
+static bool xe_ttm_resource_visible(struct ttm_resource *mem)
+{
+ struct xe_ttm_vram_mgr_resource *vres =
+ to_xe_ttm_vram_mgr_resource(mem);
+
+ return vres->used_visible_size == mem->size;
+}
+
static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
struct ttm_resource *mem)
{
@@ -453,11 +464,9 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
return 0;
case XE_PL_VRAM0:
case XE_PL_VRAM1: {
- struct xe_ttm_vram_mgr_resource *vres =
- to_xe_ttm_vram_mgr_resource(mem);
struct xe_mem_region *vram = res_to_mem_region(mem);
- if (vres->used_visible_size < mem->size)
+ if (!xe_ttm_resource_visible(mem))
return -EINVAL;
mem->bus.offset = mem->start << PAGE_SHIFT;
@@ -876,6 +885,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
};
struct ttm_operation_ctx ctx = {
.interruptible = false,
+ .gfp_retry_mayfail = true,
};
struct ttm_resource *new_mem;
int ret;
@@ -937,6 +947,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
{
struct ttm_operation_ctx ctx = {
.interruptible = false,
+ .gfp_retry_mayfail = false,
};
struct ttm_resource *new_mem;
struct ttm_place *place = &bo->placements[0];
@@ -1106,7 +1117,8 @@ static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operati
static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
{
struct ttm_operation_ctx ctx = {
- .interruptible = false
+ .interruptible = false,
+ .gfp_retry_mayfail = false,
};
if (ttm_bo->ttm) {
@@ -1118,6 +1130,52 @@ static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
}
}
+static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
+ unsigned long offset, void *buf, int len,
+ int write)
+{
+ struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+ struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+ struct iosys_map vmap;
+ struct xe_res_cursor cursor;
+ struct xe_mem_region *vram;
+ int bytes_left = len;
+
+ xe_bo_assert_held(bo);
+ xe_device_assert_mem_access(xe);
+
+ if (!mem_type_is_vram(ttm_bo->resource->mem_type))
+ return -EIO;
+
+ /* FIXME: Use GPU for non-visible VRAM */
+ if (!xe_ttm_resource_visible(ttm_bo->resource))
+ return -EIO;
+
+ vram = res_to_mem_region(ttm_bo->resource);
+ xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
+ bo->size - (offset & PAGE_MASK), &cursor);
+
+ do {
+ unsigned long page_offset = (offset & ~PAGE_MASK);
+ int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left);
+
+ iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping +
+ cursor.start);
+ if (write)
+ xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count);
+ else
+ xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count);
+
+ buf += byte_count;
+ offset += byte_count;
+ bytes_left -= byte_count;
+ if (bytes_left)
+ xe_res_next(&cursor, PAGE_SIZE);
+ } while (bytes_left);
+
+ return len;
+}
+
const struct ttm_device_funcs xe_ttm_funcs = {
.ttm_tt_create = xe_ttm_tt_create,
.ttm_tt_populate = xe_ttm_tt_populate,
@@ -1127,6 +1185,7 @@ const struct ttm_device_funcs xe_ttm_funcs = {
.move = xe_bo_move,
.io_mem_reserve = xe_ttm_io_mem_reserve,
.io_mem_pfn = xe_ttm_io_mem_pfn,
+ .access_memory = xe_ttm_access_memory,
.release_notify = xe_ttm_bo_release_notify,
.eviction_valuable = ttm_bo_eviction_valuable,
.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
@@ -1137,6 +1196,8 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
{
struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+ struct xe_tile *tile;
+ u8 id;
if (bo->ttm.base.import_attach)
drm_prime_gem_destroy(&bo->ttm.base, NULL);
@@ -1144,8 +1205,9 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
- if (bo->ggtt_node && bo->ggtt_node->base.size)
- xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
+ for_each_tile(tile, xe, id)
+ if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size)
+ xe_ggtt_remove_bo(tile->mem.ggtt, bo);
#ifdef CONFIG_PROC_FS
if (bo->client)
@@ -1243,11 +1305,50 @@ out:
return ret;
}
+static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
+ void *buf, int len, int write)
+{
+ struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
+ struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+ struct xe_device *xe = xe_bo_device(bo);
+ int ret;
+
+ xe_pm_runtime_get(xe);
+ ret = ttm_bo_vm_access(vma, addr, buf, len, write);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+/**
+ * xe_bo_read() - Read from an xe_bo
+ * @bo: The buffer object to read from.
+ * @offset: The byte offset to start reading from.
+ * @dst: Location to store the read.
+ * @size: Size in bytes for the read.
+ *
+ * Read @size bytes from the @bo, starting from @offset, storing into @dst.
+ *
+ * Return: Zero on success, or negative error.
+ */
+int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
+{
+ int ret;
+
+ ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0);
+ if (ret >= 0 && ret != size)
+ ret = -EIO;
+ else if (ret == size)
+ ret = 0;
+
+ return ret;
+}
+
static const struct vm_operations_struct xe_gem_vm_ops = {
.fault = xe_gem_fault,
.open = ttm_bo_vm_open,
.close = ttm_bo_vm_close,
- .access = ttm_bo_vm_access
+ .access = xe_bo_vm_access,
};
static const struct drm_gem_object_funcs xe_gem_object_funcs = {
@@ -1301,6 +1402,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
+ .gfp_retry_mayfail = true,
};
struct ttm_placement *placement;
uint32_t alignment;
@@ -1315,6 +1417,10 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
return ERR_PTR(-EINVAL);
}
+ /* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */
+ if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
+ return ERR_PTR(-EINVAL);
+
if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
!(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
@@ -1505,19 +1611,29 @@ __xe_bo_create_locked(struct xe_device *xe,
bo->vm = vm;
if (bo->flags & XE_BO_FLAG_GGTT) {
- if (!tile && flags & XE_BO_FLAG_STOLEN)
- tile = xe_device_get_root_tile(xe);
+ struct xe_tile *t;
+ u8 id;
- xe_assert(xe, tile);
+ if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
+ if (!tile && flags & XE_BO_FLAG_STOLEN)
+ tile = xe_device_get_root_tile(xe);
- if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
- err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
- start + bo->size, U64_MAX);
- } else {
- err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
+ xe_assert(xe, tile);
+ }
+
+ for_each_tile(t, xe, id) {
+ if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
+ continue;
+
+ if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
+ err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
+ start + bo->size, U64_MAX);
+ } else {
+ err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
+ }
+ if (err)
+ goto err_unlock_put_bo;
}
- if (err)
- goto err_unlock_put_bo;
}
return bo;
@@ -1900,6 +2016,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
+ .gfp_retry_mayfail = true,
};
if (vm) {
@@ -1910,6 +2027,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
ctx.resv = xe_vm_resv(vm);
}
+ trace_xe_bo_validate(bo);
return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
}
@@ -1961,13 +2079,15 @@ dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
int xe_bo_vmap(struct xe_bo *bo)
{
+ struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
void *virtual;
bool is_iomem;
int ret;
xe_bo_assert_held(bo);
- if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS))
+ if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) ||
+ !force_contiguous(bo->flags)))
return -EINVAL;
if (!iosys_map_is_null(&bo->vmap))
@@ -2243,6 +2363,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
+ .gfp_retry_mayfail = true,
};
struct ttm_placement placement;
struct ttm_place requested;
@@ -2293,6 +2414,7 @@ int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
.interruptible = false,
.no_wait_gpu = false,
.force_alloc = force_alloc,
+ .gfp_retry_mayfail = true,
};
struct ttm_placement placement;
int ret;
@@ -2372,14 +2494,18 @@ void xe_bo_put_commit(struct llist_head *deferred)
void xe_bo_put(struct xe_bo *bo)
{
+ struct xe_tile *tile;
+ u8 id;
+
might_sleep();
if (bo) {
#ifdef CONFIG_PROC_FS
if (bo->client)
might_lock(&bo->client->bos_lock);
#endif
- if (bo->ggtt_node && bo->ggtt_node->ggtt)
- might_lock(&bo->ggtt_node->ggtt->lock);
+ for_each_tile(tile, xe_bo_device(bo), id)
+ if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt)
+ might_lock(&bo->ggtt_node[id]->ggtt->lock);
drm_gem_object_put(&bo->ttm.base);
}
}
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 7fa44a0138b0..d9386ab03140 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -39,10 +39,22 @@
#define XE_BO_FLAG_NEEDS_64K BIT(15)
#define XE_BO_FLAG_NEEDS_2M BIT(16)
#define XE_BO_FLAG_GGTT_INVALIDATE BIT(17)
+#define XE_BO_FLAG_GGTT0 BIT(18)
+#define XE_BO_FLAG_GGTT1 BIT(19)
+#define XE_BO_FLAG_GGTT2 BIT(20)
+#define XE_BO_FLAG_GGTT3 BIT(21)
+#define XE_BO_FLAG_GGTT_ALL (XE_BO_FLAG_GGTT0 | \
+ XE_BO_FLAG_GGTT1 | \
+ XE_BO_FLAG_GGTT2 | \
+ XE_BO_FLAG_GGTT3)
+
/* this one is trigger internally only */
#define XE_BO_FLAG_INTERNAL_TEST BIT(30)
#define XE_BO_FLAG_INTERNAL_64K BIT(31)
+#define XE_BO_FLAG_GGTTx(tile) \
+ (XE_BO_FLAG_GGTT0 << (tile)->id)
+
#define XE_PTE_SHIFT 12
#define XE_PAGE_SIZE (1 << XE_PTE_SHIFT)
#define XE_PTE_MASK (XE_PAGE_SIZE - 1)
@@ -194,18 +206,29 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size)
}
static inline u32
-xe_bo_ggtt_addr(struct xe_bo *bo)
+__xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id)
{
- if (XE_WARN_ON(!bo->ggtt_node))
+ struct xe_ggtt_node *ggtt_node = bo->ggtt_node[tile_id];
+
+ if (XE_WARN_ON(!ggtt_node))
return 0;
- XE_WARN_ON(bo->ggtt_node->base.size > bo->size);
- XE_WARN_ON(bo->ggtt_node->base.start + bo->ggtt_node->base.size > (1ull << 32));
- return bo->ggtt_node->base.start;
+ XE_WARN_ON(ggtt_node->base.size > bo->size);
+ XE_WARN_ON(ggtt_node->base.start + ggtt_node->base.size > (1ull << 32));
+ return ggtt_node->base.start;
+}
+
+static inline u32
+xe_bo_ggtt_addr(struct xe_bo *bo)
+{
+ xe_assert(xe_bo_device(bo), bo->tile);
+
+ return __xe_bo_ggtt_addr(bo, bo->tile->id);
}
int xe_bo_vmap(struct xe_bo *bo);
void xe_bo_vunmap(struct xe_bo *bo);
+int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size);
bool mem_type_is_vram(u32 mem_type);
bool xe_bo_is_vram(struct xe_bo *bo);
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 8fb2be061003..6a40eedd9db1 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -152,11 +152,17 @@ int xe_bo_restore_kernel(struct xe_device *xe)
}
if (bo->flags & XE_BO_FLAG_GGTT) {
- struct xe_tile *tile = bo->tile;
+ struct xe_tile *tile;
+ u8 id;
- mutex_lock(&tile->mem.ggtt->lock);
- xe_ggtt_map_bo(tile->mem.ggtt, bo);
- mutex_unlock(&tile->mem.ggtt->lock);
+ for_each_tile(tile, xe, id) {
+ if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile)))
+ continue;
+
+ mutex_lock(&tile->mem.ggtt->lock);
+ xe_ggtt_map_bo(tile->mem.ggtt, bo);
+ mutex_unlock(&tile->mem.ggtt->lock);
+ }
}
/*
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 99196228dcc8..46dc9e4e3e46 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -12,6 +12,7 @@
#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>
+#include "xe_device_types.h"
#include "xe_ggtt_types.h"
struct xe_device;
@@ -38,8 +39,8 @@ struct xe_bo {
struct ttm_place placements[XE_BO_MAX_PLACEMENTS];
/** @placement: current placement for this BO */
struct ttm_placement placement;
- /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */
- struct xe_ggtt_node *ggtt_node;
+ /** @ggtt_node: Array of GGTT nodes if this BO is mapped in the GGTTs */
+ struct xe_ggtt_node *ggtt_node[XE_MAX_TILES_PER_DEVICE];
/** @vmap: iosys map of this buffer */
struct iosys_map vmap;
/** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index f8947e7e917e..71636e80b71d 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -30,30 +30,39 @@
/**
* DOC: Xe device coredump
*
- * Devices overview:
* Xe uses dev_coredump infrastructure for exposing the crash errors in a
- * standardized way.
- * devcoredump exposes a temporary device under /sys/class/devcoredump/
- * which is linked with our card device directly.
- * The core dump can be accessed either from
- * /sys/class/drm/card<n>/device/devcoredump/ or from
- * /sys/class/devcoredump/devcd<m> where
- * /sys/class/devcoredump/devcd<m>/failing_device is a link to
- * /sys/class/drm/card<n>/device/.
+ * standardized way. Once a crash occurs, devcoredump exposes a temporary
+ * node under ``/sys/class/devcoredump/devcd<m>/``. The same node is also
+ * accessible in ``/sys/class/drm/card<n>/device/devcoredump/``. The
+ * ``failing_device`` symlink points to the device that crashed and created the
+ * coredump.
*
- * Snapshot at hang:
- * The 'data' file is printed with a drm_printer pointer at devcoredump read
- * time. For this reason, we need to take snapshots from when the hang has
- * happened, and not only when the user is reading the file. Otherwise the
- * information is outdated since the resets might have happened in between.
+ * The following characteristics are observed by xe when creating a device
+ * coredump:
*
- * 'First' failure snapshot:
- * In general, the first hang is the most critical one since the following hangs
- * can be a consequence of the initial hang. For this reason we only take the
- * snapshot of the 'first' failure and ignore subsequent calls of this function,
- * at least while the coredump device is alive. Dev_coredump has a delayed work
- * queue that will eventually delete the device and free all the dump
- * information.
+ * **Snapshot at hang**:
+ * The 'data' file contains a snapshot of the HW and driver states at the time
+ * the hang happened. Due to the driver recovering from resets/crashes, it may
+ * not correspond to the state of the system when the file is read by
+ * userspace.
+ *
+ * **Coredump release**:
+ * After a coredump is generated, it stays in kernel memory until released by
+ * userpace by writing anything to it, or after an internal timer expires. The
+ * exact timeout may vary and should not be relied upon. Example to release
+ * a coredump:
+ *
+ * .. code-block:: shell
+ *
+ * $ > /sys/class/drm/card0/device/devcoredump/data
+ *
+ * **First failure only**:
+ * In general, the first hang is the most critical one since the following
+ * hangs can be a consequence of the initial hang. For this reason a snapshot
+ * is taken only for the first failure. Until the devcoredump is released by
+ * userspace or kernel, all subsequent hangs do not override the snapshot nor
+ * create new ones. Devcoredump has a delayed work queue that will eventually
+ * delete the file node and free all the dump information.
*/
#ifdef CONFIG_DEV_COREDUMP
@@ -91,6 +100,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
p = drm_coredump_printer(&iter);
drm_puts(&p, "**** Xe Device Coredump ****\n");
+ drm_printf(&p, "Reason: %s\n", ss->reason);
drm_puts(&p, "kernel: " UTS_RELEASE "\n");
drm_puts(&p, "module: " KBUILD_MODNAME "\n");
@@ -98,7 +108,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
ts = ktime_to_timespec64(ss->boot_time);
drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
- drm_printf(&p, "Process: %s\n", ss->process_name);
+ drm_printf(&p, "Process: %s [%d]\n", ss->process_name, ss->pid);
xe_device_snapshot_print(xe, &p);
drm_printf(&p, "\n**** GT #%d ****\n", ss->gt->info.id);
@@ -130,6 +140,9 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
{
int i;
+ kfree(ss->reason);
+ ss->reason = NULL;
+
xe_guc_log_snapshot_free(ss->guc.log);
ss->guc.log = NULL;
@@ -170,16 +183,24 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
/* Ensure delayed work is captured before continuing */
flush_work(&ss->work);
- if (!ss->read.buffer)
+ mutex_lock(&coredump->lock);
+
+ if (!ss->read.buffer) {
+ mutex_unlock(&coredump->lock);
return -ENODEV;
+ }
- if (offset >= ss->read.size)
+ if (offset >= ss->read.size) {
+ mutex_unlock(&coredump->lock);
return 0;
+ }
byte_copied = count < ss->read.size - offset ? count :
ss->read.size - offset;
memcpy(buffer, ss->read.buffer + offset, byte_copied);
+ mutex_unlock(&coredump->lock);
+
return byte_copied;
}
@@ -193,15 +214,18 @@ static void xe_devcoredump_free(void *data)
cancel_work_sync(&coredump->snapshot.work);
+ mutex_lock(&coredump->lock);
+
xe_devcoredump_snapshot_free(&coredump->snapshot);
kvfree(coredump->snapshot.read.buffer);
/* To prevent stale data on next snapshot, clear everything */
memset(&coredump->snapshot, 0, sizeof(coredump->snapshot));
coredump->captured = false;
- coredump->job = NULL;
drm_info(&coredump_to_xe(coredump)->drm,
"Xe device coredump has been deleted.\n");
+
+ mutex_unlock(&coredump->lock);
}
static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
@@ -244,10 +268,10 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
}
static void devcoredump_snapshot(struct xe_devcoredump *coredump,
+ struct xe_exec_queue *q,
struct xe_sched_job *job)
{
struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
- struct xe_exec_queue *q = job->q;
struct xe_guc *guc = exec_queue_to_guc(q);
u32 adj_logical_mask = q->logical_mask;
u32 width_mask = (0x1 << q->width) - 1;
@@ -260,12 +284,14 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
ss->snapshot_time = ktime_get_real();
ss->boot_time = ktime_get_boottime();
- if (q->vm && q->vm->xef)
+ if (q->vm && q->vm->xef) {
process_name = q->vm->xef->process_name;
+ ss->pid = q->vm->xef->pid;
+ }
+
strscpy(ss->process_name, process_name);
ss->gt = q->gt;
- coredump->job = job;
INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);
cookie = dma_fence_begin_signalling();
@@ -284,10 +310,11 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true);
ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct);
ss->ge = xe_guc_exec_queue_snapshot_capture(q);
- ss->job = xe_sched_job_snapshot_capture(job);
+ if (job)
+ ss->job = xe_sched_job_snapshot_capture(job);
ss->vm = xe_vm_snapshot_capture(q->vm);
- xe_engine_snapshot_capture_for_job(job);
+ xe_engine_snapshot_capture_for_queue(q);
queue_work(system_unbound_wq, &ss->work);
@@ -297,28 +324,42 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
/**
* xe_devcoredump - Take the required snapshots and initialize coredump device.
+ * @q: The faulty xe_exec_queue, where the issue was detected.
* @job: The faulty xe_sched_job, where the issue was detected.
+ * @fmt: Printf format + args to describe the reason for the core dump
*
* This function should be called at the crash time within the serialized
* gt_reset. It is skipped if we still have the core dump device available
* with the information of the 'first' snapshot.
*/
-void xe_devcoredump(struct xe_sched_job *job)
+__printf(3, 4)
+void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job, const char *fmt, ...)
{
- struct xe_device *xe = gt_to_xe(job->q->gt);
+ struct xe_device *xe = gt_to_xe(q->gt);
struct xe_devcoredump *coredump = &xe->devcoredump;
+ va_list varg;
+
+ mutex_lock(&coredump->lock);
if (coredump->captured) {
drm_dbg(&xe->drm, "Multiple hangs are occurring, but only the first snapshot was taken\n");
+ mutex_unlock(&coredump->lock);
return;
}
coredump->captured = true;
- devcoredump_snapshot(coredump, job);
+
+ va_start(varg, fmt);
+ coredump->snapshot.reason = kvasprintf(GFP_ATOMIC, fmt, varg);
+ va_end(varg);
+
+ devcoredump_snapshot(coredump, q, job);
drm_info(&xe->drm, "Xe device coredump has been created\n");
drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
xe->drm.primary->index);
+
+ mutex_unlock(&coredump->lock);
}
static void xe_driver_devcoredump_fini(void *arg)
@@ -330,6 +371,18 @@ static void xe_driver_devcoredump_fini(void *arg)
int xe_devcoredump_init(struct xe_device *xe)
{
+ int err;
+
+ err = drmm_mutex_init(&xe->drm, &xe->devcoredump.lock);
+ if (err)
+ return err;
+
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&xe->devcoredump.lock);
+ fs_reclaim_release(GFP_KERNEL);
+ }
+
return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm);
}
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
index a4eebc285fc8..6a17e6d60102 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump.h
@@ -10,13 +10,16 @@
struct drm_printer;
struct xe_device;
+struct xe_exec_queue;
struct xe_sched_job;
#ifdef CONFIG_DEV_COREDUMP
-void xe_devcoredump(struct xe_sched_job *job);
+void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job, const char *fmt, ...);
int xe_devcoredump_init(struct xe_device *xe);
#else
-static inline void xe_devcoredump(struct xe_sched_job *job)
+static inline void xe_devcoredump(struct xe_exec_queue *q,
+ struct xe_sched_job *job,
+ const char *fmt, ...)
{
}
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index 3703ddea1252..1a1d16a96b2d 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -28,6 +28,10 @@ struct xe_devcoredump_snapshot {
ktime_t boot_time;
/** @process_name: Name of process that triggered this gpu hang */
char process_name[TASK_COMM_LEN];
+ /** @pid: Process id of process that triggered this gpu hang */
+ pid_t pid;
+ /** @reason: The reason the coredump was triggered */
+ char *reason;
/** @gt: Affected GT, used by forcewake for delayed capture */
struct xe_gt *gt;
@@ -76,12 +80,12 @@ struct xe_devcoredump_snapshot {
* for reading the information.
*/
struct xe_devcoredump {
- /** @captured: The snapshot of the first hang has already been taken. */
+ /** @lock: protects access to entire structure */
+ struct mutex lock;
+ /** @captured: The snapshot of the first hang has already been taken */
bool captured;
/** @snapshot: Snapshot is captured at time of the first crash */
struct xe_devcoredump_snapshot snapshot;
- /** @job: Point to the faulting job */
- struct xe_sched_job *job;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index d8b6bc8fc8cf..56d4ffb650da 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -44,6 +44,7 @@
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_module.h"
+#include "xe_oa.h"
#include "xe_observation.h"
#include "xe_pat.h"
#include "xe_pcode.h"
@@ -55,6 +56,7 @@
#include "xe_ttm_sys_mgr.h"
#include "xe_vm.h"
#include "xe_vram.h"
+#include "xe_vsec.h"
#include "xe_wait_user_fence.h"
#include "xe_wa.h"
@@ -365,6 +367,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
goto err;
}
+ err = drmm_mutex_init(&xe->drm, &xe->pmt.lock);
+ if (err)
+ goto err;
+
err = xe_display_create(xe);
if (WARN_ON(err))
goto err;
@@ -759,6 +765,8 @@ int xe_device_probe(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_sanitize_freq(gt);
+ xe_vsec_init(xe);
+
return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
err_fini_display:
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index b9ea455d6f59..ace22e35e769 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -16,7 +16,7 @@
#include "xe_heci_gsc.h"
#include "xe_lmtt_types.h"
#include "xe_memirq_types.h"
-#include "xe_oa.h"
+#include "xe_oa_types.h"
#include "xe_platform_types.h"
#include "xe_pt_types.h"
#include "xe_sriov_types.h"
@@ -42,8 +42,6 @@ struct xe_pat_ops;
#define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100)
#define MEDIA_VERx100(xe) ((xe)->info.media_verx100)
#define IS_DGFX(xe) ((xe)->info.is_dgfx)
-#define HAS_HECI_GSCFI(xe) ((xe)->info.has_heci_gscfi)
-#define HAS_HECI_CSCFI(xe) ((xe)->info.has_heci_cscfi)
#define XE_VRAM_FLAGS_NEED64K BIT(0)
@@ -296,14 +294,24 @@ struct xe_device {
/** @info.va_bits: Maximum bits of a virtual address */
u8 va_bits;
- /** @info.is_dgfx: is discrete device */
- u8 is_dgfx:1;
- /** @info.has_asid: Has address space ID */
- u8 has_asid:1;
+ /*
+ * Keep all flags below alphabetically sorted
+ */
+
/** @info.force_execlist: Forced execlist submission */
u8 force_execlist:1;
+ /** @info.has_asid: Has address space ID */
+ u8 has_asid:1;
+ /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */
+ u8 has_atomic_enable_pte_bit:1;
+ /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */
+ u8 has_device_atomics_on_smem:1;
/** @info.has_flat_ccs: Whether flat CCS metadata is used */
u8 has_flat_ccs:1;
+ /** @info.has_heci_cscfi: device has heci cscfi */
+ u8 has_heci_cscfi:1;
+ /** @info.has_heci_gscfi: device has heci gscfi */
+ u8 has_heci_gscfi:1;
/** @info.has_llc: Device has a shared CPU+GPU last level cache */
u8 has_llc:1;
/** @info.has_mmio_ext: Device has extra MMIO address range */
@@ -314,6 +322,8 @@ struct xe_device {
u8 has_sriov:1;
/** @info.has_usm: Device has unified shared memory support */
u8 has_usm:1;
+ /** @info.is_dgfx: is discrete device */
+ u8 is_dgfx:1;
/**
* @info.probe_display: Probe display hardware. If set to
* false, the driver will behave as if there is no display
@@ -323,20 +333,12 @@ struct xe_device {
* state the firmware or bootloader left it in.
*/
u8 probe_display:1;
+ /** @info.skip_guc_pc: Skip GuC based PM feature init */
+ u8 skip_guc_pc:1;
/** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */
u8 skip_mtcfg:1;
/** @info.skip_pcode: skip access to PCODE uC */
u8 skip_pcode:1;
- /** @info.has_heci_gscfi: device has heci gscfi */
- u8 has_heci_gscfi:1;
- /** @info.has_heci_cscfi: device has heci cscfi */
- u8 has_heci_cscfi:1;
- /** @info.skip_guc_pc: Skip GuC based PM feature init */
- u8 skip_guc_pc:1;
- /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */
- u8 has_atomic_enable_pte_bit:1;
- /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */
- u8 has_device_atomics_on_smem:1;
} info;
/** @irq: device interrupt state */
@@ -345,7 +347,7 @@ struct xe_device {
spinlock_t lock;
/** @irq.enabled: interrupts enabled on this device */
- bool enabled;
+ atomic_t enabled;
} irq;
/** @ttm: ttm device */
@@ -374,6 +376,8 @@ struct xe_device {
/** @sriov.pf: PF specific data */
struct xe_device_pf pf;
+ /** @sriov.vf: VF specific data */
+ struct xe_device_vf vf;
/** @sriov.wq: workqueue used by the virtualization workers */
struct workqueue_struct *wq;
@@ -481,6 +485,12 @@ struct xe_device {
struct mutex lock;
} d3cold;
+ /** @pmt: Support the PMT driver callback interface */
+ struct {
+ /** @pmt.lock: protect access for telemetry data */
+ struct mutex lock;
+ } pmt;
+
/**
* @pm_callback_task: Track the active task that is running in either
* the runtime_suspend or runtime_resume callbacks.
@@ -588,7 +598,7 @@ struct xe_file {
/** @vm.xe: xarray to store VMs */
struct xarray xa;
/**
- * @vm.lock: Protects VM lookup + reference and removal a from
+ * @vm.lock: Protects VM lookup + reference and removal from
* file xarray. Not an intended to be an outer lock which does
* thing while being held.
*/
@@ -601,10 +611,15 @@ struct xe_file {
struct xarray xa;
/**
* @exec_queue.lock: Protects exec queue lookup + reference and
- * removal a frommfile xarray. Not an intended to be an outer
- * lock which does thing while being held.
+ * removal from file xarray. Not intended to be an outer lock
+ * which does things while being held.
*/
struct mutex lock;
+ /**
+ * @exec_queue.pending_removal: items pending to be removed to
+ * synchronize GPU state update with ongoing query.
+ */
+ atomic_t pending_removal;
} exec_queue;
/** @run_ticks: hw engine class run time in ticks for this drm client */
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 22f0f1a6dfd5..298a587da7f1 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -269,6 +269,49 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
}
}
+static struct xe_hw_engine *any_engine(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned long gt_id;
+
+ for_each_gt(gt, xe, gt_id) {
+ struct xe_hw_engine *hwe = xe_gt_any_hw_engine(gt);
+
+ if (hwe)
+ return hwe;
+ }
+
+ return NULL;
+}
+
+static bool force_wake_get_any_engine(struct xe_device *xe,
+ struct xe_hw_engine **phwe,
+ unsigned int *pfw_ref)
+{
+ enum xe_force_wake_domains domain;
+ unsigned int fw_ref;
+ struct xe_hw_engine *hwe;
+ struct xe_force_wake *fw;
+
+ hwe = any_engine(xe);
+ if (!hwe)
+ return false;
+
+ domain = xe_hw_engine_to_fw_domain(hwe);
+ fw = gt_to_fw(hwe->gt);
+
+ fw_ref = xe_force_wake_get(fw, domain);
+ if (!xe_force_wake_ref_has_domain(fw_ref, domain)) {
+ xe_force_wake_put(fw, fw_ref);
+ return false;
+ }
+
+ *phwe = hwe;
+ *pfw_ref = fw_ref;
+
+ return true;
+}
+
static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
{
unsigned long class, i, gt_id, capacity[XE_ENGINE_CLASS_MAX] = { };
@@ -280,7 +323,18 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
u64 gpu_timestamp;
unsigned int fw_ref;
+ /*
+ * Wait for any exec queue going away: their cycles will get updated on
+ * context switch out, so wait for that to happen
+ */
+ wait_var_event(&xef->exec_queue.pending_removal,
+ !atomic_read(&xef->exec_queue.pending_removal));
+
xe_pm_runtime_get(xe);
+ if (!force_wake_get_any_engine(xe, &hwe, &fw_ref)) {
+ xe_pm_runtime_put(xe);
+ return;
+ }
/* Accumulate all the exec queues from this client */
mutex_lock(&xef->exec_queue.lock);
@@ -295,33 +349,11 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
}
mutex_unlock(&xef->exec_queue.lock);
- /* Get the total GPU cycles */
- for_each_gt(gt, xe, gt_id) {
- enum xe_force_wake_domains fw;
-
- hwe = xe_gt_any_hw_engine(gt);
- if (!hwe)
- continue;
-
- fw = xe_hw_engine_to_fw_domain(hwe);
-
- fw_ref = xe_force_wake_get(gt_to_fw(gt), fw);
- if (!xe_force_wake_ref_has_domain(fw_ref, fw)) {
- hwe = NULL;
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- break;
- }
-
- gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- break;
- }
+ gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
+ xe_force_wake_put(gt_to_fw(hwe->gt), fw_ref);
xe_pm_runtime_put(xe);
- if (unlikely(!hwe))
- return;
-
for (class = 0; class < XE_ENGINE_CLASS_MAX; class++) {
const char *class_name;
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index fd0f3b3c9101..aab9e561153d 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -240,6 +240,7 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
return q;
}
+ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
void xe_exec_queue_destroy(struct kref *ref)
{
@@ -262,8 +263,11 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
/*
* Before releasing our ref to lrc and xef, accumulate our run ticks
+ * and wakeup any waiters.
*/
xe_exec_queue_update_run_ticks(q);
+ if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
+ wake_up_var(&q->xef->exec_queue.pending_removal);
for (i = 0; i < q->width; ++i)
xe_lrc_put(q->lrc[i]);
@@ -826,7 +830,10 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
mutex_lock(&xef->exec_queue.lock);
q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
+ if (q)
+ atomic_inc(&xef->exec_queue.pending_removal);
mutex_unlock(&xef->exec_queue.lock);
+
if (XE_IOCTL_DBG(xe, !q))
return -ENOENT;
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 558fac8bb6fb..05154f9de1a6 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -598,10 +598,10 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
u64 start;
u64 offset, pte;
- if (XE_WARN_ON(!bo->ggtt_node))
+ if (XE_WARN_ON(!bo->ggtt_node[ggtt->tile->id]))
return;
- start = bo->ggtt_node->base.start;
+ start = bo->ggtt_node[ggtt->tile->id]->base.start;
for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index);
@@ -612,15 +612,16 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
u64 start, u64 end)
{
- int err;
u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE;
+ u8 tile_id = ggtt->tile->id;
+ int err;
if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
alignment = SZ_64K;
- if (XE_WARN_ON(bo->ggtt_node)) {
+ if (XE_WARN_ON(bo->ggtt_node[tile_id])) {
/* Someone's already inserted this BO in the GGTT */
- xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size);
+ xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size);
return 0;
}
@@ -630,19 +631,19 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
- bo->ggtt_node = xe_ggtt_node_init(ggtt);
- if (IS_ERR(bo->ggtt_node)) {
- err = PTR_ERR(bo->ggtt_node);
- bo->ggtt_node = NULL;
+ bo->ggtt_node[tile_id] = xe_ggtt_node_init(ggtt);
+ if (IS_ERR(bo->ggtt_node[tile_id])) {
+ err = PTR_ERR(bo->ggtt_node[tile_id]);
+ bo->ggtt_node[tile_id] = NULL;
goto out;
}
mutex_lock(&ggtt->lock);
- err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size,
- alignment, 0, start, end, 0);
+ err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base,
+ bo->size, alignment, 0, start, end, 0);
if (err) {
- xe_ggtt_node_fini(bo->ggtt_node);
- bo->ggtt_node = NULL;
+ xe_ggtt_node_fini(bo->ggtt_node[tile_id]);
+ bo->ggtt_node[tile_id] = NULL;
} else {
xe_ggtt_map_bo(ggtt, bo);
}
@@ -691,13 +692,15 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
*/
void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
- if (XE_WARN_ON(!bo->ggtt_node))
+ u8 tile_id = ggtt->tile->id;
+
+ if (XE_WARN_ON(!bo->ggtt_node[tile_id]))
return;
/* This BO is not currently in the GGTT */
- xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size);
+ xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size);
- xe_ggtt_node_remove(bo->ggtt_node,
+ xe_ggtt_node_remove(bo->ggtt_node[tile_id],
bo->flags & XE_BO_FLAG_GGTT_INVALIDATE);
}
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
index 64b2ae6839db..c250ea773491 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -71,8 +71,14 @@ static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched,
static inline
struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched)
{
- return list_first_entry_or_null(&sched->base.pending_list,
- struct xe_sched_job, drm.list);
+ struct xe_sched_job *job;
+
+ spin_lock(&sched->base.job_list_lock);
+ job = list_first_entry_or_null(&sched->base.pending_list,
+ struct xe_sched_job, drm.list);
+ spin_unlock(&sched->base.job_list_lock);
+
+ return job;
}
static inline int
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index fc64b45d324b..24cc6a4f9a96 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -139,17 +139,29 @@ static int proxy_send_to_gsc(struct xe_gsc *gsc, u32 size)
return 0;
}
-static int validate_proxy_header(struct xe_gsc_proxy_header *header,
+static int validate_proxy_header(struct xe_gt *gt,
+ struct xe_gsc_proxy_header *header,
u32 source, u32 dest, u32 max_size)
{
u32 type = FIELD_GET(GSC_PROXY_TYPE, header->hdr);
u32 length = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, header->hdr);
+ int ret = 0;
- if (header->destination != dest || header->source != source)
- return -ENOEXEC;
+ if (header->destination != dest || header->source != source) {
+ ret = -ENOEXEC;
+ goto out;
+ }
- if (length + PROXY_HDR_SIZE > max_size)
- return -E2BIG;
+ if (length + PROXY_HDR_SIZE > max_size) {
+ ret = -E2BIG;
+ goto out;
+ }
+
+ /* We only care about the status if this is a message for the driver */
+ if (dest == GSC_PROXY_ADDRESSING_KMD && header->status != 0) {
+ ret = -EIO;
+ goto out;
+ }
switch (type) {
case GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD:
@@ -157,12 +169,20 @@ static int validate_proxy_header(struct xe_gsc_proxy_header *header,
break;
fallthrough;
case GSC_PROXY_MSG_TYPE_PROXY_INVALID:
- return -EIO;
+ ret = -EIO;
+ break;
default:
break;
}
- return 0;
+out:
+ if (ret)
+ xe_gt_err(gt,
+ "GSC proxy error: s=0x%x[0x%x], d=0x%x[0x%x], t=%u, l=0x%x, st=0x%x\n",
+ header->source, source, header->destination, dest,
+ type, length, header->status);
+
+ return ret;
}
#define proxy_header_wr(xe_, map_, offset_, field_, val_) \
@@ -228,12 +248,17 @@ static int proxy_query(struct xe_gsc *gsc)
xe_map_memcpy_from(xe, to_csme_hdr, &gsc->proxy.from_gsc,
reply_offset, PROXY_HDR_SIZE);
- /* stop if this was the last message */
- if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END)
+ /* Check the status and stop if this was the last message */
+ if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END) {
+ ret = validate_proxy_header(gt, to_csme_hdr,
+ GSC_PROXY_ADDRESSING_GSC,
+ GSC_PROXY_ADDRESSING_KMD,
+ GSC_PROXY_BUFFER_SIZE - reply_offset);
break;
+ }
/* make sure the GSC-to-CSME proxy header is sane */
- ret = validate_proxy_header(to_csme_hdr,
+ ret = validate_proxy_header(gt, to_csme_hdr,
GSC_PROXY_ADDRESSING_GSC,
GSC_PROXY_ADDRESSING_CSME,
GSC_PROXY_BUFFER_SIZE - reply_offset);
@@ -262,7 +287,7 @@ static int proxy_query(struct xe_gsc *gsc)
}
/* make sure the CSME-to-GSC proxy header is sane */
- ret = validate_proxy_header(gsc->proxy.from_csme,
+ ret = validate_proxy_header(gt, gsc->proxy.from_csme,
GSC_PROXY_ADDRESSING_CSME,
GSC_PROXY_ADDRESSING_GSC,
GSC_PROXY_BUFFER_SIZE - reply_offset);
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index d6744be01a68..41ab7fbebc19 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -748,10 +748,8 @@ static int do_gt_restart(struct xe_gt *gt)
if (err)
return err;
- for_each_hw_engine(hwe, gt, id) {
+ for_each_hw_engine(hwe, gt, id)
xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
- xe_reg_sr_apply_whitelist(hwe);
- }
/* Get CCS mode in sync between sw/hw */
xe_gt_apply_ccs_mode(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
index 5dc71394372d..11da0228cea7 100644
--- a/drivers/gpu/drm/xe/xe_gt_printk.h
+++ b/drivers/gpu/drm/xe/xe_gt_printk.h
@@ -60,6 +60,21 @@ static inline void __xe_gt_printfn_info(struct drm_printer *p, struct va_format
xe_gt_info(gt, "%pV", vaf);
}
+static inline void __xe_gt_printfn_dbg(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_gt *gt = p->arg;
+ struct drm_printer dbg;
+
+ /*
+ * The original xe_gt_dbg() callsite annotations are useless here,
+ * redirect to the tweaked drm_dbg_printer() instead.
+ */
+ dbg = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, NULL);
+ dbg.origin = p->origin;
+
+ drm_printf(&dbg, "GT%u: %pV", gt->info.id, vaf);
+}
+
/**
* xe_gt_err_printer - Construct a &drm_printer that outputs to xe_gt_err()
* @gt: the &xe_gt pointer to use in xe_gt_err()
@@ -90,4 +105,20 @@ static inline struct drm_printer xe_gt_info_printer(struct xe_gt *gt)
return p;
}
+/**
+ * xe_gt_dbg_printer - Construct a &drm_printer that outputs like xe_gt_dbg()
+ * @gt: the &xe_gt pointer to use in xe_gt_dbg()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_gt_dbg_printer(struct xe_gt *gt)
+{
+ struct drm_printer p = {
+ .printfn = __xe_gt_printfn_dbg,
+ .arg = gt,
+ .origin = (const void *)_THIS_IP_,
+ };
+ return p;
+}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 192643d63d22..65082f12f1a8 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -207,6 +207,11 @@ static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u
return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, *preempt_timeout);
}
+static int pf_push_vf_cfg_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority)
+{
+ return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY, priority);
+}
+
static int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
{
return pf_push_vf_cfg_u64(gt, vfid, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, size);
@@ -1540,8 +1545,6 @@ static u64 pf_query_max_lmem(struct xe_gt *gt)
#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
#define MAX_FAIR_LMEM SZ_128M /* XXX: make it small for the driver bringup */
-#else
-#define MAX_FAIR_LMEM SZ_2G /* XXX: known issue with allocating BO over 2GiB */
#endif
static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs)
@@ -1767,6 +1770,77 @@ u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfi
return preempt_timeout;
}
+static const char *sched_priority_unit(u32 priority)
+{
+ return priority == GUC_SCHED_PRIORITY_LOW ? "(low)" :
+ priority == GUC_SCHED_PRIORITY_NORMAL ? "(normal)" :
+ priority == GUC_SCHED_PRIORITY_HIGH ? "(high)" :
+ "(?)";
+}
+
+static int pf_provision_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+ int err;
+
+ err = pf_push_vf_cfg_sched_priority(gt, vfid, priority);
+ if (unlikely(err))
+ return err;
+
+ config->sched_priority = priority;
+ return 0;
+}
+
+static int pf_get_sched_priority(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+
+ return config->sched_priority;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_sched_priority() - Configure scheduling priority.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @priority: requested scheduling priority
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority)
+{
+ int err;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ err = pf_provision_sched_priority(gt, vfid, priority);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_config_set_u32_done(gt, vfid, priority,
+ xe_gt_sriov_pf_config_get_sched_priority(gt, vfid),
+ "scheduling priority", sched_priority_unit, err);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_sched_priority - Get VF's scheduling priority.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ *
+ * Return: VF's (or PF's) scheduling priority.
+ */
+u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid)
+{
+ u32 priority;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ priority = pf_get_sched_priority(gt, vfid);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return priority;
+}
+
static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *config)
{
lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
index 0c55aa40a1a7..f894e9d4abba 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
@@ -44,6 +44,9 @@ u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfi
int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
u32 preempt_timeout);
+u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority);
+
u32 xe_gt_sriov_pf_config_get_threshold(struct xe_gt *gt, unsigned int vfid,
enum xe_guc_klv_threshold_index index);
int xe_gt_sriov_pf_config_set_threshold(struct xe_gt *gt, unsigned int vfid,
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
index 2d3b73d78f14..686c7b3b6d7a 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
@@ -33,6 +33,8 @@ struct xe_gt_sriov_config {
u32 exec_quantum;
/** @preempt_timeout: preemption timeout in microseconds. */
u32 preempt_timeout;
+ /** @sched_priority: scheduling priority. */
+ u32 sched_priority;
/** @thresholds: GuC thresholds for adverse events notifications. */
u32 thresholds[XE_GUC_KLV_NUM_THRESHOLDS];
};
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
index 05df4ab3514b..b2521dd6ec42 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
@@ -164,6 +164,7 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent)
* │   │   ├── contexts_spare
* │   │   ├── exec_quantum_ms
* │   │   ├── preempt_timeout_us
+ * │   │   ├── sched_priority
* │   ├── vf1
* │   │   ├── ggtt_quota
* │   │   ├── lmem_quota
@@ -171,6 +172,7 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent)
* │   │   ├── contexts_quota
* │   │   ├── exec_quantum_ms
* │   │   ├── preempt_timeout_us
+ * │   │   ├── sched_priority
*/
#define DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(CONFIG, TYPE, FORMAT) \
@@ -209,6 +211,7 @@ DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ctxs, u32, "%llu\n");
DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(dbs, u32, "%llu\n");
DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(exec_quantum, u32, "%llu\n");
DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(preempt_timeout, u32, "%llu\n");
+DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(sched_priority, u32, "%llu\n");
/*
* /sys/kernel/debug/dri/0/
@@ -295,6 +298,8 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne
&exec_quantum_fops);
debugfs_create_file_unsafe("preempt_timeout_us", 0644, parent, parent,
&preempt_timeout_fops);
+ debugfs_create_file_unsafe("sched_priority", 0644, parent, parent,
+ &sched_priority_fops);
/* register all threshold attributes */
#define register_threshold_attribute(TAG, NAME, ...) \
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h
index 0bf12d89ceb2..6af219d93c3b 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h
@@ -18,7 +18,7 @@
* is within a range of supported VF numbers (up to maximum number of VFs that
* driver can support, including VF0 that represents the PF itself).
*
- * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information.
+ * Note: Effective only on debug builds. See `Xe Asserts`_ for more information.
*/
#define xe_gt_sriov_pf_assert_vfid(gt, vfid) xe_sriov_pf_assert_vfid(gt_to_xe(gt), (vfid))
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
index fae5be5a2a11..c00fb354705f 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
@@ -135,14 +135,33 @@ static int pf_update_policy_u32(struct xe_gt *gt, u16 key, u32 *policy, u32 valu
return 0;
}
+static void pf_bulk_reset_sched_priority(struct xe_gt *gt, u32 priority)
+{
+ unsigned int total_vfs = 1 + xe_gt_sriov_pf_get_totalvfs(gt);
+ unsigned int n;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ for (n = 0; n < total_vfs; n++)
+ gt->sriov.pf.vfs[n].config.sched_priority = priority;
+}
+
static int pf_provision_sched_if_idle(struct xe_gt *gt, bool enable)
{
+ int err;
+
xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
- return pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY,
- &gt->sriov.pf.policy.guc.sched_if_idle,
- enable);
+ err = pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY,
+ &gt->sriov.pf.policy.guc.sched_if_idle,
+ enable);
+
+ if (!err)
+ pf_bulk_reset_sched_priority(gt, enable ? GUC_SCHED_PRIORITY_NORMAL :
+ GUC_SCHED_PRIORITY_LOW);
+ return err;
}
static int pf_reprovision_sched_if_idle(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index d3baba50f085..cca5d5732802 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -27,6 +27,7 @@
#include "xe_guc_relay.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
+#include "xe_sriov_vf.h"
#include "xe_uc_fw.h"
#include "xe_wopcm.h"
@@ -223,6 +224,44 @@ int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt)
return 0;
}
+static int guc_action_vf_notify_resfix_done(struct xe_guc *guc)
+{
+ u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE),
+ };
+ int ret;
+
+ ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request));
+
+ return ret > 0 ? -EPROTO : ret;
+}
+
+/**
+ * xe_gt_sriov_vf_notify_resfix_done - Notify GuC about resource fixups apply completed.
+ * @gt: the &xe_gt struct instance linked to target GuC
+ *
+ * Returns: 0 if the operation completed successfully, or a negative error
+ * code otherwise.
+ */
+int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt)
+{
+ struct xe_guc *guc = &gt->uc.guc;
+ int err;
+
+ xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+
+ err = guc_action_vf_notify_resfix_done(guc);
+ if (unlikely(err))
+ xe_gt_sriov_err(gt, "Failed to notify GuC about resource fixup done (%pe)\n",
+ ERR_PTR(err));
+ else
+ xe_gt_sriov_dbg_verbose(gt, "sent GuC resource fixup done\n");
+
+ return err;
+}
+
static int guc_action_query_single_klv(struct xe_guc *guc, u32 key,
u32 *value, u32 value_len)
{
@@ -692,6 +731,30 @@ failed:
return err;
}
+/**
+ * xe_gt_sriov_vf_migrated_event_handler - Start a VF migration recovery,
+ * or just mark that a GuC is ready for it.
+ * @gt: the &xe_gt struct instance linked to target GuC
+ *
+ * This function shall be called only by VF.
+ */
+void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ xe_gt_assert(gt, IS_SRIOV_VF(xe));
+
+ set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags);
+ /*
+ * We need to be certain that if all flags were set, at least one
+ * thread will notice that and schedule the recovery.
+ */
+ smp_mb__after_atomic();
+
+ xe_gt_sriov_info(gt, "ready for recovery after migration\n");
+ xe_sriov_vf_start_migration_recovery(xe);
+}
+
static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor)
{
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
index e541ce57bec2..912d20814261 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
@@ -17,6 +17,8 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt);
int xe_gt_sriov_vf_connect(struct xe_gt *gt);
int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt);
int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt);
+int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt);
+void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt);
u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt);
u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h
index 91d944f6c4e4..38325ef53617 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats.h
@@ -6,15 +6,11 @@
#ifndef _XE_GT_STATS_H_
#define _XE_GT_STATS_H_
+#include "xe_gt_stats_types.h"
+
struct xe_gt;
struct drm_printer;
-enum xe_gt_stats_id {
- XE_GT_STATS_ID_TLB_INVAL,
- /* must be the last entry */
- __XE_GT_STATS_NUM_IDS,
-};
-
#ifdef CONFIG_DEBUG_FS
int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p);
void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr);
diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h
new file mode 100644
index 000000000000..2fc055e39f27
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_STATS_TYPES_H_
+#define _XE_GT_STATS_TYPES_H_
+
+enum xe_gt_stats_id {
+ XE_GT_STATS_ID_TLB_INVAL,
+ /* must be the last entry */
+ __XE_GT_STATS_NUM_IDS,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index 03b225364101..8db78d616b6f 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -8,6 +8,7 @@
#include <regs/xe_gt_regs.h>
#include "xe_device.h"
#include "xe_gt.h"
+#include "xe_gt_printk.h"
#include "xe_gt_sysfs.h"
#include "xe_gt_throttle.h"
#include "xe_mmio.h"
@@ -53,6 +54,7 @@ static u32 read_status(struct xe_gt *gt)
{
u32 status = xe_gt_throttle_get_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK;
+ xe_gt_dbg(gt, "throttle reasons: 0x%08x\n", status);
return status;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 3cb228c773cd..665927b80e9e 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -65,6 +65,14 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe
__invalidation_fence_signal(xe, fence);
}
+void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
+{
+ if (WARN_ON_ONCE(!fence->gt))
+ return;
+
+ __invalidation_fence_signal(gt_to_xe(fence->gt), fence);
+}
+
static void xe_gt_tlb_fence_timeout(struct work_struct *work)
{
struct xe_gt *gt = container_of(work, struct xe_gt,
@@ -253,9 +261,18 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt,
0, /* seqno, replaced in send_tlb_invalidation */
MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
};
+ int ret;
+
+ ret = send_tlb_invalidation(&gt->uc.guc, fence, action,
+ ARRAY_SIZE(action));
+ /*
+ * -ECANCELED indicates the CT is stopped for a GT reset. TLB caches
+ * should be nuked on a GT reset so this error can be ignored.
+ */
+ if (ret == -ECANCELED)
+ return 0;
- return send_tlb_invalidation(&gt->uc.guc, fence, action,
- ARRAY_SIZE(action));
+ return ret;
}
/**
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index f430d5797af7..00b1c6c01e8d 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -28,6 +28,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence,
bool stack);
+void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence);
static inline void
xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index a287b98ee70b..6e66bf0e8b3f 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -11,10 +11,10 @@
#include "xe_gt_idle_types.h"
#include "xe_gt_sriov_pf_types.h"
#include "xe_gt_sriov_vf_types.h"
-#include "xe_gt_stats.h"
+#include "xe_gt_stats_types.h"
#include "xe_hw_engine_types.h"
#include "xe_hw_fence_types.h"
-#include "xe_oa.h"
+#include "xe_oa_types.h"
#include "xe_reg_sr_types.h"
#include "xe_sa_types.h"
#include "xe_uc_types.h"
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 7f704346a8f4..4e2868efb620 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -44,7 +44,15 @@ static u32 guc_bo_ggtt_addr(struct xe_guc *guc,
struct xe_bo *bo)
{
struct xe_device *xe = guc_to_xe(guc);
- u32 addr = xe_bo_ggtt_addr(bo);
+ u32 addr;
+
+ /*
+ * For most BOs, the address on the allocating tile is fine. However for
+ * some, e.g. G2G CTB, the address on a specific tile is required as it
+ * might be different for each tile. So, just always ask for the address
+ * on the target GuC.
+ */
+ addr = __xe_bo_ggtt_addr(bo, gt_to_tile(guc_to_gt(guc))->id);
/* GuC addresses above GUC_GGTT_TOP don't map through the GTT */
xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc)));
@@ -244,6 +252,293 @@ static void guc_write_params(struct xe_guc *guc)
xe_mmio_write32(&gt->mmio, SOFT_SCRATCH(1 + i), guc->params[i]);
}
+static int guc_action_register_g2g_buffer(struct xe_guc *guc, u32 type, u32 dst_tile, u32 dst_dev,
+ u32 desc_addr, u32 buff_addr, u32 size)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 action[] = {
+ XE_GUC_ACTION_REGISTER_G2G,
+ FIELD_PREP(XE_G2G_REGISTER_SIZE, size / SZ_4K - 1) |
+ FIELD_PREP(XE_G2G_REGISTER_TYPE, type) |
+ FIELD_PREP(XE_G2G_REGISTER_TILE, dst_tile) |
+ FIELD_PREP(XE_G2G_REGISTER_DEVICE, dst_dev),
+ desc_addr,
+ buff_addr,
+ };
+
+ xe_assert(xe, (type == XE_G2G_TYPE_IN) || (type == XE_G2G_TYPE_OUT));
+ xe_assert(xe, !(size % SZ_4K));
+
+ return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+static int guc_action_deregister_g2g_buffer(struct xe_guc *guc, u32 type, u32 dst_tile, u32 dst_dev)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 action[] = {
+ XE_GUC_ACTION_DEREGISTER_G2G,
+ FIELD_PREP(XE_G2G_DEREGISTER_TYPE, type) |
+ FIELD_PREP(XE_G2G_DEREGISTER_TILE, dst_tile) |
+ FIELD_PREP(XE_G2G_DEREGISTER_DEVICE, dst_dev),
+ };
+
+ xe_assert(xe, (type == XE_G2G_TYPE_IN) || (type == XE_G2G_TYPE_OUT));
+
+ return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+#define G2G_DEV(gt) (((gt)->info.type == XE_GT_TYPE_MAIN) ? 0 : 1)
+
+#define G2G_BUFFER_SIZE (SZ_4K)
+#define G2G_DESC_SIZE (64)
+#define G2G_DESC_AREA_SIZE (SZ_4K)
+
+/*
+ * Generate a unique id for each bi-directional CTB for each pair of
+ * near and far tiles/devices. The id can then be used as an index into
+ * a single allocation that is sub-divided into multiple CTBs.
+ *
+ * For example, with two devices per tile and two tiles, the table should
+ * look like:
+ * Far <tile>.<dev>
+ * 0.0 0.1 1.0 1.1
+ * N 0.0 --/-- 00/01 02/03 04/05
+ * e 0.1 01/00 --/-- 06/07 08/09
+ * a 1.0 03/02 07/06 --/-- 10/11
+ * r 1.1 05/04 09/08 11/10 --/--
+ *
+ * Where each entry is Rx/Tx channel id.
+ *
+ * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would
+ * be reading from channel #11 and writing to channel #10. Whereas,
+ * GuC #2 talking to GuC #3 would be read on #10 and write to #11.
+ */
+static unsigned int g2g_slot(u32 near_tile, u32 near_dev, u32 far_tile, u32 far_dev,
+ u32 type, u32 max_inst, bool have_dev)
+{
+ u32 near = near_tile, far = far_tile;
+ u32 idx = 0, x, y, direction;
+ int i;
+
+ if (have_dev) {
+ near = (near << 1) | near_dev;
+ far = (far << 1) | far_dev;
+ }
+
+ /* No need to send to one's self */
+ if (far == near)
+ return -1;
+
+ if (far > near) {
+ /* Top right table half */
+ x = far;
+ y = near;
+
+ /* T/R is 'forwards' direction */
+ direction = type;
+ } else {
+ /* Bottom left table half */
+ x = near;
+ y = far;
+
+ /* B/L is 'backwards' direction */
+ direction = (1 - type);
+ }
+
+ /* Count the rows prior to the target */
+ for (i = y; i > 0; i--)
+ idx += max_inst - i;
+
+ /* Count this row up to the target */
+ idx += (x - 1 - y);
+
+ /* Slots are in Rx/Tx pairs */
+ idx *= 2;
+
+ /* Pick Rx/Tx direction */
+ idx += direction;
+
+ return idx;
+}
+
+static int guc_g2g_register(struct xe_guc *near_guc, struct xe_gt *far_gt, u32 type, bool have_dev)
+{
+ struct xe_gt *near_gt = guc_to_gt(near_guc);
+ struct xe_device *xe = gt_to_xe(near_gt);
+ struct xe_bo *g2g_bo;
+ u32 near_tile = gt_to_tile(near_gt)->id;
+ u32 near_dev = G2G_DEV(near_gt);
+ u32 far_tile = gt_to_tile(far_gt)->id;
+ u32 far_dev = G2G_DEV(far_gt);
+ u32 max = xe->info.gt_count;
+ u32 base, desc, buf;
+ int slot;
+
+ /* G2G is not allowed between different cards */
+ xe_assert(xe, xe == gt_to_xe(far_gt));
+
+ g2g_bo = near_guc->g2g.bo;
+ xe_assert(xe, g2g_bo);
+
+ slot = g2g_slot(near_tile, near_dev, far_tile, far_dev, type, max, have_dev);
+ xe_assert(xe, slot >= 0);
+
+ base = guc_bo_ggtt_addr(near_guc, g2g_bo);
+ desc = base + slot * G2G_DESC_SIZE;
+ buf = base + G2G_DESC_AREA_SIZE + slot * G2G_BUFFER_SIZE;
+
+ xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE);
+ xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= g2g_bo->size);
+
+ return guc_action_register_g2g_buffer(near_guc, type, far_tile, far_dev,
+ desc, buf, G2G_BUFFER_SIZE);
+}
+
+static void guc_g2g_deregister(struct xe_guc *guc, u32 far_tile, u32 far_dev, u32 type)
+{
+ guc_action_deregister_g2g_buffer(guc, type, far_tile, far_dev);
+}
+
+static u32 guc_g2g_size(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ unsigned int count = xe->info.gt_count;
+ u32 num_channels = (count * (count - 1)) / 2;
+
+ xe_assert(xe, num_channels * XE_G2G_TYPE_LIMIT * G2G_DESC_SIZE <= G2G_DESC_AREA_SIZE);
+
+ return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE;
+}
+
+static bool xe_guc_g2g_wanted(struct xe_device *xe)
+{
+ /* Can't do GuC to GuC communication if there is only one GuC */
+ if (xe->info.gt_count <= 1)
+ return false;
+
+ /* No current user */
+ return false;
+}
+
+static int guc_g2g_alloc(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_bo *bo;
+ u32 g2g_size;
+
+ if (guc->g2g.bo)
+ return 0;
+
+ if (gt->info.id != 0) {
+ struct xe_gt *root_gt = xe_device_get_gt(xe, 0);
+ struct xe_guc *root_guc = &root_gt->uc.guc;
+ struct xe_bo *bo;
+
+ bo = xe_bo_get(root_guc->g2g.bo);
+ if (!bo)
+ return -ENODEV;
+
+ guc->g2g.bo = bo;
+ guc->g2g.owned = false;
+ return 0;
+ }
+
+ g2g_size = guc_g2g_size(guc);
+ bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_ALL |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size);
+ guc->g2g.bo = bo;
+ guc->g2g.owned = true;
+
+ return 0;
+}
+
+static void guc_g2g_fini(struct xe_guc *guc)
+{
+ if (!guc->g2g.bo)
+ return;
+
+ /* Unpinning the owned object is handled by generic shutdown */
+ if (!guc->g2g.owned)
+ xe_bo_put(guc->g2g.bo);
+
+ guc->g2g.bo = NULL;
+}
+
+static int guc_g2g_start(struct xe_guc *guc)
+{
+ struct xe_gt *far_gt, *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ unsigned int i, j;
+ int t, err;
+ bool have_dev;
+
+ if (!guc->g2g.bo) {
+ int ret;
+
+ ret = guc_g2g_alloc(guc);
+ if (ret)
+ return ret;
+ }
+
+ /* GuC interface will need extending if more GT device types are ever created. */
+ xe_gt_assert(gt, (gt->info.type == XE_GT_TYPE_MAIN) || (gt->info.type == XE_GT_TYPE_MEDIA));
+
+ /* Channel numbering depends on whether there are multiple GTs per tile */
+ have_dev = xe->info.gt_count > xe->info.tile_count;
+
+ for_each_gt(far_gt, xe, i) {
+ u32 far_tile, far_dev;
+
+ if (far_gt->info.id == gt->info.id)
+ continue;
+
+ far_tile = gt_to_tile(far_gt)->id;
+ far_dev = G2G_DEV(far_gt);
+
+ for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) {
+ err = guc_g2g_register(guc, far_gt, t, have_dev);
+ if (err) {
+ while (--t >= 0)
+ guc_g2g_deregister(guc, far_tile, far_dev, t);
+ goto err_deregister;
+ }
+ }
+ }
+
+ return 0;
+
+err_deregister:
+ for_each_gt(far_gt, xe, j) {
+ u32 tile, dev;
+
+ if (far_gt->info.id == gt->info.id)
+ continue;
+
+ if (j >= i)
+ break;
+
+ tile = gt_to_tile(far_gt)->id;
+ dev = G2G_DEV(far_gt);
+
+ for (t = 0; t < XE_G2G_TYPE_LIMIT; t++)
+ guc_g2g_deregister(guc, tile, dev, t);
+ }
+
+ return err;
+}
+
static void guc_fini_hw(void *arg)
{
struct xe_guc *guc = arg;
@@ -253,6 +548,8 @@ static void guc_fini_hw(void *arg)
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_uc_fini_hw(&guc_to_gt(guc)->uc);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
+
+ guc_g2g_fini(guc);
}
/**
@@ -423,7 +720,16 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
int xe_guc_post_load_init(struct xe_guc *guc)
{
+ int ret;
+
xe_guc_ads_populate_post_load(&guc->ads);
+
+ if (xe_guc_g2g_wanted(guc_to_xe(guc))) {
+ ret = guc_g2g_start(guc);
+ if (ret)
+ return ret;
+ }
+
guc->submission_state.enabled = true;
return 0;
@@ -945,7 +1251,6 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT);
- xe_assert(xe, !xe_guc_ct_enabled(&guc->ct));
xe_assert(xe, len);
xe_assert(xe, len <= VF_SW_FLAG_COUNT);
xe_assert(xe, len <= MED_VF_SW_FLAG_COUNT);
@@ -1099,10 +1404,21 @@ int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val)
return guc_self_cfg(guc, key, 2, val);
}
+static void xe_guc_sw_0_irq_handler(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+
+ if (IS_SRIOV_VF(gt_to_xe(gt)))
+ xe_gt_sriov_vf_migrated_event_handler(gt);
+}
+
void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir)
{
if (iir & GUC_INTR_GUC2HOST)
xe_guc_ct_irq_handler(&guc->ct);
+
+ if (iir & GUC_INTR_SW_INT_0)
+ xe_guc_sw_0_irq_handler(guc);
}
void xe_guc_sanitize(struct xe_guc *guc)
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 4e746ae98888..943146e5b460 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -231,11 +231,6 @@ static size_t guc_ads_size(struct xe_guc_ads *ads)
guc_ads_private_data_size(ads);
}
-static bool needs_wa_1607983814(struct xe_device *xe)
-{
- return GRAPHICS_VERx100(xe) < 1250;
-}
-
static size_t calculate_regset_size(struct xe_gt *gt)
{
struct xe_reg_sr_entry *sr_entry;
@@ -250,7 +245,7 @@ static size_t calculate_regset_size(struct xe_gt *gt)
count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES;
- if (needs_wa_1607983814(gt_to_xe(gt)))
+ if (XE_WA(gt, 1607983814))
count += LNCFCMOCS_REG_COUNT;
return count * sizeof(struct guc_mmio_reg);
@@ -709,7 +704,6 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
struct iosys_map *regset_map,
struct xe_hw_engine *hwe)
{
- struct xe_device *xe = ads_to_xe(ads);
struct xe_hw_engine *hwe_rcs_reset_domain =
xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER);
struct xe_reg_sr_entry *entry;
@@ -740,8 +734,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
guc_mmio_regset_write_one(ads, regset_map, e->reg, count++);
}
- /* Wa_1607983814 */
- if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) {
+ if (XE_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) {
for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
guc_mmio_regset_write_one(ads, regset_map,
XELP_LNCFCMOCS(i), count++);
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
index d63912d28246..137571fae4ed 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.c
+++ b/drivers/gpu/drm/xe/xe_guc_capture.c
@@ -1806,7 +1806,6 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm
if (!devcore_snapshot->matched_node)
return;
- xe_gt_assert(gt, snapshot->source <= XE_ENGINE_CAPTURE_SOURCE_GUC);
xe_gt_assert(gt, snapshot->hwe);
capture_class = xe_engine_class_to_guc_capture_class(snapshot->hwe->class);
@@ -1815,7 +1814,8 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm
snapshot->name ? snapshot->name : "",
snapshot->logical_instance);
drm_printf(p, "\tCapture_source: %s\n",
- snapshot->source == XE_ENGINE_CAPTURE_SOURCE_GUC ? "GuC" : "Manual");
+ devcore_snapshot->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC ?
+ "GuC" : "Manual");
drm_printf(p, "\tCoverage: %s\n", grptype[devcore_snapshot->matched_node->is_partial]);
drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
snapshot->forcewake.domain, snapshot->forcewake.ref);
@@ -1840,29 +1840,24 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm
}
/**
- * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the job.
- * @job: The job object.
+ * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the queue.
+ * @q: The exec queue object
*
- * Search within the capture outlist for the job, could be used for check if
- * GuC capture is ready for the job.
+ * Search within the capture outlist for the queue, could be used for check if
+ * GuC capture is ready for the queue.
* If found, the locked boolean of the node will be flagged.
*
* Returns: found guc-capture node ptr else NULL
*/
struct __guc_capture_parsed_output *
-xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job)
+xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q)
{
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
- struct xe_exec_queue *q;
struct xe_device *xe;
u16 guc_class = GUC_LAST_ENGINE_CLASS + 1;
struct xe_devcoredump_snapshot *ss;
- if (!job)
- return NULL;
-
- q = job->q;
if (!q || !q->gt)
return NULL;
@@ -1874,7 +1869,7 @@ xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job)
if (ss->matched_node && ss->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC)
return ss->matched_node;
- /* Find hwe for the job */
+ /* Find hwe for the queue */
for_each_hw_engine(hwe, q->gt, id) {
if (hwe != q->hwe)
continue;
@@ -1906,17 +1901,16 @@ xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job)
}
/**
- * xe_engine_snapshot_capture_for_job - Take snapshot of associated engine
- * @job: The job object
+ * xe_engine_snapshot_capture_for_queue - Take snapshot of associated engine
+ * @q: The exec queue object
*
* Take snapshot of associated HW Engine
*
* Returns: None.
*/
void
-xe_engine_snapshot_capture_for_job(struct xe_sched_job *job)
+xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q)
{
- struct xe_exec_queue *q = job->q;
struct xe_device *xe = gt_to_xe(q->gt);
struct xe_devcoredump *coredump = &xe->devcoredump;
struct xe_hw_engine *hwe;
@@ -1934,11 +1928,12 @@ xe_engine_snapshot_capture_for_job(struct xe_sched_job *job)
}
if (!coredump->snapshot.hwe[id]) {
- coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe, job);
+ coredump->snapshot.hwe[id] =
+ xe_hw_engine_snapshot_capture(hwe, q);
} else {
struct __guc_capture_parsed_output *new;
- new = xe_guc_capture_get_matching_and_lock(job);
+ new = xe_guc_capture_get_matching_and_lock(q);
if (new) {
struct xe_guc *guc = &q->gt->uc.guc;
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.h b/drivers/gpu/drm/xe/xe_guc_capture.h
index 97a795d13dd1..20a078dc4b85 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.h
+++ b/drivers/gpu/drm/xe/xe_guc_capture.h
@@ -11,10 +11,10 @@
#include "xe_guc.h"
#include "xe_guc_fwif.h"
+struct xe_exec_queue;
struct xe_guc;
struct xe_hw_engine;
struct xe_hw_engine_snapshot;
-struct xe_sched_job;
static inline enum guc_capture_list_class_type xe_guc_class_to_capture_class(u16 class)
{
@@ -50,10 +50,10 @@ size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc);
const struct __guc_mmio_reg_descr_group *
xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type,
enum guc_capture_list_class_type capture_class, bool is_ext);
-struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job);
+struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q);
void xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot);
void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p);
-void xe_engine_snapshot_capture_for_job(struct xe_sched_job *job);
+void xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q);
void xe_guc_capture_steered_list_init(struct xe_guc *guc);
void xe_guc_capture_put_matched_nodes(struct xe_guc *guc);
int xe_guc_capture_init(struct xe_guc *guc);
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 8aeb1789805c..7d33f3a11e61 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -54,6 +54,7 @@ enum {
CT_DEAD_PARSE_G2H_UNKNOWN, /* 0x1000 */
CT_DEAD_PARSE_G2H_ORIGIN, /* 0x2000 */
CT_DEAD_PARSE_G2H_TYPE, /* 0x4000 */
+ CT_DEAD_CRASH, /* 0x8000 */
};
static void ct_dead_worker_func(struct work_struct *w);
@@ -469,8 +470,10 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
* after any existing dead state has been dumped.
*/
spin_lock_irq(&ct->dead.lock);
- if (ct->dead.reason)
+ if (ct->dead.reason) {
ct->dead.reason |= (1 << CT_DEAD_STATE_REARM);
+ queue_work(system_unbound_wq, &ct->dead.worker);
+ }
spin_unlock_irq(&ct->dead.lock);
#endif
@@ -1017,7 +1020,6 @@ retry_same_fence:
}
ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
-
if (!ret) {
LNL_FLUSH_WORK(&ct->g2h_worker);
if (g2h_fence.done) {
@@ -1121,6 +1123,24 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
return 0;
}
+static int guc_crash_process_msg(struct xe_guc_ct *ct, u32 action)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+
+ if (action == XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED)
+ xe_gt_err(gt, "GuC Crash dump notification\n");
+ else if (action == XE_GUC_ACTION_NOTIFY_EXCEPTION)
+ xe_gt_err(gt, "GuC Exception notification\n");
+ else
+ xe_gt_err(gt, "Unknown GuC crash notification: 0x%04X\n", action);
+
+ CT_DEAD(ct, NULL, CRASH);
+
+ kick_reset(ct);
+
+ return 0;
+}
+
static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
{
struct xe_gt *gt = ct_to_gt(ct);
@@ -1295,13 +1315,17 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
case GUC_ACTION_GUC2PF_ADVERSE_EVENT:
ret = xe_gt_sriov_pf_monitor_process_guc2pf(gt, hxg, hxg_len);
break;
+ case XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED:
+ case XE_GUC_ACTION_NOTIFY_EXCEPTION:
+ ret = guc_crash_process_msg(ct, action);
+ break;
default:
xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
}
if (ret) {
- xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
- action, ERR_PTR(ret));
+ xe_gt_err(gt, "G2H action %#04x failed (%pe) len %u msg %*ph\n",
+ action, ERR_PTR(ret), hxg_len, (int)sizeof(u32) * hxg_len, hxg);
CT_DEAD(ct, NULL, PROCESS_FAILED);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 08ffe59f22fa..057153f89b30 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -17,6 +17,7 @@
#define G2H_LEN_DW_TLB_INVALIDATE 3
#define GUC_ID_MAX 65535
+#define GUC_ID_UNKNOWN 0xffffffff
#define GUC_CONTEXT_DISABLE 0
#define GUC_CONTEXT_ENABLE 1
diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c
index 9d99fe266d97..146a6eda9e06 100644
--- a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c
+++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c
@@ -49,6 +49,8 @@ const char *xe_guc_klv_key_to_string(u16 key)
return "begin_db_id";
case GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY:
return "begin_ctx_id";
+ case GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY:
+ return "sched_priority";
/* VF CFG threshold keys */
#define define_threshold_key_to_string_case(TAG, NAME, ...) \
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 6f4a9812b4f4..9c36329fe857 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -412,12 +412,11 @@ static const int xe_exec_queue_prio_to_guc[] = {
static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
struct exec_queue_policy policy;
- struct xe_device *xe = guc_to_xe(guc);
enum xe_exec_queue_priority prio = q->sched_props.priority;
u32 timeslice_us = q->sched_props.timeslice_us;
u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
- xe_assert(xe, exec_queue_registered(q));
+ xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
@@ -451,12 +450,11 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc,
struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
- struct xe_device *xe = guc_to_xe(guc);
u32 action[MAX_MLRC_REG_SIZE];
int len = 0;
int i;
- xe_assert(xe, xe_exec_queue_is_parallel(q));
+ xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));
action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
action[len++] = info->flags;
@@ -479,7 +477,7 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc,
action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
}
- xe_assert(xe, len <= MAX_MLRC_REG_SIZE);
+ xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE
xe_guc_ct_send(&guc->ct, action, len, 0, 0);
@@ -513,7 +511,7 @@ static void register_exec_queue(struct xe_exec_queue *q)
struct xe_lrc *lrc = q->lrc[0];
struct guc_ctxt_registration_info info;
- xe_assert(xe, !exec_queue_registered(q));
+ xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
memset(&info, 0, sizeof(info));
info.context_idx = q->guc->id;
@@ -603,7 +601,7 @@ static int wq_noop_append(struct xe_exec_queue *q)
if (wq_wait_for_space(q, wq_space_until_wrap(q)))
return -ENODEV;
- xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw));
+ xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));
parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
@@ -643,13 +641,13 @@ static void wq_item_append(struct xe_exec_queue *q)
wqi[i++] = lrc->ring.tail / sizeof(u64);
}
- xe_assert(xe, i == wqi_size / sizeof(u32));
+ xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));
iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
wq[q->guc->wqi_tail / sizeof(u32)]));
xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
q->guc->wqi_tail += wqi_size;
- xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE);
+ xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);
xe_device_wmb(xe);
@@ -661,7 +659,6 @@ static void wq_item_append(struct xe_exec_queue *q)
static void submit_exec_queue(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
struct xe_lrc *lrc = q->lrc[0];
u32 action[3];
u32 g2h_len = 0;
@@ -669,7 +666,7 @@ static void submit_exec_queue(struct xe_exec_queue *q)
int len = 0;
bool extra_submit = false;
- xe_assert(xe, exec_queue_registered(q));
+ xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
if (xe_exec_queue_is_parallel(q))
wq_item_append(q);
@@ -716,12 +713,11 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
struct xe_sched_job *job = to_xe_sched_job(drm_job);
struct xe_exec_queue *q = job->q;
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
struct dma_fence *fence = NULL;
bool lr = xe_exec_queue_is_lr(q);
- xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
- exec_queue_banned(q) || exec_queue_suspended(q));
+ xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
+ exec_queue_banned(q) || exec_queue_suspended(q));
trace_xe_sched_job_run(job);
@@ -823,7 +819,7 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
*/
void xe_guc_submit_wedge(struct xe_guc *guc)
{
- struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
struct xe_exec_queue *q;
unsigned long index;
int err;
@@ -833,7 +829,8 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
guc_submit_wedged_fini, guc);
if (err) {
- drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n");
+ xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; "
+ "Although device is wedged.\n");
return;
}
@@ -865,11 +862,10 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
container_of(w, struct xe_guc_exec_queue, lr_tdr);
struct xe_exec_queue *q = ge->q;
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
struct xe_gpu_scheduler *sched = &ge->sched;
bool wedged;
- xe_assert(xe, xe_exec_queue_is_lr(q));
+ xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q));
trace_xe_exec_queue_lr_cleanup(q);
wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
@@ -903,13 +899,19 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
!exec_queue_pending_disable(q) ||
xe_guc_read_stopped(guc), HZ * 5);
if (!ret) {
- drm_warn(&xe->drm, "Schedule disable failed to respond");
+ xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n",
+ q->guc->id);
+ xe_devcoredump(q, NULL, "Schedule disable failed to respond, guc_id=%d\n",
+ q->guc->id);
xe_sched_submission_start(sched);
xe_gt_reset_async(q->gt);
return;
}
}
+ if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0]))
+ xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id);
+
xe_sched_submission_start(sched);
}
@@ -1068,13 +1070,13 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
* do manual capture first and decide later if we need to use it
*/
if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
- !xe_guc_capture_get_matching_and_lock(job)) {
+ !xe_guc_capture_get_matching_and_lock(q)) {
/* take force wake before engine register manual capture */
fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");
- xe_engine_snapshot_capture_for_job(job);
+ xe_engine_snapshot_capture_for_queue(q);
xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
}
@@ -1132,7 +1134,12 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
if (!ret)
- xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond");
+ xe_gt_warn(guc_to_gt(guc),
+ "Schedule disable failed to respond, guc_id=%d",
+ q->guc->id);
+ xe_devcoredump(q, job,
+ "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
+ q->guc->id, ret, xe_guc_read_stopped(guc));
set_exec_queue_extra_ref(q);
xe_exec_queue_get(q); /* GT reset owns this */
set_exec_queue_banned(q);
@@ -1162,7 +1169,10 @@ trigger_reset:
trace_xe_sched_job_timedout(job);
if (!exec_queue_killed(q))
- xe_devcoredump(job);
+ xe_devcoredump(q, job,
+ "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
+ xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
+ q->guc->id, q->flags);
/*
* Kernel jobs should never fail, nor should VM jobs if they do
@@ -1277,9 +1287,8 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
struct xe_exec_queue *q = msg->private_data;
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
+ xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
trace_xe_exec_queue_cleanup_entity(q);
if (exec_queue_registered(q))
@@ -1315,11 +1324,10 @@ static void __suspend_fence_signal(struct xe_exec_queue *q)
static void suspend_fence_signal(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
- xe_guc_read_stopped(guc));
- xe_assert(xe, q->guc->suspend_pending);
+ xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) ||
+ xe_guc_read_stopped(guc));
+ xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending);
__suspend_fence_signal(q);
}
@@ -1415,12 +1423,11 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
{
struct xe_gpu_scheduler *sched;
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
struct xe_guc_exec_queue *ge;
long timeout;
int err, i;
- xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));
+ xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc)));
ge = kzalloc(sizeof(*ge), GFP_KERNEL);
if (!ge)
@@ -1633,9 +1640,8 @@ static void guc_exec_queue_resume(struct xe_exec_queue *q)
struct xe_gpu_scheduler *sched = &q->guc->sched;
struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, !q->guc->suspend_pending);
+ xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending);
xe_sched_msg_lock(sched);
guc_exec_queue_try_add_msg(q, msg, RESUME);
@@ -1708,7 +1714,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
ban = true;
}
} else if (xe_exec_queue_is_lr(q) &&
- (xe_lrc_ring_head(q->lrc[0]) != xe_lrc_ring_tail(q->lrc[0]))) {
+ !xe_lrc_ring_is_idle(q->lrc[0])) {
ban = true;
}
@@ -1747,9 +1753,8 @@ void xe_guc_submit_stop(struct xe_guc *guc)
{
struct xe_exec_queue *q;
unsigned long index;
- struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, xe_guc_read_stopped(guc) == 1);
+ xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
mutex_lock(&guc->submission_state.lock);
@@ -1791,9 +1796,8 @@ int xe_guc_submit_start(struct xe_guc *guc)
{
struct xe_exec_queue *q;
unsigned long index;
- struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, xe_guc_read_stopped(guc) == 1);
+ xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
mutex_lock(&guc->submission_state.lock);
atomic_dec(&guc->submission_state.stopped);
@@ -1814,22 +1818,22 @@ int xe_guc_submit_start(struct xe_guc *guc)
static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
- struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
struct xe_exec_queue *q;
if (unlikely(guc_id >= GUC_ID_MAX)) {
- drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
+ xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
return NULL;
}
q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
if (unlikely(!q)) {
- drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id);
+ xe_gt_err(gt, "Not engine present for guc_id %u\n", guc_id);
return NULL;
}
- xe_assert(xe, guc_id >= q->guc->id);
- xe_assert(xe, guc_id < (q->guc->id + q->width));
+ xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
+ xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));
return q;
}
@@ -1898,15 +1902,14 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
- struct xe_device *xe = guc_to_xe(guc);
struct xe_exec_queue *q;
- u32 guc_id = msg[0];
- u32 runnable_state = msg[1];
+ u32 guc_id, runnable_state;
- if (unlikely(len < 2)) {
- drm_err(&xe->drm, "Invalid length %u", len);
+ if (unlikely(len < 2))
return -EPROTO;
- }
+
+ guc_id = msg[0];
+ runnable_state = msg[1];
q = g2h_exec_queue_lookup(guc, guc_id);
if (unlikely(!q))
@@ -1940,14 +1943,13 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
- struct xe_device *xe = guc_to_xe(guc);
struct xe_exec_queue *q;
- u32 guc_id = msg[0];
+ u32 guc_id;
- if (unlikely(len < 1)) {
- drm_err(&xe->drm, "Invalid length %u", len);
+ if (unlikely(len < 1))
return -EPROTO;
- }
+
+ guc_id = msg[0];
q = g2h_exec_queue_lookup(guc, guc_id);
if (unlikely(!q))
@@ -1969,14 +1971,13 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
struct xe_gt *gt = guc_to_gt(guc);
- struct xe_device *xe = guc_to_xe(guc);
struct xe_exec_queue *q;
- u32 guc_id = msg[0];
+ u32 guc_id;
- if (unlikely(len < 1)) {
- drm_err(&xe->drm, "Invalid length %u", len);
+ if (unlikely(len < 1))
return -EPROTO;
- }
+
+ guc_id = msg[0];
q = g2h_exec_queue_lookup(guc, guc_id);
if (unlikely(!q))
@@ -2016,10 +2017,8 @@ int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
u32 status;
- if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) {
- xe_gt_dbg(guc_to_gt(guc), "Invalid length %u", len);
+ if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN))
return -EPROTO;
- }
status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
@@ -2034,13 +2033,21 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
u32 len)
{
struct xe_gt *gt = guc_to_gt(guc);
- struct xe_device *xe = guc_to_xe(guc);
struct xe_exec_queue *q;
- u32 guc_id = msg[0];
+ u32 guc_id;
- if (unlikely(len < 1)) {
- drm_err(&xe->drm, "Invalid length %u", len);
+ if (unlikely(len < 1))
return -EPROTO;
+
+ guc_id = msg[0];
+
+ if (guc_id == GUC_ID_UNKNOWN) {
+ /*
+ * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF
+ * context. In such case only PF will be notified about that fault.
+ */
+ xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n");
+ return 0;
}
q = g2h_exec_queue_lookup(guc, guc_id);
@@ -2062,24 +2069,22 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
- struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
u8 guc_class, instance;
u32 reason;
- if (unlikely(len != 3)) {
- drm_err(&xe->drm, "Invalid length %u", len);
+ if (unlikely(len != 3))
return -EPROTO;
- }
guc_class = msg[0];
instance = msg[1];
reason = msg[2];
/* Unexpected failure of a hardware feature, log an actual error */
- drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
- guc_class, instance, reason);
+ xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X",
+ guc_class, instance, reason);
- xe_gt_reset_async(guc_to_gt(guc));
+ xe_gt_reset_async(gt);
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index fa75f57bf5da..83a41ebcdc91 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -64,6 +64,15 @@ struct xe_guc {
struct xe_guc_pc pc;
/** @dbm: GuC Doorbell Manager */
struct xe_guc_db_mgr dbm;
+
+ /** @g2g: GuC to GuC communication state */
+ struct {
+ /** @g2g.bo: Storage for GuC to GuC communication channels */
+ struct xe_bo *bo;
+ /** @g2g.owned: Is the BO owned by this GT or just mapped in */
+ bool owned;
+ } g2g;
+
/** @submission_state: GuC submission state */
struct {
/** @submission_state.idm: GuC context ID Manager */
@@ -79,6 +88,7 @@ struct xe_guc {
/** @submission_state.fini_wq: submit fini wait queue */
wait_queue_head_t fini_wq;
} submission_state;
+
/** @hwconfig: Hardware config state */
struct {
/** @hwconfig.bo: buffer object of the hardware config */
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c
index 65b2e147c4b9..d765bfd3636b 100644
--- a/drivers/gpu/drm/xe/xe_heci_gsc.c
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.c
@@ -92,7 +92,7 @@ void xe_heci_gsc_fini(struct xe_device *xe)
{
struct xe_heci_gsc *heci_gsc = &xe->heci_gsc;
- if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe))
+ if (!xe->info.has_heci_gscfi && !xe->info.has_heci_cscfi)
return;
if (heci_gsc->adev) {
@@ -177,7 +177,7 @@ void xe_heci_gsc_init(struct xe_device *xe)
const struct heci_gsc_def *def;
int ret;
- if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe))
+ if (!xe->info.has_heci_gscfi && !xe->info.has_heci_cscfi)
return;
heci_gsc->irq = -1;
@@ -222,7 +222,7 @@ void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir)
if ((iir & GSC_IRQ_INTF(1)) == 0)
return;
- if (!HAS_HECI_GSCFI(xe)) {
+ if (!xe->info.has_heci_gscfi) {
drm_warn_once(&xe->drm, "GSC irq: not supported");
return;
}
@@ -242,7 +242,7 @@ void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir)
if ((iir & CSC_IRQ_INTF(1)) == 0)
return;
- if (!HAS_HECI_CSCFI(xe)) {
+ if (!xe->info.has_heci_cscfi) {
drm_warn_once(&xe->drm, "CSC irq: not supported");
return;
}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 1557acee3523..b19366744148 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -574,7 +574,6 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
xe_gt_assert(gt, gt->info.engine_mask & BIT(id));
xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
- xe_reg_sr_apply_whitelist(hwe);
hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
@@ -829,7 +828,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
/**
* xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
* @hwe: Xe HW Engine.
- * @job: The job object.
+ * @q: The exec queue object.
*
* This can be printed out in a later stage like during dev_coredump
* analysis.
@@ -838,7 +837,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
* caller, using `xe_hw_engine_snapshot_free`.
*/
struct xe_hw_engine_snapshot *
-xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job)
+xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q)
{
struct xe_hw_engine_snapshot *snapshot;
struct __guc_capture_parsed_output *node;
@@ -864,15 +863,14 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job
if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
return snapshot;
- if (job) {
+ if (q) {
/* If got guc capture, set source to GuC */
- node = xe_guc_capture_get_matching_and_lock(job);
+ node = xe_guc_capture_get_matching_and_lock(q);
if (node) {
struct xe_device *xe = gt_to_xe(hwe->gt);
struct xe_devcoredump *coredump = &xe->devcoredump;
coredump->snapshot.matched_node = node;
- snapshot->source = XE_ENGINE_CAPTURE_SOURCE_GUC;
xe_gt_dbg(hwe->gt, "Found and locked GuC-err-capture node");
return snapshot;
}
@@ -880,7 +878,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job
/* otherwise, do manual capture */
xe_engine_manual_capture(hwe, snapshot);
- snapshot->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
xe_gt_dbg(hwe->gt, "Proceeding with manual engine snapshot");
return snapshot;
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
index da0a6922a26f..6b5f9fa2a594 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine.h
@@ -11,7 +11,7 @@
struct drm_printer;
struct drm_xe_engine_class_instance;
struct xe_device;
-struct xe_sched_job;
+struct xe_exec_queue;
#ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN
#define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN
@@ -56,7 +56,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe);
u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
enum xe_engine_class engine_class);
struct xe_hw_engine_snapshot *
-xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job);
+xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q);
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot);
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p);
void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 719f27ef00a5..e14bee95e364 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -165,8 +165,6 @@ enum xe_hw_engine_snapshot_source_id {
struct xe_hw_engine_snapshot {
/** @name: name of the hw engine */
char *name;
- /** @source: Data source, either manual or GuC */
- enum xe_hw_engine_snapshot_source_id source;
/** @hwe: hw engine */
struct xe_hw_engine *hwe;
/** @logical_instance: logical instance of this hw engine */
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index b7995ebd54ab..1c509e66694d 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -192,7 +192,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) {
gsc_mask = irqs | GSC_ER_COMPLETE;
heci_mask = GSC_IRQ_INTF(1);
- } else if (HAS_HECI_GSCFI(xe)) {
+ } else if (xe->info.has_heci_gscfi) {
gsc_mask = GSC_IRQ_INTF(1);
}
@@ -325,7 +325,7 @@ static void gt_irq_handler(struct xe_tile *tile,
if (class == XE_ENGINE_CLASS_OTHER) {
/* HECI GSCFI interrupts come from outside of GT */
- if (HAS_HECI_GSCFI(xe) && instance == OTHER_GSC_INSTANCE)
+ if (xe->info.has_heci_gscfi && instance == OTHER_GSC_INSTANCE)
xe_heci_gsc_irq_handler(xe, intr_vec);
else
gt_other_irq_handler(engine_gt, instance, intr_vec);
@@ -348,12 +348,8 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg)
unsigned long intr_dw[2];
u32 identity[32];
- spin_lock(&xe->irq.lock);
- if (!xe->irq.enabled) {
- spin_unlock(&xe->irq.lock);
+ if (!atomic_read(&xe->irq.enabled))
return IRQ_NONE;
- }
- spin_unlock(&xe->irq.lock);
master_ctl = xelp_intr_disable(xe);
if (!master_ctl) {
@@ -417,12 +413,8 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
/* TODO: This really shouldn't be copied+pasted */
- spin_lock(&xe->irq.lock);
- if (!xe->irq.enabled) {
- spin_unlock(&xe->irq.lock);
+ if (!atomic_read(&xe->irq.enabled))
return IRQ_NONE;
- }
- spin_unlock(&xe->irq.lock);
master_tile_ctl = dg1_intr_disable(xe);
if (!master_tile_ctl) {
@@ -459,7 +451,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
* the primary tile.
*/
if (id == 0) {
- if (HAS_HECI_CSCFI(xe))
+ if (xe->info.has_heci_cscfi)
xe_heci_csc_irq_handler(xe, master_ctl);
xe_display_irq_handler(xe, master_ctl);
gu_misc_iir = gu_misc_irq_ack(xe, master_ctl);
@@ -508,7 +500,7 @@ static void gt_irq_reset(struct xe_tile *tile)
if ((tile->media_gt &&
xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) ||
- HAS_HECI_GSCFI(tile_to_xe(tile))) {
+ tile_to_xe(tile)->info.has_heci_gscfi) {
xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0);
xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0);
xe_mmio_write32(mmio, HECI2_RSVD_INTR_MASK, ~0);
@@ -644,12 +636,8 @@ static irqreturn_t vf_mem_irq_handler(int irq, void *arg)
struct xe_tile *tile;
unsigned int id;
- spin_lock(&xe->irq.lock);
- if (!xe->irq.enabled) {
- spin_unlock(&xe->irq.lock);
+ if (!atomic_read(&xe->irq.enabled))
return IRQ_NONE;
- }
- spin_unlock(&xe->irq.lock);
for_each_tile(tile, xe, id)
xe_memirq_handler(&tile->memirq);
@@ -674,10 +662,9 @@ static void irq_uninstall(void *arg)
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
int irq;
- if (!xe->irq.enabled)
+ if (!atomic_xchg(&xe->irq.enabled, 0))
return;
- xe->irq.enabled = false;
xe_irq_reset(xe);
irq = pci_irq_vector(pdev, 0);
@@ -724,7 +711,7 @@ int xe_irq_install(struct xe_device *xe)
return err;
}
- xe->irq.enabled = true;
+ atomic_set(&xe->irq.enabled, 1);
xe_irq_postinstall(xe);
@@ -744,9 +731,7 @@ void xe_irq_suspend(struct xe_device *xe)
{
int irq = to_pci_dev(xe->drm.dev)->irq;
- spin_lock_irq(&xe->irq.lock);
- xe->irq.enabled = false; /* no new irqs */
- spin_unlock_irq(&xe->irq.lock);
+ atomic_set(&xe->irq.enabled, 0); /* no new irqs */
synchronize_irq(irq); /* flush irqs */
xe_irq_reset(xe); /* turn irqs off */
@@ -762,7 +747,7 @@ void xe_irq_resume(struct xe_device *xe)
* 1. no irq will arrive before the postinstall
* 2. display is not yet resumed
*/
- xe->irq.enabled = true;
+ atomic_set(&xe->irq.enabled, 1);
xe_irq_reset(xe);
xe_irq_postinstall(xe); /* turn irqs on */
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 4f64c7f4e68d..22e58c6e2a35 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -25,6 +25,7 @@
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
+#include "xe_trace_lrc.h"
#include "xe_vm.h"
#include "xe_wa.h"
@@ -1060,6 +1061,14 @@ u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}
+static u32 xe_lrc_ring_start(struct xe_lrc *lrc)
+{
+ if (xe_lrc_has_indirect_ring_state(lrc))
+ return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START);
+ else
+ return xe_lrc_read_ctx_reg(lrc, CTX_RING_START);
+}
+
void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
if (xe_lrc_has_indirect_ring_state(lrc))
@@ -1635,10 +1644,12 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
xe_vm_get(lrc->bo->vm);
snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
+ snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc);
snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
snapshot->head = xe_lrc_ring_head(lrc);
snapshot->tail.internal = lrc->ring.tail;
snapshot->tail.memory = xe_lrc_ring_tail(lrc);
+ snapshot->start = xe_lrc_ring_start(lrc);
snapshot->start_seqno = xe_lrc_start_seqno(lrc);
snapshot->seqno = xe_lrc_seqno(lrc);
snapshot->lrc_bo = xe_bo_get(lrc->bo);
@@ -1692,11 +1703,14 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer
return;
drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
+ drm_printf(p, "\tHW Ring address: 0x%08x\n",
+ snapshot->ring_addr);
drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
snapshot->indirect_context_desc);
drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
snapshot->tail.internal, snapshot->tail.memory);
+ drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start);
drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
@@ -1758,5 +1772,20 @@ u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
+ trace_xe_lrc_update_timestamp(lrc, *old_ts);
+
return lrc->ctx_timestamp;
}
+
+/**
+ * xe_lrc_ring_is_idle() - LRC is idle
+ * @lrc: Pointer to the lrc.
+ *
+ * Compare LRC ring head and tail to determine if idle.
+ *
+ * Return: True is ring is idle, False otherwise
+ */
+bool xe_lrc_ring_is_idle(struct xe_lrc *lrc)
+{
+ return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc);
+}
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index 40d8f6906d3e..b459dcab8787 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -25,8 +25,10 @@ struct xe_lrc_snapshot {
unsigned long lrc_size, lrc_offset;
u32 context_desc;
+ u32 ring_addr;
u32 indirect_context_desc;
u32 head;
+ u32 start;
struct {
u32 internal;
u32 memory;
@@ -78,6 +80,8 @@ u32 xe_lrc_ring_head(struct xe_lrc *lrc);
u32 xe_lrc_ring_space(struct xe_lrc *lrc);
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size);
+bool xe_lrc_ring_is_idle(struct xe_lrc *lrc);
+
u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc);
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc);
u32 *xe_lrc_regs(struct xe_lrc *lrc);
diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h
index daf56c846d03..8a77c2423555 100644
--- a/drivers/gpu/drm/xe/xe_macros.h
+++ b/drivers/gpu/drm/xe/xe_macros.h
@@ -10,9 +10,13 @@
#define XE_WARN_ON WARN_ON
-#define XE_IOCTL_DBG(xe, cond) \
- ((cond) && (drm_dbg(&(xe)->drm, \
- "Ioctl argument check failed at %s:%d: %s", \
- __FILE__, __LINE__, #cond), 1))
+#define XE_IOCTL_DBG(xe, cond) ({ \
+ int cond__ = !!(cond); \
+ if (cond__) \
+ drm_dbg(&(xe)->drm, \
+ "Ioctl argument check failed at %s:%d: %s", \
+ __FILE__, __LINE__, #cond); \
+ cond__; \
+})
#endif
diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
index f833da88150a..404fa2a456d5 100644
--- a/drivers/gpu/drm/xe/xe_memirq.c
+++ b/drivers/gpu/drm/xe/xe_memirq.c
@@ -155,13 +155,6 @@ static const char *guc_name(struct xe_guc *guc)
*
*/
-static void __release_xe_bo(struct drm_device *drm, void *arg)
-{
- struct xe_bo *bo = arg;
-
- xe_bo_unpin_map_no_vm(bo);
-}
-
static inline bool hw_reports_to_instance_zero(struct xe_memirq *memirq)
{
/*
@@ -184,14 +177,12 @@ static int memirq_alloc_pages(struct xe_memirq *memirq)
BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET(0), SZ_64));
BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET(0), SZ_4K));
- /* XXX: convert to managed bo */
- bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE |
- XE_BO_FLAG_NEEDS_UC |
- XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ bo = xe_managed_bo_create_pin_map(xe, tile, bo_size,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_NEEDS_UC |
+ XE_BO_FLAG_NEEDS_CPU_ACCESS);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto out;
@@ -215,7 +206,7 @@ static int memirq_alloc_pages(struct xe_memirq *memirq)
xe_bo_ggtt_addr(bo), bo_size, XE_MEMIRQ_SOURCE_OFFSET(0),
XE_MEMIRQ_STATUS_OFFSET(0));
- return drmm_add_action_or_reset(&xe->drm, __release_xe_bo, memirq->bo);
+ return 0;
out:
memirq_err(memirq, "Failed to allocate memirq page (%pe)\n", ERR_PTR(err));
@@ -442,6 +433,9 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat
if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name))
xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST);
+
+ if (memirq_received(memirq, status, ilog2(GUC_INTR_SW_INT_0), name))
+ xe_guc_irq_handler(guc, GUC_INTR_SW_INT_0);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index bfc3deebdaa2..07b27114be9a 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -19,7 +19,7 @@
struct xe_modparam xe_modparam = {
.probe_display = true,
- .guc_log_level = 5,
+ .guc_log_level = 3,
.force_probe = CONFIG_DRM_XE_FORCE_PROBE,
.wedged_mode = 1,
/* the rest are 0 by default */
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 8dd55798ab31..ec88b18e9baa 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -96,6 +96,7 @@ struct xe_oa_open_param {
struct drm_xe_sync __user *syncs_user;
int num_syncs;
struct xe_sync_entry *syncs;
+ size_t oa_buffer_size;
};
struct xe_oa_config_bo {
@@ -403,11 +404,19 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
{
- struct xe_mmio *mmio = &stream->gt->mmio;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
- u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT;
+ int size_exponent = __ffs(stream->oa_buffer.bo->size);
+ u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT;
+ struct xe_mmio *mmio = &stream->gt->mmio;
unsigned long flags;
+ /*
+ * If oa buffer size is more than 16MB (exponent greater than 24), the
+ * oa buffer size field is multiplied by 8 in xe_oa_enable_metric_set.
+ */
+ oa_buf |= REG_FIELD_PREP(OABUFFER_SIZE_MASK,
+ size_exponent > 24 ? size_exponent - 20 : size_exponent - 17);
+
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0);
@@ -901,15 +910,12 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
xe_file_put(stream->xef);
}
-static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream)
+static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
{
struct xe_bo *bo;
- BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE);
- BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M);
-
bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL,
- XE_OA_BUFFER_SIZE, ttm_bo_type_kernel,
+ size, ttm_bo_type_kernel,
XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -1087,6 +1093,13 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream)
0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
}
+static u32 oag_buf_size_select(const struct xe_oa_stream *stream)
+{
+ return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT,
+ stream->oa_buffer.bo->size > SZ_16M ?
+ OAG_OA_DEBUG_BUF_SIZE_SELECT : 0);
+}
+
static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
{
struct xe_mmio *mmio = &stream->gt->mmio;
@@ -1119,6 +1132,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
_MASKED_BIT_ENABLE(oa_debug) |
oag_report_ctx_switches(stream) |
+ oag_buf_size_select(stream) |
oag_configure_mmio_trigger(stream, true));
xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
@@ -1260,6 +1274,17 @@ static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value,
return 0;
}
+static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ if (!is_power_of_2(value) || value < SZ_128K || value > SZ_128M) {
+ drm_dbg(&oa->xe->drm, "OA buffer size invalid %llu\n", value);
+ return -EINVAL;
+ }
+ param->oa_buffer_size = value;
+ return 0;
+}
+
static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value,
struct xe_oa_open_param *param)
{
@@ -1280,6 +1305,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = {
[DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
[DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
[DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
+ [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size,
};
static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
@@ -1294,6 +1320,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
[DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval,
[DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
[DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
+ [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval,
};
static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from,
@@ -1553,7 +1580,7 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg)
static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg)
{
- struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, };
+ struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, };
void __user *uaddr = (void __user *)arg;
if (copy_to_user(uaddr, &info, sizeof(info)))
@@ -1639,7 +1666,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
}
/* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */
- if (vma->vm_end - vma->vm_start != XE_OA_BUFFER_SIZE) {
+ if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) {
drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n");
return -EINVAL;
}
@@ -1783,9 +1810,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
stream->oa_buffer.circ_size =
- XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
+ param->oa_buffer_size -
+ param->oa_buffer_size % stream->oa_buffer.format->size;
else
- stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
+ stream->oa_buffer.circ_size = param->oa_buffer_size;
if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
/* If we don't find the context offset, just return error */
@@ -1828,7 +1856,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
goto err_fw_put;
}
- ret = xe_oa_alloc_oa_buffer(stream);
+ ret = xe_oa_alloc_oa_buffer(stream, param->oa_buffer_size);
if (ret)
goto err_fw_put;
@@ -2125,6 +2153,9 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz);
}
+ if (!param.oa_buffer_size)
+ param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE;
+
ret = xe_oa_parse_syncs(oa, &param);
if (ret)
goto err_exec_q;
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index fea9d981e414..df7793915628 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -15,7 +15,7 @@
#include "regs/xe_reg_defs.h"
#include "xe_hw_engine_types.h"
-#define XE_OA_BUFFER_SIZE SZ_16M
+#define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M
enum xe_oa_report_header {
HDR_32_BIT = 0,
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 40f7c844ed44..80699dbeb2e9 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -738,9 +738,6 @@ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
xe->d3cold.allowed = false;
mutex_unlock(&xe->d3cold.lock);
-
- drm_dbg(&xe->drm,
- "d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed));
}
/**
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index f27f579f4d85..65c3c1688710 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -136,6 +136,7 @@ err_kfree:
xe_pt_free(pt);
return ERR_PTR(err);
}
+ALLOW_ERROR_INJECTION(xe_pt_create, ERRNO);
/**
* xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
@@ -1333,8 +1334,7 @@ static void invalidation_fence_cb(struct dma_fence *fence,
queue_work(system_wq, &ifence->work);
} else {
ifence->base.base.error = ifence->fence->error;
- dma_fence_signal(&ifence->base.base);
- dma_fence_put(&ifence->base.base);
+ xe_gt_tlb_invalidation_fence_signal(&ifence->base);
}
dma_fence_put(ifence->fence);
}
@@ -1851,6 +1851,7 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO);
static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
@@ -2131,6 +2132,7 @@ kill_vm_tile1:
return ERR_PTR(err);
}
+ALLOW_ERROR_INJECTION(xe_pt_update_ops_run, ERRNO);
/**
* xe_pt_update_ops_fini() - Finish PT update operations
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 170ae72d1a7b..d2a816f71bf2 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -23,6 +23,7 @@
#include "xe_guc_hwconfig.h"
#include "xe_macros.h"
#include "xe_mmio.h"
+#include "xe_oa.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_wa.h"
@@ -670,7 +671,8 @@ static int query_oa_units(struct xe_device *xe,
du->oa_unit_id = u->oa_unit_id;
du->oa_unit_type = u->type;
du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt);
- du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS;
+ du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS |
+ DRM_XE_OA_CAPS_OA_BUFFER_SIZE;
j = 0;
for_each_hw_engine(hwe, gt, hwe_id) {
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index e1a0e27cda14..9475e3f74958 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -24,49 +24,29 @@
#include "xe_hw_engine_types.h"
#include "xe_macros.h"
#include "xe_mmio.h"
-#include "xe_reg_whitelist.h"
#include "xe_rtp_types.h"
-#define XE_REG_SR_GROW_STEP_DEFAULT 16
-
static void reg_sr_fini(struct drm_device *drm, void *arg)
{
struct xe_reg_sr *sr = arg;
+ struct xe_reg_sr_entry *entry;
+ unsigned long reg;
+
+ xa_for_each(&sr->xa, reg, entry)
+ kfree(entry);
xa_destroy(&sr->xa);
- kfree(sr->pool.arr);
- memset(&sr->pool, 0, sizeof(sr->pool));
}
int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe)
{
xa_init(&sr->xa);
- memset(&sr->pool, 0, sizeof(sr->pool));
- sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT;
sr->name = name;
return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr);
}
EXPORT_SYMBOL_IF_KUNIT(xe_reg_sr_init);
-static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr)
-{
- if (sr->pool.used == sr->pool.allocated) {
- struct xe_reg_sr_entry *arr;
-
- arr = krealloc_array(sr->pool.arr,
- ALIGN(sr->pool.allocated + 1, sr->pool.grow_step),
- sizeof(*arr), GFP_KERNEL);
- if (!arr)
- return NULL;
-
- sr->pool.arr = arr;
- sr->pool.allocated += sr->pool.grow_step;
- }
-
- return &sr->pool.arr[sr->pool.used++];
-}
-
static bool compatible_entries(const struct xe_reg_sr_entry *e1,
const struct xe_reg_sr_entry *e2)
{
@@ -112,7 +92,7 @@ int xe_reg_sr_add(struct xe_reg_sr *sr,
return 0;
}
- pentry = alloc_entry(sr);
+ pentry = kmalloc(sizeof(*pentry), GFP_KERNEL);
if (!pentry) {
ret = -ENOMEM;
goto fail;
@@ -211,58 +191,6 @@ err_force_wake:
xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n");
}
-void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
-{
- struct xe_reg_sr *sr = &hwe->reg_whitelist;
- struct xe_gt *gt = hwe->gt;
- struct xe_device *xe = gt_to_xe(gt);
- struct xe_reg_sr_entry *entry;
- struct drm_printer p;
- u32 mmio_base = hwe->mmio_base;
- unsigned long reg;
- unsigned int slot = 0;
- unsigned int fw_ref;
-
- if (xa_empty(&sr->xa))
- return;
-
- drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name);
-
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
- goto err_force_wake;
-
- p = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL);
- xa_for_each(&sr->xa, reg, entry) {
- if (slot == RING_MAX_NONPRIV_SLOTS) {
- xe_gt_err(gt,
- "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n",
- hwe->name, RING_MAX_NONPRIV_SLOTS);
- break;
- }
-
- xe_reg_whitelist_print_entry(&p, 0, reg, entry);
- xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot),
- reg | entry->set_bits);
- slot++;
- }
-
- /* And clear the rest just in case of garbage */
- for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) {
- u32 addr = RING_NOPID(mmio_base).addr;
-
- xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
- }
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
-
- return;
-
-err_force_wake:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- drm_err(&xe->drm, "Failed to apply, err=-ETIMEDOUT\n");
-}
-
/**
* xe_reg_sr_dump - print all save/restore entries
* @sr: Save/restore entries
diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h
index ad48a52b824a..ebe11f237fa2 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr_types.h
+++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h
@@ -20,12 +20,6 @@ struct xe_reg_sr_entry {
};
struct xe_reg_sr {
- struct {
- struct xe_reg_sr_entry *arr;
- unsigned int used;
- unsigned int allocated;
- unsigned int grow_step;
- } pool;
struct xarray xa;
const char *name;
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 3996934974fa..edab5d4e3ba5 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -10,7 +10,9 @@
#include "regs/xe_oa_regs.h"
#include "regs/xe_regs.h"
#include "xe_gt_types.h"
+#include "xe_gt_printk.h"
#include "xe_platform_types.h"
+#include "xe_reg_sr.h"
#include "xe_rtp.h"
#include "xe_step.h"
@@ -89,6 +91,40 @@ static const struct xe_rtp_entry_sr register_whitelist[] = {
{}
};
+static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe)
+{
+ struct xe_reg_sr *sr = &hwe->reg_whitelist;
+ struct xe_reg_sr_entry *entry;
+ struct drm_printer p;
+ unsigned long reg;
+ unsigned int slot;
+
+ xe_gt_dbg(hwe->gt, "Add %s whitelist to engine\n", sr->name);
+ p = xe_gt_dbg_printer(hwe->gt);
+
+ slot = 0;
+ xa_for_each(&sr->xa, reg, entry) {
+ struct xe_reg_sr_entry hwe_entry = {
+ .reg = RING_FORCE_TO_NONPRIV(hwe->mmio_base, slot),
+ .set_bits = entry->reg.addr | entry->set_bits,
+ .clr_bits = ~0u,
+ .read_mask = entry->read_mask,
+ };
+
+ if (slot == RING_MAX_NONPRIV_SLOTS) {
+ xe_gt_err(hwe->gt,
+ "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n",
+ hwe->name, RING_MAX_NONPRIV_SLOTS);
+ break;
+ }
+
+ xe_reg_whitelist_print_entry(&p, 0, reg, entry);
+ xe_reg_sr_add(&hwe->reg_sr, &hwe_entry, hwe->gt);
+
+ slot++;
+ }
+}
+
/**
* xe_reg_whitelist_process_engine - process table of registers to whitelist
* @hwe: engine instance to process whitelist for
@@ -102,6 +138,7 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist);
+ whitelist_apply_to_hwe(hwe);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
index ef10782af656..04e2f539ccd9 100644
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -14,6 +14,7 @@
#include "xe_mmio.h"
#include "xe_sriov.h"
#include "xe_sriov_pf.h"
+#include "xe_sriov_vf.h"
/**
* xe_sriov_mode_to_string - Convert enum value to string.
@@ -114,6 +115,9 @@ int xe_sriov_init(struct xe_device *xe)
return err;
}
+ if (IS_SRIOV_VF(xe))
+ xe_sriov_vf_init_early(xe);
+
xe_assert(xe, !xe->sriov.wq);
xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0);
if (!xe->sriov.wq)
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h
index 7d156ba82479..dd1df950b021 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h
@@ -20,7 +20,7 @@
* is within a range of supported VF numbers (up to maximum number of VFs that
* driver can support, including VF0 that represents the PF itself).
*
- * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information.
+ * Note: Effective only on debug builds. See `Xe Asserts`_ for more information.
*/
#define xe_sriov_pf_assert_vfid(xe, vfid) \
xe_assert((xe), (vfid) <= xe_sriov_pf_get_totalvfs(xe))
diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h
index c7b7ad4af5c8..ca94382a721e 100644
--- a/drivers/gpu/drm/xe/xe_sriov_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_types.h
@@ -9,6 +9,7 @@
#include <linux/build_bug.h>
#include <linux/mutex.h>
#include <linux/types.h>
+#include <linux/workqueue_types.h>
/**
* VFID - Virtual Function Identifier
@@ -56,4 +57,20 @@ struct xe_device_pf {
struct mutex master_lock;
};
+/**
+ * struct xe_device_vf - Xe Virtual Function related data
+ *
+ * The data in this structure is valid only if driver is running in the
+ * @XE_SRIOV_MODE_VF mode.
+ */
+struct xe_device_vf {
+ /** @migration: VF Migration state data */
+ struct {
+ /** @migration.worker: VF migration recovery worker */
+ struct work_struct worker;
+ /** @migration.gt_flags: Per-GT request flags for VF migration recovery */
+ unsigned long gt_flags;
+ } migration;
+};
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
new file mode 100644
index 000000000000..c1275e64aa9c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_assert.h"
+#include "xe_device.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_gt_sriov_vf.h"
+#include "xe_pm.h"
+#include "xe_sriov.h"
+#include "xe_sriov_printk.h"
+#include "xe_sriov_vf.h"
+
+/**
+ * DOC: VF restore procedure in PF KMD and VF KMD
+ *
+ * Restoring previously saved state of a VF is one of core features of
+ * SR-IOV. All major VM Management applications allow saving and restoring
+ * the VM state, and doing that to a VM which uses SRIOV VF as one of
+ * the accessible devices requires support from KMD on both PF and VF side.
+ * VMM initiates all required operations through VFIO module, which then
+ * translates them into PF KMD calls. This description will focus on these
+ * calls, leaving out the module which initiates these steps (VFIO).
+ *
+ * In order to start the restore procedure, GuC needs to keep the VF in
+ * proper state. The PF driver can ensure GuC set it to VF_READY state
+ * by provisioning the VF, which in turn can be done after Function Level
+ * Reset of said VF (or after it was freshly created - in that case FLR
+ * is not needed). The FLR procedure ends with GuC sending message
+ * `GUC_PF_NOTIFY_VF_FLR_DONE`, and then provisioning data is sent to GuC.
+ * After the provisioning is completed, the VF needs to be paused, and
+ * at that point the actual restore can begin.
+ *
+ * During VF Restore, state of several resources is restored. These may
+ * include local memory content (system memory is restored by VMM itself),
+ * values of MMIO registers, stateless compression metadata and others.
+ * The final resource which also needs restoring is state of the VF
+ * submission maintained within GuC. For that, `GUC_PF_OPCODE_VF_RESTORE`
+ * message is used, with reference to the state blob to be consumed by
+ * GuC.
+ *
+ * Next, when VFIO is asked to set the VM into running state, the PF driver
+ * sends `GUC_PF_TRIGGER_VF_RESUME` to GuC. When sent after restore, this
+ * changes VF state within GuC to `VF_RESFIX_BLOCKED` rather than the
+ * usual `VF_RUNNING`. At this point GuC triggers an interrupt to inform
+ * the VF KMD within the VM that it was migrated.
+ *
+ * As soon as Virtual GPU of the VM starts, the VF driver within receives
+ * the MIGRATED interrupt and schedules post-migration recovery worker.
+ * That worker queries GuC for new provisioning (using MMIO communication),
+ * and applies fixups to any non-virtualized resources used by the VF.
+ *
+ * When the VF driver is ready to continue operation on the newly connected
+ * hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE` which causes it to
+ * enter the long awaited `VF_RUNNING` state, and therefore start handling
+ * CTB messages and scheduling workloads from the VF::
+ *
+ * PF GuC VF
+ * [ ] | |
+ * [ ] PF2GUC_VF_CONTROL(pause) | |
+ * [ ]---------------------------> [ ] |
+ * [ ] [ ] GuC sets new VF state to |
+ * [ ] [ ]------- VF_READY_PAUSED |
+ * [ ] [ ] | |
+ * [ ] [ ] <----- |
+ * [ ] success [ ] |
+ * [ ] <---------------------------[ ] |
+ * [ ] | |
+ * [ ] PF loads resources from the | |
+ * [ ]------- saved image supplied | |
+ * [ ] | | |
+ * [ ] <----- | |
+ * [ ] | |
+ * [ ] GUC_PF_OPCODE_VF_RESTORE | |
+ * [ ]---------------------------> [ ] |
+ * [ ] [ ] GuC loads contexts and CTB |
+ * [ ] [ ]------- state from image |
+ * [ ] [ ] | |
+ * [ ] [ ] <----- |
+ * [ ] [ ] |
+ * [ ] [ ] GuC sets new VF state to |
+ * [ ] [ ]------- VF_RESFIX_PAUSED |
+ * [ ] [ ] | |
+ * [ ] success [ ] <----- |
+ * [ ] <---------------------------[ ] |
+ * [ ] | |
+ * [ ] GUC_PF_TRIGGER_VF_RESUME | |
+ * [ ]---------------------------> [ ] |
+ * [ ] [ ] GuC sets new VF state to |
+ * [ ] [ ]------- VF_RESFIX_BLOCKED |
+ * [ ] [ ] | |
+ * [ ] [ ] <----- |
+ * [ ] [ ] |
+ * [ ] [ ] GUC_INTR_SW_INT_0 |
+ * [ ] success [ ]---------------------------> [ ]
+ * [ ] <---------------------------[ ] [ ]
+ * | | VF2GUC_QUERY_SINGLE_KLV [ ]
+ * | [ ] <---------------------------[ ]
+ * | [ ] [ ]
+ * | [ ] new VF provisioning [ ]
+ * | [ ]---------------------------> [ ]
+ * | | [ ]
+ * | | VF driver applies post [ ]
+ * | | migration fixups -------[ ]
+ * | | | [ ]
+ * | | -----> [ ]
+ * | | [ ]
+ * | | VF2GUC_NOTIFY_RESFIX_DONE [ ]
+ * | [ ] <---------------------------[ ]
+ * | [ ] [ ]
+ * | [ ] GuC sets new VF state to [ ]
+ * | [ ]------- VF_RUNNING [ ]
+ * | [ ] | [ ]
+ * | [ ] <----- [ ]
+ * | [ ] success [ ]
+ * | [ ]---------------------------> [ ]
+ * | | |
+ * | | |
+ */
+
+static void migration_worker_func(struct work_struct *w);
+
+/**
+ * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data.
+ * @xe: the &xe_device to initialize
+ */
+void xe_sriov_vf_init_early(struct xe_device *xe)
+{
+ INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
+}
+
+/**
+ * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning.
+ * @xe: the &xe_device struct instance
+ *
+ * After migration, we need to re-query all VF configuration to make sure
+ * they match previous provisioning. Note that most of VF provisioning
+ * shall be the same, except GGTT range, since GGTT is not virtualized per-VF.
+ *
+ * Returns: 0 if the operation completed successfully, or a negative error
+ * code otherwise.
+ */
+static int vf_post_migration_requery_guc(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int err, ret = 0;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_vf_query_config(gt);
+ ret = ret ?: err;
+ }
+
+ return ret;
+}
+
+/*
+ * vf_post_migration_imminent - Check if post-restore recovery is coming.
+ * @xe: the &xe_device struct instance
+ *
+ * Return: True if migration recovery worker will soon be running. Any worker currently
+ * executing does not affect the result.
+ */
+static bool vf_post_migration_imminent(struct xe_device *xe)
+{
+ return xe->sriov.vf.migration.gt_flags != 0 ||
+ work_pending(&xe->sriov.vf.migration.worker);
+}
+
+/*
+ * Notify all GuCs about resource fixups apply finished.
+ */
+static void vf_post_migration_notify_resfix_done(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+
+ for_each_gt(gt, xe, id) {
+ if (vf_post_migration_imminent(xe))
+ goto skip;
+ xe_gt_sriov_vf_notify_resfix_done(gt);
+ }
+ return;
+
+skip:
+ drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n");
+}
+
+static void vf_post_migration_recovery(struct xe_device *xe)
+{
+ int err;
+
+ drm_dbg(&xe->drm, "migration recovery in progress\n");
+ xe_pm_runtime_get(xe);
+ err = vf_post_migration_requery_guc(xe);
+ if (vf_post_migration_imminent(xe))
+ goto defer;
+ if (unlikely(err))
+ goto fail;
+
+ /* FIXME: add the recovery steps */
+ vf_post_migration_notify_resfix_done(xe);
+ xe_pm_runtime_put(xe);
+ drm_notice(&xe->drm, "migration recovery ended\n");
+ return;
+defer:
+ xe_pm_runtime_put(xe);
+ drm_dbg(&xe->drm, "migration recovery deferred\n");
+ return;
+fail:
+ xe_pm_runtime_put(xe);
+ drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err));
+ xe_device_declare_wedged(xe);
+}
+
+static void migration_worker_func(struct work_struct *w)
+{
+ struct xe_device *xe = container_of(w, struct xe_device,
+ sriov.vf.migration.worker);
+
+ vf_post_migration_recovery(xe);
+}
+
+static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+
+ for_each_gt(gt, xe, id) {
+ if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) {
+ xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+/**
+ * xe_sriov_vf_start_migration_recovery - Start VF migration recovery.
+ * @xe: the &xe_device to start recovery on
+ *
+ * This function shall be called only by VF.
+ */
+void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
+{
+ bool started;
+
+ xe_assert(xe, IS_SRIOV_VF(xe));
+
+ if (!vf_ready_to_recovery_on_all_gts(xe))
+ return;
+
+ WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0);
+ /* Ensure other threads see that no flags are set now. */
+ smp_mb();
+
+ started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker);
+ drm_info(&xe->drm, "VF migration recovery %s\n", started ?
+ "scheduled" : "already in progress");
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h
new file mode 100644
index 000000000000..7b8622cff2b7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_VF_H_
+#define _XE_SRIOV_VF_H_
+
+struct xe_device;
+
+void xe_sriov_vf_init_early(struct xe_device *xe);
+void xe_sriov_vf_start_migration_recovery(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 91130ad8999c..d5281de04d54 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -211,6 +211,7 @@ DECLARE_EVENT_CLASS(xe_sched_job,
__string(dev, __dev_name_eq(job->q))
__field(u32, seqno)
__field(u32, lrc_seqno)
+ __field(u8, gt_id)
__field(u16, guc_id)
__field(u32, guc_state)
__field(u32, flags)
@@ -223,6 +224,7 @@ DECLARE_EVENT_CLASS(xe_sched_job,
__assign_str(dev);
__entry->seqno = xe_sched_job_seqno(job);
__entry->lrc_seqno = xe_sched_job_lrc_seqno(job);
+ __entry->gt_id = job->q->gt->info.id;
__entry->guc_id = job->q->guc->id;
__entry->guc_state =
atomic_read(&job->q->guc->state);
@@ -232,9 +234,9 @@ DECLARE_EVENT_CLASS(xe_sched_job,
__entry->batch_addr = (u64)job->ptrs[0].batch_addr;
),
- TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
+ TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, gt=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
__get_str(dev), __entry->fence, __entry->seqno,
- __entry->lrc_seqno, __entry->guc_id,
+ __entry->lrc_seqno, __entry->gt_id, __entry->guc_id,
__entry->batch_addr, __entry->guc_state,
__entry->flags, __entry->error)
);
@@ -282,6 +284,7 @@ DECLARE_EVENT_CLASS(xe_sched_msg,
__string(dev, __dev_name_eq(((struct xe_exec_queue *)msg->private_data)))
__field(u32, opcode)
__field(u16, guc_id)
+ __field(u8, gt_id)
),
TP_fast_assign(
@@ -289,9 +292,11 @@ DECLARE_EVENT_CLASS(xe_sched_msg,
__entry->opcode = msg->opcode;
__entry->guc_id =
((struct xe_exec_queue *)msg->private_data)->guc->id;
+ __entry->gt_id =
+ ((struct xe_exec_queue *)msg->private_data)->gt->info.id;
),
- TP_printk("dev=%s, guc_id=%d, opcode=%u", __get_str(dev), __entry->guc_id,
+ TP_printk("dev=%s, gt=%u guc_id=%d, opcode=%u", __get_str(dev), __entry->gt_id, __entry->guc_id,
__entry->opcode)
);
diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h
index 30a3cfbaaa09..1762dd30ba6d 100644
--- a/drivers/gpu/drm/xe/xe_trace_bo.h
+++ b/drivers/gpu/drm/xe/xe_trace_bo.h
@@ -48,6 +48,11 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault,
TP_ARGS(bo)
);
+DEFINE_EVENT(xe_bo, xe_bo_validate,
+ TP_PROTO(struct xe_bo *bo),
+ TP_ARGS(bo)
+);
+
TRACE_EVENT(xe_bo_move,
TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement,
bool move_lacks_source),
diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.c b/drivers/gpu/drm/xe/xe_trace_lrc.c
new file mode 100644
index 000000000000..ab9b7e2970bc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_trace_lrc.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "xe_trace_lrc.h"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.h b/drivers/gpu/drm/xe/xe_trace_lrc.h
new file mode 100644
index 000000000000..5c669a0b2180
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_trace_lrc.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM xe
+
+#if !defined(_XE_TRACE_LRC_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _XE_TRACE_LRC_H_
+
+#include <linux/tracepoint.h>
+#include <linux/types.h>
+
+#include "xe_gt_types.h"
+#include "xe_lrc.h"
+#include "xe_lrc_types.h"
+
+#define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev)
+
+TRACE_EVENT(xe_lrc_update_timestamp,
+ TP_PROTO(struct xe_lrc *lrc, uint32_t old),
+ TP_ARGS(lrc, old),
+ TP_STRUCT__entry(
+ __field(struct xe_lrc *, lrc)
+ __field(u32, old)
+ __field(u32, new)
+ __string(name, lrc->fence_ctx.name)
+ __string(device_id, __dev_name_lrc(lrc))
+ ),
+
+ TP_fast_assign(
+ __entry->lrc = lrc;
+ __entry->old = old;
+ __entry->new = lrc->ctx_timestamp;
+ __assign_str(name);
+ __assign_str(device_id);
+ ),
+ TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s",
+ __entry->lrc, __get_str(name),
+ __entry->old, __entry->new,
+ __get_str(device_id))
+);
+
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe
+#define TRACE_INCLUDE_FILE xe_trace_lrc
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 423b261ea743..c95728c45ea4 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -52,7 +52,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
struct xe_ttm_vram_mgr_resource *vres;
struct drm_buddy *mm = &mgr->mm;
- u64 size, remaining_size, min_page_size;
+ u64 size, min_page_size;
unsigned long lpfn;
int err;
@@ -98,17 +98,6 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
goto error_fini;
}
- if (WARN_ON(min_page_size > SZ_2G)) { /* FIXME: sg limit */
- err = -EINVAL;
- goto error_fini;
- }
-
- if (WARN_ON((size > SZ_2G &&
- (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS)))) {
- err = -EINVAL;
- goto error_fini;
- }
-
if (WARN_ON(!IS_ALIGNED(size, min_page_size))) {
err = -EINVAL;
goto error_fini;
@@ -116,12 +105,11 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
mutex_lock(&mgr->lock);
if (lpfn <= mgr->visible_size >> PAGE_SHIFT && size > mgr->visible_avail) {
- mutex_unlock(&mgr->lock);
err = -ENOSPC;
- goto error_fini;
+ goto error_unlock;
}
- if (place->fpfn + (size >> PAGE_SHIFT) != place->lpfn &&
+ if (place->fpfn + (size >> PAGE_SHIFT) != lpfn &&
place->flags & TTM_PL_FLAG_CONTIGUOUS) {
size = roundup_pow_of_two(size);
min_page_size = size;
@@ -129,25 +117,11 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
lpfn = max_t(unsigned long, place->fpfn + (size >> PAGE_SHIFT), lpfn);
}
- remaining_size = size;
- do {
- /*
- * Limit maximum size to 2GiB due to SG table limitations.
- * FIXME: Should maybe be handled as part of sg construction.
- */
- u64 alloc_size = min_t(u64, remaining_size, SZ_2G);
-
- err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
- (u64)lpfn << PAGE_SHIFT,
- alloc_size,
- min_page_size,
- &vres->blocks,
- vres->flags);
- if (err)
- goto error_free_blocks;
-
- remaining_size -= alloc_size;
- } while (remaining_size);
+ err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
+ (u64)lpfn << PAGE_SHIFT, size,
+ min_page_size, &vres->blocks, vres->flags);
+ if (err)
+ goto error_unlock;
if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks))
@@ -194,9 +168,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
*res = &vres->base;
return 0;
-
-error_free_blocks:
- drm_buddy_free_list(mm, &vres->blocks, 0);
+error_unlock:
mutex_unlock(&mgr->lock);
error_fini:
ttm_resource_fini(man, &vres->base);
@@ -393,7 +365,8 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
xe_res_first(res, offset, length, &cursor);
while (cursor.remaining) {
num_entries++;
- xe_res_next(&cursor, cursor.size);
+ /* Limit maximum size to 2GiB due to SG table limitations. */
+ xe_res_next(&cursor, min_t(u64, cursor.size, SZ_2G));
}
r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
@@ -413,7 +386,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
xe_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
phys_addr_t phys = cursor.start + tile->mem.vram.io_start;
- size_t size = cursor.size;
+ size_t size = min_t(u64, cursor.size, SZ_2G);
dma_addr_t addr;
addr = dma_map_resource(dev, phys, size, dir,
@@ -426,7 +399,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
sg_dma_address(sg) = addr;
sg_dma_len(sg) = size;
- xe_res_next(&cursor, cursor.size);
+ xe_res_next(&cursor, size);
}
return 0;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 00ea57c2f4b9..b4a44e1ea167 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -732,13 +732,14 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
vops->pt_update_ops[i].ops =
kmalloc_array(vops->pt_update_ops[i].num_ops,
sizeof(*vops->pt_update_ops[i].ops),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (!vops->pt_update_ops[i].ops)
return array_of_binds ? -ENOBUFS : -ENOMEM;
}
return 0;
}
+ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
static void xe_vma_ops_fini(struct xe_vma_ops *vops)
{
@@ -1351,6 +1352,7 @@ static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
return 0;
}
+ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
static void xe_vm_free_scratch(struct xe_vm *vm)
{
@@ -1977,6 +1979,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
return ops;
}
+ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
u16 pat_index, unsigned int flags)
@@ -2356,13 +2359,15 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
bool validate)
{
struct xe_bo *bo = xe_vma_bo(vma);
+ struct xe_vm *vm = xe_vma_vm(vma);
int err = 0;
if (bo) {
if (!bo->vm)
err = drm_exec_lock_obj(exec, &bo->ttm.base);
if (!err && validate)
- err = xe_bo_validate(bo, xe_vma_vm(vma), true);
+ err = xe_bo_validate(bo, vm,
+ !xe_vm_in_preempt_fence_mode(vm));
}
return err;
@@ -2696,6 +2701,7 @@ unlock:
drm_exec_fini(&exec);
return err;
}
+ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
#define SUPPORTED_FLAGS_STUB \
(DRM_XE_VM_BIND_FLAG_READONLY | \
@@ -2732,7 +2738,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
*bind_ops = kvmalloc_array(args->num_binds,
sizeof(struct drm_xe_vm_bind_op),
- GFP_KERNEL | __GFP_ACCOUNT);
+ GFP_KERNEL | __GFP_ACCOUNT |
+ __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (!*bind_ops)
return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
@@ -2972,14 +2979,16 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
if (args->num_binds) {
bos = kvcalloc(args->num_binds, sizeof(*bos),
- GFP_KERNEL | __GFP_ACCOUNT);
+ GFP_KERNEL | __GFP_ACCOUNT |
+ __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (!bos) {
err = -ENOMEM;
goto release_vm_lock;
}
ops = kvcalloc(args->num_binds, sizeof(*ops),
- GFP_KERNEL | __GFP_ACCOUNT);
+ GFP_KERNEL | __GFP_ACCOUNT |
+ __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (!ops) {
err = -ENOMEM;
goto release_vm_lock;
@@ -3302,7 +3311,6 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
for (int i = 0; i < snap->num_snaps; i++) {
struct xe_bo *bo = snap->snap[i].bo;
- struct iosys_map src;
int err;
if (IS_ERR(snap->snap[i].data))
@@ -3315,16 +3323,8 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
}
if (bo) {
- xe_bo_lock(bo, false);
- err = ttm_bo_vmap(&bo->ttm, &src);
- if (!err) {
- xe_map_memcpy_from(xe_bo_device(bo),
- snap->snap[i].data,
- &src, snap->snap[i].bo_ofs,
- snap->snap[i].len);
- ttm_bo_vunmap(&bo->ttm, &src);
- }
- xe_bo_unlock(bo);
+ err = xe_bo_read(bo, snap->snap[i].bo_ofs,
+ snap->snap[i].data, snap->snap[i].len);
} else {
void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h
index 4d33f310b653..078786958403 100644
--- a/drivers/gpu/drm/xe/xe_vm_doc.h
+++ b/drivers/gpu/drm/xe/xe_vm_doc.h
@@ -64,8 +64,8 @@
* update page level 2 PDE[1] to page level 3b phys address (GPU)
*
* bind BO2 0x1ff000-0x201000
- * update page level 3a PTE[511] to BO2 phys addres (GPU)
- * update page level 3b PTE[0] to BO2 phys addres + 0x1000 (GPU)
+ * update page level 3a PTE[511] to BO2 phys address (GPU)
+ * update page level 3b PTE[0] to BO2 phys address + 0x1000 (GPU)
*
* GPU bypass
* ~~~~~~~~~~
@@ -192,7 +192,7 @@
*
* If a VM is in fault mode (TODO: link to fault mode), new bind operations that
* create mappings are by default deferred to the page fault handler (first
- * use). This behavior can be overriden by setting the flag
+ * use). This behavior can be overridden by setting the flag
* DRM_XE_VM_BIND_FLAG_IMMEDIATE which indicates to creating the mapping
* immediately.
*
@@ -209,7 +209,7 @@
*
* Since this a core kernel managed memory the kernel can move this memory
* whenever it wants. We register an invalidation MMU notifier to alert XE when
- * a user poiter is about to move. The invalidation notifier needs to block
+ * a user pointer is about to move. The invalidation notifier needs to block
* until all pending users (jobs or compute mode engines) of the userptr are
* idle to ensure no faults. This done by waiting on all of VM's dma-resv slots.
*
@@ -252,7 +252,7 @@
* Rebind worker
* -------------
*
- * The rebind worker is very similar to an exec. It is resposible for rebinding
+ * The rebind worker is very similar to an exec. It is responsible for rebinding
* evicted BOs or userptrs, waiting on those operations, installing new preempt
* fences, and finally resuming executing of engines in the VM.
*
@@ -317,11 +317,11 @@
* are not allowed, only long running workloads and ULLS are enabled on a faulting
* VM.
*
- * Defered VM binds
+ * Deferred VM binds
* ----------------
*
* By default, on a faulting VM binds just allocate the VMA and the actual
- * updating of the page tables is defered to the page fault handler. This
+ * updating of the page tables is deferred to the page fault handler. This
* behavior can be overridden by setting the flag DRM_XE_VM_BIND_FLAG_IMMEDIATE in
* the VM bind which will then do the bind immediately.
*
@@ -500,18 +500,18 @@
* Slot waiting
* ------------
*
- * 1. The exection of all jobs from kernel ops shall wait on all slots
+ * 1. The execution of all jobs from kernel ops shall wait on all slots
* (DMA_RESV_USAGE_PREEMPT_FENCE) of either an external BO or VM (depends on if
* kernel op is operating on external or private BO)
*
- * 2. In non-compute mode, the exection of all jobs from rebinds in execs shall
+ * 2. In non-compute mode, the execution of all jobs from rebinds in execs shall
* wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO or VM
* (depends on if the rebind is operatiing on an external or private BO)
*
- * 3. In non-compute mode, the exection of all jobs from execs shall wait on the
+ * 3. In non-compute mode, the execution of all jobs from execs shall wait on the
* last rebind job
*
- * 4. In compute mode, the exection of all jobs from rebinds in the rebind
+ * 4. In compute mode, the execution of all jobs from rebinds in the rebind
* worker shall wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO
* or VM (depends on if rebind is operating on external or private BO)
*
diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c
new file mode 100644
index 000000000000..b378848d3b7b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vsec.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright © 2024 Intel Corporation */
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/cleanup.h>
+#include <linux/errno.h>
+#include <linux/intel_vsec.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_drv.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+#include "xe_pm.h"
+#include "xe_vsec.h"
+
+#include "regs/xe_pmt.h"
+
+/* PMT GUID value for BMG devices. NOTE: this is NOT a PCI id */
+#define BMG_DEVICE_ID 0xE2F8
+
+static struct intel_vsec_header bmg_telemetry = {
+ .length = 0x10,
+ .id = VSEC_ID_TELEMETRY,
+ .num_entries = 2,
+ .entry_size = 4,
+ .tbir = 0,
+ .offset = BMG_DISCOVERY_OFFSET,
+};
+
+static struct intel_vsec_header bmg_punit_crashlog = {
+ .length = 0x10,
+ .id = VSEC_ID_CRASHLOG,
+ .num_entries = 1,
+ .entry_size = 4,
+ .tbir = 0,
+ .offset = BMG_DISCOVERY_OFFSET + 0x60,
+};
+
+static struct intel_vsec_header bmg_oobmsm_crashlog = {
+ .length = 0x10,
+ .id = VSEC_ID_CRASHLOG,
+ .num_entries = 1,
+ .entry_size = 4,
+ .tbir = 0,
+ .offset = BMG_DISCOVERY_OFFSET + 0x78,
+};
+
+static struct intel_vsec_header *bmg_capabilities[] = {
+ &bmg_telemetry,
+ &bmg_punit_crashlog,
+ &bmg_oobmsm_crashlog,
+ NULL
+};
+
+enum xe_vsec {
+ XE_VSEC_UNKNOWN = 0,
+ XE_VSEC_BMG,
+};
+
+static struct intel_vsec_platform_info xe_vsec_info[] = {
+ [XE_VSEC_BMG] = {
+ .caps = VSEC_CAP_TELEMETRY | VSEC_CAP_CRASHLOG,
+ .headers = bmg_capabilities,
+ },
+ { }
+};
+
+/*
+ * The GUID will have the following bits to decode:
+ * [0:3] - {Telemetry space iteration number (0,1,..)}
+ * [4:7] - Segment (SEGMENT_INDEPENDENT-0, Client-1, Server-2)
+ * [8:11] - SOC_SKU
+ * [12:27] – Device ID – changes for each down bin SKU’s
+ * [28:29] - Capability Type (Crashlog-0, Telemetry Aggregator-1, Watcher-2)
+ * [30:31] - Record-ID (0-PUNIT, 1-OOBMSM_0, 2-OOBMSM_1)
+ */
+#define GUID_TELEM_ITERATION GENMASK(3, 0)
+#define GUID_SEGMENT GENMASK(7, 4)
+#define GUID_SOC_SKU GENMASK(11, 8)
+#define GUID_DEVICE_ID GENMASK(27, 12)
+#define GUID_CAP_TYPE GENMASK(29, 28)
+#define GUID_RECORD_ID GENMASK(31, 30)
+
+#define PUNIT_TELEMETRY_OFFSET 0x0200
+#define PUNIT_WATCHER_OFFSET 0x14A0
+#define OOBMSM_0_WATCHER_OFFSET 0x18D8
+#define OOBMSM_1_TELEMETRY_OFFSET 0x1000
+
+enum record_id {
+ PUNIT,
+ OOBMSM_0,
+ OOBMSM_1,
+};
+
+enum capability {
+ CRASHLOG,
+ TELEMETRY,
+ WATCHER,
+};
+
+static int xe_guid_decode(u32 guid, int *index, u32 *offset)
+{
+ u32 record_id = FIELD_GET(GUID_RECORD_ID, guid);
+ u32 cap_type = FIELD_GET(GUID_CAP_TYPE, guid);
+ u32 device_id = FIELD_GET(GUID_DEVICE_ID, guid);
+
+ if (device_id != BMG_DEVICE_ID)
+ return -ENODEV;
+
+ if (cap_type > WATCHER)
+ return -EINVAL;
+
+ *offset = 0;
+
+ if (cap_type == CRASHLOG) {
+ *index = record_id == PUNIT ? 2 : 4;
+ return 0;
+ }
+
+ switch (record_id) {
+ case PUNIT:
+ *index = 0;
+ if (cap_type == TELEMETRY)
+ *offset = PUNIT_TELEMETRY_OFFSET;
+ else
+ *offset = PUNIT_WATCHER_OFFSET;
+ break;
+
+ case OOBMSM_0:
+ *index = 1;
+ if (cap_type == WATCHER)
+ *offset = OOBMSM_0_WATCHER_OFFSET;
+ break;
+
+ case OOBMSM_1:
+ *index = 1;
+ if (cap_type == TELEMETRY)
+ *offset = OOBMSM_1_TELEMETRY_OFFSET;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset,
+ u32 count)
+{
+ struct xe_device *xe = pdev_to_xe_device(pdev);
+ void __iomem *telem_addr = xe->mmio.regs + BMG_TELEMETRY_OFFSET;
+ u32 mem_region;
+ u32 offset;
+ int ret;
+
+ ret = xe_guid_decode(guid, &mem_region, &offset);
+ if (ret)
+ return ret;
+
+ telem_addr += offset + user_offset;
+
+ guard(mutex)(&xe->pmt.lock);
+
+ /* indicate that we are not at an appropriate power level */
+ if (!xe_pm_runtime_get_if_active(xe))
+ return -ENODATA;
+
+ /* set SoC re-mapper index register based on GUID memory region */
+ xe_mmio_rmw32(xe_root_tile_mmio(xe), SG_REMAP_INDEX1, SG_REMAP_BITS,
+ REG_FIELD_PREP(SG_REMAP_BITS, mem_region));
+
+ memcpy_fromio(data, telem_addr, count);
+ xe_pm_runtime_put(xe);
+
+ return count;
+}
+
+static struct pmt_callbacks xe_pmt_cb = {
+ .read_telem = xe_pmt_telem_read,
+};
+
+static const int vsec_platforms[] = {
+ [XE_BATTLEMAGE] = XE_VSEC_BMG,
+};
+
+static enum xe_vsec get_platform_info(struct xe_device *xe)
+{
+ if (xe->info.platform > XE_BATTLEMAGE)
+ return XE_VSEC_UNKNOWN;
+
+ return vsec_platforms[xe->info.platform];
+}
+
+/**
+ * xe_vsec_init - Initialize resources and add intel_vsec auxiliary
+ * interface
+ * @xe: valid xe instance
+ */
+void xe_vsec_init(struct xe_device *xe)
+{
+ struct intel_vsec_platform_info *info;
+ struct device *dev = xe->drm.dev;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ enum xe_vsec platform;
+
+ platform = get_platform_info(xe);
+ if (platform == XE_VSEC_UNKNOWN)
+ return;
+
+ info = &xe_vsec_info[platform];
+ if (!info->headers)
+ return;
+
+ switch (platform) {
+ case XE_VSEC_BMG:
+ info->priv_data = &xe_pmt_cb;
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * Register a VSEC. Cleanup is handled using device managed
+ * resources.
+ */
+ intel_vsec_register(pdev, info);
+}
+MODULE_IMPORT_NS("INTEL_VSEC");
diff --git a/drivers/gpu/drm/xe/xe_vsec.h b/drivers/gpu/drm/xe/xe_vsec.h
new file mode 100644
index 000000000000..5777c53faec2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vsec.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright © 2024 Intel Corporation */
+
+#ifndef _XE_VSEC_H_
+#define _XE_VSEC_H_
+
+struct xe_device;
+
+void xe_vsec_init(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 02cf647f86d8..570fe0376402 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -607,6 +607,12 @@ static const struct xe_rtp_entry_sr engine_was[] = {
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
},
+ { XE_RTP_NAME("16024792527"),
+ XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0),
+ FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(FIELD_SET(SAMPLER_MODE, SMP_WAIT_FETCH_MERGING_COUNTER,
+ SMP_FORCE_128B_OVERFETCH))
+ },
{}
};
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index bcd04464b85e..3ed12a85cc60 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -1,3 +1,4 @@
+1607983814 GRAPHICS_VERSION_RANGE(1200, 1210)
22012773006 GRAPHICS_VERSION_RANGE(1200, 1250)
14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)
PLATFORM(DG2)
diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h
new file mode 100644
index 000000000000..16d4b8bb590a
--- /dev/null
+++ b/include/drm/intel/xe_pciids.h
@@ -0,0 +1,235 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PCIIDS_H_
+#define _XE_PCIIDS_H_
+
+/*
+ * Lists below can be turned into initializers for a struct pci_device_id
+ * by defining INTEL_VGA_DEVICE:
+ *
+ * #define INTEL_VGA_DEVICE(id, info) { \
+ * 0x8086, id, \
+ * ~0, ~0, \
+ * 0x030000, 0xff0000, \
+ * (unsigned long) info }
+ *
+ * And then calling like:
+ *
+ * XE_TGL_12_GT1_IDS(INTEL_VGA_DEVICE, ## __VA_ARGS__)
+ *
+ * To turn them into something else, just provide a different macro passed as
+ * first argument.
+ */
+
+/* TGL */
+#define XE_TGL_GT1_IDS(MACRO__, ...) \
+ MACRO__(0x9A60, ## __VA_ARGS__), \
+ MACRO__(0x9A68, ## __VA_ARGS__), \
+ MACRO__(0x9A70, ## __VA_ARGS__)
+
+#define XE_TGL_GT2_IDS(MACRO__, ...) \
+ MACRO__(0x9A40, ## __VA_ARGS__), \
+ MACRO__(0x9A49, ## __VA_ARGS__), \
+ MACRO__(0x9A59, ## __VA_ARGS__), \
+ MACRO__(0x9A78, ## __VA_ARGS__), \
+ MACRO__(0x9AC0, ## __VA_ARGS__), \
+ MACRO__(0x9AC9, ## __VA_ARGS__), \
+ MACRO__(0x9AD9, ## __VA_ARGS__), \
+ MACRO__(0x9AF8, ## __VA_ARGS__)
+
+#define XE_TGL_IDS(MACRO__, ...) \
+ XE_TGL_GT1_IDS(MACRO__, ## __VA_ARGS__),\
+ XE_TGL_GT2_IDS(MACRO__, ## __VA_ARGS__)
+
+/* RKL */
+#define XE_RKL_IDS(MACRO__, ...) \
+ MACRO__(0x4C80, ## __VA_ARGS__), \
+ MACRO__(0x4C8A, ## __VA_ARGS__), \
+ MACRO__(0x4C8B, ## __VA_ARGS__), \
+ MACRO__(0x4C8C, ## __VA_ARGS__), \
+ MACRO__(0x4C90, ## __VA_ARGS__), \
+ MACRO__(0x4C9A, ## __VA_ARGS__)
+
+/* DG1 */
+#define XE_DG1_IDS(MACRO__, ...) \
+ MACRO__(0x4905, ## __VA_ARGS__), \
+ MACRO__(0x4906, ## __VA_ARGS__), \
+ MACRO__(0x4907, ## __VA_ARGS__), \
+ MACRO__(0x4908, ## __VA_ARGS__), \
+ MACRO__(0x4909, ## __VA_ARGS__)
+
+/* ADL-S */
+#define XE_ADLS_IDS(MACRO__, ...) \
+ MACRO__(0x4680, ## __VA_ARGS__), \
+ MACRO__(0x4682, ## __VA_ARGS__), \
+ MACRO__(0x4688, ## __VA_ARGS__), \
+ MACRO__(0x468A, ## __VA_ARGS__), \
+ MACRO__(0x468B, ## __VA_ARGS__), \
+ MACRO__(0x4690, ## __VA_ARGS__), \
+ MACRO__(0x4692, ## __VA_ARGS__), \
+ MACRO__(0x4693, ## __VA_ARGS__)
+
+/* ADL-P */
+#define XE_ADLP_IDS(MACRO__, ...) \
+ MACRO__(0x46A0, ## __VA_ARGS__), \
+ MACRO__(0x46A1, ## __VA_ARGS__), \
+ MACRO__(0x46A2, ## __VA_ARGS__), \
+ MACRO__(0x46A3, ## __VA_ARGS__), \
+ MACRO__(0x46A6, ## __VA_ARGS__), \
+ MACRO__(0x46A8, ## __VA_ARGS__), \
+ MACRO__(0x46AA, ## __VA_ARGS__), \
+ MACRO__(0x462A, ## __VA_ARGS__), \
+ MACRO__(0x4626, ## __VA_ARGS__), \
+ MACRO__(0x4628, ## __VA_ARGS__), \
+ MACRO__(0x46B0, ## __VA_ARGS__), \
+ MACRO__(0x46B1, ## __VA_ARGS__), \
+ MACRO__(0x46B2, ## __VA_ARGS__), \
+ MACRO__(0x46B3, ## __VA_ARGS__), \
+ MACRO__(0x46C0, ## __VA_ARGS__), \
+ MACRO__(0x46C1, ## __VA_ARGS__), \
+ MACRO__(0x46C2, ## __VA_ARGS__), \
+ MACRO__(0x46C3, ## __VA_ARGS__)
+
+/* ADL-N */
+#define XE_ADLN_IDS(MACRO__, ...) \
+ MACRO__(0x46D0, ## __VA_ARGS__), \
+ MACRO__(0x46D1, ## __VA_ARGS__), \
+ MACRO__(0x46D2, ## __VA_ARGS__), \
+ MACRO__(0x46D3, ## __VA_ARGS__), \
+ MACRO__(0x46D4, ## __VA_ARGS__)
+
+/* RPL-S */
+#define XE_RPLS_IDS(MACRO__, ...) \
+ MACRO__(0xA780, ## __VA_ARGS__), \
+ MACRO__(0xA781, ## __VA_ARGS__), \
+ MACRO__(0xA782, ## __VA_ARGS__), \
+ MACRO__(0xA783, ## __VA_ARGS__), \
+ MACRO__(0xA788, ## __VA_ARGS__), \
+ MACRO__(0xA789, ## __VA_ARGS__), \
+ MACRO__(0xA78A, ## __VA_ARGS__), \
+ MACRO__(0xA78B, ## __VA_ARGS__)
+
+/* RPL-U */
+#define XE_RPLU_IDS(MACRO__, ...) \
+ MACRO__(0xA721, ## __VA_ARGS__), \
+ MACRO__(0xA7A1, ## __VA_ARGS__), \
+ MACRO__(0xA7A9, ## __VA_ARGS__), \
+ MACRO__(0xA7AC, ## __VA_ARGS__), \
+ MACRO__(0xA7AD, ## __VA_ARGS__)
+
+/* RPL-P */
+#define XE_RPLP_IDS(MACRO__, ...) \
+ MACRO__(0xA720, ## __VA_ARGS__), \
+ MACRO__(0xA7A0, ## __VA_ARGS__), \
+ MACRO__(0xA7A8, ## __VA_ARGS__), \
+ MACRO__(0xA7AA, ## __VA_ARGS__), \
+ MACRO__(0xA7AB, ## __VA_ARGS__)
+
+/* DG2 */
+#define XE_DG2_G10_IDS(MACRO__, ...) \
+ MACRO__(0x5690, ## __VA_ARGS__), \
+ MACRO__(0x5691, ## __VA_ARGS__), \
+ MACRO__(0x5692, ## __VA_ARGS__), \
+ MACRO__(0x56A0, ## __VA_ARGS__), \
+ MACRO__(0x56A1, ## __VA_ARGS__), \
+ MACRO__(0x56A2, ## __VA_ARGS__), \
+ MACRO__(0x56BE, ## __VA_ARGS__), \
+ MACRO__(0x56BF, ## __VA_ARGS__)
+
+#define XE_DG2_G11_IDS(MACRO__, ...) \
+ MACRO__(0x5693, ## __VA_ARGS__), \
+ MACRO__(0x5694, ## __VA_ARGS__), \
+ MACRO__(0x5695, ## __VA_ARGS__), \
+ MACRO__(0x56A5, ## __VA_ARGS__), \
+ MACRO__(0x56A6, ## __VA_ARGS__), \
+ MACRO__(0x56B0, ## __VA_ARGS__), \
+ MACRO__(0x56B1, ## __VA_ARGS__), \
+ MACRO__(0x56BA, ## __VA_ARGS__), \
+ MACRO__(0x56BB, ## __VA_ARGS__), \
+ MACRO__(0x56BC, ## __VA_ARGS__), \
+ MACRO__(0x56BD, ## __VA_ARGS__)
+
+#define XE_DG2_G12_IDS(MACRO__, ...) \
+ MACRO__(0x5696, ## __VA_ARGS__), \
+ MACRO__(0x5697, ## __VA_ARGS__), \
+ MACRO__(0x56A3, ## __VA_ARGS__), \
+ MACRO__(0x56A4, ## __VA_ARGS__), \
+ MACRO__(0x56B2, ## __VA_ARGS__), \
+ MACRO__(0x56B3, ## __VA_ARGS__)
+
+#define XE_DG2_IDS(MACRO__, ...) \
+ XE_DG2_G10_IDS(MACRO__, ## __VA_ARGS__),\
+ XE_DG2_G11_IDS(MACRO__, ## __VA_ARGS__),\
+ XE_DG2_G12_IDS(MACRO__, ## __VA_ARGS__)
+
+#define XE_ATS_M150_IDS(MACRO__, ...) \
+ MACRO__(0x56C0, ## __VA_ARGS__), \
+ MACRO__(0x56C2, ## __VA_ARGS__)
+
+#define XE_ATS_M75_IDS(MACRO__, ...) \
+ MACRO__(0x56C1, ## __VA_ARGS__)
+
+#define XE_ATS_M_IDS(MACRO__, ...) \
+ XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\
+ XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__)
+
+/* ARL */
+#define XE_ARL_IDS(MACRO__, ...) \
+ MACRO__(0x7D41, ## __VA_ARGS__), \
+ MACRO__(0x7D51, ## __VA_ARGS__), \
+ MACRO__(0x7D67, ## __VA_ARGS__), \
+ MACRO__(0x7DD1, ## __VA_ARGS__), \
+ MACRO__(0xB640, ## __VA_ARGS__)
+
+/* MTL */
+#define XE_MTL_IDS(MACRO__, ...) \
+ MACRO__(0x7D40, ## __VA_ARGS__), \
+ MACRO__(0x7D45, ## __VA_ARGS__), \
+ MACRO__(0x7D55, ## __VA_ARGS__), \
+ MACRO__(0x7D60, ## __VA_ARGS__), \
+ MACRO__(0x7DD5, ## __VA_ARGS__)
+
+/* PVC */
+#define XE_PVC_IDS(MACRO__, ...) \
+ MACRO__(0x0B69, ## __VA_ARGS__), \
+ MACRO__(0x0B6E, ## __VA_ARGS__), \
+ MACRO__(0x0BD4, ## __VA_ARGS__), \
+ MACRO__(0x0BD5, ## __VA_ARGS__), \
+ MACRO__(0x0BD6, ## __VA_ARGS__), \
+ MACRO__(0x0BD7, ## __VA_ARGS__), \
+ MACRO__(0x0BD8, ## __VA_ARGS__), \
+ MACRO__(0x0BD9, ## __VA_ARGS__), \
+ MACRO__(0x0BDA, ## __VA_ARGS__), \
+ MACRO__(0x0BDB, ## __VA_ARGS__), \
+ MACRO__(0x0BE0, ## __VA_ARGS__), \
+ MACRO__(0x0BE1, ## __VA_ARGS__), \
+ MACRO__(0x0BE5, ## __VA_ARGS__)
+
+#define XE_LNL_IDS(MACRO__, ...) \
+ MACRO__(0x6420, ## __VA_ARGS__), \
+ MACRO__(0x64A0, ## __VA_ARGS__), \
+ MACRO__(0x64B0, ## __VA_ARGS__)
+
+#define XE_BMG_IDS(MACRO__, ...) \
+ MACRO__(0xE202, ## __VA_ARGS__), \
+ MACRO__(0xE20B, ## __VA_ARGS__), \
+ MACRO__(0xE20C, ## __VA_ARGS__), \
+ MACRO__(0xE20D, ## __VA_ARGS__), \
+ MACRO__(0xE212, ## __VA_ARGS__)
+
+#define XE_PTL_IDS(MACRO__, ...) \
+ MACRO__(0xB080, ## __VA_ARGS__), \
+ MACRO__(0xB081, ## __VA_ARGS__), \
+ MACRO__(0xB082, ## __VA_ARGS__), \
+ MACRO__(0xB090, ## __VA_ARGS__), \
+ MACRO__(0xB091, ## __VA_ARGS__), \
+ MACRO__(0xB092, ## __VA_ARGS__), \
+ MACRO__(0xB0A0, ## __VA_ARGS__), \
+ MACRO__(0xB0A1, ## __VA_ARGS__), \
+ MACRO__(0xB0A2, ## __VA_ARGS__), \
+ MACRO__(0xB0B0, ## __VA_ARGS__)
+
+#endif
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 5804408815be..8ea11cd8df39 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -421,6 +421,8 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo);
int ttm_bo_evict_first(struct ttm_device *bdev,
struct ttm_resource_manager *man,
struct ttm_operation_ctx *ctx);
+int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset,
+ void *buf, int len, int write);
vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
struct vm_fault *vmf);
vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 4a8a4a63e99c..0383b52cbd86 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1486,6 +1486,7 @@ struct drm_xe_oa_unit {
__u64 capabilities;
#define DRM_XE_OA_CAPS_BASE (1 << 0)
#define DRM_XE_OA_CAPS_SYNCS (1 << 1)
+#define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2)
/** @oa_timestamp_freq: OA timestamp freq */
__u64 oa_timestamp_freq;
@@ -1651,6 +1652,14 @@ enum drm_xe_oa_property_id {
* to the VM bind case.
*/
DRM_XE_OA_PROPERTY_SYNCS,
+
+ /**
+ * @DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE: Size of OA buffer to be
+ * allocated by the driver in bytes. Supported sizes are powers of
+ * 2 from 128 KiB to 128 MiB. When not specified, a 16 MiB OA
+ * buffer is allocated by default.
+ */
+ DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE,
};
/**