summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe')
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gpu_commands.h1
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_mi_commands.h4
-rw-r--r--drivers/gpu/drm/xe/regs/xe_engine_regs.h5
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h1
-rw-r--r--drivers/gpu/drm/xe/regs/xe_lrc_layout.h2
-rw-r--r--drivers/gpu/drm/xe/tests/xe_mocs.c7
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.c5
-rw-r--r--drivers/gpu/drm/xe/xe_eu_stall.c14
-rw-r--r--drivers/gpu/drm/xe/xe_eu_stall.h3
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c2
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.c22
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.h1
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_proxy.c11
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_proxy.h1
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.c9
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.c11
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c12
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.c75
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture.c2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.c1
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c2
-rw-r--r--drivers/gpu/drm/xe/xe_hmm.c24
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine.c12
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c108
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c199
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.h5
-rw-r--r--drivers/gpu/drm/xe/xe_lrc_types.h9
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c6
-rw-r--r--drivers/gpu/drm/xe/xe_module.c3
-rw-r--r--drivers/gpu/drm/xe/xe_module.h1
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c2
-rw-r--r--drivers/gpu/drm/xe/xe_pci_types.h1
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c14
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_debugfs.c13
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops.c20
-rw-r--r--drivers/gpu/drm/xe/xe_shrinker.c2
-rw-r--r--drivers/gpu/drm/xe/xe_svm.c137
-rw-r--r--drivers/gpu/drm/xe/xe_svm.h13
-rw-r--r--drivers/gpu/drm/xe/xe_trace_lrc.h8
-rw-r--r--drivers/gpu/drm/xe/xe_uc.c8
-rw-r--r--drivers/gpu/drm/xe/xe_uc.h1
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c3
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c4
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules2
46 files changed, 573 insertions, 218 deletions
diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
index a255946b6f77..8cfcd3360896 100644
--- a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
@@ -41,6 +41,7 @@
#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
+#define PIPE_CONTROL0_L3_READ_ONLY_CACHE_INVALIDATE BIT(10) /* gen12 */
#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH BIT(9) /* gen12 */
#define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29)
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
index 167fb0f742de..5a47991b4b81 100644
--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -47,6 +47,10 @@
#define MI_LRI_FORCE_POSTED REG_BIT(12)
#define MI_LRI_LEN(x) (((x) & 0xff) + 1)
+#define MI_STORE_REGISTER_MEM (__MI_INSTR(0x24) | XE_INSTR_NUM_DW(4))
+#define MI_SRM_USE_GGTT REG_BIT(22)
+#define MI_SRM_ADD_CS_OFFSET REG_BIT(19)
+
#define MI_FLUSH_DW __MI_INSTR(0x26)
#define MI_FLUSH_DW_PROTECTED_MEM_EN REG_BIT(22)
#define MI_FLUSH_DW_STORE_INDEX REG_BIT(21)
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index fb8ec317b6ee..891f928d80ce 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -43,6 +43,10 @@
#define XEHPC_BCS8_RING_BASE 0x3ee000
#define GSCCS_RING_BASE 0x11a000
+#define ENGINE_ID(base) XE_REG((base) + 0x8c)
+#define ENGINE_INSTANCE_ID REG_GENMASK(9, 4)
+#define ENGINE_CLASS_ID REG_GENMASK(2, 0)
+
#define RING_TAIL(base) XE_REG((base) + 0x30)
#define TAIL_ADDR REG_GENMASK(20, 3)
@@ -154,6 +158,7 @@
#define STOP_RING REG_BIT(8)
#define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8)
+#define RING_CTX_TIMESTAMP_UDW(base) XE_REG((base) + 0x3ac)
#define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc)
#define RING_FORCE_TO_NONPRIV(base, i) XE_REG(((base) + 0x4d0) + (i) * 4)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index da1f198ac107..181913967ac9 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -157,6 +157,7 @@
#define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108)
#define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED)
+#define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12)
#define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6)
#define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED)
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
index 57944f90bbf6..994af591a2e8 100644
--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -11,7 +11,9 @@
#define CTX_RING_TAIL (0x06 + 1)
#define CTX_RING_START (0x08 + 1)
#define CTX_RING_CTL (0x0a + 1)
+#define CTX_BB_PER_CTX_PTR (0x12 + 1)
#define CTX_TIMESTAMP (0x22 + 1)
+#define CTX_TIMESTAMP_UDW (0x24 + 1)
#define CTX_INDIRECT_RING_STATE (0x26 + 1)
#define CTX_PDP0_UDW (0x30 + 1)
#define CTX_PDP0_LDW (0x32 + 1)
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index ef1e5256c56a..0e502feaca81 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -46,8 +46,11 @@ static void read_l3cc_table(struct xe_gt *gt,
unsigned int fw_ref, i;
u32 reg_val;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n");
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ KUNIT_ASSERT_TRUE_MSG(test, true, "Forcewake Failed.\n");
+ }
for (i = 0; i < info->num_mocs_regs; i++) {
if (!(i & 1)) {
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 72ef0b6fc425..0482f26aa480 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -330,6 +330,8 @@ struct xe_device {
u8 has_sriov:1;
/** @info.has_usm: Device has unified shared memory support */
u8 has_usm:1;
+ /** @info.has_64bit_timestamp: Device supports 64-bit timestamps */
+ u8 has_64bit_timestamp:1;
/** @info.is_dgfx: is discrete device */
u8 is_dgfx:1;
/**
@@ -585,6 +587,7 @@ struct xe_device {
INTEL_DRAM_DDR5,
INTEL_DRAM_LPDDR5,
INTEL_DRAM_GDDR,
+ INTEL_DRAM_GDDR_ECC,
} type;
u8 num_qgv_points;
u8 num_psf_gv_points;
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index f67803e15a0e..f7a20264ea33 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -145,10 +145,7 @@ static void xe_dma_buf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
- struct dma_buf *dma_buf = attach->dmabuf;
- struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv);
-
- if (!xe_bo_is_vram(bo)) {
+ if (sg_page(sgt->sgl)) {
dma_unmap_sgtable(attach->dev, sgt, dir, 0);
sg_free_table(sgt);
kfree(sgt);
diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
index f2bb9168967c..e2bb156c71fb 100644
--- a/drivers/gpu/drm/xe/xe_eu_stall.c
+++ b/drivers/gpu/drm/xe/xe_eu_stall.c
@@ -52,6 +52,8 @@ struct xe_eu_stall_data_stream {
struct xe_gt *gt;
struct xe_bo *bo;
+ /* Lock to protect data buffer pointers */
+ struct mutex xecore_buf_lock;
struct per_xecore_buf *xecore_buf;
struct {
bool reported_to_user;
@@ -208,6 +210,9 @@ int xe_eu_stall_init(struct xe_gt *gt)
struct xe_device *xe = gt_to_xe(gt);
int ret;
+ if (!xe_eu_stall_supported_on_platform(xe))
+ return 0;
+
gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL);
if (!gt->eu_stall) {
ret = -ENOMEM;
@@ -378,7 +383,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream)
u16 group, instance;
unsigned int xecore;
- mutex_lock(&gt->eu_stall->stream_lock);
+ mutex_lock(&stream->xecore_buf_lock);
for_each_dss_steering(xecore, gt, group, instance) {
xecore_buf = &stream->xecore_buf[xecore];
read_ptr = xecore_buf->read;
@@ -396,7 +401,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream)
set_bit(xecore, stream->data_drop.mask);
xecore_buf->write = write_ptr;
}
- mutex_unlock(&gt->eu_stall->stream_lock);
+ mutex_unlock(&stream->xecore_buf_lock);
return min_data_present;
}
@@ -511,11 +516,13 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st
unsigned int xecore;
int ret = 0;
+ mutex_lock(&stream->xecore_buf_lock);
if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) {
if (!stream->data_drop.reported_to_user) {
stream->data_drop.reported_to_user = true;
xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n",
XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask);
+ mutex_unlock(&stream->xecore_buf_lock);
return -EIO;
}
stream->data_drop.reported_to_user = false;
@@ -527,6 +534,7 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st
if (ret || count == total_size)
break;
}
+ mutex_unlock(&stream->xecore_buf_lock);
return total_size ?: (ret ?: -EAGAIN);
}
@@ -583,6 +591,7 @@ static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream)
{
struct xe_gt *gt = stream->gt;
+ mutex_destroy(&stream->xecore_buf_lock);
gt->eu_stall->stream = NULL;
kfree(stream);
}
@@ -718,6 +727,7 @@ static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
}
init_waitqueue_head(&stream->poll_wq);
+ mutex_init(&stream->xecore_buf_lock);
INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn);
stream->per_xecore_buf_size = per_xecore_buf_size;
stream->sampling_rate_mult = props->sampling_rate_mult;
diff --git a/drivers/gpu/drm/xe/xe_eu_stall.h b/drivers/gpu/drm/xe/xe_eu_stall.h
index ed9d0f233566..d1c76e503799 100644
--- a/drivers/gpu/drm/xe/xe_eu_stall.h
+++ b/drivers/gpu/drm/xe/xe_eu_stall.h
@@ -7,6 +7,7 @@
#define __XE_EU_STALL_H__
#include "xe_gt_types.h"
+#include "xe_sriov.h"
size_t xe_eu_stall_get_per_xecore_buf_size(void);
size_t xe_eu_stall_data_record_size(struct xe_device *xe);
@@ -19,6 +20,6 @@ int xe_eu_stall_stream_open(struct drm_device *dev,
static inline bool xe_eu_stall_supported_on_platform(struct xe_device *xe)
{
- return xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20;
+ return !IS_SRIOV_VF(xe) && (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20);
}
#endif
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 606922d9dd73..cd9b1c32f30f 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -830,7 +830,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
struct xe_device *xe = gt_to_xe(q->gt);
struct xe_lrc *lrc;
- u32 old_ts, new_ts;
+ u64 old_ts, new_ts;
int idx;
/*
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index fd41113f8572..0bcf97063ff6 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -555,6 +555,28 @@ void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
flush_work(&gsc->work);
}
+void xe_gsc_stop_prepare(struct xe_gsc *gsc)
+{
+ struct xe_gt *gt = gsc_to_gt(gsc);
+ int ret;
+
+ if (!xe_uc_fw_is_loadable(&gsc->fw) || xe_uc_fw_is_in_error_state(&gsc->fw))
+ return;
+
+ xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GSC);
+
+ /*
+ * If the GSC FW load or the proxy init are interrupted, the only way
+ * to recover it is to do an FLR and reload the GSC from scratch.
+ * Therefore, let's wait for the init to complete before stopping
+ * operations. The proxy init is the last step, so we can just wait on
+ * that
+ */
+ ret = xe_gsc_wait_for_proxy_init_done(gsc);
+ if (ret)
+ xe_gt_err(gt, "failed to wait for GSC init completion before uc stop\n");
+}
+
/*
* wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a
* GSC engine reset by writing a notification bit in the GS1 register and then
diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h
index d99f66c38075..b8b8e0810ad9 100644
--- a/drivers/gpu/drm/xe/xe_gsc.h
+++ b/drivers/gpu/drm/xe/xe_gsc.h
@@ -16,6 +16,7 @@ struct xe_hw_engine;
int xe_gsc_init(struct xe_gsc *gsc);
int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc);
void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc);
+void xe_gsc_stop_prepare(struct xe_gsc *gsc);
void xe_gsc_load_start(struct xe_gsc *gsc);
void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec);
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index 8cf70b228ff3..d0519cd6704a 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -71,6 +71,17 @@ bool xe_gsc_proxy_init_done(struct xe_gsc *gsc)
HECI1_FWSTS1_PROXY_STATE_NORMAL;
}
+int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc)
+{
+ struct xe_gt *gt = gsc_to_gt(gsc);
+
+ /* Proxy init can take up to 500ms, so wait double that for safety */
+ return xe_mmio_wait32(&gt->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
+ HECI1_FWSTS1_CURRENT_STATE,
+ HECI1_FWSTS1_PROXY_STATE_NORMAL,
+ USEC_PER_SEC, NULL, false);
+}
+
static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set)
{
struct xe_gt *gt = gsc_to_gt(gsc);
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h
index fdef56995cd4..765602221dbc 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.h
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h
@@ -12,6 +12,7 @@ struct xe_gsc;
int xe_gsc_proxy_init(struct xe_gsc *gsc);
bool xe_gsc_proxy_init_done(struct xe_gsc *gsc);
+int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc);
int xe_gsc_proxy_start(struct xe_gsc *gsc);
int xe_gsc_proxy_request_handler(struct xe_gsc *gsc);
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 10a9e3c72b36..66198cf2662c 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -857,7 +857,7 @@ void xe_gt_suspend_prepare(struct xe_gt *gt)
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- xe_uc_stop_prepare(&gt->uc);
+ xe_uc_suspend_prepare(&gt->uc);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 2d63a69cbfa3..f7005a3643e6 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -92,22 +92,23 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p)
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
unsigned int fw_ref;
+ int ret = 0;
xe_pm_runtime_get(xe);
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
- xe_pm_runtime_put(xe);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- return -ETIMEDOUT;
+ ret = -ETIMEDOUT;
+ goto fw_put;
}
for_each_hw_engine(hwe, gt, id)
xe_hw_engine_print(hwe, p);
+fw_put:
xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(xe);
- return 0;
+ return ret;
}
static int powergate_info(struct xe_gt *gt, struct drm_printer *p)
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index c5ad9a0a89c2..0c22b3a36655 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -435,9 +435,16 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss,
XE_MAX_EU_FUSE_BITS) * num_dss;
- /* user can issue separate page faults per EU and per CS */
+ /*
+ * user can issue separate page faults per EU and per CS
+ *
+ * XXX: Multiplier required as compute UMD are getting PF queue errors
+ * without it. Follow on why this multiplier is required.
+ */
+#define PF_MULTIPLIER 8
pf_queue->num_dw =
- (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW;
+ (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER;
+#undef PF_MULTIPLIER
pf_queue->gt = gt;
pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw,
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 03072e094991..084cbdeba8ea 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -322,6 +322,13 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
return 0;
}
+/*
+ * Ensure that roundup_pow_of_two(length) doesn't overflow.
+ * Note that roundup_pow_of_two() operates on unsigned long,
+ * not on u64.
+ */
+#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX))
+
/**
* xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an
* address range
@@ -346,6 +353,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
struct xe_device *xe = gt_to_xe(gt);
#define MAX_TLB_INVALIDATION_LEN 7
u32 action[MAX_TLB_INVALIDATION_LEN];
+ u64 length = end - start;
int len = 0;
xe_gt_assert(gt, fence);
@@ -358,11 +366,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
- if (!xe->info.has_range_tlb_invalidation) {
+ if (!xe->info.has_range_tlb_invalidation ||
+ length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
} else {
u64 orig_start = start;
- u64 length = end - start;
u64 align;
if (length < SZ_4K)
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index e7c9e095a19f..7031542a70ce 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -490,24 +490,52 @@ static void fill_engine_enable_masks(struct xe_gt *gt,
engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER));
}
-static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads)
+/*
+ * Write the offsets corresponding to the golden LRCs. The actual data is
+ * populated later by guc_golden_lrc_populate()
+ */
+static void guc_golden_lrc_init(struct xe_guc_ads *ads)
{
struct xe_device *xe = ads_to_xe(ads);
+ struct xe_gt *gt = ads_to_gt(ads);
struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
offsetof(struct __guc_ads_blob, system_info));
- u8 guc_class;
+ size_t alloc_size, real_size;
+ u32 addr_ggtt, offset;
+ int class;
+
+ offset = guc_ads_golden_lrc_offset(ads);
+ addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
+
+ for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+ u8 guc_class;
+
+ guc_class = xe_engine_class_to_guc_class(class);
- for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) {
if (!info_map_read(xe, &info_map,
engine_enabled_masks[guc_class]))
continue;
+ real_size = xe_gt_lrc_size(gt, class);
+ alloc_size = PAGE_ALIGN(real_size);
+
+ /*
+ * This interface is slightly confusing. We need to pass the
+ * base address of the full golden context and the size of just
+ * the engine state, which is the section of the context image
+ * that starts after the execlists LRC registers. This is
+ * required to allow the GuC to restore just the engine state
+ * when a watchdog reset occurs.
+ * We calculate the engine state size by removing the size of
+ * what comes before it in the context image (which is identical
+ * on all engines).
+ */
ads_blob_write(ads, ads.eng_state_size[guc_class],
- guc_ads_golden_lrc_size(ads) -
- xe_lrc_skip_size(xe));
+ real_size - xe_lrc_skip_size(xe));
ads_blob_write(ads, ads.golden_context_lrca[guc_class],
- xe_bo_ggtt_addr(ads->bo) +
- guc_ads_golden_lrc_offset(ads));
+ addr_ggtt);
+
+ addr_ggtt += alloc_size;
}
}
@@ -857,7 +885,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
guc_policies_init(ads);
- guc_prep_golden_lrc_null(ads);
+ guc_golden_lrc_init(ads);
guc_mapping_table_init_invalid(gt, &info_map);
guc_doorbell_init(ads);
@@ -883,7 +911,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
guc_policies_init(ads);
fill_engine_enable_masks(gt, &info_map);
guc_mmio_reg_state_init(ads);
- guc_prep_golden_lrc_null(ads);
+ guc_golden_lrc_init(ads);
guc_mapping_table_init(gt, &info_map);
guc_capture_prep_lists(ads);
guc_doorbell_init(ads);
@@ -903,18 +931,22 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
guc_ads_private_data_offset(ads));
}
-static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
+/*
+ * After the golden LRC's are recorded for each engine class by the first
+ * submission, copy them to the ADS, as initialized earlier by
+ * guc_golden_lrc_init().
+ */
+static void guc_golden_lrc_populate(struct xe_guc_ads *ads)
{
struct xe_device *xe = ads_to_xe(ads);
struct xe_gt *gt = ads_to_gt(ads);
struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
offsetof(struct __guc_ads_blob, system_info));
size_t total_size = 0, alloc_size, real_size;
- u32 addr_ggtt, offset;
+ u32 offset;
int class;
offset = guc_ads_golden_lrc_offset(ads);
- addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
u8 guc_class;
@@ -931,26 +963,9 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
alloc_size = PAGE_ALIGN(real_size);
total_size += alloc_size;
- /*
- * This interface is slightly confusing. We need to pass the
- * base address of the full golden context and the size of just
- * the engine state, which is the section of the context image
- * that starts after the execlists LRC registers. This is
- * required to allow the GuC to restore just the engine state
- * when a watchdog reset occurs.
- * We calculate the engine state size by removing the size of
- * what comes before it in the context image (which is identical
- * on all engines).
- */
- ads_blob_write(ads, ads.eng_state_size[guc_class],
- real_size - xe_lrc_skip_size(xe));
- ads_blob_write(ads, ads.golden_context_lrca[guc_class],
- addr_ggtt);
-
xe_map_memcpy_to(xe, ads_to_map(ads), offset,
gt->default_lrc[class], real_size);
- addr_ggtt += alloc_size;
offset += alloc_size;
}
@@ -959,7 +974,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
{
- guc_populate_golden_lrc(ads);
+ guc_golden_lrc_populate(ads);
}
static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset)
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
index f6d523e4c5fe..9095618648bc 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.c
+++ b/drivers/gpu/drm/xe/xe_guc_capture.c
@@ -359,7 +359,7 @@ static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
ext->reg = XE_REG(extlist->reg.__reg.addr);
ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1);
- ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
+ ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
ext->regname = extlist->name;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 85215313976c..43b1192ba61c 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -1070,6 +1070,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
SLPC_RESET_EXTENDED_TIMEOUT_MS)) {
xe_gt_err(gt, "GuC PC Start failed: Dynamic GT frequency control and GT sleep states are now disabled.\n");
+ ret = -EIO;
goto out;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 31bc2022bfc2..769781d577df 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -941,7 +941,7 @@ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
return xe_sched_invalidate_job(job, 2);
}
- ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
+ ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0]));
ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
/*
diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
index c3cc0fa105e8..57b71956ddf4 100644
--- a/drivers/gpu/drm/xe/xe_hmm.c
+++ b/drivers/gpu/drm/xe/xe_hmm.c
@@ -19,29 +19,6 @@ static u64 xe_npages_in_range(unsigned long start, unsigned long end)
return (end - start) >> PAGE_SHIFT;
}
-/**
- * xe_mark_range_accessed() - mark a range is accessed, so core mm
- * have such information for memory eviction or write back to
- * hard disk
- * @range: the range to mark
- * @write: if write to this range, we mark pages in this range
- * as dirty
- */
-static void xe_mark_range_accessed(struct hmm_range *range, bool write)
-{
- struct page *page;
- u64 i, npages;
-
- npages = xe_npages_in_range(range->start, range->end);
- for (i = 0; i < npages; i++) {
- page = hmm_pfn_to_page(range->hmm_pfns[i]);
- if (write)
- set_page_dirty_lock(page);
-
- mark_page_accessed(page);
- }
-}
-
static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st,
struct hmm_range *range, struct rw_semaphore *notifier_sem)
{
@@ -331,7 +308,6 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
if (ret)
goto out_unlock;
- xe_mark_range_accessed(&hmm_range, write);
userptr->sg = &userptr->sgt;
xe_hmm_userptr_set_mapped(uvma);
userptr->notifier_seq = hmm_range.notifier_seq;
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 8c05fd30b7df..93241fd0a4ba 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -389,12 +389,6 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
blit_cctl_val,
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
- /* Use Fixed slice CCS mode */
- { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
- XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
- XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
- RCU_MODE_FIXED_SLICE_CCS_MODE))
- },
/* Disable WMTP if HW doesn't support it */
{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
@@ -461,6 +455,12 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
+ /* Use Fixed slice CCS mode */
+ { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
+ XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
+ XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
+ RCU_MODE_FIXED_SLICE_CCS_MODE))
+ },
};
xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
index b53e8d2accdb..a440442b4d72 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -32,14 +32,61 @@ bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max)
return timeout >= min && timeout <= max;
}
-static void kobj_xe_hw_engine_release(struct kobject *kobj)
+static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj)
{
kfree(kobj);
}
+static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct xe_device *xe = kobj_to_xe(kobj);
+ struct kobj_attribute *kattr;
+ ssize_t ret = -EIO;
+
+ kattr = container_of(attr, struct kobj_attribute, attr);
+ if (kattr->show) {
+ xe_pm_runtime_get(xe);
+ ret = kattr->show(kobj, kattr, buf);
+ xe_pm_runtime_put(xe);
+ }
+
+ return ret;
+}
+
+static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct xe_device *xe = kobj_to_xe(kobj);
+ struct kobj_attribute *kattr;
+ ssize_t ret = -EIO;
+
+ kattr = container_of(attr, struct kobj_attribute, attr);
+ if (kattr->store) {
+ xe_pm_runtime_get(xe);
+ ret = kattr->store(kobj, kattr, buf, count);
+ xe_pm_runtime_put(xe);
+ }
+
+ return ret;
+}
+
+static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = {
+ .show = xe_hw_engine_class_sysfs_attr_show,
+ .store = xe_hw_engine_class_sysfs_attr_store,
+};
+
static const struct kobj_type kobj_xe_hw_engine_type = {
- .release = kobj_xe_hw_engine_release,
- .sysfs_ops = &kobj_sysfs_ops
+ .release = xe_hw_engine_sysfs_kobj_release,
+ .sysfs_ops = &xe_hw_engine_class_sysfs_ops,
+};
+
+static const struct kobj_type kobj_xe_hw_engine_type_def = {
+ .release = xe_hw_engine_sysfs_kobj_release,
+ .sysfs_ops = &kobj_sysfs_ops,
};
static ssize_t job_timeout_max_store(struct kobject *kobj,
@@ -543,7 +590,7 @@ static int xe_add_hw_engine_class_defaults(struct xe_device *xe,
if (!kobj)
return -ENOMEM;
- kobject_init(kobj, &kobj_xe_hw_engine_type);
+ kobject_init(kobj, &kobj_xe_hw_engine_type_def);
err = kobject_add(kobj, parent, "%s", ".defaults");
if (err)
goto err_object;
@@ -559,57 +606,6 @@ err_object:
return err;
}
-static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj)
-{
- kfree(kobj);
-}
-
-static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct xe_device *xe = kobj_to_xe(kobj);
- struct kobj_attribute *kattr;
- ssize_t ret = -EIO;
-
- kattr = container_of(attr, struct kobj_attribute, attr);
- if (kattr->show) {
- xe_pm_runtime_get(xe);
- ret = kattr->show(kobj, kattr, buf);
- xe_pm_runtime_put(xe);
- }
-
- return ret;
-}
-
-static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf,
- size_t count)
-{
- struct xe_device *xe = kobj_to_xe(kobj);
- struct kobj_attribute *kattr;
- ssize_t ret = -EIO;
-
- kattr = container_of(attr, struct kobj_attribute, attr);
- if (kattr->store) {
- xe_pm_runtime_get(xe);
- ret = kattr->store(kobj, kattr, buf, count);
- xe_pm_runtime_put(xe);
- }
-
- return ret;
-}
-
-static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = {
- .show = xe_hw_engine_class_sysfs_attr_show,
- .store = xe_hw_engine_class_sysfs_attr_store,
-};
-
-static const struct kobj_type xe_hw_engine_sysfs_kobj_type = {
- .release = xe_hw_engine_sysfs_kobj_release,
- .sysfs_ops = &xe_hw_engine_class_sysfs_ops,
-};
static void hw_engine_class_sysfs_fini(void *arg)
{
@@ -640,7 +636,7 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
if (!kobj)
return -ENOMEM;
- kobject_init(kobj, &xe_hw_engine_sysfs_kobj_type);
+ kobject_init(kobj, &kobj_xe_hw_engine_type);
err = kobject_add(kobj, gt->sysfs, "engines");
if (err)
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index df3ceddede07..03bfba696b37 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -24,6 +24,7 @@
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
+#include "xe_mmio.h"
#include "xe_sriov.h"
#include "xe_trace_lrc.h"
#include "xe_vm.h"
@@ -650,6 +651,7 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
+#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096
#define LRC_PPHWSP_SIZE SZ_4K
u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
@@ -684,7 +686,7 @@ static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
- /* The start seqno is stored in the driver-defined portion of PPHWSP */
+ /* This is stored in the driver-defined portion of PPHWSP */
return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}
@@ -694,11 +696,21 @@ static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}
+static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc)
+{
+ return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET;
+}
+
static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}
+static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc)
+{
+ return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32);
+}
+
static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
/* Indirect ring state page is at the very end of LRC */
@@ -726,8 +738,10 @@ DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
+DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)
+DECL_MAP_ADDR_HELPERS(engine_id)
#undef DECL_MAP_ADDR_HELPERS
@@ -743,18 +757,37 @@ u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
}
/**
+ * xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address
+ * @lrc: Pointer to the lrc.
+ *
+ * Returns: ctx timestamp udw GGTT address
+ */
+u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc)
+{
+ return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
+}
+
+/**
* xe_lrc_ctx_timestamp() - Read ctx timestamp value
* @lrc: Pointer to the lrc.
*
* Returns: ctx timestamp value
*/
-u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
+u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
struct xe_device *xe = lrc_to_xe(lrc);
struct iosys_map map;
+ u32 ldw, udw = 0;
map = __xe_lrc_ctx_timestamp_map(lrc);
- return xe_map_read32(xe, &map);
+ ldw = xe_map_read32(xe, &map);
+
+ if (xe->info.has_64bit_timestamp) {
+ map = __xe_lrc_ctx_timestamp_udw_map(lrc);
+ udw = xe_map_read32(xe, &map);
+ }
+
+ return (u64)udw << 32 | ldw;
}
/**
@@ -864,7 +897,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe)
static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
- u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
+ u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt));
xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
@@ -877,6 +910,65 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
xe_bo_unpin(lrc->bo);
xe_bo_unlock(lrc->bo);
xe_bo_put(lrc->bo);
+ xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo);
+}
+
+/*
+ * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active
+ * context run ticks.
+ * @lrc: Pointer to the lrc.
+ *
+ * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the
+ * context, but only gets updated when the context switches out. In order to
+ * check how long a context has been active before it switches out, two things
+ * are required:
+ *
+ * (1) Determine if the context is running:
+ * To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in
+ * the LRC. The value chosen is 1 since 0 is the initial value when the LRC is
+ * initialized. During a query, we just check for this value to determine if the
+ * context is active. If the context switched out, it would overwrite this
+ * location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as
+ * the last part of context restore, so reusing this LRC location will not
+ * clobber anything.
+ *
+ * (2) Calculate the time that the context has been active for:
+ * The CTX_TIMESTAMP ticks only when the context is active. If a context is
+ * active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization.
+ * While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific
+ * engine instance. Since we do not know which instance the context is running
+ * on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and
+ * store it in the PPHSWP.
+ */
+#define CONTEXT_ACTIVE 1ULL
+static void xe_lrc_setup_utilization(struct xe_lrc *lrc)
+{
+ u32 *cmd;
+
+ cmd = lrc->bb_per_ctx_bo->vmap.vaddr;
+
+ *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
+ *cmd++ = ENGINE_ID(0).addr;
+ *cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc);
+ *cmd++ = 0;
+
+ *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
+ *cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
+ *cmd++ = 0;
+ *cmd++ = lower_32_bits(CONTEXT_ACTIVE);
+
+ if (lrc_to_xe(lrc)->info.has_64bit_timestamp) {
+ *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
+ *cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
+ *cmd++ = 0;
+ *cmd++ = upper_32_bits(CONTEXT_ACTIVE);
+ }
+
+ *cmd++ = MI_BATCH_BUFFER_END;
+
+ xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
+ xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1);
+
}
#define PVC_CTX_ASID (0x2e + 1)
@@ -893,31 +985,40 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
void *init_data = NULL;
u32 arb_enable;
u32 lrc_size;
+ u32 bo_flags;
int err;
kref_init(&lrc->refcount);
+ lrc->gt = gt;
lrc->flags = 0;
lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
if (xe_gt_has_indirect_ring_state(gt))
lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
+ bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE;
+
/*
* FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
* via VM bind calls.
*/
lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ bo_flags);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
+ lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
+ ttm_bo_type_kernel,
+ bo_flags);
+ if (IS_ERR(lrc->bb_per_ctx_bo)) {
+ err = PTR_ERR(lrc->bb_per_ctx_bo);
+ goto err_lrc_finish;
+ }
+
lrc->size = lrc_size;
- lrc->tile = gt_to_tile(hwe->gt);
lrc->ring.size = ring_size;
lrc->ring.tail = 0;
- lrc->ctx_timestamp = 0;
xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
hwe->fence_irq, hwe->name);
@@ -990,7 +1091,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
_MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE));
+ lrc->ctx_timestamp = 0;
xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
+ if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
+ xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0);
if (xe->info.has_asid && vm)
xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
@@ -1019,6 +1123,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
map = __xe_lrc_start_seqno_map(lrc);
xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
+ xe_lrc_setup_utilization(lrc);
+
return 0;
err_lrc_finish:
@@ -1238,6 +1344,21 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
return __xe_lrc_parallel_map(lrc);
}
+/**
+ * xe_lrc_engine_id() - Read engine id value
+ * @lrc: Pointer to the lrc.
+ *
+ * Returns: context id value
+ */
+static u32 xe_lrc_engine_id(struct xe_lrc *lrc)
+{
+ struct xe_device *xe = lrc_to_xe(lrc);
+ struct iosys_map map;
+
+ map = __xe_lrc_engine_id_map(lrc);
+ return xe_map_read32(xe, &map);
+}
+
static int instr_dw(u32 cmd_header)
{
/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
@@ -1684,7 +1805,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
snapshot->lrc_snapshot = NULL;
- snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
+ snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
return snapshot;
}
@@ -1784,22 +1905,74 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
kfree(snapshot);
}
+static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
+{
+ u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id);
+ u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id);
+ struct xe_hw_engine *hwe;
+ u64 val;
+
+ hwe = xe_gt_hw_engine(lrc->gt, class, instance, false);
+ if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe),
+ "Unexpected engine class:instance %d:%d for context utilization\n",
+ class, instance))
+ return -1;
+
+ if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
+ val = xe_mmio_read64_2x32(&hwe->gt->mmio,
+ RING_CTX_TIMESTAMP(hwe->mmio_base));
+ else
+ val = xe_mmio_read32(&hwe->gt->mmio,
+ RING_CTX_TIMESTAMP(hwe->mmio_base));
+
+ *reg_ctx_ts = val;
+
+ return 0;
+}
+
/**
* xe_lrc_update_timestamp() - Update ctx timestamp
* @lrc: Pointer to the lrc.
* @old_ts: Old timestamp value
*
* Populate @old_ts current saved ctx timestamp, read new ctx timestamp and
- * update saved value.
+ * update saved value. With support for active contexts, the calculation may be
+ * slightly racy, so follow a read-again logic to ensure that the context is
+ * still active before returning the right timestamp.
*
* Returns: New ctx timestamp value
*/
-u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
+u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts)
{
+ u64 lrc_ts, reg_ts;
+ u32 engine_id;
+
*old_ts = lrc->ctx_timestamp;
- lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
+ lrc_ts = xe_lrc_ctx_timestamp(lrc);
+ /* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */
+ if (IS_SRIOV_VF(lrc_to_xe(lrc))) {
+ lrc->ctx_timestamp = lrc_ts;
+ goto done;
+ }
+
+ if (lrc_ts == CONTEXT_ACTIVE) {
+ engine_id = xe_lrc_engine_id(lrc);
+ if (!get_ctx_timestamp(lrc, engine_id, &reg_ts))
+ lrc->ctx_timestamp = reg_ts;
+
+ /* read lrc again to ensure context is still active */
+ lrc_ts = xe_lrc_ctx_timestamp(lrc);
+ }
+
+ /*
+ * If context switched out, just use the lrc_ts. Note that this needs to
+ * be a separate if condition.
+ */
+ if (lrc_ts != CONTEXT_ACTIVE)
+ lrc->ctx_timestamp = lrc_ts;
+done:
trace_xe_lrc_update_timestamp(lrc, *old_ts);
return lrc->ctx_timestamp;
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index 0b40f349ab95..eb6e8de8c939 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -120,7 +120,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer
void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot);
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc);
-u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc);
+u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc);
+u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc);
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc);
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc);
@@ -136,6 +137,6 @@ u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc);
*
* Returns the current LRC timestamp
*/
-u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts);
+u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts);
#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
index 71ecb453f811..ae24cf6f8dd9 100644
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -25,8 +25,8 @@ struct xe_lrc {
/** @size: size of lrc including any indirect ring state page */
u32 size;
- /** @tile: tile which this LRC belongs to */
- struct xe_tile *tile;
+ /** @gt: gt which this LRC belongs to */
+ struct xe_gt *gt;
/** @flags: LRC flags */
#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1
@@ -52,7 +52,10 @@ struct xe_lrc {
struct xe_hw_fence_ctx fence_ctx;
/** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */
- u32 ctx_timestamp;
+ u64 ctx_timestamp;
+
+ /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */
+ struct xe_bo *bb_per_ctx_bo;
};
struct xe_lrc_snapshot;
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index df4282c71bf0..5a3e89022c38 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1177,7 +1177,7 @@ err:
err_sync:
/* Sync partial copies if any. FIXME: job_mutex? */
if (fence) {
- dma_fence_wait(m->fence, false);
+ dma_fence_wait(fence, false);
dma_fence_put(fence);
}
@@ -1547,7 +1547,7 @@ void xe_migrate_wait(struct xe_migrate *m)
static u32 pte_update_cmd_size(u64 size)
{
u32 num_dword;
- u64 entries = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+ u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE);
XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER);
/*
@@ -1558,7 +1558,7 @@ static u32 pte_update_cmd_size(u64 size)
* 2 dword for the page table's physical location
* 2*n dword for value of pte to fill (each pte entry is 2 dwords)
*/
- num_dword = (1 + 2) * DIV_ROUND_UP(entries, 0x1ff);
+ num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff);
num_dword += entries * 2;
return num_dword;
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 9f4632e39a1a..e861c694f336 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -29,9 +29,6 @@ struct xe_modparam xe_modparam = {
module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600);
MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2");
-module_param_named(always_migrate_to_vram, xe_modparam.always_migrate_to_vram, bool, 0444);
-MODULE_PARM_DESC(always_migrate_to_vram, "Always migrate to VRAM on GPU fault");
-
module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444);
MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index 84339e509c80..5a3bfea8b7b4 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -12,7 +12,6 @@
struct xe_modparam {
bool force_execlist;
bool probe_display;
- bool always_migrate_to_vram;
u32 force_vram_bar_size;
int guc_log_level;
char *guc_firmware_path;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 818f023166d5..f4d108dc49b1 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -140,6 +140,7 @@ static const struct xe_graphics_desc graphics_xelpg = {
.has_indirect_ring_state = 1, \
.has_range_tlb_invalidation = 1, \
.has_usm = 1, \
+ .has_64bit_timestamp = 1, \
.va_bits = 48, \
.vm_max_level = 4, \
.hw_engine_mask = \
@@ -668,6 +669,7 @@ static int xe_info_init(struct xe_device *xe,
xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
xe->info.has_usm = graphics_desc->has_usm;
+ xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp;
for_each_remote_tile(tile, xe, id) {
int err;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index e9b9bbc138d3..ca6b10d35573 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -21,6 +21,7 @@ struct xe_graphics_desc {
u8 has_indirect_ring_state:1;
u8 has_range_tlb_invalidation:1;
u8 has_usm:1;
+ u8 has_64bit_timestamp:1;
};
struct xe_media_desc {
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index ffaf0d02dc7d..856038553b81 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -2232,11 +2232,19 @@ static void op_commit(struct xe_vm *vm,
}
case DRM_GPUVA_OP_DRIVER:
{
+ /* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */
+
if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
- op->map_range.range->tile_present |= BIT(tile->id);
- op->map_range.range->tile_invalidated &= ~BIT(tile->id);
+ WRITE_ONCE(op->map_range.range->tile_present,
+ op->map_range.range->tile_present |
+ BIT(tile->id));
+ WRITE_ONCE(op->map_range.range->tile_invalidated,
+ op->map_range.range->tile_invalidated &
+ ~BIT(tile->id));
} else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) {
- op->unmap_range.range->tile_present &= ~BIT(tile->id);
+ WRITE_ONCE(op->unmap_range.range->tile_present,
+ op->unmap_range.range->tile_present &
+ ~BIT(tile->id));
}
break;
}
diff --git a/drivers/gpu/drm/xe/xe_pxp_debugfs.c b/drivers/gpu/drm/xe/xe_pxp_debugfs.c
index ccfbacf08efc..525a2f6bb076 100644
--- a/drivers/gpu/drm/xe/xe_pxp_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_pxp_debugfs.c
@@ -66,9 +66,18 @@ static int pxp_terminate(struct seq_file *m, void *data)
{
struct xe_pxp *pxp = node_to_pxp(m->private);
struct drm_printer p = drm_seq_file_printer(m);
+ int ready = xe_pxp_get_readiness_status(pxp);
- if (!xe_pxp_is_enabled(pxp))
- return -ENODEV;
+ if (ready < 0)
+ return ready; /* disabled or error occurred */
+ else if (!ready)
+ return -EBUSY; /* init still in progress */
+
+ /* no need for a termination if PXP is not active */
+ if (pxp->status != XE_PXP_ACTIVE) {
+ drm_printf(&p, "PXP not active\n");
+ return 0;
+ }
/* simulate a termination interrupt */
spin_lock_irq(&pxp->xe->irq.lock);
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 917fc16de866..bc1689db4cd7 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -137,7 +137,8 @@ emit_pipe_control(u32 *dw, int i, u32 bit_group_0, u32 bit_group_1, u32 offset,
static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
int i)
{
- u32 flags = PIPE_CONTROL_CS_STALL |
+ u32 flags0 = 0;
+ u32 flags1 = PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
@@ -148,11 +149,15 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
PIPE_CONTROL_STORE_DATA_INDEX;
if (invalidate_tlb)
- flags |= PIPE_CONTROL_TLB_INVALIDATE;
+ flags1 |= PIPE_CONTROL_TLB_INVALIDATE;
- flags &= ~mask_flags;
+ flags1 &= ~mask_flags;
- return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0);
+ if (flags1 & PIPE_CONTROL_VF_CACHE_INVALIDATE)
+ flags0 |= PIPE_CONTROL0_L3_READ_ONLY_CACHE_INVALIDATE;
+
+ return emit_pipe_control(dw, i, flags0, flags1,
+ LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0);
}
static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
@@ -229,13 +234,10 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job)
static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i)
{
- dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT |
- MI_COPY_MEM_MEM_DST_GGTT;
+ dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
+ dw[i++] = RING_CTX_TIMESTAMP(0).addr;
dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
dw[i++] = 0;
- dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc);
- dw[i++] = 0;
- dw[i++] = MI_NOOP;
return i;
}
diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c
index 8184390f9c7b..86d47aaf0358 100644
--- a/drivers/gpu/drm/xe/xe_shrinker.c
+++ b/drivers/gpu/drm/xe/xe_shrinker.c
@@ -227,7 +227,7 @@ struct xe_shrinker *xe_shrinker_create(struct xe_device *xe)
if (!shrinker)
return ERR_PTR(-ENOMEM);
- shrinker->shrink = shrinker_alloc(0, "xe system shrinker");
+ shrinker->shrink = shrinker_alloc(0, "drm-xe_gem:%s", xe->drm.unique);
if (!shrinker->shrink) {
kfree(shrinker);
return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 3e829c87d7b4..975094c1a582 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -15,8 +15,17 @@
static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
- /* Not reliable without notifier lock */
- return range->base.flags.has_devmem_pages;
+ /*
+ * Advisory only check whether the range is currently backed by VRAM
+ * memory.
+ */
+
+ struct drm_gpusvm_range_flags flags = {
+ /* Pairs with WRITE_ONCE in drm_gpusvm.c */
+ .__flags = READ_ONCE(range->base.flags.__flags),
+ };
+
+ return flags.has_devmem_pages;
}
static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range)
@@ -79,7 +88,7 @@ xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
range = kzalloc(sizeof(*range), GFP_KERNEL);
if (!range)
- return ERR_PTR(-ENOMEM);
+ return NULL;
INIT_LIST_HEAD(&range->garbage_collector_link);
xe_vm_get(gpusvm_to_vm(gpusvm));
@@ -645,9 +654,16 @@ void xe_svm_fini(struct xe_vm *vm)
}
static bool xe_svm_range_is_valid(struct xe_svm_range *range,
- struct xe_tile *tile)
+ struct xe_tile *tile,
+ bool devmem_only)
{
- return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id);
+ /*
+ * Advisory only check whether the range currently has a valid mapping,
+ * READ_ONCE pairs with WRITE_ONCE in xe_pt.c
+ */
+ return ((READ_ONCE(range->tile_present) &
+ ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) &&
+ (!devmem_only || xe_svm_range_in_vram(range));
}
static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
@@ -696,11 +712,14 @@ retry:
list_for_each_entry(block, blocks, link)
block->private = vr;
+ xe_bo_get(bo);
err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base,
&bo->devmem_allocation, ctx);
- xe_bo_unlock(bo);
if (err)
- xe_bo_put(bo); /* Creation ref */
+ xe_svm_devmem_release(&bo->devmem_allocation);
+
+ xe_bo_unlock(bo);
+ xe_bo_put(bo);
unlock:
mmap_read_unlock(mm);
@@ -709,6 +728,36 @@ unlock:
return err;
}
+static bool supports_4K_migration(struct xe_device *xe)
+{
+ if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+ return false;
+
+ return true;
+}
+
+static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range,
+ struct xe_vma *vma)
+{
+ struct xe_vm *vm = range_to_vm(&range->base);
+ u64 range_size = xe_svm_range_size(range);
+
+ if (!range->base.flags.migrate_devmem)
+ return false;
+
+ if (xe_svm_range_in_vram(range)) {
+ drm_dbg(&vm->xe->drm, "Range is already in VRAM\n");
+ return false;
+ }
+
+ if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) {
+ drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
+ return false;
+ }
+
+ return true;
+}
+
/**
* xe_svm_handle_pagefault() - SVM handle page fault
* @vm: The VM.
@@ -732,11 +781,16 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
.check_pages_threshold = IS_DGFX(vm->xe) &&
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
+ .devmem_only = atomic && IS_DGFX(vm->xe) &&
+ IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
+ .timeslice_ms = atomic && IS_DGFX(vm->xe) &&
+ IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0,
};
struct xe_svm_range *range;
struct drm_gpusvm_range *r;
struct drm_exec exec;
struct dma_fence *fence;
+ int migrate_try_count = ctx.devmem_only ? 3 : 1;
ktime_t end = 0;
int err;
@@ -755,24 +809,31 @@ retry:
if (IS_ERR(r))
return PTR_ERR(r);
+ if (ctx.devmem_only && !r->flags.migrate_devmem)
+ return -EACCES;
+
range = to_xe_range(r);
- if (xe_svm_range_is_valid(range, tile))
+ if (xe_svm_range_is_valid(range, tile, ctx.devmem_only))
return 0;
range_debug(range, "PAGE FAULT");
- /* XXX: Add migration policy, for now migrate range once */
- if (!range->skip_migrate && range->base.flags.migrate_devmem &&
- xe_svm_range_size(range) >= SZ_64K) {
- range->skip_migrate = true;
-
+ if (--migrate_try_count >= 0 &&
+ xe_svm_range_needs_migrate_to_vram(range, vma)) {
err = xe_svm_alloc_vram(vm, tile, range, &ctx);
+ ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
if (err) {
- drm_dbg(&vm->xe->drm,
- "VRAM allocation failed, falling back to "
- "retrying fault, asid=%u, errno=%pe\n",
- vm->usm.asid, ERR_PTR(err));
- goto retry;
+ if (migrate_try_count || !ctx.devmem_only) {
+ drm_dbg(&vm->xe->drm,
+ "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
+ vm->usm.asid, ERR_PTR(err));
+ goto retry;
+ } else {
+ drm_err(&vm->xe->drm,
+ "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
+ vm->usm.asid, ERR_PTR(err));
+ return err;
+ }
}
}
@@ -780,15 +841,23 @@ retry:
err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx);
/* Corner where CPU mappings have changed */
if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
- if (err == -EOPNOTSUPP) {
- range_debug(range, "PAGE FAULT - EVICT PAGES");
- drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
+ ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
+ if (migrate_try_count > 0 || !ctx.devmem_only) {
+ if (err == -EOPNOTSUPP) {
+ range_debug(range, "PAGE FAULT - EVICT PAGES");
+ drm_gpusvm_range_evict(&vm->svm.gpusvm,
+ &range->base);
+ }
+ drm_dbg(&vm->xe->drm,
+ "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
+ vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+ range_debug(range, "PAGE FAULT - RETRY PAGES");
+ goto retry;
+ } else {
+ drm_err(&vm->xe->drm,
+ "Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n",
+ vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
}
- drm_dbg(&vm->xe->drm,
- "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
- vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
- range_debug(range, "PAGE FAULT - RETRY PAGES");
- goto retry;
}
if (err) {
range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
@@ -812,6 +881,7 @@ retry_bind:
drm_exec_fini(&exec);
err = PTR_ERR(fence);
if (err == -EAGAIN) {
+ ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
range_debug(range, "PAGE FAULT - RETRY BIND");
goto retry;
}
@@ -822,9 +892,6 @@ retry_bind:
}
drm_exec_fini(&exec);
- if (xe_modparam.always_migrate_to_vram)
- range->skip_migrate = false;
-
dma_fence_wait(fence, false);
dma_fence_put(fence);
@@ -944,3 +1011,15 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
return 0;
}
#endif
+
+/**
+ * xe_svm_flush() - SVM flush
+ * @vm: The VM.
+ *
+ * Flush all SVM actions.
+ */
+void xe_svm_flush(struct xe_vm *vm)
+{
+ if (xe_vm_in_fault_mode(vm))
+ flush_work(&vm->svm.garbage_collector.work);
+}
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index e059590e5076..fe58ac2f4baa 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -36,11 +36,6 @@ struct xe_svm_range {
* range. Protected by GPU SVM notifier lock.
*/
u8 tile_invalidated;
- /**
- * @skip_migrate: Skip migration to VRAM, protected by GPU fault handler
- * locking.
- */
- u8 skip_migrate :1;
};
#if IS_ENABLED(CONFIG_DRM_GPUSVM)
@@ -72,6 +67,9 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end);
int xe_svm_bo_evict(struct xe_bo *bo);
void xe_svm_range_debug(struct xe_svm_range *range, const char *operation);
+
+void xe_svm_flush(struct xe_vm *vm);
+
#else
static inline bool xe_svm_range_pages_valid(struct xe_svm_range *range)
{
@@ -124,6 +122,11 @@ static inline
void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
{
}
+
+static inline void xe_svm_flush(struct xe_vm *vm)
+{
+}
+
#endif
/**
diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.h b/drivers/gpu/drm/xe/xe_trace_lrc.h
index 5c669a0b2180..d525cbee1e34 100644
--- a/drivers/gpu/drm/xe/xe_trace_lrc.h
+++ b/drivers/gpu/drm/xe/xe_trace_lrc.h
@@ -19,12 +19,12 @@
#define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev)
TRACE_EVENT(xe_lrc_update_timestamp,
- TP_PROTO(struct xe_lrc *lrc, uint32_t old),
+ TP_PROTO(struct xe_lrc *lrc, uint64_t old),
TP_ARGS(lrc, old),
TP_STRUCT__entry(
__field(struct xe_lrc *, lrc)
- __field(u32, old)
- __field(u32, new)
+ __field(u64, old)
+ __field(u64, new)
__string(name, lrc->fence_ctx.name)
__string(device_id, __dev_name_lrc(lrc))
),
@@ -36,7 +36,7 @@ TRACE_EVENT(xe_lrc_update_timestamp,
__assign_str(name);
__assign_str(device_id);
),
- TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s",
+ TP_printk("lrc=:%p lrc->name=%s old=%llu new=%llu device_id:%s",
__entry->lrc, __get_str(name),
__entry->old, __entry->new,
__get_str(device_id))
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index c14bd2282044..3a8751a8b92d 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -244,7 +244,7 @@ void xe_uc_gucrc_disable(struct xe_uc *uc)
void xe_uc_stop_prepare(struct xe_uc *uc)
{
- xe_gsc_wait_for_worker_completion(&uc->gsc);
+ xe_gsc_stop_prepare(&uc->gsc);
xe_guc_stop_prepare(&uc->guc);
}
@@ -278,6 +278,12 @@ again:
goto again;
}
+void xe_uc_suspend_prepare(struct xe_uc *uc)
+{
+ xe_gsc_wait_for_worker_completion(&uc->gsc);
+ xe_guc_stop_prepare(&uc->guc);
+}
+
int xe_uc_suspend(struct xe_uc *uc)
{
/* GuC submission not enabled, nothing to do */
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
index 3813c1ede450..c23e6f5e2514 100644
--- a/drivers/gpu/drm/xe/xe_uc.h
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -18,6 +18,7 @@ int xe_uc_reset_prepare(struct xe_uc *uc);
void xe_uc_stop_prepare(struct xe_uc *uc);
void xe_uc_stop(struct xe_uc *uc);
int xe_uc_start(struct xe_uc *uc);
+void xe_uc_suspend_prepare(struct xe_uc *uc);
int xe_uc_suspend(struct xe_uc *uc);
int xe_uc_sanitize_reset(struct xe_uc *uc);
void xe_uc_declare_wedged(struct xe_uc *uc);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 60303998bd61..367c84b90e9e 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3312,8 +3312,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
}
/* Ensure all UNMAPs visible */
- if (xe_vm_in_fault_mode(vm))
- flush_work(&vm->svm.garbage_collector.work);
+ xe_svm_flush(vm);
err = down_write_killable(&vm->lock);
if (err)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 24f644c0a673..2f833f0d575f 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -815,6 +815,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX))
},
+ { XE_RTP_NAME("22021007897"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE))
+ },
/* Xe3_LPG */
{ XE_RTP_NAME("14021490052"),
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 0c738af24f7c..9b9e176992a8 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -32,8 +32,10 @@
GRAPHICS_VERSION(3001)
14022293748 GRAPHICS_VERSION(2001)
GRAPHICS_VERSION(2004)
+ GRAPHICS_VERSION_RANGE(3000, 3001)
22019794406 GRAPHICS_VERSION(2001)
GRAPHICS_VERSION(2004)
+ GRAPHICS_VERSION_RANGE(3000, 3001)
22019338487 MEDIA_VERSION(2000)
GRAPHICS_VERSION(2001)
MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf)