summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon24
-rw-r--r--Documentation/gpu/xe/index.rst1
-rw-r--r--Documentation/gpu/xe/xe_configfs.rst10
-rw-r--r--Documentation/gpu/xe/xe_pcode.rst7
-rw-r--r--drivers/gpu/drm/xe/Kconfig16
-rw-r--r--drivers/gpu/drm/xe/Makefile3
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_abi.h1
-rw-r--r--drivers/gpu/drm/xe/abi/guc_klvs_abi.h1
-rw-r--r--drivers/gpu/drm/xe/display/intel_fbdev_fb.c4
-rw-r--r--drivers/gpu/drm/xe/display/xe_plane_initial.c2
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_alu_commands.h79
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h1
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_mi_commands.h5
-rw-r--r--drivers/gpu/drm/xe/regs/xe_engine_regs.h4
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h13
-rw-r--r--drivers/gpu/drm/xe/regs/xe_pcode_regs.h3
-rw-r--r--drivers/gpu/drm/xe/tests/xe_bo.c4
-rw-r--r--drivers/gpu/drm/xe/tests/xe_migrate.c9
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c403
-rw-r--r--drivers/gpu/drm/xe/xe_bo.h23
-rw-r--r--drivers/gpu/drm/xe/xe_bo_evict.c309
-rw-r--r--drivers/gpu/drm/xe/xe_bo_evict.h7
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_configfs.c250
-rw-r--r--drivers/gpu/drm/xe/xe_configfs.h24
-rw-r--r--drivers/gpu/drm/xe/xe_device.c29
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h32
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.c5
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake.c6
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.c2
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c84
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.c30
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.c3
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.c13
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c20
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c66
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c6
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats.c1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats_types.h1
-rw-r--r--drivers/gpu/drm/xe/xe_guc.c3
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.c87
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c5
-rw-r--r--drivers/gpu/drm/xe/xe_guc_debugfs.c148
-rw-r--r--drivers/gpu/drm/xe/xe_guc_engine_activity.c200
-rw-r--r--drivers/gpu/drm/xe/xe_guc_engine_activity.h7
-rw-r--r--drivers/gpu/drm/xe/xe_guc_engine_activity_types.h12
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log.c3
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.c18
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.h1
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c9
-rw-r--r--drivers/gpu/drm/xe/xe_guc_types.h5
-rw-r--r--drivers/gpu/drm/xe/xe_hmm.c24
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c1
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c125
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt.c2
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c28
-rw-r--r--drivers/gpu/drm/xe/xe_memirq.c2
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c24
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.c11
-rw-r--r--drivers/gpu/drm/xe/xe_module.c9
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c1
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c29
-rw-r--r--drivers/gpu/drm/xe/xe_pci_sriov.c20
-rw-r--r--drivers/gpu/drm/xe/xe_pcode.c2
-rw-r--r--drivers/gpu/drm/xe/xe_pcode_api.h3
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c8
-rw-r--r--drivers/gpu/drm/xe/xe_pmu.c77
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c238
-rw-r--r--drivers/gpu/drm/xe/xe_query.c2
-rw-r--r--drivers/gpu/drm/xe/xe_reg_sr.c3
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.c3
-rw-r--r--drivers/gpu/drm/xe/xe_sa.c3
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.c69
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.h1
-rw-r--r--drivers/gpu/drm/xe/xe_svm.c23
-rw-r--r--drivers/gpu/drm/xe/xe_svm.h80
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw.c94
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c39
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_vram.c5
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c12
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules2
-rw-r--r--include/uapi/drm/xe_drm.h6
84 files changed, 2131 insertions, 820 deletions
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
index 9bce281314df..adbb9bce15a5 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
@@ -124,3 +124,27 @@ Contact: intel-xe@lists.freedesktop.org
Description: RO. VRAM temperature in millidegree Celsius.
Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan1_input
+Date: March 2025
+KernelVersion: 6.14
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Fan 1 speed in RPM.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan2_input
+Date: March 2025
+KernelVersion: 6.14
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Fan 2 speed in RPM.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan3_input
+Date: March 2025
+KernelVersion: 6.14
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Fan 3 speed in RPM.
+
+ Only supported for particular Intel Xe graphics platforms.
diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst
index 92cfb25e64d3..b2369561f24e 100644
--- a/Documentation/gpu/xe/index.rst
+++ b/Documentation/gpu/xe/index.rst
@@ -25,3 +25,4 @@ DG2, etc is provided to prototype the driver.
xe_debugging
xe_devcoredump
xe-drm-usage-stats.rst
+ xe_configfs
diff --git a/Documentation/gpu/xe/xe_configfs.rst b/Documentation/gpu/xe/xe_configfs.rst
new file mode 100644
index 000000000000..9b9d941eb20e
--- /dev/null
+++ b/Documentation/gpu/xe/xe_configfs.rst
@@ -0,0 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+.. _xe_configfs:
+
+============
+Xe Configfs
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_configfs.c
+ :doc: Xe Configfs
diff --git a/Documentation/gpu/xe/xe_pcode.rst b/Documentation/gpu/xe/xe_pcode.rst
index d2e22cc45061..5937ef3599b0 100644
--- a/Documentation/gpu/xe/xe_pcode.rst
+++ b/Documentation/gpu/xe/xe_pcode.rst
@@ -12,3 +12,10 @@ Internal API
.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c
:internal:
+
+==================
+Boot Survivability
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_survivability_mode.c
+ :doc: Xe Boot Survivability
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 5c2f459a2925..9bce047901b2 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -39,7 +39,6 @@ config DRM_XE
select DRM_TTM_HELPER
select DRM_EXEC
select DRM_GPUVM
- select DRM_GPUSVM if !UML && DEVICE_PRIVATE
select DRM_SCHED
select MMU_NOTIFIER
select WANT_DEV_COREDUMP
@@ -74,9 +73,22 @@ config DRM_XE_DP_TUNNEL
If in doubt say "Y".
+config DRM_XE_GPUSVM
+ bool "Enable CPU to GPU address mirroring"
+ depends on DRM_XE
+ depends on !UML
+ depends on DEVICE_PRIVATE
+ default y
+ select DRM_GPUSVM
+ help
+ Enable this option if you want support for CPU to GPU address
+ mirroring.
+
+ If in doubut say "Y".
+
config DRM_XE_DEVMEM_MIRROR
bool "Enable device memory mirror"
- depends on DRM_XE
+ depends on DRM_XE_GPUSVM
select GET_FREE_REGION
default y
help
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index cd464fe26eb8..c5d6681645ed 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -125,12 +125,13 @@ xe-y += xe_bb.o \
xe_wopcm.o
xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o
-xe-$(CONFIG_DRM_GPUSVM) += xe_svm.o
+xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o
# graphics hardware monitoring (HWMON) support
xe-$(CONFIG_HWMON) += xe_hwmon.o
xe-$(CONFIG_PERF_EVENTS) += xe_pmu.o
+xe-$(CONFIG_CONFIGFS_FS) += xe_configfs.o
# graphics virtualization (SR-IOV) support
xe-y += \
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
index ec516e838ee8..448afb86e05c 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -141,6 +141,7 @@ enum xe_guc_action {
XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C,
+ XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER = 0x550D,
XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000,
XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002,
XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003,
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index d633f1c739e4..7de8f827281f 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -367,6 +367,7 @@ enum xe_guc_klv_ids {
GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008,
GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009,
GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO = 0x900a,
+ GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH = 0x900b,
};
#endif
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
index 3a1e505ff182..267f31697343 100644
--- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
@@ -45,7 +45,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
NULL, size,
ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
XE_BO_FLAG_STOLEN |
- XE_BO_FLAG_GGTT | XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_GGTT);
if (!IS_ERR(obj))
drm_info(&xe->drm, "Allocated fbdev into stolen\n");
else
@@ -56,7 +56,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, size,
ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
- XE_BO_FLAG_GGTT | XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_GGTT);
}
if (IS_ERR(obj)) {
diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
index 4ca0cb571194..6502b8274173 100644
--- a/drivers/gpu/drm/xe/display/xe_plane_initial.c
+++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
@@ -83,7 +83,7 @@ initial_plane_bo(struct xe_device *xe,
if (plane_config->size == 0)
return NULL;
- flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT;
+ flags = XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT;
base = round_down(plane_config->base, page_size);
if (IS_DGFX(xe)) {
diff --git a/drivers/gpu/drm/xe/instructions/xe_alu_commands.h b/drivers/gpu/drm/xe/instructions/xe_alu_commands.h
new file mode 100644
index 000000000000..2987b10d3e16
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_alu_commands.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_ALU_COMMANDS_H_
+#define _XE_ALU_COMMANDS_H_
+
+#include "instructions/xe_instr_defs.h"
+
+/* Instruction Opcodes */
+#define CS_ALU_OPCODE_NOOP 0x000
+#define CS_ALU_OPCODE_FENCE_RD 0x001
+#define CS_ALU_OPCODE_FENCE_WR 0x002
+#define CS_ALU_OPCODE_LOAD 0x080
+#define CS_ALU_OPCODE_LOADINV 0x480
+#define CS_ALU_OPCODE_LOAD0 0x081
+#define CS_ALU_OPCODE_LOAD1 0x481
+#define CS_ALU_OPCODE_LOADIND 0x082
+#define CS_ALU_OPCODE_ADD 0x100
+#define CS_ALU_OPCODE_SUB 0x101
+#define CS_ALU_OPCODE_AND 0x102
+#define CS_ALU_OPCODE_OR 0x103
+#define CS_ALU_OPCODE_XOR 0x104
+#define CS_ALU_OPCODE_SHL 0x105
+#define CS_ALU_OPCODE_SHR 0x106
+#define CS_ALU_OPCODE_SAR 0x107
+#define CS_ALU_OPCODE_STORE 0x180
+#define CS_ALU_OPCODE_STOREINV 0x580
+#define CS_ALU_OPCODE_STOREIND 0x181
+
+/* Instruction Operands */
+#define CS_ALU_OPERAND_REG(n) REG_FIELD_PREP(GENMASK(3, 0), (n))
+#define CS_ALU_OPERAND_REG0 0x0
+#define CS_ALU_OPERAND_REG1 0x1
+#define CS_ALU_OPERAND_REG2 0x2
+#define CS_ALU_OPERAND_REG3 0x3
+#define CS_ALU_OPERAND_REG4 0x4
+#define CS_ALU_OPERAND_REG5 0x5
+#define CS_ALU_OPERAND_REG6 0x6
+#define CS_ALU_OPERAND_REG7 0x7
+#define CS_ALU_OPERAND_REG8 0x8
+#define CS_ALU_OPERAND_REG9 0x9
+#define CS_ALU_OPERAND_REG10 0xa
+#define CS_ALU_OPERAND_REG11 0xb
+#define CS_ALU_OPERAND_REG12 0xc
+#define CS_ALU_OPERAND_REG13 0xd
+#define CS_ALU_OPERAND_REG14 0xe
+#define CS_ALU_OPERAND_REG15 0xf
+#define CS_ALU_OPERAND_SRCA 0x20
+#define CS_ALU_OPERAND_SRCB 0x21
+#define CS_ALU_OPERAND_ACCU 0x31
+#define CS_ALU_OPERAND_ZF 0x32
+#define CS_ALU_OPERAND_CF 0x33
+#define CS_ALU_OPERAND_NA 0 /* N/A operand */
+
+/* Command Streamer ALU Instructions */
+#define CS_ALU_INSTR(opcode, op1, op2) (REG_FIELD_PREP(GENMASK(31, 20), (opcode)) | \
+ REG_FIELD_PREP(GENMASK(19, 10), (op1)) | \
+ REG_FIELD_PREP(GENMASK(9, 0), (op2)))
+
+#define __CS_ALU_INSTR(opcode, op1, op2) CS_ALU_INSTR(CS_ALU_OPCODE_##opcode, \
+ CS_ALU_OPERAND_##op1, \
+ CS_ALU_OPERAND_##op2)
+
+#define CS_ALU_INSTR_NOOP __CS_ALU_INSTR(NOOP, NA, NA)
+#define CS_ALU_INSTR_LOAD(op1, op2) __CS_ALU_INSTR(LOAD, op1, op2)
+#define CS_ALU_INSTR_LOADINV(op1, op2) __CS_ALU_INSTR(LOADINV, op1, op2)
+#define CS_ALU_INSTR_LOAD0(op1) __CS_ALU_INSTR(LOAD0, op1, NA)
+#define CS_ALU_INSTR_LOAD1(op1) __CS_ALU_INSTR(LOAD1, op1, NA)
+#define CS_ALU_INSTR_ADD __CS_ALU_INSTR(ADD, NA, NA)
+#define CS_ALU_INSTR_SUB __CS_ALU_INSTR(SUB, NA, NA)
+#define CS_ALU_INSTR_AND __CS_ALU_INSTR(AND, NA, NA)
+#define CS_ALU_INSTR_OR __CS_ALU_INSTR(OR, NA, NA)
+#define CS_ALU_INSTR_XOR __CS_ALU_INSTR(XOR, NA, NA)
+#define CS_ALU_INSTR_STORE(op1, op2) __CS_ALU_INSTR(STORE, op1, op2)
+#define CS_ALU_INSTR_STOREINV(op1, op2) __CS_ALU_INSTR(STOREINV, op1, op2)
+
+#endif
diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
index 31d28a67ef6a..457881af8af9 100644
--- a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
@@ -137,6 +137,7 @@
#define CMD_3DSTATE_CLIP_MESH GFXPIPE_3D_CMD(0x0, 0x81)
#define CMD_3DSTATE_SBE_MESH GFXPIPE_3D_CMD(0x0, 0x82)
#define CMD_3DSTATE_CPSIZE_CONTROL_BUFFER GFXPIPE_3D_CMD(0x0, 0x83)
+#define CMD_3DSTATE_COARSE_PIXEL GFXPIPE_3D_CMD(0x0, 0x89)
#define CMD_3DSTATE_DRAWING_RECTANGLE GFXPIPE_3D_CMD(0x1, 0x0)
#define CMD_3DSTATE_CHROMA_KEY GFXPIPE_3D_CMD(0x1, 0x4)
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
index 167fb0f742de..eba582058d55 100644
--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -32,6 +32,7 @@
#define MI_BATCH_BUFFER_END __MI_INSTR(0xA)
#define MI_TOPOLOGY_FILTER __MI_INSTR(0xD)
#define MI_FORCE_WAKEUP __MI_INSTR(0x1D)
+#define MI_MATH(n) (__MI_INSTR(0x1A) | XE_INSTR_NUM_DW((n) + 1))
#define MI_STORE_DATA_IMM __MI_INSTR(0x20)
#define MI_SDI_GGTT REG_BIT(22)
@@ -61,6 +62,10 @@
#define MI_LOAD_REGISTER_MEM (__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4))
#define MI_LRM_USE_GGTT REG_BIT(22)
+#define MI_LOAD_REGISTER_REG (__MI_INSTR(0x2a) | XE_INSTR_NUM_DW(3))
+#define MI_LRR_DST_CS_MMIO REG_BIT(19)
+#define MI_LRR_SRC_CS_MMIO REG_BIT(18)
+
#define MI_COPY_MEM_MEM (__MI_INSTR(0x2e) | XE_INSTR_NUM_DW(5))
#define MI_COPY_MEM_MEM_SRC_GGTT REG_BIT(22)
#define MI_COPY_MEM_MEM_DST_GGTT REG_BIT(21)
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index fb8ec317b6ee..da713634d6a0 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -188,6 +188,10 @@
#define PREEMPT_GPGPU_LEVEL_MASK PREEMPT_GPGPU_LEVEL(1, 1)
#define PREEMPT_3D_OBJECT_LEVEL REG_BIT(0)
+#define CS_GPR_DATA(base, n) XE_REG((base) + 0x600 + (n) * 4)
+#define CS_GPR_REG(base, n) CS_GPR_DATA((base), (n) * 2)
+#define CS_GPR_REG_UDW(base, n) CS_GPR_DATA((base), (n) * 2 + 1)
+
#define VDBOX_CGCTL3F08(base) XE_REG((base) + 0x3f08)
#define CG3DDISHRS_CLKGATE_DIS REG_BIT(5)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index da1f198ac107..cbb9f7cbcfc0 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -62,7 +62,6 @@
#define LE_SSE_MASK REG_GENMASK(18, 17)
#define LE_SSE(value) REG_FIELD_PREP(LE_SSE_MASK, value)
#define LE_COS_MASK REG_GENMASK(16, 15)
-#define LE_COS(value) REG_FIELD_PREP(LE_COS_MASK)
#define LE_SCF_MASK REG_BIT(14)
#define LE_SCF(value) REG_FIELD_PREP(LE_SCF_MASK, value)
#define LE_PFM_MASK REG_GENMASK(13, 11)
@@ -392,6 +391,18 @@
#define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4)
#define XEHP_LNESPARE REG_BIT(19)
+#define LSN_VC_REG2 XE_REG_MCR(0xb0c8)
+#define LSN_LNI_WGT_MASK REG_GENMASK(31, 28)
+#define LSN_LNI_WGT(value) REG_FIELD_PREP(LSN_LNI_WGT_MASK, value)
+#define LSN_LNE_WGT_MASK REG_GENMASK(27, 24)
+#define LSN_LNE_WGT(value) REG_FIELD_PREP(LSN_LNE_WGT_MASK, value)
+#define LSN_DIM_X_WGT_MASK REG_GENMASK(23, 20)
+#define LSN_DIM_X_WGT(value) REG_FIELD_PREP(LSN_DIM_X_WGT_MASK, value)
+#define LSN_DIM_Y_WGT_MASK REG_GENMASK(19, 16)
+#define LSN_DIM_Y_WGT(value) REG_FIELD_PREP(LSN_DIM_Y_WGT_MASK, value)
+#define LSN_DIM_Z_WGT_MASK REG_GENMASK(15, 12)
+#define LSN_DIM_Z_WGT(value) REG_FIELD_PREP(LSN_DIM_Z_WGT_MASK, value)
+
#define L3SQCREG2 XE_REG_MCR(0xb104)
#define COMPMEMRD256BOVRFETCHEN REG_BIT(20)
diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
index 8846eb9ce2a4..c7d5d782e3f9 100644
--- a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
@@ -21,6 +21,9 @@
#define BMG_PACKAGE_POWER_SKU XE_REG(0x138098)
#define BMG_PACKAGE_POWER_SKU_UNIT XE_REG(0x1380dc)
#define BMG_PACKAGE_ENERGY_STATUS XE_REG(0x138120)
+#define BMG_FAN_1_SPEED XE_REG(0x138140)
+#define BMG_FAN_2_SPEED XE_REG(0x138170)
+#define BMG_FAN_3_SPEED XE_REG(0x1381a0)
#define BMG_VRAM_TEMPERATURE XE_REG(0x1382c0)
#define BMG_PACKAGE_TEMPERATURE XE_REG(0x138434)
#define BMG_PACKAGE_RAPL_LIMIT XE_REG(0x138440)
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 9fde67ca989f..230eb824550f 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -252,7 +252,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
for_each_gt(__gt, xe, id)
xe_gt_sanitize(__gt);
- err = xe_bo_restore_kernel(xe);
+ err = xe_bo_restore_early(xe);
/*
* Snapshotting the CTB and copying back a potentially old
* version seems risky, depending on what might have been
@@ -273,7 +273,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
goto cleanup_all;
}
- err = xe_bo_restore_user(xe);
+ err = xe_bo_restore_late(xe);
if (err) {
KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
goto cleanup_all;
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index d5fe0ea889ad..52f89476bf62 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -202,8 +202,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_VRAM_IF_DGFX(tile));
if (IS_ERR(big)) {
KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big));
goto vunmap;
@@ -211,8 +210,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_VRAM_IF_DGFX(tile));
if (IS_ERR(pt)) {
KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
PTR_ERR(pt));
@@ -222,8 +220,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
tiny = xe_bo_create_pin_map(xe, tile, m->q->vm,
2 * SZ_4K,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_VRAM_IF_DGFX(tile));
if (IS_ERR(tiny)) {
KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n",
PTR_ERR(tiny));
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 32a5a6390f62..89ab502a3849 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -55,6 +55,8 @@ static struct ttm_placement sys_placement = {
.placement = &sys_placement_flags,
};
+static struct ttm_placement purge_placement;
+
static const struct ttm_place tt_placement_flags[] = {
{
.fpfn = 0,
@@ -189,11 +191,18 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
static bool force_contiguous(u32 bo_flags)
{
+ if (bo_flags & XE_BO_FLAG_STOLEN)
+ return true; /* users expect this */
+ else if (bo_flags & XE_BO_FLAG_PINNED &&
+ !(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE))
+ return true; /* needs vmap */
+
/*
* For eviction / restore on suspend / resume objects pinned in VRAM
* must be contiguous, also only contiguous BOs support xe_bo_vmap.
*/
- return bo_flags & (XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
+ return bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS &&
+ bo_flags & XE_BO_FLAG_PINNED;
}
static void add_vram(struct xe_device *xe, struct xe_bo *bo,
@@ -281,6 +290,8 @@ int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
static void xe_evict_flags(struct ttm_buffer_object *tbo,
struct ttm_placement *placement)
{
+ struct xe_device *xe = container_of(tbo->bdev, typeof(*xe), ttm);
+ bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
struct xe_bo *bo;
if (!xe_bo_is_xe_bo(tbo)) {
@@ -290,7 +301,7 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
return;
}
- *placement = sys_placement;
+ *placement = device_unplugged ? purge_placement : sys_placement;
return;
}
@@ -300,6 +311,11 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
return;
}
+ if (device_unplugged && !tbo->base.dma_buf) {
+ *placement = purge_placement;
+ return;
+ }
+
/*
* For xe, sg bos that are evicted to system just triggers a
* rebind of the sg list upon subsequent validation to XE_PL_TT.
@@ -657,11 +673,20 @@ static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
ttm);
struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+ bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
struct sg_table *sg;
xe_assert(xe, attach);
xe_assert(xe, ttm_bo->ttm);
+ if (device_unplugged && new_res->mem_type == XE_PL_SYSTEM &&
+ ttm_bo->sg) {
+ dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+ dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
+ ttm_bo->sg = NULL;
+ }
+
if (new_res->mem_type == XE_PL_SYSTEM)
goto out;
@@ -898,79 +923,44 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
xe_pm_runtime_get_noresume(xe);
}
- if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
- /*
- * Kernel memory that is pinned should only be moved on suspend
- * / resume, some of the pinned memory is required for the
- * device to resume / use the GPU to move other evicted memory
- * (user memory) around. This likely could be optimized a bit
- * further where we find the minimum set of pinned memory
- * required for resume but for simplity doing a memcpy for all
- * pinned memory.
- */
- ret = xe_bo_vmap(bo);
- if (!ret) {
- ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
-
- /* Create a new VMAP once kernel BO back in VRAM */
- if (!ret && resource_is_vram(new_mem)) {
- struct xe_vram_region *vram = res_to_mem_region(new_mem);
- void __iomem *new_addr = vram->mapping +
- (new_mem->start << PAGE_SHIFT);
-
- if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
- ret = -EINVAL;
- xe_pm_runtime_put(xe);
- goto out;
- }
+ if (move_lacks_source) {
+ u32 flags = 0;
- xe_assert(xe, new_mem->start ==
- bo->placements->fpfn);
+ if (mem_type_is_vram(new_mem->mem_type))
+ flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
+ else if (handle_system_ccs)
+ flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
- iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
- }
- }
+ fence = xe_migrate_clear(migrate, bo, new_mem, flags);
} else {
- if (move_lacks_source) {
- u32 flags = 0;
-
- if (mem_type_is_vram(new_mem->mem_type))
- flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
- else if (handle_system_ccs)
- flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
-
- fence = xe_migrate_clear(migrate, bo, new_mem, flags);
- }
- else
- fence = xe_migrate_copy(migrate, bo, bo, old_mem,
- new_mem, handle_system_ccs);
- if (IS_ERR(fence)) {
- ret = PTR_ERR(fence);
- xe_pm_runtime_put(xe);
- goto out;
- }
- if (!move_lacks_source) {
- ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
- true, new_mem);
- if (ret) {
- dma_fence_wait(fence, false);
- ttm_bo_move_null(ttm_bo, new_mem);
- ret = 0;
- }
- } else {
- /*
- * ttm_bo_move_accel_cleanup() may blow up if
- * bo->resource == NULL, so just attach the
- * fence and set the new resource.
- */
- dma_resv_add_fence(ttm_bo->base.resv, fence,
- DMA_RESV_USAGE_KERNEL);
+ fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem,
+ handle_system_ccs);
+ }
+ if (IS_ERR(fence)) {
+ ret = PTR_ERR(fence);
+ xe_pm_runtime_put(xe);
+ goto out;
+ }
+ if (!move_lacks_source) {
+ ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
+ new_mem);
+ if (ret) {
+ dma_fence_wait(fence, false);
ttm_bo_move_null(ttm_bo, new_mem);
+ ret = 0;
}
-
- dma_fence_put(fence);
+ } else {
+ /*
+ * ttm_bo_move_accel_cleanup() may blow up if
+ * bo->resource == NULL, so just attach the
+ * fence and set the new resource.
+ */
+ dma_resv_add_fence(ttm_bo->base.resv, fence,
+ DMA_RESV_USAGE_KERNEL);
+ ttm_bo_move_null(ttm_bo, new_mem);
}
+ dma_fence_put(fence);
xe_pm_runtime_put(xe);
out:
@@ -1107,59 +1097,93 @@ out_unref:
*/
int xe_bo_evict_pinned(struct xe_bo *bo)
{
- struct ttm_place place = {
- .mem_type = XE_PL_TT,
- };
- struct ttm_placement placement = {
- .placement = &place,
- .num_placement = 1,
- };
- struct ttm_operation_ctx ctx = {
- .interruptible = false,
- .gfp_retry_mayfail = true,
- };
- struct ttm_resource *new_mem;
- int ret;
+ struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
+ struct xe_bo *backup;
+ bool unmap = false;
+ int ret = 0;
- xe_bo_assert_held(bo);
+ xe_bo_lock(bo, false);
- if (WARN_ON(!bo->ttm.resource))
- return -EINVAL;
+ if (WARN_ON(!bo->ttm.resource)) {
+ ret = -EINVAL;
+ goto out_unlock_bo;
+ }
- if (WARN_ON(!xe_bo_is_pinned(bo)))
- return -EINVAL;
+ if (WARN_ON(!xe_bo_is_pinned(bo))) {
+ ret = -EINVAL;
+ goto out_unlock_bo;
+ }
if (!xe_bo_is_vram(bo))
- return 0;
+ goto out_unlock_bo;
- ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
- if (ret)
- return ret;
+ if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
+ goto out_unlock_bo;
- if (!bo->ttm.ttm) {
- bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
- if (!bo->ttm.ttm) {
- ret = -ENOMEM;
- goto err_res_free;
- }
+ backup = xe_bo_create_locked(xe, NULL, NULL, bo->size, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
+ XE_BO_FLAG_PINNED);
+ if (IS_ERR(backup)) {
+ ret = PTR_ERR(backup);
+ goto out_unlock_bo;
}
- ret = ttm_bo_populate(&bo->ttm, &ctx);
- if (ret)
- goto err_res_free;
+ if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
+ struct xe_migrate *migrate;
+ struct dma_fence *fence;
- ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
- if (ret)
- goto err_res_free;
+ if (bo->tile)
+ migrate = bo->tile->migrate;
+ else
+ migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
- ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
- if (ret)
- goto err_res_free;
+ ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+ if (ret)
+ goto out_backup;
- return 0;
+ ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
+ if (ret)
+ goto out_backup;
-err_res_free:
- ttm_resource_free(&bo->ttm, &new_mem);
+ fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
+ backup->ttm.resource, false);
+ if (IS_ERR(fence)) {
+ ret = PTR_ERR(fence);
+ goto out_backup;
+ }
+
+ dma_resv_add_fence(bo->ttm.base.resv, fence,
+ DMA_RESV_USAGE_KERNEL);
+ dma_resv_add_fence(backup->ttm.base.resv, fence,
+ DMA_RESV_USAGE_KERNEL);
+ dma_fence_put(fence);
+ } else {
+ ret = xe_bo_vmap(backup);
+ if (ret)
+ goto out_backup;
+
+ if (iosys_map_is_null(&bo->vmap)) {
+ ret = xe_bo_vmap(bo);
+ if (ret)
+ goto out_backup;
+ unmap = true;
+ }
+
+ xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
+ bo->size);
+ }
+
+ bo->backup_obj = backup;
+
+out_backup:
+ xe_bo_vunmap(backup);
+ xe_bo_unlock(backup);
+ if (ret)
+ xe_bo_put(backup);
+out_unlock_bo:
+ if (unmap)
+ xe_bo_vunmap(bo);
+ xe_bo_unlock(bo);
return ret;
}
@@ -1180,50 +1204,110 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
.interruptible = false,
.gfp_retry_mayfail = false,
};
- struct ttm_resource *new_mem;
- struct ttm_place *place = &bo->placements[0];
+ struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
+ struct xe_bo *backup = bo->backup_obj;
+ bool unmap = false;
int ret;
- xe_bo_assert_held(bo);
+ if (!backup)
+ return 0;
- if (WARN_ON(!bo->ttm.resource))
- return -EINVAL;
+ xe_bo_lock(backup, false);
- if (WARN_ON(!xe_bo_is_pinned(bo)))
- return -EINVAL;
+ ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
+ if (ret)
+ goto out_backup;
- if (WARN_ON(xe_bo_is_vram(bo)))
- return -EINVAL;
+ if (WARN_ON(!dma_resv_trylock(bo->ttm.base.resv))) {
+ ret = -EBUSY;
+ goto out_backup;
+ }
- if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo)))
- return -EINVAL;
+ if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
+ struct xe_migrate *migrate;
+ struct dma_fence *fence;
- if (!mem_type_is_vram(place->mem_type))
- return 0;
+ if (bo->tile)
+ migrate = bo->tile->migrate;
+ else
+ migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
- ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
- if (ret)
- return ret;
+ ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+ if (ret)
+ goto out_unlock_bo;
- ret = ttm_bo_populate(&bo->ttm, &ctx);
- if (ret)
- goto err_res_free;
+ ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
+ if (ret)
+ goto out_unlock_bo;
- ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
- if (ret)
- goto err_res_free;
+ fence = xe_migrate_copy(migrate, backup, bo,
+ backup->ttm.resource, bo->ttm.resource,
+ false);
+ if (IS_ERR(fence)) {
+ ret = PTR_ERR(fence);
+ goto out_unlock_bo;
+ }
- ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
- if (ret)
- goto err_res_free;
+ dma_resv_add_fence(bo->ttm.base.resv, fence,
+ DMA_RESV_USAGE_KERNEL);
+ dma_resv_add_fence(backup->ttm.base.resv, fence,
+ DMA_RESV_USAGE_KERNEL);
+ dma_fence_put(fence);
+ } else {
+ ret = xe_bo_vmap(backup);
+ if (ret)
+ goto out_unlock_bo;
- return 0;
+ if (iosys_map_is_null(&bo->vmap)) {
+ ret = xe_bo_vmap(bo);
+ if (ret)
+ goto out_unlock_bo;
+ unmap = true;
+ }
-err_res_free:
- ttm_resource_free(&bo->ttm, &new_mem);
+ xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
+ bo->size);
+ }
+
+ bo->backup_obj = NULL;
+
+out_unlock_bo:
+ if (unmap)
+ xe_bo_vunmap(bo);
+ xe_bo_unlock(bo);
+out_backup:
+ xe_bo_vunmap(backup);
+ xe_bo_unlock(backup);
+ if (!bo->backup_obj)
+ xe_bo_put(backup);
return ret;
}
+int xe_bo_dma_unmap_pinned(struct xe_bo *bo)
+{
+ struct ttm_buffer_object *ttm_bo = &bo->ttm;
+ struct ttm_tt *tt = ttm_bo->ttm;
+
+ if (tt) {
+ struct xe_ttm_tt *xe_tt = container_of(tt, typeof(*xe_tt), ttm);
+
+ if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
+ dma_buf_unmap_attachment(ttm_bo->base.import_attach,
+ ttm_bo->sg,
+ DMA_BIDIRECTIONAL);
+ ttm_bo->sg = NULL;
+ xe_tt->sg = NULL;
+ } else if (xe_tt->sg) {
+ dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg,
+ DMA_BIDIRECTIONAL, 0);
+ sg_free_table(xe_tt->sg);
+ xe_tt->sg = NULL;
+ }
+ }
+
+ return 0;
+}
+
static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
unsigned long page_offset)
{
@@ -1947,7 +2031,7 @@ struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
flags |= XE_BO_FLAG_GGTT;
bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
- flags | XE_BO_FLAG_NEEDS_CPU_ACCESS,
+ flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
alignment);
if (IS_ERR(bo))
return bo;
@@ -2049,7 +2133,8 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str
struct xe_bo *bo;
u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
- dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE;
+ dst_flags |= (*src)->flags & (XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_PINNED_NORESTORE);
xe_assert(xe, IS_DGFX(xe));
xe_assert(xe, !(*src)->vmap.is_iomem);
@@ -2073,10 +2158,16 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res)
{
struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
- if (res->mem_type == XE_PL_STOLEN)
+ switch (res->mem_type) {
+ case XE_PL_STOLEN:
return xe_ttm_stolen_gpu_offset(xe);
-
- return res_to_mem_region(res)->dpa_base;
+ case XE_PL_TT:
+ case XE_PL_SYSTEM:
+ return 0;
+ default:
+ return res_to_mem_region(res)->dpa_base;
+ }
+ return 0;
}
/**
@@ -2102,12 +2193,9 @@ int xe_bo_pin_external(struct xe_bo *bo)
if (err)
return err;
- if (xe_bo_is_vram(bo)) {
- spin_lock(&xe->pinned.lock);
- list_add_tail(&bo->pinned_link,
- &xe->pinned.external_vram);
- spin_unlock(&xe->pinned.lock);
- }
+ spin_lock(&xe->pinned.lock);
+ list_add_tail(&bo->pinned_link, &xe->pinned.late.external);
+ spin_unlock(&xe->pinned.lock);
}
ttm_bo_pin(&bo->ttm);
@@ -2149,25 +2237,12 @@ int xe_bo_pin(struct xe_bo *bo)
if (err)
return err;
- /*
- * For pinned objects in on DGFX, which are also in vram, we expect
- * these to be in contiguous VRAM memory. Required eviction / restore
- * during suspend / resume (force restore to same physical address).
- */
- if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
- bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
- if (mem_type_is_vram(place->mem_type)) {
- xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
-
- place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
- vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
- place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
- }
- }
-
if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
spin_lock(&xe->pinned.lock);
- list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+ if (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)
+ list_add_tail(&bo->pinned_link, &xe->pinned.late.kernel_bo_present);
+ else
+ list_add_tail(&bo->pinned_link, &xe->pinned.early.kernel_bo_present);
spin_unlock(&xe->pinned.lock);
}
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 1a3734d9f8be..b39b46570808 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -39,20 +39,23 @@
#define XE_BO_FLAG_NEEDS_64K BIT(15)
#define XE_BO_FLAG_NEEDS_2M BIT(16)
#define XE_BO_FLAG_GGTT_INVALIDATE BIT(17)
-#define XE_BO_FLAG_GGTT0 BIT(18)
-#define XE_BO_FLAG_GGTT1 BIT(19)
-#define XE_BO_FLAG_GGTT2 BIT(20)
-#define XE_BO_FLAG_GGTT3 BIT(21)
-#define XE_BO_FLAG_GGTT_ALL (XE_BO_FLAG_GGTT0 | \
- XE_BO_FLAG_GGTT1 | \
- XE_BO_FLAG_GGTT2 | \
- XE_BO_FLAG_GGTT3)
-#define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(22)
+#define XE_BO_FLAG_PINNED_NORESTORE BIT(18)
+#define XE_BO_FLAG_PINNED_LATE_RESTORE BIT(19)
+#define XE_BO_FLAG_GGTT0 BIT(20)
+#define XE_BO_FLAG_GGTT1 BIT(21)
+#define XE_BO_FLAG_GGTT2 BIT(22)
+#define XE_BO_FLAG_GGTT3 BIT(23)
+#define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(24)
/* this one is trigger internally only */
#define XE_BO_FLAG_INTERNAL_TEST BIT(30)
#define XE_BO_FLAG_INTERNAL_64K BIT(31)
+#define XE_BO_FLAG_GGTT_ALL (XE_BO_FLAG_GGTT0 | \
+ XE_BO_FLAG_GGTT1 | \
+ XE_BO_FLAG_GGTT2 | \
+ XE_BO_FLAG_GGTT3)
+
#define XE_BO_FLAG_GGTTx(tile) \
(XE_BO_FLAG_GGTT0 << (tile)->id)
@@ -276,6 +279,8 @@ int xe_bo_evict(struct xe_bo *bo);
int xe_bo_evict_pinned(struct xe_bo *bo);
int xe_bo_restore_pinned(struct xe_bo *bo);
+int xe_bo_dma_unmap_pinned(struct xe_bo *bo);
+
extern const struct ttm_device_funcs xe_ttm_funcs;
extern const char *const xe_mem_type_to_name[];
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 6a40eedd9db1..2bf74eb7f281 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -10,6 +10,42 @@
#include "xe_ggtt.h"
#include "xe_tile.h"
+typedef int (*xe_pinned_fn)(struct xe_bo *bo);
+
+static int xe_bo_apply_to_pinned(struct xe_device *xe,
+ struct list_head *pinned_list,
+ struct list_head *new_list,
+ const xe_pinned_fn pinned_fn)
+{
+ LIST_HEAD(still_in_list);
+ struct xe_bo *bo;
+ int ret = 0;
+
+ spin_lock(&xe->pinned.lock);
+ while (!ret) {
+ bo = list_first_entry_or_null(pinned_list, typeof(*bo),
+ pinned_link);
+ if (!bo)
+ break;
+ xe_bo_get(bo);
+ list_move_tail(&bo->pinned_link, &still_in_list);
+ spin_unlock(&xe->pinned.lock);
+
+ ret = pinned_fn(bo);
+ if (ret && pinned_list != new_list) {
+ spin_lock(&xe->pinned.lock);
+ list_move(&bo->pinned_link, pinned_list);
+ spin_unlock(&xe->pinned.lock);
+ }
+ xe_bo_put(bo);
+ spin_lock(&xe->pinned.lock);
+ }
+ list_splice_tail(&still_in_list, new_list);
+ spin_unlock(&xe->pinned.lock);
+
+ return ret;
+}
+
/**
* xe_bo_evict_all - evict all BOs from VRAM
*
@@ -27,9 +63,7 @@
int xe_bo_evict_all(struct xe_device *xe)
{
struct ttm_device *bdev = &xe->ttm;
- struct xe_bo *bo;
struct xe_tile *tile;
- struct list_head still_in_list;
u32 mem_type;
u8 id;
int ret;
@@ -57,34 +91,13 @@ int xe_bo_evict_all(struct xe_device *xe)
}
}
- /* Pinned user memory in VRAM */
- INIT_LIST_HEAD(&still_in_list);
- spin_lock(&xe->pinned.lock);
- for (;;) {
- bo = list_first_entry_or_null(&xe->pinned.external_vram,
- typeof(*bo), pinned_link);
- if (!bo)
- break;
- xe_bo_get(bo);
- list_move_tail(&bo->pinned_link, &still_in_list);
- spin_unlock(&xe->pinned.lock);
-
- xe_bo_lock(bo, false);
- ret = xe_bo_evict_pinned(bo);
- xe_bo_unlock(bo);
- xe_bo_put(bo);
- if (ret) {
- spin_lock(&xe->pinned.lock);
- list_splice_tail(&still_in_list,
- &xe->pinned.external_vram);
- spin_unlock(&xe->pinned.lock);
- return ret;
- }
+ ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external,
+ &xe->pinned.late.external,
+ xe_bo_evict_pinned);
- spin_lock(&xe->pinned.lock);
- }
- list_splice_tail(&still_in_list, &xe->pinned.external_vram);
- spin_unlock(&xe->pinned.lock);
+ if (!ret)
+ ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present,
+ &xe->pinned.late.evicted, xe_bo_evict_pinned);
/*
* Wait for all user BO to be evicted as those evictions depend on the
@@ -93,32 +106,49 @@ int xe_bo_evict_all(struct xe_device *xe)
for_each_tile(tile, xe, id)
xe_tile_migrate_wait(tile);
- spin_lock(&xe->pinned.lock);
- for (;;) {
- bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present,
- typeof(*bo), pinned_link);
- if (!bo)
- break;
- xe_bo_get(bo);
- list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
- spin_unlock(&xe->pinned.lock);
+ if (ret)
+ return ret;
- xe_bo_lock(bo, false);
- ret = xe_bo_evict_pinned(bo);
- xe_bo_unlock(bo);
- xe_bo_put(bo);
- if (ret)
- return ret;
+ return xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present,
+ &xe->pinned.early.evicted,
+ xe_bo_evict_pinned);
+}
- spin_lock(&xe->pinned.lock);
+static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+ int ret;
+
+ ret = xe_bo_restore_pinned(bo);
+ if (ret)
+ return ret;
+
+ if (bo->flags & XE_BO_FLAG_GGTT) {
+ struct xe_tile *tile;
+ u8 id;
+
+ for_each_tile(tile, xe_bo_device(bo), id) {
+ if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile)))
+ continue;
+
+ mutex_lock(&tile->mem.ggtt->lock);
+ xe_ggtt_map_bo(tile->mem.ggtt, bo);
+ mutex_unlock(&tile->mem.ggtt->lock);
+ }
}
- spin_unlock(&xe->pinned.lock);
+
+ /*
+ * We expect validate to trigger a move VRAM and our move code
+ * should setup the iosys map.
+ */
+ xe_assert(xe, !(bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) ||
+ !iosys_map_is_null(&bo->vmap));
return 0;
}
/**
- * xe_bo_restore_kernel - restore kernel BOs to VRAM
+ * xe_bo_restore_early - restore early phase kernel BOs to VRAM
*
* @xe: xe device
*
@@ -128,111 +158,130 @@ int xe_bo_evict_all(struct xe_device *xe)
* This function should be called early, before trying to init the GT, on device
* resume.
*/
-int xe_bo_restore_kernel(struct xe_device *xe)
+int xe_bo_restore_early(struct xe_device *xe)
{
- struct xe_bo *bo;
- int ret;
+ return xe_bo_apply_to_pinned(xe, &xe->pinned.early.evicted,
+ &xe->pinned.early.kernel_bo_present,
+ xe_bo_restore_and_map_ggtt);
+}
- spin_lock(&xe->pinned.lock);
- for (;;) {
- bo = list_first_entry_or_null(&xe->pinned.evicted,
- typeof(*bo), pinned_link);
- if (!bo)
- break;
- xe_bo_get(bo);
- list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
- spin_unlock(&xe->pinned.lock);
+/**
+ * xe_bo_restore_late - restore pinned late phase BOs
+ *
+ * @xe: xe device
+ *
+ * Move pinned user and kernel BOs which can use blitter from temporary
+ * (typically system) memory to VRAM. All moves done via TTM calls.
+ *
+ * This function should be called late, after GT init, on device resume.
+ */
+int xe_bo_restore_late(struct xe_device *xe)
+{
+ struct xe_tile *tile;
+ int ret, id;
- xe_bo_lock(bo, false);
- ret = xe_bo_restore_pinned(bo);
- xe_bo_unlock(bo);
- if (ret) {
- xe_bo_put(bo);
- return ret;
- }
+ ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.evicted,
+ &xe->pinned.late.kernel_bo_present,
+ xe_bo_restore_and_map_ggtt);
- if (bo->flags & XE_BO_FLAG_GGTT) {
- struct xe_tile *tile;
- u8 id;
+ for_each_tile(tile, xe, id)
+ xe_tile_migrate_wait(tile);
- for_each_tile(tile, xe, id) {
- if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile)))
- continue;
+ if (ret)
+ return ret;
- mutex_lock(&tile->mem.ggtt->lock);
- xe_ggtt_map_bo(tile->mem.ggtt, bo);
- mutex_unlock(&tile->mem.ggtt->lock);
- }
- }
+ if (!IS_DGFX(xe))
+ return 0;
- /*
- * We expect validate to trigger a move VRAM and our move code
- * should setup the iosys map.
- */
- xe_assert(xe, !iosys_map_is_null(&bo->vmap));
+ /* Pinned user memory in VRAM should be validated on resume */
+ ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external,
+ &xe->pinned.late.external,
+ xe_bo_restore_pinned);
- xe_bo_put(bo);
+ /* Wait for restore to complete */
+ for_each_tile(tile, xe, id)
+ xe_tile_migrate_wait(tile);
- spin_lock(&xe->pinned.lock);
- }
- spin_unlock(&xe->pinned.lock);
+ return ret;
+}
- return 0;
+static void xe_bo_pci_dev_remove_pinned(struct xe_device *xe)
+{
+ struct xe_tile *tile;
+ unsigned int id;
+
+ (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.external,
+ &xe->pinned.late.external,
+ xe_bo_dma_unmap_pinned);
+ for_each_tile(tile, xe, id)
+ xe_tile_migrate_wait(tile);
}
/**
- * xe_bo_restore_user - restore pinned user BOs to VRAM
- *
- * @xe: xe device
+ * xe_bo_pci_dev_remove_all() - Handle bos when the pci_device is about to be removed
+ * @xe: The xe device.
*
- * Move pinned user BOs from temporary (typically system) memory to VRAM via
- * CPU. All moves done via TTM calls.
+ * On pci_device removal we need to drop all dma mappings and move
+ * the data of exported bos out to system. This includes SVM bos and
+ * exported dma-buf bos. This is done by evicting all bos, but
+ * the evict placement in xe_evict_flags() is chosen such that all
+ * bos except those mentioned are purged, and thus their memory
+ * is released.
*
- * This function should be called late, after GT init, on device resume.
+ * For pinned bos, we're unmapping dma.
*/
-int xe_bo_restore_user(struct xe_device *xe)
+void xe_bo_pci_dev_remove_all(struct xe_device *xe)
{
- struct xe_bo *bo;
- struct xe_tile *tile;
- struct list_head still_in_list;
- u8 id;
- int ret;
+ unsigned int mem_type;
- if (!IS_DGFX(xe))
- return 0;
+ /*
+ * Move pagemap bos and exported dma-buf to system, and
+ * purge everything else.
+ */
+ for (mem_type = XE_PL_VRAM1; mem_type >= XE_PL_TT; --mem_type) {
+ struct ttm_resource_manager *man =
+ ttm_manager_type(&xe->ttm, mem_type);
- /* Pinned user memory in VRAM should be validated on resume */
- INIT_LIST_HEAD(&still_in_list);
- spin_lock(&xe->pinned.lock);
- for (;;) {
- bo = list_first_entry_or_null(&xe->pinned.external_vram,
- typeof(*bo), pinned_link);
- if (!bo)
- break;
- list_move_tail(&bo->pinned_link, &still_in_list);
- xe_bo_get(bo);
- spin_unlock(&xe->pinned.lock);
+ if (man) {
+ int ret = ttm_resource_manager_evict_all(&xe->ttm, man);
- xe_bo_lock(bo, false);
- ret = xe_bo_restore_pinned(bo);
- xe_bo_unlock(bo);
- xe_bo_put(bo);
- if (ret) {
- spin_lock(&xe->pinned.lock);
- list_splice_tail(&still_in_list,
- &xe->pinned.external_vram);
- spin_unlock(&xe->pinned.lock);
- return ret;
+ drm_WARN_ON(&xe->drm, ret);
}
-
- spin_lock(&xe->pinned.lock);
}
- list_splice_tail(&still_in_list, &xe->pinned.external_vram);
- spin_unlock(&xe->pinned.lock);
- /* Wait for restore to complete */
- for_each_tile(tile, xe, id)
- xe_tile_migrate_wait(tile);
+ xe_bo_pci_dev_remove_pinned(xe);
+}
- return 0;
+static void xe_bo_pinned_fini(void *arg)
+{
+ struct xe_device *xe = arg;
+
+ (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present,
+ &xe->pinned.late.kernel_bo_present,
+ xe_bo_dma_unmap_pinned);
+ (void)xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present,
+ &xe->pinned.early.kernel_bo_present,
+ xe_bo_dma_unmap_pinned);
+}
+
+/**
+ * xe_bo_pinned_init() - Initialize pinned bo tracking
+ * @xe: The xe device.
+ *
+ * Initializes the lists and locks required for pinned bo
+ * tracking and registers a callback to dma-unmap
+ * any remaining pinned bos on pci device removal.
+ *
+ * Return: %0 on success, negative error code on error.
+ */
+int xe_bo_pinned_init(struct xe_device *xe)
+{
+ spin_lock_init(&xe->pinned.lock);
+ INIT_LIST_HEAD(&xe->pinned.early.kernel_bo_present);
+ INIT_LIST_HEAD(&xe->pinned.early.evicted);
+ INIT_LIST_HEAD(&xe->pinned.late.kernel_bo_present);
+ INIT_LIST_HEAD(&xe->pinned.late.evicted);
+ INIT_LIST_HEAD(&xe->pinned.late.external);
+
+ return devm_add_action_or_reset(xe->drm.dev, xe_bo_pinned_fini, xe);
}
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h
index 746894798852..d63eb3fc5cc9 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.h
+++ b/drivers/gpu/drm/xe/xe_bo_evict.h
@@ -9,7 +9,10 @@
struct xe_device;
int xe_bo_evict_all(struct xe_device *xe);
-int xe_bo_restore_kernel(struct xe_device *xe);
-int xe_bo_restore_user(struct xe_device *xe);
+int xe_bo_restore_early(struct xe_device *xe);
+int xe_bo_restore_late(struct xe_device *xe);
+void xe_bo_pci_dev_remove_all(struct xe_device *xe);
+
+int xe_bo_pinned_init(struct xe_device *xe);
#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 15a92e3d4898..81396181aaea 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -28,6 +28,8 @@ struct xe_vm;
struct xe_bo {
/** @ttm: TTM base buffer object */
struct ttm_buffer_object ttm;
+ /** @backup_obj: The backup object when pinned and suspended (vram only) */
+ struct xe_bo *backup_obj;
/** @size: Size of this buffer object */
size_t size;
/** @flags: flags for this buffer object */
diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c
new file mode 100644
index 000000000000..cb9f175c89a1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_configfs.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/configfs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "xe_configfs.h"
+#include "xe_module.h"
+
+/**
+ * DOC: Xe Configfs
+ *
+ * Overview
+ * =========
+ *
+ * Configfs is a filesystem-based manager of kernel objects. XE KMD registers a
+ * configfs subsystem called ``'xe'`` that creates a directory in the mounted configfs directory
+ * The user can create devices under this directory and configure them as necessary
+ * See Documentation/filesystems/configfs.rst for more information about how configfs works.
+ *
+ * Create devices
+ * ===============
+ *
+ * In order to create a device, the user has to create a directory inside ``'xe'``::
+ *
+ * mkdir /sys/kernel/config/xe/0000:03:00.0/
+ *
+ * Every device created is populated by the driver with entries that can be
+ * used to configure it::
+ *
+ * /sys/kernel/config/xe/
+ * .. 0000:03:00.0/
+ * ... survivability_mode
+ *
+ * Configure Attributes
+ * ====================
+ *
+ * Survivability mode:
+ * -------------------
+ *
+ * Enable survivability mode on supported cards. This setting only takes
+ * effect when probing the device. Example to enable it::
+ *
+ * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode
+ * # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind (Enters survivability mode if supported)
+ *
+ * Remove devices
+ * ==============
+ *
+ * The created device directories can be removed using ``rmdir``::
+ *
+ * rmdir /sys/kernel/config/xe/0000:03:00.0/
+ */
+
+struct xe_config_device {
+ struct config_group group;
+
+ bool survivability_mode;
+
+ /* protects attributes */
+ struct mutex lock;
+};
+
+static struct xe_config_device *to_xe_config_device(struct config_item *item)
+{
+ return container_of(to_config_group(item), struct xe_config_device, group);
+}
+
+static ssize_t survivability_mode_show(struct config_item *item, char *page)
+{
+ struct xe_config_device *dev = to_xe_config_device(item);
+
+ return sprintf(page, "%d\n", dev->survivability_mode);
+}
+
+static ssize_t survivability_mode_store(struct config_item *item, const char *page, size_t len)
+{
+ struct xe_config_device *dev = to_xe_config_device(item);
+ bool survivability_mode;
+ int ret;
+
+ ret = kstrtobool(page, &survivability_mode);
+ if (ret)
+ return ret;
+
+ mutex_lock(&dev->lock);
+ dev->survivability_mode = survivability_mode;
+ mutex_unlock(&dev->lock);
+
+ return len;
+}
+
+CONFIGFS_ATTR(, survivability_mode);
+
+static struct configfs_attribute *xe_config_device_attrs[] = {
+ &attr_survivability_mode,
+ NULL,
+};
+
+static void xe_config_device_release(struct config_item *item)
+{
+ struct xe_config_device *dev = to_xe_config_device(item);
+
+ mutex_destroy(&dev->lock);
+ kfree(dev);
+}
+
+static struct configfs_item_operations xe_config_device_ops = {
+ .release = xe_config_device_release,
+};
+
+static const struct config_item_type xe_config_device_type = {
+ .ct_item_ops = &xe_config_device_ops,
+ .ct_attrs = xe_config_device_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct config_group *xe_config_make_device_group(struct config_group *group,
+ const char *name)
+{
+ unsigned int domain, bus, slot, function;
+ struct xe_config_device *dev;
+ struct pci_dev *pdev;
+ int ret;
+
+ ret = sscanf(name, "%04x:%02x:%02x.%x", &domain, &bus, &slot, &function);
+ if (ret != 4)
+ return ERR_PTR(-EINVAL);
+
+ pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function));
+ if (!pdev)
+ return ERR_PTR(-EINVAL);
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return ERR_PTR(-ENOMEM);
+
+ config_group_init_type_name(&dev->group, name, &xe_config_device_type);
+
+ mutex_init(&dev->lock);
+
+ return &dev->group;
+}
+
+static struct configfs_group_operations xe_config_device_group_ops = {
+ .make_group = xe_config_make_device_group,
+};
+
+static const struct config_item_type xe_configfs_type = {
+ .ct_group_ops = &xe_config_device_group_ops,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem xe_configfs = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "xe",
+ .ci_type = &xe_configfs_type,
+ },
+ },
+};
+
+static struct xe_config_device *configfs_find_group(struct pci_dev *pdev)
+{
+ struct config_item *item;
+ char name[64];
+
+ snprintf(name, sizeof(name), "%04x:%02x:%02x.%x", pci_domain_nr(pdev->bus),
+ pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+
+ mutex_lock(&xe_configfs.su_mutex);
+ item = config_group_find_item(&xe_configfs.su_group, name);
+ mutex_unlock(&xe_configfs.su_mutex);
+
+ if (!item)
+ return NULL;
+
+ return to_xe_config_device(item);
+}
+
+/**
+ * xe_configfs_get_survivability_mode - get configfs survivability mode attribute
+ * @pdev: pci device
+ *
+ * find the configfs group that belongs to the pci device and return
+ * the survivability mode attribute
+ *
+ * Return: survivability mode if config group is found, false otherwise
+ */
+bool xe_configfs_get_survivability_mode(struct pci_dev *pdev)
+{
+ struct xe_config_device *dev = configfs_find_group(pdev);
+ bool mode;
+
+ if (!dev)
+ return false;
+
+ mode = dev->survivability_mode;
+ config_item_put(&dev->group.cg_item);
+
+ return mode;
+}
+
+/**
+ * xe_configfs_clear_survivability_mode - clear configfs survivability mode attribute
+ * @pdev: pci device
+ *
+ * find the configfs group that belongs to the pci device and clear survivability
+ * mode attribute
+ */
+void xe_configfs_clear_survivability_mode(struct pci_dev *pdev)
+{
+ struct xe_config_device *dev = configfs_find_group(pdev);
+
+ if (!dev)
+ return;
+
+ mutex_lock(&dev->lock);
+ dev->survivability_mode = 0;
+ mutex_unlock(&dev->lock);
+
+ config_item_put(&dev->group.cg_item);
+}
+
+int __init xe_configfs_init(void)
+{
+ struct config_group *root = &xe_configfs.su_group;
+ int ret;
+
+ config_group_init(root);
+ mutex_init(&xe_configfs.su_mutex);
+ ret = configfs_register_subsystem(&xe_configfs);
+ if (ret) {
+ pr_err("Error %d while registering %s subsystem\n",
+ ret, root->cg_item.ci_namebuf);
+ return ret;
+ }
+
+ return 0;
+}
+
+void __exit xe_configfs_exit(void)
+{
+ configfs_unregister_subsystem(&xe_configfs);
+}
+
diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h
new file mode 100644
index 000000000000..d7d041ec2611
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_configfs.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+#ifndef _XE_CONFIGFS_H_
+#define _XE_CONFIGFS_H_
+
+#include <linux/types.h>
+
+struct pci_dev;
+
+#if IS_ENABLED(CONFIG_CONFIGFS_FS)
+int xe_configfs_init(void);
+void xe_configfs_exit(void);
+bool xe_configfs_get_survivability_mode(struct pci_dev *pdev);
+void xe_configfs_clear_survivability_mode(struct pci_dev *pdev);
+#else
+static inline int xe_configfs_init(void) { return 0; };
+static inline void xe_configfs_exit(void) {};
+static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; };
+static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) {};
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 00191227bc95..75e753e0a682 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -23,6 +23,7 @@
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
+#include "xe_bo_evict.h"
#include "xe_debugfs.h"
#include "xe_devcoredump.h"
#include "xe_dma_buf.h"
@@ -467,10 +468,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
xa_erase(&xe->usm.asid_to_vm, asid);
}
- spin_lock_init(&xe->pinned.lock);
- INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
- INIT_LIST_HEAD(&xe->pinned.external_vram);
- INIT_LIST_HEAD(&xe->pinned.evicted);
+ err = xe_bo_pinned_init(xe);
+ if (err)
+ goto err;
xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq",
WQ_MEM_RECLAIM);
@@ -505,7 +505,15 @@ ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */
static bool xe_driver_flr_disabled(struct xe_device *xe)
{
- return xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS;
+ if (IS_SRIOV_VF(xe))
+ return true;
+
+ if (xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
+ drm_info(&xe->drm, "Driver-FLR disabled by BIOS\n");
+ return true;
+ }
+
+ return false;
}
/*
@@ -523,7 +531,7 @@ static bool xe_driver_flr_disabled(struct xe_device *xe)
*/
static void __xe_driver_flr(struct xe_device *xe)
{
- const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
+ const unsigned int flr_timeout = 3 * USEC_PER_SEC; /* specs recommend a 3s wait */
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
int ret;
@@ -569,10 +577,8 @@ static void __xe_driver_flr(struct xe_device *xe)
static void xe_driver_flr(struct xe_device *xe)
{
- if (xe_driver_flr_disabled(xe)) {
- drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
+ if (xe_driver_flr_disabled(xe))
return;
- }
__xe_driver_flr(xe);
}
@@ -706,7 +712,7 @@ int xe_device_probe_early(struct xe_device *xe)
sriov_update_device_info(xe);
err = xe_pcode_probe_early(xe);
- if (err) {
+ if (err || xe_survivability_mode_is_requested(xe)) {
int save_err = err;
/*
@@ -729,6 +735,7 @@ int xe_device_probe_early(struct xe_device *xe)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_device_probe_early, ERRNO); /* See xe_pci_probe() */
static int probe_has_flat_ccs(struct xe_device *xe)
{
@@ -932,6 +939,8 @@ void xe_device_remove(struct xe_device *xe)
xe_display_unregister(xe);
drm_dev_unplug(&xe->drm);
+
+ xe_bo_pci_dev_remove_all(xe);
}
void xe_device_shutdown(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index f7f2afa596c5..0369fc09c9da 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -107,6 +107,9 @@ struct xe_vram_region {
resource_size_t actual_physical_size;
/** @mapping: pointer to VRAM mappable space */
void __iomem *mapping;
+ /** @ttm: VRAM TTM manager */
+ struct xe_ttm_vram_mgr ttm;
+#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
/** @pagemap: Used to remap device memory as ZONE_DEVICE */
struct dev_pagemap pagemap;
/**
@@ -120,8 +123,7 @@ struct xe_vram_region {
* This is generated when remap device memory as ZONE_DEVICE
*/
resource_size_t hpa_base;
- /** @ttm: VRAM TTM manager */
- struct xe_ttm_vram_mgr ttm;
+#endif
};
/**
@@ -314,6 +316,8 @@ struct xe_device {
u8 has_atomic_enable_pte_bit:1;
/** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */
u8 has_device_atomics_on_smem:1;
+ /** @info.has_fan_control: Device supports fan control */
+ u8 has_fan_control:1;
/** @info.has_flat_ccs: Whether flat CCS metadata is used */
u8 has_flat_ccs:1;
/** @info.has_heci_cscfi: device has heci cscfi */
@@ -332,6 +336,8 @@ struct xe_device {
u8 has_usm:1;
/** @info.is_dgfx: is discrete device */
u8 is_dgfx:1;
+ /** @info.needs_scratch: needs scratch page for oob prefetch to work */
+ u8 needs_scratch:1;
/**
* @info.probe_display: Probe display hardware. If set to
* false, the driver will behave as if there is no display
@@ -418,12 +424,22 @@ struct xe_device {
struct {
/** @pinned.lock: protected pinned BO list state */
spinlock_t lock;
- /** @pinned.kernel_bo_present: pinned kernel BO that are present */
- struct list_head kernel_bo_present;
- /** @pinned.evicted: pinned BO that have been evicted */
- struct list_head evicted;
- /** @pinned.external_vram: pinned external BO in vram*/
- struct list_head external_vram;
+ /** @pinned.early: early pinned lists */
+ struct {
+ /** @pinned.early.kernel_bo_present: pinned kernel BO that are present */
+ struct list_head kernel_bo_present;
+ /** @pinned.early.evicted: pinned BO that have been evicted */
+ struct list_head evicted;
+ } early;
+ /** @pinned.late: late pinned lists */
+ struct {
+ /** @pinned.late.kernel_bo_present: pinned kernel BO that are present */
+ struct list_head kernel_bo_present;
+ /** @pinned.late.evicted: pinned BO that have been evicted */
+ struct list_head evicted;
+ /** @pinned.external: pinned external and dma-buf. */
+ struct list_head external;
+ } late;
} pinned;
/** @ufence_wq: user fence wait queue */
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index f06961575d8a..346f857f3837 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -145,10 +145,7 @@ static void xe_dma_buf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
- struct dma_buf *dma_buf = attach->dmabuf;
- struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv);
-
- if (!xe_bo_is_vram(bo)) {
+ if (sg_page(sgt->sgl)) {
dma_unmap_sgtable(attach->dev, sgt, dir, 0);
sg_free_table(sgt);
kfree(sgt);
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 4f6784e5abf8..8a5cba22b586 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -49,9 +49,6 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
fw->gt = gt;
spin_lock_init(&fw->lock);
- /* Assuming gen11+ so assert this assumption is correct */
- xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
-
if (xe->info.graphics_verx100 >= 1270) {
init_domain(fw, XE_FW_DOMAIN_ID_GT,
FORCEWAKE_GT,
@@ -67,9 +64,6 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
{
int i, j;
- /* Assuming gen11+ so assert this assumption is correct */
- xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
-
if (!xe_gt_is_media_type(gt))
init_domain(fw, XE_FW_DOMAIN_ID_RENDER,
FORCEWAKE_RENDER,
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 5fcb2b4c2c13..7062115909f2 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -365,7 +365,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt)
* scratch entries, rather keep the scratch page in system memory on
* platforms where 64K pages are needed for VRAM.
*/
- flags = XE_BO_FLAG_PINNED;
+ flags = 0;
if (ggtt->flags & XE_GGTT_FLAGS_64K)
flags |= XE_BO_FLAG_SYSTEM;
else
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 10a9e3c72b36..8068b4bc0a09 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -12,8 +12,10 @@
#include <generated/xe_wa_oob.h>
+#include "instructions/xe_alu_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_mi_commands.h"
+#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_bb.h"
@@ -176,15 +178,6 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
return 0;
}
-/*
- * Convert back from encoded value to type-safe, only to be used when reg.mcr
- * is true
- */
-static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg)
-{
- return (const struct xe_reg_mcr){.__reg.raw = reg.raw };
-}
-
static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
{
struct xe_reg_sr *sr = &q->hwe->reg_lrc;
@@ -194,6 +187,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
struct xe_bb *bb;
struct dma_fence *fence;
long timeout;
+ int count_rmw = 0;
int count = 0;
if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
@@ -206,30 +200,32 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
if (IS_ERR(bb))
return PTR_ERR(bb);
- xa_for_each(&sr->xa, idx, entry)
- ++count;
+ /* count RMW registers as those will be handled separately */
+ xa_for_each(&sr->xa, idx, entry) {
+ if (entry->reg.masked || entry->clr_bits == ~0)
+ ++count;
+ else
+ ++count_rmw;
+ }
- if (count) {
+ if (count || count_rmw)
xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);
+ if (count) {
+ /* emit single LRI with all non RMW regs */
+
bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
xa_for_each(&sr->xa, idx, entry) {
struct xe_reg reg = entry->reg;
- struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg);
u32 val;
- /*
- * Skip reading the register if it's not really needed
- */
if (reg.masked)
val = entry->clr_bits << 16;
- else if (entry->clr_bits + 1)
- val = (reg.mcr ?
- xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
- xe_mmio_read32(&gt->mmio, reg)) & (~entry->clr_bits);
- else
+ else if (entry->clr_bits == ~0)
val = 0;
+ else
+ continue;
val |= entry->set_bits;
@@ -239,6 +235,52 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
}
}
+ if (count_rmw) {
+ /* emit MI_MATH for each RMW reg */
+
+ xa_for_each(&sr->xa, idx, entry) {
+ if (entry->reg.masked || entry->clr_bits == ~0)
+ continue;
+
+ bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
+ bb->cs[bb->len++] = entry->reg.addr;
+ bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
+
+ bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
+ MI_LRI_LRM_CS_MMIO;
+ bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr;
+ bb->cs[bb->len++] = entry->clr_bits;
+ bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr;
+ bb->cs[bb->len++] = entry->set_bits;
+
+ bb->cs[bb->len++] = MI_MATH(8);
+ bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0);
+ bb->cs[bb->len++] = CS_ALU_INSTR_LOADINV(SRCB, REG1);
+ bb->cs[bb->len++] = CS_ALU_INSTR_AND;
+ bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU);
+ bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0);
+ bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCB, REG2);
+ bb->cs[bb->len++] = CS_ALU_INSTR_OR;
+ bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU);
+
+ bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO;
+ bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
+ bb->cs[bb->len++] = entry->reg.addr;
+
+ xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n",
+ entry->reg.addr, entry->clr_bits, entry->set_bits);
+ }
+
+ /* reset used GPR */
+ bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | MI_LRI_LRM_CS_MMIO;
+ bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
+ bb->cs[bb->len++] = 0;
+ bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr;
+ bb->cs[bb->len++] = 0;
+ bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr;
+ bb->cs[bb->len++] = 0;
+ }
+
xe_lrc_emit_hwe_state_instructions(q, bb);
job = xe_bb_create_job(q, bb);
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 2d63a69cbfa3..a88076e9cc7d 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -299,20 +299,20 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p)
return 0;
}
-static const struct drm_info_list debugfs_list[] = {
- {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines},
+/*
+ * only for GT debugfs files which can be safely used on the VF as well:
+ * - without access to the GT privileged registers
+ * - without access to the PF specific data
+ */
+static const struct drm_info_list vf_safe_debugfs_list[] = {
{"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset},
{"force_reset_sync", .show = xe_gt_debugfs_simple_show, .data = force_reset_sync},
{"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info},
{"topology", .show = xe_gt_debugfs_simple_show, .data = topology},
- {"steering", .show = xe_gt_debugfs_simple_show, .data = steering},
{"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt},
- {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info},
{"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore},
{"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds},
{"tunings", .show = xe_gt_debugfs_simple_show, .data = tunings},
- {"pat", .show = xe_gt_debugfs_simple_show, .data = pat},
- {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs},
{"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc},
{"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc},
{"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc},
@@ -322,6 +322,15 @@ static const struct drm_info_list debugfs_list[] = {
{"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig},
};
+/* everything else should be added here */
+static const struct drm_info_list pf_only_debugfs_list[] = {
+ {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines},
+ {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs},
+ {"pat", .show = xe_gt_debugfs_simple_show, .data = pat},
+ {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info},
+ {"steering", .show = xe_gt_debugfs_simple_show, .data = steering},
+};
+
void xe_gt_debugfs_register(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
@@ -345,10 +354,15 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
*/
root->d_inode->i_private = gt;
- drm_debugfs_create_files(debugfs_list,
- ARRAY_SIZE(debugfs_list),
+ drm_debugfs_create_files(vf_safe_debugfs_list,
+ ARRAY_SIZE(vf_safe_debugfs_list),
root, minor);
+ if (!IS_SRIOV_VF(xe))
+ drm_debugfs_create_files(pf_only_debugfs_list,
+ ARRAY_SIZE(pf_only_debugfs_list),
+ root, minor);
+
xe_uc_debugfs_register(&gt->uc, root);
if (IS_SRIOV_PF(xe))
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 605aad3554e7..d4d9730f0d2c 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -345,7 +345,8 @@ fallback:
* Some older platforms don't have tables or don't have complete tables.
* Newer platforms should always have the required info.
*/
- if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 2000)
+ if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 2000 &&
+ !gt_to_xe(gt)->info.force_execlist)
xe_gt_err(gt, "Slice/Subslice counts missing from hwconfig table; using typical fallback values\n");
if (gt_to_xe(gt)->info.platform == XE_PVC)
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index c5ad9a0a89c2..10622ca471a2 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -240,7 +240,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
atomic = access_is_atomic(pf->access_type);
if (xe_vma_is_cpu_addr_mirror(vma))
- err = xe_svm_handle_pagefault(vm, vma, gt_to_tile(gt),
+ err = xe_svm_handle_pagefault(vm, vma, gt,
pf->page_addr, atomic);
else
err = handle_vma_pagefault(gt, vma, atomic);
@@ -435,9 +435,16 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss,
XE_MAX_EU_FUSE_BITS) * num_dss;
- /* user can issue separate page faults per EU and per CS */
+ /*
+ * user can issue separate page faults per EU and per CS
+ *
+ * XXX: Multiplier required as compute UMD are getting PF queue errors
+ * without it. Follow on why this multiplier is required.
+ */
+#define PF_MULTIPLIER 8
pf_queue->num_dw =
- (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW;
+ (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER;
+#undef PF_MULTIPLIER
pf_queue->gt = gt;
pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw,
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 10be109bf357..2420a548cacc 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -1444,15 +1444,23 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
return 0;
xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M);
- bo = xe_bo_create_pin_map(xe, tile, NULL,
- ALIGN(size, PAGE_SIZE),
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_NEEDS_2M |
- XE_BO_FLAG_PINNED);
+ bo = xe_bo_create_locked(xe, tile, NULL,
+ ALIGN(size, PAGE_SIZE),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_NEEDS_2M |
+ XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_PINNED_LATE_RESTORE);
if (IS_ERR(bo))
return PTR_ERR(bo);
+ err = xe_bo_pin(bo);
+ xe_bo_unlock(bo);
+ if (unlikely(err)) {
+ xe_bo_put(bo);
+ return err;
+ }
+
config->lmem_obj = bo;
if (xe_device_has_lmtt(xe)) {
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
index b2521dd6ec42..0fe47f41b63c 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
@@ -51,27 +51,18 @@ static unsigned int extract_vfid(struct dentry *d)
* /sys/kernel/debug/dri/0/
* ├── gt0
* │   ├── pf
- * │   │   ├── ggtt_available
- * │   │   ├── ggtt_provisioned
* │   │   ├── contexts_provisioned
* │   │   ├── doorbells_provisioned
* │   │   ├── runtime_registers
* │   │   ├── negotiated_versions
* │   │   ├── adverse_events
+ * ├── gt1
+ * │   ├── pf
+ * │   │   ├── ...
*/
static const struct drm_info_list pf_info[] = {
{
- "ggtt_available",
- .show = xe_gt_debugfs_simple_show,
- .data = xe_gt_sriov_pf_config_print_available_ggtt,
- },
- {
- "ggtt_provisioned",
- .show = xe_gt_debugfs_simple_show,
- .data = xe_gt_sriov_pf_config_print_ggtt,
- },
- {
"contexts_provisioned",
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_pf_config_print_ctxs,
@@ -82,11 +73,6 @@ static const struct drm_info_list pf_info[] = {
.data = xe_gt_sriov_pf_config_print_dbs,
},
{
- "lmem_provisioned",
- .show = xe_gt_debugfs_simple_show,
- .data = xe_gt_sriov_pf_config_print_lmem,
- },
- {
"runtime_registers",
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_pf_service_print_runtime,
@@ -107,6 +93,42 @@ static const struct drm_info_list pf_info[] = {
* /sys/kernel/debug/dri/0/
* ├── gt0
* │   ├── pf
+ * │   │   ├── ggtt_available
+ * │   │   ├── ggtt_provisioned
+ */
+
+static const struct drm_info_list pf_ggtt_info[] = {
+ {
+ "ggtt_available",
+ .show = xe_gt_debugfs_simple_show,
+ .data = xe_gt_sriov_pf_config_print_available_ggtt,
+ },
+ {
+ "ggtt_provisioned",
+ .show = xe_gt_debugfs_simple_show,
+ .data = xe_gt_sriov_pf_config_print_ggtt,
+ },
+};
+
+/*
+ * /sys/kernel/debug/dri/0/
+ * ├── gt0
+ * │   ├── pf
+ * │   │   ├── lmem_provisioned
+ */
+
+static const struct drm_info_list pf_lmem_info[] = {
+ {
+ "lmem_provisioned",
+ .show = xe_gt_debugfs_simple_show,
+ .data = xe_gt_sriov_pf_config_print_lmem,
+ },
+};
+
+/*
+ * /sys/kernel/debug/dri/0/
+ * ├── gt0
+ * │   ├── pf
* │   │   ├── reset_engine
* │   │   ├── sample_period
* │   │   ├── sched_if_idle
@@ -532,6 +554,16 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root)
pfdentry->d_inode->i_private = gt;
drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor);
+ if (!xe_gt_is_media_type(gt)) {
+ drm_debugfs_create_files(pf_ggtt_info,
+ ARRAY_SIZE(pf_ggtt_info),
+ pfdentry, minor);
+ if (IS_DGFX(gt_to_xe(gt)))
+ drm_debugfs_create_files(pf_lmem_info,
+ ARRAY_SIZE(pf_lmem_info),
+ pfdentry, minor);
+ }
+
pf_add_policy_attrs(gt, pfdentry);
pf_add_config_attrs(gt, pfdentry, PFID);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
index 4efde5f46b43..821cfcc34e6b 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
@@ -112,7 +112,6 @@ static const struct xe_reg tgl_runtime_regs[] = {
XELP_GT_SLICE_ENABLE, /* _MMIO(0x9138) */
XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */
GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */
- CTC_MODE, /* _MMIO(0xa26c) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
};
@@ -124,7 +123,6 @@ static const struct xe_reg ats_m_runtime_regs[] = {
XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */
GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */
XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */
- CTC_MODE, /* _MMIO(0xa26c) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
};
@@ -136,7 +134,6 @@ static const struct xe_reg pvc_runtime_regs[] = {
GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */
XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */
XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */
- CTC_MODE, /* _MMIO(0xA26C) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
};
@@ -150,7 +147,6 @@ static const struct xe_reg ver_1270_runtime_regs[] = {
GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */
XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */
XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */
- CTC_MODE, /* _MMIO(0xa26c) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
};
@@ -167,7 +163,6 @@ static const struct xe_reg ver_2000_runtime_regs[] = {
XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */
XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */
XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */
- CTC_MODE, /* _MMIO(0xa26c) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
};
@@ -185,7 +180,6 @@ static const struct xe_reg ver_3000_runtime_regs[] = {
XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */
XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */
XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */
- CTC_MODE, /* _MMIO(0xa26c) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
};
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c
index 6155ea354432..30f942671c2b 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.c
+++ b/drivers/gpu/drm/xe/xe_gt_stats.c
@@ -27,6 +27,7 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr)
}
static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = {
+ "svm_pagefault_count",
"tlb_inval_count",
"vma_pagefault_count",
"vma_pagefault_kb",
diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h
index d556771f99d6..be3244d7133c 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h
@@ -7,6 +7,7 @@
#define _XE_GT_STATS_TYPES_H_
enum xe_gt_stats_id {
+ XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT,
XE_GT_STATS_ID_TLB_INVAL,
XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT,
XE_GT_STATS_ID_VMA_PAGEFAULT_KB,
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index bc5714a5b36b..38866135c019 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -483,7 +483,8 @@ static int guc_g2g_alloc(struct xe_guc *guc)
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_ALL |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_PINNED_NORESTORE);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index e7c9e095a19f..44c1fa2fe7c8 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -376,6 +376,11 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET,
&offset, &remain);
+ if (GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_WA(gt, 16026508708))
+ guc_waklv_enable_simple(ads,
+ GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH,
+ &offset, &remain);
+
size = guc_ads_waklv_size(ads) - remain;
if (!size)
return;
@@ -414,7 +419,8 @@ int xe_guc_ads_init(struct xe_guc_ads *ads)
bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE,
XE_BO_FLAG_SYSTEM |
XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_PINNED_NORESTORE);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -490,24 +496,52 @@ static void fill_engine_enable_masks(struct xe_gt *gt,
engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER));
}
-static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads)
+/*
+ * Write the offsets corresponding to the golden LRCs. The actual data is
+ * populated later by guc_golden_lrc_populate()
+ */
+static void guc_golden_lrc_init(struct xe_guc_ads *ads)
{
struct xe_device *xe = ads_to_xe(ads);
+ struct xe_gt *gt = ads_to_gt(ads);
struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
offsetof(struct __guc_ads_blob, system_info));
- u8 guc_class;
+ size_t alloc_size, real_size;
+ u32 addr_ggtt, offset;
+ int class;
+
+ offset = guc_ads_golden_lrc_offset(ads);
+ addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
+
+ for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+ u8 guc_class;
+
+ guc_class = xe_engine_class_to_guc_class(class);
- for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) {
if (!info_map_read(xe, &info_map,
engine_enabled_masks[guc_class]))
continue;
+ real_size = xe_gt_lrc_size(gt, class);
+ alloc_size = PAGE_ALIGN(real_size);
+
+ /*
+ * This interface is slightly confusing. We need to pass the
+ * base address of the full golden context and the size of just
+ * the engine state, which is the section of the context image
+ * that starts after the execlists LRC registers. This is
+ * required to allow the GuC to restore just the engine state
+ * when a watchdog reset occurs.
+ * We calculate the engine state size by removing the size of
+ * what comes before it in the context image (which is identical
+ * on all engines).
+ */
ads_blob_write(ads, ads.eng_state_size[guc_class],
- guc_ads_golden_lrc_size(ads) -
- xe_lrc_skip_size(xe));
+ real_size - xe_lrc_skip_size(xe));
ads_blob_write(ads, ads.golden_context_lrca[guc_class],
- xe_bo_ggtt_addr(ads->bo) +
- guc_ads_golden_lrc_offset(ads));
+ addr_ggtt);
+
+ addr_ggtt += alloc_size;
}
}
@@ -682,8 +716,8 @@ static int guc_capture_prep_lists(struct xe_guc_ads *ads)
}
if (ads->capture_size != PAGE_ALIGN(total_size))
- xe_gt_dbg(gt, "ADS capture alloc size changed from %d to %d\n",
- ads->capture_size, PAGE_ALIGN(total_size));
+ xe_gt_dbg(gt, "Updated ADS capture size %d (was %d)\n",
+ PAGE_ALIGN(total_size), ads->capture_size);
return PAGE_ALIGN(total_size);
}
@@ -857,7 +891,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
guc_policies_init(ads);
- guc_prep_golden_lrc_null(ads);
+ guc_golden_lrc_init(ads);
guc_mapping_table_init_invalid(gt, &info_map);
guc_doorbell_init(ads);
@@ -883,7 +917,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
guc_policies_init(ads);
fill_engine_enable_masks(gt, &info_map);
guc_mmio_reg_state_init(ads);
- guc_prep_golden_lrc_null(ads);
+ guc_golden_lrc_init(ads);
guc_mapping_table_init(gt, &info_map);
guc_capture_prep_lists(ads);
guc_doorbell_init(ads);
@@ -903,18 +937,22 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
guc_ads_private_data_offset(ads));
}
-static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
+/*
+ * After the golden LRC's are recorded for each engine class by the first
+ * submission, copy them to the ADS, as initialized earlier by
+ * guc_golden_lrc_init().
+ */
+static void guc_golden_lrc_populate(struct xe_guc_ads *ads)
{
struct xe_device *xe = ads_to_xe(ads);
struct xe_gt *gt = ads_to_gt(ads);
struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
offsetof(struct __guc_ads_blob, system_info));
size_t total_size = 0, alloc_size, real_size;
- u32 addr_ggtt, offset;
+ u32 offset;
int class;
offset = guc_ads_golden_lrc_offset(ads);
- addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
u8 guc_class;
@@ -931,26 +969,9 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
alloc_size = PAGE_ALIGN(real_size);
total_size += alloc_size;
- /*
- * This interface is slightly confusing. We need to pass the
- * base address of the full golden context and the size of just
- * the engine state, which is the section of the context image
- * that starts after the execlists LRC registers. This is
- * required to allow the GuC to restore just the engine state
- * when a watchdog reset occurs.
- * We calculate the engine state size by removing the size of
- * what comes before it in the context image (which is identical
- * on all engines).
- */
- ads_blob_write(ads, ads.eng_state_size[guc_class],
- real_size - xe_lrc_skip_size(xe));
- ads_blob_write(ads, ads.golden_context_lrca[guc_class],
- addr_ggtt);
-
xe_map_memcpy_to(xe, ads_to_map(ads), offset,
gt->default_lrc[class], real_size);
- addr_ggtt += alloc_size;
offset += alloc_size;
}
@@ -959,7 +980,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
{
- guc_populate_golden_lrc(ads);
+ guc_golden_lrc_populate(ads);
}
static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset)
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 72ad576fc18e..0a4fef7d7225 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -238,7 +238,8 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(),
XE_BO_FLAG_SYSTEM |
XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_PINNED_NORESTORE);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -1828,10 +1829,10 @@ static void ct_dead_print(struct xe_dead_ct *dead)
return;
}
- drm_printf(&lp, "CTB is dead - reason=0x%X\n", dead->reason);
/* Can't generate a genuine core dump at this point, so just do the good bits */
drm_puts(&lp, "**** Xe Device Coredump ****\n");
+ drm_printf(&lp, "Reason: CTB is dead - 0x%X\n", dead->reason);
xe_device_snapshot_print(xe, &lp);
drm_printf(&lp, "**** GT #%d ****\n", gt->info.id);
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
index c569ff456e74..f33013f8a0f3 100644
--- a/drivers/gpu/drm/xe/xe_guc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -17,101 +17,119 @@
#include "xe_macros.h"
#include "xe_pm.h"
-static struct xe_guc *node_to_guc(struct drm_info_node *node)
-{
- return node->info_ent->data;
-}
-
-static int guc_info(struct seq_file *m, void *data)
+/*
+ * guc_debugfs_show - A show callback for struct drm_info_list
+ * @m: the &seq_file
+ * @data: data used by the drm debugfs helpers
+ *
+ * This callback can be used in struct drm_info_list to describe debugfs
+ * files that are &xe_guc specific in similar way how we handle &xe_gt
+ * specific files using &xe_gt_debugfs_simple_show.
+ *
+ * It is assumed that those debugfs files will be created on directory entry
+ * which grandparent struct dentry d_inode->i_private points to &xe_gt.
+ *
+ * /sys/kernel/debug/dri/0/
+ * ├── gt0 # dent->d_parent->d_parent (d_inode->i_private == gt)
+ * │   ├── uc # dent->d_parent
+ * │   │   ├── guc_info # dent
+ * │   │   ├── guc_...
+ *
+ * This function assumes that &m->private will be set to the &struct
+ * drm_info_node corresponding to the instance of the info on a given &struct
+ * drm_minor (see struct drm_info_list.show for details).
+ *
+ * This function also assumes that struct drm_info_list.data will point to the
+ * function code that will actually print a file content::
+ *
+ * int (*print)(struct xe_guc *, struct drm_printer *)
+ *
+ * Example::
+ *
+ * int foo(struct xe_guc *guc, struct drm_printer *p)
+ * {
+ * drm_printf(p, "enabled %d\n", guc->submission_state.enabled);
+ * return 0;
+ * }
+ *
+ * static const struct drm_info_list bar[] = {
+ * { name = "foo", .show = guc_debugfs_show, .data = foo },
+ * };
+ *
+ * parent = debugfs_create_dir("uc", gtdir);
+ * drm_debugfs_create_files(bar, ARRAY_SIZE(bar), parent, minor);
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+static int guc_debugfs_show(struct seq_file *m, void *data)
{
- struct xe_guc *guc = node_to_guc(m->private);
- struct xe_device *xe = guc_to_xe(guc);
struct drm_printer p = drm_seq_file_printer(m);
+ struct drm_info_node *node = m->private;
+ struct dentry *parent = node->dent->d_parent;
+ struct dentry *grandparent = parent->d_parent;
+ struct xe_gt *gt = grandparent->d_inode->i_private;
+ struct xe_device *xe = gt_to_xe(gt);
+ int (*print)(struct xe_guc *, struct drm_printer *) = node->info_ent->data;
+ int ret;
xe_pm_runtime_get(xe);
- xe_guc_print_info(guc, &p);
+ ret = print(&gt->uc.guc, &p);
xe_pm_runtime_put(xe);
- return 0;
+ return ret;
}
-static int guc_log(struct seq_file *m, void *data)
+static int guc_log(struct xe_guc *guc, struct drm_printer *p)
{
- struct xe_guc *guc = node_to_guc(m->private);
- struct xe_device *xe = guc_to_xe(guc);
- struct drm_printer p = drm_seq_file_printer(m);
-
- xe_pm_runtime_get(xe);
- xe_guc_log_print(&guc->log, &p);
- xe_pm_runtime_put(xe);
-
+ xe_guc_log_print(&guc->log, p);
return 0;
}
-static int guc_log_dmesg(struct seq_file *m, void *data)
+static int guc_log_dmesg(struct xe_guc *guc, struct drm_printer *p)
{
- struct xe_guc *guc = node_to_guc(m->private);
- struct xe_device *xe = guc_to_xe(guc);
-
- xe_pm_runtime_get(xe);
xe_guc_log_print_dmesg(&guc->log);
- xe_pm_runtime_put(xe);
-
return 0;
}
-static int guc_ctb(struct seq_file *m, void *data)
+static int guc_ctb(struct xe_guc *guc, struct drm_printer *p)
{
- struct xe_guc *guc = node_to_guc(m->private);
- struct xe_device *xe = guc_to_xe(guc);
- struct drm_printer p = drm_seq_file_printer(m);
-
- xe_pm_runtime_get(xe);
- xe_guc_ct_print(&guc->ct, &p, true);
- xe_pm_runtime_put(xe);
-
+ xe_guc_ct_print(&guc->ct, p, true);
return 0;
}
-static int guc_pc(struct seq_file *m, void *data)
+static int guc_pc(struct xe_guc *guc, struct drm_printer *p)
{
- struct xe_guc *guc = node_to_guc(m->private);
- struct xe_device *xe = guc_to_xe(guc);
- struct drm_printer p = drm_seq_file_printer(m);
-
- xe_pm_runtime_get(xe);
- xe_guc_pc_print(&guc->pc, &p);
- xe_pm_runtime_put(xe);
-
+ xe_guc_pc_print(&guc->pc, p);
return 0;
}
-static const struct drm_info_list debugfs_list[] = {
- {"guc_info", guc_info, 0},
- {"guc_log", guc_log, 0},
- {"guc_log_dmesg", guc_log_dmesg, 0},
- {"guc_ctb", guc_ctb, 0},
- {"guc_pc", guc_pc, 0},
+/*
+ * only for GuC debugfs files which can be safely used on the VF as well:
+ * - without access to the GuC privileged registers
+ * - without access to the PF specific GuC objects
+ */
+static const struct drm_info_list vf_safe_debugfs_list[] = {
+ { "guc_info", .show = guc_debugfs_show, .data = xe_guc_print_info },
+ { "guc_ctb", .show = guc_debugfs_show, .data = guc_ctb },
+};
+
+/* everything else should be added here */
+static const struct drm_info_list pf_only_debugfs_list[] = {
+ { "guc_log", .show = guc_debugfs_show, .data = guc_log },
+ { "guc_log_dmesg", .show = guc_debugfs_show, .data = guc_log_dmesg },
+ { "guc_pc", .show = guc_debugfs_show, .data = guc_pc },
};
void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
{
struct drm_minor *minor = guc_to_xe(guc)->drm.primary;
- struct drm_info_list *local;
- int i;
-#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
- local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL);
- if (!local)
- return;
-
- memcpy(local, debugfs_list, DEBUGFS_SIZE);
-#undef DEBUGFS_SIZE
-
- for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
- local[i].data = guc;
-
- drm_debugfs_create_files(local,
- ARRAY_SIZE(debugfs_list),
+ drm_debugfs_create_files(vf_safe_debugfs_list,
+ ARRAY_SIZE(vf_safe_debugfs_list),
parent, minor);
+
+ if (!IS_SRIOV_VF(guc_to_xe(guc)))
+ drm_debugfs_create_files(pf_only_debugfs_list,
+ ARRAY_SIZE(pf_only_debugfs_list),
+ parent, minor);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.c b/drivers/gpu/drm/xe/xe_guc_engine_activity.c
index 2a457dcf31d5..b96fea78df8b 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_activity.c
+++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.c
@@ -17,36 +17,61 @@
#include "xe_hw_engine.h"
#include "xe_map.h"
#include "xe_mmio.h"
+#include "xe_sriov_pf_helpers.h"
#include "xe_trace_guc.h"
#define TOTAL_QUANTA 0x8000
-static struct iosys_map engine_activity_map(struct xe_guc *guc, struct xe_hw_engine *hwe)
+static struct iosys_map engine_activity_map(struct xe_guc *guc, struct xe_hw_engine *hwe,
+ unsigned int index)
{
struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
- struct engine_activity_buffer *buffer = &engine_activity->device_buffer;
+ struct engine_activity_buffer *buffer;
u16 guc_class = xe_engine_class_to_guc_class(hwe->class);
size_t offset;
- offset = offsetof(struct guc_engine_activity_data,
+ if (engine_activity->num_functions) {
+ buffer = &engine_activity->function_buffer;
+ offset = sizeof(struct guc_engine_activity_data) * index;
+ } else {
+ buffer = &engine_activity->device_buffer;
+ offset = 0;
+ }
+
+ offset += offsetof(struct guc_engine_activity_data,
engine_activity[guc_class][hwe->logical_instance]);
return IOSYS_MAP_INIT_OFFSET(&buffer->activity_bo->vmap, offset);
}
-static struct iosys_map engine_metadata_map(struct xe_guc *guc)
+static struct iosys_map engine_metadata_map(struct xe_guc *guc,
+ unsigned int index)
{
struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
- struct engine_activity_buffer *buffer = &engine_activity->device_buffer;
+ struct engine_activity_buffer *buffer;
+ size_t offset;
- return buffer->metadata_bo->vmap;
+ if (engine_activity->num_functions) {
+ buffer = &engine_activity->function_buffer;
+ offset = sizeof(struct guc_engine_activity_metadata) * index;
+ } else {
+ buffer = &engine_activity->device_buffer;
+ offset = 0;
+ }
+
+ return IOSYS_MAP_INIT_OFFSET(&buffer->metadata_bo->vmap, offset);
}
static int allocate_engine_activity_group(struct xe_guc *guc)
{
struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
struct xe_device *xe = guc_to_xe(guc);
- u32 num_activity_group = 1; /* Will be modified for VF */
+ u32 num_activity_group;
+
+ /*
+ * An additional activity group is allocated for PF
+ */
+ num_activity_group = IS_SRIOV_PF(xe) ? xe_sriov_pf_get_totalvfs(xe) + 1 : 1;
engine_activity->eag = drmm_kcalloc(&xe->drm, num_activity_group,
sizeof(struct engine_activity_group), GFP_KERNEL);
@@ -60,10 +85,11 @@ static int allocate_engine_activity_group(struct xe_guc *guc)
}
static int allocate_engine_activity_buffers(struct xe_guc *guc,
- struct engine_activity_buffer *buffer)
+ struct engine_activity_buffer *buffer,
+ int count)
{
- u32 metadata_size = sizeof(struct guc_engine_activity_metadata);
- u32 size = sizeof(struct guc_engine_activity_data);
+ u32 metadata_size = sizeof(struct guc_engine_activity_metadata) * count;
+ u32 size = sizeof(struct guc_engine_activity_data) * count;
struct xe_gt *gt = guc_to_gt(guc);
struct xe_tile *tile = gt_to_tile(gt);
struct xe_bo *bo, *metadata_bo;
@@ -118,10 +144,11 @@ static bool is_engine_activity_supported(struct xe_guc *guc)
return true;
}
-static struct engine_activity *hw_engine_to_engine_activity(struct xe_hw_engine *hwe)
+static struct engine_activity *hw_engine_to_engine_activity(struct xe_hw_engine *hwe,
+ unsigned int index)
{
struct xe_guc *guc = &hwe->gt->uc.guc;
- struct engine_activity_group *eag = &guc->engine_activity.eag[0];
+ struct engine_activity_group *eag = &guc->engine_activity.eag[index];
u16 guc_class = xe_engine_class_to_guc_class(hwe->class);
return &eag->engine[guc_class][hwe->logical_instance];
@@ -138,9 +165,10 @@ static u64 cpu_ns_to_guc_tsc_tick(ktime_t ns, u32 freq)
#define read_metadata_record(xe_, map_, field_) \
xe_map_rd_field(xe_, map_, 0, struct guc_engine_activity_metadata, field_)
-static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
+static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe,
+ unsigned int index)
{
- struct engine_activity *ea = hw_engine_to_engine_activity(hwe);
+ struct engine_activity *ea = hw_engine_to_engine_activity(hwe, index);
struct guc_engine_activity *cached_activity = &ea->activity;
struct guc_engine_activity_metadata *cached_metadata = &ea->metadata;
struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
@@ -151,8 +179,8 @@ static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
u64 active_ticks, gpm_ts;
u16 change_num;
- activity_map = engine_activity_map(guc, hwe);
- metadata_map = engine_metadata_map(guc);
+ activity_map = engine_activity_map(guc, hwe, index);
+ metadata_map = engine_metadata_map(guc, index);
global_change_num = read_metadata_record(xe, &metadata_map, global_change_num);
/* GuC has not initialized activity data yet, return 0 */
@@ -194,9 +222,9 @@ update:
return ea->total + ea->active;
}
-static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
+static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, unsigned int index)
{
- struct engine_activity *ea = hw_engine_to_engine_activity(hwe);
+ struct engine_activity *ea = hw_engine_to_engine_activity(hwe, index);
struct guc_engine_activity_metadata *cached_metadata = &ea->metadata;
struct guc_engine_activity *cached_activity = &ea->activity;
struct iosys_map activity_map, metadata_map;
@@ -205,8 +233,8 @@ static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
u64 numerator;
u16 quanta_ratio;
- activity_map = engine_activity_map(guc, hwe);
- metadata_map = engine_metadata_map(guc);
+ activity_map = engine_activity_map(guc, hwe, index);
+ metadata_map = engine_metadata_map(guc, index);
if (!cached_metadata->guc_tsc_frequency_hz)
cached_metadata->guc_tsc_frequency_hz = read_metadata_record(xe, &metadata_map,
@@ -245,10 +273,35 @@ static int enable_engine_activity_stats(struct xe_guc *guc)
return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
}
-static void engine_activity_set_cpu_ts(struct xe_guc *guc)
+static int enable_function_engine_activity_stats(struct xe_guc *guc, bool enable)
{
struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
- struct engine_activity_group *eag = &engine_activity->eag[0];
+ u32 metadata_ggtt_addr = 0, ggtt_addr = 0, num_functions = 0;
+ struct engine_activity_buffer *buffer = &engine_activity->function_buffer;
+ u32 action[6];
+ int len = 0;
+
+ if (enable) {
+ metadata_ggtt_addr = xe_bo_ggtt_addr(buffer->metadata_bo);
+ ggtt_addr = xe_bo_ggtt_addr(buffer->activity_bo);
+ num_functions = engine_activity->num_functions;
+ }
+
+ action[len++] = XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER;
+ action[len++] = num_functions;
+ action[len++] = metadata_ggtt_addr;
+ action[len++] = 0;
+ action[len++] = ggtt_addr;
+ action[len++] = 0;
+
+ /* Blocking here to ensure the buffers are ready before reading them */
+ return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+static void engine_activity_set_cpu_ts(struct xe_guc *guc, unsigned int index)
+{
+ struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
+ struct engine_activity_group *eag = &engine_activity->eag[index];
int i, j;
for (i = 0; i < GUC_MAX_ENGINE_CLASSES; i++)
@@ -265,34 +318,106 @@ static u32 gpm_timestamp_shift(struct xe_gt *gt)
return 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
}
+static bool is_function_valid(struct xe_guc *guc, unsigned int fn_id)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
+
+ if (!IS_SRIOV_PF(xe) && fn_id)
+ return false;
+
+ if (engine_activity->num_functions && fn_id >= engine_activity->num_functions)
+ return false;
+
+ return true;
+}
+
+static int engine_activity_disable_function_stats(struct xe_guc *guc)
+{
+ struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
+ struct engine_activity_buffer *buffer = &engine_activity->function_buffer;
+ int ret;
+
+ if (!engine_activity->num_functions)
+ return 0;
+
+ ret = enable_function_engine_activity_stats(guc, false);
+ if (ret)
+ return ret;
+
+ free_engine_activity_buffers(buffer);
+ engine_activity->num_functions = 0;
+
+ return 0;
+}
+
+static int engine_activity_enable_function_stats(struct xe_guc *guc, int num_vfs)
+{
+ struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
+ struct engine_activity_buffer *buffer = &engine_activity->function_buffer;
+ int ret, i;
+
+ if (!num_vfs)
+ return 0;
+
+ /* This includes 1 PF and num_vfs */
+ engine_activity->num_functions = num_vfs + 1;
+
+ ret = allocate_engine_activity_buffers(guc, buffer, engine_activity->num_functions);
+ if (ret)
+ return ret;
+
+ ret = enable_function_engine_activity_stats(guc, true);
+ if (ret) {
+ free_engine_activity_buffers(buffer);
+ engine_activity->num_functions = 0;
+ return ret;
+ }
+
+ for (i = 0; i < engine_activity->num_functions; i++)
+ engine_activity_set_cpu_ts(guc, i + 1);
+
+ return 0;
+}
+
/**
* xe_guc_engine_activity_active_ticks - Get engine active ticks
* @guc: The GuC object
* @hwe: The hw_engine object
+ * @fn_id: function id to report on
*
* Return: accumulated ticks @hwe was active since engine activity stats were enabled.
*/
-u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
+u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe,
+ unsigned int fn_id)
{
if (!xe_guc_engine_activity_supported(guc))
return 0;
- return get_engine_active_ticks(guc, hwe);
+ if (!is_function_valid(guc, fn_id))
+ return 0;
+
+ return get_engine_active_ticks(guc, hwe, fn_id);
}
/**
* xe_guc_engine_activity_total_ticks - Get engine total ticks
* @guc: The GuC object
* @hwe: The hw_engine object
+ * @fn_id: function id to report on
*
* Return: accumulated quanta of ticks allocated for the engine
*/
-u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
+u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe,
+ unsigned int fn_id)
{
if (!xe_guc_engine_activity_supported(guc))
return 0;
- return get_engine_total_ticks(guc, hwe);
+ if (!is_function_valid(guc, fn_id))
+ return 0;
+
+ return get_engine_total_ticks(guc, hwe, fn_id);
}
/**
@@ -311,6 +436,25 @@ bool xe_guc_engine_activity_supported(struct xe_guc *guc)
}
/**
+ * xe_guc_engine_activity_function_stats - Enable/Disable per-function engine activity stats
+ * @guc: The GuC object
+ * @num_vfs: number of vfs
+ * @enable: true to enable, false otherwise
+ *
+ * Return: 0 on success, negative error code otherwise
+ */
+int xe_guc_engine_activity_function_stats(struct xe_guc *guc, int num_vfs, bool enable)
+{
+ if (!xe_guc_engine_activity_supported(guc))
+ return 0;
+
+ if (enable)
+ return engine_activity_enable_function_stats(guc, num_vfs);
+
+ return engine_activity_disable_function_stats(guc);
+}
+
+/**
* xe_guc_engine_activity_enable_stats - Enable engine activity stats
* @guc: The GuC object
*
@@ -327,7 +471,7 @@ void xe_guc_engine_activity_enable_stats(struct xe_guc *guc)
if (ret)
xe_gt_err(guc_to_gt(guc), "failed to enable activity stats%d\n", ret);
else
- engine_activity_set_cpu_ts(guc);
+ engine_activity_set_cpu_ts(guc, 0);
}
static void engine_activity_fini(void *arg)
@@ -360,7 +504,7 @@ int xe_guc_engine_activity_init(struct xe_guc *guc)
return ret;
}
- ret = allocate_engine_activity_buffers(guc, &engine_activity->device_buffer);
+ ret = allocate_engine_activity_buffers(guc, &engine_activity->device_buffer, 1);
if (ret) {
xe_gt_err(gt, "failed to allocate engine activity buffers (%pe)\n", ERR_PTR(ret));
return ret;
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.h b/drivers/gpu/drm/xe/xe_guc_engine_activity.h
index a042d4cb404c..b32926c2d208 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_activity.h
+++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.h
@@ -14,6 +14,9 @@ struct xe_guc;
int xe_guc_engine_activity_init(struct xe_guc *guc);
bool xe_guc_engine_activity_supported(struct xe_guc *guc);
void xe_guc_engine_activity_enable_stats(struct xe_guc *guc);
-u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
-u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
+int xe_guc_engine_activity_function_stats(struct xe_guc *guc, int num_vfs, bool enable);
+u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe,
+ unsigned int fn_id);
+u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe,
+ unsigned int fn_id);
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h b/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h
index 5cdd034b6b70..48f69ddefa36 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h
@@ -79,14 +79,24 @@ struct xe_guc_engine_activity {
/** @num_activity_group: number of activity groups */
u32 num_activity_group;
+ /** @num_functions: number of functions */
+ u32 num_functions;
+
/** @supported: indicates support for engine activity stats */
bool supported;
- /** @eag: holds the device level engine activity data */
+ /**
+ * @eag: holds the device level engine activity data in native mode.
+ * In SRIOV mode, points to an array with entries which holds the engine
+ * activity data for PF and VF's
+ */
struct engine_activity_group *eag;
/** @device_buffer: buffer object for global engine activity */
struct engine_activity_buffer device_buffer;
+
+ /** @function_buffer: buffer object for per-function engine activity */
+ struct engine_activity_buffer function_buffer;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index 80514a446ba2..38039c411387 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -260,7 +260,8 @@ int xe_guc_log_init(struct xe_guc_log *log)
bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(),
XE_BO_FLAG_SYSTEM |
XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_PINNED_NORESTORE);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 43b1192ba61c..18c623992035 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -462,6 +462,21 @@ static u32 get_cur_freq(struct xe_gt *gt)
}
/**
+ * xe_guc_pc_get_cur_freq_fw - With fw held, get requested frequency
+ * @pc: The GuC PC
+ *
+ * Returns: the requested frequency for that GT instance
+ */
+u32 xe_guc_pc_get_cur_freq_fw(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+
+ xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+ return get_cur_freq(gt);
+}
+
+/**
* xe_guc_pc_get_cur_freq - Get Current requested frequency
* @pc: The GuC PC
* @freq: A pointer to a u32 where the freq value will be returned
@@ -1170,7 +1185,8 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
bo = xe_managed_bo_create_pin_map(xe, tile, size,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_PINNED_NORESTORE);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index 39102b79602f..0a2664d5c811 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -22,6 +22,7 @@ void xe_guc_pc_print(struct xe_guc_pc *pc, struct drm_printer *p);
u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc);
int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq);
+u32 xe_guc_pc_get_cur_freq_fw(struct xe_guc_pc *pc);
u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc);
u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc);
u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 31bc2022bfc2..813c3c0bb250 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -300,6 +300,8 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
primelockdep(guc);
+ guc->submission_state.initialized = true;
+
return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}
@@ -834,6 +836,13 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
+ /*
+ * If device is being wedged even before submission_state is
+ * initialized, there's nothing to do here.
+ */
+ if (!guc->submission_state.initialized)
+ return;
+
err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
guc_submit_wedged_fini, guc);
if (err) {
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index 63bac64429a5..1fde7614fcc5 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -89,6 +89,11 @@ struct xe_guc {
struct mutex lock;
/** @submission_state.enabled: submission is enabled */
bool enabled;
+ /**
+ * @submission_state.initialized: mark when submission state is
+ * even initialized - before that not even the lock is valid
+ */
+ bool initialized;
/** @submission_state.fini_wq: submit fini wait queue */
wait_queue_head_t fini_wq;
} submission_state;
diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
index c3cc0fa105e8..57b71956ddf4 100644
--- a/drivers/gpu/drm/xe/xe_hmm.c
+++ b/drivers/gpu/drm/xe/xe_hmm.c
@@ -19,29 +19,6 @@ static u64 xe_npages_in_range(unsigned long start, unsigned long end)
return (end - start) >> PAGE_SHIFT;
}
-/**
- * xe_mark_range_accessed() - mark a range is accessed, so core mm
- * have such information for memory eviction or write back to
- * hard disk
- * @range: the range to mark
- * @write: if write to this range, we mark pages in this range
- * as dirty
- */
-static void xe_mark_range_accessed(struct hmm_range *range, bool write)
-{
- struct page *page;
- u64 i, npages;
-
- npages = xe_npages_in_range(range->start, range->end);
- for (i = 0; i < npages; i++) {
- page = hmm_pfn_to_page(range->hmm_pfns[i]);
- if (write)
- set_page_dirty_lock(page);
-
- mark_page_accessed(page);
- }
-}
-
static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st,
struct hmm_range *range, struct rw_semaphore *notifier_sem)
{
@@ -331,7 +308,6 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
if (ret)
goto out_unlock;
- xe_mark_range_accessed(&hmm_range, write);
userptr->sg = &userptr->sgt;
xe_hmm_userptr_set_mapped(uvma);
userptr->notifier_seq = hmm_range.notifier_seq;
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
index a440442b4d72..640950172088 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -605,6 +605,7 @@ err_object:
kobject_put(kobj);
return err;
}
+ALLOW_ERROR_INJECTION(xe_add_hw_engine_class_defaults, ERRNO); /* See xe_pci_probe() */
static void hw_engine_class_sysfs_fini(void *arg)
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 48d80ffdf7bb..eb293aec36a0 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -5,6 +5,7 @@
#include <linux/hwmon-sysfs.h>
#include <linux/hwmon.h>
+#include <linux/jiffies.h>
#include <linux/types.h>
#include <linux/units.h>
@@ -27,6 +28,7 @@ enum xe_hwmon_reg {
REG_PKG_POWER_SKU_UNIT,
REG_GT_PERF_STATUS,
REG_PKG_ENERGY_STATUS,
+ REG_FAN_SPEED,
};
enum xe_hwmon_reg_operation {
@@ -42,6 +44,13 @@ enum xe_hwmon_channel {
CHANNEL_MAX,
};
+enum xe_fan_channel {
+ FAN_1,
+ FAN_2,
+ FAN_3,
+ FAN_MAX,
+};
+
/*
* SF_* - scale factors for particular quantities according to hwmon spec.
*/
@@ -62,6 +71,16 @@ struct xe_hwmon_energy_info {
};
/**
+ * struct xe_hwmon_fan_info - to cache previous fan reading
+ */
+struct xe_hwmon_fan_info {
+ /** @reg_val_prev: previous fan reg val */
+ u32 reg_val_prev;
+ /** @time_prev: previous timestamp */
+ u64 time_prev;
+};
+
+/**
* struct xe_hwmon - xe hwmon data structure
*/
struct xe_hwmon {
@@ -79,6 +98,8 @@ struct xe_hwmon {
int scl_shift_time;
/** @ei: Energy info for energyN_input */
struct xe_hwmon_energy_info ei[CHANNEL_MAX];
+ /** @fi: Fan info for fanN_input */
+ struct xe_hwmon_fan_info fi[FAN_MAX];
};
static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg,
@@ -144,6 +165,14 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg
return PCU_CR_PACKAGE_ENERGY_STATUS;
}
break;
+ case REG_FAN_SPEED:
+ if (channel == FAN_1)
+ return BMG_FAN_1_SPEED;
+ else if (channel == FAN_2)
+ return BMG_FAN_2_SPEED;
+ else if (channel == FAN_3)
+ return BMG_FAN_3_SPEED;
+ break;
default:
drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg);
break;
@@ -454,6 +483,7 @@ static const struct hwmon_channel_info * const hwmon_info[] = {
HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL),
HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL),
HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL),
+ HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT, HWMON_F_INPUT, HWMON_F_INPUT),
NULL
};
@@ -480,6 +510,19 @@ static int xe_hwmon_pcode_write_i1(const struct xe_hwmon *hwmon, u32 uval)
(uval & POWER_SETUP_I1_DATA_MASK));
}
+static int xe_hwmon_pcode_read_fan_control(const struct xe_hwmon *hwmon, u32 subcmd, u32 *uval)
+{
+ struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe);
+
+ /* Platforms that don't return correct value */
+ if (hwmon->xe->info.platform == XE_DG2 && subcmd == FSC_READ_NUM_FANS) {
+ *uval = 2;
+ return 0;
+ }
+
+ return xe_pcode_read(root_tile, PCODE_MBOX(FAN_SPEED_CONTROL, subcmd, 0), uval, NULL);
+}
+
static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel,
long *value, u32 scale_factor)
{
@@ -706,6 +749,75 @@ xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
}
static umode_t
+xe_hwmon_fan_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
+{
+ u32 uval;
+
+ if (!hwmon->xe->info.has_fan_control)
+ return 0;
+
+ switch (attr) {
+ case hwmon_fan_input:
+ if (xe_hwmon_pcode_read_fan_control(hwmon, FSC_READ_NUM_FANS, &uval))
+ return 0;
+
+ return channel < uval ? 0444 : 0;
+ default:
+ return 0;
+ }
+}
+
+static int
+xe_hwmon_fan_input_read(struct xe_hwmon *hwmon, int channel, long *val)
+{
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
+ struct xe_hwmon_fan_info *fi = &hwmon->fi[channel];
+ u64 rotations, time_now, time;
+ u32 reg_val;
+ int ret = 0;
+
+ mutex_lock(&hwmon->hwmon_lock);
+
+ reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_FAN_SPEED, channel));
+ time_now = get_jiffies_64();
+
+ /*
+ * HW register value is accumulated count of pulses from PWM fan with the scale
+ * of 2 pulses per rotation.
+ */
+ rotations = (reg_val - fi->reg_val_prev) / 2;
+
+ time = jiffies_delta_to_msecs(time_now - fi->time_prev);
+ if (unlikely(!time)) {
+ ret = -EAGAIN;
+ goto unlock;
+ }
+
+ /*
+ * Calculate fan speed in RPM by time averaging two subsequent readings in minutes.
+ * RPM = number of rotations * msecs per minute / time in msecs
+ */
+ *val = DIV_ROUND_UP_ULL(rotations * (MSEC_PER_SEC * 60), time);
+
+ fi->reg_val_prev = reg_val;
+ fi->time_prev = time_now;
+unlock:
+ mutex_unlock(&hwmon->hwmon_lock);
+ return ret;
+}
+
+static int
+xe_hwmon_fan_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
+{
+ switch (attr) {
+ case hwmon_fan_input:
+ return xe_hwmon_fan_input_read(hwmon, channel, val);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static umode_t
xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
u32 attr, int channel)
{
@@ -730,6 +842,9 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
case hwmon_energy:
ret = xe_hwmon_energy_is_visible(hwmon, attr, channel);
break;
+ case hwmon_fan:
+ ret = xe_hwmon_fan_is_visible(hwmon, attr, channel);
+ break;
default:
ret = 0;
break;
@@ -765,6 +880,9 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
case hwmon_energy:
ret = xe_hwmon_energy_read(hwmon, attr, channel, val);
break;
+ case hwmon_fan:
+ ret = xe_hwmon_fan_read(hwmon, attr, channel, val);
+ break;
default:
ret = -EOPNOTSUPP;
break;
@@ -842,7 +960,7 @@ static void
xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
{
struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
- long energy;
+ long energy, fan_speed;
u64 val_sku_unit = 0;
int channel;
struct xe_reg pkg_power_sku_unit;
@@ -866,6 +984,11 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
for (channel = 0; channel < CHANNEL_MAX; channel++)
if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, channel))
xe_hwmon_energy_get(hwmon, channel, &energy);
+
+ /* Initialize 'struct xe_hwmon_fan_info' with initial fan register reading. */
+ for (channel = 0; channel < FAN_MAX; channel++)
+ if (xe_hwmon_is_visible(hwmon, hwmon_fan, hwmon_fan_input, channel))
+ xe_hwmon_fan_input_read(hwmon, channel, &fan_speed);
}
static void xe_hwmon_mutex_destroy(void *arg)
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 89393dcb53d9..63db66df064b 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -71,7 +71,7 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level
lmtt->ops->lmtt_pte_num(level)),
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
- XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_NEEDS_64K);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto out_free_pt;
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index df3ceddede07..855c8acaf3f1 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -37,6 +37,7 @@
#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48)
+#define LRC_PPHWSP_SIZE SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
static struct xe_device *
@@ -50,19 +51,22 @@ size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
struct xe_device *xe = gt_to_xe(gt);
size_t size;
+ /* Per-process HW status page (PPHWSP) */
+ size = LRC_PPHWSP_SIZE;
+
+ /* Engine context image */
switch (class) {
case XE_ENGINE_CLASS_RENDER:
if (GRAPHICS_VER(xe) >= 20)
- size = 4 * SZ_4K;
+ size += 3 * SZ_4K;
else
- size = 14 * SZ_4K;
+ size += 13 * SZ_4K;
break;
case XE_ENGINE_CLASS_COMPUTE:
- /* 14 pages since graphics_ver == 11 */
if (GRAPHICS_VER(xe) >= 20)
- size = 3 * SZ_4K;
+ size += 2 * SZ_4K;
else
- size = 14 * SZ_4K;
+ size += 13 * SZ_4K;
break;
default:
WARN(1, "Unknown engine class: %d", class);
@@ -71,7 +75,7 @@ size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
case XE_ENGINE_CLASS_VIDEO_DECODE:
case XE_ENGINE_CLASS_VIDEO_ENHANCE:
case XE_ENGINE_CLASS_OTHER:
- size = 2 * SZ_4K;
+ size += 1 * SZ_4K;
}
/* Add indirect ring state page */
@@ -650,7 +654,6 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
-#define LRC_PPHWSP_SIZE SZ_4K
u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
@@ -893,6 +896,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
void *init_data = NULL;
u32 arb_enable;
u32 lrc_size;
+ u32 bo_flags;
int err;
kref_init(&lrc->refcount);
@@ -901,15 +905,18 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
if (xe_gt_has_indirect_ring_state(gt))
lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
+ bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE;
+ if (vm && vm->xef) /* userspace */
+ bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
+
/*
* FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
* via VM bind calls.
*/
lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ bo_flags);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
@@ -1445,6 +1452,7 @@ static int dump_gfxpipe_command(struct drm_printer *p,
MATCH3D(3DSTATE_CLIP_MESH);
MATCH3D(3DSTATE_SBE_MESH);
MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
+ MATCH3D(3DSTATE_COARSE_PIXEL);
MATCH3D(3DSTATE_DRAWING_RECTANGLE);
MATCH3D(3DSTATE_CHROMA_KEY);
diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
index 404fa2a456d5..49c45ec3e83c 100644
--- a/drivers/gpu/drm/xe/xe_memirq.c
+++ b/drivers/gpu/drm/xe/xe_memirq.c
@@ -86,7 +86,7 @@ static const char *guc_name(struct xe_guc *guc)
* This object needs to be 4KiB aligned.
*
* - _`Interrupt Source Report Page`: this is the equivalent of the
- * GEN11_GT_INTR_DWx registers, with each bit in those registers being
+ * GT_INTR_DWx registers, with each bit in those registers being
* mapped to a byte here. The offsets are the same, just bytes instead
* of bits. This object needs to be cacheline aligned.
*
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 5a3e89022c38..3777cc30d688 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -97,7 +97,7 @@ struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile)
return tile->migrate->q;
}
-static void xe_migrate_fini(struct drm_device *dev, void *arg)
+static void xe_migrate_fini(void *arg)
{
struct xe_migrate *m = arg;
@@ -209,7 +209,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
num_entries * XE_PAGE_SIZE,
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_PINNED |
XE_BO_FLAG_PAGETABLE);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -401,7 +400,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
struct xe_vm *vm;
int err;
- m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL);
+ m = devm_kzalloc(xe->drm.dev, sizeof(*m), GFP_KERNEL);
if (!m)
return ERR_PTR(-ENOMEM);
@@ -455,7 +454,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
might_lock(&m->job_mutex);
fs_reclaim_release(GFP_KERNEL);
- err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m);
+ err = devm_add_action_or_reset(xe->drm.dev, xe_migrate_fini, m);
if (err)
return ERR_PTR(err);
@@ -779,10 +778,12 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
bool dst_is_pltt = dst->mem_type == XE_PL_TT;
bool src_is_vram = mem_type_is_vram(src->mem_type);
bool dst_is_vram = mem_type_is_vram(dst->mem_type);
+ bool type_device = src_bo->ttm.type == ttm_bo_type_device;
+ bool needs_ccs_emit = type_device && xe_migrate_needs_ccs_emit(xe);
bool copy_ccs = xe_device_has_flat_ccs(xe) &&
xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo);
bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);
- bool use_comp_pat = xe_device_has_flat_ccs(xe) &&
+ bool use_comp_pat = type_device && xe_device_has_flat_ccs(xe) &&
GRAPHICS_VER(xe) >= 20 && src_is_vram && !dst_is_vram;
/* Copying CCS between two different BOs is not supported yet. */
@@ -839,6 +840,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
avail_pts, avail_pts);
if (copy_system_ccs) {
+ xe_assert(xe, type_device);
ccs_size = xe_device_ccs_bytes(xe, src_L0);
batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size,
&ccs_ofs, &ccs_pt, 0,
@@ -849,7 +851,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
/* Add copy commands size here */
batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
- ((xe_migrate_needs_ccs_emit(xe) ? EMIT_COPY_CCS_DW : 0));
+ ((needs_ccs_emit ? EMIT_COPY_CCS_DW : 0));
bb = xe_bb_new(gt, batch_size, usm);
if (IS_ERR(bb)) {
@@ -878,7 +880,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
if (!copy_only_ccs)
emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
- if (xe_migrate_needs_ccs_emit(xe))
+ if (needs_ccs_emit)
flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
IS_DGFX(xe) ? src_is_vram : src_is_pltt,
dst_L0_ofs,
@@ -1544,6 +1546,7 @@ void xe_migrate_wait(struct xe_migrate *m)
dma_fence_wait(m->fence, false);
}
+#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
static u32 pte_update_cmd_size(u64 size)
{
u32 num_dword;
@@ -1608,6 +1611,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
{
struct xe_gt *gt = m->tile->primary_gt;
struct xe_device *xe = gt_to_xe(gt);
+ bool use_usm_batch = xe->info.has_usm;
struct dma_fence *fence = NULL;
u32 batch_size = 2;
u64 src_L0_ofs, dst_L0_ofs;
@@ -1624,7 +1628,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
batch_size += pte_update_cmd_size(round_update_size);
batch_size += EMIT_COPY_DW;
- bb = xe_bb_new(gt, batch_size, true);
+ bb = xe_bb_new(gt, batch_size, use_usm_batch);
if (IS_ERR(bb)) {
err = PTR_ERR(bb);
return ERR_PTR(err);
@@ -1649,7 +1653,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
XE_PAGE_SIZE);
job = xe_bb_create_migration_job(m->q, bb,
- xe_migrate_batch_base(m, true),
+ xe_migrate_batch_base(m, use_usm_batch),
update_idx);
if (IS_ERR(job)) {
err = PTR_ERR(job);
@@ -1719,6 +1723,8 @@ struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
XE_MIGRATE_COPY_TO_SRAM);
}
+#endif
+
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include "tests/xe_migrate.c"
#endif
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 70a36e777546..096c38cc51c8 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -138,6 +138,7 @@ int xe_mmio_probe_early(struct xe_device *xe)
return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe);
}
+ALLOW_ERROR_INJECTION(xe_mmio_probe_early, ERRNO); /* See xe_pci_probe() */
/**
* xe_mmio_init() - Initialize an MMIO instance
@@ -204,8 +205,9 @@ void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val)
trace_xe_reg_rw(mmio, true, addr, val, sizeof(val));
- if (!reg.vf && mmio->sriov_vf_gt)
- xe_gt_sriov_vf_write32(mmio->sriov_vf_gt, reg, val);
+ if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe))
+ xe_gt_sriov_vf_write32(mmio->sriov_vf_gt ?:
+ mmio->tile->primary_gt, reg, val);
else
writel(val, mmio->regs + addr);
}
@@ -218,8 +220,9 @@ u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg)
/* Wa_15015404425 */
mmio_flush_pending_writes(mmio);
- if (!reg.vf && mmio->sriov_vf_gt)
- val = xe_gt_sriov_vf_read32(mmio->sriov_vf_gt, reg);
+ if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe))
+ val = xe_gt_sriov_vf_read32(mmio->sriov_vf_gt ?:
+ mmio->tile->primary_gt, reg);
else
val = readl(mmio->regs + addr);
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 9f4632e39a1a..64bf46646544 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -11,6 +11,7 @@
#include <drm/drm_module.h>
#include "xe_drv.h"
+#include "xe_configfs.h"
#include "xe_hw_fence.h"
#include "xe_pci.h"
#include "xe_pm.h"
@@ -38,8 +39,8 @@ MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
module_param_named(probe_display, xe_modparam.probe_display, bool, 0444);
MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)");
-module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600);
-MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)");
+module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, int, 0600);
+MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size (in MiB) - <0=disable-resize, 0=max-needed-size[default], >0=force-size");
module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600);
MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)");
@@ -89,6 +90,10 @@ static const struct init_funcs init_funcs[] = {
.init = xe_check_nomodeset,
},
{
+ .init = xe_configfs_init,
+ .exit = xe_configfs_exit,
+ },
+ {
.init = xe_hw_fence_module_init,
.exit = xe_hw_fence_module_exit,
},
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 7ffc98f67e69..346f357b3d1f 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -2221,6 +2221,7 @@ addr_err:
kfree(oa_regs);
return ERR_PTR(err);
}
+ALLOW_ERROR_INJECTION(xe_oa_alloc_regs, ERRNO);
static ssize_t show_dynamic_id(struct kobject *kobj,
struct kobj_attribute *attr,
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 818f023166d5..07fe994f2a80 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -62,11 +62,13 @@ struct xe_device_desc {
u8 is_dgfx:1;
u8 has_display:1;
+ u8 has_fan_control:1;
u8 has_heci_gscfi:1;
u8 has_heci_cscfi:1;
u8 has_llc:1;
u8 has_pxp:1;
u8 has_sriov:1;
+ u8 needs_scratch:1;
u8 skip_guc_pc:1;
u8 skip_mtcfg:1;
u8 skip_pcode:1;
@@ -302,6 +304,7 @@ static const struct xe_device_desc dg2_desc = {
DG2_FEATURES,
.has_display = true,
+ .has_fan_control = true,
};
static const __maybe_unused struct xe_device_desc pvc_desc = {
@@ -329,6 +332,7 @@ static const struct xe_device_desc lnl_desc = {
.dma_mask_size = 46,
.has_display = true,
.has_pxp = true,
+ .needs_scratch = true,
};
static const struct xe_device_desc bmg_desc = {
@@ -336,7 +340,9 @@ static const struct xe_device_desc bmg_desc = {
PLATFORM(BATTLEMAGE),
.dma_mask_size = 46,
.has_display = true,
+ .has_fan_control = true,
.has_heci_cscfi = 1,
+ .needs_scratch = true,
};
static const struct xe_device_desc ptl_desc = {
@@ -345,6 +351,7 @@ static const struct xe_device_desc ptl_desc = {
.has_display = true,
.has_sriov = true,
.require_force_probe = true,
+ .needs_scratch = true,
};
#undef PLATFORM
@@ -575,6 +582,7 @@ static int xe_info_init_early(struct xe_device *xe,
xe->info.dma_mask_size = desc->dma_mask_size;
xe->info.is_dgfx = desc->is_dgfx;
+ xe->info.has_fan_control = desc->has_fan_control;
xe->info.has_heci_gscfi = desc->has_heci_gscfi;
xe->info.has_heci_cscfi = desc->has_heci_cscfi;
xe->info.has_llc = desc->has_llc;
@@ -583,6 +591,7 @@ static int xe_info_init_early(struct xe_device *xe,
xe->info.skip_guc_pc = desc->skip_guc_pc;
xe->info.skip_mtcfg = desc->skip_mtcfg;
xe->info.skip_pcode = desc->skip_pcode;
+ xe->info.needs_scratch = desc->needs_scratch;
xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
xe_modparam.probe_display &&
@@ -803,18 +812,17 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return err;
err = xe_device_probe_early(xe);
- if (err) {
- /*
- * In Boot Survivability mode, no drm card is exposed and driver
- * is loaded with bare minimum to allow for firmware to be
- * flashed through mei. If early probe failed, but it managed to
- * enable survivability mode, return success.
- */
- if (xe_survivability_mode_is_enabled(xe))
- return 0;
+ /*
+ * In Boot Survivability mode, no drm card is exposed and driver
+ * is loaded with bare minimum to allow for firmware to be
+ * flashed through mei. Return success, if survivability mode
+ * is enabled due to pcode failure or configfs being set
+ */
+ if (xe_survivability_mode_is_enabled(xe))
+ return 0;
+ if (err)
return err;
- }
err = xe_info_init(xe, desc);
if (err)
@@ -920,6 +928,7 @@ static int xe_pci_suspend(struct device *dev)
pci_save_state(pdev);
pci_disable_device(pdev);
+ pci_set_power_state(pdev, PCI_D3cold);
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c
index 09ee8a06fe2e..d69b6b2a3061 100644
--- a/drivers/gpu/drm/xe/xe_pci_sriov.c
+++ b/drivers/gpu/drm/xe/xe_pci_sriov.c
@@ -7,6 +7,7 @@
#include "xe_device.h"
#include "xe_gt_sriov_pf_config.h"
#include "xe_gt_sriov_pf_control.h"
+#include "xe_guc_engine_activity.h"
#include "xe_pci_sriov.h"
#include "xe_pm.h"
#include "xe_sriov.h"
@@ -111,6 +112,20 @@ static void pf_link_vfs(struct xe_device *xe, int num_vfs)
}
}
+static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs, bool enable)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int ret = 0;
+
+ for_each_gt(gt, xe, id) {
+ ret = xe_guc_engine_activity_function_stats(&gt->uc.guc, num_vfs, enable);
+ if (ret)
+ xe_sriov_info(xe, "Failed to %s engine activity function stats (%pe)\n",
+ str_enable_disable(enable), ERR_PTR(ret));
+ }
+}
+
static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -145,6 +160,9 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
xe_sriov_info(xe, "Enabled %u of %u VF%s\n",
num_vfs, total_vfs, str_plural(total_vfs));
+
+ pf_engine_activity_stats(xe, num_vfs, true);
+
return num_vfs;
failed:
@@ -168,6 +186,8 @@ static int pf_disable_vfs(struct xe_device *xe)
if (!num_vfs)
return 0;
+ pf_engine_activity_stats(xe, num_vfs, false);
+
pci_disable_sriov(pdev);
pf_reset_vfs(xe, num_vfs);
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index 9333ce776a6e..cf955b3ed52c 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -7,6 +7,7 @@
#include <linux/delay.h>
#include <linux/errno.h>
+#include <linux/error-injection.h>
#include <drm/drm_managed.h>
@@ -323,3 +324,4 @@ int xe_pcode_probe_early(struct xe_device *xe)
{
return xe_pcode_ready(xe, false);
}
+ALLOW_ERROR_INJECTION(xe_pcode_probe_early, ERRNO); /* See xe_pci_probe */
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
index 2bae9afdbd35..e622ae17f08d 100644
--- a/drivers/gpu/drm/xe/xe_pcode_api.h
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -49,6 +49,9 @@
/* Domain IDs (param2) */
#define PCODE_MBOX_DOMAIN_HBM 0x2
+#define FAN_SPEED_CONTROL 0x7D
+#define FSC_READ_NUM_FANS 0x4
+
#define PCODE_SCRATCH(x) XE_REG(0x138320 + ((x) * 4))
/* PCODE_SCRATCH0 */
#define AUXINFO_REG_OFFSET REG_GENMASK(17, 15)
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 7b6b754ad6eb..4e112fbacada 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -188,7 +188,7 @@ int xe_pm_resume(struct xe_device *xe)
* This only restores pinned memory which is the memory required for the
* GT(s) to resume.
*/
- err = xe_bo_restore_kernel(xe);
+ err = xe_bo_restore_early(xe);
if (err)
goto err;
@@ -199,7 +199,7 @@ int xe_pm_resume(struct xe_device *xe)
xe_display_pm_resume(xe);
- err = xe_bo_restore_user(xe);
+ err = xe_bo_restore_late(xe);
if (err)
goto err;
@@ -484,7 +484,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
* This only restores pinned memory which is the memory
* required for the GT(s) to resume.
*/
- err = xe_bo_restore_kernel(xe);
+ err = xe_bo_restore_early(xe);
if (err)
goto out;
}
@@ -497,7 +497,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
xe_display_pm_runtime_resume(xe);
if (xe->d3cold.allowed) {
- err = xe_bo_restore_user(xe);
+ err = xe_bo_restore_late(xe);
if (err)
goto out;
}
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index 4f62a6e515d6..69df0e3520a5 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -10,9 +10,11 @@
#include "xe_force_wake.h"
#include "xe_gt_idle.h"
#include "xe_guc_engine_activity.h"
+#include "xe_guc_pc.h"
#include "xe_hw_engine.h"
#include "xe_pm.h"
#include "xe_pmu.h"
+#include "xe_sriov_pf_helpers.h"
/**
* DOC: Xe PMU (Performance Monitoring Unit)
@@ -32,9 +34,10 @@
* gt[60:63] Selects gt for the event
* engine_class[20:27] Selects engine-class for event
* engine_instance[12:19] Selects the engine-instance for the event
+ * function[44:59] Selects the function of the event (SRIOV enabled)
*
* For engine specific events (engine-*), gt, engine_class and engine_instance parameters must be
- * set as populated by DRM_XE_DEVICE_QUERY_ENGINES.
+ * set as populated by DRM_XE_DEVICE_QUERY_ENGINES and function if SRIOV is enabled.
*
* For gt specific events (gt-*) gt parameter must be passed. All other parameters will be 0.
*
@@ -49,6 +52,7 @@
*/
#define XE_PMU_EVENT_GT_MASK GENMASK_ULL(63, 60)
+#define XE_PMU_EVENT_FUNCTION_MASK GENMASK_ULL(59, 44)
#define XE_PMU_EVENT_ENGINE_CLASS_MASK GENMASK_ULL(27, 20)
#define XE_PMU_EVENT_ENGINE_INSTANCE_MASK GENMASK_ULL(19, 12)
#define XE_PMU_EVENT_ID_MASK GENMASK_ULL(11, 0)
@@ -58,6 +62,11 @@ static unsigned int config_to_event_id(u64 config)
return FIELD_GET(XE_PMU_EVENT_ID_MASK, config);
}
+static unsigned int config_to_function_id(u64 config)
+{
+ return FIELD_GET(XE_PMU_EVENT_FUNCTION_MASK, config);
+}
+
static unsigned int config_to_engine_class(u64 config)
{
return FIELD_GET(XE_PMU_EVENT_ENGINE_CLASS_MASK, config);
@@ -76,6 +85,8 @@ static unsigned int config_to_gt_id(u64 config)
#define XE_PMU_EVENT_GT_C6_RESIDENCY 0x01
#define XE_PMU_EVENT_ENGINE_ACTIVE_TICKS 0x02
#define XE_PMU_EVENT_ENGINE_TOTAL_TICKS 0x03
+#define XE_PMU_EVENT_GT_ACTUAL_FREQUENCY 0x04
+#define XE_PMU_EVENT_GT_REQUESTED_FREQUENCY 0x05
static struct xe_gt *event_to_gt(struct perf_event *event)
{
@@ -111,6 +122,14 @@ static bool is_engine_event(u64 config)
event_id == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);
}
+static bool is_gt_frequency_event(struct perf_event *event)
+{
+ u32 id = config_to_event_id(event->attr.config);
+
+ return id == XE_PMU_EVENT_GT_ACTUAL_FREQUENCY ||
+ id == XE_PMU_EVENT_GT_REQUESTED_FREQUENCY;
+}
+
static bool event_gt_forcewake(struct perf_event *event)
{
struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
@@ -118,7 +137,7 @@ static bool event_gt_forcewake(struct perf_event *event)
struct xe_gt *gt;
unsigned int *fw_ref;
- if (!is_engine_event(config))
+ if (!is_engine_event(config) && !is_gt_frequency_event(event))
return true;
gt = xe_device_get_gt(xe, config_to_gt_id(config));
@@ -151,7 +170,7 @@ static bool event_supported(struct xe_pmu *pmu, unsigned int gt,
static bool event_param_valid(struct perf_event *event)
{
struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
- unsigned int engine_class, engine_instance;
+ unsigned int engine_class, engine_instance, function_id;
u64 config = event->attr.config;
struct xe_gt *gt;
@@ -161,16 +180,28 @@ static bool event_param_valid(struct perf_event *event)
engine_class = config_to_engine_class(config);
engine_instance = config_to_engine_instance(config);
+ function_id = config_to_function_id(config);
switch (config_to_event_id(config)) {
case XE_PMU_EVENT_GT_C6_RESIDENCY:
- if (engine_class || engine_instance)
+ case XE_PMU_EVENT_GT_ACTUAL_FREQUENCY:
+ case XE_PMU_EVENT_GT_REQUESTED_FREQUENCY:
+ if (engine_class || engine_instance || function_id)
return false;
break;
case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS:
case XE_PMU_EVENT_ENGINE_TOTAL_TICKS:
if (!event_to_hwe(event))
return false;
+
+ /* PF(0) and total vfs when SRIOV is enabled */
+ if (IS_SRIOV_PF(xe)) {
+ if (function_id > xe_sriov_pf_get_totalvfs(xe))
+ return false;
+ } else if (function_id) {
+ return false;
+ }
+
break;
}
@@ -242,13 +273,17 @@ static int xe_pmu_event_init(struct perf_event *event)
static u64 read_engine_events(struct xe_gt *gt, struct perf_event *event)
{
struct xe_hw_engine *hwe;
- u64 val = 0;
+ unsigned int function_id;
+ u64 config, val = 0;
+
+ config = event->attr.config;
+ function_id = config_to_function_id(config);
hwe = event_to_hwe(event);
- if (config_to_event_id(event->attr.config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS)
- val = xe_guc_engine_activity_active_ticks(&gt->uc.guc, hwe);
+ if (config_to_event_id(config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS)
+ val = xe_guc_engine_activity_active_ticks(&gt->uc.guc, hwe, function_id);
else
- val = xe_guc_engine_activity_total_ticks(&gt->uc.guc, hwe);
+ val = xe_guc_engine_activity_total_ticks(&gt->uc.guc, hwe, function_id);
return val;
}
@@ -266,6 +301,10 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS:
case XE_PMU_EVENT_ENGINE_TOTAL_TICKS:
return read_engine_events(gt, event);
+ case XE_PMU_EVENT_GT_ACTUAL_FREQUENCY:
+ return xe_guc_pc_get_act_freq(&gt->uc.guc.pc);
+ case XE_PMU_EVENT_GT_REQUESTED_FREQUENCY:
+ return xe_guc_pc_get_cur_freq_fw(&gt->uc.guc.pc);
}
return 0;
@@ -281,7 +320,14 @@ static void xe_pmu_event_update(struct perf_event *event)
new = __xe_pmu_event_read(event);
} while (!local64_try_cmpxchg(&hwc->prev_count, &prev, new));
- local64_add(new - prev, &event->count);
+ /*
+ * GT frequency is not a monotonically increasing counter, so add the
+ * instantaneous value instead.
+ */
+ if (is_gt_frequency_event(event))
+ local64_add(new, &event->count);
+ else
+ local64_add(new - prev, &event->count);
}
static void xe_pmu_event_read(struct perf_event *event)
@@ -351,6 +397,7 @@ static void xe_pmu_event_del(struct perf_event *event, int flags)
}
PMU_FORMAT_ATTR(gt, "config:60-63");
+PMU_FORMAT_ATTR(function, "config:44-59");
PMU_FORMAT_ATTR(engine_class, "config:20-27");
PMU_FORMAT_ATTR(engine_instance, "config:12-19");
PMU_FORMAT_ATTR(event, "config:0-11");
@@ -359,6 +406,7 @@ static struct attribute *pmu_format_attrs[] = {
&format_attr_event.attr,
&format_attr_engine_class.attr,
&format_attr_engine_instance.attr,
+ &format_attr_function.attr,
&format_attr_gt.attr,
NULL,
};
@@ -419,6 +467,10 @@ static ssize_t event_attr_show(struct device *dev,
XE_EVENT_ATTR_SIMPLE(gt-c6-residency, gt_c6_residency, XE_PMU_EVENT_GT_C6_RESIDENCY, "ms");
XE_EVENT_ATTR_NOUNIT(engine-active-ticks, engine_active_ticks, XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);
XE_EVENT_ATTR_NOUNIT(engine-total-ticks, engine_total_ticks, XE_PMU_EVENT_ENGINE_TOTAL_TICKS);
+XE_EVENT_ATTR_SIMPLE(gt-actual-frequency, gt_actual_frequency,
+ XE_PMU_EVENT_GT_ACTUAL_FREQUENCY, "MHz");
+XE_EVENT_ATTR_SIMPLE(gt-requested-frequency, gt_requested_frequency,
+ XE_PMU_EVENT_GT_REQUESTED_FREQUENCY, "MHz");
static struct attribute *pmu_empty_event_attrs[] = {
/* Empty - all events are added as groups with .attr_update() */
@@ -434,6 +486,8 @@ static const struct attribute_group *pmu_events_attr_update[] = {
&pmu_group_gt_c6_residency,
&pmu_group_engine_active_ticks,
&pmu_group_engine_total_ticks,
+ &pmu_group_gt_actual_frequency,
+ &pmu_group_gt_requested_frequency,
NULL,
};
@@ -442,8 +496,11 @@ static void set_supported_events(struct xe_pmu *pmu)
struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
struct xe_gt *gt = xe_device_get_gt(xe, 0);
- if (!xe->info.skip_guc_pc)
+ if (!xe->info.skip_guc_pc) {
pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_C6_RESIDENCY);
+ pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_ACTUAL_FREQUENCY);
+ pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_REQUESTED_FREQUENCY);
+ }
if (xe_guc_engine_activity_supported(&gt->uc.guc)) {
pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index ffaf0d02dc7d..b42cf5d1b20c 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -103,6 +103,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
{
struct xe_pt *pt;
struct xe_bo *bo;
+ u32 bo_flags;
int err;
if (level) {
@@ -115,14 +116,16 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
if (!pt)
return ERR_PTR(-ENOMEM);
+ bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE |
+ XE_BO_FLAG_NO_RESV_EVICT | XE_BO_FLAG_PAGETABLE;
+ if (vm->xef) /* userspace */
+ bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
+
pt->level = level;
bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE |
- XE_BO_FLAG_PINNED |
- XE_BO_FLAG_NO_RESV_EVICT |
- XE_BO_FLAG_PAGETABLE);
+ bo_flags);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto err_kfree;
@@ -269,8 +272,11 @@ struct xe_pt_update {
bool preexisting;
};
+/**
+ * struct xe_pt_stage_bind_walk - Walk state for the stage_bind walk.
+ */
struct xe_pt_stage_bind_walk {
- /** base: The base class. */
+ /** @base: The base class. */
struct xe_pt_walk base;
/* Input parameters for the walk */
@@ -278,15 +284,19 @@ struct xe_pt_stage_bind_walk {
struct xe_vm *vm;
/** @tile: The tile we're building for. */
struct xe_tile *tile;
- /** @default_pte: PTE flag only template. No address is associated */
- u64 default_pte;
+ /** @default_vram_pte: PTE flag only template for VRAM. No address is associated */
+ u64 default_vram_pte;
+ /** @default_system_pte: PTE flag only template for System. No address is associated */
+ u64 default_system_pte;
/** @dma_offset: DMA offset to add to the PTE. */
u64 dma_offset;
/**
- * @needs_64k: This address range enforces 64K alignment and
- * granularity.
+ * @needs_64K: This address range enforces 64K alignment and
+ * granularity on VRAM.
*/
bool needs_64K;
+ /** @clear_pt: clear page table entries during the bind walk */
+ bool clear_pt;
/**
* @vma: VMA being mapped
*/
@@ -299,6 +309,7 @@ struct xe_pt_stage_bind_walk {
u64 va_curs_start;
/* Output */
+ /** @wupd: Walk output data for page-table updates. */
struct xe_walk_update {
/** @wupd.entries: Caller provided storage. */
struct xe_vm_pgtable_update *entries;
@@ -316,7 +327,7 @@ struct xe_pt_stage_bind_walk {
u64 l0_end_addr;
/** @addr_64K: The start address of the current 64K chunk. */
u64 addr_64K;
- /** @found_64: Whether @add_64K actually points to a 64K chunk. */
+ /** @found_64K: Whether @add_64K actually points to a 64K chunk. */
bool found_64K;
};
@@ -436,6 +447,10 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
if (xe_vma_is_null(xe_walk->vma))
return true;
+ /* if we are clearing page table, no dma addresses*/
+ if (xe_walk->clear_pt)
+ return true;
+
/* Is the DMA address huge PTE size aligned? */
size = next - addr;
dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs);
@@ -515,24 +530,35 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
struct xe_res_cursor *curs = xe_walk->curs;
bool is_null = xe_vma_is_null(xe_walk->vma);
+ bool is_vram = is_null ? false : xe_res_is_vram(curs);
XE_WARN_ON(xe_walk->va_curs_start != addr);
- pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
- xe_res_dma(curs) + xe_walk->dma_offset,
- xe_walk->vma, pat_index, level);
- pte |= xe_walk->default_pte;
+ if (xe_walk->clear_pt) {
+ pte = 0;
+ } else {
+ pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
+ xe_res_dma(curs) +
+ xe_walk->dma_offset,
+ xe_walk->vma,
+ pat_index, level);
+ if (!is_null)
+ pte |= is_vram ? xe_walk->default_vram_pte :
+ xe_walk->default_system_pte;
- /*
- * Set the XE_PTE_PS64 hint if possible, otherwise if
- * this device *requires* 64K PTE size for VRAM, fail.
- */
- if (level == 0 && !xe_parent->is_compact) {
- if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) {
- xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K;
- pte |= XE_PTE_PS64;
- } else if (XE_WARN_ON(xe_walk->needs_64K)) {
- return -EINVAL;
+ /*
+ * Set the XE_PTE_PS64 hint if possible, otherwise if
+ * this device *requires* 64K PTE size for VRAM, fail.
+ */
+ if (level == 0 && !xe_parent->is_compact) {
+ if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) {
+ xe_walk->vma->gpuva.flags |=
+ XE_VMA_PTE_64K;
+ pte |= XE_PTE_PS64;
+ } else if (XE_WARN_ON(xe_walk->needs_64K &&
+ is_vram)) {
+ return -EINVAL;
+ }
}
}
@@ -540,7 +566,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
if (unlikely(ret))
return ret;
- if (!is_null)
+ if (!is_null && !xe_walk->clear_pt)
xe_res_next(curs, next - addr);
xe_walk->va_curs_start = next;
xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level);
@@ -603,6 +629,44 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
.pt_entry = xe_pt_stage_bind_entry,
};
+/*
+ * Default atomic expectations for different allocation scenarios are as follows:
+ *
+ * 1. Traditional API: When the VM is not in LR mode:
+ * - Device atomics are expected to function with all allocations.
+ *
+ * 2. Compute/SVM API: When the VM is in LR mode:
+ * - Device atomics are the default behavior when the bo is placed in a single region.
+ * - In all other cases device atomics will be disabled with AE=0 until an application
+ * request differently using a ioctl like madvise.
+ */
+static bool xe_atomic_for_vram(struct xe_vm *vm)
+{
+ return true;
+}
+
+static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_bo *bo)
+{
+ struct xe_device *xe = vm->xe;
+
+ if (!xe->info.has_device_atomics_on_smem)
+ return false;
+
+ /*
+ * If a SMEM+LMEM allocation is backed by SMEM, a device
+ * atomics will cause a gpu page fault and which then
+ * gets migrated to LMEM, bind such allocations with
+ * device atomics enabled.
+ *
+ * TODO: Revisit this. Perhaps add something like a
+ * fault_on_atomics_in_system UAPI flag.
+ * Note that this also prohibits GPU atomics in LR mode for
+ * userptr and system memory on DGFX.
+ */
+ return (!IS_DGFX(xe) || (!xe_vm_in_lr_mode(vm) ||
+ (bo && xe_bo_has_single_placement(bo))));
+}
+
/**
* xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
* range.
@@ -612,6 +676,7 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
* @entries: Storage for the update entries used for connecting the tree to
* the main tree at commit time.
* @num_entries: On output contains the number of @entries used.
+ * @clear_pt: Clear the page table entries.
*
* This function builds a disconnected page-table tree for a given address
* range. The tree is connected to the main vm tree for the gpu using
@@ -625,13 +690,13 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
static int
xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
struct xe_svm_range *range,
- struct xe_vm_pgtable_update *entries, u32 *num_entries)
+ struct xe_vm_pgtable_update *entries,
+ u32 *num_entries, bool clear_pt)
{
struct xe_device *xe = tile_to_xe(tile);
struct xe_bo *bo = xe_vma_bo(vma);
- bool is_devmem = !xe_vma_is_userptr(vma) && bo &&
- (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo));
struct xe_res_cursor curs;
+ struct xe_vm *vm = xe_vma_vm(vma);
struct xe_pt_stage_bind_walk xe_walk = {
.base = {
.ops = &xe_pt_stage_bind_ops,
@@ -639,34 +704,31 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
.max_level = XE_PT_HIGHEST_LEVEL,
.staging = true,
},
- .vm = xe_vma_vm(vma),
+ .vm = vm,
.tile = tile,
.curs = &curs,
.va_curs_start = range ? range->base.itree.start :
xe_vma_start(vma),
.vma = vma,
.wupd.entries = entries,
+ .clear_pt = clear_pt,
};
- struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
+ struct xe_pt *pt = vm->pt_root[tile->id];
int ret;
if (range) {
/* Move this entire thing to xe_svm.c? */
- xe_svm_notifier_lock(xe_vma_vm(vma));
+ xe_svm_notifier_lock(vm);
if (!xe_svm_range_pages_valid(range)) {
xe_svm_range_debug(range, "BIND PREPARE - RETRY");
- xe_svm_notifier_unlock(xe_vma_vm(vma));
+ xe_svm_notifier_unlock(vm);
return -EAGAIN;
}
if (xe_svm_range_has_dma_mapping(range)) {
xe_res_first_dma(range->base.dma_addr, 0,
range->base.itree.last + 1 - range->base.itree.start,
&curs);
- is_devmem = xe_res_is_vram(&curs);
- if (is_devmem)
- xe_svm_range_debug(range, "BIND PREPARE - DMA VRAM");
- else
- xe_svm_range_debug(range, "BIND PREPARE - DMA");
+ xe_svm_range_debug(range, "BIND PREPARE - MIXED");
} else {
xe_assert(xe, false);
}
@@ -674,54 +736,21 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
* Note, when unlocking the resource cursor dma addresses may become
* stale, but the bind will be aborted anyway at commit time.
*/
- xe_svm_notifier_unlock(xe_vma_vm(vma));
+ xe_svm_notifier_unlock(vm);
}
- xe_walk.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem;
+ xe_walk.needs_64K = (vm->flags & XE_VM_FLAG_64K);
+ if (clear_pt)
+ goto walk_pt;
- /**
- * Default atomic expectations for different allocation scenarios are as follows:
- *
- * 1. Traditional API: When the VM is not in LR mode:
- * - Device atomics are expected to function with all allocations.
- *
- * 2. Compute/SVM API: When the VM is in LR mode:
- * - Device atomics are the default behavior when the bo is placed in a single region.
- * - In all other cases device atomics will be disabled with AE=0 until an application
- * request differently using a ioctl like madvise.
- */
if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
- if (xe_vm_in_lr_mode(xe_vma_vm(vma))) {
- if (bo && xe_bo_has_single_placement(bo))
- xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
- /**
- * If a SMEM+LMEM allocation is backed by SMEM, a device
- * atomics will cause a gpu page fault and which then
- * gets migrated to LMEM, bind such allocations with
- * device atomics enabled.
- */
- else if (is_devmem)
- xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
- } else {
- xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
- }
-
- /**
- * Unset AE if the platform(PVC) doesn't support it on an
- * allocation
- */
- if (!xe->info.has_device_atomics_on_smem && !is_devmem)
- xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE;
- }
-
- if (is_devmem) {
- xe_walk.default_pte |= XE_PPGTT_PTE_DM;
- xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0;
+ xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? XE_USM_PPGTT_PTE_AE : 0;
+ xe_walk.default_system_pte = xe_atomic_for_system(vm, bo) ?
+ XE_USM_PPGTT_PTE_AE : 0;
}
- if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
- xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
-
+ xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM;
+ xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0;
if (!range)
xe_bo_assert_held(bo);
@@ -739,6 +768,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
curs.size = xe_vma_size(vma);
}
+walk_pt:
ret = xe_pt_walk_range(&pt->base, pt->level,
range ? range->base.itree.start : xe_vma_start(vma),
range ? range->base.itree.last + 1 : xe_vma_end(vma),
@@ -1103,12 +1133,14 @@ static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries,
static int
xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
struct xe_svm_range *range,
- struct xe_vm_pgtable_update *entries, u32 *num_entries)
+ struct xe_vm_pgtable_update *entries,
+ u32 *num_entries, bool invalidate_on_bind)
{
int err;
*num_entries = 0;
- err = xe_pt_stage_bind(tile, vma, range, entries, num_entries);
+ err = xe_pt_stage_bind(tile, vma, range, entries, num_entries,
+ invalidate_on_bind);
if (!err)
xe_tile_assert(tile, *num_entries);
@@ -1420,6 +1452,7 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
return err;
}
+#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
{
struct xe_vm *vm = pt_update->vops->vm;
@@ -1453,6 +1486,7 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
return 0;
}
+#endif
struct invalidation_fence {
struct xe_gt_tlb_invalidation_fence base;
@@ -1791,7 +1825,7 @@ static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma)
static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
- struct xe_vma *vma)
+ struct xe_vma *vma, bool invalidate_on_bind)
{
u32 current_op = pt_update_ops->current_op;
struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
@@ -1813,7 +1847,7 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
return err;
err = xe_pt_prepare_bind(tile, vma, NULL, pt_op->entries,
- &pt_op->num_entries);
+ &pt_op->num_entries, invalidate_on_bind);
if (!err) {
xe_tile_assert(tile, pt_op->num_entries <=
ARRAY_SIZE(pt_op->entries));
@@ -1835,11 +1869,11 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
* If !rebind, and scratch enabled VMs, there is a chance the scratch
* PTE is already cached in the TLB so it needs to be invalidated.
* On !LR VMs this is done in the ring ops preceding a batch, but on
- * non-faulting LR, in particular on user-space batch buffer chaining,
- * it needs to be done here.
+ * LR, in particular on user-space batch buffer chaining, it needs to
+ * be done here.
*/
if ((!pt_op->rebind && xe_vm_has_scratch(vm) &&
- xe_vm_in_preempt_fence_mode(vm)))
+ xe_vm_in_lr_mode(vm)))
pt_update_ops->needs_invalidation = true;
else if (pt_op->rebind && !xe_vm_in_lr_mode(vm))
/* We bump also if batch_invalidate_tlb is true */
@@ -1875,7 +1909,7 @@ static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile,
pt_op->rebind = BIT(tile->id) & range->tile_present;
err = xe_pt_prepare_bind(tile, vma, range, pt_op->entries,
- &pt_op->num_entries);
+ &pt_op->num_entries, false);
if (!err) {
xe_tile_assert(tile, pt_op->num_entries <=
ARRAY_SIZE(pt_op->entries));
@@ -1987,11 +2021,13 @@ static int op_prepare(struct xe_vm *vm,
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
- if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) ||
+ if ((!op->map.immediate && xe_vm_in_fault_mode(vm) &&
+ !op->map.invalidate_on_bind) ||
op->map.is_cpu_addr_mirror)
break;
- err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma);
+ err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma,
+ op->map.invalidate_on_bind);
pt_update_ops->wait_vm_kernel = true;
break;
case DRM_GPUVA_OP_REMAP:
@@ -2005,12 +2041,12 @@ static int op_prepare(struct xe_vm *vm,
if (!err && op->remap.prev) {
err = bind_op_prepare(vm, tile, pt_update_ops,
- op->remap.prev);
+ op->remap.prev, false);
pt_update_ops->wait_vm_bookkeep = true;
}
if (!err && op->remap.next) {
err = bind_op_prepare(vm, tile, pt_update_ops,
- op->remap.next);
+ op->remap.next, false);
pt_update_ops->wait_vm_bookkeep = true;
}
break;
@@ -2032,7 +2068,7 @@ static int op_prepare(struct xe_vm *vm,
if (xe_vma_is_cpu_addr_mirror(vma))
break;
- err = bind_op_prepare(vm, tile, pt_update_ops, vma);
+ err = bind_op_prepare(vm, tile, pt_update_ops, vma, false);
pt_update_ops->wait_vm_kernel = true;
break;
}
@@ -2115,7 +2151,7 @@ ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO);
static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
struct xe_vma *vma, struct dma_fence *fence,
- struct dma_fence *fence2)
+ struct dma_fence *fence2, bool invalidate_on_bind)
{
xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));
@@ -2132,6 +2168,8 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
}
vma->tile_present |= BIT(tile->id);
vma->tile_staged &= ~BIT(tile->id);
+ if (invalidate_on_bind)
+ vma->tile_invalidated |= BIT(tile->id);
if (xe_vma_is_userptr(vma)) {
lockdep_assert_held_read(&vm->userptr.notifier_lock);
to_userptr_vma(vma)->userptr.initial_bind = true;
@@ -2193,7 +2231,7 @@ static void op_commit(struct xe_vm *vm,
break;
bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence,
- fence2);
+ fence2, op->map.invalidate_on_bind);
break;
case DRM_GPUVA_OP_REMAP:
{
@@ -2206,10 +2244,10 @@ static void op_commit(struct xe_vm *vm,
if (op->remap.prev)
bind_op_commit(vm, tile, pt_update_ops, op->remap.prev,
- fence, fence2);
+ fence, fence2, false);
if (op->remap.next)
bind_op_commit(vm, tile, pt_update_ops, op->remap.next,
- fence, fence2);
+ fence, fence2, false);
break;
}
case DRM_GPUVA_OP_UNMAP:
@@ -2227,7 +2265,7 @@ static void op_commit(struct xe_vm *vm,
if (!xe_vma_is_cpu_addr_mirror(vma))
bind_op_commit(vm, tile, pt_update_ops, vma, fence,
- fence2);
+ fence2, false);
break;
}
case DRM_GPUVA_OP_DRIVER:
@@ -2257,11 +2295,15 @@ static const struct xe_migrate_pt_update_ops userptr_migrate_ops = {
.pre_commit = xe_pt_userptr_pre_commit,
};
+#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
static const struct xe_migrate_pt_update_ops svm_migrate_ops = {
.populate = xe_vm_populate_pgtable,
.clear = xe_migrate_clear_pgtable_callback,
.pre_commit = xe_pt_svm_pre_commit,
};
+#else
+static const struct xe_migrate_pt_update_ops svm_migrate_ops;
+#endif
/**
* xe_pt_update_ops_run() - Run PT update operations
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 5e65830dad25..2dbf4066d86f 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -340,7 +340,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
if (xe_device_get_root_tile(xe)->mem.vram.usable_size)
config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM;
- if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_GPUSVM))
+ if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_XE_GPUSVM))
config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR;
config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 9475e3f74958..fc8447a838c4 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -173,6 +173,9 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
if (xa_empty(&sr->xa))
return;
+ if (IS_SRIOV_VF(gt_to_xe(gt)))
+ return;
+
xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name);
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
diff --git a/drivers/gpu/drm/xe/xe_ring_ops_types.h b/drivers/gpu/drm/xe/xe_ring_ops_types.h
index 1ae56e2ee7b4..d7e3e150a9a5 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops_types.h
+++ b/drivers/gpu/drm/xe/xe_ring_ops_types.h
@@ -8,7 +8,7 @@
struct xe_sched_job;
-#define MAX_JOB_SIZE_DW 48
+#define MAX_JOB_SIZE_DW 58
#define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4)
/**
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 13bb62d3e615..29e694bb1219 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -258,9 +258,6 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
rtp_get_context(ctx, &hwe, &gt, &xe);
- if (IS_SRIOV_VF(xe))
- return;
-
xe_assert(xe, entries);
for (entry = entries; entry - entries < n_entries; entry++) {
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index f8fe61e25518..1d43e183ca21 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -60,7 +60,8 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3
bo = xe_managed_bo_create_pin_map(xe, tile, size,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_PINNED_NORESTORE);
if (IS_ERR(bo)) {
drm_err(&xe->drm, "Failed to prepare %uKiB BO for SA manager (%pe)\n",
size / SZ_1K, bo);
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c
index cb813b337fd3..1f710b3fc599 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.c
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.c
@@ -10,6 +10,7 @@
#include <linux/pci.h>
#include <linux/sysfs.h>
+#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_heci_gsc.h"
@@ -28,20 +29,32 @@
* This is implemented by loading the driver with bare minimum (no drm card) to allow the firmware
* to be flashed through mei and collect telemetry. The driver's probe flow is modified
* such that it enters survivability mode when pcode initialization is incomplete and boot status
- * denotes a failure. The driver then populates the survivability_mode PCI sysfs indicating
- * survivability mode and provides additional information required for debug
+ * denotes a failure.
*
- * KMD exposes below admin-only readable sysfs in survivability mode
+ * Survivability mode can also be entered manually using the survivability mode attribute available
+ * through configfs which is beneficial in several usecases. It can be used to address scenarios
+ * where pcode does not detect failure or for validation purposes. It can also be used in
+ * In-Field-Repair (IFR) to repair a single card without impacting the other cards in a node.
*
- * device/survivability_mode: The presence of this file indicates that the card is in survivability
- * mode. Also, provides additional information on why the driver entered
- * survivability mode.
+ * Use below command enable survivability mode manually::
*
- * Capability Information - Provides boot status
- * Postcode Information - Provides information about the failure
- * Overflow Information - Provides history of previous failures
- * Auxiliary Information - Certain failures may have information in
- * addition to postcode information
+ * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode
+ *
+ * Refer :ref:`xe_configfs` for more details on how to use configfs
+ *
+ * Survivability mode is indicated by the below admin-only readable sysfs which provides additional
+ * debug information::
+ *
+ * /sys/bus/pci/devices/<device>/surivability_mode
+ *
+ * Capability Information:
+ * Provides boot status
+ * Postcode Information:
+ * Provides information about the failure
+ * Overflow Information
+ * Provides history of previous failures
+ * Auxiliary Information
+ * Certain failures may have information in addition to postcode information
*/
static u32 aux_history_offset(u32 reg_value)
@@ -133,6 +146,7 @@ static void xe_survivability_mode_fini(void *arg)
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
struct device *dev = &pdev->dev;
+ xe_configfs_clear_survivability_mode(pdev);
sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
}
@@ -186,24 +200,41 @@ bool xe_survivability_mode_is_enabled(struct xe_device *xe)
return xe->survivability.mode;
}
-/*
- * survivability_mode_requested - check if it's possible to enable
- * survivability mode and that was requested by firmware
+/**
+ * xe_survivability_mode_is_requested - check if it's possible to enable survivability
+ * mode that was requested by firmware or userspace
+ * @xe: xe device instance
*
- * This function reads the boot status from Pcode.
+ * This function reads configfs and boot status from Pcode.
*
* Return: true if platform support is available and boot status indicates
- * failure, false otherwise.
+ * failure or if survivability mode is requested, false otherwise.
*/
-static bool survivability_mode_requested(struct xe_device *xe)
+bool xe_survivability_mode_is_requested(struct xe_device *xe)
{
struct xe_survivability *survivability = &xe->survivability;
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
u32 data;
+ bool survivability_mode;
- if (!IS_DGFX(xe) || xe->info.platform < XE_BATTLEMAGE || IS_SRIOV_VF(xe))
+ if (!IS_DGFX(xe) || IS_SRIOV_VF(xe))
return false;
+ survivability_mode = xe_configfs_get_survivability_mode(pdev);
+
+ if (xe->info.platform < XE_BATTLEMAGE) {
+ if (survivability_mode) {
+ dev_err(&pdev->dev, "Survivability Mode is not supported on this card\n");
+ xe_configfs_clear_survivability_mode(pdev);
+ }
+ return false;
+ }
+
+ /* Enable survivability mode if set via configfs */
+ if (survivability_mode)
+ return true;
+
data = xe_mmio_read32(mmio, PCODE_SCRATCH(0));
survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data);
@@ -226,7 +257,7 @@ int xe_survivability_mode_enable(struct xe_device *xe)
struct xe_survivability_info *info;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- if (!survivability_mode_requested(xe))
+ if (!xe_survivability_mode_is_requested(xe))
return 0;
survivability->size = MAX_SCRATCH_MMIO;
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h
index d7e64885570d..02231c2bf008 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.h
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.h
@@ -12,5 +12,6 @@ struct xe_device;
int xe_survivability_mode_enable(struct xe_device *xe);
bool xe_survivability_mode_is_enabled(struct xe_device *xe);
+bool xe_survivability_mode_is_requested(struct xe_device *xe);
#endif /* _XE_SURVIVABILITY_MODE_H_ */
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index f8c128524d9f..56b18a293bbc 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -4,6 +4,7 @@
*/
#include "xe_bo.h"
+#include "xe_gt_stats.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_module.h"
@@ -339,6 +340,8 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w)
up_write(&vm->lock);
}
+#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
+
static struct xe_vram_region *page_to_vr(struct page *page)
{
return container_of(page_pgmap(page), struct xe_vram_region, pagemap);
@@ -577,6 +580,8 @@ static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
.copy_to_ram = xe_svm_copy_to_ram,
};
+#endif
+
static const struct drm_gpusvm_ops gpusvm_ops = {
.range_alloc = xe_svm_range_alloc,
.range_free = xe_svm_range_free,
@@ -650,6 +655,7 @@ static bool xe_svm_range_is_valid(struct xe_svm_range *range,
return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id);
}
+#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
{
return &tile->mem.vram;
@@ -711,12 +717,21 @@ unlock:
return err;
}
+#else
+static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
+ struct xe_svm_range *range,
+ const struct drm_gpusvm_ctx *ctx)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
/**
* xe_svm_handle_pagefault() - SVM handle page fault
* @vm: The VM.
* @vma: The CPU address mirror VMA.
- * @tile: The tile upon the fault occurred.
+ * @gt: The gt upon the fault occurred.
* @fault_addr: The GPU fault address.
* @atomic: The fault atomic access bit.
*
@@ -726,7 +741,7 @@ unlock:
* Return: 0 on success, negative error code on error.
*/
int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_tile *tile, u64 fault_addr,
+ struct xe_gt *gt, u64 fault_addr,
bool atomic)
{
struct drm_gpusvm_ctx ctx = {
@@ -740,12 +755,15 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
struct drm_gpusvm_range *r;
struct drm_exec exec;
struct dma_fence *fence;
+ struct xe_tile *tile = gt_to_tile(gt);
ktime_t end = 0;
int err;
lockdep_assert_held_write(&vm->lock);
xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1);
+
retry:
/* Always process UNMAPs first so view SVM ranges is current */
err = xe_svm_garbage_collector(vm);
@@ -866,6 +884,7 @@ int xe_svm_bo_evict(struct xe_bo *bo)
}
#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
+
static struct drm_pagemap_device_addr
xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
struct device *dev,
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index e059590e5076..3d441eb1f7ea 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -6,16 +6,19 @@
#ifndef _XE_SVM_H_
#define _XE_SVM_H_
+#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
+
#include <drm/drm_pagemap.h>
#include <drm/drm_gpusvm.h>
#define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER
struct xe_bo;
-struct xe_vram_region;
+struct xe_gt;
struct xe_tile;
struct xe_vm;
struct xe_vma;
+struct xe_vram_region;
/** struct xe_svm_range - SVM range */
struct xe_svm_range {
@@ -43,7 +46,6 @@ struct xe_svm_range {
u8 skip_migrate :1;
};
-#if IS_ENABLED(CONFIG_DRM_GPUSVM)
/**
* xe_svm_range_pages_valid() - SVM range pages valid
* @range: SVM range
@@ -64,7 +66,7 @@ void xe_svm_fini(struct xe_vm *vm);
void xe_svm_close(struct xe_vm *vm);
int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_tile *tile, u64 fault_addr,
+ struct xe_gt *gt, u64 fault_addr,
bool atomic);
bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end);
@@ -72,7 +74,50 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end);
int xe_svm_bo_evict(struct xe_bo *bo);
void xe_svm_range_debug(struct xe_svm_range *range, const char *operation);
+
+/**
+ * xe_svm_range_has_dma_mapping() - SVM range has DMA mapping
+ * @range: SVM range
+ *
+ * Return: True if SVM range has a DMA mapping, False otherwise
+ */
+static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range)
+{
+ lockdep_assert_held(&range->base.gpusvm->notifier_lock);
+ return range->base.flags.has_dma_mapping;
+}
+
+#define xe_svm_assert_in_notifier(vm__) \
+ lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock)
+
+#define xe_svm_notifier_lock(vm__) \
+ drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm)
+
+#define xe_svm_notifier_unlock(vm__) \
+ drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm)
+
#else
+#include <linux/interval_tree.h>
+
+struct drm_pagemap_device_addr;
+struct xe_bo;
+struct xe_gt;
+struct xe_vm;
+struct xe_vma;
+struct xe_tile;
+struct xe_vram_region;
+
+#define XE_INTERCONNECT_VRAM 1
+
+struct xe_svm_range {
+ struct {
+ struct interval_tree_node itree;
+ const struct drm_pagemap_device_addr *dma_addr;
+ } base;
+ u32 tile_present;
+ u32 tile_invalidated;
+};
+
static inline bool xe_svm_range_pages_valid(struct xe_svm_range *range)
{
return false;
@@ -102,7 +147,7 @@ void xe_svm_close(struct xe_vm *vm)
static inline
int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_tile *tile, u64 fault_addr,
+ struct xe_gt *gt, u64 fault_addr,
bool atomic)
{
return 0;
@@ -124,27 +169,16 @@ static inline
void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
{
}
-#endif
-/**
- * xe_svm_range_has_dma_mapping() - SVM range has DMA mapping
- * @range: SVM range
- *
- * Return: True if SVM range has a DMA mapping, False otherwise
- */
-static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range)
+#define xe_svm_assert_in_notifier(...) do {} while (0)
+#define xe_svm_range_has_dma_mapping(...) false
+
+static inline void xe_svm_notifier_lock(struct xe_vm *vm)
{
- lockdep_assert_held(&range->base.gpusvm->notifier_lock);
- return range->base.flags.has_dma_mapping;
}
-#define xe_svm_assert_in_notifier(vm__) \
- lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock)
-
-#define xe_svm_notifier_lock(vm__) \
- drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm)
-
-#define xe_svm_notifier_unlock(vm__) \
- drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm)
-
+static inline void xe_svm_notifier_unlock(struct xe_vm *vm)
+{
+}
+#endif
#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index fb0eda3d5682..2741849bbf4d 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -92,6 +92,8 @@
struct uc_fw_entry {
enum xe_platform platform;
+ enum xe_gt_type gt_type;
+
struct {
const char *path;
u16 major;
@@ -106,32 +108,37 @@ struct fw_blobs_by_type {
u32 count;
};
-#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \
- fw_def(BATTLEMAGE, major_ver(xe, guc, bmg, 70, 29, 2)) \
- fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 29, 2)) \
- fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 29, 2)) \
- fw_def(DG2, major_ver(i915, guc, dg2, 70, 29, 2)) \
- fw_def(DG1, major_ver(i915, guc, dg1, 70, 29, 2)) \
- fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 29, 2)) \
- fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 29, 2)) \
- fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 29, 2)) \
- fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) \
- fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 29, 2))
+/*
+ * Add an "ANY" define just to convey the meaning it's given here.
+ */
+#define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED
+
+#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \
+ fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \
+ fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \
+ fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \
+ fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \
+ fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \
+ fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \
+ fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \
+ fw_def(ALDERLAKE_S, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \
+ fw_def(ROCKETLAKE, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \
+ fw_def(TIGERLAKE, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1))
#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \
- fw_def(BATTLEMAGE, no_ver(xe, huc, bmg)) \
- fw_def(LUNARLAKE, no_ver(xe, huc, lnl)) \
- fw_def(METEORLAKE, no_ver(i915, huc_gsc, mtl)) \
- fw_def(DG1, no_ver(i915, huc, dg1)) \
- fw_def(ALDERLAKE_P, no_ver(i915, huc, tgl)) \
- fw_def(ALDERLAKE_S, no_ver(i915, huc, tgl)) \
- fw_def(ROCKETLAKE, no_ver(i915, huc, tgl)) \
- fw_def(TIGERLAKE, no_ver(i915, huc, tgl))
+ fw_def(BATTLEMAGE, GT_TYPE_ANY, no_ver(xe, huc, bmg)) \
+ fw_def(LUNARLAKE, GT_TYPE_ANY, no_ver(xe, huc, lnl)) \
+ fw_def(METEORLAKE, GT_TYPE_ANY, no_ver(i915, huc_gsc, mtl)) \
+ fw_def(DG1, GT_TYPE_ANY, no_ver(i915, huc, dg1)) \
+ fw_def(ALDERLAKE_P, GT_TYPE_ANY, no_ver(i915, huc, tgl)) \
+ fw_def(ALDERLAKE_S, GT_TYPE_ANY, no_ver(i915, huc, tgl)) \
+ fw_def(ROCKETLAKE, GT_TYPE_ANY, no_ver(i915, huc, tgl)) \
+ fw_def(TIGERLAKE, GT_TYPE_ANY, no_ver(i915, huc, tgl))
/* for the GSC FW we match the compatibility version and not the release one */
#define XE_GSC_FIRMWARE_DEFS(fw_def, major_ver) \
- fw_def(LUNARLAKE, major_ver(xe, gsc, lnl, 104, 1, 0)) \
- fw_def(METEORLAKE, major_ver(i915, gsc, mtl, 102, 1, 0))
+ fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, gsc, lnl, 104, 1, 0)) \
+ fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, gsc, mtl, 102, 1, 0))
#define MAKE_FW_PATH(dir__, uc__, shortname__, version__) \
__stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin"
@@ -159,12 +166,13 @@ struct fw_blobs_by_type {
a, b, c }
/* All blobs need to be declared via MODULE_FIRMWARE() */
-#define XE_UC_MODULE_FIRMWARE(platform__, fw_filename) \
+#define XE_UC_MODULE_FIRMWARE(platform__, gt_type__, fw_filename) \
MODULE_FIRMWARE(fw_filename);
-#define XE_UC_FW_ENTRY(platform__, entry__) \
+#define XE_UC_FW_ENTRY(platform__, gt_type__, entry__) \
{ \
.platform = XE_ ## platform__, \
+ .gt_type = XE_ ## gt_type__, \
entry__, \
},
@@ -222,30 +230,38 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
[XE_UC_FW_TYPE_HUC] = { entries_huc, ARRAY_SIZE(entries_huc) },
[XE_UC_FW_TYPE_GSC] = { entries_gsc, ARRAY_SIZE(entries_gsc) },
};
- static const struct uc_fw_entry *entries;
+ struct xe_gt *gt = uc_fw_to_gt(uc_fw);
enum xe_platform p = xe->info.platform;
+ const struct uc_fw_entry *entries;
u32 count;
int i;
- xe_assert(xe, uc_fw->type < ARRAY_SIZE(blobs_all));
+ xe_gt_assert(gt, uc_fw->type < ARRAY_SIZE(blobs_all));
+ xe_gt_assert(gt, gt->info.type != XE_GT_TYPE_UNINITIALIZED);
+
entries = blobs_all[uc_fw->type].entries;
count = blobs_all[uc_fw->type].count;
for (i = 0; i < count && p <= entries[i].platform; i++) {
- if (p == entries[i].platform) {
- uc_fw->path = entries[i].path;
- uc_fw->versions.wanted.major = entries[i].major;
- uc_fw->versions.wanted.minor = entries[i].minor;
- uc_fw->versions.wanted.patch = entries[i].patch;
- uc_fw->full_ver_required = entries[i].full_ver_required;
-
- if (uc_fw->type == XE_UC_FW_TYPE_GSC)
- uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY;
- else
- uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE;
-
- break;
- }
+ if (p != entries[i].platform)
+ continue;
+
+ if (entries[i].gt_type != XE_GT_TYPE_ANY &&
+ entries[i].gt_type != gt->info.type)
+ continue;
+
+ uc_fw->path = entries[i].path;
+ uc_fw->versions.wanted.major = entries[i].major;
+ uc_fw->versions.wanted.minor = entries[i].minor;
+ uc_fw->versions.wanted.patch = entries[i].patch;
+ uc_fw->full_ver_required = entries[i].full_ver_required;
+
+ if (uc_fw->type == XE_UC_FW_TYPE_GSC)
+ uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY;
+ else
+ uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE;
+
+ break;
}
}
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 60303998bd61..0c69ef6b5ec5 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2049,7 +2049,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
- args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
+ args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
+ !xe->info.needs_scratch))
return -EINVAL;
if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
@@ -2201,6 +2202,20 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
}
#endif
+static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
+{
+ if (!xe_vm_in_fault_mode(vm))
+ return false;
+
+ if (!xe_vm_has_scratch(vm))
+ return false;
+
+ if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
+ return false;
+
+ return true;
+}
+
/*
* Create operations list from IOCTL arguments, setup operations fields so parse
* and commit steps are decoupled from IOCTL arguments. This step can fail.
@@ -2273,6 +2288,8 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
op->map.pat_index = pat_index;
+ op->map.invalidate_on_bind =
+ __xe_vm_needs_clear_scratch_pages(vm, flags);
} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
op->prefetch.region = prefetch_region;
}
@@ -2472,8 +2489,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
return PTR_ERR(vma);
op->map.vma = vma;
- if ((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
- !op->map.is_cpu_addr_mirror)
+ if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
+ !op->map.is_cpu_addr_mirror) ||
+ op->map.invalidate_on_bind)
xe_vma_ops_incr_pt_update_ops(vops,
op->tile_mask);
break;
@@ -2726,9 +2744,10 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
- err = vma_lock_and_validate(exec, op->map.vma,
- !xe_vm_in_fault_mode(vm) ||
- op->map.immediate);
+ if (!op->map.invalidate_on_bind)
+ err = vma_lock_and_validate(exec, op->map.vma,
+ !xe_vm_in_fault_mode(vm) ||
+ op->map.immediate);
break;
case DRM_GPUVA_OP_REMAP:
err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
@@ -3109,7 +3128,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
(!xe_vm_in_fault_mode(vm) ||
- !IS_ENABLED(CONFIG_DRM_GPUSVM)))) {
+ !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
err = -EINVAL;
goto free_bind_ops;
}
@@ -3243,7 +3262,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
XE_64K_PAGE_MASK) ||
XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
- return -EINVAL;
+ return -EINVAL;
}
}
@@ -3251,7 +3270,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
if (bo->cpu_caching) {
if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
- return -EINVAL;
+ return -EINVAL;
}
} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
/*
@@ -3260,7 +3279,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
* how it was mapped on the CPU. Just assume is it
* potentially cached on CPU side.
*/
- return -EINVAL;
+ return -EINVAL;
}
/* If a BO is protected it can only be mapped if the key is still valid */
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 84fa41b9fa20..1662604c4486 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -330,6 +330,8 @@ struct xe_vma_op_map {
bool is_cpu_addr_mirror;
/** @dumpable: whether BO is dumped on GPU hang */
bool dumpable;
+ /** @invalidate: invalidate the VMA before bind */
+ bool invalidate_on_bind;
/** @pat_index: The pat index to use for this operation. */
u16 pat_index;
};
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c
index b1f81dca610d..e421a74fb87c 100644
--- a/drivers/gpu/drm/xe/xe_vram.c
+++ b/drivers/gpu/drm/xe/xe_vram.c
@@ -49,7 +49,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size)
*/
static void resize_vram_bar(struct xe_device *xe)
{
- u64 force_vram_bar_size = xe_modparam.force_vram_bar_size;
+ int force_vram_bar_size = xe_modparam.force_vram_bar_size;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
struct pci_bus *root = pdev->bus;
resource_size_t current_size;
@@ -66,6 +66,9 @@ static void resize_vram_bar(struct xe_device *xe)
if (!bar_size_mask)
return;
+ if (force_vram_bar_size < 0)
+ return;
+
/* set to a specific size? */
if (force_vram_bar_size) {
u32 bar_size_bit;
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 24f644c0a673..6f6563cc7430 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -230,6 +230,18 @@ static const struct xe_rtp_entry_sr gt_was[] = {
XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
},
+ /* Xe2_HPG */
+
+ { XE_RTP_NAME("16025250150"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001)),
+ XE_RTP_ACTIONS(SET(LSN_VC_REG2,
+ LSN_LNI_WGT(1) |
+ LSN_LNE_WGT(1) |
+ LSN_DIM_X_WGT(1) |
+ LSN_DIM_Y_WGT(1) |
+ LSN_DIM_Z_WGT(1)))
+ },
+
/* Xe2_HPM */
{ XE_RTP_NAME("16021867713"),
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 9b9e176992a8..9efc5accd43d 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -57,3 +57,5 @@ no_media_l3 MEDIA_VERSION(3000)
GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0)
16023105232 GRAPHICS_VERSION_RANGE(2001, 3001)
MEDIA_VERSION_RANGE(1301, 3000)
+16026508708 GRAPHICS_VERSION_RANGE(1200, 3001)
+ MEDIA_VERSION_RANGE(1300, 3000)
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 616916985e3f..9c08738c3b91 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -917,7 +917,11 @@ struct drm_xe_gem_mmap_offset {
* struct drm_xe_vm_create - Input of &DRM_IOCTL_XE_VM_CREATE
*
* The @flags can be:
- * - %DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE
+ * - %DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE - Map the whole virtual address
+ * space of the VM to scratch page. A vm_bind would overwrite the scratch
+ * page mapping. This flag is mutually exclusive with the
+ * %DRM_XE_VM_CREATE_FLAG_FAULT_MODE flag, with an exception of on x2 and
+ * xe3 platform.
* - %DRM_XE_VM_CREATE_FLAG_LR_MODE - An LR, or Long Running VM accepts
* exec submissions to its exec_queues that don't have an upper time
* limit on the job execution time. But exec submissions to these