diff options
Diffstat (limited to 'drivers/gpu/drm/xe')
349 files changed, 31304 insertions, 9639 deletions
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 2169bc969ea1..4b288eb3f5b0 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -1,7 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE - tristate "Intel Xe Graphics" - depends on DRM && PCI && (m || (y && KUNIT=y)) + tristate "Intel Xe2 Graphics" + depends on DRM && PCI + depends on KUNIT || !KUNIT + depends on INTEL_VSEC || !INTEL_VSEC + depends on X86_PLATFORM_DEVICES || !(X86 && ACPI) + depends on PAGE_SIZE_4KB || COMPILE_TEST || BROKEN select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular # the shmem_readpage() which depends upon tmpfs @@ -9,7 +13,6 @@ config DRM_XE select TMPFS select DRM_BUDDY select DRM_CLIENT_SELECTION - select DRM_EXEC select DRM_KMS_HELPER select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n select DRM_PANEL @@ -27,10 +30,8 @@ config DRM_XE select BACKLIGHT_CLASS_DEVICE if ACPI select INPUT if ACPI select ACPI_VIDEO if X86 && ACPI - select X86_PLATFORM_DEVICES if X86 && ACPI select ACPI_WMI if X86 && ACPI select SYNC_FILE - select IOSF_MBI select CRC32 select SND_HDA_I915 if SND_HDA_CORE select CEC_CORE if CEC_NOTIFIER @@ -38,14 +39,16 @@ config DRM_XE select DRM_TTM select DRM_TTM_HELPER select DRM_EXEC + select DRM_GPUSVM if !UML && DEVICE_PRIVATE select DRM_GPUVM select DRM_SCHED select MMU_NOTIFIER select WANT_DEV_COREDUMP select AUXILIARY_BUS - select HMM_MIRROR + select REGMAP if I2C help - Experimental driver for Intel Xe series GPUs + Driver for Intel Xe2 series GPUs and later. Experimental support + for Xe series is also available. If "M" is selected, the module will be called xe. @@ -84,16 +87,18 @@ config DRM_XE_GPUSVM Enable this option if you want support for CPU to GPU address mirroring. - If in doubut say "Y". + If in doubt say "Y". 
-config DRM_XE_DEVMEM_MIRROR - bool "Enable device memory mirror" +config DRM_XE_PAGEMAP + bool "Enable device memory pool for SVM" depends on DRM_XE_GPUSVM select GET_FREE_REGION default y help - Disable this option only if you want to compile out without device - memory mirror. Will reduce KMD memory footprint when disabled. + Disable this option only if you don't want to expose local device + memory for SVM. Will reduce KMD memory footprint when disabled. + + If in doubt say "Y". config DRM_XE_FORCE_PROBE string "Force probe xe for selected Intel hardware IDs" diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index 0d749ed44878..01227c77f6d7 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -40,23 +40,23 @@ config DRM_XE_DEBUG_VM If in doubt, say "N". -config DRM_XE_DEBUG_MEMIRQ - bool "Enable extra memirq debugging" +config DRM_XE_DEBUG_SRIOV + bool "Enable extra SR-IOV debugging" default n + imply DRM_XE_DEBUG_MEMIRQ help - Choose this option to enable additional debugging info for - memory based interrupts. + Enable extra SR-IOV debugging info. Recommended for driver developers only. If in doubt, say "N". -config DRM_XE_DEBUG_SRIOV - bool "Enable extra SR-IOV debugging" +config DRM_XE_DEBUG_MEMIRQ + bool "Enable extra memirq debugging" default n - select DRM_XE_DEBUG_MEMIRQ help - Enable extra SR-IOV debugging info. + Choose this option to enable additional debugging info for + memory based interrupts. Recommended for driver developers only. @@ -86,12 +86,17 @@ config DRM_XE_KUNIT_TEST If in doubt, say "N". -config DRM_XE_LARGE_GUC_BUFFER - bool "Enable larger guc log buffer" +config DRM_XE_DEBUG_GUC + bool "Enable extra GuC related debug options" + depends on DRM_XE_DEBUG default n + select STACKDEPOT help Choose this option when debugging guc issues. - Buffer should be large enough for complex issues. 
+ The GuC log buffer is increased to the maximum allowed, which should + be large enough for complex issues. The tracking of FAST_REQ messages + is extended to include a record of the calling stack, which is then + dumped on a FAST_REQ error notification. Recommended for driver developers only. @@ -99,6 +104,7 @@ config DRM_XE_LARGE_GUC_BUFFER config DRM_XE_USERPTR_INVAL_INJECT bool "Inject userptr invalidation -EINVAL errors" + depends on DRM_GPUSVM default n help Choose this option when debugging error paths that diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index e4bf484d4121..62be4a5227e4 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -21,6 +21,13 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ $(src)/xe_wa_oob.rules $(call cmd,wa_oob) +generated_device_oob := $(obj)/generated/xe_device_wa_oob.c $(obj)/generated/xe_device_wa_oob.h +quiet_cmd_device_wa_oob = GEN $(notdir $(generated_device_oob)) + cmd_device_wa_oob = mkdir -p $(@D); $^ $(generated_device_oob) +$(obj)/generated/%_device_wa_oob.c $(obj)/generated/%_device_wa_oob.h: $(obj)/xe_gen_wa_oob \ + $(src)/xe_device_wa_oob.rules + $(call cmd,device_wa_oob) + # Please keep these build lists sorted! 
# core driver code @@ -28,6 +35,7 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ xe-y += xe_bb.o \ xe_bo.o \ xe_bo_evict.o \ + xe_dep_scheduler.o \ xe_devcoredump.o \ xe_device.o \ xe_device_sysfs.o \ @@ -50,10 +58,8 @@ xe-y += xe_bb.o \ xe_gt_freq.o \ xe_gt_idle.o \ xe_gt_mcr.o \ - xe_gt_pagefault.o \ xe_gt_sysfs.o \ xe_gt_throttle.o \ - xe_gt_tlb_invalidation.o \ xe_gt_topology.o \ xe_guc.o \ xe_guc_ads.o \ @@ -66,27 +72,35 @@ xe-y += xe_bb.o \ xe_guc_id_mgr.o \ xe_guc_klv_helpers.o \ xe_guc_log.o \ + xe_guc_pagefault.o \ xe_guc_pc.o \ xe_guc_submit.o \ + xe_guc_tlb_inval.o \ xe_heci_gsc.o \ xe_huc.o \ xe_hw_engine.o \ xe_hw_engine_class_sysfs.o \ xe_hw_engine_group.o \ + xe_hw_error.o \ xe_hw_fence.o \ xe_irq.o \ + xe_late_bind_fw.o \ xe_lrc.o \ xe_migrate.o \ xe_mmio.o \ + xe_mmio_gem.o \ xe_mocs.o \ xe_module.o \ + xe_nvm.o \ xe_oa.o \ xe_observation.o \ + xe_pagefault.o \ xe_pat.o \ xe_pci.o \ xe_pcode.o \ xe_pm.o \ xe_preempt_fence.o \ + xe_psmi.o \ xe_pt.o \ xe_pt_walk.o \ xe_pxp.o \ @@ -106,6 +120,8 @@ xe-y += xe_bb.o \ xe_sync.o \ xe_tile.o \ xe_tile_sysfs.o \ + xe_tlb_inval.o \ + xe_tlb_inval_job.o \ xe_trace.o \ xe_trace_bo.o \ xe_trace_guc.o \ @@ -116,7 +132,9 @@ xe-y += xe_bb.o \ xe_tuning.o \ xe_uc.o \ xe_uc_fw.o \ + xe_validation.o \ xe_vm.o \ + xe_vm_madvise.o \ xe_vram.o \ xe_vram_freq.o \ xe_vsec.o \ @@ -124,8 +142,9 @@ xe-y += xe_bb.o \ xe_wait_user_fence.o \ xe_wopcm.o -xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o +xe-$(CONFIG_I2C) += xe_i2c.o xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o +xe-$(CONFIG_DRM_GPUSVM) += xe_userptr.o # graphics hardware monitoring (HWMON) support xe-$(CONFIG_HWMON) += xe_hwmon.o @@ -139,7 +158,9 @@ xe-y += \ xe_guc_relay.o \ xe_memirq.o \ xe_sriov.o \ - xe_sriov_vf.o + xe_sriov_vf.o \ + xe_sriov_vf_ccs.o \ + xe_tile_sriov_vf.o xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ @@ -153,7 +174,19 @@ xe-$(CONFIG_PCI_IOV) += \ xe_lmtt_2l.o \ xe_lmtt_ml.o \ xe_pci_sriov.o \ - xe_sriov_pf.o + 
xe_sriov_packet.o \ + xe_sriov_pf.o \ + xe_sriov_pf_control.o \ + xe_sriov_pf_debugfs.o \ + xe_sriov_pf_migration.o \ + xe_sriov_pf_provision.o \ + xe_sriov_pf_service.o \ + xe_sriov_pf_sysfs.o \ + xe_tile_sriov_pf_debugfs.o + +ifdef CONFIG_XE_VFIO_PCI + xe-$(CONFIG_PCI_IOV) += xe_sriov_vfio.o +endif # include helpers for tests even when XE is built-in ifdef CONFIG_DRM_XE_KUNIT_TEST @@ -180,7 +213,6 @@ $(obj)/i915-display/%.o: $(srctree)/drivers/gpu/drm/i915/display/%.c FORCE # Display code specific to xe xe-$(CONFIG_DRM_XE_DISPLAY) += \ display/ext/i915_irq.o \ - display/ext/i915_utils.o \ display/intel_bo.o \ display/intel_fb_bo.o \ display/intel_fbdev_fb.o \ @@ -191,7 +223,9 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ display/xe_dsb_buffer.o \ display/xe_fb_pin.o \ display/xe_hdcp_gsc.o \ + display/xe_panic.o \ display/xe_plane_initial.o \ + display/xe_stolen.o \ display/xe_tdf.o # SOC code shared with i915 @@ -204,20 +238,23 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/icl_dsi.o \ i915-display/intel_alpm.o \ i915-display/intel_atomic.o \ - i915-display/intel_atomic_plane.o \ i915-display/intel_audio.o \ i915-display/intel_backlight.o \ i915-display/intel_bios.o \ i915-display/intel_bw.o \ + i915-display/intel_casf.o \ i915-display/intel_cdclk.o \ i915-display/intel_cmtg.o \ i915-display/intel_color.o \ + i915-display/intel_colorop.o \ + i915-display/intel_color_pipeline.o \ i915-display/intel_combo_phy.o \ i915-display/intel_connector.o \ i915-display/intel_crtc.o \ i915-display/intel_crtc_state_dump.o \ i915-display/intel_cursor.o \ i915-display/intel_cx0_phy.o \ + i915-display/intel_dbuf_bw.o \ i915-display/intel_ddi.o \ i915-display/intel_ddi_buf_trans.o \ i915-display/intel_display.o \ @@ -229,7 +266,9 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_display_power.o \ i915-display/intel_display_power_map.o \ i915-display/intel_display_power_well.o \ + i915-display/intel_display_rpm.o \ i915-display/intel_display_trace.o \ + 
i915-display/intel_display_utils.o \ i915-display/intel_display_wa.o \ i915-display/intel_dkl_phy.o \ i915-display/intel_dmc.o \ @@ -254,6 +293,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_fbc.o \ i915-display/intel_fdi.o \ i915-display/intel_fifo_underrun.o \ + i915-display/intel_flipq.o \ i915-display/intel_frontbuffer.o \ i915-display/intel_global_state.o \ i915-display/intel_gmbus.o \ @@ -265,11 +305,13 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_hti.o \ i915-display/intel_link_bw.o \ i915-display/intel_lspcon.o \ + i915-display/intel_lt_phy.o \ i915-display/intel_modeset_lock.o \ i915-display/intel_modeset_setup.o \ i915-display/intel_modeset_verify.o \ i915-display/intel_panel.o \ i915-display/intel_pfit.o \ + i915-display/intel_plane.o \ i915-display/intel_pmdemand.o \ i915-display/intel_pch.o \ i915-display/intel_pps.o \ @@ -284,6 +326,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_vga.o \ i915-display/intel_vrr.o \ i915-display/intel_wm.o \ + i915-display/skl_prefill.o \ i915-display/skl_scaler.o \ i915-display/skl_universal_plane.o \ i915-display/skl_watermark.o @@ -305,6 +348,7 @@ ifeq ($(CONFIG_DEBUG_FS),y) xe_gt_stats.o \ xe_guc_debugfs.o \ xe_huc_debugfs.o \ + xe_tile_debugfs.o \ xe_uc_debugfs.o xe-$(CONFIG_PCI_IOV) += xe_gt_sriov_pf_debugfs.o @@ -337,4 +381,4 @@ $(obj)/%.hdrtest: $(src)/%.h FORCE $(call if_changed_dep,hdrtest) uses_generated_oob := $(addprefix $(obj)/, $(xe-y)) -$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h +$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h $(obj)/generated/xe_device_wa_oob.h diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index 448afb86e05c..47756e4674a1 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -117,6 +117,7 @@ enum xe_guc_action { XE_GUC_ACTION_ENTER_S_STATE = 0x501, XE_GUC_ACTION_EXIT_S_STATE = 0x502, XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506, + 
XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509, XE_GUC_ACTION_SCHED_CONTEXT = 0x1000, XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001, XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, @@ -142,6 +143,7 @@ enum xe_guc_action { XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C, XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER = 0x550D, + XE_GUC_ACTION_OPT_IN_FEATURE_KLV = 0x550E, XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000, XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002, XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003, @@ -153,6 +155,8 @@ enum xe_guc_action { XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003, XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004, XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005, + XE_GUC_ACTION_TEST_G2G_SEND = 0xF001, + XE_GUC_ACTION_TEST_G2G_RECV = 0xF002, XE_GUC_ACTION_LIMIT }; @@ -161,6 +165,37 @@ enum xe_guc_preempt_options { XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, }; +enum xe_guc_register_context_param_offsets { + XE_GUC_REGISTER_CONTEXT_DATA_0_MBZ = 0, + XE_GUC_REGISTER_CONTEXT_DATA_1_FLAGS, + XE_GUC_REGISTER_CONTEXT_DATA_2_CONTEXT_INDEX, + XE_GUC_REGISTER_CONTEXT_DATA_3_ENGINE_CLASS, + XE_GUC_REGISTER_CONTEXT_DATA_4_ENGINE_SUBMIT_MASK, + XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER, + XE_GUC_REGISTER_CONTEXT_DATA_6_WQ_DESC_ADDR_UPPER, + XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER, + XE_GUC_REGISTER_CONTEXT_DATA_8_WQ_BUF_BASE_UPPER, + XE_GUC_REGISTER_CONTEXT_DATA_9_WQ_BUF_SIZE, + XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR, + XE_GUC_REGISTER_CONTEXT_MSG_LEN, +}; + +enum xe_guc_register_context_multi_lrc_param_offsets { + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_0_MBZ = 0, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_1_FLAGS, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_2_PARENT_CONTEXT, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_3_ENGINE_CLASS, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_4_ENGINE_SUBMIT_MASK, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER, + 
XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_6_WQ_DESC_ADDR_UPPER, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_8_WQ_BUF_BASE_UPPER, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_9_WQ_BUF_SIZE, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN = 11, +}; + enum xe_guc_report_status { XE_GUC_REPORT_STATUS_UNKNOWN = 0x0, XE_GUC_REPORT_STATUS_ACKED = 0x1, @@ -240,4 +275,7 @@ enum xe_guc_g2g_type { #define XE_G2G_DEREGISTER_TILE REG_GENMASK(15, 12) #define XE_G2G_DEREGISTER_TYPE REG_GENMASK(11, 8) +/* invalid type for XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR */ +#define XE_GUC_CAT_ERR_TYPE_INVALID 0xdeadbeef + #endif diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h index b28c8fa061f7..ce5c59517528 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h @@ -210,6 +210,11 @@ struct slpc_shared_data { u8 reserved_mode_definition[4096]; } __packed; +enum slpc_power_profile { + SLPC_POWER_PROFILE_BASE = 0x0, + SLPC_POWER_PROFILE_POWER_SAVING = 0x1 +}; + /** * DOC: SLPC H2G MESSAGE FORMAT * diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h index 2c627a21648f..ad76b4baf42e 100644 --- a/drivers/gpu/drm/xe/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h @@ -6,8 +6,7 @@ #ifndef _ABI_GUC_ERRORS_ABI_H #define _ABI_GUC_ERRORS_ABI_H -enum xe_guc_response_status { - XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0, +enum xe_guc_response { XE_GUC_RESPONSE_ERROR_PROTOCOL = 0x04, XE_GUC_RESPONSE_INVALID_STATE = 0x0A, XE_GUC_RESPONSE_UNSUPPORTED_VERSION = 0x0B, @@ -21,12 +20,20 @@ enum xe_guc_response_status { XE_GUC_RESPONSE_CANNOT_COMPLETE_ACTION = 0x41, XE_GUC_RESPONSE_INVALID_KLV_DATA = 0x50, XE_GUC_RESPONSE_INVALID_PARAMS = 0x60, + XE_GUC_RESPONSE_INVALID_CONTEXT_INDEX = 
0x61, + XE_GUC_RESPONSE_INVALID_CONTEXT_REGISTRATION = 0x62, + XE_GUC_RESPONSE_INVALID_DOORBELL_ID = 0x63, + XE_GUC_RESPONSE_INVALID_ENGINE_ID = 0x64, XE_GUC_RESPONSE_INVALID_BUFFER_RANGE = 0x70, XE_GUC_RESPONSE_INVALID_BUFFER = 0x71, + XE_GUC_RESPONSE_BUFFER_ALREADY_REGISTERED = 0x72, XE_GUC_RESPONSE_INVALID_GGTT_ADDRESS = 0x80, XE_GUC_RESPONSE_PENDING_ACTION = 0x90, + XE_GUC_RESPONSE_CONTEXT_NOT_REGISTERED = 0x100, + XE_GUC_RESPONSE_CONTEXT_ALREADY_REGISTERED = 0X101, XE_GUC_RESPONSE_INVALID_SIZE = 0x102, XE_GUC_RESPONSE_MALFORMED_KLV = 0x103, + XE_GUC_RESPONSE_INVALID_CONTEXT = 0x104, XE_GUC_RESPONSE_INVALID_KLV_KEY = 0x105, XE_GUC_RESPONSE_DATA_TOO_LARGE = 0x106, XE_GUC_RESPONSE_VF_MIGRATED = 0x107, @@ -40,10 +47,11 @@ enum xe_guc_response_status { XE_GUC_RESPONSE_CTB_NOT_REGISTERED = 0x304, XE_GUC_RESPONSE_CTB_IN_USE = 0x305, XE_GUC_RESPONSE_CTB_INVALID_DESC = 0x306, + XE_GUC_RESPONSE_HW_TIMEOUT = 0x30C, XE_GUC_RESPONSE_CTB_SOURCE_INVALID_DESCRIPTOR = 0x30D, XE_GUC_RESPONSE_CTB_DESTINATION_INVALID_DESCRIPTOR = 0x30E, XE_GUC_RESPONSE_INVALID_CONFIG_STATE = 0x30F, - XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, + XE_GUC_RESPONSE_GENERIC_FAIL = 0xF000, }; enum xe_guc_load_status { @@ -55,6 +63,7 @@ enum xe_guc_load_status { XE_GUC_LOAD_STATUS_HWCONFIG_START = 0x05, XE_GUC_LOAD_STATUS_HWCONFIG_DONE = 0x06, XE_GUC_LOAD_STATUS_HWCONFIG_ERROR = 0x07, + XE_GUC_LOAD_STATUS_BOOTROM_VERSION_MISMATCH = 0x08, XE_GUC_LOAD_STATUS_GDT_DONE = 0x10, XE_GUC_LOAD_STATUS_IDT_DONE = 0x20, XE_GUC_LOAD_STATUS_LAPIC_DONE = 0x30, @@ -67,6 +76,8 @@ enum xe_guc_load_status { XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START, XE_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73, XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74, + XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR = 0x75, + XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG = 0x76, XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END, XE_GUC_LOAD_STATUS_READY = 0xF0, diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h 
b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 7de8f827281f..265a135e7061 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -16,6 +16,8 @@ * +===+=======+==============================================================+ * | 0 | 31:16 | **KEY** - KLV key identifier | * | | | - `GuC Self Config KLVs`_ | + * | | | - `GuC Opt In Feature KLVs`_ | + * | | | - `GuC Scheduling Policies KLVs`_ | * | | | - `GuC VGT Policy KLVs`_ | * | | | - `GuC VF Configuration KLVs`_ | * | | | | @@ -125,6 +127,57 @@ enum { }; /** + * DOC: GuC Opt In Feature KLVs + * + * `GuC KLV`_ keys available for use with OPT_IN_FEATURE_KLV + * + * _`GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE` : 0x4001 + * Adds an extra dword to the XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR G2H + * containing the type of the CAT error. On HW that does not support + * reporting the CAT error type, the extra dword is set to 0xdeadbeef. + * + * _`GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH` : 0x4003 + * This KLV enables the Dynamic Inhibit Context Switch optimization, which + * consists in the GuC setting the CTX_CTRL_INHIBIT_SYN_CTX_SWITCH bit to + * zero in the CTX_CONTEXT_CONTROL register of LRCs that are submitted + * to an oversubscribed engine. This will cause those contexts to be + * switched out immediately if they hit an unsatisfied semaphore wait + * (instead of waiting the full timeslice duration). The bit is instead set + * to one if a single context is queued on the engine, to avoid it being + * switched out if there isn't another context that can run in its place. + */ + +#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_KEY 0x4001 +#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_LEN 0u + +#define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_KEY 0x4003 +#define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_LEN 0u + +/** + * DOC: GuC Scheduling Policies KLVs + * + * `GuC KLV`_ keys available for use with UPDATE_SCHEDULING_POLICIES_KLV. 
+ * + * _`GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD` : 0x1001 + * Some platforms do not allow concurrent execution of RCS and CCS + * workloads from different address spaces. By default, the GuC prioritizes + * RCS submissions over CCS ones, which can lead to CCS workloads being + * significantly (or completely) starved of execution time. This KLV allows + * the driver to specify a quantum (in ms) and a ratio (percentage value + * between 0 and 100), and the GuC will prioritize the CCS for that + * percentage of each quantum. For example, specifying 100ms and 30% will + * make the GuC prioritize the CCS for 30ms of every 100ms. + * Note that this does not necessarily mean that RCS and CCS engines will + * only be active for their percentage of the quantum, as the restriction + * only kicks in if both classes are fully busy with non-compatible address + * spaces; i.e., if one engine is idle or running the same address space, + * a pending job on the other engine will still be submitted to the HW no + * matter what the ratio is. + */ +#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_KEY 0x1001 +#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_LEN 2u + +/** * DOC: GuC VGT Policy KLVs * * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY. 
@@ -362,12 +415,14 @@ enum { */ enum xe_guc_klv_ids { GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002, + GUC_WORKAROUND_KLV_DISABLE_PSMI_INTERRUPTS_AT_C6_ENTRY_RESTORE_AT_EXIT = 0x9004, GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING = 0x9005, GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007, GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008, GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009, GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO = 0x900a, GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH = 0x900b, + GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG = 0x900c, }; #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h index 8a048980ea38..0548b2e0316f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h @@ -5,10 +5,8 @@ #define __I915_GEM_OBJECT_H__ struct dma_fence; -struct i915_sched_attr; -static inline void i915_gem_fence_wait_priority(struct dma_fence *fence, - const struct i915_sched_attr *attr) +static inline void i915_gem_fence_wait_priority_display(struct dma_fence *fence) { } diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index 41d39d67817a..48e3256ba37e 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -6,87 +6,35 @@ #ifndef _I915_GEM_STOLEN_H_ #define _I915_GEM_STOLEN_H_ -#include "xe_ttm_stolen_mgr.h" -#include "xe_res_cursor.h" - -struct xe_bo; - -struct i915_stolen_fb { - struct xe_bo *bo; -}; - -static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, - struct i915_stolen_fb *fb, - u32 size, u32 align, - u32 start, u32 end) -{ - struct xe_bo *bo; - int err; - u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN; - - if (start 
< SZ_4K) - start = SZ_4K; - - if (align) { - size = ALIGN(size, align); - start = ALIGN(start, align); - } - - bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe), - NULL, size, start, end, - ttm_bo_type_kernel, flags, 0); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - bo = NULL; - return err; - } - err = xe_bo_pin(bo); - xe_bo_unlock_vm_held(bo); - - if (err) { - xe_bo_put(fb->bo); - bo = NULL; - } - - fb->bo = bo; - - return err; -} - -static inline int i915_gem_stolen_insert_node(struct xe_device *xe, - struct i915_stolen_fb *fb, - u32 size, u32 align) -{ - /* Not used on xe */ - BUG_ON(1); - return -ENODEV; -} - -static inline void i915_gem_stolen_remove_node(struct xe_device *xe, - struct i915_stolen_fb *fb) -{ - xe_bo_unpin_map_no_vm(fb->bo); - fb->bo = NULL; -} - -#define i915_gem_stolen_initialized(xe) (!!ttm_manager_type(&(xe)->ttm, XE_PL_STOLEN)) -#define i915_gem_stolen_node_allocated(fb) (!!((fb)->bo)) - -static inline u32 i915_gem_stolen_node_offset(struct i915_stolen_fb *fb) -{ - struct xe_res_cursor res; - - xe_res_first(fb->bo->ttm.resource, 0, 4096, &res); - return res.start; -} - -/* Used for < gen4. These are not supported by Xe */ -#define i915_gem_stolen_area_address(xe) (!WARN_ON(1)) -/* Used for gen9 specific WA. 
Gen9 is not supported by Xe */ -#define i915_gem_stolen_area_size(xe) (!WARN_ON(1)) - -#define i915_gem_stolen_node_address(xe, fb) (xe_ttm_stolen_gpu_offset(xe) + \ - i915_gem_stolen_node_offset(fb)) -#define i915_gem_stolen_node_size(fb) ((u64)((fb)->bo->ttm.base.size)) +#include <linux/types.h> + +struct drm_device; +struct intel_stolen_node; + +int i915_gem_stolen_insert_node_in_range(struct intel_stolen_node *node, u64 size, + unsigned int align, u64 start, u64 end); + +int i915_gem_stolen_insert_node(struct intel_stolen_node *node, u64 size, + unsigned int align); + +void i915_gem_stolen_remove_node(struct intel_stolen_node *node); + +bool i915_gem_stolen_initialized(struct drm_device *drm); + +bool i915_gem_stolen_node_allocated(const struct intel_stolen_node *node); + +u32 i915_gem_stolen_node_offset(struct intel_stolen_node *node); + +u64 i915_gem_stolen_area_address(struct drm_device *drm); + +u64 i915_gem_stolen_area_size(struct drm_device *drm); + +u64 i915_gem_stolen_node_address(struct intel_stolen_node *node); + +u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node); + +struct intel_stolen_node *i915_gem_stolen_node_alloc(struct drm_device *drm); + +void i915_gem_stolen_node_free(const struct intel_stolen_node *node); #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 9b7572e06f34..3e79a74ff7de 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -12,8 +12,6 @@ #include <drm/drm_drv.h> -#include "i915_utils.h" -#include "xe_device.h" /* for xe_device_has_flat_ccs() */ #include "xe_device_types.h" static inline struct drm_i915_private *to_i915(const struct drm_device *dev) @@ -26,38 +24,14 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev) #define IS_I915G(dev_priv) (dev_priv && 0) #define IS_I915GM(dev_priv) (dev_priv && 0) #define IS_PINEVIEW(dev_priv) (dev_priv 
&& 0) -#define IS_IVYBRIDGE(dev_priv) (dev_priv && 0) #define IS_VALLEYVIEW(dev_priv) (dev_priv && 0) #define IS_CHERRYVIEW(dev_priv) (dev_priv && 0) #define IS_HASWELL(dev_priv) (dev_priv && 0) #define IS_BROADWELL(dev_priv) (dev_priv && 0) -#define IS_SKYLAKE(dev_priv) (dev_priv && 0) #define IS_BROXTON(dev_priv) (dev_priv && 0) -#define IS_KABYLAKE(dev_priv) (dev_priv && 0) #define IS_GEMINILAKE(dev_priv) (dev_priv && 0) -#define IS_COFFEELAKE(dev_priv) (dev_priv && 0) -#define IS_COMETLAKE(dev_priv) (dev_priv && 0) -#define IS_ICELAKE(dev_priv) (dev_priv && 0) -#define IS_JASPERLAKE(dev_priv) (dev_priv && 0) -#define IS_ELKHARTLAKE(dev_priv) (dev_priv && 0) -#define IS_TIGERLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_TIGERLAKE) -#define IS_ROCKETLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_ROCKETLAKE) -#define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, XE_DG1) -#define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S) -#define IS_ALDERLAKE_P(dev_priv) (IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) || \ - IS_PLATFORM(dev_priv, XE_ALDERLAKE_N)) #define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, XE_DG2) -#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE) -#define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE) -#define IS_BATTLEMAGE(dev_priv) IS_PLATFORM(dev_priv, XE_BATTLEMAGE) -#define IS_PANTHERLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_PANTHERLAKE) - -#define IS_HASWELL_ULT(dev_priv) (dev_priv && 0) -#define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0) #define IS_MOBILE(xe) (xe && 0) -#define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe)) -#define HAS_128_BYTE_Y_TILING(xe) (xe || 1) - #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_scheduler_types.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_scheduler_types.h deleted file mode 100644 index c11130440d31..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_scheduler_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* Copyright © 2025 Intel 
Corporation */ - -#ifndef __I915_SCHEDULER_TYPES_H__ -#define __I915_SCHEDULER_TYPES_H__ - -#define I915_PRIORITY_DISPLAY 0 - -struct i915_sched_attr { - int priority; -}; - -#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h index 1d7c4360e5c0..bcd441dc0fce 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h @@ -3,4 +3,11 @@ * Copyright © 2023 Intel Corporation */ -#include "../../i915/i915_utils.h" +/* for soc/ */ +#ifndef MISSING_CASE +#define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \ + __stringify(x), (long)(x)) +#endif + +/* for a couple of users under i915/display */ +#define i915_inject_probe_failure(unused) ((unused) && 0) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h index 4465c40f8134..b17e3bab23d5 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h @@ -26,8 +26,6 @@ struct i915_vma { struct xe_ggtt_node *node; }; -#define i915_ggtt_clear_scanout(bo) do { } while (0) - #define i915_vma_fence_id(vma) -1 static inline u32 i915_ggtt_offset(const struct i915_vma *vma) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h index a473aa6697d0..4fcd3bf6b76f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h @@ -6,37 +6,6 @@ #ifndef __INTEL_PCODE_H__ #define __INTEL_PCODE_H__ -#include "intel_uncore.h" #include "xe_pcode.h" -static inline int -snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val, - int fast_timeout_us, int slow_timeout_ms) -{ - return xe_pcode_write_timeout(__compat_uncore_to_tile(uncore), mbox, val, - slow_timeout_ms ?: 1); -} - -static inline int -snb_pcode_write(struct intel_uncore *uncore, u32 
mbox, u32 val) -{ - - return xe_pcode_write(__compat_uncore_to_tile(uncore), mbox, val); -} - -static inline int -snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1) -{ - return xe_pcode_read(__compat_uncore_to_tile(uncore), mbox, val, val1); -} - -static inline int -skl_pcode_request(struct intel_uncore *uncore, u32 mbox, - u32 request, u32 reply_mask, u32 reply, - int timeout_base_ms) -{ - return xe_pcode_request(__compat_uncore_to_tile(uncore), mbox, request, reply_mask, reply, - timeout_base_ms); -} - #endif /* __INTEL_PCODE_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h index 0c1e88e36a1e..d93ddacdf743 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -24,13 +24,6 @@ static inline struct xe_mmio *__compat_uncore_to_mmio(struct intel_uncore *uncor return xe_root_tile_mmio(xe); } -static inline struct xe_tile *__compat_uncore_to_tile(struct intel_uncore *uncore) -{ - struct xe_device *xe = container_of(uncore, struct xe_device, uncore); - - return xe_device_get_root_tile(xe); -} - static inline u32 intel_uncore_read(struct intel_uncore *uncore, i915_reg_t i915_reg) { @@ -98,26 +91,6 @@ static inline u32 intel_uncore_rmw(struct intel_uncore *uncore, return xe_mmio_rmw32(__compat_uncore_to_mmio(uncore), reg, clear, set); } -static inline int intel_wait_for_register(struct intel_uncore *uncore, - i915_reg_t i915_reg, u32 mask, - u32 value, unsigned int timeout) -{ - struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - - return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value, - timeout * USEC_PER_MSEC, NULL, false); -} - -static inline int intel_wait_for_register_fw(struct intel_uncore *uncore, - i915_reg_t i915_reg, u32 mask, - u32 value, unsigned int timeout) -{ - struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - - return 
xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value, - timeout * USEC_PER_MSEC, NULL, false); -} - static inline int __intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg, u32 mask, u32 value, unsigned int fast_timeout_us, @@ -139,6 +112,16 @@ __intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg, out_value, atomic); } +static inline int +__intel_wait_for_register_fw(struct intel_uncore *uncore, i915_reg_t i915_reg, + u32 mask, u32 value, unsigned int fast_timeout_us, + unsigned int slow_timeout_ms, u32 *out_value) +{ + return __intel_wait_for_register(uncore, i915_reg, mask, value, + fast_timeout_us, slow_timeout_ms, + out_value); +} + static inline u32 intel_uncore_read_fw(struct intel_uncore *uncore, i915_reg_t i915_reg) { diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h new file mode 100644 index 000000000000..69e1935e9cdf --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2013-2021 Intel Corporation + */ + +#ifndef _VLV_IOSF_SB_H_ +#define _VLV_IOSF_SB_H_ + +#include <linux/types.h> + +#include "vlv_iosf_sb_reg.h" + +struct drm_device; + +enum vlv_iosf_sb_unit { + VLV_IOSF_SB_BUNIT, + VLV_IOSF_SB_CCK, + VLV_IOSF_SB_CCU, + VLV_IOSF_SB_DPIO, + VLV_IOSF_SB_DPIO_2, + VLV_IOSF_SB_FLISDSI, + VLV_IOSF_SB_GPIO, + VLV_IOSF_SB_NC, + VLV_IOSF_SB_PUNIT, +}; + +static inline void vlv_iosf_sb_get(struct drm_device *drm, unsigned long ports) +{ +} +static inline u32 vlv_iosf_sb_read(struct drm_device *drm, enum vlv_iosf_sb_unit unit, u32 addr) +{ + return 0; +} +static inline int vlv_iosf_sb_write(struct drm_device *drm, enum vlv_iosf_sb_unit unit, u32 addr, u32 val) +{ + return 0; +} +static inline void vlv_iosf_sb_put(struct drm_device *drm, unsigned long ports) +{ +} + +#endif /* _VLV_IOSF_SB_H_ */ diff --git 
a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb_reg.h index 949f134ce3cf..cb7fa8e794a6 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb_reg.h @@ -3,4 +3,4 @@ * Copyright © 2023 Intel Corporation */ -#include "../../i915/vlv_sideband_reg.h" +#include "../../i915/vlv_iosf_sb_reg.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h deleted file mode 100644 index ec6f12de5727..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h +++ /dev/null @@ -1,132 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2013-2021 Intel Corporation - */ - -#ifndef _VLV_SIDEBAND_H_ -#define _VLV_SIDEBAND_H_ - -#include <linux/types.h> - -#include "vlv_sideband_reg.h" - -enum pipe; -struct drm_i915_private; - -enum { - VLV_IOSF_SB_BUNIT, - VLV_IOSF_SB_CCK, - VLV_IOSF_SB_CCU, - VLV_IOSF_SB_DPIO, - VLV_IOSF_SB_FLISDSI, - VLV_IOSF_SB_GPIO, - VLV_IOSF_SB_NC, - VLV_IOSF_SB_PUNIT, -}; - -static inline void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports) -{ -} -static inline u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg) -{ - return 0; -} -static inline void vlv_iosf_sb_write(struct drm_i915_private *i915, - u8 port, u32 reg, u32 val) -{ -} -static inline void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports) -{ -} -static inline void vlv_bunit_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg) -{ - return 0; -} -static inline void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val) -{ -} -static inline void vlv_bunit_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_cck_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg) -{ - return 0; -} 
-static inline void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val) -{ -} -static inline void vlv_cck_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_ccu_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg) -{ - return 0; -} -static inline void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val) -{ -} -static inline void vlv_ccu_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_dpio_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_dpio_read(struct drm_i915_private *i915, int pipe, int reg) -{ - return 0; -} -static inline void vlv_dpio_write(struct drm_i915_private *i915, - int pipe, int reg, u32 val) -{ -} -static inline void vlv_dpio_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_flisdsi_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg) -{ - return 0; -} -static inline void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val) -{ -} -static inline void vlv_flisdsi_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_nc_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr) -{ - return 0; -} -static inline void vlv_nc_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_punit_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr) -{ - return 0; -} -static inline int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val) -{ - return 0; -} -static inline void vlv_punit_put(struct drm_i915_private *i915) -{ -} - -#endif /* _VLV_SIDEBAND_H_ */ diff --git a/drivers/gpu/drm/xe/display/ext/i915_utils.c b/drivers/gpu/drm/xe/display/ext/i915_utils.c deleted file mode 100644 index 43b10a2cc508..000000000000 --- a/drivers/gpu/drm/xe/display/ext/i915_utils.c +++ /dev/null @@ -1,26 +0,0 @@ -// 
SPDX-License-Identifier: MIT -/* - * Copyright © 2023 Intel Corporation - */ - -#include "i915_drv.h" - -bool i915_vtd_active(struct drm_i915_private *i915) -{ - if (device_iommu_mapped(i915->drm.dev)) - return true; - - /* Running as a guest, we assume the host is enforcing VT'd */ - return i915_run_as_guest(); -} - -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) - -/* i915 specific, just put here for shutting it up */ -int __i915_inject_probe_error(struct drm_i915_private *i915, int err, - const char *func, int line) -{ - return 0; -} - -#endif diff --git a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c index 27437c22bd70..bad2243b9114 100644 --- a/drivers/gpu/drm/xe/display/intel_bo.c +++ b/drivers/gpu/drm/xe/display/intel_bo.c @@ -5,6 +5,7 @@ #include "xe_bo.h" #include "intel_bo.h" +#include "intel_frontbuffer.h" bool intel_bo_is_tiled(struct drm_gem_object *obj) { @@ -28,10 +29,6 @@ bool intel_bo_is_protected(struct drm_gem_object *obj) return xe_bo_is_protected(gem_to_xe_bo(obj)); } -void intel_bo_flush_if_display(struct drm_gem_object *obj) -{ -} - int intel_bo_fb_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { return drm_gem_prime_mmap(obj, vma); @@ -44,15 +41,60 @@ int intel_bo_read_from_page(struct drm_gem_object *obj, u64 offset, void *dst, i return xe_bo_read(bo, offset, dst, size); } -struct intel_frontbuffer *intel_bo_get_frontbuffer(struct drm_gem_object *obj) +struct xe_frontbuffer { + struct intel_frontbuffer base; + struct drm_gem_object *obj; + struct kref ref; +}; + +struct intel_frontbuffer *intel_bo_frontbuffer_get(struct drm_gem_object *obj) +{ + struct xe_frontbuffer *front; + + front = kmalloc(sizeof(*front), GFP_KERNEL); + if (!front) + return NULL; + + intel_frontbuffer_init(&front->base, obj->dev); + + kref_init(&front->ref); + + drm_gem_object_get(obj); + front->obj = obj; + + return &front->base; +} + +void intel_bo_frontbuffer_ref(struct intel_frontbuffer *_front) { - return NULL; + struct 
xe_frontbuffer *front = + container_of(_front, typeof(*front), base); + + kref_get(&front->ref); +} + +static void frontbuffer_release(struct kref *ref) +{ + struct xe_frontbuffer *front = + container_of(ref, typeof(*front), ref); + + intel_frontbuffer_fini(&front->base); + + drm_gem_object_put(front->obj); + + kfree(front); +} + +void intel_bo_frontbuffer_put(struct intel_frontbuffer *_front) +{ + struct xe_frontbuffer *front = + container_of(_front, typeof(*front), base); + + kref_put(&front->ref, frontbuffer_release); } -struct intel_frontbuffer *intel_bo_set_frontbuffer(struct drm_gem_object *obj, - struct intel_frontbuffer *front) +void intel_bo_frontbuffer_flush_for_display(struct intel_frontbuffer *front) { - return front; } void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c index ebdb22c9499d..db8b1a27b4de 100644 --- a/drivers/gpu/drm/xe/display/intel_fb_bo.c +++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c @@ -24,8 +24,7 @@ void intel_fb_bo_framebuffer_fini(struct drm_gem_object *obj) xe_bo_put(bo); } -int intel_fb_bo_framebuffer_init(struct drm_framebuffer *fb, - struct drm_gem_object *obj, +int intel_fb_bo_framebuffer_init(struct drm_gem_object *obj, struct drm_mode_fb_cmd2 *mode_cmd) { struct xe_bo *bo = gem_to_xe_bo(obj); diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index e8191562d122..7ad76022cb14 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -3,49 +3,39 @@ * Copyright © 2023 Intel Corporation */ -#include <drm/drm_fb_helper.h> +#include <linux/fb.h> -#include "intel_display_types.h" -#include "intel_fb.h" #include "intel_fbdev_fb.h" #include "xe_bo.h" #include "xe_ttm_stolen_mgr.h" #include "xe_wa.h" -#include <generated/xe_wa_oob.h> +#include <generated/xe_device_wa_oob.h> -struct intel_framebuffer 
*intel_fbdev_fb_alloc(struct drm_fb_helper *helper, - struct drm_fb_helper_surface_size *sizes) +/* + * FIXME: There shouldn't be any reason to have XE_PAGE_SIZE stride + * alignment. The same 64 as i915 uses should be fine, and we shouldn't need to + * have driver specific values. However, dropping the stride alignment to 64 + * leads to underflowing the bo pin count in the atomic cleanup work. + */ +u32 intel_fbdev_fb_pitch_align(u32 stride) { - struct drm_framebuffer *fb; - struct drm_device *dev = helper->dev; - struct xe_device *xe = to_xe_device(dev); - struct drm_mode_fb_cmd2 mode_cmd = {}; - struct xe_bo *obj; - int size; - - /* we don't do packed 24bpp */ - if (sizes->surface_bpp == 24) - sizes->surface_bpp = 32; - - mode_cmd.width = sizes->surface_width; - mode_cmd.height = sizes->surface_height; + return ALIGN(stride, XE_PAGE_SIZE); +} - mode_cmd.pitches[0] = ALIGN(mode_cmd.width * - DIV_ROUND_UP(sizes->surface_bpp, 8), XE_PAGE_SIZE); - mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, - sizes->surface_depth); +struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_bo *obj; - size = mode_cmd.pitches[0] * mode_cmd.height; - size = PAGE_ALIGN(size); obj = ERR_PTR(-ENODEV); - if (!IS_DGFX(xe) && !XE_WA(xe_root_mmio_gt(xe), 22019338487_display)) { - obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), - NULL, size, - ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | - XE_BO_FLAG_STOLEN | - XE_BO_FLAG_GGTT); + if (!IS_DGFX(xe) && !XE_DEVICE_WA(xe, 22019338487_display)) { + obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), + size, + ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT, false); if (!IS_ERR(obj)) drm_info(&xe->drm, "Allocated fbdev into stolen\n"); else @@ -53,37 +43,30 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, } if (IS_ERR(obj)) { - obj = xe_bo_create_pin_map(xe, 
xe_device_get_root_tile(xe), NULL, size, - ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | - XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_FLAG_GGTT); + obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), size, + ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | + XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | + XE_BO_FLAG_GGTT, false); } if (IS_ERR(obj)) { drm_err(&xe->drm, "failed to allocate framebuffer (%pe)\n", obj); - fb = ERR_PTR(-ENOMEM); - goto err; - } - - fb = intel_framebuffer_create(&obj->ttm.base, &mode_cmd); - if (IS_ERR(fb)) { - xe_bo_unpin_map_no_vm(obj); - goto err; + return ERR_PTR(-ENOMEM); } - drm_gem_object_put(&obj->ttm.base); - - return to_intel_framebuffer(fb); + return &obj->ttm.base; +} -err: - return ERR_CAST(fb); +void intel_fbdev_fb_bo_destroy(struct drm_gem_object *obj) +{ + xe_bo_unpin_map_no_vm(gem_to_xe_bo(obj)); } -int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, +int intel_fbdev_fb_fill_info(struct drm_device *drm, struct fb_info *info, struct drm_gem_object *_obj, struct i915_vma *vma) { struct xe_bo *obj = gem_to_xe_bo(_obj); - struct pci_dev *pdev = to_pci_dev(display->drm->dev); + struct pci_dev *pdev = to_pci_dev(drm->dev); if (!(obj->flags & XE_BO_FLAG_SYSTEM)) { if (obj->flags & XE_BO_FLAG_STOLEN) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 68f064f33d4b..8b0afa270216 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -13,6 +13,8 @@ #include <drm/drm_drv.h> #include <drm/drm_managed.h> #include <drm/drm_probe_helper.h> +#include <drm/intel/display_member.h> +#include <drm/intel/display_parent_interface.h> #include <uapi/drm/xe_drm.h> #include "soc/intel_dram.h" @@ -20,6 +22,7 @@ #include "intel_audio.h" #include "intel_bw.h" #include "intel_display.h" +#include "intel_display_device.h" #include "intel_display_driver.h" #include "intel_display_irq.h" #include 
"intel_display_types.h" @@ -32,20 +35,21 @@ #include "intel_hotplug.h" #include "intel_opregion.h" #include "skl_watermark.h" +#include "xe_display_rpm.h" #include "xe_module.h" -/* Xe device functions */ +/* Ensure drm and display members are placed properly. */ +INTEL_DISPLAY_MEMBER_STATIC_ASSERT(struct xe_device, drm, display); -static bool has_display(struct xe_device *xe) -{ - return HAS_DISPLAY(&xe->display); -} +/* Xe device functions */ /** * xe_display_driver_probe_defer - Detect if we need to wait for other drivers * early on * @pdev: PCI device * + * Note: This is called before xe or display device creation. + * * Returns: true if probe needs to be deferred, false otherwise */ bool xe_display_driver_probe_defer(struct pci_dev *pdev) @@ -63,6 +67,8 @@ bool xe_display_driver_probe_defer(struct pci_dev *pdev) * Set features and function hooks in @driver that are needed for driving the * display IP. This sets the driver's capability of driving display, regardless * if the device has it enabled + * + * Note: This is called before xe or display device creation. */ void xe_display_driver_set_hooks(struct drm_driver *driver) { @@ -81,41 +87,15 @@ static void unset_display_features(struct xe_device *xe) xe->drm.driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); } -static void display_destroy(struct drm_device *dev, void *dummy) -{ - struct xe_device *xe = to_xe_device(dev); - - destroy_workqueue(xe->display.hotplug.dp_wq); -} - -/** - * xe_display_create - create display struct - * @xe: XE device instance - * - * Initialize all fields used by the display part. - * - * TODO: once everything can be inside a single struct, make the struct opaque - * to the rest of xe and return it to be xe->display. 
- * - * Returns: 0 on success - */ -int xe_display_create(struct xe_device *xe) -{ - spin_lock_init(&xe->display.fb_tracking.lock); - - xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0); - - return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); -} - static void xe_display_fini_early(void *arg) { struct xe_device *xe = arg; - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; + intel_hpd_cancel_work(display); intel_display_driver_remove_nogem(display); intel_display_driver_remove_noirq(display); intel_opregion_cleanup(display); @@ -124,7 +104,7 @@ static void xe_display_fini_early(void *arg) int xe_display_init_early(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; int err; if (!xe->info.probe_display) @@ -142,7 +122,9 @@ int xe_display_init_early(struct xe_device *xe) * Fill the dram structure to get the system dram info. This will be * used for memory latency calculation. 
*/ - intel_dram_detect(xe); + err = intel_dram_detect(xe); + if (err) + goto err_opregion; intel_bw_init_hw(display); @@ -168,7 +150,7 @@ err_opregion: static void xe_display_fini(void *arg) { struct xe_device *xe = arg; - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; intel_hpd_poll_fini(display); intel_hdcp_component_fini(display); @@ -178,7 +160,7 @@ static void xe_display_fini(void *arg) int xe_display_init(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; int err; if (!xe->info.probe_display) @@ -193,7 +175,7 @@ int xe_display_init(struct xe_device *xe) void xe_display_register(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -204,7 +186,7 @@ void xe_display_register(struct xe_device *xe) void xe_display_unregister(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -217,7 +199,7 @@ void xe_display_unregister(struct xe_device *xe) void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -228,7 +210,7 @@ void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -239,7 +221,7 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) void xe_display_irq_reset(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -247,15 +229,14 @@ void xe_display_irq_reset(struct xe_device 
*xe) gen11_display_irq_reset(display); } -void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) +void xe_display_irq_postinstall(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; - if (gt->info.id == XE_GT0) - gen11_de_irq_postinstall(display); + gen11_de_irq_postinstall(display); } static bool suspend_to_idle(void) @@ -290,7 +271,7 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe) static void xe_display_enable_d3cold(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -307,27 +288,27 @@ static void xe_display_enable_d3cold(struct xe_device *xe) intel_dmc_suspend(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_hpd_poll_enable(display); } static void xe_display_disable_d3cold(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; intel_dmc_resume(display); - if (has_display(xe)) + if (intel_display_device_present(display)) drm_mode_config_reset(&xe->drm); intel_display_driver_init_hw(display); intel_hpd_init(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_hpd_poll_disable(display); intel_opregion_resume(display); @@ -337,7 +318,7 @@ static void xe_display_disable_d3cold(struct xe_device *xe) void xe_display_pm_suspend(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; bool s2idle = suspend_to_idle(); if (!xe->info.probe_display) @@ -348,9 +329,9 @@ void xe_display_pm_suspend(struct xe_device *xe) * properly. 
*/ intel_power_domains_disable(display); - drm_client_dev_suspend(&xe->drm, false); + drm_client_dev_suspend(&xe->drm); - if (has_display(xe)) { + if (intel_display_device_present(display)) { drm_kms_helper_poll_disable(&xe->drm); intel_display_driver_disable_user_access(display); intel_display_driver_suspend(display); @@ -358,11 +339,13 @@ void xe_display_pm_suspend(struct xe_device *xe) xe_display_flush_cleanup_work(xe); + intel_encoder_block_all_hpds(display); + intel_hpd_cancel_work(display); - if (has_display(xe)) { + if (intel_display_device_present(display)) { intel_display_driver_suspend_access(display); - intel_encoder_suspend_all(&xe->display); + intel_encoder_suspend_all(display); } intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold); @@ -372,15 +355,15 @@ void xe_display_pm_suspend(struct xe_device *xe) void xe_display_pm_shutdown(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; intel_power_domains_disable(display); - drm_client_dev_suspend(&xe->drm, false); + drm_client_dev_suspend(&xe->drm); - if (has_display(xe)) { + if (intel_display_device_present(display)) { drm_kms_helper_poll_disable(&xe->drm); intel_display_driver_disable_user_access(display); intel_display_driver_suspend(display); @@ -388,9 +371,10 @@ void xe_display_pm_shutdown(struct xe_device *xe) xe_display_flush_cleanup_work(xe); intel_dp_mst_suspend(display); + intel_encoder_block_all_hpds(display); intel_hpd_cancel_work(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); @@ -403,7 +387,7 @@ void xe_display_pm_shutdown(struct xe_device *xe) void xe_display_pm_runtime_suspend(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -418,7 +402,7 @@ void 
xe_display_pm_runtime_suspend(struct xe_device *xe) void xe_display_pm_suspend_late(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; bool s2idle = suspend_to_idle(); if (!xe->info.probe_display) @@ -429,7 +413,7 @@ void xe_display_pm_suspend_late(struct xe_device *xe) void xe_display_pm_runtime_suspend_late(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -447,7 +431,7 @@ void xe_display_pm_runtime_suspend_late(struct xe_device *xe) void xe_display_pm_shutdown_late(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -462,7 +446,7 @@ void xe_display_pm_shutdown_late(struct xe_device *xe) void xe_display_pm_resume_early(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -472,42 +456,44 @@ void xe_display_pm_resume_early(struct xe_device *xe) void xe_display_pm_resume(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; intel_dmc_resume(display); - if (has_display(xe)) + if (intel_display_device_present(display)) drm_mode_config_reset(&xe->drm); intel_display_driver_init_hw(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_display_driver_resume_access(display); intel_hpd_init(display); - if (has_display(xe)) { + intel_encoder_unblock_all_hpds(display); + + if (intel_display_device_present(display)) { intel_display_driver_resume(display); drm_kms_helper_poll_enable(&xe->drm); intel_display_driver_enable_user_access(display); } - if (has_display(xe)) + if (intel_display_device_present(display)) intel_hpd_poll_disable(display); 
intel_opregion_resume(display); - drm_client_dev_resume(&xe->drm, false); + drm_client_dev_resume(&xe->drm); intel_power_domains_enable(display); } void xe_display_pm_runtime_resume(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -530,6 +516,21 @@ static void display_device_remove(struct drm_device *dev, void *arg) intel_display_device_remove(display); } +static const struct intel_display_parent_interface parent = { + .rpm = &xe_display_rpm_interface, +}; + +/** + * xe_display_probe - probe display and create display struct + * @xe: XE device instance + * + * Initialize all fields used by the display part. + * + * TODO: once everything can be inside a single struct, make the struct opaque + * to the rest of xe and return it to be xe->display. + * + * Returns: 0 on success + */ int xe_display_probe(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -539,13 +540,17 @@ int xe_display_probe(struct xe_device *xe) if (!xe->info.probe_display) goto no_display; - display = intel_display_device_probe(pdev); + display = intel_display_device_probe(pdev, &parent); + if (IS_ERR(display)) + return PTR_ERR(display); err = drmm_add_action_or_reset(&xe->drm, display_device_remove, display); if (err) return err; - if (has_display(xe)) + xe->display = display; + + if (intel_display_device_present(display)) return 0; no_display: diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h index 46e14f8dee28..76db95c25f7e 100644 --- a/drivers/gpu/drm/xe/display/xe_display.h +++ b/drivers/gpu/drm/xe/display/xe_display.h @@ -15,8 +15,6 @@ struct drm_driver; bool xe_display_driver_probe_defer(struct pci_dev *pdev); void xe_display_driver_set_hooks(struct drm_driver *driver); -int xe_display_create(struct xe_device *xe); - int xe_display_probe(struct xe_device *xe); int xe_display_init_early(struct xe_device *xe); @@ -28,7 
+26,7 @@ void xe_display_unregister(struct xe_device *xe); void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl); void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir); void xe_display_irq_reset(struct xe_device *xe); -void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt); +void xe_display_irq_postinstall(struct xe_device *xe); void xe_display_pm_suspend(struct xe_device *xe); void xe_display_pm_shutdown(struct xe_device *xe); @@ -46,8 +44,6 @@ static inline int xe_display_driver_probe_defer(struct pci_dev *pdev) { return 0 static inline void xe_display_driver_set_hooks(struct drm_driver *driver) { } static inline void xe_display_driver_remove(struct xe_device *xe) {} -static inline int xe_display_create(struct xe_device *xe) { return 0; } - static inline int xe_display_probe(struct xe_device *xe) { return 0; } static inline int xe_display_init_early(struct xe_device *xe) { return 0; } @@ -59,7 +55,7 @@ static inline void xe_display_unregister(struct xe_device *xe) {} static inline void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) {} static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) {} static inline void xe_display_irq_reset(struct xe_device *xe) {} -static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {} +static inline void xe_display_irq_postinstall(struct xe_device *xe) {} static inline void xe_display_pm_suspend(struct xe_device *xe) {} static inline void xe_display_pm_shutdown(struct xe_device *xe) {} diff --git a/drivers/gpu/drm/xe/display/xe_display_rpm.c b/drivers/gpu/drm/xe/display/xe_display_rpm.c index 1955153aadba..340f65884812 100644 --- a/drivers/gpu/drm/xe/display/xe_display_rpm.c +++ b/drivers/gpu/drm/xe/display/xe_display_rpm.c @@ -1,71 +1,74 @@ // SPDX-License-Identifier: MIT /* Copyright © 2025 Intel Corporation */ +#include <drm/intel/display_parent_interface.h> + +#include "intel_display_core.h" #include "intel_display_rpm.h" 
+#include "xe_device.h" #include "xe_device_types.h" #include "xe_pm.h" -static struct xe_device *display_to_xe(struct intel_display *display) -{ - return container_of(display, struct xe_device, display); -} - -struct ref_tracker *intel_display_rpm_get_raw(struct intel_display *display) +static struct ref_tracker *xe_display_rpm_get(const struct drm_device *drm) { - return intel_display_rpm_get(display); + return xe_pm_runtime_resume_and_get(to_xe_device(drm)) ? INTEL_WAKEREF_DEF : NULL; } -void intel_display_rpm_put_raw(struct intel_display *display, struct ref_tracker *wakeref) +static struct ref_tracker *xe_display_rpm_get_if_in_use(const struct drm_device *drm) { - intel_display_rpm_put(display, wakeref); + return xe_pm_runtime_get_if_in_use(to_xe_device(drm)) ? INTEL_WAKEREF_DEF : NULL; } -struct ref_tracker *intel_display_rpm_get(struct intel_display *display) +static struct ref_tracker *xe_display_rpm_get_noresume(const struct drm_device *drm) { - return xe_pm_runtime_resume_and_get(display_to_xe(display)) ? INTEL_WAKEREF_DEF : NULL; -} - -struct ref_tracker *intel_display_rpm_get_if_in_use(struct intel_display *display) -{ - return xe_pm_runtime_get_if_in_use(display_to_xe(display)) ? 
INTEL_WAKEREF_DEF : NULL; -} - -struct ref_tracker *intel_display_rpm_get_noresume(struct intel_display *display) -{ - xe_pm_runtime_get_noresume(display_to_xe(display)); + xe_pm_runtime_get_noresume(to_xe_device(drm)); return INTEL_WAKEREF_DEF; } -void intel_display_rpm_put(struct intel_display *display, struct ref_tracker *wakeref) +static void xe_display_rpm_put(const struct drm_device *drm, struct ref_tracker *wakeref) { if (wakeref) - xe_pm_runtime_put(display_to_xe(display)); + xe_pm_runtime_put(to_xe_device(drm)); } -void intel_display_rpm_put_unchecked(struct intel_display *display) +static void xe_display_rpm_put_unchecked(const struct drm_device *drm) { - xe_pm_runtime_put(display_to_xe(display)); + xe_pm_runtime_put(to_xe_device(drm)); } -bool intel_display_rpm_suspended(struct intel_display *display) +static bool xe_display_rpm_suspended(const struct drm_device *drm) { - struct xe_device *xe = display_to_xe(display); + struct xe_device *xe = to_xe_device(drm); return pm_runtime_suspended(xe->drm.dev); } -void assert_display_rpm_held(struct intel_display *display) +static void xe_display_rpm_assert_held(const struct drm_device *drm) { /* FIXME */ } -void intel_display_rpm_assert_block(struct intel_display *display) +static void xe_display_rpm_assert_block(const struct drm_device *drm) { /* FIXME */ } -void intel_display_rpm_assert_unblock(struct intel_display *display) +static void xe_display_rpm_assert_unblock(const struct drm_device *drm) { /* FIXME */ } + +const struct intel_display_rpm_interface xe_display_rpm_interface = { + .get = xe_display_rpm_get, + .get_raw = xe_display_rpm_get, + .get_if_in_use = xe_display_rpm_get_if_in_use, + .get_noresume = xe_display_rpm_get_noresume, + .put = xe_display_rpm_put, + .put_raw = xe_display_rpm_put, + .put_unchecked = xe_display_rpm_put_unchecked, + .suspended = xe_display_rpm_suspended, + .assert_held = xe_display_rpm_assert_held, + .assert_block = xe_display_rpm_assert_block, + .assert_unblock = 
xe_display_rpm_assert_unblock +}; diff --git a/drivers/gpu/drm/xe/display/xe_display_rpm.h b/drivers/gpu/drm/xe/display/xe_display_rpm.h new file mode 100644 index 000000000000..0bf9d31e87c1 --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_display_rpm.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_DISPLAY_RPM_H_ +#define _XE_DISPLAY_RPM_H_ + +extern const struct intel_display_rpm_interface xe_display_rpm_interface; + +#endif /* _XE_DISPLAY_RPM_H_ */ diff --git a/drivers/gpu/drm/xe/display/xe_display_wa.c b/drivers/gpu/drm/xe/display/xe_display_wa.c index 2933ca97d673..2aa1b8c03411 100644 --- a/drivers/gpu/drm/xe/display/xe_display_wa.c +++ b/drivers/gpu/drm/xe/display/xe_display_wa.c @@ -3,8 +3,8 @@ * Copyright © 2024 Intel Corporation */ +#include "intel_display_core.h" #include "intel_display_wa.h" - #include "xe_device.h" #include "xe_wa.h" @@ -13,6 +13,7 @@ bool intel_display_needs_wa_16023588340(struct intel_display *display) { struct xe_device *xe = to_xe_device(display->drm); + struct xe_gt *wa_gt = xe_root_mmio_gt(xe); - return XE_WA(xe_root_mmio_gt(xe), 16023588340); + return wa_gt && XE_GT_WA(wa_gt, 16023588340); } diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index f95375451e2f..58581d7aaae6 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -17,10 +17,7 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); - xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -30,12 +27,9 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 
val, size_t size) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); - xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) @@ -49,11 +43,11 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d return false; /* Set scanout flag for WC mapping */ - obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), - NULL, PAGE_ALIGN(size), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT); + obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), + PAGE_ALIGN(size), + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | + XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT, false); if (IS_ERR(obj)) { kfree(vma); return false; @@ -74,9 +68,12 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + /* * The memory barrier here is to ensure coherency of DSB vs MMIO, * both for weak ordering archs and discrete cards. 
*/ - xe_device_wmb(dsb_buf->vma->bo->tile->xe); + xe_device_wmb(xe); + xe_device_l2_flush(xe); } diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index d918ae1c8061..1fd4a815e784 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -6,6 +6,7 @@ #include <drm/ttm/ttm_bo.h> #include "i915_vma.h" +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_dpt.h" #include "intel_fb.h" @@ -15,6 +16,7 @@ #include "xe_device.h" #include "xe_ggtt.h" #include "xe_pm.h" +#include "xe_vram_types.h" static void write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ofs, @@ -23,6 +25,7 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ struct xe_device *xe = xe_bo_device(bo); struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; u32 column, row; + u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]); /* TODO: Maybe rewrite so we can traverse the bo addresses sequentially, * by writing dpt/ggtt in a different order? 
@@ -32,10 +35,9 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ u32 src_idx = src_stride * (height - 1) + column + bo_ofs; for (row = 0; row < height; row++) { - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_NONE]); + u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE); - iosys_map_wr(map, *dpt_ofs, u64, pte); + iosys_map_wr(map, *dpt_ofs, u64, pte | addr); *dpt_ofs += 8; src_idx -= src_stride; } @@ -55,17 +57,15 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, { struct xe_device *xe = xe_bo_device(bo); struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; - u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index) - = ggtt->pt_ops->pte_encode_bo; u32 column, row; + u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]); for (row = 0; row < height; row++) { u32 src_idx = src_stride * row + bo_ofs; for (column = 0; column < width; column++) { - iosys_map_wr(map, *dpt_ofs, u64, - pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_NONE])); + u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE); + iosys_map_wr(map, *dpt_ofs, u64, pte | addr); *dpt_ofs += 8; src_idx++; @@ -102,40 +102,40 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, XE_PAGE_SIZE); if (IS_DGFX(xe)) - dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, - dpt_size, ~0ull, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM0 | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_PAGETABLE, - alignment); + dpt = xe_bo_create_pin_map_at_novm(xe, tile0, + dpt_size, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM0 | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE, + alignment, false); else - dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, - dpt_size, ~0ull, - ttm_bo_type_kernel, - XE_BO_FLAG_STOLEN | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_PAGETABLE, - alignment); + dpt = xe_bo_create_pin_map_at_novm(xe, tile0, + dpt_size, ~0ull, + 
ttm_bo_type_kernel, + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE, + alignment, false); if (IS_ERR(dpt)) - dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, - dpt_size, ~0ull, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_PAGETABLE, - alignment); + dpt = xe_bo_create_pin_map_at_novm(xe, tile0, + dpt_size, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE, + alignment, false); if (IS_ERR(dpt)) return PTR_ERR(dpt); if (view->type == I915_GTT_VIEW_NORMAL) { + u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]); u32 x; for (x = 0; x < size / XE_PAGE_SIZE; x++) { - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_NONE]); + u64 addr = xe_bo_addr(bo, x * XE_PAGE_SIZE, XE_PAGE_SIZE); - iosys_map_wr(&dpt->vmap, x * 8, u64, pte); + iosys_map_wr(&dpt->vmap, x * 8, u64, pte | addr); } } else if (view->type == I915_GTT_VIEW_REMAPPED) { const struct intel_remapped_info *remap_info = &view->remapped; @@ -164,6 +164,9 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, vma->dpt = dpt; vma->node = dpt->ggtt_node[tile0->id]; + + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return 0; } @@ -173,15 +176,15 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo { struct xe_device *xe = xe_bo_device(bo); u32 column, row; + u64 pte = ggtt->pt_ops->pte_encode_flags(bo, xe->pat.idx[XE_CACHE_NONE]); for (column = 0; column < width; column++) { u32 src_idx = src_stride * (height - 1) + column + bo_ofs; for (row = 0; row < height; row++) { - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_NONE]); + u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE); - ggtt->pt_ops->ggtt_set_pte(ggtt, *ggtt_ofs, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, *ggtt_ofs, pte | addr); *ggtt_ofs += XE_PAGE_SIZE; src_idx -= src_stride; } @@ -199,14 
+202,15 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, struct drm_gem_object *obj = intel_fb_bo(&fb->base); struct xe_bo *bo = gem_to_xe_bo(obj); struct xe_device *xe = to_xe_device(fb->base.dev); - struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; + struct xe_tile *tile0 = xe_device_get_root_tile(xe); + struct xe_ggtt *ggtt = tile0->mem.ggtt; u32 align; int ret; /* TODO: Consider sharing framebuffer mapping? * embed i915_vma inside intel_framebuffer */ - xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); + xe_pm_runtime_get_noresume(xe); ret = mutex_lock_interruptible(&ggtt->lock); if (ret) goto out; @@ -215,29 +219,22 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) align = max_t(u32, align, SZ_64K); - if (bo->ggtt_node[ggtt->tile->id] && view->type == I915_GTT_VIEW_NORMAL) { - vma->node = bo->ggtt_node[ggtt->tile->id]; + if (bo->ggtt_node[tile0->id] && view->type == I915_GTT_VIEW_NORMAL) { + vma->node = bo->ggtt_node[tile0->id]; } else if (view->type == I915_GTT_VIEW_NORMAL) { - u32 x, size = bo->ttm.base.size; - vma->node = xe_ggtt_node_init(ggtt); if (IS_ERR(vma->node)) { ret = PTR_ERR(vma->node); goto out_unlock; } - ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); + ret = xe_ggtt_node_insert_locked(vma->node, xe_bo_size(bo), align, 0); if (ret) { xe_ggtt_node_fini(vma->node); goto out_unlock; } - for (x = 0; x < size; x += XE_PAGE_SIZE) { - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, - xe->pat.idx[XE_CACHE_NONE]); - - ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node->base.start + x, pte); - } + xe_ggtt_map_bo(ggtt, vma->node, bo, xe->pat.idx[XE_CACHE_NONE]); } else { u32 i, ggtt_ofs; const struct intel_rotation_info *rot_info = &view->rotated; @@ -271,7 +268,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, out_unlock: mutex_unlock(&ggtt->lock); out: - xe_pm_runtime_put(tile_to_xe(ggtt->tile)); + 
xe_pm_runtime_put(xe); return ret; } @@ -284,7 +281,9 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); struct drm_gem_object *obj = intel_fb_bo(&fb->base); struct xe_bo *bo = gem_to_xe_bo(obj); - int ret; + struct xe_validation_ctx ctx; + struct drm_exec exec; + int ret = 0; if (!vma) return ERR_PTR(-ENODEV); @@ -293,7 +292,7 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) && intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 && !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) { - struct xe_tile *tile = xe_device_get_root_tile(xe); + struct xe_vram_region *vram = xe_device_get_root_tile(xe)->mem.vram; /* * If we need to able to access the clear-color value stored in @@ -301,7 +300,7 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, * accessible. This is important on small-bar systems where * only some subset of VRAM is CPU accessible. 
*/ - if (tile->mem.vram.io_size < tile->mem.vram.usable_size) { + if (xe_vram_region_io_size(vram) < xe_vram_region_usable_size(vram)) { ret = -EINVAL; goto err; } @@ -311,17 +310,22 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, * Pin the framebuffer, we can't use xe_bo_(un)pin functions as the * assumptions are incorrect for framebuffers */ - ret = ttm_bo_reserve(&bo->ttm, false, false, NULL); - if (ret) - goto err; - - if (IS_DGFX(xe)) - ret = xe_bo_migrate(bo, XE_PL_VRAM0); - else - ret = xe_bo_validate(bo, NULL, true); - if (!ret) - ttm_bo_pin(&bo->ttm); - ttm_bo_unreserve(&bo->ttm); + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, + ret) { + ret = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + if (ret) + break; + + if (IS_DGFX(xe)) + ret = xe_bo_migrate(bo, XE_PL_VRAM0, NULL, &exec); + else + ret = xe_bo_validate(bo, NULL, true, &exec); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &ret); + if (!ret) + ttm_bo_pin(&bo->ttm); + } if (ret) goto err; @@ -333,8 +337,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; - /* Ensure DPT writes are flushed */ - xe_device_l2_flush(xe); return vma; err_unpin: @@ -348,7 +350,7 @@ err: static void __xe_unpin_fb_vma(struct i915_vma *vma) { - u8 tile_id = vma->node->ggtt->tile->id; + u8 tile_id = xe_device_get_root_tile(xe_bo_device(vma->bo))->id; if (!refcount_dec_and_test(&vma->ref)) return; @@ -388,7 +390,9 @@ static bool reuse_vma(struct intel_plane_state *new_plane_state, const struct intel_plane_state *old_plane_state) { struct intel_framebuffer *fb = to_intel_framebuffer(new_plane_state->hw.fb); + struct intel_plane *plane = to_intel_plane(new_plane_state->uapi.plane); struct xe_device *xe = to_xe_device(fb->base.dev); + struct intel_display *display = xe->display; struct i915_vma *vma; if (old_plane_state->hw.fb == 
new_plane_state->hw.fb && @@ -399,8 +403,8 @@ static bool reuse_vma(struct intel_plane_state *new_plane_state, goto found; } - if (fb == intel_fbdev_framebuffer(xe->display.fbdev.fbdev)) { - vma = intel_fbdev_vma_pointer(xe->display.fbdev.fbdev); + if (fb == intel_fbdev_framebuffer(display->fbdev.fbdev)) { + vma = intel_fbdev_vma_pointer(display->fbdev.fbdev); if (vma) goto found; } @@ -410,6 +414,10 @@ static bool reuse_vma(struct intel_plane_state *new_plane_state, found: refcount_inc(&vma->ref); new_plane_state->ggtt_vma = vma; + + new_plane_state->surf = i915_ggtt_offset(new_plane_state->ggtt_vma) + + plane->surf_offset(new_plane_state); + return true; } @@ -436,6 +444,10 @@ int intel_plane_pin_fb(struct intel_plane_state *new_plane_state, return PTR_ERR(vma); new_plane_state->ggtt_vma = vma; + + new_plane_state->surf = i915_ggtt_offset(new_plane_state->ggtt_vma) + + plane->surf_offset(new_plane_state); + return 0; } @@ -463,3 +475,8 @@ u64 intel_dpt_offset(struct i915_vma *dpt_vma) { return 0; } + +void intel_fb_get_map(struct i915_vma *vma, struct iosys_map *map) +{ + *map = vma->bo->vmap; +} diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index b35a6f201d4a..4ae847b628e2 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -72,10 +72,10 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe, int ret = 0; /* allocate object of two page for HDCP command memory and store it */ - bo = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, PAGE_SIZE * 2, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); + bo = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), PAGE_SIZE * 2, + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) { drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n"); @@ -85,7 +85,7 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe, cmd_in 
= xe_bo_ggtt_addr(bo); cmd_out = cmd_in + PAGE_SIZE; - xe_map_memset(xe, &bo->vmap, 0, 0, bo->size); + xe_map_memset(xe, &bo->vmap, 0, 0, xe_bo_size(bo)); gsc_context->hdcp_bo = bo; gsc_context->hdcp_cmd_in = cmd_in; diff --git a/drivers/gpu/drm/xe/display/xe_panic.c b/drivers/gpu/drm/xe/display/xe_panic.c new file mode 100644 index 000000000000..df663286092a --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_panic.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include <drm/drm_cache.h> +#include <drm/drm_panic.h> + +#include "intel_display_types.h" +#include "intel_fb.h" +#include "intel_panic.h" +#include "xe_bo.h" +#include "xe_res_cursor.h" + +struct intel_panic { + struct xe_res_cursor res; + struct iosys_map vmap; + + int page; +}; + +static void xe_panic_kunmap(struct intel_panic *panic) +{ + if (!panic->vmap.is_iomem && iosys_map_is_set(&panic->vmap)) { + drm_clflush_virt_range(panic->vmap.vaddr, PAGE_SIZE); + kunmap_local(panic->vmap.vaddr); + } + iosys_map_clear(&panic->vmap); + panic->page = -1; +} + +/* + * The scanout buffer pages are not mapped, so for each pixel, + * use kmap_local_page_try_from_panic() to map the page, and write the pixel. + * Try to keep the map from the previous pixel, to avoid too much map/unmap. 
+ */ +static void xe_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int x, + unsigned int y, u32 color) +{ + struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; + struct intel_panic *panic = fb->panic; + struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base)); + unsigned int new_page; + unsigned int offset; + + if (fb->panic_tiling) + offset = fb->panic_tiling(sb->width, x, y); + else + offset = y * sb->pitch[0] + x * sb->format->cpp[0]; + + new_page = offset >> PAGE_SHIFT; + offset = offset % PAGE_SIZE; + if (new_page != panic->page) { + if (xe_bo_is_vram(bo)) { + /* Display is always mapped on root tile */ + struct xe_vram_region *vram = xe_bo_device(bo)->mem.vram; + + if (panic->page < 0 || new_page < panic->page) { + xe_res_first(bo->ttm.resource, new_page * PAGE_SIZE, + bo->ttm.base.size - new_page * PAGE_SIZE, &panic->res); + } else { + xe_res_next(&panic->res, PAGE_SIZE * (new_page - panic->page)); + } + iosys_map_set_vaddr_iomem(&panic->vmap, + vram->mapping + panic->res.start); + } else { + xe_panic_kunmap(panic); + iosys_map_set_vaddr(&panic->vmap, + ttm_bo_kmap_try_from_panic(&bo->ttm, + new_page)); + } + panic->page = new_page; + } + + if (iosys_map_is_set(&panic->vmap)) + iosys_map_wr(&panic->vmap, offset, u32, color); +} + +struct intel_panic *intel_panic_alloc(void) +{ + struct intel_panic *panic; + + panic = kzalloc(sizeof(*panic), GFP_KERNEL); + + return panic; +} + +int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb) +{ + struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; + struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base)); + + if (xe_bo_is_vram(bo) && !xe_bo_is_visible_vram(bo)) + return -ENODEV; + + panic->page = -1; + sb->set_pixel = xe_panic_page_set_pixel; + return 0; +} + +void intel_panic_finish(struct intel_panic *panic) +{ + xe_panic_kunmap(panic); +} diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c 
b/drivers/gpu/drm/xe/display/xe_plane_initial.c index 6502b8274173..12d25c5290fd 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -10,19 +10,22 @@ #include "xe_ggtt.h" #include "xe_mmio.h" -#include "i915_reg.h" -#include "intel_atomic_plane.h" +#include "i915_vma.h" #include "intel_crtc.h" #include "intel_display.h" +#include "intel_display_core.h" +#include "intel_display_regs.h" #include "intel_display_types.h" #include "intel_fb.h" #include "intel_fb_pin.h" #include "intel_frontbuffer.h" +#include "intel_plane.h" #include "intel_plane_initial.h" #include "xe_bo.h" +#include "xe_vram_types.h" #include "xe_wa.h" -#include <generated/xe_wa_oob.h> +#include <generated/xe_device_wa_oob.h> void intel_plane_initial_vblank_wait(struct intel_crtc *crtc) { @@ -87,12 +90,8 @@ initial_plane_bo(struct xe_device *xe, base = round_down(plane_config->base, page_size); if (IS_DGFX(xe)) { - u64 __iomem *gte = tile0->mem.ggtt->gsm; - u64 pte; + u64 pte = xe_ggtt_read_pte(tile0->mem.ggtt, base); - gte += base / XE_PAGE_SIZE; - - pte = ioread64(gte); if (!(pte & XE_GGTT_PTE_DM)) { drm_err(&xe->drm, "Initial plane programming missing DM bit\n"); @@ -106,7 +105,7 @@ initial_plane_bo(struct xe_device *xe, * We don't currently expect this to ever be placed in the * stolen portion. 
*/ - if (phys_base >= tile0->mem.vram.usable_size) { + if (phys_base >= xe_vram_region_usable_size(tile0->mem.vram)) { drm_err(&xe->drm, "Initial plane programming using invalid range, phys_base=%pa\n", &phys_base); @@ -124,7 +123,7 @@ initial_plane_bo(struct xe_device *xe, phys_base = base; flags |= XE_BO_FLAG_STOLEN; - if (XE_WA(xe_root_mmio_gt(xe), 22019338487_display)) + if (XE_DEVICE_WA(xe, 22019338487_display)) return NULL; /* @@ -141,8 +140,8 @@ initial_plane_bo(struct xe_device *xe, page_size); size -= base; - bo = xe_bo_create_pin_map_at(xe, tile0, NULL, size, phys_base, - ttm_bo_type_kernel, flags); + bo = xe_bo_create_pin_map_at_novm(xe, tile0, size, phys_base, + ttm_bo_type_kernel, flags, 0, false); if (IS_ERR(bo)) { drm_dbg(&xe->drm, "Failed to create bo phys_base=%pa size %u with flags %x: %li\n", @@ -187,7 +186,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; if (intel_framebuffer_init(to_intel_framebuffer(fb), - &bo->ttm.base, &mode_cmd)) { + &bo->ttm.base, fb->format, &mode_cmd)) { drm_dbg_kms(&xe->drm, "intel fb init failed\n"); goto err_bo; } @@ -237,6 +236,9 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc, goto nofb; plane_state->ggtt_vma = vma; + + plane_state->surf = i915_ggtt_offset(plane_state->ggtt_vma); + plane_state->uapi.src_x = 0; plane_state->uapi.src_y = 0; plane_state->uapi.src_w = fb->width << 16; diff --git a/drivers/gpu/drm/xe/display/xe_stolen.c b/drivers/gpu/drm/xe/display/xe_stolen.c new file mode 100644 index 000000000000..9f04ba36e930 --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_stolen.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include "gem/i915_gem_stolen.h" +#include "xe_res_cursor.h" +#include "xe_ttm_stolen_mgr.h" +#include "xe_validation.h" + +struct intel_stolen_node { + struct xe_device *xe; + struct xe_bo *bo; +}; + +int i915_gem_stolen_insert_node_in_range(struct intel_stolen_node *node, u64 size, + unsigned int align, u64 
start, u64 end) +{ + struct xe_device *xe = node->xe; + + struct xe_bo *bo; + int err = 0; + u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN; + + if (start < SZ_4K) + start = SZ_4K; + + if (align) { + size = ALIGN(size, align); + start = ALIGN(start, align); + } + + bo = xe_bo_create_pin_range_novm(xe, xe_device_get_root_tile(xe), + size, start, end, ttm_bo_type_kernel, flags); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + bo = NULL; + return err; + } + + node->bo = bo; + + return err; +} + +int i915_gem_stolen_insert_node(struct intel_stolen_node *node, u64 size, unsigned int align) +{ + /* Not used on xe */ + WARN_ON(1); + + return -ENODEV; +} + +void i915_gem_stolen_remove_node(struct intel_stolen_node *node) +{ + xe_bo_unpin_map_no_vm(node->bo); + node->bo = NULL; +} + +bool i915_gem_stolen_initialized(struct drm_device *drm) +{ + struct xe_device *xe = to_xe_device(drm); + + return ttm_manager_type(&xe->ttm, XE_PL_STOLEN); +} + +bool i915_gem_stolen_node_allocated(const struct intel_stolen_node *node) +{ + return node->bo; +} + +u32 i915_gem_stolen_node_offset(struct intel_stolen_node *node) +{ + struct xe_res_cursor res; + + xe_res_first(node->bo->ttm.resource, 0, 4096, &res); + return res.start; +} + +/* Used for < gen4. These are not supported by Xe */ +u64 i915_gem_stolen_area_address(struct drm_device *drm) +{ + WARN_ON(1); + + return 0; +} + +/* Used for gen9 specific WA. 
Gen9 is not supported by Xe */ +u64 i915_gem_stolen_area_size(struct drm_device *drm) +{ + WARN_ON(1); + + return 0; +} + +u64 i915_gem_stolen_node_address(struct intel_stolen_node *node) +{ + struct xe_device *xe = node->xe; + + return xe_ttm_stolen_gpu_offset(xe) + i915_gem_stolen_node_offset(node); +} + +u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node) +{ + return node->bo->ttm.base.size; +} + +struct intel_stolen_node *i915_gem_stolen_node_alloc(struct drm_device *drm) +{ + struct xe_device *xe = to_xe_device(drm); + struct intel_stolen_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return NULL; + + node->xe = xe; + + return node; +} + +void i915_gem_stolen_node_free(const struct intel_stolen_node *node) +{ + kfree(node); +} diff --git a/drivers/gpu/drm/xe/display/xe_tdf.c b/drivers/gpu/drm/xe/display/xe_tdf.c index 2a7fccbeb1d5..78bda4c47874 100644 --- a/drivers/gpu/drm/xe/display/xe_tdf.c +++ b/drivers/gpu/drm/xe/display/xe_tdf.c @@ -3,9 +3,9 @@ * Copyright © 2024 Intel Corporation */ -#include "xe_device.h" -#include "intel_display_types.h" +#include "intel_display_core.h" #include "intel_tdf.h" +#include "xe_device.h" void intel_td_flush(struct intel_display *display) { diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h index 8cfcd3360896..5d41ca297447 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h @@ -31,6 +31,12 @@ #define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30) #define XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK GENMASK(23, 20) +#define MEM_COPY_CMD (2 << 29 | 0x5a << 22 | 0x8) +#define MEM_COPY_PAGE_COPY_MODE REG_BIT(19) +#define MEM_COPY_MATRIX_COPY REG_BIT(17) +#define MEM_COPY_SRC_MOCS_INDEX_MASK GENMASK(31, 28) +#define MEM_COPY_DST_MOCS_INDEX_MASK GENMASK(6, 3) + #define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) #define PVC_MEM_SET_CMD_LEN_DW 7 #define PVC_MEM_SET_MATRIX 
REG_BIT(17) diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index e3f5e8bb3ebc..c47b290e0e9f 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -65,6 +65,7 @@ #define MI_LOAD_REGISTER_MEM (__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4)) #define MI_LRM_USE_GGTT REG_BIT(22) +#define MI_LRM_ASYNC REG_BIT(21) #define MI_LOAD_REGISTER_REG (__MI_INSTR(0x2a) | XE_INSTR_NUM_DW(3)) #define MI_LRR_DST_CS_MMIO REG_BIT(19) diff --git a/drivers/gpu/drm/xe/regs/xe_bars.h b/drivers/gpu/drm/xe/regs/xe_bars.h index ce05b6ae832f..880140d6ccdc 100644 --- a/drivers/gpu/drm/xe/regs/xe_bars.h +++ b/drivers/gpu/drm/xe/regs/xe_bars.h @@ -7,5 +7,6 @@ #define GTTMMADR_BAR 0 /* MMIO + GTT */ #define LMEM_BAR 2 /* VRAM */ +#define VF_LMEM_BAR 9 /* VF VRAM */ #endif diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 7ade41e2b7b3..68172b0248a6 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -111,6 +111,9 @@ #define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS REG_BIT(14) #define CS_PRIORITY_MEM_READ REG_BIT(7) +#define CS_DEBUG_MODE2(base) XE_REG((base) + 0xd8, XE_REG_OPTION_MASKED) +#define INSTRUCTION_STATE_CACHE_INVALIDATE REG_BIT(6) + #define FF_SLICE_CS_CHICKEN1(base) XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED) #define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14) @@ -138,6 +141,8 @@ #define INHIBIT_SWITCH_UNTIL_PREEMPTED REG_BIT(31) #define IDLE_DELAY REG_GENMASK(20, 0) +#define RING_CURRENT_LRCA(base) XE_REG((base) + 0x240) + #define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) #define CTX_CTRL_PXP_ENABLE REG_BIT(10) #define CTX_CTRL_OAC_CONTEXT_ENABLE REG_BIT(8) @@ -150,6 +155,8 @@ #define GFX_DISABLE_LEGACY_MODE REG_BIT(3) #define GFX_MSIX_INTERRUPT_ENABLE REG_BIT(13) +#define RING_CSMQDEBUG(base) XE_REG((base) + 0x2b0) + #define 
RING_TIMESTAMP(base) XE_REG((base) + 0x358) #define RING_TIMESTAMP_UDW(base) XE_REG((base) + 0x358 + 4) diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h index 7702364b65f1..180be82672ab 100644 --- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h @@ -13,9 +13,15 @@ /* Definitions of GSC H/W registers, bits, etc */ +#define BMG_GSC_HECI1_BASE 0x373000 + #define MTL_GSC_HECI1_BASE 0x00116000 #define MTL_GSC_HECI2_BASE 0x00117000 +#define DG1_GSC_HECI2_BASE 0x00259000 +#define PVC_GSC_HECI2_BASE 0x00285000 +#define DG2_GSC_HECI2_BASE 0x00374000 + #define HECI_H_CSR(base) XE_REG((base) + 0x4) #define HECI_H_CSR_IE REG_BIT(0) #define HECI_H_CSR_IS REG_BIT(1) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 5cd5ab8529c5..917a088c28f2 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -37,12 +37,18 @@ #define GMD_ID XE_REG(0xd8c) #define GMD_ID_ARCH_MASK REG_GENMASK(31, 22) #define GMD_ID_RELEASE_MASK REG_GENMASK(21, 14) +/* + * Spec defines these bits as "Reserved", but then make them assume some + * meaning that depends on the ARCH. To avoid any confusion, call them + * SUBIP_FLAG_MASK. 
+ */ +#define GMD_ID_SUBIP_FLAG_MASK REG_GENMASK(13, 6) #define GMD_ID_REVID REG_GENMASK(5, 0) #define FORCEWAKE_ACK_GSC XE_REG(0xdf8) #define FORCEWAKE_ACK_GT_MTL XE_REG(0xdfc) -#define MCFG_MCR_SELECTOR XE_REG(0xfd0) +#define STEER_SEMAPHORE XE_REG(0xfd0) #define MTL_MCR_SELECTOR XE_REG(0xfd4) #define SF_MCR_SELECTOR XE_REG(0xfd8) #define MCR_SELECTOR XE_REG(0xfdc) @@ -95,7 +101,6 @@ #define XE2_LMEM_CFG XE_REG(0x48b0) -#define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4) #define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) #define XEHP_FLAT_CCS_PTR REG_GENMASK(31, 8) @@ -168,6 +173,7 @@ #define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) +#define FAST_CLEAR_VALIGN_FIX REG_BIT(13) #define XE2LPM_CCCHKNREG1 XE_REG(0x82a8) @@ -239,6 +245,9 @@ #define XE2_GT_GEOMETRY_DSS_1 XE_REG(0x9150) #define XE2_GT_GEOMETRY_DSS_2 XE_REG(0x9154) +#define SERVICE_COPY_ENABLE XE_REG(0x9170) +#define FUSE_SERVICE_COPY_ENABLE_MASK REG_GENMASK(7, 0) + #define GDRST XE_REG(0x941c) #define GRDOM_GUC REG_BIT(3) #define GRDOM_FULL REG_BIT(0) @@ -342,13 +351,10 @@ #define POWERGATE_ENABLE XE_REG(0xa210) #define RENDER_POWERGATE_ENABLE REG_BIT(0) #define MEDIA_POWERGATE_ENABLE REG_BIT(1) +#define MEDIA_SAMPLERS_POWERGATE_ENABLE REG_BIT(2) #define VDN_HCP_POWERGATE_ENABLE(n) REG_BIT(3 + 2 * (n)) #define VDN_MFXVDENC_POWERGATE_ENABLE(n) REG_BIT(4 + 2 * (n)) -#define CTC_MODE XE_REG(0xa26c) -#define CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) -#define CTC_SOURCE_DIVIDE_LOGIC REG_BIT(0) - #define FORCEWAKE_RENDER XE_REG(0xa278) #define POWERGATE_DOMAIN_STATUS XE_REG(0xa2a0) @@ -522,6 +528,7 @@ #define TDL_CHICKEN XE_REG_MCR(0xe5f4, XE_REG_OPTION_MASKED) #define QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE REG_BIT(12) +#define EUSTALL_PERF_SAMPLING_DISABLE REG_BIT(5) #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) #define DISABLE_D8_D16_COASLESCE REG_BIT(30) @@ -543,6 +550,9 @@ #define SARB_CHICKEN1 
XE_REG_MCR(0xe90c) #define COMP_CKN_IN REG_GENMASK(30, 29) +#define MAIN_GAMCTRL_MODE XE_REG(0xef00) +#define MAIN_GAMCTRL_QUEUE_SELECT REG_BIT(0) + #define RCU_MODE XE_REG(0x14800, XE_REG_OPTION_MASKED) #define RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1) #define RCU_MODE_CCS_ENABLE REG_BIT(0) @@ -579,6 +589,7 @@ #define GT_GFX_RC6 XE_REG(0x138108) #define GT0_PERF_LIMIT_REASONS XE_REG(0x1381a8) +/* Common performance limit reason bits - available on all platforms */ #define GT0_PERF_LIMIT_REASONS_MASK 0xde3 #define PROCHOT_MASK REG_BIT(0) #define THERMAL_LIMIT_MASK REG_BIT(1) @@ -588,6 +599,18 @@ #define POWER_LIMIT_4_MASK REG_BIT(8) #define POWER_LIMIT_1_MASK REG_BIT(10) #define POWER_LIMIT_2_MASK REG_BIT(11) +/* Platform-specific performance limit reason bits - for Crescent Island */ +#define CRI_PERF_LIMIT_REASONS_MASK 0xfdff +#define SOC_THERMAL_LIMIT_MASK REG_BIT(1) +#define MEM_THERMAL_MASK REG_BIT(2) +#define VR_THERMAL_MASK REG_BIT(3) +#define ICCMAX_MASK REG_BIT(4) +#define SOC_AVG_THERMAL_MASK REG_BIT(6) +#define FASTVMODE_MASK REG_BIT(7) +#define PSYS_PL1_MASK REG_BIT(12) +#define PSYS_PL2_MASK REG_BIT(13) +#define P0_FREQ_MASK REG_BIT(14) +#define PSYS_CRIT_MASK REG_BIT(15) #define GT_PERF_STATUS XE_REG(0x1381b4) #define VOLTAGE_MASK REG_GENMASK(10, 0) diff --git a/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h b/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h new file mode 100644 index 000000000000..c146b9ef44eb --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_HW_ERROR_REGS_H_ +#define _XE_HW_ERROR_REGS_H_ + +#define HEC_UNCORR_ERR_STATUS(base) XE_REG((base) + 0x118) +#define UNCORR_FW_REPORTED_ERR BIT(6) + +#define HEC_UNCORR_FW_ERR_DW0(base) XE_REG((base) + 0x124) + +#define DEV_ERR_STAT_NONFATAL 0x100178 +#define DEV_ERR_STAT_CORRECTABLE 0x10017c +#define DEV_ERR_STAT_REG(x) XE_REG(_PICK_EVEN((x), \ + DEV_ERR_STAT_CORRECTABLE, \ + 
DEV_ERR_STAT_NONFATAL)) +#define XE_CSC_ERROR BIT(17) +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_i2c_regs.h b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h new file mode 100644 index 000000000000..f2e455e2bfe4 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _XE_I2C_REGS_H_ +#define _XE_I2C_REGS_H_ + +#include <linux/pci_regs.h> + +#include "xe_reg_defs.h" +#include "xe_regs.h" + +#define I2C_BRIDGE_OFFSET (SOC_BASE + 0xd9000) +#define I2C_CONFIG_SPACE_OFFSET (SOC_BASE + 0xf6000) +#define I2C_MEM_SPACE_OFFSET (SOC_BASE + 0xf7400) + +#define REG_SG_REMAP_ADDR_PREFIX XE_REG(SOC_BASE + 0x0164) +#define REG_SG_REMAP_ADDR_POSTFIX XE_REG(SOC_BASE + 0x0168) + +#define I2C_BRIDGE_PCICFGCTL XE_REG(I2C_BRIDGE_OFFSET + 0x200) +#define ACPI_INTR_EN REG_BIT(1) + +#define I2C_CONFIG_CMD XE_REG(I2C_CONFIG_SPACE_OFFSET + PCI_COMMAND) +#define I2C_CONFIG_PMCSR XE_REG(I2C_CONFIG_SPACE_OFFSET + 0x84) + +#endif /* _XE_I2C_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h index f0ecfcac4003..2f97662d958d 100644 --- a/drivers/gpu/drm/xe/regs/xe_irq_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h @@ -18,7 +18,9 @@ #define GFX_MSTR_IRQ XE_REG(0x190010, XE_REG_OPTION_VF) #define MASTER_IRQ REG_BIT(31) #define GU_MISC_IRQ REG_BIT(29) +#define ERROR_IRQ(x) REG_BIT(26 + (x)) #define DISPLAY_IRQ REG_BIT(16) +#define I2C_IRQ REG_BIT(12) #define GT_DW_IRQ(x) REG_BIT(x) /* @@ -63,7 +65,10 @@ #define BCS_RSVD_INTR_MASK XE_REG(0x1900a0, XE_REG_OPTION_VF) #define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8, XE_REG_OPTION_VF) #define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac, XE_REG_OPTION_VF) +#define VCS4_VCS5_INTR_MASK XE_REG(0x1900b0, XE_REG_OPTION_VF) +#define VCS6_VCS7_INTR_MASK XE_REG(0x1900b4, XE_REG_OPTION_VF) #define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0, XE_REG_OPTION_VF) +#define VECS2_VECS3_INTR_MASK XE_REG(0x1900d4, XE_REG_OPTION_VF) #define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4) 
#define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF) #define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF) @@ -78,9 +83,10 @@ #define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) #define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) #define GSC_ER_COMPLETE REG_BIT(5) -#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4) +#define GT_FLUSH_COMPLETE_INTERRUPT REG_BIT(4) #define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) -#define GT_RENDER_USER_INTERRUPT REG_BIT(0) +#define GT_COMPUTE_WALKER_INTERRUPT REG_BIT(2) +#define GT_MI_USER_INTERRUPT REG_BIT(0) /* irqs for OTHER_KCR_INSTANCE */ #define KCR_PXP_STATE_TERMINATED_INTERRUPT REG_BIT(1) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 994af591a2e8..b5eff383902c 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -12,9 +12,13 @@ #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) #define CTX_BB_PER_CTX_PTR (0x12 + 1) +#define CTX_CS_INDIRECT_CTX (0x14 + 1) +#define CTX_CS_INDIRECT_CTX_OFFSET (0x16 + 1) #define CTX_TIMESTAMP (0x22 + 1) #define CTX_TIMESTAMP_UDW (0x24 + 1) #define CTX_INDIRECT_RING_STATE (0x26 + 1) +#define CTX_ACC_CTR_THOLD (0x2a + 1) +#define CTX_ASID (0x2e + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h index f5e5234857c1..ef2bf984723f 100644 --- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h @@ -38,10 +38,11 @@ #define TEMP_MASK REG_GENMASK(7, 0) #define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0) -#define PKG_PWR_LIM_1 REG_GENMASK(14, 0) -#define PKG_PWR_LIM_1_EN REG_BIT(15) -#define PKG_PWR_LIM_1_TIME REG_GENMASK(23, 17) -#define PKG_PWR_LIM_1_TIME_X REG_GENMASK(23, 22) -#define PKG_PWR_LIM_1_TIME_Y REG_GENMASK(21, 17) +#define PWR_LIM_VAL REG_GENMASK(14, 0) +#define PWR_LIM_EN REG_BIT(15) +#define 
PWR_LIM REG_GENMASK(15, 0) +#define PWR_LIM_TIME REG_GENMASK(23, 17) +#define PWR_LIM_TIME_X REG_GENMASK(23, 22) +#define PWR_LIM_TIME_Y REG_GENMASK(21, 17) #endif /* _XE_MCHBAR_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index a79ad2da070c..e693a50706f8 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -97,4 +97,7 @@ #define OAM_STATUS(base) XE_REG((base) + OAM_STATUS_OFFSET) #define OAM_MMIO_TRG(base) XE_REG((base) + OAM_MMIO_TRG_OFFSET) +#define OAM_COMPRESSION_T3_CONTROL XE_REG(0x1c2e00) +#define OAM_LAT_MEASURE_ENABLE REG_BIT(4) + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h index c7d5d782e3f9..fb097607b86c 100644 --- a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h @@ -18,16 +18,10 @@ #define PVC_GT0_PLATFORM_ENERGY_STATUS XE_REG(0x28106c) #define PVC_GT0_PACKAGE_POWER_SKU XE_REG(0x281080) -#define BMG_PACKAGE_POWER_SKU XE_REG(0x138098) -#define BMG_PACKAGE_POWER_SKU_UNIT XE_REG(0x1380dc) -#define BMG_PACKAGE_ENERGY_STATUS XE_REG(0x138120) #define BMG_FAN_1_SPEED XE_REG(0x138140) #define BMG_FAN_2_SPEED XE_REG(0x138170) #define BMG_FAN_3_SPEED XE_REG(0x1381a0) #define BMG_VRAM_TEMPERATURE XE_REG(0x1382c0) #define BMG_PACKAGE_TEMPERATURE XE_REG(0x138434) -#define BMG_PACKAGE_RAPL_LIMIT XE_REG(0x138440) -#define BMG_PLATFORM_ENERGY_STATUS XE_REG(0x138458) -#define BMG_PLATFORM_POWER_LIMIT XE_REG(0x138460) #endif /* _XE_PCODE_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h index f45abcd96ba8..0f79c0714454 100644 --- a/drivers/gpu/drm/xe/regs/xe_pmt.h +++ b/drivers/gpu/drm/xe/regs/xe_pmt.h @@ -5,15 +5,31 @@ #ifndef _XE_PMT_H_ #define _XE_PMT_H_ -#define SOC_BASE 0x280000 +#include "xe_regs.h" #define BMG_PMT_BASE_OFFSET 0xDB000 #define BMG_DISCOVERY_OFFSET (SOC_BASE + BMG_PMT_BASE_OFFSET) +#define PUNIT_TELEMETRY_GUID 
XE_REG(BMG_DISCOVERY_OFFSET + 0x4) +#define BMG_ENERGY_STATUS_PMT_OFFSET (0x30) +#define ENERGY_PKG REG_GENMASK64(31, 0) +#define ENERGY_CARD REG_GENMASK64(63, 32) + #define BMG_TELEMETRY_BASE_OFFSET 0xE0000 #define BMG_TELEMETRY_OFFSET (SOC_BASE + BMG_TELEMETRY_BASE_OFFSET) #define SG_REMAP_INDEX1 XE_REG(SOC_BASE + 0x08) #define SG_REMAP_BITS REG_GENMASK(31, 24) +#define BMG_MODS_RESIDENCY_OFFSET (0x4D0) +#define BMG_G2_RESIDENCY_OFFSET (0x530) +#define BMG_G6_RESIDENCY_OFFSET (0x538) +#define BMG_G7_RESIDENCY_OFFSET (0x4B0) +#define BMG_G8_RESIDENCY_OFFSET (0x540) +#define BMG_G10_RESIDENCY_OFFSET (0x548) + +#define BMG_PCIE_LINK_L0_RESIDENCY_OFFSET (0x570) +#define BMG_PCIE_LINK_L1_RESIDENCY_OFFSET (0x578) +#define BMG_PCIE_LINK_L1_2_RESIDENCY_OFFSET (0x580) + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 3abb17d2ca33..ad93c57edd17 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -7,6 +7,8 @@ #include "regs/xe_reg_defs.h" +#define SOC_BASE 0x280000 + #define GU_CNTL_PROTECTED XE_REG(0x10100C) #define DRIVERINT_FLR_DIS REG_BIT(31) @@ -38,6 +40,8 @@ #define STOLEN_RESERVED XE_REG(0x1082c0) #define WOPCM_SIZE_MASK REG_GENMASK64(9, 7) +#define SG_TILE_ADDR_RANGE(_idx) XE_REG(0x1083a0 + (_idx) * 4) + #define MTL_RP_STATE_CAP XE_REG(0x138000) #define MTL_GT_RPA_FREQUENCY XE_REG(0x138008) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 378dcd0fb414..2294cf89f3e1 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -23,7 +23,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, bool clear, u64 get_val, u64 assign_val, - struct kunit *test) + struct kunit *test, struct drm_exec *exec) { struct dma_fence *fence; struct ttm_tt *ttm; @@ -35,7 +35,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, u32 offset; /* Move bo to VRAM if not already there. 
*/ - ret = xe_bo_validate(bo, NULL, false); + ret = xe_bo_validate(bo, NULL, false, exec); if (ret) { KUNIT_FAIL(test, "Failed to validate bo.\n"); return ret; @@ -60,7 +60,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, } /* Evict to system. CCS data should be copied. */ - ret = xe_bo_evict(bo); + ret = xe_bo_evict(bo, exec); if (ret) { KUNIT_FAIL(test, "Failed to evict bo.\n"); return ret; @@ -106,7 +106,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, } /* Check last CCS value, or at least last value in page. */ - offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size); + offset = xe_device_ccs_bytes(tile_to_xe(tile), xe_bo_size(bo)); offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1; if (cpu_map[offset] != get_val) { KUNIT_FAIL(test, @@ -132,14 +132,15 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, /* TODO: Sanity check */ unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; if (IS_DGFX(xe)) kunit_info(test, "Testing vram id %u\n", tile->id); else kunit_info(test, "Testing system memory\n"); - bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, - bo_flags); + bo = xe_bo_create_user(xe, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, + bo_flags, exec); if (IS_ERR(bo)) { KUNIT_FAIL(test, "Failed to create bo.\n"); return; @@ -149,18 +150,18 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, kunit_info(test, "Verifying that CCS data is cleared on creation.\n"); ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL, - test); + test, exec); if (ret) goto out_unlock; kunit_info(test, "Verifying that CCS data survives migration.\n"); ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL, - 0xdeadbeefdeadbeefULL, test); + 0xdeadbeefdeadbeefULL, test, exec); if (ret) goto out_unlock; kunit_info(test, "Verifying that CCS data can be properly cleared.\n"); - ret = 
ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test); + ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test, exec); out_unlock: xe_bo_unlock(bo); @@ -210,6 +211,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc struct xe_bo *bo, *external; unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate); + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; struct xe_gt *__gt; int err, i, id; @@ -218,25 +220,25 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc for (i = 0; i < 2; ++i) { xe_vm_lock(vm, false); - bo = xe_bo_create_user(xe, NULL, vm, 0x10000, + bo = xe_bo_create_user(xe, vm, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - bo_flags); + bo_flags, exec); xe_vm_unlock(vm); if (IS_ERR(bo)) { KUNIT_FAIL(test, "bo create err=%pe\n", bo); break; } - external = xe_bo_create_user(xe, NULL, NULL, 0x10000, + external = xe_bo_create_user(xe, NULL, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - bo_flags); + bo_flags, NULL); if (IS_ERR(external)) { KUNIT_FAIL(test, "external bo create err=%pe\n", external); goto cleanup_bo; } xe_bo_lock(external, false); - err = xe_bo_pin_external(external); + err = xe_bo_pin_external(external, false, exec); xe_bo_unlock(external); if (err) { KUNIT_FAIL(test, "external bo pin err=%pe\n", @@ -294,7 +296,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc if (i) { down_read(&vm->lock); xe_vm_lock(vm, false); - err = xe_bo_validate(bo, bo->vm, false); + err = xe_bo_validate(bo, bo->vm, false, exec); xe_vm_unlock(vm); up_read(&vm->lock); if (err) { @@ -303,7 +305,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc goto cleanup_all; } xe_bo_lock(external, false); - err = xe_bo_validate(external, NULL, false); + err = xe_bo_validate(external, NULL, false, exec); xe_bo_unlock(external); if (err) { KUNIT_FAIL(test, "external bo valid err=%pe\n", @@ -495,9 +497,9 
@@ static int shrink_test_run_device(struct xe_device *xe) INIT_LIST_HEAD(&link->link); /* We can create bos using WC caching here. But it is slower. */ - bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE, + bo = xe_bo_create_user(xe, NULL, XE_BO_SHRINK_SIZE, DRM_XE_GEM_CPU_CACHING_WB, - XE_BO_FLAG_SYSTEM); + XE_BO_FLAG_SYSTEM, NULL); if (IS_ERR(bo)) { if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) && bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS)) @@ -514,9 +516,9 @@ static int shrink_test_run_device(struct xe_device *xe) * other way around, they may not be subject to swapping... */ if (alloced < purgeable) { - xe_ttm_tt_account_subtract(&xe_tt->ttm); + xe_ttm_tt_account_subtract(xe, &xe_tt->ttm); xe_tt->purgeable = true; - xe_ttm_tt_account_add(&xe_tt->ttm); + xe_ttm_tt_account_add(xe, &xe_tt->ttm); bo->ttm.priority = 0; spin_lock(&bo->ttm.bdev->lru_lock); ttm_bo_move_to_lru_tail(&bo->ttm); diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index c53f67ce4b0a..5df98de5ba3c 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -27,9 +27,11 @@ static bool is_dynamic(struct dma_buf_test_params *params) } static void check_residency(struct kunit *test, struct xe_bo *exported, - struct xe_bo *imported, struct dma_buf *dmabuf) + struct xe_bo *imported, struct dma_buf *dmabuf, + struct drm_exec *exec) { struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv); + struct dma_buf_attachment *attach; u32 mem_type; int ret; @@ -45,7 +47,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, mem_type = XE_PL_TT; else if (params->force_different_devices && !is_dynamic(params) && (params->mem_mask & XE_BO_FLAG_SYSTEM)) - /* Pin migrated to TT */ + /* Pin migrated to TT on non-dynamic attachments. 
*/ mem_type = XE_PL_TT; if (!xe_bo_is_mem_type(exported, mem_type)) { @@ -57,16 +59,12 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, return; /* - * Evict exporter. Note that the gem object dma_buf member isn't - * set from xe_gem_prime_export(), and it's needed for the move_notify() - * functionality, so hack that up here. Evicting the exported bo will + * Evict exporter. Evicting the exported bo will * evict also the imported bo through the move_notify() functionality if * importer is on a different device. If they're on the same device, * the exporter and the importer should be the same bo. */ - swap(exported->ttm.base.dma_buf, dmabuf); - ret = xe_bo_evict(exported); - swap(exported->ttm.base.dma_buf, dmabuf); + ret = xe_bo_evict(exported, exec); if (ret) { if (ret != -EINTR && ret != -ERESTARTSYS) KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n", @@ -81,7 +79,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, } /* Re-validate the importer. This should move also exporter in. */ - ret = xe_bo_validate(imported, NULL, false); + ret = xe_bo_validate(imported, NULL, false, exec); if (ret) { if (ret != -EINTR && ret != -ERESTARTSYS) KUNIT_FAIL(test, "Validating importer failed with err=%d.\n", @@ -89,15 +87,19 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, return; } - /* - * If on different devices, the exporter is kept in system if - * possible, saving a migration step as the transfer is just - * likely as fast from system memory. - */ - if (params->mem_mask & XE_BO_FLAG_SYSTEM) - KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT)); - else - KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); + + /* Check that we can pin without migrating. 
*/ + attach = list_first_entry_or_null(&dmabuf->attachments, typeof(*attach), node); + if (attach) { + int err = dma_buf_pin(attach); + + if (!err) { + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); + dma_buf_unpin(attach); + } + KUNIT_EXPECT_EQ(test, err, 0); + } if (params->force_different_devices) KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT)); @@ -125,8 +127,8 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) size = SZ_64K; kunit_info(test, "running %s\n", __func__); - bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, - params->mem_mask); + bo = xe_bo_create_user(xe, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, + params->mem_mask, NULL); if (IS_ERR(bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(bo)); @@ -139,6 +141,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) PTR_ERR(dmabuf)); goto out; } + bo->ttm.base.dma_buf = dmabuf; import = xe_gem_prime_import(&xe->drm, dmabuf); if (!IS_ERR(import)) { @@ -153,13 +156,14 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) KUNIT_FAIL(test, "xe_gem_prime_import() succeeded when it shouldn't have\n"); } else { + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; int err; /* Is everything where we expect it to be? */ xe_bo_lock(import_bo, false); - err = xe_bo_validate(import_bo, NULL, false); + err = xe_bo_validate(import_bo, NULL, false, exec); - /* Pinning in VRAM is not allowed. 
*/ + /* Pinning in VRAM is not allowed for non-dynamic attachments */ if (!is_dynamic(params) && params->force_different_devices && !(params->mem_mask & XE_BO_FLAG_SYSTEM)) @@ -170,7 +174,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) err == -ERESTARTSYS); if (!err) - check_residency(test, bo, import_bo, dmabuf); + check_residency(test, bo, import_bo, dmabuf, exec); xe_bo_unlock(import_bo); } drm_gem_object_put(import); @@ -186,6 +190,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n", PTR_ERR(import)); } + bo->ttm.base.dma_buf = NULL; dma_buf_put(dmabuf); out: drm_gem_object_put(&bo->ttm.base); @@ -206,7 +211,7 @@ static const struct dma_buf_attach_ops nop2p_attach_ops = { static const struct dma_buf_test_params test_params[] = { {.mem_mask = XE_BO_FLAG_VRAM0, .attach_ops = &xe_dma_buf_attach_ops}, - {.mem_mask = XE_BO_FLAG_VRAM0, + {.mem_mask = XE_BO_FLAG_VRAM0 | XE_BO_FLAG_NEEDS_CPU_ACCESS, .attach_ops = &xe_dma_buf_attach_ops, .force_different_devices = true}, @@ -238,7 +243,8 @@ static const struct dma_buf_test_params test_params[] = { {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, .attach_ops = &xe_dma_buf_attach_ops}, - {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, + {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0 | + XE_BO_FLAG_NEEDS_CPU_ACCESS, .attach_ops = &xe_dma_buf_attach_ops, .force_different_devices = true}, diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c new file mode 100644 index 000000000000..42bfc4bcfbcf --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <kunit/static_stub.h> +#include <kunit/test.h> +#include <kunit/test-bug.h> + +#include "xe_kunit_helpers.h" +#include "xe_pci_test.h" + +#define 
TEST_MAX_VFS 63 + +static void pf_set_admin_mode(struct xe_device *xe, bool enable) +{ + /* should match logic of xe_sriov_pf_admin_only() */ + xe->info.probe_display = !enable; + KUNIT_EXPECT_EQ(kunit_get_current_test(), enable, xe_sriov_pf_admin_only(xe)); +} + +static const void *num_vfs_gen_param(struct kunit *test, const void *prev, char *desc) +{ + unsigned long next = 1 + (unsigned long)prev; + + if (next > TEST_MAX_VFS) + return NULL; + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%lu VF%s", + next, str_plural(next)); + return (void *)next; +} + +static int pf_gt_config_test_init(struct kunit *test) +{ + struct xe_pci_fake_data fake = { + .sriov_mode = XE_SRIOV_MODE_PF, + .platform = XE_TIGERLAKE, /* any random platform with SR-IOV */ + .subplatform = XE_SUBPLATFORM_NONE, + }; + struct xe_device *xe; + struct xe_gt *gt; + + test->priv = &fake; + xe_kunit_helper_xe_device_test_init(test); + + xe = test->priv; + KUNIT_ASSERT_TRUE(test, IS_SRIOV_PF(xe)); + + gt = xe_root_mmio_gt(xe); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt); + test->priv = gt; + + /* pretend it can support up to 63 VFs */ + xe->sriov.pf.device_total_vfs = TEST_MAX_VFS; + xe->sriov.pf.driver_max_vfs = TEST_MAX_VFS; + KUNIT_ASSERT_EQ(test, xe_sriov_pf_get_totalvfs(xe), 63); + + pf_set_admin_mode(xe, false); + KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); + + /* more sanity checks */ + KUNIT_EXPECT_EQ(test, GUC_ID_MAX + 1, SZ_64K); + KUNIT_EXPECT_EQ(test, GUC_NUM_DOORBELLS, SZ_256); + + return 0; +} + +static void fair_contexts_1vf(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + + pf_set_admin_mode(xe, false); + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); + KUNIT_EXPECT_EQ(test, SZ_32K, pf_profile_fair_ctxs(gt, 1)); + + pf_set_admin_mode(xe, true); + KUNIT_ASSERT_TRUE(test, xe_sriov_pf_admin_only(xe)); + KUNIT_EXPECT_EQ(test, SZ_64K - SZ_1K, pf_profile_fair_ctxs(gt, 1)); +} + +static void fair_contexts(struct kunit *test) +{ + unsigned int 
num_vfs = (unsigned long)test->param_value; + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + + pf_set_admin_mode(xe, false); + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); + + KUNIT_EXPECT_TRUE(test, is_power_of_2(pf_profile_fair_ctxs(gt, num_vfs))); + KUNIT_EXPECT_GT(test, GUC_ID_MAX, num_vfs * pf_profile_fair_ctxs(gt, num_vfs)); + + if (num_vfs > 31) + KUNIT_ASSERT_EQ(test, SZ_1K, pf_profile_fair_ctxs(gt, num_vfs)); + else if (num_vfs > 15) + KUNIT_ASSERT_EQ(test, SZ_2K, pf_profile_fair_ctxs(gt, num_vfs)); + else if (num_vfs > 7) + KUNIT_ASSERT_EQ(test, SZ_4K, pf_profile_fair_ctxs(gt, num_vfs)); + else if (num_vfs > 3) + KUNIT_ASSERT_EQ(test, SZ_8K, pf_profile_fair_ctxs(gt, num_vfs)); + else if (num_vfs > 1) + KUNIT_ASSERT_EQ(test, SZ_16K, pf_profile_fair_ctxs(gt, num_vfs)); + else + KUNIT_ASSERT_EQ(test, SZ_32K, pf_profile_fair_ctxs(gt, num_vfs)); +} + +static void fair_doorbells_1vf(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + + pf_set_admin_mode(xe, false); + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); + KUNIT_EXPECT_EQ(test, 128, pf_profile_fair_dbs(gt, 1)); + + pf_set_admin_mode(xe, true); + KUNIT_ASSERT_TRUE(test, xe_sriov_pf_admin_only(xe)); + KUNIT_EXPECT_EQ(test, 240, pf_profile_fair_dbs(gt, 1)); +} + +static void fair_doorbells(struct kunit *test) +{ + unsigned int num_vfs = (unsigned long)test->param_value; + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + + pf_set_admin_mode(xe, false); + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); + + KUNIT_EXPECT_TRUE(test, is_power_of_2(pf_profile_fair_dbs(gt, num_vfs))); + KUNIT_EXPECT_GE(test, GUC_NUM_DOORBELLS, (num_vfs + 1) * pf_profile_fair_dbs(gt, num_vfs)); + + if (num_vfs > 31) + KUNIT_ASSERT_EQ(test, SZ_4, pf_profile_fair_dbs(gt, num_vfs)); + else if (num_vfs > 15) + KUNIT_ASSERT_EQ(test, SZ_8, pf_profile_fair_dbs(gt, num_vfs)); + else if (num_vfs > 7) + KUNIT_ASSERT_EQ(test, 
SZ_16, pf_profile_fair_dbs(gt, num_vfs)); + else if (num_vfs > 3) + KUNIT_ASSERT_EQ(test, SZ_32, pf_profile_fair_dbs(gt, num_vfs)); + else if (num_vfs > 1) + KUNIT_ASSERT_EQ(test, SZ_64, pf_profile_fair_dbs(gt, num_vfs)); + else + KUNIT_ASSERT_EQ(test, SZ_128, pf_profile_fair_dbs(gt, num_vfs)); +} + +static void fair_ggtt_1vf(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + + pf_set_admin_mode(xe, false); + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); + KUNIT_EXPECT_EQ(test, SZ_2G, pf_profile_fair_ggtt(gt, 1)); + + pf_set_admin_mode(xe, true); + KUNIT_ASSERT_TRUE(test, xe_sriov_pf_admin_only(xe)); + KUNIT_EXPECT_EQ(test, SZ_2G + SZ_1G + SZ_512M, pf_profile_fair_ggtt(gt, 1)); +} + +static void fair_ggtt(struct kunit *test) +{ + unsigned int num_vfs = (unsigned long)test->param_value; + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + u64 alignment = pf_get_ggtt_alignment(gt); + u64 shareable = SZ_2G + SZ_1G + SZ_512M; + + pf_set_admin_mode(xe, false); + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); + + KUNIT_EXPECT_TRUE(test, IS_ALIGNED(pf_profile_fair_ggtt(gt, num_vfs), alignment)); + KUNIT_EXPECT_GE(test, shareable, num_vfs * pf_profile_fair_ggtt(gt, num_vfs)); + + if (num_vfs > 56) + KUNIT_ASSERT_EQ(test, SZ_64M - SZ_8M, pf_profile_fair_ggtt(gt, num_vfs)); + else if (num_vfs > 28) + KUNIT_ASSERT_EQ(test, SZ_64M, pf_profile_fair_ggtt(gt, num_vfs)); + else if (num_vfs > 14) + KUNIT_ASSERT_EQ(test, SZ_128M, pf_profile_fair_ggtt(gt, num_vfs)); + else if (num_vfs > 7) + KUNIT_ASSERT_EQ(test, SZ_256M, pf_profile_fair_ggtt(gt, num_vfs)); + else if (num_vfs > 3) + KUNIT_ASSERT_EQ(test, SZ_512M, pf_profile_fair_ggtt(gt, num_vfs)); + else if (num_vfs > 1) + KUNIT_ASSERT_EQ(test, SZ_1G, pf_profile_fair_ggtt(gt, num_vfs)); + else + KUNIT_ASSERT_EQ(test, SZ_2G, pf_profile_fair_ggtt(gt, num_vfs)); +} + +static struct kunit_case pf_gt_config_test_cases[] = { + KUNIT_CASE(fair_contexts_1vf), 
+ KUNIT_CASE(fair_doorbells_1vf), + KUNIT_CASE(fair_ggtt_1vf), + KUNIT_CASE_PARAM(fair_contexts, num_vfs_gen_param), + KUNIT_CASE_PARAM(fair_doorbells, num_vfs_gen_param), + KUNIT_CASE_PARAM(fair_ggtt, num_vfs_gen_param), + {} +}; + +static struct kunit_suite pf_gt_config_suite = { + .name = "pf_gt_config", + .test_cases = pf_gt_config_test_cases, + .init = pf_gt_config_test_init, +}; + +kunit_test_suite(pf_gt_config_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c deleted file mode 100644 index b683585db852..000000000000 --- a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 AND MIT -/* - * Copyright © 2024 Intel Corporation - */ - -#include <kunit/test.h> - -#include "xe_device.h" -#include "xe_kunit_helpers.h" -#include "xe_pci_test.h" - -static int pf_service_test_init(struct kunit *test) -{ - struct xe_pci_fake_data fake = { - .sriov_mode = XE_SRIOV_MODE_PF, - .platform = XE_TIGERLAKE, /* some random platform */ - .subplatform = XE_SUBPLATFORM_NONE, - }; - struct xe_device *xe; - struct xe_gt *gt; - - test->priv = &fake; - xe_kunit_helper_xe_device_test_init(test); - - xe = test->priv; - KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); - - gt = xe_device_get_gt(xe, 0); - pf_init_versions(gt); - - /* - * sanity check: - * - all supported platforms VF/PF ABI versions must be defined - * - base version can't be newer than latest - */ - KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.latest.major); - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.minor, - gt->sriov.pf.service.version.latest.minor); - - test->priv = gt; - return 0; 
-} - -static void pf_negotiate_any(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, VF2PF_HANDSHAKE_MAJOR_ANY, - VF2PF_HANDSHAKE_MINOR_ANY, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_base_match(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.base.minor); -} - -static void pf_negotiate_base_newer(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_GE(test, minor, gt->sriov.pf.service.version.base.minor); - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); - else - KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); -} - -static void pf_negotiate_base_next(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major + 1, 0, - &major, &minor)); - KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_LE(test, major, gt->sriov.pf.service.version.latest.major); - if (major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, minor, 
gt->sriov.pf.service.version.latest.minor); - else - KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); -} - -static void pf_negotiate_base_older(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (!gt->sriov.pf.service.version.base.minor) - kunit_skip(test, "no older minor\n"); - - KUNIT_ASSERT_NE(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor - 1, - &major, &minor)); -} - -static void pf_negotiate_base_prev(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_NE(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major - 1, 1, - &major, &minor)); -} - -static void pf_negotiate_latest_match(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_newer(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_next(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major + 1, 0, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, 
gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_older(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (!gt->sriov.pf.service.version.latest.minor) - kunit_skip(test, "no older minor\n"); - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor - 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor - 1); -} - -static void pf_negotiate_latest_prev(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - kunit_skip(test, "no prev major"); - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major - 1, - gt->sriov.pf.service.version.base.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major - 1); - KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); -} - -static struct kunit_case pf_service_test_cases[] = { - KUNIT_CASE(pf_negotiate_any), - KUNIT_CASE(pf_negotiate_base_match), - KUNIT_CASE(pf_negotiate_base_newer), - KUNIT_CASE(pf_negotiate_base_next), - KUNIT_CASE(pf_negotiate_base_older), - KUNIT_CASE(pf_negotiate_base_prev), - KUNIT_CASE(pf_negotiate_latest_match), - KUNIT_CASE(pf_negotiate_latest_newer), - KUNIT_CASE(pf_negotiate_latest_next), - KUNIT_CASE(pf_negotiate_latest_older), - KUNIT_CASE(pf_negotiate_latest_prev), - {} -}; - -static struct kunit_suite pf_service_suite = { - .name = "pf_service", - .test_cases = pf_service_test_cases, - .init = pf_service_test_init, -}; - -kunit_test_suite(pf_service_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c index 6faffcd74869..d266882adc0e 100644 --- 
a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c +++ b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c @@ -32,7 +32,7 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device * bo->tile = tile; bo->ttm.bdev = &xe->ttm; - bo->size = size; + bo->ttm.base.size = size; iosys_map_set_vaddr(&bo->vmap, buf); if (flags & XE_BO_FLAG_GGTT) { @@ -42,10 +42,8 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device * KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo->ggtt_node[tile->id]); KUNIT_ASSERT_EQ(test, 0, - drm_mm_insert_node_in_range(&ggtt->mm, - &bo->ggtt_node[tile->id]->base, - bo->size, SZ_4K, - 0, 0, U64_MAX, 0)); + xe_ggtt_node_insert(bo->ggtt_node[tile->id], + xe_bo_size(bo), SZ_4K)); } return bo; @@ -67,8 +65,9 @@ static int guc_buf_test_init(struct kunit *test) ggtt = xe_device_get_root_tile(test->priv)->mem.ggtt; guc = &xe_device_get_gt(test->priv, 0)->uc.guc; - drm_mm_init(&ggtt->mm, DUT_GGTT_START, DUT_GGTT_SIZE); - mutex_init(&ggtt->lock); + KUNIT_ASSERT_EQ(test, 0, + xe_ggtt_init_kunit(ggtt, DUT_GGTT_START, + DUT_GGTT_START + DUT_GGTT_SIZE)); kunit_activate_static_stub(test, xe_managed_bo_create_pin_map, replacement_xe_managed_bo_create_pin_map); diff --git a/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c new file mode 100644 index 000000000000..3b213fcae916 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c @@ -0,0 +1,776 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/delay.h> + +#include <kunit/test.h> +#include <kunit/visibility.h> + +#include "tests/xe_kunit_helpers.h" +#include "tests/xe_pci_test.h" +#include "tests/xe_test.h" + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_pm.h" + +/* + * There are different ways to allocate the G2G buffers. The plan for this test + * is to make sure that all the possible options work. 
The particular option + * chosen by the driver may vary from one platform to another, it may also change + * with time. So to ensure consistency of testing, the relevant driver code is + * replicated here to guarantee it won't change without the test being updated + * to keep testing the other options. + * + * In order to test the actual code being used by the driver, there is also the + * 'default' scheme. That will use the official driver routines to test whatever + * method the driver is using on the current platform at the current time. + */ +enum { + /* Driver defined allocation scheme */ + G2G_CTB_TYPE_DEFAULT, + /* Single buffer in host memory */ + G2G_CTB_TYPE_HOST, + /* Single buffer in a specific tile, loops across all tiles */ + G2G_CTB_TYPE_TILE, +}; + +/* + * Payload is opaque to GuC. So KMD can define any structure or size it wants. + */ +struct g2g_test_payload { + u32 tx_dev; + u32 tx_tile; + u32 rx_dev; + u32 rx_tile; + u32 seqno; +}; + +static void g2g_test_send(struct kunit *test, struct xe_guc *guc, + u32 far_tile, u32 far_dev, + struct g2g_test_payload *payload) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + u32 *action, total; + size_t payload_len; + int ret; + + static_assert(IS_ALIGNED(sizeof(*payload), sizeof(u32))); + payload_len = sizeof(*payload) / sizeof(u32); + + total = 4 + payload_len; + action = kunit_kmalloc_array(test, total, sizeof(*action), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, action); + + action[0] = XE_GUC_ACTION_TEST_G2G_SEND; + action[1] = far_tile; + action[2] = far_dev; + action[3] = payload_len; + memcpy(action + 4, payload, payload_len * sizeof(u32)); + + atomic_inc(&xe->g2g_test_count); + + /* + * Should specify the expected response notification here. Problem is that + * the response will be coming from a different GuC. By the end, it should + * all add up as long as an equal number of messages are sent from each GuC + * and to each GuC. 
However, in the middle negative reservation space errors + * and such like can occur. Rather than add intrusive changes to the CT layer + * it is simpler to just not bother counting it at all. The system should be + * idle when running the selftest, and the selftest's notification total size + * is well within the G2H allocation size. So there should be no issues with + * needing to block for space, which is all the tracking code is really for. + */ + ret = xe_guc_ct_send(&guc->ct, action, total, 0, 0); + kunit_kfree(test, action); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G send failed: %d [%d:%d -> %d:%d]\n", ret, + gt_to_tile(gt)->id, G2G_DEV(gt), far_tile, far_dev); +} + +/* + * NB: Can't use KUNIT_ASSERT and friends in here as this is called asynchronously + * from the G2H notification handler. Need that to actually complete rather than + * thread-abort in order to keep the rest of the driver alive! + */ +int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *rx_gt = guc_to_gt(guc), *test_gt, *tx_gt = NULL; + u32 tx_tile, tx_dev, rx_tile, rx_dev, idx, got_len; + struct g2g_test_payload *payload; + size_t payload_len; + int ret = 0, i; + + payload_len = sizeof(*payload) / sizeof(u32); + + if (unlikely(len != (G2H_LEN_DW_G2G_NOTIFY_MIN + payload_len))) { + xe_gt_err(rx_gt, "G2G test notification invalid length %u", len); + ret = -EPROTO; + goto done; + } + + tx_tile = msg[0]; + tx_dev = msg[1]; + got_len = msg[2]; + payload = (struct g2g_test_payload *)(msg + 3); + + rx_tile = gt_to_tile(rx_gt)->id; + rx_dev = G2G_DEV(rx_gt); + + if (got_len != payload_len) { + xe_gt_err(rx_gt, "G2G: Invalid payload length: %u vs %zu\n", got_len, payload_len); + ret = -EPROTO; + goto done; + } + + if (payload->tx_dev != tx_dev || payload->tx_tile != tx_tile || + payload->rx_dev != rx_dev || payload->rx_tile != rx_tile) { + xe_gt_err(rx_gt, "G2G: Invalid payload: %d:%d -> %d:%d vs %d:%d -> %d:%d! 
[%d]\n", + payload->tx_tile, payload->tx_dev, payload->rx_tile, payload->rx_dev, + tx_tile, tx_dev, rx_tile, rx_dev, payload->seqno); + ret = -EPROTO; + goto done; + } + + if (!xe->g2g_test_array) { + xe_gt_err(rx_gt, "G2G: Missing test array!\n"); + ret = -ENOMEM; + goto done; + } + + for_each_gt(test_gt, xe, i) { + if (gt_to_tile(test_gt)->id != tx_tile) + continue; + + if (G2G_DEV(test_gt) != tx_dev) + continue; + + if (tx_gt) { + xe_gt_err(rx_gt, "G2G: Got duplicate TX GTs: %d vs %d for %d:%d!\n", + tx_gt->info.id, test_gt->info.id, tx_tile, tx_dev); + ret = -EINVAL; + goto done; + } + + tx_gt = test_gt; + } + if (!tx_gt) { + xe_gt_err(rx_gt, "G2G: Failed to find a TX GT for %d:%d!\n", tx_tile, tx_dev); + ret = -EINVAL; + goto done; + } + + idx = (tx_gt->info.id * xe->info.gt_count) + rx_gt->info.id; + + if (xe->g2g_test_array[idx] != payload->seqno - 1) { + xe_gt_err(rx_gt, "G2G: Seqno mismatch %d vs %d for %d:%d -> %d:%d!\n", + xe->g2g_test_array[idx], payload->seqno - 1, + tx_tile, tx_dev, rx_tile, rx_dev); + ret = -EINVAL; + goto done; + } + + xe->g2g_test_array[idx] = payload->seqno; + +done: + atomic_dec(&xe->g2g_test_count); + return ret; +} + +/* + * Send the given seqno from all GuCs to all other GuCs in tile/GT order + */ +static void g2g_test_in_order(struct kunit *test, struct xe_device *xe, u32 seqno) +{ + struct xe_gt *near_gt, *far_gt; + int i, j; + + for_each_gt(near_gt, xe, i) { + u32 near_tile = gt_to_tile(near_gt)->id; + u32 near_dev = G2G_DEV(near_gt); + + for_each_gt(far_gt, xe, j) { + u32 far_tile = gt_to_tile(far_gt)->id; + u32 far_dev = G2G_DEV(far_gt); + struct g2g_test_payload payload; + + if (far_gt->info.id == near_gt->info.id) + continue; + + payload.tx_dev = near_dev; + payload.tx_tile = near_tile; + payload.rx_dev = far_dev; + payload.rx_tile = far_tile; + payload.seqno = seqno; + g2g_test_send(test, &near_gt->uc.guc, far_tile, far_dev, &payload); + } + } +} + +#define WAIT_TIME_MS 100 +#define WAIT_COUNT (1000 / WAIT_TIME_MS) + 
+static void g2g_wait_for_complete(void *_xe) +{ + struct xe_device *xe = (struct xe_device *)_xe; + struct kunit *test = kunit_get_current_test(); + int wait = 0; + + /* Wait for all G2H messages to be received */ + while (atomic_read(&xe->g2g_test_count)) { + if (++wait > WAIT_COUNT) + break; + + msleep(WAIT_TIME_MS); + } + + KUNIT_ASSERT_EQ_MSG(test, 0, atomic_read(&xe->g2g_test_count), + "Timed out waiting for notifications\n"); + kunit_info(test, "Got all notifications back\n"); +} + +#undef WAIT_TIME_MS +#undef WAIT_COUNT + +static void g2g_clean_array(void *_xe) +{ + struct xe_device *xe = (struct xe_device *)_xe; + + xe->g2g_test_array = NULL; +} + +#define NUM_LOOPS 16 + +static void g2g_run_test(struct kunit *test, struct xe_device *xe) +{ + u32 seqno, max_array; + int ret, i, j; + + max_array = xe->info.gt_count * xe->info.gt_count; + xe->g2g_test_array = kunit_kcalloc(test, max_array, sizeof(u32), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe->g2g_test_array); + + ret = kunit_add_action_or_reset(test, g2g_clean_array, xe); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n"); + + /* + * Send incrementing seqnos from all GuCs to all other GuCs in tile/GT order. + * Tile/GT order doesn't really mean anything to the hardware but it is going + * to be a fixed sequence every time. + * + * Verify that each one comes back having taken the correct route. 
+ */ + ret = kunit_add_action(test, g2g_wait_for_complete, xe); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n"); + for (seqno = 1; seqno < NUM_LOOPS; seqno++) + g2g_test_in_order(test, xe, seqno); + seqno--; + + kunit_release_action(test, &g2g_wait_for_complete, xe); + + /* Check for the final seqno in each slot */ + for (i = 0; i < xe->info.gt_count; i++) { + for (j = 0; j < xe->info.gt_count; j++) { + u32 idx = (j * xe->info.gt_count) + i; + + if (i == j) + KUNIT_ASSERT_EQ_MSG(test, 0, xe->g2g_test_array[idx], + "identity seqno modified: %d for %dx%d!\n", + xe->g2g_test_array[idx], i, j); + else + KUNIT_ASSERT_EQ_MSG(test, seqno, xe->g2g_test_array[idx], + "invalid seqno: %d vs %d for %dx%d!\n", + xe->g2g_test_array[idx], seqno, i, j); + } + } + + kunit_kfree(test, xe->g2g_test_array); + kunit_release_action(test, &g2g_clean_array, xe); + + kunit_info(test, "Test passed\n"); +} + +#undef NUM_LOOPS + +static void g2g_ct_stop(struct xe_guc *guc) +{ + struct xe_gt *remote_gt, *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + int i, t; + + for_each_gt(remote_gt, xe, i) { + u32 tile, dev; + + if (remote_gt->info.id == gt->info.id) + continue; + + tile = gt_to_tile(remote_gt)->id; + dev = G2G_DEV(remote_gt); + + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) + guc_g2g_deregister(guc, tile, dev, t); + } +} + +/* Size of a single allocation that contains all G2G CTBs across all GTs */ +static u32 g2g_ctb_size(struct kunit *test, struct xe_device *xe) +{ + unsigned int count = xe->info.gt_count; + u32 num_channels = (count * (count - 1)) / 2; + + kunit_info(test, "Size: (%d * %d / 2) * %d * 0x%08X + 0x%08X => 0x%08X [%d]\n", + count, count - 1, XE_G2G_TYPE_LIMIT, G2G_BUFFER_SIZE, G2G_DESC_AREA_SIZE, + num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE, + num_channels * XE_G2G_TYPE_LIMIT); + + return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE; +} + +/* + * Use the driver's regular CTB 
allocation scheme. + */ +static void g2g_alloc_default(struct kunit *test, struct xe_device *xe) +{ + struct xe_gt *gt; + int i; + + kunit_info(test, "Default [tiles = %d, GTs = %d]\n", + xe->info.tile_count, xe->info.gt_count); + + for_each_gt(gt, xe, i) { + struct xe_guc *guc = >->uc.guc; + int ret; + + ret = guc_g2g_alloc(guc); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G alloc failed: %pe", ERR_PTR(ret)); + continue; + } +} + +static void g2g_distribute(struct kunit *test, struct xe_device *xe, struct xe_bo *bo) +{ + struct xe_gt *root_gt, *gt; + int i; + + root_gt = xe_device_get_gt(xe, 0); + root_gt->uc.guc.g2g.bo = bo; + root_gt->uc.guc.g2g.owned = true; + kunit_info(test, "[%d.%d] Assigned 0x%p\n", gt_to_tile(root_gt)->id, root_gt->info.id, bo); + + for_each_gt(gt, xe, i) { + if (gt->info.id != 0) { + gt->uc.guc.g2g.owned = false; + gt->uc.guc.g2g.bo = xe_bo_get(bo); + kunit_info(test, "[%d.%d] Pinned 0x%p\n", + gt_to_tile(gt)->id, gt->info.id, gt->uc.guc.g2g.bo); + } + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt->uc.guc.g2g.bo); + } +} + +/* + * Allocate a single blob on the host and split between all G2G CTBs. + */ +static void g2g_alloc_host(struct kunit *test, struct xe_device *xe) +{ + struct xe_bo *bo; + u32 g2g_size; + + kunit_info(test, "Host [tiles = %d, GTs = %d]\n", xe->info.tile_count, xe->info.gt_count); + + g2g_size = g2g_ctb_size(test, xe); + bo = xe_managed_bo_create_pin_map(xe, xe_device_get_root_tile(xe), g2g_size, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_ALL | + XE_BO_FLAG_GGTT_INVALIDATE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo); + kunit_info(test, "[HST] G2G buffer create: 0x%p\n", bo); + + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size); + + g2g_distribute(test, xe, bo); +} + +/* + * Allocate a single blob on the given tile and split between all G2G CTBs. 
+ */ +static void g2g_alloc_tile(struct kunit *test, struct xe_device *xe, struct xe_tile *tile) +{ + struct xe_bo *bo; + u32 g2g_size; + + KUNIT_ASSERT_TRUE(test, IS_DGFX(xe)); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tile); + + kunit_info(test, "Tile %d [tiles = %d, GTs = %d]\n", + tile->id, xe->info.tile_count, xe->info.gt_count); + + g2g_size = g2g_ctb_size(test, xe); + bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_ALL | + XE_BO_FLAG_GGTT_INVALIDATE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo); + kunit_info(test, "[%d.*] G2G buffer create: 0x%p\n", tile->id, bo); + + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size); + + g2g_distribute(test, xe, bo); +} + +static void g2g_free(struct kunit *test, struct xe_device *xe) +{ + struct xe_gt *gt; + struct xe_bo *bo; + int i; + + for_each_gt(gt, xe, i) { + bo = gt->uc.guc.g2g.bo; + if (!bo) + continue; + + if (gt->uc.guc.g2g.owned) { + xe_managed_bo_unpin_map_no_vm(bo); + kunit_info(test, "[%d.%d] Unmapped 0x%p\n", + gt_to_tile(gt)->id, gt->info.id, bo); + } else { + xe_bo_put(bo); + kunit_info(test, "[%d.%d] Unpinned 0x%p\n", + gt_to_tile(gt)->id, gt->info.id, bo); + } + + gt->uc.guc.g2g.bo = NULL; + } +} + +static void g2g_stop(struct kunit *test, struct xe_device *xe) +{ + struct xe_gt *gt; + int i; + + for_each_gt(gt, xe, i) { + struct xe_guc *guc = >->uc.guc; + + if (!guc->g2g.bo) + continue; + + g2g_ct_stop(guc); + } + + g2g_free(test, xe); +} + +/* + * Generate a unique id for each bi-directional CTB for each pair of + * near and far tiles/devices. The id can then be used as an index into + * a single allocation that is sub-divided into multiple CTBs. 
+ * + * For example, with two devices per tile and two tiles, the table should + * look like: + * Far <tile>.<dev> + * 0.0 0.1 1.0 1.1 + * N 0.0 --/-- 00/01 02/03 04/05 + * e 0.1 01/00 --/-- 06/07 08/09 + * a 1.0 03/02 07/06 --/-- 10/11 + * r 1.1 05/04 09/08 11/10 --/-- + * + * Where each entry is Rx/Tx channel id. + * + * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would + * be reading from channel #11 and writing to channel #10. Whereas, + * GuC #2 talking to GuC #3 would be read on #10 and write to #11. + */ +static int g2g_slot_flat(u32 near_tile, u32 near_dev, u32 far_tile, u32 far_dev, + u32 type, u32 max_inst, bool have_dev) +{ + u32 near = near_tile, far = far_tile; + u32 idx = 0, x, y, direction; + int i; + + if (have_dev) { + near = (near << 1) | near_dev; + far = (far << 1) | far_dev; + } + + /* No need to send to one's self */ + if (far == near) + return -1; + + if (far > near) { + /* Top right table half */ + x = far; + y = near; + + /* T/R is 'forwards' direction */ + direction = type; + } else { + /* Bottom left table half */ + x = near; + y = far; + + /* B/L is 'backwards' direction */ + direction = (1 - type); + } + + /* Count the rows prior to the target */ + for (i = y; i > 0; i--) + idx += max_inst - i; + + /* Count this row up to the target */ + idx += (x - 1 - y); + + /* Slots are in Rx/Tx pairs */ + idx *= 2; + + /* Pick Rx/Tx direction */ + idx += direction; + + return idx; +} + +static int g2g_register_flat(struct xe_guc *guc, u32 far_tile, u32 far_dev, u32 type, bool have_dev) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + u32 near_tile = gt_to_tile(gt)->id; + u32 near_dev = G2G_DEV(gt); + u32 max = xe->info.gt_count; + int idx; + u32 base, desc, buf; + + if (!guc->g2g.bo) + return -ENODEV; + + idx = g2g_slot_flat(near_tile, near_dev, far_tile, far_dev, type, max, have_dev); + xe_assert(xe, idx >= 0); + + base = guc_bo_ggtt_addr(guc, guc->g2g.bo); + desc = base + idx * G2G_DESC_SIZE; + buf 
= base + idx * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE; + + xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(guc->g2g.bo)); + + return guc_action_register_g2g_buffer(guc, type, far_tile, far_dev, + desc, buf, G2G_BUFFER_SIZE); +} + +static void g2g_start(struct kunit *test, struct xe_guc *guc) +{ + struct xe_gt *remote_gt, *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + unsigned int i; + int t, ret; + bool have_dev; + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, guc->g2g.bo); + + /* GuC interface will need extending if more GT device types are ever created. */ + KUNIT_ASSERT_TRUE(test, + (gt->info.type == XE_GT_TYPE_MAIN) || + (gt->info.type == XE_GT_TYPE_MEDIA)); + + /* Channel numbering depends on whether there are multiple GTs per tile */ + have_dev = xe->info.gt_count > xe->info.tile_count; + + for_each_gt(remote_gt, xe, i) { + u32 tile, dev; + + if (remote_gt->info.id == gt->info.id) + continue; + + tile = gt_to_tile(remote_gt)->id; + dev = G2G_DEV(remote_gt); + + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) { + ret = g2g_register_flat(guc, tile, dev, t, have_dev); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G register failed: %pe", ERR_PTR(ret)); + } + } +} + +static void g2g_reinit(struct kunit *test, struct xe_device *xe, int ctb_type, struct xe_tile *tile) +{ + struct xe_gt *gt; + int i, found = 0; + + g2g_stop(test, xe); + + for_each_gt(gt, xe, i) { + struct xe_guc *guc = >->uc.guc; + + KUNIT_ASSERT_NULL(test, guc->g2g.bo); + } + + switch (ctb_type) { + case G2G_CTB_TYPE_DEFAULT: + g2g_alloc_default(test, xe); + break; + + case G2G_CTB_TYPE_HOST: + g2g_alloc_host(test, xe); + break; + + case G2G_CTB_TYPE_TILE: + g2g_alloc_tile(test, xe, tile); + break; + + default: + KUNIT_ASSERT_TRUE(test, false); + } + + for_each_gt(gt, xe, i) { + struct xe_guc *guc = >->uc.guc; + + if (!guc->g2g.bo) + continue; + + if (ctb_type == G2G_CTB_TYPE_DEFAULT) + guc_g2g_start(guc); + else + 
g2g_start(test, guc); + found++; + } + + KUNIT_ASSERT_GT_MSG(test, found, 1, "insufficient G2G channels running: %d", found); + + kunit_info(test, "Testing across %d GTs\n", found); +} + +static void g2g_recreate_ctb(void *_xe) +{ + struct xe_device *xe = (struct xe_device *)_xe; + struct kunit *test = kunit_get_current_test(); + + g2g_stop(test, xe); + + if (xe_guc_g2g_wanted(xe)) + g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL); +} + +static void g2g_pm_runtime_put(void *_xe) +{ + struct xe_device *xe = (struct xe_device *)_xe; + + xe_pm_runtime_put(xe); +} + +static void g2g_pm_runtime_get(struct kunit *test) +{ + struct xe_device *xe = test->priv; + int ret; + + xe_pm_runtime_get(xe); + ret = kunit_add_action_or_reset(test, g2g_pm_runtime_put, xe); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register runtime PM action\n"); +} + +static void g2g_check_skip(struct kunit *test) +{ + struct xe_device *xe = test->priv; + struct xe_gt *gt; + int i; + + if (IS_SRIOV_VF(xe)) + kunit_skip(test, "not supported from a VF"); + + if (xe->info.gt_count <= 1) + kunit_skip(test, "not enough GTs"); + + for_each_gt(gt, xe, i) { + struct xe_guc *guc = >->uc.guc; + + if (guc->fw.build_type == CSS_UKERNEL_INFO_BUILDTYPE_PROD) + kunit_skip(test, + "G2G test interface not available in production firmware builds\n"); + } +} + +/* + * Simple test that does not try to recreate the CTBs. + * Requires that the platform already enables G2G comms + * but has no risk of leaving the system in a broken state + * afterwards. 
+ */ +static void xe_live_guc_g2g_kunit_default(struct kunit *test) +{ + struct xe_device *xe = test->priv; + + if (!xe_guc_g2g_wanted(xe)) + kunit_skip(test, "G2G not enabled"); + + g2g_check_skip(test); + + g2g_pm_runtime_get(test); + + kunit_info(test, "Testing default CTBs\n"); + g2g_run_test(test, xe); + + kunit_release_action(test, &g2g_pm_runtime_put, xe); +} + +/* + * More complex test that re-creates the CTBs in various location to + * test access to each location from each GuC. Can be run even on + * systems that do not enable G2G by default. On the other hand, + * because it recreates the CTBs, if something goes wrong it could + * leave the system with broken G2G comms. + */ +static void xe_live_guc_g2g_kunit_allmem(struct kunit *test) +{ + struct xe_device *xe = test->priv; + int ret; + + g2g_check_skip(test); + + g2g_pm_runtime_get(test); + + /* Make sure to leave the system as we found it */ + ret = kunit_add_action_or_reset(test, g2g_recreate_ctb, xe); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register CTB re-creation action\n"); + + kunit_info(test, "Testing CTB type 'default'...\n"); + g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL); + g2g_run_test(test, xe); + + kunit_info(test, "Testing CTB type 'host'...\n"); + g2g_reinit(test, xe, G2G_CTB_TYPE_HOST, NULL); + g2g_run_test(test, xe); + + if (IS_DGFX(xe)) { + struct xe_tile *tile; + int id; + + for_each_tile(tile, xe, id) { + kunit_info(test, "Testing CTB type 'tile: #%d'...\n", id); + + g2g_reinit(test, xe, G2G_CTB_TYPE_TILE, tile); + g2g_run_test(test, xe); + } + } else { + kunit_info(test, "Skipping local memory on integrated platform\n"); + } + + kunit_release_action(test, g2g_recreate_ctb, xe); + kunit_release_action(test, g2g_pm_runtime_put, xe); +} + +static struct kunit_case xe_guc_g2g_tests[] = { + KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_default, xe_pci_live_device_gen_param), + KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_allmem, xe_pci_live_device_gen_param), + {} +}; + +VISIBLE_IF_KUNIT 
+struct kunit_suite xe_guc_g2g_test_suite = { + .name = "xe_guc_g2g", + .test_cases = xe_guc_g2g_tests, + .init = xe_kunit_helper_xe_device_live_test_init, +}; +EXPORT_SYMBOL_IF_KUNIT(xe_guc_g2g_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c index 81277c77016d..c55e46f1ae92 100644 --- a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c +++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c @@ -10,12 +10,14 @@ extern struct kunit_suite xe_bo_shrink_test_suite; extern struct kunit_suite xe_dma_buf_test_suite; extern struct kunit_suite xe_migrate_test_suite; extern struct kunit_suite xe_mocs_test_suite; +extern struct kunit_suite xe_guc_g2g_test_suite; kunit_test_suite(xe_bo_test_suite); kunit_test_suite(xe_bo_shrink_test_suite); kunit_test_suite(xe_dma_buf_test_suite); kunit_test_suite(xe_migrate_test_suite); kunit_test_suite(xe_mocs_test_suite); +kunit_test_suite(xe_guc_g2g_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 4a65e3103f77..5904d658d1f2 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -70,28 +70,29 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe, } } while (0) static void test_copy(struct xe_migrate *m, struct xe_bo *bo, - struct kunit *test, u32 region) + struct kunit *test, u32 region, struct drm_exec *exec) { struct xe_device *xe = tile_to_xe(m->tile); u64 retval, expected = 0; - bool big = bo->size >= SZ_2M; + bool big = xe_bo_size(bo) >= SZ_2M; struct dma_fence *fence; const char *str = big ? 
"Copying big bo" : "Copying small bo"; int err; struct xe_bo *remote = xe_bo_create_locked(xe, m->tile, NULL, - bo->size, + xe_bo_size(bo), ttm_bo_type_kernel, region | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_PINNED, + exec); if (IS_ERR(remote)) { KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n", str, remote); return; } - err = xe_bo_validate(remote, NULL, false); + err = xe_bo_validate(remote, NULL, false, exec); if (err) { KUNIT_FAIL(test, "Failed to validate system bo for %s: %i\n", str, err); @@ -105,7 +106,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, goto out_unlock; } - xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); + xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote)); fence = xe_migrate_clear(m, remote, remote->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); if (!sanity_fence_failed(xe, fence, big ? "Clearing remote big bo" : @@ -113,15 +114,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &remote->vmap, 0, u64); check(retval, expected, "remote first offset should be cleared", test); - retval = xe_map_rd(xe, &remote->vmap, remote->size - 8, u64); + retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(remote) - 8, u64); check(retval, expected, "remote last offset should be cleared", test); } dma_fence_put(fence); /* Try to copy 0xc0 from remote to vram with 2MB or 64KiB/4KiB pages */ - xe_map_memset(xe, &remote->vmap, 0, 0xc0, remote->size); - xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size); + xe_map_memset(xe, &remote->vmap, 0, 0xc0, xe_bo_size(remote)); + xe_map_memset(xe, &bo->vmap, 0, 0xd0, xe_bo_size(bo)); expected = 0xc0c0c0c0c0c0c0c0; fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource, @@ -131,15 +132,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &bo->vmap, 0, u64); check(retval, expected, "remote -> vram bo first offset should be copied", test); - retval = xe_map_rd(xe, &bo->vmap, 
bo->size - 8, u64); + retval = xe_map_rd(xe, &bo->vmap, xe_bo_size(bo) - 8, u64); check(retval, expected, "remote -> vram bo offset should be copied", test); } dma_fence_put(fence); /* And other way around.. slightly hacky.. */ - xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); - xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size); + xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote)); + xe_map_memset(xe, &bo->vmap, 0, 0xc0, xe_bo_size(bo)); fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource, remote->ttm.resource, false); @@ -148,7 +149,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &remote->vmap, 0, u64); check(retval, expected, "vram -> remote bo first offset should be copied", test); - retval = xe_map_rd(xe, &remote->vmap, bo->size - 8, u64); + retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(bo) - 8, u64); check(retval, expected, "vram -> remote bo last offset should be copied", test); } @@ -161,13 +162,13 @@ out_unlock: } static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo, - struct kunit *test) + struct drm_exec *exec, struct kunit *test) { - test_copy(m, bo, test, XE_BO_FLAG_SYSTEM); + test_copy(m, bo, test, XE_BO_FLAG_SYSTEM, exec); } static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, - struct kunit *test) + struct drm_exec *exec, struct kunit *test) { u32 region; @@ -178,10 +179,11 @@ static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, region = XE_BO_FLAG_VRAM1; else region = XE_BO_FLAG_VRAM0; - test_copy(m, bo, test, region); + test_copy(m, bo, test, region, exec); } -static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) +static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test, + struct drm_exec *exec) { struct xe_tile *tile = m->tile; struct xe_device *xe = tile_to_xe(tile); @@ -202,7 +204,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) big = xe_bo_create_pin_map(xe, 
tile, m->q->vm, SZ_4M, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile)); + XE_BO_FLAG_VRAM_IF_DGFX(tile), + exec); if (IS_ERR(big)) { KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big)); goto vunmap; @@ -210,7 +213,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile)); + XE_BO_FLAG_VRAM_IF_DGFX(tile), + exec); if (IS_ERR(pt)) { KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", PTR_ERR(pt)); @@ -220,7 +224,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) tiny = xe_bo_create_pin_map(xe, tile, m->q->vm, 2 * SZ_4K, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile)); + XE_BO_FLAG_VRAM_IF_DGFX(tile), + exec); if (IS_ERR(tiny)) { KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n", PTR_ERR(tiny)); @@ -245,9 +250,9 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) if (m->q->vm->flags & XE_VM_FLAG_64K) expected |= XE_PTE_PS64; if (xe_bo_is_vram(pt)) - xe_res_first(pt->ttm.resource, 0, pt->size, &src_it); + xe_res_first(pt->ttm.resource, 0, xe_bo_size(pt), &src_it); else - xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it); + xe_res_first_sg(xe_bo_sg(pt), 0, xe_bo_size(pt), &src_it); emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false, &src_it, XE_PAGE_SIZE, pt->ttm.resource); @@ -276,7 +281,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* Clear a small bo */ kunit_info(test, "Clearing small buffer object\n"); - xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); + xe_map_memset(xe, &tiny->vmap, 0, 0x22, xe_bo_size(tiny)); expected = 0; fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); @@ -286,19 +291,19 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) dma_fence_put(fence); retval = xe_map_rd(xe, &tiny->vmap, 0, u32); check(retval, 
expected, "Command clear small first value", test); - retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32); + retval = xe_map_rd(xe, &tiny->vmap, xe_bo_size(tiny) - 4, u32); check(retval, expected, "Command clear small last value", test); kunit_info(test, "Copying small buffer object to system\n"); - test_copy_sysmem(m, tiny, test); + test_copy_sysmem(m, tiny, exec, test); if (xe->info.tile_count > 1) { kunit_info(test, "Copying small buffer object to other vram\n"); - test_copy_vram(m, tiny, test); + test_copy_vram(m, tiny, exec, test); } /* Clear a big bo */ kunit_info(test, "Clearing big buffer object\n"); - xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); + xe_map_memset(xe, &big->vmap, 0, 0x11, xe_bo_size(big)); expected = 0; fence = xe_migrate_clear(m, big, big->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); @@ -308,14 +313,14 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) dma_fence_put(fence); retval = xe_map_rd(xe, &big->vmap, 0, u32); check(retval, expected, "Command clear big first value", test); - retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32); + retval = xe_map_rd(xe, &big->vmap, xe_bo_size(big) - 4, u32); check(retval, expected, "Command clear big last value", test); kunit_info(test, "Copying big buffer object to system\n"); - test_copy_sysmem(m, big, test); + test_copy_sysmem(m, big, exec, test); if (xe->info.tile_count > 1) { kunit_info(test, "Copying big buffer object to other vram\n"); - test_copy_vram(m, big, test); + test_copy_vram(m, big, exec, test); } out: @@ -343,10 +348,11 @@ static int migrate_test_run_device(struct xe_device *xe) for_each_tile(tile, xe, id) { struct xe_migrate *m = tile->migrate; + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; kunit_info(test, "Testing tile id %d.\n", id); xe_vm_lock(m->q->vm, false); - xe_migrate_sanity_test(m, test); + xe_migrate_sanity_test(m, test, exec); xe_vm_unlock(m->q->vm); } @@ -370,7 +376,7 @@ static struct dma_fence *blt_copy(struct xe_tile *tile, 
struct xe_migrate *m = tile->migrate; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; - u64 size = src_bo->size; + u64 size = xe_bo_size(src_bo); struct xe_res_cursor src_it, dst_it; struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource; u64 src_L0_ofs, dst_L0_ofs; @@ -490,7 +496,7 @@ err_sync: static void test_migrate(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct xe_bo *ccs_bo, - struct kunit *test) + struct drm_exec *exec, struct kunit *test) { struct dma_fence *fence; u64 expected, retval; @@ -498,7 +504,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, long ret; expected = 0xd0d0d0d0d0d0d0d0; - xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size); + xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo)); fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test); if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) { @@ -509,7 +515,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, dma_fence_put(fence); kunit_info(test, "Evict vram buffer object\n"); - ret = xe_bo_evict(vram_bo); + ret = xe_bo_evict(vram_bo, exec); if (ret) { KUNIT_FAIL(test, "Failed to evict bo.\n"); return; @@ -523,7 +529,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); check(retval, expected, "Clear evicted vram data first value", test); - retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64); + retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64); check(retval, expected, "Clear evicted vram data last value", test); fence = blt_copy(tile, vram_bo, ccs_bo, @@ -532,13 +538,13 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64); check(retval, 0, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 
8, u64); + retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64); check(retval, 0, "Clear ccs data last value", test); } dma_fence_put(fence); kunit_info(test, "Restore vram buffer object\n"); - ret = xe_bo_validate(vram_bo, NULL, false); + ret = xe_bo_validate(vram_bo, NULL, false, exec); if (ret) { KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret); return; @@ -562,7 +568,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); check(retval, expected, "Restored value must be equal to initial value", test); - retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64); + retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64); check(retval, expected, "Restored value must be equal to initial value", test); fence = blt_copy(tile, vram_bo, ccs_bo, @@ -570,7 +576,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) { retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64); check(retval, 0, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64); + retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64); check(retval, 0, "Clear ccs data last value", test); } dma_fence_put(fence); @@ -583,7 +589,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, u64 expected, retval; expected = 0xd0d0d0d0d0d0d0d0; - xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size); + xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo)); fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test); if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) { @@ -597,7 +603,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, 
"Decompressed value must be equal to initial value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Decompressed value must be equal to initial value", test); } dma_fence_put(fence); @@ -615,7 +621,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Clear main buffer first value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Clear main buffer last value", test); } dma_fence_put(fence); @@ -625,7 +631,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Clear ccs data last value", test); } dma_fence_put(fence); @@ -636,13 +642,14 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til { struct xe_bo *sys_bo, *vram_bo = NULL, *ccs_bo = NULL; unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); + struct drm_exec *exec; long ret; - sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + sys_bo = xe_bo_create_user(xe, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_PINNED, NULL); if (IS_ERR(sys_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", @@ -650,8 +657,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til return; } + exec = XE_VALIDATION_OPT_OUT; xe_bo_lock(sys_bo, false); 
- ret = xe_bo_validate(sys_bo, NULL, false); + ret = xe_bo_validate(sys_bo, NULL, false, exec); if (ret) { KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret); goto free_sysbo; @@ -664,10 +672,10 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_unlock(sys_bo); - ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + ccs_bo = xe_bo_create_user(xe, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_PINNED, NULL); if (IS_ERR(ccs_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", @@ -676,7 +684,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_lock(ccs_bo, false); - ret = xe_bo_validate(ccs_bo, NULL, false); + ret = xe_bo_validate(ccs_bo, NULL, false, exec); if (ret) { KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret); goto free_ccsbo; @@ -689,10 +697,10 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_unlock(ccs_bo); - vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + vram_bo = xe_bo_create_user(xe, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_PINNED, NULL); if (IS_ERR(vram_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(vram_bo)); @@ -700,7 +708,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_lock(vram_bo, false); - ret = xe_bo_validate(vram_bo, NULL, false); + ret = xe_bo_validate(vram_bo, NULL, false, exec); if (ret) { KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret); goto free_vrambo; @@ -713,7 +721,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } test_clear(xe, tile, sys_bo, vram_bo, test); - test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, test); + test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, exec, test); xe_bo_unlock(vram_bo); 
xe_bo_lock(vram_bo, false); diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index 0e502feaca81..6bb278167aaf 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -49,7 +49,7 @@ static void read_l3cc_table(struct xe_gt *gt, fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { xe_force_wake_put(gt_to_fw(gt), fw_ref); - KUNIT_ASSERT_TRUE_MSG(test, true, "Forcewake Failed.\n"); + KUNIT_FAIL_AND_ABORT(test, "Forcewake Failed.\n"); } for (i = 0; i < info->num_mocs_regs; i++) { diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 1d3e2e50c355..f3179b31f13e 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -12,63 +12,325 @@ #include <kunit/test-bug.h> #include <kunit/visibility.h> +#define PLATFORM_CASE(platform__, graphics_step__) \ + { \ + .platform = XE_ ## platform__, \ + .subplatform = XE_SUBPLATFORM_NONE, \ + .step = { .graphics = STEP_ ## graphics_step__ } \ + } + +#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \ + { \ + .platform = XE_ ## platform__, \ + .subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \ + .step = { .graphics = STEP_ ## graphics_step__ } \ + } + +#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \ + media_verx100__, media_step__) \ + { \ + .platform = XE_ ## platform__, \ + .subplatform = XE_SUBPLATFORM_NONE, \ + .graphics_verx100 = graphics_verx100__, \ + .media_verx100 = media_verx100__, \ + .step = { .graphics = STEP_ ## graphics_step__, \ + .media = STEP_ ## media_step__ } \ + } + +static const struct xe_pci_fake_data cases[] = { + PLATFORM_CASE(TIGERLAKE, B0), + PLATFORM_CASE(DG1, A0), + PLATFORM_CASE(DG1, B0), + PLATFORM_CASE(ALDERLAKE_S, A0), + PLATFORM_CASE(ALDERLAKE_S, B0), + PLATFORM_CASE(ALDERLAKE_S, C0), + PLATFORM_CASE(ALDERLAKE_S, D0), + 
PLATFORM_CASE(ALDERLAKE_P, A0), + PLATFORM_CASE(ALDERLAKE_P, B0), + PLATFORM_CASE(ALDERLAKE_P, C0), + SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0), + SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0), + SUBPLATFORM_CASE(DG2, G10, C0), + SUBPLATFORM_CASE(DG2, G11, B1), + SUBPLATFORM_CASE(DG2, G12, A1), + GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), + GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), + GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0), + GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), + GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0), + GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1), + GMDID_CASE(PANTHERLAKE, 3000, A0, 3000, A0), +}; + +KUNIT_ARRAY_PARAM(platform, cases, xe_pci_fake_data_desc); + /** - * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs - * @xe_fn: Function to call for each device. + * xe_pci_fake_data_gen_params - Generate struct xe_pci_fake_data parameters + * @test: test context object + * @prev: the pointer to the previous parameter to iterate from or NULL + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE + * + * This function prepares struct xe_pci_fake_data parameter. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. * - * This function iterates over the descriptors for all graphics IPs recognized - * by the driver and calls @xe_fn: for each one of them. 
+ * Return: pointer to the next parameter or NULL if no more parameters */ -void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn) +const void *xe_pci_fake_data_gen_params(struct kunit *test, const void *prev, char *desc) { - const struct xe_graphics_desc *desc, *last = NULL; + return platform_gen_params(test, prev, desc); +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_data_gen_params); - for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++) { - desc = graphics_ips[i].desc; - if (desc == last) - continue; +static const struct xe_device_desc *lookup_desc(enum xe_platform p) +{ + const struct xe_device_desc *desc; + const struct pci_device_id *ids; - xe_fn(desc); - last = desc; + for (ids = pciidlist; ids->driver_data; ids++) { + desc = (const void *)ids->driver_data; + if (desc->platform == p) + return desc; } + return NULL; +} + +static const struct xe_subplatform_desc *lookup_sub_desc(enum xe_platform p, enum xe_subplatform s) +{ + const struct xe_device_desc *desc = lookup_desc(p); + const struct xe_subplatform_desc *spd; + + if (desc && desc->subplatforms) + for (spd = desc->subplatforms; spd->subplatform; spd++) + if (spd->subplatform == s) + return spd; + return NULL; +} + +static const char *lookup_platform_name(enum xe_platform p) +{ + const struct xe_device_desc *desc = lookup_desc(p); + + return desc ? desc->platform_name : "INVALID"; +} + +static const char *__lookup_subplatform_name(enum xe_platform p, enum xe_subplatform s) +{ + const struct xe_subplatform_desc *desc = lookup_sub_desc(p, s); + + return desc ? desc->name : "INVALID"; +} + +static const char *lookup_subplatform_name(enum xe_platform p, enum xe_subplatform s) +{ + return s == XE_SUBPLATFORM_NONE ? "" : __lookup_subplatform_name(p, s); +} + +static const char *subplatform_prefix(enum xe_subplatform s) +{ + return s == XE_SUBPLATFORM_NONE ? "" : " "; +} + +static const char *step_prefix(enum xe_step step) +{ + return step == STEP_NONE ? 
"" : " "; +} + +static const char *step_name(enum xe_step step) +{ + return step == STEP_NONE ? "" : xe_step_name(step); +} + +static const char *sriov_prefix(enum xe_sriov_mode mode) +{ + return mode <= XE_SRIOV_MODE_NONE ? "" : " "; +} + +static const char *sriov_name(enum xe_sriov_mode mode) +{ + return mode <= XE_SRIOV_MODE_NONE ? "" : xe_sriov_mode_to_string(mode); +} + +static const char *lookup_graphics_name(unsigned int verx100) +{ + const struct xe_ip *ip = find_graphics_ip(verx100); + + return ip ? ip->name : ""; +} + +static const char *lookup_media_name(unsigned int verx100) +{ + const struct xe_ip *ip = find_media_ip(verx100); + + return ip ? ip->name : ""; } -EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_graphics_ip); /** - * xe_call_for_each_media_ip - Iterate over all recognized media IPs - * @xe_fn: Function to call for each device. + * xe_pci_fake_data_desc - Describe struct xe_pci_fake_data parameter + * @param: the &struct xe_pci_fake_data parameter to describe + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * - * This function iterates over the descriptors for all media IPs recognized - * by the driver and calls @xe_fn: for each one of them. + * This function prepares description of the struct xe_pci_fake_data parameter. + * + * It is tailored for use in parameterized KUnit tests where parameter generator + * is based on the struct xe_pci_fake_data arrays. 
*/ -void xe_call_for_each_media_ip(xe_media_fn xe_fn) +void xe_pci_fake_data_desc(const struct xe_pci_fake_data *param, char *desc) { - const struct xe_media_desc *desc, *last = NULL; + if (param->graphics_verx100 || param->media_verx100) + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s%s%s %u.%02u(%s)%s%s %u.%02u(%s)%s%s%s%s", + lookup_platform_name(param->platform), + subplatform_prefix(param->subplatform), + lookup_subplatform_name(param->platform, param->subplatform), + param->graphics_verx100 / 100, param->graphics_verx100 % 100, + lookup_graphics_name(param->graphics_verx100), + step_prefix(param->step.graphics), step_name(param->step.graphics), + param->media_verx100 / 100, param->media_verx100 % 100, + lookup_media_name(param->media_verx100), + step_prefix(param->step.media), step_name(param->step.media), + sriov_prefix(param->sriov_mode), sriov_name(param->sriov_mode)); + else + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s%s%s%s%s%s%s", + lookup_platform_name(param->platform), + subplatform_prefix(param->subplatform), + lookup_subplatform_name(param->platform, param->subplatform), + step_prefix(param->step.graphics), step_name(param->step.graphics), + sriov_prefix(param->sriov_mode), sriov_name(param->sriov_mode)); +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_data_desc); - for (int i = 0; i < ARRAY_SIZE(media_ips); i++) { - desc = media_ips[i].desc; - if (desc == last) - continue; +static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc) +{ + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%u.%02u %s", + param->verx100 / 100, param->verx100 % 100, param->name); +} - xe_fn(desc); - last = desc; - } +/* + * Pre-GMDID Graphics and Media IPs definitions. 
+ * + * Mimic the way GMDID IPs are declared so the same + * param generator can be used for both + */ +static const struct xe_ip pre_gmdid_graphics_ips[] = { + { 1200, "Xe_LP", &graphics_xelp }, + { 1210, "Xe_LP+", &graphics_xelp }, + { 1255, "Xe_HPG", &graphics_xehpg }, + { 1260, "Xe_HPC", &graphics_xehpc }, +}; + +static const struct xe_ip pre_gmdid_media_ips[] = { + { 1200, "Xe_M", &media_xem }, + { 1255, "Xe_HPM", &media_xem }, +}; + +KUNIT_ARRAY_PARAM(pre_gmdid_graphics_ip, pre_gmdid_graphics_ips, xe_ip_kunit_desc); +KUNIT_ARRAY_PARAM(pre_gmdid_media_ip, pre_gmdid_media_ips, xe_ip_kunit_desc); + +KUNIT_ARRAY_PARAM(graphics_ip, graphics_ips, xe_ip_kunit_desc); +KUNIT_ARRAY_PARAM(media_ip, media_ips, xe_ip_kunit_desc); + +static void xe_pci_id_kunit_desc(const struct pci_device_id *param, char *desc) +{ + const struct xe_device_desc *dev_desc = + (const struct xe_device_desc *)param->driver_data; + + if (dev_desc) + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "0x%X (%s)", + param->device, dev_desc->platform_name); +} + +KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc); + +/** + * xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters + * @test: test context object + * @prev: the pointer to the previous parameter to iterate from or NULL + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE + * + * This function prepares struct xe_ip parameter. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. 
+ * + * Return: pointer to the next parameter or NULL if no more parameters + */ +const void *xe_pci_graphics_ip_gen_param(struct kunit *test, const void *prev, char *desc) +{ + const void *next = pre_gmdid_graphics_ip_gen_params(test, prev, desc); + + if (next) + return next; + if (is_insidevar(prev, pre_gmdid_graphics_ips)) + prev = NULL; + + return graphics_ip_gen_params(test, prev, desc); } -EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_media_ip); +EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param); -static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, - u32 *ver, u32 *revid) +/** + * xe_pci_media_ip_gen_param - Generate media struct xe_ip parameters + * @test: test context object + * @prev: the pointer to the previous parameter to iterate from or NULL + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE + * + * This function prepares struct xe_ip parameter. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. + * + * Return: pointer to the next parameter or NULL if no more parameters + */ +const void *xe_pci_media_ip_gen_param(struct kunit *test, const void *prev, char *desc) +{ + const void *next = pre_gmdid_media_ip_gen_params(test, prev, desc); + + if (next) + return next; + if (is_insidevar(prev, pre_gmdid_media_ips)) + prev = NULL; + + return media_ip_gen_params(test, prev, desc); +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param); + +/** + * xe_pci_id_gen_param - Generate struct pci_device_id parameters + * @test: test context object + * @prev: the pointer to the previous parameter to iterate from or NULL + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE + * + * This function prepares struct pci_device_id parameter. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. 
+ * + * Return: pointer to the next parameter or NULL if no more parameters + */ +const void *xe_pci_id_gen_param(struct kunit *test, const void *prev, char *desc) +{ + const struct pci_device_id *pci = pci_id_gen_params(test, prev, desc); + + return pci->driver_data ? pci : NULL; +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_id_gen_param); + +static int fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, + u32 *ver, u32 *revid) { struct kunit *test = kunit_get_current_test(); struct xe_pci_fake_data *data = test->priv; if (type == GMDID_MEDIA) { *ver = data->media_verx100; - *revid = xe_step_to_gmdid(data->media_step); + *revid = xe_step_to_gmdid(data->step.media); } else { *ver = data->graphics_verx100; - *revid = xe_step_to_gmdid(data->graphics_step); + *revid = xe_step_to_gmdid(data->step.graphics); } + + return 0; +} + +static void fake_xe_info_probe_tile_count(struct xe_device *xe) +{ + /* Nothing to do, just use the statically defined value. */ } int xe_pci_fake_device_init(struct xe_device *xe) @@ -108,6 +370,8 @@ done: data->sriov_mode : XE_SRIOV_MODE_NONE; kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid); + kunit_activate_static_stub(test, xe_info_probe_tile_count, + fake_xe_info_probe_tile_count); xe_info_init_early(xe, desc, subplatform_desc); xe_info_init(xe, desc); @@ -118,6 +382,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init); /** * xe_pci_live_device_gen_param - Helper to iterate Xe devices as KUnit parameters + * @test: test context object * @prev: the previously returned value, or NULL for the first iteration * @desc: the buffer for a parameter name * @@ -129,7 +394,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init); * Return: pointer to the next &struct xe_device ready to be used as a parameter * or NULL if there are no more Xe devices on the system. 
*/ -const void *xe_pci_live_device_gen_param(const void *prev, char *desc) +const void *xe_pci_live_device_gen_param(struct kunit *test, const void *prev, char *desc) { const struct xe_device *xe = prev; struct device *dev = xe ? xe->drm.dev : NULL; diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c index 744a37583d2d..4d10a7e2b570 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.c +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -14,9 +14,10 @@ #include "xe_pci_test.h" #include "xe_pci_types.h" -static void check_graphics_ip(const struct xe_graphics_desc *graphics) +static void check_graphics_ip(struct kunit *test) { - struct kunit *test = kunit_get_current_test(); + const struct xe_ip *param = test->param_value; + const struct xe_graphics_desc *graphics = param->desc; u64 mask = graphics->hw_engine_mask; /* RCS, CCS, and BCS engines are allowed on the graphics IP */ @@ -28,9 +29,10 @@ static void check_graphics_ip(const struct xe_graphics_desc *graphics) KUNIT_ASSERT_EQ(test, mask, 0); } -static void check_media_ip(const struct xe_media_desc *media) +static void check_media_ip(struct kunit *test) { - struct kunit *test = kunit_get_current_test(); + const struct xe_ip *param = test->param_value; + const struct xe_media_desc *media = param->desc; u64 mask = media->hw_engine_mask; /* VCS, VECS and GSCCS engines are allowed on the media IP */ @@ -42,19 +44,27 @@ static void check_media_ip(const struct xe_media_desc *media) KUNIT_ASSERT_EQ(test, mask, 0); } -static void xe_gmdid_graphics_ip(struct kunit *test) +static void check_platform_desc(struct kunit *test) { - xe_call_for_each_graphics_ip(check_graphics_ip); -} + const struct pci_device_id *pci = test->param_value; + const struct xe_device_desc *desc = + (const struct xe_device_desc *)pci->driver_data; -static void xe_gmdid_media_ip(struct kunit *test) -{ - xe_call_for_each_media_ip(check_media_ip); + KUNIT_EXPECT_GT(test, desc->dma_mask_size, 0); + + 
KUNIT_EXPECT_GT(test, (unsigned int)desc->max_gt_per_tile, 0); + KUNIT_EXPECT_LE(test, (unsigned int)desc->max_gt_per_tile, XE_MAX_GT_PER_TILE); + + KUNIT_EXPECT_GT(test, desc->va_bits, 0); + KUNIT_EXPECT_LE(test, desc->va_bits, 64); + + KUNIT_EXPECT_GT(test, desc->vm_max_level, 0); } static struct kunit_case xe_pci_tests[] = { - KUNIT_CASE(xe_gmdid_graphics_ip), - KUNIT_CASE(xe_gmdid_media_ip), + KUNIT_CASE_PARAM(check_graphics_ip, xe_pci_graphics_ip_gen_param), + KUNIT_CASE_PARAM(check_media_ip, xe_pci_media_ip_gen_param), + KUNIT_CASE_PARAM(check_platform_desc, xe_pci_id_gen_param), {} }; diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index ede46800aff1..30505d1cbefc 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -7,33 +7,30 @@ #define _XE_PCI_TEST_H_ #include <linux/types.h> +#include <kunit/test.h> #include "xe_platform_types.h" #include "xe_sriov_types.h" +#include "xe_step_types.h" struct xe_device; -struct xe_graphics_desc; -struct xe_media_desc; - -typedef int (*xe_device_fn)(struct xe_device *); -typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *); -typedef void (*xe_media_fn)(const struct xe_media_desc *); - -void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn); -void xe_call_for_each_media_ip(xe_media_fn xe_fn); struct xe_pci_fake_data { enum xe_sriov_mode sriov_mode; enum xe_platform platform; enum xe_subplatform subplatform; + struct xe_step_info step; u32 graphics_verx100; u32 media_verx100; - u32 graphics_step; - u32 media_step; }; int xe_pci_fake_device_init(struct xe_device *xe); +const void *xe_pci_fake_data_gen_params(struct kunit *test, const void *prev, char *desc); +void xe_pci_fake_data_desc(const struct xe_pci_fake_data *param, char *desc); -const void *xe_pci_live_device_gen_param(const void *prev, char *desc); +const void *xe_pci_graphics_ip_gen_param(struct kunit *test, const void *prev, char *desc); +const void 
*xe_pci_media_ip_gen_param(struct kunit *test, const void *prev, char *desc); +const void *xe_pci_id_gen_param(struct kunit *test, const void *prev, char *desc); +const void *xe_pci_live_device_gen_param(struct kunit *test, const void *prev, char *desc); #endif diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index b0254b014fe4..d2255a59e58f 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -48,12 +48,14 @@ struct rtp_test_case { const struct xe_rtp_entry *entries; }; -static bool match_yes(const struct xe_gt *gt, const struct xe_hw_engine *hwe) +static bool match_yes(const struct xe_device *xe, const struct xe_gt *gt, + const struct xe_hw_engine *hwe) { return true; } -static bool match_no(const struct xe_gt *gt, const struct xe_hw_engine *hwe) +static bool match_no(const struct xe_device *xe, const struct xe_gt *gt, + const struct xe_hw_engine *hwe) { return false; } diff --git a/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c new file mode 100644 index 000000000000..ba95e29b597d --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2024-2025 Intel Corporation + */ + +#include <kunit/test.h> + +#include "xe_device.h" +#include "xe_kunit_helpers.h" +#include "xe_pci_test.h" + +static int pf_service_test_init(struct kunit *test) +{ + struct xe_pci_fake_data fake = { + .sriov_mode = XE_SRIOV_MODE_PF, + .platform = XE_TIGERLAKE, /* some random platform */ + .subplatform = XE_SUBPLATFORM_NONE, + }; + struct xe_device *xe; + + test->priv = &fake; + xe_kunit_helper_xe_device_test_init(test); + + xe = test->priv; + KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); + + xe_sriov_pf_service_init(xe); + /* + * sanity check: + * - all supported platforms VF/PF ABI versions must be defined + * - base version can't be newer than 
latest + */ + KUNIT_ASSERT_NE(test, 0, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_NE(test, 0, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.latest.major); + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.minor, + xe->sriov.pf.service.version.latest.minor); + return 0; +} + +static void pf_negotiate_any(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, VF2PF_HANDSHAKE_MAJOR_ANY, + VF2PF_HANDSHAKE_MINOR_ANY, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_base_match(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.base.minor); +} + +static void pf_negotiate_base_newer(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_GE(test, minor, xe->sriov.pf.service.version.base.minor); + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static 
void pf_negotiate_base_next(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_LE(test, major, xe->sriov.pf.service.version.latest.major); + if (major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_older(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (!xe->sriov.pf.service.version.base.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor - 1, + &major, &minor)); +} + +static void pf_negotiate_base_prev(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major - 1, 1, + &major, &minor)); +} + +static void pf_negotiate_latest_match(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_newer(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, 
xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_next(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_older(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (!xe->sriov.pf.service.version.latest.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor - 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor - 1); +} + +static void pf_negotiate_latest_prev(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + kunit_skip(test, "no prev major"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major - 1, + xe->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major - 1); + KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major); +} + +static struct kunit_case pf_service_test_cases[] = { + KUNIT_CASE(pf_negotiate_any), + KUNIT_CASE(pf_negotiate_base_match), + KUNIT_CASE(pf_negotiate_base_newer), + KUNIT_CASE(pf_negotiate_base_next), + KUNIT_CASE(pf_negotiate_base_older), + KUNIT_CASE(pf_negotiate_base_prev), + KUNIT_CASE(pf_negotiate_latest_match), + 
KUNIT_CASE(pf_negotiate_latest_newer), + KUNIT_CASE(pf_negotiate_latest_next), + KUNIT_CASE(pf_negotiate_latest_older), + KUNIT_CASE(pf_negotiate_latest_prev), + {} +}; + +static struct kunit_suite pf_service_suite = { + .name = "pf_service", + .test_cases = pf_service_test_cases, + .init = pf_service_test_init, +}; + +kunit_test_suite(pf_service_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index c96d1fe34151..49d191043dfa 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -15,86 +15,10 @@ #include "xe_tuning.h" #include "xe_wa.h" -struct platform_test_case { - const char *name; - enum xe_platform platform; - enum xe_subplatform subplatform; - u32 graphics_verx100; - u32 media_verx100; - struct xe_step_info step; -}; - -#define PLATFORM_CASE(platform__, graphics_step__) \ - { \ - .name = #platform__ " (" #graphics_step__ ")", \ - .platform = XE_ ## platform__, \ - .subplatform = XE_SUBPLATFORM_NONE, \ - .step = { .graphics = STEP_ ## graphics_step__ } \ - } - - -#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \ - { \ - .name = #platform__ "_" #subplatform__ " (" #graphics_step__ ")", \ - .platform = XE_ ## platform__, \ - .subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \ - .step = { .graphics = STEP_ ## graphics_step__ } \ - } - -#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \ - media_verx100__, media_step__) \ - { \ - .name = #platform__ " (g:" #graphics_step__ ", m:" #media_step__ ")",\ - .platform = XE_ ## platform__, \ - .subplatform = XE_SUBPLATFORM_NONE, \ - .graphics_verx100 = graphics_verx100__, \ - .media_verx100 = media_verx100__, \ - .step = { .graphics = STEP_ ## graphics_step__, \ - .media = STEP_ ## media_step__ } \ - } - -static const struct platform_test_case cases[] = { - PLATFORM_CASE(TIGERLAKE, B0), - PLATFORM_CASE(DG1, A0), - PLATFORM_CASE(DG1, B0), - PLATFORM_CASE(ALDERLAKE_S, A0), - 
PLATFORM_CASE(ALDERLAKE_S, B0), - PLATFORM_CASE(ALDERLAKE_S, C0), - PLATFORM_CASE(ALDERLAKE_S, D0), - PLATFORM_CASE(ALDERLAKE_P, A0), - PLATFORM_CASE(ALDERLAKE_P, B0), - PLATFORM_CASE(ALDERLAKE_P, C0), - SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0), - SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0), - SUBPLATFORM_CASE(DG2, G10, C0), - SUBPLATFORM_CASE(DG2, G11, B1), - SUBPLATFORM_CASE(DG2, G12, A1), - GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), - GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), - GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0), - GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), - GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0), - GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1), -}; - -static void platform_desc(const struct platform_test_case *t, char *desc) -{ - strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE); -} - -KUNIT_ARRAY_PARAM(platform, cases, platform_desc); - static int xe_wa_test_init(struct kunit *test) { - const struct platform_test_case *param = test->param_value; - struct xe_pci_fake_data data = { - .platform = param->platform, - .subplatform = param->subplatform, - .graphics_verx100 = param->graphics_verx100, - .media_verx100 = param->media_verx100, - .graphics_step = param->step.graphics, - .media_step = param->step.media, - }; + const struct xe_pci_fake_data *param = test->param_value; + struct xe_pci_fake_data data = *param; struct xe_device *xe; struct device *dev; int ret; @@ -119,13 +43,6 @@ static int xe_wa_test_init(struct kunit *test) return 0; } -static void xe_wa_test_exit(struct kunit *test) -{ - struct xe_device *xe = test->priv; - - drm_kunit_helper_free_device(test, xe->drm.dev); -} - static void xe_wa_gt(struct kunit *test) { struct xe_device *xe = test->priv; @@ -143,14 +60,13 @@ static void xe_wa_gt(struct kunit *test) } static struct kunit_case xe_wa_tests[] = { - KUNIT_CASE_PARAM(xe_wa_gt, platform_gen_params), + KUNIT_CASE_PARAM(xe_wa_gt, xe_pci_fake_data_gen_params), {} }; static struct kunit_suite xe_rtp_test_suite = { .name = "xe_wa", .init = 
xe_wa_test_init, - .exit = xe_wa_test_exit, .test_cases = xe_wa_tests, }; diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h index 68fe70ce2be3..a818eaa05b7d 100644 --- a/drivers/gpu/drm/xe/xe_assert.h +++ b/drivers/gpu/drm/xe/xe_assert.h @@ -12,6 +12,7 @@ #include "xe_gt_types.h" #include "xe_step.h" +#include "xe_vram.h" /** * DOC: Xe Asserts @@ -145,7 +146,8 @@ const struct xe_tile *__tile = (tile); \ char __buf[10] __maybe_unused; \ xe_assert_msg(tile_to_xe(__tile), condition, "tile: %u VRAM %s\n" msg, \ - __tile->id, ({ string_get_size(__tile->mem.vram.actual_physical_size, 1, \ + __tile->id, ({ string_get_size( \ + xe_vram_region_actual_physical_size(__tile->mem.vram), 1, \ STRING_UNITS_2, __buf, sizeof(__buf)); __buf; }), ## arg); \ }) diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 9570672fce33..6d20229c11de 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -19,7 +19,7 @@ static int bb_prefetch(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt)) + if (GRAPHICS_VERx100(xe) >= 1250 && xe_gt_is_main_type(gt)) /* * RCS and CCS require 1K, although other engines would be * okay with 512. @@ -60,6 +60,41 @@ err: return ERR_PTR(err); } +struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords, + enum xe_sriov_vf_ccs_rw_ctxs ctx_id) +{ + struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL); + struct xe_device *xe = gt_to_xe(gt); + struct xe_sa_manager *bb_pool; + int err; + + if (!bb) + return ERR_PTR(-ENOMEM); + /* + * We need to allocate space for the requested number of dwords & + * one additional MI_BATCH_BUFFER_END dword. Since the whole SA + * is submitted to HW, we need to make sure that the last instruction + * is not over written when the last chunk of SA is allocated for BB. + * So, this extra DW acts as a guard here. 
+ */ + + bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool; + bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1)); + + if (IS_ERR(bb->bo)) { + err = PTR_ERR(bb->bo); + goto err; + } + + bb->cs = xe_sa_bo_cpu_addr(bb->bo); + bb->len = 0; + + return bb; +err: + kfree(bb); + return ERR_PTR(err); +} + static struct xe_sched_job * __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr) { diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h index fafacd73dcc3..2a8adc9a6dee 100644 --- a/drivers/gpu/drm/xe/xe_bb.h +++ b/drivers/gpu/drm/xe/xe_bb.h @@ -13,8 +13,11 @@ struct dma_fence; struct xe_gt; struct xe_exec_queue; struct xe_sched_job; +enum xe_sriov_vf_ccs_rw_ctxs; -struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm); +struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm); +struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords, + enum xe_sriov_vf_ccs_rw_ctxs ctx_id); struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb); struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index d99d91fe8aa9..bf4ee976b680 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -9,6 +9,7 @@ #include <linux/nospec.h> #include <drm/drm_drv.h> +#include <drm/drm_dumb_buffers.h> #include <drm/drm_gem_ttm_helper.h> #include <drm/drm_managed.h> #include <drm/ttm/ttm_backup.h> @@ -19,6 +20,8 @@ #include <kunit/static_stub.h> +#include <trace/events/gpu_mem.h> + #include "xe_device.h" #include "xe_dma_buf.h" #include "xe_drm_client.h" @@ -31,9 +34,12 @@ #include "xe_pxp.h" #include "xe_res_cursor.h" #include "xe_shrinker.h" +#include "xe_sriov_vf_ccs.h" +#include "xe_tile.h" #include "xe_trace_bo.h" #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" +#include "xe_vram_types.h" const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = { [XE_PL_SYSTEM] = "system", @@ -77,6 +83,10 @@ static struct 
ttm_placement tt_placement = { .placement = tt_placement_flags, }; +#define for_each_set_bo_vram_flag(bit__, bo_flags__) \ + for (unsigned int __bit_tmp = BIT(0); __bit_tmp <= XE_BO_FLAG_VRAM_MASK; __bit_tmp <<= 1) \ + for_each_if(((bit__) = __bit_tmp) & (bo_flags__) & XE_BO_FLAG_VRAM_MASK) + bool mem_type_is_vram(u32 mem_type) { return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN; @@ -184,6 +194,8 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo, bo->placements[*c] = (struct ttm_place) { .mem_type = XE_PL_TT, + .flags = (bo_flags & XE_BO_FLAG_VRAM_MASK) ? + TTM_PL_FLAG_FALLBACK : 0, }; *c += 1; } @@ -196,6 +208,8 @@ static bool force_contiguous(u32 bo_flags) else if (bo_flags & XE_BO_FLAG_PINNED && !(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) return true; /* needs vmap */ + else if (bo_flags & XE_BO_FLAG_CPU_ADDR_MIRROR) + return true; /* * For eviction / restore on suspend / resume objects pinned in VRAM @@ -205,6 +219,27 @@ static bool force_contiguous(u32 bo_flags) bo_flags & XE_BO_FLAG_PINNED; } +static u8 vram_bo_flag_to_tile_id(struct xe_device *xe, u32 vram_bo_flag) +{ + xe_assert(xe, vram_bo_flag & XE_BO_FLAG_VRAM_MASK); + xe_assert(xe, (vram_bo_flag & (vram_bo_flag - 1)) == 0); + + return __ffs(vram_bo_flag >> (__ffs(XE_BO_FLAG_VRAM0) - 1)) - 1; +} + +static u32 bo_vram_flags_to_vram_placement(struct xe_device *xe, u32 bo_flags, u32 vram_flag, + enum ttm_bo_type type) +{ + u8 tile_id = vram_bo_flag_to_tile_id(xe, vram_flag); + + xe_assert(xe, tile_id < xe->info.tile_count); + + if (type == ttm_bo_type_kernel && !(bo_flags & XE_BO_FLAG_FORCE_USER_VRAM)) + return xe->tiles[tile_id].mem.kernel_vram->placement; + else + return xe->tiles[tile_id].mem.vram->placement; +} + static void add_vram(struct xe_device *xe, struct xe_bo *bo, struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c) { @@ -237,12 +272,15 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, } static void try_add_vram(struct xe_device *xe, struct 
xe_bo *bo, - u32 bo_flags, u32 *c) + u32 bo_flags, enum ttm_bo_type type, u32 *c) { - if (bo_flags & XE_BO_FLAG_VRAM0) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); - if (bo_flags & XE_BO_FLAG_VRAM1) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); + u32 vram_flag; + + for_each_set_bo_vram_flag(vram_flag, bo_flags) { + u32 pl = bo_vram_flags_to_vram_placement(xe, bo_flags, vram_flag, type); + + add_vram(xe, bo, bo->placements, bo_flags, pl, c); + } } static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, @@ -261,11 +299,11 @@ static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, } static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, - u32 bo_flags) + u32 bo_flags, enum ttm_bo_type type) { u32 c = 0; - try_add_vram(xe, bo, bo_flags, &c); + try_add_vram(xe, bo, bo_flags, type, &c); try_add_system(xe, bo, bo_flags, &c); try_add_stolen(xe, bo, bo_flags, &c); @@ -281,10 +319,10 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, } int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, - u32 bo_flags) + u32 bo_flags, enum ttm_bo_type type) { xe_bo_assert_held(bo); - return __xe_bo_placement_for_flags(xe, bo, bo_flags); + return __xe_bo_placement_for_flags(xe, bo, bo_flags, type); } static void xe_evict_flags(struct ttm_buffer_object *tbo, @@ -336,15 +374,13 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, /* struct xe_ttm_tt - Subclassed ttm_tt for xe */ struct xe_ttm_tt { struct ttm_tt ttm; - /** @xe - The xe device */ - struct xe_device *xe; struct sg_table sgt; struct sg_table *sg; /** @purgeable: Whether the content of the pages of @ttm is purgeable. 
*/ bool purgeable; }; -static int xe_tt_map_sg(struct ttm_tt *tt) +static int xe_tt_map_sg(struct xe_device *xe, struct ttm_tt *tt) { struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); unsigned long num_pages = tt->num_pages; @@ -359,13 +395,13 @@ static int xe_tt_map_sg(struct ttm_tt *tt) ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages, num_pages, 0, (u64)num_pages << PAGE_SHIFT, - xe_sg_segment_size(xe_tt->xe->drm.dev), + xe_sg_segment_size(xe->drm.dev), GFP_KERNEL); if (ret) return ret; xe_tt->sg = &xe_tt->sgt; - ret = dma_map_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL, + ret = dma_map_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); if (ret) { sg_free_table(xe_tt->sg); @@ -376,12 +412,12 @@ static int xe_tt_map_sg(struct ttm_tt *tt) return 0; } -static void xe_tt_unmap_sg(struct ttm_tt *tt) +static void xe_tt_unmap_sg(struct xe_device *xe, struct ttm_tt *tt) { struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); if (xe_tt->sg) { - dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, + dma_unmap_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL, 0); sg_free_table(xe_tt->sg); xe_tt->sg = NULL; @@ -400,24 +436,37 @@ struct sg_table *xe_bo_sg(struct xe_bo *bo) * Account ttm pages against the device shrinker's shrinkable and * purgeable counts. 
*/ -static void xe_ttm_tt_account_add(struct ttm_tt *tt) +static void xe_ttm_tt_account_add(struct xe_device *xe, struct ttm_tt *tt) { struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); if (xe_tt->purgeable) - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, tt->num_pages); + xe_shrinker_mod_pages(xe->mem.shrinker, 0, tt->num_pages); else - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, tt->num_pages, 0); + xe_shrinker_mod_pages(xe->mem.shrinker, tt->num_pages, 0); } -static void xe_ttm_tt_account_subtract(struct ttm_tt *tt) +static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt) { struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); if (xe_tt->purgeable) - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, -(long)tt->num_pages); + xe_shrinker_mod_pages(xe->mem.shrinker, 0, -(long)tt->num_pages); else - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, -(long)tt->num_pages, 0); + xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0); +} + +static void update_global_total_pages(struct ttm_device *ttm_dev, + long num_pages) +{ +#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) + struct xe_device *xe = ttm_to_xe_device(ttm_dev); + u64 global_total_pages = + atomic64_add_return(num_pages, &xe->global_total_pages); + + trace_gpu_mem_total(xe->drm.primary->index, 0, + global_total_pages << PAGE_SHIFT); +#endif } static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, @@ -436,11 +485,10 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, return NULL; tt = &xe_tt->ttm; - xe_tt->xe = xe; extra_pages = 0; if (xe_bo_needs_ccs_pages(bo)) - extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size), + extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)), PAGE_SIZE); /* @@ -527,21 +575,25 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, return err; xe_tt->purgeable = false; - xe_ttm_tt_account_add(tt); + 
xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt); + update_global_total_pages(ttm_dev, tt->num_pages); return 0; } static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt) { + struct xe_device *xe = ttm_to_xe_device(ttm_dev); + if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) && !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE)) return; - xe_tt_unmap_sg(tt); + xe_tt_unmap_sg(xe, tt); ttm_pool_free(&ttm_dev->pool, tt); - xe_ttm_tt_account_subtract(tt); + xe_ttm_tt_account_subtract(xe, tt); + update_global_total_pages(ttm_dev, -(long)tt->num_pages); } static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt) @@ -558,6 +610,23 @@ static bool xe_ttm_resource_visible(struct ttm_resource *mem) return vres->used_visible_size == mem->size; } +/** + * xe_bo_is_visible_vram - check if BO is placed entirely in visible VRAM. + * @bo: The BO + * + * This function checks whether a given BO resides entirely in memory visible from the CPU + * + * Returns: true if the BO is entirely visible, false otherwise. + * + */ +bool xe_bo_is_visible_vram(struct xe_bo *bo) +{ + if (drm_WARN_ON(bo->ttm.base.dev, !xe_bo_is_vram(bo))) + return false; + + return xe_ttm_resource_visible(bo->ttm.resource); +} + static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { @@ -789,21 +858,21 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, /* Bo creation path, moving to system or TT. 
*/ if ((!old_mem && ttm) && !handle_system_ccs) { if (new_mem->mem_type == XE_PL_TT) - ret = xe_tt_map_sg(ttm); + ret = xe_tt_map_sg(xe, ttm); if (!ret) ttm_bo_move_null(ttm_bo, new_mem); goto out; } if (ttm_bo->type == ttm_bo_type_sg) { - ret = xe_bo_move_notify(bo, ctx); + if (new_mem->mem_type == XE_PL_SYSTEM) + ret = xe_bo_move_notify(bo, ctx); if (!ret) ret = xe_bo_move_dmabuf(ttm_bo, new_mem); return ret; } - tt_has_data = ttm && (ttm_tt_is_populated(ttm) || - (ttm->page_flags & TTM_TT_FLAG_SWAPPED)); + tt_has_data = ttm && (ttm_tt_is_populated(ttm) || ttm_tt_is_swapped(ttm)); move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) : (!mem_type_is_vram(old_mem_type) && !tt_has_data)); @@ -812,7 +881,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, (!ttm && ttm_bo->type == ttm_bo_type_device); if (new_mem->mem_type == XE_PL_TT) { - ret = xe_tt_map_sg(ttm); + ret = xe_tt_map_sg(xe, ttm); if (ret) goto out; } @@ -841,21 +910,6 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, goto out; } - /* Reject BO eviction if BO is bound to current VM. */ - if (evict && ctx->resv) { - struct drm_gpuvm_bo *vm_bo; - - drm_gem_for_each_gpuvm_bo(vm_bo, &bo->ttm.base) { - struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); - - if (xe_vm_resv(vm) == ctx->resv && - xe_vm_in_preempt_fence_mode(vm)) { - ret = -EBUSY; - goto out; - } - } - } - /* * Failed multi-hop where the old_mem is still marked as * TTM_PL_FLAG_TEMPORARY, should just be a dummy move. @@ -963,6 +1017,20 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, dma_fence_put(fence); xe_pm_runtime_put(xe); + /* + * CCS meta data is migrated from TT -> SMEM. So, let us detach the + * BBs from BO as it is no longer needed. 
+ */ + if (IS_VF_CCS_READY(xe) && old_mem_type == XE_PL_TT && + new_mem->mem_type == XE_PL_SYSTEM) + xe_sriov_vf_ccs_detach_bo(bo); + + if (IS_VF_CCS_READY(xe) && + ((move_lacks_source && new_mem->mem_type == XE_PL_TT) || + (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) && + handle_system_ccs) + ret = xe_sriov_vf_ccs_attach_bo(bo); + out: if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) && ttm_bo->ttm) { @@ -973,7 +1041,10 @@ out: if (timeout < 0) ret = timeout; - xe_tt_unmap_sg(ttm_bo->ttm); + if (IS_VF_CCS_READY(xe)) + xe_sriov_vf_ccs_detach_bo(bo); + + xe_tt_unmap_sg(xe, ttm_bo->ttm); } return ret; @@ -983,6 +1054,7 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, unsigned long *scanned) { + struct xe_device *xe = ttm_to_xe_device(bo->bdev); long lret; /* Fake move to system, without copying data. */ @@ -997,7 +1069,7 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx, if (lret) return lret; - xe_tt_unmap_sg(bo->ttm); + xe_tt_unmap_sg(xe, bo->ttm); ttm_bo_move_null(bo, new_resource); } @@ -1008,11 +1080,30 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx, .allow_move = false}); if (lret > 0) - xe_ttm_tt_account_subtract(bo->ttm); + xe_ttm_tt_account_subtract(xe, bo->ttm); return lret; } +static bool +xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) +{ + struct drm_gpuvm_bo *vm_bo; + + if (!ttm_bo_eviction_valuable(bo, place)) + return false; + + if (!xe_bo_is_xe_bo(bo)) + return true; + + drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) { + if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm))) + return false; + } + + return true; +} + /** * xe_bo_shrink() - Try to shrink an xe bo. * @ctx: The struct ttm_operation_ctx used for shrinking. 
@@ -1039,7 +1130,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); struct ttm_place place = {.mem_type = bo->resource->mem_type}; struct xe_bo *xe_bo = ttm_to_xe_bo(bo); - struct xe_device *xe = xe_tt->xe; + struct xe_device *xe = ttm_to_xe_device(bo->bdev); bool needs_rpm; long lret = 0L; @@ -1047,7 +1138,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, (flags.purge && !xe_tt->purgeable)) return -EBUSY; - if (!ttm_bo_eviction_valuable(bo, &place)) + if (!xe_bo_eviction_valuable(bo, &place)) return -EBUSY; if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo)) @@ -1076,7 +1167,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, xe_pm_runtime_put(xe); if (lret > 0) - xe_ttm_tt_account_subtract(tt); + xe_ttm_tt_account_subtract(xe, tt); out_unref: xe_bo_put(xe_bo); @@ -1097,42 +1188,47 @@ out_unref: int xe_bo_notifier_prepare_pinned(struct xe_bo *bo) { struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); + struct xe_validation_ctx ctx; + struct drm_exec exec; struct xe_bo *backup; int ret = 0; - xe_bo_lock(bo, false); + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) { + ret = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + xe_assert(xe, !ret); + xe_assert(xe, !bo->backup_obj); - xe_assert(xe, !bo->backup_obj); + /* + * Since this is called from the PM notifier we might have raced with + * someone unpinning this after we dropped the pinned list lock and + * grabbing the above bo lock. + */ + if (!xe_bo_is_pinned(bo)) + break; - /* - * Since this is called from the PM notifier we might have raced with - * someone unpinning this after we dropped the pinned list lock and - * grabbing the above bo lock. 
- */ - if (!xe_bo_is_pinned(bo)) - goto out_unlock_bo; + if (!xe_bo_is_vram(bo)) + break; - if (!xe_bo_is_vram(bo)) - goto out_unlock_bo; + if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) + break; - if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) - goto out_unlock_bo; + backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo), + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED, &exec); + if (IS_ERR(backup)) { + drm_exec_retry_on_contention(&exec); + ret = PTR_ERR(backup); + xe_validation_retry_on_oom(&ctx, &ret); + break; + } - backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, - DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); - if (IS_ERR(backup)) { - ret = PTR_ERR(backup); - goto out_unlock_bo; + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ + ttm_bo_pin(&backup->ttm); + bo->backup_obj = backup; } - backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ - ttm_bo_pin(&backup->ttm); - bo->backup_obj = backup; - -out_unlock_bo: - xe_bo_unlock(bo); return ret; } @@ -1158,56 +1254,12 @@ int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo) return 0; } -/** - * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory - * @bo: The buffer object to move. - * - * On successful completion, the object memory will be moved to system memory. - * - * This is needed to for special handling of pinned VRAM object during - * suspend-resume. - * - * Return: 0 on success. Negative error code on failure. 
- */ -int xe_bo_evict_pinned(struct xe_bo *bo) +static int xe_bo_evict_pinned_copy(struct xe_bo *bo, struct xe_bo *backup) { - struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); - struct xe_bo *backup = bo->backup_obj; - bool backup_created = false; + struct xe_device *xe = xe_bo_device(bo); bool unmap = false; int ret = 0; - xe_bo_lock(bo, false); - - if (WARN_ON(!bo->ttm.resource)) { - ret = -EINVAL; - goto out_unlock_bo; - } - - if (WARN_ON(!xe_bo_is_pinned(bo))) { - ret = -EINVAL; - goto out_unlock_bo; - } - - if (!xe_bo_is_vram(bo)) - goto out_unlock_bo; - - if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) - goto out_unlock_bo; - - if (!backup) { - backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, - DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); - if (IS_ERR(backup)) { - ret = PTR_ERR(backup); - goto out_unlock_bo; - } - backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ - backup_created = true; - } - if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) { struct xe_migrate *migrate; struct dma_fence *fence; @@ -1217,14 +1269,11 @@ int xe_bo_evict_pinned(struct xe_bo *bo) else migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type); + xe_assert(xe, bo->ttm.base.resv == backup->ttm.base.resv); ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1); if (ret) goto out_backup; - ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1); - if (ret) - goto out_backup; - fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource, backup->ttm.resource, false); if (IS_ERR(fence)) { @@ -1234,8 +1283,6 @@ int xe_bo_evict_pinned(struct xe_bo *bo) dma_resv_add_fence(bo->ttm.base.resv, fence, DMA_RESV_USAGE_KERNEL); - dma_resv_add_fence(backup->ttm.base.resv, fence, - DMA_RESV_USAGE_KERNEL); dma_fence_put(fence); } else { ret = xe_bo_vmap(backup); @@ -1245,25 +1292,88 @@ int xe_bo_evict_pinned(struct xe_bo *bo) if 
(iosys_map_is_null(&bo->vmap)) { ret = xe_bo_vmap(bo); if (ret) - goto out_backup; + goto out_vunmap; unmap = true; } xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0, - bo->size); + xe_bo_size(bo)); } if (!bo->backup_obj) bo->backup_obj = backup; - -out_backup: +out_vunmap: xe_bo_vunmap(backup); - if (ret && backup_created) - xe_bo_put(backup); -out_unlock_bo: +out_backup: if (unmap) xe_bo_vunmap(bo); - xe_bo_unlock(bo); + + return ret; +} + +/** + * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory + * @bo: The buffer object to move. + * + * On successful completion, the object memory will be moved to system memory. + * + * This is needed to for special handling of pinned VRAM object during + * suspend-resume. + * + * Return: 0 on success. Negative error code on failure. + */ +int xe_bo_evict_pinned(struct xe_bo *bo) +{ + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); + struct xe_validation_ctx ctx; + struct drm_exec exec; + struct xe_bo *backup = bo->backup_obj; + bool backup_created = false; + int ret = 0; + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) { + ret = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + xe_assert(xe, !ret); + + if (WARN_ON(!bo->ttm.resource)) { + ret = -EINVAL; + break; + } + + if (WARN_ON(!xe_bo_is_pinned(bo))) { + ret = -EINVAL; + break; + } + + if (!xe_bo_is_vram(bo)) + break; + + if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) + break; + + if (!backup) { + backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, + xe_bo_size(bo), + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED, &exec); + if (IS_ERR(backup)) { + drm_exec_retry_on_contention(&exec); + ret = PTR_ERR(backup); + xe_validation_retry_on_oom(&ctx, &ret); + break; + } + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ + backup_created = true; + } + + ret = 
xe_bo_evict_pinned_copy(bo, backup); + } + + if (ret && backup_created) + xe_bo_put(backup); + return ret; } @@ -1313,10 +1423,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo) if (ret) goto out_unlock_bo; - ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1); - if (ret) - goto out_unlock_bo; - fence = xe_migrate_copy(migrate, backup, bo, backup->ttm.resource, bo->ttm.resource, false); @@ -1327,8 +1433,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo) dma_resv_add_fence(bo->ttm.base.resv, fence, DMA_RESV_USAGE_KERNEL); - dma_resv_add_fence(backup->ttm.base.resv, fence, - DMA_RESV_USAGE_KERNEL); dma_fence_put(fence); } else { ret = xe_bo_vmap(backup); @@ -1343,7 +1447,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo) } xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr, - bo->size); + xe_bo_size(bo)); } bo->backup_obj = NULL; @@ -1377,7 +1481,8 @@ int xe_bo_dma_unmap_pinned(struct xe_bo *bo) ttm_bo->sg = NULL; xe_tt->sg = NULL; } else if (xe_tt->sg) { - dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, + dma_unmap_sgtable(ttm_to_xe_device(ttm_bo->bdev)->drm.dev, + xe_tt->sg, DMA_BIDIRECTIONAL, 0); sg_free_table(xe_tt->sg); xe_tt->sg = NULL; @@ -1422,7 +1527,7 @@ static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo) * always succeed here, as long as we hold the lru lock. 
*/ spin_lock(&ttm_bo->bdev->lru_lock); - locked = dma_resv_trylock(ttm_bo->base.resv); + locked = dma_resv_trylock(&ttm_bo->base._resv); spin_unlock(&ttm_bo->bdev->lru_lock); xe_assert(xe, locked); @@ -1442,13 +1547,6 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo) bo = ttm_to_xe_bo(ttm_bo); xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount))); - /* - * Corner case where TTM fails to allocate memory and this BOs resv - * still points the VMs resv - */ - if (ttm_bo->base.resv != &ttm_bo->base._resv) - return; - if (!xe_ttm_bo_lock_in_destructor(ttm_bo)) return; @@ -1458,14 +1556,14 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo) * TODO: Don't do this for external bos once we scrub them after * unbind. */ - dma_resv_for_each_fence(&cursor, ttm_bo->base.resv, + dma_resv_for_each_fence(&cursor, &ttm_bo->base._resv, DMA_RESV_USAGE_BOOKKEEP, fence) { if (xe_fence_is_xe_preempt(fence) && !dma_fence_is_signaled(fence)) { if (!replacement) replacement = dma_fence_get_stub(); - dma_resv_replace_fences(ttm_bo->base.resv, + dma_resv_replace_fences(&ttm_bo->base._resv, fence->context, replacement, DMA_RESV_USAGE_BOOKKEEP); @@ -1473,14 +1571,19 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo) } dma_fence_put(replacement); - dma_resv_unlock(ttm_bo->base.resv); + dma_resv_unlock(&ttm_bo->base._resv); } static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo) { + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + if (!xe_bo_is_xe_bo(ttm_bo)) return; + if (IS_VF_CCS_READY(ttm_to_xe_device(ttm_bo->bdev))) + xe_sriov_vf_ccs_detach_bo(bo); + /* * Object is idle and about to be destroyed. Release the * dma-buf attachment. 
@@ -1542,7 +1645,7 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, if (!mem_type_is_vram(ttm_bo->resource->mem_type)) return -EIO; - if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) { + if (!xe_bo_is_visible_vram(bo) || len >= SZ_16K) { struct xe_migrate *migrate = mem_type_to_migrate(xe, ttm_bo->resource->mem_type); @@ -1553,7 +1656,7 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, vram = res_to_mem_region(ttm_bo->resource); xe_res_first(ttm_bo->resource, offset & PAGE_MASK, - bo->size - (offset & PAGE_MASK), &cursor); + xe_bo_size(bo) - (offset & PAGE_MASK), &cursor); do { unsigned long page_offset = (offset & ~PAGE_MASK); @@ -1588,7 +1691,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { .io_mem_pfn = xe_ttm_io_mem_pfn, .access_memory = xe_ttm_access_memory, .release_notify = xe_ttm_bo_release_notify, - .eviction_valuable = ttm_bo_eviction_valuable, + .eviction_valuable = xe_bo_eviction_valuable, .delete_mem_notify = xe_ttm_bo_delete_mem_notify, .swap_notify = xe_ttm_bo_swap_notify, }; @@ -1645,7 +1748,7 @@ static void xe_gem_object_free(struct drm_gem_object *obj) * refcount directly if needed. */ __xe_bo_vunmap(gem_to_xe_bo(obj)); - ttm_bo_put(container_of(obj, struct ttm_buffer_object, base)); + ttm_bo_fini(container_of(obj, struct ttm_buffer_object, base)); } static void xe_gem_object_close(struct drm_gem_object *obj, @@ -1662,50 +1765,258 @@ static void xe_gem_object_close(struct drm_gem_object *obj, } } -static vm_fault_t xe_gem_fault(struct vm_fault *vmf) +static bool should_migrate_to_smem(struct xe_bo *bo) +{ + /* + * NOTE: The following atomic checks are platform-specific. For example, + * if a device supports CXL atomics, these may not be necessary or + * may behave differently. 
+ */ + + return bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL || + bo->attr.atomic_access == DRM_XE_ATOMIC_CPU; +} + +static int xe_bo_wait_usage_kernel(struct xe_bo *bo, struct ttm_operation_ctx *ctx) +{ + long lerr; + + if (ctx->no_wait_gpu) + return dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL) ? + 0 : -EBUSY; + + lerr = dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, + ctx->interruptible, MAX_SCHEDULE_TIMEOUT); + if (lerr < 0) + return lerr; + if (lerr == 0) + return -EBUSY; + + return 0; +} + +/* Populate the bo if swapped out, or migrate if the access mode requires that. */ +static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx, + struct drm_exec *exec) +{ + struct ttm_buffer_object *tbo = &bo->ttm; + int err = 0; + + if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) { + err = xe_bo_wait_usage_kernel(bo, ctx); + if (!err) + err = ttm_bo_populate(&bo->ttm, ctx); + } else if (should_migrate_to_smem(bo)) { + xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM); + err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec); + } + + return err; +} + +/* Call into TTM to populate PTEs, and register bo for PTE removal on runtime suspend. */ +static vm_fault_t __xe_bo_cpu_fault(struct vm_fault *vmf, struct xe_device *xe, struct xe_bo *bo) +{ + vm_fault_t ret; + + trace_xe_bo_cpu_fault(bo); + + ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT); + /* + * When TTM is actually called to insert PTEs, ensure no blocking conditions + * remain, in which case TTM may drop locks and return VM_FAULT_RETRY. 
+ */ + xe_assert(xe, ret != VM_FAULT_RETRY); + + if (ret == VM_FAULT_NOPAGE && + mem_type_is_vram(bo->ttm.resource->mem_type)) { + mutex_lock(&xe->mem_access.vram_userfault.lock); + if (list_empty(&bo->vram_userfault_link)) + list_add(&bo->vram_userfault_link, + &xe->mem_access.vram_userfault.list); + mutex_unlock(&xe->mem_access.vram_userfault.lock); + } + + return ret; +} + +static vm_fault_t xe_err_to_fault_t(int err) +{ + switch (err) { + case 0: + case -EINTR: + case -ERESTARTSYS: + case -EAGAIN: + return VM_FAULT_NOPAGE; + case -ENOMEM: + case -ENOSPC: + return VM_FAULT_OOM; + default: + break; + } + return VM_FAULT_SIGBUS; +} + +static bool xe_ttm_bo_is_imported(struct ttm_buffer_object *tbo) +{ + dma_resv_assert_held(tbo->base.resv); + + return tbo->ttm && + (tbo->ttm->page_flags & (TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE)) == + TTM_TT_FLAG_EXTERNAL; +} + +static vm_fault_t xe_bo_cpu_fault_fastpath(struct vm_fault *vmf, struct xe_device *xe, + struct xe_bo *bo, bool needs_rpm) +{ + struct ttm_buffer_object *tbo = &bo->ttm; + vm_fault_t ret = VM_FAULT_RETRY; + struct xe_validation_ctx ctx; + struct ttm_operation_ctx tctx = { + .interruptible = true, + .no_wait_gpu = true, + .gfp_retry_mayfail = true, + + }; + int err; + + if (needs_rpm && !xe_pm_runtime_get_if_active(xe)) + return VM_FAULT_RETRY; + + err = xe_validation_ctx_init(&ctx, &xe->val, NULL, + (struct xe_val_flags) { + .interruptible = true, + .no_block = true + }); + if (err) + goto out_pm; + + if (!dma_resv_trylock(tbo->base.resv)) + goto out_validation; + + if (xe_ttm_bo_is_imported(tbo)) { + ret = VM_FAULT_SIGBUS; + drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n"); + goto out_unlock; + } + + err = xe_bo_fault_migrate(bo, &tctx, NULL); + if (err) { + /* Return VM_FAULT_RETRY on these errors. 
*/ + if (err != -ENOMEM && err != -ENOSPC && err != -EBUSY) + ret = xe_err_to_fault_t(err); + goto out_unlock; + } + + if (dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL)) + ret = __xe_bo_cpu_fault(vmf, xe, bo); + +out_unlock: + dma_resv_unlock(tbo->base.resv); +out_validation: + xe_validation_ctx_fini(&ctx); +out_pm: + if (needs_rpm) + xe_pm_runtime_put(xe); + + return ret; +} + +static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf) { struct ttm_buffer_object *tbo = vmf->vma->vm_private_data; struct drm_device *ddev = tbo->base.dev; struct xe_device *xe = to_xe_device(ddev); struct xe_bo *bo = ttm_to_xe_bo(tbo); bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK; + bool retry_after_wait = false; + struct xe_validation_ctx ctx; + struct drm_exec exec; vm_fault_t ret; + int err = 0; int idx; - if (needs_rpm) - xe_pm_runtime_get(xe); + if (!drm_dev_enter(&xe->drm, &idx)) + return ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); - ret = ttm_bo_vm_reserve(tbo, vmf); - if (ret) + ret = xe_bo_cpu_fault_fastpath(vmf, xe, bo, needs_rpm); + if (ret != VM_FAULT_RETRY) goto out; - if (drm_dev_enter(ddev, &idx)) { - trace_xe_bo_cpu_fault(bo); - - ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, - TTM_BO_VM_NUM_PREFAULT); - drm_dev_exit(idx); + if (fault_flag_allow_retry_first(vmf->flags)) { + if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) + goto out; + retry_after_wait = true; + xe_bo_get(bo); + mmap_read_unlock(vmf->vma->vm_mm); } else { - ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); + ret = VM_FAULT_NOPAGE; } - if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) - goto out; /* - * ttm_bo_vm_reserve() already has dma_resv_lock. + * The fastpath failed and we were not required to return and retry immediately. + * We're now running in one of two modes: + * + * 1) retry_after_wait == true: The mmap_read_lock() is dropped, and we're trying + * to resolve blocking waits. 
But we can't resolve the fault since the + * mmap_read_lock() is dropped. After retrying the fault, the aim is that the fastpath + * should succeed. But it may fail since we drop the bo lock. + * + * 2) retry_after_wait == false: The fastpath failed, typically even after + * a retry. Do whatever's necessary to resolve the fault. + * + * This construct is recommended to avoid excessive waits under the mmap_lock. */ - if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) { - mutex_lock(&xe->mem_access.vram_userfault.lock); - if (list_empty(&bo->vram_userfault_link)) - list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list); - mutex_unlock(&xe->mem_access.vram_userfault.lock); + + if (needs_rpm) + xe_pm_runtime_get(xe); + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, + err) { + struct ttm_operation_ctx tctx = { + .interruptible = true, + .no_wait_gpu = false, + .gfp_retry_mayfail = retry_after_wait, + }; + + err = drm_exec_lock_obj(&exec, &tbo->base); + drm_exec_retry_on_contention(&exec); + if (err) + break; + + if (xe_ttm_bo_is_imported(tbo)) { + err = -EFAULT; + drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n"); + break; + } + + err = xe_bo_fault_migrate(bo, &tctx, &exec); + if (err) { + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + break; + } + + err = xe_bo_wait_usage_kernel(bo, &tctx); + if (err) + break; + + if (!retry_after_wait) + ret = __xe_bo_cpu_fault(vmf, xe, bo); } + /* if retry_after_wait == true, we *must* return VM_FAULT_RETRY. 
*/ + if (err && !retry_after_wait) + ret = xe_err_to_fault_t(err); - dma_resv_unlock(tbo->base.resv); -out: if (needs_rpm) xe_pm_runtime_put(xe); + if (retry_after_wait) + xe_bo_put(bo); +out: + drm_dev_exit(idx); + return ret; } @@ -1749,7 +2060,7 @@ int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size) } static const struct vm_operations_struct xe_gem_vm_ops = { - .fault = xe_gem_fault, + .fault = xe_bo_cpu_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, .access = xe_bo_vm_access, @@ -1797,11 +2108,32 @@ void xe_bo_free(struct xe_bo *bo) kfree(bo); } -struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, - struct xe_tile *tile, struct dma_resv *resv, - struct ttm_lru_bulk_move *bulk, size_t size, - u16 cpu_caching, enum ttm_bo_type type, - u32 flags) +/** + * xe_bo_init_locked() - Initialize or create an xe_bo. + * @xe: The xe device. + * @bo: An already allocated buffer object or NULL + * if the function should allocate a new one. + * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @resv: Pointer to a locked shared reservation object to use for this bo, + * or NULL for the xe_bo to use its own. + * @bulk: The bulk move to use for LRU bumping, or NULL for external bos. + * @size: The storage size to use for the bo. + * @cpu_caching: The cpu caching used for system memory backing store. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * @exec: The drm_exec transaction to use for exhaustive eviction. + * + * Initialize or create an xe buffer object. On failure, any allocated buffer + * object passed in @bo will have been unreferenced. + * + * Return: The buffer object on success. Negative error pointer on failure. 
+ */ +struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, + struct xe_tile *tile, struct dma_resv *resv, + struct ttm_lru_bulk_move *bulk, size_t size, + u16 cpu_caching, enum ttm_bo_type type, + u32 flags, struct drm_exec *exec) { struct ttm_operation_ctx ctx = { .interruptible = true, @@ -1853,7 +2185,6 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, bo->ccs_cleared = false; bo->tile = tile; - bo->size = size; bo->flags = flags; bo->cpu_caching = cpu_caching; bo->ttm.base.funcs = &xe_gem_object_funcs; @@ -1871,8 +2202,9 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, ctx.resv = resv; } + xe_validation_assert_exec(xe, exec, &bo->ttm.base); if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) { - err = __xe_bo_placement_for_flags(xe, bo, bo->flags); + err = __xe_bo_placement_for_flags(xe, bo, bo->flags, type); if (WARN_ON(err)) { xe_ttm_bo_destroy(&bo->ttm); return ERR_PTR(err); @@ -1930,34 +2262,37 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, } static int __xe_bo_fixed_placement(struct xe_device *xe, - struct xe_bo *bo, + struct xe_bo *bo, enum ttm_bo_type type, u32 flags, u64 start, u64 end, u64 size) { struct ttm_place *place = bo->placements; + u32 vram_flag, vram_stolen_flags; + + /* + * to allow fixed placement in GGTT of a VF, post-migration fixups would have to + * include selecting a new fixed offset and shifting the page ranges for it + */ + xe_assert(xe, !IS_SRIOV_VF(xe) || !(bo->flags & XE_BO_FLAG_GGTT)); if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM)) return -EINVAL; + vram_flag = flags & XE_BO_FLAG_VRAM_MASK; + vram_stolen_flags = (flags & (XE_BO_FLAG_STOLEN)) | vram_flag; + + /* check if more than one VRAM/STOLEN flag is set */ + if (hweight32(vram_stolen_flags) > 1) + return -EINVAL; + place->flags = TTM_PL_FLAG_CONTIGUOUS; place->fpfn = start >> PAGE_SHIFT; place->lpfn = end >> PAGE_SHIFT; - switch (flags & (XE_BO_FLAG_STOLEN | 
XE_BO_FLAG_VRAM_MASK)) { - case XE_BO_FLAG_VRAM0: - place->mem_type = XE_PL_VRAM0; - break; - case XE_BO_FLAG_VRAM1: - place->mem_type = XE_PL_VRAM1; - break; - case XE_BO_FLAG_STOLEN: + if (flags & XE_BO_FLAG_STOLEN) place->mem_type = XE_PL_STOLEN; - break; - - default: - /* 0 or multiple of the above set */ - return -EINVAL; - } + else + place->mem_type = bo_vram_flags_to_vram_placement(xe, flags, vram_flag, type); bo->placement = (struct ttm_placement) { .num_placement = 1, @@ -1972,7 +2307,7 @@ __xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u64 start, u64 end, u16 cpu_caching, enum ttm_bo_type type, u32 flags, - u64 alignment) + u64 alignment, struct drm_exec *exec) { struct xe_bo *bo = NULL; int err; @@ -1986,18 +2321,18 @@ __xe_bo_create_locked(struct xe_device *xe, return bo; flags |= XE_BO_FLAG_FIXED_PLACEMENT; - err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size); + err = __xe_bo_fixed_placement(xe, bo, type, flags, start, end, size); if (err) { xe_bo_free(bo); return ERR_PTR(err); } } - bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL, - vm && !xe_vm_in_fault_mode(vm) && - flags & XE_BO_FLAG_USER ? - &vm->lru_bulk_move : NULL, size, - cpu_caching, type, flags); + bo = xe_bo_init_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL, + vm && !xe_vm_in_fault_mode(vm) && + flags & XE_BO_FLAG_USER ? 
+ &vm->lru_bulk_move : NULL, size, + cpu_caching, type, flags, exec); if (IS_ERR(bo)) return bo; @@ -2031,9 +2366,10 @@ __xe_bo_create_locked(struct xe_device *xe, if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo, - start + bo->size, U64_MAX); + start + xe_bo_size(bo), U64_MAX, + exec); } else { - err = xe_ggtt_insert_bo(t->mem.ggtt, bo); + err = xe_ggtt_insert_bo(t->mem.ggtt, bo, exec); } if (err) goto err_unlock_put_bo; @@ -2050,82 +2386,166 @@ err_unlock_put_bo: return ERR_PTR(err); } -struct xe_bo * -xe_bo_create_locked_range(struct xe_device *xe, - struct xe_tile *tile, struct xe_vm *vm, - size_t size, u64 start, u64 end, - enum ttm_bo_type type, u32 flags, u64 alignment) -{ - return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, - flags, alignment); -} - +/** + * xe_bo_create_locked() - Create a BO + * @xe: The xe device. + * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @vm: The local vm or NULL for external objects. + * @size: The storage size to use for the bo. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * @exec: The drm_exec transaction to use for exhaustive eviction. + * + * Create a locked xe BO with no range- nor alignment restrictions. + * + * Return: The buffer object on success. Negative error pointer on failure. 
+ */ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags) + enum ttm_bo_type type, u32 flags, + struct drm_exec *exec) { return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, - flags, 0); + flags, 0, exec); } -struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, size_t size, - u16 cpu_caching, - u32 flags) +static struct xe_bo *xe_bo_create_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, u16 cpu_caching, + enum ttm_bo_type type, u32 flags, + u64 alignment, bool intr) { - struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, - cpu_caching, ttm_bo_type_device, - flags | XE_BO_FLAG_USER, 0); - if (!IS_ERR(bo)) - xe_bo_unlock_vm_held(bo); + struct xe_validation_ctx ctx; + struct drm_exec exec; + struct xe_bo *bo; + int ret = 0; - return bo; + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr}, + ret) { + bo = __xe_bo_create_locked(xe, tile, NULL, size, 0, ~0ULL, + cpu_caching, type, flags, alignment, &exec); + drm_exec_retry_on_contention(&exec); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &ret); + } else { + xe_bo_unlock(bo); + } + } + + return ret ? ERR_PTR(ret) : bo; } -struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags) +/** + * xe_bo_create_user() - Create a user BO + * @xe: The xe device. + * @vm: The local vm or NULL for external objects. + * @size: The storage size to use for the bo. + * @cpu_caching: The caching mode to be used for system backing store. + * @flags: XE_BO_FLAG_ flags. + * @exec: The drm_exec transaction to use for exhaustive eviction, or NULL + * if such a transaction should be initiated by the call. + * + * Create a bo on behalf of user-space. + * + * Return: The buffer object on success. 
Negative error pointer on failure. + */ +struct xe_bo *xe_bo_create_user(struct xe_device *xe, + struct xe_vm *vm, size_t size, + u16 cpu_caching, + u32 flags, struct drm_exec *exec) { - struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags); + struct xe_bo *bo; - if (!IS_ERR(bo)) - xe_bo_unlock_vm_held(bo); + flags |= XE_BO_FLAG_USER; + + if (vm || exec) { + xe_assert(xe, exec); + bo = __xe_bo_create_locked(xe, NULL, vm, size, 0, ~0ULL, + cpu_caching, ttm_bo_type_device, + flags, 0, exec); + if (!IS_ERR(bo)) + xe_bo_unlock_vm_held(bo); + } else { + bo = xe_bo_create_novm(xe, NULL, size, cpu_caching, + ttm_bo_type_device, flags, 0, true); + } return bo; } -struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, - size_t size, u64 offset, - enum ttm_bo_type type, u32 flags) +/** + * xe_bo_create_pin_range_novm() - Create and pin a BO with range options. + * @xe: The xe device. + * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @size: The storage size to use for the bo. + * @start: Start of fixed VRAM range or 0. + * @end: End of fixed VRAM range or ~0ULL. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * + * Create and pin an Xe BO with range options. If @start and @end indicate + * a fixed VRAM range, this must be a ttm_bo_type_kernel bo with VRAM placement + * only. + * + * Return: The buffer object on success. Negative error pointer on failure.
+ */ +struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, u64 start, u64 end, + enum ttm_bo_type type, u32 flags) { - return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset, - type, flags, 0); + struct xe_validation_ctx ctx; + struct drm_exec exec; + struct xe_bo *bo; + int err = 0; + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { + bo = __xe_bo_create_locked(xe, tile, NULL, size, start, end, + 0, type, flags, 0, &exec); + if (IS_ERR(bo)) { + drm_exec_retry_on_contention(&exec); + err = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &err); + break; + } + + err = xe_bo_pin(bo, &exec); + xe_bo_unlock(bo); + if (err) { + xe_bo_put(bo); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + break; + } + } + + return err ? ERR_PTR(err) : bo; } -struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, - struct xe_tile *tile, - struct xe_vm *vm, - size_t size, u64 offset, - enum ttm_bo_type type, u32 flags, - u64 alignment) +static struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, + struct xe_tile *tile, + struct xe_vm *vm, + size_t size, u64 offset, + enum ttm_bo_type type, u32 flags, + u64 alignment, struct drm_exec *exec) { struct xe_bo *bo; int err; u64 start = offset == ~0ull ? 0 : offset; - u64 end = offset == ~0ull ? offset : start + size; + u64 end = offset == ~0ull ? 
~0ull : start + size; if (flags & XE_BO_FLAG_STOLEN && xe_ttm_stolen_cpu_access_needs_ggtt(xe)) flags |= XE_BO_FLAG_GGTT; - bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type, - flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED, - alignment); + bo = __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, + flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED, + alignment, exec); if (IS_ERR(bo)) return bo; - err = xe_bo_pin(bo); + err = xe_bo_pin(bo, exec); if (err) goto err_put; @@ -2145,26 +2565,100 @@ err_put: return ERR_PTR(err); } +/** + * xe_bo_create_pin_map_at_novm() - Create pinned and mapped bo at optional VRAM offset + * @xe: The xe device. + * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @size: The storage size to use for the bo. + * @offset: Optional VRAM offset or %~0ull for don't care. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * @alignment: GGTT alignment. + * @intr: Whether to execute any waits for backing store interruptible. + * + * Create a pinned and optionally mapped bo with VRAM offset and GGTT alignment + * options. The bo will be external and not associated with a VM. + * + * Return: The buffer object on success. Negative error pointer on failure. + * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set + * to true on entry. 
+ */ +struct xe_bo * +xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, u64 offset, enum ttm_bo_type type, u32 flags, + u64 alignment, bool intr) +{ + struct xe_validation_ctx ctx; + struct drm_exec exec; + struct xe_bo *bo; + int ret = 0; + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr}, + ret) { + bo = xe_bo_create_pin_map_at_aligned(xe, tile, NULL, size, offset, + type, flags, alignment, &exec); + if (IS_ERR(bo)) { + drm_exec_retry_on_contention(&exec); + ret = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &ret); + } + } + + return ret ? ERR_PTR(ret) : bo; +} + +/** + * xe_bo_create_pin_map() - Create pinned and mapped bo + * @xe: The xe device. + * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @vm: The vm to associate the buffer object with. The vm's resv must be locked + * with the transaction represented by @exec. + * @size: The storage size to use for the bo. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * @exec: The drm_exec transaction to use for exhaustive eviction, and + * previously used for locking @vm's resv. + * + * Create a pinned and mapped bo. If @vm is NULL, the bo will be external + * and not associated with a VM. + * + * Return: The buffer object on success. Negative error pointer on failure. + * In particular, the function may return ERR_PTR(%-EINTR) if @exec was + * configured for interruptible locking.
+ */ struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags) + enum ttm_bo_type type, u32 flags, + struct drm_exec *exec) { - return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags); + return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, ~0ull, type, flags, + 0, exec); } -struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, - const void *data, size_t size, - enum ttm_bo_type type, u32 flags) +/** + * xe_bo_create_pin_map_novm() - Create pinned and mapped bo + * @xe: The xe device. + * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @size: The storage size to use for the bo. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * @intr: Whether to execute any waits for backing store interruptible. + * + * Create a pinned and mapped bo. The bo will be external and not associated + * with a VM. + * + * Return: The buffer object on success. Negative error pointer on failure. + * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set + * to true on entry. 
+ */ +struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, enum ttm_bo_type type, u32 flags, + bool intr) { - struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - type, flags); - if (IS_ERR(bo)) - return bo; - - xe_map_memcpy_to(xe, &bo->vmap, 0, data, size); - - return bo; + return xe_bo_create_pin_map_at_novm(xe, tile, size, ~0ull, type, flags, 0, intr); } static void __xe_bo_unpin_map_no_vm(void *arg) @@ -2179,8 +2673,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile int ret; KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags); - - bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags); + bo = xe_bo_create_pin_map_novm(xe, tile, size, ttm_bo_type_kernel, flags, true); if (IS_ERR(bo)) return bo; @@ -2191,6 +2684,11 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile return bo; } +void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo) +{ + devm_release_action(xe_bo_device(bo)->drm.dev, __xe_bo_unpin_map_no_vm, bo); +} + struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, const void *data, size_t size, u32 flags) { @@ -2229,7 +2727,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str xe_assert(xe, !(*src)->vmap.is_iomem); bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr, - (*src)->size, dst_flags); + xe_bo_size(*src), dst_flags); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -2262,6 +2760,8 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res) /** * xe_bo_pin_external - pin an external BO * @bo: buffer object to be pinned + * @in_place: Pin in current placement, don't attempt to migrate. + * @exec: The drm_exec transaction to use for exhaustive eviction. * * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD) * BO. 
Unique call compared to xe_bo_pin as this function has it own set of @@ -2269,7 +2769,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res) * * Returns 0 for success, negative error code otherwise. */ -int xe_bo_pin_external(struct xe_bo *bo) +int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec) { struct xe_device *xe = xe_bo_device(bo); int err; @@ -2278,9 +2778,11 @@ int xe_bo_pin_external(struct xe_bo *bo) xe_assert(xe, xe_bo_is_user(bo)); if (!xe_bo_is_pinned(bo)) { - err = xe_bo_validate(bo, NULL, false); - if (err) - return err; + if (!in_place) { + err = xe_bo_validate(bo, NULL, false, exec); + if (err) + return err; + } spin_lock(&xe->pinned.lock); list_add_tail(&bo->pinned_link, &xe->pinned.late.external); @@ -2289,7 +2791,7 @@ int xe_bo_pin_external(struct xe_bo *bo) ttm_bo_pin(&bo->ttm); if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) - xe_ttm_tt_account_subtract(bo->ttm.ttm); + xe_ttm_tt_account_subtract(xe, bo->ttm.ttm); /* * FIXME: If we always use the reserve / unreserve functions for locking @@ -2300,7 +2802,17 @@ int xe_bo_pin_external(struct xe_bo *bo) return 0; } -int xe_bo_pin(struct xe_bo *bo) +/** + * xe_bo_pin() - Pin a kernel bo after potentially migrating it + * @bo: The kernel bo to pin. + * @exec: The drm_exec transaction to use for exhaustive eviction. + * + * Attempts to migrate a bo to @bo->placement. If that succeeds, + * pins the bo. + * + * Return: %0 on success, negative error code on migration failure. 
+ */ +int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec) { struct ttm_place *place = &bo->placements[0]; struct xe_device *xe = xe_bo_device(bo); @@ -2322,7 +2834,7 @@ int xe_bo_pin(struct xe_bo *bo) /* We only expect at most 1 pin */ xe_assert(xe, !xe_bo_is_pinned(bo)); - err = xe_bo_validate(bo, NULL, false); + err = xe_bo_validate(bo, NULL, false, exec); if (err) return err; @@ -2337,7 +2849,7 @@ int xe_bo_pin(struct xe_bo *bo) ttm_bo_pin(&bo->ttm); if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) - xe_ttm_tt_account_subtract(bo->ttm.ttm); + xe_ttm_tt_account_subtract(xe, bo->ttm.ttm); /* * FIXME: If we always use the reserve / unreserve functions for locking @@ -2373,7 +2885,7 @@ void xe_bo_unpin_external(struct xe_bo *bo) ttm_bo_unpin(&bo->ttm); if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) - xe_ttm_tt_account_add(bo->ttm.ttm); + xe_ttm_tt_account_add(xe, bo->ttm.ttm); /* * FIXME: If we always use the reserve / unreserve functions for locking @@ -2405,7 +2917,7 @@ void xe_bo_unpin(struct xe_bo *bo) } ttm_bo_unpin(&bo->ttm); if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) - xe_ttm_tt_account_add(bo->ttm.ttm); + xe_ttm_tt_account_add(xe, bo->ttm.ttm); } /** @@ -2415,6 +2927,7 @@ void xe_bo_unpin(struct xe_bo *bo) * NULL. Used together with @allow_res_evict. * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's * reservation object. + * @exec: The drm_exec transaction to use for exhaustive eviction. * * Make sure the bo is in allowed placement, migrating it if necessary. If * needed, other bos will be evicted. If bos selected for eviction shares @@ -2424,13 +2937,18 @@ void xe_bo_unpin(struct xe_bo *bo) * Return: 0 on success, negative error code on failure. May return * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal. 
*/ -int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) +int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict, + struct drm_exec *exec) { struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, .gfp_retry_mayfail = true, }; + int ret; + + if (xe_bo_is_pinned(bo)) + return 0; if (vm) { lockdep_assert_held(&vm->lock); @@ -2440,8 +2958,13 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) ctx.resv = xe_vm_resv(vm); } + xe_vm_set_validating(vm, allow_res_evict); trace_xe_bo_validate(bo); - return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); + xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base); + ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); + xe_vm_clear_validating(vm, allow_res_evict); + + return ret; } bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo) @@ -2513,7 +3036,7 @@ int xe_bo_vmap(struct xe_bo *bo) * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap * to use struct iosys_map. 
*/ - ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap); + ret = ttm_bo_kmap(&bo->ttm, 0, xe_bo_size(bo) >> PAGE_SHIFT, &bo->kmap); if (ret) return ret; @@ -2557,7 +3080,7 @@ typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe, u64 value); static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = { - [DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_set_pxp_type, + [DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE] = gem_create_set_pxp_type, }; static int gem_create_user_ext_set_property(struct xe_device *xe, @@ -2633,8 +3156,9 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); struct drm_xe_gem_create *args = data; + struct xe_validation_ctx ctx; + struct drm_exec exec; struct xe_vm *vm = NULL; - ktime_t end = 0; struct xe_bo *bo; unsigned int bo_flags; u32 handle; @@ -2708,25 +3232,26 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, return -ENOENT; } -retry: - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - goto out_vm; + err = 0; + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, + err) { + if (vm) { + err = xe_vm_drm_exec_lock(vm, &exec); + drm_exec_retry_on_contention(&exec); + if (err) + break; + } + bo = xe_bo_create_user(xe, vm, args->size, args->cpu_caching, + bo_flags, &exec); + drm_exec_retry_on_contention(&exec); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &err); + break; + } } - - bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching, - bo_flags); - - if (vm) - xe_vm_unlock(vm); - - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - if (xe_vm_validate_should_retry(NULL, err, &end)) - goto retry; + if (err) goto out_vm; - } if (args->extensions) { err = gem_create_user_extensions(xe, bo, args->extensions, 0); @@ -2875,6 +3400,9 @@ static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place) * xe_bo_migrate - 
Migrate an object to the desired region id * @bo: The buffer object to migrate. * @mem_type: The TTM region type to migrate to. + * @tctx: A pointer to a struct ttm_operation_ctx or NULL if + * a default interruptible ctx is to be used. + * @exec: The drm_exec transaction to use for exhaustive eviction. * * Attempt to migrate the buffer object to the desired memory region. The * buffer object may not be pinned, and must be locked. @@ -2886,7 +3414,8 @@ static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place) * Return: 0 on success. Negative error code on failure. In particular may * return -EINTR or -ERESTARTSYS if signal pending. */ -int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) +int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *tctx, + struct drm_exec *exec) { struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); struct ttm_operation_ctx ctx = { @@ -2898,6 +3427,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) struct ttm_place requested; xe_bo_assert_held(bo); + tctx = tctx ? tctx : &ctx; if (bo->ttm.resource->mem_type == mem_type) return 0; @@ -2924,19 +3454,22 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) add_vram(xe, bo, &requested, bo->flags, mem_type, &c); } - return ttm_bo_validate(&bo->ttm, &placement, &ctx); + if (!tctx->no_wait_gpu) + xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base); + return ttm_bo_validate(&bo->ttm, &placement, tctx); } /** * xe_bo_evict - Evict an object to evict placement * @bo: The buffer object to migrate. + * @exec: The drm_exec transaction to use for exhaustive eviction. * * On successful completion, the object memory will be moved to evict * placement. This function blocks until the object has been fully moved. * * Return: 0 on success. Negative error code on failure. 
*/ -int xe_bo_evict(struct xe_bo *bo) +int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec) { struct ttm_operation_ctx ctx = { .interruptible = false, @@ -2982,6 +3515,14 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo) if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM)) return false; + /* + * Compression implies coh_none, therefore we know for sure that WB + * memory can't currently use compression, which is likely one of the + * common cases. + */ + if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB) + return false; + return true; } @@ -3057,7 +3598,7 @@ void xe_bo_put(struct xe_bo *bo) #endif for_each_tile(tile, xe_bo_device(bo), id) if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt) - might_lock(&bo->ggtt_node[id]->ggtt->lock); + xe_ggtt_might_lock(bo->ggtt_node[id]->ggtt); drm_gem_object_put(&bo->ttm.base); } } @@ -3079,20 +3620,19 @@ int xe_bo_dumb_create(struct drm_file *file_priv, struct xe_device *xe = to_xe_device(dev); struct xe_bo *bo; uint32_t handle; - int cpp = DIV_ROUND_UP(args->bpp, 8); int err; u32 page_size = max_t(u32, PAGE_SIZE, xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? 
SZ_64K : SZ_4K); - args->pitch = ALIGN(args->width * cpp, 64); - args->size = ALIGN(mul_u32_u32(args->pitch, args->height), - page_size); + err = drm_mode_size_dumb(dev, args, SZ_64, page_size); + if (err) + return err; - bo = xe_bo_create_user(xe, NULL, NULL, args->size, + bo = xe_bo_create_user(xe, NULL, args->size, DRM_XE_GEM_CPU_CACHING_WC, XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | XE_BO_FLAG_SCANOUT | - XE_BO_FLAG_NEEDS_CPU_ACCESS); + XE_BO_FLAG_NEEDS_CPU_ACCESS, NULL); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 02ada1fb8a23..911d5b90461a 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -10,8 +10,10 @@ #include "xe_bo_types.h" #include "xe_macros.h" +#include "xe_validation.h" #include "xe_vm_types.h" #include "xe_vm.h" +#include "xe_vram_types.h" #define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ @@ -23,8 +25,9 @@ #define XE_BO_FLAG_VRAM_MASK (XE_BO_FLAG_VRAM0 | XE_BO_FLAG_VRAM1) /* -- */ #define XE_BO_FLAG_STOLEN BIT(4) +#define XE_BO_FLAG_VRAM(vram) (XE_BO_FLAG_VRAM0 << ((vram)->id)) #define XE_BO_FLAG_VRAM_IF_DGFX(tile) (IS_DGFX(tile_to_xe(tile)) ? 
\ - XE_BO_FLAG_VRAM0 << (tile)->id : \ + XE_BO_FLAG_VRAM((tile)->mem.vram) : \ XE_BO_FLAG_SYSTEM) #define XE_BO_FLAG_GGTT BIT(5) #define XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE BIT(6) @@ -46,6 +49,7 @@ #define XE_BO_FLAG_GGTT2 BIT(22) #define XE_BO_FLAG_GGTT3 BIT(23) #define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(24) +#define XE_BO_FLAG_FORCE_USER_VRAM BIT(25) /* this one is trigger internally only */ #define XE_BO_FLAG_INTERNAL_TEST BIT(30) @@ -86,49 +90,40 @@ struct sg_table; struct xe_bo *xe_bo_alloc(void); void xe_bo_free(struct xe_bo *bo); -struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, - struct xe_tile *tile, struct dma_resv *resv, - struct ttm_lru_bulk_move *bulk, size_t size, - u16 cpu_caching, enum ttm_bo_type type, - u32 flags); -struct xe_bo * -xe_bo_create_locked_range(struct xe_device *xe, - struct xe_tile *tile, struct xe_vm *vm, - size_t size, u64 start, u64 end, - enum ttm_bo_type type, u32 flags, u64 alignment); +struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, + struct xe_tile *tile, struct dma_resv *resv, + struct ttm_lru_bulk_move *bulk, size_t size, + u16 cpu_caching, enum ttm_bo_type type, + u32 flags, struct drm_exec *exec); struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, size_t size, - u16 cpu_caching, - u32 flags); + enum ttm_bo_type type, u32 flags, + struct drm_exec *exec); +struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_vm *vm, size_t size, + u16 cpu_caching, u32 flags, struct drm_exec *exec); struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags); -struct xe_bo 
*xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, size_t size, u64 offset, - enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, - struct xe_tile *tile, - struct xe_vm *vm, - size_t size, u64 offset, - enum ttm_bo_type type, u32 flags, - u64 alignment); -struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, - const void *data, size_t size, - enum ttm_bo_type type, u32 flags); + enum ttm_bo_type type, u32 flags, + struct drm_exec *exec); +struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, enum ttm_bo_type type, u32 flags, + bool intr); +struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, u64 start, u64 end, + enum ttm_bo_type type, u32 flags); +struct xe_bo * +xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, u64 offset, enum ttm_bo_type type, + u32 flags, u64 alignment, bool intr); struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, size_t size, u32 flags); +void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo); struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, const void *data, size_t size, u32 flags); int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src); int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, - u32 bo_flags); + u32 bo_flags, enum ttm_bo_type type); static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo) { @@ -201,11 +196,12 @@ static inline void xe_bo_unlock_vm_held(struct xe_bo *bo) } } -int xe_bo_pin_external(struct xe_bo *bo); -int xe_bo_pin(struct xe_bo *bo); +int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec); +int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec); void xe_bo_unpin_external(struct xe_bo *bo); void 
xe_bo_unpin(struct xe_bo *bo); -int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict); +int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict, + struct drm_exec *exec); static inline bool xe_bo_is_pinned(struct xe_bo *bo) { @@ -238,6 +234,19 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) return xe_bo_addr(bo, 0, page_size); } +/** + * xe_bo_size() - Xe BO size + * @bo: The bo object. + * + * Simple helper to return Xe BO's size. + * + * Return: Xe BO's size + */ +static inline size_t xe_bo_size(struct xe_bo *bo) +{ + return bo->ttm.base.size; +} + static inline u32 __xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id) { @@ -246,7 +255,7 @@ __xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id) if (XE_WARN_ON(!ggtt_node)) return 0; - XE_WARN_ON(ggtt_node->base.size > bo->size); + XE_WARN_ON(ggtt_node->base.size > xe_bo_size(bo)); XE_WARN_ON(ggtt_node->base.start + ggtt_node->base.size > (1ull << 32)); return ggtt_node->base.start; } @@ -265,6 +274,7 @@ int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size); bool mem_type_is_vram(u32 mem_type); bool xe_bo_is_vram(struct xe_bo *bo); +bool xe_bo_is_visible_vram(struct xe_bo *bo); bool xe_bo_is_stolen(struct xe_bo *bo); bool xe_bo_is_stolen_devmem(struct xe_bo *bo); bool xe_bo_is_vm_bound(struct xe_bo *bo); @@ -273,8 +283,9 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res); bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); -int xe_bo_migrate(struct xe_bo *bo, u32 mem_type); -int xe_bo_evict(struct xe_bo *bo); +int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *ctc, + struct drm_exec *exec); +int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec); int xe_bo_evict_pinned(struct xe_bo *bo); int xe_bo_notifier_prepare_pinned(struct xe_bo *bo); @@ -300,7 +311,22 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo); static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo) { - return PAGE_ALIGN(bo->ttm.base.size); + return 
PAGE_ALIGN(xe_bo_size(bo)); +} + +/** + * xe_bo_has_valid_ccs_bb - Check if CCS's BBs were setup for the BO. + * @bo: the &xe_bo to check + * + * The CCS's BBs should only be setup by the driver VF, but it is safe + * to call this function also by non-VF driver. + * + * Return: true iff the CCS's BBs are setup, false otherwise. + */ +static inline bool xe_bo_has_valid_ccs_bb(struct xe_bo *bo) +{ + return bo->bb_ccs[XE_SRIOV_VF_CCS_READ_CTX] && + bo->bb_ccs[XE_SRIOV_VF_CCS_WRITE_CTX]; } static inline bool xe_bo_has_pages(struct xe_bo *bo) diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h index 25a884c64bf1..401e7dd26ef3 100644 --- a/drivers/gpu/drm/xe/xe_bo_doc.h +++ b/drivers/gpu/drm/xe/xe_bo_doc.h @@ -12,7 +12,7 @@ * BO management * ============= * - * TTM manages (placement, eviction, etc...) all BOs in XE. + * TTM manages (placement, eviction, etc...) all BOs in Xe. * * BO creation * =========== @@ -29,7 +29,7 @@ * a kernel BO (e.g. engine state, memory for page tables, etc...). These BOs * are typically mapped in the GGTT (any kernel BOs aside memory for page tables * are in the GGTT), are pinned (can't move or be evicted at runtime), have a - * vmap (XE can access the memory via xe_map layer) and have contiguous physical + * vmap (Xe can access the memory via xe_map layer) and have contiguous physical * memory. * * More details of why kernel BOs are pinned and contiguous below. @@ -40,7 +40,7 @@ * A user BO is created via the DRM_IOCTL_XE_GEM_CREATE IOCTL. Once it is * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All - * user BOs are evictable and user BOs are never pinned by XE. The allocation of + * user BOs are evictable and user BOs are never pinned by Xe. The allocation of * the backing store can be deferred from creation time until first use which is * either mmap, bind, or pagefault. 
* @@ -84,7 +84,7 @@ * ==================== * * All eviction (or in other words, moving a BO from one memory location to - * another) is routed through TTM with a callback into XE. + * another) is routed through TTM with a callback into Xe. * * Runtime eviction * ---------------- diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index ed3746d32b27..7661fca7f278 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -73,6 +73,11 @@ int xe_bo_notifier_prepare_all_pinned(struct xe_device *xe) &xe->pinned.late.kernel_bo_present, xe_bo_notifier_prepare_pinned); + if (!ret) + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external, + &xe->pinned.late.external, + xe_bo_notifier_prepare_pinned); + return ret; } @@ -93,6 +98,10 @@ void xe_bo_notifier_unprepare_all_pinned(struct xe_device *xe) (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, &xe->pinned.late.kernel_bo_present, xe_bo_notifier_unprepare_pinned); + + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.external, + &xe->pinned.late.external, + xe_bo_notifier_unprepare_pinned); } /** @@ -158,8 +167,8 @@ int xe_bo_evict_all(struct xe_device *xe) if (ret) return ret; - ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, - &xe->pinned.late.evicted, xe_bo_evict_pinned); + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external, + &xe->pinned.late.external, xe_bo_evict_pinned); if (!ret) ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, @@ -182,7 +191,6 @@ int xe_bo_evict_all(struct xe_device *xe) static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo) { - struct xe_device *xe = xe_bo_device(bo); int ret; ret = xe_bo_restore_pinned(bo); @@ -197,19 +205,10 @@ static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo) if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile))) continue; - mutex_lock(&tile->mem.ggtt->lock); - xe_ggtt_map_bo(tile->mem.ggtt, bo); - mutex_unlock(&tile->mem.ggtt->lock); + 
xe_ggtt_map_bo_unlocked(tile->mem.ggtt, bo); } } - /* - * We expect validate to trigger a move VRAM and our move code - * should setup the iosys map. - */ - xe_assert(xe, !(bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) || - !iosys_map_is_null(&bo->vmap)); - return 0; } diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index eb5e83c5f233..d4fe3c8dca5b 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -9,6 +9,7 @@ #include <linux/iosys-map.h> #include <drm/drm_gpusvm.h> +#include <drm/drm_pagemap.h> #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_device.h> #include <drm/ttm/ttm_placement.h> @@ -24,7 +25,9 @@ struct xe_vm; /* TODO: To be selected with VM_MADVISE */ #define XE_BO_PRIORITY_NORMAL 1 -/** @xe_bo: XE buffer object */ +/** + * struct xe_bo - Xe buffer object + */ struct xe_bo { /** @ttm: TTM base buffer object */ struct ttm_buffer_object ttm; @@ -32,8 +35,6 @@ struct xe_bo { struct xe_bo *backup_obj; /** @parent_obj: Ref to parent bo if this a backup_obj */ struct xe_bo *parent_obj; - /** @size: Size of this buffer object */ - size_t size; /** @flags: flags for this buffer object */ u32 flags; /** @vm: VM this BO is attached to, for extobj this will be NULL */ @@ -48,7 +49,7 @@ struct xe_bo { struct xe_ggtt_node *ggtt_node[XE_MAX_TILES_PER_DEVICE]; /** @vmap: iosys map of this buffer */ struct iosys_map vmap; - /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */ + /** @kmap: TTM bo kmap object for internal use only. Keep off. 
*/ struct ttm_bo_kmap_obj kmap; /** @pinned_link: link to present / evicted list of pinned BO */ struct list_head pinned_link; @@ -62,6 +63,14 @@ struct xe_bo { */ struct list_head client_link; #endif + /** @attr: User controlled attributes for bo */ + struct { + /** + * @atomic_access: type of atomic access bo needs + * protected by bo dma-resv lock + */ + u32 atomic_access; + } attr; /** * @pxp_key_instance: PXP key instance this BO was created against. A * 0 in this variable indicates that the BO does not use PXP encryption. @@ -75,9 +84,12 @@ struct xe_bo { /** @created: Whether the bo has passed initial creation */ bool created; - /** @ccs_cleared */ + /** @ccs_cleared: true means that CCS region of BO is already cleared */ bool ccs_cleared; + /** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */ + struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT]; + /** * @cpu_caching: CPU caching mode. Currently only used for userspace * objects. Exceptions are system memory on DGFX, which is always @@ -86,12 +98,13 @@ struct xe_bo { u16 cpu_caching; /** @devmem_allocation: SVM device memory allocation */ - struct drm_gpusvm_devmem devmem_allocation; + struct drm_pagemap_devmem devmem_allocation; /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */ - struct list_head vram_userfault_link; + struct list_head vram_userfault_link; - /** @min_align: minimum alignment needed for this BO if different + /** + * @min_align: minimum alignment needed for this BO if different * from default */ u64 min_align; diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index cb9f175c89a1..9f6251b1008b 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -3,38 +3,71 @@ * Copyright © 2025 Intel Corporation */ +#include <linux/bitops.h> +#include <linux/ctype.h> #include <linux/configfs.h> +#include <linux/cleanup.h> +#include <linux/find.h> #include <linux/init.h> #include <linux/module.h> #include 
<linux/pci.h> +#include <linux/string.h> +#include "instructions/xe_mi_commands.h" #include "xe_configfs.h" +#include "xe_gt_types.h" +#include "xe_hw_engine_types.h" #include "xe_module.h" +#include "xe_pci_types.h" +#include "xe_sriov_types.h" /** * DOC: Xe Configfs * * Overview - * ========= + * ======== * - * Configfs is a filesystem-based manager of kernel objects. XE KMD registers a - * configfs subsystem called ``'xe'`` that creates a directory in the mounted configfs directory - * The user can create devices under this directory and configure them as necessary - * See Documentation/filesystems/configfs.rst for more information about how configfs works. + * Configfs is a filesystem-based manager of kernel objects. Xe KMD registers a + * configfs subsystem called ``xe`` that creates a directory in the mounted + * configfs directory. The user can create devices under this directory and + * configure them as necessary. See Documentation/filesystems/configfs.rst for + * more information about how configfs works. * * Create devices - * =============== + * ============== + * + * To create a device, the ``xe`` module should already be loaded, but some + * attributes can only be set before binding the device. It can be accomplished + * by blocking the driver autoprobe:: * - * In order to create a device, the user has to create a directory inside ``'xe'``:: + * # echo 0 > /sys/bus/pci/drivers_autoprobe + * # modprobe xe * - * mkdir /sys/kernel/config/xe/0000:03:00.0/ + * In order to create a device, the user has to create a directory inside ``xe``:: + * + * # mkdir /sys/kernel/config/xe/0000:03:00.0/ * * Every device created is populated by the driver with entries that can be * used to configure it:: * * /sys/kernel/config/xe/ - * .. 0000:03:00.0/ - * ... survivability_mode + * ├── 0000:00:02.0 + * │ └── ... + * ├── 0000:00:02.1 + * │ └── ... 
+ * : + * └── 0000:03:00.0 + * ├── survivability_mode + * ├── gt_types_allowed + * ├── engines_allowed + * └── enable_psmi + * + * After configuring the attributes as per next section, the device can be + * probed with:: + * + * # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind + * # # or + * # echo 0000:03:00.0 > /sys/bus/pci/drivers_probe * * Configure Attributes * ==================== @@ -46,28 +79,280 @@ * effect when probing the device. Example to enable it:: * * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode - * # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind (Enters survivability mode if supported) + * + * This attribute can only be set before binding to the device. + * + * Allowed GT types: + * ----------------- + * + * Allow only specific types of GTs to be detected and initialized by the + * driver. Any combination of GT types can be enabled/disabled, although + * some settings will cause the device to fail to probe. + * + * Writes support both comma- and newline-separated input format. Reads + * will always return one GT type per line. "primary" and "media" are the + * GT type names supported by this interface. + * + * This attribute can only be set before binding to the device. + * + * Examples: + * + * Allow both primary and media GTs to be initialized and used. This matches + * the driver's default behavior:: + * + * # echo 'primary,media' > /sys/kernel/config/xe/0000:03:00.0/gt_types_allowed + * + * Allow only the primary GT of each tile to be initialized and used, + * effectively disabling the media GT if it exists on the platform:: + * + * # echo 'primary' > /sys/kernel/config/xe/0000:03:00.0/gt_types_allowed + * + * Allow only the media GT of each tile to be initialized and used, + * effectively disabling the primary GT. 
**This configuration will cause + * device probe failure on all current platforms, but may be allowed on + * igpu platforms in the future**:: + * + * # echo 'media' > /sys/kernel/config/xe/0000:03:00.0/gt_types_allowed + * + * Disable all GTs. Only other GPU IP (such as display) is potentially usable. + * **This configuration will cause device probe failure on all current + * platforms, but may be allowed on igpu platforms in the future**:: + * + * # echo '' > /sys/kernel/config/xe/0000:03:00.0/gt_types_allowed + * + * Allowed engines: + * ---------------- + * + * Allow only a set of engine(s) to be available, disabling the other engines + * even if they are available in hardware. This is applied after HW fuses are + * considered on each tile. Examples: + * + * Allow only one render and one copy engine, nothing else:: + * + * # echo 'rcs0,bcs0' > /sys/kernel/config/xe/0000:03:00.0/engines_allowed + * + * Allow only compute engines and first copy engine:: + * + * # echo 'ccs*,bcs0' > /sys/kernel/config/xe/0000:03:00.0/engines_allowed + * + * Note that the engine names are the per-GT hardware names. On multi-tile + * platforms, writing ``rcs0,bcs0`` to this file would allow the first render + * and copy engines on each tile. + * + * The requested configuration may not be supported by the platform and driver + * may fail to probe. For example: if at least one copy engine is expected to be + * available for migrations, but it's disabled. This is intended for debugging + * purposes only. + * + * This attribute can only be set before binding to the device. + * + * PSMI + * ---- + * + * Enable extra debugging capabilities to trace engine execution. Only useful + * during early platform enabling and requires additional hardware connected. + * Once it's enabled, additional WAs are added and runtime configuration is + * done via debugfs. 
Example to enable it:: + * + * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/enable_psmi + * + * This attribute can only be set before binding to the device. + * + * Context restore BB + * ------------------ + * + * Allow to execute a batch buffer during any context switches. When the + * GPU is restoring the context, it executes additional commands. It's useful + * for testing additional workarounds and validating certain HW behaviors: it's + * not intended for normal execution and will taint the kernel with TAINT_TEST + * when used. + * + * The syntax allows to pass straight instructions to be executed by the engine + * in a batch buffer or set specific registers. + * + * #. Generic instruction:: + * + * <engine-class> cmd <instr> [[dword0] [dword1] [...]] + * + * #. Simple register setting:: + * + * <engine-class> reg <address> <value> + * + * Commands are saved per engine class: all instances of that class will execute + * those commands during context switch. The instruction, dword arguments, + * addresses and values are in hex format like in the examples below. + * + * #. Execute a LRI command to write 0xDEADBEEF to register 0x4f100 after the + * normal context restore:: + * + * # echo 'rcs cmd 11000001 4F100 DEADBEEF' \ + * > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_post_bb + * + * #. Execute a LRI command to write 0xDEADBEEF to register 0x4f100 at the + * beginning of the context restore:: + * + * # echo 'rcs cmd 11000001 4F100 DEADBEEF' \ + * > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_mid_bb + * + * #. Load certain values in a couple of registers (it can be used as a simpler + * alternative to the `cmd` action):: + * + * # cat > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_post_bb <<EOF + * rcs reg 4F100 DEADBEEF + * rcs reg 4F104 FFFFFFFF + * EOF + * + * .. note:: + * + * When using multiple lines, make sure to use a command that is + * implemented with a single write syscall, like HEREDOC. 
+ * + * Currently this is implemented only for post and mid context restore and + * these attributes can only be set before binding to the device. + * + * Max SR-IOV Virtual Functions + * ---------------------------- + * + * This config allows limiting the number of Virtual Functions (VFs) that can + * be managed by the Physical Function (PF) driver, where value 0 disables the + * PF mode (no VFs). + * + * The default max_vfs config value is taken from the max_vfs modparam. + * + * How to enable PF with support for unlimited (up to HW limit) number of VFs:: + * + * # echo unlimited > /sys/kernel/config/xe/0000:00:02.0/sriov/max_vfs + * # echo 0000:00:02.0 > /sys/bus/pci/drivers/xe/bind + * + * How to enable PF with support for up to 3 VFs:: + * + * # echo 3 > /sys/kernel/config/xe/0000:00:02.0/sriov/max_vfs + * # echo 0000:00:02.0 > /sys/bus/pci/drivers/xe/bind + * + * How to disable PF mode and always run as native:: + * + * # echo 0 > /sys/kernel/config/xe/0000:00:02.0/sriov/max_vfs + * # echo 0000:00:02.0 > /sys/bus/pci/drivers/xe/bind + * + * This setting only takes effect when probing the device. 
* * Remove devices * ============== * * The created device directories can be removed using ``rmdir``:: * - * rmdir /sys/kernel/config/xe/0000:03:00.0/ + * # rmdir /sys/kernel/config/xe/0000:03:00.0/ */ -struct xe_config_device { +/* Similar to struct xe_bb, but not tied to HW (yet) */ +struct wa_bb { + u32 *cs; + u32 len; /* in dwords */ +}; + +struct xe_config_group_device { struct config_group group; + struct config_group sriov; - bool survivability_mode; + struct xe_config_device { + u64 gt_types_allowed; + u64 engines_allowed; + struct wa_bb ctx_restore_post_bb[XE_ENGINE_CLASS_MAX]; + struct wa_bb ctx_restore_mid_bb[XE_ENGINE_CLASS_MAX]; + bool survivability_mode; + bool enable_psmi; + struct { + unsigned int max_vfs; + } sriov; + } config; /* protects attributes */ struct mutex lock; + /* matching descriptor */ + const struct xe_device_desc *desc; + /* tentative SR-IOV mode */ + enum xe_sriov_mode mode; +}; + +static const struct xe_config_device device_defaults = { + .gt_types_allowed = U64_MAX, + .engines_allowed = U64_MAX, + .survivability_mode = false, + .enable_psmi = false, + .sriov = { + .max_vfs = UINT_MAX, + }, }; +static void set_device_defaults(struct xe_config_device *config) +{ + *config = device_defaults; +#ifdef CONFIG_PCI_IOV + config->sriov.max_vfs = xe_modparam.max_vfs; +#endif +} + +struct engine_info { + const char *cls; + u64 mask; + enum xe_engine_class engine_class; +}; + +/* Some helpful macros to aid on the sizing of buffer allocation when parsing */ +#define MAX_ENGINE_CLASS_CHARS 5 +#define MAX_ENGINE_INSTANCE_CHARS 2 + +static const struct engine_info engine_info[] = { + { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK, .engine_class = XE_ENGINE_CLASS_RENDER }, + { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK, .engine_class = XE_ENGINE_CLASS_COPY }, + { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK, .engine_class = XE_ENGINE_CLASS_VIDEO_DECODE }, + { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK, .engine_class = 
XE_ENGINE_CLASS_VIDEO_ENHANCE }, + { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK, .engine_class = XE_ENGINE_CLASS_COMPUTE }, + { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK, .engine_class = XE_ENGINE_CLASS_OTHER }, +}; + +static const struct { + const char *name; + enum xe_gt_type type; +} gt_types[] = { + { .name = "primary", .type = XE_GT_TYPE_MAIN }, + { .name = "media", .type = XE_GT_TYPE_MEDIA }, +}; + +static struct xe_config_group_device *to_xe_config_group_device(struct config_item *item) +{ + return container_of(to_config_group(item), struct xe_config_group_device, group); +} + static struct xe_config_device *to_xe_config_device(struct config_item *item) { - return container_of(to_config_group(item), struct xe_config_device, group); + return &to_xe_config_group_device(item)->config; +} + +static bool is_bound(struct xe_config_group_device *dev) +{ + unsigned int domain, bus, slot, function; + struct pci_dev *pdev; + const char *name; + bool ret; + + lockdep_assert_held(&dev->lock); + + name = dev->group.cg_item.ci_name; + if (sscanf(name, "%x:%x:%x.%x", &domain, &bus, &slot, &function) != 4) + return false; + + pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function)); + if (!pdev) + return false; + + ret = pci_get_drvdata(pdev); + pci_dev_put(pdev); + + if (ret) + pci_dbg(pdev, "Already bound to driver\n"); + + return ret; } static ssize_t survivability_mode_show(struct config_item *item, char *page) @@ -79,7 +364,7 @@ static ssize_t survivability_mode_show(struct config_item *item, char *page) static ssize_t survivability_mode_store(struct config_item *item, const char *page, size_t len) { - struct xe_config_device *dev = to_xe_config_device(item); + struct xe_config_group_device *dev = to_xe_config_group_device(item); bool survivability_mode; int ret; @@ -87,25 +372,466 @@ static ssize_t survivability_mode_store(struct config_item *item, const char *pa if (ret) return ret; - mutex_lock(&dev->lock); - dev->survivability_mode = 
survivability_mode; - mutex_unlock(&dev->lock); + guard(mutex)(&dev->lock); + if (is_bound(dev)) + return -EBUSY; + + dev->config.survivability_mode = survivability_mode; + + return len; +} + +static ssize_t gt_types_allowed_show(struct config_item *item, char *page) +{ + struct xe_config_device *dev = to_xe_config_device(item); + char *p = page; + + for (size_t i = 0; i < ARRAY_SIZE(gt_types); i++) + if (dev->gt_types_allowed & BIT_ULL(gt_types[i].type)) + p += sprintf(p, "%s\n", gt_types[i].name); + + return p - page; +} + +static ssize_t gt_types_allowed_store(struct config_item *item, const char *page, + size_t len) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + char *buf __free(kfree) = kstrdup(page, GFP_KERNEL); + char *p = buf; + u64 typemask = 0; + + if (!buf) + return -ENOMEM; + + while (p) { + char *typename = strsep(&p, ",\n"); + bool matched = false; + + if (typename[0] == '\0') + continue; + + for (size_t i = 0; i < ARRAY_SIZE(gt_types); i++) { + if (strcmp(typename, gt_types[i].name) == 0) { + typemask |= BIT(gt_types[i].type); + matched = true; + break; + } + } + + if (!matched) + return -EINVAL; + } + + guard(mutex)(&dev->lock); + if (is_bound(dev)) + return -EBUSY; + + dev->config.gt_types_allowed = typemask; + + return len; +} + +static ssize_t engines_allowed_show(struct config_item *item, char *page) +{ + struct xe_config_device *dev = to_xe_config_device(item); + char *p = page; + + for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) { + u64 mask = engine_info[i].mask; + + if ((dev->engines_allowed & mask) == mask) { + p += sprintf(p, "%s*\n", engine_info[i].cls); + } else if (mask & dev->engines_allowed) { + u16 bit0 = __ffs64(mask), bit; + + mask &= dev->engines_allowed; + + for_each_set_bit(bit, (const unsigned long *)&mask, 64) + p += sprintf(p, "%s%u\n", engine_info[i].cls, + bit - bit0); + } + } + + return p - page; +} + +/* + * Lookup engine_info. 
If @mask is not NULL, reduce the mask according to the + * instance in @pattern. + * + * Examples of inputs: + * - lookup_engine_info("rcs0", &mask): return "rcs" entry from @engine_info and + * mask == BIT_ULL(XE_HW_ENGINE_RCS0) + * - lookup_engine_info("rcs*", &mask): return "rcs" entry from @engine_info and + * mask == XE_HW_ENGINE_RCS_MASK + * - lookup_engine_info("rcs", NULL): return "rcs" entry from @engine_info + */ +static const struct engine_info *lookup_engine_info(const char *pattern, u64 *mask) +{ + for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) { + u8 instance; + u16 bit; + + if (!str_has_prefix(pattern, engine_info[i].cls)) + continue; + + pattern += strlen(engine_info[i].cls); + if (!mask) + return *pattern ? NULL : &engine_info[i]; + + if (!strcmp(pattern, "*")) { + *mask = engine_info[i].mask; + return &engine_info[i]; + } + + if (kstrtou8(pattern, 10, &instance)) + return NULL; + + bit = __ffs64(engine_info[i].mask) + instance; + if (bit >= fls64(engine_info[i].mask)) + return NULL; + + *mask = BIT_ULL(bit); + return &engine_info[i]; + } + + return NULL; +} + +static int parse_engine(const char *s, const char *end_chars, u64 *mask, + const struct engine_info **pinfo) +{ + char buf[MAX_ENGINE_CLASS_CHARS + MAX_ENGINE_INSTANCE_CHARS + 1]; + const struct engine_info *info; + size_t len; + + len = strcspn(s, end_chars); + if (len >= sizeof(buf)) + return -EINVAL; + + memcpy(buf, s, len); + buf[len] = '\0'; + + info = lookup_engine_info(buf, mask); + if (!info) + return -ENOENT; + + if (pinfo) + *pinfo = info; + + return len; +} + +static ssize_t engines_allowed_store(struct config_item *item, const char *page, + size_t len) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + ssize_t patternlen, p; + u64 mask, val = 0; + + for (p = 0; p < len; p += patternlen + 1) { + patternlen = parse_engine(page + p, ",\n", &mask, NULL); + if (patternlen < 0) + return -EINVAL; + + val |= mask; + } + + guard(mutex)(&dev->lock); + if 
(is_bound(dev)) + return -EBUSY; + + dev->config.engines_allowed = val; + + return len; +} + +static ssize_t enable_psmi_show(struct config_item *item, char *page) +{ + struct xe_config_device *dev = to_xe_config_device(item); + + return sprintf(page, "%d\n", dev->enable_psmi); +} + +static ssize_t enable_psmi_store(struct config_item *item, const char *page, size_t len) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + bool val; + int ret; + + ret = kstrtobool(page, &val); + if (ret) + return ret; + + guard(mutex)(&dev->lock); + if (is_bound(dev)) + return -EBUSY; + + dev->config.enable_psmi = val; + + return len; +} + +static bool wa_bb_read_advance(bool dereference, char **p, + const char *append, size_t len, + size_t *max_size) +{ + if (dereference) { + if (len >= *max_size) + return false; + *max_size -= len; + if (append) + memcpy(*p, append, len); + } + + *p += len; + + return true; +} + +static ssize_t wa_bb_show(struct xe_config_group_device *dev, + struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX], + char *data, size_t sz) +{ + char *p = data; + + guard(mutex)(&dev->lock); + + for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) { + enum xe_engine_class ec = engine_info[i].engine_class; + size_t len; + + if (!wa_bb[ec].len) + continue; + + len = snprintf(p, sz, "%s:", engine_info[i].cls); + if (!wa_bb_read_advance(data, &p, NULL, len, &sz)) + return -ENOBUFS; + + for (size_t j = 0; j < wa_bb[ec].len; j++) { + len = snprintf(p, sz, " %08x", wa_bb[ec].cs[j]); + if (!wa_bb_read_advance(data, &p, NULL, len, &sz)) + return -ENOBUFS; + } + + if (!wa_bb_read_advance(data, &p, "\n", 1, &sz)) + return -ENOBUFS; + } + + if (!wa_bb_read_advance(data, &p, "", 1, &sz)) + return -ENOBUFS; + + /* Reserve one more to match check for '\0' */ + if (!data) + p++; + + return p - data; +} + +static ssize_t ctx_restore_mid_bb_show(struct config_item *item, char *page) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + + return 
wa_bb_show(dev, dev->config.ctx_restore_mid_bb, page, SZ_4K); +} + +static ssize_t ctx_restore_post_bb_show(struct config_item *item, char *page) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + + return wa_bb_show(dev, dev->config.ctx_restore_post_bb, page, SZ_4K); +} + +static void wa_bb_append(struct wa_bb *wa_bb, u32 val) +{ + if (wa_bb->cs) + wa_bb->cs[wa_bb->len] = val; + + wa_bb->len++; +} + +static ssize_t parse_hex(const char *line, u32 *pval) +{ + char numstr[12]; + const char *p; + ssize_t numlen; + + p = line + strspn(line, " \t"); + if (!*p || *p == '\n') + return 0; + + numlen = strcspn(p, " \t\n"); + if (!numlen || numlen >= sizeof(numstr) - 1) + return -EINVAL; + + memcpy(numstr, p, numlen); + numstr[numlen] = '\0'; + p += numlen; + + if (kstrtou32(numstr, 16, pval)) + return -EINVAL; + + return p - line; +} + +/* + * Parse lines with the format + * + * <engine-class> cmd <u32> <u32...> + * <engine-class> reg <u32_addr> <u32_val> + * + * and optionally save them in @wa_bb[i].cs if it is non-NULL. + * + * Return the number of dwords parsed. 
+ */ +static ssize_t parse_wa_bb_lines(const char *lines, + struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX]) +{ + ssize_t dwords = 0, ret; + const char *p; + + for (p = lines; *p; p++) { + const struct engine_info *info = NULL; + u32 val, val2; + + /* Also allow empty lines */ + p += strspn(p, " \t\n"); + if (!*p) + break; + + ret = parse_engine(p, " \t\n", NULL, &info); + if (ret < 0) + return ret; + + p += ret; + p += strspn(p, " \t"); + + if (str_has_prefix(p, "cmd")) { + for (p += strlen("cmd"); *p;) { + ret = parse_hex(p, &val); + if (ret < 0) + return -EINVAL; + if (!ret) + break; + + p += ret; + dwords++; + wa_bb_append(&wa_bb[info->engine_class], val); + } + } else if (str_has_prefix(p, "reg")) { + p += strlen("reg"); + ret = parse_hex(p, &val); + if (ret <= 0) + return -EINVAL; + + p += ret; + ret = parse_hex(p, &val2); + if (ret <= 0) + return -EINVAL; + + p += ret; + dwords += 3; + wa_bb_append(&wa_bb[info->engine_class], + MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1)); + wa_bb_append(&wa_bb[info->engine_class], val); + wa_bb_append(&wa_bb[info->engine_class], val2); + } else { + return -EINVAL; + } + } + + return dwords; +} + +static ssize_t wa_bb_store(struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX], + struct xe_config_group_device *dev, + const char *page, size_t len) +{ + /* tmp_wa_bb must match wa_bb's size */ + struct wa_bb tmp_wa_bb[XE_ENGINE_CLASS_MAX] = { }; + ssize_t count, class; + u32 *tmp; + + /* 1. Count dwords - wa_bb[i].cs is NULL for all classes */ + count = parse_wa_bb_lines(page, tmp_wa_bb); + if (count < 0) + return count; + + guard(mutex)(&dev->lock); + + if (is_bound(dev)) + return -EBUSY; + + /* + * 2. 
Allocate a u32 array and set the pointers to the right positions + * according to the length of each class' wa_bb + */ + tmp = krealloc(wa_bb[0].cs, count * sizeof(u32), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + if (!count) { + memset(wa_bb, 0, sizeof(tmp_wa_bb)); + return len; + } + + for (class = 0, count = 0; class < XE_ENGINE_CLASS_MAX; ++class) { + tmp_wa_bb[class].cs = tmp + count; + count += tmp_wa_bb[class].len; + tmp_wa_bb[class].len = 0; + } + + /* 3. Parse wa_bb lines again, this time saving the values */ + count = parse_wa_bb_lines(page, tmp_wa_bb); + if (count < 0) + return count; + + memcpy(wa_bb, tmp_wa_bb, sizeof(tmp_wa_bb)); return len; } +static ssize_t ctx_restore_mid_bb_store(struct config_item *item, + const char *data, size_t sz) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + + return wa_bb_store(dev->config.ctx_restore_mid_bb, dev, data, sz); +} + +static ssize_t ctx_restore_post_bb_store(struct config_item *item, + const char *data, size_t sz) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + + return wa_bb_store(dev->config.ctx_restore_post_bb, dev, data, sz); +} + +CONFIGFS_ATTR(, ctx_restore_mid_bb); +CONFIGFS_ATTR(, ctx_restore_post_bb); +CONFIGFS_ATTR(, enable_psmi); +CONFIGFS_ATTR(, engines_allowed); +CONFIGFS_ATTR(, gt_types_allowed); CONFIGFS_ATTR(, survivability_mode); static struct configfs_attribute *xe_config_device_attrs[] = { + &attr_ctx_restore_mid_bb, + &attr_ctx_restore_post_bb, + &attr_enable_psmi, + &attr_engines_allowed, + &attr_gt_types_allowed, &attr_survivability_mode, NULL, }; static void xe_config_device_release(struct config_item *item) { - struct xe_config_device *dev = to_xe_config_device(item); + struct xe_config_group_device *dev = to_xe_config_group_device(item); mutex_destroy(&dev->lock); + + kfree(dev->config.ctx_restore_post_bb[0].cs); kfree(dev); } @@ -113,45 +839,192 @@ static struct configfs_item_operations xe_config_device_ops = { .release = 
xe_config_device_release, }; +static bool xe_config_device_is_visible(struct config_item *item, + struct configfs_attribute *attr, int n) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + + if (attr == &attr_survivability_mode) { + if (!dev->desc->is_dgfx || dev->desc->platform < XE_BATTLEMAGE) + return false; + } + + return true; +} + +static struct configfs_group_operations xe_config_device_group_ops = { + .is_visible = xe_config_device_is_visible, +}; + static const struct config_item_type xe_config_device_type = { .ct_item_ops = &xe_config_device_ops, + .ct_group_ops = &xe_config_device_group_ops, .ct_attrs = xe_config_device_attrs, .ct_owner = THIS_MODULE, }; +static ssize_t sriov_max_vfs_show(struct config_item *item, char *page) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item->ci_parent); + + guard(mutex)(&dev->lock); + + if (dev->config.sriov.max_vfs == UINT_MAX) + return sprintf(page, "%s\n", "unlimited"); + else + return sprintf(page, "%u\n", dev->config.sriov.max_vfs); +} + +static ssize_t sriov_max_vfs_store(struct config_item *item, const char *page, size_t len) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item->ci_parent); + unsigned int max_vfs; + int ret; + + guard(mutex)(&dev->lock); + + if (is_bound(dev)) + return -EBUSY; + + ret = kstrtouint(page, 0, &max_vfs); + if (ret) { + if (!sysfs_streq(page, "unlimited")) + return ret; + max_vfs = UINT_MAX; + } + + dev->config.sriov.max_vfs = max_vfs; + return len; +} + +CONFIGFS_ATTR(sriov_, max_vfs); + +static struct configfs_attribute *xe_config_sriov_attrs[] = { + &sriov_attr_max_vfs, + NULL, +}; + +static bool xe_config_sriov_is_visible(struct config_item *item, + struct configfs_attribute *attr, int n) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item->ci_parent); + + if (attr == &sriov_attr_max_vfs && dev->mode != XE_SRIOV_MODE_PF) + return false; + + return true; +} + +static struct 
configfs_group_operations xe_config_sriov_group_ops = { + .is_visible = xe_config_sriov_is_visible, +}; + +static const struct config_item_type xe_config_sriov_type = { + .ct_owner = THIS_MODULE, + .ct_group_ops = &xe_config_sriov_group_ops, + .ct_attrs = xe_config_sriov_attrs, +}; + +static const struct xe_device_desc *xe_match_desc(struct pci_dev *pdev) +{ + struct device_driver *driver = driver_find("xe", &pci_bus_type); + struct pci_driver *drv = to_pci_driver(driver); + const struct pci_device_id *ids = drv ? drv->id_table : NULL; + const struct pci_device_id *found = pci_match_id(ids, pdev); + + return found ? (const void *)found->driver_data : NULL; +} + +static struct pci_dev *get_physfn_instead(struct pci_dev *virtfn) +{ + struct pci_dev *physfn = pci_physfn(virtfn); + + pci_dev_get(physfn); + pci_dev_put(virtfn); + return physfn; +} + static struct config_group *xe_config_make_device_group(struct config_group *group, const char *name) { unsigned int domain, bus, slot, function; - struct xe_config_device *dev; + struct xe_config_group_device *dev; + const struct xe_device_desc *match; + enum xe_sriov_mode mode; struct pci_dev *pdev; + char canonical[16]; + int vfnumber = 0; int ret; - ret = sscanf(name, "%04x:%02x:%02x.%x", &domain, &bus, &slot, &function); + ret = sscanf(name, "%x:%x:%x.%x", &domain, &bus, &slot, &function); if (ret != 4) return ERR_PTR(-EINVAL); + ret = scnprintf(canonical, sizeof(canonical), "%04x:%02x:%02x.%d", domain, bus, + PCI_SLOT(PCI_DEVFN(slot, function)), + PCI_FUNC(PCI_DEVFN(slot, function))); + if (ret != 12 || strcmp(name, canonical)) + return ERR_PTR(-EINVAL); + pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function)); + mode = pdev ? dev_is_pf(&pdev->dev) ? 
+ XE_SRIOV_MODE_PF : XE_SRIOV_MODE_NONE : XE_SRIOV_MODE_VF; + + if (!pdev && function) + pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, 0)); + if (!pdev && slot) + pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(0, 0)); if (!pdev) - return ERR_PTR(-EINVAL); + return ERR_PTR(-ENODEV); + + if (PCI_DEVFN(slot, function) != pdev->devfn) { + pdev = get_physfn_instead(pdev); + vfnumber = PCI_DEVFN(slot, function) - pdev->devfn; + if (!dev_is_pf(&pdev->dev) || vfnumber > pci_sriov_get_totalvfs(pdev)) { + pci_dev_put(pdev); + return ERR_PTR(-ENODEV); + } + } + + match = xe_match_desc(pdev); + if (match && vfnumber && !match->has_sriov) { + pci_info(pdev, "xe driver does not support VFs on this device\n"); + match = NULL; + } else if (!match) { + pci_info(pdev, "xe driver does not support configuration of this device\n"); + } + + pci_dev_put(pdev); + + if (!match) + return ERR_PTR(-ENOENT); dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return ERR_PTR(-ENOMEM); + dev->desc = match; + dev->mode = match->has_sriov ? 
mode : XE_SRIOV_MODE_NONE; + + set_device_defaults(&dev->config); + config_group_init_type_name(&dev->group, name, &xe_config_device_type); + if (dev->mode != XE_SRIOV_MODE_NONE) { + config_group_init_type_name(&dev->sriov, "sriov", &xe_config_sriov_type); + configfs_add_default_group(&dev->sriov, &dev->group); + } mutex_init(&dev->lock); return &dev->group; } -static struct configfs_group_operations xe_config_device_group_ops = { +static struct configfs_group_operations xe_config_group_ops = { .make_group = xe_config_make_device_group, }; static const struct config_item_type xe_configfs_type = { - .ct_group_ops = &xe_config_device_group_ops, + .ct_group_ops = &xe_config_group_ops, .ct_owner = THIS_MODULE, }; @@ -164,87 +1037,255 @@ static struct configfs_subsystem xe_configfs = { }, }; -static struct xe_config_device *configfs_find_group(struct pci_dev *pdev) +static struct xe_config_group_device *find_xe_config_group_device(struct pci_dev *pdev) { struct config_item *item; - char name[64]; - - snprintf(name, sizeof(name), "%04x:%02x:%02x.%x", pci_domain_nr(pdev->bus), - pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); mutex_lock(&xe_configfs.su_mutex); - item = config_group_find_item(&xe_configfs.su_group, name); + item = config_group_find_item(&xe_configfs.su_group, pci_name(pdev)); mutex_unlock(&xe_configfs.su_mutex); if (!item) return NULL; - return to_xe_config_device(item); + return to_xe_config_group_device(item); +} + +static void dump_custom_dev_config(struct pci_dev *pdev, + struct xe_config_group_device *dev) +{ +#define PRI_CUSTOM_ATTR(fmt_, attr_) do { \ + if (dev->config.attr_ != device_defaults.attr_) \ + pci_info(pdev, "configfs: " __stringify(attr_) " = " fmt_ "\n", \ + dev->config.attr_); \ + } while (0) + + PRI_CUSTOM_ATTR("%llx", gt_types_allowed); + PRI_CUSTOM_ATTR("%llx", engines_allowed); + PRI_CUSTOM_ATTR("%d", enable_psmi); + PRI_CUSTOM_ATTR("%d", survivability_mode); + +#undef PRI_CUSTOM_ATTR +} + +/** + * 
xe_configfs_check_device() - Test if device was configured by configfs + * @pdev: the &pci_dev device to test + * + * Try to find the configfs group that belongs to the specified pci device + * and print a diagnostic message if different than the default value. + */ +void xe_configfs_check_device(struct pci_dev *pdev) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + + if (!dev) + return; + + /* memcmp here is safe as both are zero-initialized */ + if (memcmp(&dev->config, &device_defaults, sizeof(dev->config))) { + pci_info(pdev, "Found custom settings in configfs\n"); + dump_custom_dev_config(pdev, dev); + } + + config_group_put(&dev->group); } /** * xe_configfs_get_survivability_mode - get configfs survivability mode attribute * @pdev: pci device * - * find the configfs group that belongs to the pci device and return - * the survivability mode attribute - * - * Return: survivability mode if config group is found, false otherwise + * Return: survivability_mode attribute in configfs */ bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { - struct xe_config_device *dev = configfs_find_group(pdev); + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); bool mode; if (!dev) - return false; + return device_defaults.survivability_mode; - mode = dev->survivability_mode; - config_item_put(&dev->group.cg_item); + mode = dev->config.survivability_mode; + config_group_put(&dev->group); return mode; } +static u64 get_gt_types_allowed(struct pci_dev *pdev) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + u64 mask; + + if (!dev) + return device_defaults.gt_types_allowed; + + mask = dev->config.gt_types_allowed; + config_group_put(&dev->group); + + return mask; +} + +/** + * xe_configfs_primary_gt_allowed - determine whether primary GTs are supported + * @pdev: pci device + * + * Return: True if primary GTs are enabled, false if they have been disabled via + * configfs. 
+ */ +bool xe_configfs_primary_gt_allowed(struct pci_dev *pdev) +{ + return get_gt_types_allowed(pdev) & BIT_ULL(XE_GT_TYPE_MAIN); +} + +/** + * xe_configfs_media_gt_allowed - determine whether media GTs are supported + * @pdev: pci device + * + * Return: True if the media GTs are enabled, false if they have been disabled + * via configfs. + */ +bool xe_configfs_media_gt_allowed(struct pci_dev *pdev) +{ + return get_gt_types_allowed(pdev) & BIT_ULL(XE_GT_TYPE_MEDIA); +} + /** - * xe_configfs_clear_survivability_mode - clear configfs survivability mode attribute + * xe_configfs_get_engines_allowed - get engine allowed mask from configfs * @pdev: pci device * - * find the configfs group that belongs to the pci device and clear survivability - * mode attribute + * Return: engine mask with allowed engines set in configfs */ -void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) +u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) { - struct xe_config_device *dev = configfs_find_group(pdev); + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + u64 engines_allowed; if (!dev) - return; + return device_defaults.engines_allowed; + + engines_allowed = dev->config.engines_allowed; + config_group_put(&dev->group); + + return engines_allowed; +} + +/** + * xe_configfs_get_psmi_enabled - get configfs enable_psmi setting + * @pdev: pci device + * + * Return: enable_psmi setting in configfs + */ +bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + bool ret; + + if (!dev) + return false; - mutex_lock(&dev->lock); - dev->survivability_mode = 0; - mutex_unlock(&dev->lock); + ret = dev->config.enable_psmi; + config_group_put(&dev->group); - config_item_put(&dev->group.cg_item); + return ret; +} + +/** + * xe_configfs_get_ctx_restore_mid_bb - get configfs ctx_restore_mid_bb setting + * @pdev: pci device + * @class: hw engine class + * @cs: pointer to the bb to use - 
only valid during probe + * + * Return: Number of dwords used in the mid_ctx_restore setting in configfs + */ +u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, + enum xe_engine_class class, + const u32 **cs) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + u32 len; + + if (!dev) + return 0; + + if (cs) + *cs = dev->config.ctx_restore_mid_bb[class].cs; + + len = dev->config.ctx_restore_mid_bb[class].len; + config_group_put(&dev->group); + + return len; } +/** + * xe_configfs_get_ctx_restore_post_bb - get configfs ctx_restore_post_bb setting + * @pdev: pci device + * @class: hw engine class + * @cs: pointer to the bb to use - only valid during probe + * + * Return: Number of dwords used in the post_ctx_restore setting in configfs + */ +u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, + enum xe_engine_class class, + const u32 **cs) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + u32 len; + + if (!dev) + return 0; + + *cs = dev->config.ctx_restore_post_bb[class].cs; + len = dev->config.ctx_restore_post_bb[class].len; + config_group_put(&dev->group); + + return len; +} + +#ifdef CONFIG_PCI_IOV +/** + * xe_configfs_get_max_vfs() - Get number of VFs that could be managed + * @pdev: the &pci_dev device + * + * Find the configfs group that belongs to the PCI device and return maximum + * number of Virtual Functions (VFs) that could be managed by this device. + * If configfs group is not present, use value of max_vfs module parameter. + * + * Return: maximum number of VFs that could be managed. 
+ */ +unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + unsigned int max_vfs; + + if (!dev) + return xe_modparam.max_vfs; + + scoped_guard(mutex, &dev->lock) + max_vfs = dev->config.sriov.max_vfs; + + config_group_put(&dev->group); + + return max_vfs; +} +#endif + int __init xe_configfs_init(void) { - struct config_group *root = &xe_configfs.su_group; int ret; - config_group_init(root); + config_group_init(&xe_configfs.su_group); mutex_init(&xe_configfs.su_mutex); ret = configfs_register_subsystem(&xe_configfs); if (ret) { - pr_err("Error %d while registering %s subsystem\n", - ret, root->cg_item.ci_namebuf); + mutex_destroy(&xe_configfs.su_mutex); return ret; } return 0; } -void __exit xe_configfs_exit(void) +void xe_configfs_exit(void) { configfs_unregister_subsystem(&xe_configfs); + mutex_destroy(&xe_configfs.su_mutex); } - diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h index d7d041ec2611..fed57be0b90e 100644 --- a/drivers/gpu/drm/xe/xe_configfs.h +++ b/drivers/gpu/drm/xe/xe_configfs.h @@ -5,20 +5,43 @@ #ifndef _XE_CONFIGFS_H_ #define _XE_CONFIGFS_H_ +#include <linux/limits.h> #include <linux/types.h> +#include <xe_hw_engine_types.h> + struct pci_dev; #if IS_ENABLED(CONFIG_CONFIGFS_FS) int xe_configfs_init(void); void xe_configfs_exit(void); +void xe_configfs_check_device(struct pci_dev *pdev); bool xe_configfs_get_survivability_mode(struct pci_dev *pdev); -void xe_configfs_clear_survivability_mode(struct pci_dev *pdev); +bool xe_configfs_primary_gt_allowed(struct pci_dev *pdev); +bool xe_configfs_media_gt_allowed(struct pci_dev *pdev); +u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev); +bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev); +u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class, + const u32 **cs); +u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class, + 
const u32 **cs); +#ifdef CONFIG_PCI_IOV +unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev); +#endif #else -static inline int xe_configfs_init(void) { return 0; }; -static inline void xe_configfs_exit(void) {}; -static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; }; -static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) {}; +static inline int xe_configfs_init(void) { return 0; } +static inline void xe_configfs_exit(void) { } +static inline void xe_configfs_check_device(struct pci_dev *pdev) { } +static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; } +static inline bool xe_configfs_primary_gt_allowed(struct pci_dev *pdev) { return true; } +static inline bool xe_configfs_media_gt_allowed(struct pci_dev *pdev) { return true; } +static inline u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) { return U64_MAX; } +static inline bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev) { return false; } +static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class, + const u32 **cs) { return 0; } +static inline u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class, + const u32 **cs) { return 0; } +static inline unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev) { return UINT_MAX; } #endif #endif diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index d0503959a8ed..e91da9589c5f 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -11,16 +11,24 @@ #include <drm/drm_debugfs.h> +#include "regs/xe_pmt.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt_debugfs.h" #include "xe_gt_printk.h" #include "xe_guc_ads.h" +#include "xe_mmio.h" #include "xe_pm.h" +#include "xe_psmi.h" #include "xe_pxp_debugfs.h" #include "xe_sriov.h" +#include "xe_sriov_pf_debugfs.h" +#include "xe_sriov_vf.h" #include 
"xe_step.h" +#include "xe_tile_debugfs.h" +#include "xe_vsec.h" +#include "xe_wa.h" #ifdef CONFIG_DRM_XE_DEBUG #include "xe_bo_evict.h" @@ -29,6 +37,24 @@ #endif DECLARE_FAULT_ATTR(gt_reset_failure); +DECLARE_FAULT_ATTR(inject_csc_hw_error); + +static void read_residency_counter(struct xe_device *xe, struct xe_mmio *mmio, + u32 offset, const char *name, struct drm_printer *p) +{ + u64 residency = 0; + int ret; + + ret = xe_pmt_telem_read(to_pci_dev(xe->drm.dev), + xe_mmio_read32(mmio, PUNIT_TELEMETRY_GUID), + &residency, offset, sizeof(residency)); + if (ret != sizeof(residency)) { + drm_warn(&xe->drm, "%s counter failed to read, ret %d\n", name, ret); + return; + } + + drm_printf(p, "%s : %llu\n", name, residency); +} static struct xe_device *node_to_xe(struct drm_info_node *node) { @@ -82,9 +108,89 @@ static int sriov_info(struct seq_file *m, void *data) return 0; } +static int workarounds(struct xe_device *xe, struct drm_printer *p) +{ + xe_pm_runtime_get(xe); + xe_wa_device_dump(xe, p); + xe_pm_runtime_put(xe); + + return 0; +} + +static int workaround_info(struct seq_file *m, void *data) +{ + struct xe_device *xe = node_to_xe(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + workarounds(xe, &p); + return 0; +} + +static int dgfx_pkg_residencies_show(struct seq_file *m, void *data) +{ + struct xe_device *xe; + struct xe_mmio *mmio; + struct drm_printer p; + + xe = node_to_xe(m->private); + p = drm_seq_file_printer(m); + xe_pm_runtime_get(xe); + mmio = xe_root_tile_mmio(xe); + static const struct { + u32 offset; + const char *name; + } residencies[] = { + {BMG_G2_RESIDENCY_OFFSET, "Package G2"}, + {BMG_G6_RESIDENCY_OFFSET, "Package G6"}, + {BMG_G7_RESIDENCY_OFFSET, "Package G7"}, + {BMG_G8_RESIDENCY_OFFSET, "Package G8"}, + {BMG_G10_RESIDENCY_OFFSET, "Package G10"}, + {BMG_MODS_RESIDENCY_OFFSET, "Package ModS"} + }; + + for (int i = 0; i < ARRAY_SIZE(residencies); i++) + read_residency_counter(xe, mmio, residencies[i].offset, 
residencies[i].name, &p); + + xe_pm_runtime_put(xe); + return 0; +} + +static int dgfx_pcie_link_residencies_show(struct seq_file *m, void *data) +{ + struct xe_device *xe; + struct xe_mmio *mmio; + struct drm_printer p; + + xe = node_to_xe(m->private); + p = drm_seq_file_printer(m); + xe_pm_runtime_get(xe); + mmio = xe_root_tile_mmio(xe); + + static const struct { + u32 offset; + const char *name; + } residencies[] = { + {BMG_PCIE_LINK_L0_RESIDENCY_OFFSET, "PCIE LINK L0 RESIDENCY"}, + {BMG_PCIE_LINK_L1_RESIDENCY_OFFSET, "PCIE LINK L1 RESIDENCY"}, + {BMG_PCIE_LINK_L1_2_RESIDENCY_OFFSET, "PCIE LINK L1.2 RESIDENCY"} + }; + + for (int i = 0; i < ARRAY_SIZE(residencies); i++) + read_residency_counter(xe, mmio, residencies[i].offset, residencies[i].name, &p); + + xe_pm_runtime_put(xe); + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"info", info, 0}, { .name = "sriov_info", .show = sriov_info, }, + { .name = "workarounds", .show = workaround_info, }, +}; + +static const struct drm_info_list debugfs_residencies[] = { + { .name = "dgfx_pkg_residencies", .show = dgfx_pkg_residencies_show, }, + { .name = "dgfx_pcie_link_residencies", .show = dgfx_pcie_link_residencies_show, }, }; static int forcewake_open(struct inode *inode, struct file *file) @@ -191,26 +297,112 @@ static const struct file_operations wedged_mode_fops = { .write = wedged_mode_set, }; +static ssize_t atomic_svm_timeslice_ms_show(struct file *f, char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + char buf[32]; + int len = 0; + + len = scnprintf(buf, sizeof(buf), "%d\n", xe->atomic_svm_timeslice_ms); + + return simple_read_from_buffer(ubuf, size, pos, buf, len); +} + +static ssize_t atomic_svm_timeslice_ms_set(struct file *f, + const char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + u32 atomic_svm_timeslice_ms; + ssize_t ret; + + ret = kstrtouint_from_user(ubuf, size, 0, 
&atomic_svm_timeslice_ms); + if (ret) + return ret; + + xe->atomic_svm_timeslice_ms = atomic_svm_timeslice_ms; + + return size; +} + +static const struct file_operations atomic_svm_timeslice_ms_fops = { + .owner = THIS_MODULE, + .read = atomic_svm_timeslice_ms_show, + .write = atomic_svm_timeslice_ms_set, +}; + +static ssize_t disable_late_binding_show(struct file *f, char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + struct xe_late_bind *late_bind = &xe->late_bind; + char buf[32]; + int len; + + len = scnprintf(buf, sizeof(buf), "%d\n", late_bind->disable); + + return simple_read_from_buffer(ubuf, size, pos, buf, len); +} + +static ssize_t disable_late_binding_set(struct file *f, const char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + struct xe_late_bind *late_bind = &xe->late_bind; + bool val; + int ret; + + ret = kstrtobool_from_user(ubuf, size, &val); + if (ret) + return ret; + + late_bind->disable = val; + return size; +} + +static const struct file_operations disable_late_binding_fops = { + .owner = THIS_MODULE, + .read = disable_late_binding_show, + .write = disable_late_binding_set, +}; + void xe_debugfs_register(struct xe_device *xe) { struct ttm_device *bdev = &xe->ttm; struct drm_minor *minor = xe->drm.primary; struct dentry *root = minor->debugfs_root; struct ttm_resource_manager *man; + struct xe_tile *tile; struct xe_gt *gt; u32 mem_type; + u8 tile_id; u8 id; drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), root, minor); + if (xe->info.platform == XE_BATTLEMAGE && !IS_SRIOV_VF(xe)) { + drm_debugfs_create_files(debugfs_residencies, + ARRAY_SIZE(debugfs_residencies), + root, minor); + fault_create_debugfs_attr("inject_csc_hw_error", root, + &inject_csc_hw_error); + } + debugfs_create_file("forcewake_all", 0400, root, xe, &forcewake_all_fops); debugfs_create_file("wedged_mode", 0600, root, xe, &wedged_mode_fops); + 
debugfs_create_file("atomic_svm_timeslice_ms", 0600, root, xe, + &atomic_svm_timeslice_ms_fops); + + debugfs_create_file("disable_late_binding", 0600, root, xe, + &disable_late_binding_fops); + for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { man = ttm_manager_type(bdev, mem_type); @@ -229,10 +421,20 @@ void xe_debugfs_register(struct xe_device *xe) if (man) ttm_resource_manager_create_debugfs(man, root, "stolen_mm"); + for_each_tile(tile, xe, tile_id) + xe_tile_debugfs_register(tile); + for_each_gt(gt, xe, id) xe_gt_debugfs_register(gt); xe_pxp_debugfs_register(xe->pxp); + xe_psmi_debugfs_register(xe); + fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure); + + if (IS_SRIOV_PF(xe)) + xe_sriov_pf_debugfs_register(xe, root); + else if (IS_SRIOV_VF(xe)) + xe_sriov_vf_debugfs_register(xe, root); } diff --git a/drivers/gpu/drm/xe/xe_dep_job_types.h b/drivers/gpu/drm/xe/xe_dep_job_types.h new file mode 100644 index 000000000000..c6a484f24c8c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_dep_job_types.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_DEP_JOB_TYPES_H_ +#define _XE_DEP_JOB_TYPES_H_ + +#include <drm/gpu_scheduler.h> + +struct xe_dep_job; + +/** struct xe_dep_job_ops - Generic Xe dependency job operations */ +struct xe_dep_job_ops { + /** @run_job: Run generic Xe dependency job */ + struct dma_fence *(*run_job)(struct xe_dep_job *job); + /** @free_job: Free generic Xe dependency job */ + void (*free_job)(struct xe_dep_job *job); +}; + +/** struct xe_dep_job - Generic dependency Xe job */ +struct xe_dep_job { + /** @drm: base DRM scheduler job */ + struct drm_sched_job drm; + /** @ops: dependency job operations */ + const struct xe_dep_job_ops *ops; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_dep_scheduler.c b/drivers/gpu/drm/xe/xe_dep_scheduler.c new file mode 100644 index 000000000000..9bd3bfd2e526 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_dep_scheduler.c @@ 
-0,0 +1,143 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/slab.h> + +#include <drm/gpu_scheduler.h> + +#include "xe_dep_job_types.h" +#include "xe_dep_scheduler.h" +#include "xe_device_types.h" + +/** + * DOC: Xe Dependency Scheduler + * + * The Xe dependency scheduler is a simple wrapper built around the DRM + * scheduler to execute jobs once their dependencies are resolved (i.e., all + * input fences specified as dependencies are signaled). The jobs that are + * executed contain virtual functions to run (execute) and free the job, + * allowing a single dependency scheduler to handle jobs performing different + * operations. + * + * Example use cases include deferred resource freeing, TLB invalidations after + * bind jobs, etc. + */ + +/** struct xe_dep_scheduler - Generic Xe dependency scheduler */ +struct xe_dep_scheduler { + /** @sched: DRM GPU scheduler */ + struct drm_gpu_scheduler sched; + /** @entity: DRM scheduler entity */ + struct drm_sched_entity entity; + /** @rcu: For safe freeing of exported dma fences */ + struct rcu_head rcu; +}; + +static struct dma_fence *xe_dep_scheduler_run_job(struct drm_sched_job *drm_job) +{ + struct xe_dep_job *dep_job = + container_of(drm_job, typeof(*dep_job), drm); + + return dep_job->ops->run_job(dep_job); +} + +static void xe_dep_scheduler_free_job(struct drm_sched_job *drm_job) +{ + struct xe_dep_job *dep_job = + container_of(drm_job, typeof(*dep_job), drm); + + dep_job->ops->free_job(dep_job); +} + +static const struct drm_sched_backend_ops sched_ops = { + .run_job = xe_dep_scheduler_run_job, + .free_job = xe_dep_scheduler_free_job, +}; + +/** + * xe_dep_scheduler_create() - Generic Xe dependency scheduler create + * @xe: Xe device + * @submit_wq: Submit workqueue struct (can be NULL) + * @name: Name of dependency scheduler + * @job_limit: Max dependency jobs that can be scheduled + * + * Create a generic Xe dependency scheduler and initialize internal DRM + * 
scheduler objects. + * + * Return: Generic Xe dependency scheduler object on success, ERR_PTR failure + */ +struct xe_dep_scheduler * +xe_dep_scheduler_create(struct xe_device *xe, + struct workqueue_struct *submit_wq, + const char *name, u32 job_limit) +{ + struct xe_dep_scheduler *dep_scheduler; + struct drm_gpu_scheduler *sched; + const struct drm_sched_init_args args = { + .ops = &sched_ops, + .submit_wq = submit_wq, + .num_rqs = 1, + .credit_limit = job_limit, + .timeout = MAX_SCHEDULE_TIMEOUT, + .name = name, + .dev = xe->drm.dev, + }; + int err; + + dep_scheduler = kzalloc(sizeof(*dep_scheduler), GFP_KERNEL); + if (!dep_scheduler) + return ERR_PTR(-ENOMEM); + + err = drm_sched_init(&dep_scheduler->sched, &args); + if (err) + goto err_free; + + sched = &dep_scheduler->sched; + err = drm_sched_entity_init(&dep_scheduler->entity, 0, &sched, 1, NULL); + if (err) + goto err_sched; + + init_rcu_head(&dep_scheduler->rcu); + + return dep_scheduler; + +err_sched: + drm_sched_fini(&dep_scheduler->sched); +err_free: + kfree(dep_scheduler); + + return ERR_PTR(err); +} + +/** + * xe_dep_scheduler_fini() - Generic Xe dependency scheduler finalize + * @dep_scheduler: Generic Xe dependency scheduler object + * + * Finalize internal DRM scheduler objects and free generic Xe dependency + * scheduler object + */ +void xe_dep_scheduler_fini(struct xe_dep_scheduler *dep_scheduler) +{ + drm_sched_entity_fini(&dep_scheduler->entity); + drm_sched_fini(&dep_scheduler->sched); + /* + * RCU free due sched being exported via DRM scheduler fences + * (timeline name). 
+ */ + kfree_rcu(dep_scheduler, rcu); +} + +/** + * xe_dep_scheduler_entity() - Retrieve a generic Xe dependency scheduler + * DRM scheduler entity + * @dep_scheduler: Generic Xe dependency scheduler object + * + * Return: The generic Xe dependency scheduler's DRM scheduler entity + */ +struct drm_sched_entity * +xe_dep_scheduler_entity(struct xe_dep_scheduler *dep_scheduler) +{ + return &dep_scheduler->entity; +} diff --git a/drivers/gpu/drm/xe/xe_dep_scheduler.h b/drivers/gpu/drm/xe/xe_dep_scheduler.h new file mode 100644 index 000000000000..853961eec64b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_dep_scheduler.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/types.h> + +struct drm_sched_entity; +struct workqueue_struct; +struct xe_dep_scheduler; +struct xe_device; + +struct xe_dep_scheduler * +xe_dep_scheduler_create(struct xe_device *xe, + struct workqueue_struct *submit_wq, + const char *name, u32 job_limit); + +void xe_dep_scheduler_fini(struct xe_dep_scheduler *dep_scheduler); + +struct drm_sched_entity * +xe_dep_scheduler_entity(struct xe_dep_scheduler *dep_scheduler); diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 7a8af2311318..d444eda65ca6 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -106,9 +106,9 @@ static ssize_t __xe_devcoredump_read(char *buffer, ssize_t count, drm_puts(&p, "module: " KBUILD_MODNAME "\n"); ts = ktime_to_timespec64(ss->snapshot_time); - drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); + drm_printf(&p, "Snapshot time: %ptSp\n", &ts); ts = ktime_to_timespec64(ss->boot_time); - drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); + drm_printf(&p, "Uptime: %ptSp\n", &ts); drm_printf(&p, "Process: %s [%d]\n", ss->process_name, ss->pid); xe_device_snapshot_print(xe, &p); @@ -171,14 +171,32 @@ static void xe_devcoredump_snapshot_free(struct 
xe_devcoredump_snapshot *ss) #define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G) +/** + * xe_devcoredump_read() - Read data from the Xe device coredump snapshot + * @buffer: Destination buffer to copy the coredump data into + * @offset: Offset in the coredump data to start reading from + * @count: Number of bytes to read + * @data: Pointer to the xe_devcoredump structure + * @datalen: Length of the data (unused) + * + * Reads a chunk of the coredump snapshot data into the provided buffer. + * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX), + * it is read directly from a pre-written buffer. For larger devcoredumps, + * the pre-written buffer must be periodically repopulated from the snapshot + * state due to kmalloc size limitations. + * + * Return: Number of bytes copied on success, or a negative error code on failure. + */ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { struct xe_devcoredump *coredump = data; struct xe_devcoredump_snapshot *ss; - ssize_t byte_copied; + ssize_t byte_copied = 0; u32 chunk_offset; ssize_t new_chunk_position; + bool pm_needed = false; + int ret = 0; if (!coredump) return -ENODEV; @@ -188,20 +206,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX; + if (pm_needed) xe_pm_runtime_get(gt_to_xe(ss->gt)); mutex_lock(&coredump->lock); if (!ss->read.buffer) { - mutex_unlock(&coredump->lock); - return -ENODEV; + ret = -ENODEV; + goto unlock; } - if (offset >= ss->read.size) { - mutex_unlock(&coredump->lock); - return 0; - } + if (offset >= ss->read.size) + goto unlock; new_chunk_position = div_u64_rem(offset, XE_DEVCOREDUMP_CHUNK_MAX, @@ -221,12 +238,13 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, ss->read.size - offset; memcpy(buffer, 
ss->read.buffer + chunk_offset, byte_copied); +unlock: mutex_unlock(&coredump->lock); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + if (pm_needed) xe_pm_runtime_put(gt_to_xe(ss->gt)); - return byte_copied; + return byte_copied ? byte_copied : ret; } static void xe_devcoredump_free(void *data) @@ -313,13 +331,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, { struct xe_devcoredump_snapshot *ss = &coredump->snapshot; struct xe_guc *guc = exec_queue_to_guc(q); - u32 adj_logical_mask = q->logical_mask; - u32 width_mask = (0x1 << q->width) - 1; const char *process_name = "no process"; - unsigned int fw_ref; bool cookie; - int i; ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); @@ -335,14 +349,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work); cookie = dma_fence_begin_signalling(); - for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { - if (adj_logical_mask & BIT(i)) { - adj_logical_mask |= width_mask << i; - i += q->width; - } else { - ++i; - } - } /* keep going if fw fails as we still want to save the memory and SW data */ fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index c02c4c4e9412..cf29e259861f 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -8,6 +8,7 @@ #include <linux/aperture.h> #include <linux/delay.h> #include <linux/fault-inject.h> +#include <linux/iopoll.h> #include <linux/units.h> #include <drm/drm_atomic_helper.h> @@ -40,18 +41,23 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_guc_pc.h" #include "xe_hw_engine_group.h" #include "xe_hwmon.h" +#include "xe_i2c.h" #include "xe_irq.h" -#include "xe_memirq.h" +#include "xe_late_bind_fw.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_nvm.h" #include "xe_oa.h" #include "xe_observation.h" 
+#include "xe_pagefault.h" #include "xe_pat.h" #include "xe_pcode.h" #include "xe_pm.h" #include "xe_pmu.h" +#include "xe_psmi.h" #include "xe_pxp.h" #include "xe_query.h" #include "xe_shrinker.h" @@ -61,11 +67,14 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_sys_mgr.h" #include "xe_vm.h" +#include "xe_vm_madvise.h" #include "xe_vram.h" +#include "xe_vram_types.h" #include "xe_vsec.h" #include "xe_wait_user_fence.h" #include "xe_wa.h" +#include <generated/xe_device_wa_oob.h> #include <generated/xe_wa_oob.h> static int xe_file_open(struct drm_device *dev, struct drm_file *file) @@ -197,6 +206,9 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_VM_QUERY_MEM_RANGE_ATTRS, xe_vm_query_vmas_attrs_ioctl, + DRM_RENDER_ALLOW), }; static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -402,9 +414,6 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy) if (xe->unordered_wq) destroy_workqueue(xe->unordered_wq); - if (!IS_ERR_OR_NULL(xe->mem.shrinker)) - xe_shrinker_destroy(xe->mem.shrinker); - if (xe->destroy_wq) destroy_workqueue(xe->destroy_wq); @@ -429,7 +438,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev, xe->drm.anon_inode->i_mapping, - xe->drm.vma_offset_manager, false, false); + xe->drm.vma_offset_manager, 0); if (WARN_ON(err)) goto err; @@ -438,18 +447,21 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, if (err) goto err; - xe->mem.shrinker = xe_shrinker_create(xe); - if (IS_ERR(xe->mem.shrinker)) - return ERR_CAST(xe->mem.shrinker); + err = xe_shrinker_create(xe); + if (err) + goto err; xe->info.devid = pdev->device; xe->info.revid = pdev->revision; xe->info.force_execlist = 
xe_modparam.force_execlist; + xe->atomic_svm_timeslice_ms = 5; err = xe_irq_init(xe); if (err) goto err; + xe_validation_device_init(&xe->val); + init_waitqueue_head(&xe->ufence_wq); init_rwsem(&xe->usm.lock); @@ -493,10 +505,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, if (err) goto err; - err = xe_display_create(xe); - if (WARN_ON(err)) - goto err; - return xe; err: @@ -527,7 +535,7 @@ static bool xe_driver_flr_disabled(struct xe_device *xe) * re-init and saving/restoring (or re-populating) the wiped memory. Since we * perform the FLR as the very last action before releasing access to the HW * during the driver release flow, we don't attempt recovery at all, because - * if/when a new instance of i915 is bound to the device it will do a full + * if/when a new instance of Xe is bound to the device it will do a full * re-init anyway. */ static void __xe_driver_flr(struct xe_device *xe) @@ -624,16 +632,22 @@ mask_err: return err; } -static bool verify_lmem_ready(struct xe_device *xe) +static int lmem_initializing(struct xe_device *xe) { - u32 val = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT; + if (xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT) + return 0; - return !!val; + if (signal_pending(current)) + return -EINTR; + + return 1; } static int wait_for_lmem_ready(struct xe_device *xe) { - unsigned long timeout, start; + const unsigned long TIMEOUT_SEC = 60; + unsigned long prev_jiffies; + int initializing; if (!IS_DGFX(xe)) return 0; @@ -641,53 +655,65 @@ static int wait_for_lmem_ready(struct xe_device *xe) if (IS_SRIOV_VF(xe)) return 0; - if (verify_lmem_ready(xe)) + if (!lmem_initializing(xe)) return 0; drm_dbg(&xe->drm, "Waiting for lmem initialization\n"); + prev_jiffies = jiffies; - start = jiffies; - timeout = start + secs_to_jiffies(60); /* 60 sec! */ - - do { - if (signal_pending(current)) - return -EINTR; - - /* - * The boot firmware initializes local memory and - * assesses its health. 
If memory training fails, - * the punit will have been instructed to keep the GT powered - * down.we won't be able to communicate with it - * - * If the status check is done before punit updates the register, - * it can lead to the system being unusable. - * use a timeout and defer the probe to prevent this. - */ - if (time_after(jiffies, timeout)) { - drm_dbg(&xe->drm, "lmem not initialized by firmware\n"); - return -EPROBE_DEFER; - } - - msleep(20); - - } while (!verify_lmem_ready(xe)); + /* + * The boot firmware initializes local memory and + * assesses its health. If memory training fails, + * the punit will have been instructed to keep the GT powered + * down.we won't be able to communicate with it + * + * If the status check is done before punit updates the register, + * it can lead to the system being unusable. + * use a timeout and defer the probe to prevent this. + */ + poll_timeout_us(initializing = lmem_initializing(xe), + initializing <= 0, + 20 * USEC_PER_MSEC, TIMEOUT_SEC * USEC_PER_SEC, true); + if (initializing < 0) + return initializing; + + if (initializing) { + drm_dbg(&xe->drm, "lmem not initialized by firmware\n"); + return -EPROBE_DEFER; + } drm_dbg(&xe->drm, "lmem ready after %ums", - jiffies_to_msecs(jiffies - start)); + jiffies_to_msecs(jiffies - prev_jiffies)); return 0; } ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */ -static void sriov_update_device_info(struct xe_device *xe) +static void vf_update_device_info(struct xe_device *xe) { + xe_assert(xe, IS_SRIOV_VF(xe)); /* disable features that are not available/applicable to VFs */ - if (IS_SRIOV_VF(xe)) { - xe->info.probe_display = 0; - xe->info.has_heci_gscfi = 0; - xe->info.skip_guc_pc = 1; - xe->info.skip_pcode = 1; - } + xe->info.probe_display = 0; + xe->info.has_heci_cscfi = 0; + xe->info.has_heci_gscfi = 0; + xe->info.has_late_bind = 0; + xe->info.skip_guc_pc = 1; + xe->info.skip_pcode = 1; +} + +static int xe_device_vram_alloc(struct xe_device *xe) +{ + 
struct xe_vram_region *vram; + + if (!IS_DGFX(xe)) + return 0; + + vram = drmm_kzalloc(&xe->drm, sizeof(*vram), GFP_KERNEL); + if (!vram) + return -ENOMEM; + + xe->mem.vram = vram; + return 0; } /** @@ -704,13 +730,17 @@ int xe_device_probe_early(struct xe_device *xe) { int err; + xe_wa_device_init(xe); + xe_wa_process_device_oob(xe); + err = xe_mmio_probe_early(xe); if (err) return err; xe_sriov_probe_early(xe); - sriov_update_device_info(xe); + if (IS_SRIOV_VF(xe)) + vf_update_device_info(xe); err = xe_pcode_probe_early(xe); if (err || xe_survivability_mode_is_requested(xe)) { @@ -721,7 +751,7 @@ int xe_device_probe_early(struct xe_device *xe) * possible, but still return the previous error for error * propagation */ - err = xe_survivability_mode_enable(xe); + err = xe_survivability_mode_boot_enable(xe); if (err) return err; @@ -734,6 +764,10 @@ int xe_device_probe_early(struct xe_device *xe) xe->wedged.mode = xe_modparam.wedged_mode; + err = xe_device_vram_alloc(xe); + if (err) + return err; + return 0; } ALLOW_ERROR_INJECTION(xe_device_probe_early, ERRNO); /* See xe_pci_probe() */ @@ -749,6 +783,8 @@ static int probe_has_flat_ccs(struct xe_device *xe) return 0; gt = xe_root_mmio_gt(xe); + if (!gt) + return 0; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (!fw_ref) @@ -789,51 +825,18 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; - err = xe_ttm_sys_mgr_init(xe); - if (err) - return err; - for_each_gt(gt, xe, id) { err = xe_gt_init_early(gt); if (err) return err; - - /* - * Only after this point can GT-specific MMIO operations - * (including things like communication with the GuC) - * be performed. 
- */ - xe_gt_mmio_init(gt); } for_each_tile(tile, xe, id) { - if (IS_SRIOV_VF(xe)) { - xe_guc_comm_init_early(&tile->primary_gt->uc.guc); - err = xe_gt_sriov_vf_bootstrap(tile->primary_gt); - if (err) - return err; - err = xe_gt_sriov_vf_query_config(tile->primary_gt); - if (err) - return err; - } err = xe_ggtt_init_early(tile->mem.ggtt); if (err) return err; - err = xe_memirq_init(&tile->memirq); - if (err) - return err; } - for_each_gt(gt, xe, id) { - err = xe_gt_init_hwconfig(gt); - if (err) - return err; - } - - err = xe_devcoredump_init(xe); - if (err) - return err; - /* * From here on, if a step fails, make sure a Driver-FLR is triggereed */ @@ -855,6 +858,14 @@ int xe_device_probe(struct xe_device *xe) return err; } + /* + * Allow allocations only now to ensure xe_display_init_early() + * is the first to allocate, always. + */ + err = xe_ttm_sys_mgr_init(xe); + if (err) + return err; + /* Allocate and map stolen after potential VRAM resize */ err = xe_ttm_stolen_mgr_init(xe); if (err) @@ -886,10 +897,28 @@ int xe_device_probe(struct xe_device *xe) return err; } + err = xe_pagefault_init(xe); + if (err) + return err; + + if (xe->tiles->media_gt && + XE_GT_WA(xe->tiles->media_gt, 15015404425_disable)) + XE_DEVICE_WA_DISABLE(xe, 15015404425); + + err = xe_devcoredump_init(xe); + if (err) + return err; + + xe_nvm_init(xe); + err = xe_heci_gsc_init(xe); if (err) return err; + err = xe_late_bind_init(&xe->late_bind); + if (err) + return err; + err = xe_oa_init(xe); if (err) return err; @@ -902,6 +931,10 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; + err = xe_psmi_init(xe); + if (err) + return err; + err = drm_dev_register(&xe->drm, 0); if (err) return err; @@ -926,11 +959,19 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err_unregister_display; + err = xe_i2c_probe(xe); + if (err) + goto err_unregister_display; + for_each_gt(gt, xe, id) xe_gt_sanitize_freq(gt); xe_vsec_init(xe); + err = xe_sriov_init_late(xe); + if (err) + goto 
err_unregister_display; + return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); err_unregister_display: @@ -943,6 +984,8 @@ void xe_device_remove(struct xe_device *xe) { xe_display_unregister(xe); + xe_nvm_fini(xe); + drm_dev_unplug(&xe->drm); xe_bo_pci_dev_remove_all(xe); @@ -955,16 +998,16 @@ void xe_device_shutdown(struct xe_device *xe) drm_dbg(&xe->drm, "Shutting down device\n"); - if (xe_driver_flr_disabled(xe)) { - xe_display_pm_shutdown(xe); + xe_display_pm_shutdown(xe); - xe_irq_suspend(xe); + xe_irq_suspend(xe); - for_each_gt(gt, xe, id) - xe_gt_shutdown(gt); + for_each_gt(gt, xe, id) + xe_gt_shutdown(gt); - xe_display_pm_shutdown_late(xe); - } else { + xe_display_pm_shutdown_late(xe); + + if (!xe_driver_flr_disabled(xe)) { /* BOOM! */ __xe_driver_flr(xe); } @@ -986,38 +1029,15 @@ void xe_device_wmb(struct xe_device *xe) xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0); } -/** - * xe_device_td_flush() - Flush transient L3 cache entries - * @xe: The device - * - * Display engine has direct access to memory and is never coherent with L3/L4 - * caches (or CPU caches), however KMD is responsible for specifically flushing - * transient L3 GPU cache entries prior to the flip sequence to ensure scanout - * can happen from such a surface without seeing corruption. - * - * Display surfaces can be tagged as transient by mapping it using one of the - * various L3:XD PAT index modes on Xe2. - * - * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed - * at the end of each submission via PIPE_CONTROL for compute/render, since SA - * Media is not coherent with L3 and we want to support render-vs-media - * usescases. For other engines like copy/blt the HW internally forces uncached - * behaviour, hence why we can skip the TDF on such platforms. +/* + * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt. 
*/ -void xe_device_td_flush(struct xe_device *xe) +static void tdf_request_sync(struct xe_device *xe) { - struct xe_gt *gt; unsigned int fw_ref; + struct xe_gt *gt; u8 id; - if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) - return; - - if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { - xe_device_l2_flush(xe); - return; - } - for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) continue; @@ -1027,6 +1047,7 @@ void xe_device_td_flush(struct xe_device *xe) return; xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); + /* * FIXME: We can likely do better here with our choice of * timeout. Currently we just assume the worst case, i.e. 150us, @@ -1035,7 +1056,7 @@ void xe_device_td_flush(struct xe_device *xe) * transient and need to be flushed.. */ if (xe_mmio_wait32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0, - 150, NULL, false)) + 300, NULL, false)) xe_gt_err_once(gt, "TD flush timeout\n"); xe_force_wake_put(gt_to_fw(gt), fw_ref); @@ -1048,8 +1069,10 @@ void xe_device_l2_flush(struct xe_device *xe) unsigned int fw_ref; gt = xe_root_mmio_gt(xe); + if (!gt) + return; - if (!XE_WA(gt, 16023588340)) + if (!XE_GT_WA(gt, 16023588340)) return; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); @@ -1057,15 +1080,55 @@ void xe_device_l2_flush(struct xe_device *xe) return; spin_lock(&gt->global_invl_lock); - xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1); - if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true)) + xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1); + if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 1000, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); + spin_unlock(&gt->global_invl_lock); xe_force_wake_put(gt_to_fw(gt), fw_ref); } +/** + * xe_device_td_flush() - Flush transient L3 cache entries + * @xe: The device + * + * Display engine has direct access to memory and is never coherent with L3/L4 + * caches (or CPU caches), however KMD is responsible for specifically flushing + * transient L3 GPU cache entries 
prior to the flip sequence to ensure scanout + * can happen from such a surface without seeing corruption. + * + * Display surfaces can be tagged as transient by mapping it using one of the + * various L3:XD PAT index modes on Xe2. + * + * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed + * at the end of each submission via PIPE_CONTROL for compute/render, since SA + * Media is not coherent with L3 and we want to support render-vs-media + * usescases. For other engines like copy/blt the HW internally forces uncached + * behaviour, hence why we can skip the TDF on such platforms. + */ +void xe_device_td_flush(struct xe_device *xe) +{ + struct xe_gt *root_gt; + + if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) + return; + + root_gt = xe_root_mmio_gt(xe); + if (!root_gt) + return; + + if (XE_GT_WA(root_gt, 16023588340)) { + /* A transient flush is not sufficient: flush the L2 */ + xe_device_l2_flush(xe); + } else { + xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc); + tdf_request_sync(xe); + xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc); + } +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? @@ -1127,11 +1190,63 @@ static void xe_device_wedged_fini(struct drm_device *drm, void *arg) } /** + * DOC: Xe Device Wedging + * + * Xe driver uses drm device wedged uevent as documented in Documentation/gpu/drm-uapi.rst. + * When device is in wedged state, every IOCTL will be blocked and GT cannot be + * used. Certain critical errors like gt reset failure, firmware failures can cause + * the device to be wedged. The default recovery method for a wedged state + * is rebind/bus-reset. + * + * Another recovery method is vendor-specific. Below are the cases that send + * ``WEDGED=vendor-specific`` recovery method in drm device wedged uevent. 
+ * + * Case: Firmware Flash + * -------------------- + * + * Identification Hint + * +++++++++++++++++++ + * + * ``WEDGED=vendor-specific`` drm device wedged uevent with + * :ref:`Runtime Survivability mode <xe-survivability-mode>` is used to notify + * admin/userspace consumer about the need for a firmware flash. + * + * Recovery Procedure + * ++++++++++++++++++ + * + * Once ``WEDGED=vendor-specific`` drm device wedged uevent is received, follow + * the below steps + * + * - Check Runtime Survivability mode sysfs. + * If enabled, firmware flash is required to recover the device. + * + * /sys/bus/pci/devices/<device>/survivability_mode + * + * - Admin/userspace consumer can use firmware flashing tools like fwupd to flash + * firmware and restore device to normal operation. + */ + +/** + * xe_device_set_wedged_method - Set wedged recovery method + * @xe: xe device instance + * @method: recovery method to set + * + * Set wedged recovery method to be sent in drm wedged uevent. + */ +void xe_device_set_wedged_method(struct xe_device *xe, unsigned long method) +{ + xe->wedged.method = method; +} + +/** * xe_device_declare_wedged - Declare device wedged * @xe: xe device instance * - * This is a final state that can only be cleared with a module - * re-probe (unbind + bind). + * This is a final state that can only be cleared with the recovery method + * specified in the drm wedged uevent. The method can be set using + * xe_device_set_wedged_method before declaring the device as wedged. If no method + * is set, reprobe (unbind/re-bind) will be sent by default. + * * In this state every IOCTL will be blocked so the GT cannot be used. * In general it will be called upon any critical error such as gt reset * failure or guc loading failure. Userspace will be notified of this state @@ -1165,12 +1280,18 @@ void xe_device_declare_wedged(struct xe_device *xe) "IOCTLs and executions are blocked. 
Only a rebind may clear the failure\n" "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n", dev_name(xe->drm.dev)); - - /* Notify userspace of wedged device */ - drm_dev_wedged_event(&xe->drm, - DRM_WEDGE_RECOVERY_REBIND | DRM_WEDGE_RECOVERY_BUS_RESET); } for_each_gt(gt, xe, id) xe_gt_declare_wedged(gt); + + if (xe_device_wedged(xe)) { + /* If no wedge recovery method is set, use default */ + if (!xe->wedged.method) + xe_device_set_wedged_method(xe, DRM_WEDGE_RECOVERY_REBIND | + DRM_WEDGE_RECOVERY_BUS_RESET); + + /* Notify userspace of wedged device */ + drm_dev_wedged_event(&xe->drm, xe->wedged.method, NULL); + } } diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 0bc3bc8e6803..32cc6323b7f6 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -60,35 +60,32 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe) return &xe->tiles[0]; } +/* + * Highest GT/tile count for any platform. Used only for memory allocation + * sizing. Any logic looping over GTs or mapping userspace GT IDs into GT + * structures should use the per-platform xe->info.max_gt_per_tile instead. + */ #define XE_MAX_GT_PER_TILE 2 -static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id) -{ - if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE)) - gt_id = 0; - - return gt_id ? tile->media_gt : tile->primary_gt; -} - static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) { - struct xe_tile *root_tile = xe_device_get_root_tile(xe); + struct xe_tile *tile; struct xe_gt *gt; - /* - * FIXME: This only works for now because multi-tile and standalone - * media are mutually exclusive on the platforms we have today. - * - * id => GT mapping may change once we settle on how we want to handle - * our UAPI. 
- */ - if (MEDIA_VER(xe) >= 13) { - gt = xe_tile_get_gt(root_tile, gt_id); - } else { - if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE)) - gt_id = 0; - - gt = xe->tiles[gt_id].primary_gt; + if (gt_id >= xe->info.tile_count * xe->info.max_gt_per_tile) + return NULL; + + tile = &xe->tiles[gt_id / xe->info.max_gt_per_tile]; + switch (gt_id % xe->info.max_gt_per_tile) { + default: + xe_assert(xe, false); + fallthrough; + case 0: + gt = tile->primary_gt; + break; + case 1: + gt = tile->media_gt; + break; } if (!gt) @@ -130,14 +127,14 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe) for ((id__) = 1; (id__) < (xe__)->info.tile_count; (id__)++) \ for_each_if((tile__) = &(xe__)->tiles[(id__)]) -/* - * FIXME: This only works for now since multi-tile and standalone media - * happen to be mutually exclusive. Future platforms may change this... - */ #define for_each_gt(gt__, xe__, id__) \ - for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \ + for ((id__) = 0; (id__) < (xe__)->info.tile_count * (xe__)->info.max_gt_per_tile; (id__)++) \ for_each_if((gt__) = xe_device_get_gt((xe__), (id__))) +#define for_each_gt_on_tile(gt__, tile__, id__) \ + for_each_gt((gt__), (tile__)->xe, (id__)) \ + for_each_if((gt__)->tile == (tile__)) + static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) { return &gt->pm.fw; @@ -190,11 +187,14 @@ static inline bool xe_device_wedged(struct xe_device *xe) return atomic_read(&xe->wedged.flag); } +void xe_device_set_wedged_method(struct xe_device *xe, unsigned long method); void xe_device_declare_wedged(struct xe_device *xe); struct xe_file *xe_file_get(struct xe_file *xef); void xe_file_put(struct xe_file *xef); +int xe_is_injection_active(void); + /* * Occasionally it is seen that the G2H worker starts running after a delay of more than * a second even after being queued and activated by the Linux workqueue subsystem. 
This diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index 2e657692e5b5..ec9c06b06fb5 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -24,6 +24,12 @@ * * vram_d3cold_threshold - Report/change vram used threshold(in MB) below * which vram save/restore is permissible during runtime D3cold entry/exit. + * + * lb_fan_control_version - Fan control version provisioned by late binding. + * Exposed only if supported by the device. + * + * lb_voltage_regulator_version - Voltage regulator version provisioned by late + * binding. Exposed only if supported by the device. */ static ssize_t @@ -32,13 +38,8 @@ vram_d3cold_threshold_show(struct device *dev, { struct pci_dev *pdev = to_pci_dev(dev); struct xe_device *xe = pdev_to_xe_device(pdev); - int ret; - xe_pm_runtime_get(xe); - ret = sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold); - xe_pm_runtime_put(xe); - - return ret; + return sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold); } static ssize_t @@ -65,6 +66,128 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_RW(vram_d3cold_threshold); +static struct attribute *vram_attrs[] = { + &dev_attr_vram_d3cold_threshold.attr, + NULL +}; + +static const struct attribute_group vram_attr_group = { + .attrs = vram_attrs, +}; + +static ssize_t +lb_fan_control_version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap = 0, ver_low = FAN_TABLE, ver_high = FAN_TABLE; + u16 major = 0, minor = 0, hotfix = 0, build = 0; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(V1_FAN_PROVISIONED, cap)) { + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), 
+ &ver_low, NULL); + if (ret) + goto out; + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), + &ver_high, NULL); + if (ret) + goto out; + + major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); + minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); + hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); + build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); + } +out: + xe_pm_runtime_put(xe); + + return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); +} +static DEVICE_ATTR_ADMIN_RO(lb_fan_control_version); + +static ssize_t +lb_voltage_regulator_version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap = 0, ver_low = VR_CONFIG, ver_high = VR_CONFIG; + u16 major = 0, minor = 0, hotfix = 0, build = 0; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(VR_PARAMS_PROVISIONED, cap)) { + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), + &ver_low, NULL); + if (ret) + goto out; + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), + &ver_high, NULL); + if (ret) + goto out; + + major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); + minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); + hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); + build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); + } +out: + xe_pm_runtime_put(xe); + + return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); +} +static DEVICE_ATTR_ADMIN_RO(lb_voltage_regulator_version); + +static struct attribute *late_bind_attrs[] = { + &dev_attr_lb_fan_control_version.attr, + &dev_attr_lb_voltage_regulator_version.attr, + NULL +}; + +static umode_t late_bind_attr_is_visible(struct kobject *kobj, + struct 
attribute *attr, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap = 0; + int ret; + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + return 0; + + if (attr == &dev_attr_lb_fan_control_version.attr && + REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) + return attr->mode; + if (attr == &dev_attr_lb_voltage_regulator_version.attr && + REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) + return attr->mode; + + return 0; +} + +static const struct attribute_group late_bind_attr_group = { + .attrs = late_bind_attrs, + .is_visible = late_bind_attr_is_visible, +}; + /** * DOC: PCIe Gen5 Limitations * @@ -115,7 +238,7 @@ auto_link_downgrade_capable_show(struct device *dev, struct device_attribute *at xe_pm_runtime_put(xe); cap = REG_FIELD_GET(LINK_DOWNGRADE, val); - return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE ? true : false); + return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE); } static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_capable); @@ -138,22 +261,15 @@ auto_link_downgrade_status_show(struct device *dev, struct device_attribute *att } static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_status); -static const struct attribute *auto_link_downgrade_attrs[] = { +static struct attribute *auto_link_downgrade_attrs[] = { &dev_attr_auto_link_downgrade_capable.attr, &dev_attr_auto_link_downgrade_status.attr, NULL }; -static void xe_device_sysfs_fini(void *arg) -{ - struct xe_device *xe = arg; - - if (xe->d3cold.capable) - sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); - - if (xe->info.platform == XE_BATTLEMAGE) - sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs); -} +static const struct attribute_group auto_link_downgrade_attr_group = { + .attrs = auto_link_downgrade_attrs, +}; int xe_device_sysfs_init(struct xe_device *xe) { @@ -161,16 +277,20 @@ 
int xe_device_sysfs_init(struct xe_device *xe) int ret; if (xe->d3cold.capable) { - ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr); + ret = devm_device_add_group(dev, &vram_attr_group); if (ret) return ret; } - if (xe->info.platform == XE_BATTLEMAGE) { - ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs); + if (xe->info.platform == XE_BATTLEMAGE && !IS_SRIOV_VF(xe)) { + ret = devm_device_add_group(dev, &auto_link_downgrade_attr_group); + if (ret) + return ret; + + ret = devm_device_add_group(dev, &late_bind_attr_group); if (ret) return ret; } - return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index c8fa2c011666..0b2fa7c56d38 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -10,34 +10,39 @@ #include <drm/drm_device.h> #include <drm/drm_file.h> -#include <drm/drm_pagemap.h> #include <drm/ttm/ttm_device.h> #include "xe_devcoredump_types.h" #include "xe_heci_gsc.h" +#include "xe_late_bind_fw_types.h" #include "xe_lmtt_types.h" #include "xe_memirq_types.h" #include "xe_oa_types.h" +#include "xe_pagefault_types.h" #include "xe_platform_types.h" #include "xe_pmu_types.h" #include "xe_pt_types.h" +#include "xe_sriov_pf_types.h" #include "xe_sriov_types.h" +#include "xe_sriov_vf_types.h" +#include "xe_sriov_vf_ccs_types.h" #include "xe_step_types.h" #include "xe_survivability_mode_types.h" -#include "xe_ttm_vram_mgr_types.h" +#include "xe_tile_sriov_vf_types.h" +#include "xe_validation.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) #define TEST_VM_OPS_ERROR #endif -#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) -#include "intel_display_core.h" -#include "intel_display_device.h" -#endif - +struct dram_info; +struct intel_display; +struct intel_dg_nvm_dev; struct xe_ggtt; +struct xe_i2c; struct xe_pat_ops; struct xe_pxp; +struct xe_vram_region; #define XE_BO_INVALID_OFFSET LONG_MAX @@ -71,61 
+76,6 @@ struct xe_pxp; struct xe_tile * : (tile__)->xe) /** - * struct xe_vram_region - memory region structure - * This is used to describe a memory region in xe - * device, such as HBM memory or CXL extension memory. - */ -struct xe_vram_region { - /** @io_start: IO start address of this VRAM instance */ - resource_size_t io_start; - /** - * @io_size: IO size of this VRAM instance - * - * This represents how much of this VRAM we can access - * via the CPU through the VRAM BAR. This can be smaller - * than @usable_size, in which case only part of VRAM is CPU - * accessible (typically the first 256M). This - * configuration is known as small-bar. - */ - resource_size_t io_size; - /** @dpa_base: This memory regions's DPA (device physical address) base */ - resource_size_t dpa_base; - /** - * @usable_size: usable size of VRAM - * - * Usable size of VRAM excluding reserved portions - * (e.g stolen mem) - */ - resource_size_t usable_size; - /** - * @actual_physical_size: Actual VRAM size - * - * Actual VRAM size including reserved portions - * (e.g stolen mem) - */ - resource_size_t actual_physical_size; - /** @mapping: pointer to VRAM mappable space */ - void __iomem *mapping; - /** @ttm: VRAM TTM manager */ - struct xe_ttm_vram_mgr ttm; -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) - /** @pagemap: Used to remap device memory as ZONE_DEVICE */ - struct dev_pagemap pagemap; - /** - * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory - * pages of this tile. - */ - struct drm_pagemap dpagemap; - /** - * @hpa_base: base host physical address - * - * This is generated when remap device memory as ZONE_DEVICE - */ - resource_size_t hpa_base; -#endif -}; - -/** * struct xe_mmio - register mmio structure * * Represents an MMIO region that the CPU may use to access registers. A @@ -210,12 +160,20 @@ struct xe_tile { /** @mem: memory management info for tile */ struct { /** - * @mem.vram: VRAM info for tile. + * @mem.kernel_vram: kernel-dedicated VRAM info for tile. 
+ * + * Although VRAM is associated with a specific tile, it can + * still be accessed by all tiles' GTs. + */ + struct xe_vram_region *kernel_vram; + + /** + * @mem.vram: general purpose VRAM info for tile. * * Although VRAM is associated with a specific tile, it can * still be accessed by all tiles' GTs. */ - struct xe_vram_region vram; + struct xe_vram_region *vram; /** @mem.ggtt: Global graphics translation table */ struct xe_ggtt *ggtt; @@ -237,12 +195,17 @@ struct xe_tile { struct { /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ struct xe_ggtt_node *ggtt_balloon[2]; + /** @sriov.vf.self_config: VF configuration data */ + struct xe_tile_sriov_vf_selfconfig self_config; } vf; } sriov; /** @memirq: Memory Based Interrupts. */ struct xe_memirq memirq; + /** @csc_hw_error_work: worker to report CSC HW errors */ + struct work_struct csc_hw_error_work; + /** @pcode: tile's PCODE */ struct { /** @pcode.lock: protecting tile's PCODE mailbox data */ @@ -254,15 +217,23 @@ struct xe_tile { /** @sysfs: sysfs' kobj used by xe_tile_sysfs */ struct kobject *sysfs; + + /** @debugfs: debugfs directory associated with this tile */ + struct dentry *debugfs; }; /** - * struct xe_device - Top level struct of XE device + * struct xe_device - Top level struct of Xe device */ struct xe_device { /** @drm: drm device */ struct drm_device drm; +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) + /** @display: display device data, must be placed after drm device member */ + struct intel_display *display; +#endif + /** @devcoredump: device coredump */ struct xe_devcoredump devcoredump; @@ -280,9 +251,9 @@ struct xe_device { u32 media_verx100; /** @info.mem_region_mask: mask of valid memory regions */ u32 mem_region_mask; - /** @info.platform: XE platform enum */ + /** @info.platform: Xe platform enum */ enum xe_platform platform; - /** @info.subplatform: XE subplatform enum */ + /** @info.subplatform: Xe subplatform enum */ enum xe_subplatform subplatform; /** @info.devid: device ID 
*/ u16 devid; @@ -296,6 +267,8 @@ struct xe_device { u8 vram_flags; /** @info.tile_count: Number of tiles */ u8 tile_count; + /** @info.max_gt_per_tile: Number of GT IDs allocated to each tile */ + u8 max_gt_per_tile; /** @info.gt_count: Total number of GTs for entire device */ u8 gt_count; /** @info.vm_max_level: Max VM level */ @@ -319,16 +292,26 @@ struct xe_device { u8 has_fan_control:1; /** @info.has_flat_ccs: Whether flat CCS metadata is used */ u8 has_flat_ccs:1; + /** @info.has_gsc_nvm: Device has gsc non-volatile memory */ + u8 has_gsc_nvm:1; /** @info.has_heci_cscfi: device has heci cscfi */ u8 has_heci_cscfi:1; /** @info.has_heci_gscfi: device has heci gscfi */ u8 has_heci_gscfi:1; + /** @info.has_late_bind: Device has firmware late binding support */ + u8 has_late_bind:1; /** @info.has_llc: Device has a shared CPU+GPU last level cache */ u8 has_llc:1; + /** @info.has_mbx_power_limits: Device has support to manage power limits using + * pcode mailbox commands. + */ + u8 has_mbx_power_limits:1; + /** @info.has_mem_copy_instr: Device supports MEM_COPY instruction */ + u8 has_mem_copy_instr:1; /** @info.has_pxp: Device has PXP support */ u8 has_pxp:1; - /** @info.has_range_tlb_invalidation: Has range based TLB invalidations */ - u8 has_range_tlb_invalidation:1; + /** @info.has_range_tlb_inval: Has range based TLB invalidations */ + u8 has_range_tlb_inval:1; /** @info.has_sriov: Supports SR-IOV */ u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ @@ -354,8 +337,23 @@ struct xe_device { u8 skip_mtcfg:1; /** @info.skip_pcode: skip access to PCODE uC */ u8 skip_pcode:1; + /** @info.needs_shared_vf_gt_wq: needs shared GT WQ on VF */ + u8 needs_shared_vf_gt_wq:1; } info; + /** @wa_active: keep track of active workarounds */ + struct { + /** @wa_active.oob: bitmap with active OOB workarounds */ + unsigned long *oob; + + /** + * @wa_active.oob_initialized: Mark oob as initialized to help detecting misuse + * of XE_DEVICE_WA() - it can 
only be called on initialization after + * Device OOB WAs have been processed. + */ + bool oob_initialized; + } wa_active; + /** @survivability: survivability information for device */ struct xe_survivability survivability; @@ -390,7 +388,7 @@ struct xe_device { /** @mem: memory info for device */ struct { /** @mem.vram: VRAM info for device */ - struct xe_vram_region vram; + struct xe_vram_region *vram; /** @mem.sys_mgr: system TTM manager */ struct ttm_resource_manager sys_mgr; /** @mem.sys_mgr: system memory shrinker. */ @@ -402,10 +400,12 @@ struct xe_device { /** @sriov.__mode: SR-IOV mode (Don't access directly!) */ enum xe_sriov_mode __mode; - /** @sriov.pf: PF specific data */ - struct xe_device_pf pf; - /** @sriov.vf: VF specific data */ - struct xe_device_vf vf; + union { + /** @sriov.pf: PF specific data */ + struct xe_device_pf pf; + /** @sriov.vf: VF specific data */ + struct xe_device_vf vf; + }; /** @sriov.wq: workqueue used by the virtualization workers */ struct workqueue_struct *wq; @@ -419,6 +419,16 @@ struct xe_device { u32 next_asid; /** @usm.lock: protects UM state */ struct rw_semaphore lock; + /** @usm.pf_wq: page fault work queue, unbound, high priority */ + struct workqueue_struct *pf_wq; + /* + * We pick 4 here because, in the current implementation, it + * yields the best bandwidth utilization of the kernel paging + * engine. 
+ */ +#define XE_PAGEFAULT_QUEUE_COUNT 4 + /** @usm.pf_queue: Page fault queues */ + struct xe_pagefault_queue pf_queue[XE_PAGEFAULT_QUEUE_COUNT]; } usm; /** @pinned: pinned BO state */ @@ -452,7 +462,7 @@ struct xe_device { /** @ordered_wq: used to serialize compute mode resume */ struct workqueue_struct *ordered_wq; - /** @unordered_wq: used to serialize unordered work, mostly display */ + /** @unordered_wq: used to serialize unordered work */ struct workqueue_struct *unordered_wq; /** @destroy_wq: used to serialize user destroy work, like queue */ @@ -498,6 +508,10 @@ struct xe_device { const struct xe_pat_table_entry *table; /** @pat.n_entries: Number of PAT entries */ int n_entries; + /** @pat.ats_entry: PAT entry for PCIe ATS responses */ + const struct xe_pat_table_entry *pat_ats; + /** @pat.pta_entry: PAT entry for page table accesses */ + const struct xe_pat_table_entry *pat_pta; u32 idx[__XE_CACHE_LEVEL_COUNT]; } pat; @@ -525,6 +539,12 @@ struct xe_device { /** @pm_notifier: Our PM notifier to perform actions in response to various PM events. */ struct notifier_block pm_notifier; + /** @pm_block: Completion to block validating tasks on suspend / hibernate prepare */ + struct completion pm_block; + /** @rebind_resume_list: List of wq items to kick on resume. 
*/ + struct list_head rebind_resume_list; + /** @rebind_resume_lock: Lock to protect the rebind_resume_list */ + struct mutex rebind_resume_lock; /** @pmt: Support the PMT driver callback interface */ struct { @@ -544,6 +564,12 @@ struct xe_device { /** @heci_gsc: graphics security controller */ struct xe_heci_gsc heci_gsc; + /** @nvm: discrete graphics non-volatile memory */ + struct intel_dg_nvm_dev *nvm; + + /** @late_bind: xe mei late bind interface */ + struct xe_late_bind late_bind; + /** @oa: oa observation subsystem */ struct xe_oa oa; @@ -559,6 +585,8 @@ struct xe_device { atomic_t flag; /** @wedged.mode: Mode controlled by kernel parameter and debugfs */ int mode; + /** @wedged.method: Recovery method to be sent in the drm device wedged uevent */ + unsigned long method; } wedged; /** @bo_device: Struct to control async free of BOs */ @@ -572,6 +600,12 @@ struct xe_device { /** @pmu: performance monitoring unit */ struct xe_pmu pmu; + /** @i2c: I2C host controller */ + struct xe_i2c *i2c; + + /** @atomic_svm_timeslice_ms: Atomic SVM fault timeslice MS */ + u32 atomic_svm_timeslice_ms; + #ifdef TEST_VM_OPS_ERROR /** * @vm_inject_error_position: inject errors at different places in VM @@ -580,6 +614,31 @@ struct xe_device { u8 vm_inject_error_position; #endif +#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) + /** + * @global_total_pages: global GPU page usage tracked for gpu_mem + * tracepoints + */ + atomic64_t global_total_pages; +#endif + /** @val: The domain for exhaustive eviction, which is currently per device. 
*/ + struct xe_validation_device val; + + /** @psmi: GPU debugging via additional validation HW */ + struct { + /** @psmi.capture_obj: PSMI buffer for VRAM */ + struct xe_bo *capture_obj[XE_MAX_TILES_PER_DEVICE + 1]; + /** @psmi.region_mask: Mask of valid memory regions */ + u8 region_mask; + } psmi; + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + /** @g2g_test_array: for testing G2G communications */ + u32 *g2g_test_array; + /** @g2g_test_count: for testing G2G communications */ + atomic_t g2g_test_count; +#endif + /* private: */ #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) @@ -589,27 +648,7 @@ struct xe_device { * drm_i915_private during build. After cleanup these should go away, * migrating to the right sub-structs */ - struct intel_display display; - - struct dram_info { - bool wm_lv_0_adjust_needed; - u8 num_channels; - bool symmetric_memory; - enum intel_dram_type { - INTEL_DRAM_UNKNOWN, - INTEL_DRAM_DDR3, - INTEL_DRAM_DDR4, - INTEL_DRAM_LPDDR3, - INTEL_DRAM_LPDDR4, - INTEL_DRAM_DDR5, - INTEL_DRAM_LPDDR5, - INTEL_DRAM_GDDR, - INTEL_DRAM_GDDR_ECC, - __INTEL_DRAM_TYPE_MAX, - } type; - u8 num_qgv_points; - u8 num_psf_gv_points; - } dram_info; + const struct dram_info *dram_info; /* * edram size in MB. @@ -617,27 +656,14 @@ struct xe_device { */ u32 edram_size_mb; - /* To shut up runtime pm macros.. 
*/ - struct xe_runtime_pm {} runtime_pm; - - /* only to allow build, not used functionally */ - u32 irq_mask; - struct intel_uncore { spinlock_t lock; } uncore; - - /* only to allow build, not used functionally */ - struct { - unsigned int hpll_freq; - unsigned int czclk_freq; - unsigned int fsb_freq, mem_freq, is_ddr3; - }; #endif }; /** - * struct xe_file - file handle for XE driver + * struct xe_file - file handle for Xe driver */ struct xe_file { /** @xe: xe DEVICE **/ diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules new file mode 100644 index 000000000000..55ba01bc8f38 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device_wa_oob.rules @@ -0,0 +1,5 @@ +22010954014 PLATFORM(DG2) +15015404425 PLATFORM(LUNARLAKE) + PLATFORM(PANTHERLAKE) +22019338487_display PLATFORM(LUNARLAKE) +14022085890 SUBPLATFORM(BATTLEMAGE, G21) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 346f857f3837..7c74a31d4486 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -48,31 +48,43 @@ static void xe_dma_buf_detach(struct dma_buf *dmabuf, static int xe_dma_buf_pin(struct dma_buf_attachment *attach) { - struct drm_gem_object *obj = attach->dmabuf->priv; + struct dma_buf *dmabuf = attach->dmabuf; + struct drm_gem_object *obj = dmabuf->priv; struct xe_bo *bo = gem_to_xe_bo(obj); struct xe_device *xe = xe_bo_device(bo); + struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED; + bool allow_vram = true; int ret; - /* - * For now only support pinning in TT memory, for two reasons: - * 1) Avoid pinning in a placement not accessible to some importers. - * 2) Pinning in VRAM requires PIN accounting which is a to-do. 
- */ - if (xe_bo_is_pinned(bo) && !xe_bo_is_mem_type(bo, XE_PL_TT)) { + if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) { + allow_vram = false; + } else { + list_for_each_entry(attach, &dmabuf->attachments, node) { + if (!attach->peer2peer) { + allow_vram = false; + break; + } + } + } + + if (xe_bo_is_pinned(bo) && !xe_bo_is_mem_type(bo, XE_PL_TT) && + !(xe_bo_is_vram(bo) && allow_vram)) { drm_dbg(&xe->drm, "Can't migrate pinned bo for dma-buf pin.\n"); return -EINVAL; } - ret = xe_bo_migrate(bo, XE_PL_TT); - if (ret) { - if (ret != -EINTR && ret != -ERESTARTSYS) - drm_dbg(&xe->drm, - "Failed migrating dma-buf to TT memory: %pe\n", - ERR_PTR(ret)); - return ret; + if (!allow_vram) { + ret = xe_bo_migrate(bo, XE_PL_TT, NULL, exec); + if (ret) { + if (ret != -EINTR && ret != -ERESTARTSYS) + drm_dbg(&xe->drm, + "Failed migrating dma-buf to TT memory: %pe\n", + ERR_PTR(ret)); + return ret; + } } - ret = xe_bo_pin_external(bo); + ret = xe_bo_pin_external(bo, !allow_vram, exec); xe_assert(xe, !ret); return 0; @@ -92,6 +104,7 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach, struct dma_buf *dma_buf = attach->dmabuf; struct drm_gem_object *obj = dma_buf->priv; struct xe_bo *bo = gem_to_xe_bo(obj); + struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED; struct sg_table *sgt; int r = 0; @@ -100,9 +113,9 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach, if (!xe_bo_is_pinned(bo)) { if (!attach->peer2peer) - r = xe_bo_migrate(bo, XE_PL_TT); + r = xe_bo_migrate(bo, XE_PL_TT, NULL, exec); else - r = xe_bo_validate(bo, NULL, false); + r = xe_bo_validate(bo, NULL, false, exec); if (r) return ERR_PTR(r); } @@ -111,7 +124,7 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach, case XE_PL_TT: sgt = drm_prime_pages_to_sg(obj->dev, bo->ttm.ttm->pages, - bo->ttm.ttm->num_pages); + obj->size >> PAGE_SHIFT); if (IS_ERR(sgt)) return sgt; @@ -161,15 +174,26 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, 
struct xe_bo *bo = gem_to_xe_bo(obj); bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE); + struct xe_validation_ctx ctx; + struct drm_exec exec; + int ret = 0; if (!reads) return 0; /* Can we do interruptible lock here? */ - xe_bo_lock(bo, false); - (void)xe_bo_migrate(bo, XE_PL_TT); - xe_bo_unlock(bo); + xe_validation_guard(&ctx, &xe_bo_device(bo)->val, &exec, (struct xe_val_flags) {}, ret) { + ret = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + if (ret) + break; + + ret = xe_bo_migrate(bo, XE_PL_TT, NULL, &exec); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &ret); + } + /* If we failed, cpu-access takes place in current placement. */ return 0; } @@ -191,10 +215,22 @@ struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags) { struct xe_bo *bo = gem_to_xe_bo(obj); struct dma_buf *buf; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = true, + /* We opt to avoid OOM on system pages allocations */ + .gfp_retry_mayfail = true, + .allow_res_evict = false, + }; + int ret; if (bo->vm) return ERR_PTR(-EPERM); + ret = ttm_bo_setup_export(&bo->ttm, &ctx); + if (ret) + return ERR_PTR(ret); + buf = drm_gem_prime_export(obj, flags); if (!IS_ERR(buf)) buf->ops = &xe_dmabuf_ops; @@ -208,32 +244,45 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, { struct dma_resv *resv = dma_buf->resv; struct xe_device *xe = to_xe_device(dev); + struct xe_validation_ctx ctx; + struct drm_gem_object *dummy_obj; + struct drm_exec exec; struct xe_bo *bo; - int ret; - - dma_resv_lock(resv, NULL); - bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size, - 0, /* Will require 1way or 2way for vm_bind */ - ttm_bo_type_sg, XE_BO_FLAG_SYSTEM); - if (IS_ERR(bo)) { - ret = PTR_ERR(bo); - goto error; + int ret = 0; + + dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm); + if (!dummy_obj) + return ERR_PTR(-ENOMEM); + + dummy_obj->resv = 
resv; + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) { + ret = drm_exec_lock_obj(&exec, dummy_obj); + drm_exec_retry_on_contention(&exec); + if (ret) + break; + + bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size, + 0, /* Will require 1way or 2way for vm_bind */ + ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec); + drm_exec_retry_on_contention(&exec); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &ret); + break; + } } - dma_resv_unlock(resv); - - return &bo->ttm.base; + drm_gem_object_put(dummy_obj); -error: - dma_resv_unlock(resv); - return ERR_PTR(ret); + return ret ? ERR_PTR(ret) : &bo->ttm.base; } static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach) { struct drm_gem_object *obj = attach->importer_priv; struct xe_bo *bo = gem_to_xe_bo(obj); + struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED; - XE_WARN_ON(xe_bo_evict(bo)); + XE_WARN_ON(xe_bo_evict(bo, exec)); } static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = { diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 31f688e953d7..f931ff9b1ec0 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -167,7 +167,7 @@ void xe_drm_client_remove_bo(struct xe_bo *bo) static void bo_meminfo(struct xe_bo *bo, struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) { - u64 sz = bo->size; + u64 sz = xe_bo_size(bo); u32 mem_type = bo->ttm.resource->mem_type; xe_bo_assert_held(bo); diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h index d61650d4aa0b..95242a375e54 100644 --- a/drivers/gpu/drm/xe/xe_drv.h +++ b/drivers/gpu/drm/xe/xe_drv.h @@ -9,7 +9,7 @@ #include <drm/drm_drv.h> #define DRIVER_NAME "xe" -#define DRIVER_DESC "Intel Xe Graphics" +#define DRIVER_DESC "Intel Xe2 Graphics" /* Interface history: * diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index 96732613b4b7..a5c36a317a70 100644 --- 
a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -49,6 +49,7 @@ struct xe_eu_stall_data_stream { wait_queue_head_t poll_wq; size_t data_record_size; size_t per_xecore_buf_size; + unsigned int fw_ref; struct xe_gt *gt; struct xe_bo *bo; @@ -124,6 +125,27 @@ struct xe_eu_stall_data_xe2 { __u64 unused[6]; } __packed; +/* + * EU stall data format for Xe3p arch GPUs. + */ +struct xe_eu_stall_data_xe3p { + __u64 ip_addr:61; /* Bits 0 to 60 */ + __u64 tdr_count:8; /* Bits 61 to 68 */ + __u64 other_count:8; /* Bits 69 to 76 */ + __u64 control_count:8; /* Bits 77 to 84 */ + __u64 pipestall_count:8; /* Bits 85 to 92 */ + __u64 send_count:8; /* Bits 93 to 100 */ + __u64 dist_acc_count:8; /* Bits 101 to 108 */ + __u64 sbid_count:8; /* Bits 109 to 116 */ + __u64 sync_count:8; /* Bits 117 to 124 */ + __u64 inst_fetch_count:8; /* Bits 125 to 132 */ + __u64 active_count:8; /* Bits 133 to 140 */ + __u64 ex_id:3; /* Bits 141 to 143 */ + __u64 end_flag:1; /* Bit 144 */ + __u64 unused_bits:47; + __u64 unused[5]; +} __packed; + const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7}; /** @@ -167,10 +189,13 @@ size_t xe_eu_stall_data_record_size(struct xe_device *xe) { size_t record_size = 0; - if (xe->info.platform == XE_PVC) - record_size = sizeof(struct xe_eu_stall_data_pvc); + if (GRAPHICS_VER(xe) >= 35) + record_size = sizeof(struct xe_eu_stall_data_xe3p); else if (GRAPHICS_VER(xe) >= 20) record_size = sizeof(struct xe_eu_stall_data_xe2); + else if (xe->info.platform == XE_PVC) + record_size = sizeof(struct xe_eu_stall_data_pvc); + xe_assert(xe, is_power_of_2(record_size)); @@ -258,11 +283,13 @@ static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value, static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value, struct eu_stall_open_properties *props) { - if (value >= xe->info.gt_count) { + struct xe_gt *gt = xe_device_get_gt(xe, value); + + if (!gt) { drm_dbg(&xe->drm, "Invalid GT ID 
%llu for EU stall sampling\n", value); return -EINVAL; } - props->gt = xe_device_get_gt(xe, value); + props->gt = gt; return 0; } @@ -288,7 +315,7 @@ static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension return -EFAULT; if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(xe_set_eu_stall_property_funcs)) || - XE_IOCTL_DBG(xe, ext.pad)) + XE_IOCTL_DBG(xe, !ext.property) || XE_IOCTL_DBG(xe, ext.pad)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_set_eu_stall_property_funcs)); @@ -615,9 +642,8 @@ static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream, size = stream->per_xecore_buf_size * last_xecore; - bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL, - size, ~0ull, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64); + bo = xe_bo_create_pin_map_at_novm(tile->xe, tile, size, ~0ull, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64, false); if (IS_ERR(bo)) { kfree(stream->xecore_buf); return PTR_ERR(bo); @@ -635,19 +661,18 @@ static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream) struct per_xecore_buf *xecore_buf; struct xe_gt *gt = stream->gt; u16 group, instance; - unsigned int fw_ref; int xecore; /* Take runtime pm ref and forcewake to disable RC6 */ xe_pm_runtime_get(gt_to_xe(gt)); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) { + stream->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER); + if (!xe_force_wake_ref_has_domain(stream->fw_ref, XE_FW_RENDER)) { xe_gt_err(gt, "Failed to get RENDER forcewake\n"); xe_pm_runtime_put(gt_to_xe(gt)); return -ETIMEDOUT; } - if (XE_WA(gt, 22016596838)) + if (XE_GT_WA(gt, 22016596838)) xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, _MASKED_BIT_ENABLE(DISABLE_DOP_GATING)); @@ -803,11 +828,11 @@ static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream) cancel_delayed_work_sync(&stream->buf_poll_work); - if 
(XE_WA(gt, 22016596838)) + if (XE_GT_WA(gt, 22016596838)) xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); - xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER); + xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); xe_pm_runtime_put(gt_to_xe(gt)); return 0; diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 44364c042ad7..fd9480031750 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -16,9 +16,12 @@ #include "xe_exec_queue.h" #include "xe_hw_engine_group.h" #include "xe_macros.h" +#include "xe_pm.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_sync.h" +#include "xe_svm.h" +#include "xe_trace.h" #include "xe_vm.h" /** @@ -31,7 +34,7 @@ * - Binding at exec time * - Flow controlling the ring at exec time * - * In XE we avoid all of this complication by not allowing a BO list to be + * In Xe we avoid all of this complication by not allowing a BO list to be * passed into an exec, using the dma-buf implicit sync uAPI, have binds as * separate operations, and using the DRM scheduler to flow control the ring. * Let's deep dive on each of these. @@ -97,9 +100,13 @@ static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec) { struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm); + int ret; /* The fence slot added here is intended for the exec sched job. 
*/ - return xe_vm_validate_rebind(vm, &vm_exec->exec, 1); + xe_vm_set_validation_exec(vm, &vm_exec->exec); + ret = xe_vm_validate_rebind(vm, &vm_exec->exec, 1); + xe_vm_set_validation_exec(vm, NULL); + return ret; } int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -115,17 +122,18 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; struct drm_exec *exec = &vm_exec.exec; u32 i, num_syncs, num_ufence = 0; + struct xe_validation_ctx ctx; struct xe_sched_job *job; struct xe_vm *vm; - bool write_locked, skip_retry = false; - ktime_t end = 0; + bool write_locked; int err = 0; struct xe_hw_engine_group *group; enum xe_hw_engine_group_execution_mode mode, previous_mode; if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]) || + XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS)) return -EINVAL; q = xe_exec_queue_lookup(xef, args->exec_queue_id); @@ -148,6 +156,12 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto err_exec_queue; } + if (atomic_read(&q->job_cnt) >= XE_MAX_JOB_COUNT_PER_EXEC_QUEUE) { + trace_xe_exec_queue_reach_max_job_count(q, XE_MAX_JOB_COUNT_PER_EXEC_QUEUE); + err = -EAGAIN; + goto err_exec_queue; + } + if (args->num_syncs) { syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); if (!syncs) { @@ -160,7 +174,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], - &syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC | + &syncs_user[num_syncs], NULL, 0, + SYNC_PARSE_FLAG_EXEC | (xe_vm_in_lr_mode(vm) ? 
SYNC_PARSE_FLAG_LR_MODE : 0)); if (err) @@ -237,17 +252,21 @@ retry: goto err_unlock_list; } - vm_exec.vm = &vm->gpuvm; - vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT; - if (xe_vm_in_lr_mode(vm)) { - drm_exec_init(exec, vm_exec.flags, 0); - } else { - err = drm_gpuvm_exec_lock(&vm_exec); - if (err) { - if (xe_vm_validate_should_retry(exec, err, &end)) - err = -EAGAIN; + /* + * It's OK to block interruptible here with the vm lock held, since + * on task freezing during suspend / hibernate, the call will + * return -ERESTARTSYS and the IOCTL will be rerun. + */ + err = xe_pm_block_on_suspend(xe); + if (err) + goto err_unlock_list; + + if (!xe_vm_in_lr_mode(vm)) { + vm_exec.vm = &vm->gpuvm; + vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT; + err = xe_validation_exec_lock(&ctx, &vm_exec, &xe->val); + if (err) goto err_unlock_list; - } } if (xe_vm_is_closed_or_banned(q->vm)) { @@ -256,12 +275,6 @@ retry: goto err_exec; } - if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) { - err = -EWOULDBLOCK; /* Aliased to -EAGAIN */ - skip_retry = true; - goto err_exec; - } - if (xe_exec_queue_uses_pxp(q)) { err = xe_vm_validate_protected(q->vm); if (err) @@ -290,11 +303,7 @@ retry: goto err_put_job; if (!xe_vm_in_lr_mode(vm)) { - err = xe_sched_job_last_fence_add_dep(job, vm); - if (err) - goto err_put_job; - - err = down_read_interruptible(&vm->userptr.notifier_lock); + err = xe_svm_notifier_lock_interruptible(vm); if (err) goto err_put_job; @@ -318,8 +327,6 @@ retry: xe_sched_job_init_user_fence(job, &syncs[i]); } - if (xe_exec_queue_is_lr(q)) - q->ring_ops->emit_job(job); if (!xe_vm_in_lr_mode(vm)) xe_exec_queue_last_fence_set(q, vm, &job->drm.s_fence->finished); xe_sched_job_push(job); @@ -336,15 +343,16 @@ retry: err_repin: if (!xe_vm_in_lr_mode(vm)) - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); err_put_job: if (err) xe_sched_job_put(job); err_exec: - drm_exec_fini(exec); + if (!xe_vm_in_lr_mode(vm)) + xe_validation_ctx_fini(&ctx); err_unlock_list: 
up_read(&vm->lock); - if (err == -EAGAIN && !skip_retry) + if (err == -EAGAIN) goto retry; err_hw_exec_mode: if (mode == EXEC_MODE_DMA_FENCE) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index ce78cee5dec6..8724f8de67e2 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -10,10 +10,13 @@ #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> +#include <drm/drm_syncobj.h> #include <uapi/drm/xe_drm.h> +#include "xe_dep_scheduler.h" #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_sriov_vf.h" #include "xe_hw_engine_class_sysfs.h" #include "xe_hw_engine_group.h" #include "xe_hw_fence.h" @@ -27,6 +30,29 @@ #include "xe_vm.h" #include "xe_pxp.h" +/** + * DOC: Execution Queue + * + * An Execution queue is an interface for the HW context of execution. + * The user creates an execution queue, submits the GPU jobs through those + * queues and in the end destroys them. + * + * Execution queues can also be created by XeKMD itself for driver internal + * operations like object migration etc. + * + * An execution queue is associated with a specified HW engine or a group of + * engines (belonging to the same tile and engine class) and any GPU job + * submitted on the queue will be run on one of these engines. + * + * An execution queue is tied to an address space (VM). It holds a reference + * of the associated VM and the underlying Logical Ring Context/s (LRC/s) + * until the queue is destroyed. + * + * The execution queue sits on top of the submission backend. It opaquely + * handles the GuC and Execlist backends whichever the platform uses, and + * the ring operations the different engine classes support. 
+ */ + enum xe_exec_queue_sched_prop { XE_EXEC_QUEUE_JOB_TIMEOUT = 0, XE_EXEC_QUEUE_TIMESLICE = 1, @@ -39,6 +65,12 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue static void __xe_exec_queue_free(struct xe_exec_queue *q) { + int i; + + for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) + if (q->tlb_inval[i].dep_scheduler) + xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler); + if (xe_exec_queue_uses_pxp(q)) xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); if (q->vm) @@ -50,6 +82,39 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) kfree(q); } +static int alloc_dep_schedulers(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_tile *tile = gt_to_tile(q->gt); + int i; + + for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) { + struct xe_dep_scheduler *dep_scheduler; + struct xe_gt *gt; + struct workqueue_struct *wq; + + if (i == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT) + gt = tile->primary_gt; + else + gt = tile->media_gt; + + if (!gt) + continue; + + wq = gt->tlb_inval.job_wq; + +#define MAX_TLB_INVAL_JOBS 16 /* Picking a reasonable value */ + dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name, + MAX_TLB_INVAL_JOBS); + if (IS_ERR(dep_scheduler)) + return PTR_ERR(dep_scheduler); + + q->tlb_inval[i].dep_scheduler = dep_scheduler; + } +#undef MAX_TLB_INVAL_JOBS + + return 0; +} + static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, @@ -94,6 +159,14 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, else q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; + if (q->flags & (EXEC_QUEUE_FLAG_MIGRATE | EXEC_QUEUE_FLAG_VM)) { + err = alloc_dep_schedulers(xe, q); + if (err) { + __xe_exec_queue_free(q); + return ERR_PTR(err); + } + } + if (vm) q->vm = xe_vm_get(vm); @@ -112,9 +185,8 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, return q; } -static int __xe_exec_queue_init(struct xe_exec_queue *q) 
+static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) { - struct xe_vm *vm = q->vm; int i, err; u32 flags = 0; @@ -132,38 +204,56 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q) flags |= XE_LRC_CREATE_RUNALONE; } - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - return err; - } + if (!(exec_queue_flags & EXEC_QUEUE_FLAG_KERNEL)) + flags |= XE_LRC_CREATE_USER_CTX; + err = q->ops->init(q); + if (err) + return err; + + /* + * This must occur after q->ops->init to avoid race conditions during VF + * post-migration recovery, as the fixups for the LRC GGTT addresses + * depend on the queue being present in the backend tracking structure. + * + * In addition to above, we must wait on inflight GGTT changes to avoid + * writing out stale values here. Such wait provides a solid solution + * (without a race) only if the function can detect migration instantly + * from the moment vCPU resumes execution. + */ for (i = 0; i < q->width; ++i) { - q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec, flags); - if (IS_ERR(q->lrc[i])) { - err = PTR_ERR(q->lrc[i]); - goto err_unlock; + struct xe_lrc *lrc; + + xe_gt_sriov_vf_wait_valid_ggtt(q->gt); + lrc = xe_lrc_create(q->hwe, q->vm, xe_lrc_ring_size(), + q->msix_vec, flags); + if (IS_ERR(lrc)) { + err = PTR_ERR(lrc); + goto err_lrc; } - } - - if (vm) - xe_vm_unlock(vm); - err = q->ops->init(q); - if (err) - goto err_lrc; + /* Pairs with READ_ONCE to xe_exec_queue_contexts_hwsp_rebase */ + WRITE_ONCE(q->lrc[i], lrc); + } return 0; -err_unlock: - if (vm) - xe_vm_unlock(vm); err_lrc: for (i = i - 1; i >= 0; --i) xe_lrc_put(q->lrc[i]); return err; } +static void __xe_exec_queue_fini(struct xe_exec_queue *q) +{ + int i; + + q->ops->fini(q); + + for (i = 0; i < q->width; ++i) + xe_lrc_put(q->lrc[i]); +} + struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, struct xe_hw_engine *hwe, u32 flags, @@ -180,7 +270,7 @@ struct 
xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (IS_ERR(q)) return q; - err = __xe_exec_queue_init(q); + err = __xe_exec_queue_init(q, flags); if (err) goto err_post_alloc; @@ -194,11 +284,13 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (xe_exec_queue_uses_pxp(q)) { err = xe_pxp_exec_queue_add(xe->pxp, q); if (err) - goto err_post_alloc; + goto err_post_init; } return q; +err_post_init: + __xe_exec_queue_fini(q); err_post_alloc: __xe_exec_queue_free(q); return ERR_PTR(err); @@ -277,6 +369,16 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, } xe_vm_put(migrate_vm); + if (!IS_ERR(q)) { + int err = drm_syncobj_create(&q->ufence_syncobj, + DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (err) { + xe_exec_queue_put(q); + return ERR_PTR(err); + } + } + return q; } ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); @@ -285,24 +387,31 @@ void xe_exec_queue_destroy(struct kref *ref) { struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); struct xe_exec_queue *eq, *next; + int i; + + xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0); + + if (q->ufence_syncobj) + drm_syncobj_put(q->ufence_syncobj); if (xe_exec_queue_uses_pxp(q)) xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); xe_exec_queue_last_fence_put_unlocked(q); + for_each_tlb_inval(i) + xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i); + if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) { list_for_each_entry_safe(eq, next, &q->multi_gt_list, multi_gt_link) xe_exec_queue_put(eq); } - q->ops->fini(q); + q->ops->destroy(q); } void xe_exec_queue_fini(struct xe_exec_queue *q) { - int i; - /* * Before releasing our ref to lrc and xef, accumulate our run ticks * and wakeup any waiters. 
@@ -311,9 +420,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal)) wake_up_var(&q->xef->exec_queue.pending_removal); - for (i = 0; i < q->width; ++i) - xe_lrc_put(q->lrc[i]); - + __xe_exec_queue_fini(q); __xe_exec_queue_free(q); } @@ -623,7 +730,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, err)) return -EFAULT; - if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) + if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id))) return -EINVAL; if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) @@ -755,34 +862,30 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, } /** - * xe_exec_queue_is_lr() - Whether an exec_queue is long-running - * @q: The exec_queue + * xe_exec_queue_lrc() - Get the LRC from exec queue. + * @q: The exec_queue. * - * Return: True if the exec_queue is long-running, false otherwise. + * Retrieves the primary LRC for the exec queue. Note that this function + * returns only the first LRC instance, even when multiple parallel LRCs + * are configured. + * + * Return: Pointer to LRC on success, error on failure */ -bool xe_exec_queue_is_lr(struct xe_exec_queue *q) +struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q) { - return q->vm && xe_vm_in_lr_mode(q->vm) && - !(q->flags & EXEC_QUEUE_FLAG_VM); -} - -static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q) -{ - return q->lrc[0]->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc[0]) - 1; + return q->lrc[0]; } /** - * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full + * xe_exec_queue_is_lr() - Whether an exec_queue is long-running * @q: The exec_queue * - * Return: True if the exec_queue's ring is full, false otherwise. + * Return: True if the exec_queue is long-running, false otherwise. 
*/ -bool xe_exec_queue_ring_full(struct xe_exec_queue *q) +bool xe_exec_queue_is_lr(struct xe_exec_queue *q) { - struct xe_lrc *lrc = q->lrc[0]; - s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES; - - return xe_exec_queue_num_job_inflight(q) >= max_job; + return q->vm && xe_vm_in_lr_mode(q->vm) && + !(q->flags & EXEC_QUEUE_FLAG_VM); } /** @@ -915,7 +1018,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, struct xe_vm *vm) { - if (q->flags & EXEC_QUEUE_FLAG_VM) { + if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) { + xe_migrate_job_lock_assert(q); + } else if (q->flags & EXEC_QUEUE_FLAG_VM) { lockdep_assert_held(&vm->lock); } else { xe_vm_assert_held(vm); @@ -1014,29 +1119,132 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm, struct dma_fence *fence) { xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, !dma_fence_is_container(fence)); xe_exec_queue_last_fence_put(q, vm); q->last_fence = dma_fence_get(fence); } /** - * xe_exec_queue_last_fence_test_dep - Test last fence dependency of queue + * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence * @q: The exec queue - * @vm: The VM the engine does a bind or exec for + * @vm: The VM the engine does a bind for + * @type: Either primary or media GT + */ +void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q, + struct xe_vm *vm, + unsigned int type) +{ + xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + + xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type); +} + +/** + * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB + * invalidation fence unlocked + * @q: The exec queue + * @type: Either primary or media GT + * + * Only safe to be called from xe_exec_queue_destroy(). 
+ */ +void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q, + unsigned int type) +{ + xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + + dma_fence_put(q->tlb_inval[type].last_fence); + q->tlb_inval[type].last_fence = NULL; +} + +/** + * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation + * @q: The exec queue + * @vm: The VM the engine does a bind for + * @type: Either primary or media GT + * + * Get last fence, takes a ref * - * Returns: - * -ETIME if there exists an unsignalled last fence dependency, zero otherwise. + * Returns: last fence if not signaled, dma fence stub if signaled */ -int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm) +struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q, + struct xe_vm *vm, + unsigned int type) { struct dma_fence *fence; + + xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM | + EXEC_QUEUE_FLAG_MIGRATE)); + + if (q->tlb_inval[type].last_fence && + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &q->tlb_inval[type].last_fence->flags)) + xe_exec_queue_tlb_inval_last_fence_put(q, vm, type); + + fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub(); + dma_fence_get(fence); + return fence; +} + +/** + * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation + * @q: The exec queue + * @vm: The VM the engine does a bind for + * @fence: The fence + * @type: Either primary or media GT + * + * Set the last fence for the tlb invalidation type on the queue. Increases + * reference count for fence, when closing queue + * xe_exec_queue_tlb_inval_last_fence_put should be called. 
+ */ +void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q, + struct xe_vm *vm, + struct dma_fence *fence, + unsigned int type) +{ + xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM | + EXEC_QUEUE_FLAG_MIGRATE)); + xe_assert(vm->xe, !dma_fence_is_container(fence)); + + xe_exec_queue_tlb_inval_last_fence_put(q, vm, type); + q->tlb_inval[type].last_fence = dma_fence_get(fence); +} + +/** + * xe_exec_queue_contexts_hwsp_rebase - Re-compute GGTT references + * within all LRCs of a queue. + * @q: the &xe_exec_queue struct instance containing target LRCs + * @scratch: scratch buffer to be used as temporary storage + * + * Returns: zero on success, negative error code on failure + */ +int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch) +{ + int i; int err = 0; - fence = xe_exec_queue_last_fence_get(q, vm); - if (fence) { - err = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) ? 
- 0 : -ETIME; - dma_fence_put(fence); + for (i = 0; i < q->width; ++i) { + struct xe_lrc *lrc; + + /* Pairs with WRITE_ONCE in __xe_exec_queue_init */ + lrc = READ_ONCE(q->lrc[i]); + if (!lrc) + continue; + + xe_lrc_update_memirq_regs_with_address(lrc, q->hwe, scratch); + xe_lrc_update_hwctx_regs_with_address(lrc); + err = xe_lrc_setup_wa_bb_with_scratch(lrc, q->hwe, scratch); + if (err) + break; } return err; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 17bc50a7f05a..fda4d4f9bda8 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -14,6 +14,10 @@ struct drm_file; struct xe_device; struct xe_file; +#define for_each_tlb_inval(__i) \ + for (__i = XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT; \ + __i <= XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT; ++__i) + struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, struct xe_hw_engine *hw_engine, u32 flags, @@ -64,8 +68,6 @@ static inline bool xe_exec_queue_uses_pxp(struct xe_exec_queue *q) bool xe_exec_queue_is_lr(struct xe_exec_queue *q); -bool xe_exec_queue_ring_full(struct xe_exec_queue *q); - bool xe_exec_queue_is_idle(struct xe_exec_queue *q); void xe_exec_queue_kill(struct xe_exec_queue *q); @@ -86,8 +88,27 @@ struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue * struct xe_vm *vm); void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm, struct dma_fence *fence); -int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, - struct xe_vm *vm); + +void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q, + struct xe_vm *vm, + unsigned int type); + +void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q, + unsigned int type); + +struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q, + struct xe_vm *vm, + unsigned int type); + +void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue 
*q, + struct xe_vm *vm, + struct dma_fence *fence, + unsigned int type); + void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q); +int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch); + +struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q); + #endif diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index cc1cffb5c87f..771ffe35cd0c 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -15,6 +15,7 @@ #include "xe_hw_fence_types.h" #include "xe_lrc_types.h" +struct drm_syncobj; struct xe_execlist_exec_queue; struct xe_gt; struct xe_guc_exec_queue; @@ -87,6 +88,8 @@ struct xe_exec_queue { #define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(4) /* flag to indicate low latency hint to guc */ #define EXEC_QUEUE_FLAG_LOW_LATENCY BIT(5) +/* for migration (kernel copy, clear, bind) jobs */ +#define EXEC_QUEUE_FLAG_MIGRATE BIT(6) /** * @flags: flags for this exec queue, should statically setup aside from ban @@ -132,6 +135,24 @@ struct xe_exec_queue { struct list_head link; } lr; +#define XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT 0 +#define XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT 1 +#define XE_EXEC_QUEUE_TLB_INVAL_COUNT (XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT + 1) + + /** @tlb_inval: TLB invalidations exec queue state */ + struct { + /** + * @tlb_inval.dep_scheduler: The TLB invalidation + * dependency scheduler + */ + struct xe_dep_scheduler *dep_scheduler; + /** + * @last_fence: last fence for tlb invalidation, protected by + * vm->lock in write mode + */ + struct dma_fence *last_fence; + } tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_COUNT]; + /** @pxp: PXP info tracking */ struct { /** @pxp.type: PXP session type used by this queue */ @@ -140,6 +161,12 @@ struct xe_exec_queue { struct list_head link; } pxp; + /** @ufence_syncobj: User fence syncobj */ + struct drm_syncobj *ufence_syncobj; + + /** @ufence_timeline_value: User fence timeline value */ + u64 ufence_timeline_value; + 
/** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; @@ -147,6 +174,11 @@ struct xe_exec_queue { const struct xe_ring_ops *ring_ops; /** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */ struct drm_sched_entity *entity; + +#define XE_MAX_JOB_COUNT_PER_EXEC_QUEUE 1000 + /** @job_cnt: number of drm jobs in this exec queue */ + atomic_t job_cnt; + /** * @tlb_flush_seqno: The seqno of the last rebind tlb flush performed * Protected by @vm's resv. Unused if @vm == NULL. @@ -166,8 +198,14 @@ struct xe_exec_queue_ops { int (*init)(struct xe_exec_queue *q); /** @kill: Kill inflight submissions for backend */ void (*kill)(struct xe_exec_queue *q); - /** @fini: Fini exec queue for submission backend */ + /** @fini: Undoes the init() for submission backend */ void (*fini)(struct xe_exec_queue *q); + /** + * @destroy: Destroy exec queue for submission backend. The backend + * function must call xe_exec_queue_fini() (which will in turn call the + * fini() backend function) to ensure the queue is properly cleaned up. + */ + void (*destroy)(struct xe_exec_queue *q); /** @set_priority: Set priority for exec queue */ int (*set_priority)(struct xe_exec_queue *q, enum xe_exec_queue_priority priority); @@ -186,6 +224,9 @@ struct xe_exec_queue_ops { * call after suspend. In dma-fencing path thus must return within a * reasonable amount of time. -ETIME return shall indicate an error * waiting for suspend resulting in associated VM getting killed. + * -EAGAIN return indicates the wait should be tried again, if the wait + * is within a work item, the work item should be requeued as deadlock + * avoidance mechanism. 
*/ int (*suspend_wait)(struct xe_exec_queue *q); /** diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 788f56b066b6..769d05517f93 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -339,7 +339,7 @@ static int execlist_exec_queue_init(struct xe_exec_queue *q) const struct drm_sched_init_args args = { .ops = &drm_sched_ops, .num_rqs = 1, - .credit_limit = q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, + .credit_limit = xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, .hang_limit = XE_SCHED_HANG_LIMIT, .timeout = XE_SCHED_JOB_TIMEOUT, .name = q->hwe->name, @@ -385,10 +385,20 @@ err_free: return err; } -static void execlist_exec_queue_fini_async(struct work_struct *w) +static void execlist_exec_queue_fini(struct xe_exec_queue *q) +{ + struct xe_execlist_exec_queue *exl = q->execlist; + + drm_sched_entity_fini(&exl->entity); + drm_sched_fini(&exl->sched); + + kfree(exl); +} + +static void execlist_exec_queue_destroy_async(struct work_struct *w) { struct xe_execlist_exec_queue *ee = - container_of(w, struct xe_execlist_exec_queue, fini_async); + container_of(w, struct xe_execlist_exec_queue, destroy_async); struct xe_exec_queue *q = ee->q; struct xe_execlist_exec_queue *exl = q->execlist; struct xe_device *xe = gt_to_xe(q->gt); @@ -401,10 +411,6 @@ static void execlist_exec_queue_fini_async(struct work_struct *w) list_del(&exl->active_link); spin_unlock_irqrestore(&exl->port->lock, flags); - drm_sched_entity_fini(&exl->entity); - drm_sched_fini(&exl->sched); - kfree(exl); - xe_exec_queue_fini(q); } @@ -413,10 +419,10 @@ static void execlist_exec_queue_kill(struct xe_exec_queue *q) /* NIY */ } -static void execlist_exec_queue_fini(struct xe_exec_queue *q) +static void execlist_exec_queue_destroy(struct xe_exec_queue *q) { - INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async); - queue_work(system_unbound_wq, &q->execlist->fini_async); + INIT_WORK(&q->execlist->destroy_async, 
execlist_exec_queue_destroy_async); + queue_work(system_unbound_wq, &q->execlist->destroy_async); } static int execlist_exec_queue_set_priority(struct xe_exec_queue *q, @@ -467,6 +473,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = { .init = execlist_exec_queue_init, .kill = execlist_exec_queue_kill, .fini = execlist_exec_queue_fini, + .destroy = execlist_exec_queue_destroy, .set_priority = execlist_exec_queue_set_priority, .set_timeslice = execlist_exec_queue_set_timeslice, .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout, diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h index 415140936f11..92c4ba52db0c 100644 --- a/drivers/gpu/drm/xe/xe_execlist_types.h +++ b/drivers/gpu/drm/xe/xe_execlist_types.h @@ -42,7 +42,7 @@ struct xe_execlist_exec_queue { bool has_run; - struct work_struct fini_async; + struct work_struct destroy_async; enum xe_exec_queue_priority active_priority; struct list_head active_link; diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 8a5cba22b586..c59a9b330697 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -64,7 +64,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) { int i, j; - if (!xe_gt_is_media_type(gt)) + if (xe_gt_is_main_type(gt)) init_domain(fw, XE_FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER, FORCEWAKE_ACK_RENDER); diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h index 899fbbcb3ea9..14b7b86e801b 100644 --- a/drivers/gpu/drm/xe/xe_force_wake_types.h +++ b/drivers/gpu/drm/xe/xe_force_wake_types.h @@ -52,7 +52,22 @@ enum xe_force_wake_domains { }; /** - * struct xe_force_wake_domain - XE force wake domains + * struct xe_force_wake_domain - Xe force wake power domain + * + * Represents an individual device-internal power domain. 
The driver must + * ensure the power domain is awake before accessing registers or other + * hardware functionality that is part of the power domain. Since different + * driver threads may access hardware units simultaneously, a reference count + * is used to ensure that the domain remains awake as long as any software + * is using the part of the hardware covered by the power domain. + * + * Hardware provides a register interface to allow the driver to request + * wake/sleep of power domains, although in most cases the actual action of + * powering the hardware up/down is handled by firmware (and may be subject to + * requirements and constraints outside of the driver's visibility) so the + * driver needs to wait for an acknowledgment that a wake request has been + * acted upon before accessing the parts of the hardware that reside within the + * power domain. */ struct xe_force_wake_domain { /** @id: domain force wake id */ @@ -70,7 +85,14 @@ struct xe_force_wake_domain { }; /** - * struct xe_force_wake - XE force wake + * struct xe_force_wake - Xe force wake collection + * + * Represents a collection of related power domains (struct + * xe_force_wake_domain) associated with a subunit of the device. + * + * Currently only used for GT power domains (where the term "forcewake" is used + * in the hardware documentation), although the interface could be extended to + * power wells in other parts of the hardware in the future. 
*/ struct xe_force_wake { /** @gt: back pointers to GT */ diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c index ed9183599e31..247e41c1c48d 100644 --- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c @@ -18,8 +18,8 @@ " *\n" \ " * This file was generated from rules: %s\n" \ " */\n" \ - "#ifndef _GENERATED_XE_WA_OOB_\n" \ - "#define _GENERATED_XE_WA_OOB_\n" \ + "#ifndef _GENERATED_%s_\n" \ + "#define _GENERATED_%s_\n" \ "\n" \ "enum {\n" @@ -52,7 +52,7 @@ static char *strip(char *line, size_t linelen) } #define MAX_LINE_LEN 4096 -static int parse(FILE *input, FILE *csource, FILE *cheader) +static int parse(FILE *input, FILE *csource, FILE *cheader, char *prefix) { char line[MAX_LINE_LEN + 1]; char *name, *prev_name = NULL, *rules; @@ -96,7 +96,7 @@ static int parse(FILE *input, FILE *csource, FILE *cheader) } if (name) { - fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx); + fprintf(cheader, "\t%s_%s = %u,\n", prefix, name, idx); /* Close previous entry before starting a new one */ if (idx) @@ -118,7 +118,41 @@ static int parse(FILE *input, FILE *csource, FILE *cheader) if (idx) fprintf(csource, ") },\n"); - fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx); + fprintf(cheader, "\t_%s_COUNT = %u\n", prefix, idx); + + return 0; +} + +/* Avoid GNU vs POSIX basename() discrepancy, just use our own */ +static const char *xbasename(const char *s) +{ + const char *p = strrchr(s, '/'); + + return p ? 
p + 1 : s; +} + +static int fn_to_prefix(const char *fn, char *prefix, size_t size) +{ + size_t len; + + fn = xbasename(fn); + len = strlen(fn); + + if (len > size - 1) + return -ENAMETOOLONG; + + memcpy(prefix, fn, len + 1); + + for (char *p = prefix; *p; p++) { + switch (*p) { + case '.': + *p = '\0'; + return 0; + default: + *p = toupper(*p); + break; + } + } return 0; } @@ -141,6 +175,7 @@ int main(int argc, const char *argv[]) [ARGS_CHEADER] = { .fn = argv[3], .mode = "w" }, }; int ret = 1; + char prefix[128]; if (argc < 3) { fprintf(stderr, "ERROR: wrong arguments\n"); @@ -148,6 +183,9 @@ int main(int argc, const char *argv[]) return 1; } + if (fn_to_prefix(args[ARGS_CHEADER].fn, prefix, sizeof(prefix)) < 0) + return 1; + for (int i = 0; i < _ARGS_COUNT; i++) { args[i].f = fopen(args[i].fn, args[i].mode); if (!args[i].f) { @@ -157,9 +195,10 @@ int main(int argc, const char *argv[]) } } - fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn); + fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn, prefix, prefix); + ret = parse(args[ARGS_INPUT].f, args[ARGS_CSOURCE].f, - args[ARGS_CHEADER].f); + args[ARGS_CHEADER].f, prefix); if (!ret) fprintf(args[ARGS_CHEADER].f, FOOTER); diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 7062115909f2..ef481b334af4 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -5,6 +5,7 @@ #include "xe_ggtt.h" +#include <kunit/visibility.h> #include <linux/fault-inject.h> #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/sizes.h> @@ -22,12 +23,14 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_printk.h" -#include "xe_gt_sriov_vf.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_pm.h" +#include "xe_res_cursor.h" #include "xe_sriov.h" +#include "xe_tile_printk.h" +#include "xe_tile_sriov_vf.h" +#include "xe_tlb_inval.h" #include "xe_wa.h" #include "xe_wopcm.h" @@ -64,13 +67,9 @@ * give us the correct placement 
for free. */ -static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, - u16 pat_index) +static u64 xelp_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index) { - u64 pte; - - pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); - pte |= XE_PAGE_PRESENT; + u64 pte = XE_PAGE_PRESENT; if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) pte |= XE_GGTT_PTE_DM; @@ -78,13 +77,12 @@ static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, return pte; } -static u64 xelpg_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, - u16 pat_index) +static u64 xelpg_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index) { struct xe_device *xe = xe_bo_device(bo); u64 pte; - pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, pat_index); + pte = xelp_ggtt_pte_flags(bo, pat_index); xe_assert(xe, pat_index <= 3); @@ -109,10 +107,23 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev) static void ggtt_update_access_counter(struct xe_ggtt *ggtt) { struct xe_tile *tile = ggtt->tile; - struct xe_gt *affected_gt = XE_WA(tile->primary_gt, 22019338487) ? - tile->primary_gt : tile->media_gt; - struct xe_mmio *mmio = &affected_gt->mmio; - u32 max_gtt_writes = XE_WA(ggtt->tile->primary_gt, 22019338487) ? 1100 : 63; + struct xe_gt *affected_gt; + u32 max_gtt_writes; + + if (tile->primary_gt && XE_GT_WA(tile->primary_gt, 22019338487)) { + affected_gt = tile->primary_gt; + max_gtt_writes = 1100; + + /* Only expected to apply to primary GT on dgpu platforms */ + xe_tile_assert(tile, IS_DGFX(tile_to_xe(tile))); + } else { + affected_gt = tile->media_gt; + max_gtt_writes = 63; + + /* Only expected to apply to media GT on igpu platforms */ + xe_tile_assert(tile, !IS_DGFX(tile_to_xe(tile))); + } + /* * Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit * to wait for completion of prior GTT writes before letting this through. 
@@ -121,7 +132,7 @@ static void ggtt_update_access_counter(struct xe_ggtt *ggtt) lockdep_assert_held(&ggtt->lock); if ((++ggtt->access_count % max_gtt_writes) == 0) { - xe_mmio_write32(mmio, GMD_ID, 0x0); + xe_mmio_write32(&affected_gt->mmio, GMD_ID, 0x0); ggtt->access_count = 0; } } @@ -140,6 +151,14 @@ static void xe_ggtt_set_pte_and_flush(struct xe_ggtt *ggtt, u64 addr, u64 pte) ggtt_update_access_counter(ggtt); } +static u64 xe_ggtt_get_pte(struct xe_ggtt *ggtt, u64 addr) +{ + xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK)); + xe_tile_assert(ggtt->tile, addr < ggtt->size); + + return readq(&ggtt->gsm[addr >> XE_PTE_SHIFT]); +} + static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) { u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; @@ -149,8 +168,9 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) xe_tile_assert(ggtt->tile, start < end); if (ggtt->scratch) - scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, - pat_index); + scratch_pte = xe_bo_addr(ggtt->scratch, 0, XE_PAGE_SIZE) | + ggtt->pt_ops->pte_encode_flags(ggtt->scratch, + pat_index); else scratch_pte = 0; @@ -160,12 +180,47 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) } } +static void primelockdep(struct xe_ggtt *ggtt) +{ + if (!IS_ENABLED(CONFIG_LOCKDEP)) + return; + + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&ggtt->lock); + fs_reclaim_release(GFP_KERNEL); +} + +/** + * xe_ggtt_alloc - Allocate a GGTT for a given &xe_tile + * @tile: &xe_tile + * + * Allocates a &xe_ggtt for a given tile. + * + * Return: &xe_ggtt on success, or NULL when out of memory. 
+ */ +struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_ggtt *ggtt; + + ggtt = drmm_kzalloc(&xe->drm, sizeof(*ggtt), GFP_KERNEL); + if (!ggtt) + return NULL; + + if (drmm_mutex_init(&xe->drm, &ggtt->lock)) + return NULL; + + primelockdep(ggtt); + ggtt->tile = tile; + + return ggtt; +} + static void ggtt_fini_early(struct drm_device *drm, void *arg) { struct xe_ggtt *ggtt = arg; destroy_workqueue(ggtt->wq); - mutex_destroy(&ggtt->lock); drm_mm_takedown(&ggtt->mm); } @@ -176,31 +231,52 @@ static void ggtt_fini(void *arg) ggtt->scratch = NULL; } -static void primelockdep(struct xe_ggtt *ggtt) +#ifdef CONFIG_LOCKDEP +void xe_ggtt_might_lock(struct xe_ggtt *ggtt) { - if (!IS_ENABLED(CONFIG_LOCKDEP)) - return; - - fs_reclaim_acquire(GFP_KERNEL); might_lock(&ggtt->lock); - fs_reclaim_release(GFP_KERNEL); } +#endif static const struct xe_ggtt_pt_ops xelp_pt_ops = { - .pte_encode_bo = xelp_ggtt_pte_encode_bo, + .pte_encode_flags = xelp_ggtt_pte_flags, .ggtt_set_pte = xe_ggtt_set_pte, + .ggtt_get_pte = xe_ggtt_get_pte, }; static const struct xe_ggtt_pt_ops xelpg_pt_ops = { - .pte_encode_bo = xelpg_ggtt_pte_encode_bo, + .pte_encode_flags = xelpg_ggtt_pte_flags, .ggtt_set_pte = xe_ggtt_set_pte, + .ggtt_get_pte = xe_ggtt_get_pte, }; static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { - .pte_encode_bo = xelpg_ggtt_pte_encode_bo, + .pte_encode_flags = xelpg_ggtt_pte_flags, .ggtt_set_pte = xe_ggtt_set_pte_and_flush, + .ggtt_get_pte = xe_ggtt_get_pte, }; +static void __xe_ggtt_init_early(struct xe_ggtt *ggtt, u32 reserved) +{ + drm_mm_init(&ggtt->mm, reserved, + ggtt->size - reserved); +} + +int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size) +{ + ggtt->size = size; + __xe_ggtt_init_early(ggtt, reserved); + return 0; +} +EXPORT_SYMBOL_IF_KUNIT(xe_ggtt_init_kunit); + +static void dev_fini_ggtt(void *arg) +{ + struct xe_ggtt *ggtt = arg; + + drain_workqueue(ggtt->wq); +} + /** * xe_ggtt_init_early - Early 
GGTT initialization * @ggtt: the &xe_ggtt to be initialized @@ -219,13 +295,13 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) unsigned int gsm_size; int err; - if (IS_SRIOV_VF(xe)) + if (IS_SRIOV_VF(xe) || GRAPHICS_VERx100(xe) >= 1250) gsm_size = SZ_8M; /* GGTT is expected to be 4GiB */ else gsm_size = probe_gsm_size(pdev); if (gsm_size == 0) { - drm_err(&xe->drm, "Hardware reported no preallocated GSM\n"); + xe_tile_err(ggtt->tile, "Hardware reported no preallocated GSM\n"); return -ENOMEM; } @@ -239,26 +315,29 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) ggtt->size = GUC_GGTT_TOP; if (GRAPHICS_VERx100(xe) >= 1270) - ggtt->pt_ops = (ggtt->tile->media_gt && - XE_WA(ggtt->tile->media_gt, 22019338487)) || - XE_WA(ggtt->tile->primary_gt, 22019338487) ? - &xelpg_pt_wa_ops : &xelpg_pt_ops; + ggtt->pt_ops = + (ggtt->tile->media_gt && XE_GT_WA(ggtt->tile->media_gt, 22019338487)) || + (ggtt->tile->primary_gt && XE_GT_WA(ggtt->tile->primary_gt, 22019338487)) ? + &xelpg_pt_wa_ops : &xelpg_pt_ops; else ggtt->pt_ops = &xelp_pt_ops; ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM); + if (!ggtt->wq) + return -ENOMEM; - drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), - ggtt->size - xe_wopcm_size(xe)); - mutex_init(&ggtt->lock); - primelockdep(ggtt); + __xe_ggtt_init_early(ggtt, xe_wopcm_size(xe)); err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); if (err) return err; + err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); + if (err) + return err; + if (IS_SRIOV_VF(xe)) { - err = xe_gt_sriov_vf_prepare_ggtt(xe_tile_get_gt(ggtt->tile, 0)); + err = xe_tile_sriov_vf_prepare_ggtt(ggtt->tile); if (err) return err; } @@ -377,7 +456,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) goto err; } - xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size); + xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, xe_bo_size(ggtt->scratch)); xe_ggtt_initial_clear(ggtt); @@ -394,9 +473,8 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) if (!gt) return; - 
err = xe_gt_tlb_invalidation_ggtt(gt); - if (err) - drm_warn(>_to_xe(gt)->drm, "xe_gt_tlb_invalidation_ggtt error=%d", err); + err = xe_tlb_inval_ggtt(>->tlb_inval); + xe_gt_WARN(gt, err, "Failed to invalidate GGTT (%pe)", ERR_PTR(err)); } static void xe_ggtt_invalidate(struct xe_ggtt *ggtt) @@ -423,22 +501,23 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf)); - xe_gt_dbg(ggtt->tile->primary_gt, "GGTT %#llx-%#llx (%s) %s\n", - node->start, node->start + node->size, buf, description); + xe_tile_dbg(ggtt->tile, "GGTT %#llx-%#llx (%s) %s\n", + node->start, node->start + node->size, buf, description); } } /** - * xe_ggtt_node_insert_balloon - prevent allocation of specified GGTT addresses + * xe_ggtt_node_insert_balloon_locked - prevent allocation of specified GGTT addresses * @node: the &xe_ggtt_node to hold reserved GGTT node * @start: the starting GGTT address of the reserved region * @end: then end GGTT address of the reserved region * - * Use xe_ggtt_node_remove_balloon() to release a reserved GGTT node. + * To be used in cases where ggtt->lock is already taken. + * Use xe_ggtt_node_remove_balloon_locked() to release a reserved GGTT node. * * Return: 0 on success or a negative error code on failure. 
*/ -int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) +int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, u64 start, u64 end) { struct xe_ggtt *ggtt = node->ggtt; int err; @@ -447,18 +526,16 @@ int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE)); xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE)); xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base)); + lockdep_assert_held(&ggtt->lock); node->base.color = 0; node->base.start = start; node->base.size = end - start; - mutex_lock(&ggtt->lock); err = drm_mm_reserve_node(&ggtt->mm, &node->base); - mutex_unlock(&ggtt->lock); - if (xe_gt_WARN(ggtt->tile->primary_gt, err, - "Failed to balloon GGTT %#llx-%#llx (%pe)\n", - node->base.start, node->base.start + node->base.size, ERR_PTR(err))) + if (xe_tile_WARN(ggtt->tile, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n", + node->base.start, node->base.start + node->base.size, ERR_PTR(err))) return err; xe_ggtt_dump_node(ggtt, &node->base, "balloon"); @@ -466,27 +543,72 @@ int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) } /** - * xe_ggtt_node_remove_balloon - release a reserved GGTT region + * xe_ggtt_node_remove_balloon_locked - release a reserved GGTT region * @node: the &xe_ggtt_node with reserved GGTT region * - * See xe_ggtt_node_insert_balloon() for details. + * To be used in cases where ggtt->lock is already taken. + * See xe_ggtt_node_insert_balloon_locked() for details. 
*/ -void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node) +void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node) { - if (!node || !node->ggtt) + if (!xe_ggtt_node_allocated(node)) return; - if (!drm_mm_node_allocated(&node->base)) - goto free_node; + lockdep_assert_held(&node->ggtt->lock); xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon"); - mutex_lock(&node->ggtt->lock); drm_mm_remove_node(&node->base); - mutex_unlock(&node->ggtt->lock); +} -free_node: - xe_ggtt_node_fini(node); +static void xe_ggtt_assert_fit(struct xe_ggtt *ggtt, u64 start, u64 size) +{ + struct xe_tile *tile = ggtt->tile; + struct xe_device *xe = tile_to_xe(tile); + u64 __maybe_unused wopcm = xe_wopcm_size(xe); + + xe_tile_assert(tile, start >= wopcm); + xe_tile_assert(tile, start + size < ggtt->size - wopcm); +} + +/** + * xe_ggtt_shift_nodes_locked - Shift GGTT nodes to adjust for a change in usable address range. + * @ggtt: the &xe_ggtt struct instance + * @shift: change to the location of area provisioned for current VF + * + * This function moves all nodes from the GGTT VM, to a temp list. These nodes are expected + * to represent allocations in range formerly assigned to current VF, before the range changed. + * When the GGTT VM is completely clear of any nodes, they are re-added with shifted offsets. + * + * The function has no ability of failing - because it shifts existing nodes, without + * any additional processing. If the nodes were successfully existing at the old address, + * they will do the same at the new one. A fail inside this function would indicate that + * the list of nodes was either already damaged, or that the shift brings the address range + * outside of valid bounds. Both cases justify an assert rather than error code. 
+ */ +void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift) +{ + struct xe_tile *tile __maybe_unused = ggtt->tile; + struct drm_mm_node *node, *tmpn; + LIST_HEAD(temp_list_head); + + lockdep_assert_held(&ggtt->lock); + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) + drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm) + xe_ggtt_assert_fit(ggtt, node->start + shift, node->size); + + drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm) { + drm_mm_remove_node(node); + list_add(&node->node_list, &temp_list_head); + } + + list_for_each_entry_safe(node, tmpn, &temp_list_head, node_list) { + list_del(&node->node_list); + node->start += shift; + drm_mm_reserve_node(&ggtt->mm, node); + xe_tile_assert(tile, drm_mm_node_allocated(node)); + } } /** @@ -537,12 +659,12 @@ int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align) * xe_ggtt_node_init - Initialize %xe_ggtt_node struct * @ggtt: the &xe_ggtt where the new node will later be inserted/reserved. * - * This function will allocated the struct %xe_ggtt_node and return it's pointer. + * This function will allocate the struct %xe_ggtt_node and return its pointer. * This struct will then be freed after the node removal upon xe_ggtt_node_remove() - * or xe_ggtt_node_remove_balloon(). + * or xe_ggtt_node_remove_balloon_locked(). * Having %xe_ggtt_node struct allocated doesn't mean that the node is already allocated * in GGTT. Only the xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), - * xe_ggtt_node_insert_balloon() will ensure the node is inserted or reserved in GGTT. + * xe_ggtt_node_insert_balloon_locked() will ensure the node is inserted or reserved in GGTT. * * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. 
**/ @@ -564,7 +686,7 @@ struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) * @node: the &xe_ggtt_node to be freed * * If anything went wrong with either xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), - * or xe_ggtt_node_insert_balloon(); and this @node is not going to be reused, then, + * or xe_ggtt_node_insert_balloon_locked(); and this @node is not going to be reused, then, * this function needs to be called to free the %xe_ggtt_node struct **/ void xe_ggtt_node_fini(struct xe_ggtt_node *node) @@ -587,30 +709,77 @@ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node) } /** + * xe_ggtt_node_pt_size() - Get the size of page table entries needed to map a GGTT node. + * @node: the &xe_ggtt_node + * + * Return: GGTT node page table entries size in bytes. + */ +size_t xe_ggtt_node_pt_size(const struct xe_ggtt_node *node) +{ + if (!node) + return 0; + + return node->base.size / XE_PAGE_SIZE * sizeof(u64); +} + +/** * xe_ggtt_map_bo - Map the BO into GGTT * @ggtt: the &xe_ggtt where node will be mapped + * @node: the &xe_ggtt_node where this BO is mapped * @bo: the &xe_bo to be mapped + * @pat_index: Which pat_index to use. */ -void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + struct xe_bo *bo, u16 pat_index) { - u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? 
XE_CACHE_NONE : XE_CACHE_WB; - u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; - u64 start; - u64 offset, pte; - if (XE_WARN_ON(!bo->ggtt_node[ggtt->tile->id])) + u64 start, pte, end; + struct xe_res_cursor cur; + + if (XE_WARN_ON(!node)) return; - start = bo->ggtt_node[ggtt->tile->id]->base.start; + start = node->base.start; + end = start + xe_bo_size(bo); + + pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index); + if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) { + xe_assert(xe_bo_device(bo), bo->ttm.ttm); + + for (xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &cur); + cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) + ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, + pte | xe_res_dma(&cur)); + } else { + /* Prepend GPU offset */ + pte |= vram_region_gpu_offset(bo->ttm.resource); - for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { - pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); - ggtt->pt_ops->ggtt_set_pte(ggtt, start + offset, pte); + for (xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); + cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) + ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, + pte + cur.start); } } +/** + * xe_ggtt_map_bo_unlocked - Restore a mapping of a BO into GGTT + * @ggtt: the &xe_ggtt where node will be mapped + * @bo: the &xe_bo to be mapped + * + * This is used to restore a GGTT mapping after suspend. + */ +void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo) +{ + u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; + u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; + + mutex_lock(&ggtt->lock); + xe_ggtt_map_bo(ggtt, bo->ggtt_node[ggtt->tile->id], bo, pat_index); + mutex_unlock(&ggtt->lock); +} + static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, - u64 start, u64 end) + u64 start, u64 end, struct drm_exec *exec) { u64 alignment = bo->min_align > 0 ? 
bo->min_align : XE_PAGE_SIZE; u8 tile_id = ggtt->tile->id; @@ -621,11 +790,11 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (XE_WARN_ON(bo->ggtt_node[tile_id])) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); return 0; } - err = xe_bo_validate(bo, NULL, false); + err = xe_bo_validate(bo, NULL, false, exec); if (err) return err; @@ -640,12 +809,15 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, mutex_lock(&ggtt->lock); err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base, - bo->size, alignment, 0, start, end, 0); + xe_bo_size(bo), alignment, 0, start, end, 0); if (err) { xe_ggtt_node_fini(bo->ggtt_node[tile_id]); bo->ggtt_node[tile_id] = NULL; } else { - xe_ggtt_map_bo(ggtt, bo); + u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; + u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; + + xe_ggtt_map_bo(ggtt, bo->ggtt_node[tile_id], bo, pat_index); } mutex_unlock(&ggtt->lock); @@ -664,25 +836,28 @@ out: * @bo: the &xe_bo to be inserted * @start: address where it will be inserted * @end: end of the range where it will be inserted + * @exec: The drm_exec transaction to use for exhaustive eviction. * * Return: 0 on success or a negative error code on failure. */ int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, - u64 start, u64 end) + u64 start, u64 end, struct drm_exec *exec) { - return __xe_ggtt_insert_bo_at(ggtt, bo, start, end); + return __xe_ggtt_insert_bo_at(ggtt, bo, start, end, exec); } /** * xe_ggtt_insert_bo - Insert BO into GGTT * @ggtt: the &xe_ggtt where bo will be inserted * @bo: the &xe_bo to be inserted + * @exec: The drm_exec transaction to use for exhaustive eviction. * * Return: 0 on success or a negative error code on failure. 
*/ -int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo, + struct drm_exec *exec) { - return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); + return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX, exec); } /** @@ -698,7 +873,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) return; /* This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); xe_ggtt_node_remove(bo->ggtt_node[tile_id], bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); @@ -780,6 +955,85 @@ void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid) xe_ggtt_assign_locked(node->ggtt, &node->base, vfid); mutex_unlock(&node->ggtt->lock); } + +/** + * xe_ggtt_node_save() - Save a &xe_ggtt_node to a buffer. + * @node: the &xe_ggtt_node to be saved + * @dst: destination buffer + * @size: destination buffer size in bytes + * @vfid: VF identifier + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_ggtt_node_save(struct xe_ggtt_node *node, void *dst, size_t size, u16 vfid) +{ + struct xe_ggtt *ggtt; + u64 start, end; + u64 *buf = dst; + u64 pte; + + if (!node) + return -ENOENT; + + guard(mutex)(&node->ggtt->lock); + + if (xe_ggtt_node_pt_size(node) != size) + return -EINVAL; + + ggtt = node->ggtt; + start = node->base.start; + end = start + node->base.size - 1; + + while (start < end) { + pte = ggtt->pt_ops->ggtt_get_pte(ggtt, start); + if (vfid != u64_get_bits(pte, GGTT_PTE_VFID)) + return -EPERM; + + *buf++ = u64_replace_bits(pte, 0, GGTT_PTE_VFID); + start += XE_PAGE_SIZE; + } + + return 0; +} + +/** + * xe_ggtt_node_load() - Load a &xe_ggtt_node from a buffer. + * @node: the &xe_ggtt_node to be loaded + * @src: source buffer + * @size: source buffer size in bytes + * @vfid: VF identifier + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_ggtt_node_load(struct xe_ggtt_node *node, const void *src, size_t size, u16 vfid) +{ + u64 vfid_pte = xe_encode_vfid_pte(vfid); + const u64 *buf = src; + struct xe_ggtt *ggtt; + u64 start, end; + + if (!node) + return -ENOENT; + + guard(mutex)(&node->ggtt->lock); + + if (xe_ggtt_node_pt_size(node) != size) + return -EINVAL; + + ggtt = node->ggtt; + start = node->base.start; + end = start + node->base.size - 1; + + while (start < end) { + vfid_pte = u64_replace_bits(*buf++, vfid, GGTT_PTE_VFID); + ggtt->pt_ops->ggtt_set_pte(ggtt, start, vfid_pte); + start += XE_PAGE_SIZE; + } + xe_ggtt_invalidate(ggtt); + + return 0; +} + #endif /** @@ -841,3 +1095,30 @@ u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer return total; } + +/** + * xe_ggtt_encode_pte_flags - Get PTE encoding flags for BO + * @ggtt: &xe_ggtt + * @bo: &xe_bo + * @pat_index: The pat_index for the PTE. + * + * This function returns the pte_flags for a given BO, without address. + * It's used for DPT to fill a GGTT mapped BO with a linear lookup table. + */ +u64 xe_ggtt_encode_pte_flags(struct xe_ggtt *ggtt, + struct xe_bo *bo, u16 pat_index) +{ + return ggtt->pt_ops->pte_encode_flags(bo, pat_index); +} + +/** + * xe_ggtt_read_pte - Read a PTE from the GGTT + * @ggtt: &xe_ggtt + * @offset: the offset for which the mapping should be read. + * + * Used by testcases, and by display reading out an inherited bios FB. 
+ */ +u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset) +{ + return ioread64(ggtt->gsm + (offset / XE_PAGE_SIZE)); +} diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 27e7d67de004..93fea4b6079c 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -9,25 +9,33 @@ #include "xe_ggtt_types.h" struct drm_printer; +struct xe_tile; +struct drm_exec; +struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile); int xe_ggtt_init_early(struct xe_ggtt *ggtt); +int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size); int xe_ggtt_init(struct xe_ggtt *ggtt); struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt); void xe_ggtt_node_fini(struct xe_ggtt_node *node); -int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, - u64 start, u64 size); -void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node); +int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, + u64 start, u64 size); +void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node); +void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift); int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align); int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, u32 size, u32 align, u32 mm_flags); void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate); bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); -void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); -int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +size_t xe_ggtt_node_pt_size(const struct xe_ggtt_node *node); +void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + struct xe_bo *bo, u16 pat_index); +void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo); +int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo, struct drm_exec *exec); int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, - u64 start, u64 end); + u64 start, u64 end, struct drm_exec *exec); void 
xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare); @@ -36,6 +44,18 @@ u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer #ifdef CONFIG_PCI_IOV void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid); +int xe_ggtt_node_save(struct xe_ggtt_node *node, void *dst, size_t size, u16 vfid); +int xe_ggtt_node_load(struct xe_ggtt_node *node, const void *src, size_t size, u16 vfid); #endif +#ifndef CONFIG_LOCKDEP +static inline void xe_ggtt_might_lock(struct xe_ggtt *ggtt) +{ } +#else +void xe_ggtt_might_lock(struct xe_ggtt *ggtt); +#endif + +u64 xe_ggtt_encode_pte_flags(struct xe_ggtt *ggtt, struct xe_bo *bo, u16 pat_index); +u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset); + #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index cb02b7994a9a..dacd796f8184 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -74,10 +74,12 @@ struct xe_ggtt_node { * Which can vary from platform to platform. 
*/ struct xe_ggtt_pt_ops { - /** @pte_encode_bo: Encode PTE address for a given BO */ - u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); + /** @pte_encode_flags: Encode PTE flags for a given BO */ + u64 (*pte_encode_flags)(struct xe_bo *bo, u16 pat_index); /** @ggtt_set_pte: Directly write into GGTT's PTE */ void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte); + /** @ggtt_get_pte: Directly read from GGTT's PTE */ + u64 (*ggtt_get_pte)(struct xe_ggtt *ggtt, u64 addr); }; #endif diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index 869b43a4151d..f91e06d03511 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -122,3 +122,17 @@ void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, list_add_tail(&msg->link, &sched->msgs); xe_sched_process_msg_queue(sched); } + +/** + * xe_sched_add_msg_head() - Xe GPU scheduler add message to head of list + * @sched: Xe GPU scheduler + * @msg: Message to add + */ +void xe_sched_add_msg_head(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg) +{ + lockdep_assert_held(&sched->base.job_list_lock); + + list_add(&msg->link, &sched->msgs); + xe_sched_process_msg_queue(sched); +} diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index c250ea773491..c7a77a3a9681 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -7,7 +7,7 @@ #define _XE_GPU_SCHEDULER_H_ #include "xe_gpu_scheduler_types.h" -#include "xe_sched_job_types.h" +#include "xe_sched_job.h" int xe_sched_init(struct xe_gpu_scheduler *sched, const struct drm_sched_backend_ops *ops, @@ -28,6 +28,8 @@ void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg); void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg); +void xe_sched_add_msg_head(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg); static inline 
void xe_sched_msg_lock(struct xe_gpu_scheduler *sched) { @@ -51,7 +53,17 @@ static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched) static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched) { - drm_sched_resubmit_jobs(&sched->base); + struct drm_sched_job *s_job; + bool restore_replay = false; + + list_for_each_entry(s_job, &sched->base.pending_list, list) { + struct drm_sched_fence *s_fence = s_job->s_fence; + struct dma_fence *hw_fence = s_fence->parent; + + restore_replay |= to_xe_sched_job(s_job)->restore_replay; + if (restore_replay || (hw_fence && !dma_fence_is_signaled(hw_fence))) + sched->base.ops->run_job(s_job); + } } static inline bool @@ -68,17 +80,30 @@ static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched, spin_unlock(&sched->base.job_list_lock); } +/** + * xe_sched_first_pending_job() - Find first pending job which is unsignaled + * @sched: Xe GPU scheduler + * + * Return first unsignaled job in pending list or NULL + */ static inline struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched) { - struct xe_sched_job *job; + struct xe_sched_job *job, *r_job = NULL; spin_lock(&sched->base.job_list_lock); - job = list_first_entry_or_null(&sched->base.pending_list, - struct xe_sched_job, drm.list); + list_for_each_entry(job, &sched->base.pending_list, drm.list) { + struct drm_sched_fence *s_fence = job->drm.s_fence; + struct dma_fence *hw_fence = s_fence->parent; + + if (hw_fence && !dma_fence_is_signaled(hw_fence)) { + r_job = job; + break; + } + } spin_unlock(&sched->base.job_list_lock); - return job; + return r_job; } static inline int diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 0bcf97063ff6..dd69cb834f8e 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -59,7 +59,8 @@ static int memcpy_fw(struct xe_gsc *gsc) xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size); xe_map_memcpy_to(xe, &gsc->private->vmap, 0, 
storage, fw_size); - xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size); + xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, + xe_bo_size(gsc->private) - fw_size); kfree(storage); @@ -82,7 +83,8 @@ static int emit_gsc_upload(struct xe_gsc *gsc) bb->cs[bb->len++] = GSC_FW_LOAD; bb->cs[bb->len++] = lower_32_bits(offset); bb->cs[bb->len++] = upper_32_bits(offset); - bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID; + bb->cs[bb->len++] = (xe_bo_size(gsc->private) / SZ_4K) | + GSC_FW_LOAD_LIMIT_VALID; job = xe_bb_create_job(gsc->q, bb); if (IS_ERR(job)) { @@ -134,10 +136,10 @@ static int query_compatibility_version(struct xe_gsc *gsc) u64 ggtt_offset; int err; - bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); + bo = xe_bo_create_pin_map_novm(xe, tile, GSC_VER_PKT_SZ * 2, + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) { xe_gt_err(gt, "failed to allocate bo for GSC version query\n"); return PTR_ERR(bo); @@ -264,7 +266,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) unsigned int fw_ref; int ret; - if (XE_WA(tile->primary_gt, 14018094691)) { + if (tile->primary_gt && XE_GT_WA(tile->primary_gt, 14018094691)) { fw_ref = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); /* @@ -279,7 +281,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) ret = gsc_upload(gsc); - if (XE_WA(tile->primary_gt, 14018094691)) + if (tile->primary_gt && XE_GT_WA(tile->primary_gt, 14018094691)) xe_force_wake_put(gt_to_fw(tile->primary_gt), fw_ref); if (ret) @@ -591,7 +593,7 @@ void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep) u32 gs1_clr = prep ? 
0 : HECI_H_GS1_ER_PREP; /* WA only applies if the GSC is loaded */ - if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt)) + if (!XE_GT_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt)) return; xe_mmio_rmw32(>->mmio, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index d0519cd6704a..464282a89eef 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -23,6 +23,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_pm.h" +#include "xe_tile.h" /* * GSC proxy: @@ -483,7 +484,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc) } /* no multi-tile devices with this feature yet */ - if (tile->id > 0) { + if (!xe_tile_is_root(tile)) { xe_gt_err(gt, "unexpected GSC proxy init on tile %u\n", tile->id); return -EINVAL; } diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 0e5d243c9451..cdce210e36f2 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -32,15 +32,14 @@ #include "xe_gt_freq.h" #include "xe_gt_idle.h" #include "xe_gt_mcr.h" -#include "xe_gt_pagefault.h" #include "xe_gt_printk.h" #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_vf.h" #include "xe_gt_sysfs.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" #include "xe_guc_exec_queue_types.h" #include "xe_guc_pc.h" +#include "xe_guc_submit.h" #include "xe_hw_fence.h" #include "xe_hw_engine_class_sysfs.h" #include "xe_irq.h" @@ -49,6 +48,7 @@ #include "xe_map.h" #include "xe_migrate.h" #include "xe_mmio.h" +#include "xe_pagefault.h" #include "xe_pat.h" #include "xe_pm.h" #include "xe_mocs.h" @@ -57,6 +57,7 @@ #include "xe_sa.h" #include "xe_sched_job.h" #include "xe_sriov.h" +#include "xe_tlb_inval.h" #include "xe_tuning.h" #include "xe_uc.h" #include "xe_uc_fw.h" @@ -64,29 +65,29 @@ #include "xe_wa.h" #include "xe_wopcm.h" -static void gt_fini(struct drm_device *drm, void *arg) -{ - struct xe_gt *gt = arg; - - 
destroy_workqueue(gt->ordered_wq); -} - struct xe_gt *xe_gt_alloc(struct xe_tile *tile) { + struct xe_device *xe = tile_to_xe(tile); + struct drm_device *drm = &xe->drm; + bool shared_wq = xe->info.needs_shared_vf_gt_wq && tile->primary_gt && + IS_SRIOV_VF(xe); + struct workqueue_struct *ordered_wq; struct xe_gt *gt; - int err; - gt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*gt), GFP_KERNEL); + gt = drmm_kzalloc(drm, sizeof(*gt), GFP_KERNEL); if (!gt) return ERR_PTR(-ENOMEM); gt->tile = tile; - gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", - WQ_MEM_RECLAIM); + if (shared_wq && tile->primary_gt->ordered_wq) + ordered_wq = tile->primary_gt->ordered_wq; + else + ordered_wq = drmm_alloc_ordered_workqueue(drm, "gt-ordered-wq", + WQ_MEM_RECLAIM); + if (IS_ERR(ordered_wq)) + return ERR_CAST(ordered_wq); - err = drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_fini, gt); - if (err) - return ERR_PTR(err); + gt->ordered_wq = ordered_wq; return gt; } @@ -97,7 +98,7 @@ void xe_gt_sanitize(struct xe_gt *gt) * FIXME: if xe_uc_sanitize is called here, on TGL driver will not * reload */ - gt->uc.guc.submission_state.enabled = false; + xe_guc_submit_disable(>->uc.guc); } static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) @@ -105,20 +106,20 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) unsigned int fw_ref; u32 reg; - if (!XE_WA(gt, 16023588340)) + if (!XE_GT_WA(gt, 16023588340)) return; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (!fw_ref) return; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg |= CG_DIS_CNTLBUS; xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); } - xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF); xe_force_wake_put(gt_to_fw(gt), fw_ref); } @@ -127,7 +128,7 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) unsigned int fw_ref; u32 reg; - if (!XE_WA(gt, 
16023588340)) + if (!XE_GT_WA(gt, 16023588340)) return; if (xe_gt_is_media_type(gt)) @@ -146,30 +147,23 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) static void gt_reset_worker(struct work_struct *w); -static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) +static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb, + long timeout_jiffies) { struct xe_sched_job *job; - struct xe_bb *bb; struct dma_fence *fence; long timeout; - bb = xe_bb_new(gt, 4, false); - if (IS_ERR(bb)) - return PTR_ERR(bb); - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); + if (IS_ERR(job)) return PTR_ERR(job); - } xe_sched_job_arm(job); fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); - timeout = dma_fence_wait_timeout(fence, false, HZ); + timeout = dma_fence_wait_timeout(fence, false, timeout_jiffies); dma_fence_put(fence); - xe_bb_free(bb, NULL); if (timeout < 0) return timeout; else if (!timeout) @@ -178,27 +172,30 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) return 0; } +static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) +{ + struct xe_bb *bb; + int ret; + + bb = xe_bb_new(gt, 4, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + ret = emit_job_sync(q, bb, HZ); + xe_bb_free(bb, NULL); + + return ret; +} + static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { struct xe_reg_sr *sr = &q->hwe->reg_lrc; struct xe_reg_sr_entry *entry; + int count_rmw = 0, count = 0, ret; unsigned long idx; - struct xe_sched_job *job; struct xe_bb *bb; - struct dma_fence *fence; - long timeout; - int count_rmw = 0; - int count = 0; - - if (q->hwe->class == XE_ENGINE_CLASS_RENDER) - /* Big enough to emit all of the context's 3DSTATE */ - bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false); - else - /* Just pick a large BB size */ - bb = xe_bb_new(gt, SZ_4K, false); - - if (IS_ERR(bb)) - return PTR_ERR(bb); + size_t bb_len = 0; + u32 *cs; /* count RMW registers 
as those will be handled separately */ xa_for_each(&sr->xa, idx, entry) { @@ -208,13 +205,34 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) ++count_rmw; } - if (count || count_rmw) - xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name); + if (count) + bb_len += count * 2 + 1; + + if (count_rmw) + bb_len += count_rmw * 20 + 7; + + if (q->hwe->class == XE_ENGINE_CLASS_RENDER) + /* + * Big enough to emit all of the context's 3DSTATE via + * xe_lrc_emit_hwe_state_instructions() + */ + bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32); + + xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len); + + bb = xe_bb_new(gt, bb_len, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + cs = bb->cs; if (count) { - /* emit single LRI with all non RMW regs */ + /* + * Emit single LRI with all non RMW regs: 1 leading dw + 2dw per + * reg + 1 + */ - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); xa_for_each(&sr->xa, idx, entry) { struct xe_reg reg = entry->reg; @@ -229,79 +247,68 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) val |= entry->set_bits; - bb->cs[bb->len++] = reg.addr; - bb->cs[bb->len++] = val; + *cs++ = reg.addr; + *cs++ = val; xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val); } } if (count_rmw) { - /* emit MI_MATH for each RMW reg */ + /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */ xa_for_each(&sr->xa, idx, entry) { if (entry->reg.masked || entry->clr_bits == ~0) continue; - bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; - bb->cs[bb->len++] = entry->reg.addr; - bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; - - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | - MI_LRI_LRM_CS_MMIO; - bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr; - bb->cs[bb->len++] = entry->clr_bits; - bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr; - bb->cs[bb->len++] = entry->set_bits; - - bb->cs[bb->len++] = 
MI_MATH(8); - bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0); - bb->cs[bb->len++] = CS_ALU_INSTR_LOADINV(SRCB, REG1); - bb->cs[bb->len++] = CS_ALU_INSTR_AND; - bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU); - bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0); - bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCB, REG2); - bb->cs[bb->len++] = CS_ALU_INSTR_OR; - bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU); - - bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO; - bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; - bb->cs[bb->len++] = entry->reg.addr; + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; + *cs++ = entry->reg.addr; + *cs++ = CS_GPR_REG(0, 0).addr; + + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | + MI_LRI_LRM_CS_MMIO; + *cs++ = CS_GPR_REG(0, 1).addr; + *cs++ = entry->clr_bits; + *cs++ = CS_GPR_REG(0, 2).addr; + *cs++ = entry->set_bits; + + *cs++ = MI_MATH(8); + *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0); + *cs++ = CS_ALU_INSTR_LOADINV(SRCB, REG1); + *cs++ = CS_ALU_INSTR_AND; + *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU); + *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0); + *cs++ = CS_ALU_INSTR_LOAD(SRCB, REG2); + *cs++ = CS_ALU_INSTR_OR; + *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU); + + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO; + *cs++ = CS_GPR_REG(0, 0).addr; + *cs++ = entry->reg.addr; xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n", entry->reg.addr, entry->clr_bits, entry->set_bits); } /* reset used GPR */ - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | MI_LRI_LRM_CS_MMIO; - bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr; - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr; - bb->cs[bb->len++] = 0; + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | + MI_LRI_LRM_CS_MMIO; + *cs++ = CS_GPR_REG(0, 0).addr; + *cs++ = 0; + *cs++ = CS_GPR_REG(0, 1).addr; + *cs++ = 0; + *cs++ = CS_GPR_REG(0, 2).addr; + *cs++ = 0; } - xe_lrc_emit_hwe_state_instructions(q, bb); + cs = 
xe_lrc_emit_hwe_state_instructions(q, cs); - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); - return PTR_ERR(job); - } + bb->len = cs - bb->cs; - xe_sched_job_arm(job); - fence = dma_fence_get(&job->drm.s_fence->finished); - xe_sched_job_push(job); + ret = emit_job_sync(q, bb, HZ); - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); xe_bb_free(bb, NULL); - if (timeout < 0) - return timeout; - else if (!timeout) - return -ETIME; - return 0; + return ret; } int xe_gt_record_default_lrcs(struct xe_gt *gt) @@ -363,14 +370,6 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) goto put_nop_q; } - /* Reload golden LRC to record the effect of any indirect W/A */ - err = emit_nop_job(gt, q); - if (err) { - xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n", - hwe->name, ERR_PTR(err), q->guc->id); - goto put_nop_q; - } - xe_map_memcpy_from(xe, default_lrc, &q->lrc[0]->bo->vmap, xe_lrc_pphwsp_offset(q->lrc[0]), @@ -390,6 +389,7 @@ put_exec_queue: int xe_gt_init_early(struct xe_gt *gt) { + unsigned int fw_ref; int err; if (IS_SRIOV_PF(gt_to_xe(gt))) { @@ -398,9 +398,15 @@ int xe_gt_init_early(struct xe_gt *gt) return err; } + if (IS_SRIOV_VF(gt_to_xe(gt))) { + err = xe_gt_sriov_vf_init_early(gt); + if (err) + return err; + } + xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt)); - err = xe_wa_init(gt); + err = xe_wa_gt_init(gt); if (err) return err; @@ -408,15 +414,36 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; - xe_wa_process_oob(gt); + xe_wa_process_gt_oob(gt); xe_force_wake_init_gt(gt, gt_to_fw(gt)); spin_lock_init(&gt->global_invl_lock); - err = xe_gt_tlb_invalidation_init_early(gt); + err = xe_gt_tlb_inval_init_early(gt); if (err) return err; + xe_mocs_init_early(gt); + + /* + * Only after this point can GT-specific MMIO operations + * (including things like communication with the GuC) + * be performed. 
+ */ + xe_gt_mmio_init(gt); + + err = xe_uc_init_noalloc(&gt->uc); + if (err) + return err; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; + + xe_gt_mcr_init_early(gt); + xe_pat_init(gt); + xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } @@ -431,7 +458,7 @@ static void dump_pat_on_error(struct xe_gt *gt) xe_pat_dump(gt, &p); } -static int gt_fw_domain_init(struct xe_gt *gt) +static int gt_init_with_gt_forcewake(struct xe_gt *gt) { unsigned int fw_ref; int err; @@ -440,7 +467,15 @@ static int gt_fw_domain_init(struct xe_gt *gt) if (!fw_ref) return -ETIMEDOUT; - if (!xe_gt_is_media_type(gt)) { + err = xe_uc_init(&gt->uc); + if (err) + goto err_force_wake; + + xe_gt_topology_init(gt); + xe_gt_mcr_init(gt); + xe_gt_enable_host_l2_vram(gt); + + if (xe_gt_is_main_type(gt)) { err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); if (err) goto err_force_wake; @@ -455,8 +490,10 @@ static int gt_fw_domain_init(struct xe_gt *gt) xe_gt_mcr_init(gt); err = xe_hw_engines_init_early(gt); - if (err) + if (err) { + dump_pat_on_error(gt); goto err_force_wake; + } err = xe_hw_engine_class_sysfs_init(gt); if (err) @@ -477,13 +514,12 @@ static int gt_fw_domain_init(struct xe_gt *gt) return 0; err_force_wake: - dump_pat_on_error(gt); xe_force_wake_put(gt_to_fw(gt), fw_ref); return err; } -static int all_fw_domain_init(struct xe_gt *gt) +static int gt_init_with_all_forcewake(struct xe_gt *gt) { unsigned int fw_ref; int err; @@ -516,7 +552,7 @@ static int all_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { /* * USM has its only SA pool to non-block behind user operations */ @@ -532,17 +568,15 @@ static int all_fw_domain_init(struct xe_gt *gt) } } - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { struct xe_tile *tile = gt_to_tile(gt); - tile->migrate = xe_migrate_init(tile); - if (IS_ERR(tile->migrate)) { - err = PTR_ERR(tile->migrate); + err = 
xe_migrate_init(tile->migrate); + if (err) goto err_force_wake; - } } - err = xe_uc_init_hw(&gt->uc); + err = xe_uc_load_hw(&gt->uc); if (err) goto err_force_wake; @@ -552,13 +586,11 @@ static int all_fw_domain_init(struct xe_gt *gt) xe_gt_apply_ccs_mode(gt); } - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt)) xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt); - if (IS_SRIOV_PF(gt_to_xe(gt))) { - xe_gt_sriov_pf_init(gt); + if (IS_SRIOV_PF(gt_to_xe(gt))) xe_gt_sriov_pf_init_hw(gt); - } xe_force_wake_put(gt_to_fw(gt), fw_ref); @@ -570,44 +602,18 @@ err_force_wake: return err; } -/* - * Initialize enough GT to be able to load GuC in order to obtain hwconfig and - * enable CTB communication. - */ -int xe_gt_init_hwconfig(struct xe_gt *gt) -{ - unsigned int fw_ref; - int err; - - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) - return -ETIMEDOUT; - - xe_gt_mcr_init_early(gt); - xe_pat_init(gt); - - err = xe_uc_init(&gt->uc); - if (err) - goto out_fw; - - err = xe_uc_init_hwconfig(&gt->uc); - if (err) - goto out_fw; - - xe_gt_topology_init(gt); - xe_gt_mcr_init(gt); - xe_gt_enable_host_l2_vram(gt); - -out_fw: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return err; -} - static void xe_gt_fini(void *arg) { struct xe_gt *gt = arg; int i; + if (disable_work_sync(&gt->reset.worker)) + /* + * If gt_reset_worker was halted from executing, take care of + * releasing the rpm reference here. 
+ */ + xe_pm_runtime_put(gt_to_xe(gt)); + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) xe_hw_fence_irq_finish(&gt->fence_irq[i]); @@ -630,17 +636,11 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; - err = xe_gt_pagefault_init(gt); - if (err) - return err; - - xe_mocs_init_early(gt); - err = xe_gt_sysfs_init(gt); if (err) return err; - err = gt_fw_domain_init(gt); + err = gt_init_with_gt_forcewake(gt); if (err) return err; @@ -654,7 +654,7 @@ int xe_gt_init(struct xe_gt *gt) xe_force_wake_init_engines(gt, gt_to_fw(gt)); - err = all_fw_domain_init(gt); + err = gt_init_with_all_forcewake(gt); if (err) return err; @@ -664,6 +664,12 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; + if (IS_SRIOV_VF(gt_to_xe(gt))) { + err = xe_gt_sriov_vf_init(gt); + if (err) + return err; + } + return 0; } @@ -742,7 +748,7 @@ static int vf_gt_restart(struct xe_gt *gt) if (err) return err; - err = xe_uc_init_hw(&gt->uc); + err = xe_uc_load_hw(&gt->uc); if (err) return err; @@ -780,20 +786,17 @@ static int do_gt_restart(struct xe_gt *gt) if (err) return err; - err = xe_uc_init_hw(&gt->uc); + err = xe_uc_load_hw(&gt->uc); if (err) return err; - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt)) xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt); if (IS_SRIOV_PF(gt_to_xe(gt))) xe_gt_sriov_pf_init_hw(gt); xe_mocs_init(gt); - err = xe_uc_start(&gt->uc); - if (err) - return err; for_each_hw_engine(hwe, gt, id) xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); @@ -801,6 +804,10 @@ static int do_gt_restart(struct xe_gt *gt) /* Get CCS mode in sync between sw/hw */ xe_gt_apply_ccs_mode(gt); + err = xe_uc_start(&gt->uc); + if (err) + return err; + /* Restore GT freq to expected values */ xe_gt_sanitize_freq(gt); @@ -810,22 +817,21 @@ static int do_gt_restart(struct xe_gt *gt) return 0; } -static int gt_reset(struct xe_gt *gt) +static void gt_reset_worker(struct work_struct *w) { + struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker); 
unsigned int fw_ref; int err; if (xe_device_wedged(gt_to_xe(gt))) - return -ECANCELED; + goto err_pm_put; /* We only support GT resets with GuC submission */ if (!xe_device_uc_enabled(gt_to_xe(gt))) - return -ENODEV; + goto err_pm_put; xe_gt_info(gt, "reset started\n"); - xe_pm_runtime_get(gt_to_xe(gt)); - if (xe_fault_inject_gt_reset()) { err = -ECANCELED; goto err_fail; @@ -839,13 +845,16 @@ static int gt_reset(struct xe_gt *gt) goto err_out; } + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_stop_prepare(gt); + xe_uc_gucrc_disable(&gt->uc); xe_uc_stop_prepare(&gt->uc); - xe_gt_pagefault_reset(gt); + xe_pagefault_reset(gt_to_xe(gt), gt); xe_uc_stop(&gt->uc); - xe_gt_tlb_invalidation_reset(gt); + xe_tlb_inval_reset(&gt->tlb_inval); err = do_gt_reset(gt); if (err) @@ -856,29 +865,23 @@ static int gt_reset(struct xe_gt *gt) goto err_out; xe_force_wake_put(gt_to_fw(gt), fw_ref); + + /* Pair with get while enqueueing the work in xe_gt_reset_async() */ xe_pm_runtime_put(gt_to_xe(gt)); xe_gt_info(gt, "reset done\n"); - return 0; + return; err_out: xe_force_wake_put(gt_to_fw(gt), fw_ref); XE_WARN_ON(xe_uc_start(&gt->uc)); + err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); - xe_device_declare_wedged(gt_to_xe(gt)); +err_pm_put: xe_pm_runtime_put(gt_to_xe(gt)); - - return err; -} - -static void gt_reset_worker(struct work_struct *w) -{ - struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker); - - gt_reset(gt); } void xe_gt_reset_async(struct xe_gt *gt) @@ -890,7 +893,11 @@ void xe_gt_reset_async(struct xe_gt *gt) return; xe_gt_info(gt, "reset queued\n"); - queue_work(gt->ordered_wq, &gt->reset.worker); + + /* Pair with put in gt_reset_worker() if work is enqueued */ + xe_pm_runtime_get_noresume(gt_to_xe(gt)); + if (!queue_work(gt->ordered_wq, &gt->reset.worker)) + xe_pm_runtime_put(gt_to_xe(gt)); } void xe_gt_suspend_prepare(struct xe_gt *gt) @@ -961,7 +968,7 @@ int xe_gt_sanitize_freq(struct xe_gt *gt) if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) || 
xe_uc_fw_is_loaded(&gt->uc.gsc.fw) || xe_uc_fw_is_in_error_state(&gt->uc.gsc.fw)) && - XE_WA(gt, 22019338487)) + XE_GT_WA(gt, 22019338487)) ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc); return ret; @@ -1059,5 +1066,5 @@ void xe_gt_declare_wedged(struct xe_gt *gt) xe_gt_assert(gt, gt_to_xe(gt)->wedged.mode); xe_uc_declare_wedged(&gt->uc); - xe_gt_tlb_invalidation_reset(gt); + xe_tlb_inval_reset(&gt->tlb_inval); } diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 187fa6490eaf..9d710049da45 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -12,6 +12,7 @@ #include "xe_device.h" #include "xe_device_types.h" +#include "xe_gt_sriov_vf.h" #include "xe_hw_engine.h" #define for_each_hw_engine(hwe__, gt__, id__) \ @@ -21,14 +22,19 @@ #define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0) +#define GT_VER(gt) ({ \ + typeof(gt) gt_ = (gt); \ + struct xe_device *xe = gt_to_xe(gt_); \ + xe_gt_is_media_type(gt_) ? MEDIA_VER(xe) : GRAPHICS_VER(xe); \ +}) + extern struct fault_attr gt_reset_failure; static inline bool xe_fault_inject_gt_reset(void) { - return should_fail(&gt_reset_failure, 1); + return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(&gt_reset_failure, 1); } struct xe_gt *xe_gt_alloc(struct xe_tile *tile); -int xe_gt_init_hwconfig(struct xe_gt *gt); int xe_gt_init_early(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt); void xe_gt_mmio_init(struct xe_gt *gt); @@ -107,6 +113,11 @@ static inline bool xe_gt_has_indirect_ring_state(struct xe_gt *gt) xe_device_uc_enabled(gt_to_xe(gt)); } +static inline bool xe_gt_is_main_type(struct xe_gt *gt) +{ + return gt->info.type == XE_GT_TYPE_MAIN; +} + static inline bool xe_gt_is_media_type(struct xe_gt *gt) { return gt->info.type == XE_GT_TYPE_MEDIA; @@ -120,4 +131,16 @@ static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe) hwe->instance == gt->usm.reserved_bcs_instance; } +/** + * xe_gt_recovery_pending() - GT recovery 
pending + * @gt: the &xe_gt + * + * Return: True if GT recovery in pending, False otherwise + */ +static inline bool xe_gt_recovery_pending(struct xe_gt *gt) +{ + return IS_SRIOV_VF(gt_to_xe(gt)) && + xe_gt_sriov_vf_recovery_pending(gt); +} + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 4f011d1573c6..bfc25c46f798 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -55,30 +55,11 @@ static void read_crystal_clock(struct xe_gt *gt, u32 rpm_config_reg, u32 *freq, } } -static void check_ctc_mode(struct xe_gt *gt) -{ - /* - * CTC_MODE[0] = 1 is definitely not supported for Xe2 and later - * platforms. In theory it could be a valid setting for pre-Xe2 - * platforms, but there's no documentation on how to properly handle - * this case. Reading TIMESTAMP_OVERRIDE, as the driver attempted in - * the past has been confirmed as incorrect by the hardware architects. - * - * For now just warn if we ever encounter hardware in the wild that - * has this setting and move on as if it hadn't been set. 
- */ - if (xe_mmio_read32(&gt->mmio, CTC_MODE) & CTC_SOURCE_DIVIDE_LOGIC) - xe_gt_warn(gt, "CTC_MODE[0] is set; this is unexpected and undocumented\n"); -} - int xe_gt_clock_init(struct xe_gt *gt) { u32 freq; u32 c0; - if (!IS_SRIOV_VF(gt_to_xe(gt))) - check_ctc_mode(gt); - c0 = xe_mmio_read32(&gt->mmio, RPM_CONFIG0); read_crystal_clock(gt, c0, &freq, &gt->info.timestamp_base); @@ -93,11 +74,6 @@ int xe_gt_clock_init(struct xe_gt *gt) return 0; } -static u64 div_u64_roundup(u64 n, u32 d) -{ - return div_u64(n + d - 1, d); -} - /** * xe_gt_clock_interval_to_ms - Convert sampled GT clock ticks to msec * @@ -108,5 +84,5 @@ static u64 div_u64_roundup(u64 n, u32 d) */ u64 xe_gt_clock_interval_to_ms(struct xe_gt *gt, u64 count) { - return div_u64_roundup(count * MSEC_PER_SEC, gt->info.reference_clock); + return mul_u64_u32_div(count, MSEC_PER_SEC, gt->info.reference_clock); } diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 119a55bb7580..e4fd632f43cf 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -12,7 +12,6 @@ #include "xe_device.h" #include "xe_force_wake.h" -#include "xe_ggtt.h" #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_gt_idle.h" @@ -29,11 +28,18 @@ #include "xe_pm.h" #include "xe_reg_sr.h" #include "xe_reg_whitelist.h" +#include "xe_sa.h" #include "xe_sriov.h" +#include "xe_sriov_vf_ccs.h" #include "xe_tuning.h" #include "xe_uc_debugfs.h" #include "xe_wa.h" +static struct xe_gt *node_to_gt(struct drm_info_node *node) +{ + return node->dent->d_parent->d_inode->i_private; +} + /** * xe_gt_debugfs_simple_show - A show callback for struct drm_info_list * @m: the &seq_file @@ -76,8 +82,7 @@ int xe_gt_debugfs_simple_show(struct seq_file *m, void *data) { struct drm_printer p = drm_seq_file_printer(m); struct drm_info_node *node = m->private; - struct dentry *parent = node->dent->d_parent; - struct xe_gt *gt = parent->d_inode->i_private; + struct xe_gt *gt = node_to_gt(node); int 
(*print)(struct xe_gt *, struct drm_printer *) = node->info_ent->data; if (WARN_ON(!print)) @@ -86,15 +91,36 @@ int xe_gt_debugfs_simple_show(struct seq_file *m, void *data) return print(gt, &p); } -static int hw_engines(struct xe_gt *gt, struct drm_printer *p) +/** + * xe_gt_debugfs_show_with_rpm - A show callback for struct drm_info_list + * @m: the &seq_file + * @data: data used by the drm debugfs helpers + * + * Similar to xe_gt_debugfs_simple_show() but implicitly takes a RPM ref. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_debugfs_show_with_rpm(struct seq_file *m, void *data) { + struct drm_info_node *node = m->private; + struct xe_gt *gt = node_to_gt(node); struct xe_device *xe = gt_to_xe(gt); + int ret; + + xe_pm_runtime_get(xe); + ret = xe_gt_debugfs_simple_show(m, data); + xe_pm_runtime_put(xe); + + return ret; +} + +static int hw_engines(struct xe_gt *gt, struct drm_printer *p) +{ struct xe_hw_engine *hwe; enum xe_hw_engine_id id; unsigned int fw_ref; int ret = 0; - xe_pm_runtime_get(xe); fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { ret = -ETIMEDOUT; @@ -106,88 +132,21 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p) fw_put: xe_force_wake_put(gt_to_fw(gt), fw_ref); - xe_pm_runtime_put(xe); - - return ret; -} - -static int powergate_info(struct xe_gt *gt, struct drm_printer *p) -{ - int ret; - - xe_pm_runtime_get(gt_to_xe(gt)); - ret = xe_gt_idle_pg_print(gt, p); - xe_pm_runtime_put(gt_to_xe(gt)); return ret; } -static int force_reset(struct xe_gt *gt, struct drm_printer *p) -{ - xe_pm_runtime_get(gt_to_xe(gt)); - xe_gt_reset_async(gt); - xe_pm_runtime_put(gt_to_xe(gt)); - - return 0; -} - -static int force_reset_sync(struct xe_gt *gt, struct drm_printer *p) -{ - xe_pm_runtime_get(gt_to_xe(gt)); - xe_gt_reset(gt); - xe_pm_runtime_put(gt_to_xe(gt)); - - return 0; -} - -static int sa_info(struct xe_gt *gt, struct drm_printer 
*p) -{ - struct xe_tile *tile = gt_to_tile(gt); - - xe_pm_runtime_get(gt_to_xe(gt)); - drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p, - tile->mem.kernel_bb_pool->gpu_addr); - xe_pm_runtime_put(gt_to_xe(gt)); - - return 0; -} - -static int topology(struct xe_gt *gt, struct drm_printer *p) -{ - xe_pm_runtime_get(gt_to_xe(gt)); - xe_gt_topology_dump(gt, p); - xe_pm_runtime_put(gt_to_xe(gt)); - - return 0; -} - static int steering(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_mcr_steering_dump(gt, p); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } -static int ggtt(struct xe_gt *gt, struct drm_printer *p) -{ - int ret; - - xe_pm_runtime_get(gt_to_xe(gt)); - ret = xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, p); - xe_pm_runtime_put(gt_to_xe(gt)); - - return ret; -} - static int register_save_restore(struct xe_gt *gt, struct drm_printer *p) { struct xe_hw_engine *hwe; enum xe_hw_engine_id id; - xe_pm_runtime_get(gt_to_xe(gt)); - xe_reg_sr_dump(&gt->reg_sr, p); drm_printf(p, "\n"); @@ -205,98 +164,42 @@ static int register_save_restore(struct xe_gt *gt, struct drm_printer *p) for_each_hw_engine(hwe, gt, id) xe_reg_whitelist_dump(&hwe->reg_whitelist, p); - xe_pm_runtime_put(gt_to_xe(gt)); - - return 0; -} - -static int workarounds(struct xe_gt *gt, struct drm_printer *p) -{ - xe_pm_runtime_get(gt_to_xe(gt)); - xe_wa_dump(gt, p); - xe_pm_runtime_put(gt_to_xe(gt)); - - return 0; -} - -static int tunings(struct xe_gt *gt, struct drm_printer *p) -{ - xe_pm_runtime_get(gt_to_xe(gt)); - xe_tuning_dump(gt, p); - xe_pm_runtime_put(gt_to_xe(gt)); - - return 0; -} - -static int pat(struct xe_gt *gt, struct drm_printer *p) -{ - xe_pm_runtime_get(gt_to_xe(gt)); - xe_pat_dump(gt, p); - xe_pm_runtime_put(gt_to_xe(gt)); - - return 0; -} - -static int mocs(struct xe_gt *gt, struct drm_printer *p) -{ - xe_pm_runtime_get(gt_to_xe(gt)); - xe_mocs_dump(gt, p); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int rcs_default_lrc(struct 
xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_RENDER); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int ccs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COMPUTE); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int bcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COPY); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int vcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_DECODE); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int vecs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_ENHANCE); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } static int hwconfig(struct xe_gt *gt, struct drm_printer *p) { - xe_pm_runtime_get(gt_to_xe(gt)); xe_guc_hwconfig_dump(&gt->uc.guc, p); - xe_pm_runtime_put(gt_to_xe(gt)); - return 0; } @@ -306,43 +209,134 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p) * - without access to the PF specific data */ static const struct drm_info_list vf_safe_debugfs_list[] = { - {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset}, - {"force_reset_sync", .show = xe_gt_debugfs_simple_show, .data = force_reset_sync}, - {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info}, - {"topology", .show = xe_gt_debugfs_simple_show, .data = topology}, - {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt}, - {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, - {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds}, - {"tunings", .show = xe_gt_debugfs_simple_show, .data = tunings}, - {"default_lrc_rcs", 
.show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc}, - {"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc}, - {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, - {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc}, - {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc}, - {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info}, - {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig}, + { "topology", .show = xe_gt_debugfs_show_with_rpm, .data = xe_gt_topology_dump }, + { "register-save-restore", + .show = xe_gt_debugfs_show_with_rpm, .data = register_save_restore }, + { "workarounds", .show = xe_gt_debugfs_show_with_rpm, .data = xe_wa_gt_dump }, + { "tunings", .show = xe_gt_debugfs_show_with_rpm, .data = xe_tuning_dump }, + { "default_lrc_rcs", .show = xe_gt_debugfs_show_with_rpm, .data = rcs_default_lrc }, + { "default_lrc_ccs", .show = xe_gt_debugfs_show_with_rpm, .data = ccs_default_lrc }, + { "default_lrc_bcs", .show = xe_gt_debugfs_show_with_rpm, .data = bcs_default_lrc }, + { "default_lrc_vcs", .show = xe_gt_debugfs_show_with_rpm, .data = vcs_default_lrc }, + { "default_lrc_vecs", .show = xe_gt_debugfs_show_with_rpm, .data = vecs_default_lrc }, + { "hwconfig", .show = xe_gt_debugfs_show_with_rpm, .data = hwconfig }, }; /* everything else should be added here */ static const struct drm_info_list pf_only_debugfs_list[] = { - {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, - {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs}, - {"pat", .show = xe_gt_debugfs_simple_show, .data = pat}, - {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info}, - {"steering", .show = xe_gt_debugfs_simple_show, .data = steering}, + { "hw_engines", .show = xe_gt_debugfs_show_with_rpm, .data = hw_engines }, + { "mocs", .show = xe_gt_debugfs_show_with_rpm, .data = xe_mocs_dump }, + 
{ "pat", .show = xe_gt_debugfs_show_with_rpm, .data = xe_pat_dump }, + { "powergate_info", .show = xe_gt_debugfs_show_with_rpm, .data = xe_gt_idle_pg_print }, + { "steering", .show = xe_gt_debugfs_show_with_rpm, .data = steering }, }; +static ssize_t write_to_gt_call(const char __user *userbuf, size_t count, loff_t *ppos, + void (*call)(struct xe_gt *), struct xe_gt *gt) +{ + bool yes; + int ret; + + if (*ppos) + return -EINVAL; + ret = kstrtobool_from_user(userbuf, count, &yes); + if (ret < 0) + return ret; + if (yes) + call(gt); + return count; +} + +static ssize_t stats_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct seq_file *s = file->private_data; + struct xe_gt *gt = s->private; + + return write_to_gt_call(userbuf, count, ppos, xe_gt_stats_clear, gt); +} + +static int stats_show(struct seq_file *s, void *unused) +{ + struct drm_printer p = drm_seq_file_printer(s); + struct xe_gt *gt = s->private; + + return xe_gt_stats_print_info(gt, &p); +} +DEFINE_SHOW_STORE_ATTRIBUTE(stats); + +static void force_reset(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_pm_runtime_get(xe); + xe_gt_reset_async(gt); + xe_pm_runtime_put(xe); +} + +static ssize_t force_reset_write(struct file *file, + const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct seq_file *s = file->private_data; + struct xe_gt *gt = s->private; + + return write_to_gt_call(userbuf, count, ppos, force_reset, gt); +} + +static int force_reset_show(struct seq_file *s, void *unused) +{ + struct xe_gt *gt = s->private; + + force_reset(gt); /* to be deprecated! 
*/ + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(force_reset); + +static void force_reset_sync(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_pm_runtime_get(xe); + xe_gt_reset(gt); + xe_pm_runtime_put(xe); +} + +static ssize_t force_reset_sync_write(struct file *file, + const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct seq_file *s = file->private_data; + struct xe_gt *gt = s->private; + + return write_to_gt_call(userbuf, count, ppos, force_reset_sync, gt); +} + +static int force_reset_sync_show(struct seq_file *s, void *unused) +{ + struct xe_gt *gt = s->private; + + force_reset_sync(gt); /* to be deprecated! */ + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(force_reset_sync); + void xe_gt_debugfs_register(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); struct drm_minor *minor = gt_to_xe(gt)->drm.primary; + struct dentry *parent = gt->tile->debugfs; struct dentry *root; + char symlink[16]; char name[8]; xe_gt_assert(gt, minor->debugfs_root); + if (IS_ERR(parent)) + return; + snprintf(name, sizeof(name), "gt%d", gt->info.id); - root = debugfs_create_dir(name, minor->debugfs_root); + root = debugfs_create_dir(name, parent); if (IS_ERR(root)) { drm_warn(&xe->drm, "Create GT directory failed"); return; @@ -355,6 +349,11 @@ void xe_gt_debugfs_register(struct xe_gt *gt) */ root->d_inode->i_private = gt; + /* VF safe */ + debugfs_create_file("stats", 0600, root, gt, &stats_fops); + debugfs_create_file("force_reset", 0600, root, gt, &force_reset_fops); + debugfs_create_file("force_reset_sync", 0600, root, gt, &force_reset_sync_fops); + drm_debugfs_create_files(vf_safe_debugfs_list, ARRAY_SIZE(vf_safe_debugfs_list), root, minor); @@ -370,4 +369,11 @@ void xe_gt_debugfs_register(struct xe_gt *gt) xe_gt_sriov_pf_debugfs_register(gt, root); else if (IS_SRIOV_VF(xe)) xe_gt_sriov_vf_debugfs_register(gt, root); + + /* + * Backwards compatibility only: create a link for the legacy clients + * who may expect gt/ directory at the root 
level, not the tile level. + */ + snprintf(symlink, sizeof(symlink), "tile%u/%s", gt->tile->id, name); + debugfs_create_symlink(name, minor->debugfs_root, symlink); } diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h index 05a6cc93c78c..32ee3264051b 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.h +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h @@ -11,5 +11,6 @@ struct xe_gt; void xe_gt_debugfs_register(struct xe_gt *gt); int xe_gt_debugfs_simple_show(struct seq_file *m, void *data); +int xe_gt_debugfs_show_with_rpm(struct seq_file *m, void *data); #endif diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 868a5d2c1a52..ce3c7810469f 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -29,19 +29,26 @@ * PCODE is the ultimate decision maker of the actual running frequency, based * on thermal and other running conditions. * - * Xe's Freq provides a sysfs API for frequency management: + * Xe's Freq provides a sysfs API for frequency management under + * ``<device>/tile#/gt#/freq0/`` directory. * - * device/tile#/gt#/freq0/<item>_freq *read-only* files: - * - act_freq: The actual resolved frequency decided by PCODE. - * - cur_freq: The current one requested by GuC PC to the PCODE. - * - rpn_freq: The Render Performance (RP) N level, which is the minimal one. - * - rpe_freq: The Render Performance (RP) E level, which is the efficient one. - * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one. + * **Read-only** attributes: * - * device/tile#/gt#/freq0/<item>_freq *read-write* files: - * - min_freq: Min frequency request. - * - max_freq: Max frequency request. - * If max <= min, then freq_min becomes a fixed frequency request. + * - ``act_freq``: The actual resolved frequency decided by PCODE. + * - ``cur_freq``: The current one requested by GuC PC to the PCODE. + * - ``rpn_freq``: The Render Performance (RP) N level, which is the minimal one. 
+ * - ``rpa_freq``: The Render Performance (RP) A level, which is the achievable one. + * Calculated by PCODE at runtime based on multiple running conditions + * - ``rpe_freq``: The Render Performance (RP) E level, which is the efficient one. + * Calculated by PCODE at runtime based on multiple running conditions + * - ``rp0_freq``: The Render Performance (RP) 0 level, which is the maximum one. + * + * **Read-write** attributes: + * + * - ``min_freq``: Min frequency request. + * - ``max_freq``: Max frequency request. + * If max <= min, then freq_min becomes a fixed frequency + * request. */ static struct xe_guc_pc * @@ -94,13 +101,8 @@ static ssize_t rp0_freq_show(struct kobject *kobj, { struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); - u32 freq; - - xe_pm_runtime_get(dev_to_xe(dev)); - freq = xe_guc_pc_get_rp0_freq(pc); - xe_pm_runtime_put(dev_to_xe(dev)); - return sysfs_emit(buf, "%d\n", freq); + return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rp0_freq(pc)); } static struct kobj_attribute attr_rp0_freq = __ATTR_RO(rp0_freq); @@ -222,6 +224,33 @@ static ssize_t max_freq_store(struct kobject *kobj, } static struct kobj_attribute attr_max_freq = __ATTR_RW(max_freq); +static ssize_t power_profile_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buff) +{ + struct device *dev = kobj_to_dev(kobj); + + xe_guc_pc_get_power_profile(dev_to_pc(dev), buff); + + return strlen(buff); +} + +static ssize_t power_profile_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buff, size_t count) +{ + struct device *dev = kobj_to_dev(kobj); + struct xe_guc_pc *pc = dev_to_pc(dev); + int err; + + xe_pm_runtime_get(dev_to_xe(dev)); + err = xe_guc_pc_set_power_profile(pc, buff); + xe_pm_runtime_put(dev_to_xe(dev)); + + return err ?: count; +} +static struct kobj_attribute attr_power_profile = __ATTR_RW(power_profile); + static const struct attribute *freq_attrs[] = { &attr_act_freq.attr, &attr_cur_freq.attr, @@ -231,6 
+260,7 @@ static const struct attribute *freq_attrs[] = { &attr_rpn_freq.attr, &attr_min_freq.attr, &attr_max_freq.attr, + &attr_power_profile.attr, NULL }; @@ -263,8 +293,10 @@ int xe_gt_freq_init(struct xe_gt *gt) return -ENOMEM; err = sysfs_create_files(gt->freq, freq_attrs); - if (err) + if (err) { + kobject_put(gt->freq); return err; + } err = devm_add_action_or_reset(xe->drm.dev, freq_fini, gt->freq); if (err) diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index c11206410a4d..3e3d1d52f630 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -5,6 +5,7 @@ #include <drm/drm_managed.h> +#include <generated/xe_wa_oob.h> #include "xe_force_wake.h" #include "xe_device.h" #include "xe_gt.h" @@ -16,6 +17,7 @@ #include "xe_mmio.h" #include "xe_pm.h" #include "xe_sriov.h" +#include "xe_wa.h" /** * DOC: Xe GT Idle @@ -121,9 +123,12 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) if (vcs_mask || vecs_mask) gtidle->powergate_enable = MEDIA_POWERGATE_ENABLE; - if (!xe_gt_is_media_type(gt)) + if (xe_gt_is_main_type(gt)) gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; + if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255) + gtidle->powergate_enable |= MEDIA_SAMPLERS_POWERGATE_ENABLE; + if (xe->info.platform != XE_DG1) { for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { if ((gt->info.engine_mask & BIT(i))) @@ -142,6 +147,12 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) xe_mmio_write32(mmio, RENDER_POWERGATE_IDLE_HYSTERESIS, 25); } + if (XE_GT_WA(gt, 14020316580)) + gtidle->powergate_enable &= ~(VDN_HCP_POWERGATE_ENABLE(0) | + VDN_MFXVDENC_POWERGATE_ENABLE(0) | + VDN_HCP_POWERGATE_ENABLE(2) | + VDN_MFXVDENC_POWERGATE_ENABLE(2)); + xe_mmio_write32(mmio, POWERGATE_ENABLE, gtidle->powergate_enable); xe_force_wake_put(gt_to_fw(gt), fw_ref); } @@ -246,6 +257,11 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n, 
str_up_down(pg_status & media_slices[n].status_bit)); } + + if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255) + drm_printf(p, "Media Samplers Power Gating Enabled: %s\n", + str_yes_no(pg_enabled & MEDIA_SAMPLERS_POWERGATE_ENABLE)); + return 0; } @@ -322,15 +338,11 @@ static void gt_idle_fini(void *arg) { struct kobject *kobj = arg; struct xe_gt *gt = kobj_to_gt(kobj->parent); - unsigned int fw_ref; xe_gt_idle_disable_pg(gt); - if (gt_to_xe(gt)->info.skip_guc_pc) { - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (gt_to_xe(gt)->info.skip_guc_pc) xe_gt_idle_disable_c6(gt); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - } sysfs_remove_files(kobj, gt_idle_attrs); kobject_put(kobj); @@ -390,14 +402,23 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt) RC_CTL_HW_ENABLE | RC_CTL_TO_MODE | RC_CTL_RC6_ENABLE); } -void xe_gt_idle_disable_c6(struct xe_gt *gt) +int xe_gt_idle_disable_c6(struct xe_gt *gt) { + unsigned int fw_ref; + xe_device_assert_mem_access(gt_to_xe(gt)); - xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); if (IS_SRIOV_VF(gt_to_xe(gt))) - return; + return 0; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; xe_mmio_write32(>->mmio, RC_CONTROL, 0); xe_mmio_write32(>->mmio, RC_STATE, 0); + + xe_force_wake_put(gt_to_fw(gt), fw_ref); + + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h index 591a01e181bc..9c34a155e102 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.h +++ b/drivers/gpu/drm/xe/xe_gt_idle.h @@ -13,7 +13,7 @@ struct xe_gt; int xe_gt_idle_init(struct xe_gt_idle *gtidle); void xe_gt_idle_enable_c6(struct xe_gt *gt); -void xe_gt_idle_disable_c6(struct xe_gt *gt); +int xe_gt_idle_disable_c6(struct xe_gt *gt); void xe_gt_idle_enable_pg(struct xe_gt *gt); void xe_gt_idle_disable_pg(struct xe_gt *gt); int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 
d4d9730f0d2c..164010860664 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -46,8 +46,6 @@ * MCR registers are not available on Virtual Function (VF). */ -#define STEER_SEMAPHORE XE_REG(0xFD0) - static inline struct xe_reg to_xe_reg(struct xe_reg_mcr reg_mcr) { return reg_mcr.__reg; @@ -171,6 +169,15 @@ static const struct xe_mmio_range xelpg_dss_steering_table[] = { {}, }; +static const struct xe_mmio_range xe3p_xpc_xecore_steering_table[] = { + { 0x008140, 0x00817F }, /* SLICE, XeCore, SLICE */ + { 0x009480, 0x00955F }, /* SLICE, XeCore */ + { 0x00D800, 0x00D87F }, /* SLICE */ + { 0x00DC00, 0x00E9FF }, /* SLICE, rsvd, XeCore, rsvd, XeCore, rsvd, XeCore */ + { 0x013000, 0x0135FF }, /* XeCore, SLICE */ + {}, +}; + static const struct xe_mmio_range xelpmp_oaddrm_steering_table[] = { { 0x393200, 0x39323F }, { 0x393400, 0x3934FF }, @@ -238,21 +245,60 @@ static const struct xe_mmio_range xe2lpm_instance0_steering_table[] = { }; static const struct xe_mmio_range xe3lpm_instance0_steering_table[] = { - { 0x384000, 0x3847DF }, /* GAM, rsvd, GAM */ + { 0x384000, 0x3841FF }, /* GAM */ + { 0x384400, 0x3847DF }, /* GAM */ { 0x384900, 0x384AFF }, /* GAM */ { 0x389560, 0x3895FF }, /* MEDIAINF */ { 0x38B600, 0x38B8FF }, /* L3BANK */ { 0x38C800, 0x38D07F }, /* GAM, MEDIAINF */ - { 0x38D0D0, 0x38F0FF }, /* MEDIAINF, GAM */ + { 0x38D0D0, 0x38F0FF }, /* MEDIAINF, rsvd, GAM */ { 0x393C00, 0x393C7F }, /* MEDIAINF */ {}, }; +/* + * Different "GAM" ranges have different rules; GAMWKRS, STLB, and GAMREQSTRM + * range subtypes need to be steered to (1,0), while all other GAM subtypes + * are steered to (0,0) and are included in the "INSTANCE0" table farther + * down. 
+ */ +static const struct xe_mmio_range xe3p_xpc_gam_grp1_steering_table[] = { + { 0x004000, 0x004AFF }, /* GAMREQSTRM, rsvd, STLB, GAMWKRS, GAMREQSTRM */ + { 0x00F100, 0x00FFFF }, /* GAMWKRS */ + {}, +}; + +static const struct xe_mmio_range xe3p_xpc_node_steering_table[] = { + { 0x00B000, 0x00B0FF }, + { 0x00D880, 0x00D8FF }, + {}, +}; + +static const struct xe_mmio_range xe3p_xpc_instance0_steering_table[] = { + { 0x00B500, 0x00B6FF }, /* PSMI */ + { 0x00C800, 0x00CFFF }, /* GAMCTRL */ + { 0x00F000, 0x00F0FF }, /* GAMCTRL */ + {}, +}; + static void init_steering_l3bank(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); struct xe_mmio *mmio = >->mmio; - if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { + if (GRAPHICS_VER(xe) >= 35) { + unsigned int first_bank = xe_l3_bank_mask_ffs(gt->fuse_topo.l3_bank_mask); + const int banks_per_node = 4; + unsigned int node = first_bank / banks_per_node; + + /* L3BANK ranges place node in grpID, bank in instanceid */ + gt->steering[L3BANK].group_target = node; + gt->steering[L3BANK].instance_target = first_bank % banks_per_node; + + /* NODE ranges split the node across grpid and instanceid */ + gt->steering[NODE].group_target = node >> 1; + gt->steering[NODE].instance_target = node & 1; + } else if (GRAPHICS_VERx100(xe) >= 1270) { u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK, xe_mmio_read32(mmio, MIRROR_FUSE3)); u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK, @@ -265,7 +311,7 @@ static void init_steering_l3bank(struct xe_gt *gt) gt->steering[L3BANK].group_target = __ffs(mslice_mask); gt->steering[L3BANK].instance_target = bank_mask & BIT(0) ? 
0 : 2; - } else if (gt_to_xe(gt)->info.platform == XE_DG2) { + } else if (xe->info.platform == XE_DG2) { u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK, xe_mmio_read32(mmio, MIRROR_FUSE3)); u32 bank = __ffs(mslice_mask) * 8; @@ -364,7 +410,7 @@ fallback: * @group: pointer to storage for steering group ID * @instance: pointer to storage for steering instance ID */ -void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) +void xe_gt_mcr_get_dss_steering(const struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) { xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS); @@ -420,10 +466,10 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt) gt->steering[SQIDI_PSMI].instance_target = select & 0x1; } -static void init_steering_inst0(struct xe_gt *gt) +static void init_steering_gam1(struct xe_gt *gt) { - gt->steering[INSTANCE0].group_target = 0; /* unused */ - gt->steering[INSTANCE0].instance_target = 0; /* unused */ + gt->steering[GAM1].group_target = 1; + gt->steering[GAM1].instance_target = 0; } static const struct { @@ -431,12 +477,14 @@ static const struct { void (*init)(struct xe_gt *gt); } xe_steering_types[] = { [L3BANK] = { "L3BANK", init_steering_l3bank }, + [NODE] = { "NODE", NULL }, /* initialized by l3bank init */ [MSLICE] = { "MSLICE", init_steering_mslice }, [LNCF] = { "LNCF", NULL }, /* initialized by mslice init */ - [DSS] = { "DSS", init_steering_dss }, + [DSS] = { "DSS / XeCore", init_steering_dss }, [OADDRM] = { "OADDRM / GPMXMT", init_steering_oaddrm }, [SQIDI_PSMI] = { "SQIDI_PSMI", init_steering_sqidi_psmi }, - [INSTANCE0] = { "INSTANCE 0", init_steering_inst0 }, + [GAM1] = { "GAMWKRS / STLB / GAMREQSTRM", init_steering_gam1 }, + [INSTANCE0] = { "INSTANCE 0", NULL }, [IMPLICIT_STEERING] = { "IMPLICIT", NULL }, }; @@ -446,25 +494,17 @@ static const struct { * * Perform early software only initialization of the MCR lock to allow * the synchronization on accessing the STEER_SEMAPHORE register and - * use the 
xe_gt_mcr_multicast_write() function. + * use the xe_gt_mcr_multicast_write() function, plus the minimum + * safe MCR registers required for VRAM/CCS probing. */ void xe_gt_mcr_init_early(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); + BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES); BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES); spin_lock_init(>->mcr_lock); -} - -/** - * xe_gt_mcr_init - Normal initialization of the MCR support - * @gt: GT structure - * - * Perform normal initialization of the MCR for all usages. - */ -void xe_gt_mcr_init(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); if (IS_SRIOV_VF(xe)) return; @@ -482,7 +522,19 @@ void xe_gt_mcr_init(struct xe_gt *gt) gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table; } } else { - if (GRAPHICS_VER(xe) >= 20) { + if (GRAPHICS_VERx100(xe) == 3511) { + /* + * TODO: there are some ranges in bspec with missing + * termination: [0x00B000, 0x00B0FF] and + * [0x00D880, 0x00D8FF] (NODE); [0x00B100, 0x00B3FF] + * (L3BANK). Update them here once bspec is updated. + */ + gt->steering[DSS].ranges = xe3p_xpc_xecore_steering_table; + gt->steering[GAM1].ranges = xe3p_xpc_gam_grp1_steering_table; + gt->steering[INSTANCE0].ranges = xe3p_xpc_instance0_steering_table; + gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table; + gt->steering[NODE].ranges = xe3p_xpc_node_steering_table; + } else if (GRAPHICS_VER(xe) >= 20) { gt->steering[DSS].ranges = xe2lpg_dss_steering_table; gt->steering[SQIDI_PSMI].ranges = xe2lpg_sqidi_psmi_steering_table; gt->steering[INSTANCE0].ranges = xe2lpg_instance0_steering_table; @@ -505,10 +557,27 @@ void xe_gt_mcr_init(struct xe_gt *gt) } } + /* Mark instance 0 as initialized, we need this early for VRAM and CCS probe. */ + gt->steering[INSTANCE0].initialized = true; +} + +/** + * xe_gt_mcr_init - Normal initialization of the MCR support + * @gt: GT structure + * + * Perform normal initialization of the MCR for all usages. 
+ */ +void xe_gt_mcr_init(struct xe_gt *gt) +{ + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + /* Select non-terminated steering target for each type */ - for (int i = 0; i < NUM_STEERING_TYPES; i++) + for (int i = 0; i < NUM_STEERING_TYPES; i++) { + gt->steering[i].initialized = true; if (gt->steering[i].ranges && xe_steering_types[i].init) xe_steering_types[i].init(gt); + } } /** @@ -530,7 +599,7 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt) u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) | REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2); - xe_mmio_write32(>->mmio, MCFG_MCR_SELECTOR, steer_val); + xe_mmio_write32(>->mmio, STEER_SEMAPHORE, steer_val); xe_mmio_write32(>->mmio, SF_MCR_SELECTOR, steer_val); /* * For GAM registers, all reads should be directed to instance 1 @@ -570,6 +639,10 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) { if (xe_mmio_in_range(>->mmio, >->steering[type].ranges[i], reg)) { + drm_WARN(>_to_xe(gt)->drm, !gt->steering[type].initialized, + "Uninitialized usage of MCR register %s/%#x\n", + xe_steering_types[type].name, reg.addr); + *group = gt->steering[type].group_target; *instance = gt->steering[type].instance_target; return true; diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h index bc06520befab..283a1c9770e2 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.h +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -31,7 +31,8 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, u8 *group, u8 *instance); void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p); -void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance); +void xe_gt_mcr_get_dss_steering(const struct xe_gt *gt, + unsigned int dss, u16 *group, u16 *instance); u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance); /* diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c 
deleted file mode 100644 index 10622ca471a2..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ /dev/null @@ -1,713 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include "xe_gt_pagefault.h" - -#include <linux/bitfield.h> -#include <linux/circ_buf.h> - -#include <drm/drm_exec.h> -#include <drm/drm_managed.h> - -#include "abi/guc_actions_abi.h" -#include "xe_bo.h" -#include "xe_gt.h" -#include "xe_gt_stats.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_guc.h" -#include "xe_guc_ct.h" -#include "xe_migrate.h" -#include "xe_svm.h" -#include "xe_trace_bo.h" -#include "xe_vm.h" - -struct pagefault { - u64 page_addr; - u32 asid; - u16 pdata; - u8 vfid; - u8 access_type; - u8 fault_type; - u8 fault_level; - u8 engine_class; - u8 engine_instance; - u8 fault_unsuccessful; - bool trva_fault; -}; - -enum access_type { - ACCESS_TYPE_READ = 0, - ACCESS_TYPE_WRITE = 1, - ACCESS_TYPE_ATOMIC = 2, - ACCESS_TYPE_RESERVED = 3, -}; - -enum fault_type { - NOT_PRESENT = 0, - WRITE_ACCESS_VIOLATION = 1, - ATOMIC_ACCESS_VIOLATION = 2, -}; - -struct acc { - u64 va_range_base; - u32 asid; - u32 sub_granularity; - u8 granularity; - u8 vfid; - u8 access_type; - u8 engine_class; - u8 engine_instance; -}; - -static bool access_is_atomic(enum access_type access_type) -{ - return access_type == ACCESS_TYPE_ATOMIC; -} - -static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) -{ - return BIT(tile->id) & vma->tile_present && - !(BIT(tile->id) & vma->tile_invalidated); -} - -static bool vma_matches(struct xe_vma *vma, u64 page_addr) -{ - if (page_addr > xe_vma_end(vma) - 1 || - page_addr + SZ_4K - 1 < xe_vma_start(vma)) - return false; - - return true; -} - -static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr) -{ - struct xe_vma *vma = NULL; - - if (vm->usm.last_fault_vma) { /* Fast lookup */ - if (vma_matches(vm->usm.last_fault_vma, page_addr)) - vma = vm->usm.last_fault_vma; - } - if (!vma) - vma = 
xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); - - return vma; -} - -static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, - bool atomic, unsigned int id) -{ - struct xe_bo *bo = xe_vma_bo(vma); - struct xe_vm *vm = xe_vma_vm(vma); - int err; - - err = xe_vm_lock_vma(exec, vma); - if (err) - return err; - - if (atomic && IS_DGFX(vm->xe)) { - if (xe_vma_is_userptr(vma)) { - err = -EACCES; - return err; - } - - /* Migrate to VRAM, move should invalidate the VMA first */ - err = xe_bo_migrate(bo, XE_PL_VRAM0 + id); - if (err) - return err; - } else if (bo) { - /* Create backing store if needed */ - err = xe_bo_validate(bo, vm, true); - if (err) - return err; - } - - return 0; -} - -static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, - bool atomic) -{ - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_tile *tile = gt_to_tile(gt); - struct drm_exec exec; - struct dma_fence *fence; - ktime_t end = 0; - int err; - - lockdep_assert_held_write(&vm->lock); - - xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1); - xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, xe_vma_size(vma) / 1024); - - trace_xe_vma_pagefault(vma); - - /* Check if VMA is valid */ - if (vma_is_valid(tile, vma) && !atomic) - return 0; - -retry_userptr: - if (xe_vma_is_userptr(vma) && - xe_vma_userptr_check_repin(to_userptr_vma(vma))) { - struct xe_userptr_vma *uvma = to_userptr_vma(vma); - - err = xe_vma_userptr_pin_pages(uvma); - if (err) - return err; - } - - /* Lock VM and BOs dma-resv */ - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - err = xe_pf_begin(&exec, vma, atomic, tile->id); - drm_exec_retry_on_contention(&exec); - if (xe_vm_validate_should_retry(&exec, err, &end)) - err = -EAGAIN; - if (err) - goto unlock_dma_resv; - - /* Bind VMA only to the GT that has faulted */ - trace_xe_vma_pf_bind(vma); - fence = xe_vma_rebind(vm, vma, BIT(tile->id)); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - if (xe_vm_validate_should_retry(&exec, 
err, &end)) - err = -EAGAIN; - goto unlock_dma_resv; - } - } - - dma_fence_wait(fence, false); - dma_fence_put(fence); - vma->tile_invalidated &= ~BIT(tile->id); - -unlock_dma_resv: - drm_exec_fini(&exec); - if (err == -EAGAIN) - goto retry_userptr; - - return err; -} - -static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid) -{ - struct xe_vm *vm; - - down_read(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, asid); - if (vm && xe_vm_in_fault_mode(vm)) - xe_vm_get(vm); - else - vm = ERR_PTR(-EINVAL); - up_read(&xe->usm.lock); - - return vm; -} - -static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) -{ - struct xe_device *xe = gt_to_xe(gt); - struct xe_vm *vm; - struct xe_vma *vma = NULL; - int err; - bool atomic; - - /* SW isn't expected to handle TRTT faults */ - if (pf->trva_fault) - return -EFAULT; - - vm = asid_to_vm(xe, pf->asid); - if (IS_ERR(vm)) - return PTR_ERR(vm); - - /* - * TODO: Change to read lock? Using write lock for simplicity. - */ - down_write(&vm->lock); - - if (xe_vm_is_closed(vm)) { - err = -ENOENT; - goto unlock_vm; - } - - vma = lookup_vma(vm, pf->page_addr); - if (!vma) { - err = -EINVAL; - goto unlock_vm; - } - - atomic = access_is_atomic(pf->access_type); - - if (xe_vma_is_cpu_addr_mirror(vma)) - err = xe_svm_handle_pagefault(vm, vma, gt, - pf->page_addr, atomic); - else - err = handle_vma_pagefault(gt, vma, atomic); - -unlock_vm: - if (!err) - vm->usm.last_fault_vma = vma; - up_write(&vm->lock); - xe_vm_put(vm); - - return err; -} - -static int send_pagefault_reply(struct xe_guc *guc, - struct xe_guc_pagefault_reply *reply) -{ - u32 action[] = { - XE_GUC_ACTION_PAGE_FAULT_RES_DESC, - reply->dw0, - reply->dw1, - }; - - return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); -} - -static void print_pagefault(struct xe_device *xe, struct pagefault *pf) -{ - drm_dbg(&xe->drm, "\n\tASID: %d\n" - "\tVFID: %d\n" - "\tPDATA: 0x%04x\n" - "\tFaulted Address: 0x%08x%08x\n" - "\tFaultType: %d\n" - "\tAccessType: 
%d\n" - "\tFaultLevel: %d\n" - "\tEngineClass: %d %s\n" - "\tEngineInstance: %d\n", - pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), - lower_32_bits(pf->page_addr), - pf->fault_type, pf->access_type, pf->fault_level, - pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class), - pf->engine_instance); -} - -#define PF_MSG_LEN_DW 4 - -static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) -{ - const struct xe_guc_pagefault_desc *desc; - bool ret = false; - - spin_lock_irq(&pf_queue->lock); - if (pf_queue->tail != pf_queue->head) { - desc = (const struct xe_guc_pagefault_desc *) - (pf_queue->data + pf_queue->tail); - - pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0); - pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0); - pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0); - pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0); - pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) << - PFD_PDATA_HI_SHIFT; - pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0); - pf->asid = FIELD_GET(PFD_ASID, desc->dw1); - pf->vfid = FIELD_GET(PFD_VFID, desc->dw2); - pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2); - pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2); - pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) << - PFD_VIRTUAL_ADDR_HI_SHIFT; - pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) << - PFD_VIRTUAL_ADDR_LO_SHIFT; - - pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) % - pf_queue->num_dw; - ret = true; - } - spin_unlock_irq(&pf_queue->lock); - - return ret; -} - -static bool pf_queue_full(struct pf_queue *pf_queue) -{ - lockdep_assert_held(&pf_queue->lock); - - return CIRC_SPACE(pf_queue->head, pf_queue->tail, - pf_queue->num_dw) <= - PF_MSG_LEN_DW; -} - -int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = gt_to_xe(gt); - struct pf_queue *pf_queue; - unsigned long flags; - u32 asid; - bool full; - 
- if (unlikely(len != PF_MSG_LEN_DW)) - return -EPROTO; - - asid = FIELD_GET(PFD_ASID, msg[1]); - pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE); - - /* - * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 - */ - xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW)); - - spin_lock_irqsave(&pf_queue->lock, flags); - full = pf_queue_full(pf_queue); - if (!full) { - memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32)); - pf_queue->head = (pf_queue->head + len) % - pf_queue->num_dw; - queue_work(gt->usm.pf_wq, &pf_queue->worker); - } else { - drm_warn(&xe->drm, "PF Queue full, shouldn't be possible"); - } - spin_unlock_irqrestore(&pf_queue->lock, flags); - - return full ? -ENOSPC : 0; -} - -#define USM_QUEUE_MAX_RUNTIME_MS 20 - -static void pf_queue_work_func(struct work_struct *w) -{ - struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker); - struct xe_gt *gt = pf_queue->gt; - struct xe_device *xe = gt_to_xe(gt); - struct xe_guc_pagefault_reply reply = {}; - struct pagefault pf = {}; - unsigned long threshold; - int ret; - - threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); - - while (get_pagefault(pf_queue, &pf)) { - ret = handle_pagefault(gt, &pf); - if (unlikely(ret)) { - print_pagefault(xe, &pf); - pf.fault_unsuccessful = 1; - drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret); - } - - reply.dw0 = FIELD_PREP(PFR_VALID, 1) | - FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) | - FIELD_PREP(PFR_REPLY, PFR_ACCESS) | - FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | - FIELD_PREP(PFR_ASID, pf.asid); - - reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) | - FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) | - FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) | - FIELD_PREP(PFR_PDATA, pf.pdata); - - send_pagefault_reply(>->uc.guc, &reply); - - if (time_after(jiffies, threshold) && - pf_queue->tail != pf_queue->head) { - queue_work(gt->usm.pf_wq, w); - break; - } - } -} - -static void 
acc_queue_work_func(struct work_struct *w); - -static void pagefault_fini(void *arg) -{ - struct xe_gt *gt = arg; - struct xe_device *xe = gt_to_xe(gt); - - if (!xe->info.has_usm) - return; - - destroy_workqueue(gt->usm.acc_wq); - destroy_workqueue(gt->usm.pf_wq); -} - -static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) -{ - struct xe_device *xe = gt_to_xe(gt); - xe_dss_mask_t all_dss; - int num_dss, num_eus; - - bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, - XE_MAX_DSS_FUSE_BITS); - - num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS); - num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, - XE_MAX_EU_FUSE_BITS) * num_dss; - - /* - * user can issue separate page faults per EU and per CS - * - * XXX: Multiplier required as compute UMD are getting PF queue errors - * without it. Follow on why this multiplier is required. - */ -#define PF_MULTIPLIER 8 - pf_queue->num_dw = - (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; -#undef PF_MULTIPLIER - - pf_queue->gt = gt; - pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw, - sizeof(u32), GFP_KERNEL); - if (!pf_queue->data) - return -ENOMEM; - - spin_lock_init(&pf_queue->lock); - INIT_WORK(&pf_queue->worker, pf_queue_work_func); - - return 0; -} - -int xe_gt_pagefault_init(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - int i, ret = 0; - - if (!xe->info.has_usm) - return 0; - - for (i = 0; i < NUM_PF_QUEUE; ++i) { - ret = xe_alloc_pf_queue(gt, >->usm.pf_queue[i]); - if (ret) - return ret; - } - for (i = 0; i < NUM_ACC_QUEUE; ++i) { - gt->usm.acc_queue[i].gt = gt; - spin_lock_init(>->usm.acc_queue[i].lock); - INIT_WORK(>->usm.acc_queue[i].worker, acc_queue_work_func); - } - - gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue", - WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE); - if (!gt->usm.pf_wq) - return -ENOMEM; - - gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", - WQ_UNBOUND | WQ_HIGHPRI, - NUM_ACC_QUEUE); 
- if (!gt->usm.acc_wq) { - destroy_workqueue(gt->usm.pf_wq); - return -ENOMEM; - } - - return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt); -} - -void xe_gt_pagefault_reset(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - int i; - - if (!xe->info.has_usm) - return; - - for (i = 0; i < NUM_PF_QUEUE; ++i) { - spin_lock_irq(>->usm.pf_queue[i].lock); - gt->usm.pf_queue[i].head = 0; - gt->usm.pf_queue[i].tail = 0; - spin_unlock_irq(>->usm.pf_queue[i].lock); - } - - for (i = 0; i < NUM_ACC_QUEUE; ++i) { - spin_lock(>->usm.acc_queue[i].lock); - gt->usm.acc_queue[i].head = 0; - gt->usm.acc_queue[i].tail = 0; - spin_unlock(>->usm.acc_queue[i].lock); - } -} - -static int granularity_in_byte(int val) -{ - switch (val) { - case 0: - return SZ_128K; - case 1: - return SZ_2M; - case 2: - return SZ_16M; - case 3: - return SZ_64M; - default: - return 0; - } -} - -static int sub_granularity_in_byte(int val) -{ - return (granularity_in_byte(val) / 32); -} - -static void print_acc(struct xe_device *xe, struct acc *acc) -{ - drm_warn(&xe->drm, "Access counter request:\n" - "\tType: %s\n" - "\tASID: %d\n" - "\tVFID: %d\n" - "\tEngine: %d:%d\n" - "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n" - "\tSub_Granularity Vector: 0x%08x\n" - "\tVA Range base: 0x%016llx\n", - acc->access_type ? 
"AC_NTFY_VAL" : "AC_TRIG_VAL", - acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, - granularity_in_byte(acc->granularity) / SZ_1K, - sub_granularity_in_byte(acc->granularity) / SZ_1K, - acc->sub_granularity, acc->va_range_base); -} - -static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc) -{ - u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) * - sub_granularity_in_byte(acc->granularity); - - return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K); -} - -static int handle_acc(struct xe_gt *gt, struct acc *acc) -{ - struct xe_device *xe = gt_to_xe(gt); - struct xe_tile *tile = gt_to_tile(gt); - struct drm_exec exec; - struct xe_vm *vm; - struct xe_vma *vma; - int ret = 0; - - /* We only support ACC_TRIGGER at the moment */ - if (acc->access_type != ACC_TRIGGER) - return -EINVAL; - - vm = asid_to_vm(xe, acc->asid); - if (IS_ERR(vm)) - return PTR_ERR(vm); - - down_read(&vm->lock); - - /* Lookup VMA */ - vma = get_acc_vma(vm, acc); - if (!vma) { - ret = -EINVAL; - goto unlock_vm; - } - - trace_xe_vma_acc(vma); - - /* Userptr or null can't be migrated, nothing to do */ - if (xe_vma_has_no_bo(vma)) - goto unlock_vm; - - /* Lock VM and BOs dma-resv */ - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - ret = xe_pf_begin(&exec, vma, true, tile->id); - drm_exec_retry_on_contention(&exec); - if (ret) - break; - } - - drm_exec_fini(&exec); -unlock_vm: - up_read(&vm->lock); - xe_vm_put(vm); - - return ret; -} - -#define make_u64(hi__, low__) ((u64)(hi__) << 32 | (u64)(low__)) - -#define ACC_MSG_LEN_DW 4 - -static bool get_acc(struct acc_queue *acc_queue, struct acc *acc) -{ - const struct xe_guc_acc_desc *desc; - bool ret = false; - - spin_lock(&acc_queue->lock); - if (acc_queue->tail != acc_queue->head) { - desc = (const struct xe_guc_acc_desc *) - (acc_queue->data + acc_queue->tail); - - acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2); - acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 | - 
FIELD_GET(ACC_SUBG_LO, desc->dw0); - acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1); - acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1); - acc->asid = FIELD_GET(ACC_ASID, desc->dw1); - acc->vfid = FIELD_GET(ACC_VFID, desc->dw2); - acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0); - acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI, - desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO); - - acc_queue->tail = (acc_queue->tail + ACC_MSG_LEN_DW) % - ACC_QUEUE_NUM_DW; - ret = true; - } - spin_unlock(&acc_queue->lock); - - return ret; -} - -static void acc_queue_work_func(struct work_struct *w) -{ - struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker); - struct xe_gt *gt = acc_queue->gt; - struct xe_device *xe = gt_to_xe(gt); - struct acc acc = {}; - unsigned long threshold; - int ret; - - threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); - - while (get_acc(acc_queue, &acc)) { - ret = handle_acc(gt, &acc); - if (unlikely(ret)) { - print_acc(xe, &acc); - drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret); - } - - if (time_after(jiffies, threshold) && - acc_queue->tail != acc_queue->head) { - queue_work(gt->usm.acc_wq, w); - break; - } - } -} - -static bool acc_queue_full(struct acc_queue *acc_queue) -{ - lockdep_assert_held(&acc_queue->lock); - - return CIRC_SPACE(acc_queue->head, acc_queue->tail, ACC_QUEUE_NUM_DW) <= - ACC_MSG_LEN_DW; -} - -int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct acc_queue *acc_queue; - u32 asid; - bool full; - - /* - * The below logic doesn't work unless ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW == 0 - */ - BUILD_BUG_ON(ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW); - - if (unlikely(len != ACC_MSG_LEN_DW)) - return -EPROTO; - - asid = FIELD_GET(ACC_ASID, msg[1]); - acc_queue = >->usm.acc_queue[asid % NUM_ACC_QUEUE]; - - spin_lock(&acc_queue->lock); - full = acc_queue_full(acc_queue); - if (!full) { - memcpy(acc_queue->data 
+ acc_queue->head, msg, - len * sizeof(u32)); - acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW; - queue_work(gt->usm.acc_wq, &acc_queue->worker); - } else { - drm_warn(>_to_xe(gt)->drm, "ACC Queue full, dropping ACC"); - } - spin_unlock(&acc_queue->lock); - - return full ? -ENOSPC : 0; -} diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.h b/drivers/gpu/drm/xe/xe_gt_pagefault.h deleted file mode 100644 index 839c065a5e4c..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2022 Intel Corporation - */ - -#ifndef _XE_GT_PAGEFAULT_H_ -#define _XE_GT_PAGEFAULT_H_ - -#include <linux/types.h> - -struct xe_gt; -struct xe_guc; - -int xe_gt_pagefault_init(struct xe_gt *gt); -void xe_gt_pagefault_reset(struct xe_gt *gt); -int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len); -int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len); - -#endif /* _XE_GT_PAGEFAULT_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h index 11da0228cea7..1313d32862db 100644 --- a/drivers/gpu/drm/xe/xe_gt_printk.h +++ b/drivers/gpu/drm/xe/xe_gt_printk.h @@ -6,18 +6,22 @@ #ifndef _XE_GT_PRINTK_H_ #define _XE_GT_PRINTK_H_ -#include <drm/drm_print.h> - #include "xe_gt_types.h" +#include "xe_tile_printk.h" + +#define __XE_GT_PRINTK_FMT(_gt, _fmt, _args...) "GT%u: " _fmt, (_gt)->info.id, ##_args #define xe_gt_printk(_gt, _level, _fmt, ...) \ - drm_##_level(>_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + xe_tile_printk((_gt)->tile, _level, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__)) + +#define xe_gt_err(_gt, _fmt, ...) \ + xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__) #define xe_gt_err_once(_gt, _fmt, ...) \ xe_gt_printk((_gt), err_once, _fmt, ##__VA_ARGS__) -#define xe_gt_err(_gt, _fmt, ...) \ - xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__) +#define xe_gt_err_ratelimited(_gt, _fmt, ...) 
\ + xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__) #define xe_gt_warn(_gt, _fmt, ...) \ xe_gt_printk((_gt), warn, _fmt, ##__VA_ARGS__) @@ -31,20 +35,20 @@ #define xe_gt_dbg(_gt, _fmt, ...) \ xe_gt_printk((_gt), dbg, _fmt, ##__VA_ARGS__) -#define xe_gt_err_ratelimited(_gt, _fmt, ...) \ - xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__) +#define xe_gt_WARN_type(_gt, _type, _condition, _fmt, ...) \ + xe_tile_WARN##_type((_gt)->tile, _condition, _fmt, ## __VA_ARGS__) #define xe_gt_WARN(_gt, _condition, _fmt, ...) \ - drm_WARN(>_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + xe_gt_WARN_type((_gt),, _condition, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__)) #define xe_gt_WARN_ONCE(_gt, _condition, _fmt, ...) \ - drm_WARN_ONCE(>_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + xe_gt_WARN_type((_gt), _ONCE, _condition, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__)) #define xe_gt_WARN_ON(_gt, _condition) \ - xe_gt_WARN((_gt), _condition, "%s(%s)", "gt_WARN_ON", __stringify(_condition)) + xe_gt_WARN((_gt), _condition, "%s(%s)", "WARN_ON", __stringify(_condition)) #define xe_gt_WARN_ON_ONCE(_gt, _condition) \ - xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition)) + xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition)) static inline void __xe_gt_printfn_err(struct drm_printer *p, struct va_format *vaf) { @@ -67,12 +71,12 @@ static inline void __xe_gt_printfn_dbg(struct drm_printer *p, struct va_format * /* * The original xe_gt_dbg() callsite annotations are useless here, - * redirect to the tweaked drm_dbg_printer() instead. + * redirect to the tweaked xe_tile_dbg_printer() instead. 
*/ - dbg = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, NULL); + dbg = xe_tile_dbg_printer((gt)->tile); dbg.origin = p->origin; - drm_printf(&dbg, "GT%u: %pV", gt->info.id, vaf); + drm_printf(&dbg, __XE_GT_PRINTK_FMT(gt, "%pV", vaf)); } /** diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index c08efca6420e..0714c758b9c1 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -16,6 +16,7 @@ #include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_service.h" #include "xe_gt_sriov_printk.h" +#include "xe_guc_submit.h" #include "xe_mmio.h" #include "xe_pm.h" @@ -47,9 +48,21 @@ static int pf_alloc_metadata(struct xe_gt *gt) static void pf_init_workers(struct xe_gt *gt) { + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); INIT_WORK(>->sriov.pf.workers.restart, pf_worker_restart_func); } +static void pf_fini_workers(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + if (disable_work_sync(>->sriov.pf.workers.restart)) { + xe_gt_sriov_dbg_verbose(gt, "pending restart disabled!\n"); + /* release an rpm reference taken on the worker's behalf */ + xe_pm_runtime_put(gt_to_xe(gt)); + } +} + /** * xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on PF. * @gt: the &xe_gt to initialize @@ -79,6 +92,21 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) return 0; } +static void pf_fini_action(void *arg) +{ + struct xe_gt *gt = arg; + + pf_fini_workers(gt); +} + +static int pf_init_late(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + return devm_add_action_or_reset(xe->drm.dev, pf_fini_action, gt); +} + /** * xe_gt_sriov_pf_init - Prepare SR-IOV PF data structures on PF. 
* @gt: the &xe_gt to initialize @@ -95,7 +123,15 @@ int xe_gt_sriov_pf_init(struct xe_gt *gt) if (err) return err; - return xe_gt_sriov_pf_migration_init(gt); + err = xe_gt_sriov_pf_migration_init(gt); + if (err) + return err; + + err = pf_init_late(gt); + if (err) + return err; + + return 0; } static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe) @@ -122,39 +158,19 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) xe_gt_sriov_pf_service_update(gt); } -static u32 pf_get_vf_regs_stride(struct xe_device *xe) -{ - return GRAPHICS_VERx100(xe) > 1200 ? 0x400 : 0x1000; -} - -static struct xe_reg xe_reg_vf_to_pf(struct xe_reg vf_reg, unsigned int vfid, u32 stride) -{ - struct xe_reg pf_reg = vf_reg; - - pf_reg.vf = 0; - pf_reg.addr += stride * vfid; - - return pf_reg; -} - static void pf_clear_vf_scratch_regs(struct xe_gt *gt, unsigned int vfid) { - u32 stride = pf_get_vf_regs_stride(gt_to_xe(gt)); - struct xe_reg scratch; - int n, count; + struct xe_mmio mmio; + int n; + + xe_mmio_init_vf_view(&mmio, >->mmio, vfid); if (xe_gt_is_media_type(gt)) { - count = MED_VF_SW_FLAG_COUNT; - for (n = 0; n < count; n++) { - scratch = xe_reg_vf_to_pf(MED_VF_SW_FLAG(n), vfid, stride); - xe_mmio_write32(>->mmio, scratch, 0); - } + for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) + xe_mmio_write32(&mmio, MED_VF_SW_FLAG(n), 0); } else { - count = VF_SW_FLAG_COUNT; - for (n = 0; n < count; n++) { - scratch = xe_reg_vf_to_pf(VF_SW_FLAG(n), vfid, stride); - xe_mmio_write32(>->mmio, scratch, 0); - } + for (n = 0; n < VF_SW_FLAG_COUNT; n++) + xe_mmio_write32(&mmio, VF_SW_FLAG(n), 0); } } @@ -172,13 +188,38 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) pf_clear_vf_scratch_regs(gt, vfid); } +static void pf_cancel_restart(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + if (cancel_work_sync(>->sriov.pf.workers.restart)) { + xe_gt_sriov_dbg_verbose(gt, "pending restart canceled!\n"); + /* release an rpm reference taken on the worker's behalf 
*/ + xe_pm_runtime_put(gt_to_xe(gt)); + } +} + +/** + * xe_gt_sriov_pf_stop_prepare() - Prepare to stop SR-IOV support. + * @gt: the &xe_gt + * + * This function can only be called on the PF. + */ +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) +{ + pf_cancel_restart(gt); +} + static void pf_restart(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - xe_pm_runtime_get(xe); + xe_gt_assert(gt, !xe_pm_runtime_suspended(xe)); + xe_gt_sriov_pf_config_restart(gt); xe_gt_sriov_pf_control_restart(gt); + + /* release an rpm reference taken on our behalf */ xe_pm_runtime_put(xe); xe_gt_sriov_dbg(gt, "restart completed\n"); @@ -197,8 +238,13 @@ static void pf_queue_restart(struct xe_gt *gt) xe_gt_assert(gt, IS_SRIOV_PF(xe)); - if (!queue_work(xe->sriov.wq, >->sriov.pf.workers.restart)) + /* take an rpm reference on behalf of the worker */ + xe_pm_runtime_get_noresume(xe); + + if (!queue_work(xe->sriov.wq, >->sriov.pf.workers.restart)) { xe_gt_sriov_dbg(gt, "restart already in queue!\n"); + xe_pm_runtime_put(xe); + } } /** @@ -211,3 +257,27 @@ void xe_gt_sriov_pf_restart(struct xe_gt *gt) { pf_queue_restart(gt); } + +static void pf_flush_restart(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + flush_work(>->sriov.pf.workers.restart); +} + +/** + * xe_gt_sriov_pf_wait_ready() - Wait until per-GT PF SR-IOV support is ready. + * @gt: the &xe_gt + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_wait_ready(struct xe_gt *gt) +{ + /* don't wait if there is another ongoing reset */ + if (xe_guc_read_stopped(>->uc.guc)) + return -EBUSY; + + pf_flush_restart(gt); + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index f474509411c0..e7fde3f9937a 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -11,8 +11,10 @@ struct xe_gt; #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_init_early(struct xe_gt *gt); int xe_gt_sriov_pf_init(struct xe_gt *gt); +int xe_gt_sriov_pf_wait_ready(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt); void xe_gt_sriov_pf_restart(struct xe_gt *gt); #else static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) @@ -29,6 +31,10 @@ static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) { } +static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) +{ +} + static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt) { } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 2420a548cacc..59c5c6b4d994 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -9,6 +9,7 @@ #include "abi/guc_actions_sriov_abi.h" #include "abi/guc_klvs_abi.h" +#include "regs/xe_gtt_defs.h" #include "regs/xe_guc_regs.h" #include "xe_bo.h" @@ -33,6 +34,7 @@ #include "xe_migrate.h" #include "xe_sriov.h" #include "xe_ttm_vram_mgr.h" +#include "xe_vram_types.h" #include "xe_wopcm.h" #define make_u64_from_u32(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) @@ -104,13 +106,13 @@ static int pf_push_vf_buf_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs } if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); void 
*klvs = xe_guc_buf_cpu_ptr(buf); char name[8]; - xe_gt_sriov_info(gt, "pushed %s config with %u KLV%s:\n", - xe_sriov_function_name(vfid, name, sizeof(name)), - num_klvs, str_plural(num_klvs)); + xe_gt_sriov_dbg(gt, "pushed %s config with %u KLV%s:\n", + xe_sriov_function_name(vfid, name, sizeof(name)), + num_klvs, str_plural(num_klvs)); xe_guc_klv_print(klvs, num_dwords, &p); } @@ -238,26 +240,35 @@ static struct xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned i } /* Return: number of configuration dwords written */ -static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) +static u32 encode_ggtt(u32 *cfg, u64 start, u64 size, bool details) { u32 n = 0; - if (xe_ggtt_node_allocated(config->ggtt_region)) { - if (details) { - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); - cfg[n++] = lower_32_bits(config->ggtt_region->base.start); - cfg[n++] = upper_32_bits(config->ggtt_region->base.start); - } - - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); - cfg[n++] = lower_32_bits(config->ggtt_region->base.size); - cfg[n++] = upper_32_bits(config->ggtt_region->base.size); + if (details) { + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); + cfg[n++] = lower_32_bits(start); + cfg[n++] = upper_32_bits(start); } + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); + cfg[n++] = lower_32_bits(size); + cfg[n++] = upper_32_bits(size); + return n; } /* Return: number of configuration dwords written */ +static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) +{ + struct xe_ggtt_node *node = config->ggtt_region; + + if (!xe_ggtt_node_allocated(node)) + return 0; + + return encode_ggtt(cfg, node->base.start, node->base.size, details); +} + +/* Return: number of configuration dwords written */ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) { u32 n = 0; @@ -282,8 +293,8 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool if 
(config->lmem_obj) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_LMEM_SIZE); - cfg[n++] = lower_32_bits(config->lmem_obj->size); - cfg[n++] = upper_32_bits(config->lmem_obj->size); + cfg[n++] = lower_32_bits(xe_bo_size(config->lmem_obj)); + cfg[n++] = upper_32_bits(xe_bo_size(config->lmem_obj)); } cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM); @@ -332,6 +343,17 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) } xe_gt_assert(gt, num_dwords <= max_cfg_dwords); + if (vfid == PFID) { + u64 ggtt_start = xe_wopcm_size(gt_to_xe(gt)); + u64 ggtt_size = gt_to_tile(gt)->mem.ggtt->size - ggtt_start; + + /* plain PF config data will never include a real GGTT region */ + xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config, true)); + + /* fake PF GGTT config covers full GGTT range except reserved WOPCM */ + num_dwords += encode_ggtt(cfg + num_dwords, ggtt_start, ggtt_size, true); + } + num_klvs = xe_guc_klv_count(cfg, num_dwords); err = pf_push_vf_buf_klvs(gt, vfid, num_klvs, buf, num_dwords); @@ -376,7 +398,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt) { u64 spare; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -388,7 +410,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt) static int pf_set_spare_ggtt(struct xe_gt *gt, u64 size) { - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -443,7 +465,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) int err; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); size = round_up(size, alignment); @@ -492,7 +514,7 @@ static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) 
struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); struct xe_ggtt_node *node = config->ggtt_region; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); return xe_ggtt_node_allocated(node) ? node->base.size : 0; } @@ -560,7 +582,7 @@ int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size { int err; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); if (vfid) @@ -622,7 +644,7 @@ int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid, int err = 0; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); if (!num_vfs) return 0; @@ -676,6 +698,22 @@ static u64 pf_estimate_fair_ggtt(struct xe_gt *gt, unsigned int num_vfs) return fair; } +static u64 pf_profile_fair_ggtt(struct xe_gt *gt, unsigned int num_vfs) +{ + bool admin_only_pf = xe_sriov_pf_admin_only(gt_to_xe(gt)); + u64 shareable = ALIGN_DOWN(GUC_GGTT_TOP, SZ_512M); + u64 alignment = pf_get_ggtt_alignment(gt); + + if (admin_only_pf && num_vfs == 1) + return ALIGN_DOWN(shareable, alignment); + + /* need to hardcode due to ~512M of GGTT being reserved */ + if (num_vfs > 56) + return SZ_64M - SZ_8M; + + return rounddown_pow_of_two(div_u64(shareable, num_vfs)); +} + /** * xe_gt_sriov_pf_config_set_fair_ggtt - Provision many VFs with fair GGTT. 
* @gt: the &xe_gt (can't be media) @@ -689,11 +727,12 @@ static u64 pf_estimate_fair_ggtt(struct xe_gt *gt, unsigned int num_vfs) int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs) { + u64 profile = pf_profile_fair_ggtt(gt, num_vfs); u64 fair; xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); fair = pf_estimate_fair_ggtt(gt, num_vfs); @@ -702,9 +741,71 @@ int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid, if (!fair) return -ENOSPC; + fair = min(fair, profile); + if (fair < profile) + xe_gt_sriov_info(gt, "Using non-profile provisioning (%s %llu vs %llu)\n", + "GGTT", fair, profile); + return xe_gt_sriov_pf_config_bulk_set_ggtt(gt, vfid, num_vfs, fair); } +/** + * xe_gt_sriov_pf_config_ggtt_save() - Save a VF provisioned GGTT data into a buffer. + * @gt: the &xe_gt + * @vfid: VF identifier (can't be 0) + * @buf: the GGTT data destination buffer (or NULL to query the buf size) + * @size: the size of the buffer (or 0 to query the buf size) + * + * This function can only be called on PF. + * + * Return: size of the buffer needed to save GGTT data if querying, + * 0 on successful save or a negative error code on failure. + */ +ssize_t xe_gt_sriov_pf_config_ggtt_save(struct xe_gt *gt, unsigned int vfid, + void *buf, size_t size) +{ + struct xe_ggtt_node *node; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, !(!buf ^ !size)); + + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + node = pf_pick_vf_config(gt, vfid)->ggtt_region; + + if (!buf) + return xe_ggtt_node_pt_size(node); + + return xe_ggtt_node_save(node, buf, size, vfid); +} + +/** + * xe_gt_sriov_pf_config_ggtt_restore() - Restore a VF provisioned GGTT data from a buffer. 
+ * @gt: the &xe_gt + * @vfid: VF identifier (can't be 0) + * @buf: the GGTT data source buffer + * @size: the size of the buffer + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_ggtt_restore(struct xe_gt *gt, unsigned int vfid, + const void *buf, size_t size) +{ + struct xe_ggtt_node *node; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid); + + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + node = pf_pick_vf_config(gt, vfid)->ggtt_region; + + return xe_ggtt_node_load(node, buf, size, vfid); +} + static u32 pf_get_min_spare_ctxs(struct xe_gt *gt) { /* XXX: preliminary */ @@ -903,7 +1004,8 @@ static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, uns const char *what, const char *(*unit)(u32), unsigned int last, int err) { - xe_gt_assert(gt, first); + char name[8]; + xe_gt_assert(gt, num_vfs); xe_gt_assert(gt, first <= last); @@ -911,8 +1013,9 @@ static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, uns return pf_config_set_u32_done(gt, first, value, get(gt, first), what, unit, err); if (unlikely(err)) { - xe_gt_sriov_notice(gt, "Failed to bulk provision VF%u..VF%u with %s\n", - first, first + num_vfs - 1, what); + xe_gt_sriov_notice(gt, "Failed to bulk provision %s..VF%u with %s\n", + xe_sriov_function_name(first, name, sizeof(name)), + first + num_vfs - 1, what); if (last > first) pf_config_bulk_set_u32_done(gt, first, last - first, value, get, what, unit, last, 0); @@ -921,8 +1024,9 @@ static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, uns /* pick actual value from first VF - bulk provisioning shall be equal across all VFs */ value = get(gt, first); - xe_gt_sriov_info(gt, "VF%u..VF%u provisioned with %u%s %s\n", - first, first + num_vfs - 1, value, unit(value), what); + xe_gt_sriov_info(gt, "%s..VF%u provisioned with %u%s %s\n", + xe_sriov_function_name(first, name, 
sizeof(name)), + first + num_vfs - 1, value, unit(value), what); return 0; } @@ -961,6 +1065,16 @@ int xe_gt_sriov_pf_config_bulk_set_ctxs(struct xe_gt *gt, unsigned int vfid, "GuC context IDs", no_unit, n, err); } +static u32 pf_profile_fair_ctxs(struct xe_gt *gt, unsigned int num_vfs) +{ + bool admin_only_pf = xe_sriov_pf_admin_only(gt_to_xe(gt)); + + if (admin_only_pf && num_vfs == 1) + return ALIGN_DOWN(GUC_ID_MAX, SZ_1K); + + return rounddown_pow_of_two(GUC_ID_MAX / num_vfs); +} + static u32 pf_estimate_fair_ctxs(struct xe_gt *gt, unsigned int num_vfs) { struct xe_guc_id_mgr *idm = >->uc.guc.submission_state.idm; @@ -993,6 +1107,7 @@ static u32 pf_estimate_fair_ctxs(struct xe_gt *gt, unsigned int num_vfs) int xe_gt_sriov_pf_config_set_fair_ctxs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs) { + u32 profile = pf_profile_fair_ctxs(gt, num_vfs); u32 fair; xe_gt_assert(gt, vfid); @@ -1005,6 +1120,11 @@ int xe_gt_sriov_pf_config_set_fair_ctxs(struct xe_gt *gt, unsigned int vfid, if (!fair) return -ENOSPC; + fair = min(fair, profile); + if (fair < profile) + xe_gt_sriov_info(gt, "Using non-profile provisioning (%s %u vs %u)\n", + "GuC context IDs", fair, profile); + return xe_gt_sriov_pf_config_bulk_set_ctxs(gt, vfid, num_vfs, fair); } @@ -1209,6 +1329,17 @@ int xe_gt_sriov_pf_config_bulk_set_dbs(struct xe_gt *gt, unsigned int vfid, "GuC doorbell IDs", no_unit, n, err); } +static u32 pf_profile_fair_dbs(struct xe_gt *gt, unsigned int num_vfs) +{ + bool admin_only_pf = xe_sriov_pf_admin_only(gt_to_xe(gt)); + + /* XXX: preliminary */ + if (admin_only_pf && num_vfs == 1) + return GUC_NUM_DOORBELLS - SZ_16; + + return rounddown_pow_of_two(GUC_NUM_DOORBELLS / (num_vfs + 1)); +} + static u32 pf_estimate_fair_dbs(struct xe_gt *gt, unsigned int num_vfs) { struct xe_guc_db_mgr *dbm = >->uc.guc.dbm; @@ -1241,6 +1372,7 @@ static u32 pf_estimate_fair_dbs(struct xe_gt *gt, unsigned int num_vfs) int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int 
vfid, unsigned int num_vfs) { + u32 profile = pf_profile_fair_dbs(gt, num_vfs); u32 fair; xe_gt_assert(gt, vfid); @@ -1253,6 +1385,11 @@ int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid, if (!fair) return -ENOSPC; + fair = min(fair, profile); + if (fair < profile) + xe_gt_sriov_info(gt, "Using non-profile provisioning (%s %u vs %u)\n", + "GuC doorbell IDs", fair, profile); + return xe_gt_sriov_pf_config_bulk_set_dbs(gt, vfid, num_vfs, fair); } @@ -1299,7 +1436,7 @@ static u64 pf_get_vf_config_lmem(struct xe_gt *gt, unsigned int vfid) struct xe_bo *bo; bo = config->lmem_obj; - return bo ? bo->size : 0; + return bo ? xe_bo_size(bo) : 0; } static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) @@ -1327,7 +1464,17 @@ static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 si static void pf_force_lmtt_invalidate(struct xe_device *xe) { - /* TODO */ + struct xe_lmtt *lmtt; + struct xe_tile *tile; + unsigned int tid; + + xe_assert(xe, xe_device_has_lmtt(xe)); + xe_assert(xe, IS_SRIOV_PF(xe)); + + for_each_tile(tile, xe, tid) { + lmtt = &tile->sriov.pf.lmtt; + xe_lmtt_invalidate_hw(lmtt); + } } static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid) @@ -1388,7 +1535,7 @@ static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid) err = xe_lmtt_populate_pages(lmtt, vfid, bo, offset); if (err) goto fail; - offset += bo->size; + offset += xe_bo_size(bo); } } @@ -1403,16 +1550,19 @@ fail: return err; } -static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) +/* Return: %true if there was an LMEM provisioned, %false otherwise */ +static bool pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) { xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt))); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); if (config->lmem_obj) { 
xe_bo_unpin_map_no_vm(config->lmem_obj); config->lmem_obj = NULL; + return true; } + return false; } static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) @@ -1425,7 +1575,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) xe_gt_assert(gt, vfid); xe_gt_assert(gt, IS_DGFX(xe)); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); size = round_up(size, pf_get_lmem_alignment(gt)); @@ -1444,23 +1594,17 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) return 0; xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M); - bo = xe_bo_create_locked(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_NEEDS_2M | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_PINNED_LATE_RESTORE); + bo = xe_bo_create_pin_range_novm(xe, tile, + ALIGN(size, PAGE_SIZE), 0, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_NEEDS_2M | + XE_BO_FLAG_PINNED | + XE_BO_FLAG_PINNED_LATE_RESTORE | + XE_BO_FLAG_FORCE_USER_VRAM); if (IS_ERR(bo)) return PTR_ERR(bo); - err = xe_bo_pin(bo); - xe_bo_unlock(bo); - if (unlikely(err)) { - xe_bo_put(bo); - return err; - } - config->lmem_obj = bo; if (xe_device_has_lmtt(xe)) { @@ -1469,12 +1613,12 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) goto release; } - err = pf_push_vf_cfg_lmem(gt, vfid, bo->size); + err = pf_push_vf_cfg_lmem(gt, vfid, xe_bo_size(bo)); if (unlikely(err)) goto reset_lmtt; xe_gt_sriov_dbg_verbose(gt, "VF%u LMEM %zu (%zuM)\n", - vfid, bo->size, bo->size / SZ_1M); + vfid, xe_bo_size(bo), xe_bo_size(bo) / SZ_1M); return 0; reset_lmtt: @@ -1520,6 +1664,9 @@ int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size { int err; + if (!xe_device_has_lmtt(gt_to_xe(gt))) + return -EPERM; + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); if (vfid) err = pf_provision_vf_lmem(gt, vfid, size); @@ -1550,7 
+1697,7 @@ int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, int err = 0; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); if (!num_vfs) return 0; @@ -1568,11 +1715,37 @@ int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, "LMEM", n, err); } +static struct xe_bo *pf_get_vf_config_lmem_obj(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + return config->lmem_obj; +} + +/** + * xe_gt_sriov_pf_config_get_lmem_obj() - Take a reference to the struct &xe_bo backing VF LMEM. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + * + * This function can only be called on PF. + * The caller is responsible for calling xe_bo_put() on the returned object. + * + * Return: pointer to struct &xe_bo backing VF LMEM (if any). + */ +struct xe_bo *xe_gt_sriov_pf_config_get_lmem_obj(struct xe_gt *gt, unsigned int vfid) +{ + xe_gt_assert(gt, vfid); + + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + return xe_bo_get(pf_get_vf_config_lmem_obj(gt, vfid)); +} + static u64 pf_query_free_lmem(struct xe_gt *gt) { struct xe_tile *tile = gt->tile; - return xe_ttm_vram_get_avail(&tile->mem.vram.ttm.manager); + return xe_ttm_vram_get_avail(&tile->mem.vram->ttm.manager); } static u64 pf_query_max_lmem(struct xe_gt *gt) @@ -1600,7 +1773,6 @@ static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) u64 fair; fair = div_u64(available, num_vfs); - fair = rounddown_pow_of_two(fair); /* XXX: ttm_vram_mgr & drm_buddy limitation */ fair = ALIGN_DOWN(fair, alignment); #ifdef MAX_FAIR_LMEM fair = min_t(u64, MAX_FAIR_LMEM, fair); @@ -1627,9 +1799,9 @@ int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); - if (!IS_DGFX(gt_to_xe(gt))) + if 
(!xe_device_has_lmtt(gt_to_xe(gt))) return 0; mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); @@ -1661,7 +1833,7 @@ int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { err = xe_gt_sriov_pf_config_set_fair_ggtt(gt, vfid, num_vfs); result = result ?: err; err = xe_gt_sriov_pf_config_set_fair_lmem(gt, vfid, num_vfs); @@ -1694,7 +1866,7 @@ static int pf_provision_exec_quantum(struct xe_gt *gt, unsigned int vfid, return 0; } -static int pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) +static u32 pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); @@ -1702,47 +1874,107 @@ static int pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) } /** - * xe_gt_sriov_pf_config_set_exec_quantum - Configure execution quantum for the VF. + * xe_gt_sriov_pf_config_set_exec_quantum_locked() - Configure PF/VF execution quantum. * @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the PF or VF identifier * @exec_quantum: requested execution quantum in milliseconds (0 is infinity) * - * This function can only be called on PF. + * This function can only be called on PF with the master mutex hold. + * It will log the provisioned value or an error in case of the failure. * * Return: 0 on success or a negative error code on failure. 
*/ -int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, - u32 exec_quantum) +int xe_gt_sriov_pf_config_set_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid, + u32 exec_quantum) { int err; - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_exec_quantum(gt, vfid, exec_quantum); - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); return pf_config_set_u32_done(gt, vfid, exec_quantum, - xe_gt_sriov_pf_config_get_exec_quantum(gt, vfid), + pf_get_exec_quantum(gt, vfid), "execution quantum", exec_quantum_unit, err); } /** - * xe_gt_sriov_pf_config_get_exec_quantum - Get VF's execution quantum. + * xe_gt_sriov_pf_config_set_exec_quantum() - Configure PF/VF execution quantum. * @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the PF or VF identifier + * @exec_quantum: requested execution quantum in milliseconds (0 is infinity) * * This function can only be called on PF. + * It will log the provisioned value or an error in case of the failure. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, + u32 exec_quantum) +{ + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + return xe_gt_sriov_pf_config_set_exec_quantum_locked(gt, vfid, exec_quantum); +} + +/** + * xe_gt_sriov_pf_config_get_exec_quantum_locked() - Get PF/VF execution quantum. + * @gt: the &xe_gt + * @vfid: the PF or VF identifier * - * Return: VF's (or PF's) execution quantum in milliseconds. + * This function can only be called on PF with the master mutex hold. + * + * Return: execution quantum in milliseconds (or 0 if infinity). + */ +u32 xe_gt_sriov_pf_config_get_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_get_exec_quantum(gt, vfid); +} + +/** + * xe_gt_sriov_pf_config_get_exec_quantum() - Get PF/VF execution quantum. 
+ * @gt: the &xe_gt + * @vfid: the PF or VF identifier + * + * This function can only be called on PF. + * + * Return: execution quantum in milliseconds (or 0 if infinity). */ u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) { - u32 exec_quantum; + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); - exec_quantum = pf_get_exec_quantum(gt, vfid); - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + return pf_get_exec_quantum(gt, vfid); +} + +/** + * xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked() - Configure EQ for PF and VFs. + * @gt: the &xe_gt to configure + * @exec_quantum: requested execution quantum in milliseconds (0 is infinity) + * + * This function can only be called on PF with the master mutex hold. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(struct xe_gt *gt, u32 exec_quantum) +{ + unsigned int totalvfs = xe_gt_sriov_pf_get_totalvfs(gt); + unsigned int n; + int err = 0; + + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); - return exec_quantum; + for (n = 0; n <= totalvfs; n++) { + err = pf_provision_exec_quantum(gt, VFID(n), exec_quantum); + if (err) + break; + } + + return pf_config_bulk_set_u32_done(gt, 0, 1 + totalvfs, exec_quantum, + pf_get_exec_quantum, "execution quantum", + exec_quantum_unit, n, err); } static const char *preempt_timeout_unit(u32 preempt_timeout) @@ -1765,7 +1997,7 @@ static int pf_provision_preempt_timeout(struct xe_gt *gt, unsigned int vfid, return 0; } -static int pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) +static u32 pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); @@ -1773,47 +2005,106 @@ static int pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) } /** - * xe_gt_sriov_pf_config_set_preempt_timeout - Configure preemption timeout for the VF. 
+ * xe_gt_sriov_pf_config_set_preempt_timeout_locked() - Configure PF/VF preemption timeout. * @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the PF or VF identifier * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity) * - * This function can only be called on PF. + * This function can only be called on PF with the master mutex hold. + * It will log the provisioned value or an error in case of the failure. * * Return: 0 on success or a negative error code on failure. */ -int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, - u32 preempt_timeout) +int xe_gt_sriov_pf_config_set_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid, + u32 preempt_timeout) { int err; - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_preempt_timeout(gt, vfid, preempt_timeout); - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); return pf_config_set_u32_done(gt, vfid, preempt_timeout, - xe_gt_sriov_pf_config_get_preempt_timeout(gt, vfid), + pf_get_preempt_timeout(gt, vfid), "preemption timeout", preempt_timeout_unit, err); } /** - * xe_gt_sriov_pf_config_get_preempt_timeout - Get VF's preemption timeout. + * xe_gt_sriov_pf_config_set_preempt_timeout() - Configure PF/VF preemption timeout. * @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the PF or VF identifier + * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity) * * This function can only be called on PF. * - * Return: VF's (or PF's) preemption timeout in microseconds. + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, + u32 preempt_timeout) +{ + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + return xe_gt_sriov_pf_config_set_preempt_timeout_locked(gt, vfid, preempt_timeout); +} + +/** + * xe_gt_sriov_pf_config_get_preempt_timeout_locked() - Get PF/VF preemption timeout. + * @gt: the &xe_gt + * @vfid: the PF or VF identifier + * + * This function can only be called on PF with the master mutex hold. + * + * Return: preemption timeout in microseconds (or 0 if infinity). + */ +u32 xe_gt_sriov_pf_config_get_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_get_preempt_timeout(gt, vfid); +} + +/** + * xe_gt_sriov_pf_config_get_preempt_timeout() - Get PF/VF preemption timeout. + * @gt: the &xe_gt + * @vfid: the PF or VF identifier + * + * This function can only be called on PF. + * + * Return: preemption timeout in microseconds (or 0 if infinity). */ u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) { - u32 preempt_timeout; + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); - preempt_timeout = pf_get_preempt_timeout(gt, vfid); - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + return pf_get_preempt_timeout(gt, vfid); +} - return preempt_timeout; +/** + * xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked() - Configure PT for PF and VFs. + * @gt: the &xe_gt to configure + * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity) + * + * This function can only be called on PF with the master mutex hold. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(struct xe_gt *gt, u32 preempt_timeout) +{ + unsigned int totalvfs = xe_gt_sriov_pf_get_totalvfs(gt); + unsigned int n; + int err = 0; + + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + for (n = 0; n <= totalvfs; n++) { + err = pf_provision_preempt_timeout(gt, VFID(n), preempt_timeout); + if (err) + break; + } + + return pf_config_bulk_set_u32_done(gt, 0, 1 + totalvfs, preempt_timeout, + pf_get_preempt_timeout, "preemption timeout", + preempt_timeout_unit, n, err); } static const char *sched_priority_unit(u32 priority) @@ -1988,12 +2279,13 @@ static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); struct xe_device *xe = gt_to_xe(gt); + bool released; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { pf_release_vf_config_ggtt(gt, config); if (IS_DGFX(xe)) { - pf_release_vf_config_lmem(gt, config); - if (xe_device_has_lmtt(xe)) + released = pf_release_vf_config_lmem(gt, config); + if (released && xe_device_has_lmtt(xe)) pf_update_vf_lmtt(xe, vfid); } } @@ -2080,7 +2372,7 @@ static int pf_sanitize_vf_resources(struct xe_gt *gt, u32 vfid, long timeout) * Only GGTT and LMEM requires to be cleared by the PF. * GuC doorbell IDs and context IDs do not need any clearing. 
*/ - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { pf_sanitize_ggtt(config->ggtt_region, vfid); if (IS_DGFX(xe)) err = pf_sanitize_lmem(tile, config->lmem_obj, timeout); @@ -2147,7 +2439,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; struct xe_device *xe = gt_to_xe(gt); - bool is_primary = !xe_gt_is_media_type(gt); + bool is_primary = xe_gt_is_main_type(gt); bool valid_ggtt, valid_ctxs, valid_dbs; bool valid_any, valid_all; @@ -2163,7 +2455,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) valid_all = valid_all && valid_ggtt; valid_any = valid_any || (valid_ggtt && is_primary); - if (IS_DGFX(xe)) { + if (xe_device_has_lmtt(xe)) { bool valid_lmem = pf_get_vf_config_lmem(primary_gt, vfid); valid_any = valid_any || (valid_lmem && is_primary); @@ -2347,7 +2639,7 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return -EINVAL; if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); drm_printf(&p, "restoring VF%u config:\n", vfid); xe_guc_klv_print(buf, size / sizeof(u32), &p); @@ -2364,6 +2656,35 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return err; } +static void pf_prepare_self_config(struct xe_gt *gt) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, PFID); + + /* + * We want PF to be allowed to use all of context ID, doorbells IDs + * and whole usable GGTT area. While we can store ctxs/dbs numbers + * directly in the config structure, can't do the same with the GGTT + * configuration, so let it be prepared on demand while pushing KLVs. 
+ */ + config->num_ctxs = GUC_ID_MAX; + config->num_dbs = GUC_NUM_DOORBELLS; +} + +static int pf_push_self_config(struct xe_gt *gt) +{ + int err; + + err = pf_push_full_vf_config(gt, PFID); + if (err) { + xe_gt_sriov_err(gt, "Failed to push self configuration (%pe)\n", + ERR_PTR(err)); + return err; + } + + xe_gt_sriov_dbg_verbose(gt, "self configuration completed\n"); + return 0; +} + static void fini_config(void *arg) { struct xe_gt *gt = arg; @@ -2387,9 +2708,18 @@ static void fini_config(void *arg) int xe_gt_sriov_pf_config_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + int err; xe_gt_assert(gt, IS_SRIOV_PF(xe)); + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + pf_prepare_self_config(gt); + err = pf_push_self_config(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (err) + return err; + return devm_add_action_or_reset(xe->drm.dev, fini_config, gt); } @@ -2407,6 +2737,10 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt) unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); unsigned int fail = 0, skip = 0; + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + pf_push_self_config(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = 1; n <= total_vfs; n++) { if (xe_gt_sriov_pf_config_is_empty(gt, n)) skip++; @@ -2550,10 +2884,10 @@ int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p) if (!config->lmem_obj) continue; - string_get_size(config->lmem_obj->size, 1, STRING_UNITS_2, + string_get_size(xe_bo_size(config->lmem_obj), 1, STRING_UNITS_2, buf, sizeof(buf)); drm_printf(p, "VF%u:\t%zu\t(%s)\n", - n, config->lmem_obj->size, buf); + n, xe_bo_size(config->lmem_obj), buf); } mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); @@ -2598,3 +2932,7 @@ int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_prin return 0; } + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_gt_sriov_pf_config_kunit.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h 
b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index 513e6512a575..4975730423d7 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -36,14 +36,25 @@ int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs, u64 size); +struct xe_bo *xe_gt_sriov_pf_config_get_lmem_obj(struct xe_gt *gt, unsigned int vfid); u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum); +u32 xe_gt_sriov_pf_config_get_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid, + u32 exec_quantum); +int xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(struct xe_gt *gt, u32 exec_quantum); + u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout); +u32 xe_gt_sriov_pf_config_get_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid, + u32 preempt_timeout); +int xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(struct xe_gt *gt, u32 preempt_timeout); + u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority); @@ -61,6 +72,11 @@ ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *bu int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, const void *buf, size_t size); +ssize_t xe_gt_sriov_pf_config_ggtt_save(struct xe_gt *gt, 
unsigned int vfid, + void *buf, size_t size); +int xe_gt_sriov_pf_config_ggtt_restore(struct xe_gt *gt, unsigned int vfid, + const void *buf, size_t size); + bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_config_init(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 1f50aec3a059..bf48b05797de 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -15,10 +15,15 @@ #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_monitor.h" -#include "xe_gt_sriov_pf_service.h" #include "xe_gt_sriov_printk.h" #include "xe_guc_ct.h" #include "xe_sriov.h" +#include "xe_sriov_packet.h" +#include "xe_sriov_packet_types.h" +#include "xe_sriov_pf_control.h" +#include "xe_sriov_pf_migration.h" +#include "xe_sriov_pf_service.h" +#include "xe_tile.h" static const char *control_cmd_to_string(u32 cmd) { @@ -169,6 +174,7 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit) CASE2STR(FLR_SEND_START); CASE2STR(FLR_WAIT_GUC); CASE2STR(FLR_GUC_DONE); + CASE2STR(FLR_SYNC); CASE2STR(FLR_RESET_CONFIG); CASE2STR(FLR_RESET_DATA); CASE2STR(FLR_RESET_MMIO); @@ -178,9 +184,20 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit) CASE2STR(PAUSE_SEND_PAUSE); CASE2STR(PAUSE_WAIT_GUC); CASE2STR(PAUSE_GUC_DONE); - CASE2STR(PAUSE_SAVE_GUC); CASE2STR(PAUSE_FAILED); CASE2STR(PAUSED); + CASE2STR(SAVE_WIP); + CASE2STR(SAVE_PROCESS_DATA); + CASE2STR(SAVE_WAIT_DATA); + CASE2STR(SAVE_DATA_DONE); + CASE2STR(SAVE_FAILED); + CASE2STR(SAVED); + CASE2STR(RESTORE_WIP); + CASE2STR(RESTORE_PROCESS_DATA); + CASE2STR(RESTORE_WAIT_DATA); + CASE2STR(RESTORE_DATA_DONE); + CASE2STR(RESTORE_FAILED); + CASE2STR(RESTORED); CASE2STR(RESUME_WIP); CASE2STR(RESUME_SEND_RESUME); CASE2STR(RESUME_FAILED); @@ -205,6 +222,8 @@ static unsigned long 
pf_get_default_timeout(enum xe_gt_sriov_control_bits bit) case XE_GT_SRIOV_STATE_FLR_WIP: case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG: return 5 * HZ; + case XE_GT_SRIOV_STATE_RESTORE_WIP: + return 20 * HZ; default: return HZ; } @@ -222,7 +241,7 @@ static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid); - return &cs->state; + return cs->state; } static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid, @@ -270,12 +289,19 @@ static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid, return result; } +static void pf_track_vf_state(struct xe_gt *gt, unsigned int vfid, + enum xe_gt_sriov_control_bits bit, + const char *what) +{ + xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) %s\n", + vfid, control_bit_to_string(bit), bit, what); +} + static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid, enum xe_gt_sriov_control_bits bit) { if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) { - xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) enter\n", - vfid, control_bit_to_string(bit), bit); + pf_track_vf_state(gt, vfid, bit, "enter"); return true; } return false; @@ -285,8 +311,7 @@ static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid, enum xe_gt_sriov_control_bits bit) { if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) { - xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) exit\n", - vfid, control_bit_to_string(bit), bit); + pf_track_vf_state(gt, vfid, bit, "exit"); return true; } return false; @@ -320,6 +345,8 @@ static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid) pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED); pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED); pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED); } #define pf_enter_vf_state_machine_bug(gt, vfid) ({ \ @@ -350,6 
+377,8 @@ static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid) static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid); static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid); +static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid); +static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid); static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid); static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid); @@ -371,6 +400,8 @@ static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid) pf_exit_vf_flr_wip(gt, vfid); pf_exit_vf_stop_wip(gt, vfid); + pf_exit_vf_save_wip(gt, vfid); + pf_exit_vf_restore_wip(gt, vfid); pf_exit_vf_pause_wip(gt, vfid); pf_exit_vf_resume_wip(gt, vfid); @@ -390,6 +421,8 @@ static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid) pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED); pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED); pf_exit_vf_mismatch(gt, vfid); pf_exit_vf_wip(gt, vfid); } @@ -420,8 +453,7 @@ static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid) * : PAUSE_GUC_DONE o-----restart * : | : * : | o---<--busy : - * : v / / : - * : PAUSE_SAVE_GUC : + * : / : * : / : * : / : * :....o..............o...............o...........: @@ -441,7 +473,6 @@ static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid) pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE); pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC); pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE); - pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC); } } @@ -472,41 +503,12 @@ static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid) pf_enter_vf_pause_failed(gt, vfid); } -static void pf_enter_vf_pause_save_guc(struct 
xe_gt *gt, unsigned int vfid) -{ - if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC)) - pf_enter_vf_state_machine_bug(gt, vfid); -} - -static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid) -{ - int err; - - if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC)) - return false; - - err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid); - if (err) { - /* retry if busy */ - if (err == -EBUSY) { - pf_enter_vf_pause_save_guc(gt, vfid); - return true; - } - /* give up on error */ - if (err == -EIO) - pf_enter_vf_mismatch(gt, vfid); - } - - pf_enter_vf_pause_completed(gt, vfid); - return true; -} - static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid) { if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE)) return false; - pf_enter_vf_pause_save_guc(gt, vfid); + pf_enter_vf_pause_completed(gt, vfid); return true; } @@ -615,7 +617,7 @@ int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid) } if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { - xe_gt_sriov_info(gt, "VF%u paused!\n", vfid); + xe_gt_sriov_dbg(gt, "VF%u paused!\n", vfid); return 0; } @@ -666,6 +668,8 @@ static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid) { pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED); pf_exit_vf_mismatch(gt, vfid); pf_exit_vf_wip(gt, vfid); } @@ -744,6 +748,16 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid) return -EPERM; } + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { + xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid); + return -EBUSY; + } + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { + xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid); + return -EBUSY; + } + if (!pf_enter_vf_resume_wip(gt, vfid)) { 
xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid); return -EALREADY; @@ -754,7 +768,7 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid) return err; if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) { - xe_gt_sriov_info(gt, "VF%u resumed!\n", vfid); + xe_gt_sriov_dbg(gt, "VF%u resumed!\n", vfid); return 0; } @@ -768,6 +782,562 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid) } /** + * DOC: The VF SAVE state machine + * + * SAVE extends the PAUSED state. + * + * The VF SAVE state machine looks like:: + * + * ....PAUSED.................................................... + * : : + * : (O)<---------o : + * : | \ : + * : save (SAVED) (SAVE_FAILED) : + * : | ^ ^ : + * : | | | : + * : ....V...............o...........o......SAVE_WIP......... : + * : : | | | : : + * : : | empty | : : + * : : | | | : : + * : : | | | : : + * : : | DATA_DONE | : : + * : : | ^ | : : + * : : | | error : : + * : : | no_data / : : + * : : | / / : : + * : : | / / : : + * : : | / / : : + * : : o---------->PROCESS_DATA<----consume : : + * : : \ \ : : + * : : \ \ : : + * : : \ \ : : + * : : ring_full----->WAIT_DATA : : + * : : : : + * : :......................................................: : + * :............................................................: + * + * For the full state machine view, see `The VF state machine`_. 
+ */ + +static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { + xe_gt_sriov_pf_migration_ring_free(gt, vfid); + + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); + } +} + +static void pf_enter_vf_saved(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED)) + pf_enter_vf_state_machine_bug(gt, vfid); + + xe_gt_sriov_dbg(gt, "VF%u saved!\n", vfid); + + pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); + pf_exit_vf_mismatch(gt, vfid); + pf_exit_vf_wip(gt, vfid); +} + +static void pf_enter_vf_save_failed(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED)) + pf_enter_vf_state_machine_bug(gt, vfid); + + wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid)); + + pf_exit_vf_wip(gt, vfid); +} + +static int pf_handle_vf_save_data(struct xe_gt *gt, unsigned int vfid) +{ + int ret; + + if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, + XE_SRIOV_PACKET_TYPE_GUC)) { + ret = xe_gt_sriov_pf_migration_guc_save(gt, vfid); + if (ret) + return ret; + + xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, + XE_SRIOV_PACKET_TYPE_GUC); + + return -EAGAIN; + } + + if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, + XE_SRIOV_PACKET_TYPE_GGTT)) { + ret = xe_gt_sriov_pf_migration_ggtt_save(gt, vfid); + if (ret) + return ret; + + xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, + XE_SRIOV_PACKET_TYPE_GGTT); + + return -EAGAIN; + } + + if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, + XE_SRIOV_PACKET_TYPE_MMIO)) { + ret = xe_gt_sriov_pf_migration_mmio_save(gt, vfid); + if (ret) + return ret; + + xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, + XE_SRIOV_PACKET_TYPE_MMIO); + + return -EAGAIN; + } + + if 
(xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, + XE_SRIOV_PACKET_TYPE_VRAM)) { + ret = xe_gt_sriov_pf_migration_vram_save(gt, vfid); + if (ret == -EAGAIN) + return -EAGAIN; + else if (ret) + return ret; + + xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, + XE_SRIOV_PACKET_TYPE_VRAM); + + return -EAGAIN; + } + + return 0; +} + +static bool pf_handle_vf_save(struct xe_gt *gt, unsigned int vfid) +{ + int ret; + + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA)) + return false; + + if (xe_gt_sriov_pf_migration_ring_full(gt, vfid)) { + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA); + return true; + } + + ret = pf_handle_vf_save_data(gt, vfid); + if (ret == -EAGAIN) + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); + else if (ret) + pf_enter_vf_save_failed(gt, vfid); + else + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); + + return true; +} + +static void pf_exit_vf_save_wait_data(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)) + return; + + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); + pf_queue_vf(gt, vfid); +} + +static bool pf_enter_vf_save_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { + xe_gt_sriov_pf_migration_save_init(gt, vfid); + pf_enter_vf_wip(gt, vfid); + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); + pf_queue_vf(gt, vfid); + return true; + } + + return false; +} + +/** + * xe_gt_sriov_pf_control_check_save_data_done() - Check if all save migration data was produced. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: true if all migration data was produced, false otherwise. 
+ */ +bool xe_gt_sriov_pf_control_check_save_data_done(struct xe_gt *gt, unsigned int vfid) +{ + return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); +} + +/** + * xe_gt_sriov_pf_control_check_save_failed() - Check if save processing has failed. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: true if save processing failed, false otherwise. + */ +bool xe_gt_sriov_pf_control_check_save_failed(struct xe_gt *gt, unsigned int vfid) +{ + return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED); +} + +/** + * xe_gt_sriov_pf_control_process_save_data() - Queue VF save migration data processing. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_process_save_data(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED)) + return -EIO; + + pf_exit_vf_save_wait_data(gt, vfid); + + return 0; +} + +/** + * xe_gt_sriov_pf_control_trigger_save_vf() - Start an SR-IOV VF migration data save sequence. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { + xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid); + return -EPERM; + } + + if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { + xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid); + return -EPERM; + } + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { + xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid); + return -EBUSY; + } + + if (!pf_enter_vf_save_wip(gt, vfid)) { + xe_gt_sriov_dbg(gt, "VF%u save already in progress!\n", vfid); + return -EALREADY; + } + + return 0; +} + +/** + * xe_gt_sriov_pf_control_finish_save_vf() - Complete a VF migration data save sequence. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE)) { + xe_gt_sriov_err(gt, "VF%u save is still in progress!\n", vfid); + return -EIO; + } + + pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); + pf_enter_vf_saved(gt, vfid); + + return 0; +} + +/** + * DOC: The VF RESTORE state machine + * + * RESTORE extends the PAUSED state. + * + * The VF RESTORE state machine looks like:: + * + * ....PAUSED.................................................... + * : : + * : (O)<---------o : + * : | \ : + * : restore (RESTORED) (RESTORE_FAILED) : + * : | ^ ^ : + * : | | | : + * : ....V...............o...........o......RESTORE_WIP...... 
: + * : : | | | : : + * : : | empty | : : + * : : | | | : : + * : : | | | : : + * : : | DATA_DONE | : : + * : : | ^ | : : + * : : | | error : : + * : : | trailer / : : + * : : | / / : : + * : : | / / : : + * : : | / / : : + * : : o---------->PROCESS_DATA<----produce : : + * : : \ \ : : + * : : \ \ : : + * : : \ \ : : + * : : ring_empty---->WAIT_DATA : : + * : : : : + * : :......................................................: : + * :............................................................: + * + * For the full state machine view, see `The VF state machine`_. + */ + +static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { + xe_gt_sriov_pf_migration_ring_free(gt, vfid); + + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE); + } +} + +static void pf_enter_vf_restored(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED)) + pf_enter_vf_state_machine_bug(gt, vfid); + + xe_gt_sriov_dbg(gt, "VF%u restored!\n", vfid); + + pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); + pf_exit_vf_mismatch(gt, vfid); + pf_exit_vf_wip(gt, vfid); +} + +static void pf_enter_vf_restore_failed(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) + pf_enter_vf_state_machine_bug(gt, vfid); + + wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid)); + + pf_exit_vf_wip(gt, vfid); +} + +static int pf_handle_vf_restore_data(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_sriov_packet *data = xe_gt_sriov_pf_migration_restore_consume(gt, vfid); + int ret = 0; + + switch (data->hdr.type) { + case XE_SRIOV_PACKET_TYPE_GGTT: + ret = xe_gt_sriov_pf_migration_ggtt_restore(gt, vfid, data); + break; + case XE_SRIOV_PACKET_TYPE_MMIO: + ret 
= xe_gt_sriov_pf_migration_mmio_restore(gt, vfid, data); + break; + case XE_SRIOV_PACKET_TYPE_GUC: + ret = xe_gt_sriov_pf_migration_guc_restore(gt, vfid, data); + break; + case XE_SRIOV_PACKET_TYPE_VRAM: + ret = xe_gt_sriov_pf_migration_vram_restore(gt, vfid, data); + break; + default: + xe_gt_sriov_notice(gt, "Skipping VF%u unknown data type: %d\n", + vfid, data->hdr.type); + break; + } + + xe_sriov_packet_free(data); + + return ret; +} + +static bool pf_handle_vf_restore(struct xe_gt *gt, unsigned int vfid) +{ + int ret; + + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA)) + return false; + + if (xe_gt_sriov_pf_migration_ring_empty(gt, vfid)) { + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE)) + pf_enter_vf_restored(gt, vfid); + else + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA); + + return true; + } + + ret = pf_handle_vf_restore_data(gt, vfid); + if (ret) + pf_enter_vf_restore_failed(gt, vfid); + else + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); + + return true; +} + +static void pf_exit_vf_restore_wait_data(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)) + return; + + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); + pf_queue_vf(gt, vfid); +} + +static bool pf_enter_vf_restore_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { + pf_enter_vf_wip(gt, vfid); + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); + pf_queue_vf(gt, vfid); + return true; + } + + return false; +} + +/** + * xe_gt_sriov_pf_control_check_restore_failed() - Check if restore processing has failed. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: true if restore processing failed, false otherwise. 
+ */ +bool xe_gt_sriov_pf_control_check_restore_failed(struct xe_gt *gt, unsigned int vfid) +{ + return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED); +} + +/** + * xe_gt_sriov_pf_control_restore_data_done() - Indicate the end of VF migration data stream. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_restore_data_done(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE)) { + pf_enter_vf_state_machine_bug(gt, vfid); + return -EIO; + } + + return xe_gt_sriov_pf_control_process_restore_data(gt, vfid); +} + +/** + * xe_gt_sriov_pf_control_process_restore_data() - Queue VF restore migration data processing. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) { + xe_gt_sriov_pf_migration_ring_free(gt, vfid); + return -EIO; + } + + pf_exit_vf_restore_wait_data(gt, vfid); + + return 0; +} + +/** + * xe_gt_sriov_pf_control_trigger_restore_vf() - Start an SR-IOV VF migration data restore sequence. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { + xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid); + return -EPERM; + } + + if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { + xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid); + return -EPERM; + } + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { + xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid); + return -EBUSY; + } + + if (!pf_enter_vf_restore_wip(gt, vfid)) { + xe_gt_sriov_dbg(gt, "VF%u restore already in progress!\n", vfid); + return -EALREADY; + } + + return 0; +} + +static int pf_wait_vf_restore_done(struct xe_gt *gt, unsigned int vfid) +{ + unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESTORE_WIP); + int err; + + err = pf_wait_vf_wip_done(gt, vfid, timeout); + if (err) { + xe_gt_sriov_notice(gt, "VF%u RESTORE didn't finish in %u ms (%pe)\n", + vfid, jiffies_to_msecs(timeout), ERR_PTR(err)); + return err; + } + + if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) + return -EIO; + + return 0; +} + +/** + * xe_gt_sriov_pf_control_finish_restore_vf() - Complete a VF migration data restore sequence. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid) +{ + int ret; + + ret = pf_wait_vf_restore_done(gt, vfid); + if (ret) + return ret; + + if (!pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED)) { + pf_enter_vf_mismatch(gt, vfid); + return -EIO; + } + + pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); + + return 0; +} + +/** * DOC: The VF STOP state machine * * The VF STOP state machine looks like:: @@ -808,6 +1378,8 @@ static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid) pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED); pf_exit_vf_mismatch(gt, vfid); pf_exit_vf_wip(gt, vfid); } @@ -895,7 +1467,7 @@ int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid) return err; if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { - xe_gt_sriov_info(gt, "VF%u stopped!\n", vfid); + xe_gt_sriov_dbg(gt, "VF%u stopped!\n", vfid); return 0; } @@ -933,6 +1505,10 @@ int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid) * : v : | | * : FLR_GUC_DONE : | | * : | : | | + * : | o--<--sync : | | + * : |/ / : | | + * : FLR_SYNC--o : | | + * : | : | | * : FLR_RESET_CONFIG---failed--->-----------o--------+-----------o * : | : | | * : FLR_RESET_DATA : | | @@ -984,6 +1560,8 @@ static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid) pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE); pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC); pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START); + + xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid); } } @@ -1064,7 +1642,9 @@ static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid) if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA)) return false; - xe_gt_sriov_pf_service_reset(gt, vfid); + if 
(xe_tile_is_root(gt->tile) && xe_gt_is_main_type(gt)) + xe_sriov_pf_service_reset_vf(gt_to_xe(gt), vfid); + xe_gt_sriov_pf_monitor_flr(gt, vfid); pf_enter_vf_flr_reset_mmio(gt, vfid); @@ -1138,12 +1718,38 @@ static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid) return true; } +static bool pf_exit_vf_flr_sync(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC)) + return false; + + pf_enter_vf_flr_reset_config(gt, vfid); + return true; +} + +static void pf_enter_vf_flr_sync(struct xe_gt *gt, unsigned int vfid) +{ + int ret; + + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC)) + pf_enter_vf_state_machine_bug(gt, vfid); + + ret = xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid); + if (ret < 0) { + xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint %pe\n", ERR_PTR(ret)); + pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC); + } else { + xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint pass\n"); + pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC); + } +} + static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid) { if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE)) return false; - pf_enter_vf_flr_reset_config(gt, vfid); + pf_enter_vf_flr_sync(gt, vfid); return true; } @@ -1164,10 +1770,52 @@ static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid) */ int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid) { + pf_enter_vf_flr_wip(gt, vfid); + + return 0; +} + +/** + * xe_gt_sriov_pf_control_sync_flr() - Synchronize on the VF FLR checkpoint. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @sync: if true it will allow to exit the checkpoint + * + * Return: non-zero if FLR checkpoint has been reached, zero if there is no FLR + * in progress, or a negative error code on the FLR busy or failed. 
+ */ +int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync) +{ + if (sync && pf_exit_vf_flr_sync(gt, vfid)) + return 1; + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC)) + return 1; + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) + return -EBUSY; + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED)) + return -EIO; + return 0; +} + +/** + * xe_gt_sriov_pf_control_wait_flr() - Wait for a VF FLR to complete. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_wait_flr(struct xe_gt *gt, unsigned int vfid) +{ unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP); int err; - pf_enter_vf_flr_wip(gt, vfid); + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED)) + return -EIO; + + if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) + return 0; err = pf_wait_vf_wip_done(gt, vfid, timeout); if (err) { @@ -1375,7 +2023,22 @@ static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid) if (pf_exit_vf_pause_guc_done(gt, vfid)) return true; - if (pf_exit_vf_pause_save_guc(gt, vfid)) + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)) { + xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, + control_bit_to_string(XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)); + return false; + } + + if (pf_handle_vf_save(gt, vfid)) + return true; + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)) { + xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, + control_bit_to_string(XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)); + return false; + } + + if (pf_handle_vf_restore(gt, vfid)) return true; if (pf_exit_vf_resume_send_resume(gt, vfid)) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h index c85e64f099cc..c36c8767f3ad 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h 
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h @@ -16,8 +16,20 @@ void xe_gt_sriov_pf_control_restart(struct xe_gt *gt); int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid); +bool xe_gt_sriov_pf_control_check_save_data_done(struct xe_gt *gt, unsigned int vfid); +bool xe_gt_sriov_pf_control_check_save_failed(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_process_save_data(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_restore_data_done(struct xe_gt *gt, unsigned int vfid); +bool xe_gt_sriov_pf_control_check_restore_failed(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync); +int xe_gt_sriov_pf_control_wait_flr(struct xe_gt *gt, unsigned int vfid); #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h index f02f941b4ad2..6027ba05a7f2 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h @@ -18,6 +18,7 @@ * @XE_GT_SRIOV_STATE_FLR_SEND_START: indicates that the PF wants to send a FLR START command. 
* @XE_GT_SRIOV_STATE_FLR_WAIT_GUC: indicates that the PF awaits for a response from the GuC. * @XE_GT_SRIOV_STATE_FLR_GUC_DONE: indicates that the PF has received a response from the GuC. + * @XE_GT_SRIOV_STATE_FLR_SYNC: indicates that the PF awaits to synchronize with other GuCs. * @XE_GT_SRIOV_STATE_FLR_RESET_CONFIG: indicates that the PF needs to clear VF's resources. * @XE_GT_SRIOV_STATE_FLR_RESET_DATA: indicates that the PF needs to clear VF's data. * @XE_GT_SRIOV_STATE_FLR_RESET_MMIO: indicates that the PF needs to reset VF's registers. @@ -27,9 +28,20 @@ * @XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE: indicates that the PF is about to send a PAUSE command. * @XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC: indicates that the PF awaits for a response from the GuC. * @XE_GT_SRIOV_STATE_PAUSE_GUC_DONE: indicates that the PF has received a response from the GuC. - * @XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC: indicates that the PF needs to save the VF GuC state. * @XE_GT_SRIOV_STATE_PAUSE_FAILED: indicates that a VF pause operation has failed. * @XE_GT_SRIOV_STATE_PAUSED: indicates that the VF is paused. + * @XE_GT_SRIOV_STATE_SAVE_WIP: indicates that VF save operation is in progress. + * @XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA: indicates that VF migration data is being produced. + * @XE_GT_SRIOV_STATE_SAVE_WAIT_DATA: indicates that PF awaits for space in migration data ring. + * @XE_GT_SRIOV_STATE_SAVE_DATA_DONE: indicates that all migration data was produced by Xe. + * @XE_GT_SRIOV_STATE_SAVE_FAILED: indicates that VF save operation has failed. + * @XE_GT_SRIOV_STATE_SAVED: indicates that VF data is saved. + * @XE_GT_SRIOV_STATE_RESTORE_WIP: indicates that VF restore operation is in progress. + * @XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA: indicates that VF migration data is being consumed. + * @XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA: indicates that PF awaits for data in migration data ring. + * @XE_GT_SRIOV_STATE_RESTORE_DATA_DONE: indicates that all migration data was produced by the user. 
+ * @XE_GT_SRIOV_STATE_RESTORE_FAILED: indicates that VF restore operation has failed. + * @XE_GT_SRIOV_STATE_RESTORED: indicates that VF data is restored. * @XE_GT_SRIOV_STATE_RESUME_WIP: indicates the a VF resume operation is in progress. * @XE_GT_SRIOV_STATE_RESUME_SEND_RESUME: indicates that the PF is about to send RESUME command. * @XE_GT_SRIOV_STATE_RESUME_FAILED: indicates that a VF resume operation has failed. @@ -47,6 +59,7 @@ enum xe_gt_sriov_control_bits { XE_GT_SRIOV_STATE_FLR_SEND_START, XE_GT_SRIOV_STATE_FLR_WAIT_GUC, XE_GT_SRIOV_STATE_FLR_GUC_DONE, + XE_GT_SRIOV_STATE_FLR_SYNC, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG, XE_GT_SRIOV_STATE_FLR_RESET_DATA, XE_GT_SRIOV_STATE_FLR_RESET_MMIO, @@ -57,10 +70,23 @@ enum xe_gt_sriov_control_bits { XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE, - XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC, XE_GT_SRIOV_STATE_PAUSE_FAILED, XE_GT_SRIOV_STATE_PAUSED, + XE_GT_SRIOV_STATE_SAVE_WIP, + XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA, + XE_GT_SRIOV_STATE_SAVE_WAIT_DATA, + XE_GT_SRIOV_STATE_SAVE_DATA_DONE, + XE_GT_SRIOV_STATE_SAVE_FAILED, + XE_GT_SRIOV_STATE_SAVED, + + XE_GT_SRIOV_STATE_RESTORE_WIP, + XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA, + XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA, + XE_GT_SRIOV_STATE_RESTORE_DATA_DONE, + XE_GT_SRIOV_STATE_RESTORE_FAILED, + XE_GT_SRIOV_STATE_RESTORED, + XE_GT_SRIOV_STATE_RESUME_WIP, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME, XE_GT_SRIOV_STATE_RESUME_FAILED, @@ -71,9 +97,11 @@ enum xe_gt_sriov_control_bits { XE_GT_SRIOV_STATE_STOP_FAILED, XE_GT_SRIOV_STATE_STOPPED, - XE_GT_SRIOV_STATE_MISMATCH = BITS_PER_LONG - 1, + XE_GT_SRIOV_STATE_MISMATCH, /* always keep as last */ }; +#define XE_GT_SRIOV_NUM_STATES (XE_GT_SRIOV_STATE_MISMATCH + 1) + /** * struct xe_gt_sriov_control_state - GT-level per-VF control state. 
* @@ -81,7 +109,7 @@ enum xe_gt_sriov_control_bits { */ struct xe_gt_sriov_control_state { /** @state: VF state bits */ - unsigned long state; + DECLARE_BITMAP(state, XE_GT_SRIOV_NUM_STATES); /** @done: completion of async operations */ struct completion done; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 0fe47f41b63c..5278ea4fd655 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -22,14 +22,26 @@ #include "xe_gt_sriov_pf_policy.h" #include "xe_gt_sriov_pf_service.h" #include "xe_pm.h" +#include "xe_sriov_pf.h" +#include "xe_sriov_pf_provision.h" /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 # d_inode->i_private = gt - * │ ├── pf # d_inode->i_private = gt - * │ ├── vf1 # d_inode->i_private = VFID(1) - * : : - * │ ├── vfN # d_inode->i_private = VFID(N) + * /sys/kernel/debug/dri/BDF/ + * ├── sriov # d_inode->i_private = (xe_device*) + * │ ├── pf # d_inode->i_private = (xe_device*) + * │ │ ├── tile0 # d_inode->i_private = (xe_tile*) + * │ │ │ ├── gt0 # d_inode->i_private = (xe_gt*) + * │ │ │ ├── gt1 # d_inode->i_private = (xe_gt*) + * │ │ ├── tile1 + * │ │ │ : + * │ ├── vf1 # d_inode->i_private = VFID(1) + * │ │ ├── tile0 # d_inode->i_private = (xe_tile*) + * │ │ │ ├── gt0 # d_inode->i_private = (xe_gt*) + * │ │ │ ├── gt1 # d_inode->i_private = (xe_gt*) + * │ │ ├── tile1 + * │ │ │ : + * : : + * │ ├── vfN # d_inode->i_private = VFID(N) */ static void *extract_priv(struct dentry *d) @@ -39,26 +51,31 @@ static void *extract_priv(struct dentry *d) static struct xe_gt *extract_gt(struct dentry *d) { - return extract_priv(d->d_parent); + return extract_priv(d); +} + +static struct xe_device *extract_xe(struct dentry *d) +{ + return extract_priv(d->d_parent->d_parent->d_parent); } static unsigned int extract_vfid(struct dentry *d) { - return extract_priv(d) == extract_gt(d) ? 
PFID : (uintptr_t)extract_priv(d); + void *priv = extract_priv(d->d_parent->d_parent); + + return priv == extract_xe(d) ? PFID : (uintptr_t)priv; } /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │ ├── pf - * │ │ ├── contexts_provisioned - * │ │ ├── doorbells_provisioned - * │ │ ├── runtime_registers - * │ │ ├── negotiated_versions - * │ │ ├── adverse_events - * ├── gt1 - * │ ├── pf - * │ │ ├── ... + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── pf + * : ├── tile0 + * : ├── gt0 + * : ├── contexts_provisioned + * ├── doorbells_provisioned + * ├── runtime_registers + * ├── adverse_events */ static const struct drm_info_list pf_info[] = { @@ -78,11 +95,6 @@ static const struct drm_info_list pf_info[] = { .data = xe_gt_sriov_pf_service_print_runtime, }, { - "negotiated_versions", - .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_service_print_version, - }, - { "adverse_events", .show = xe_gt_debugfs_simple_show, .data = xe_gt_sriov_pf_monitor_print_events, @@ -90,48 +102,14 @@ static const struct drm_info_list pf_info[] = { }; /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │ ├── pf - * │ │ ├── ggtt_available - * │ │ ├── ggtt_provisioned - */ - -static const struct drm_info_list pf_ggtt_info[] = { - { - "ggtt_available", - .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_config_print_available_ggtt, - }, - { - "ggtt_provisioned", - .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_config_print_ggtt, - }, -}; - -/* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │ ├── pf - * │ │ ├── lmem_provisioned - */ - -static const struct drm_info_list pf_lmem_info[] = { - { - "lmem_provisioned", - .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_config_print_lmem, - }, -}; - -/* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │ ├── pf - * │ │ ├── reset_engine - * │ │ ├── sample_period - * │ │ ├── sched_if_idle + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── pf + * : ├── tile0 + * : ├── gt0 + * : ├── reset_engine + * ├── 
sample_period + * ├── sched_if_idle */ #define DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(POLICY, TYPE, FORMAT) \ @@ -147,6 +125,8 @@ static int POLICY##_set(void *data, u64 val) \ \ xe_pm_runtime_get(xe); \ err = xe_gt_sriov_pf_policy_set_##POLICY(gt, val); \ + if (!err) \ + xe_sriov_pf_provision_set_custom_mode(xe); \ xe_pm_runtime_put(xe); \ \ return err; \ @@ -177,24 +157,24 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent) } /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │ ├── pf - * │ │ ├── ggtt_spare - * │ │ ├── lmem_spare - * │ │ ├── doorbells_spare - * │ │ ├── contexts_spare - * │ │ ├── exec_quantum_ms - * │ │ ├── preempt_timeout_us - * │ │ ├── sched_priority - * │ ├── vf1 - * │ │ ├── ggtt_quota - * │ │ ├── lmem_quota - * │ │ ├── doorbells_quota - * │ │ ├── contexts_quota - * │ │ ├── exec_quantum_ms - * │ │ ├── preempt_timeout_us - * │ │ ├── sched_priority + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── pf + * │ ├── tile0 + * │ : ├── gt0 + * │ : ├── doorbells_spare + * │ ├── contexts_spare + * │ ├── exec_quantum_ms + * │ ├── preempt_timeout_us + * │ ├── sched_priority + * ├── vf1 + * : ├── tile0 + * : ├── gt0 + * : ├── doorbells_quota + * ├── contexts_quota + * ├── exec_quantum_ms + * ├── preempt_timeout_us + * ├── sched_priority */ #define DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(CONFIG, TYPE, FORMAT) \ @@ -210,7 +190,10 @@ static int CONFIG##_set(void *data, u64 val) \ return -EOVERFLOW; \ \ xe_pm_runtime_get(xe); \ - err = xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ + err = xe_sriov_pf_wait_ready(xe) ?: \ + xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ + if (!err) \ + xe_sriov_pf_provision_set_custom_mode(xe); \ xe_pm_runtime_put(xe); \ \ return err; \ @@ -227,8 +210,6 @@ static int CONFIG##_get(void *data, u64 *val) \ \ DEFINE_DEBUGFS_ATTRIBUTE(CONFIG##_fops, CONFIG##_get, CONFIG##_set, FORMAT) -DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ggtt, u64, "%llu\n"); -DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(lmem, 
u64, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ctxs, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(dbs, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(exec_quantum, u32, "%llu\n"); @@ -236,22 +217,26 @@ DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(preempt_timeout, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(sched_priority, u32, "%llu\n"); /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │ ├── pf - * │ │ ├── threshold_cat_error_count - * │ │ ├── threshold_doorbell_time_us - * │ │ ├── threshold_engine_reset_count - * │ │ ├── threshold_guc_time_us - * │ │ ├── threshold_irq_time_us - * │ │ ├── threshold_page_fault_count - * │ ├── vf1 - * │ │ ├── threshold_cat_error_count - * │ │ ├── threshold_doorbell_time_us - * │ │ ├── threshold_engine_reset_count - * │ │ ├── threshold_guc_time_us - * │ │ ├── threshold_irq_time_us - * │ │ ├── threshold_page_fault_count + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── pf + * │ ├── tile0 + * │ : ├── gt0 + * │ : ├── threshold_cat_error_count + * │ ├── threshold_doorbell_time_us + * │ ├── threshold_engine_reset_count + * │ ├── threshold_guc_time_us + * │ ├── threshold_irq_time_us + * │ ├── threshold_page_fault_count + * ├── vf1 + * : ├── tile0 + * : ├── gt0 + * : ├── threshold_cat_error_count + * ├── threshold_doorbell_time_us + * ├── threshold_engine_reset_count + * ├── threshold_guc_time_us + * ├── threshold_irq_time_us + * ├── threshold_page_fault_count */ static int set_threshold(void *data, u64 val, enum xe_guc_klv_threshold_index index) @@ -266,6 +251,8 @@ static int set_threshold(void *data, u64 val, enum xe_guc_klv_threshold_index in xe_pm_runtime_get(xe); err = xe_gt_sriov_pf_config_set_threshold(gt, vfid, index, val); + if (!err) + xe_sriov_pf_provision_set_custom_mode(xe); xe_pm_runtime_put(xe); return err; @@ -305,13 +292,6 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne xe_gt_assert(gt, gt == extract_gt(parent)); xe_gt_assert(gt, vfid == 
extract_vfid(parent)); - if (!xe_gt_is_media_type(gt)) { - debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare", - 0644, parent, parent, &ggtt_fops); - if (IS_DGFX(gt_to_xe(gt))) - debugfs_create_file_unsafe(vfid ? "lmem_quota" : "lmem_spare", - 0644, parent, parent, &lmem_fops); - } debugfs_create_file_unsafe(vfid ? "doorbells_quota" : "doorbells_spare", 0644, parent, parent, &dbs_fops); debugfs_create_file_unsafe(vfid ? "contexts_quota" : "contexts_spare", @@ -332,10 +312,12 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne } /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │ ├── vf1 - * │ │ ├── control { stop, pause, resume } + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── vf1 + * : ├── tile0 + * : ├── gt0 + * : ├── control { stop, pause, resume } */ static const struct { @@ -345,9 +327,6 @@ static const struct { { "stop", xe_gt_sriov_pf_control_stop_vf }, { "pause", xe_gt_sriov_pf_control_pause_vf }, { "resume", xe_gt_sriov_pf_control_resume_vf }, -#ifdef CONFIG_DRM_XE_DEBUG_SRIOV - { "restore!", xe_gt_sriov_pf_migration_restore_guc_state }, -#endif }; static ssize_t control_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) @@ -412,58 +391,27 @@ static const struct file_operations control_ops = { }; /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │ ├── vf1 - * │ │ ├── guc_state + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── vf1 + * : ├── tile0 + * : ├── gt0 + * : ├── config_blob */ -static ssize_t guc_state_read(struct file *file, char __user *buf, - size_t count, loff_t *pos) -{ - struct dentry *dent = file_dentry(file); - struct dentry *parent = dent->d_parent; - struct xe_gt *gt = extract_gt(parent); - unsigned int vfid = extract_vfid(parent); - - return xe_gt_sriov_pf_migration_read_guc_state(gt, vfid, buf, count, pos); -} - -static ssize_t guc_state_write(struct file *file, const char __user *buf, - size_t count, loff_t *pos) -{ - struct dentry *dent = file_dentry(file); - 
struct dentry *parent = dent->d_parent; - struct xe_gt *gt = extract_gt(parent); - unsigned int vfid = extract_vfid(parent); - - if (*pos) - return -EINVAL; - - return xe_gt_sriov_pf_migration_write_guc_state(gt, vfid, buf, count); -} -static const struct file_operations guc_state_ops = { - .owner = THIS_MODULE, - .read = guc_state_read, - .write = guc_state_write, - .llseek = default_llseek, +struct config_blob_data { + size_t size; + u8 blob[]; }; -/* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │ ├── vf1 - * │ │ ├── config_blob - */ -static ssize_t config_blob_read(struct file *file, char __user *buf, - size_t count, loff_t *pos) +static int config_blob_open(struct inode *inode, struct file *file) { struct dentry *dent = file_dentry(file); struct dentry *parent = dent->d_parent; struct xe_gt *gt = extract_gt(parent); unsigned int vfid = extract_vfid(parent); + struct config_blob_data *cbd; ssize_t ret; - void *tmp; ret = xe_gt_sriov_pf_config_save(gt, vfid, NULL, 0); if (!ret) @@ -471,16 +419,27 @@ static ssize_t config_blob_read(struct file *file, char __user *buf, if (ret < 0) return ret; - tmp = kzalloc(ret, GFP_KERNEL); - if (!tmp) + cbd = kzalloc(struct_size(cbd, blob, ret), GFP_KERNEL); + if (!cbd) return -ENOMEM; - ret = xe_gt_sriov_pf_config_save(gt, vfid, tmp, ret); - if (ret > 0) - ret = simple_read_from_buffer(buf, count, pos, tmp, ret); + ret = xe_gt_sriov_pf_config_save(gt, vfid, cbd->blob, ret); + if (ret < 0) { + kfree(cbd); + return ret; + } - kfree(tmp); - return ret; + cbd->size = ret; + file->private_data = cbd; + return nonseekable_open(inode, file); +} + +static ssize_t config_blob_read(struct file *file, char __user *buf, + size_t count, loff_t *pos) +{ + struct config_blob_data *cbd = file->private_data; + + return simple_read_from_buffer(buf, count, pos, cbd->blob, cbd->size); } static ssize_t config_blob_write(struct file *file, const char __user *buf, @@ -517,80 +476,147 @@ static ssize_t config_blob_write(struct file *file, const char 
__user *buf, return ret; } +static int config_blob_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + static const struct file_operations config_blob_ops = { .owner = THIS_MODULE, + .open = config_blob_open, .read = config_blob_read, .write = config_blob_write, - .llseek = default_llseek, + .release = config_blob_release, }; -/** - * xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs. - * @gt: the &xe_gt to register - * @root: the &dentry that represents the GT directory - * - * Register SR-IOV PF entries that are GT related and must be shown under GT debugfs. - */ -void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) +static void pf_add_compat_attrs(struct xe_gt *gt, struct dentry *dent, unsigned int vfid) { struct xe_device *xe = gt_to_xe(gt); - struct drm_minor *minor = xe->drm.primary; - int n, totalvfs = xe_sriov_pf_get_totalvfs(xe); - struct dentry *pfdentry; - struct dentry *vfdentry; - char buf[14]; /* should be enough up to "vf%u\0" for 2^32 - 1 */ - - xe_gt_assert(gt, IS_SRIOV_PF(xe)); - xe_gt_assert(gt, root->d_inode->i_private == gt); - /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │ ├── pf - */ - pfdentry = debugfs_create_dir("pf", root); - if (IS_ERR(pfdentry)) + if (!xe_gt_is_main_type(gt)) return; - pfdentry->d_inode->i_private = gt; - - drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor); - if (!xe_gt_is_media_type(gt)) { - drm_debugfs_create_files(pf_ggtt_info, - ARRAY_SIZE(pf_ggtt_info), - pfdentry, minor); - if (IS_DGFX(gt_to_xe(gt))) - drm_debugfs_create_files(pf_lmem_info, - ARRAY_SIZE(pf_lmem_info), - pfdentry, minor); + + if (vfid) { + debugfs_create_symlink("ggtt_quota", dent, "../ggtt_quota"); + if (xe_device_has_lmtt(xe)) + debugfs_create_symlink("lmem_quota", dent, "../vram_quota"); + } else { + debugfs_create_symlink("ggtt_spare", dent, "../ggtt_spare"); + debugfs_create_symlink("ggtt_available", dent, 
"../ggtt_available"); + debugfs_create_symlink("ggtt_provisioned", dent, "../ggtt_provisioned"); + if (xe_device_has_lmtt(xe)) { + debugfs_create_symlink("lmem_spare", dent, "../vram_spare"); + debugfs_create_symlink("lmem_provisioned", dent, "../vram_provisioned"); + } } +} - pf_add_policy_attrs(gt, pfdentry); - pf_add_config_attrs(gt, pfdentry, PFID); - - for (n = 1; n <= totalvfs; n++) { - /* - * /sys/kernel/debug/dri/0/ - * ├── gt0 - * │ ├── vf1 - * │ ├── vf2 - */ - snprintf(buf, sizeof(buf), "vf%u", n); - vfdentry = debugfs_create_dir(buf, root); - if (IS_ERR(vfdentry)) - break; - vfdentry->d_inode->i_private = (void *)(uintptr_t)n; +static void pf_populate_gt(struct xe_gt *gt, struct dentry *dent, unsigned int vfid) +{ + struct xe_device *xe = gt_to_xe(gt); + struct drm_minor *minor = xe->drm.primary; - pf_add_config_attrs(gt, vfdentry, VFID(n)); - debugfs_create_file("control", 0600, vfdentry, NULL, &control_ops); + if (vfid) { + pf_add_config_attrs(gt, dent, vfid); + + debugfs_create_file("control", 0600, dent, NULL, &control_ops); /* for testing/debugging purposes only! */ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { - debugfs_create_file("guc_state", - IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 0600 : 0400, - vfdentry, NULL, &guc_state_ops); debugfs_create_file("config_blob", IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 0600 : 0400, - vfdentry, NULL, &config_blob_ops); + dent, NULL, &config_blob_ops); } + + } else { + pf_add_config_attrs(gt, dent, PFID); + pf_add_policy_attrs(gt, dent); + + drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), dent, minor); } + + /* for backward compatibility only */ + pf_add_compat_attrs(gt, dent, vfid); +} + +/** + * xe_gt_sriov_pf_debugfs_populate() - Create SR-IOV GT-level debugfs directories and files. 
+ * @gt: the &xe_gt to register + * @parent: the parent &dentry that represents a &xe_tile + * @vfid: the VF identifier + * + * Add to the @parent directory new debugfs directory that will represent a @gt and + * populate it with GT files that are related to the SR-IOV @vfid function. + * + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_debugfs_populate(struct xe_gt *gt, struct dentry *parent, unsigned int vfid) +{ + struct dentry *dent; + char name[8]; /* should be enough up to "gt%u\0" for 2^8 - 1 */ + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, extract_priv(parent) == gt->tile); + xe_gt_assert(gt, extract_priv(parent->d_parent) == gt_to_xe(gt) || + (uintptr_t)extract_priv(parent->d_parent) == vfid); + + /* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * │ ├── pf + * │ │ ├── tile0 # parent + * │ │ │ ├── gt0 # d_inode->i_private = (xe_gt*) + * │ │ │ ├── gt1 + * │ │ : : + * │ ├── vf1 + * │ │ ├── tile0 # parent + * │ │ │ ├── gt0 # d_inode->i_private = (xe_gt*) + * │ │ │ ├── gt1 + * │ : : : + */ + snprintf(name, sizeof(name), "gt%u", gt->info.id); + dent = debugfs_create_dir(name, parent); + if (IS_ERR(dent)) + return; + dent->d_inode->i_private = gt; + + xe_gt_assert(gt, extract_gt(dent) == gt); + xe_gt_assert(gt, extract_vfid(dent) == vfid); + + pf_populate_gt(gt, dent, vfid); +} + +static void pf_add_links(struct xe_gt *gt, struct dentry *dent) +{ + unsigned int totalvfs = xe_gt_sriov_pf_get_totalvfs(gt); + unsigned int vfid; + char name[16]; /* should be more than enough for "vf%u\0" and VFID(UINT_MAX) */ + char symlink[64]; /* should be more than enough for "../../sriov/vf%u/tile%u/gt%u\0" */ + + for (vfid = 0; vfid <= totalvfs; vfid++) { + if (vfid) + snprintf(name, sizeof(name), "vf%u", vfid); + else + snprintf(name, sizeof(name), "pf"); + snprintf(symlink, sizeof(symlink), "../../sriov/%s/tile%u/gt%u", + name, gt->tile->id, gt->info.id); + debugfs_create_symlink(name, dent, symlink); + } +} + +/** + * 
xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs. + * @gt: the &xe_gt to register + * @dent: the &dentry that represents the GT directory + * + * Instead of actual files, create symlinks for PF and each VF to their GT specific + * attributes that should be already exposed in the dedicated debugfs SR-IOV tree. + */ +void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *dent) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, dent->d_inode->i_private == gt); + + pf_add_links(gt, dent); } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h index 038cc8ddc244..82ff3b7f0532 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h @@ -11,6 +11,7 @@ struct dentry; #ifdef CONFIG_PCI_IOV void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root); +void xe_gt_sriov_pf_debugfs_populate(struct xe_gt *gt, struct dentry *parent, unsigned int vfid); #else static inline void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) { } #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c index c712111aa30d..3174a8dee779 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c @@ -5,14 +5,150 @@ #include <drm/drm_managed.h> +#include "regs/xe_guc_regs.h" + #include "abi/guc_actions_sriov_abi.h" #include "xe_bo.h" +#include "xe_ggtt.h" +#include "xe_gt.h" +#include "xe_gt_sriov_pf.h" +#include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_printk.h" #include "xe_guc.h" +#include "xe_guc_buf.h" #include "xe_guc_ct.h" +#include "xe_migrate.h" +#include "xe_mmio.h" #include "xe_sriov.h" +#include "xe_sriov_packet.h" +#include "xe_sriov_packet_types.h" +#include 
"xe_sriov_pf_migration.h" + +#define XE_GT_SRIOV_PF_MIGRATION_RING_SIZE 5 + +static struct xe_gt_sriov_migration_data *pf_pick_gt_migration(struct xe_gt *gt, unsigned int vfid) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + + return >->sriov.pf.vfs[vfid].migration; +} + +static void pf_dump_mig_data(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data, + const char *what) +{ + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { + struct drm_printer p = xe_gt_dbg_printer(gt); + + drm_printf(&p, "VF%u %s (%llu bytes)\n", vfid, what, data->hdr.size); + drm_print_hex_dump(&p, "mig_hdr: ", (void *)&data->hdr, sizeof(data->hdr)); + drm_print_hex_dump(&p, "mig_data: ", data->vaddr, min(SZ_64, data->hdr.size)); + } +} + +static ssize_t pf_migration_ggtt_size(struct xe_gt *gt, unsigned int vfid) +{ + if (!xe_gt_is_main_type(gt)) + return 0; + + return xe_gt_sriov_pf_config_ggtt_save(gt, vfid, NULL, 0); +} + +static int pf_save_vf_ggtt_mig_data(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_sriov_packet *data; + size_t size; + int ret; + + size = pf_migration_ggtt_size(gt, vfid); + xe_gt_assert(gt, size); + + data = xe_sriov_packet_alloc(gt_to_xe(gt)); + if (!data) + return -ENOMEM; + + ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id, + XE_SRIOV_PACKET_TYPE_GGTT, 0, size); + if (ret) + goto fail; + + ret = xe_gt_sriov_pf_config_ggtt_save(gt, vfid, data->vaddr, size); + if (ret) + goto fail; + + pf_dump_mig_data(gt, vfid, data, "GGTT data save"); + + ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data); + if (ret) + goto fail; + + return 0; + +fail: + xe_sriov_packet_free(data); + xe_gt_sriov_err(gt, "Failed to save VF%u GGTT data (%pe)\n", vfid, ERR_PTR(ret)); + return ret; +} + +static int pf_restore_vf_ggtt_mig_data(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ + int ret; + + pf_dump_mig_data(gt, vfid, data, 
"GGTT data restore"); + + ret = xe_gt_sriov_pf_config_ggtt_restore(gt, vfid, data->vaddr, data->hdr.size); + if (ret) { + xe_gt_sriov_err(gt, "Failed to restore VF%u GGTT data (%pe)\n", + vfid, ERR_PTR(ret)); + return ret; + } + + return 0; +} + +/** + * xe_gt_sriov_pf_migration_ggtt_save() - Save VF GGTT migration data. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_migration_ggtt_save(struct xe_gt *gt, unsigned int vfid) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + + return pf_save_vf_ggtt_mig_data(gt, vfid); +} + +/** + * xe_gt_sriov_pf_migration_ggtt_restore() - Restore VF GGTT migration data. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + * @data: the &xe_sriov_packet containing migration data + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_migration_ggtt_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + + return pf_restore_vf_ggtt_mig_data(gt, vfid, data); +} /* Return: number of dwords saved/restored/required or a negative error code on failure */ static int guc_action_vf_save_restore(struct xe_guc *guc, u32 vfid, u32 opcode, @@ -33,7 +169,7 @@ static int guc_action_vf_save_restore(struct xe_guc *guc, u32 vfid, u32 opcode, } /* Return: size of the state in dwords or a negative error code on failure */ -static int pf_send_guc_query_vf_state_size(struct xe_gt *gt, unsigned int vfid) +static int pf_send_guc_query_vf_mig_data_size(struct xe_gt *gt, unsigned int vfid) { int ret; @@ -42,353 +178,856 @@ static int pf_send_guc_query_vf_state_size(struct xe_gt *gt, unsigned int vfid) } /* Return: number of state dwords saved or a negative error code on failure */ -static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid, - void *buff, size_t size) +static int pf_send_guc_save_vf_mig_data(struct xe_gt *gt, unsigned int vfid, + void *dst, size_t size) { const int ndwords = size / sizeof(u32); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_device *xe = tile_to_xe(tile); struct xe_guc *guc = >->uc.guc; - struct xe_bo *bo; + CLASS(xe_guc_buf, buf)(&guc->buf, ndwords); int ret; xe_gt_assert(gt, size % sizeof(u32) == 0); xe_gt_assert(gt, size == ndwords * sizeof(u32)); - bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); - if (IS_ERR(bo)) - return PTR_ERR(bo); + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + /* FW expects this buffer to be zero-initialized */ + memset(xe_guc_buf_cpu_ptr(buf), 0, size); ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_SAVE, - 
xe_bo_ggtt_addr(bo), ndwords); + xe_guc_buf_flush(buf), ndwords); if (!ret) ret = -ENODATA; else if (ret > ndwords) ret = -EPROTO; else if (ret > 0) - xe_map_memcpy_from(xe, buff, &bo->vmap, 0, ret * sizeof(u32)); + memcpy(dst, xe_guc_buf_sync_read(buf), ret * sizeof(u32)); - xe_bo_unpin_map_no_vm(bo); return ret; } /* Return: number of state dwords restored or a negative error code on failure */ -static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid, - const void *buff, size_t size) +static int pf_send_guc_restore_vf_mig_data(struct xe_gt *gt, unsigned int vfid, + const void *src, size_t size) { const int ndwords = size / sizeof(u32); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_device *xe = tile_to_xe(tile); struct xe_guc *guc = >->uc.guc; - struct xe_bo *bo; + CLASS(xe_guc_buf_from_data, buf)(&guc->buf, src, size); int ret; xe_gt_assert(gt, size % sizeof(u32) == 0); xe_gt_assert(gt, size == ndwords * sizeof(u32)); - bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); - if (IS_ERR(bo)) - return PTR_ERR(bo); - - xe_map_memcpy_to(xe, &bo->vmap, 0, buff, size); + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_RESTORE, - xe_bo_ggtt_addr(bo), ndwords); + xe_guc_buf_flush(buf), ndwords); if (!ret) ret = -ENODATA; else if (ret > ndwords) ret = -EPROTO; - xe_bo_unpin_map_no_vm(bo); return ret; } static bool pf_migration_supported(struct xe_gt *gt) { - xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); - return gt->sriov.pf.migration.supported; + return xe_sriov_pf_migration_supported(gt_to_xe(gt)); } -static struct mutex *pf_migration_mutex(struct xe_gt *gt) +static int pf_save_vf_guc_mig_data(struct xe_gt *gt, unsigned int vfid) { - xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); - return >->sriov.pf.migration.snapshot_lock; + struct xe_sriov_packet *data; + size_t size; + int 
ret; + + ret = pf_send_guc_query_vf_mig_data_size(gt, vfid); + if (ret < 0) + goto fail; + + size = ret * sizeof(u32); + + data = xe_sriov_packet_alloc(gt_to_xe(gt)); + if (!data) { + ret = -ENOMEM; + goto fail; + } + + ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id, + XE_SRIOV_PACKET_TYPE_GUC, 0, size); + if (ret) + goto fail_free; + + ret = pf_send_guc_save_vf_mig_data(gt, vfid, data->vaddr, size); + if (ret < 0) + goto fail_free; + size = ret * sizeof(u32); + xe_gt_assert(gt, size); + xe_gt_assert(gt, size <= data->hdr.size); + data->hdr.size = size; + data->remaining = size; + + pf_dump_mig_data(gt, vfid, data, "GuC data save"); + + ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data); + if (ret) + goto fail_free; + + return 0; + +fail_free: + xe_sriov_packet_free(data); +fail: + xe_gt_sriov_err(gt, "Failed to save VF%u GuC data (%pe)\n", + vfid, ERR_PTR(ret)); + return ret; } -static struct xe_gt_sriov_state_snapshot *pf_pick_vf_snapshot(struct xe_gt *gt, - unsigned int vfid) +static ssize_t pf_migration_guc_size(struct xe_gt *gt, unsigned int vfid) +{ + ssize_t size; + + if (!pf_migration_supported(gt)) + return -ENOPKG; + + size = pf_send_guc_query_vf_mig_data_size(gt, vfid); + if (size >= 0) + size *= sizeof(u32); + + return size; +} + +/** + * xe_gt_sriov_pf_migration_guc_save() - Save VF GuC migration data. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_migration_guc_save(struct xe_gt *gt, unsigned int vfid) { xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); - lockdep_assert_held(pf_migration_mutex(gt)); - return >->sriov.pf.vfs[vfid].snapshot; + if (!pf_migration_supported(gt)) + return -ENOPKG; + + return pf_save_vf_guc_mig_data(gt, vfid); } -static unsigned int pf_snapshot_index(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot) +static int pf_restore_vf_guc_state(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) { - return container_of(snapshot, struct xe_gt_sriov_metadata, snapshot) - gt->sriov.pf.vfs; + int ret; + + xe_gt_assert(gt, data->hdr.size); + + pf_dump_mig_data(gt, vfid, data, "GuC data restore"); + + ret = pf_send_guc_restore_vf_mig_data(gt, vfid, data->vaddr, data->hdr.size); + if (ret < 0) + goto fail; + + return 0; + +fail: + xe_gt_sriov_err(gt, "Failed to restore VF%u GuC data (%pe)\n", + vfid, ERR_PTR(ret)); + return ret; } -static void pf_free_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot) +/** + * xe_gt_sriov_pf_migration_guc_restore() - Restore VF GuC migration data. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @data: the &xe_sriov_packet containing migration data + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_migration_guc_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) { - struct xe_device *xe = gt_to_xe(gt); + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); - drmm_kfree(&xe->drm, snapshot->guc.buff); - snapshot->guc.buff = NULL; - snapshot->guc.size = 0; + if (!pf_migration_supported(gt)) + return -ENOPKG; + + return pf_restore_vf_guc_state(gt, vfid, data); } -static int pf_alloc_guc_state(struct xe_gt *gt, - struct xe_gt_sriov_state_snapshot *snapshot, - size_t size) +static ssize_t pf_migration_mmio_size(struct xe_gt *gt, unsigned int vfid) { - struct xe_device *xe = gt_to_xe(gt); - void *p; - - pf_free_guc_state(gt, snapshot); + if (xe_gt_is_media_type(gt)) + return MED_VF_SW_FLAG_COUNT * sizeof(u32); + else + return VF_SW_FLAG_COUNT * sizeof(u32); +} - if (!size) - return -ENODATA; +static int pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size) +{ + struct xe_mmio mmio; + u32 *regs = buf; + int n; - if (size % sizeof(u32)) + if (size != pf_migration_mmio_size(gt, vfid)) return -EINVAL; - if (size > SZ_2M) - return -EFBIG; + xe_mmio_init_vf_view(&mmio, >->mmio, vfid); - p = drmm_kzalloc(&xe->drm, size, GFP_KERNEL); - if (!p) - return -ENOMEM; + if (xe_gt_is_media_type(gt)) + for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) + regs[n] = xe_mmio_read32(>->mmio, MED_VF_SW_FLAG(n)); + else + for (n = 0; n < VF_SW_FLAG_COUNT; n++) + regs[n] = xe_mmio_read32(>->mmio, VF_SW_FLAG(n)); - snapshot->guc.buff = p; - snapshot->guc.size = size; return 0; } -static void pf_dump_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot) +static int pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid, + const void *buf, size_t size) { - if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - unsigned int vfid __maybe_unused = pf_snapshot_index(gt, snapshot); + const u32 *regs = buf; + struct 
xe_mmio mmio; + int n; - xe_gt_sriov_dbg_verbose(gt, "VF%u GuC state is %zu dwords:\n", - vfid, snapshot->guc.size / sizeof(u32)); - print_hex_dump_bytes("state: ", DUMP_PREFIX_OFFSET, - snapshot->guc.buff, min(SZ_64, snapshot->guc.size)); - } + if (size != pf_migration_mmio_size(gt, vfid)) + return -EINVAL; + + xe_mmio_init_vf_view(&mmio, >->mmio, vfid); + + if (xe_gt_is_media_type(gt)) + for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) + xe_mmio_write32(>->mmio, MED_VF_SW_FLAG(n), regs[n]); + else + for (n = 0; n < VF_SW_FLAG_COUNT; n++) + xe_mmio_write32(>->mmio, VF_SW_FLAG(n), regs[n]); + + return 0; } -static int pf_save_vf_guc_state(struct xe_gt *gt, unsigned int vfid) +static int pf_save_vf_mmio_mig_data(struct xe_gt *gt, unsigned int vfid) { - struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid); + struct xe_sriov_packet *data; size_t size; int ret; - ret = pf_send_guc_query_vf_state_size(gt, vfid); - if (ret < 0) + size = pf_migration_mmio_size(gt, vfid); + xe_gt_assert(gt, size); + + data = xe_sriov_packet_alloc(gt_to_xe(gt)); + if (!data) + return -ENOMEM; + + ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id, + XE_SRIOV_PACKET_TYPE_MMIO, 0, size); + if (ret) goto fail; - size = ret * sizeof(u32); - xe_gt_sriov_dbg_verbose(gt, "VF%u state size is %d dwords (%zu bytes)\n", vfid, ret, size); - ret = pf_alloc_guc_state(gt, snapshot, size); - if (ret < 0) + ret = pf_migration_mmio_save(gt, vfid, data->vaddr, size); + if (ret) goto fail; - ret = pf_send_guc_save_vf_state(gt, vfid, snapshot->guc.buff, size); - if (ret < 0) + pf_dump_mig_data(gt, vfid, data, "MMIO data save"); + + ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data); + if (ret) goto fail; - size = ret * sizeof(u32); - xe_gt_assert(gt, size); - xe_gt_assert(gt, size <= snapshot->guc.size); - snapshot->guc.size = size; - pf_dump_guc_state(gt, snapshot); return 0; fail: - xe_gt_sriov_dbg(gt, "Unable to save VF%u state (%pe)\n", vfid, ERR_PTR(ret)); - 
pf_free_guc_state(gt, snapshot); + xe_sriov_packet_free(data); + xe_gt_sriov_err(gt, "Failed to save VF%u MMIO data (%pe)\n", vfid, ERR_PTR(ret)); return ret; } +static int pf_restore_vf_mmio_mig_data(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ + int ret; + + pf_dump_mig_data(gt, vfid, data, "MMIO data restore"); + + ret = pf_migration_mmio_restore(gt, vfid, data->vaddr, data->hdr.size); + if (ret) { + xe_gt_sriov_err(gt, "Failed to restore VF%u MMIO data (%pe)\n", + vfid, ERR_PTR(ret)); + + return ret; + } + + return 0; +} + /** - * xe_gt_sriov_pf_migration_save_guc_state() - Take a GuC VF state snapshot. + * xe_gt_sriov_pf_migration_mmio_save() - Save VF MMIO migration data. * @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the VF identifier (can't be 0) * * This function is for PF only. * * Return: 0 on success or a negative error code on failure. */ -int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid) +int xe_gt_sriov_pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid) { - int err; + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + return pf_save_vf_mmio_mig_data(gt, vfid); +} + +/** + * xe_gt_sriov_pf_migration_mmio_restore() - Restore VF MMIO migration data. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + * @data: the &xe_sriov_packet containing migration data + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); xe_gt_assert(gt, vfid != PFID); xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); - if (!pf_migration_supported(gt)) - return -ENOPKG; + return pf_restore_vf_mmio_mig_data(gt, vfid, data); +} - mutex_lock(pf_migration_mutex(gt)); - err = pf_save_vf_guc_state(gt, vfid); - mutex_unlock(pf_migration_mutex(gt)); +static ssize_t pf_migration_vram_size(struct xe_gt *gt, unsigned int vfid) +{ + if (!xe_gt_is_main_type(gt)) + return 0; - return err; + return xe_gt_sriov_pf_config_get_lmem(gt, vfid); +} + +static struct dma_fence *__pf_save_restore_vram(struct xe_gt *gt, unsigned int vfid, + struct xe_bo *vram, u64 vram_offset, + struct xe_bo *sysmem, u64 sysmem_offset, + size_t size, bool save) +{ + struct dma_fence *ret = NULL; + struct drm_exec exec; + int err; + + drm_exec_init(&exec, 0, 0); + drm_exec_until_all_locked(&exec) { + err = drm_exec_lock_obj(&exec, &vram->ttm.base); + drm_exec_retry_on_contention(&exec); + if (err) { + ret = ERR_PTR(err); + goto err; + } + + err = drm_exec_lock_obj(&exec, &sysmem->ttm.base); + drm_exec_retry_on_contention(&exec); + if (err) { + ret = ERR_PTR(err); + goto err; + } + } + + ret = xe_migrate_vram_copy_chunk(vram, vram_offset, sysmem, sysmem_offset, size, + save ? 
XE_MIGRATE_COPY_TO_SRAM : XE_MIGRATE_COPY_TO_VRAM); + +err: + drm_exec_fini(&exec); + + return ret; } -static int pf_restore_vf_guc_state(struct xe_gt *gt, unsigned int vfid) +#define PF_VRAM_SAVE_RESTORE_TIMEOUT (5 * HZ) +static int pf_save_vram_chunk(struct xe_gt *gt, unsigned int vfid, + struct xe_bo *src_vram, u64 src_vram_offset, + size_t size) { - struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid); + struct xe_sriov_packet *data; + struct dma_fence *fence; int ret; - if (!snapshot->guc.size) - return -ENODATA; + data = xe_sriov_packet_alloc(gt_to_xe(gt)); + if (!data) + return -ENOMEM; - xe_gt_sriov_dbg_verbose(gt, "restoring %zu dwords of VF%u GuC state\n", - snapshot->guc.size / sizeof(u32), vfid); - ret = pf_send_guc_restore_vf_state(gt, vfid, snapshot->guc.buff, snapshot->guc.size); - if (ret < 0) + ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id, + XE_SRIOV_PACKET_TYPE_VRAM, src_vram_offset, + size); + if (ret) + goto fail; + + fence = __pf_save_restore_vram(gt, vfid, + src_vram, src_vram_offset, + data->bo, 0, size, true); + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + goto fail; + } + + ret = dma_fence_wait_timeout(fence, false, PF_VRAM_SAVE_RESTORE_TIMEOUT); + dma_fence_put(fence); + if (!ret) { + ret = -ETIME; + goto fail; + } + + pf_dump_mig_data(gt, vfid, data, "VRAM data save"); + + ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data); + if (ret) goto fail; - xe_gt_sriov_dbg_verbose(gt, "restored %d dwords of VF%u GuC state\n", ret, vfid); return 0; fail: - xe_gt_sriov_dbg(gt, "Failed to restore VF%u GuC state (%pe)\n", vfid, ERR_PTR(ret)); + xe_sriov_packet_free(data); + return ret; +} + +#define VF_VRAM_STATE_CHUNK_MAX_SIZE SZ_512M +static int pf_save_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); + loff_t *offset = &migration->save.vram_offset; + struct xe_bo *vram; + size_t vram_size, chunk_size; + int ret; + 
+ vram = xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid); + if (!vram) + return -ENXIO; + + vram_size = xe_bo_size(vram); + + xe_gt_assert(gt, *offset < vram_size); + + chunk_size = min(vram_size - *offset, VF_VRAM_STATE_CHUNK_MAX_SIZE); + + ret = pf_save_vram_chunk(gt, vfid, vram, *offset, chunk_size); + if (ret) + goto fail; + + *offset += chunk_size; + + xe_bo_put(vram); + + if (*offset < vram_size) + return -EAGAIN; + + return 0; + +fail: + xe_bo_put(vram); + xe_gt_sriov_err(gt, "Failed to save VF%u VRAM data (%pe)\n", vfid, ERR_PTR(ret)); + return ret; +} + +static int pf_restore_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ + u64 end = data->hdr.offset + data->hdr.size; + struct dma_fence *fence; + struct xe_bo *vram; + size_t size; + int ret = 0; + + vram = xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid); + if (!vram) + return -ENXIO; + + size = xe_bo_size(vram); + + if (end > size || end < data->hdr.size) { + ret = -EINVAL; + goto err; + } + + pf_dump_mig_data(gt, vfid, data, "VRAM data restore"); + + fence = __pf_save_restore_vram(gt, vfid, vram, data->hdr.offset, + data->bo, 0, data->hdr.size, false); + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + goto err; + } + + ret = dma_fence_wait_timeout(fence, false, PF_VRAM_SAVE_RESTORE_TIMEOUT); + dma_fence_put(fence); + if (!ret) { + ret = -ETIME; + goto err; + } + + xe_bo_put(vram); + + return 0; +err: + xe_bo_put(vram); + xe_gt_sriov_err(gt, "Failed to restore VF%u VRAM data (%pe)\n", vfid, ERR_PTR(ret)); return ret; } /** - * xe_gt_sriov_pf_migration_restore_guc_state() - Restore a GuC VF state. + * xe_gt_sriov_pf_migration_vram_save() - Save VF VRAM migration data. * @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the VF identifier (can't be 0) * * This function is for PF only. * * Return: 0 on success or a negative error code on failure. 
*/ -int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid) +int xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid) { - int ret; - xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); xe_gt_assert(gt, vfid != PFID); xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); - if (!pf_migration_supported(gt)) - return -ENOPKG; + return pf_save_vf_vram_mig_data(gt, vfid); +} - mutex_lock(pf_migration_mutex(gt)); - ret = pf_restore_vf_guc_state(gt, vfid); - mutex_unlock(pf_migration_mutex(gt)); +/** + * xe_gt_sriov_pf_migration_vram_restore() - Restore VF VRAM migration data. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + * @data: the &xe_sriov_packet containing migration data + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); - return ret; + return pf_restore_vf_vram_mig_data(gt, vfid, data); } -#ifdef CONFIG_DEBUG_FS /** - * xe_gt_sriov_pf_migration_read_guc_state() - Read a GuC VF state. + * xe_gt_sriov_pf_migration_size() - Total size of migration data from all components within a GT. * @gt: the &xe_gt - * @vfid: the VF identifier - * @buf: the user space buffer to read to - * @count: the maximum number of bytes to read - * @pos: the current position in the buffer + * @vfid: the VF identifier (can't be 0) * * This function is for PF only. * - * This function reads up to @count bytes from the saved VF GuC state buffer - * at offset @pos into the user space address starting at @buf. - * - * Return: the number of bytes read or a negative error code on failure. + * Return: total migration data size in bytes or a negative error code on failure. 
*/ -ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid, - char __user *buf, size_t count, loff_t *pos) +ssize_t xe_gt_sriov_pf_migration_size(struct xe_gt *gt, unsigned int vfid) { - struct xe_gt_sriov_state_snapshot *snapshot; - ssize_t ret; + ssize_t total = 0; + ssize_t size; xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); xe_gt_assert(gt, vfid != PFID); xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); - if (!pf_migration_supported(gt)) - return -ENOPKG; + size = pf_migration_guc_size(gt, vfid); + if (size < 0) + return size; + if (size > 0) + size += sizeof(struct xe_sriov_packet_hdr); + total += size; + + size = pf_migration_ggtt_size(gt, vfid); + if (size < 0) + return size; + if (size > 0) + size += sizeof(struct xe_sriov_packet_hdr); + total += size; + + size = pf_migration_mmio_size(gt, vfid); + if (size < 0) + return size; + if (size > 0) + size += sizeof(struct xe_sriov_packet_hdr); + total += size; + + size = pf_migration_vram_size(gt, vfid); + if (size < 0) + return size; + if (size > 0) + size += sizeof(struct xe_sriov_packet_hdr); + total += size; + + return total; +} - mutex_lock(pf_migration_mutex(gt)); - snapshot = pf_pick_vf_snapshot(gt, vfid); - if (snapshot->guc.size) - ret = simple_read_from_buffer(buf, count, pos, snapshot->guc.buff, - snapshot->guc.size); - else - ret = -ENODATA; - mutex_unlock(pf_migration_mutex(gt)); +/** + * xe_gt_sriov_pf_migration_ring_empty() - Check if a migration ring is empty. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * Return: true if the ring is empty, otherwise false. + */ +bool xe_gt_sriov_pf_migration_ring_empty(struct xe_gt *gt, unsigned int vfid) +{ + return ptr_ring_empty(&pf_pick_gt_migration(gt, vfid)->ring); +} - return ret; +/** + * xe_gt_sriov_pf_migration_ring_full() - Check if a migration ring is full. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * Return: true if the ring is full, otherwise false. 
+ */ +bool xe_gt_sriov_pf_migration_ring_full(struct xe_gt *gt, unsigned int vfid) +{ + return ptr_ring_full(&pf_pick_gt_migration(gt, vfid)->ring); +} + +/** + * xe_gt_sriov_pf_migration_ring_free() - Consume and free all data in migration ring + * @gt: the &xe_gt + * @vfid: the VF identifier + */ +void xe_gt_sriov_pf_migration_ring_free(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); + struct xe_sriov_packet *data; + + if (ptr_ring_empty(&migration->ring)) + return; + + xe_gt_sriov_notice(gt, "VF%u unprocessed migration data left in the ring!\n", vfid); + + while ((data = ptr_ring_consume(&migration->ring))) + xe_sriov_packet_free(data); +} + +static void pf_migration_save_data_todo(struct xe_gt *gt, unsigned int vfid, + enum xe_sriov_packet_type type) +{ + set_bit(type, &pf_pick_gt_migration(gt, vfid)->save.data_remaining); +} + +/** + * xe_gt_sriov_pf_migration_save_init() - Initialize per-GT migration related data. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + */ +void xe_gt_sriov_pf_migration_save_init(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); + + migration->save.data_remaining = 0; + migration->save.vram_offset = 0; + + xe_gt_assert(gt, pf_migration_guc_size(gt, vfid) > 0); + pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_GUC); + + if (pf_migration_ggtt_size(gt, vfid) > 0) + pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_GGTT); + + xe_gt_assert(gt, pf_migration_mmio_size(gt, vfid) > 0); + pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_MMIO); + + if (pf_migration_vram_size(gt, vfid) > 0) + pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_VRAM); } /** - * xe_gt_sriov_pf_migration_write_guc_state() - Write a GuC VF state. + * xe_gt_sriov_pf_migration_save_data_pending() - Check if migration data type needs to be saved. 
+ * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + * @type: the &xe_sriov_packet_type of data to be checked + * + * Return: true if the data needs saving, otherwise false. + */ +bool xe_gt_sriov_pf_migration_save_data_pending(struct xe_gt *gt, unsigned int vfid, + enum xe_sriov_packet_type type) +{ + return test_bit(type, &pf_pick_gt_migration(gt, vfid)->save.data_remaining); +} + +/** + * xe_gt_sriov_pf_migration_save_data_complete() - Complete migration data type save. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + * @type: the &xe_sriov_packet_type to be marked as completed. + */ +void xe_gt_sriov_pf_migration_save_data_complete(struct xe_gt *gt, unsigned int vfid, + enum xe_sriov_packet_type type) +{ + clear_bit(type, &pf_pick_gt_migration(gt, vfid)->save.data_remaining); +} + +/** + * xe_gt_sriov_pf_migration_save_produce() - Add VF save data packet to migration ring. * @gt: the &xe_gt * @vfid: the VF identifier - * @buf: the user space buffer with GuC VF state - * @size: the size of GuC VF state (in bytes) + * @data: the &xe_sriov_packet * - * This function is for PF only. + * Called by the save migration data producer (PF SR-IOV Control worker) when + * processing migration data. + * Wakes up the save migration data consumer (userspace), that is potentially + * waiting for data when the ring was empty. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_migration_save_produce(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ + int ret; + + ret = ptr_ring_produce(&pf_pick_gt_migration(gt, vfid)->ring, data); + if (ret) + return ret; + + wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid)); + + return 0; +} + +/** + * xe_gt_sriov_pf_migration_restore_consume() - Get VF restore data packet from migration ring. 
+ * @gt: the &xe_gt + * @vfid: the VF identifier * - * This function reads @size bytes of the VF GuC state stored at user space - * address @buf and writes it into a internal VF state buffer. + * Called by the restore migration data consumer (PF SR-IOV Control worker) when + * processing migration data. + * Wakes up the restore migration data producer (userspace), that is + * potentially waiting to add more data when the ring is full. * - * Return: the number of bytes used or a negative error code on failure. + * Return: Pointer to &xe_sriov_packet on success, + * NULL if ring is empty. */ -ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid, - const char __user *buf, size_t size) +struct xe_sriov_packet * +xe_gt_sriov_pf_migration_restore_consume(struct xe_gt *gt, unsigned int vfid) { - struct xe_gt_sriov_state_snapshot *snapshot; - loff_t pos = 0; - ssize_t ret; + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); + struct wait_queue_head *wq = xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid); + struct xe_sriov_packet *data; - xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); - xe_gt_assert(gt, vfid != PFID); - xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + data = ptr_ring_consume(&migration->ring); + if (data) + wake_up_all(wq); - if (!pf_migration_supported(gt)) - return -ENOPKG; + return data; +} - mutex_lock(pf_migration_mutex(gt)); - snapshot = pf_pick_vf_snapshot(gt, vfid); - ret = pf_alloc_guc_state(gt, snapshot, size); - if (!ret) { - ret = simple_write_to_buffer(snapshot->guc.buff, size, &pos, buf, size); - if (ret < 0) - pf_free_guc_state(gt, snapshot); - else - pf_dump_guc_state(gt, snapshot); +static bool pf_restore_data_ready(struct xe_gt *gt, unsigned int vfid) +{ + if (xe_gt_sriov_pf_control_check_restore_failed(gt, vfid) || + !ptr_ring_full(&pf_pick_gt_migration(gt, vfid)->ring)) + return true; + + return false; +} + +/** + * xe_gt_sriov_pf_migration_restore_produce() 
- Add VF restore data packet to migration ring. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @data: the &xe_sriov_packet + * + * Called by the restore migration data producer (userspace) when processing + * migration data. + * If the ring is full, waits until there is space. + * Queues the restore migration data consumer (PF SR-IOV Control worker), that + * is potentially waiting for data when the ring was empty. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_migration_restore_produce(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ + int ret; + + xe_gt_assert(gt, data->hdr.tile_id == gt->tile->id); + xe_gt_assert(gt, data->hdr.gt_id == gt->info.id); + + for (;;) { + if (xe_gt_sriov_pf_control_check_restore_failed(gt, vfid)) + return -EIO; + + ret = ptr_ring_produce(&pf_pick_gt_migration(gt, vfid)->ring, data); + if (!ret) + break; + + ret = wait_event_interruptible(*xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid), + pf_restore_data_ready(gt, vfid)); + if (ret) + return ret; } - mutex_unlock(pf_migration_mutex(gt)); - return ret; + return xe_gt_sriov_pf_control_process_restore_data(gt, vfid); } -#endif /* CONFIG_DEBUG_FS */ -static bool pf_check_migration_support(struct xe_gt *gt) +/** + * xe_gt_sriov_pf_migration_save_consume() - Get VF save data packet from migration ring. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * Called by the save migration data consumer (userspace) when + * processing migration data. + * Queues the save migration data producer (PF SR-IOV Control worker), that is + * potentially waiting to add more data when the ring is full. + * + * Return: Pointer to &xe_sriov_packet on success, + * NULL if ring is empty and there's no more data available, + * ERR_PTR(-EAGAIN) if the ring is empty, but data is still produced. 
+ */ +struct xe_sriov_packet * +xe_gt_sriov_pf_migration_save_consume(struct xe_gt *gt, unsigned int vfid) { - /* GuC 70.25 with save/restore v2 is required */ - xe_gt_assert(gt, GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 25, 0)); + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); + struct xe_sriov_packet *data; + int ret; + + data = ptr_ring_consume(&migration->ring); + if (data) { + ret = xe_gt_sriov_pf_control_process_save_data(gt, vfid); + if (ret) { + xe_sriov_packet_free(data); + return ERR_PTR(ret); + } - /* XXX: for now this is for feature enabling only */ - return IS_ENABLED(CONFIG_DRM_XE_DEBUG); + return data; + } + + if (xe_gt_sriov_pf_control_check_save_data_done(gt, vfid)) + return NULL; + + if (xe_gt_sriov_pf_control_check_save_failed(gt, vfid)) + return ERR_PTR(-EIO); + + return ERR_PTR(-EAGAIN); +} + +static void destroy_pf_packet(void *ptr) +{ + struct xe_sriov_packet *data = ptr; + + xe_sriov_packet_free(data); +} + +static void action_ring_cleanup(void *arg) +{ + struct ptr_ring *r = arg; + + ptr_ring_cleanup(r, destroy_pf_packet); +} + +static void pf_gt_migration_check_support(struct xe_gt *gt) +{ + if (GUC_FIRMWARE_VER(&gt->uc.guc) < MAKE_GUC_VER(70, 54, 0)) + xe_sriov_pf_migration_disable(gt_to_xe(gt), "requires GuC version >= 70.54.0"); } /** @@ -402,18 +1041,29 @@ static bool pf_check_migration_support(struct xe_gt *gt) int xe_gt_sriov_pf_migration_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + unsigned int n, totalvfs; int err; xe_gt_assert(gt, IS_SRIOV_PF(xe)); - gt->sriov.pf.migration.supported = pf_check_migration_support(gt); + pf_gt_migration_check_support(gt); if (!pf_migration_supported(gt)) return 0; - err = drmm_mutex_init(&xe->drm, &gt->sriov.pf.migration.snapshot_lock); - if (err) - return err; + totalvfs = xe_sriov_pf_get_totalvfs(xe); + for (n = 1; n <= totalvfs; n++) { + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, n); + + err = 
ptr_ring_init(&migration->ring, + XE_GT_SRIOV_PF_MIGRATION_RING_SIZE, GFP_KERNEL); + if (err) + return err; + + err = devm_add_action_or_reset(xe->drm.dev, action_ring_cleanup, &migration->ring); + if (err) + return err; + } return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h index 09faeae00ddb..181207a637b9 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h @@ -9,16 +9,46 @@ #include <linux/types.h> struct xe_gt; +struct xe_sriov_packet; +enum xe_sriov_packet_type; + +/* TODO: get this information by querying GuC in the future */ +#define XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE SZ_8M int xe_gt_sriov_pf_migration_init(struct xe_gt *gt); -int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid); -int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid); - -#ifdef CONFIG_DEBUG_FS -ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid, - char __user *buf, size_t count, loff_t *pos); -ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid, - const char __user *buf, size_t count); -#endif +int xe_gt_sriov_pf_migration_guc_save(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_migration_guc_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data); +int xe_gt_sriov_pf_migration_ggtt_save(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_migration_ggtt_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data); +int xe_gt_sriov_pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data); +int xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet 
*data); + +ssize_t xe_gt_sriov_pf_migration_size(struct xe_gt *gt, unsigned int vfid); + +bool xe_gt_sriov_pf_migration_ring_empty(struct xe_gt *gt, unsigned int vfid); +bool xe_gt_sriov_pf_migration_ring_full(struct xe_gt *gt, unsigned int vfid); +void xe_gt_sriov_pf_migration_ring_free(struct xe_gt *gt, unsigned int vfid); + +void xe_gt_sriov_pf_migration_save_init(struct xe_gt *gt, unsigned int vfid); +bool xe_gt_sriov_pf_migration_save_data_pending(struct xe_gt *gt, unsigned int vfid, + enum xe_sriov_packet_type type); +void xe_gt_sriov_pf_migration_save_data_complete(struct xe_gt *gt, unsigned int vfid, + enum xe_sriov_packet_type type); + +int xe_gt_sriov_pf_migration_save_produce(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data); +struct xe_sriov_packet * +xe_gt_sriov_pf_migration_restore_consume(struct xe_gt *gt, unsigned int vfid); + +int xe_gt_sriov_pf_migration_restore_produce(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data); +struct xe_sriov_packet * +xe_gt_sriov_pf_migration_save_consume(struct xe_gt *gt, unsigned int vfid); #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h index 1f3110b6d44f..f50c64241e9c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h @@ -6,35 +6,23 @@ #ifndef _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_ #define _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_ -#include <linux/mutex.h> -#include <linux/types.h> +#include <linux/ptr_ring.h> /** - * struct xe_gt_sriov_state_snapshot - GT-level per-VF state snapshot data. + * struct xe_gt_sriov_migration_data - GT-level per-VF migration data. * * Used by the PF driver to maintain per-VF migration data. 
*/ -struct xe_gt_sriov_state_snapshot { - /** @guc: GuC VF state snapshot */ +struct xe_gt_sriov_migration_data { + /** @ring: queue containing VF save / restore migration data */ + struct ptr_ring ring; + /** @save: structure for currently processed save migration data */ struct { - /** @guc.buff: buffer with the VF state */ - u32 *buff; - /** @guc.size: size of the buffer (must be dwords aligned) */ - u32 size; - } guc; -}; - -/** - * struct xe_gt_sriov_pf_migration - GT-level data. - * - * Used by the PF driver to maintain non-VF specific per-GT data. - */ -struct xe_gt_sriov_pf_migration { - /** @supported: indicates whether the feature is supported */ - bool supported; - - /** @snapshot_lock: protects all VFs snapshots */ - struct mutex snapshot_lock; + /** @save.data_remaining: bitmap of migration types that need to be saved */ + unsigned long data_remaining; + /** @save.vram_offset: last saved offset within VRAM, used for chunked VRAM save */ + loff_t vram_offset; + } save; }; #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index 821cfcc34e6b..2eb21610e5a0 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -19,91 +19,7 @@ #include "xe_gt_sriov_pf_service_types.h" #include "xe_guc_ct.h" #include "xe_guc_hxg_helpers.h" - -static void pf_init_versions(struct xe_gt *gt) -{ - BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); - BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); - - /* base versions may differ between platforms */ - gt->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; - gt->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; - - /* latest version is same for all platforms */ - gt->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; - gt->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; -} - -/* 
Return: 0 on success or a negative error code on failure. */ -static int pf_negotiate_version(struct xe_gt *gt, - u32 wanted_major, u32 wanted_minor, - u32 *major, u32 *minor) -{ - struct xe_gt_sriov_pf_service_version base = gt->sriov.pf.service.version.base; - struct xe_gt_sriov_pf_service_version latest = gt->sriov.pf.service.version.latest; - - xe_gt_assert(gt, base.major); - xe_gt_assert(gt, base.major <= latest.major); - xe_gt_assert(gt, (base.major < latest.major) || (base.minor <= latest.minor)); - - /* VF doesn't care - return our latest */ - if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && - wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { - *major = latest.major; - *minor = latest.minor; - return 0; - } - - /* VF wants newer than our - return our latest */ - if (wanted_major > latest.major) { - *major = latest.major; - *minor = latest.minor; - return 0; - } - - /* VF wants older than min required - reject */ - if (wanted_major < base.major || - (wanted_major == base.major && wanted_minor < base.minor)) { - return -EPERM; - } - - /* previous major - return wanted, as we should still support it */ - if (wanted_major < latest.major) { - /* XXX: we are not prepared for multi-versions yet */ - xe_gt_assert(gt, base.major == latest.major); - return -ENOPKG; - } - - /* same major - return common minor */ - *major = wanted_major; - *minor = min_t(u32, latest.minor, wanted_minor); - return 0; -} - -static void pf_connect(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - xe_gt_assert(gt, major || minor); - - gt->sriov.pf.vfs[vfid].version.major = major; - gt->sriov.pf.vfs[vfid].version.minor = minor; -} - -static void pf_disconnect(struct xe_gt *gt, u32 vfid) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - - gt->sriov.pf.vfs[vfid].version.major = 0; - gt->sriov.pf.vfs[vfid].version.minor = 0; -} - -static bool pf_is_negotiated(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - - 
return major == gt->sriov.pf.vfs[vfid].version.major && - minor <= gt->sriov.pf.vfs[vfid].version.minor; -} +#include "xe_sriov_pf_service.h" static const struct xe_reg tgl_runtime_regs[] = { RPM_CONFIG0, /* _MMIO(0x0d00) */ @@ -183,11 +99,30 @@ static const struct xe_reg ver_3000_runtime_regs[] = { HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ }; +static const struct xe_reg ver_35_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + XEHP_FUSE4, /* _MMIO(0x9114) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + MIRROR_L3BANK_ENABLE, /* _MMIO(0x9130) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ + XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */ + XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */ + XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */ + SERVICE_COPY_ENABLE, /* _MMIO(0x9170) */ +}; + static const struct xe_reg *pick_runtime_regs(struct xe_device *xe, unsigned int *count) { const struct xe_reg *regs; - if (GRAPHICS_VERx100(xe) >= 3000) { + if (GRAPHICS_VER(xe) >= 35) { + *count = ARRAY_SIZE(ver_35_runtime_regs); + regs = ver_35_runtime_regs; + } else if (GRAPHICS_VERx100(xe) >= 3000) { *count = ARRAY_SIZE(ver_3000_runtime_regs); regs = ver_3000_runtime_regs; } else if (GRAPHICS_VERx100(xe) >= 2000) { @@ -266,7 +201,7 @@ static void pf_prepare_runtime_info(struct xe_gt *gt) read_many(gt, size, regs, values); if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); xe_gt_sriov_pf_service_print_runtime(gt, &p); } @@ -285,8 +220,6 @@ int xe_gt_sriov_pf_service_init(struct xe_gt *gt) { int err; - pf_init_versions(gt); - err = pf_alloc_runtime_info(gt); if (unlikely(err)) goto failed; @@ -311,47 +244,6 @@ void xe_gt_sriov_pf_service_update(struct xe_gt *gt) pf_prepare_runtime_info(gt); } -/** - * 
xe_gt_sriov_pf_service_reset - Reset a connection with the VF. - * @gt: the &xe_gt - * @vfid: the VF identifier - * - * Reset a VF driver negotiated VF/PF ABI version. - * After that point, the VF driver will have to perform new version handshake - * to continue use of the PF services again. - * - * This function can only be called on PF. - */ -void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid) -{ - pf_disconnect(gt, vfid); -} - -/* Return: 0 on success or a negative error code on failure. */ -static int pf_process_handshake(struct xe_gt *gt, u32 vfid, - u32 wanted_major, u32 wanted_minor, - u32 *major, u32 *minor) -{ - int err; - - xe_gt_sriov_dbg_verbose(gt, "VF%u wants ABI version %u.%u\n", - vfid, wanted_major, wanted_minor); - - err = pf_negotiate_version(gt, wanted_major, wanted_minor, major, minor); - - if (err < 0) { - xe_gt_sriov_notice(gt, "VF%u failed to negotiate ABI %u.%u (%pe)\n", - vfid, wanted_major, wanted_minor, ERR_PTR(err)); - pf_disconnect(gt, vfid); - } else { - xe_gt_sriov_dbg(gt, "VF%u negotiated ABI version %u.%u\n", - vfid, *major, *minor); - pf_connect(gt, vfid, *major, *minor); - } - - return 0; -} - /* Return: length of the response message or a negative error code on failure. 
*/ static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, const u32 *request, u32 len, u32 *response, u32 size) @@ -371,7 +263,8 @@ static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, wanted_major = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, request[1]); wanted_minor = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, request[1]); - err = pf_process_handshake(gt, origin, wanted_major, wanted_minor, &major, &minor); + err = xe_sriov_pf_service_handshake_vf(gt_to_xe(gt), origin, wanted_major, wanted_minor, + &major, &minor); if (err < 0) return err; @@ -430,8 +323,10 @@ static int pf_process_runtime_query_msg(struct xe_gt *gt, u32 origin, u32 remaining = 0; int ret; - if (!pf_is_negotiated(gt, origin, 1, 0)) + /* this action is available from ABI 1.0 */ + if (!xe_sriov_pf_service_is_negotiated(gt_to_xe(gt), origin, 1, 0)) return -EACCES; + if (unlikely(msg_len > VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) return -EMSGSIZE; if (unlikely(msg_len < VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) @@ -528,33 +423,3 @@ int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p return 0; } - -/** - * xe_gt_sriov_pf_service_print_version - Print ABI versions negotiated with VFs. - * @gt: the &xe_gt - * @p: the &drm_printer - * - * This function is for PF use only. 
- */ -int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p) -{ - struct xe_device *xe = gt_to_xe(gt); - unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); - struct xe_gt_sriov_pf_service_version *version; - - xe_gt_assert(gt, IS_SRIOV_PF(xe)); - - for (n = 1; n <= total_vfs; n++) { - version = &gt->sriov.pf.vfs[n].version; - if (!version->major && !version->minor) - continue; - - drm_printf(p, "VF%u:\t%u.%u\n", n, version->major, version->minor); - } - - return 0; -} - -#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) -#include "tests/xe_gt_sriov_pf_service_test.c" -#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h index 56aaadf0360d..10b02c9b651c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h @@ -14,9 +14,7 @@ struct xe_gt; int xe_gt_sriov_pf_service_init(struct xe_gt *gt); void xe_gt_sriov_pf_service_update(struct xe_gt *gt); -void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid); -int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p); int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p); #ifdef CONFIG_PCI_IOV diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h index a64a6835ad65..667b8310478d 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -31,8 +31,8 @@ struct xe_gt_sriov_metadata { /** @version: negotiated VF/PF ABI version */ struct xe_gt_sriov_pf_service_version version; - /** @snapshot: snapshot of the VF state data */ - struct xe_gt_sriov_state_snapshot snapshot; + /** @migration: per-VF migration data. 
*/ + struct xe_gt_sriov_migration_data migration; }; /** @@ -58,7 +58,6 @@ struct xe_gt_sriov_pf { struct xe_gt_sriov_pf_service service; struct xe_gt_sriov_pf_control control; struct xe_gt_sriov_pf_policy policy; - struct xe_gt_sriov_pf_migration migration; struct xe_gt_sriov_spare_config spare; struct xe_gt_sriov_metadata *vfs; }; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_printk.h b/drivers/gpu/drm/xe/xe_gt_sriov_printk.h index 17624b16300a..d3457d608db8 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_printk.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_printk.h @@ -7,10 +7,13 @@ #define _XE_GT_SRIOV_PRINTK_H_ #include "xe_gt_printk.h" -#include "xe_sriov_printk.h" +#include "xe_tile_sriov_printk.h" + +#define __XE_GT_SRIOV_PRINTK_FMT(_gt, _fmt, ...) \ + __XE_TILE_SRIOV_PRINTK_FMT((_gt)->tile, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__)) #define __xe_gt_sriov_printk(gt, _level, fmt, ...) \ - xe_gt_printk((gt), _level, "%s" fmt, xe_sriov_printk_prefix(gt_to_xe(gt)), ##__VA_ARGS__) + xe_sriov_##_level(gt_to_xe(gt), __XE_GT_SRIOV_PRINTK_FMT((gt), fmt, ##__VA_ARGS__)) #define xe_gt_sriov_err(_gt, _fmt, ...) 
\ __xe_gt_sriov_printk(_gt, err, _fmt, ##__VA_ARGS__) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index a439261bf4d7..033eae2d03d3 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -23,11 +23,19 @@ #include "xe_gt_sriov_vf.h" #include "xe_gt_sriov_vf_types.h" #include "xe_guc.h" +#include "xe_guc_ct.h" #include "xe_guc_hxg_helpers.h" #include "xe_guc_relay.h" +#include "xe_guc_submit.h" +#include "xe_irq.h" +#include "xe_lrc.h" +#include "xe_memirq.h" #include "xe_mmio.h" #include "xe_sriov.h" #include "xe_sriov_vf.h" +#include "xe_sriov_vf_ccs.h" +#include "xe_tile_sriov_vf.h" +#include "xe_tlb_inval.h" #include "xe_uc_fw.h" #include "xe_wopcm.h" @@ -82,17 +90,17 @@ int xe_gt_sriov_vf_reset(struct xe_gt *gt) } static int guc_action_match_version(struct xe_guc *guc, - u32 wanted_branch, u32 wanted_major, u32 wanted_minor, - u32 *branch, u32 *major, u32 *minor, u32 *patch) + struct xe_uc_fw_version *wanted, + struct xe_uc_fw_version *found) { u32 request[VF2GUC_MATCH_VERSION_REQUEST_MSG_LEN] = { FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_MATCH_VERSION), - FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted_branch) | - FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted_major) | - FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted_minor), + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted->branch) | + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted->major) | + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted->minor), }; u32 response[GUC_MAX_MMIO_MSG_LEN]; int ret; @@ -106,120 +114,138 @@ static int guc_action_match_version(struct xe_guc *guc, if (unlikely(FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_0_MBZ, response[0]))) return -EPROTO; - *branch = 
FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]); - *major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]); - *minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]); - *patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]); + memset(found, 0, sizeof(struct xe_uc_fw_version)); + found->branch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]); + found->major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]); + found->minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]); + found->patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]); return 0; } -static void vf_minimum_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor) +static int guc_action_match_version_any(struct xe_guc *guc, + struct xe_uc_fw_version *found) +{ + struct xe_uc_fw_version wanted = { + .branch = GUC_VERSION_BRANCH_ANY, + .major = GUC_VERSION_MAJOR_ANY, + .minor = GUC_VERSION_MINOR_ANY, + .patch = 0 + }; + + return guc_action_match_version(guc, &wanted, found); +} + +static void vf_minimum_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver) { struct xe_device *xe = gt_to_xe(gt); + memset(ver, 0, sizeof(struct xe_uc_fw_version)); + switch (xe->info.platform) { case XE_TIGERLAKE ... 
XE_PVC: /* 1.1 this is current baseline for Xe driver */ - *branch = 0; - *major = 1; - *minor = 1; + ver->branch = 0; + ver->major = 1; + ver->minor = 1; break; default: /* 1.2 has support for the GMD_ID KLV */ - *branch = 0; - *major = 1; - *minor = 2; + ver->branch = 0; + ver->major = 1; + ver->minor = 2; break; } } -static void vf_wanted_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor) +static void vf_wanted_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver) { /* for now it's the same as minimum */ - return vf_minimum_guc_version(gt, branch, major, minor); + return vf_minimum_guc_version(gt, ver); } static int vf_handshake_with_guc(struct xe_gt *gt) { - struct xe_gt_sriov_vf_guc_version *guc_version = &gt->sriov.vf.guc_version; + struct xe_uc_fw_version *guc_version = &gt->sriov.vf.guc_version; + struct xe_uc_fw_version wanted = {0}; struct xe_guc *guc = &gt->uc.guc; - u32 wanted_branch, wanted_major, wanted_minor; - u32 branch, major, minor, patch; + bool old = false; int err; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); /* select wanted version - prefer previous (if any) */ if (guc_version->major || guc_version->minor) { - wanted_branch = guc_version->branch; - wanted_major = guc_version->major; - wanted_minor = guc_version->minor; + wanted = *guc_version; + old = true; } else { - vf_wanted_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor); - xe_gt_assert(gt, wanted_major != GUC_VERSION_MAJOR_ANY); + vf_wanted_guc_version(gt, &wanted); + xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY); + + /* First time we handshake, so record the minimum wanted */ + gt->sriov.vf.wanted_guc_version = wanted; } - err = guc_action_match_version(guc, wanted_branch, wanted_major, wanted_minor, - &branch, &major, &minor, &patch); + err = guc_action_match_version(guc, &wanted, guc_version); if (unlikely(err)) goto fail; - /* we don't support interface version change */ - if ((guc_version->major || guc_version->minor) && - 
(guc_version->branch != branch || guc_version->major != major || - guc_version->minor != minor)) { - xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n", - branch, major, minor, patch); - xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n", - guc_version->branch, guc_version->major, - guc_version->minor, guc_version->patch); - err = -EREMCHG; - goto fail; + if (old) { + /* we don't support interface version change */ + if (MAKE_GUC_VER_STRUCT(*guc_version) != MAKE_GUC_VER_STRUCT(wanted)) { + xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n", + guc_version->branch, guc_version->major, + guc_version->minor, guc_version->patch); + xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n", + wanted.branch, wanted.major, + wanted.minor, wanted.patch); + err = -EREMCHG; + goto fail; + } else { + /* version is unchanged, no need to re-verify it */ + return 0; + } } /* illegal */ - if (major > wanted_major) { + if (guc_version->major > wanted.major) { err = -EPROTO; goto unsupported; } /* there's no fallback on major version. 
*/ - if (major != wanted_major) { + if (guc_version->major != wanted.major) { err = -ENOPKG; goto unsupported; } /* check against minimum version supported by us */ - vf_minimum_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor); - xe_gt_assert(gt, major != GUC_VERSION_MAJOR_ANY); - if (major < wanted_major || (major == wanted_major && minor < wanted_minor)) { + vf_minimum_guc_version(gt, &wanted); + xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY); + if (MAKE_GUC_VER_STRUCT(*guc_version) < MAKE_GUC_VER_STRUCT(wanted)) { err = -ENOKEY; goto unsupported; } xe_gt_sriov_dbg(gt, "using GuC interface version %u.%u.%u.%u\n", - branch, major, minor, patch); + guc_version->branch, guc_version->major, + guc_version->minor, guc_version->patch); - guc_version->branch = branch; - guc_version->major = major; - guc_version->minor = minor; - guc_version->patch = patch; return 0; unsupported: xe_gt_sriov_err(gt, "Unsupported GuC version %u.%u.%u.%u (%pe)\n", - branch, major, minor, patch, ERR_PTR(err)); + guc_version->branch, guc_version->major, + guc_version->minor, guc_version->patch, + ERR_PTR(err)); fail: xe_gt_sriov_err(gt, "Unable to confirm GuC version %u.%u (%pe)\n", - wanted_major, wanted_minor, ERR_PTR(err)); + wanted.major, wanted.minor, ERR_PTR(err)); /* try again with *any* just to query which version is supported */ - if (!guc_action_match_version(guc, GUC_VERSION_BRANCH_ANY, - GUC_VERSION_MAJOR_ANY, GUC_VERSION_MINOR_ANY, - &branch, &major, &minor, &patch)) + if (!guc_action_match_version_any(guc, &wanted)) xe_gt_sriov_notice(gt, "GuC reports interface version %u.%u.%u.%u\n", - branch, major, minor, patch); + wanted.branch, wanted.major, wanted.minor, wanted.patch); return err; } @@ -250,6 +276,29 @@ int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt) return 0; } +/** + * xe_gt_sriov_vf_guc_versions - Minimum required and found GuC ABI versions + * @gt: the &xe_gt + * @wanted: pointer to the xe_uc_fw_version to be filled with the wanted version + * 
@found: pointer to the xe_uc_fw_version to be filled with the found version + * + * This function is for VF use only and it can only be used after successful + * version handshake with the GuC. + */ +void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt, + struct xe_uc_fw_version *wanted, + struct xe_uc_fw_version *found) +{ + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_gt_assert(gt, gt->sriov.vf.guc_version.major); + + if (wanted) + *wanted = gt->sriov.vf.wanted_guc_version; + + if (found) + *found = gt->sriov.vf.guc_version; +} + static int guc_action_vf_notify_resfix_done(struct xe_guc *guc) { u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = { @@ -265,13 +314,13 @@ static int guc_action_vf_notify_resfix_done(struct xe_guc *guc) } /** - * xe_gt_sriov_vf_notify_resfix_done - Notify GuC about resource fixups apply completed. + * vf_notify_resfix_done - Notify GuC about resource fixups apply completed. * @gt: the &xe_gt struct instance linked to target GuC * * Returns: 0 if the operation completed successfully, or a negative error * code otherwise. 
*/ -int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt) +static int vf_notify_resfix_done(struct xe_gt *gt) { struct xe_guc *guc = &gt->uc.guc; int err; @@ -391,13 +440,17 @@ u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt) static int vf_get_ggtt_info(struct xe_gt *gt) { - struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config; + struct xe_tile *tile = gt_to_tile(gt); + struct xe_ggtt *ggtt = tile->mem.ggtt; struct xe_guc *guc = &gt->uc.guc; - u64 start, size; + u64 start, size, ggtt_size; + s64 shift; int err; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + guard(mutex)(&ggtt->lock); + err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_GGTT_START_KEY, &start); if (unlikely(err)) return err; @@ -406,27 +459,44 @@ static int vf_get_ggtt_info(struct xe_gt *gt) if (unlikely(err)) return err; - if (config->ggtt_size && config->ggtt_size != size) { + if (!size) + return -ENODATA; + + ggtt_size = xe_tile_sriov_vf_ggtt(tile); + if (ggtt_size && ggtt_size != size) { xe_gt_sriov_err(gt, "Unexpected GGTT reassignment: %lluK != %lluK\n", - size / SZ_1K, config->ggtt_size / SZ_1K); + size / SZ_1K, ggtt_size / SZ_1K); return -EREMCHG; } xe_gt_sriov_dbg_verbose(gt, "GGTT %#llx-%#llx = %lluK\n", start, start + size - 1, size / SZ_1K); - config->ggtt_base = start; - config->ggtt_size = size; + shift = start - (s64)xe_tile_sriov_vf_ggtt_base(tile); + xe_tile_sriov_vf_ggtt_base_store(tile, start); + xe_tile_sriov_vf_ggtt_store(tile, size); + + if (shift && shift != start) { + xe_gt_sriov_info(gt, "Shifting GGTT base by %lld to 0x%016llx\n", + shift, start); + xe_tile_sriov_vf_fixup_ggtt_nodes_locked(gt_to_tile(gt), shift); + } + + if (xe_sriov_vf_migration_supported(gt_to_xe(gt))) { + WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false); + smp_wmb(); /* Ensure above write visible before wake */ + wake_up_all(&gt->sriov.vf.migration.wq); + } - return config->ggtt_size ? 
0 : -ENODATA; + return 0; } static int vf_get_lmem_info(struct xe_gt *gt) { - struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config; + struct xe_tile *tile = gt_to_tile(gt); struct xe_guc *guc = &gt->uc.guc; char size_str[10]; - u64 size; + u64 size, lmem_size; int err; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); @@ -435,18 +505,19 @@ static int vf_get_lmem_info(struct xe_gt *gt) if (unlikely(err)) return err; - if (config->lmem_size && config->lmem_size != size) { + lmem_size = xe_tile_sriov_vf_lmem(tile); + if (lmem_size && lmem_size != size) { xe_gt_sriov_err(gt, "Unexpected LMEM reassignment: %lluM != %lluM\n", - size / SZ_1M, config->lmem_size / SZ_1M); + size / SZ_1M, lmem_size / SZ_1M); return -EREMCHG; } string_get_size(size, 1, STRING_UNITS_2, size_str, sizeof(size_str)); xe_gt_sriov_dbg_verbose(gt, "LMEM %lluM %s\n", size / SZ_1M, size_str); - config->lmem_size = size; + xe_tile_sriov_vf_lmem_store(tile, size); - return config->lmem_size ? 0 : -ENODATA; + return size ? 0 : -ENODATA; } static int vf_get_submission_cfg(struct xe_gt *gt) @@ -497,7 +568,9 @@ static void vf_cache_gmdid(struct xe_gt *gt) * xe_gt_sriov_vf_query_config - Query SR-IOV config data over MMIO. * @gt: the &xe_gt * - * This function is for VF use only. + * This function is for VF use only. This function may shift the GGTT and is + * performed under GGTT lock, making this step visible to all GTs that share a + * GGTT. * * Return: 0 on success or a negative error code on failure. */ @@ -510,7 +583,7 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt) if (unlikely(err)) return err; - if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { + if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) { err = vf_get_lmem_info(gt); if (unlikely(err)) return err; @@ -543,125 +616,6 @@ u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt) return gt->sriov.vf.self_config.num_ctxs; } -/** - * xe_gt_sriov_vf_lmem - VF LMEM configuration. - * @gt: the &xe_gt - * - * This function is for VF use only. 
- * - * Return: size of the LMEM assigned to VF. - */ -u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt) -{ - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - xe_gt_assert(gt, gt->sriov.vf.guc_version.major); - xe_gt_assert(gt, gt->sriov.vf.self_config.lmem_size); - - return gt->sriov.vf.self_config.lmem_size; -} - -static struct xe_ggtt_node * -vf_balloon_ggtt_node(struct xe_ggtt *ggtt, u64 start, u64 end) -{ - struct xe_ggtt_node *node; - int err; - - node = xe_ggtt_node_init(ggtt); - if (IS_ERR(node)) - return node; - - err = xe_ggtt_node_insert_balloon(node, start, end); - if (err) { - xe_ggtt_node_fini(node); - return ERR_PTR(err); - } - - return node; -} - -static int vf_balloon_ggtt(struct xe_gt *gt) -{ - struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; - struct xe_tile *tile = gt_to_tile(gt); - struct xe_ggtt *ggtt = tile->mem.ggtt; - struct xe_device *xe = gt_to_xe(gt); - u64 start, end; - - xe_gt_assert(gt, IS_SRIOV_VF(xe)); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); - - if (!config->ggtt_size) - return -ENODATA; - - /* - * VF can only use part of the GGTT as allocated by the PF: - * - * WOPCM GUC_GGTT_TOP - * |<------------ Total GGTT size ------------------>| - * - * VF GGTT base -->|<- size ->| - * - * +--------------------+----------+-----------------+ - * |////////////////////| block |\\\\\\\\\\\\\\\\\| - * +--------------------+----------+-----------------+ - * - * |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->| - */ - - start = xe_wopcm_size(xe); - end = config->ggtt_base; - if (end != start) { - tile->sriov.vf.ggtt_balloon[0] = vf_balloon_ggtt_node(ggtt, start, end); - if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) - return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); - } - - start = config->ggtt_base + config->ggtt_size; - end = GUC_GGTT_TOP; - if (end != start) { - tile->sriov.vf.ggtt_balloon[1] = vf_balloon_ggtt_node(ggtt, start, end); - if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { - 
xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); - return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); - } - } - - return 0; -} - -static void deballoon_ggtt(struct drm_device *drm, void *arg) -{ - struct xe_tile *tile = arg; - - xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[1]); - xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); -} - -/** - * xe_gt_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration. - * @gt: the &xe_gt - * - * This function is for VF use only. - * - * Return: 0 on success or a negative error code on failure. - */ -int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt) -{ - struct xe_tile *tile = gt_to_tile(gt); - struct xe_device *xe = tile_to_xe(tile); - int err; - - if (xe_gt_is_media_type(gt)) - return 0; - - err = vf_balloon_ggtt(gt); - if (err) - return err; - - return drmm_add_action_or_reset(&xe->drm, deballoon_ggtt, tile); -} - static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor) { u32 request[VF2PF_HANDSHAKE_REQUEST_MSG_LEN] = { @@ -694,21 +648,22 @@ static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor) return 0; } -static void vf_connect_pf(struct xe_gt *gt, u16 major, u16 minor) +static void vf_connect_pf(struct xe_device *xe, u16 major, u16 minor) { - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_assert(xe, IS_SRIOV_VF(xe)); - gt->sriov.vf.pf_version.major = major; - gt->sriov.vf.pf_version.minor = minor; + xe->sriov.vf.pf_version.major = major; + xe->sriov.vf.pf_version.minor = minor; } -static void vf_disconnect_pf(struct xe_gt *gt) +static void vf_disconnect_pf(struct xe_device *xe) { - vf_connect_pf(gt, 0, 0); + vf_connect_pf(xe, 0, 0); } static int vf_handshake_with_pf(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); u32 major_wanted = GUC_RELAY_VERSION_LATEST_MAJOR; u32 minor_wanted = GUC_RELAY_VERSION_LATEST_MINOR; u32 major = major_wanted, minor = minor_wanted; @@ -724,13 +679,13 @@ 
static int vf_handshake_with_pf(struct xe_gt *gt) } xe_gt_sriov_dbg(gt, "using VF/PF ABI %u.%u\n", major, minor); - vf_connect_pf(gt, major, minor); + vf_connect_pf(xe, major, minor); return 0; failed: xe_gt_sriov_err(gt, "Unable to confirm VF/PF ABI version %u.%u (%pe)\n", major, minor, ERR_PTR(err)); - vf_disconnect_pf(gt); + vf_disconnect_pf(xe); return err; } @@ -758,6 +713,44 @@ failed: } /** + * xe_gt_sriov_vf_default_lrcs_hwsp_rebase - Update GGTT references in HWSP of default LRCs. + * @gt: the &xe_gt struct instance + */ +static void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) + xe_default_lrc_update_memirq_regs_with_address(hwe); +} + +static void vf_start_migration_recovery(struct xe_gt *gt) +{ + bool started; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + spin_lock(>->sriov.vf.migration.lock); + + if (!gt->sriov.vf.migration.recovery_queued && + !gt->sriov.vf.migration.recovery_teardown) { + gt->sriov.vf.migration.recovery_queued = true; + WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, true); + WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, true); + smp_wmb(); /* Ensure above writes visible before wake */ + + xe_guc_ct_wake_waiters(>->uc.guc.ct); + + started = queue_work(gt->ordered_wq, >->sriov.vf.migration.worker); + xe_gt_sriov_info(gt, "VF migration recovery %s\n", started ? + "scheduled" : "already in progress"); + } + + spin_unlock(>->sriov.vf.migration.lock); +} + +/** * xe_gt_sriov_vf_migrated_event_handler - Start a VF migration recovery, * or just mark that a GuC is ready for it. 
* @gt: the &xe_gt struct instance linked to target GuC @@ -769,24 +762,25 @@ void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); xe_gt_assert(gt, IS_SRIOV_VF(xe)); + xe_gt_assert(gt, xe_gt_sriov_vf_recovery_pending(gt)); - set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags); - /* - * We need to be certain that if all flags were set, at least one - * thread will notice that and schedule the recovery. - */ - smp_mb__after_atomic(); + if (!xe_sriov_vf_migration_supported(xe)) { + xe_gt_sriov_err(gt, "migration not supported\n"); + return; + } xe_gt_sriov_info(gt, "ready for recovery after migration\n"); - xe_sriov_vf_start_migration_recovery(xe); + vf_start_migration_recovery(gt); } static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor) { - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_VF(xe)); - return major == gt->sriov.vf.pf_version.major && - minor <= gt->sriov.vf.pf_version.minor; + return major == xe->sriov.vf.pf_version.major && + minor <= xe->sriov.vf.pf_version.minor; } static int vf_prepare_runtime_info(struct xe_gt *gt, unsigned int num_regs) @@ -974,7 +968,6 @@ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg) struct vf_runtime_reg *rr; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - xe_gt_assert(gt, gt->sriov.vf.pf_version.major); xe_gt_assert(gt, !reg.vf); if (reg.addr == GMD_ID.addr) { @@ -1032,20 +1025,25 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) { struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; struct xe_device *xe = gt_to_xe(gt); + u64 lmem_size; char buf[10]; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - drm_printf(p, "GGTT range:\t%#llx-%#llx\n", - config->ggtt_base, - config->ggtt_base + config->ggtt_size - 1); + if (xe_gt_is_main_type(gt)) { + u64 ggtt_size = xe_tile_sriov_vf_ggtt(gt_to_tile(gt)); + u64 ggtt_base = 
xe_tile_sriov_vf_ggtt_base(gt_to_tile(gt)); - string_get_size(config->ggtt_size, 1, STRING_UNITS_2, buf, sizeof(buf)); - drm_printf(p, "GGTT size:\t%llu (%s)\n", config->ggtt_size, buf); + drm_printf(p, "GGTT range:\t%#llx-%#llx\n", + ggtt_base, ggtt_base + ggtt_size - 1); + string_get_size(ggtt_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "GGTT size:\t%llu (%s)\n", ggtt_size, buf); - if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { - string_get_size(config->lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf)); - drm_printf(p, "LMEM size:\t%llu (%s)\n", config->lmem_size, buf); + if (IS_DGFX(xe)) { + lmem_size = xe_tile_sriov_vf_lmem(gt_to_tile(gt)); + string_get_size(lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "LMEM size:\t%llu (%s)\n", lmem_size, buf); + } } drm_printf(p, "GuC contexts:\t%u\n", config->num_ctxs); @@ -1079,19 +1077,21 @@ void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) */ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt_sriov_vf_guc_version *guc_version = >->sriov.vf.guc_version; - struct xe_gt_sriov_vf_relay_version *pf_version = >->sriov.vf.pf_version; - u32 branch, major, minor; + struct xe_device *xe = gt_to_xe(gt); + struct xe_uc_fw_version *guc_version = >->sriov.vf.guc_version; + struct xe_uc_fw_version *wanted = >->sriov.vf.wanted_guc_version; + struct xe_sriov_vf_relay_version *pf_version = &xe->sriov.vf.pf_version; + struct xe_uc_fw_version ver; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); drm_printf(p, "GuC ABI:\n"); - vf_minimum_guc_version(gt, &branch, &major, &minor); - drm_printf(p, "\tbase:\t%u.%u.%u.*\n", branch, major, minor); + vf_minimum_guc_version(gt, &ver); + drm_printf(p, "\tbase:\t%u.%u.%u.*\n", ver.branch, ver.major, ver.minor); - vf_wanted_guc_version(gt, &branch, &major, &minor); - drm_printf(p, "\twanted:\t%u.%u.%u.*\n", branch, major, minor); + drm_printf(p, "\twanted:\t%u.%u.%u.*\n", + wanted->branch, wanted->major, 
wanted->minor); drm_printf(p, "\thandshake:\t%u.%u.%u.%u\n", guc_version->branch, guc_version->major, @@ -1106,3 +1106,272 @@ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "\thandshake:\t%u.%u\n", pf_version->major, pf_version->minor); } + +static bool vf_post_migration_shutdown(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + /* + * On platforms where CCS must be restored by the primary GT, the media + * GT's VF post-migration recovery must run afterward. Detect this case + * and re-queue the media GT's restore work item if necessary. + */ + if (xe->info.needs_shared_vf_gt_wq && xe_gt_is_media_type(gt)) { + struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; + + if (xe_gt_sriov_vf_recovery_pending(primary_gt)) + return true; + } + + spin_lock_irq(>->sriov.vf.migration.lock); + gt->sriov.vf.migration.recovery_queued = false; + spin_unlock_irq(>->sriov.vf.migration.lock); + + xe_guc_ct_flush_and_stop(>->uc.guc.ct); + xe_guc_submit_pause(>->uc.guc); + xe_tlb_inval_reset(>->tlb_inval); + + return false; +} + +static size_t post_migration_scratch_size(struct xe_device *xe) +{ + return max(xe_lrc_reg_size(xe), LRC_WA_BB_SIZE); +} + +static int vf_post_migration_fixups(struct xe_gt *gt) +{ + void *buf = gt->sriov.vf.migration.scratch; + int err; + + /* xe_gt_sriov_vf_query_config will fixup the GGTT addresses */ + err = xe_gt_sriov_vf_query_config(gt); + if (err) + return err; + + if (xe_gt_is_main_type(gt)) + xe_sriov_vf_ccs_rebase(gt_to_xe(gt)); + + xe_gt_sriov_vf_default_lrcs_hwsp_rebase(gt); + err = xe_guc_contexts_hwsp_rebase(>->uc.guc, buf); + if (err) + return err; + + return 0; +} + +static void vf_post_migration_rearm(struct xe_gt *gt) +{ + xe_guc_ct_restart(>->uc.guc.ct); + xe_guc_submit_unpause_prepare(>->uc.guc); +} + +static void vf_post_migration_kickstart(struct xe_gt *gt) +{ + xe_guc_submit_unpause(>->uc.guc); +} + +static void vf_post_migration_abort(struct xe_gt *gt) +{ + 
spin_lock_irq(>->sriov.vf.migration.lock); + WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false); + WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false); + spin_unlock_irq(>->sriov.vf.migration.lock); + + wake_up_all(>->sriov.vf.migration.wq); + + xe_guc_submit_pause_abort(>->uc.guc); +} + +static int vf_post_migration_notify_resfix_done(struct xe_gt *gt) +{ + bool skip_resfix = false; + + spin_lock_irq(>->sriov.vf.migration.lock); + if (gt->sriov.vf.migration.recovery_queued) { + skip_resfix = true; + xe_gt_sriov_dbg(gt, "another recovery imminent, resfix skipped\n"); + } else { + WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false); + } + spin_unlock_irq(>->sriov.vf.migration.lock); + + if (skip_resfix) + return -EAGAIN; + + /* + * Make sure interrupts on the new HW are properly set. The GuC IRQ + * must be working at this point, since the recovery did started, + * but the rest was not enabled using the procedure from spec. + */ + xe_irq_resume(gt_to_xe(gt)); + + return vf_notify_resfix_done(gt); +} + +static void vf_post_migration_recovery(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + bool retry; + + xe_gt_sriov_dbg(gt, "migration recovery in progress\n"); + + retry = vf_post_migration_shutdown(gt); + if (retry) + goto queue; + + if (!xe_sriov_vf_migration_supported(xe)) { + xe_gt_sriov_err(gt, "migration is not supported\n"); + err = -ENOTRECOVERABLE; + goto fail; + } + + err = vf_post_migration_fixups(gt); + if (err) + goto fail; + + vf_post_migration_rearm(gt); + + err = vf_post_migration_notify_resfix_done(gt); + if (err && err != -EAGAIN) + goto fail; + + vf_post_migration_kickstart(gt); + + xe_gt_sriov_notice(gt, "migration recovery ended\n"); + return; +fail: + vf_post_migration_abort(gt); + xe_gt_sriov_err(gt, "migration recovery failed (%pe)\n", ERR_PTR(err)); + xe_device_declare_wedged(xe); + return; + +queue: + xe_gt_sriov_info(gt, "Re-queuing migration recovery\n"); + queue_work(gt->ordered_wq, 
>->sriov.vf.migration.worker); +} + +static void migration_worker_func(struct work_struct *w) +{ + struct xe_gt *gt = container_of(w, struct xe_gt, + sriov.vf.migration.worker); + + vf_post_migration_recovery(gt); +} + +static void vf_migration_fini(void *arg) +{ + struct xe_gt *gt = arg; + + spin_lock_irq(>->sriov.vf.migration.lock); + gt->sriov.vf.migration.recovery_teardown = true; + spin_unlock_irq(>->sriov.vf.migration.lock); + + cancel_work_sync(>->sriov.vf.migration.worker); +} + +/** + * xe_gt_sriov_vf_init_early() - GT VF init early + * @gt: the &xe_gt + * + * Return 0 on success, errno on failure + */ +int xe_gt_sriov_vf_init_early(struct xe_gt *gt) +{ + void *buf; + + if (!xe_sriov_vf_migration_supported(gt_to_xe(gt))) + return 0; + + buf = drmm_kmalloc(>_to_xe(gt)->drm, + post_migration_scratch_size(gt_to_xe(gt)), + GFP_KERNEL); + if (!buf) + return -ENOMEM; + + gt->sriov.vf.migration.scratch = buf; + spin_lock_init(>->sriov.vf.migration.lock); + INIT_WORK(>->sriov.vf.migration.worker, migration_worker_func); + init_waitqueue_head(>->sriov.vf.migration.wq); + + return 0; +} + +/** + * xe_gt_sriov_vf_init() - GT VF init + * @gt: the &xe_gt + * + * Return 0 on success, errno on failure + */ +int xe_gt_sriov_vf_init(struct xe_gt *gt) +{ + if (!xe_sriov_vf_migration_supported(gt_to_xe(gt))) + return 0; + + /* + * We want to tear down the VF post-migration early during driver + * unload; therefore, we add this finalization action later during + * driver load. + */ + return devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, + vf_migration_fini, gt); +} + +/** + * xe_gt_sriov_vf_recovery_pending() - VF post migration recovery pending + * @gt: the &xe_gt + * + * The return value of this function must be immediately visible upon vCPU + * unhalt and must persist until RESFIX_DONE is issued. This guarantee is + * currently implemented only for platforms that support memirq. 
If non-memirq + * platforms begin to support VF migration, this function will need to be + * updated accordingly. + * + * Return: True if VF post migration recovery is pending, False otherwise + */ +bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt) +{ + struct xe_memirq *memirq = >_to_tile(gt)->memirq; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + /* early detection until recovery starts */ + if (xe_device_uses_memirq(gt_to_xe(gt)) && + xe_memirq_guc_sw_int_0_irq_pending(memirq, >->uc.guc)) + return true; + + return READ_ONCE(gt->sriov.vf.migration.recovery_inprogress); +} + +static bool vf_valid_ggtt(struct xe_gt *gt) +{ + struct xe_memirq *memirq = >_to_tile(gt)->memirq; + bool irq_pending = xe_device_uses_memirq(gt_to_xe(gt)) && + xe_memirq_guc_sw_int_0_irq_pending(memirq, >->uc.guc); + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + if (irq_pending || READ_ONCE(gt->sriov.vf.migration.ggtt_need_fixes)) + return false; + + return true; +} + +/** + * xe_gt_sriov_vf_wait_valid_ggtt() - VF wait for valid GGTT addresses + * @gt: the &xe_gt + */ +void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt) +{ + int ret; + + if (!IS_SRIOV_VF(gt_to_xe(gt)) || + !xe_sriov_vf_migration_supported(gt_to_xe(gt))) + return; + + ret = wait_event_interruptible_timeout(gt->sriov.vf.migration.wq, + vf_valid_ggtt(gt), + HZ * 5); + xe_gt_WARN_ON(gt, !ret); +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index ba6c5d74e326..af40276790fa 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -11,19 +11,26 @@ struct drm_printer; struct xe_gt; struct xe_reg; +struct xe_uc_fw_version; int xe_gt_sriov_vf_reset(struct xe_gt *gt); int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt); +void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt, + struct xe_uc_fw_version *wanted, + struct xe_uc_fw_version *found); int xe_gt_sriov_vf_query_config(struct xe_gt *gt); int xe_gt_sriov_vf_connect(struct xe_gt *gt); int 
xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); -int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt); -int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt); void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); +int xe_gt_sriov_vf_init_early(struct xe_gt *gt); +int xe_gt_sriov_vf_init(struct xe_gt *gt); +bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt); + u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt); u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt); + u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg); void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val); @@ -31,4 +38,6 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p); void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p); void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt); + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index a57f13b5afcd..420b0e6089de 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -7,41 +7,14 @@ #define _XE_GT_SRIOV_VF_TYPES_H_ #include <linux/types.h> - -/** - * struct xe_gt_sriov_vf_guc_version - GuC ABI version details. - */ -struct xe_gt_sriov_vf_guc_version { - /** @branch: branch version. */ - u8 branch; - /** @major: major version. */ - u8 major; - /** @minor: minor version. */ - u8 minor; - /** @patch: patch version. */ - u8 patch; -}; - -/** - * struct xe_gt_sriov_vf_relay_version - PF ABI version details. - */ -struct xe_gt_sriov_vf_relay_version { - /** @major: major version. */ - u16 major; - /** @minor: minor version. */ - u16 minor; -}; +#include <linux/wait.h> +#include <linux/workqueue.h> +#include "xe_uc_fw_types.h" /** * struct xe_gt_sriov_vf_selfconfig - VF configuration data. 
*/ struct xe_gt_sriov_vf_selfconfig { - /** @ggtt_base: assigned base offset of the GGTT region. */ - u64 ggtt_base; - /** @ggtt_size: assigned size of the GGTT region. */ - u64 ggtt_size; - /** @lmem_size: assigned size of the LMEM. */ - u64 lmem_size; /** @num_ctxs: assigned number of GuC submission context IDs. */ u16 num_ctxs; /** @num_dbs: assigned number of GuC doorbells IDs. */ @@ -68,17 +41,41 @@ struct xe_gt_sriov_vf_runtime { }; /** + * xe_gt_sriov_vf_migration - VF migration data. + */ +struct xe_gt_sriov_vf_migration { + /** @migration: VF migration recovery worker */ + struct work_struct worker; + /** @lock: Protects recovery_queued, teardown */ + spinlock_t lock; + /** @wq: wait queue for migration fixes */ + wait_queue_head_t wq; + /** @scratch: Scratch memory for VF recovery */ + void *scratch; + /** @recovery_teardown: VF post migration recovery is being torn down */ + bool recovery_teardown; + /** @recovery_queued: VF post migration recovery in queued */ + bool recovery_queued; + /** @recovery_inprogress: VF post migration recovery in progress */ + bool recovery_inprogress; + /** @ggtt_need_fixes: VF GGTT needs fixes */ + bool ggtt_need_fixes; +}; + +/** * struct xe_gt_sriov_vf - GT level VF virtualization data. */ struct xe_gt_sriov_vf { + /** @wanted_guc_version: minimum wanted GuC ABI version. */ + struct xe_uc_fw_version wanted_guc_version; /** @guc_version: negotiated GuC ABI version. */ - struct xe_gt_sriov_vf_guc_version guc_version; + struct xe_uc_fw_version guc_version; /** @self_config: resource configurations. */ struct xe_gt_sriov_vf_selfconfig self_config; - /** @pf_version: negotiated VF/PF ABI version. */ - struct xe_gt_sriov_vf_relay_version pf_version; /** @runtime: runtime data retrieved from the PF. */ struct xe_gt_sriov_vf_runtime runtime; + /** @migration: migration data for the VF. 
*/ + struct xe_gt_sriov_vf_migration migration; }; #endif diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index 30f942671c2b..5f74706bab81 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -26,11 +26,46 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) atomic64_add(incr, >->stats.counters[id]); } +#define DEF_STAT_STR(ID, name) [XE_GT_STATS_ID_##ID] = name + static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { - "svm_pagefault_count", - "tlb_inval_count", - "vma_pagefault_count", - "vma_pagefault_kb", + DEF_STAT_STR(SVM_PAGEFAULT_COUNT, "svm_pagefault_count"), + DEF_STAT_STR(TLB_INVAL, "tlb_inval_count"), + DEF_STAT_STR(SVM_TLB_INVAL_COUNT, "svm_tlb_inval_count"), + DEF_STAT_STR(SVM_TLB_INVAL_US, "svm_tlb_inval_us"), + DEF_STAT_STR(VMA_PAGEFAULT_COUNT, "vma_pagefault_count"), + DEF_STAT_STR(VMA_PAGEFAULT_KB, "vma_pagefault_kb"), + DEF_STAT_STR(SVM_4K_PAGEFAULT_COUNT, "svm_4K_pagefault_count"), + DEF_STAT_STR(SVM_64K_PAGEFAULT_COUNT, "svm_64K_pagefault_count"), + DEF_STAT_STR(SVM_2M_PAGEFAULT_COUNT, "svm_2M_pagefault_count"), + DEF_STAT_STR(SVM_4K_VALID_PAGEFAULT_COUNT, "svm_4K_valid_pagefault_count"), + DEF_STAT_STR(SVM_64K_VALID_PAGEFAULT_COUNT, "svm_64K_valid_pagefault_count"), + DEF_STAT_STR(SVM_2M_VALID_PAGEFAULT_COUNT, "svm_2M_valid_pagefault_count"), + DEF_STAT_STR(SVM_4K_PAGEFAULT_US, "svm_4K_pagefault_us"), + DEF_STAT_STR(SVM_64K_PAGEFAULT_US, "svm_64K_pagefault_us"), + DEF_STAT_STR(SVM_2M_PAGEFAULT_US, "svm_2M_pagefault_us"), + DEF_STAT_STR(SVM_4K_MIGRATE_COUNT, "svm_4K_migrate_count"), + DEF_STAT_STR(SVM_64K_MIGRATE_COUNT, "svm_64K_migrate_count"), + DEF_STAT_STR(SVM_2M_MIGRATE_COUNT, "svm_2M_migrate_count"), + DEF_STAT_STR(SVM_4K_MIGRATE_US, "svm_4K_migrate_us"), + DEF_STAT_STR(SVM_64K_MIGRATE_US, "svm_64K_migrate_us"), + DEF_STAT_STR(SVM_2M_MIGRATE_US, "svm_2M_migrate_us"), + DEF_STAT_STR(SVM_DEVICE_COPY_US, "svm_device_copy_us"), + 
DEF_STAT_STR(SVM_4K_DEVICE_COPY_US, "svm_4K_device_copy_us"), + DEF_STAT_STR(SVM_64K_DEVICE_COPY_US, "svm_64K_device_copy_us"), + DEF_STAT_STR(SVM_2M_DEVICE_COPY_US, "svm_2M_device_copy_us"), + DEF_STAT_STR(SVM_CPU_COPY_US, "svm_cpu_copy_us"), + DEF_STAT_STR(SVM_4K_CPU_COPY_US, "svm_4K_cpu_copy_us"), + DEF_STAT_STR(SVM_64K_CPU_COPY_US, "svm_64K_cpu_copy_us"), + DEF_STAT_STR(SVM_2M_CPU_COPY_US, "svm_2M_cpu_copy_us"), + DEF_STAT_STR(SVM_DEVICE_COPY_KB, "svm_device_copy_kb"), + DEF_STAT_STR(SVM_CPU_COPY_KB, "svm_cpu_copy_kb"), + DEF_STAT_STR(SVM_4K_GET_PAGES_US, "svm_4K_get_pages_us"), + DEF_STAT_STR(SVM_64K_GET_PAGES_US, "svm_64K_get_pages_us"), + DEF_STAT_STR(SVM_2M_GET_PAGES_US, "svm_2M_get_pages_us"), + DEF_STAT_STR(SVM_4K_BIND_US, "svm_4K_bind_us"), + DEF_STAT_STR(SVM_64K_BIND_US, "svm_64K_bind_us"), + DEF_STAT_STR(SVM_2M_BIND_US, "svm_2M_bind_us"), }; /** @@ -50,3 +85,17 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p) return 0; } + +/** + * xe_gt_stats_clear - Clear the GT stats + * @gt: GT structure + * + * This clear (zeros) all the available GT stats. 
+ */ +void xe_gt_stats_clear(struct xe_gt *gt) +{ + int id; + + for (id = 0; id < ARRAY_SIZE(gt->stats.counters); ++id) + atomic64_set(>->stats.counters[id], 0); +} diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h index 38325ef53617..e8aea32bc971 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.h +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -13,6 +13,7 @@ struct drm_printer; #ifdef CONFIG_DEBUG_FS int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_stats_clear(struct xe_gt *gt); void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr); #else static inline void diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h index be3244d7133c..d8348a8de2e1 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats_types.h +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -9,8 +9,41 @@ enum xe_gt_stats_id { XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, XE_GT_STATS_ID_TLB_INVAL, + XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT, + XE_GT_STATS_ID_SVM_TLB_INVAL_US, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, + XE_GT_STATS_ID_SVM_4K_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_64K_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_2M_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_4K_VALID_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_64K_VALID_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_2M_VALID_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_4K_PAGEFAULT_US, + XE_GT_STATS_ID_SVM_64K_PAGEFAULT_US, + XE_GT_STATS_ID_SVM_2M_PAGEFAULT_US, + XE_GT_STATS_ID_SVM_4K_MIGRATE_COUNT, + XE_GT_STATS_ID_SVM_64K_MIGRATE_COUNT, + XE_GT_STATS_ID_SVM_2M_MIGRATE_COUNT, + XE_GT_STATS_ID_SVM_4K_MIGRATE_US, + XE_GT_STATS_ID_SVM_64K_MIGRATE_US, + XE_GT_STATS_ID_SVM_2M_MIGRATE_US, + XE_GT_STATS_ID_SVM_DEVICE_COPY_US, + XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US, + XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US, + XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US, + XE_GT_STATS_ID_SVM_CPU_COPY_US, + XE_GT_STATS_ID_SVM_4K_CPU_COPY_US, + XE_GT_STATS_ID_SVM_64K_CPU_COPY_US, + 
XE_GT_STATS_ID_SVM_2M_CPU_COPY_US, + XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, + XE_GT_STATS_ID_SVM_CPU_COPY_KB, + XE_GT_STATS_ID_SVM_4K_GET_PAGES_US, + XE_GT_STATS_ID_SVM_64K_GET_PAGES_US, + XE_GT_STATS_ID_SVM_2M_GET_PAGES_US, + XE_GT_STATS_ID_SVM_4K_BIND_US, + XE_GT_STATS_ID_SVM_64K_BIND_US, + XE_GT_STATS_ID_SVM_2M_BIND_US, /* must be the last entry */ __XE_GT_STATS_NUM_IDS, }; diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c index aa962c783cdf..01477fc7b37b 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle.c +++ b/drivers/gpu/drm/xe/xe_gt_throttle.c @@ -8,221 +8,222 @@ #include <regs/xe_gt_regs.h> #include "xe_device.h" #include "xe_gt.h" -#include "xe_gt_printk.h" #include "xe_gt_sysfs.h" #include "xe_gt_throttle.h" #include "xe_mmio.h" +#include "xe_platform_types.h" #include "xe_pm.h" /** * DOC: Xe GT Throttle * - * Provides sysfs entries and other helpers for frequency throttle reasons in GT + * The GT frequency may be throttled by hardware/firmware for various reasons + * that are provided through attributes under the ``freq0/throttle/`` directory. + * Their availability depend on the platform and some may not be visible if that + * reason is not available. * - * device/gt#/freq0/throttle/status - Overall status - * device/gt#/freq0/throttle/reason_pl1 - Frequency throttle due to PL1 - * device/gt#/freq0/throttle/reason_pl2 - Frequency throttle due to PL2 - * device/gt#/freq0/throttle/reason_pl4 - Frequency throttle due to PL4, Iccmax etc. 
- * device/gt#/freq0/throttle/reason_thermal - Frequency throttle due to thermal - * device/gt#/freq0/throttle/reason_prochot - Frequency throttle due to prochot - * device/gt#/freq0/throttle/reason_ratl - Frequency throttle due to RATL - * device/gt#/freq0/throttle/reason_vr_thermalert - Frequency throttle due to VR THERMALERT - * device/gt#/freq0/throttle/reason_vr_tdc - Frequency throttle due to VR TDC + * The ``reasons`` attribute can be used by sysadmin to monitor all possible + * reasons for throttling and report them. It's preferred over monitoring + * ``status`` and then reading the reason from individual attributes since that + * is racy. If there's no throttling happening, "none" is returned. + * + * The following attributes are available on Crescent Island platform: + * + * - ``status``: Overall throttle status (0: no throttling, 1: throttling) + * - ``reasons``: Array of reasons causing throttling separated by space + * - ``reason_pl1``: package PL1 + * - ``reason_pl2``: package PL2 + * - ``reason_pl4``: package PL4 + * - ``reason_prochot``: prochot + * - ``reason_soc_thermal``: SoC thermal + * - ``reason_mem_thermal``: Memory thermal + * - ``reason_vr_thermal``: VR thermal + * - ``reason_iccmax``: ICCMAX + * - ``reason_ratl``: RATL thermal algorithm + * - ``reason_soc_avg_thermal``: SoC average temp + * - ``reason_fastvmode``: VR is hitting FastVMode + * - ``reason_psys_pl1``: PSYS PL1 + * - ``reason_psys_pl2``: PSYS PL2 + * - ``reason_p0_freq``: P0 frequency + * - ``reason_psys_crit``: PSYS critical + * + * Other platforms support the following reasons: + * + * - ``status``: Overall throttle status (0: no throttling, 1: throttling) + * - ``reasons``: Array of reasons causing throttling separated by space + * - ``reason_pl1``: package PL1 + * - ``reason_pl2``: package PL2 + * - ``reason_pl4``: package PL4, Iccmax etc. 
+ * - ``reason_thermal``: thermal + * - ``reason_prochot``: prochot + * - ``reason_ratl``: RATL hermal algorithm + * - ``reason_vr_thermalert``: VR THERMALERT + * - ``reason_vr_tdc``: VR TDC */ -static struct xe_gt * -dev_to_gt(struct device *dev) -{ - return kobj_to_gt(dev->kobj.parent); -} - -u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt) -{ - u32 reg; - - xe_pm_runtime_get(gt_to_xe(gt)); - if (xe_gt_is_media_type(gt)) - reg = xe_mmio_read32(>->mmio, MTL_MEDIA_PERF_LIMIT_REASONS); - else - reg = xe_mmio_read32(>->mmio, GT0_PERF_LIMIT_REASONS); - xe_pm_runtime_put(gt_to_xe(gt)); - - return reg; -} - -static u32 read_status(struct xe_gt *gt) -{ - u32 status = xe_gt_throttle_get_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK; - - xe_gt_dbg(gt, "throttle reasons: 0x%08x\n", status); - return status; -} +struct throttle_attribute { + struct kobj_attribute attr; + u32 mask; +}; -static u32 read_reason_pl1(struct xe_gt *gt) +static struct xe_gt *dev_to_gt(struct device *dev) { - u32 pl1 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_1_MASK; - - return pl1; + return kobj_to_gt(dev->kobj.parent); } -static u32 read_reason_pl2(struct xe_gt *gt) +static struct xe_gt *throttle_to_gt(struct kobject *kobj) { - u32 pl2 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_2_MASK; - - return pl2; + return dev_to_gt(kobj_to_dev(kobj)); } -static u32 read_reason_pl4(struct xe_gt *gt) +static struct throttle_attribute *kobj_attribute_to_throttle(struct kobj_attribute *attr) { - u32 pl4 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_4_MASK; - - return pl4; + return container_of(attr, struct throttle_attribute, attr); } -static u32 read_reason_thermal(struct xe_gt *gt) -{ - u32 thermal = xe_gt_throttle_get_limit_reasons(gt) & THERMAL_LIMIT_MASK; - - return thermal; -} - -static u32 read_reason_prochot(struct xe_gt *gt) +u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt) { - u32 prochot = xe_gt_throttle_get_limit_reasons(gt) & PROCHOT_MASK; - - return prochot; 
-} + struct xe_device *xe = gt_to_xe(gt); + struct xe_reg reg; + u32 val, mask; -static u32 read_reason_ratl(struct xe_gt *gt) -{ - u32 ratl = xe_gt_throttle_get_limit_reasons(gt) & RATL_MASK; + if (xe_gt_is_media_type(gt)) + reg = MTL_MEDIA_PERF_LIMIT_REASONS; + else + reg = GT0_PERF_LIMIT_REASONS; - return ratl; -} + if (xe->info.platform == XE_CRESCENTISLAND) + mask = CRI_PERF_LIMIT_REASONS_MASK; + else + mask = GT0_PERF_LIMIT_REASONS_MASK; -static u32 read_reason_vr_thermalert(struct xe_gt *gt) -{ - u32 thermalert = xe_gt_throttle_get_limit_reasons(gt) & VR_THERMALERT_MASK; + xe_pm_runtime_get(xe); + val = xe_mmio_read32(>->mmio, reg) & mask; + xe_pm_runtime_put(xe); - return thermalert; + return val; } -static u32 read_reason_vr_tdc(struct xe_gt *gt) +static bool is_throttled_by(struct xe_gt *gt, u32 mask) { - u32 tdc = xe_gt_throttle_get_limit_reasons(gt) & VR_TDC_MASK; - - return tdc; + return xe_gt_throttle_get_limit_reasons(gt) & mask; } -static ssize_t status_show(struct kobject *kobj, +static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr, char *buff) { - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool status = !!read_status(gt); + struct throttle_attribute *ta = kobj_attribute_to_throttle(attr); + struct xe_gt *gt = throttle_to_gt(kobj); - return sysfs_emit(buff, "%u\n", status); + return sysfs_emit(buff, "%u\n", is_throttled_by(gt, ta->mask)); } -static struct kobj_attribute attr_status = __ATTR_RO(status); -static ssize_t reason_pl1_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool pl1 = !!read_reason_pl1(gt); +static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe); - return sysfs_emit(buff, "%u\n", pl1); -} -static struct kobj_attribute attr_reason_pl1 = __ATTR_RO(reason_pl1); - -static ssize_t reason_pl2_show(struct kobject *kobj, - struct 
kobj_attribute *attr, char *buff) +static ssize_t reasons_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool pl2 = !!read_reason_pl2(gt); + struct xe_gt *gt = throttle_to_gt(kobj); + struct xe_device *xe = gt_to_xe(gt); + const struct attribute_group *group; + struct attribute **pother; + ssize_t ret = 0; + u32 reasons; - return sysfs_emit(buff, "%u\n", pl2); -} -static struct kobj_attribute attr_reason_pl2 = __ATTR_RO(reason_pl2); + reasons = xe_gt_throttle_get_limit_reasons(gt); + if (!reasons) + goto ret_none; -static ssize_t reason_pl4_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool pl4 = !!read_reason_pl4(gt); + group = get_platform_throttle_group(xe); + for (pother = group->attrs; *pother; pother++) { + struct kobj_attribute *kattr = container_of(*pother, struct kobj_attribute, attr); + struct throttle_attribute *other_ta = kobj_attribute_to_throttle(kattr); - return sysfs_emit(buff, "%u\n", pl4); -} -static struct kobj_attribute attr_reason_pl4 = __ATTR_RO(reason_pl4); + if (other_ta->mask != U32_MAX && reasons & other_ta->mask) + ret += sysfs_emit_at(buff, ret, "%s ", (*pother)->name + strlen("reason_")); + } -static ssize_t reason_thermal_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool thermal = !!read_reason_thermal(gt); + if (drm_WARN_ONCE(&xe->drm, !ret, "Unknown reason: %#x\n", reasons)) + goto ret_none; - return sysfs_emit(buff, "%u\n", thermal); -} -static struct kobj_attribute attr_reason_thermal = __ATTR_RO(reason_thermal); + /* Drop extra space from last iteration above */ + ret--; + ret += sysfs_emit_at(buff, ret, "\n"); -static ssize_t reason_prochot_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ 
- struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool prochot = !!read_reason_prochot(gt); + return ret; - return sysfs_emit(buff, "%u\n", prochot); +ret_none: + return sysfs_emit(buff, "none\n"); } -static struct kobj_attribute attr_reason_prochot = __ATTR_RO(reason_prochot); -static ssize_t reason_ratl_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool ratl = !!read_reason_ratl(gt); - - return sysfs_emit(buff, "%u\n", ratl); -} -static struct kobj_attribute attr_reason_ratl = __ATTR_RO(reason_ratl); - -static ssize_t reason_vr_thermalert_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool thermalert = !!read_reason_vr_thermalert(gt); - - return sysfs_emit(buff, "%u\n", thermalert); -} -static struct kobj_attribute attr_reason_vr_thermalert = __ATTR_RO(reason_vr_thermalert); - -static ssize_t reason_vr_tdc_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool tdc = !!read_reason_vr_tdc(gt); - - return sysfs_emit(buff, "%u\n", tdc); -} -static struct kobj_attribute attr_reason_vr_tdc = __ATTR_RO(reason_vr_tdc); +#define THROTTLE_ATTR_RO(name, _mask) \ + struct throttle_attribute attr_##name = { \ + .attr = __ATTR(name, 0444, reason_show, NULL), \ + .mask = _mask, \ + } + +#define THROTTLE_ATTR_RO_FUNC(name, _mask, _show) \ + struct throttle_attribute attr_##name = { \ + .attr = __ATTR(name, 0444, _show, NULL), \ + .mask = _mask, \ + } + +static THROTTLE_ATTR_RO_FUNC(reasons, 0, reasons_show); +static THROTTLE_ATTR_RO(status, U32_MAX); +static THROTTLE_ATTR_RO(reason_pl1, POWER_LIMIT_1_MASK); +static THROTTLE_ATTR_RO(reason_pl2, POWER_LIMIT_2_MASK); +static THROTTLE_ATTR_RO(reason_pl4, POWER_LIMIT_4_MASK); +static 
THROTTLE_ATTR_RO(reason_thermal, THERMAL_LIMIT_MASK); +static THROTTLE_ATTR_RO(reason_prochot, PROCHOT_MASK); +static THROTTLE_ATTR_RO(reason_ratl, RATL_MASK); +static THROTTLE_ATTR_RO(reason_vr_thermalert, VR_THERMALERT_MASK); +static THROTTLE_ATTR_RO(reason_vr_tdc, VR_TDC_MASK); static struct attribute *throttle_attrs[] = { - &attr_status.attr, - &attr_reason_pl1.attr, - &attr_reason_pl2.attr, - &attr_reason_pl4.attr, - &attr_reason_thermal.attr, - &attr_reason_prochot.attr, - &attr_reason_ratl.attr, - &attr_reason_vr_thermalert.attr, - &attr_reason_vr_tdc.attr, + &attr_reasons.attr.attr, + &attr_status.attr.attr, + &attr_reason_pl1.attr.attr, + &attr_reason_pl2.attr.attr, + &attr_reason_pl4.attr.attr, + &attr_reason_thermal.attr.attr, + &attr_reason_prochot.attr.attr, + &attr_reason_ratl.attr.attr, + &attr_reason_vr_thermalert.attr.attr, + &attr_reason_vr_tdc.attr.attr, + NULL +}; + +static THROTTLE_ATTR_RO(reason_vr_thermal, VR_THERMAL_MASK); +static THROTTLE_ATTR_RO(reason_soc_thermal, SOC_THERMAL_LIMIT_MASK); +static THROTTLE_ATTR_RO(reason_mem_thermal, MEM_THERMAL_MASK); +static THROTTLE_ATTR_RO(reason_iccmax, ICCMAX_MASK); +static THROTTLE_ATTR_RO(reason_soc_avg_thermal, SOC_AVG_THERMAL_MASK); +static THROTTLE_ATTR_RO(reason_fastvmode, FASTVMODE_MASK); +static THROTTLE_ATTR_RO(reason_psys_pl1, PSYS_PL1_MASK); +static THROTTLE_ATTR_RO(reason_psys_pl2, PSYS_PL2_MASK); +static THROTTLE_ATTR_RO(reason_p0_freq, P0_FREQ_MASK); +static THROTTLE_ATTR_RO(reason_psys_crit, PSYS_CRIT_MASK); + +static struct attribute *cri_throttle_attrs[] = { + /* Common */ + &attr_reasons.attr.attr, + &attr_status.attr.attr, + &attr_reason_pl1.attr.attr, + &attr_reason_pl2.attr.attr, + &attr_reason_pl4.attr.attr, + &attr_reason_prochot.attr.attr, + &attr_reason_ratl.attr.attr, + /* CRI */ + &attr_reason_vr_thermal.attr.attr, + &attr_reason_soc_thermal.attr.attr, + &attr_reason_mem_thermal.attr.attr, + &attr_reason_iccmax.attr.attr, + &attr_reason_soc_avg_thermal.attr.attr, + 
&attr_reason_fastvmode.attr.attr, + &attr_reason_psys_pl1.attr.attr, + &attr_reason_psys_pl2.attr.attr, + &attr_reason_p0_freq.attr.attr, + &attr_reason_psys_crit.attr.attr, NULL }; @@ -231,19 +232,37 @@ static const struct attribute_group throttle_group_attrs = { .attrs = throttle_attrs, }; +static const struct attribute_group cri_throttle_group_attrs = { + .name = "throttle", + .attrs = cri_throttle_attrs, +}; + +static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe) +{ + switch (xe->info.platform) { + case XE_CRESCENTISLAND: + return &cri_throttle_group_attrs; + default: + return &throttle_group_attrs; + } +} + static void gt_throttle_sysfs_fini(void *arg) { struct xe_gt *gt = arg; + struct xe_device *xe = gt_to_xe(gt); + const struct attribute_group *group = get_platform_throttle_group(xe); - sysfs_remove_group(gt->freq, &throttle_group_attrs); + sysfs_remove_group(gt->freq, group); } int xe_gt_throttle_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + const struct attribute_group *group = get_platform_throttle_group(xe); int err; - err = sysfs_create_group(gt->freq, &throttle_group_attrs); + err = sysfs_create_group(gt->freq, group); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c deleted file mode 100644 index 084cbdeba8ea..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ /dev/null @@ -1,578 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023 Intel Corporation - */ - -#include "xe_gt_tlb_invalidation.h" - -#include "abi/guc_actions_abi.h" -#include "xe_device.h" -#include "xe_force_wake.h" -#include "xe_gt.h" -#include "xe_gt_printk.h" -#include "xe_guc.h" -#include "xe_guc_ct.h" -#include "xe_gt_stats.h" -#include "xe_mmio.h" -#include "xe_pm.h" -#include "xe_sriov.h" -#include "xe_trace.h" -#include "regs/xe_guc_regs.h" - -#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS - -/* - * TLB inval depends on 
pending commands in the CT queue and then the real - * invalidation time. Double up the time to process full CT queue - * just to be on the safe side. - */ -static long tlb_timeout_jiffies(struct xe_gt *gt) -{ - /* this reflects what HW/GuC needs to process TLB inv request */ - const long hw_tlb_timeout = HZ / 4; - - /* this estimates actual delay caused by the CTB transport */ - long delay = xe_guc_ct_queue_proc_time_jiffies(>->uc.guc.ct); - - return hw_tlb_timeout + 2 * delay; -} - -static void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence) -{ - if (WARN_ON_ONCE(!fence->gt)) - return; - - xe_pm_runtime_put(gt_to_xe(fence->gt)); - fence->gt = NULL; /* fini() should be called once */ -} - -static void -__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); - - trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); - xe_gt_tlb_invalidation_fence_fini(fence); - dma_fence_signal(&fence->base); - if (!stack) - dma_fence_put(&fence->base); -} - -static void -invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - list_del(&fence->link); - __invalidation_fence_signal(xe, fence); -} - -void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) -{ - if (WARN_ON_ONCE(!fence->gt)) - return; - - __invalidation_fence_signal(gt_to_xe(fence->gt), fence); -} - -static void xe_gt_tlb_fence_timeout(struct work_struct *work) -{ - struct xe_gt *gt = container_of(work, struct xe_gt, - tlb_invalidation.fence_tdr.work); - struct xe_device *xe = gt_to_xe(gt); - struct xe_gt_tlb_invalidation_fence *fence, *next; - - LNL_FLUSH_WORK(>->uc.guc.ct.g2h_worker); - - spin_lock_irq(>->tlb_invalidation.pending_lock); - list_for_each_entry_safe(fence, next, - >->tlb_invalidation.pending_fences, link) { - s64 since_inval_ms = ktime_ms_delta(ktime_get(), - fence->invalidation_time); - - if 
(msecs_to_jiffies(since_inval_ms) < tlb_timeout_jiffies(gt)) - break; - - trace_xe_gt_tlb_invalidation_fence_timeout(xe, fence); - xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", - fence->seqno, gt->tlb_invalidation.seqno_recv); - - fence->base.error = -ETIME; - invalidation_fence_signal(xe, fence); - } - if (!list_empty(>->tlb_invalidation.pending_fences)) - queue_delayed_work(system_wq, - >->tlb_invalidation.fence_tdr, - tlb_timeout_jiffies(gt)); - spin_unlock_irq(>->tlb_invalidation.pending_lock); -} - -/** - * xe_gt_tlb_invalidation_init_early - Initialize GT TLB invalidation state - * @gt: GT structure - * - * Initialize GT TLB invalidation state, purely software initialization, should - * be called once during driver load. - * - * Return: 0 on success, negative error code on error. - */ -int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt) -{ - gt->tlb_invalidation.seqno = 1; - INIT_LIST_HEAD(>->tlb_invalidation.pending_fences); - spin_lock_init(>->tlb_invalidation.pending_lock); - spin_lock_init(>->tlb_invalidation.lock); - INIT_DELAYED_WORK(>->tlb_invalidation.fence_tdr, - xe_gt_tlb_fence_timeout); - - return 0; -} - -/** - * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset - * @gt: GT structure - * - * Signal any pending invalidation fences, should be called during a GT reset - */ -void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) -{ - struct xe_gt_tlb_invalidation_fence *fence, *next; - int pending_seqno; - - /* - * CT channel is already disabled at this point. No new TLB requests can - * appear. 
- */ - - mutex_lock(>->uc.guc.ct.lock); - spin_lock_irq(>->tlb_invalidation.pending_lock); - cancel_delayed_work(>->tlb_invalidation.fence_tdr); - /* - * We might have various kworkers waiting for TLB flushes to complete - * which are not tracked with an explicit TLB fence, however at this - * stage that will never happen since the CT is already disabled, so - * make sure we signal them here under the assumption that we have - * completed a full GT reset. - */ - if (gt->tlb_invalidation.seqno == 1) - pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1; - else - pending_seqno = gt->tlb_invalidation.seqno - 1; - WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno); - - list_for_each_entry_safe(fence, next, - >->tlb_invalidation.pending_fences, link) - invalidation_fence_signal(gt_to_xe(gt), fence); - spin_unlock_irq(>->tlb_invalidation.pending_lock); - mutex_unlock(>->uc.guc.ct.lock); -} - -static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) -{ - int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv); - - if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2)) - return false; - - if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2)) - return true; - - return seqno_recv >= seqno; -} - -static int send_tlb_invalidation(struct xe_guc *guc, - struct xe_gt_tlb_invalidation_fence *fence, - u32 *action, int len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = gt_to_xe(gt); - int seqno; - int ret; - - xe_gt_assert(gt, fence); - - /* - * XXX: The seqno algorithm relies on TLB invalidation being processed - * in order which they currently are, if that changes the algorithm will - * need to be updated. 
- */ - - mutex_lock(&guc->ct.lock); - seqno = gt->tlb_invalidation.seqno; - fence->seqno = seqno; - trace_xe_gt_tlb_invalidation_fence_send(xe, fence); - action[1] = seqno; - ret = xe_guc_ct_send_locked(&guc->ct, action, len, - G2H_LEN_DW_TLB_INVALIDATE, 1); - if (!ret) { - spin_lock_irq(>->tlb_invalidation.pending_lock); - /* - * We haven't actually published the TLB fence as per - * pending_fences, but in theory our seqno could have already - * been written as we acquired the pending_lock. In such a case - * we can just go ahead and signal the fence here. - */ - if (tlb_invalidation_seqno_past(gt, seqno)) { - __invalidation_fence_signal(xe, fence); - } else { - fence->invalidation_time = ktime_get(); - list_add_tail(&fence->link, - >->tlb_invalidation.pending_fences); - - if (list_is_singular(>->tlb_invalidation.pending_fences)) - queue_delayed_work(system_wq, - >->tlb_invalidation.fence_tdr, - tlb_timeout_jiffies(gt)); - } - spin_unlock_irq(>->tlb_invalidation.pending_lock); - } else { - __invalidation_fence_signal(xe, fence); - } - if (!ret) { - gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) % - TLB_INVALIDATION_SEQNO_MAX; - if (!gt->tlb_invalidation.seqno) - gt->tlb_invalidation.seqno = 1; - } - mutex_unlock(&guc->ct.lock); - xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1); - - return ret; -} - -#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ - XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \ - XE_GUC_TLB_INVAL_FLUSH_CACHE) - -/** - * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC - * @gt: GT structure - * @fence: invalidation fence which will be signal on TLB invalidation - * completion - * - * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and - * caller can use the invalidation fence to wait for completion. 
- * - * Return: 0 on success, negative error code on error - */ -static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence) -{ - u32 action[] = { - XE_GUC_ACTION_TLB_INVALIDATION, - 0, /* seqno, replaced in send_tlb_invalidation */ - MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), - }; - int ret; - - ret = send_tlb_invalidation(>->uc.guc, fence, action, - ARRAY_SIZE(action)); - /* - * -ECANCELED indicates the CT is stopped for a GT reset. TLB caches - * should be nuked on a GT reset so this error can be ignored. - */ - if (ret == -ECANCELED) - return 0; - - return ret; -} - -/** - * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT - * @gt: GT structure - * - * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is - * synchronous. - * - * Return: 0 on success, negative error code on error - */ -int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - unsigned int fw_ref; - - if (xe_guc_ct_enabled(>->uc.guc.ct) && - gt->uc.guc.submission_state.enabled) { - struct xe_gt_tlb_invalidation_fence fence; - int ret; - - xe_gt_tlb_invalidation_fence_init(gt, &fence, true); - ret = xe_gt_tlb_invalidation_guc(gt, &fence); - if (ret) - return ret; - - xe_gt_tlb_invalidation_fence_wait(&fence); - } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { - struct xe_mmio *mmio = >->mmio; - - if (IS_SRIOV_VF(xe)) - return 0; - - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { - xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1, - PVC_GUC_TLB_INV_DESC1_INVALIDATE); - xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0, - PVC_GUC_TLB_INV_DESC0_VALID); - } else { - xe_mmio_write32(mmio, GUC_TLB_INV_CR, - GUC_TLB_INV_CR_INVALIDATE); - } - xe_force_wake_put(gt_to_fw(gt), fw_ref); - } - - return 0; -} - -/* - * Ensure that roundup_pow_of_two(length) doesn't overflow. 
- * Note that roundup_pow_of_two() operates on unsigned long, - * not on u64. - */ -#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) - -/** - * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an - * address range - * - * @gt: GT structure - * @fence: invalidation fence which will be signal on TLB invalidation - * completion - * @start: start address - * @end: end address - * @asid: address space id - * - * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can use - * the invalidation fence to wait for completion. - * - * Return: Negative error code on error, 0 on success - */ -int xe_gt_tlb_invalidation_range(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - u64 start, u64 end, u32 asid) -{ - struct xe_device *xe = gt_to_xe(gt); -#define MAX_TLB_INVALIDATION_LEN 7 - u32 action[MAX_TLB_INVALIDATION_LEN]; - u64 length = end - start; - int len = 0; - - xe_gt_assert(gt, fence); - - /* Execlists not supported */ - if (gt_to_xe(gt)->info.force_execlist) { - __invalidation_fence_signal(xe, fence); - return 0; - } - - action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; - action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ - if (!xe->info.has_range_tlb_invalidation || - length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { - action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); - } else { - u64 orig_start = start; - u64 align; - - if (length < SZ_4K) - length = SZ_4K; - - /* - * We need to invalidate a higher granularity if start address - * is not aligned to length. When start is not aligned with - * length we need to find the length large enough to create an - * address mask covering the required range. 
- */ - align = roundup_pow_of_two(length); - start = ALIGN_DOWN(start, align); - end = ALIGN(end, align); - length = align; - while (start + length < end) { - length <<= 1; - start = ALIGN_DOWN(orig_start, length); - } - - /* - * Minimum invalidation size for a 2MB page that the hardware - * expects is 16MB - */ - if (length >= SZ_2M) { - length = max_t(u64, SZ_16M, length); - start = ALIGN_DOWN(orig_start, length); - } - - xe_gt_assert(gt, length >= SZ_4K); - xe_gt_assert(gt, is_power_of_2(length)); - xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, - ilog2(SZ_2M) + 1))); - xe_gt_assert(gt, IS_ALIGNED(start, length)); - - action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); - action[len++] = asid; - action[len++] = lower_32_bits(start); - action[len++] = upper_32_bits(start); - action[len++] = ilog2(length) - ilog2(SZ_4K); - } - - xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); - - return send_tlb_invalidation(>->uc.guc, fence, action, len); -} - -/** - * xe_gt_tlb_invalidation_vm - Issue a TLB invalidation on this GT for a VM - * @gt: graphics tile - * @vm: VM to invalidate - * - * Invalidate entire VM's address space - */ -void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm) -{ - struct xe_gt_tlb_invalidation_fence fence; - u64 range = 1ull << vm->xe->info.va_bits; - int ret; - - xe_gt_tlb_invalidation_fence_init(gt, &fence, true); - - ret = xe_gt_tlb_invalidation_range(gt, &fence, 0, range, vm->usm.asid); - if (ret < 0) - return; - - xe_gt_tlb_invalidation_fence_wait(&fence); -} - -/** - * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA - * @gt: GT structure - * @fence: invalidation fence which will be signal on TLB invalidation - * completion, can be NULL - * @vma: VMA to invalidate - * - * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can use - * the invalidation fence to wait for completion. 
- * - * Return: Negative error code on error, 0 on success - */ -int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - struct xe_vma *vma) -{ - xe_gt_assert(gt, vma); - - return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); -} - -/** - * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler - * @guc: guc - * @msg: message indicating TLB invalidation done - * @len: length of message - * - * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any - * invalidation fences for seqno. Algorithm for this depends on seqno being - * received in-order and asserts this assumption. - * - * Return: 0 on success, -EPROTO for malformed messages. - */ -int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = gt_to_xe(gt); - struct xe_gt_tlb_invalidation_fence *fence, *next; - unsigned long flags; - - if (unlikely(len != 1)) - return -EPROTO; - - /* - * This can also be run both directly from the IRQ handler and also in - * process_g2h_msg(). Only one may process any individual CT message, - * however the order they are processed here could result in skipping a - * seqno. To handle that we just process all the seqnos from the last - * seqno_recv up to and including the one in msg[0]. The delta should be - * very small so there shouldn't be much of pending_fences we actually - * need to iterate over here. - * - * From GuC POV we expect the seqnos to always appear in-order, so if we - * see something later in the timeline we can be sure that anything - * appearing earlier has already signalled, just that we have yet to - * officially process the CT message like if racing against - * process_g2h_msg(). 
- */ - spin_lock_irqsave(>->tlb_invalidation.pending_lock, flags); - if (tlb_invalidation_seqno_past(gt, msg[0])) { - spin_unlock_irqrestore(>->tlb_invalidation.pending_lock, flags); - return 0; - } - - WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]); - - list_for_each_entry_safe(fence, next, - >->tlb_invalidation.pending_fences, link) { - trace_xe_gt_tlb_invalidation_fence_recv(xe, fence); - - if (!tlb_invalidation_seqno_past(gt, fence->seqno)) - break; - - invalidation_fence_signal(xe, fence); - } - - if (!list_empty(>->tlb_invalidation.pending_fences)) - mod_delayed_work(system_wq, - >->tlb_invalidation.fence_tdr, - tlb_timeout_jiffies(gt)); - else - cancel_delayed_work(>->tlb_invalidation.fence_tdr); - - spin_unlock_irqrestore(>->tlb_invalidation.pending_lock, flags); - - return 0; -} - -static const char * -invalidation_fence_get_driver_name(struct dma_fence *dma_fence) -{ - return "xe"; -} - -static const char * -invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) -{ - return "invalidation_fence"; -} - -static const struct dma_fence_ops invalidation_fence_ops = { - .get_driver_name = invalidation_fence_get_driver_name, - .get_timeline_name = invalidation_fence_get_timeline_name, -}; - -/** - * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence - * @gt: GT - * @fence: TLB invalidation fence to initialize - * @stack: fence is stack variable - * - * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini - * will be automatically called when fence is signalled (all fences must signal), - * even on error. 
- */ -void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - bool stack) -{ - xe_pm_runtime_get_noresume(gt_to_xe(gt)); - - spin_lock_irq(>->tlb_invalidation.lock); - dma_fence_init(&fence->base, &invalidation_fence_ops, - >->tlb_invalidation.lock, - dma_fence_context_alloc(1), 1); - spin_unlock_irq(>->tlb_invalidation.lock); - INIT_LIST_HEAD(&fence->link); - if (stack) - set_bit(FENCE_STACK_BIT, &fence->base.flags); - else - dma_fence_get(&fence->base); - fence->gt = gt; -} diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h deleted file mode 100644 index abe9b03d543e..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ /dev/null @@ -1,42 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_GT_TLB_INVALIDATION_H_ -#define _XE_GT_TLB_INVALIDATION_H_ - -#include <linux/types.h> - -#include "xe_gt_tlb_invalidation_types.h" - -struct xe_gt; -struct xe_guc; -struct xe_vm; -struct xe_vma; - -int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt); - -void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); -int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); -int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - struct xe_vma *vma); -void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm); -int xe_gt_tlb_invalidation_range(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - u64 start, u64 end, u32 asid); -int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); - -void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - bool stack); -void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence); - -static inline void -xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) -{ - dma_fence_wait(&fence->base, false); -} - -#endif 
/* _XE_GT_TLB_INVALIDATION_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h deleted file mode 100644 index de6e825e0851..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_GT_TLB_INVALIDATION_TYPES_H_ -#define _XE_GT_TLB_INVALIDATION_TYPES_H_ - -#include <linux/dma-fence.h> - -struct xe_gt; - -/** - * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence - * - * Optionally passed to xe_gt_tlb_invalidation and will be signaled upon TLB - * invalidation completion. - */ -struct xe_gt_tlb_invalidation_fence { - /** @base: dma fence base */ - struct dma_fence base; - /** @gt: GT which fence belong to */ - struct xe_gt *gt; - /** @link: link into list of pending tlb fences */ - struct list_head link; - /** @seqno: seqno of TLB invalidation to signal fence one */ - int seqno; - /** @invalidation_time: time of TLB invalidation */ - ktime_t invalidation_time; -}; - -#endif diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 516c81e3b8dd..bd5260221d8d 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -12,23 +12,21 @@ #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_gt.h" +#include "xe_gt_mcr.h" +#include "xe_gt_printk.h" #include "xe_mmio.h" #include "xe_wa.h" -static void -load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...) 
+static void load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, + const struct xe_reg regs[]) { - va_list argp; u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {}; int i; - if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS)) - numregs = XE_MAX_DSS_FUSE_REGS; + xe_gt_assert(gt, numregs <= ARRAY_SIZE(fuse_val)); - va_start(argp, numregs); for (i = 0; i < numregs; i++) - fuse_val[i] = xe_mmio_read32(&gt->mmio, va_arg(argp, struct xe_reg)); - va_end(argp); + fuse_val[i] = xe_mmio_read32(&gt->mmio, regs[i]); bitmap_from_arr32(mask, fuse_val, numregs * 32); } @@ -125,6 +123,21 @@ gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst, } } +bool xe_gt_topology_report_l3(struct xe_gt *gt) +{ + /* + * No known userspace needs/uses the L3 bank mask reported by + * the media GT, and the hardware itself is known to report bogus + * values on several platforms. Only report L3 bank mask as part + * of the media GT's topology on pre-Xe3 platforms since that's + * already part of our ABI. + */ + if (xe_gt_is_media_type(gt) && MEDIA_VER(gt_to_xe(gt)) >= 30) + return false; + + return true; +} + static void load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) { @@ -132,19 +145,14 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) struct xe_mmio *mmio = &gt->mmio; u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3); - /* - * PTL platforms with media version 30.00 do not provide proper values - * for the media GT's L3 bank registers. Skip the readout since we - * don't have any way to obtain real values. - * - * This may get re-described as an official workaround in the future, - * but there's no tracking number assigned yet so we use a custom - * OOB workaround descriptor. 
- */ - if (XE_WA(gt, no_media_l3)) + if (!xe_gt_topology_report_l3(gt)) return; - if (GRAPHICS_VER(xe) >= 30) { + if (GRAPHICS_VER(xe) >= 35) { + u32 fuse_val = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE); + + bitmap_from_arr32(l3_bank_mask, &fuse_val, 32); + } else if (GRAPHICS_VER(xe) >= 30) { xe_l3_bank_mask_t per_node = {}; u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3); u32 mirror_l3bank_enable = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE); @@ -218,9 +226,19 @@ get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs) void xe_gt_topology_init(struct xe_gt *gt) { + static const struct xe_reg geometry_regs[] = { + XELP_GT_GEOMETRY_DSS_ENABLE, + XE2_GT_GEOMETRY_DSS_1, + XE2_GT_GEOMETRY_DSS_2, + }; + static const struct xe_reg compute_regs[] = { + XEHP_GT_COMPUTE_DSS_ENABLE, + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, + XE2_GT_COMPUTE_DSS_2, + }; + int num_geometry_regs, num_compute_regs; struct xe_device *xe = gt_to_xe(gt); struct drm_printer p; - int num_geometry_regs, num_compute_regs; get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs); @@ -228,23 +246,18 @@ xe_gt_topology_init(struct xe_gt *gt) * Register counts returned shouldn't exceed the number of registers * passed as parameters below. 
*/ - drm_WARN_ON(&xe->drm, num_geometry_regs > 3); - drm_WARN_ON(&xe->drm, num_compute_regs > 3); + xe_gt_assert(gt, num_geometry_regs <= ARRAY_SIZE(geometry_regs)); + xe_gt_assert(gt, num_compute_regs <= ARRAY_SIZE(compute_regs)); load_dss_mask(gt, gt->fuse_topo.g_dss_mask, - num_geometry_regs, - XELP_GT_GEOMETRY_DSS_ENABLE, - XE2_GT_GEOMETRY_DSS_1, - XE2_GT_GEOMETRY_DSS_2); - load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs, - XEHP_GT_COMPUTE_DSS_ENABLE, - XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, - XE2_GT_COMPUTE_DSS_2); + num_geometry_regs, geometry_regs); + load_dss_mask(gt, gt->fuse_topo.c_dss_mask, + num_compute_regs, compute_regs); + load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, &gt->fuse_topo.eu_type); load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask); - p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology"); - + p = xe_gt_dbg_printer(gt); xe_gt_topology_dump(gt, &p); } @@ -260,8 +273,14 @@ static const char *eu_type_to_str(enum xe_gt_eu_type eu_type) return NULL; } -void -xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) +/** + * xe_gt_topology_dump() - Dump GT topology into a drm printer. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Return: always 0. 
+ */ +int xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) { drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS, gt->fuse_topo.g_dss_mask); @@ -273,8 +292,10 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "EU type: %s\n", eu_type_to_str(gt->fuse_topo.eu_type)); - drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, - gt->fuse_topo.l3_bank_mask); + if (xe_gt_topology_report_l3(gt)) + drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, + gt->fuse_topo.l3_bank_mask); + return 0; } /* @@ -288,9 +309,11 @@ xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum) return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize); } -bool xe_dss_mask_empty(const xe_dss_mask_t mask) +/* Used to obtain the index of the first L3 bank. */ +unsigned int +xe_l3_bank_mask_ffs(const xe_l3_bank_mask_t mask) { - return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS); + return find_first_bit(mask, XE_MAX_L3_BANK_MASK_BITS); } /** @@ -331,3 +354,19 @@ bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss) { return test_bit(dss, gt->fuse_topo.c_dss_mask); } + +bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt) +{ + unsigned int xecore; + int last_group = -1; + u16 group, instance; + + for_each_dss_steering(xecore, gt, group, instance) { + if (last_group != group) { + if (group - last_group > 1) + return true; + last_group = group; + } + } + return false; +} diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index a72d26ba0653..162d603c9b81 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -23,7 +23,7 @@ struct drm_printer; void xe_gt_topology_init(struct xe_gt *gt); -void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p); /** * xe_gt_topology_mask_last_dss() - Returns the index of the last DSS in a mask. 
@@ -40,8 +40,8 @@ xe_gt_topology_mask_last_dss(const xe_dss_mask_t mask) unsigned int xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum); - -bool xe_dss_mask_empty(const xe_dss_mask_t mask); +unsigned int +xe_l3_bank_mask_ffs(const xe_l3_bank_mask_t mask); bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad); @@ -49,4 +49,8 @@ xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad); bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss); bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss); +bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt); + +bool xe_gt_topology_report_l3(struct xe_gt *gt); + #endif /* _XE_GT_TOPOLOGY_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 7def0959da35..0a728180b6fe 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -17,6 +17,7 @@ #include "xe_oa_types.h" #include "xe_reg_sr_types.h" #include "xe_sa_types.h" +#include "xe_tlb_inval_types.h" #include "xe_uc_types.h" struct xe_exec_queue_ops; @@ -65,6 +66,7 @@ struct xe_mmio_range { */ enum xe_steering_type { L3BANK, + NODE, MSLICE, LNCF, DSS, @@ -72,6 +74,13 @@ enum xe_steering_type { SQIDI_PSMI, /* + * Although most GAM ranges must be steered to (0,0) and thus use the + * INSTANCE0 type farther down, some platforms have special rules + * for specific subtypes that require steering to (1,0) instead. + */ + GAM1, + + /* * On some platforms there are multiple types of MCR registers that * will always return a non-terminated value at instance (0, 0). We'll * lump those all into a single category to keep things simple. 
@@ -185,34 +194,8 @@ struct xe_gt { struct work_struct worker; } reset; - /** @tlb_invalidation: TLB invalidation state */ - struct { - /** @tlb_invalidation.seqno: TLB invalidation seqno, protected by CT lock */ -#define TLB_INVALIDATION_SEQNO_MAX 0x100000 - int seqno; - /** - * @tlb_invalidation.seqno_recv: last received TLB invalidation seqno, - * protected by CT lock - */ - int seqno_recv; - /** - * @tlb_invalidation.pending_fences: list of pending fences waiting TLB - * invaliations, protected by CT lock - */ - struct list_head pending_fences; - /** - * @tlb_invalidation.pending_lock: protects @tlb_invalidation.pending_fences - * and updating @tlb_invalidation.seqno_recv. - */ - spinlock_t pending_lock; - /** - * @tlb_invalidation.fence_tdr: schedules a delayed call to - * xe_gt_tlb_fence_timeout after the timeut interval is over. - */ - struct delayed_work fence_tdr; - /** @tlb_invalidation.lock: protects TLB invalidation fences */ - spinlock_t lock; - } tlb_invalidation; + /** @tlb_inval: TLB invalidation state */ + struct xe_tlb_inval tlb_inval; /** * @ccs_mode: Number of compute engines enabled. @@ -227,81 +210,16 @@ struct xe_gt { /** * @usm.bb_pool: Pool from which batchbuffers, for USM operations * (e.g. migrations, fixing page tables), are allocated. - * Dedicated pool needed so USM operations to not get blocked + * Dedicated pool needed so USM operations do not get blocked * behind any user operations which may have resulted in a * fault. */ struct xe_sa_manager *bb_pool; /** * @usm.reserved_bcs_instance: reserved BCS instance used for USM - * operations (e.g. mmigrations, fixing page tables) + * operations (e.g. 
migrations, fixing page tables) */ u16 reserved_bcs_instance; - /** @usm.pf_wq: page fault work queue, unbound, high priority */ - struct workqueue_struct *pf_wq; - /** @usm.acc_wq: access counter work queue, unbound, high priority */ - struct workqueue_struct *acc_wq; - /** - * @usm.pf_queue: Page fault queue used to sync faults so faults can - * be processed not under the GuC CT lock. The queue is sized so - * it can sync all possible faults (1 per physical engine). - * Multiple queues exists for page faults from different VMs are - * be processed in parallel. - */ - struct pf_queue { - /** @usm.pf_queue.gt: back pointer to GT */ - struct xe_gt *gt; - /** @usm.pf_queue.data: data in the page fault queue */ - u32 *data; - /** - * @usm.pf_queue.num_dw: number of DWORDS in the page - * fault queue. Dynamically calculated based on the number - * of compute resources available. - */ - u32 num_dw; - /** - * @usm.pf_queue.tail: tail pointer in DWs for page fault queue, - * moved by worker which processes faults (consumer). - */ - u16 tail; - /** - * @usm.pf_queue.head: head pointer in DWs for page fault queue, - * moved by G2H handler (producer). - */ - u16 head; - /** @usm.pf_queue.lock: protects page fault queue */ - spinlock_t lock; - /** @usm.pf_queue.worker: to process page faults */ - struct work_struct worker; -#define NUM_PF_QUEUE 4 - } pf_queue[NUM_PF_QUEUE]; - /** - * @usm.acc_queue: Same as page fault queue, cannot process access - * counters under CT lock. - */ - struct acc_queue { - /** @usm.acc_queue.gt: back pointer to GT */ - struct xe_gt *gt; -#define ACC_QUEUE_NUM_DW 128 - /** @usm.acc_queue.data: data in the page fault queue */ - u32 data[ACC_QUEUE_NUM_DW]; - /** - * @usm.acc_queue.tail: tail pointer in DWs for access counter queue, - * moved by worker which processes counters - * (consumer). - */ - u16 tail; - /** - * @usm.acc_queue.head: head pointer in DWs for access counter queue, - * moved by G2H handler (producer). 
- */ - u16 head; - /** @usm.acc_queue.lock: protects page fault queue */ - spinlock_t lock; - /** @usm.acc_queue.worker: to process access counters */ - struct work_struct worker; -#define NUM_ACC_QUEUE 4 - } acc_queue[NUM_ACC_QUEUE]; } usm; /** @ordered_wq: used to serialize GT resets and TDRs */ @@ -377,6 +295,8 @@ struct xe_gt { u16 group_target; /** @steering.instance_target: instance to steer accesses to */ u16 instance_target; + /** @steering.initialized: Whether this steering range is initialized */ + bool initialized; } steering[NUM_STEERING_TYPES]; /** @@ -409,8 +329,8 @@ struct xe_gt { unsigned long *oob; /** * @wa_active.oob_initialized: mark oob as initialized to help - * detecting misuse of XE_WA() - it can only be called on - * initialization after OOB WAs have being processed + * detecting misuse of XE_GT_WA() - it can only be called on + * initialization after OOB WAs have been processed */ bool oob_initialized; } wa_active; diff --git a/drivers/gpu/drm/xe/xe_guard.h b/drivers/gpu/drm/xe/xe_guard.h new file mode 100644 index 000000000000..333f8e13b5a1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guard.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_GUARD_H_ +#define _XE_GUARD_H_ + +#include <linux/spinlock.h> + +/** + * struct xe_guard - Simple logic to protect a feature. + * + * Implements simple semaphore-like logic that can be used to lockdown the + * feature unless it is already in use. Allows enabling of the otherwise + * incompatible features, where we can't follow the strict owner semantics + * required by the &rw_semaphore. + * + * NOTE! It shouldn't be used to protect data, use &rw_semaphore instead. + */ +struct xe_guard { + /** + * @counter: implements simple exclusive/lockdown logic: + * if == 0 then guard/feature is idle/not in use, + * if < 0 then feature is active and can't be locked-down, + * if > 0 then feature is locked-down and can't be activated.
+ */ + int counter; + + /** @name: the name of the guard (useful for debug) */ + const char *name; + + /** @owner: the info about the last owner of the guard (for debug) */ + void *owner; + + /** @lock: protects guard's data */ + spinlock_t lock; +}; + +/** + * xe_guard_init() - Initialize the guard. + * @guard: the &xe_guard to init + * @name: name of the guard + */ +static inline void xe_guard_init(struct xe_guard *guard, const char *name) +{ + spin_lock_init(&guard->lock); + guard->counter = 0; + guard->name = name; +} + +/** + * xe_guard_arm() - Arm the guard for the exclusive/lockdown mode. + * @guard: the &xe_guard to arm + * @lockdown: arm for lockdown(true) or exclusive(false) mode + * @who: optional owner info (for debug only) + * + * Multiple lockdown requests are allowed. + * Only single exclusive access can be granted. + * Will fail if the guard is already in exclusive mode. + * On success, must call the xe_guard_disarm() to release. + * + * Return: 0 on success or a negative error code on failure. + */ +static inline int xe_guard_arm(struct xe_guard *guard, bool lockdown, void *who) +{ + guard(spinlock)(&guard->lock); + + if (lockdown) { + if (guard->counter < 0) + return -EBUSY; + guard->counter++; + } else { + if (guard->counter > 0) + return -EPERM; + if (guard->counter < 0) + return -EUSERS; + guard->counter--; + } + + guard->owner = who; + return 0; +} + +/** + * xe_guard_disarm() - Disarm the guard from exclusive/lockdown mode. + * @guard: the &xe_guard to disarm + * @lockdown: disarm from lockdown(true) or exclusive(false) mode + * + * Return: true if successfully disarmed or false in case of mismatch. 
+ */ +static inline bool xe_guard_disarm(struct xe_guard *guard, bool lockdown) +{ + guard(spinlock)(&guard->lock); + + if (lockdown) { + if (guard->counter <= 0) + return false; + guard->counter--; + } else { + if (guard->counter != -1) + return false; + guard->counter++; + } + return true; +} + +/** + * xe_guard_mode_str() - Convert guard mode into a string. + * @lockdown: flag used to select lockdown or exclusive mode + * + * Return: "lockdown" or "exclusive" string. + */ +static inline const char *xe_guard_mode_str(bool lockdown) +{ + return lockdown ? "lockdown" : "exclusive"; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index bac5471a1a78..a686b04879d6 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -5,6 +5,7 @@ #include "xe_guc.h" +#include <linux/iopoll.h> #include <drm/drm_managed.h> #include <generated/xe_wa_oob.h> @@ -16,12 +17,14 @@ #include "regs/xe_guc_regs.h" #include "regs/xe_irq_regs.h" #include "xe_bo.h" +#include "xe_configfs.h" #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_gt_throttle.h" +#include "xe_gt_sriov_pf_migration.h" #include "xe_guc_ads.h" #include "xe_guc_buf.h" #include "xe_guc_capture.h" @@ -29,6 +32,7 @@ #include "xe_guc_db_mgr.h" #include "xe_guc_engine_activity.h" #include "xe_guc_hwconfig.h" +#include "xe_guc_klv_helpers.h" #include "xe_guc_log.h" #include "xe_guc_pc.h" #include "xe_guc_relay.h" @@ -37,6 +41,7 @@ #include "xe_mmio.h" #include "xe_platform_types.h" #include "xe_sriov.h" +#include "xe_sriov_pf_migration.h" #include "xe_uc.h" #include "xe_uc_fw.h" #include "xe_wa.h" @@ -59,7 +64,7 @@ static u32 guc_bo_ggtt_addr(struct xe_guc *guc, /* GuC addresses above GUC_GGTT_TOP don't map through the GTT */ xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc))); xe_assert(xe, addr < GUC_GGTT_TOP); - xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr); + xe_assert(xe, 
xe_bo_size(bo) <= GUC_GGTT_TOP - addr); return addr; } @@ -72,19 +77,25 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc) if (!GUC_LOG_LEVEL_IS_VERBOSE(level)) flags |= GUC_LOG_DISABLED; else - flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) << - GUC_LOG_VERBOSITY_SHIFT; + flags |= FIELD_PREP(GUC_LOG_VERBOSITY, GUC_LOG_LEVEL_TO_VERBOSITY(level)); return flags; } static u32 guc_ctl_feature_flags(struct xe_guc *guc) { + struct xe_device *xe = guc_to_xe(guc); u32 flags = GUC_CTL_ENABLE_LITE_RESTORE; - if (!guc_to_xe(guc)->info.skip_guc_pc) + if (!xe->info.skip_guc_pc) flags |= GUC_CTL_ENABLE_SLPC; + if (xe_configfs_get_psmi_enabled(to_pci_dev(xe->drm.dev))) + flags |= GUC_CTL_ENABLE_PSMI_LOGGING; + + if (xe_guc_using_main_gamctrl_queues(guc)) + flags |= GUC_CTL_MAIN_GAMCTRL_QUEUES; + return flags; } @@ -116,22 +127,14 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc) BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE); BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT)); - BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) > - (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); - BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) > - (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); - BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) > - (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT)); - flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | CAPTURE_FLAG | LOG_FLAG | - ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) | - ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | - ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) << - GUC_LOG_CAPTURE_SHIFT) | - (offset << GUC_LOG_BUF_ADDR_SHIFT); + FIELD_PREP(GUC_LOG_CRASH, CRASH_BUFFER_SIZE / LOG_UNIT - 1) | + FIELD_PREP(GUC_LOG_DEBUG, DEBUG_BUFFER_SIZE / LOG_UNIT - 1) | + FIELD_PREP(GUC_LOG_CAPTURE, CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) | + FIELD_PREP(GUC_LOG_BUF_ADDR, offset); #undef LOG_UNIT #undef LOG_FLAG @@ -144,7 +147,7 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc) static u32 guc_ctl_ads_flags(struct 
xe_guc *guc) { u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT; - u32 flags = ads << GUC_ADS_ADDR_SHIFT; + u32 flags = FIELD_PREP(GUC_ADS_ADDR, ads); return flags; } @@ -156,7 +159,7 @@ static bool needs_wa_dual_queue(struct xe_gt *gt) * on RCS and CCSes with different address spaces, which on DG2 is * required as a WA for an HW bug. */ - if (XE_WA(gt, 22011391025)) + if (XE_GT_WA(gt, 22011391025)) return true; /* @@ -183,10 +186,10 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) struct xe_gt *gt = guc_to_gt(guc); u32 flags = 0; - if (XE_WA(gt, 22012773006)) + if (XE_GT_WA(gt, 22012773006)) flags |= GUC_WA_POLLCS; - if (XE_WA(gt, 14014475959)) + if (XE_GT_WA(gt, 14014475959)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; if (needs_wa_dual_queue(gt)) @@ -200,19 +203,22 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (GRAPHICS_VERx100(xe) < 1270) flags |= GUC_WA_PRE_PARSER; - if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685)) + if (XE_GT_WA(gt, 22012727170) || XE_GT_WA(gt, 22012727685)) flags |= GUC_WA_CONTEXT_ISOLATION; - if (XE_WA(gt, 18020744125) && + if (XE_GT_WA(gt, 18020744125) && !xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER)) flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; - if (XE_WA(gt, 1509372804)) + if (XE_GT_WA(gt, 1509372804)) flags |= GUC_WA_RENDER_RST_RC6_EXIT; - if (XE_WA(gt, 14018913170)) + if (XE_GT_WA(gt, 14018913170)) flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6; + if (XE_GT_WA(gt, 16023683509)) + flags |= GUC_WA_SAVE_RESTORE_MCFG_REG_AT_MC6; + return flags; } @@ -420,7 +426,7 @@ static int guc_g2g_register(struct xe_guc *near_guc, struct xe_gt *far_gt, u32 t buf = base + G2G_DESC_AREA_SIZE + slot * G2G_BUFFER_SIZE; xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); - xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= g2g_bo->size); + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(g2g_bo)); return guc_action_register_g2g_buffer(near_guc, type, far_tile, far_dev, desc, buf, G2G_BUFFER_SIZE); @@ -570,6 +576,86 
@@ err_deregister: return err; } +static int __guc_opt_in_features_enable(struct xe_guc *guc, u64 addr, u32 num_dwords) +{ + u32 action[] = { + XE_GUC_ACTION_OPT_IN_FEATURE_KLV, + lower_32_bits(addr), + upper_32_bits(addr), + num_dwords + }; + + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +static bool supports_dynamic_ics(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + + /* Dynamic ICS is available for PVC and Xe2 and newer platforms. */ + if (xe->info.platform != XE_PVC && GRAPHICS_VER(xe) < 20) + return false; + + /* + * The feature is currently not compatible with multi-lrc, so the GuC + * does not support it at all on the media engines (which are the main + * users of mlrc). On the primary GT side, to avoid it being used in + * conjunction with mlrc, we only enable it if we are in single CCS + * mode. + */ + if (xe_gt_is_media_type(gt) || gt->ccs_mode > 1) + return false; + + /* + * Dynamic ICS requires GuC v70.40.1, which maps to compatibility + * version v1.18.4. + */ + return GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 18, 4); +} + +#define OPT_IN_MAX_DWORDS 16 +int xe_guc_opt_in_features_enable(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + CLASS(xe_guc_buf, buf)(&guc->buf, OPT_IN_MAX_DWORDS); + u32 count = 0; + u32 *klvs; + int ret; + + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + klvs = xe_guc_buf_cpu_ptr(buf); + + /* + * The extra CAT error type opt-in was added in GuC v70.17.0, which maps + * to compatibility version v1.7.0. + * Note that the GuC allows enabling this KLV even on platforms that do + * not support the extra type; in such case the returned type variable + * will be set to a known invalid value which we can check against. 
+ */ + if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 7, 0)) + klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_EXT_CAT_ERR_TYPE); + + if (supports_dynamic_ics(guc)) + klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH); + + if (count) { + xe_assert(xe, count <= OPT_IN_MAX_DWORDS); + + ret = __guc_opt_in_features_enable(guc, xe_guc_buf_flush(buf), count); + if (ret < 0) { + xe_gt_err(guc_to_gt(guc), + "failed to enable GuC opt-in features: %pe\n", + ERR_PTR(ret)); + return ret; + } + } + + return 0; +} + static void guc_fini_hw(void *arg) { struct xe_guc *guc = arg; @@ -577,7 +663,7 @@ static void guc_fini_hw(void *arg) unsigned int fw_ref; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_uc_fini_hw(&guc_to_gt(guc)->uc); + xe_uc_sanitize_reset(&guc_to_gt(guc)->uc); xe_force_wake_put(gt_to_fw(gt), fw_ref); guc_g2g_fini(guc); @@ -620,30 +706,54 @@ static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc) if (ret) return ret; - ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->ct.bo); - if (ret) - return ret; - return 0; } -static int vf_guc_init(struct xe_guc *guc) +static int vf_guc_init_noalloc(struct xe_guc *guc) { + struct xe_gt *gt = guc_to_gt(guc); int err; - xe_guc_comm_init_early(guc); - - err = xe_guc_ct_init(&guc->ct); + err = xe_gt_sriov_vf_bootstrap(gt); if (err) return err; - err = xe_guc_relay_init(&guc->relay); + err = xe_gt_sriov_vf_query_config(gt); if (err) return err; return 0; } +int xe_guc_init_noalloc(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + xe_guc_comm_init_early(guc); + + ret = xe_guc_ct_init_noalloc(&guc->ct); + if (ret) + goto out; + + ret = xe_guc_relay_init(&guc->relay); + if (ret) + goto out; + + if (IS_SRIOV_VF(xe)) { + ret = vf_guc_init_noalloc(guc); + if (ret) + goto out; + } + + return 0; + +out: + xe_gt_err(gt, "GuC init failed with %pe\n", ERR_PTR(ret)); + return ret; +} + int xe_guc_init(struct xe_guc *guc) { struct 
xe_device *xe = guc_to_xe(guc); @@ -653,13 +763,13 @@ int xe_guc_init(struct xe_guc *guc) guc->fw.type = XE_UC_FW_TYPE_GUC; ret = xe_uc_fw_init(&guc->fw); if (ret) - goto out; + return ret; if (!xe_uc_fw_is_enabled(&guc->fw)) return 0; if (IS_SRIOV_VF(xe)) { - ret = vf_guc_init(guc); + ret = xe_guc_ct_init(&guc->ct); if (ret) goto out; return 0; @@ -681,10 +791,6 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; - ret = xe_guc_relay_init(&guc->relay); - if (ret) - goto out; - xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc); @@ -693,8 +799,6 @@ int xe_guc_init(struct xe_guc *guc) guc_init_params(guc); - xe_guc_comm_init_early(guc); - return 0; out: @@ -710,11 +814,23 @@ static int vf_guc_init_post_hwconfig(struct xe_guc *guc) if (err) return err; + err = xe_guc_buf_cache_init(&guc->buf); + if (err) + return err; + /* XXX xe_guc_db_mgr_init not needed for now */ return 0; } +static u32 guc_additional_cache_size(struct xe_device *xe) +{ + if (IS_SRIOV_PF(xe) && xe_sriov_pf_migration_supported(xe)) + return XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE; + else + return 0; /* Fallback to default size */ +} + /** * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load * @guc: The GuC object @@ -732,6 +848,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc) if (ret) return ret; + ret = xe_guc_ct_init_post_hwconfig(&guc->ct); + if (ret) + return ret; + guc_init_params_post_hwconfig(guc); ret = xe_guc_submit_init(guc, ~0); @@ -750,7 +870,8 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc) if (ret) return ret; - ret = xe_guc_buf_cache_init(&guc->buf); + ret = xe_guc_buf_cache_init_with_size(&guc->buf, + guc_additional_cache_size(guc_to_xe(guc))); if (ret) return ret; @@ -763,15 +884,17 @@ int xe_guc_post_load_init(struct xe_guc *guc) xe_guc_ads_populate_post_load(&guc->ads); + ret = xe_guc_opt_in_features_enable(guc); + if (ret) + return ret; + if (xe_guc_g2g_wanted(guc_to_xe(guc))) 
{ ret = guc_g2g_start(guc); if (ret) return ret; } - guc->submission_state.enabled = true; - - return 0; + return xe_guc_submit_enable(guc); } int xe_guc_reset(struct xe_guc *guc) @@ -863,33 +986,109 @@ static int guc_xfer_rsa(struct xe_guc *guc) } /* - * Check a previously read GuC status register (GUC_STATUS) looking for - * known terminal states (either completion or failure) of either the - * microkernel status field or the boot ROM status field. Returns +1 for - * successful completion, -1 for failure and 0 for any intermediate state. + * Wait for the GuC to start up. + * + * Measurements indicate this should take no more than 20ms (assuming the GT + * clock is at maximum frequency). However, thermal throttling and other issues + * can prevent the clock hitting max and thus making the load take significantly + * longer. Allow up to 3s as a safety margin in normal builds. For + * CONFIG_DRM_XE_DEBUG allow up to 20s to account for slower execution, issues + * in PCODE, driver, fan, etc. + * + * Keep checking the GUC_STATUS every 10ms with a debug message every 100 + * attempts as a "I'm slow, but alive" message. Regardless, if it takes more + * than 200ms, emit a warning.
+ */ + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +#define GUC_LOAD_TIMEOUT_SEC 20 +#else +#define GUC_LOAD_TIMEOUT_SEC 3 +#endif +#define GUC_LOAD_TIME_WARN_MSEC 200 + +static void print_load_status_err(struct xe_gt *gt, u32 status) +{ + struct xe_mmio *mmio = &gt->mmio; + u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status); + u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status); + + xe_gt_err(gt, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", + REG_FIELD_GET(GS_MIA_IN_RESET, status), + bootrom, ukernel, + REG_FIELD_GET(GS_MIA_MASK, status), + REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); + + switch (bootrom) { + case XE_BOOTROM_STATUS_NO_KEY_FOUND: + xe_gt_err(gt, "invalid key requested, header = 0x%08X\n", + xe_mmio_read32(mmio, GUC_HEADER_INFO)); + break; + case XE_BOOTROM_STATUS_RSA_FAILED: + xe_gt_err(gt, "firmware signature verification failed\n"); + break; + case XE_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE: + xe_gt_err(gt, "firmware production part check failure\n"); + break; + } + + switch (ukernel) { + case XE_GUC_LOAD_STATUS_HWCONFIG_START: + xe_gt_err(gt, "still extracting hwconfig table.\n"); + break; + case XE_GUC_LOAD_STATUS_EXCEPTION: + xe_gt_err(gt, "firmware exception. EIP: %#x\n", + xe_mmio_read32(mmio, SOFT_SCRATCH(13))); + break; + case XE_GUC_LOAD_STATUS_INIT_DATA_INVALID: + xe_gt_err(gt, "illegal init/ADS data\n"); + break; + case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: + xe_gt_err(gt, "illegal register in save/restore workaround list\n"); + break; + case XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR: + xe_gt_err(gt, "illegal workaround KLV data\n"); + break; + case XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG: + xe_gt_err(gt, "illegal feature flag specified\n"); + break; + } +} + +/* + * Check GUC_STATUS looking for known terminal states (either completion or + * failure) of either the microkernel status field or the boot ROM status field.
+ * + * Returns 1 for successful completion, -1 for failure and 0 for any + * intermediate state. */ -static int guc_load_done(u32 status) +static int guc_load_done(struct xe_gt *gt, u32 *status, u32 *tries) { - u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, status); - u32 br_val = REG_FIELD_GET(GS_BOOTROM_MASK, status); + u32 ukernel, bootrom; + + *status = xe_mmio_read32(&gt->mmio, GUC_STATUS); + ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, *status); + bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, *status); - switch (uk_val) { + switch (ukernel) { case XE_GUC_LOAD_STATUS_READY: return 1; - case XE_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH: case XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH: case XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE: case XE_GUC_LOAD_STATUS_HWCONFIG_ERROR: + case XE_GUC_LOAD_STATUS_BOOTROM_VERSION_MISMATCH: case XE_GUC_LOAD_STATUS_DPC_ERROR: case XE_GUC_LOAD_STATUS_EXCEPTION: case XE_GUC_LOAD_STATUS_INIT_DATA_INVALID: case XE_GUC_LOAD_STATUS_MPU_DATA_INVALID: case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: + case XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR: + case XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG: return -1; } - switch (br_val) { + switch (bootrom) { case XE_BOOTROM_STATUS_NO_KEY_FOUND: case XE_BOOTROM_STATUS_RSA_FAILED: case XE_BOOTROM_STATUS_PAVPC_FAILED: @@ -903,155 +1102,63 @@ static int guc_load_done(u32 status) return -1; } - return 0; -} + if (++*tries >= 100) { + struct xe_guc_pc *guc_pc = &gt->uc.guc.pc; -static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc) -{ - u32 freq; - int ret = xe_guc_pc_get_cur_freq(guc_pc, &freq); + *tries = 0; + xe_gt_dbg(gt, "GuC load still in progress, freq = %dMHz (req %dMHz), status = 0x%08X [0x%02X/%02X]\n", + xe_guc_pc_get_act_freq(guc_pc), + xe_guc_pc_get_cur_freq_fw(guc_pc), + *status, ukernel, bootrom); + } - return ret ? ret : freq; + return 0; } -/* - * Wait for the GuC to start up.
- * - * Measurements indicate this should take no more than 20ms (assuming the GT - * clock is at maximum frequency). However, thermal throttling and other issues - * can prevent the clock hitting max and thus making the load take significantly - * longer. Allow up to 200ms as a safety margin for real world worst case situations. - * - * However, bugs anywhere from KMD to GuC to PCODE to fan failure in a CI farm can - * lead to even longer times. E.g. if the GT is clamped to minimum frequency then - * the load times can be in the seconds range. So the timeout is increased for debug - * builds to ensure that problems can be correctly analysed. For release builds, the - * timeout is kept short so that users don't wait forever to find out that there is a - * problem. In either case, if the load took longer than is reasonable even with some - * 'sensible' throttling, then flag a warning because something is not right. - * - * Note that there is a limit on how long an individual usleep_range() can wait for, - * hence longer waits require wrapping a shorter wait in a loop. - * - * Note that the only reason an end user should hit the shorter timeout is in case of - * extreme thermal throttling. And a system that is that hot during boot is probably - * dead anyway! 
- */ -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) -#define GUC_LOAD_RETRY_LIMIT 20 -#else -#define GUC_LOAD_RETRY_LIMIT 3 -#endif -#define GUC_LOAD_TIME_WARN_MS 200 - -static void guc_wait_ucode(struct xe_guc *guc) +static int guc_wait_ucode(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); - struct xe_mmio *mmio = &gt->mmio; struct xe_guc_pc *guc_pc = &gt->uc.guc.pc; - ktime_t before, after, delta; - int load_done; - u32 status = 0; - int count = 0; + u32 before_freq, act_freq, cur_freq; + u32 status = 0, tries = 0; + ktime_t before; u64 delta_ms; - u32 before_freq; + int ret; before_freq = xe_guc_pc_get_act_freq(guc_pc); before = ktime_get(); - /* - * Note, can't use any kind of timing information from the call to xe_mmio_wait. - * It could return a thousand intermediate stages at random times. Instead, must - * manually track the total time taken and locally implement the timeout. - */ - do { - u32 last_status = status & (GS_UKERNEL_MASK | GS_BOOTROM_MASK); - int ret; - /* - * Wait for any change (intermediate or terminal) in the status register. - * Note, the return value is a don't care. The only failure code is timeout - * but the timeouts need to be accumulated over all the intermediate partial - * timeouts rather than allowing a huge timeout each time. So basically, need - * to treat a timeout no different to a value change.
- */ - ret = xe_mmio_wait32_not(mmio, GUC_STATUS, GS_UKERNEL_MASK | GS_BOOTROM_MASK, - last_status, 1000 * 1000, &status, false); - if (ret < 0) - count++; - after = ktime_get(); - delta = ktime_sub(after, before); - delta_ms = ktime_to_ms(delta); - - load_done = guc_load_done(status); - if (load_done != 0) - break; + ret = poll_timeout_us(ret = guc_load_done(gt, &status, &tries), ret, + 10 * USEC_PER_MSEC, + GUC_LOAD_TIMEOUT_SEC * USEC_PER_SEC, false); - if (delta_ms >= (GUC_LOAD_RETRY_LIMIT * 1000)) - break; + delta_ms = ktime_to_ms(ktime_sub(ktime_get(), before)); + act_freq = xe_guc_pc_get_act_freq(guc_pc); + cur_freq = xe_guc_pc_get_cur_freq_fw(guc_pc); - xe_gt_dbg(gt, "load still in progress, timeouts = %d, freq = %dMHz (req %dMHz), status = 0x%08X [0x%02X/%02X]\n", - count, xe_guc_pc_get_act_freq(guc_pc), - guc_pc_get_cur_freq(guc_pc), status, - REG_FIELD_GET(GS_BOOTROM_MASK, status), - REG_FIELD_GET(GS_UKERNEL_MASK, status)); - } while (1); - - if (load_done != 1) { - u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status); - u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status); - - xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz), done = %d\n", + if (ret) { + xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz)\n", status, delta_ms, xe_guc_pc_get_act_freq(guc_pc), - guc_pc_get_cur_freq(guc_pc), load_done); - xe_gt_err(gt, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", - REG_FIELD_GET(GS_MIA_IN_RESET, status), - bootrom, ukernel, - REG_FIELD_GET(GS_MIA_MASK, status), - REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); - - switch (bootrom) { - case XE_BOOTROM_STATUS_NO_KEY_FOUND: - xe_gt_err(gt, "invalid key requested, header = 0x%08X\n", - xe_mmio_read32(mmio, GUC_HEADER_INFO)); - break; - - case XE_BOOTROM_STATUS_RSA_FAILED: - xe_gt_err(gt, "firmware signature verification failed\n"); - break; + xe_guc_pc_get_cur_freq_fw(guc_pc)); + 
print_load_status_err(gt, status); - case XE_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE: - xe_gt_err(gt, "firmware production part check failure\n"); - break; - } - - switch (ukernel) { - case XE_GUC_LOAD_STATUS_EXCEPTION: - xe_gt_err(gt, "firmware exception. EIP: %#x\n", - xe_mmio_read32(mmio, SOFT_SCRATCH(13))); - break; - - case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: - xe_gt_err(gt, "illegal register in save/restore workaround list\n"); - break; - - case XE_GUC_LOAD_STATUS_HWCONFIG_START: - xe_gt_err(gt, "still extracting hwconfig table.\n"); - break; - } + return -EPROTO; + } - xe_device_declare_wedged(gt_to_xe(gt)); - } else if (delta_ms > GUC_LOAD_TIME_WARN_MS) { - xe_gt_warn(gt, "excessive init time: %lldms! [status = 0x%08X, timeouts = %d]\n", - delta_ms, status, count); - xe_gt_warn(gt, "excessive init time: [freq = %dMHz (req = %dMHz), before = %dMHz, perf_limit_reasons = 0x%08X]\n", - xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc), - before_freq, xe_gt_throttle_get_limit_reasons(gt)); + if (delta_ms > GUC_LOAD_TIME_WARN_MSEC) { + xe_gt_warn(gt, "GuC load: excessive init time: %lldms! 
[status = 0x%08X]\n", + delta_ms, status); + xe_gt_warn(gt, "GuC load: excessive init time: [freq = %dMHz (req = %dMHz), before = %dMHz, perf_limit_reasons = 0x%08X]\n", + act_freq, cur_freq, before_freq, + xe_gt_throttle_get_limit_reasons(gt)); } else { - xe_gt_dbg(gt, "init took %lldms, freq = %dMHz (req = %dMHz), before = %dMHz, status = 0x%08X, timeouts = %d\n", - delta_ms, xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc), - before_freq, status, count); + xe_gt_dbg(gt, "GuC load: init took %lldms, freq = %dMHz (req = %dMHz), before = %dMHz, status = 0x%08X\n", + delta_ms, act_freq, cur_freq, before_freq, status); } + + return 0; } +ALLOW_ERROR_INJECTION(guc_wait_ucode, ERRNO); static int __xe_guc_upload(struct xe_guc *guc) { @@ -1083,14 +1190,16 @@ static int __xe_guc_upload(struct xe_guc *guc) goto out; /* Wait for authentication */ - guc_wait_ucode(guc); + ret = guc_wait_ucode(guc); + if (ret) + goto out; xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING); return 0; out: xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL); - return 0 /* FIXME: ret, don't want to stop load currently */; + return ret; } static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc) @@ -1098,14 +1207,6 @@ static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc) struct xe_gt *gt = guc_to_gt(guc); int ret; - ret = xe_gt_sriov_vf_bootstrap(gt); - if (ret) - return ret; - - ret = xe_gt_sriov_vf_query_config(gt); - if (ret) - return ret; - ret = xe_guc_hwconfig_init(guc); if (ret) return ret; @@ -1116,13 +1217,17 @@ static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc) ret = xe_gt_sriov_vf_connect(gt); if (ret) - return ret; + goto err_out; ret = xe_gt_sriov_vf_query_runtime(gt); if (ret) - return ret; + goto err_out; return 0; + +err_out: + xe_guc_sanitize(guc); + return ret; } /** @@ -1164,8 +1269,13 @@ int xe_guc_min_load_for_hwconfig(struct xe_guc *guc) int xe_guc_upload(struct xe_guc *guc) { + struct xe_gt *gt = guc_to_gt(guc); + 
xe_guc_ads_populate(&guc->ads); + if (xe_guc_using_main_gamctrl_queues(guc)) + xe_mmio_write32(>->mmio, MAIN_GAMCTRL_MODE, MAIN_GAMCTRL_QUEUE_SELECT); + return __xe_guc_upload(guc); } @@ -1285,6 +1395,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, struct xe_reg reply_reg = xe_gt_is_media_type(gt) ? MED_VF_SW_FLAG(0) : VF_SW_FLAG(0); const u32 LAST_INDEX = VF_SW_FLAG_COUNT - 1; + bool lost = false; int ret; int i; @@ -1318,6 +1429,12 @@ retry: FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC), 50000, &reply, false); if (ret) { + /* scratch registers might be cleared during FLR, try once more */ + if (!reply && !lost) { + xe_gt_dbg(gt, "GuC mmio request %#x: lost, trying again\n", request[0]); + lost = true; + goto retry; + } timeout: xe_gt_err(gt, "GuC mmio request %#x: no reply %#x\n", request[0], reply); @@ -1341,7 +1458,7 @@ timeout: BUILD_BUG_ON((GUC_HXG_TYPE_RESPONSE_SUCCESS ^ GUC_HXG_TYPE_RESPONSE_FAILURE) != 1); ret = xe_mmio_wait32(mmio, reply_reg, resp_mask, resp_mask, - 1000000, &header, false); + 2000000, &header, false); if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != GUC_HXG_ORIGIN_GUC)) @@ -1465,7 +1582,7 @@ void xe_guc_sanitize(struct xe_guc *guc) { xe_uc_fw_sanitize(&guc->fw); xe_guc_ct_disable(&guc->ct); - guc->submission_state.enabled = false; + xe_guc_submit_disable(guc); } int xe_guc_reset_prepare(struct xe_guc *guc) @@ -1558,3 +1675,45 @@ void xe_guc_declare_wedged(struct xe_guc *guc) xe_guc_ct_stop(&guc->ct); xe_guc_submit_wedge(guc); } + +/** + * xe_guc_using_main_gamctrl_queues() - Detect which reporting queues to use. + * @guc: The GuC object + * + * For Xe3p and beyond, we want to program the hardware to use the + * "Main GAMCTRL queue" rather than the legacy queue before we upload + * the GuC firmware. This will allow the GuC to use a new set of + * registers for pagefault handling and avoid some unnecessary + * complications with MCR register range handling. 
+ * + * Return: true if can use new main gamctrl queues. + */ +bool xe_guc_using_main_gamctrl_queues(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + + /* + * For Xe3p media gt (35), the GuC and the CS subunits may be still Xe3 + * that lacks the Main GAMCTRL support. Reserved bits from the GMD_ID + * inform the IP version of the subunits. + */ + if (xe_gt_is_media_type(gt) && MEDIA_VER(gt_to_xe(gt)) == 35) { + u32 val = xe_mmio_read32(>->mmio, GMD_ID); + u32 subip = REG_FIELD_GET(GMD_ID_SUBIP_FLAG_MASK, val); + + if (!subip) + return true; + + xe_gt_WARN(gt, subip != 1, + "GMD_ID has unknown value in the SUBIP_FLAG field - 0x%x\n", + subip); + + return false; + } + + return GT_VER(gt) >= 35; +} + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_guc_g2g_test.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 58338be44558..e2d4c5f44ae3 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -26,6 +26,7 @@ struct drm_printer; void xe_guc_comm_init_early(struct xe_guc *guc); +int xe_guc_init_noalloc(struct xe_guc *guc); int xe_guc_init(struct xe_guc *guc); int xe_guc_init_post_hwconfig(struct xe_guc *guc); int xe_guc_post_load_init(struct xe_guc *guc); @@ -33,6 +34,7 @@ int xe_guc_reset(struct xe_guc *guc); int xe_guc_upload(struct xe_guc *guc); int xe_guc_min_load_for_hwconfig(struct xe_guc *guc); int xe_guc_enable_communication(struct xe_guc *guc); +int xe_guc_opt_in_features_enable(struct xe_guc *guc); int xe_guc_suspend(struct xe_guc *guc); void xe_guc_notify(struct xe_guc *guc); int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); @@ -50,6 +52,11 @@ void xe_guc_stop_prepare(struct xe_guc *guc); void xe_guc_stop(struct xe_guc *guc); int xe_guc_start(struct xe_guc *guc); void xe_guc_declare_wedged(struct xe_guc *guc); +bool xe_guc_using_main_gamctrl_queues(struct xe_guc *guc); + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 
*payload, u32 len); +#endif static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) { diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 44c1fa2fe7c8..bcb85a1bf26d 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -18,8 +18,10 @@ #include "xe_bo.h" #include "xe_gt.h" #include "xe_gt_ccs_mode.h" +#include "xe_gt_mcr.h" #include "xe_gt_printk.h" #include "xe_guc.h" +#include "xe_guc_buf.h" #include "xe_guc_capture.h" #include "xe_guc_ct.h" #include "xe_hw_engine.h" @@ -29,7 +31,6 @@ #include "xe_platform_types.h" #include "xe_uc_fw.h" #include "xe_wa.h" -#include "xe_gt_mcr.h" /* Slack of a few additional entries per engine */ #define ADS_REGSET_EXTRA_MAX 8 @@ -246,7 +247,7 @@ static size_t calculate_regset_size(struct xe_gt *gt) count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES; - if (XE_WA(gt, 1607983814)) + if (XE_GT_WA(gt, 1607983814)) count += LNCFCMOCS_REG_COUNT; return count * sizeof(struct guc_mmio_reg); @@ -283,52 +284,26 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) return total_size; } -static void guc_waklv_enable_one_word(struct xe_guc_ads *ads, - enum xe_guc_klv_ids klv_id, - u32 value, - u32 *offset, u32 *remain) +static void guc_waklv_enable(struct xe_guc_ads *ads, + u32 data[], u32 data_len_dw, + u32 *offset, u32 *remain, + enum xe_guc_klv_ids klv_id) { - u32 size; - u32 klv_entry[] = { - /* 16:16 key/length */ - FIELD_PREP(GUC_KLV_0_KEY, klv_id) | - FIELD_PREP(GUC_KLV_0_LEN, 1), - value, - /* 1 dword data */ - }; - - size = sizeof(klv_entry); + size_t size = sizeof(u32) * (1 + data_len_dw); if (*remain < size) { drm_warn(&ads_to_xe(ads)->drm, - "w/a klv buffer too small to add klv id %d\n", klv_id); - } else { - xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset, - klv_entry, size); - *offset += size; - *remain -= size; + "w/a klv buffer too small to add klv id 0x%04X\n", klv_id); + return; } -} - -static void 
guc_waklv_enable_simple(struct xe_guc_ads *ads, - enum xe_guc_klv_ids klv_id, u32 *offset, u32 *remain) -{ - u32 klv_entry[] = { - /* 16:16 key/length */ - FIELD_PREP(GUC_KLV_0_KEY, klv_id) | - FIELD_PREP(GUC_KLV_0_LEN, 0), - /* 0 dwords data */ - }; - u32 size; - size = sizeof(klv_entry); - - if (xe_gt_WARN(ads_to_gt(ads), *remain < size, - "w/a klv buffer too small to add klv id %d\n", klv_id)) - return; + /* 16:16 key/length */ + xe_map_wr(ads_to_xe(ads), ads_to_map(ads), *offset, u32, + FIELD_PREP(GUC_KLV_0_KEY, klv_id) | FIELD_PREP(GUC_KLV_0_LEN, data_len_dw)); + /* data_len_dw dwords of data */ + xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), + *offset + sizeof(u32), data, data_len_dw * sizeof(u32)); - xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset, - klv_entry, size); *offset += size; *remain -= size; } @@ -342,44 +317,51 @@ static void guc_waklv_init(struct xe_guc_ads *ads) offset = guc_ads_waklv_offset(ads); remain = guc_ads_waklv_size(ads); - if (XE_WA(gt, 14019882105) || XE_WA(gt, 16021333562)) - guc_waklv_enable_simple(ads, - GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED, - &offset, &remain); - if (XE_WA(gt, 18024947630)) - guc_waklv_enable_simple(ads, - GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING, - &offset, &remain); - if (XE_WA(gt, 16022287689)) - guc_waklv_enable_simple(ads, - GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE, - &offset, &remain); - - if (XE_WA(gt, 14022866841)) - guc_waklv_enable_simple(ads, - GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO, - &offset, &remain); + if (XE_GT_WA(gt, 14019882105) || XE_GT_WA(gt, 16021333562)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED); + if (XE_GT_WA(gt, 18024947630)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING); + if (XE_GT_WA(gt, 16022287689)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + 
GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE); + + if (XE_GT_WA(gt, 14022866841)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO); /* * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now, * the default value for this register is determined to be 0xC40. This could change in the * future, so GuC depends on KMD to send it the correct value. */ - if (XE_WA(gt, 13011645652)) - guc_waklv_enable_one_word(ads, - GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE, - 0xC40, - &offset, &remain); - - if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406)) - guc_waklv_enable_simple(ads, - GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET, - &offset, &remain); - - if (GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_WA(gt, 16026508708)) - guc_waklv_enable_simple(ads, - GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH, - &offset, &remain); + if (XE_GT_WA(gt, 13011645652)) { + u32 data = 0xC40; + + guc_waklv_enable(ads, &data, 1, &offset, &remain, + GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE); + } + + if (XE_GT_WA(gt, 14022293748) || XE_GT_WA(gt, 22019794406)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET); + + if (GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_GT_WA(gt, 16026508708)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH); + if (GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 47, 0) && XE_GT_WA(gt, 16026007364)) { + u32 data[] = { + 0x0, + 0xF, + }; + guc_waklv_enable(ads, data, ARRAY_SIZE(data), &offset, &remain, + GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG); + } + + if (XE_GT_WA(gt, 14020001231)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WORKAROUND_KLV_DISABLE_PSMI_INTERRUPTS_AT_C6_ENTRY_RESTORE_AT_EXIT); size = guc_ads_waklv_size(ads) - remain; if (!size) @@ -783,7 +765,7 @@ static unsigned int 
guc_mmio_regset_write(struct xe_guc_ads *ads, guc_mmio_regset_write_one(ads, regset_map, e->reg, count++); } - if (XE_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) { + if (XE_GT_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) { for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { guc_mmio_regset_write_one(ads, regset_map, XELP_LNCFCMOCS(i), count++); @@ -838,16 +820,20 @@ static void guc_mmio_reg_state_init(struct xe_guc_ads *ads) static void guc_um_init_params(struct xe_guc_ads *ads) { u32 um_queue_offset = guc_ads_um_queues_offset(ads); + struct xe_guc *guc = ads_to_guc(ads); u64 base_dpa; u32 base_ggtt; + bool with_dpa; int i; + with_dpa = !xe_guc_using_main_gamctrl_queues(guc); + base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset; base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset; for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) { ads_blob_write(ads, um_init_params.queue_params[i].base_dpa, - base_dpa + (i * GUC_UM_QUEUE_SIZE)); + with_dpa ? (base_dpa + (i * GUC_UM_QUEUE_SIZE)) : 0); ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address, base_ggtt + (i * GUC_UM_QUEUE_SIZE)); ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes, @@ -889,7 +875,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) xe_gt_assert(gt, ads->bo); - xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); guc_policies_init(ads); guc_golden_lrc_init(ads); guc_mapping_table_init_invalid(gt, &info_map); @@ -913,7 +899,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) xe_gt_assert(gt, ads->bo); - xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); guc_policies_init(ads); fill_engine_enable_masks(gt, &info_map); guc_mmio_reg_state_init(ads); @@ -1004,16 +990,16 @@ static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 
policy_off */ int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) { - struct xe_device *xe = ads_to_xe(ads); - struct xe_gt *gt = ads_to_gt(ads); - struct xe_tile *tile = gt_to_tile(gt); struct guc_policies *policies; - struct xe_bo *bo; - int ret = 0; + struct xe_guc *guc = ads_to_guc(ads); + struct xe_device *xe = ads_to_xe(ads); + CLASS(xe_guc_buf, buf)(&guc->buf, sizeof(*policies)); - policies = kmalloc(sizeof(*policies), GFP_KERNEL); - if (!policies) - return -ENOMEM; + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + policies = xe_guc_buf_cpu_ptr(buf); + memset(policies, 0, sizeof(*policies)); policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time); policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items); @@ -1023,16 +1009,5 @@ int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) else policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET; - bo = xe_managed_bo_create_from_data(xe, tile, policies, sizeof(struct guc_policies), - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT); - if (IS_ERR(bo)) { - ret = PTR_ERR(bo); - goto out; - } - - ret = guc_ads_action_update_policies(ads, xe_bo_ggtt_addr(bo)); -out: - kfree(policies); - return ret; + return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf)); } diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h index 70c132458ac3..48a8e092023f 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h @@ -14,7 +14,7 @@ struct xe_bo; * struct xe_guc_ads - GuC additional data structures (ADS) */ struct xe_guc_ads { - /** @bo: XE BO for GuC ads blob */ + /** @bo: Xe BO for GuC ads blob */ struct xe_bo *bo; /** @golden_lrc_size: golden LRC size */ size_t golden_lrc_size; diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c index 0193c94dd6a0..3ce442500130 100644 --- a/drivers/gpu/drm/xe/xe_guc_buf.c +++ 
b/drivers/gpu/drm/xe/xe_guc_buf.c @@ -13,6 +13,8 @@ #include "xe_guc_buf.h" #include "xe_sa.h" +#define XE_GUC_BUF_CACHE_DEFAULT_SIZE SZ_8K + static struct xe_guc *cache_to_guc(struct xe_guc_buf_cache *cache) { return container_of(cache, struct xe_guc, buf); @@ -23,25 +25,12 @@ static struct xe_gt *cache_to_gt(struct xe_guc_buf_cache *cache) return guc_to_gt(cache_to_guc(cache)); } -/** - * xe_guc_buf_cache_init() - Initialize the GuC Buffer Cache. - * @cache: the &xe_guc_buf_cache to initialize - * - * The Buffer Cache allows to obtain a reusable buffer that can be used to pass - * indirect H2G data to GuC without a need to create a ad-hoc allocation. - * - * Return: 0 on success or a negative error code on failure. - */ -int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache) +static int guc_buf_cache_init(struct xe_guc_buf_cache *cache, u32 size) { struct xe_gt *gt = cache_to_gt(cache); struct xe_sa_manager *sam; - /* XXX: currently it's useful only for the PF actions */ - if (!IS_SRIOV_PF(gt_to_xe(gt))) - return 0; - - sam = __xe_sa_bo_manager_init(gt_to_tile(gt), SZ_8K, 0, sizeof(u32)); + sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32)); if (IS_ERR(sam)) return PTR_ERR(sam); cache->sam = sam; @@ -53,6 +42,35 @@ int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache) } /** + * xe_guc_buf_cache_init() - Initialize the GuC Buffer Cache. + * @cache: the &xe_guc_buf_cache to initialize + * + * The Buffer Cache allows to obtain a reusable buffer that can be used to pass + * data to GuC or read data from GuC without a need to create a ad-hoc allocation. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache) +{ + return guc_buf_cache_init(cache, XE_GUC_BUF_CACHE_DEFAULT_SIZE); +} + +/** + * xe_guc_buf_cache_init_with_size() - Initialize the GuC Buffer Cache. 
+ * @cache: the &xe_guc_buf_cache to initialize + * @size: size in bytes + * + * Like xe_guc_buf_cache_init(), except it allows the caller to make the cache + * buffer larger, allowing to accommodate larger objects. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_guc_buf_cache_init_with_size(struct xe_guc_buf_cache *cache, u32 size) +{ + return guc_buf_cache_init(cache, max(XE_GUC_BUF_CACHE_DEFAULT_SIZE, size)); +} + +/** * xe_guc_buf_cache_dwords() - Number of dwords the GuC Buffer Cache supports. * @cache: the &xe_guc_buf_cache to query * @@ -120,6 +138,19 @@ void xe_guc_buf_release(const struct xe_guc_buf buf) } /** + * xe_guc_buf_sync_read() - Copy the data from the GPU memory to the sub-allocation. + * @buf: the &xe_guc_buf to sync + * + * Return: a CPU pointer of the sub-allocation. + */ +void *xe_guc_buf_sync_read(const struct xe_guc_buf buf) +{ + xe_sa_bo_sync_read(buf.sa); + + return xe_sa_bo_cpu_addr(buf.sa); +} + +/** * xe_guc_buf_flush() - Copy the data from the sub-allocation to the GPU memory. 
* @buf: the &xe_guc_buf to flush * @@ -168,7 +199,7 @@ u64 xe_guc_cache_gpu_addr_from_ptr(struct xe_guc_buf_cache *cache, const void *p if (offset < 0 || offset + size > cache->sam->base.size) return 0; - return cache->sam->gpu_addr + offset; + return xe_sa_manager_gpu_addr(cache->sam) + offset; } #if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) diff --git a/drivers/gpu/drm/xe/xe_guc_buf.h b/drivers/gpu/drm/xe/xe_guc_buf.h index 0d67604d96bd..e3cca553fb00 100644 --- a/drivers/gpu/drm/xe/xe_guc_buf.h +++ b/drivers/gpu/drm/xe/xe_guc_buf.h @@ -12,6 +12,7 @@ #include "xe_guc_buf_types.h" int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache); +int xe_guc_buf_cache_init_with_size(struct xe_guc_buf_cache *cache, u32 size); u32 xe_guc_buf_cache_dwords(struct xe_guc_buf_cache *cache); struct xe_guc_buf xe_guc_buf_reserve(struct xe_guc_buf_cache *cache, u32 dwords); struct xe_guc_buf xe_guc_buf_from_data(struct xe_guc_buf_cache *cache, @@ -30,6 +31,7 @@ static inline bool xe_guc_buf_is_valid(const struct xe_guc_buf buf) } void *xe_guc_buf_cpu_ptr(const struct xe_guc_buf buf); +void *xe_guc_buf_sync_read(const struct xe_guc_buf buf); u64 xe_guc_buf_flush(const struct xe_guc_buf buf); u64 xe_guc_buf_gpu_addr(const struct xe_guc_buf buf); u64 xe_guc_cache_gpu_addr_from_ptr(struct xe_guc_buf_cache *cache, const void *ptr, u32 size); diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 859a3ba91be5..0c1fbe97b8bf 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -122,6 +122,7 @@ struct __guc_capture_parsed_output { { RING_IPEHR(0), REG_32BIT, 0, 0, 0, "IPEHR"}, \ { RING_INSTDONE(0), REG_32BIT, 0, 0, 0, "RING_INSTDONE"}, \ { INDIRECT_RING_STATE(0), REG_32BIT, 0, 0, 0, "INDIRECT_RING_STATE"}, \ + { RING_CURRENT_LRCA(0), REG_32BIT, 0, 0, 0, "CURRENT_LRCA"}, \ { RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ { RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "ACTHD"}, \ { RING_BBADDR(0), REG_64BIT_LOW_DW, 
0, 0, 0, NULL}, \ @@ -149,6 +150,9 @@ struct __guc_capture_parsed_output { { SFC_DONE(2), 0, 0, 0, 0, "SFC_DONE[2]"}, \ { SFC_DONE(3), 0, 0, 0, 0, "SFC_DONE[3]"} +#define XE3P_BASE_ENGINE_INSTANCE \ + { RING_CSMQDEBUG(0), REG_32BIT, 0, 0, 0, "CSMQDEBUG"} + /* XE_LP Global */ static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = { COMMON_XELP_BASE_GLOBAL, @@ -195,6 +199,12 @@ static const struct __guc_mmio_reg_descr xe_lp_gsc_inst_regs[] = { COMMON_BASE_ENGINE_INSTANCE, }; +/* Render / Compute Per-Engine-Instance */ +static const struct __guc_mmio_reg_descr xe3p_rc_inst_regs[] = { + COMMON_BASE_ENGINE_INSTANCE, + XE3P_BASE_ENGINE_INSTANCE, +}; + /* * Empty list to prevent warnings about unknown class/instance types * as not all class/instance types have entries on all platforms. @@ -245,6 +255,21 @@ static const struct __guc_mmio_reg_descr_group xe_hpg_lists[] = { {} }; + /* List of lists for Xe3p and beyond */ +static const struct __guc_mmio_reg_descr_group xe3p_lists[] = { + MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0), + MAKE_REGLIST(xe_hpg_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE), + MAKE_REGLIST(xe3p_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO), + MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO), + MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE), + MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER), + MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER), + MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER), + {} +}; static const char * const capture_list_type_names[] = { 
"Global", "Class", @@ -292,7 +317,9 @@ guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc, static const struct __guc_mmio_reg_descr_group * guc_capture_get_device_reglist(struct xe_device *xe) { - if (GRAPHICS_VERx100(xe) >= 1255) + if (GRAPHICS_VER(xe) >= 35) + return xe3p_lists; + else if (GRAPHICS_VERx100(xe) >= 1255) return xe_hpg_lists; else return xe_lp_lists; @@ -1817,6 +1844,12 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm str_yes_no(snapshot->kernel_reserved)); for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) { + /* + * FIXME: During devcoredump print we should avoid accessing the + * driver pointers for gt or engine. Printing should be done only + * using the snapshot captured. Here we are accessing the gt + * pointer. It should be fixed. + */ list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type, capture_class, false); snapshot_print_by_list_order(snapshot, p, type, list); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 2447de0ebedf..a5019d1e741b 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -21,19 +21,27 @@ #include "xe_devcoredump.h" #include "xe_device.h" #include "xe_gt.h" -#include "xe_gt_pagefault.h" #include "xe_gt_printk.h" #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_monitor.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_log.h" +#include "xe_guc_pagefault.h" #include "xe_guc_relay.h" #include "xe_guc_submit.h" +#include "xe_guc_tlb_inval.h" #include "xe_map.h" #include "xe_pm.h" +#include "xe_sriov_vf.h" #include "xe_trace_guc.h" +static void receive_g2h(struct xe_guc_ct *ct); +static void g2h_worker_func(struct work_struct *w); +static void safe_mode_worker_func(struct work_struct *w); +static void ct_exit_safe_mode(struct xe_guc_ct *ct); +static void guc_ct_change_state(struct xe_guc_ct *ct, + enum 
xe_guc_ct_state state); + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) enum { /* Internal states, not error conditions */ @@ -79,6 +87,7 @@ struct g2h_fence { u16 error; u16 hint; u16 reason; + bool cancel; bool retry; bool fail; bool done; @@ -86,15 +95,20 @@ struct g2h_fence { static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) { + memset(g2h_fence, 0, sizeof(*g2h_fence)); g2h_fence->response_buffer = response_buffer; - g2h_fence->response_data = 0; - g2h_fence->response_len = 0; - g2h_fence->fail = false; - g2h_fence->retry = false; - g2h_fence->done = false; g2h_fence->seqno = ~0x0; } +static void g2h_fence_cancel(struct g2h_fence *g2h_fence) +{ + g2h_fence->cancel = true; + g2h_fence->fail = true; + + /* WRITE_ONCE pairs with READ_ONCEs in guc_ct_send_recv. */ + WRITE_ONCE(g2h_fence->done, true); +} + static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) { return g2h_fence->seqno == ~0x0; @@ -155,6 +169,7 @@ ct_to_xe(struct xe_guc_ct *ct) */ #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) +#define CTB_H2G_BUFFER_OFFSET (CTB_DESC_SIZE * 2) #define CTB_H2G_BUFFER_SIZE (SZ_4K) #define CTB_G2H_BUFFER_SIZE (SZ_128K) #define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2) @@ -178,7 +193,7 @@ long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct) static size_t guc_ct_size(void) { - return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE + + return CTB_H2G_BUFFER_OFFSET + CTB_H2G_BUFFER_SIZE + CTB_G2H_BUFFER_SIZE; } @@ -186,14 +201,14 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) + cancel_work_sync(&ct->dead.worker); +#endif + ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); } -static void receive_g2h(struct xe_guc_ct *ct); -static void g2h_worker_func(struct work_struct *w); -static void safe_mode_worker_func(struct work_struct *w); - static void primelockdep(struct xe_guc_ct *ct) { if 
(!IS_ENABLED(CONFIG_LOCKDEP)) @@ -204,16 +219,20 @@ static void primelockdep(struct xe_guc_ct *ct) fs_reclaim_release(GFP_KERNEL); } -int xe_guc_ct_init(struct xe_guc_ct *ct) +int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct) { struct xe_device *xe = ct_to_xe(ct); struct xe_gt *gt = ct_to_gt(ct); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_bo *bo; int err; xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); + err = drmm_mutex_init(&xe->drm, &ct->lock); + if (err) + return err; + + primelockdep(ct); + ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", WQ_MEM_RECLAIM); if (!ct->g2h_wq) return -ENOMEM; @@ -225,15 +244,36 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) spin_lock_init(&ct->dead.lock); INIT_WORK(&ct->dead.worker, ct_dead_worker_func); +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + stack_depot_init(); +#endif #endif init_waitqueue_head(&ct->wq); init_waitqueue_head(&ct->g2h_fence_wq); - err = drmm_mutex_init(&xe->drm, &ct->lock); + err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); if (err) return err; - primelockdep(ct); + xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); + ct->state = XE_GUC_CT_STATE_DISABLED; + return 0; +} +ALLOW_ERROR_INJECTION(xe_guc_ct_init_noalloc, ERRNO); /* See xe_pci_probe() */ + +static void guc_action_disable_ct(void *arg) +{ + struct xe_guc_ct *ct = arg; + + guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED); +} + +int xe_guc_ct_init(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *bo; bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), XE_BO_FLAG_SYSTEM | @@ -245,16 +285,38 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) ct->bo = bo; - err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); - if (err) - return err; - - xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); - ct->state = XE_GUC_CT_STATE_DISABLED; - return 0; + return 
devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct); } ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */ +/** + * xe_guc_ct_init_post_hwconfig - Reinitialize the GuC CTB in VRAM + * @ct: the &xe_guc_ct + * + * Allocate a new BO in VRAM and free the previous BO that was allocated + * in system memory (SMEM). Applicable only for DGFX products. + * + * Return: 0 on success, or a negative errno on failure. + */ +int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); + struct xe_tile *tile = gt_to_tile(gt); + int ret; + + xe_assert(xe, !xe_guc_ct_enabled(ct)); + + if (IS_DGFX(xe)) { + ret = xe_managed_bo_reinit_in_vram(xe, tile, &ct->bo); + if (ret) + return ret; + } + + devm_remove_action(xe->drm.dev, guc_action_disable_ct, ct); + return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct); +} + #define desc_read(xe_, guc_ctb__, field_) \ xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \ struct guc_ct_buffer_desc, field_) @@ -278,7 +340,7 @@ static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g, h2g->desc = *map; xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); - h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2); + h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_H2G_BUFFER_OFFSET); } static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h, @@ -296,7 +358,7 @@ static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h, g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE); xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); - g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 + + g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_H2G_BUFFER_OFFSET + CTB_H2G_BUFFER_SIZE); } @@ -307,7 +369,7 @@ static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct) int err; desc_addr = xe_bo_ggtt_addr(ct->bo); - ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2; + 
ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_H2G_BUFFER_OFFSET; size = ct->ctbs.h2g.info.size * sizeof(u32); err = xe_guc_self_cfg64(guc, @@ -334,7 +396,7 @@ static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct) int err; desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE; - ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 + + ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_H2G_BUFFER_OFFSET + CTB_H2G_BUFFER_SIZE; size = ct->ctbs.g2h.info.size * sizeof(u32); @@ -371,9 +433,13 @@ static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable) return ret > 0 ? -EPROTO : ret; } -static void xe_guc_ct_set_state(struct xe_guc_ct *ct, +static void guc_ct_change_state(struct xe_guc_ct *ct, enum xe_guc_ct_state state) { + struct xe_gt *gt = ct_to_gt(ct); + struct g2h_fence *g2h_fence; + unsigned long idx; + mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */ spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */ @@ -385,8 +451,20 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct, ct->g2h_outstanding = 0; ct->state = state; + xe_gt_dbg(gt, "GuC CT communication channel %s\n", + state == XE_GUC_CT_STATE_STOPPED ? "stopped" : + str_enabled_disabled(state == XE_GUC_CT_STATE_ENABLED)); + spin_unlock_irq(&ct->fast_lock); + /* cancel all in-flight send-recv requests */ + xa_for_each(&ct->fence_lookup, idx, g2h_fence) + g2h_fence_cancel(g2h_fence); + + /* make sure guc_ct_send_recv() will see g2h_fence changes */ + smp_mb(); + wake_up_all(&ct->g2h_fence_wq); + /* * Lockdep doesn't like this under the fast lock and he destroy only * needs to be serialized with the send path which ct lock provides. 
@@ -432,7 +510,7 @@ static void ct_exit_safe_mode(struct xe_guc_ct *ct) xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode disabled\n"); } -int xe_guc_ct_enable(struct xe_guc_ct *ct) +static int __xe_guc_ct_start(struct xe_guc_ct *ct, bool needs_register) { struct xe_device *xe = ct_to_xe(ct); struct xe_gt *gt = ct_to_gt(ct); @@ -440,27 +518,34 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) xe_gt_assert(gt, !xe_guc_ct_enabled(ct)); - xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size); - guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); - guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); + if (needs_register) { + xe_map_memset(xe, &ct->bo->vmap, 0, 0, xe_bo_size(ct->bo)); + guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); + guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); - err = guc_ct_ctb_h2g_register(ct); - if (err) - goto err_out; + err = guc_ct_ctb_h2g_register(ct); + if (err) + goto err_out; - err = guc_ct_ctb_g2h_register(ct); - if (err) - goto err_out; + err = guc_ct_ctb_g2h_register(ct); + if (err) + goto err_out; - err = guc_ct_control_toggle(ct, true); - if (err) - goto err_out; + err = guc_ct_control_toggle(ct, true); + if (err) + goto err_out; + } else { + ct->ctbs.h2g.info.broken = false; + ct->ctbs.g2h.info.broken = false; + /* Skip everything in H2G buffer */ + xe_map_memset(xe, &ct->bo->vmap, CTB_H2G_BUFFER_OFFSET, 0, + CTB_H2G_BUFFER_SIZE); + } - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED); + guc_ct_change_state(ct, XE_GUC_CT_STATE_ENABLED); smp_mb(); wake_up_all(&ct->wq); - xe_gt_dbg(gt, "GuC CT communication channel enabled\n"); if (ct_needs_safe_mode(ct)) ct_enter_safe_mode(ct); @@ -487,6 +572,32 @@ err_out: return err; } +/** + * xe_guc_ct_restart() - Restart GuC CT + * @ct: the &xe_guc_ct + * + * Restart GuC CT to an empty state without issuing a CT register MMIO command. + * + * Return: 0 on success, or a negative errno on failure. 
+ */ +int xe_guc_ct_restart(struct xe_guc_ct *ct) +{ + return __xe_guc_ct_start(ct, false); +} + +/** + * xe_guc_ct_enable() - Enable GuC CT + * @ct: the &xe_guc_ct + * + * Enable GuC CT to an empty state and issue a CT register MMIO command. + * + * Return: 0 on success, or a negative errno on failure. + */ +int xe_guc_ct_enable(struct xe_guc_ct *ct) +{ + return __xe_guc_ct_start(ct, true); +} + static void stop_g2h_handler(struct xe_guc_ct *ct) { cancel_work_sync(&ct->g2h_worker); @@ -501,12 +612,22 @@ static void stop_g2h_handler(struct xe_guc_ct *ct) */ void xe_guc_ct_disable(struct xe_guc_ct *ct) { - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED); + guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED); ct_exit_safe_mode(ct); stop_g2h_handler(ct); } /** + * xe_guc_ct_flush_and_stop - Flush and stop all processing of G2H / H2G + * @ct: the &xe_guc_ct + */ +void xe_guc_ct_flush_and_stop(struct xe_guc_ct *ct) +{ + receive_g2h(ct); + xe_guc_ct_stop(ct); +} + +/** * xe_guc_ct_stop - Set GuC to stopped state * @ct: the &xe_guc_ct * @@ -514,7 +635,10 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct) */ void xe_guc_ct_stop(struct xe_guc_ct *ct) { - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); + if (!xe_guc_ct_initialized(ct)) + return; + + guc_ct_change_state(ct, XE_GUC_CT_STATE_STOPPED); stop_g2h_handler(ct); } @@ -625,6 +749,69 @@ static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) spin_unlock_irq(&ct->fast_lock); } +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action) +{ + unsigned int slot = fence % ARRAY_SIZE(ct->fast_req); +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + unsigned long entries[SZ_32]; + unsigned int n; + + n = stack_trace_save(entries, ARRAY_SIZE(entries), 1); + + /* May be called under spinlock, so avoid sleeping */ + ct->fast_req[slot].stack = stack_depot_save(entries, n, GFP_NOWAIT); +#endif + ct->fast_req[slot].fence = fence; + ct->fast_req[slot].action = action; +} +#else 
+static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action) +{ +} +#endif + +/* + * The CT protocol accepts a 16 bits fence. This field is fully owned by the + * driver, the GuC will just copy it to the reply message. Since we need to + * be able to distinguish between replies to REQUEST and FAST_REQUEST messages, + * we use one bit of the seqno as an indicator for that and a rolling counter + * for the remaining 15 bits. + */ +#define CT_SEQNO_MASK GENMASK(14, 0) +#define CT_SEQNO_UNTRACKED BIT(15) +static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence) +{ + u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK; + + if (!is_g2h_fence) + seqno |= CT_SEQNO_UNTRACKED; + + return seqno; +} + +#define MAKE_ACTION(type, __action) \ +({ \ + FIELD_PREP(GUC_HXG_MSG_0_TYPE, type) | \ + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | \ + GUC_HXG_EVENT_MSG_0_DATA0, __action); \ +}) + +static bool vf_action_can_safely_fail(struct xe_device *xe, u32 action) +{ + /* + * When resuming a VF, we can't reliably track whether context + * registration has completed in the GuC state machine. It is harmless + * to resend the request, as it will fail silently if GUC_HXG_TYPE_EVENT + * is used. Additionally, if there is an H2G protocol issue on a VF, + * subsequent H2G messages sent as GUC_HXG_TYPE_FAST_REQUEST will likely + * fail. 
+ */ + return IS_SRIOV_VF(xe) && xe_sriov_vf_migration_supported(xe) && + (action == XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC || + action == XE_GUC_ACTION_REGISTER_CONTEXT); +} + #define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, @@ -696,15 +883,14 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value); if (want_response) { - cmd[1] = - FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | - FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | - GUC_HXG_EVENT_MSG_0_DATA0, action[0]); + cmd[1] = MAKE_ACTION(GUC_HXG_TYPE_REQUEST, action[0]); + } else if (vf_action_can_safely_fail(xe, action[0])) { + cmd[1] = MAKE_ACTION(GUC_HXG_TYPE_EVENT, action[0]); } else { - cmd[1] = - FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) | - FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | - GUC_HXG_EVENT_MSG_0_DATA0, action[0]); + fast_req_track(ct, ct_fence_value, + FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, action[0])); + + cmd[1] = MAKE_ACTION(GUC_HXG_TYPE_FAST_REQUEST, action[0]); } /* H2G header in cmd[1] replaces action[0] so: */ @@ -733,34 +919,15 @@ corrupted: return -EPIPE; } -/* - * The CT protocol accepts a 16 bits fence. This field is fully owned by the - * driver, the GuC will just copy it to the reply message. Since we need to - * be able to distinguish between replies to REQUEST and FAST_REQUEST messages, - * we use one bit of the seqno as an indicator for that and a rolling counter - * for the remaining 15 bits. 
- */ -#define CT_SEQNO_MASK GENMASK(14, 0) -#define CT_SEQNO_UNTRACKED BIT(15) -static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence) -{ - u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK; - - if (!is_g2h_fence) - seqno |= CT_SEQNO_UNTRACKED; - - return seqno; -} - static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) { - struct xe_gt *gt __maybe_unused = ct_to_gt(ct); + struct xe_gt *gt = ct_to_gt(ct); u16 seqno; int ret; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); xe_gt_assert(gt, !g2h_len || !g2h_fence); xe_gt_assert(gt, !num_g2h || !g2h_fence); xe_gt_assert(gt, !g2h_len || num_g2h); @@ -777,7 +944,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, goto out; } - if (ct->state == XE_GUC_CT_STATE_STOPPED) { + if (ct->state == XE_GUC_CT_STATE_STOPPED || xe_gt_recovery_pending(gt)) { ret = -ECANCELED; goto out; } @@ -832,22 +999,15 @@ static void kick_reset(struct xe_guc_ct *ct) static int dequeue_one_g2h(struct xe_guc_ct *ct); -static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, - u32 g2h_len, u32 num_g2h, - struct g2h_fence *g2h_fence) +/* + * wait before retry of sending h2g message + * Return: true if ready for retry, false if the wait timeouted + */ +static bool guc_ct_send_wait_for_retry(struct xe_guc_ct *ct, u32 len, + u32 g2h_len, struct g2h_fence *g2h_fence, + unsigned int *sleep_period_ms) { struct xe_device *xe = ct_to_xe(ct); - struct xe_gt *gt = ct_to_gt(ct); - unsigned int sleep_period_ms = 1; - int ret; - - xe_gt_assert(gt, !g2h_len || !g2h_fence); - lockdep_assert_held(&ct->lock); - xe_device_assert_mem_access(ct_to_xe(ct)); - -try_again: - ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, - g2h_fence); /* * We wait to try to restore credits for about 1 second before bailing. 
@@ -856,24 +1016,22 @@ try_again: * the case of G2H we process any G2H in the channel, hopefully freeing * credits as we consume the G2H messages. */ - if (unlikely(ret == -EBUSY && - !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) { + if (!h2g_has_room(ct, len + GUC_CTB_HDR_LEN)) { struct guc_ctb *h2g = &ct->ctbs.h2g; - if (sleep_period_ms == 1024) - goto broken; + if (*sleep_period_ms == 1024) + return false; trace_xe_guc_ct_h2g_flow_control(xe, h2g->info.head, h2g->info.tail, h2g->info.size, h2g->info.space, len + GUC_CTB_HDR_LEN); - msleep(sleep_period_ms); - sleep_period_ms <<= 1; - - goto try_again; - } else if (unlikely(ret == -EBUSY)) { + msleep(*sleep_period_ms); + *sleep_period_ms <<= 1; + } else { struct xe_device *xe = ct_to_xe(ct); struct guc_ctb *g2h = &ct->ctbs.g2h; + int ret; trace_xe_guc_ct_g2h_flow_control(xe, g2h->info.head, desc_read(xe, g2h, tail), @@ -887,7 +1045,7 @@ try_again: (desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.info.head) if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding || g2h_avail(ct), HZ)) - goto broken; + return false; #undef g2h_avail ret = dequeue_one_g2h(ct); @@ -895,9 +1053,32 @@ try_again: if (ret != -ECANCELED) xe_gt_err(ct_to_gt(ct), "CTB receive failed (%pe)", ERR_PTR(ret)); - goto broken; + return false; } + } + return true; +} + +static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h, + struct g2h_fence *g2h_fence) +{ + struct xe_gt *gt = ct_to_gt(ct); + unsigned int sleep_period_ms = 1; + int ret; + xe_gt_assert(gt, !g2h_len || !g2h_fence); + lockdep_assert_held(&ct->lock); + xe_device_assert_mem_access(ct_to_xe(ct)); + +try_again: + ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, + g2h_fence); + + if (unlikely(ret == -EBUSY)) { + if (!guc_ct_send_wait_for_retry(ct, len, g2h_len, g2h_fence, + &sleep_period_ms)) + goto broken; goto try_again; } @@ -980,11 +1161,15 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret) return true; 
} +#define GUC_SEND_RETRY_LIMIT 50 +#define GUC_SEND_RETRY_MSLEEP 5 + static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 *response_buffer, bool no_fail) { struct xe_gt *gt = ct_to_gt(ct); struct g2h_fence g2h_fence; + unsigned int retries = 0; int ret = 0; /* @@ -1020,10 +1205,13 @@ retry_same_fence: return ret; } - ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); + /* READ_ONCEs pairs with WRITE_ONCEs in parse_g2h_response + * and g2h_fence_cancel. + */ + ret = wait_event_timeout(ct->g2h_fence_wq, READ_ONCE(g2h_fence.done), HZ); if (!ret) { LNL_FLUSH_WORK(&ct->g2h_worker); - if (g2h_fence.done) { + if (READ_ONCE(g2h_fence.done)) { xe_gt_warn(gt, "G2H fence %u, action %04x, done\n", g2h_fence.seqno, action[0]); ret = 1; @@ -1049,9 +1237,20 @@ retry_same_fence: xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n", action[0], g2h_fence.reason); mutex_unlock(&ct->lock); + if (++retries > GUC_SEND_RETRY_LIMIT) { + xe_gt_err(gt, "H2G action %#x reached retry limit=%u, aborting\n", + action[0], GUC_SEND_RETRY_LIMIT); + return -ELOOP; + } + msleep(GUC_SEND_RETRY_MSLEEP * retries); goto retry; } if (g2h_fence.fail) { + if (g2h_fence.cancel) { + xe_gt_dbg(gt, "H2G request %#x canceled!\n", action[0]); + ret = -ECANCELED; + goto unlock; + } xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n", action[0], g2h_fence.error, g2h_fence.hint); ret = -EIO; @@ -1060,6 +1259,7 @@ retry_same_fence: if (ret > 0) ret = response_buffer ? 
g2h_fence.response_len : g2h_fence.response_data; +unlock: mutex_unlock(&ct->lock); return ret; @@ -1143,6 +1343,55 @@ static int guc_crash_process_msg(struct xe_guc_ct *ct, u32 action) return 0; } +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +static void fast_req_report(struct xe_guc_ct *ct, u16 fence) +{ + u16 fence_min = U16_MAX, fence_max = 0; + struct xe_gt *gt = ct_to_gt(ct); + bool found = false; + unsigned int n; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + char *buf; +#endif + + lockdep_assert_held(&ct->lock); + + for (n = 0; n < ARRAY_SIZE(ct->fast_req); n++) { + if (ct->fast_req[n].fence < fence_min) + fence_min = ct->fast_req[n].fence; + if (ct->fast_req[n].fence > fence_max) + fence_max = ct->fast_req[n].fence; + + if (ct->fast_req[n].fence != fence) + continue; + found = true; + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + buf = kmalloc(SZ_4K, GFP_NOWAIT); + if (buf && stack_depot_snprint(ct->fast_req[n].stack, buf, SZ_4K, 0)) + xe_gt_err(gt, "Fence 0x%x was used by action %#04x sent at:\n%s", + fence, ct->fast_req[n].action, buf); + else + xe_gt_err(gt, "Fence 0x%x was used by action %#04x [failed to retrieve stack]\n", + fence, ct->fast_req[n].action); + kfree(buf); +#else + xe_gt_err(gt, "Fence 0x%x was used by action %#04x\n", + fence, ct->fast_req[n].action); +#endif + break; + } + + if (!found) + xe_gt_warn(gt, "Fence 0x%x not found - tracking buffer wrapped? 
[range = 0x%x -> 0x%x, next = 0x%X]\n", + fence, fence_min, fence_max, ct->fence_seqno); +} +#else +static void fast_req_report(struct xe_guc_ct *ct, u16 fence) +{ +} +#endif + static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_gt *gt = ct_to_gt(ct); @@ -1171,6 +1420,13 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) else xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n", type, fence); + + fast_req_report(ct, fence); + + /* FIXME: W/A race in the GuC, will get in firmware soon */ + if (xe_gt_recovery_pending(gt)) + return 0; + CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE); return -EPROTO; @@ -1203,7 +1459,8 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); - g2h_fence->done = true; + /* WRITE_ONCE pairs with READ_ONCEs in guc_ct_send_recv. */ + WRITE_ONCE(g2h_fence->done, true); smp_mb(); wake_up_all(&ct->g2h_fence_wq); @@ -1298,12 +1555,7 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = xe_guc_pagefault_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_TLB_INVALIDATION_DONE: - ret = xe_guc_tlb_invalidation_done_handler(guc, payload, - adj_len); - break; - case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY: - ret = xe_guc_access_counter_notify_handler(guc, payload, - adj_len); + ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF: ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len); @@ -1321,6 +1573,11 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) case XE_GUC_ACTION_NOTIFY_EXCEPTION: ret = guc_crash_process_msg(ct, action); break; +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + case XE_GUC_ACTION_TEST_G2G_RECV: + ret = xe_guc_g2g_test_notification(guc, payload, adj_len); + break; +#endif default: xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); } @@ -1344,7 +1601,7 @@ static int g2h_read(struct xe_guc_ct *ct, 
u32 *msg, bool fast_path) u32 action; u32 *hxg; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); lockdep_assert_held(&ct->fast_lock); if (ct->state == XE_GUC_CT_STATE_DISABLED) @@ -1500,8 +1757,7 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) break; case XE_GUC_ACTION_TLB_INVALIDATION_DONE: __g2h_release_space(ct, len); - ret = xe_guc_tlb_invalidation_done_handler(guc, payload, - adj_len); + ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len); break; default: xe_gt_warn(gt, "NOT_POSSIBLE"); @@ -1634,7 +1890,7 @@ static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bo return NULL; if (ct->bo && want_ctb) { - snapshot->ctb_size = ct->bo->size; + snapshot->ctb_size = xe_bo_size(ct->bo); snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL); } @@ -1770,6 +2026,24 @@ void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb) } #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) + +#ifdef CONFIG_FUNCTION_ERROR_INJECTION +/* + * This is a helper function which assists the driver in identifying if a fault + * injection test is currently active, allowing it to reduce unnecessary debug + * output. Typically, the function returns zero, but the fault injection + * framework can alter this to return an error. Since faults are injected + * through this function, it's important to ensure the compiler doesn't optimize + * it into an inline function. To avoid such optimization, the 'noinline' + * attribute is applied. Compiler optimizes the static function defined in the + * header file as an inline function. 
+ */ +noinline int xe_is_injection_active(void) { return 0; } +ALLOW_ERROR_INJECTION(xe_is_injection_active, ERRNO); +#else +int xe_is_injection_active(void) { return 0; } +#endif + static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code) { struct xe_guc_log_snapshot *snapshot_log; @@ -1780,6 +2054,12 @@ static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reaso if (ctb) ctb->info.broken = true; + /* + * Huge dump is getting generated when injecting error for guc CT/MMIO + * functions. So, let us suppress the dump when fault is injected. + */ + if (xe_is_injection_active()) + return; /* Ignore further errors after the first dump until a reset */ if (ct->dead.reported) @@ -1830,7 +2110,6 @@ static void ct_dead_print(struct xe_dead_ct *dead) return; } - /* Can't generate a genuine core dump at this point, so just do the good bits */ drm_puts(&lp, "**** Xe Device Coredump ****\n"); drm_printf(&lp, "Reason: CTB is dead - 0x%X\n", dead->reason); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 82c4ae458dda..ca1ce2b3c354 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -11,10 +11,14 @@ struct drm_printer; struct xe_device; +int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct); int xe_guc_ct_init(struct xe_guc_ct *ct); +int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct); int xe_guc_ct_enable(struct xe_guc_ct *ct); +int xe_guc_ct_restart(struct xe_guc_ct *ct); void xe_guc_ct_disable(struct xe_guc_ct *ct); void xe_guc_ct_stop(struct xe_guc_ct *ct); +void xe_guc_ct_flush_and_stop(struct xe_guc_ct *ct); void xe_guc_ct_fast_path(struct xe_guc_ct *ct); struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct); @@ -22,6 +26,11 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_pr void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, 
bool want_ctb); +static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct) +{ + return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED; +} + static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct) { return ct->state == XE_GUC_CT_STATE_ENABLED; @@ -65,4 +74,13 @@ xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len) long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct); +/** + * xe_guc_ct_wake_waiters() - GuC CT wake up waiters + * @ct: GuC CT object + */ +static inline void xe_guc_ct_wake_waiters(struct xe_guc_ct *ct) +{ + wake_up_all(&ct->wq); +} + #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index 8e1b9d981d61..09d7ff1ef42a 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -9,6 +9,7 @@ #include <linux/interrupt.h> #include <linux/iosys-map.h> #include <linux/spinlock_types.h> +#include <linux/stackdepot.h> #include <linux/wait.h> #include <linux/xarray.h> @@ -104,6 +105,18 @@ struct xe_dead_ct { /** snapshot_log: copy of GuC log at point of error */ struct xe_guc_log_snapshot *snapshot_log; }; + +/** struct xe_fast_req_fence - Used to track FAST_REQ messages by fence to match error responses */ +struct xe_fast_req_fence { + /** @fence: sequence number sent in H2G and return in G2H error */ + u16 fence; + /** @action: H2G action code */ + u16 action; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + /** @stack: call stack from when the H2G was sent */ + depot_stack_handle_t stack; +#endif +}; #endif /** @@ -113,7 +126,7 @@ struct xe_dead_ct { * for the H2G and G2H requests sent and received through the buffers. 
*/ struct xe_guc_ct { - /** @bo: XE BO for CT */ + /** @bo: Xe BO for CT */ struct xe_bo *bo; /** @lock: protects everything in CT layer */ struct mutex lock; @@ -152,6 +165,8 @@ struct xe_guc_ct { #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) /** @dead: information for debugging dead CTs */ struct xe_dead_ct dead; + /** @fast_req: history of FAST_REQ messages for matching with G2H error responses */ + struct xe_fast_req_fence fast_req[SZ_32]; #endif }; diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.c b/drivers/gpu/drm/xe/xe_guc_engine_activity.c index 0fb48f8f05d8..2b99c1ebdd58 100644 --- a/drivers/gpu/drm/xe/xe_guc_engine_activity.c +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.c @@ -94,16 +94,17 @@ static int allocate_engine_activity_buffers(struct xe_guc *guc, struct xe_tile *tile = gt_to_tile(gt); struct xe_bo *bo, *metadata_bo; - metadata_bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(metadata_size), - ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE); + metadata_bo = xe_bo_create_pin_map_novm(gt_to_xe(gt), tile, PAGE_ALIGN(metadata_size), + ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE, + false); if (IS_ERR(metadata_bo)) return PTR_ERR(metadata_bo); - bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(size), - ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE); + bo = xe_bo_create_pin_map_novm(gt_to_xe(gt), tile, PAGE_ALIGN(size), + ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE, false); if (IS_ERR(bo)) { xe_bo_unpin_map_no_vm(metadata_bo); @@ -124,7 +125,7 @@ static void free_engine_activity_buffers(struct engine_activity_buffer *buffer) static bool is_engine_activity_supported(struct xe_guc *guc) { struct xe_uc_fw_version *version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; - struct xe_uc_fw_version required = { 1, 14, 1 }; + struct 
xe_uc_fw_version required = { .major = 1, .minor = 14, .patch = 1 }; struct xe_gt *gt = guc_to_gt(guc); if (IS_SRIOV_VF(gt_to_xe(gt))) { diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h index 4c39f01e4f52..a3b034e4b205 100644 --- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h @@ -20,6 +20,8 @@ struct xe_exec_queue; struct xe_guc_exec_queue { /** @q: Backpointer to parent xe_exec_queue */ struct xe_exec_queue *q; + /** @rcu: For safe freeing of exported dma fences */ + struct rcu_head rcu; /** @sched: GPU scheduler for this xe_exec_queue */ struct xe_gpu_scheduler sched; /** @entity: Scheduler entity for this xe_exec_queue */ @@ -33,8 +35,8 @@ struct xe_guc_exec_queue { struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE]; /** @lr_tdr: long running TDR worker */ struct work_struct lr_tdr; - /** @fini_async: do final fini async from this worker */ - struct work_struct fini_async; + /** @destroy_async: do final destroy async from this worker */ + struct work_struct destroy_async; /** @resume_time: time of last resume */ u64 resume_time; /** @state: GuC specific state for this xe_exec_queue */ @@ -49,6 +51,21 @@ struct xe_guc_exec_queue { wait_queue_head_t suspend_wait; /** @suspend_pending: a suspend of the exec_queue is pending */ bool suspend_pending; + /** + * @needs_cleanup: Needs a cleanup message during VF post migration + * recovery. + */ + bool needs_cleanup; + /** + * @needs_suspend: Needs a suspend message during VF post migration + * recovery. + */ + bool needs_suspend; + /** + * @needs_resume: Needs a resume message during VF post migration + * recovery. 
+ */ + bool needs_resume; }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 6f57578b07cb..c90dd266e9cf 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -15,6 +15,7 @@ #define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4 #define G2H_LEN_DW_DEREGISTER_CONTEXT 3 #define G2H_LEN_DW_TLB_INVALIDATE 3 +#define G2H_LEN_DW_G2G_NOTIFY_MIN 3 #define GUC_ID_MAX 65535 #define GUC_ID_UNKNOWN 0xffffffff @@ -45,6 +46,11 @@ #define GUC_MAX_ENGINE_CLASSES 16 #define GUC_MAX_INSTANCES_PER_CLASS 32 +#define GUC_CONTEXT_NORMAL 0 +#define GUC_CONTEXT_COMPRESSION_SAVE 1 +#define GUC_CONTEXT_COMPRESSION_RESTORE 2 +#define GUC_CONTEXT_COUNT (GUC_CONTEXT_COMPRESSION_RESTORE + 1) + /* Helper for context registration H2G */ struct guc_ctxt_registration_info { u32 flags; @@ -60,6 +66,7 @@ struct guc_ctxt_registration_info { u32 hwlrca_hi; }; #define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) +#define CONTEXT_REGISTRATION_FLAG_TYPE GENMASK(2, 1) /* 32-bit KLV structure as used by policy updates and others */ struct guc_klv_generic_dw_t { @@ -84,13 +91,10 @@ struct guc_update_exec_queue_policy { #define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1) #define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2) #define GUC_LOG_LOG_ALLOC_UNITS BIT(3) -#define GUC_LOG_CRASH_SHIFT 4 -#define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT) -#define GUC_LOG_DEBUG_SHIFT 6 -#define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT) -#define GUC_LOG_CAPTURE_SHIFT 10 -#define GUC_LOG_CAPTURE_MASK (0x3 << GUC_LOG_CAPTURE_SHIFT) -#define GUC_LOG_BUF_ADDR_SHIFT 12 +#define GUC_LOG_CRASH REG_GENMASK(5, 4) +#define GUC_LOG_DEBUG REG_GENMASK(9, 6) +#define GUC_LOG_CAPTURE REG_GENMASK(11, 10) +#define GUC_LOG_BUF_ADDR REG_GENMASK(31, 12) #define GUC_CTL_WA 1 #define GUC_WA_GAM_CREDITS BIT(10) @@ -103,28 +107,24 @@ struct guc_update_exec_queue_policy { #define GUC_WA_RENDER_RST_RC6_EXIT BIT(19) #define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21) #define 
GUC_WA_ENABLE_TSC_CHECK_ON_RC6 BIT(22) +#define GUC_WA_SAVE_RESTORE_MCFG_REG_AT_MC6 BIT(25) #define GUC_CTL_FEATURE 2 #define GUC_CTL_ENABLE_SLPC BIT(2) #define GUC_CTL_ENABLE_LITE_RESTORE BIT(4) +#define GUC_CTL_ENABLE_PSMI_LOGGING BIT(7) +#define GUC_CTL_MAIN_GAMCTRL_QUEUES BIT(9) #define GUC_CTL_DISABLE_SCHEDULER BIT(14) #define GUC_CTL_DEBUG 3 -#define GUC_LOG_VERBOSITY_SHIFT 0 -#define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT) -#define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT) -#define GUC_LOG_VERBOSITY_HIGH (2 << GUC_LOG_VERBOSITY_SHIFT) -#define GUC_LOG_VERBOSITY_ULTRA (3 << GUC_LOG_VERBOSITY_SHIFT) -#define GUC_LOG_VERBOSITY_MIN 0 +#define GUC_LOG_VERBOSITY REG_GENMASK(1, 0) #define GUC_LOG_VERBOSITY_MAX 3 -#define GUC_LOG_VERBOSITY_MASK 0x0000000f -#define GUC_LOG_DESTINATION_MASK (3 << 4) -#define GUC_LOG_DISABLED (1 << 6) -#define GUC_PROFILE_ENABLED (1 << 7) +#define GUC_LOG_DESTINATION REG_GENMASK(5, 4) +#define GUC_LOG_DISABLED BIT(6) +#define GUC_PROFILE_ENABLED BIT(7) #define GUC_CTL_ADS 4 -#define GUC_ADS_ADDR_SHIFT 1 -#define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT) +#define GUC_ADS_ADDR REG_GENMASK(21, 1) #define GUC_CTL_DEVID 5 diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 38039c411387..c01ccb35dc75 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -79,7 +79,7 @@ static struct xe_guc_log_snapshot *xe_guc_log_snapshot_alloc(struct xe_guc_log * * Also, can't use vmalloc as might be called from atomic context. So need * to break the buffer up into smaller chunks that can be allocated. 
*/ - snapshot->size = log->bo->size; + snapshot->size = xe_bo_size(log->bo); snapshot->num_chunks = DIV_ROUND_UP(snapshot->size, GUC_LOG_CHUNK_SIZE); snapshot->copy = kcalloc(snapshot->num_chunks, sizeof(*snapshot->copy), diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h index 5b896f5fafaf..98a47ac42b08 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.h +++ b/drivers/gpu/drm/xe/xe_guc_log.h @@ -12,12 +12,12 @@ struct drm_printer; struct xe_device; -#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER) +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) #define CRASH_BUFFER_SIZE SZ_1M #define DEBUG_BUFFER_SIZE SZ_8M #define CAPTURE_BUFFER_SIZE SZ_2M #else -#define CRASH_BUFFER_SIZE SZ_8K +#define CRASH_BUFFER_SIZE SZ_16K #define DEBUG_BUFFER_SIZE SZ_64K #define CAPTURE_BUFFER_SIZE SZ_1M #endif diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h index b3d5c72ac752..02851b924aa4 100644 --- a/drivers/gpu/drm/xe/xe_guc_log_types.h +++ b/drivers/gpu/drm/xe/xe_guc_log_types.h @@ -44,7 +44,7 @@ struct xe_guc_log_snapshot { struct xe_guc_log { /** @level: GuC log level */ u32 level; - /** @bo: XE BO for GuC log */ + /** @bo: Xe BO for GuC log */ struct xe_bo *bo; /** @stats: logging related stats */ struct { diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.c b/drivers/gpu/drm/xe/xe_guc_pagefault.c new file mode 100644 index 000000000000..719a18187a31 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_pagefault.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "abi/guc_actions_abi.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_pagefault.h" +#include "xe_pagefault.h" + +static void guc_ack_fault(struct xe_pagefault *pf, int err) +{ + u32 vfid = FIELD_GET(PFD_VFID, pf->producer.msg[2]); + u32 engine_instance = FIELD_GET(PFD_ENG_INSTANCE, pf->producer.msg[0]); + u32 engine_class = FIELD_GET(PFD_ENG_CLASS, pf->producer.msg[0]); + u32 pdata = 
FIELD_GET(PFD_PDATA_LO, pf->producer.msg[0]) | + (FIELD_GET(PFD_PDATA_HI, pf->producer.msg[1]) << + PFD_PDATA_HI_SHIFT); + u32 action[] = { + XE_GUC_ACTION_PAGE_FAULT_RES_DESC, + + FIELD_PREP(PFR_VALID, 1) | + FIELD_PREP(PFR_SUCCESS, !!err) | + FIELD_PREP(PFR_REPLY, PFR_ACCESS) | + FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | + FIELD_PREP(PFR_ASID, pf->consumer.asid), + + FIELD_PREP(PFR_VFID, vfid) | + FIELD_PREP(PFR_ENG_INSTANCE, engine_instance) | + FIELD_PREP(PFR_ENG_CLASS, engine_class) | + FIELD_PREP(PFR_PDATA, pdata), + }; + struct xe_guc *guc = pf->producer.private; + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); +} + +static const struct xe_pagefault_ops guc_pagefault_ops = { + .ack_fault = guc_ack_fault, +}; + +/** + * xe_guc_pagefault_handler() - G2H page fault handler + * @guc: GuC object + * @msg: G2H message + * @len: Length of G2H message + * + * Parse GuC to host (G2H) message into a struct xe_pagefault and forward onto + * the Xe page fault layer. + * + * Return: 0 on success, errno on failure + */ +int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_pagefault pf; + int i; + +#define GUC_PF_MSG_LEN_DW \ + (sizeof(struct xe_guc_pagefault_desc) / sizeof(u32)) + + BUILD_BUG_ON(GUC_PF_MSG_LEN_DW > XE_PAGEFAULT_PRODUCER_MSG_LEN_DW); + + if (len != GUC_PF_MSG_LEN_DW) + return -EPROTO; + + pf.gt = guc_to_gt(guc); + + /* + * XXX: These values happen to match the enum in xe_pagefault_types.h. + * If that changes, we’ll need to remap them here. 
+ */ + pf.consumer.page_addr = ((u64)FIELD_GET(PFD_VIRTUAL_ADDR_HI, msg[3]) + << PFD_VIRTUAL_ADDR_HI_SHIFT) | + (FIELD_GET(PFD_VIRTUAL_ADDR_LO, msg[2]) << + PFD_VIRTUAL_ADDR_LO_SHIFT); + pf.consumer.asid = FIELD_GET(PFD_ASID, msg[1]); + pf.consumer.access_type = FIELD_GET(PFD_ACCESS_TYPE, msg[2]); + pf.consumer.fault_type = FIELD_GET(PFD_FAULT_TYPE, msg[2]); + if (FIELD_GET(XE2_PFD_TRVA_FAULT, msg[0])) + pf.consumer.fault_level = XE_PAGEFAULT_LEVEL_NACK; + else + pf.consumer.fault_level = FIELD_GET(PFD_FAULT_LEVEL, msg[0]); + pf.consumer.engine_class = FIELD_GET(PFD_ENG_CLASS, msg[0]); + pf.consumer.engine_instance = FIELD_GET(PFD_ENG_INSTANCE, msg[0]); + + pf.producer.private = guc; + pf.producer.ops = &guc_pagefault_ops; + for (i = 0; i < GUC_PF_MSG_LEN_DW; ++i) + pf.producer.msg[i] = msg[i]; + +#undef GUC_PF_MSG_LEN_DW + + return xe_pagefault_handler(guc_to_xe(guc), &pf); +} diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.h b/drivers/gpu/drm/xe/xe_guc_pagefault.h new file mode 100644 index 000000000000..3bd599e7207c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_pagefault.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_GUC_PAGEFAULT_H_ +#define _XE_GUC_PAGEFAULT_H_ + +#include <linux/types.h> + +struct xe_guc; + +int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 18c623992035..951a49fb1d3e 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -5,11 +5,16 @@ #include "xe_guc_pc.h" +#include <linux/cleanup.h> #include <linux/delay.h> +#include <linux/iopoll.h> +#include <linux/jiffies.h> #include <linux/ktime.h> +#include <linux/wait_bit.h> #include <drm/drm_managed.h> #include <drm/drm_print.h> +#include <generated/xe_device_wa_oob.h> #include <generated/xe_wa_oob.h> #include "abi/guc_actions_slpc_abi.h" @@ -51,9 +56,12 @@ #define LNL_MERT_FREQ_CAP 800 
#define BMG_MERT_FREQ_CAP 2133 +#define BMG_MIN_FREQ 1200 +#define BMG_MERT_FLUSH_FREQ_CAP 2600 #define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ #define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ +#define SLPC_ACT_FREQ_TIMEOUT_MS 100 /** * DOC: GuC Power Conservation (PC) @@ -73,6 +81,11 @@ * Xe driver enables SLPC with all of its defaults features and frequency * selection, which varies per platform. * + * Power profiles add another level of control to SLPC. When power saving + * profile is chosen, SLPC will use conservative thresholds to ramp frequency, + * thus saving power. Base profile is default and ensures balanced performance + * for any workload. + * * Render-C States: * ================ * @@ -119,26 +132,37 @@ static struct iosys_map *pc_to_maps(struct xe_guc_pc *pc) FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count)) static int wait_for_pc_state(struct xe_guc_pc *pc, - enum slpc_global_state state, + enum slpc_global_state target_state, int timeout_ms) { - int timeout_us = 1000 * timeout_ms; - int slept, wait = 10; + enum slpc_global_state state; xe_device_assert_mem_access(pc_to_xe(pc)); - for (slept = 0; slept < timeout_us;) { - if (slpc_shared_data_read(pc, header.global_state) == state) - return 0; + return poll_timeout_us(state = slpc_shared_data_read(pc, header.global_state), + state == target_state, + 20, timeout_ms * USEC_PER_MSEC, false); +} - usleep_range(wait, wait << 1); - slept += wait; - wait <<= 1; - if (slept + wait > timeout_us) - wait = timeout_us - slept; - } +static int wait_for_flush_complete(struct xe_guc_pc *pc) +{ + const unsigned long timeout = msecs_to_jiffies(30); + + if (!wait_var_event_timeout(&pc->flush_freq_limit, + !atomic_read(&pc->flush_freq_limit), + timeout)) + return -ETIMEDOUT; + + return 0; +} + +static int wait_for_act_freq_max_limit(struct xe_guc_pc *pc, u32 max_limit) +{ + u32 freq; - return -ETIMEDOUT; + return poll_timeout_us(freq = 
xe_guc_pc_get_act_freq(pc), + freq <= max_limit, + 20, SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC, false); } static int pc_action_reset(struct xe_guc_pc *pc) @@ -153,7 +177,7 @@ static int pc_action_reset(struct xe_guc_pc *pc) int ret; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC reset failed: %pe\n", ERR_PTR(ret)); @@ -177,7 +201,7 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc) /* Blocking here to ensure the results are ready before reading them */ ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action)); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC query task state failed: %pe\n", ERR_PTR(ret)); @@ -200,7 +224,7 @@ static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) return -EAGAIN; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC set param[%u]=%u failed: %pe\n", id, value, ERR_PTR(ret)); @@ -222,7 +246,7 @@ static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id) return -EAGAIN; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC unset param failed: %pe", ERR_PTR(ret)); @@ -239,7 +263,7 @@ static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) int ret; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC RC enable mode=%u failed: %pe\n", mode, ERR_PTR(ret)); return ret; @@ -307,7 +331,7 @@ static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) * Our goal is to have the admin choices respected. 
*/ pc_action_set_param(pc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, - freq < pc->rpe_freq); + freq < xe_guc_pc_get_rpe_freq(pc)); return pc_action_set_param(pc, SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, @@ -339,7 +363,7 @@ static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) freq); } -static void mtl_update_rpa_value(struct xe_guc_pc *pc) +static u32 mtl_get_rpa_freq(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); u32 reg; @@ -349,10 +373,10 @@ static void mtl_update_rpa_value(struct xe_guc_pc *pc) else reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPA_FREQUENCY); - pc->rpa_freq = decode_freq(REG_FIELD_GET(MTL_RPA_MASK, reg)); + return decode_freq(REG_FIELD_GET(MTL_RPA_MASK, reg)); } -static void mtl_update_rpe_value(struct xe_guc_pc *pc) +static u32 mtl_get_rpe_freq(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); u32 reg; @@ -362,68 +386,56 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc) else reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPE_FREQUENCY); - pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg)); + return decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg)); } -static void tgl_update_rpa_value(struct xe_guc_pc *pc) +static u32 pvc_get_rpa_freq(struct xe_guc_pc *pc) { - struct xe_gt *gt = pc_to_gt(pc); - struct xe_device *xe = gt_to_xe(gt); - u32 reg; - /* * For PVC we still need to use fused RP0 as the approximation for RPa * For other platforms than PVC we get the resolved RPa directly from * PCODE at a different register */ - if (xe->info.platform == XE_PVC) { - reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP); - pc->rpa_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; - } else { - reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC); - pc->rpa_freq = REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER; - } + + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP); + return REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } -static void tgl_update_rpe_value(struct xe_guc_pc *pc)
+static u32 tgl_get_rpa_freq(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC); + return REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER; +} + +static u32 pvc_get_rpe_freq(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); - struct xe_device *xe = gt_to_xe(gt); u32 reg; /* * For PVC we still need to use fused RP1 as the approximation for RPe - * For other platforms than PVC we get the resolved RPe directly from - * PCODE at a different register */ - if (xe->info.platform == XE_PVC) { - reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP); - pc->rpe_freq = REG_FIELD_GET(RP1_MASK, reg) * GT_FREQUENCY_MULTIPLIER; - } else { - reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC); - pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; - } + reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP); + return REG_FIELD_GET(RP1_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } -static void pc_update_rp_values(struct xe_guc_pc *pc) +static u32 tgl_get_rpe_freq(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); - struct xe_device *xe = gt_to_xe(gt); - - if (GRAPHICS_VERx100(xe) >= 1270) { - mtl_update_rpa_value(pc); - mtl_update_rpe_value(pc); - } else { - tgl_update_rpa_value(pc); - tgl_update_rpe_value(pc); - } + u32 reg; /* - * RPe is decided at runtime by PCODE. In the rare case where that's - * smaller than the fused min, we will trust the PCODE and use that - * as our minimum one.
+ * For other platforms than PVC, we get the resolved RPe directly from + * PCODE at a different register */ - pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq); + reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC); + return REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } /** @@ -524,9 +536,15 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc) */ u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc) { - pc_update_rp_values(pc); + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); - return pc->rpa_freq; + if (GRAPHICS_VERx100(xe) == 1260) + return pvc_get_rpa_freq(pc); + else if (GRAPHICS_VERx100(xe) >= 1270) + return mtl_get_rpa_freq(pc); + else + return tgl_get_rpa_freq(pc); } /** @@ -537,9 +555,17 @@ u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc) */ u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc) { - pc_update_rp_values(pc); + struct xe_device *xe = pc_to_xe(pc); + u32 freq; + + if (GRAPHICS_VERx100(xe) == 1260) + freq = pvc_get_rpe_freq(pc); + else if (GRAPHICS_VERx100(xe) >= 1270) + freq = mtl_get_rpe_freq(pc); + else + freq = tgl_get_rpe_freq(pc); - return pc->rpe_freq; + return freq; } /** @@ -553,6 +579,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) return pc->rpn_freq; } +static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ + int ret; + + lockdep_assert_held(&pc->freq_lock); + + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); + if (ret) + return ret; + + *freq = pc_get_min_freq(pc); + + return 0; +} + /** * xe_guc_pc_get_min_freq - Get the min operational frequency * @pc: The GuC PC @@ -563,26 +608,28 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) */ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; -
xe_device_assert_mem_access(pc_to_xe(pc)); + lockdep_assert_held(&pc->freq_lock); - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; - ret = pc_action_query_task_state(pc); + ret = pc_set_min_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_min_freq(pc); + pc->user_requested_min = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -596,24 +643,28 @@ out: */ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_set_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_set_min_freq(pc, freq); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); if (ret) - goto out; + return ret; - pc->user_requested_min = freq; + *freq = pc_get_max_freq(pc); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -626,24 +677,28 @@ out: */ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_max_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_action_query_task_state(pc); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_set_max_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_max_freq(pc); + 
pc->user_requested_max = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -657,24 +712,14 @@ out: */ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { - int ret; - - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; + if (XE_GT_WA(pc_to_gt(pc), 22019338487)) { + if (wait_for_flush_complete(pc) != 0) + return -EAGAIN; } - ret = pc_set_max_freq(pc, freq); - if (ret) - goto out; + guard(mutex)(&pc->freq_lock); - pc->user_requested_max = freq; - -out: - mutex_unlock(&pc->freq_lock); - return ret; + return xe_guc_pc_set_max_freq_locked(pc, freq); } /** @@ -780,7 +825,7 @@ static u32 pc_max_freq_cap(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); - if (XE_WA(gt, 22019338487)) { + if (XE_GT_WA(gt, 22019338487)) { if (xe_gt_is_media_type(gt)) return min(LNL_MERT_FREQ_CAP, pc->rp0_freq); else @@ -817,6 +862,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc) static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) { + struct xe_tile *tile = gt_to_tile(pc_to_gt(pc)); int ret; lockdep_assert_held(&pc->freq_lock); @@ -843,6 +889,9 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) if (pc_get_min_freq(pc) > pc->rp0_freq) ret = pc_set_min_freq(pc, pc->rp0_freq); + if (XE_DEVICE_WA(tile_to_xe(tile), 14022085890)) + ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc))); + out: return ret; } @@ -868,30 +917,117 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) return ret; } -static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +static bool needs_flush_freq_limit(struct xe_guc_pc *pc) { - int ret = 0; + struct xe_gt *gt = pc_to_gt(pc); - if (XE_WA(pc_to_gt(pc), 22019338487)) { - /* - * Get updated min/max and stash them. 
- */ - ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq); - if (!ret) - ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq); - if (ret) - return ret; + return XE_GT_WA(gt, 22019338487) && + pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP; +} + +/** + * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush + * @pc: the xe_guc_pc object + * + * As per the WA, reduce max GT frequency during L2 cache flush + */ +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 max_freq; + int ret; + + if (!needs_flush_freq_limit(pc)) + return; + + guard(mutex)(&pc->freq_lock); + + ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq); + if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) { + ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) { + xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); + return; + } + + atomic_set(&pc->flush_freq_limit, 1); /* - * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. + * If user has previously changed max freq, stash that value to + * restore later, otherwise use the current max. New user + * requests wait on flush. */ - mutex_lock(&pc->freq_lock); - ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); - if (!ret) - ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); - mutex_unlock(&pc->freq_lock); + if (pc->user_requested_max != 0) + pc->stashed_max_freq = pc->user_requested_max; + else + pc->stashed_max_freq = max_freq; } + /* + * Wait for actual freq to go below the flush cap: even if the previous + * max was below cap, the current one might still be above it + */ + ret = wait_for_act_freq_max_limit(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) + xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); +} + +/** + * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes. 
+ * @pc: the xe_guc_pc object + * + * Retrieve the previous GT max frequency value. + */ +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret = 0; + + if (!needs_flush_freq_limit(pc)) + return; + + if (!atomic_read(&pc->flush_freq_limit)) + return; + + mutex_lock(&pc->freq_lock); + + ret = pc_set_max_freq(&gt->uc.guc.pc, pc->stashed_max_freq); + if (ret) + xe_gt_err_once(gt, "Failed to restore max freq %u:%d", + pc->stashed_max_freq, ret); + + atomic_set(&pc->flush_freq_limit, 0); + mutex_unlock(&pc->freq_lock); + wake_up_var(&pc->flush_freq_limit); +} + +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +{ + int ret; + + if (!XE_GT_WA(pc_to_gt(pc), 22019338487)) + return 0; + + guard(mutex)(&pc->freq_lock); + + /* + * Get updated min/max and stash them. + */ + ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq); + if (!ret) + ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq); + if (ret) + return ret; + + /* + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
+ */ + ret = pc_set_min_freq(pc, min(xe_guc_pc_get_rpe_freq(pc), pc_max_freq_cap(pc))); + if (!ret) + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); + return ret; } @@ -930,7 +1066,6 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) { struct xe_device *xe = pc_to_xe(pc); struct xe_gt *gt = pc_to_gt(pc); - unsigned int fw_ref; int ret = 0; if (xe->info.skip_guc_pc) @@ -940,17 +1075,7 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) if (ret) return ret; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return -ETIMEDOUT; - } - - xe_gt_idle_disable_c6(gt); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); - - return 0; + return xe_gt_idle_disable_c6(gt); } /** @@ -1010,8 +1135,6 @@ static int pc_init_freqs(struct xe_guc_pc *pc) if (ret) goto out; - pc_update_rp_values(pc); - pc_init_pcode_freq(pc); /* @@ -1036,6 +1159,61 @@ static int pc_action_set_strategy(struct xe_guc_pc *pc, u32 val) return ret; } +static const char *power_profile_to_string(struct xe_guc_pc *pc) +{ + switch (pc->power_profile) { + case SLPC_POWER_PROFILE_BASE: + return "base"; + case SLPC_POWER_PROFILE_POWER_SAVING: + return "power_saving"; + default: + return "invalid"; + } +} + +void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile) +{ + switch (pc->power_profile) { + case SLPC_POWER_PROFILE_BASE: + sprintf(profile, "[%s] %s\n", "base", "power_saving"); + break; + case SLPC_POWER_PROFILE_POWER_SAVING: + sprintf(profile, "%s [%s]\n", "base", "power_saving"); + break; + default: + sprintf(profile, "invalid"); + } +} + +int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf) +{ + int ret = 0; + u32 val; + + if (strncmp("base", buf, strlen("base")) == 0) + val = SLPC_POWER_PROFILE_BASE; + else if (strncmp("power_saving", buf, strlen("power_saving")) == 0) + val = SLPC_POWER_PROFILE_POWER_SAVING; + else + return -EINVAL; + + 
guard(mutex)(&pc->freq_lock); + xe_pm_runtime_get_noresume(pc_to_xe(pc)); + + ret = pc_action_set_param(pc, + SLPC_PARAM_POWER_PROFILE, + val); + if (ret) + xe_gt_err_once(pc_to_gt(pc), "Failed to set power profile to %d: %pe\n", + val, ERR_PTR(ret)); + else + pc->power_profile = val; + + xe_pm_runtime_put(pc_to_xe(pc)); + + return ret; +} + /** * xe_guc_pc_start - Start GuC's Power Conservation component * @pc: Xe_GuC_PC instance @@ -1068,7 +1246,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) goto out; } - memset(pc->bo->vmap.vaddr, 0, size); + xe_map_memset(xe, &pc->bo->vmap, 0, 0, size); slpc_shared_data_write(pc, header.size, size); earlier = ktime_get(); @@ -1114,6 +1292,11 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) /* Enable SLPC Optimized Strategy for compute */ ret = pc_action_set_strategy(pc, SLPC_OPTIMIZED_STRATEGY_COMPUTE); + /* Set cached value of power_profile */ + ret = xe_guc_pc_set_power_profile(pc, power_profile_to_string(pc)); + if (unlikely(ret)) + xe_gt_err(gt, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret)); + out: xe_force_wake_put(gt_to_fw(gt), fw_ref); return ret; @@ -1157,7 +1340,7 @@ static void xe_guc_pc_fini_hw(void *arg) XE_WARN_ON(xe_guc_pc_stop(pc)); /* Bind requested freq to mert_freq_cap before unload */ - pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq)); + pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), xe_guc_pc_get_rpe_freq(pc))); xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), fw_ref); } @@ -1192,6 +1375,8 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) pc->bo = bo; + pc->power_profile = SLPC_POWER_PROFILE_BASE; + return devm_add_action_or_reset(xe->drm.dev, xe_guc_pc_fini_hw, pc); } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 0a2664d5c811..0e31396f103c 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -31,6 +31,8 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq); int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq); int 
xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq); int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq); +int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf); +void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile); enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc); u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc); @@ -38,5 +40,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc); +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc); +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 2978ac9a249b..711bbcdcb0d3 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -15,12 +15,10 @@ struct xe_guc_pc { /** @bo: GGTT buffer object that is shared with GuC PC */ struct xe_bo *bo; + /** @flush_freq_limit: 1 when max freq changes are limited by driver */ + atomic_t flush_freq_limit; /** @rp0_freq: HW RP0 frequency - The Maximum one */ u32 rp0_freq; - /** @rpa_freq: HW RPa frequency - The Achievable one */ - u32 rpa_freq; - /** @rpe_freq: HW RPe frequency - The Efficient one */ - u32 rpe_freq; /** @rpn_freq: HW RPN frequency - The Minimum one */ u32 rpn_freq; /** @user_requested_min: Stash the minimum requested freq by user */ @@ -35,6 +33,8 @@ struct xe_guc_pc { struct mutex freq_lock; /** @freq_ready: Only handle freq changes, if they are really ready */ bool freq_ready; + /** @power_profile: Base or power_saving profile */ + u32 power_profile; }; #endif /* _XE_GUC_PC_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c index e5dc94f3e618..0c0ff24ba62a 100644 --- a/drivers/gpu/drm/xe/xe_guc_relay.c +++ 
b/drivers/gpu/drm/xe/xe_guc_relay.c @@ -56,9 +56,19 @@ static struct xe_device *relay_to_xe(struct xe_guc_relay *relay) return gt_to_xe(relay_to_gt(relay)); } +#define XE_RELAY_DIAG_RATELIMIT_INTERVAL (10 * HZ) +#define XE_RELAY_DIAG_RATELIMIT_BURST 10 + +#define relay_ratelimit_printk(relay, _level, fmt...) ({ \ + typeof(relay) _r = (relay); \ + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) || \ + ___ratelimit(&_r->diag_ratelimit, "xe_guc_relay")) \ + xe_gt_sriov_##_level(relay_to_gt(_r), "relay: " fmt); \ +}) + #define relay_assert(relay, condition) xe_gt_assert(relay_to_gt(relay), condition) -#define relay_notice(relay, msg...) xe_gt_sriov_notice(relay_to_gt(relay), "relay: " msg) -#define relay_debug(relay, msg...) xe_gt_sriov_dbg_verbose(relay_to_gt(relay), "relay: " msg) +#define relay_notice(relay, msg...) relay_ratelimit_printk((relay), notice, msg) +#define relay_debug(relay, msg...) relay_ratelimit_printk((relay), dbg_verbose, msg) static int relay_get_totalvfs(struct xe_guc_relay *relay) { @@ -345,6 +355,9 @@ int xe_guc_relay_init(struct xe_guc_relay *relay) INIT_WORK(&relay->worker, relays_worker_fn); INIT_LIST_HEAD(&relay->pending_relays); INIT_LIST_HEAD(&relay->incoming_actions); + ratelimit_state_init(&relay->diag_ratelimit, + XE_RELAY_DIAG_RATELIMIT_INTERVAL, + XE_RELAY_DIAG_RATELIMIT_BURST); err = mempool_init_kmalloc_pool(&relay->pool, XE_RELAY_MEMPOOL_MIN_NUM + relay_get_totalvfs(relay), diff --git a/drivers/gpu/drm/xe/xe_guc_relay_types.h b/drivers/gpu/drm/xe/xe_guc_relay_types.h index 5999fcb77e96..20eee10856b2 100644 --- a/drivers/gpu/drm/xe/xe_guc_relay_types.h +++ b/drivers/gpu/drm/xe/xe_guc_relay_types.h @@ -7,6 +7,7 @@ #define _XE_GUC_RELAY_TYPES_H_ #include <linux/mempool.h> +#include <linux/ratelimit_types.h> #include <linux/spinlock.h> #include <linux/workqueue.h> @@ -31,6 +32,9 @@ struct xe_guc_relay { /** @last_rid: last Relay-ID used while sending a message. 
*/ u32 last_rid; + + /** @diag_ratelimit: ratelimit state used to throttle diagnostics messages. */ + struct ratelimit_state diag_ratelimit; }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 2ad38f6b103e..f6ba2b0f074d 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -32,6 +32,7 @@ #include "xe_guc_ct.h" #include "xe_guc_exec_queue_types.h" #include "xe_guc_id_mgr.h" +#include "xe_guc_klv_helpers.h" #include "xe_guc_submit_types.h" #include "xe_hw_engine.h" #include "xe_hw_fence.h" @@ -43,6 +44,7 @@ #include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_trace.h" +#include "xe_uc_fw.h" #include "xe_vm.h" static struct xe_guc * @@ -68,6 +70,8 @@ exec_queue_to_guc(struct xe_exec_queue *q) #define EXEC_QUEUE_STATE_BANNED (1 << 9) #define EXEC_QUEUE_STATE_CHECK_TIMEOUT (1 << 10) #define EXEC_QUEUE_STATE_EXTRA_REF (1 << 11) +#define EXEC_QUEUE_STATE_PENDING_RESUME (1 << 12) +#define EXEC_QUEUE_STATE_PENDING_TDR_EXIT (1 << 13) static bool exec_queue_registered(struct xe_exec_queue *q) { @@ -139,6 +143,11 @@ static void set_exec_queue_destroyed(struct xe_exec_queue *q) atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); } +static void clear_exec_queue_destroyed(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); +} + static bool exec_queue_banned(struct xe_exec_queue *q) { return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; @@ -219,6 +228,41 @@ static void set_exec_queue_extra_ref(struct xe_exec_queue *q) atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); } +static void clear_exec_queue_extra_ref(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); +} + +static bool exec_queue_pending_resume(struct xe_exec_queue *q) +{ + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME; +} + +static void set_exec_queue_pending_resume(struct xe_exec_queue *q) +{ + 
atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); +} + +static void clear_exec_queue_pending_resume(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); +} + +static bool exec_queue_pending_tdr_exit(struct xe_exec_queue *q) +{ + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_TDR_EXIT; +} + +static void set_exec_queue_pending_tdr_exit(struct xe_exec_queue *q) +{ + atomic_or(EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state); +} + +static void clear_exec_queue_pending_tdr_exit(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state); +} + static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) { return (atomic_read(&q->guc->state) & @@ -229,6 +273,17 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) static void guc_submit_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + ret = wait_event_timeout(guc->submission_state.fini_wq, + xa_empty(&guc->submission_state.exec_queue_lookup), + HZ * 5); + + drain_workqueue(xe->destroy_wq); + + xe_gt_assert(gt, ret); xa_destroy(&guc->submission_state.exec_queue_lookup); } @@ -305,6 +360,71 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); } +/* + * Given that we want to guarantee enough RCS throughput to avoid missing + * frames, we set the yield policy to 20% of each 80ms interval. 
+ */ +#define RC_YIELD_DURATION 80 /* in ms */ +#define RC_YIELD_RATIO 20 /* in percent */ +static u32 *emit_render_compute_yield_klv(u32 *emit) +{ + *emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD); + *emit++ = RC_YIELD_DURATION; + *emit++ = RC_YIELD_RATIO; + + return emit; +} + +#define SCHEDULING_POLICY_MAX_DWORDS 16 +static int guc_init_global_schedule_policy(struct xe_guc *guc) +{ + u32 data[SCHEDULING_POLICY_MAX_DWORDS]; + u32 *emit = data; + u32 count = 0; + int ret; + + if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) + return 0; + + *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; + + if (CCS_MASK(guc_to_gt(guc))) + emit = emit_render_compute_yield_klv(emit); + + count = emit - data; + if (count > 1) { + xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS); + + ret = xe_guc_ct_send_block(&guc->ct, data, count); + if (ret < 0) { + xe_gt_err(guc_to_gt(guc), + "failed to enable GuC scheduling policies: %pe\n", + ERR_PTR(ret)); + return ret; + } + } + + return 0; +} + +int xe_guc_submit_enable(struct xe_guc *guc) +{ + int ret; + + ret = guc_init_global_schedule_policy(guc); + if (ret) + return ret; + + guc->submission_state.enabled = true; + + return 0; +} + +void xe_guc_submit_disable(struct xe_guc *guc) +{ + guc->submission_state.enabled = false; +} + static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) { int i; @@ -487,6 +607,15 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc, action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); } + /* explicitly checks some fields that we might fixup later */ + xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]); + xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]); + xe_gt_assert(guc_to_gt(guc), q->width == + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]); + xe_gt_assert(guc_to_gt(guc), 
info->hwlrca_lo == + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]); xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE); #undef MAX_MLRC_REG_SIZE @@ -511,10 +640,18 @@ static void __register_exec_queue(struct xe_guc *guc, info->hwlrca_hi, }; + /* explicitly checks some fields that we might fixup later */ + xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == + action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]); + xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == + action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]); + xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo == + action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]); + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); } -static void register_exec_queue(struct xe_exec_queue *q) +static void register_exec_queue(struct xe_exec_queue *q, int ctx_type) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); @@ -522,6 +659,7 @@ static void register_exec_queue(struct xe_exec_queue *q) struct guc_ctxt_registration_info info; xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT); memset(&info, 0, sizeof(info)); info.context_idx = q->guc->id; @@ -529,7 +667,8 @@ static void register_exec_queue(struct xe_exec_queue *q) info.engine_submit_mask = q->logical_mask; info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); - info.flags = CONTEXT_REGISTRATION_FLAG_KMD; + info.flags = CONTEXT_REGISTRATION_FLAG_KMD | + FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type); if (xe_exec_queue_is_parallel(q)) { u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); @@ -573,26 +712,51 @@ static u32 wq_space_until_wrap(struct xe_exec_queue *q) return (WQ_SIZE - q->guc->wqi_tail); } +static bool vf_recovery(struct xe_guc *guc) +{ + return xe_gt_recovery_pending(guc_to_gt(guc)); +} + +static inline void relaxed_ms_sleep(unsigned int delay_ms) +{ + unsigned long 
min_us, max_us; + + if (!delay_ms) + return; + + if (delay_ms > 20) { + msleep(delay_ms); + return; + } + + min_us = mul_u32_u32(delay_ms, 1000); + max_us = min_us + 500; + + usleep_range(min_us, max_us); +} + static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); - unsigned int sleep_period_ms = 1; + unsigned int sleep_period_ms = 1, sleep_total_ms = 0; #define AVAILABLE_SPACE \ CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE) - if (wqi_size > AVAILABLE_SPACE) { + if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) { try_again: q->guc->wqi_head = parallel_read(xe, map, wq_desc.head); - if (wqi_size > AVAILABLE_SPACE) { - if (sleep_period_ms == 1024) { + if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) { + if (sleep_total_ms > 2000) { xe_gt_reset_async(q->gt); return -ENODEV; } msleep(sleep_period_ms); - sleep_period_ms <<= 1; + sleep_total_ms += sleep_period_ms; + if (sleep_period_ms < 64) + sleep_period_ms <<= 1; goto try_again; } } @@ -666,7 +830,7 @@ static void wq_item_append(struct xe_exec_queue *q) } #define RESUME_PENDING ~0x0ull -static void submit_exec_queue(struct xe_exec_queue *q) +static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_lrc *lrc = q->lrc[0]; @@ -678,10 +842,13 @@ static void submit_exec_queue(struct xe_exec_queue *q) xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - if (xe_exec_queue_is_parallel(q)) - wq_item_append(q); - else - xe_lrc_set_ring_tail(lrc, lrc->ring.tail); + if (!job->restore_replay || job->last_replay) { + if (xe_exec_queue_is_parallel(q)) + wq_item_append(q); + else + xe_lrc_set_ring_tail(lrc, lrc->ring.tail); + job->last_replay = false; + } if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) return; @@ -723,30 +890,33 @@ guc_exec_queue_run_job(struct 
drm_sched_job *drm_job) struct xe_sched_job *job = to_xe_sched_job(drm_job); struct xe_exec_queue *q = job->q; struct xe_guc *guc = exec_queue_to_guc(q); - struct dma_fence *fence = NULL; - bool lr = xe_exec_queue_is_lr(q); + bool lr = xe_exec_queue_is_lr(q), killed_or_banned_or_wedged = + exec_queue_killed_or_banned_or_wedged(q); xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || exec_queue_banned(q) || exec_queue_suspended(q)); trace_xe_sched_job_run(job); - if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) { + if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) { if (!exec_queue_registered(q)) - register_exec_queue(q); - if (!lr) /* LR jobs are emitted in the exec IOCTL */ + register_exec_queue(q, GUC_CONTEXT_NORMAL); + if (!job->restore_replay) q->ring_ops->emit_job(job); - submit_exec_queue(q); + submit_exec_queue(q, job); + job->restore_replay = false; } - if (lr) { - xe_sched_job_set_error(job, -EOPNOTSUPP); - dma_fence_put(job->fence); /* Drop ref from xe_sched_job_arm */ - } else { - fence = job->fence; - } + /* + * We don't care about job-fence ordering in LR VMs because these fences + * are never exported; they are used solely to keep jobs on the pending + * list. Once a queue enters an error state, there's no need to track + * them. 
+ */ + if (killed_or_banned_or_wedged && lr) + xe_sched_job_set_error(job, -ECANCELED); - return fence; + return job->fence; } static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) @@ -780,15 +950,17 @@ static void disable_scheduling_deregister(struct xe_guc *guc, ret = wait_event_timeout(guc->ct.wq, (!exec_queue_pending_enable(q) && !exec_queue_pending_disable(q)) || - xe_guc_read_stopped(guc), + xe_guc_read_stopped(guc) || + vf_recovery(guc), HZ * 5); - if (!ret) { + if (!ret && !vf_recovery(guc)) { struct xe_gpu_scheduler *sched = &q->guc->sched; xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n"); xe_sched_submission_start(sched); xe_gt_reset_async(q->gt); - xe_sched_tdr_queue_imm(sched); + if (!xe_exec_queue_is_lr(q)) + xe_sched_tdr_queue_imm(sched); return; } @@ -880,12 +1052,18 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_gpu_scheduler *sched = &ge->sched; - bool wedged; + struct xe_sched_job *job; + bool wedged = false; xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); + + if (vf_recovery(guc)) + return; + trace_xe_exec_queue_lr_cleanup(q); - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); @@ -914,7 +1092,11 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) */ ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_disable(q) || - xe_guc_read_stopped(guc), HZ * 5); + xe_guc_read_stopped(guc) || + vf_recovery(guc), HZ * 5); + if (vf_recovery(guc)) + return; + if (!ret) { xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n", q->guc->id); @@ -929,7 +1111,16 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0])) xe_devcoredump(q, NULL, "LR job 
cleanup, guc_id=%d", q->guc->id); + xe_hw_fence_irq_stop(q->fence_irq); + xe_sched_submission_start(sched); + + spin_lock(&sched->base.job_list_lock); + list_for_each_entry(job, &sched->base.pending_list, drm.list) + xe_sched_job_set_error(job, -ECANCELED); + spin_unlock(&sched->base.job_list_lock); + + xe_hw_fence_irq_start(q->fence_irq); } #define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100) @@ -959,10 +1150,7 @@ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) */ xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC); - if (ctx_timestamp < ctx_job_timestamp) - diff = ctx_timestamp + U32_MAX - ctx_job_timestamp; - else - diff = ctx_timestamp - ctx_job_timestamp; + diff = ctx_timestamp - ctx_job_timestamp; /* * Ensure timeout is within 5% to account for an GuC scheduling latency @@ -998,12 +1186,14 @@ static void enable_scheduling(struct xe_exec_queue *q) ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) || - xe_guc_read_stopped(guc), HZ * 5); - if (!ret || xe_guc_read_stopped(guc)) { + xe_guc_read_stopped(guc) || + vf_recovery(guc), HZ * 5); + if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) { xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond"); set_exec_queue_banned(q); xe_gt_reset_async(q->gt); - xe_sched_tdr_queue_imm(&q->guc->sched); + if (!xe_exec_queue_is_lr(q)) + xe_sched_tdr_queue_imm(&q->guc->sched); } } @@ -1059,7 +1249,9 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int err = -ETIME; pid_t pid = -1; int i = 0; - bool wedged, skip_timeout_check; + bool wedged = false, skip_timeout_check; + + xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_lr(q)); /* * TDR has fired before free job worker. Common if exec queue @@ -1067,12 +1259,9 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * list so job can be freed and kick scheduler ensuring free job is not * lost. 
*/ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); - - return DRM_GPU_SCHED_STAT_NOMINAL; - } + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) || + vf_recovery(guc)) + return DRM_GPU_SCHED_STAT_NO_HANG; /* Kill the run_job entry point */ xe_sched_submission_stop(sched); @@ -1105,7 +1294,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * doesn't work for SRIOV. For now assuming timeouts in wedged mode are * genuine timeouts. */ - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Engine state now stable, disable scheduling to check timestamp */ if (!wedged && exec_queue_registered(q)) { @@ -1122,7 +1312,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) ret = wait_event_timeout(guc->ct.wq, (!exec_queue_pending_enable(q) && !exec_queue_pending_disable(q)) || - xe_guc_read_stopped(guc), HZ * 5); + xe_guc_read_stopped(guc) || + vf_recovery(guc), HZ * 5); + if (vf_recovery(guc)) + goto handle_vf_resume; if (!ret || xe_guc_read_stopped(guc)) goto trigger_reset; @@ -1147,7 +1340,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) smp_rmb(); ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_disable(q) || - xe_guc_read_stopped(guc), HZ * 5); + xe_guc_read_stopped(guc) || + vf_recovery(guc), HZ * 5); + if (vf_recovery(guc)) + goto handle_vf_resume; if (!ret || xe_guc_read_stopped(guc)) { trigger_reset: if (!ret) @@ -1240,9 +1436,10 @@ trigger_reset: /* Start fence signaling */ xe_hw_fence_irq_start(q->fence_irq); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; sched_enable: + set_exec_queue_pending_tdr_exit(q); enable_scheduling(q); rearm: /* @@ -1250,50 +1447,62 @@ rearm: * but there is not currently an easy way to do in DRM scheduler. With * some thought, do this in a follow up. 
*/ - xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); +handle_vf_resume: + return DRM_GPU_SCHED_STAT_NO_HANG; +} + +static void guc_exec_queue_fini(struct xe_exec_queue *q) +{ + struct xe_guc_exec_queue *ge = q->guc; + struct xe_guc *guc = exec_queue_to_guc(q); - return DRM_GPU_SCHED_STAT_NOMINAL; + release_guc_id(guc, q); + xe_sched_entity_fini(&ge->entity); + xe_sched_fini(&ge->sched); + + /* + * RCU free due sched being exported via DRM scheduler fences + * (timeline name). + */ + kfree_rcu(ge, rcu); } -static void __guc_exec_queue_fini_async(struct work_struct *w) +static void __guc_exec_queue_destroy_async(struct work_struct *w) { struct xe_guc_exec_queue *ge = - container_of(w, struct xe_guc_exec_queue, fini_async); + container_of(w, struct xe_guc_exec_queue, destroy_async); struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); xe_pm_runtime_get(guc_to_xe(guc)); trace_xe_exec_queue_destroy(q); - release_guc_id(guc, q); if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&ge->sched.base.work_tdr); - xe_sched_entity_fini(&ge->entity); - xe_sched_fini(&ge->sched); - kfree(ge); xe_exec_queue_fini(q); + xe_pm_runtime_put(guc_to_xe(guc)); } -static void guc_exec_queue_fini_async(struct xe_exec_queue *q) +static void guc_exec_queue_destroy_async(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); + INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async); /* We must block on kernel engines so slabs are empty on driver unload */ if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) - __guc_exec_queue_fini_async(&q->guc->fini_async); + __guc_exec_queue_destroy_async(&q->guc->destroy_async); else - queue_work(xe->destroy_wq, &q->guc->fini_async); + queue_work(xe->destroy_wq, 
&q->guc->destroy_async); } -static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) +static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q) { /* * Might be done from within the GPU scheduler, need to do async as we @@ -1302,7 +1511,7 @@ static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) * this we and don't really care when everything is fini'd, just that it * is. */ - guc_exec_queue_fini_async(q); + guc_exec_queue_destroy_async(q); } static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) @@ -1313,10 +1522,20 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); trace_xe_exec_queue_cleanup_entity(q); - if (exec_queue_registered(q)) + /* + * Expected state transitions for cleanup: + * - If the exec queue is registered and GuC firmware is running, we must first + * disable scheduling and deregister the queue to ensure proper teardown and + * resource release in the GuC, then destroy the exec queue on driver side. + * - If the GuC is already stopped (e.g., during driver unload or GPU reset), + * we cannot expect a response for the deregister request. In this case, + * it is safe to directly destroy the exec queue on driver side, as the GuC + * will not process further requests and all resources must be cleaned up locally. 
+ */ + if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw)) disable_scheduling_deregister(guc, q); else - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) @@ -1336,11 +1555,24 @@ static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *ms static void __suspend_fence_signal(struct xe_exec_queue *q) { + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + if (!q->guc->suspend_pending) return; WRITE_ONCE(q->guc->suspend_pending, false); - wake_up(&q->guc->suspend_wait); + + /* + * We use a GuC shared wait queue for VFs because the VF resfix start + * interrupt must be able to wake all instances of suspend_wait. This + * prevents the VF migration worker from being starved during + * scheduling. + */ + if (IS_SRIOV_VF(xe)) + wake_up_all(&guc->ct.wq); + else + wake_up(&q->guc->suspend_wait); } static void suspend_fence_signal(struct xe_exec_queue *q) @@ -1361,8 +1593,9 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && exec_queue_enabled(q)) { - wait_event(guc->ct.wq, (q->guc->resume_time != RESUME_PENDING || - xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q)); + wait_event(guc->ct.wq, vf_recovery(guc) || + ((q->guc->resume_time != RESUME_PENDING || + xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q))); if (!xe_guc_read_stopped(guc)) { s64 since_resume_ms = @@ -1372,7 +1605,7 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) since_resume_ms; if (wait_ms > 0 && q->guc->resume_time) - msleep(wait_ms); + relaxed_ms_sleep(wait_ms); set_exec_queue_suspended(q); disable_scheduling(q, false); @@ -1391,6 +1624,7 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) clear_exec_queue_suspended(q); if (!exec_queue_enabled(q)) { q->guc->resume_time = 
RESUME_PENDING; + set_exec_queue_pending_resume(q); enable_scheduling(q); } } else { @@ -1404,6 +1638,7 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) #define RESUME 4 #define OPCODE_MASK 0xf #define MSG_LOCKED BIT(8) +#define MSG_HEAD BIT(9) static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) { @@ -1457,6 +1692,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) q->guc = ge; ge->q = q; + init_rcu_head(&ge->rcu); init_waitqueue_head(&ge->suspend_wait); for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) @@ -1465,7 +1701,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(q->sched_props.job_timeout_ms); err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, - NULL, q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64, + NULL, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64, timeout, guc_to_gt(guc)->ordered_wq, NULL, q->name, gt_to_xe(q->gt)->drm.dev); if (err) @@ -1487,7 +1723,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) q->entity = &ge->entity; - if (xe_guc_read_stopped(guc)) + if (xe_guc_read_stopped(guc) || vf_recovery(guc)) xe_sched_stop(sched); mutex_unlock(&guc->submission_state.lock); @@ -1527,12 +1763,24 @@ static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg msg->private_data = q; trace_xe_sched_msg_add(msg); - if (opcode & MSG_LOCKED) + if (opcode & MSG_HEAD) + xe_sched_add_msg_head(&q->guc->sched, msg); + else if (opcode & MSG_LOCKED) xe_sched_add_msg_locked(&q->guc->sched, msg); else xe_sched_add_msg(&q->guc->sched, msg); } +static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q, + struct xe_sched_msg *msg, + u32 opcode) +{ + if (!list_empty(&msg->link)) + return; + + guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD); +} + static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, u32 opcode) @@ -1548,14 +1796,14 @@ static 
bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, #define STATIC_MSG_CLEANUP 0 #define STATIC_MSG_SUSPEND 1 #define STATIC_MSG_RESUME 2 -static void guc_exec_queue_fini(struct xe_exec_queue *q) +static void guc_exec_queue_destroy(struct xe_exec_queue *q) { struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) guc_exec_queue_add_msg(q, msg, CLEANUP); else - __guc_exec_queue_fini(exec_queue_to_guc(q), q); + __guc_exec_queue_destroy(exec_queue_to_guc(q), q); } static int guc_exec_queue_set_priority(struct xe_exec_queue *q, @@ -1633,6 +1881,7 @@ static int guc_exec_queue_suspend(struct xe_exec_queue *q) static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); int ret; /* @@ -1640,11 +1889,21 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) * suspend_pending upon kill but to be paranoid but races in which * suspend_pending is set after kill also check kill here. */ - ret = wait_event_interruptible_timeout(q->guc->suspend_wait, - !READ_ONCE(q->guc->suspend_pending) || - exec_queue_killed(q) || - xe_guc_read_stopped(guc), - HZ * 5); +#define WAIT_COND \ + (!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \ + xe_guc_read_stopped(guc)) + +retry: + if (IS_SRIOV_VF(xe)) + ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND || + vf_recovery(guc), + HZ * 5); + else + ret = wait_event_interruptible_timeout(q->guc->suspend_wait, + WAIT_COND, HZ * 5); + + if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc)))) + return -EAGAIN; if (!ret) { xe_gt_warn(guc_to_gt(guc), @@ -1652,8 +1911,13 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) q->guc->id); /* XXX: Trigger GT reset? 
*/ return -ETIME; + } else if (IS_SRIOV_VF(xe) && !WAIT_COND) { + /* Corner case on RESFIX DONE where vf_recovery() changes */ + goto retry; } +#undef WAIT_COND + return ret < 0 ? ret : 0; } @@ -1676,7 +1940,7 @@ static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) } /* - * All of these functions are an abstraction layer which other parts of XE can + * All of these functions are an abstraction layer which other parts of Xe can * use to trap into the GuC backend. All of these functions, aside from init, * really shouldn't do much other than trap into the DRM scheduler which * synchronizes these operations. @@ -1685,6 +1949,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = { .init = guc_exec_queue_init, .kill = guc_exec_queue_kill, .fini = guc_exec_queue_fini, + .destroy = guc_exec_queue_destroy, .set_priority = guc_exec_queue_set_priority, .set_timeslice = guc_exec_queue_set_timeslice, .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, @@ -1706,7 +1971,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) xe_exec_queue_put(q); else if (exec_queue_destroyed(q)) - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } if (q->guc->suspend_pending) { set_exec_queue_suspended(q); @@ -1751,6 +2016,12 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc) { int ret; + if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc))) + return 0; + + if (!guc->submission_state.initialized) + return 0; + /* * Using an atomic here rather than submission_state.lock as this * function can be called while holding the CT lock (engine reset @@ -1797,16 +2068,177 @@ void xe_guc_submit_stop(struct xe_guc *guc) } +static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc, + struct xe_exec_queue *q) +{ + bool pending_enable, pending_disable, pending_resume; + + pending_enable = exec_queue_pending_enable(q); + pending_resume = 
exec_queue_pending_resume(q); + + if (pending_enable && pending_resume) { + q->guc->needs_resume = true; + xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d", + q->guc->id); + } + + if (pending_enable && !pending_resume && + !exec_queue_pending_tdr_exit(q)) { + clear_exec_queue_registered(q); + if (xe_exec_queue_is_lr(q)) + xe_exec_queue_put(q); + xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d", + q->guc->id); + } + + if (pending_enable) { + clear_exec_queue_enabled(q); + clear_exec_queue_pending_resume(q); + clear_exec_queue_pending_tdr_exit(q); + clear_exec_queue_pending_enable(q); + xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d", + q->guc->id); + } + + if (exec_queue_destroyed(q) && exec_queue_registered(q)) { + clear_exec_queue_destroyed(q); + if (exec_queue_extra_ref(q)) + xe_exec_queue_put(q); + else + q->guc->needs_cleanup = true; + clear_exec_queue_extra_ref(q); + xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d", + q->guc->id); + } + + pending_disable = exec_queue_pending_disable(q); + + if (pending_disable && exec_queue_suspended(q)) { + clear_exec_queue_suspended(q); + q->guc->needs_suspend = true; + xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d", + q->guc->id); + } + + if (pending_disable) { + if (!pending_enable) + set_exec_queue_enabled(q); + clear_exec_queue_pending_disable(q); + clear_exec_queue_check_timeout(q); + xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d", + q->guc->id); + } + + q->guc->resume_time = 0; +} + +static void lrc_parallel_clear(struct xe_lrc *lrc) +{ + struct xe_device *xe = gt_to_xe(lrc->gt); + struct iosys_map map = xe_lrc_parallel_map(lrc); + int i; + + for (i = 0; i < WQ_SIZE / sizeof(u32); ++i) + parallel_write(xe, map, wq[i], + FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | + FIELD_PREP(WQ_LEN_MASK, 0)); +} + +/* + * This function is quite complex but only real way to ensure no state is lost + * during VF resume flows. 
The function scans the queue state, make adjustments + * as needed, and queues jobs / messages which replayed upon unpause. + */ +static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q) +{ + struct xe_gpu_scheduler *sched = &q->guc->sched; + struct xe_sched_job *job; + int i; + + lockdep_assert_held(&guc->submission_state.lock); + + /* Stop scheduling + flush any DRM scheduler operations */ + xe_sched_submission_stop(sched); + if (xe_exec_queue_is_lr(q)) + cancel_work_sync(&q->guc->lr_tdr); + else + cancel_delayed_work_sync(&sched->base.work_tdr); + + guc_exec_queue_revert_pending_state_change(guc, q); + + if (xe_exec_queue_is_parallel(q)) { + /* Pairs with WRITE_ONCE in __xe_exec_queue_init */ + struct xe_lrc *lrc = READ_ONCE(q->lrc[0]); + + /* + * NOP existing WQ commands that may contain stale GGTT + * addresses. These will be replayed upon unpause. The hardware + * seems to get confused if the WQ head/tail pointers are + * adjusted. + */ + if (lrc) + lrc_parallel_clear(lrc); + } + + job = xe_sched_first_pending_job(sched); + if (job) { + job->restore_replay = true; + + /* + * Adjust software tail so jobs submitted overwrite previous + * position in ring buffer with new GGTT addresses. + */ + for (i = 0; i < q->width; ++i) + q->lrc[i]->ring.tail = job->ptrs[i].head; + } +} + +/** + * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC. 
+ * @guc: the &xe_guc struct instance whose scheduler is to be disabled + */ +void xe_guc_submit_pause(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + /* Prevent redundant attempts to stop parallel queues */ + if (q->guc->id != index) + continue; + + guc_exec_queue_pause(guc, q); + } + mutex_unlock(&guc->submission_state.lock); +} + static void guc_exec_queue_start(struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched = &q->guc->sched; if (!exec_queue_killed_or_banned_or_wedged(q)) { + struct xe_sched_job *job = xe_sched_first_pending_job(sched); int i; trace_xe_exec_queue_resubmit(q); - for (i = 0; i < q->width; ++i) - xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail); + if (job) { + for (i = 0; i < q->width; ++i) { + /* + * The GuC context is unregistered at this point + * time, adjusting software ring tail ensures + * jobs are rewritten in original placement, + * adjusting LRC tail ensures the newly loaded + * GuC / contexts only view the LRC tail + * increasing as jobs are written out. 
+ */ + q->lrc[i]->ring.tail = job->ptrs[i].head; + xe_lrc_set_ring_tail(q->lrc[i], + xe_lrc_ring_head(q->lrc[i])); + } + } xe_sched_resubmit_jobs(sched); } @@ -1837,6 +2269,152 @@ int xe_guc_submit_start(struct xe_guc *guc) return 0; } +static void guc_exec_queue_unpause_prepare(struct xe_guc *guc, + struct xe_exec_queue *q) +{ + struct xe_gpu_scheduler *sched = &q->guc->sched; + struct xe_sched_job *job = NULL, *__job; + bool restore_replay = false; + + list_for_each_entry(__job, &sched->base.pending_list, drm.list) { + job = __job; + restore_replay |= job->restore_replay; + if (restore_replay) { + xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d", + q->guc->id, xe_sched_job_seqno(job)); + + q->ring_ops->emit_job(job); + job->restore_replay = true; + } + } + + if (job) + job->last_replay = true; +} + +/** + * xe_guc_submit_unpause_prepare - Prepare unpause submission tasks on given GuC. + * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause + */ +void xe_guc_submit_unpause_prepare(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + /* Prevent redundant attempts to stop parallel queues */ + if (q->guc->id != index) + continue; + + guc_exec_queue_unpause_prepare(guc, q); + } + mutex_unlock(&guc->submission_state.lock); +} + +static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q) +{ + struct xe_gpu_scheduler *sched = &q->guc->sched; + struct xe_sched_msg *msg; + + if (q->guc->needs_cleanup) { + msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; + + guc_exec_queue_add_msg(q, msg, CLEANUP); + q->guc->needs_cleanup = false; + } + + if (q->guc->needs_suspend) { + msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; + + xe_sched_msg_lock(sched); + guc_exec_queue_try_add_msg_head(q, msg, SUSPEND); + xe_sched_msg_unlock(sched); + + 
q->guc->needs_suspend = false; + } + + /* + * The resume must be in the message queue before the suspend as it is + * not possible for a resume to be issued if a suspend pending is, but + * the inverse is possible. + */ + if (q->guc->needs_resume) { + msg = q->guc->static_msgs + STATIC_MSG_RESUME; + + xe_sched_msg_lock(sched); + guc_exec_queue_try_add_msg_head(q, msg, RESUME); + xe_sched_msg_unlock(sched); + + q->guc->needs_resume = false; + } +} + +static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q) +{ + struct xe_gpu_scheduler *sched = &q->guc->sched; + bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q); + + lockdep_assert_held(&guc->submission_state.lock); + + xe_sched_resubmit_jobs(sched); + guc_exec_queue_replay_pending_state_change(q); + xe_sched_submission_start(sched); + if (needs_tdr) + xe_guc_exec_queue_trigger_cleanup(q); + xe_sched_submission_resume_tdr(sched); +} + +/** + * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC. + * @guc: the &xe_guc struct instance whose scheduler is to be enabled + */ +void xe_guc_submit_unpause(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + /* + * Prevent redundant attempts to stop parallel queues, or queues + * created after resfix done. + */ + if (q->guc->id != index || + !READ_ONCE(q->guc->sched.base.pause_submit)) + continue; + + guc_exec_queue_unpause(guc, q); + } + mutex_unlock(&guc->submission_state.lock); +} + +/** + * xe_guc_submit_pause_abort - Abort all paused submission task on given GuC. 
+ * @guc: the &xe_guc struct instance whose scheduler is to be aborted + */ +void xe_guc_submit_pause_abort(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + struct xe_gpu_scheduler *sched = &q->guc->sched; + + /* Prevent redundant attempts to stop parallel queues */ + if (q->guc->id != index) + continue; + + xe_sched_submission_start(sched); + if (exec_queue_killed_or_banned_or_wedged(q)) + xe_guc_exec_queue_trigger_cleanup(q); + } + mutex_unlock(&guc->submission_state.lock); +} + static struct xe_exec_queue * g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) { @@ -1850,7 +2428,7 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); if (unlikely(!q)) { - xe_gt_err(gt, "Not engine present for guc_id %u\n", guc_id); + xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id); return NULL; } @@ -1886,6 +2464,8 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q)); q->guc->resume_time = ktime_get(); + clear_exec_queue_pending_resume(q); + clear_exec_queue_pending_tdr_exit(q); clear_exec_queue_pending_enable(q); smp_wmb(); wake_up_all(&guc->ct.wq); @@ -1960,7 +2540,7 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) xe_exec_queue_put(q); else - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) @@ -2057,12 +2637,16 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; u32 guc_id; + u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; - if (unlikely(len < 1)) + if (unlikely(!len || len > 2)) return -EPROTO; guc_id = msg[0]; + if (len 
== 2) + type = msg[1]; + if (guc_id == GUC_ID_UNKNOWN) { /* * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF @@ -2076,8 +2660,19 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, if (unlikely(!q)) return -EPROTO; - xe_gt_dbg(gt, "Engine memory cat error: engine_class=%s, logical_mask: 0x%x, guc_id=%d", - xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); + /* + * The type is HW-defined and changes based on platform, so we don't + * decode it in the kernel and only check if it is valid. + * See bspec 54047 and 72187 for details. + */ + if (type != XE_GUC_CAT_ERR_TYPE_INVALID) + xe_gt_dbg(gt, + "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", + type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); + else + xe_gt_dbg(gt, + "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", + xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); trace_xe_exec_queue_memory_cat_error(q); @@ -2334,6 +2929,34 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) } /** + * xe_guc_register_vf_exec_queue - Register exec queue for a given context type. + * @q: Execution queue + * @ctx_type: Type of the context + * + * This function registers the execution queue with the guc. Special context + * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE + * are only applicable for IGPU and in the VF. + * Submits the execution queue to GUC after registering it. + * + * Returns - None. 
+ */ +void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + + xe_gt_assert(gt, IS_SRIOV_VF(xe)); + xe_gt_assert(gt, !IS_DGFX(xe)); + xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE || + ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE); + xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0)); + + register_exec_queue(q, ctx_type); + enable_scheduling(q); +} + +/** * xe_guc_submit_print - GuC Submit Print. * @guc: GuC. * @p: drm_printer where it will be printed out. @@ -2353,3 +2976,32 @@ void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) guc_exec_queue_print(q, p); mutex_unlock(&guc->submission_state.lock); } + +/** + * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all + * exec queues registered to given GuC. + * @guc: the &xe_guc struct instance + * @scratch: scratch buffer to be used as temporary storage + * + * Returns: zero on success, negative error code on failure. 
+ */ +int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) +{ + struct xe_exec_queue *q; + unsigned long index; + int err = 0; + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + /* Prevent redundant attempts to stop parallel queues */ + if (q->guc->id != index) + continue; + + err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); + if (err) + break; + } + mutex_unlock(&guc->submission_state.lock); + + return err; +} diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 9b71a986c6ca..b49a2748ec46 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -13,11 +13,17 @@ struct xe_exec_queue; struct xe_guc; int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids); +int xe_guc_submit_enable(struct xe_guc *guc); +void xe_guc_submit_disable(struct xe_guc *guc); int xe_guc_submit_reset_prepare(struct xe_guc *guc); void xe_guc_submit_reset_wait(struct xe_guc *guc); void xe_guc_submit_stop(struct xe_guc *guc); int xe_guc_submit_start(struct xe_guc *guc); +void xe_guc_submit_pause(struct xe_guc *guc); +void xe_guc_submit_unpause(struct xe_guc *guc); +void xe_guc_submit_unpause_prepare(struct xe_guc *guc); +void xe_guc_submit_pause_abort(struct xe_guc *guc); void xe_guc_submit_wedge(struct xe_guc *guc); int xe_guc_read_stopped(struct xe_guc *guc); @@ -39,5 +45,8 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot); void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p); +void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type); + +int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c new file mode 100644 index 000000000000..a80175c7c478 --- /dev/null +++ 
b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "abi/guc_actions_abi.h" + +#include "xe_device.h" +#include "xe_gt_stats.h" +#include "xe_gt_types.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_tlb_inval.h" +#include "xe_force_wake.h" +#include "xe_mmio.h" +#include "xe_tlb_inval.h" + +#include "regs/xe_guc_regs.h" + +/* + * XXX: The seqno algorithm relies on TLB invalidation being processed in order + * which they currently are by the GuC, if that changes the algorithm will need + * to be updated. + */ + +static int send_tlb_inval(struct xe_guc *guc, const u32 *action, int len) +{ + struct xe_gt *gt = guc_to_gt(guc); + + xe_gt_assert(gt, action[1]); /* Seqno */ + + xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1); + return xe_guc_ct_send(&guc->ct, action, len, + G2H_LEN_DW_TLB_INVALIDATE, 1); +} + +#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ + XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \ + XE_GUC_TLB_INVAL_FLUSH_CACHE) + +static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval, u32 seqno) +{ + struct xe_guc *guc = tlb_inval->private; + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION_ALL, + seqno, + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL), + }; + + return send_tlb_inval(guc, action, ARRAY_SIZE(action)); +} + +static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno) +{ + struct xe_guc *guc = tlb_inval->private; + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = guc_to_xe(guc); + + /* + * Returning -ECANCELED in this function is squashed at the caller and + * signals waiters. 
+ */ + + if (xe_guc_ct_enabled(&guc->ct) && guc->submission_state.enabled) { + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION, + seqno, + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), + }; + + return send_tlb_inval(guc, action, ARRAY_SIZE(action)); + } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { + struct xe_mmio *mmio = >->mmio; + unsigned int fw_ref; + + if (IS_SRIOV_VF(xe)) + return -ECANCELED; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { + xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1, + PVC_GUC_TLB_INV_DESC1_INVALIDATE); + xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0, + PVC_GUC_TLB_INV_DESC0_VALID); + } else { + xe_mmio_write32(mmio, GUC_TLB_INV_CR, + GUC_TLB_INV_CR_INVALIDATE); + } + xe_force_wake_put(gt_to_fw(gt), fw_ref); + } + + return -ECANCELED; +} + +/* + * Ensure that roundup_pow_of_two(length) doesn't overflow. + * Note that roundup_pow_of_two() operates on unsigned long, + * not on u64. + */ +#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) + +static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, + u64 start, u64 end, u32 asid) +{ +#define MAX_TLB_INVALIDATION_LEN 7 + struct xe_guc *guc = tlb_inval->private; + struct xe_gt *gt = guc_to_gt(guc); + u32 action[MAX_TLB_INVALIDATION_LEN]; + u64 length = end - start; + int len = 0; + + if (guc_to_xe(guc)->info.force_execlist) + return -ECANCELED; + + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; + action[len++] = seqno; + if (!gt_to_xe(gt)->info.has_range_tlb_inval || + length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { + action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); + } else { + u64 orig_start = start; + u64 align; + + if (length < SZ_4K) + length = SZ_4K; + + /* + * We need to invalidate a higher granularity if start address + * is not aligned to length. 
When start is not aligned with + * length we need to find the length large enough to create an + * address mask covering the required range. + */ + align = roundup_pow_of_two(length); + start = ALIGN_DOWN(start, align); + end = ALIGN(end, align); + length = align; + while (start + length < end) { + length <<= 1; + start = ALIGN_DOWN(orig_start, length); + } + + /* + * Minimum invalidation size for a 2MB page that the hardware + * expects is 16MB + */ + if (length >= SZ_2M) { + length = max_t(u64, SZ_16M, length); + start = ALIGN_DOWN(orig_start, length); + } + + xe_gt_assert(gt, length >= SZ_4K); + xe_gt_assert(gt, is_power_of_2(length)); + xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, + ilog2(SZ_2M) + 1))); + xe_gt_assert(gt, IS_ALIGNED(start, length)); + + action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); + action[len++] = asid; + action[len++] = lower_32_bits(start); + action[len++] = upper_32_bits(start); + action[len++] = ilog2(length) - ilog2(SZ_4K); + } + + xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); + + return send_tlb_inval(guc, action, len); +} + +static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval) +{ + struct xe_guc *guc = tlb_inval->private; + + return xe_guc_ct_initialized(&guc->ct); +} + +static void tlb_inval_flush(struct xe_tlb_inval *tlb_inval) +{ + struct xe_guc *guc = tlb_inval->private; + + LNL_FLUSH_WORK(&guc->ct.g2h_worker); +} + +static long tlb_inval_timeout_delay(struct xe_tlb_inval *tlb_inval) +{ + struct xe_guc *guc = tlb_inval->private; + + /* this reflects what HW/GuC needs to process TLB inv request */ + const long hw_tlb_timeout = HZ / 4; + + /* this estimates actual delay caused by the CTB transport */ + long delay = xe_guc_ct_queue_proc_time_jiffies(&guc->ct); + + return hw_tlb_timeout + 2 * delay; +} + +static const struct xe_tlb_inval_ops guc_tlb_inval_ops = { + .all = send_tlb_inval_all, + .ggtt = send_tlb_inval_ggtt, + .ppgtt = send_tlb_inval_ppgtt, + .initialized = 
tlb_inval_initialized, + .flush = tlb_inval_flush, + .timeout_delay = tlb_inval_timeout_delay, +}; + +/** + * xe_guc_tlb_inval_init_early() - Init GuC TLB invalidation early + * @guc: GuC object + * @tlb_inval: TLB invalidation client + * + * Initialize GuC TLB invalidation by setting back pointer in TLB invalidation + * client to the GuC and setting GuC backend ops. + */ +void xe_guc_tlb_inval_init_early(struct xe_guc *guc, + struct xe_tlb_inval *tlb_inval) +{ + tlb_inval->private = guc; + tlb_inval->ops = &guc_tlb_inval_ops; +} + +/** + * xe_guc_tlb_inval_done_handler() - TLB invalidation done handler + * @guc: guc + * @msg: message indicating TLB invalidation done + * @len: length of message + * + * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any + * invalidation fences for seqno. Algorithm for this depends on seqno being + * received in-order and asserts this assumption. + * + * Return: 0 on success, -EPROTO for malformed messages. + */ +int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_gt *gt = guc_to_gt(guc); + + if (unlikely(len != 1)) + return -EPROTO; + + xe_tlb_inval_done_handler(>->tlb_inval, msg[0]); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.h b/drivers/gpu/drm/xe/xe_guc_tlb_inval.h new file mode 100644 index 000000000000..07d668b02e3d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_GUC_TLB_INVAL_H_ +#define _XE_GUC_TLB_INVAL_H_ + +#include <linux/types.h> + +struct xe_guc; +struct xe_tlb_inval; + +void xe_guc_tlb_inval_init_early(struct xe_guc *guc, + struct xe_tlb_inval *tlb_inval); + +int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 1fde7614fcc5..c7b9642b41ba 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ 
b/drivers/gpu/drm/xe/xe_guc_types.h @@ -85,6 +85,12 @@ struct xe_guc { struct xarray exec_queue_lookup; /** @submission_state.stopped: submissions are stopped */ atomic_t stopped; + /** + * @submission_state.reset_blocked: reset attempts are blocked; + * blocking reset in order to delay it may be required if running + * an operation which is sensitive to resets. + */ + atomic_t reset_blocked; /** @submission_state.lock: protects submission state */ struct mutex lock; /** @submission_state.enabled: submission is enabled */ diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index 27d11e06a82b..495cdd4f948d 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -8,18 +8,17 @@ #include <linux/pci.h> #include <linux/sizes.h> +#include <drm/drm_print.h> + #include "xe_device_types.h" #include "xe_drv.h" #include "xe_heci_gsc.h" +#include "regs/xe_gsc_regs.h" #include "xe_platform_types.h" #include "xe_survivability_mode.h" #define GSC_BAR_LENGTH 0x00000FFC -#define DG1_GSC_HECI2_BASE 0x259000 -#define PVC_GSC_HECI2_BASE 0x285000 -#define DG2_GSC_HECI2_BASE 0x374000 - static void heci_gsc_irq_mask(struct irq_data *d) { /* generic irq handling */ @@ -200,7 +199,7 @@ int xe_heci_gsc_init(struct xe_device *xe) if (ret) return ret; - if (!def->use_polling && !xe_survivability_mode_is_enabled(xe)) { + if (!def->use_polling && !xe_survivability_mode_is_boot_enabled(xe)) { ret = heci_gsc_irq_setup(xe); if (ret) return ret; @@ -224,7 +223,7 @@ void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir) if (xe->heci_gsc.irq < 0) return; - ret = generic_handle_irq(xe->heci_gsc.irq); + ret = generic_handle_irq_safe(xe->heci_gsc.irq); if (ret) drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret); } @@ -244,7 +243,7 @@ void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir) if (xe->heci_gsc.irq < 0) return; - ret = generic_handle_irq(xe->heci_gsc.irq); + ret = generic_handle_irq_safe(xe->heci_gsc.irq); if 
(ret) drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret); } diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c deleted file mode 100644 index 57b71956ddf4..000000000000 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ /dev/null @@ -1,325 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2024 Intel Corporation - */ - -#include <linux/scatterlist.h> -#include <linux/mmu_notifier.h> -#include <linux/dma-mapping.h> -#include <linux/memremap.h> -#include <linux/swap.h> -#include <linux/hmm.h> -#include <linux/mm.h> -#include "xe_hmm.h" -#include "xe_vm.h" -#include "xe_bo.h" - -static u64 xe_npages_in_range(unsigned long start, unsigned long end) -{ - return (end - start) >> PAGE_SHIFT; -} - -static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st, - struct hmm_range *range, struct rw_semaphore *notifier_sem) -{ - unsigned long i, npages, hmm_pfn; - unsigned long num_chunks = 0; - int ret; - - /* HMM docs says this is needed. */ - ret = down_read_interruptible(notifier_sem); - if (ret) - return ret; - - if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) { - up_read(notifier_sem); - return -EAGAIN; - } - - npages = xe_npages_in_range(range->start, range->end); - for (i = 0; i < npages;) { - unsigned long len; - - hmm_pfn = range->hmm_pfns[i]; - xe_assert(xe, hmm_pfn & HMM_PFN_VALID); - - len = 1UL << hmm_pfn_to_map_order(hmm_pfn); - - /* If order > 0 the page may extend beyond range->start */ - len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1); - i += len; - num_chunks++; - } - up_read(notifier_sem); - - return sg_alloc_table(st, num_chunks, GFP_KERNEL); -} - -/** - * xe_build_sg() - build a scatter gather table for all the physical pages/pfn - * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table - * and will be used to program GPU page table later. - * @xe: the xe device who will access the dma-address in sg table - * @range: the hmm range that we build the sg table from. 
range->hmm_pfns[] - * has the pfn numbers of pages that back up this hmm address range. - * @st: pointer to the sg table. - * @notifier_sem: The xe notifier lock. - * @write: whether we write to this range. This decides dma map direction - * for system pages. If write we map it bi-diretional; otherwise - * DMA_TO_DEVICE - * - * All the contiguous pfns will be collapsed into one entry in - * the scatter gather table. This is for the purpose of efficiently - * programming GPU page table. - * - * The dma_address in the sg table will later be used by GPU to - * access memory. So if the memory is system memory, we need to - * do a dma-mapping so it can be accessed by GPU/DMA. - * - * FIXME: This function currently only support pages in system - * memory. If the memory is GPU local memory (of the GPU who - * is going to access memory), we need gpu dpa (device physical - * address), and there is no need of dma-mapping. This is TBD. - * - * FIXME: dma-mapping for peer gpu device to access remote gpu's - * memory. Add this when you support p2p - * - * This function allocates the storage of the sg table. It is - * caller's responsibility to free it calling sg_free_table. 
- * - * Returns 0 if successful; -ENOMEM if fails to allocate memory - */ -static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, - struct sg_table *st, - struct rw_semaphore *notifier_sem, - bool write) -{ - unsigned long npages = xe_npages_in_range(range->start, range->end); - struct device *dev = xe->drm.dev; - struct scatterlist *sgl; - struct page *page; - unsigned long i, j; - - lockdep_assert_held(notifier_sem); - - i = 0; - for_each_sg(st->sgl, sgl, st->nents, j) { - unsigned long hmm_pfn, size; - - hmm_pfn = range->hmm_pfns[i]; - page = hmm_pfn_to_page(hmm_pfn); - xe_assert(xe, !is_device_private_page(page)); - - size = 1UL << hmm_pfn_to_map_order(hmm_pfn); - size -= page_to_pfn(page) & (size - 1); - i += size; - - if (unlikely(j == st->nents - 1)) { - xe_assert(xe, i >= npages); - if (i > npages) - size -= (i - npages); - - sg_mark_end(sgl); - } else { - xe_assert(xe, i < npages); - } - - sg_set_page(sgl, page, size << PAGE_SHIFT, 0); - } - - return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); -} - -static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma) -{ - struct xe_userptr *userptr = &uvma->userptr; - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - - lockdep_assert_held_write(&vm->lock); - lockdep_assert_held(&vm->userptr.notifier_lock); - - mutex_lock(&userptr->unmap_mutex); - xe_assert(vm->xe, !userptr->mapped); - userptr->mapped = true; - mutex_unlock(&userptr->unmap_mutex); -} - -void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma) -{ - struct xe_userptr *userptr = &uvma->userptr; - struct xe_vma *vma = &uvma->vma; - bool write = !xe_vma_read_only(vma); - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_device *xe = vm->xe; - - if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) && - !lockdep_is_held_type(&vm->lock, 0) && - !(vma->gpuva.flags & XE_VMA_DESTROYED)) { - /* Don't unmap in exec critical section. 
*/ - xe_vm_assert_held(vm); - /* Don't unmap while mapping the sg. */ - lockdep_assert_held(&vm->lock); - } - - mutex_lock(&userptr->unmap_mutex); - if (userptr->sg && userptr->mapped) - dma_unmap_sgtable(xe->drm.dev, userptr->sg, - write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); - userptr->mapped = false; - mutex_unlock(&userptr->unmap_mutex); -} - -/** - * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr - * @uvma: the userptr vma which hold the scatter gather table - * - * With function xe_userptr_populate_range, we allocate storage of - * the userptr sg table. This is a helper function to free this - * sg table, and dma unmap the address in the table. - */ -void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma) -{ - struct xe_userptr *userptr = &uvma->userptr; - - xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg); - xe_hmm_userptr_unmap(uvma); - sg_free_table(userptr->sg); - userptr->sg = NULL; -} - -/** - * xe_hmm_userptr_populate_range() - Populate physical pages of a virtual - * address range - * - * @uvma: userptr vma which has information of the range to populate. - * @is_mm_mmap_locked: True if mmap_read_lock is already acquired by caller. - * - * This function populate the physical pages of a virtual - * address range. The populated physical pages is saved in - * userptr's sg table. It is similar to get_user_pages but call - * hmm_range_fault. - * - * This function also read mmu notifier sequence # ( - * mmu_interval_read_begin), for the purpose of later - * comparison (through mmu_interval_read_retry). - * - * This must be called with mmap read or write lock held. - * - * This function allocates the storage of the userptr sg table. - * It is caller's responsibility to free it calling sg_free_table. 
- * - * returns: 0 for success; negative error no on failure - */ -int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, - bool is_mm_mmap_locked) -{ - unsigned long timeout = - jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - unsigned long *pfns; - struct xe_userptr *userptr; - struct xe_vma *vma = &uvma->vma; - u64 userptr_start = xe_vma_userptr(vma); - u64 userptr_end = userptr_start + xe_vma_size(vma); - struct xe_vm *vm = xe_vma_vm(vma); - struct hmm_range hmm_range = { - .pfn_flags_mask = 0, /* ignore pfns */ - .default_flags = HMM_PFN_REQ_FAULT, - .start = userptr_start, - .end = userptr_end, - .notifier = &uvma->userptr.notifier, - .dev_private_owner = vm->xe, - }; - bool write = !xe_vma_read_only(vma); - unsigned long notifier_seq; - u64 npages; - int ret; - - userptr = &uvma->userptr; - - if (is_mm_mmap_locked) - mmap_assert_locked(userptr->notifier.mm); - - if (vma->gpuva.flags & XE_VMA_DESTROYED) - return 0; - - notifier_seq = mmu_interval_read_begin(&userptr->notifier); - if (notifier_seq == userptr->notifier_seq) - return 0; - - if (userptr->sg) - xe_hmm_userptr_free_sg(uvma); - - npages = xe_npages_in_range(userptr_start, userptr_end); - pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); - if (unlikely(!pfns)) - return -ENOMEM; - - if (write) - hmm_range.default_flags |= HMM_PFN_REQ_WRITE; - - if (!mmget_not_zero(userptr->notifier.mm)) { - ret = -EFAULT; - goto free_pfns; - } - - hmm_range.hmm_pfns = pfns; - - while (true) { - hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier); - - if (!is_mm_mmap_locked) - mmap_read_lock(userptr->notifier.mm); - - ret = hmm_range_fault(&hmm_range); - - if (!is_mm_mmap_locked) - mmap_read_unlock(userptr->notifier.mm); - - if (ret == -EBUSY) { - if (time_after(jiffies, timeout)) - break; - - continue; - } - break; - } - - mmput(userptr->notifier.mm); - - if (ret) - goto free_pfns; - - ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock); - if (ret) 
- goto free_pfns; - - ret = down_read_interruptible(&vm->userptr.notifier_lock); - if (ret) - goto free_st; - - if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) { - ret = -EAGAIN; - goto out_unlock; - } - - ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, - &vm->userptr.notifier_lock, write); - if (ret) - goto out_unlock; - - userptr->sg = &userptr->sgt; - xe_hmm_userptr_set_mapped(uvma); - userptr->notifier_seq = hmm_range.notifier_seq; - up_read(&vm->userptr.notifier_lock); - kvfree(pfns); - return 0; - -out_unlock: - up_read(&vm->userptr.notifier_lock); -free_st: - sg_free_table(&userptr->sgt); -free_pfns: - kvfree(pfns); - return ret; -} diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h deleted file mode 100644 index 0ea98d8e7bbc..000000000000 --- a/drivers/gpu/drm/xe/xe_hmm.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Copyright © 2024 Intel Corporation - */ - -#ifndef _XE_HMM_H_ -#define _XE_HMM_H_ - -#include <linux/types.h> - -struct xe_userptr_vma; - -int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked); - -void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma); - -void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma); -#endif diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 6a846e4cb221..0a70c8924582 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -66,14 +66,18 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc) int xe_huc_init(struct xe_huc *huc) { struct xe_gt *gt = huc_to_gt(huc); - struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); int ret; huc->fw.type = XE_UC_FW_TYPE_HUC; - /* On platforms with a media GT the HuC is only available there */ - if (tile->media_gt && (gt != tile->media_gt)) { + /* + * The HuC is only available on the media GT on most platforms. 
The + * exception to that rule are the old Xe1 platforms where there was + * no separate GT for media IP, so the HuC was part of the primary + * GT. Such platforms have graphics versions 12.55 and earlier. + */ + if (!xe_gt_is_media_type(gt) && GRAPHICS_VERx100(xe) > 1255) { xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED); return 0; } @@ -171,7 +175,7 @@ static int huc_auth_via_gsccs(struct xe_huc *huc) sizeof(struct pxp43_new_huc_auth_in)); wr_offset = huc_emit_pxp_auth_msg(xe, &pkt->vmap, wr_offset, xe_bo_ggtt_addr(huc->fw.bo), - huc->fw.bo->size); + xe_bo_size(huc->fw.bo)); do { err = xe_gsc_pkt_submit_kernel(>->uc.gsc, ggtt_offset, wr_offset, ggtt_offset + PXP43_HUC_AUTH_INOUT_SIZE, diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 93241fd0a4ba..6a9e2a4272dd 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -17,6 +17,7 @@ #include "regs/xe_irq_regs.h" #include "xe_assert.h" #include "xe_bo.h" +#include "xe_configfs.h" #include "xe_device.h" #include "xe_execlist.h" #include "xe_force_wake.h" @@ -345,17 +346,26 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); } -static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt, +static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { + /* + * Xe3p no longer supports load balance mode, so "fixed cslice" mode + * is automatic and no RCU_MODE programming is required. 
+ */ + if (GRAPHICS_VER(gt_to_xe(gt)) >= 35) + return false; + return xe_gt_ccs_mode_enabled(gt) && - xe_rtp_match_first_render_or_compute(gt, hwe); + xe_rtp_match_first_render_or_compute(xe, gt, hwe); } -static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt, +static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { - if (GRAPHICS_VER(gt_to_xe(gt)) < 20) + if (GRAPHICS_VER(xe) < 20) return false; if (hwe->class != XE_ENGINE_CLASS_COMPUTE && @@ -575,7 +585,7 @@ static void adjust_idledly(struct xe_hw_engine *hwe) u32 maxcnt_units_ns = 640; bool inhibit_switch = 0; - if (!IS_SRIOV_VF(gt_to_xe(hwe->gt)) && XE_WA(gt, 16023105232)) { + if (!IS_SRIOV_VF(gt_to_xe(hwe->gt)) && XE_GT_WA(gt, 16023105232)) { idledly = xe_mmio_read32(>->mmio, RING_IDLEDLY(hwe->mmio_base)); maxcnt = xe_mmio_read32(>->mmio, RING_PWRCTX_MAXCNT(hwe->mmio_base)); @@ -693,7 +703,7 @@ static void read_media_fuses(struct xe_gt *gt) if (!(BIT(j) & vdbox_mask)) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "vcs%u fused off\n", j); + xe_gt_info(gt, "vcs%u fused off\n", j); } } @@ -703,40 +713,63 @@ static void read_media_fuses(struct xe_gt *gt) if (!(BIT(j) & vebox_mask)) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "vecs%u fused off\n", j); + xe_gt_info(gt, "vecs%u fused off\n", j); } } } +static u32 infer_svccopy_from_meml3(struct xe_gt *gt) +{ + u32 meml3 = REG_FIELD_GET(MEML3_EN_MASK, + xe_mmio_read32(>->mmio, MIRROR_FUSE3)); + u32 svccopy_mask = 0; + + /* + * Each of the four meml3 bits determines the fusing of two service + * copy engines. + */ + for (int i = 0; i < 4; i++) + svccopy_mask |= (meml3 & BIT(i)) ? 
0b11 << 2 * i : 0; + + return svccopy_mask; +} + +static u32 read_svccopy_fuses(struct xe_gt *gt) +{ + return REG_FIELD_GET(FUSE_SERVICE_COPY_ENABLE_MASK, + xe_mmio_read32(>->mmio, SERVICE_COPY_ENABLE)); +} + static void read_copy_fuses(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); u32 bcs_mask; - if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270) - return; - xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - bcs_mask = xe_mmio_read32(>->mmio, MIRROR_FUSE3); - bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask); + if (GRAPHICS_VER(xe) >= 35) + bcs_mask = read_svccopy_fuses(gt); + else if (GRAPHICS_VERx100(xe) == 1260) + bcs_mask = infer_svccopy_from_meml3(gt); + else + return; - /* BCS0 is always present; only BCS1-BCS8 may be fused off */ - for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) { + /* Only BCS1-BCS8 may be fused off */ + bcs_mask <<= XE_HW_ENGINE_BCS1; + for (int i = XE_HW_ENGINE_BCS1; i <= XE_HW_ENGINE_BCS8; ++i) { if (!(gt->info.engine_mask & BIT(i))) continue; - if (!(BIT(j / 2) & bcs_mask)) { + if (!(bcs_mask & BIT(i))) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "bcs%u fused off\n", j); + xe_gt_info(gt, "bcs%u fused off\n", + i - XE_HW_ENGINE_BCS0); } } } static void read_compute_fuses_from_dss(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); - /* * CCS fusing based on DSS masks only applies to platforms that can * have more than one CCS. 
@@ -755,14 +788,13 @@ static void read_compute_fuses_from_dss(struct xe_gt *gt) if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "ccs%u fused off\n", j); + xe_gt_info(gt, "ccs%u fused off\n", j); } } } static void read_compute_fuses_from_reg(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); u32 ccs_mask; ccs_mask = xe_mmio_read32(>->mmio, XEHP_FUSE4); @@ -774,7 +806,7 @@ static void read_compute_fuses_from_reg(struct xe_gt *gt) if ((ccs_mask & BIT(j)) == 0) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "ccs%u fused off\n", j); + xe_gt_info(gt, "ccs%u fused off\n", j); } } } @@ -789,8 +821,6 @@ static void read_compute_fuses(struct xe_gt *gt) static void check_gsc_availability(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); - if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))) return; @@ -806,7 +836,25 @@ static void check_gsc_availability(struct xe_gt *gt) xe_mmio_write32(>->mmio, GUNIT_GSC_INTR_ENABLE, 0); xe_mmio_write32(>->mmio, GUNIT_GSC_INTR_MASK, ~0); - drm_dbg(&xe->drm, "GSC FW not used, disabling gsccs\n"); + xe_gt_dbg(gt, "GSC FW not used, disabling gsccs\n"); + } +} + +static void check_sw_disable(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + u64 sw_allowed = xe_configfs_get_engines_allowed(to_pci_dev(xe->drm.dev)); + enum xe_hw_engine_id id; + + for (id = 0; id < XE_NUM_HW_ENGINES; ++id) { + if (!(gt->info.engine_mask & BIT(id))) + continue; + + if (!(sw_allowed & BIT(id))) { + gt->info.engine_mask &= ~BIT(id); + xe_gt_info(gt, "%s disabled via configfs\n", + engine_infos[id].name); + } } } @@ -818,6 +866,7 @@ int xe_hw_engines_init_early(struct xe_gt *gt) read_copy_fuses(gt); read_compute_fuses(gt); check_gsc_availability(gt); + check_sw_disable(gt); BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN); BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX); @@ -855,7 +904,7 @@ void 
xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) if (hwe->irq_handler) hwe->irq_handler(hwe, intr_vec); - if (intr_vec & GT_RENDER_USER_INTERRUPT) + if (intr_vec & GT_MI_USER_INTERRUPT) xe_hw_fence_irq_run(hwe->fence_irq); } @@ -1044,12 +1093,13 @@ struct xe_hw_engine * xe_hw_engine_lookup(struct xe_device *xe, struct drm_xe_engine_class_instance eci) { + struct xe_gt *gt = xe_device_get_gt(xe, eci.gt_id); unsigned int idx; if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) return NULL; - if (eci.gt_id >= xe->info.gt_count) + if (!gt) return NULL; idx = array_index_nospec(eci.engine_class, diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 2d68c5b5262a..fa4db5f23342 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -13,15 +13,6 @@ #include "xe_vm.h" static void -hw_engine_group_free(struct drm_device *drm, void *arg) -{ - struct xe_hw_engine_group *group = arg; - - destroy_workqueue(group->resume_wq); - kfree(group); -} - -static void hw_engine_group_resume_lr_jobs_func(struct work_struct *w) { struct xe_exec_queue *q; @@ -53,7 +44,7 @@ hw_engine_group_alloc(struct xe_device *xe) struct xe_hw_engine_group *group; int err; - group = kzalloc(sizeof(*group), GFP_KERNEL); + group = drmm_kzalloc(&xe->drm, sizeof(*group), GFP_KERNEL); if (!group) return ERR_PTR(-ENOMEM); @@ -61,14 +52,14 @@ hw_engine_group_alloc(struct xe_device *xe) if (!group->resume_wq) return ERR_PTR(-ENOMEM); + err = drmm_add_action_or_reset(&xe->drm, __drmm_workqueue_release, group->resume_wq); + if (err) + return ERR_PTR(err); + init_rwsem(&group->mode_sem); INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); INIT_LIST_HEAD(&group->exec_queue_list); - err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); - if (err) - return ERR_PTR(err); - return group; } @@ -84,25 +75,18 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) enum 
xe_hw_engine_id id; struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs; struct xe_device *xe = gt_to_xe(gt); - int err; group_rcs_ccs = hw_engine_group_alloc(xe); - if (IS_ERR(group_rcs_ccs)) { - err = PTR_ERR(group_rcs_ccs); - goto err_group_rcs_ccs; - } + if (IS_ERR(group_rcs_ccs)) + return PTR_ERR(group_rcs_ccs); group_bcs = hw_engine_group_alloc(xe); - if (IS_ERR(group_bcs)) { - err = PTR_ERR(group_bcs); - goto err_group_bcs; - } + if (IS_ERR(group_bcs)) + return PTR_ERR(group_bcs); group_vcs_vecs = hw_engine_group_alloc(xe); - if (IS_ERR(group_vcs_vecs)) { - err = PTR_ERR(group_vcs_vecs); - goto err_group_vcs_vecs; - } + if (IS_ERR(group_vcs_vecs)) + return PTR_ERR(group_vcs_vecs); for_each_hw_engine(hwe, gt, id) { switch (hwe->class) { @@ -119,21 +103,12 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) break; case XE_ENGINE_CLASS_OTHER: break; - default: - drm_warn(&xe->drm, "NOT POSSIBLE"); + case XE_ENGINE_CLASS_MAX: + xe_gt_assert(gt, false); } } return 0; - -err_group_vcs_vecs: - kfree(group_vcs_vecs); -err_group_bcs: - kfree(group_bcs); -err_group_rcs_ccs: - kfree(group_rcs_ccs); - - return err; } /** @@ -238,17 +213,13 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group err = q->ops->suspend_wait(q); if (err) - goto err_suspend; + return err; } if (need_resume) xe_hw_engine_group_resume_faulting_lr_jobs(group); return 0; - -err_suspend: - up_write(&group->mode_sem); - return err; } /** diff --git a/drivers/gpu/drm/xe/xe_hw_error.c b/drivers/gpu/drm/xe/xe_hw_error.c new file mode 100644 index 000000000000..8c65291f36fc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_error.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/fault-inject.h> + +#include "regs/xe_gsc_regs.h" +#include "regs/xe_hw_error_regs.h" +#include "regs/xe_irq_regs.h" + +#include "xe_device.h" +#include "xe_hw_error.h" +#include "xe_mmio.h" +#include 
"xe_survivability_mode.h" + +#define HEC_UNCORR_FW_ERR_BITS 4 +extern struct fault_attr inject_csc_hw_error; + +/* Error categories reported by hardware */ +enum hardware_error { + HARDWARE_ERROR_CORRECTABLE = 0, + HARDWARE_ERROR_NONFATAL = 1, + HARDWARE_ERROR_FATAL = 2, + HARDWARE_ERROR_MAX, +}; + +static const char * const hec_uncorrected_fw_errors[] = { + "Fatal", + "CSE Disabled", + "FD Corruption", + "Data Corruption" +}; + +static const char *hw_error_to_str(const enum hardware_error hw_err) +{ + switch (hw_err) { + case HARDWARE_ERROR_CORRECTABLE: + return "CORRECTABLE"; + case HARDWARE_ERROR_NONFATAL: + return "NONFATAL"; + case HARDWARE_ERROR_FATAL: + return "FATAL"; + default: + return "UNKNOWN"; + } +} + +static bool fault_inject_csc_hw_error(void) +{ + return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(&inject_csc_hw_error, 1); +} + +static void csc_hw_error_work(struct work_struct *work) +{ + struct xe_tile *tile = container_of(work, typeof(*tile), csc_hw_error_work); + struct xe_device *xe = tile_to_xe(tile); + int ret; + + ret = xe_survivability_mode_runtime_enable(xe); + if (ret) + drm_err(&xe->drm, "Failed to enable runtime survivability mode\n"); +} + +static void csc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err) +{ + const char *hw_err_str = hw_error_to_str(hw_err); + struct xe_device *xe = tile_to_xe(tile); + struct xe_mmio *mmio = &tile->mmio; + u32 base, err_bit, err_src; + unsigned long fw_err; + + if (xe->info.platform != XE_BATTLEMAGE) + return; + + base = BMG_GSC_HECI1_BASE; + lockdep_assert_held(&xe->irq.lock); + err_src = xe_mmio_read32(mmio, HEC_UNCORR_ERR_STATUS(base)); + if (!err_src) { + drm_err_ratelimited(&xe->drm, HW_ERR "Tile%d reported HEC_ERR_STATUS_%s blank\n", + tile->id, hw_err_str); + return; + } + + if (err_src & UNCORR_FW_REPORTED_ERR) { + fw_err = xe_mmio_read32(mmio, HEC_UNCORR_FW_ERR_DW0(base)); + for_each_set_bit(err_bit, &fw_err, HEC_UNCORR_FW_ERR_BITS) { + drm_err_ratelimited(&xe->drm, 
HW_ERR + "%s: HEC Uncorrected FW %s error reported, bit[%d] is set\n", + hw_err_str, hec_uncorrected_fw_errors[err_bit], + err_bit); + + schedule_work(&tile->csc_hw_error_work); + } + } + + xe_mmio_write32(mmio, HEC_UNCORR_ERR_STATUS(base), err_src); +} + +static void hw_error_source_handler(struct xe_tile *tile, const enum hardware_error hw_err) +{ + const char *hw_err_str = hw_error_to_str(hw_err); + struct xe_device *xe = tile_to_xe(tile); + unsigned long flags; + u32 err_src; + + if (xe->info.platform != XE_BATTLEMAGE) + return; + + spin_lock_irqsave(&xe->irq.lock, flags); + err_src = xe_mmio_read32(&tile->mmio, DEV_ERR_STAT_REG(hw_err)); + if (!err_src) { + drm_err_ratelimited(&xe->drm, HW_ERR "Tile%d reported DEV_ERR_STAT_%s blank!\n", + tile->id, hw_err_str); + goto unlock; + } + + if (err_src & XE_CSC_ERROR) + csc_hw_error_handler(tile, hw_err); + + xe_mmio_write32(&tile->mmio, DEV_ERR_STAT_REG(hw_err), err_src); + +unlock: + spin_unlock_irqrestore(&xe->irq.lock, flags); +} + +/** + * xe_hw_error_irq_handler - irq handling for hw errors + * @tile: tile instance + * @master_ctl: value read from master interrupt register + * + * Xe platforms add three error bits to the master interrupt register to support error handling. + * These three bits are used to convey the class of error FATAL, NONFATAL, or CORRECTABLE. + * To process the interrupt, determine the source of error by reading the Device Error Source + * Register that corresponds to the class of error being serviced. 
+ */ +void xe_hw_error_irq_handler(struct xe_tile *tile, const u32 master_ctl) +{ + enum hardware_error hw_err; + + if (fault_inject_csc_hw_error()) + schedule_work(&tile->csc_hw_error_work); + + for (hw_err = 0; hw_err < HARDWARE_ERROR_MAX; hw_err++) + if (master_ctl & ERROR_IRQ(hw_err)) + hw_error_source_handler(tile, hw_err); +} + +/* + * Process hardware errors during boot + */ +static void process_hw_errors(struct xe_device *xe) +{ + struct xe_tile *tile; + u32 master_ctl; + u8 id; + + for_each_tile(tile, xe, id) { + master_ctl = xe_mmio_read32(&tile->mmio, GFX_MSTR_IRQ); + xe_hw_error_irq_handler(tile, master_ctl); + xe_mmio_write32(&tile->mmio, GFX_MSTR_IRQ, master_ctl); + } +} + +/** + * xe_hw_error_init - Initialize hw errors + * @xe: xe device instance + * + * Initialize and check for errors that occurred during boot + * prior to driver load + */ +void xe_hw_error_init(struct xe_device *xe) +{ + struct xe_tile *tile = xe_device_get_root_tile(xe); + + if (!IS_DGFX(xe) || IS_SRIOV_VF(xe)) + return; + + INIT_WORK(&tile->csc_hw_error_work, csc_hw_error_work); + + process_hw_errors(xe); +} diff --git a/drivers/gpu/drm/xe/xe_hw_error.h b/drivers/gpu/drm/xe/xe_hw_error.h new file mode 100644 index 000000000000..d86e28c5180c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_error.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ +#ifndef XE_HW_ERROR_H_ +#define XE_HW_ERROR_H_ + +#include <linux/types.h> + +struct xe_tile; +struct xe_device; + +void xe_hw_error_irq_handler(struct xe_tile *tile, const u32 master_ctl); +void xe_hw_error_init(struct xe_device *xe); +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 0b4f12be3692..b2a0c46dfcd4 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -100,6 +100,9 @@ void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq) spin_unlock_irqrestore(&irq->lock, flags); dma_fence_end_signalling(tmp); } + 
+ /* Safe release of the irq->lock used in dma_fence_init. */ + synchronize_rcu(); } void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq) @@ -165,7 +168,7 @@ static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); return dma_fence->error || - !__dma_fence_is_later(dma_fence->seqno, seqno, dma_fence->ops); + !__dma_fence_is_later(dma_fence, dma_fence->seqno, seqno); } static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index eb293aec36a0..97879daeefc1 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -20,6 +20,8 @@ #include "xe_pcode_api.h" #include "xe_sriov.h" #include "xe_pm.h" +#include "xe_vsec.h" +#include "regs/xe_pmt.h" enum xe_hwmon_reg { REG_TEMP, @@ -51,6 +53,22 @@ enum xe_fan_channel { FAN_MAX, }; +/* Attribute index for powerX_xxx_interval sysfs entries */ +enum sensor_attr_power { + SENSOR_INDEX_PSYS_PL1, + SENSOR_INDEX_PKG_PL1, + SENSOR_INDEX_PSYS_PL2, + SENSOR_INDEX_PKG_PL2, +}; + +/* + * For platforms that support mailbox commands for power limits, REG_PKG_POWER_SKU_UNIT is + * not supported and below are SKU units to be used. + */ +#define PWR_UNIT 0x3 +#define ENERGY_UNIT 0xe +#define TIME_UNIT 0xa + /* * SF_* - scale factors for particular quantities according to hwmon spec. */ @@ -60,6 +78,19 @@ enum xe_fan_channel { #define SF_ENERGY 1000000 /* microjoules */ #define SF_TIME 1000 /* milliseconds */ +/* + * PL*_HWMON_ATTR - mapping of hardware power limits to corresponding hwmon power attribute. + */ +#define PL1_HWMON_ATTR hwmon_power_max +#define PL2_HWMON_ATTR hwmon_power_cap + +#define PWR_ATTR_TO_STR(attr) (((attr) == hwmon_power_max) ? "PL1" : "PL2") + +/* + * Timeout for power limit write mailbox command. 
+ */ +#define PL_WRITE_MBX_TIMEOUT_MS (1) + /** * struct xe_hwmon_energy_info - to accumulate energy */ @@ -100,8 +131,87 @@ struct xe_hwmon { struct xe_hwmon_energy_info ei[CHANNEL_MAX]; /** @fi: Fan info for fanN_input */ struct xe_hwmon_fan_info fi[FAN_MAX]; + /** @boot_power_limit_read: is boot power limits read */ + bool boot_power_limit_read; + /** @pl1_on_boot: power limit PL1 on boot */ + u32 pl1_on_boot[CHANNEL_MAX]; + /** @pl2_on_boot: power limit PL2 on boot */ + u32 pl2_on_boot[CHANNEL_MAX]; + }; +static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 attr, int channel, + u32 *uval) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); + u32 val0 = 0, val1 = 0; + int ret = 0; + + ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, + (channel == CHANNEL_CARD) ? + READ_PSYSGPU_POWER_LIMIT : + READ_PACKAGE_POWER_LIMIT, + hwmon->boot_power_limit_read ? + READ_PL_FROM_PCODE : READ_PL_FROM_FW), + &val0, &val1); + + if (ret) { + drm_dbg(&hwmon->xe->drm, "read failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n", + channel, val0, val1, ret); + *uval = 0; + return ret; + } + + /* return the value only if limit is enabled */ + if (attr == PL1_HWMON_ATTR) + *uval = (val0 & PWR_LIM_EN) ? val0 : 0; + else if (attr == PL2_HWMON_ATTR) + *uval = (val1 & PWR_LIM_EN) ? val1 : 0; + else if (attr == hwmon_power_label) + *uval = (val0 & PWR_LIM_EN) ? 1 : (val1 & PWR_LIM_EN) ? 1 : 0; + else + *uval = 0; + + return ret; +} + +static int xe_hwmon_pcode_rmw_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, + u32 clr, u32 set) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); + u32 val0 = 0, val1 = 0; + int ret = 0; + + ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, + (channel == CHANNEL_CARD) ? + READ_PSYSGPU_POWER_LIMIT : + READ_PACKAGE_POWER_LIMIT, + hwmon->boot_power_limit_read ? 
+ READ_PL_FROM_PCODE : READ_PL_FROM_FW), + &val0, &val1); + + if (ret) + drm_dbg(&hwmon->xe->drm, "read failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n", + channel, val0, val1, ret); + + if (attr == PL1_HWMON_ATTR) + val0 = (val0 & ~clr) | set; + else if (attr == PL2_HWMON_ATTR) + val1 = (val1 & ~clr) | set; + else + return -EIO; + + ret = xe_pcode_write64_timeout(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, + (channel == CHANNEL_CARD) ? + WRITE_PSYSGPU_POWER_LIMIT : + WRITE_PACKAGE_POWER_LIMIT, 0), + val0, val1, PL_WRITE_MBX_TIMEOUT_MS); + if (ret) + drm_dbg(&hwmon->xe->drm, "write failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n", + channel, val0, val1, ret); + return ret; +} + static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, int channel) { @@ -122,29 +232,19 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg } break; case REG_PKG_RAPL_LIMIT: - if (xe->info.platform == XE_BATTLEMAGE) { - if (channel == CHANNEL_PKG) - return BMG_PACKAGE_RAPL_LIMIT; - else - return BMG_PLATFORM_POWER_LIMIT; - } else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) return PVC_GT0_PACKAGE_RAPL_LIMIT; - } else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) { + else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) return PCU_CR_PACKAGE_RAPL_LIMIT; - } break; case REG_PKG_POWER_SKU: - if (xe->info.platform == XE_BATTLEMAGE) - return BMG_PACKAGE_POWER_SKU; - else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) return PVC_GT0_PACKAGE_POWER_SKU; else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) return PCU_CR_PACKAGE_POWER_SKU; break; case REG_PKG_POWER_SKU_UNIT: - if (xe->info.platform == XE_BATTLEMAGE) - return BMG_PACKAGE_POWER_SKU_UNIT; - else if (xe->info.platform == XE_PVC) + if (xe->info.platform == XE_PVC) return 
PVC_GT0_PACKAGE_POWER_SKU_UNIT; else if (xe->info.platform == XE_DG2) return PCU_CR_PACKAGE_POWER_SKU_UNIT; @@ -154,12 +254,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg return GT_PERF_STATUS; break; case REG_PKG_ENERGY_STATUS: - if (xe->info.platform == XE_BATTLEMAGE) { - if (channel == CHANNEL_PKG) - return BMG_PACKAGE_ENERGY_STATUS; - else - return BMG_PLATFORM_ENERGY_STATUS; - } else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { return PVC_GT0_PLATFORM_ENERGY_STATUS; } else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) { return PCU_CR_PACKAGE_ENERGY_STATUS; @@ -181,7 +276,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg return XE_REG(0); } -#define PL1_DISABLE 0 +#define PL_DISABLE 0 /* * HW allows arbitrary PL1 limits to be set but silently clamps these values to @@ -189,94 +284,145 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg * same pattern for sysfs, allow arbitrary PL1 limits to be set but display * clamped values when read. */ -static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *value) +static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value) { - u64 reg_val, min, max; + u32 reg_val = 0; struct xe_device *xe = hwmon->xe; struct xe_reg rapl_limit, pkg_power_sku; struct xe_mmio *mmio = xe_root_tile_mmio(xe); - rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); - pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + mutex_lock(&hwmon->hwmon_lock); - /* - * Valid check of REG_PKG_RAPL_LIMIT is already done in xe_hwmon_power_is_visible. - * So not checking it again here. 
- */ - if (!xe_reg_is_valid(pkg_power_sku)) { - drm_warn(&xe->drm, "pkg_power_sku invalid\n"); - *value = 0; - return; + if (hwmon->xe->info.has_mbx_power_limits) { + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, ®_val); + } else { + rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); + pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + reg_val = xe_mmio_read32(mmio, rapl_limit); } - mutex_lock(&hwmon->hwmon_lock); - - reg_val = xe_mmio_read32(mmio, rapl_limit); - /* Check if PL1 limit is disabled */ - if (!(reg_val & PKG_PWR_LIM_1_EN)) { - *value = PL1_DISABLE; + /* Check if PL limits are disabled. */ + if (!(reg_val & PWR_LIM_EN)) { + *value = PL_DISABLE; + drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%08x\n", + PWR_ATTR_TO_STR(attr), channel, reg_val); goto unlock; } - reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); - *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); + reg_val = REG_FIELD_GET(PWR_LIM_VAL, reg_val); + *value = mul_u32_u32(reg_val, SF_POWER) >> hwmon->scl_shift_power; - reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku); - min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); - min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); - max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); - max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); + /* For platforms with mailbox power limit support clamping would be done by pcode. 
*/ + if (!hwmon->xe->info.has_mbx_power_limits) { + u64 pkg_pwr, min, max; - if (min && max) - *value = clamp_t(u64, *value, min, max); + pkg_pwr = xe_mmio_read64_2x32(mmio, pkg_power_sku); + min = REG_FIELD_GET(PKG_MIN_PWR, pkg_pwr); + max = REG_FIELD_GET(PKG_MAX_PWR, pkg_pwr); + min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); + max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); + if (min && max) + *value = clamp_t(u64, *value, min, max); + } unlock: mutex_unlock(&hwmon->hwmon_lock); } -static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value) +static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channel, long value) { struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); int ret = 0; - u64 reg_val; + u32 reg_val, max; struct xe_reg rapl_limit; + u64 max_supp_power_limit = 0; + + mutex_lock(&hwmon->hwmon_lock); rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); - mutex_lock(&hwmon->hwmon_lock); + /* Disable Power Limit and verify, as limit cannot be disabled on all platforms. 
*/ + if (value == PL_DISABLE) { + if (hwmon->xe->info.has_mbx_power_limits) { + drm_dbg(&hwmon->xe->drm, "disabling %s on channel %d\n", + PWR_ATTR_TO_STR(attr), channel); + xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM_EN, 0); + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, ®_val); + } else { + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN, 0); + reg_val = xe_mmio_read32(mmio, rapl_limit); + } - /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ - if (value == PL1_DISABLE) { - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN, 0); - reg_val = xe_mmio_read32(mmio, rapl_limit); - if (reg_val & PKG_PWR_LIM_1_EN) { - drm_warn(&hwmon->xe->drm, "PL1 disable is not supported!\n"); + if (reg_val & PWR_LIM_EN) { + drm_warn(&hwmon->xe->drm, "Power limit disable is not supported!\n"); ret = -EOPNOTSUPP; } goto unlock; } + /* + * If the sysfs value exceeds the maximum pcode supported power limit value, clamp it to + * the supported maximum (U12.3 format). + * This is to avoid truncation during reg_val calculation below and ensure the valid + * power limit is sent for pcode which would clamp it to card-supported value. + */ + max_supp_power_limit = ((PWR_LIM_VAL) >> hwmon->scl_shift_power) * SF_POWER; + if (value > max_supp_power_limit) { + value = max_supp_power_limit; + drm_info(&hwmon->xe->drm, + "Power limit clamped as selected %s exceeds channel %d limit\n", + PWR_ATTR_TO_STR(attr), channel); + } + /* Computation in 64-bits to avoid overflow. Round to nearest. */ reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); - reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val); - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); + /* + * Clamp power limit to GPU firmware default as maximum, as an additional protection to + * pcode clamp. + */ + if (hwmon->xe->info.has_mbx_power_limits) { + max = (attr == PL1_HWMON_ATTR) ? 
+ hwmon->pl1_on_boot[channel] : hwmon->pl2_on_boot[channel]; + max = REG_FIELD_PREP(PWR_LIM_VAL, max); + if (reg_val > max) { + reg_val = max; + drm_dbg(&hwmon->xe->drm, + "Clamping power limit to GPU firmware default 0x%x\n", + reg_val); + } + } + + reg_val = PWR_LIM_EN | REG_FIELD_PREP(PWR_LIM_VAL, reg_val); + + if (hwmon->xe->info.has_mbx_power_limits) + ret = xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM, reg_val); + else + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM, reg_val); unlock: mutex_unlock(&hwmon->hwmon_lock); return ret; } -static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value) +static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, + long *value) { struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); - struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); - u64 reg_val; + u32 reg_val; + + if (hwmon->xe->info.has_mbx_power_limits) { + /* PL1 is rated max if supported. */ + xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, channel, ®_val); + } else { + /* + * This sysfs file won't be visible if REG_PKG_POWER_SKU is invalid, so valid check + * for this register can be skipped. + * See xe_hwmon_power_is_visible. + */ + struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + + reg_val = xe_mmio_read32(mmio, reg); + } - /* - * This sysfs file won't be visible if REG_PKG_POWER_SKU is invalid, so valid check - * for this register can be skipped. - * See xe_hwmon_power_is_visible. 
- */ - reg_val = xe_mmio_read32(mmio, reg); reg_val = REG_FIELD_GET(PKG_TDP, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); } @@ -306,16 +452,37 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy) { struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); struct xe_hwmon_energy_info *ei = &hwmon->ei[channel]; - u64 reg_val; + u32 reg_val; + int ret = 0; - reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, - channel)); + /* Energy is supported only for card and pkg */ + if (channel > CHANNEL_PKG) { + *energy = 0; + return; + } - if (reg_val >= ei->reg_val_prev) - ei->accum_energy += reg_val - ei->reg_val_prev; - else - ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; + if (hwmon->xe->info.platform == XE_BATTLEMAGE) { + u64 pmt_val; + ret = xe_pmt_telem_read(to_pci_dev(hwmon->xe->drm.dev), + xe_mmio_read32(mmio, PUNIT_TELEMETRY_GUID), + &pmt_val, BMG_ENERGY_STATUS_PMT_OFFSET, sizeof(pmt_val)); + if (ret != sizeof(pmt_val)) { + drm_warn(&hwmon->xe->drm, "energy read from pmt failed, ret %d\n", ret); + *energy = 0; + return; + } + + if (channel == CHANNEL_PKG) + reg_val = REG_FIELD_GET64(ENERGY_PKG, pmt_val); + else + reg_val = REG_FIELD_GET64(ENERGY_CARD, pmt_val); + } else { + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, + channel)); + } + + ei->accum_energy += reg_val - ei->reg_val_prev; ei->reg_val_prev = reg_val; *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, @@ -328,25 +495,39 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at { struct xe_hwmon *hwmon = dev_get_drvdata(dev); struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); - u32 x, y, x_w = 2; /* 2 bits */ - u64 r, tau4, out; - int sensor_index = to_sensor_dev_attr(attr)->index; + u32 reg_val, x, y, x_w = 2; /* 2 bits */ + u64 tau4, out; + int channel = (to_sensor_dev_attr(attr)->index % 2) ? 
CHANNEL_PKG : CHANNEL_CARD; + u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR; + + int ret = 0; xe_pm_runtime_get(hwmon->xe); mutex_lock(&hwmon->hwmon_lock); - r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index)); + if (hwmon->xe->info.has_mbx_power_limits) { + ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, ®_val); + if (ret) { + drm_err(&hwmon->xe->drm, + "power interval read fail, ch %d, attr %d, val 0x%08x, ret %d\n", + channel, power_attr, reg_val, ret); + reg_val = 0; + } + } else { + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, + channel)); + } mutex_unlock(&hwmon->hwmon_lock); xe_pm_runtime_put(hwmon->xe); - x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r); - y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r); + x = REG_FIELD_GET(PWR_LIM_TIME_X, reg_val); + y = REG_FIELD_GET(PWR_LIM_TIME_Y, reg_val); /* - * tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17) + * tau = (1 + (x / 4)) * power(2,y), x = bits(23:22), y = bits(21:17) * = (4 | x) << (y - 2) * * Here (y - 2) ensures a 1.x fixed point representation of 1.x @@ -372,15 +553,16 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a u32 x, y, rxy, x_w = 2; /* 2 bits */ u64 tau4, r, max_win; unsigned long val; + int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD; + u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR; int ret; - int sensor_index = to_sensor_dev_attr(attr)->index; ret = kstrtoul(buf, 0, &val); if (ret) return ret; /* - * Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12. + * Max HW supported tau in '(1 + (x / 4)) * power(2,y)' format, x = 0, y = 0x12. * The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds. * * The ideal scenario is for PKG_MAX_WIN to be read from the PKG_PWR_SKU register. 
@@ -404,7 +586,7 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a return -EINVAL; /* val in hw units */ - val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME); + val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME) + 1; /* * Convert val to 1.x * power(2,y) @@ -419,14 +601,18 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a x = (val - (1ul << y)) << x_w >> y; } - rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y); + rxy = REG_FIELD_PREP(PWR_LIM_TIME_X, x) | + REG_FIELD_PREP(PWR_LIM_TIME_Y, y); xe_pm_runtime_get(hwmon->xe); mutex_lock(&hwmon->hwmon_lock); - r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index), - PKG_PWR_LIM_1_TIME, rxy); + if (hwmon->xe->info.has_mbx_power_limits) + xe_hwmon_pcode_rmw_power_limit(hwmon, power_attr, channel, PWR_LIM_TIME, rxy); + else + r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel), + PWR_LIM_TIME, rxy); mutex_unlock(&hwmon->hwmon_lock); @@ -435,17 +621,28 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a return count; } +/* PSYS PL1 */ static SENSOR_DEVICE_ATTR(power1_max_interval, 0664, xe_hwmon_power_max_interval_show, - xe_hwmon_power_max_interval_store, CHANNEL_CARD); - + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PSYS_PL1); +/* PKG PL1 */ static SENSOR_DEVICE_ATTR(power2_max_interval, 0664, xe_hwmon_power_max_interval_show, - xe_hwmon_power_max_interval_store, CHANNEL_PKG); + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PKG_PL1); +/* PSYS PL2 */ +static SENSOR_DEVICE_ATTR(power1_cap_interval, 0664, + xe_hwmon_power_max_interval_show, + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PSYS_PL2); +/* PKG PL2 */ +static SENSOR_DEVICE_ATTR(power2_cap_interval, 0664, + xe_hwmon_power_max_interval_show, + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PKG_PL2); static struct attribute 
*hwmon_attributes[] = { &sensor_dev_attr_power1_max_interval.dev_attr.attr, &sensor_dev_attr_power2_max_interval.dev_attr.attr, + &sensor_dev_attr_power1_cap_interval.dev_attr.attr, + &sensor_dev_attr_power2_cap_interval.dev_attr.attr, NULL }; @@ -455,12 +652,20 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret = 0; + int channel = (index % 2) ? CHANNEL_PKG : CHANNEL_CARD; + u32 power_attr = (index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR; + u32 uval = 0; + struct xe_reg rapl_limit; + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); - xe_pm_runtime_get(hwmon->xe); - - ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index)) ? attr->mode : 0; - - xe_pm_runtime_put(hwmon->xe); + if (hwmon->xe->info.has_mbx_power_limits) { + xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, &uval); + } else if (power_attr != PL2_HWMON_ATTR) { + rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); + if (xe_reg_is_valid(rapl_limit)) + uval = xe_mmio_read32(mmio, rapl_limit); + } + ret = (uval & PWR_LIM_EN) ? 
attr->mode : 0; return ret; } @@ -478,8 +683,9 @@ static const struct attribute_group *hwmon_groups[] = { static const struct hwmon_channel_info * const hwmon_info[] = { HWMON_CHANNEL_INFO(temp, HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL), - HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL, - HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT | HWMON_P_LABEL), + HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT | + HWMON_P_CAP, + HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CAP), HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL), @@ -527,7 +733,7 @@ static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, long *value, u32 scale_factor) { int ret; - u32 uval; + u32 uval = 0; mutex_lock(&hwmon->hwmon_lock); @@ -547,9 +753,23 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, { int ret; u32 uval; + u64 max_crit_power_curr = 0; mutex_lock(&hwmon->hwmon_lock); + /* + * If the sysfs value exceeds the pcode mailbox cmd POWER_SETUP_SUBCOMMAND_WRITE_I1 + * max supported value, clamp it to the command's max (U10.6 format). + * This is to avoid truncation during uval calculation below and ensure the valid power + * limit is sent for pcode which would clamp it to card-supported value. 
+ */ + max_crit_power_curr = (POWER_SETUP_I1_DATA_MASK >> POWER_SETUP_I1_SHIFT) * scale_factor; + if (value > max_crit_power_curr) { + value = max_crit_power_curr; + drm_info(&hwmon->xe->drm, + "Power limit clamped as selected exceeds channel %d limit\n", + channel); + } uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor); ret = xe_hwmon_pcode_write_i1(hwmon, uval); @@ -600,23 +820,62 @@ xe_hwmon_temp_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) static umode_t xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { - u32 uval; + u32 uval = 0; + struct xe_reg reg; + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); switch (attr) { case hwmon_power_max: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, - channel)) ? 0664 : 0; + case hwmon_power_cap: + if (hwmon->xe->info.has_mbx_power_limits) { + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &uval); + } else if (attr != PL2_HWMON_ATTR) { + reg = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); + if (xe_reg_is_valid(reg)) + uval = xe_mmio_read32(mmio, reg); + } + if (uval & PWR_LIM_EN) { + drm_info(&hwmon->xe->drm, "%s is supported on channel %d\n", + PWR_ATTR_TO_STR(attr), channel); + return 0664; + } + drm_dbg(&hwmon->xe->drm, "%s is unsupported on channel %d\n", + PWR_ATTR_TO_STR(attr), channel); + return 0; case hwmon_power_rated_max: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, - channel)) ? 0444 : 0; + if (hwmon->xe->info.has_mbx_power_limits) { + return 0; + } else { + reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + if (xe_reg_is_valid(reg)) + uval = xe_mmio_read32(mmio, reg); + return uval ? 0444 : 0; + } case hwmon_power_crit: - if (channel == CHANNEL_PKG) - return (xe_hwmon_pcode_read_i1(hwmon, &uval) || - !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + if (channel == CHANNEL_CARD) { + xe_hwmon_pcode_read_i1(hwmon, &uval); + return (uval & POWER_SETUP_I1_WATTS) ? 
0644 : 0; + } break; case hwmon_power_label: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, - channel)) ? 0444 : 0; + if (hwmon->xe->info.has_mbx_power_limits) { + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &uval); + } else { + reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + if (xe_reg_is_valid(reg)) + uval = xe_mmio_read32(mmio, reg); + + if (!uval) { + reg = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); + if (xe_reg_is_valid(reg)) + uval = xe_mmio_read32(mmio, reg); + } + } + if ((!(uval & PWR_LIM_EN)) && channel == CHANNEL_CARD) { + xe_hwmon_pcode_read_i1(hwmon, &uval); + return (uval & POWER_SETUP_I1_WATTS) ? 0444 : 0; + } + return (uval) ? 0444 : 0; default: return 0; } @@ -628,10 +887,11 @@ xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) { switch (attr) { case hwmon_power_max: - xe_hwmon_power_max_read(hwmon, channel, val); + case hwmon_power_cap: + xe_hwmon_power_max_read(hwmon, attr, channel, val); return 0; case hwmon_power_rated_max: - xe_hwmon_power_rated_max_read(hwmon, channel, val); + xe_hwmon_power_rated_max_read(hwmon, attr, channel, val); return 0; case hwmon_power_crit: return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_POWER); @@ -644,8 +904,9 @@ static int xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) { switch (attr) { + case hwmon_power_cap: case hwmon_power_max: - return xe_hwmon_power_max_write(hwmon, channel, val); + return xe_hwmon_power_max_write(hwmon, attr, channel, val); case hwmon_power_crit: return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_POWER); default: @@ -656,7 +917,7 @@ xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) static umode_t xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel) { - u32 uval; + u32 uval = 0; /* hwmon sysfs attribute of current available only for package */ if (channel != CHANNEL_PKG) @@ -726,11 +987,18 @@ 
xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) static umode_t xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { + long energy = 0; + switch (attr) { case hwmon_energy_input: case hwmon_energy_label: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, - channel)) ? 0444 : 0; + if (hwmon->xe->info.platform == XE_BATTLEMAGE) { + xe_hwmon_energy_get(hwmon, channel, &energy); + return energy ? 0444 : 0; + } else { + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, + channel)) ? 0444 : 0; + } default: return 0; } @@ -751,7 +1019,7 @@ xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) static umode_t xe_hwmon_fan_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { - u32 uval; + u32 uval = 0; if (!hwmon->xe->info.has_fan_control) return 0; @@ -824,8 +1092,6 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata; int ret; - xe_pm_runtime_get(hwmon->xe); - switch (type) { case hwmon_temp: ret = xe_hwmon_temp_is_visible(hwmon, attr, channel); @@ -850,8 +1116,6 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, break; } - xe_pm_runtime_put(hwmon->xe); - return ret; } @@ -965,18 +1229,52 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) int channel; struct xe_reg pkg_power_sku_unit; - /* - * The contents of register PKG_POWER_SKU_UNIT do not change, - * so read it once and store the shift values. 
- */ - pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0); - if (xe_reg_is_valid(pkg_power_sku_unit)) { - val_sku_unit = xe_mmio_read32(mmio, pkg_power_sku_unit); - hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); - hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); - hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); + if (hwmon->xe->info.has_mbx_power_limits) { + /* Check if GPU firmware support mailbox power limits commands. */ + if (xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_CARD, + &hwmon->pl1_on_boot[CHANNEL_CARD]) | + xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_PKG, + &hwmon->pl1_on_boot[CHANNEL_PKG]) | + xe_hwmon_pcode_read_power_limit(hwmon, PL2_HWMON_ATTR, CHANNEL_CARD, + &hwmon->pl2_on_boot[CHANNEL_CARD]) | + xe_hwmon_pcode_read_power_limit(hwmon, PL2_HWMON_ATTR, CHANNEL_PKG, + &hwmon->pl2_on_boot[CHANNEL_PKG])) { + drm_warn(&hwmon->xe->drm, + "Failed to read power limits, check GPU firmware !\n"); + } else { + drm_info(&hwmon->xe->drm, "Using mailbox commands for power limits\n"); + /* Write default limits to read from pcode from now on. 
*/ + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME, + hwmon->pl1_on_boot[CHANNEL_CARD]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME, + hwmon->pl1_on_boot[CHANNEL_PKG]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL2_HWMON_ATTR, + CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME, + hwmon->pl2_on_boot[CHANNEL_CARD]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL2_HWMON_ATTR, + CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME, + hwmon->pl2_on_boot[CHANNEL_PKG]); + hwmon->scl_shift_power = PWR_UNIT; + hwmon->scl_shift_energy = ENERGY_UNIT; + hwmon->scl_shift_time = TIME_UNIT; + hwmon->boot_power_limit_read = true; + } + } else { + drm_info(&hwmon->xe->drm, "Using register for power limits\n"); + /* + * The contents of register PKG_POWER_SKU_UNIT do not change, + * so read it once and store the shift values. + */ + pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0); + if (xe_reg_is_valid(pkg_power_sku_unit)) { + val_sku_unit = xe_mmio_read32(mmio, pkg_power_sku_unit); + hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); + hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); + hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); + } } - /* * Initialize 'struct xe_hwmon_energy_info', i.e. 
set fields to the * first value of the energy register read @@ -991,13 +1289,6 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) xe_hwmon_fan_input_read(hwmon, channel, &fan_speed); } -static void xe_hwmon_mutex_destroy(void *arg) -{ - struct xe_hwmon *hwmon = arg; - - mutex_destroy(&hwmon->hwmon_lock); -} - int xe_hwmon_register(struct xe_device *xe) { struct device *dev = xe->drm.dev; @@ -1016,8 +1307,7 @@ int xe_hwmon_register(struct xe_device *xe) if (!hwmon) return -ENOMEM; - mutex_init(&hwmon->hwmon_lock); - ret = devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon); + ret = devm_mutex_init(dev, &hwmon->hwmon_lock); if (ret) return ret; @@ -1041,4 +1331,4 @@ int xe_hwmon_register(struct xe_device *xe) return 0; } - +MODULE_IMPORT_NS("INTEL_PMT_TELEMETRY"); diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c new file mode 100644 index 000000000000..0b5452be0c87 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Intel Xe I2C attached Microcontroller Units (MCU) + * + * Copyright (C) 2025 Intel Corporation. 
+ */ + +#include <linux/array_size.h> +#include <linux/container_of.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/i2c.h> +#include <linux/ioport.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/notifier.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include <linux/property.h> +#include <linux/regmap.h> +#include <linux/sprintf.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +#include "regs/xe_i2c_regs.h" +#include "regs/xe_irq_regs.h" + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_i2c.h" +#include "xe_mmio.h" +#include "xe_platform_types.h" + +/** + * DOC: Xe I2C devices + * + * Register a platform device for the I2C host controller (Synopsys DesignWare + * I2C) if the registers of that controller are mapped to the MMIO, and also the + * I2C client device for the Add-In Management Controller (the MCU) attached to + * the host controller. + * + * See drivers/i2c/busses/i2c-designware-* for more information on the I2C host + * controller. 
+ */ + +static const char adapter_name[] = "i2c_designware"; + +static const struct property_entry xe_i2c_adapter_properties[] = { + PROPERTY_ENTRY_STRING("compatible", "intel,xe-i2c"), + PROPERTY_ENTRY_U32("clock-frequency", I2C_MAX_FAST_MODE_PLUS_FREQ), + { } +}; + +static inline void xe_i2c_read_endpoint(struct xe_mmio *mmio, void *ep) +{ + u32 *val = ep; + + val[0] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_PREFIX); + val[1] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_POSTFIX); +} + +static void xe_i2c_client_work(struct work_struct *work) +{ + struct xe_i2c *i2c = container_of(work, struct xe_i2c, work); + struct i2c_board_info info = { + .type = "amc", + .flags = I2C_CLIENT_HOST_NOTIFY, + .addr = i2c->ep.addr[1], + }; + + i2c->client[0] = i2c_new_client_device(i2c->adapter, &info); +} + +static int xe_i2c_notifier(struct notifier_block *nb, unsigned long action, void *data) +{ + struct xe_i2c *i2c = container_of(nb, struct xe_i2c, bus_notifier); + struct i2c_adapter *adapter = i2c_verify_adapter(data); + struct device *dev = data; + + if (action == BUS_NOTIFY_ADD_DEVICE && + adapter && dev->parent == &i2c->pdev->dev) { + i2c->adapter = adapter; + schedule_work(&i2c->work); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static int xe_i2c_register_adapter(struct xe_i2c *i2c) +{ + struct pci_dev *pci = to_pci_dev(i2c->drm_dev); + struct platform_device *pdev; + struct fwnode_handle *fwnode; + int ret; + + fwnode = fwnode_create_software_node(xe_i2c_adapter_properties, NULL); + if (IS_ERR(fwnode)) + return PTR_ERR(fwnode); + + /* + * Not using platform_device_register_full() here because we don't have + * a handle to the platform_device before it returns. xe_i2c_notifier() + * uses that handle, but it may be called before + * platform_device_register_full() is done. 
+ */ + pdev = platform_device_alloc(adapter_name, pci_dev_id(pci)); + if (!pdev) { + ret = -ENOMEM; + goto err_fwnode_remove; + } + + if (i2c->adapter_irq) { + struct resource res; + + res = DEFINE_RES_IRQ_NAMED(i2c->adapter_irq, "xe_i2c"); + + ret = platform_device_add_resources(pdev, &res, 1); + if (ret) + goto err_pdev_put; + } + + pdev->dev.parent = i2c->drm_dev; + pdev->dev.fwnode = fwnode; + i2c->adapter_node = fwnode; + i2c->pdev = pdev; + + ret = platform_device_add(pdev); + if (ret) + goto err_pdev_put; + + return 0; + +err_pdev_put: + platform_device_put(pdev); +err_fwnode_remove: + fwnode_remove_software_node(fwnode); + + return ret; +} + +static void xe_i2c_unregister_adapter(struct xe_i2c *i2c) +{ + platform_device_unregister(i2c->pdev); + fwnode_remove_software_node(i2c->adapter_node); +} + +/** + * xe_i2c_present - I2C controller is present and functional + * @xe: xe device instance + * + * Check whether the I2C controller is present and functioning with valid + * endpoint cookie. + * + * Return: %true if present, %false otherwise. + */ +bool xe_i2c_present(struct xe_device *xe) +{ + return xe->i2c && xe->i2c->ep.cookie == XE_I2C_EP_COOKIE_DEVICE; +} + +static bool xe_i2c_irq_present(struct xe_device *xe) +{ + return xe->i2c && xe->i2c->adapter_irq; +} + +/** + * xe_i2c_irq_handler: Handler for I2C interrupts + * @xe: xe device instance + * @master_ctl: interrupt register + * + * Forward interrupts generated by the I2C host adapter to the I2C host adapter + * driver. 
+ */ +void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) +{ + if (!xe_i2c_irq_present(xe)) + return; + + if (master_ctl & I2C_IRQ) + generic_handle_irq_safe(xe->i2c->adapter_irq); +} + +void xe_i2c_irq_reset(struct xe_device *xe) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe_i2c_irq_present(xe)) + return; + + xe_mmio_rmw32(mmio, I2C_BRIDGE_PCICFGCTL, ACPI_INTR_EN, 0); +} + +void xe_i2c_irq_postinstall(struct xe_device *xe) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe_i2c_irq_present(xe)) + return; + + xe_mmio_rmw32(mmio, I2C_BRIDGE_PCICFGCTL, 0, ACPI_INTR_EN); +} + +static int xe_i2c_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw_irq_num) +{ + irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); + return 0; +} + +static const struct irq_domain_ops xe_i2c_irq_ops = { + .map = xe_i2c_irq_map, +}; + +static int xe_i2c_create_irq(struct xe_i2c *i2c) +{ + struct irq_domain *domain; + + if (!(i2c->ep.capabilities & XE_I2C_EP_CAP_IRQ)) + return 0; + + domain = irq_domain_create_linear(dev_fwnode(i2c->drm_dev), 1, &xe_i2c_irq_ops, NULL); + if (!domain) + return -ENOMEM; + + i2c->adapter_irq = irq_create_mapping(domain, 0); + i2c->irqdomain = domain; + + return 0; +} + +static void xe_i2c_remove_irq(struct xe_i2c *i2c) +{ + if (!i2c->irqdomain) + return; + + irq_dispose_mapping(i2c->adapter_irq); + irq_domain_remove(i2c->irqdomain); +} + +static int xe_i2c_read(void *context, unsigned int reg, unsigned int *val) +{ + struct xe_i2c *i2c = context; + + *val = xe_mmio_read32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET)); + + return 0; +} + +static int xe_i2c_write(void *context, unsigned int reg, unsigned int val) +{ + struct xe_i2c *i2c = context; + + xe_mmio_write32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET), val); + + return 0; +} + +static const struct regmap_config i2c_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_read = xe_i2c_read, + .reg_write = xe_i2c_write, + 
.fast_io = true, +}; + +void xe_i2c_pm_suspend(struct xe_device *xe) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe_i2c_present(xe)) + return; + + xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D3hot); + drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR)); +} + +void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe_i2c_present(xe)) + return; + + if (d3cold) + xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + + xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D0); + drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR)); +} + +static void xe_i2c_remove(void *data) +{ + struct xe_i2c *i2c = data; + unsigned int i; + + for (i = 0; i < XE_I2C_MAX_CLIENTS; i++) + i2c_unregister_device(i2c->client[i]); + + bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier); + xe_i2c_unregister_adapter(i2c); + xe_i2c_remove_irq(i2c); +} + +/** + * xe_i2c_probe: Probe the I2C host adapter and the I2C clients attached to it + * @xe: xe device instance + * + * Register all the I2C devices described in the I2C Endpoint data structure. 
+ * + * Return: 0 on success, error code on failure + */ +int xe_i2c_probe(struct xe_device *xe) +{ + struct device *drm_dev = xe->drm.dev; + struct xe_i2c_endpoint ep; + struct regmap *regmap; + struct xe_i2c *i2c; + int ret; + + if (xe->info.platform != XE_BATTLEMAGE) + return 0; + + if (IS_SRIOV_VF(xe)) + return 0; + + xe_i2c_read_endpoint(xe_root_tile_mmio(xe), &ep); + if (ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + return 0; + + i2c = devm_kzalloc(drm_dev, sizeof(*i2c), GFP_KERNEL); + if (!i2c) + return -ENOMEM; + + INIT_WORK(&i2c->work, xe_i2c_client_work); + i2c->mmio = xe_root_tile_mmio(xe); + i2c->drm_dev = drm_dev; + i2c->ep = ep; + xe->i2c = i2c; + + /* PCI PM isn't aware of this device, bring it up and match it with SGUnit state. */ + xe_i2c_pm_resume(xe, true); + + regmap = devm_regmap_init(drm_dev, NULL, i2c, &i2c_regmap_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + i2c->bus_notifier.notifier_call = xe_i2c_notifier; + ret = bus_register_notifier(&i2c_bus_type, &i2c->bus_notifier); + if (ret) + return ret; + + ret = xe_i2c_create_irq(i2c); + if (ret) + goto err_unregister_notifier; + + ret = xe_i2c_register_adapter(i2c); + if (ret) + goto err_remove_irq; + + xe_i2c_irq_postinstall(xe); + return devm_add_action_or_reset(drm_dev, xe_i2c_remove, i2c); + +err_remove_irq: + xe_i2c_remove_irq(i2c); + +err_unregister_notifier: + bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_i2c.h b/drivers/gpu/drm/xe/xe_i2c.h new file mode 100644 index 000000000000..425d8160835f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_i2c.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _XE_I2C_H_ +#define _XE_I2C_H_ + +#include <linux/bits.h> +#include <linux/notifier.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +struct device; +struct fwnode_handle; +struct i2c_adapter; +struct i2c_client; +struct irq_domain; +struct platform_device; +struct xe_device; +struct xe_mmio; + +#define 
XE_I2C_MAX_CLIENTS 3 + +#define XE_I2C_EP_COOKIE_DEVICE 0xde + +/* Endpoint Capabilities */ +#define XE_I2C_EP_CAP_IRQ BIT(0) + +struct xe_i2c_endpoint { + u8 cookie; + u8 capabilities; + u16 addr[XE_I2C_MAX_CLIENTS]; +}; + +struct xe_i2c { + struct fwnode_handle *adapter_node; + struct platform_device *pdev; + struct i2c_adapter *adapter; + struct i2c_client *client[XE_I2C_MAX_CLIENTS]; + + struct notifier_block bus_notifier; + struct work_struct work; + + struct irq_domain *irqdomain; + int adapter_irq; + + struct xe_i2c_endpoint ep; + struct device *drm_dev; + + struct xe_mmio *mmio; +}; + +#if IS_ENABLED(CONFIG_I2C) +int xe_i2c_probe(struct xe_device *xe); +bool xe_i2c_present(struct xe_device *xe); +void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl); +void xe_i2c_irq_postinstall(struct xe_device *xe); +void xe_i2c_irq_reset(struct xe_device *xe); +void xe_i2c_pm_suspend(struct xe_device *xe); +void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold); +#else +static inline int xe_i2c_probe(struct xe_device *xe) { return 0; } +static inline bool xe_i2c_present(struct xe_device *xe) { return false; } +static inline void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) { } +static inline void xe_i2c_irq_postinstall(struct xe_device *xe) { } +static inline void xe_i2c_irq_reset(struct xe_device *xe) { } +static inline void xe_i2c_pm_suspend(struct xe_device *xe) { } +static inline void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) { } +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 5362d3174b06..024e13e606ec 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -18,10 +18,13 @@ #include "xe_gt.h" #include "xe_guc.h" #include "xe_hw_engine.h" +#include "xe_hw_error.h" +#include "xe_i2c.h" #include "xe_memirq.h" #include "xe_mmio.h" #include "xe_pxp.h" #include "xe_sriov.h" +#include "xe_tile.h" /* * Interrupt registers for a unit are always consecutive and ordered @@ 
-136,68 +139,112 @@ void xe_irq_enable_hwe(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); struct xe_mmio *mmio = &gt->mmio; - u32 ccs_mask, bcs_mask; - u32 irqs, dmask, smask; - u32 gsc_mask = 0; - u32 heci_mask = 0; + u32 common_mask, val, gsc_mask = 0, heci_mask = 0, + rcs_mask = 0, bcs_mask = 0, vcs_mask = 0, vecs_mask = 0, + ccs_mask = 0; if (xe_device_uses_memirq(xe)) return; if (xe_device_uc_enabled(xe)) { - irqs = GT_RENDER_USER_INTERRUPT | - GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; + common_mask = GT_MI_USER_INTERRUPT | + GT_FLUSH_COMPLETE_INTERRUPT; + + /* Enable Compute Walker Interrupt for non-MSIX platforms */ + if (GRAPHICS_VERx100(xe) >= 3511 && !xe_device_has_msix(xe)) { + rcs_mask |= GT_COMPUTE_WALKER_INTERRUPT; + ccs_mask |= GT_COMPUTE_WALKER_INTERRUPT; + } } else { - irqs = GT_RENDER_USER_INTERRUPT | - GT_CS_MASTER_ERROR_INTERRUPT | - GT_CONTEXT_SWITCH_INTERRUPT | - GT_WAIT_SEMAPHORE_INTERRUPT; + common_mask = GT_MI_USER_INTERRUPT | + GT_CS_MASTER_ERROR_INTERRUPT | + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; } - ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE); - bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); + rcs_mask |= common_mask; + bcs_mask |= common_mask; + vcs_mask |= common_mask; + vecs_mask |= common_mask; + ccs_mask |= common_mask; - dmask = irqs << 16 | irqs; - smask = irqs << 16; + if (xe_gt_is_main_type(gt)) { + /* + * For enabling the interrupts, the information about fused off + * engines doesn't matter much, but this also allows to check if + * the engine is available architecturally in the platform + */ + u32 ccs_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE); + u32 bcs_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); - if (!xe_gt_is_media_type(gt)) { /* Enable interrupts for each engine class */ - xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask); - if (ccs_mask) - xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, smask); + 
xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, + REG_FIELD_PREP(ENGINE1_MASK, rcs_mask) | + REG_FIELD_PREP(ENGINE0_MASK, bcs_mask)); + if (ccs_fuse_mask) + xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, + REG_FIELD_PREP(ENGINE1_MASK, ccs_mask)); /* Unmask interrupts for each engine instance */ - xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, ~smask); - xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, ~smask); - if (bcs_mask & (BIT(1)|BIT(2))) - xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask); - if (bcs_mask & (BIT(3)|BIT(4))) - xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask); - if (bcs_mask & (BIT(5)|BIT(6))) - xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask); - if (bcs_mask & (BIT(7)|BIT(8))) - xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask); - if (ccs_mask & (BIT(0)|BIT(1))) - xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~dmask); - if (ccs_mask & (BIT(2)|BIT(3))) - xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~dmask); + val = ~REG_FIELD_PREP(ENGINE1_MASK, rcs_mask); + xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, val); + val = ~REG_FIELD_PREP(ENGINE1_MASK, bcs_mask); + xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, val); + + val = ~(REG_FIELD_PREP(ENGINE1_MASK, bcs_mask) | + REG_FIELD_PREP(ENGINE0_MASK, bcs_mask)); + if (bcs_fuse_mask & (BIT(1)|BIT(2))) + xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, val); + if (bcs_fuse_mask & (BIT(3)|BIT(4))) + xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, val); + if (bcs_fuse_mask & (BIT(5)|BIT(6))) + xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, val); + if (bcs_fuse_mask & (BIT(7)|BIT(8))) + xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, val); + + val = ~(REG_FIELD_PREP(ENGINE1_MASK, ccs_mask) | + REG_FIELD_PREP(ENGINE0_MASK, ccs_mask)); + if (ccs_fuse_mask & (BIT(0)|BIT(1))) + xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, val); + if (ccs_fuse_mask & (BIT(2)|BIT(3))) + xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, val); } if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) { + u32 vcs_fuse_mask = 
xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_DECODE); + u32 vecs_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE); + u32 other_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER); + /* Enable interrupts for each engine class */ - xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, dmask); + xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, + REG_FIELD_PREP(ENGINE1_MASK, vcs_mask) | + REG_FIELD_PREP(ENGINE0_MASK, vecs_mask)); /* Unmask interrupts for each engine instance */ - xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, ~dmask); - xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, ~dmask); - xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, ~dmask); + val = ~(REG_FIELD_PREP(ENGINE1_MASK, vcs_mask) | + REG_FIELD_PREP(ENGINE0_MASK, vcs_mask)); + if (vcs_fuse_mask & (BIT(0) | BIT(1))) + xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, val); + if (vcs_fuse_mask & (BIT(2) | BIT(3))) + xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, val); + if (vcs_fuse_mask & (BIT(4) | BIT(5))) + xe_mmio_write32(mmio, VCS4_VCS5_INTR_MASK, val); + if (vcs_fuse_mask & (BIT(6) | BIT(7))) + xe_mmio_write32(mmio, VCS6_VCS7_INTR_MASK, val); + + val = ~(REG_FIELD_PREP(ENGINE1_MASK, vecs_mask) | + REG_FIELD_PREP(ENGINE0_MASK, vecs_mask)); + if (vecs_fuse_mask & (BIT(0) | BIT(1))) + xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, val); + if (vecs_fuse_mask & (BIT(2) | BIT(3))) + xe_mmio_write32(mmio, VECS2_VECS3_INTR_MASK, val); /* * the heci2 interrupt is enabled via the same register as the * GSCCS interrupts, but it has its own mask register. 
*/ - if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) { - gsc_mask = irqs | GSC_ER_COMPLETE; + if (other_fuse_mask) { + gsc_mask = common_mask | GSC_ER_COMPLETE; heci_mask = GSC_IRQ_INTF(1); } else if (xe->info.has_heci_gscfi) { gsc_mask = GSC_IRQ_INTF(1); @@ -260,7 +307,7 @@ gt_engine_identity(struct xe_device *xe, static void gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir) { - if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt)) + if (instance == OTHER_GUC_INSTANCE && xe_gt_is_main_type(gt)) return xe_guc_irq_handler(&gt->uc.guc, iir); if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt)) return xe_guc_irq_handler(&gt->uc.guc, iir); @@ -466,6 +513,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) xe_mmio_write32(mmio, GFX_MSTR_IRQ, master_ctl); gt_irq_handler(tile, master_ctl, intr_dw, identity); + xe_hw_error_irq_handler(tile, master_ctl); /* * Display interrupts (including display backlight operations @@ -476,6 +524,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) if (xe->info.has_heci_cscfi) xe_heci_csc_irq_handler(xe, master_ctl); xe_display_irq_handler(xe, master_ctl); + xe_i2c_irq_handler(xe, master_ctl); gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); } } @@ -489,11 +538,15 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) static void gt_irq_reset(struct xe_tile *tile) { struct xe_mmio *mmio = &tile->mmio; - - u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, - XE_ENGINE_CLASS_COMPUTE); - u32 bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, - XE_ENGINE_CLASS_COPY); + u32 ccs_mask = ~0; + u32 bcs_mask = ~0; + + if (tile->primary_gt) { + ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, + XE_ENGINE_CLASS_COMPUTE); + bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, + XE_ENGINE_CLASS_COPY); + } /* Disable RCS, BCS, VCS and VECS class engines. 
*/ xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, 0); @@ -550,7 +603,7 @@ static void xelp_irq_reset(struct xe_tile *tile) static void dg1_irq_reset(struct xe_tile *tile) { - if (tile->id == 0) + if (xe_tile_is_root(tile)) dg1_intr_disable(tile_to_xe(tile)); gt_irq_reset(tile); @@ -611,6 +664,7 @@ static void xe_irq_reset(struct xe_device *xe) tile = xe_device_get_root_tile(xe); mask_and_disable(tile, GU_MISC_IRQ_OFFSET); xe_display_irq_reset(xe); + xe_i2c_irq_reset(xe); /* * The tile's top-level status register should be the last one @@ -651,7 +705,8 @@ static void xe_irq_postinstall(struct xe_device *xe) xe_memirq_postinstall(&tile->memirq); } - xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe)); + xe_display_irq_postinstall(xe); + xe_i2c_irq_postinstall(xe); /* * ASLE backlight operations are reported via GUnit GSE interrupts @@ -753,6 +808,8 @@ int xe_irq_install(struct xe_device *xe) int nvec = 1; int err; + xe_hw_error_init(xe); + xe_irq_reset(xe); if (xe_device_has_msix(xe)) { @@ -840,22 +897,6 @@ static int xe_irq_msix_init(struct xe_device *xe) return 0; } -static irqreturn_t guc2host_irq_handler(int irq, void *arg) -{ - struct xe_device *xe = arg; - struct xe_tile *tile; - u8 id; - - if (!atomic_read(&xe->irq.enabled)) - return IRQ_NONE; - - for_each_tile(tile, xe, id) - xe_guc_irq_handler(&tile->primary_gt->uc.guc, - GUC_INTR_GUC2HOST); - - return IRQ_HANDLED; -} - static irqreturn_t xe_irq_msix_default_hwe_handler(int irq, void *arg) { unsigned int tile_id, gt_id; @@ -972,7 +1013,7 @@ int xe_irq_msix_request_irqs(struct xe_device *xe) u16 msix; msix = GUC2HOST_MSIX; - err = xe_irq_msix_request_irq(xe, guc2host_irq_handler, xe, + err = xe_irq_msix_request_irq(xe, xe_irq_handler(xe), xe, DRIVER_NAME "-guc2host", false, &msix); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c b/drivers/gpu/drm/xe/xe_late_bind_fw.c new file mode 100644 index 000000000000..768442ca7da6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c @@ -0,0 
+1,464 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/component.h> +#include <linux/delay.h> +#include <linux/firmware.h> + +#include <drm/drm_managed.h> +#include <drm/intel/i915_component.h> +#include <drm/intel/intel_lb_mei_interface.h> +#include <drm/drm_print.h> + +#include "xe_device.h" +#include "xe_late_bind_fw.h" +#include "xe_pcode.h" +#include "xe_pcode_api.h" +#include "xe_pm.h" + +/* + * The component should load quite quickly in most cases, but it could take + * a bit. Using a very big timeout just to cover the worst case scenario + */ +#define LB_INIT_TIMEOUT_MS 20000 + +/* + * Retry interval set to 6 seconds, in steps of 200 ms, to allow time for + * other OS components to release the MEI CL handle + */ +#define LB_FW_LOAD_RETRY_MAXCOUNT 30 +#define LB_FW_LOAD_RETRY_PAUSE_MS 200 + +static const u32 fw_id_to_type[] = { + [XE_LB_FW_FAN_CONTROL] = INTEL_LB_TYPE_FAN_CONTROL, + }; + +static const char * const fw_id_to_name[] = { + [XE_LB_FW_FAN_CONTROL] = "fan_control", + }; + +static struct xe_device * +late_bind_to_xe(struct xe_late_bind *late_bind) +{ + return container_of(late_bind, struct xe_device, late_bind); +} + +static struct xe_device * +late_bind_fw_to_xe(struct xe_late_bind_fw *lb_fw) +{ + return container_of(lb_fw, struct xe_device, late_bind.late_bind_fw[lb_fw->id]); +} + +/* Refer to the "Late Bind based Firmware Layout" documentation entry for details */ +static int parse_cpd_header(struct xe_late_bind_fw *lb_fw, + const void *data, size_t size, const char *manifest_entry) +{ + struct xe_device *xe = late_bind_fw_to_xe(lb_fw); + const struct gsc_cpd_header_v2 *header = data; + const struct gsc_manifest_header *manifest; + const struct gsc_cpd_entry *entry; + size_t min_size = sizeof(*header); + u32 offset = 0; + int i; + + /* manifest_entry is mandatory */ + xe_assert(xe, manifest_entry); + + if (size < min_size || header->header_marker != GSC_CPD_HEADER_MARKER) + return -ENOENT; + 
+ if (header->header_length < sizeof(struct gsc_cpd_header_v2)) { + drm_err(&xe->drm, "%s late binding fw: Invalid CPD header length %u!\n", + fw_id_to_name[lb_fw->id], header->header_length); + return -EINVAL; + } + + min_size = header->header_length + sizeof(struct gsc_cpd_entry) * header->num_of_entries; + if (size < min_size) { + drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n", + fw_id_to_name[lb_fw->id], size, min_size); + return -ENODATA; + } + + /* Look for the manifest first */ + entry = (void *)header + header->header_length; + for (i = 0; i < header->num_of_entries; i++, entry++) + if (strcmp(entry->name, manifest_entry) == 0) + offset = entry->offset & GSC_CPD_ENTRY_OFFSET_MASK; + + if (!offset) { + drm_err(&xe->drm, "%s late binding fw: Failed to find manifest_entry\n", + fw_id_to_name[lb_fw->id]); + return -ENODATA; + } + + min_size = offset + sizeof(struct gsc_manifest_header); + if (size < min_size) { + drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n", + fw_id_to_name[lb_fw->id], size, min_size); + return -ENODATA; + } + + manifest = data + offset; + + lb_fw->version = manifest->fw_version; + + return 0; +} + +/* Refer to the "Late Bind based Firmware Layout" documentation entry for details */ +static int parse_lb_layout(struct xe_late_bind_fw *lb_fw, + const void *data, size_t size, const char *fpt_entry) +{ + struct xe_device *xe = late_bind_fw_to_xe(lb_fw); + const struct csc_fpt_header *header = data; + const struct csc_fpt_entry *entry; + size_t min_size = sizeof(*header); + u32 offset = 0; + int i; + + /* fpt_entry is mandatory */ + xe_assert(xe, fpt_entry); + + if (size < min_size || header->header_marker != CSC_FPT_HEADER_MARKER) + return -ENOENT; + + if (header->header_length < sizeof(struct csc_fpt_header)) { + drm_err(&xe->drm, "%s late binding fw: Invalid FPT header length %u!\n", + fw_id_to_name[lb_fw->id], header->header_length); + return -EINVAL; + } + + min_size = header->header_length + sizeof(struct 
csc_fpt_entry) * header->num_of_entries; + if (size < min_size) { + drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n", + fw_id_to_name[lb_fw->id], size, min_size); + return -ENODATA; + } + + /* Look for the cpd header first */ + entry = (void *)header + header->header_length; + for (i = 0; i < header->num_of_entries; i++, entry++) + if (strcmp(entry->name, fpt_entry) == 0) + offset = entry->offset; + + if (!offset) { + drm_err(&xe->drm, "%s late binding fw: Failed to find fpt_entry\n", + fw_id_to_name[lb_fw->id]); + return -ENODATA; + } + + min_size = offset + sizeof(struct gsc_cpd_header_v2); + if (size < min_size) { + drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n", + fw_id_to_name[lb_fw->id], size, min_size); + return -ENODATA; + } + + return parse_cpd_header(lb_fw, data + offset, size - offset, "LTES.man"); +} + +static const char *xe_late_bind_parse_status(uint32_t status) +{ + switch (status) { + case INTEL_LB_STATUS_SUCCESS: + return "success"; + case INTEL_LB_STATUS_4ID_MISMATCH: + return "4Id Mismatch"; + case INTEL_LB_STATUS_ARB_FAILURE: + return "ARB Failure"; + case INTEL_LB_STATUS_GENERAL_ERROR: + return "General Error"; + case INTEL_LB_STATUS_INVALID_PARAMS: + return "Invalid Params"; + case INTEL_LB_STATUS_INVALID_SIGNATURE: + return "Invalid Signature"; + case INTEL_LB_STATUS_INVALID_PAYLOAD: + return "Invalid Payload"; + case INTEL_LB_STATUS_TIMEOUT: + return "Timeout"; + default: + return "Unknown error"; + } +} + +static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind, u32 *num_fans) +{ + struct xe_device *xe = late_bind_to_xe(late_bind); + struct xe_tile *root_tile = xe_device_get_root_tile(xe); + + return xe_pcode_read(root_tile, + PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), num_fans, NULL); +} + +void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind) +{ + struct xe_device *xe = late_bind_to_xe(late_bind); + struct xe_late_bind_fw *lbfw; + int fw_id; + + for (fw_id = 0; 
fw_id < XE_LB_FW_MAX_ID; fw_id++) { + lbfw = &late_bind->late_bind_fw[fw_id]; + if (lbfw->payload && late_bind->wq) { + drm_dbg(&xe->drm, "Flush work: load %s firmware\n", + fw_id_to_name[lbfw->id]); + flush_work(&lbfw->work); + } + } +} + +static void xe_late_bind_work(struct work_struct *work) +{ + struct xe_late_bind_fw *lbfw = container_of(work, struct xe_late_bind_fw, work); + struct xe_late_bind *late_bind = container_of(lbfw, struct xe_late_bind, + late_bind_fw[lbfw->id]); + struct xe_device *xe = late_bind_to_xe(late_bind); + int retry = LB_FW_LOAD_RETRY_MAXCOUNT; + int ret; + int slept; + + xe_device_assert_mem_access(xe); + + /* we can queue this before the component is bound */ + for (slept = 0; slept < LB_INIT_TIMEOUT_MS; slept += 100) { + if (late_bind->component.ops) + break; + msleep(100); + } + + if (!late_bind->component.ops) { + drm_err(&xe->drm, "Late bind component not bound\n"); + /* Do not re-attempt fw load */ + drmm_kfree(&xe->drm, (void *)lbfw->payload); + lbfw->payload = NULL; + goto out; + } + + drm_dbg(&xe->drm, "Load %s firmware\n", fw_id_to_name[lbfw->id]); + + do { + ret = late_bind->component.ops->push_payload(late_bind->component.mei_dev, + lbfw->type, + lbfw->flags, + lbfw->payload, + lbfw->payload_size); + if (!ret) + break; + msleep(LB_FW_LOAD_RETRY_PAUSE_MS); + } while (--retry && ret == -EBUSY); + + if (!ret) { + drm_dbg(&xe->drm, "Load %s firmware successful\n", + fw_id_to_name[lbfw->id]); + goto out; + } + + if (ret > 0) + drm_err(&xe->drm, "Load %s firmware failed with err %d, %s\n", + fw_id_to_name[lbfw->id], ret, xe_late_bind_parse_status(ret)); + else + drm_err(&xe->drm, "Load %s firmware failed with err %d", + fw_id_to_name[lbfw->id], ret); + /* Do not re-attempt fw load */ + drmm_kfree(&xe->drm, (void *)lbfw->payload); + lbfw->payload = NULL; + +out: + xe_pm_runtime_put(xe); +} + +int xe_late_bind_fw_load(struct xe_late_bind *late_bind) +{ + struct xe_device *xe = late_bind_to_xe(late_bind); + struct xe_late_bind_fw 
*lbfw; + int fw_id; + + if (!late_bind->component_added) + return -ENODEV; + + if (late_bind->disable) + return 0; + + for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) { + lbfw = &late_bind->late_bind_fw[fw_id]; + if (lbfw->payload) { + xe_pm_runtime_get_noresume(xe); + queue_work(late_bind->wq, &lbfw->work); + } + } + return 0; +} + +static int __xe_late_bind_fw_init(struct xe_late_bind *late_bind, u32 fw_id) +{ + struct xe_device *xe = late_bind_to_xe(late_bind); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct xe_late_bind_fw *lb_fw; + const struct firmware *fw; + u32 num_fans; + int ret; + + if (fw_id >= XE_LB_FW_MAX_ID) + return -EINVAL; + + lb_fw = &late_bind->late_bind_fw[fw_id]; + + lb_fw->id = fw_id; + lb_fw->type = fw_id_to_type[lb_fw->id]; + lb_fw->flags &= ~INTEL_LB_FLAG_IS_PERSISTENT; + + if (lb_fw->type == INTEL_LB_TYPE_FAN_CONTROL) { + ret = xe_late_bind_fw_num_fans(late_bind, &num_fans); + if (ret) { + drm_dbg(&xe->drm, "Failed to read number of fans: %d\n", ret); + return 0; /* Not a fatal error, continue without fan control */ + } + drm_dbg(&xe->drm, "Number of Fans: %d\n", num_fans); + if (!num_fans) + return 0; + } + + snprintf(lb_fw->blob_path, sizeof(lb_fw->blob_path), "xe/%s_8086_%04x_%04x_%04x.bin", + fw_id_to_name[lb_fw->id], pdev->device, + pdev->subsystem_vendor, pdev->subsystem_device); + + drm_dbg(&xe->drm, "Request late binding firmware %s\n", lb_fw->blob_path); + ret = firmware_request_nowarn(&fw, lb_fw->blob_path, xe->drm.dev); + if (ret) { + drm_dbg(&xe->drm, "%s late binding fw not available for current device", + fw_id_to_name[lb_fw->id]); + return 0; + } + + if (fw->size > XE_LB_MAX_PAYLOAD_SIZE) { + drm_err(&xe->drm, "Firmware %s size %zu is larger than max pay load size %u\n", + lb_fw->blob_path, fw->size, XE_LB_MAX_PAYLOAD_SIZE); + release_firmware(fw); + return -ENODATA; + } + + ret = parse_lb_layout(lb_fw, fw->data, fw->size, "LTES"); + if (ret) + return ret; + + lb_fw->payload_size = fw->size; + lb_fw->payload = 
drmm_kzalloc(&xe->drm, lb_fw->payload_size, GFP_KERNEL); + if (!lb_fw->payload) { + release_firmware(fw); + return -ENOMEM; + } + + drm_info(&xe->drm, "Using %s firmware from %s version %u.%u.%u.%u\n", + fw_id_to_name[lb_fw->id], lb_fw->blob_path, + lb_fw->version.major, lb_fw->version.minor, + lb_fw->version.hotfix, lb_fw->version.build); + + memcpy((void *)lb_fw->payload, fw->data, lb_fw->payload_size); + release_firmware(fw); + INIT_WORK(&lb_fw->work, xe_late_bind_work); + + return 0; +} + +static int xe_late_bind_fw_init(struct xe_late_bind *late_bind) +{ + int ret; + int fw_id; + + late_bind->wq = alloc_ordered_workqueue("late-bind-ordered-wq", 0); + if (!late_bind->wq) + return -ENOMEM; + + for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) { + ret = __xe_late_bind_fw_init(late_bind, fw_id); + if (ret) + return ret; + } + + return 0; +} + +static int xe_late_bind_component_bind(struct device *xe_kdev, + struct device *mei_kdev, void *data) +{ + struct xe_device *xe = kdev_to_xe_device(xe_kdev); + struct xe_late_bind *late_bind = &xe->late_bind; + + late_bind->component.ops = data; + late_bind->component.mei_dev = mei_kdev; + + return 0; +} + +static void xe_late_bind_component_unbind(struct device *xe_kdev, + struct device *mei_kdev, void *data) +{ + struct xe_device *xe = kdev_to_xe_device(xe_kdev); + struct xe_late_bind *late_bind = &xe->late_bind; + + xe_late_bind_wait_for_worker_completion(late_bind); + + late_bind->component.ops = NULL; +} + +static const struct component_ops xe_late_bind_component_ops = { + .bind = xe_late_bind_component_bind, + .unbind = xe_late_bind_component_unbind, +}; + +static void xe_late_bind_remove(void *arg) +{ + struct xe_late_bind *late_bind = arg; + struct xe_device *xe = late_bind_to_xe(late_bind); + + xe_late_bind_wait_for_worker_completion(late_bind); + + late_bind->component_added = false; + + component_del(xe->drm.dev, &xe_late_bind_component_ops); + if (late_bind->wq) { + destroy_workqueue(late_bind->wq); + 
late_bind->wq = NULL; + } +} + +/** + * xe_late_bind_init() - add xe mei late binding component + * @late_bind: pointer to late bind structure. + * + * Return: 0 if the initialization was successful, a negative errno otherwise. + */ +int xe_late_bind_init(struct xe_late_bind *late_bind) +{ + struct xe_device *xe = late_bind_to_xe(late_bind); + int err; + + if (!xe->info.has_late_bind) + return 0; + + if (!IS_ENABLED(CONFIG_INTEL_MEI_LB) || !IS_ENABLED(CONFIG_INTEL_MEI_GSC)) { + drm_info(&xe->drm, "Can't init xe mei late bind missing mei component\n"); + return 0; + } + + err = component_add_typed(xe->drm.dev, &xe_late_bind_component_ops, + INTEL_COMPONENT_LB); + if (err < 0) { + drm_err(&xe->drm, "Failed to add mei late bind component (%pe)\n", ERR_PTR(err)); + return err; + } + + late_bind->component_added = true; + + err = devm_add_action_or_reset(xe->drm.dev, xe_late_bind_remove, late_bind); + if (err) + return err; + + err = xe_late_bind_fw_init(late_bind); + if (err) + return err; + + return xe_late_bind_fw_load(late_bind); +} diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.h b/drivers/gpu/drm/xe/xe_late_bind_fw.h new file mode 100644 index 000000000000..07e437390539 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_LATE_BIND_FW_H_ +#define _XE_LATE_BIND_FW_H_ + +#include <linux/types.h> + +struct xe_late_bind; + +int xe_late_bind_init(struct xe_late_bind *late_bind); +int xe_late_bind_fw_load(struct xe_late_bind *late_bind); +void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind); + +#endif diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h new file mode 100644 index 000000000000..0f5da89ce98b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef 
_XE_LATE_BIND_TYPES_H_ +#define _XE_LATE_BIND_TYPES_H_ + +#include <linux/iosys-map.h> +#include <linux/mutex.h> +#include <linux/types.h> +#include <linux/workqueue.h> +#include "xe_uc_fw_abi.h" + +#define XE_LB_MAX_PAYLOAD_SIZE SZ_4K + +/** + * xe_late_bind_fw_id - enum to determine late binding fw index + */ +enum xe_late_bind_fw_id { + XE_LB_FW_FAN_CONTROL = 0, + XE_LB_FW_MAX_ID +}; + +/** + * struct xe_late_bind_fw + */ +struct xe_late_bind_fw { + /** @id: firmware index */ + u32 id; + /** @blob_path: firmware binary path */ + char blob_path[PATH_MAX]; + /** @type: firmware type */ + u32 type; + /** @flags: firmware flags */ + u32 flags; + /** @payload: to store the late binding blob */ + const u8 *payload; + /** @payload_size: late binding blob payload_size */ + size_t payload_size; + /** @work: worker to upload latebind blob */ + struct work_struct work; + /** @version: late binding blob manifest version */ + struct gsc_version version; +}; + +/** + * struct xe_late_bind_component - Late Binding services component + * @mei_dev: device that provide Late Binding service. + * @ops: Ops implemented by Late Binding driver, used by Xe driver. 
+ * + * Communication between Xe and MEI drivers for Late Binding services + */ +struct xe_late_bind_component { + struct device *mei_dev; + const struct intel_lb_component_ops *ops; +}; + +/** + * struct xe_late_bind + */ +struct xe_late_bind { + /** @component: struct for communication with mei component */ + struct xe_late_bind_component component; + /** @late_bind_fw: late binding firmware array */ + struct xe_late_bind_fw late_bind_fw[XE_LB_FW_MAX_ID]; + /** @wq: workqueue to submit request to download late bind blob */ + struct workqueue_struct *wq; + /** @component_added: whether the component has been added */ + bool component_added; + /** @disable: to block late binding reload during pm resume flow*/ + bool disable; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 63db66df064b..4dc1de482eee 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -11,12 +11,13 @@ #include "xe_assert.h" #include "xe_bo.h" +#include "xe_tlb_inval.h" #include "xe_lmtt.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_res_cursor.h" #include "xe_sriov.h" -#include "xe_sriov_printk.h" +#include "xe_tile_sriov_printk.h" /** * DOC: Local Memory Translation Table @@ -31,7 +32,7 @@ */ #define lmtt_assert(lmtt, condition) xe_tile_assert(lmtt_to_tile(lmtt), condition) -#define lmtt_debug(lmtt, msg...) xe_sriov_dbg_verbose(lmtt_to_xe(lmtt), "LMTT: " msg) +#define lmtt_debug(lmtt, msg...) 
xe_tile_sriov_dbg_verbose(lmtt_to_tile(lmtt), "LMTT: " msg) static bool xe_has_multi_level_lmtt(struct xe_device *xe) { @@ -66,18 +67,21 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level goto out; } - bo = xe_bo_create_pin_map(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), NULL, - PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) * - lmtt->ops->lmtt_pte_num(level)), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | - XE_BO_FLAG_NEEDS_64K); + bo = xe_bo_create_pin_map_novm(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), + PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) * + lmtt->ops->lmtt_pte_num(level)), + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | + XE_BO_FLAG_NEEDS_64K, false); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out_free_pt; } lmtt_assert(lmtt, xe_bo_is_vram(bo)); + lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE)); + + xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, xe_bo_size(bo)); pt->level = level; pt->bo = bo; @@ -91,6 +95,9 @@ out: static void lmtt_pt_free(struct xe_lmtt_pt *pt) { + lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%llx\n", + pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE)); + xe_bo_unpin_map_no_vm(pt->bo); kfree(pt); } @@ -188,14 +195,17 @@ static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt) struct xe_tile *tile = lmtt_to_tile(lmtt); struct xe_device *xe = tile_to_xe(tile); dma_addr_t offset = xe_bo_main_addr(lmtt->pd->bo, XE_PAGE_SIZE); + struct xe_gt *gt; + u8 id; lmtt_debug(lmtt, "DIR offset %pad\n", &offset); lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo)); lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K)); - xe_mmio_write32(&tile->mmio, - GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG, - LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K)); + for_each_gt_on_tile(gt, tile, id) + xe_mmio_write32(>->mmio, + GRAPHICS_VER(xe) >= 20 ? 
XE2_LMEM_CFG : LMEM_CFG, + LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K)); } /** @@ -216,6 +226,57 @@ void xe_lmtt_init_hw(struct xe_lmtt *lmtt) lmtt_setup_dir_ptr(lmtt); } +static int lmtt_invalidate_hw(struct xe_lmtt *lmtt) +{ + struct xe_tlb_inval_fence fences[XE_MAX_GT_PER_TILE]; + struct xe_tlb_inval_fence *fence = fences; + struct xe_tile *tile = lmtt_to_tile(lmtt); + struct xe_gt *gt; + int result = 0; + int err; + u8 id; + + for_each_gt_on_tile(gt, tile, id) { + xe_tlb_inval_fence_init(>->tlb_inval, fence, true); + err = xe_tlb_inval_all(>->tlb_inval, fence); + result = result ?: err; + fence++; + } + + lmtt_debug(lmtt, "num_fences=%d err=%d\n", (int)(fence - fences), result); + + /* + * It is fine to wait for all fences, even for those which covers the + * invalidation request that failed, as such fence should be already + * marked as signaled. + */ + fence = fences; + for_each_gt_on_tile(gt, tile, id) + xe_tlb_inval_fence_wait(fence++); + + return result; +} + +/** + * xe_lmtt_invalidate_hw - Invalidate LMTT hardware. + * @lmtt: the &xe_lmtt to invalidate + * + * Send requests to all GuCs on this tile to invalidate all TLBs. + * + * This function should be called only when running as a PF driver. 
+ */ +void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt) +{ + int err; + + lmtt_assert(lmtt, IS_SRIOV_PF(lmtt_to_xe(lmtt))); + + err = lmtt_invalidate_hw(lmtt); + if (err) + xe_tile_sriov_err(lmtt_to_tile(lmtt), "LMTT invalidation failed (%pe)", + ERR_PTR(err)); +} + static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, u64 pte, unsigned int idx) { @@ -226,9 +287,14 @@ static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, switch (lmtt->ops->lmtt_pte_size(level)) { case sizeof(u32): + lmtt_assert(lmtt, !overflows_type(pte, u32)); + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte); break; case sizeof(u64): + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte); break; default: @@ -265,6 +331,7 @@ static void lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid) return; lmtt_write_pte(lmtt, pd, LMTT_PTE_INVALID, vfid); + lmtt_invalidate_hw(lmtt); lmtt_assert(lmtt, pd->level > 0); lmtt_assert(lmtt, pt->level == pd->level - 1); @@ -386,11 +453,11 @@ static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo u64 addr, vram_offset; lmtt_assert(lmtt, IS_ALIGNED(start, page_size)); - lmtt_assert(lmtt, IS_ALIGNED(bo->size, page_size)); + lmtt_assert(lmtt, IS_ALIGNED(xe_bo_size(bo), page_size)); lmtt_assert(lmtt, xe_bo_is_vram(bo)); vram_offset = vram_region_gpu_offset(bo->ttm.resource); - xe_res_first(bo->ttm.resource, 0, bo->size, &cur); + xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); while (cur.remaining) { addr = xe_res_dma(&cur); addr += vram_offset; /* XXX */ diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h index cb10ef994db6..75a234fbf367 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.h +++ b/drivers/gpu/drm/xe/xe_lmtt.h @@ -15,6 +15,7 @@ struct xe_lmtt_ops; #ifdef CONFIG_PCI_IOV 
int xe_lmtt_init(struct xe_lmtt *lmtt); void xe_lmtt_init_hw(struct xe_lmtt *lmtt); +void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt); int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range); int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset); void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 61a2e87990a9..b5083c99dd50 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -8,6 +8,7 @@ #include <generated/xe_wa_oob.h> #include <linux/ascii85.h> +#include <linux/panic.h> #include "instructions/xe_mi_commands.h" #include "instructions/xe_gfxpipe_commands.h" @@ -16,6 +17,7 @@ #include "regs/xe_lrc_layout.h" #include "xe_bb.h" #include "xe_bo.h" +#include "xe_configfs.h" #include "xe_device.h" #include "xe_drm_client.h" #include "xe_exec_queue_types.h" @@ -39,14 +41,56 @@ #define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) #define LRC_PPHWSP_SIZE SZ_4K +#define LRC_INDIRECT_CTX_BO_SIZE SZ_4K #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K +/* + * Layout of the LRC and associated data allocated as + * lrc->bo: + * + * Region Size + * +============================+=================================+ <- __xe_lrc_ring_offset() + * | Ring | ring_size, see | + * | | xe_lrc_init() | + * +============================+=================================+ <- __xe_lrc_pphwsp_offset() + * | PPHWSP (includes SW state) | 4K | + * +----------------------------+---------------------------------+ <- __xe_lrc_regs_offset() + * | Engine Context Image | n * 4K, see | + * | | xe_gt_lrc_size() | + * +----------------------------+---------------------------------+ <- __xe_lrc_indirect_ring_offset() + * | Indirect Ring State Page | 0 or 4k, see | + * | | XE_LRC_FLAG_INDIRECT_RING_STATE | + * +============================+=================================+ <- __xe_lrc_indirect_ctx_offset() + * | Indirect Context Page | 0 or 
4k, see | + * | | XE_LRC_FLAG_INDIRECT_CTX | + * +============================+=================================+ <- __xe_lrc_wa_bb_offset() + * | WA BB Per Ctx | 4k | + * +============================+=================================+ <- xe_bo_size(lrc->bo) + */ + static struct xe_device * lrc_to_xe(struct xe_lrc *lrc) { return gt_to_xe(lrc->fence_ctx.gt); } +static bool +gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (XE_GT_WA(gt, 16010904313) && + (class == XE_ENGINE_CLASS_RENDER || + class == XE_ENGINE_CLASS_COMPUTE)) + return true; + + if (xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev), + class, NULL)) + return true; + + return false; +} + size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) { struct xe_device *xe = gt_to_xe(gt); @@ -581,8 +625,6 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) if (xe_gt_has_indirect_ring_state(hwe->gt)) regs[CTX_CONTEXT_CONTROL] |= _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); - - /* TODO: Timestamp */ } static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) @@ -654,15 +696,21 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) #define LRC_SEQNO_PPHWSP_OFFSET 512 #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) #define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) +#define LRC_ENGINE_ID_PPHWSP_OFFSET 1024 #define LRC_PARALLEL_PPHWSP_OFFSET 2048 -#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096 u32 xe_lrc_regs_offset(struct xe_lrc *lrc) { return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; } -static size_t lrc_reg_size(struct xe_device *xe) +/** + * xe_lrc_reg_size() - Get size of the LRC registers area within queues + * @xe: the &xe_device struct instance + * + * Returns: Size of the LRC registers area for current platform + */ +size_t xe_lrc_reg_size(struct xe_device *xe) { if (GRAPHICS_VERx100(xe) >= 1250) return 96 * sizeof(u32); @@ -672,7 
+720,7 @@ static size_t lrc_reg_size(struct xe_device *xe) size_t xe_lrc_skip_size(struct xe_device *xe) { - return LRC_PPHWSP_SIZE + lrc_reg_size(xe); + return LRC_PPHWSP_SIZE + xe_lrc_reg_size(xe); } static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) @@ -716,8 +764,23 @@ static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc) static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) { - /* Indirect ring state page is at the very end of LRC */ - return lrc->size - LRC_INDIRECT_RING_STATE_SIZE; + u32 offset = xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - + LRC_INDIRECT_RING_STATE_SIZE; + + if (lrc->flags & XE_LRC_FLAG_INDIRECT_CTX) + offset -= LRC_INDIRECT_CTX_BO_SIZE; + + return offset; +} + +static inline u32 __xe_lrc_indirect_ctx_offset(struct xe_lrc *lrc) +{ + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_CTX_BO_SIZE; +} + +static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc) +{ + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE; } #define DECL_MAP_ADDR_HELPERS(elem) \ @@ -898,6 +961,47 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe) return data; } +/** + * xe_default_lrc_update_memirq_regs_with_address - Re-compute GGTT references in default LRC + * of given engine. + * @hwe: the &xe_hw_engine struct instance + */ +void xe_default_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe) +{ + struct xe_gt *gt = hwe->gt; + u32 *regs; + + if (!gt->default_lrc[hwe->class]) + return; + + regs = gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE; + set_memory_based_intr(regs, hwe); +} + +/** + * xe_lrc_update_memirq_regs_with_address - Re-compute GGTT references in mem interrupt data + * for given LRC. 
+ * @lrc: the &xe_lrc struct instance + * @hwe: the &xe_hw_engine struct instance + * @regs: scratch buffer to be used as temporary storage + */ +void xe_lrc_update_memirq_regs_with_address(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + u32 *regs) +{ + struct xe_gt *gt = hwe->gt; + struct iosys_map map; + size_t regs_len; + + if (!xe_device_uses_memirq(gt_to_xe(gt))) + return; + + map = __xe_lrc_regs_map(lrc); + regs_len = xe_lrc_reg_size(gt_to_xe(gt)); + xe_map_memcpy_from(gt_to_xe(gt), regs, &map, 0, regs_len); + set_memory_based_intr(regs, hwe); + xe_map_memcpy_to(gt_to_xe(gt), &map, 0, regs, regs_len); +} + static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) { u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt)); @@ -909,17 +1013,12 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) static void xe_lrc_finish(struct xe_lrc *lrc) { xe_hw_fence_ctx_finish(&lrc->fence_ctx); - xe_bo_lock(lrc->bo, false); - xe_bo_unpin(lrc->bo); - xe_bo_unlock(lrc->bo); - xe_bo_put(lrc->bo); - xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo); + xe_bo_unpin_map_no_vm(lrc->bo); } /* - * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active - * context run ticks. - * @lrc: Pointer to the lrc. + * wa_bb_setup_utilization() - Write commands to wa bb to assist + * in calculating active context run ticks. * * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the * context, but only gets updated when the context switches out. In order to @@ -944,11 +1043,15 @@ static void xe_lrc_finish(struct xe_lrc *lrc) * store it in the PPHSWP. 
*/ #define CONTEXT_ACTIVE 1ULL -static void xe_lrc_setup_utilization(struct xe_lrc *lrc) +static ssize_t setup_utilization_wa(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, + size_t max_len) { - u32 *cmd; + u32 *cmd = batch; - cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + if (xe_gt_WARN_ON(lrc->gt, max_len < 12)) + return -ENOSPC; *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; *cmd++ = ENGINE_ID(0).addr; @@ -967,88 +1070,388 @@ static void xe_lrc_setup_utilization(struct xe_lrc *lrc) *cmd++ = upper_32_bits(CONTEXT_ACTIVE); } - *cmd++ = MI_BATCH_BUFFER_END; + return cmd - batch; +} + +static ssize_t setup_timestamp_wa(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + u32 *batch, size_t max_len) +{ + const u32 ts_addr = __xe_lrc_ctx_timestamp_ggtt_addr(lrc); + u32 *cmd = batch; + + if (!XE_GT_WA(lrc->gt, 16010904313) || + !(hwe->class == XE_ENGINE_CLASS_RENDER || + hwe->class == XE_ENGINE_CLASS_COMPUTE || + hwe->class == XE_ENGINE_CLASS_COPY || + hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE || + hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE)) + return 0; + + if (xe_gt_WARN_ON(lrc->gt, max_len < 12)) + return -ENOSPC; + + *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO | + MI_LRM_ASYNC; + *cmd++ = RING_CTX_TIMESTAMP(0).addr; + *cmd++ = ts_addr; + *cmd++ = 0; + + *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO | + MI_LRM_ASYNC; + *cmd++ = RING_CTX_TIMESTAMP(0).addr; + *cmd++ = ts_addr; + *cmd++ = 0; + + *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO; + *cmd++ = RING_CTX_TIMESTAMP(0).addr; + *cmd++ = ts_addr; + *cmd++ = 0; + + return cmd - batch; +} + +static ssize_t setup_configfs_post_ctx_restore_bb(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, size_t max_len) +{ + struct xe_device *xe = gt_to_xe(lrc->gt); + const u32 *user_batch; + u32 *cmd = batch; + u32 count; + + count = xe_configfs_get_ctx_restore_post_bb(to_pci_dev(xe->drm.dev), + hwe->class, 
&user_batch); + if (!count) + return 0; + + if (count > max_len) + return -ENOSPC; + + /* + * This should be used only for tests and validation. Taint the kernel + * as anything could be submitted directly in context switches + */ + add_taint(TAINT_TEST, LOCKDEP_STILL_OK); + + memcpy(cmd, user_batch, count * sizeof(u32)); + cmd += count; + + return cmd - batch; +} + +static ssize_t setup_configfs_mid_ctx_restore_bb(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, size_t max_len) +{ + struct xe_device *xe = gt_to_xe(lrc->gt); + const u32 *user_batch; + u32 *cmd = batch; + u32 count; + + count = xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev), + hwe->class, &user_batch); + if (!count) + return 0; + + if (count > max_len) + return -ENOSPC; + + /* + * This should be used only for tests and validation. Taint the kernel + * as anything could be submitted directly in context switches + */ + add_taint(TAINT_TEST, LOCKDEP_STILL_OK); + + memcpy(cmd, user_batch, count * sizeof(u32)); + cmd += count; + + return cmd - batch; +} + +static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, size_t max_len) +{ + u32 *cmd = batch; + + if (!XE_GT_WA(lrc->gt, 18022495364) || + hwe->class != XE_ENGINE_CLASS_RENDER) + return 0; + + if (xe_gt_WARN_ON(lrc->gt, max_len < 3)) + return -ENOSPC; + + *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cmd++ = CS_DEBUG_MODE1(0).addr; + *cmd++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE); + + return cmd - batch; +} + +struct bo_setup { + ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + u32 *batch, size_t max_size); +}; + +struct bo_setup_state { + /* Input: */ + struct xe_lrc *lrc; + struct xe_hw_engine *hwe; + size_t max_size; + size_t reserve_dw; + unsigned int offset; + const struct bo_setup *funcs; + unsigned int num_funcs; + + /* State: */ + u32 *buffer; + u32 *ptr; + unsigned int written; +}; + +static int setup_bo(struct 
bo_setup_state *state) +{ + ssize_t remain; + + if (state->lrc->bo->vmap.is_iomem) { + xe_gt_assert(state->hwe->gt, state->buffer); + state->ptr = state->buffer; + } else { + state->ptr = state->lrc->bo->vmap.vaddr + state->offset; + } + + remain = state->max_size / sizeof(u32); + + for (size_t i = 0; i < state->num_funcs; i++) { + ssize_t len = state->funcs[i].setup(state->lrc, state->hwe, + state->ptr, remain); + + remain -= len; + + /* + * Caller has asked for at least reserve_dw to remain unused. + */ + if (len < 0 || + xe_gt_WARN_ON(state->lrc->gt, remain < state->reserve_dw)) + goto fail; + + state->ptr += len; + state->written += len; + } + + return 0; + +fail: + return -ENOSPC; +} + +static void finish_bo(struct bo_setup_state *state) +{ + if (!state->lrc->bo->vmap.is_iomem) + return; + + xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap, + state->offset, state->buffer, + state->written * sizeof(u32)); +} + +/** + * xe_lrc_setup_wa_bb_with_scratch - Execute all wa bb setup callbacks. 
+ * @lrc: the &xe_lrc struct instance + * @hwe: the &xe_hw_engine struct instance + * @scratch: preallocated scratch buffer for temporary storage + * Return: 0 on success, negative error code on failure + */ +int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *scratch) +{ + static const struct bo_setup funcs[] = { + { .setup = setup_timestamp_wa }, + { .setup = setup_invalidate_state_cache_wa }, + { .setup = setup_utilization_wa }, + { .setup = setup_configfs_post_ctx_restore_bb }, + }; + struct bo_setup_state state = { + .lrc = lrc, + .hwe = hwe, + .max_size = LRC_WA_BB_SIZE, + .buffer = scratch, + .reserve_dw = 1, + .offset = __xe_lrc_wa_bb_offset(lrc), + .funcs = funcs, + .num_funcs = ARRAY_SIZE(funcs), + }; + int ret; + + ret = setup_bo(&state); + if (ret) + return ret; + + *state.ptr++ = MI_BATCH_BUFFER_END; + state.written++; + + finish_bo(&state); xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, - xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + xe_bo_ggtt_addr(lrc->bo) + state.offset + 1); + return 0; } -#define PVC_CTX_ASID (0x2e + 1) -#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) +static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +{ + u32 *buf = NULL; + int ret; + + if (lrc->bo->vmap.is_iomem) { + buf = kmalloc(LRC_WA_BB_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + } + + ret = xe_lrc_setup_wa_bb_with_scratch(lrc, hwe, buf); + + kfree(buf); + + return ret; +} + +static int +setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +{ + static const struct bo_setup rcs_funcs[] = { + { .setup = setup_timestamp_wa }, + { .setup = setup_configfs_mid_ctx_restore_bb }, + }; + static const struct bo_setup xcs_funcs[] = { + { .setup = setup_configfs_mid_ctx_restore_bb }, + }; + struct bo_setup_state state = { + .lrc = lrc, + .hwe = hwe, + .max_size = (63 * 64) /* max 63 cachelines */, + .buffer = NULL, + .offset = __xe_lrc_indirect_ctx_offset(lrc), + }; + int ret; + + if (!(lrc->flags & 
XE_LRC_FLAG_INDIRECT_CTX)) + return 0; + + if (hwe->class == XE_ENGINE_CLASS_RENDER || + hwe->class == XE_ENGINE_CLASS_COMPUTE) { + state.funcs = rcs_funcs; + state.num_funcs = ARRAY_SIZE(rcs_funcs); + } else { + state.funcs = xcs_funcs; + state.num_funcs = ARRAY_SIZE(xcs_funcs); + } + + if (xe_gt_WARN_ON(lrc->gt, !state.funcs)) + return 0; + + if (lrc->bo->vmap.is_iomem) { + state.buffer = kmalloc(state.max_size, GFP_KERNEL); + if (!state.buffer) + return -ENOMEM; + } + + ret = setup_bo(&state); + if (ret) { + kfree(state.buffer); + return ret; + } + + /* + * Align to 64B cacheline so there's no garbage at the end for CS to + * execute: size for indirect ctx must be a multiple of 64. + */ + while (state.written & 0xf) { + *state.ptr++ = MI_NOOP; + state.written++; + } + + finish_bo(&state); + kfree(state.buffer); + + /* + * Enable INDIRECT_CTX leaving INDIRECT_CTX_OFFSET at its default: it + * varies per engine class, but the default is good enough + */ + xe_lrc_write_ctx_reg(lrc, + CTX_CS_INDIRECT_CTX, + (xe_bo_ggtt_addr(lrc->bo) + state.offset) | + /* Size in CLs. 
*/ + (state.written * sizeof(u32) / 64)); + + return 0; +} static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_vm *vm, u32 ring_size, u16 msix_vec, u32 init_flags) { struct xe_gt *gt = hwe->gt; + const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class); + u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); struct iosys_map map; - void *init_data = NULL; u32 arb_enable; - u32 lrc_size; u32 bo_flags; int err; kref_init(&lrc->refcount); lrc->gt = gt; + lrc->size = lrc_size; lrc->flags = 0; - lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); + lrc->ring.size = ring_size; + lrc->ring.tail = 0; + + if (gt_engine_needs_indirect_ctx(gt, hwe->class)) { + lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX; + bo_size += LRC_INDIRECT_CTX_BO_SIZE; + } + if (xe_gt_has_indirect_ring_state(gt)) lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE; - if (vm && vm->xef) /* userspace */ - bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; - /* - * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address - * via VM bind calls. 
- */ - lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size, - ttm_bo_type_kernel, - bo_flags); + if ((vm && vm->xef) || init_flags & XE_LRC_CREATE_USER_CTX) /* userspace */ + bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE | XE_BO_FLAG_FORCE_USER_VRAM; + + lrc->bo = xe_bo_create_pin_map_novm(xe, tile, + bo_size, + ttm_bo_type_kernel, + bo_flags, false); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); - lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, - ttm_bo_type_kernel, - bo_flags); - if (IS_ERR(lrc->bb_per_ctx_bo)) { - err = PTR_ERR(lrc->bb_per_ctx_bo); - goto err_lrc_finish; - } - - lrc->size = lrc_size; - lrc->ring.size = ring_size; - lrc->ring.tail = 0; - xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, hwe->fence_irq, hwe->name); - if (!gt->default_lrc[hwe->class]) { - init_data = empty_lrc_data(hwe); - if (!init_data) { - err = -ENOMEM; - goto err_lrc_finish; - } - } - /* * Init Per-Process of HW status Page, LRC / context state to known - * values + * values. If there's already a primed default_lrc, just copy it, otherwise + * it's the early submission to record the lrc: build a new empty one from + * scratch. 
*/ map = __xe_lrc_pphwsp_map(lrc); - if (!init_data) { + if (gt->default_lrc[hwe->class]) { xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, - xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE); + lrc_size - LRC_PPHWSP_SIZE); } else { - xe_map_memcpy_to(xe, &map, 0, init_data, - xe_gt_lrc_size(gt, hwe->class)); + void *init_data = empty_lrc_data(hwe); + + if (!init_data) { + err = -ENOMEM; + goto err_lrc_finish; + } + + xe_map_memcpy_to(xe, &map, 0, init_data, lrc_size); kfree(init_data); } @@ -1102,7 +1505,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0); if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); + xe_lrc_write_ctx_reg(lrc, CTX_ASID, vm->usm.asid); lrc->desc = LRC_VALID; lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT); @@ -1128,7 +1531,13 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, map = __xe_lrc_start_seqno_map(lrc); xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); - xe_lrc_setup_utilization(lrc); + err = setup_wa_bb(lrc, hwe); + if (err) + goto err_lrc_finish; + + err = setup_indirect_ctx(lrc, hwe); + if (err) + goto err_lrc_finish; return 0; @@ -1184,6 +1593,23 @@ void xe_lrc_destroy(struct kref *ref) kfree(lrc); } +/** + * xe_lrc_update_hwctx_regs_with_address - Re-compute GGTT references within given LRC. 
+ * @lrc: the &xe_lrc struct instance + */ +void xe_lrc_update_hwctx_regs_with_address(struct xe_lrc *lrc) +{ + if (xe_lrc_has_indirect_ring_state(lrc)) { + xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, + __xe_lrc_indirect_ring_ggtt_addr(lrc)); + + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START, + __xe_lrc_ring_ggtt_addr(lrc)); + } else { + xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); + } +} + void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail) { if (xe_lrc_has_indirect_ring_state(lrc)) @@ -1722,7 +2148,7 @@ static const struct instr_state xe_hpg_svg_state[] = { { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, }; -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) +u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs) { struct xe_gt *gt = q->hwe->gt; struct xe_device *xe = gt_to_xe(gt); @@ -1749,7 +2175,7 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b * continue to emit all of the SVG state since it's best not to leak * any of the state between contexts, even if that leakage is harmless. 
*/ - if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { + if (XE_GT_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { state_table = xe_hpg_svg_state; state_table_size = ARRAY_SIZE(xe_hpg_svg_state); } @@ -1757,7 +2183,7 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b if (!state_table) { xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); - return; + return cs; } for (int i = 0; i < state_table_size; i++) { @@ -1780,12 +2206,14 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b instr == CMD_3DSTATE_DRAWING_RECTANGLE) instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; - bb->cs[bb->len] = instr; + *cs = instr; if (!is_single_dw) - bb->cs[bb->len] |= (num_dw - 2); + *cs |= (num_dw - 2); - bb->len += num_dw; + cs += num_dw; } + + return cs; } struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) @@ -1795,9 +2223,6 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; - if (lrc->bo->vm) - xe_vm_get(lrc->bo->vm); - snapshot->context_desc = xe_lrc_ggtt_addr(lrc); snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc); snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); @@ -1809,7 +2234,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; + snapshot->lrc_size = lrc->size; snapshot->lrc_snapshot = NULL; snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); @@ -1819,14 +2244,12 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) { struct xe_bo *bo; - struct xe_vm *vm; 
struct iosys_map src; if (!snapshot) return; bo = snapshot->lrc_bo; - vm = bo->vm; snapshot->lrc_bo = NULL; snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); @@ -1846,8 +2269,6 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) xe_bo_unlock(bo); put_bo: xe_bo_put(bo); - if (vm) - xe_vm_put(vm); } void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) @@ -1900,14 +2321,9 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) return; kvfree(snapshot->lrc_snapshot); - if (snapshot->lrc_bo) { - struct xe_vm *vm; - - vm = snapshot->lrc_bo->vm; + if (snapshot->lrc_bo) xe_bo_put(snapshot->lrc_bo); - if (vm) - xe_vm_put(vm); - } + kfree(snapshot); } diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index eb6e8de8c939..2fb628da5c43 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -42,8 +42,12 @@ struct xe_lrc_snapshot { #define LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR (0x34 * 4) #define LRC_PPHWSP_PXP_INVAL_SCRATCH_ADDR (0x40 * 4) -#define XE_LRC_CREATE_RUNALONE 0x1 -#define XE_LRC_CREATE_PXP 0x2 +#define LRC_WA_BB_SIZE SZ_4K + +#define XE_LRC_CREATE_RUNALONE BIT(0) +#define XE_LRC_CREATE_PXP BIT(1) +#define XE_LRC_CREATE_USER_CTX BIT(2) + struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, u32 ring_size, u16 msix_vec, u32 flags); void xe_lrc_destroy(struct kref *ref); @@ -72,6 +76,16 @@ static inline void xe_lrc_put(struct xe_lrc *lrc) kref_put(&lrc->refcount, xe_lrc_destroy); } +/** + * xe_lrc_ring_size() - Xe LRC ring size + * + * Return: Size of LRC ring buffer + */ +static inline size_t xe_lrc_ring_size(void) +{ + return SZ_16K; +} + size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class); u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); u32 xe_lrc_regs_offset(struct xe_lrc *lrc); @@ -88,6 +102,10 @@ bool xe_lrc_ring_is_idle(struct xe_lrc *lrc); u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc); u32 
xe_lrc_ggtt_addr(struct xe_lrc *lrc); u32 *xe_lrc_regs(struct xe_lrc *lrc); +void xe_lrc_update_hwctx_regs_with_address(struct xe_lrc *lrc); +void xe_default_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe); +void xe_lrc_update_memirq_regs_with_address(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + u32 *regs); u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr); void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val); @@ -106,13 +124,14 @@ s32 xe_lrc_start_seqno(struct xe_lrc *lrc); u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc); struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc); +size_t xe_lrc_reg_size(struct xe_device *xe); size_t xe_lrc_skip_size(struct xe_device *xe); void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, enum xe_engine_class); -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb); +u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs); struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc); void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); @@ -124,6 +143,8 @@ u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc); u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); +int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + u32 *scratch); /** * xe_lrc_update_timestamp - readout LRC timestamp and update cached value diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index ae24cf6f8dd9..e9883706e004 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -22,14 +22,15 @@ struct xe_lrc { */ struct xe_bo *bo; - /** @size: size of lrc including any indirect ring state page */ + /** @size: size of the lrc and optional indirect ring state */ u32 size; /** @gt: gt which this LRC belongs to */ struct xe_gt *gt; /** @flags: 
LRC flags */ -#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 +#define XE_LRC_FLAG_INDIRECT_CTX 0x1 +#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x2 u32 flags; /** @refcount: ref count of this lrc */ @@ -53,9 +54,6 @@ struct xe_lrc { /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ u64 ctx_timestamp; - - /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */ - struct xe_bo *bb_per_ctx_bo; }; struct xe_lrc_snapshot; diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h index f62e0c8b67ab..c44777125691 100644 --- a/drivers/gpu/drm/xe/xe_map.h +++ b/drivers/gpu/drm/xe/xe_map.h @@ -14,9 +14,9 @@ * DOC: Map layer * * All access to any memory shared with a device (both sysmem and vram) in the - * XE driver should go through this layer (xe_map). This layer is built on top + * Xe driver should go through this layer (xe_map). This layer is built on top * of :ref:`driver-api/device-io:Generalizing Access to System and I/O Memory` - * and with extra hooks into the XE driver that allows adding asserts to memory + * and with extra hooks into the Xe driver that allows adding asserts to memory * accesses (e.g. for blocking runtime_pm D3Cold on Discrete Graphics). */ diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index 49c45ec3e83c..b0c7ce0a5d1e 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -14,16 +14,15 @@ #include "xe_device.h" #include "xe_device_types.h" #include "xe_gt.h" -#include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_hw_engine.h" #include "xe_map.h" #include "xe_memirq.h" +#include "xe_tile_printk.h" #define memirq_assert(m, condition) xe_tile_assert(memirq_to_tile(m), condition) #define memirq_printk(m, _level, _fmt, ...) 
\ - drm_##_level(&memirq_to_xe(m)->drm, "MEMIRQ%u: " _fmt, \ - memirq_to_tile(m)->id, ##__VA_ARGS__) + xe_tile_##_level(memirq_to_tile(m), "MEMIRQ: " _fmt, ##__VA_ARGS__) #ifdef CONFIG_DRM_XE_DEBUG_MEMIRQ #define memirq_debug(m, _fmt, ...) memirq_printk(m, dbg, _fmt, ##__VA_ARGS__) @@ -398,8 +397,9 @@ void xe_memirq_postinstall(struct xe_memirq *memirq) memirq_set_enable(memirq, true); } -static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector, - u16 offset, const char *name) +static bool __memirq_received(struct xe_memirq *memirq, + struct iosys_map *vector, u16 offset, + const char *name, bool clear) { u8 value; @@ -409,19 +409,33 @@ static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector, memirq_err_ratelimited(memirq, "Unexpected memirq value %#x from %s at %u\n", value, name, offset); - iosys_map_wr(vector, offset, u8, 0x00); + if (clear) + iosys_map_wr(vector, offset, u8, 0x00); } return value; } +static bool memirq_received_noclear(struct xe_memirq *memirq, + struct iosys_map *vector, + u16 offset, const char *name) +{ + return __memirq_received(memirq, vector, offset, name, false); +} + +static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector, + u16 offset, const char *name) +{ + return __memirq_received(memirq, vector, offset, name, true); +} + static void memirq_dispatch_engine(struct xe_memirq *memirq, struct iosys_map *status, struct xe_hw_engine *hwe) { memirq_debug(memirq, "STATUS %s %*ph\n", hwe->name, 16, status->vaddr); - if (memirq_received(memirq, status, ilog2(GT_RENDER_USER_INTERRUPT), hwe->name)) - xe_hw_engine_handle_irq(hwe, GT_RENDER_USER_INTERRUPT); + if (memirq_received(memirq, status, ilog2(GT_MI_USER_INTERRUPT), hwe->name)) + xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT); } static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *status, @@ -434,8 +448,16 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat if 
(memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name)) xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST); - if (memirq_received(memirq, status, ilog2(GUC_INTR_SW_INT_0), name)) + /* + * This is a software interrupt that must be cleared after it's consumed + * to avoid race conditions where xe_gt_sriov_vf_recovery_pending() + * returns false. + */ + if (memirq_received_noclear(memirq, status, ilog2(GUC_INTR_SW_INT_0), + name)) { xe_guc_irq_handler(guc, GUC_INTR_SW_INT_0); + iosys_map_wr(status, ilog2(GUC_INTR_SW_INT_0), u8, 0x00); + } } /** @@ -461,6 +483,23 @@ void xe_memirq_hwe_handler(struct xe_memirq *memirq, struct xe_hw_engine *hwe) } /** + * xe_memirq_guc_sw_int_0_irq_pending() - SW_INT_0 IRQ is pending + * @memirq: the &xe_memirq + * @guc: the &xe_guc to check for IRQ + * + * Return: True if SW_INT_0 IRQ is pending on @guc, False otherwise + */ +bool xe_memirq_guc_sw_int_0_irq_pending(struct xe_memirq *memirq, struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 offset = xe_gt_is_media_type(gt) ? ilog2(INTR_MGUC) : ilog2(INTR_GUC); + struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&memirq->status, offset * SZ_16); + + return memirq_received_noclear(memirq, &map, ilog2(GUC_INTR_SW_INT_0), + guc_name(guc)); +} + +/** * xe_memirq_handler - The `Memory Based Interrupts`_ Handler. 
* @memirq: the &xe_memirq * diff --git a/drivers/gpu/drm/xe/xe_memirq.h b/drivers/gpu/drm/xe/xe_memirq.h index 06130650e9d6..e25d2234ab87 100644 --- a/drivers/gpu/drm/xe/xe_memirq.h +++ b/drivers/gpu/drm/xe/xe_memirq.h @@ -25,4 +25,6 @@ void xe_memirq_handler(struct xe_memirq *memirq); int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc); +bool xe_memirq_guc_sw_int_0_irq_pending(struct xe_memirq *memirq, struct xe_guc *guc); + #endif diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 8f8e9fdfb2a8..5a95b08a4723 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -9,6 +9,7 @@ #include <linux/sizes.h> #include <drm/drm_managed.h> +#include <drm/drm_pagemap.h> #include <drm/ttm/ttm_tt.h> #include <uapi/drm/xe_drm.h> @@ -28,12 +29,16 @@ #include "xe_lrc.h" #include "xe_map.h" #include "xe_mocs.h" +#include "xe_printk.h" #include "xe_pt.h" #include "xe_res_cursor.h" +#include "xe_sa.h" #include "xe_sched_job.h" #include "xe_sync.h" #include "xe_trace_bo.h" +#include "xe_validation.h" #include "xe_vm.h" +#include "xe_vram.h" /** * struct xe_migrate - migrate context. @@ -53,6 +58,13 @@ struct xe_migrate { u64 usm_batch_base_ofs; /** @cleared_mem_ofs: VM offset of @cleared_bo. */ u64 cleared_mem_ofs; + /** @large_page_copy_ofs: VM offset of 2M pages used for large copies */ + u64 large_page_copy_ofs; + /** + * @large_page_copy_pdes: BO offset to writeout 2M pages (PDEs) used for + * large copies + */ + u64 large_page_copy_pdes; /** * @fence: dma-fence representing the last migration job batch. * Protected by @job_mutex. @@ -82,20 +94,7 @@ struct xe_migrate { * of the instruction. Subtracting the instruction header (1 dword) and * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values. */ -#define MAX_PTE_PER_SDI 0x1FE - -/** - * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue. - * @tile: The tile. 
- * - * Returns the default migrate exec queue of this tile. - * - * Return: The default migrate exec queue - */ -struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile) -{ - return tile->migrate->q; -} +#define MAX_PTE_PER_SDI 0x1FEU static void xe_migrate_fini(void *arg) { @@ -130,38 +129,39 @@ static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr, bool is_comp_pte) u64 identity_offset = IDENTITY_OFFSET; if (GRAPHICS_VER(xe) >= 20 && is_comp_pte) - identity_offset += DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); + identity_offset += DIV_ROUND_UP_ULL(xe_vram_region_actual_physical_size + (xe->mem.vram), SZ_1G); - addr -= xe->mem.vram.dpa_base; + addr -= xe_vram_region_dpa_base(xe->mem.vram); return addr + (identity_offset << xe_pt_shift(2)); } static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm, struct xe_bo *bo, u64 map_ofs, u64 vram_offset, u16 pat_index, u64 pt_2m_ofs) { + struct xe_vram_region *vram = xe->mem.vram; + resource_size_t dpa_base = xe_vram_region_dpa_base(vram); u64 pos, ofs, flags; u64 entry; /* XXX: Unclear if this should be usable_size? */ - u64 vram_limit = xe->mem.vram.actual_physical_size + - xe->mem.vram.dpa_base; + u64 vram_limit = xe_vram_region_actual_physical_size(vram) + dpa_base; u32 level = 2; ofs = map_ofs + XE_PAGE_SIZE * level + vram_offset * 8; flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, true, 0); - xe_assert(xe, IS_ALIGNED(xe->mem.vram.usable_size, SZ_2M)); + xe_assert(xe, IS_ALIGNED(xe_vram_region_usable_size(vram), SZ_2M)); /* * Use 1GB pages when possible, last chunk always use 2M * pages as mixing reserved memory (stolen, WOCPM) with a single * mapping is not allowed on certain platforms. 
*/ - for (pos = xe->mem.vram.dpa_base; pos < vram_limit; + for (pos = dpa_base; pos < vram_limit; pos += SZ_1G, ofs += 8) { if (pos + SZ_1G >= vram_limit) { - entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs, - pat_index); + entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs); xe_map_wr(xe, &bo->vmap, ofs, u64, entry); flags = vm->pt_ops->pte_encode_addr(xe, 0, @@ -182,7 +182,7 @@ static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm, } static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, - struct xe_vm *vm) + struct xe_vm *vm, struct drm_exec *exec) { struct xe_device *xe = tile_to_xe(tile); u16 pat_index = xe->pat.idx[XE_CACHE_WB]; @@ -203,19 +203,19 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1)); /* Need to be sure everything fits in the first PT, or create more */ - xe_tile_assert(tile, m->batch_base_ofs + batch->size < SZ_2M); + xe_tile_assert(tile, m->batch_base_ofs + xe_bo_size(batch) < SZ_2M); bo = xe_bo_create_pin_map(vm->xe, tile, vm, num_entries * XE_PAGE_SIZE, ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_PAGETABLE); + XE_BO_FLAG_PAGETABLE, exec); if (IS_ERR(bo)) return PTR_ERR(bo); /* PT30 & PT31 reserved for 2M identity map */ - pt29_ofs = bo->size - 3 * XE_PAGE_SIZE; - entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs, pat_index); + pt29_ofs = xe_bo_size(bo) - 3 * XE_PAGE_SIZE; + entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs); xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); map_ofs = (num_entries - num_setup) * XE_PAGE_SIZE; @@ -236,7 +236,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (!IS_DGFX(xe)) { /* Write out batch too */ m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; - for (i = 0; i < batch->size; + for (i = 0; i < xe_bo_size(batch); i += vm->flags & XE_VM_FLAG_64K ? 
XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { entry = vm->pt_ops->pte_encode_bo(batch, i, @@ -247,13 +247,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, level++; } if (xe->info.has_usm) { - xe_tile_assert(tile, batch->size == SZ_1M); + xe_tile_assert(tile, xe_bo_size(batch) == SZ_1M); batch = tile->primary_gt->usm.bb_pool->bo; m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M; - xe_tile_assert(tile, batch->size == SZ_512K); + xe_tile_assert(tile, xe_bo_size(batch) == SZ_512K); - for (i = 0; i < batch->size; + for (i = 0; i < xe_bo_size(batch); i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { entry = vm->pt_ops->pte_encode_bo(batch, i, @@ -283,20 +283,25 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, flags = XE_PDE_64K; entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) * - XE_PAGE_SIZE, pat_index); + XE_PAGE_SIZE); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64, entry | flags); } /* Write PDE's that point to our BO. */ - for (i = 0; i < map_ofs / PAGE_SIZE; i++) { - entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE, - pat_index); + for (i = 0; i < map_ofs / XE_PAGE_SIZE; i++) { + entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE + (i + 1) * 8, u64, entry); } + /* Reserve 2M PDEs */ + level = 1; + m->large_page_copy_ofs = NUM_PT_SLOTS << xe_pt_shift(level); + m->large_page_copy_pdes = map_ofs + XE_PAGE_SIZE * level + + NUM_PT_SLOTS * 8; + /* Set up a 1GiB NULL mapping at 255GiB offset. 
*/ level = 2; xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level + 255 * 8, u64, @@ -306,12 +311,12 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Identity map the entire vram at 256GiB offset */ if (IS_DGFX(xe)) { - u64 pt30_ofs = bo->size - 2 * XE_PAGE_SIZE; + u64 pt30_ofs = xe_bo_size(bo) - 2 * XE_PAGE_SIZE; + resource_size_t actual_phy_size = xe_vram_region_actual_physical_size(xe->mem.vram); xe_migrate_program_identity(xe, vm, bo, map_ofs, IDENTITY_OFFSET, pat_index, pt30_ofs); - xe_assert(xe, xe->mem.vram.actual_physical_size <= - (MAX_NUM_PTE - IDENTITY_OFFSET) * SZ_1G); + xe_assert(xe, actual_phy_size <= (MAX_NUM_PTE - IDENTITY_OFFSET) * SZ_1G); /* * Identity map the entire vram for compressed pat_index for xe2+ @@ -320,11 +325,11 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (GRAPHICS_VER(xe) >= 20 && xe_device_has_flat_ccs(xe)) { u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION]; u64 vram_offset = IDENTITY_OFFSET + - DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); - u64 pt31_ofs = bo->size - XE_PAGE_SIZE; + DIV_ROUND_UP_ULL(actual_phy_size, SZ_1G); + u64 pt31_ofs = xe_bo_size(bo) - XE_PAGE_SIZE; - xe_assert(xe, xe->mem.vram.actual_physical_size <= (MAX_NUM_PTE - - IDENTITY_OFFSET - IDENTITY_OFFSET / 2) * SZ_1G); + xe_assert(xe, actual_phy_size <= (MAX_NUM_PTE - IDENTITY_OFFSET - + IDENTITY_OFFSET / 2) * SZ_1G); xe_migrate_program_identity(xe, vm, bo, map_ofs, vram_offset, comp_pat_index, pt31_ofs); } @@ -387,38 +392,63 @@ static bool xe_migrate_needs_ccs_emit(struct xe_device *xe) } /** - * xe_migrate_init() - Initialize a migrate context - * @tile: Back-pointer to the tile we're initializing for. + * xe_migrate_alloc - Allocate a migrate struct for a given &xe_tile + * @tile: &xe_tile + * + * Allocates a &xe_migrate for a given tile. * - * Return: Pointer to a migrate context on success. Error pointer on error. 
+ * Return: &xe_migrate on success, or NULL when out of memory. */ -struct xe_migrate *xe_migrate_init(struct xe_tile *tile) +struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile) +{ + struct xe_migrate *m = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*m), GFP_KERNEL); + + if (m) + m->tile = tile; + return m; +} + +static int xe_migrate_lock_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, struct xe_vm *vm) { struct xe_device *xe = tile_to_xe(tile); + struct xe_validation_ctx ctx; + struct drm_exec exec; + int err = 0; + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { + err = xe_vm_drm_exec_lock(vm, &exec); + drm_exec_retry_on_contention(&exec); + err = xe_migrate_prepare_vm(tile, m, vm, &exec); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + } + + return err; +} + +/** + * xe_migrate_init() - Initialize a migrate context + * @m: The migration context + * + * Return: 0 if successful, negative error code on failure + */ +int xe_migrate_init(struct xe_migrate *m) +{ + struct xe_tile *tile = m->tile; struct xe_gt *primary_gt = tile->primary_gt; - struct xe_migrate *m; + struct xe_device *xe = tile_to_xe(tile); struct xe_vm *vm; int err; - m = devm_kzalloc(xe->drm.dev, sizeof(*m), GFP_KERNEL); - if (!m) - return ERR_PTR(-ENOMEM); - - m->tile = tile; - /* Special layout, prepared below.. 
*/ vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION | - XE_VM_FLAG_SET_TILE_ID(tile)); + XE_VM_FLAG_SET_TILE_ID(tile), NULL); if (IS_ERR(vm)) - return ERR_CAST(vm); + return PTR_ERR(vm); - xe_vm_lock(vm, false); - err = xe_migrate_prepare_vm(tile, m, vm); - xe_vm_unlock(vm); - if (err) { - xe_vm_close_and_put(vm); - return ERR_PTR(err); - } + err = xe_migrate_lock_prepare_vm(tile, m, vm); + if (err) + goto err_out; if (xe->info.has_usm) { struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt, @@ -427,8 +457,10 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) false); u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt); - if (!hwe || !logical_mask) - return ERR_PTR(-EINVAL); + if (!hwe || !logical_mask) { + err = -EINVAL; + goto err_out; + } /* * XXX: Currently only reserving 1 (likely slow) BCS instance on @@ -437,16 +469,18 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe, EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT | - EXEC_QUEUE_FLAG_HIGH_PRIORITY, 0); + EXEC_QUEUE_FLAG_HIGH_PRIORITY | + EXEC_QUEUE_FLAG_MIGRATE, 0); } else { m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, EXEC_QUEUE_FLAG_KERNEL | - EXEC_QUEUE_FLAG_PERMANENT, 0); + EXEC_QUEUE_FLAG_PERMANENT | + EXEC_QUEUE_FLAG_MIGRATE, 0); } if (IS_ERR(m->q)) { - xe_vm_close_and_put(vm); - return ERR_CAST(m->q); + err = PTR_ERR(m->q); + goto err_out; } mutex_init(&m->job_mutex); @@ -456,7 +490,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) err = devm_add_action_or_reset(xe->drm.dev, xe_migrate_fini, m); if (err) - return ERR_PTR(err); + return err; if (IS_DGFX(xe)) { if (xe_migrate_needs_ccs_emit(xe)) @@ -471,7 +505,12 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) (unsigned long long)m->min_chunk_size); } - return m; + return err; + +err_out: + xe_vm_close_and_put(vm); + return err; + } static u64 max_mem_transfer_per_pass(struct xe_device *xe) @@ -661,9 +700,9 @@ static 
void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb, } #define EMIT_COPY_DW 10 -static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, - u64 src_ofs, u64 dst_ofs, unsigned int size, - unsigned int pitch) +static void emit_xy_fast_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, + u64 dst_ofs, unsigned int size, + unsigned int pitch) { struct xe_device *xe = gt_to_xe(gt); u32 mocs = 0; @@ -692,6 +731,61 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, bb->cs[bb->len++] = upper_32_bits(src_ofs); } +#define PAGE_COPY_MODE_PS SZ_256 /* hw uses 256 bytes as the page-size */ +static void emit_mem_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, + u64 dst_ofs, unsigned int size, unsigned int pitch) +{ + u32 mode, copy_type, width; + + xe_gt_assert(gt, IS_ALIGNED(size, pitch)); + xe_gt_assert(gt, pitch <= U16_MAX); + xe_gt_assert(gt, pitch); + xe_gt_assert(gt, size); + + if (IS_ALIGNED(size, PAGE_COPY_MODE_PS) && + IS_ALIGNED(lower_32_bits(src_ofs), PAGE_COPY_MODE_PS) && + IS_ALIGNED(lower_32_bits(dst_ofs), PAGE_COPY_MODE_PS)) { + mode = MEM_COPY_PAGE_COPY_MODE; + copy_type = 0; /* linear copy */ + width = size / PAGE_COPY_MODE_PS; + } else if (pitch > 1) { + xe_gt_assert(gt, size / pitch <= U16_MAX); + mode = 0; /* BYTE_COPY */ + copy_type = MEM_COPY_MATRIX_COPY; + width = pitch; + } else { + mode = 0; /* BYTE_COPY */ + copy_type = 0; /* linear copy */ + width = size; + } + + xe_gt_assert(gt, width <= U16_MAX); + + bb->cs[bb->len++] = MEM_COPY_CMD | mode | copy_type; + bb->cs[bb->len++] = width - 1; + bb->cs[bb->len++] = size / pitch - 1; /* ignored by hw for page-copy/linear above */ + bb->cs[bb->len++] = pitch - 1; + bb->cs[bb->len++] = pitch - 1; + bb->cs[bb->len++] = lower_32_bits(src_ofs); + bb->cs[bb->len++] = upper_32_bits(src_ofs); + bb->cs[bb->len++] = lower_32_bits(dst_ofs); + bb->cs[bb->len++] = upper_32_bits(dst_ofs); + bb->cs[bb->len++] = FIELD_PREP(MEM_COPY_SRC_MOCS_INDEX_MASK, gt->mocs.uc_index) | + 
FIELD_PREP(MEM_COPY_DST_MOCS_INDEX_MASK, gt->mocs.uc_index); +} + +static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, + u64 src_ofs, u64 dst_ofs, unsigned int size, + unsigned int pitch) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe->info.has_mem_copy_instr) + emit_mem_copy(gt, bb, src_ofs, dst_ofs, size, pitch); + else + emit_xy_fast_copy(gt, bb, src_ofs, dst_ofs, size, pitch); +} + static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm) { return usm ? m->usm_batch_base_ofs : m->batch_base_ofs; @@ -768,7 +862,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; - u64 size = src_bo->size; + u64 size = xe_bo_size(src_bo); struct xe_res_cursor src_it, dst_it, ccs_it; u64 src_L0_ofs, dst_L0_ofs; u32 src_L0_pt, dst_L0_pt; @@ -791,7 +885,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (XE_WARN_ON(copy_ccs && src_bo != dst_bo)) return ERR_PTR(-EINVAL); - if (src_bo != dst_bo && XE_WARN_ON(src_bo->size != dst_bo->size)) + if (src_bo != dst_bo && XE_WARN_ON(xe_bo_size(src_bo) != xe_bo_size(dst_bo))) return ERR_PTR(-EINVAL); if (!src_is_vram) @@ -809,7 +903,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, &ccs_it); while (size) { - u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */ + u32 batch_size = 1; /* MI_BATCH_BUFFER_END */ struct xe_sched_job *job; struct xe_bb *bb; u32 flush_flags = 0; @@ -834,11 +928,15 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, batch_size += pte_update_size(m, pte_flags, src, &src_it, &src_L0, &src_L0_ofs, &src_L0_pt, 0, 0, avail_pts); - - pte_flags = dst_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0; - batch_size += pte_update_size(m, pte_flags, dst, &dst_it, &src_L0, - &dst_L0_ofs, &dst_L0_pt, 0, - avail_pts, avail_pts); + if (copy_only_ccs) { + dst_L0_ofs = src_L0_ofs; + } else { + pte_flags = dst_is_vram ? 
PTE_UPDATE_FLAG_IS_VRAM : 0; + batch_size += pte_update_size(m, pte_flags, dst, + &dst_it, &src_L0, + &dst_L0_ofs, &dst_L0_pt, + 0, avail_pts, avail_pts); + } if (copy_system_ccs) { xe_assert(xe, type_device); @@ -863,12 +961,12 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); else - emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, + emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat, &src_it, src_L0, src); if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) xe_res_next(&dst_it, src_L0); - else + else if (!copy_only_ccs) emit_pte(m, bb, dst_L0_pt, dst_is_vram, copy_system_ccs, &dst_it, src_L0, dst); @@ -896,11 +994,11 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, goto err; } - xe_sched_job_add_migrate_flush(job, flush_flags); + xe_sched_job_add_migrate_flush(job, flush_flags | MI_INVALIDATE_TLB); if (!fence) { err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv, DMA_RESV_USAGE_BOOKKEEP); - if (!err && src_bo != dst_bo) + if (!err && src_bo->ttm.base.resv != dst_bo->ttm.base.resv) err = xe_sched_job_add_deps(job, dst_bo->ttm.base.resv, DMA_RESV_USAGE_BOOKKEEP); if (err) @@ -940,6 +1038,301 @@ err_sync: return fence; } +/** + * xe_migrate_lrc() - Get the LRC from migrate context. + * @migrate: Migrate context. + * + * Return: Pointer to LRC on success, error on failure + */ +struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate) +{ + return migrate->q->lrc[0]; +} + +static u64 migrate_vm_ppgtt_addr_tlb_inval(void) +{ + /* + * The migrate VM is self-referential so it can modify its own PTEs (see + * pte_update_size() or emit_pte() functions). We reserve NUM_KERNEL_PDE + * entries for kernel operations (copies, clears, CCS migrate), and + * suballocate the rest to user operations (binds/unbinds). 
With + * NUM_KERNEL_PDE = 15, NUM_KERNEL_PDE - 1 is already used for PTE updates, + * so assign NUM_KERNEL_PDE - 2 for TLB invalidation. + */ + return (NUM_KERNEL_PDE - 2) * XE_PAGE_SIZE; +} + +static int emit_flush_invalidate(u32 *dw, int i, u32 flags) +{ + u64 addr = migrate_vm_ppgtt_addr_tlb_inval(); + + dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | + MI_FLUSH_IMM_DW | flags; + dw[i++] = lower_32_bits(addr); + dw[i++] = upper_32_bits(addr); + dw[i++] = MI_NOOP; + dw[i++] = MI_NOOP; + + return i; +} + +/** + * xe_migrate_ccs_rw_copy() - Copy content of TTM resources. + * @tile: Tile whose migration context to be used. + * @q : Execution to be used along with migration context. + * @src_bo: The buffer object @src is currently bound to. + * @read_write : Creates BB commands for CCS read/write. + * + * Creates batch buffer instructions to copy CCS metadata from CCS pool to + * memory and vice versa. + * + * This function should only be called for IGPU. + * + * Return: 0 if successful, negative error code on failure. 
+ */ +int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, + struct xe_bo *src_bo, + enum xe_sriov_vf_ccs_rw_ctxs read_write) + +{ + bool src_is_pltt = read_write == XE_SRIOV_VF_CCS_READ_CTX; + bool dst_is_pltt = read_write == XE_SRIOV_VF_CCS_WRITE_CTX; + struct ttm_resource *src = src_bo->ttm.resource; + struct xe_migrate *m = tile->migrate; + struct xe_gt *gt = tile->primary_gt; + u32 batch_size, batch_size_allocated; + struct xe_device *xe = gt_to_xe(gt); + struct xe_res_cursor src_it, ccs_it; + u64 size = xe_bo_size(src_bo); + struct xe_bb *bb = NULL; + u64 src_L0, src_L0_ofs; + u32 src_L0_pt; + int err; + + xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it); + + xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo), + PAGE_ALIGN(xe_device_ccs_bytes(xe, size)), + &ccs_it); + + /* Calculate Batch buffer size */ + batch_size = 0; + while (size) { + batch_size += 10; /* Flush + ggtt addr + 2 NOP */ + u64 ccs_ofs, ccs_size; + u32 ccs_pt; + + u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; + + src_L0 = min_t(u64, max_mem_transfer_per_pass(xe), size); + + batch_size += pte_update_size(m, false, src, &src_it, &src_L0, + &src_L0_ofs, &src_L0_pt, 0, 0, + avail_pts); + + ccs_size = xe_device_ccs_bytes(xe, src_L0); + batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs, + &ccs_pt, 0, avail_pts, avail_pts); + xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); + + /* Add copy commands size here */ + batch_size += EMIT_COPY_CCS_DW; + + size -= src_L0; + } + + bb = xe_bb_ccs_new(gt, batch_size, read_write); + if (IS_ERR(bb)) { + drm_err(&xe->drm, "BB allocation failed.\n"); + err = PTR_ERR(bb); + goto err_ret; + } + + batch_size_allocated = batch_size; + size = xe_bo_size(src_bo); + batch_size = 0; + + /* + * Emit PTE and copy commands here. + * The CCS copy command can only support limited size. If the size to be + * copied is more than the limit, divide copy into chunks. 
So, calculate + * sizes here again before copy command is emitted. + */ + while (size) { + batch_size += 10; /* Flush + ggtt addr + 2 NOP */ + u32 flush_flags = 0; + u64 ccs_ofs, ccs_size; + u32 ccs_pt; + + u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; + + src_L0 = xe_migrate_res_sizes(m, &src_it); + + batch_size += pte_update_size(m, false, src, &src_it, &src_L0, + &src_L0_ofs, &src_L0_pt, 0, 0, + avail_pts); + + ccs_size = xe_device_ccs_bytes(xe, src_L0); + batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs, + &ccs_pt, 0, avail_pts, avail_pts); + xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); + batch_size += EMIT_COPY_CCS_DW; + + emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src); + + emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); + + bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); + flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt, + src_L0_ofs, dst_is_pltt, + src_L0, ccs_ofs, true); + bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); + + size -= src_L0; + } + + xe_assert(xe, (batch_size_allocated == bb->len)); + src_bo->bb_ccs[read_write] = bb; + + return 0; + +err_ret: + return err; +} + +/** + * xe_get_migrate_exec_queue() - Get the execution queue from migrate context. + * @migrate: Migrate context. + * + * Return: Pointer to execution queue on success, error on failure + */ +struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate) +{ + return migrate->q; +} + +/** + * xe_migrate_vram_copy_chunk() - Copy a chunk of a VRAM buffer object. + * @vram_bo: The VRAM buffer object. + * @vram_offset: The VRAM offset. + * @sysmem_bo: The sysmem buffer object. + * @sysmem_offset: The sysmem offset. + * @size: The size of VRAM chunk to copy. + * @dir: The direction of the copy operation. + * + * Copies a portion of a buffer object between VRAM and system memory. 
+ * On Xe2 platforms that support flat CCS, VRAM data is decompressed when + * copying to system memory. + * + * Return: Pointer to a dma_fence representing the last copy batch, or + * an error pointer on failure. If there is a failure, any copy operation + * started by the function call has been synced. + */ +struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset, + struct xe_bo *sysmem_bo, u64 sysmem_offset, + u64 size, enum xe_migrate_copy_dir dir) +{ + struct xe_device *xe = xe_bo_device(vram_bo); + struct xe_tile *tile = vram_bo->tile; + struct xe_gt *gt = tile->primary_gt; + struct xe_migrate *m = tile->migrate; + struct dma_fence *fence = NULL; + struct ttm_resource *vram = vram_bo->ttm.resource; + struct ttm_resource *sysmem = sysmem_bo->ttm.resource; + struct xe_res_cursor vram_it, sysmem_it; + u64 vram_L0_ofs, sysmem_L0_ofs; + u32 vram_L0_pt, sysmem_L0_pt; + u64 vram_L0, sysmem_L0; + bool to_sysmem = (dir == XE_MIGRATE_COPY_TO_SRAM); + bool use_comp_pat = to_sysmem && + GRAPHICS_VER(xe) >= 20 && xe_device_has_flat_ccs(xe); + int pass = 0; + int err; + + xe_assert(xe, IS_ALIGNED(vram_offset | sysmem_offset | size, PAGE_SIZE)); + xe_assert(xe, xe_bo_is_vram(vram_bo)); + xe_assert(xe, !xe_bo_is_vram(sysmem_bo)); + xe_assert(xe, !range_overflows(vram_offset, size, (u64)vram_bo->ttm.base.size)); + xe_assert(xe, !range_overflows(sysmem_offset, size, (u64)sysmem_bo->ttm.base.size)); + + xe_res_first(vram, vram_offset, size, &vram_it); + xe_res_first_sg(xe_bo_sg(sysmem_bo), sysmem_offset, size, &sysmem_it); + + while (size) { + u32 pte_flags = PTE_UPDATE_FLAG_IS_VRAM; + u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */ + struct xe_sched_job *job; + struct xe_bb *bb; + u32 update_idx; + bool usm = xe->info.has_usm; + u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; + + sysmem_L0 = xe_migrate_res_sizes(m, &sysmem_it); + vram_L0 = min(xe_migrate_res_sizes(m, &vram_it), sysmem_L0); + + xe_dbg(xe, 
"Pass %u, size: %llu\n", pass++, vram_L0); + + pte_flags |= use_comp_pat ? PTE_UPDATE_FLAG_IS_COMP_PTE : 0; + batch_size += pte_update_size(m, pte_flags, vram, &vram_it, &vram_L0, + &vram_L0_ofs, &vram_L0_pt, 0, 0, avail_pts); + + batch_size += pte_update_size(m, 0, sysmem, &sysmem_it, &vram_L0, &sysmem_L0_ofs, + &sysmem_L0_pt, 0, avail_pts, avail_pts); + batch_size += EMIT_COPY_DW; + + bb = xe_bb_new(gt, batch_size, usm); + if (IS_ERR(bb)) { + err = PTR_ERR(bb); + return ERR_PTR(err); + } + + if (xe_migrate_allow_identity(vram_L0, &vram_it)) + xe_res_next(&vram_it, vram_L0); + else + emit_pte(m, bb, vram_L0_pt, true, use_comp_pat, &vram_it, vram_L0, vram); + + emit_pte(m, bb, sysmem_L0_pt, false, false, &sysmem_it, vram_L0, sysmem); + + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + update_idx = bb->len; + + if (to_sysmem) + emit_copy(gt, bb, vram_L0_ofs, sysmem_L0_ofs, vram_L0, XE_PAGE_SIZE); + else + emit_copy(gt, bb, sysmem_L0_ofs, vram_L0_ofs, vram_L0, XE_PAGE_SIZE); + + job = xe_bb_create_migration_job(m->q, bb, xe_migrate_batch_base(m, usm), + update_idx); + if (IS_ERR(job)) { + xe_bb_free(bb, NULL); + err = PTR_ERR(job); + return ERR_PTR(err); + } + + xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB); + + xe_assert(xe, dma_resv_test_signaled(vram_bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP)); + xe_assert(xe, dma_resv_test_signaled(sysmem_bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP)); + + scoped_guard(mutex, &m->job_mutex) { + xe_sched_job_arm(job); + dma_fence_put(fence); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + dma_fence_put(m->fence); + m->fence = dma_fence_get(fence); + } + + xe_bb_free(bb, fence); + size -= vram_L0; + } + + return fence; +} + static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u32 size, u32 pitch) { @@ -1064,7 +1457,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_device *xe = gt_to_xe(gt); bool clear_only_system_ccs = false; struct 
dma_fence *fence = NULL; - u64 size = bo->size; + u64 size = xe_bo_size(bo); struct xe_res_cursor src_it; struct ttm_resource *src = dst; int err; @@ -1076,9 +1469,9 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, clear_only_system_ccs = true; if (!clear_vram) - xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); + xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &src_it); else - xe_res_first(src, 0, bo->size, &src_it); + xe_res_first(src, 0, xe_bo_size(bo), &src_it); while (size) { u64 clear_L0_ofs; @@ -1097,7 +1490,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, /* Calculate final sizes and batch size.. */ pte_flags = clear_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0; - batch_size = 2 + + batch_size = 1 + pte_update_size(m, pte_flags, src, &src_it, &clear_L0, &clear_L0_ofs, &clear_L0_pt, clear_bo_data ? emit_clear_cmd_len(gt) : 0, 0, @@ -1119,11 +1512,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, size -= clear_L0; /* Preemption is enabled again by the ring ops. 
*/ - if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) + if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) { xe_res_next(&src_it, clear_L0); - else - emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs, - &src_it, clear_L0, dst); + } else { + emit_pte(m, bb, clear_L0_pt, clear_vram, + clear_only_system_ccs, &src_it, clear_L0, dst); + flush_flags |= MI_INVALIDATE_TLB; + } bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; @@ -1134,7 +1529,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, if (xe_migrate_needs_ccs_emit(xe)) { emit_copy_ccs(gt, bb, clear_L0_ofs, true, m->cleared_mem_ofs, false, clear_L0); - flush_flags = MI_FLUSH_DW_CCS; + flush_flags |= MI_FLUSH_DW_CCS; } job = xe_bb_create_migration_job(m->q, bb, @@ -1407,7 +1802,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m, if (idx == chunk) goto next_cmd; - xe_tile_assert(tile, pt_bo->size == SZ_4K); + xe_tile_assert(tile, xe_bo_size(pt_bo) == SZ_4K); /* Map a PT at most once */ if (pt_bo->update_index < 0) @@ -1469,6 +1864,8 @@ next_cmd: goto err_sa; } + xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB); + if (ops->pre_commit) { pt_update->job = job; err = ops->pre_commit(pt_update); @@ -1553,15 +1950,17 @@ static u32 pte_update_cmd_size(u64 size) u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE); XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER); + /* * MI_STORE_DATA_IMM command is used to update page table. Each - * instruction can update maximumly 0x1ff pte entries. To update - * n (n <= 0x1ff) pte entries, we need: - * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) - * 2 dword for the page table's physical location - * 2*n dword for value of pte to fill (each pte entry is 2 dwords) + * instruction can update maximumly MAX_PTE_PER_SDI pte entries. 
To + * update n (n <= MAX_PTE_PER_SDI) pte entries, we need: + * + * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) + * - 2 dword for the page table's physical location + * - 2*n dword for value of pte to fill (each pte entry is 2 dwords) */ - num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff); + num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI); num_dword += entries * 2; return num_dword; @@ -1569,15 +1968,22 @@ static u32 pte_update_cmd_size(u64 size) static void build_pt_update_batch_sram(struct xe_migrate *m, struct xe_bb *bb, u32 pt_offset, - dma_addr_t *sram_addr, u32 size) + struct drm_pagemap_addr *sram_addr, + u32 size, int level) { u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB]; + u64 gpu_page_size = 0x1ull << xe_pt_shift(level); u32 ptes; int i = 0; - ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); + xe_tile_assert(m->tile, PAGE_ALIGNED(size)); + + ptes = DIV_ROUND_UP(size, gpu_page_size); while (ptes) { - u32 chunk = min(0x1ffU, ptes); + u32 chunk = min(MAX_PTE_PER_SDI, ptes); + + if (!level) + chunk = ALIGN_DOWN(chunk, PAGE_SIZE / XE_PAGE_SIZE); bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = pt_offset; @@ -1587,53 +1993,101 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, ptes -= chunk; while (chunk--) { - u64 addr = sram_addr[i++] & PAGE_MASK; + u64 addr = sram_addr[i].addr; + u64 pte; + xe_tile_assert(m->tile, sram_addr[i].proto == + DRM_INTERCONNECT_SYSTEM); xe_tile_assert(m->tile, addr); - addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, - addr, pat_index, - 0, false, 0); - bb->cs[bb->len++] = lower_32_bits(addr); - bb->cs[bb->len++] = upper_32_bits(addr); + xe_tile_assert(m->tile, PAGE_ALIGNED(addr)); + +again: + pte = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, + addr, pat_index, + level, false, 0); + bb->cs[bb->len++] = lower_32_bits(pte); + bb->cs[bb->len++] = upper_32_bits(pte); + + if (gpu_page_size < PAGE_SIZE) { + addr += XE_PAGE_SIZE; + if 
(!PAGE_ALIGNED(addr)) { + chunk--; + goto again; + } + i++; + } else { + i += gpu_page_size / PAGE_SIZE; + } } } } -enum xe_migrate_copy_dir { - XE_MIGRATE_COPY_TO_VRAM, - XE_MIGRATE_COPY_TO_SRAM, -}; +static bool xe_migrate_vram_use_pde(struct drm_pagemap_addr *sram_addr, + unsigned long size) +{ + u32 large_size = (0x1 << xe_pt_shift(1)); + unsigned long i, incr = large_size / PAGE_SIZE; + + for (i = 0; i < DIV_ROUND_UP(size, PAGE_SIZE); i += incr) + if (PAGE_SIZE << sram_addr[i].order != large_size) + return false; + + return true; +} #define XE_CACHELINE_BYTES 64ull #define XE_CACHELINE_MASK (XE_CACHELINE_BYTES - 1) +static u32 xe_migrate_copy_pitch(struct xe_device *xe, u32 len) +{ + u32 pitch; + + if (IS_ALIGNED(len, PAGE_SIZE)) + pitch = PAGE_SIZE; + else if (IS_ALIGNED(len, SZ_4K)) + pitch = SZ_4K; + else if (IS_ALIGNED(len, SZ_256)) + pitch = SZ_256; + else if (IS_ALIGNED(len, 4)) + pitch = 4; + else + pitch = 1; + + xe_assert(xe, pitch > 1 || xe->info.has_mem_copy_instr); + return pitch; +} + static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, unsigned long len, unsigned long sram_offset, - dma_addr_t *sram_addr, u64 vram_addr, + struct drm_pagemap_addr *sram_addr, + u64 vram_addr, + struct dma_fence *deps, const enum xe_migrate_copy_dir dir) { struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); bool use_usm_batch = xe->info.has_usm; struct dma_fence *fence = NULL; - u32 batch_size = 2; + u32 batch_size = 1; u64 src_L0_ofs, dst_L0_ofs; struct xe_sched_job *job; struct xe_bb *bb; u32 update_idx, pt_slot = 0; unsigned long npages = DIV_ROUND_UP(len + sram_offset, PAGE_SIZE); - unsigned int pitch = len >= PAGE_SIZE && !(len & ~PAGE_MASK) ? 
- PAGE_SIZE : 4; + unsigned int pitch = xe_migrate_copy_pitch(xe, len); int err; + unsigned long i, j; + bool use_pde = xe_migrate_vram_use_pde(sram_addr, len + sram_offset); - if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) || - (sram_offset | vram_addr) & XE_CACHELINE_MASK)) + if (!xe->info.has_mem_copy_instr && + drm_WARN_ON(&xe->drm, + (!IS_ALIGNED(len, pitch)) || (sram_offset | vram_addr) & XE_CACHELINE_MASK)) return ERR_PTR(-EOPNOTSUPP); xe_assert(xe, npages * PAGE_SIZE <= MAX_PREEMPTDISABLE_TRANSFER); - batch_size += pte_update_cmd_size(len); + batch_size += pte_update_cmd_size(npages << PAGE_SHIFT); batch_size += EMIT_COPY_DW; bb = xe_bb_new(gt, batch_size, use_usm_batch); @@ -1642,16 +2096,44 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, return ERR_PTR(err); } - build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE, - sram_addr, len + sram_offset); + /* + * If the order of a struct drm_pagemap_addr entry is greater than 0, + * the entry is populated by GPU pagemap but subsequent entries within + * the range of that order are not populated. + * build_pt_update_batch_sram() expects a fully populated array of + * struct drm_pagemap_addr. Ensure this is the case even with higher + * orders. 
+ */ + for (i = 0; !use_pde && i < npages;) { + unsigned int order = sram_addr[i].order; + + for (j = 1; j < NR_PAGES(order) && i + j < npages; j++) + if (!sram_addr[i + j].addr) + sram_addr[i + j].addr = sram_addr[i].addr + j * PAGE_SIZE; + + i += NR_PAGES(order); + } + + if (use_pde) + build_pt_update_batch_sram(m, bb, m->large_page_copy_pdes, + sram_addr, npages << PAGE_SHIFT, 1); + else + build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE, + sram_addr, npages << PAGE_SHIFT, 0); if (dir == XE_MIGRATE_COPY_TO_VRAM) { - src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; + if (use_pde) + src_L0_ofs = m->large_page_copy_ofs + sram_offset; + else + src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false); } else { src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false); - dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; + if (use_pde) + dst_L0_ofs = m->large_page_copy_ofs + sram_offset; + else + dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; } bb->cs[bb->len++] = MI_BATCH_BUFFER_END; @@ -1667,7 +2149,15 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, goto err; } - xe_sched_job_add_migrate_flush(job, 0); + xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB); + + if (deps && !dma_fence_is_signaled(deps)) { + dma_fence_get(deps); + err = drm_sched_job_add_dependency(&job->drm, deps); + if (err) + dma_fence_wait(deps, false); + err = 0; + } mutex_lock(&m->job_mutex); xe_sched_job_arm(job); @@ -1692,21 +2182,24 @@ err: * xe_migrate_to_vram() - Migrate to VRAM * @m: The migration context. * @npages: Number of pages to migrate. - * @src_addr: Array of dma addresses (source of migrate) + * @src_addr: Array of DMA information (source of migrate) * @dst_addr: Device physical address of VRAM (destination of migrate) + * @deps: struct dma_fence representing the dependencies that need + * to be signaled before migration. 
* * Copy from an array dma addresses to a VRAM device physical address * - * Return: dma fence for migrate to signal completion on succees, ERR_PTR on + * Return: dma fence for migrate to signal completion on success, ERR_PTR on * failure */ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, unsigned long npages, - dma_addr_t *src_addr, - u64 dst_addr) + struct drm_pagemap_addr *src_addr, + u64 dst_addr, + struct dma_fence *deps) { return xe_migrate_vram(m, npages * PAGE_SIZE, 0, src_addr, dst_addr, - XE_MIGRATE_COPY_TO_VRAM); + deps, XE_MIGRATE_COPY_TO_VRAM); } /** @@ -1714,71 +2207,78 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, * @m: The migration context. * @npages: Number of pages to migrate. * @src_addr: Device physical address of VRAM (source of migrate) - * @dst_addr: Array of dma addresses (destination of migrate) + * @dst_addr: Array of DMA information (destination of migrate) + * @deps: struct dma_fence representing the dependencies that need + * to be signaled before migration. 
* * Copy from a VRAM device physical address to an array dma addresses * - * Return: dma fence for migrate to signal completion on succees, ERR_PTR on + * Return: dma fence for migrate to signal completion on success, ERR_PTR on * failure */ struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, unsigned long npages, u64 src_addr, - dma_addr_t *dst_addr) + struct drm_pagemap_addr *dst_addr, + struct dma_fence *deps) { return xe_migrate_vram(m, npages * PAGE_SIZE, 0, dst_addr, src_addr, - XE_MIGRATE_COPY_TO_SRAM); + deps, XE_MIGRATE_COPY_TO_SRAM); } -static void xe_migrate_dma_unmap(struct xe_device *xe, dma_addr_t *dma_addr, +static void xe_migrate_dma_unmap(struct xe_device *xe, + struct drm_pagemap_addr *pagemap_addr, int len, int write) { unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE); for (i = 0; i < npages; ++i) { - if (!dma_addr[i]) + if (!pagemap_addr[i].addr) break; - dma_unmap_page(xe->drm.dev, dma_addr[i], PAGE_SIZE, + dma_unmap_page(xe->drm.dev, pagemap_addr[i].addr, PAGE_SIZE, write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); } - kfree(dma_addr); + kfree(pagemap_addr); } -static dma_addr_t *xe_migrate_dma_map(struct xe_device *xe, - void *buf, int len, int write) +static struct drm_pagemap_addr *xe_migrate_dma_map(struct xe_device *xe, + void *buf, int len, + int write) { - dma_addr_t *dma_addr; + struct drm_pagemap_addr *pagemap_addr; unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE); - dma_addr = kcalloc(npages, sizeof(*dma_addr), GFP_KERNEL); - if (!dma_addr) + pagemap_addr = kcalloc(npages, sizeof(*pagemap_addr), GFP_KERNEL); + if (!pagemap_addr) return ERR_PTR(-ENOMEM); for (i = 0; i < npages; ++i) { dma_addr_t addr; struct page *page; + enum dma_data_direction dir = write ? DMA_TO_DEVICE : + DMA_FROM_DEVICE; if (is_vmalloc_addr(buf)) page = vmalloc_to_page(buf); else page = virt_to_page(buf); - addr = dma_map_page(xe->drm.dev, - page, 0, PAGE_SIZE, - write ? 
DMA_TO_DEVICE : - DMA_FROM_DEVICE); + addr = dma_map_page(xe->drm.dev, page, 0, PAGE_SIZE, dir); if (dma_mapping_error(xe->drm.dev, addr)) goto err_fault; - dma_addr[i] = addr; + pagemap_addr[i] = + drm_pagemap_addr_encode(addr, + DRM_INTERCONNECT_SYSTEM, + 0, dir); buf += PAGE_SIZE; } - return dma_addr; + return pagemap_addr; err_fault: - xe_migrate_dma_unmap(xe, dma_addr, len, write); + xe_migrate_dma_unmap(xe, pagemap_addr, len, write); return ERR_PTR(-EFAULT); } @@ -1807,7 +2307,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, struct xe_device *xe = tile_to_xe(tile); struct xe_res_cursor cursor; struct dma_fence *fence = NULL; - dma_addr_t *dma_addr; + struct drm_pagemap_addr *pagemap_addr; unsigned long page_offset = (unsigned long)buf & ~PAGE_MASK; int bytes_left = len, current_page = 0; void *orig_buf = buf; @@ -1815,18 +2315,24 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, xe_bo_assert_held(bo); /* Use bounce buffer for small access and unaligned access */ - if (len & XE_CACHELINE_MASK || - ((uintptr_t)buf | offset) & XE_CACHELINE_MASK) { + if (!xe->info.has_mem_copy_instr && + (!IS_ALIGNED(len, 4) || + !IS_ALIGNED(page_offset, XE_CACHELINE_BYTES) || + !IS_ALIGNED(offset, XE_CACHELINE_BYTES))) { int buf_offset = 0; + void *bounce; + int err; + + BUILD_BUG_ON(!is_power_of_2(XE_CACHELINE_BYTES)); + bounce = kmalloc(XE_CACHELINE_BYTES, GFP_KERNEL); + if (!bounce) + return -ENOMEM; /* * Less than ideal for large unaligned access but this should be * fairly rare, can fixup if this becomes common. 
*/ do { - u8 bounce[XE_CACHELINE_BYTES]; - void *ptr = (void *)bounce; - int err; int copy_bytes = min_t(int, bytes_left, XE_CACHELINE_BYTES - (offset & XE_CACHELINE_MASK)); @@ -1835,22 +2341,22 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, - (void *)ptr, - sizeof(bounce), 0); + bounce, + XE_CACHELINE_BYTES, 0); if (err) - return err; + break; if (write) { - memcpy(ptr + ptr_offset, buf + buf_offset, copy_bytes); + memcpy(bounce + ptr_offset, buf + buf_offset, copy_bytes); err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, - (void *)ptr, - sizeof(bounce), 0); + bounce, + XE_CACHELINE_BYTES, write); if (err) - return err; + break; } else { - memcpy(buf + buf_offset, ptr + ptr_offset, + memcpy(buf + buf_offset, bounce + ptr_offset, copy_bytes); } @@ -1859,20 +2365,22 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, offset += copy_bytes; } while (bytes_left); - return 0; + kfree(bounce); + return err; } - dma_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write); - if (IS_ERR(dma_addr)) - return PTR_ERR(dma_addr); + pagemap_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write); + if (IS_ERR(pagemap_addr)) + return PTR_ERR(pagemap_addr); - xe_res_first(bo->ttm.resource, offset, bo->size - offset, &cursor); + xe_res_first(bo->ttm.resource, offset, xe_bo_size(bo) - offset, &cursor); do { struct dma_fence *__fence; u64 vram_addr = vram_region_gpu_offset(bo->ttm.resource) + cursor.start; int current_bytes; + u32 pitch; if (cursor.size > MAX_PREEMPTDISABLE_TRANSFER) current_bytes = min_t(int, bytes_left, @@ -1880,21 +2388,30 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, else current_bytes = min_t(int, bytes_left, cursor.size); - if (fence) - dma_fence_put(fence); + pitch = xe_migrate_copy_pitch(xe, current_bytes); + if (xe->info.has_mem_copy_instr) + current_bytes = min_t(int, current_bytes, U16_MAX * 
pitch); + else + current_bytes = min_t(int, current_bytes, + round_down(S16_MAX * pitch, + XE_CACHELINE_BYTES)); __fence = xe_migrate_vram(m, current_bytes, (unsigned long)buf & ~PAGE_MASK, - dma_addr + current_page, - vram_addr, write ? + &pagemap_addr[current_page], + vram_addr, NULL, write ? XE_MIGRATE_COPY_TO_VRAM : XE_MIGRATE_COPY_TO_SRAM); if (IS_ERR(__fence)) { - if (fence) + if (fence) { dma_fence_wait(fence, false); + dma_fence_put(fence); + } fence = __fence; goto out_err; } + + dma_fence_put(fence); fence = __fence; buf += current_bytes; @@ -1909,10 +2426,60 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, dma_fence_put(fence); out_err: - xe_migrate_dma_unmap(xe, dma_addr, len + page_offset, write); + xe_migrate_dma_unmap(xe, pagemap_addr, len + page_offset, write); return IS_ERR(fence) ? PTR_ERR(fence) : 0; } +/** + * xe_migrate_job_lock() - Lock migrate job lock + * @m: The migration context. + * @q: Queue associated with the operation which requires a lock + * + * Lock the migrate job lock if the queue is a migration queue, otherwise + * assert the VM's dma-resv is held (user queues have their own locking). + */ +void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q) +{ + bool is_migrate = q == m->q; + + if (is_migrate) + mutex_lock(&m->job_mutex); + else + xe_vm_assert_held(q->vm); /* A user queue's VM should be locked */ +} + +/** + * xe_migrate_job_unlock() - Unlock migrate job lock + * @m: The migration context. + * @q: Queue associated with the operation which requires a lock + * + * Unlock the migrate job lock if the queue is a migration queue, otherwise + * assert the VM's dma-resv is held (user queues have their own locking).
+ */ +void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q) +{ + bool is_migrate = q == m->q; + + if (is_migrate) + mutex_unlock(&m->job_mutex); + else + xe_vm_assert_held(q->vm); /* User queues VM's should be locked */ +} + +#if IS_ENABLED(CONFIG_PROVE_LOCKING) +/** + * xe_migrate_job_lock_assert() - Assert migrate job lock held of queue + * @q: Migrate queue + */ +void xe_migrate_job_lock_assert(struct xe_exec_queue *q) +{ + struct xe_migrate *m = gt_to_tile(q->gt)->migrate; + + xe_gt_assert(q->gt, q == m->q); + lockdep_assert_held(&m->job_mutex); +} +#endif + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) #include "tests/xe_migrate.c" #endif diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index fb9839c1bae0..b76441f062b4 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -9,11 +9,13 @@ #include <linux/types.h> struct dma_fence; +struct drm_pagemap_addr; struct iosys_map; struct ttm_resource; struct xe_bo; struct xe_gt; +struct xe_tlb_inval_job; struct xe_exec_queue; struct xe_migrate; struct xe_migrate_pt_update; @@ -24,6 +26,13 @@ struct xe_vm; struct xe_vm_pgtable_update; struct xe_vma; +enum xe_sriov_vf_ccs_rw_ctxs; + +enum xe_migrate_copy_dir { + XE_MIGRATE_COPY_TO_VRAM, + XE_MIGRATE_COPY_TO_SRAM, +}; + /** * struct xe_migrate_pt_update_ops - Callbacks for the * xe_migrate_update_pgtables() function. @@ -89,21 +98,32 @@ struct xe_migrate_pt_update { struct xe_vma_ops *vops; /** @job: The job if a GPU page-table update. NULL otherwise */ struct xe_sched_job *job; + /** + * @ijob: The TLB invalidation job for primary GT. NULL otherwise + */ + struct xe_tlb_inval_job *ijob; + /** + * @mjob: The TLB invalidation job for media GT. 
NULL otherwise + */ + struct xe_tlb_inval_job *mjob; /** @tile_id: Tile ID of the update */ u8 tile_id; }; -struct xe_migrate *xe_migrate_init(struct xe_tile *tile); +struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile); +int xe_migrate_init(struct xe_migrate *m); struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, unsigned long npages, - dma_addr_t *src_addr, - u64 dst_addr); + struct drm_pagemap_addr *src_addr, + u64 dst_addr, + struct dma_fence *deps); struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, unsigned long npages, u64 src_addr, - dma_addr_t *dst_addr); + struct drm_pagemap_addr *dst_addr, + struct dma_fence *deps); struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_bo *src_bo, @@ -112,6 +132,15 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct ttm_resource *dst, bool copy_only_ccs); +int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, + struct xe_bo *src_bo, + enum xe_sriov_vf_ccs_rw_ctxs read_write); + +struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate); +struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate); +struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset, + struct xe_bo *sysmem_bo, u64 sysmem_offset, + u64 size, enum xe_migrate_copy_dir dir); int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, unsigned long offset, void *buf, int len, int write); @@ -133,5 +162,15 @@ xe_migrate_update_pgtables(struct xe_migrate *m, void xe_migrate_wait(struct xe_migrate *m); -struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile); +#if IS_ENABLED(CONFIG_PROVE_LOCKING) +void xe_migrate_job_lock_assert(struct xe_exec_queue *q); +#else +static inline void xe_migrate_job_lock_assert(struct xe_exec_queue *q) +{ +} +#endif + +void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q); +void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q); + #endif diff --git 
a/drivers/gpu/drm/xe/xe_migrate_doc.h b/drivers/gpu/drm/xe/xe_migrate_doc.h index 63c7d67b5b62..c082bc0b7068 100644 --- a/drivers/gpu/drm/xe/xe_migrate_doc.h +++ b/drivers/gpu/drm/xe/xe_migrate_doc.h @@ -9,7 +9,7 @@ /** * DOC: Migrate Layer * - * The XE migrate layer is used generate jobs which can copy memory (eviction), + * The Xe migrate layer is used to generate jobs which can copy memory (eviction), * clear memory, or program tables (binds). This layer exists in every GT, has * a migrate engine, and uses a special VM for all generated jobs. * diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 7357458bc0d2..350dca1f0925 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -22,6 +22,9 @@ #include "xe_macros.h" #include "xe_sriov.h" #include "xe_trace.h" +#include "xe_wa.h" + +#include "generated/xe_device_wa_oob.h" static void tiles_fini(void *arg) { @@ -64,35 +67,6 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) if (xe->info.tile_count == 1) return; - /* Possibly override number of tile based on configuration register */ - if (!xe->info.skip_mtcfg) { - struct xe_mmio *mmio = xe_root_tile_mmio(xe); - u8 tile_count; - u32 mtcfg; - - /* - * Although the per-tile mmio regs are not yet initialized, this - * is fine as it's going to the root tile's mmio, that's - * guaranteed to be initialized earlier in xe_mmio_probe_early() - */ - mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR); - tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; - - if (tile_count < xe->info.tile_count) { - drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", - xe->info.tile_count, tile_count); - xe->info.tile_count = tile_count; - - /* - * FIXME: Needs some work for standalone media, but - * should be impossible with multi-tile for now: - * multi-tile platform with standalone media doesn't - * exist - */ - xe->info.gt_count = xe->info.tile_count; - } - } - for_each_remote_tile(tile, xe, id)
xe_mmio_init(&tile->mmio, tile, xe->mmio.regs + id * tile_mmio_size, SZ_4M); } @@ -163,7 +137,7 @@ static void mmio_flush_pending_writes(struct xe_mmio *mmio) #define DUMMY_REG_OFFSET 0x130030 int i; - if (mmio->tile->xe->info.platform != XE_LUNARLAKE) + if (!XE_DEVICE_WA(mmio->tile->xe, 15015404425)) return; /* 4 dummy writes */ @@ -176,7 +150,6 @@ u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u8 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); val = readb(mmio->regs + addr); @@ -190,7 +163,6 @@ u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u16 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); val = readw(mmio->regs + addr); @@ -217,7 +189,6 @@ u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u32 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe)) @@ -408,3 +379,32 @@ int xe_mmio_wait32_not(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 va { return __xe_mmio_wait32(mmio, reg, mask, val, timeout_us, out_val, atomic, false); } + +#ifdef CONFIG_PCI_IOV +static size_t vf_regs_stride(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) > 1200 ? 
0x400 : 0x1000; +} + +/** + * xe_mmio_init_vf_view() - Initialize an MMIO instance for accesses like the VF + * @mmio: the target &xe_mmio to initialize as VF's view + * @base: the source &xe_mmio to initialize from + * @vfid: the VF identifier + */ +void xe_mmio_init_vf_view(struct xe_mmio *mmio, const struct xe_mmio *base, unsigned int vfid) +{ + struct xe_tile *tile = base->tile; + struct xe_device *xe = tile->xe; + size_t offset = vf_regs_stride(xe) * vfid; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, vfid); + xe_assert(xe, !base->sriov_vf_gt); + xe_assert(xe, base->regs_size > offset); + + *mmio = *base; + mmio->regs += offset; + mmio->regs_size -= offset; +} +#endif diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index c151ba569003..15362789ab99 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -42,4 +42,8 @@ static inline struct xe_mmio *xe_root_tile_mmio(struct xe_device *xe) return &xe->tiles[0].mmio; } +#ifdef CONFIG_PCI_IOV +void xe_mmio_init_vf_view(struct xe_mmio *mmio, const struct xe_mmio *base, unsigned int vfid); +#endif + #endif diff --git a/drivers/gpu/drm/xe/xe_mmio_gem.c b/drivers/gpu/drm/xe/xe_mmio_gem.c new file mode 100644 index 000000000000..9a97c4387e4f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mmio_gem.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_mmio_gem.h" + +#include <drm/drm_drv.h> +#include <drm/drm_gem.h> +#include <drm/drm_managed.h> + +#include "xe_device_types.h" + +/** + * DOC: Exposing MMIO regions to userspace + * + * In certain cases, the driver may allow userspace to mmap a portion of the hardware registers. + * + * This can be done as follows: + * 1. Call xe_mmio_gem_create() to create a GEM object with an mmap-able fake offset. + * 2. Use xe_mmio_gem_mmap_offset() on the created GEM object to retrieve the fake offset. + * 3. Provide the fake offset to userspace. + * 4. 
Userspace can call mmap with the fake offset. The length provided to mmap + * must match the size of the GEM object. + * 5. When the region is no longer needed, call xe_mmio_gem_destroy() to release the GEM object. + * + * NOTE: The exposed MMIO region must be page-aligned with regards to its BAR offset and size. + * + * WARNING: Exposing MMIO regions to userspace can have security and stability implications. + * Make sure not to expose any sensitive registers. + */ + +static void xe_mmio_gem_free(struct drm_gem_object *); +static int xe_mmio_gem_mmap(struct drm_gem_object *, struct vm_area_struct *); +static vm_fault_t xe_mmio_gem_vm_fault(struct vm_fault *); + +struct xe_mmio_gem { + struct drm_gem_object base; + phys_addr_t phys_addr; +}; + +static const struct vm_operations_struct vm_ops = { + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, + .fault = xe_mmio_gem_vm_fault, +}; + +static const struct drm_gem_object_funcs xe_mmio_gem_funcs = { + .free = xe_mmio_gem_free, + .mmap = xe_mmio_gem_mmap, + .vm_ops = &vm_ops, +}; + +static inline struct xe_mmio_gem *to_xe_mmio_gem(struct drm_gem_object *obj) +{ + return container_of(obj, struct xe_mmio_gem, base); +} + +/** + * xe_mmio_gem_create - Expose an MMIO region to userspace + * @xe: The xe device + * @file: DRM file descriptor + * @phys_addr: Start of the exposed MMIO region + * @size: The size of the exposed MMIO region + * + * This function creates a GEM object that exposes an MMIO region with an mmap-able + * fake offset. 
+ * + * See: "Exposing MMIO regions to userspace" + */ +struct xe_mmio_gem *xe_mmio_gem_create(struct xe_device *xe, struct drm_file *file, + phys_addr_t phys_addr, size_t size) +{ + struct xe_mmio_gem *obj; + struct drm_gem_object *base; + int err; + + if ((phys_addr % PAGE_SIZE != 0) || (size % PAGE_SIZE != 0)) + return ERR_PTR(-EINVAL); + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return ERR_PTR(-ENOMEM); + + base = &obj->base; + base->funcs = &xe_mmio_gem_funcs; + obj->phys_addr = phys_addr; + + drm_gem_private_object_init(&xe->drm, base, size); + + err = drm_gem_create_mmap_offset(base); + if (err) + goto free_gem; + + err = drm_vma_node_allow(&base->vma_node, file); + if (err) + goto free_gem; + + return obj; + +free_gem: + xe_mmio_gem_free(base); + return ERR_PTR(err); +} + +/** + * xe_mmio_gem_mmap_offset - Return the mmap-able fake offset + * @gem: the GEM object created with xe_mmio_gem_create() + * + * This function returns the mmap-able fake offset allocated during + * xe_mmio_gem_create(). + * + * See: "Exposing MMIO regions to userspace" + */ +u64 xe_mmio_gem_mmap_offset(struct xe_mmio_gem *gem) +{ + return drm_vma_node_offset_addr(&gem->base.vma_node); +} + +static void xe_mmio_gem_free(struct drm_gem_object *base) +{ + struct xe_mmio_gem *obj = to_xe_mmio_gem(base); + + drm_gem_object_release(base); + kfree(obj); +} + +/** + * xe_mmio_gem_destroy - Destroy the GEM object that exposes an MMIO region + * @gem: the GEM object to destroy + * + * This function releases resources associated with the GEM object created by + * xe_mmio_gem_create(). 
+ * + * See: "Exposing MMIO regions to userspace" + */ +void xe_mmio_gem_destroy(struct xe_mmio_gem *gem) +{ + xe_mmio_gem_free(&gem->base); +} + +static int xe_mmio_gem_mmap(struct drm_gem_object *base, struct vm_area_struct *vma) +{ + if (vma->vm_end - vma->vm_start != base->size) + return -EINVAL; + + if ((vma->vm_flags & VM_SHARED) == 0) + return -EINVAL; + + /* Set vm_pgoff (used as a fake buffer offset by DRM) to 0 */ + vma->vm_pgoff = 0; + vma->vm_page_prot = pgprot_noncached(vm_get_page_prot(vma->vm_flags)); + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | + VM_DONTCOPY | VM_NORESERVE); + + /* Defer actual mapping to the fault handler. */ + return 0; +} + +static void xe_mmio_gem_release_dummy_page(struct drm_device *dev, void *res) +{ + __free_page((struct page *)res); +} + +static vm_fault_t xe_mmio_gem_vm_fault_dummy_page(struct vm_area_struct *vma) +{ + struct drm_gem_object *base = vma->vm_private_data; + struct drm_device *dev = base->dev; + vm_fault_t ret = VM_FAULT_NOPAGE; + struct page *page; + unsigned long pfn; + unsigned long i; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + return VM_FAULT_OOM; + + if (drmm_add_action_or_reset(dev, xe_mmio_gem_release_dummy_page, page)) + return VM_FAULT_OOM; + + pfn = page_to_pfn(page); + + /* Map the entire VMA to the same dummy page */ + for (i = 0; i < base->size; i += PAGE_SIZE) { + unsigned long addr = vma->vm_start + i; + + ret = vmf_insert_pfn(vma, addr, pfn); + if (ret & VM_FAULT_ERROR) + break; + } + + return ret; +} + +static vm_fault_t xe_mmio_gem_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct drm_gem_object *base = vma->vm_private_data; + struct xe_mmio_gem *obj = to_xe_mmio_gem(base); + struct drm_device *dev = base->dev; + vm_fault_t ret = VM_FAULT_NOPAGE; + unsigned long i; + int idx; + + if (!drm_dev_enter(dev, &idx)) { + /* + * Provide a dummy page to avoid SIGBUS for events such as hot-unplug. 
+ * This gives the userspace the option to recover instead of crashing. + * It is assumed the userspace will receive the notification via some + * other channel (e.g. drm uevent). + */ + return xe_mmio_gem_vm_fault_dummy_page(vma); + } + + for (i = 0; i < base->size; i += PAGE_SIZE) { + unsigned long addr = vma->vm_start + i; + unsigned long phys_addr = obj->phys_addr + i; + + ret = vmf_insert_pfn(vma, addr, PHYS_PFN(phys_addr)); + if (ret & VM_FAULT_ERROR) + break; + } + + drm_dev_exit(idx); + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_mmio_gem.h b/drivers/gpu/drm/xe/xe_mmio_gem.h new file mode 100644 index 000000000000..4b76d5586ebb --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mmio_gem.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_MMIO_GEM_H_ +#define _XE_MMIO_GEM_H_ + +#include <linux/types.h> + +struct drm_file; +struct xe_device; +struct xe_mmio_gem; + +struct xe_mmio_gem *xe_mmio_gem_create(struct xe_device *xe, struct drm_file *file, + phys_addr_t phys_addr, size_t size); +u64 xe_mmio_gem_mmap_offset(struct xe_mmio_gem *gem); +void xe_mmio_gem_destroy(struct xe_mmio_gem *gem); + +#endif /* _XE_MMIO_GEM_H_ */ diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 0c737413fcb6..6613d3b48a84 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -568,6 +568,23 @@ static const struct xe_mocs_ops xe2_mocs_ops = { .dump = xe2_mocs_dump, }; +/* + * Note that the "L3" and "L4" register fields actually control the L2 and L3 + * caches respectively on this platform. 
+ */ +static const struct xe_mocs_entry xe3p_xpc_mocs_table[] = { + /* Defer to PAT */ + MOCS_ENTRY(0, XE2_L3_0_WB | L4_3_UC, 0), + /* UC */ + MOCS_ENTRY(1, IG_PAT | XE2_L3_3_UC | L4_3_UC, 0), + /* L2 */ + MOCS_ENTRY(2, IG_PAT | XE2_L3_0_WB | L4_3_UC, 0), + /* L3 */ + MOCS_ENTRY(3, IG_PAT | XE2_L3_3_UC | L4_0_WB, 0), + /* L2 + L3 */ + MOCS_ENTRY(4, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0), +}; + static unsigned int get_mocs_settings(struct xe_device *xe, struct xe_mocs_info *info) { @@ -576,6 +593,16 @@ static unsigned int get_mocs_settings(struct xe_device *xe, memset(info, 0, sizeof(struct xe_mocs_info)); switch (xe->info.platform) { + case XE_CRESCENTISLAND: + info->ops = &xe2_mocs_ops; + info->table_size = ARRAY_SIZE(xe3p_xpc_mocs_table); + info->table = xe3p_xpc_mocs_table; + info->num_mocs_regs = XE2_NUM_MOCS_ENTRIES; + info->uc_index = 1; + info->wb_index = 4; + info->unused_entries_index = 4; + break; + case XE_NOVALAKE_S: case XE_PANTHERLAKE: case XE_LUNARLAKE: case XE_BATTLEMAGE: @@ -772,12 +799,20 @@ void xe_mocs_init(struct xe_gt *gt) init_l3cc_table(gt, &table); } -void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) +/** + * xe_mocs_dump() - Dump MOCS table. + * @gt: the &xe_gt with MOCS table + * @p: the &drm_printer to dump info to + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); enum xe_force_wake_domains domain; struct xe_mocs_info table; unsigned int fw_ref, flags; + int err = 0; flags = get_mocs_settings(xe, &table); @@ -785,14 +820,17 @@ void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) xe_pm_runtime_get_noresume(xe); fw_ref = xe_force_wake_get(gt_to_fw(gt), domain); - if (!xe_force_wake_ref_has_domain(fw_ref, domain)) + if (!xe_force_wake_ref_has_domain(fw_ref, domain)) { + err = -ETIMEDOUT; goto err_fw; + } table.ops->dump(&table, flags, gt, p); err_fw: xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(xe); + return err; } #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h index dc972ffd4d07..f00bbb269829 100644 --- a/drivers/gpu/drm/xe/xe_mocs.h +++ b/drivers/gpu/drm/xe/xe_mocs.h @@ -11,12 +11,6 @@ struct xe_gt; void xe_mocs_init_early(struct xe_gt *gt); void xe_mocs_init(struct xe_gt *gt); - -/** - * xe_mocs_dump - Dump mocs table - * @gt: GT structure - * @p: Printer to dump info to - */ -void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p); +int xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index e4742e27e2cd..d08338fc3bc1 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -18,29 +18,50 @@ #include "xe_observation.h" #include "xe_sched_job.h" +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +#define DEFAULT_GUC_LOG_LEVEL 3 +#else +#define DEFAULT_GUC_LOG_LEVEL 1 +#endif + +#define DEFAULT_PROBE_DISPLAY true +#define DEFAULT_VRAM_BAR_SIZE 0 +#define DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE +#define DEFAULT_MAX_VFS ~0 +#define DEFAULT_MAX_VFS_STR "unlimited" +#define DEFAULT_WEDGED_MODE 1 +#define DEFAULT_SVM_NOTIFIER_SIZE 512 + struct xe_modparam xe_modparam = { - .probe_display = true, - .guc_log_level = 3, - .force_probe = 
CONFIG_DRM_XE_FORCE_PROBE, - .wedged_mode = 1, - .svm_notifier_size = 512, + .probe_display = DEFAULT_PROBE_DISPLAY, + .guc_log_level = DEFAULT_GUC_LOG_LEVEL, + .force_probe = DEFAULT_FORCE_PROBE, +#ifdef CONFIG_PCI_IOV + .max_vfs = DEFAULT_MAX_VFS, +#endif + .wedged_mode = DEFAULT_WEDGED_MODE, + .svm_notifier_size = DEFAULT_SVM_NOTIFIER_SIZE, /* the rest are 0 by default */ }; module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600); -MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2"); +MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size in MiB, must be power of 2 " + "[default=" __stringify(DEFAULT_SVM_NOTIFIER_SIZE) "]"); module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); -MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)"); +MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched " + "[default=" __stringify(DEFAULT_PROBE_DISPLAY) "])"); module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, int, 0600); -MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size (in MiB) - <0=disable-resize, 0=max-needed-size[default], >0=force-size"); +MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size in MiB (<0=disable-resize, 0=max-needed-size, >0=force-size " + "[default=" __stringify(DEFAULT_VRAM_BAR_SIZE) "])"); module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600); -MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)"); +MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1=normal, 2..5=verbose-levels " + "[default=" __stringify(DEFAULT_GUC_LOG_LEVEL) "])"); module_param_named_unsafe(guc_firmware_path, xe_modparam.guc_firmware_path, charp, 0400); 
MODULE_PARM_DESC(guc_firmware_path, @@ -56,18 +77,21 @@ MODULE_PARM_DESC(gsc_firmware_path, module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400); MODULE_PARM_DESC(force_probe, - "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details."); + "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details " + "[default=" DEFAULT_FORCE_PROBE "])"); #ifdef CONFIG_PCI_IOV module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400); MODULE_PARM_DESC(max_vfs, "Limit number of Virtual Functions (VFs) that could be managed. " - "(0 = no VFs [default]; N = allow up to N VFs)"); + "(0=no VFs; N=allow up to N VFs " + "[default=" DEFAULT_MAX_VFS_STR "])"); #endif module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); MODULE_PARM_DESC(wedged_mode, - "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); + "Module's default policy for the wedged mode (0=never, 1=upon-critical-errors, 2=upon-any-hang " + "[default=" __stringify(DEFAULT_WEDGED_MODE) "])"); static int xe_check_nomodeset(void) { @@ -111,24 +135,17 @@ static const struct init_funcs init_funcs[] = { }, }; -static int __init xe_call_init_func(unsigned int i) +static int __init xe_call_init_func(const struct init_funcs *func) { - if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) - return 0; - if (!init_funcs[i].init) - return 0; - - return init_funcs[i].init(); + if (func->init) + return func->init(); + return 0; } -static void xe_call_exit_func(unsigned int i) +static void xe_call_exit_func(const struct init_funcs *func) { - if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) - return; - if (!init_funcs[i].exit) - return; - - init_funcs[i].exit(); + if (func->exit) + func->exit(); } static int __init xe_init(void) @@ -136,10 +153,12 @@ static int __init xe_init(void) int err, i; for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { - err = xe_call_init_func(i); + err = xe_call_init_func(init_funcs 
+ i); if (err) { + pr_info("%s: module_init aborted at %ps %pe\n", + DRIVER_NAME, init_funcs[i].init, ERR_PTR(err)); while (i--) - xe_call_exit_func(i); + xe_call_exit_func(init_funcs + i); return err; } } @@ -152,7 +171,7 @@ static void __exit xe_exit(void) int i; for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--) - xe_call_exit_func(i); + xe_call_exit_func(init_funcs + i); } module_init(xe_init); diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c new file mode 100644 index 000000000000..33f4ac82fc80 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2019-2025, Intel Corporation. All rights reserved. + */ + +#include <linux/intel_dg_nvm_aux.h> +#include <linux/pci.h> + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_mmio.h" +#include "xe_nvm.h" +#include "regs/xe_gsc_regs.h" +#include "xe_sriov.h" + +#define GEN12_GUNIT_NVM_BASE 0x00102040 +#define GEN12_DEBUG_NVM_BASE 0x00101018 + +#define GEN12_CNTL_PROTECTED_NVM_REG 0x0010100C + +#define GEN12_GUNIT_NVM_SIZE 0x80 +#define GEN12_DEBUG_NVM_SIZE 0x4 + +#define NVM_NON_POSTED_ERASE_CHICKEN_BIT BIT(13) + +#define HECI_FW_STATUS_2_NVM_ACCESS_MODE BIT(3) + +static const struct intel_dg_nvm_region regions[INTEL_DG_NVM_REGIONS] = { + [0] = { .name = "DESCRIPTOR", }, + [2] = { .name = "GSC", }, + [9] = { .name = "PADDING", }, + [11] = { .name = "OptionROM", }, + [12] = { .name = "DAM", }, +}; + +static void xe_nvm_release_dev(struct device *dev) +{ + struct auxiliary_device *aux = container_of(dev, struct auxiliary_device, dev); + struct intel_dg_nvm_dev *nvm = container_of(aux, struct intel_dg_nvm_dev, aux_dev); + + kfree(nvm); +} + +static bool xe_nvm_non_posted_erase(struct xe_device *xe) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (xe->info.platform != XE_BATTLEMAGE) + return false; + return !(xe_mmio_read32(mmio, XE_REG(GEN12_CNTL_PROTECTED_NVM_REG)) & + NVM_NON_POSTED_ERASE_CHICKEN_BIT); +} + 
+/*
+ * Report whether NVM write protection is overridden. Reads HECI_FWSTS2 at the
+ * platform's GSC HECI2 base and returns true when the NVM access mode bit is
+ * clear. An unrecognized platform is logged as an error and reported as
+ * overridden (true).
+ */
+static bool xe_nvm_writable_override(struct xe_device *xe)
+{
+	struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+	bool writable_override;
+	resource_size_t base;
+
+	switch (xe->info.platform) {
+	case XE_BATTLEMAGE:
+		base = DG2_GSC_HECI2_BASE;
+		break;
+	case XE_PVC:
+		base = PVC_GSC_HECI2_BASE;
+		break;
+	case XE_DG2:
+		base = DG2_GSC_HECI2_BASE;
+		break;
+	case XE_DG1:
+		base = DG1_GSC_HECI2_BASE;
+		break;
+	default:
+		drm_err(&xe->drm, "Unknown platform\n");
+		return true;
+	}
+
+	writable_override =
+		!(xe_mmio_read32(mmio, HECI_FWSTS2(base)) &
+		  HECI_FW_STATUS_2_NVM_ACCESS_MODE);
+	if (writable_override)
+		drm_info(&xe->drm, "NVM access overridden by jumper\n");
+	return writable_override;
+}
+
+/**
+ * xe_nvm_init() - Create and register the NVM auxiliary device
+ * @xe: the &xe_device
+ *
+ * Allocates the &intel_dg_nvm_dev, describes the two NVM register windows as
+ * child resources of PCI BAR 0 and registers an auxiliary device named "nvm".
+ * Does nothing when the device has no GSC NVM or when running as an SR-IOV VF.
+ *
+ * Return: 0 on success or when there is nothing to do, -EFAULT if an NVM
+ * device is already set, -ENOMEM on allocation failure, or the error returned
+ * by the auxiliary bus.
+ */
+int xe_nvm_init(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct auxiliary_device *aux_dev;
+	struct intel_dg_nvm_dev *nvm;
+	int ret;
+
+	if (!xe->info.has_gsc_nvm)
+		return 0;
+
+	/* No access to internal NVM from VFs */
+	if (IS_SRIOV_VF(xe))
+		return 0;
+
+	/* Nvm pointer should be NULL here */
+	if (WARN_ON(xe->nvm))
+		return -EFAULT;
+
+	xe->nvm = kzalloc(sizeof(*nvm), GFP_KERNEL);
+	if (!xe->nvm)
+		return -ENOMEM;
+
+	nvm = xe->nvm;
+
+	nvm->writable_override = xe_nvm_writable_override(xe);
+	nvm->non_posted_erase = xe_nvm_non_posted_erase(xe);
+	nvm->bar.parent = &pdev->resource[0];
+	nvm->bar.start = GEN12_GUNIT_NVM_BASE + pdev->resource[0].start;
+	nvm->bar.end = nvm->bar.start + GEN12_GUNIT_NVM_SIZE - 1;
+	nvm->bar.flags = IORESOURCE_MEM;
+	nvm->bar.desc = IORES_DESC_NONE;
+	nvm->regions = regions;
+
+	nvm->bar2.parent = &pdev->resource[0];
+	nvm->bar2.start = GEN12_DEBUG_NVM_BASE + pdev->resource[0].start;
+	nvm->bar2.end = nvm->bar2.start + GEN12_DEBUG_NVM_SIZE - 1;
+	nvm->bar2.flags = IORESOURCE_MEM;
+	nvm->bar2.desc = IORES_DESC_NONE;
+
+	aux_dev = &nvm->aux_dev;
+
+	aux_dev->name = "nvm";
+	aux_dev->id = (pci_domain_nr(pdev->bus) << 16) | pci_dev_id(pdev);
+	aux_dev->dev.parent = &pdev->dev;
+	aux_dev->dev.release = xe_nvm_release_dev;
+
+	ret = auxiliary_device_init(aux_dev);
+	if (ret) {
+		drm_err(&xe->drm, "xe-nvm aux init failed %d\n", ret);
+		/*
+		 * The device was never initialized, so the release callback
+		 * will not run; free the allocation directly.
+		 */
+		kfree(nvm);
+		goto err;
+	}
+
+	ret = auxiliary_device_add(aux_dev);
+	if (ret) {
+		drm_err(&xe->drm, "xe-nvm aux add failed %d\n", ret);
+		/*
+		 * Dropping the reference ends up in xe_nvm_release_dev(),
+		 * which frees nvm. It must not be freed a second time here.
+		 */
+		auxiliary_device_uninit(aux_dev);
+		goto err;
+	}
+	return 0;
+
+err:
+	/* nvm has already been freed on both paths; only clear the pointer. */
+	xe->nvm = NULL;
+	return ret;
+}
+
+/**
+ * xe_nvm_fini() - Unregister the NVM auxiliary device
+ * @xe: the &xe_device
+ *
+ * Counterpart of xe_nvm_init(). The &intel_dg_nvm_dev memory is released by
+ * the auxiliary device's release callback, not freed here.
+ */
+void xe_nvm_fini(struct xe_device *xe)
+{
+	struct intel_dg_nvm_dev *nvm = xe->nvm;
+
+	if (!xe->info.has_gsc_nvm)
+		return;
+
+	/* No access to internal NVM from VFs */
+	if (IS_SRIOV_VF(xe))
+		return;
+
+	/* Nvm pointer should not be NULL here */
+	if (WARN_ON(!nvm))
+		return;
+
+	auxiliary_device_delete(&nvm->aux_dev);
+	auxiliary_device_uninit(&nvm->aux_dev);
+	xe->nvm = NULL;
+}
diff --git a/drivers/gpu/drm/xe/xe_nvm.h b/drivers/gpu/drm/xe/xe_nvm.h
new file mode 100644
index 000000000000..7f3d5f57bed0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_nvm.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright(c) 2019-2025 Intel Corporation. All rights reserved.
+ */ + +#ifndef __XE_NVM_H__ +#define __XE_NVM_H__ + +struct xe_device; + +int xe_nvm_init(struct xe_device *xe); + +void xe_nvm_fini(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index fb842fa0552e..f8bb28ab8124 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -10,6 +10,7 @@ #include <drm/drm_drv.h> #include <drm/drm_managed.h> +#include <drm/drm_syncobj.h> #include <uapi/drm/xe_drm.h> #include <generated/xe_wa_oob.h> @@ -43,6 +44,12 @@ #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) #define XE_OA_UNIT_INVALID U32_MAX +enum xe_oam_unit_type { + XE_OAM_UNIT_SAG, + XE_OAM_UNIT_SCMI_0, + XE_OAM_UNIT_SCMI_1, +}; + enum xe_oa_submit_deps { XE_OA_SUBMIT_NO_DEPS, XE_OA_SUBMIT_ADD_DEPS, @@ -77,7 +84,7 @@ struct xe_oa_config { struct xe_oa_open_param { struct xe_file *xef; - u32 oa_unit_id; + struct xe_oa_unit *oa_unit; bool sample; u32 metric_set; enum xe_oa_format_name oa_format; @@ -194,7 +201,7 @@ static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *l static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) { - return &stream->hwe->oa_unit->regs; + return &stream->oa_unit->regs; } static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) @@ -397,7 +404,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - int size_exponent = __ffs(stream->oa_buffer.bo->size); + int size_exponent = __ffs(xe_bo_size(stream->oa_buffer.bo)); u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT; struct xe_mmio *mmio = &stream->gt->mmio; unsigned long flags; @@ -429,7 +436,7 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ - 
memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); + memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo)); } static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) @@ -454,7 +461,7 @@ static u32 __oa_ccs_select(struct xe_oa_stream *stream) static u32 __oactrl_used_bits(struct xe_oa_stream *stream) { - return stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? + return stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? OAG_OACONTROL_USED_BITS : OAM_OACONTROL_USED_BITS; } @@ -475,7 +482,7 @@ static void xe_oa_enable(struct xe_oa_stream *stream) __oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE; if (GRAPHICS_VER(stream->oa->xe) >= 20 && - stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) val |= OAG_OACONTROL_OA_PES_DISAG_EN; xe_mmio_rmw32(&stream->gt->mmio, regs->oa_ctrl, __oactrl_used_bits(stream), val); @@ -816,7 +823,7 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) u32 sqcnt1; /* Enable thread stall DOP gating and EU DOP gating. */ - if (XE_WA(stream->gt, 1508761755)) { + if (XE_GT_WA(stream->gt, 1508761755)) { xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE)); xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, @@ -831,18 +838,24 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) xe_oa_configure_oa_context(stream, false); /* Make sure we disable noa to save power. */ - xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, 0); + if (GT_VER(stream->gt) < 35) + xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, 0); sqcnt1 = SQCNT1_PMON_ENABLE | (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); /* Reset PMON Enable to save power. 
*/ xe_mmio_rmw32(mmio, XELPMP_SQCNT1, sqcnt1, 0); + + if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM || + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) && + GRAPHICS_VER(stream->oa->xe) >= 30) + xe_mmio_rmw32(mmio, OAM_COMPRESSION_T3_CONTROL, OAM_LAT_MEASURE_ENABLE, 0); } static void xe_oa_stream_destroy(struct xe_oa_stream *stream) { - struct xe_oa_unit *u = stream->hwe->oa_unit; + struct xe_oa_unit *u = stream->oa_unit; struct xe_gt *gt = stream->hwe->gt; if (WARN_ON(stream != u->exclusive_stream)) @@ -857,7 +870,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) xe_oa_free_oa_buffer(stream); - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); xe_pm_runtime_put(stream->oa->xe); /* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ @@ -872,9 +885,9 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) { struct xe_bo *bo; - bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL, - size, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT); + bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile, + size, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -1054,7 +1067,7 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) static u32 oag_buf_size_select(const struct xe_oa_stream *stream) { return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, - stream->oa_buffer.bo->size > SZ_16M ? + xe_bo_size(stream->oa_buffer.bo) > SZ_16M ? OAG_OA_DEBUG_BUF_SIZE_SELECT : 0); } @@ -1068,7 +1081,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) * EU NOA signals behave incorrectly if EU clock gating is enabled. * Disable thread stall DOP gating and EU DOP gating. 
*/ - if (XE_WA(stream->gt, 1508761755)) { + if (XE_GT_WA(stream->gt, 1508761755)) { xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, @@ -1092,11 +1105,12 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) oag_buf_size_select(stream) | oag_configure_mmio_trigger(stream, true)); - xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ? - (OAG_OAGLBCTXCTRL_COUNTER_RESUME | + xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, + OAG_OAGLBCTXCTRL_COUNTER_RESUME | + (stream->periodic ? OAG_OAGLBCTXCTRL_TIMER_ENABLE | REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK, - stream->period_exponent)) : 0); + stream->period_exponent) : 0)); /* * Initialize Super Queue Internal Cnt Register @@ -1105,9 +1119,13 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) */ sqcnt1 = SQCNT1_PMON_ENABLE | (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); - xe_mmio_rmw32(mmio, XELPMP_SQCNT1, 0, sqcnt1); + if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM || + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) && + GRAPHICS_VER(stream->oa->xe) >= 30) + xe_mmio_rmw32(mmio, OAM_COMPRESSION_T3_CONTROL, 0, OAM_LAT_MEASURE_ENABLE); + /* Configure OAR/OAC */ if (stream->exec_q) { ret = xe_oa_configure_oa_context(stream, true); @@ -1139,14 +1157,31 @@ static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *n return -EINVAL; } +static struct xe_oa_unit *xe_oa_lookup_oa_unit(struct xe_oa *oa, u32 oa_unit_id) +{ + struct xe_gt *gt; + int gt_id, i; + + for_each_gt(gt, oa->xe, gt_id) { + for (i = 0; i < gt->oa.num_oa_units; i++) { + struct xe_oa_unit *u = >->oa.oa_unit[i]; + + if (u->oa_unit_id == oa_unit_id) + return u; + } + } + + return NULL; +} + static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, struct xe_oa_open_param *param) { - if (value >= oa->oa_unit_ids) { + param->oa_unit = 
xe_oa_lookup_oa_unit(oa, value); + if (!param->oa_unit) { drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); return -EINVAL; } - param->oa_unit_id = value; return 0; } @@ -1220,6 +1255,9 @@ static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value, struct xe_oa_open_param *param) { + if (XE_IOCTL_DBG(oa->xe, value > DRM_XE_MAX_SYNCS)) + return -EINVAL; + param->num_syncs = value; return 0; } @@ -1309,7 +1347,7 @@ static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_fr ARRAY_SIZE(xe_oa_set_property_funcs_config)); if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs_open)) || - XE_IOCTL_DBG(oa->xe, ext.pad)) + XE_IOCTL_DBG(oa->xe, !ext.property) || XE_IOCTL_DBG(oa->xe, ext.pad)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs_open)); @@ -1357,7 +1395,9 @@ static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from fro return 0; } -static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) +static int xe_oa_parse_syncs(struct xe_oa *oa, + struct xe_oa_stream *stream, + struct xe_oa_open_param *param) { int ret, num_syncs, num_ufence = 0; @@ -1377,7 +1417,9 @@ static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) { ret = xe_sync_entry_parse(oa->xe, param->xef, ¶m->syncs[num_syncs], - ¶m->syncs_user[num_syncs], 0); + ¶m->syncs_user[num_syncs], + stream->ufence_syncobj, + ++stream->ufence_timeline_value, 0); if (ret) goto err_syncs; @@ -1507,7 +1549,7 @@ static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) return -ENODEV; param.xef = stream->xef; - err = xe_oa_parse_syncs(stream->oa, ¶m); + err = xe_oa_parse_syncs(stream->oa, stream, ¶m); if (err) goto err_config_put; @@ -1550,7 +1592,7 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned 
long arg) static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) { - struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, }; + struct drm_xe_oa_stream_info info = { .oa_buf_size = xe_bo_size(stream->oa_buffer.bo), }; void __user *uaddr = (void __user *)arg; if (copy_to_user(uaddr, &info, sizeof(info))) @@ -1603,6 +1645,7 @@ static void xe_oa_destroy_locked(struct xe_oa_stream *stream) if (stream->exec_q) xe_exec_queue_put(stream->exec_q); + drm_syncobj_put(stream->ufence_syncobj); kfree(stream); } @@ -1636,7 +1679,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) } /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ - if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) { + if (vma->vm_end - vma->vm_start != xe_bo_size(stream->oa_buffer.bo)) { drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); return -EINVAL; } @@ -1677,13 +1720,12 @@ static const struct file_operations xe_oa_fops = { static int xe_oa_stream_init(struct xe_oa_stream *stream, struct xe_oa_open_param *param) { - struct xe_oa_unit *u = param->hwe->oa_unit; struct xe_gt *gt = param->hwe->gt; - unsigned int fw_ref; int ret; stream->exec_q = param->exec_q; stream->poll_period_ns = DEFAULT_POLL_PERIOD_NS; + stream->oa_unit = param->oa_unit; stream->hwe = param->hwe; stream->gt = stream->hwe->gt; stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format]; @@ -1704,7 +1746,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, * buffer whose size, circ_size, is a multiple of the report size */ if (GRAPHICS_VER(stream->oa->xe) >= 20 && - stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) stream->oa_buffer.circ_size = param->oa_buffer_size - param->oa_buffer_size % stream->oa_buffer.format->size; @@ -1722,7 +1764,7 @@ static int xe_oa_stream_init(struct xe_oa_stream 
*stream, * GuC reset of engines causes OA to lose configuration * state. Prevent this by overriding GUCRC mode. */ - if (XE_WA(stream->gt, 1509372804)) { + if (XE_GT_WA(stream->gt, 1509372804)) { ret = xe_guc_pc_override_gucrc_mode(>->uc.guc.pc, SLPC_GUCRC_MODE_GUCRC_NO_RC6); if (ret) @@ -1733,8 +1775,8 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, /* Take runtime pm ref and forcewake to disable RC6 */ xe_pm_runtime_get(stream->oa->xe); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + stream->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(stream->fw_ref, XE_FORCEWAKE_ALL)) { ret = -ETIMEDOUT; goto err_fw_put; } @@ -1762,7 +1804,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, drm_dbg(&stream->oa->xe->drm, "opening stream oa config uuid=%s\n", stream->oa_config->uuid); - WRITE_ONCE(u->exclusive_stream, stream); + WRITE_ONCE(stream->oa_unit->exclusive_stream, stream); hrtimer_setup(&stream->poll_check_timer, xe_oa_poll_check_timer_cb, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -1779,7 +1821,7 @@ err_put_k_exec_q: err_free_oa_buf: xe_oa_free_oa_buffer(stream); err_fw_put: - xe_force_wake_put(gt_to_fw(gt), fw_ref); + xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); xe_pm_runtime_put(stream->oa->xe); if (stream->override_gucrc) xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc)); @@ -1794,27 +1836,42 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, struct xe_oa_open_param *param) { struct xe_oa_stream *stream; + struct drm_syncobj *ufence_syncobj; int stream_fd; int ret; /* We currently only allow exclusive access */ - if (param->hwe->oa_unit->exclusive_stream) { + if (param->oa_unit->exclusive_stream) { drm_dbg(&oa->xe->drm, "OA unit already in use\n"); ret = -EBUSY; goto exit; } + ret = drm_syncobj_create(&ufence_syncobj, DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (ret) + goto exit; + stream = 
kzalloc(sizeof(*stream), GFP_KERNEL); if (!stream) { ret = -ENOMEM; - goto exit; + goto err_syncobj; } - + stream->ufence_syncobj = ufence_syncobj; stream->oa = oa; - ret = xe_oa_stream_init(stream, param); + + ret = xe_oa_parse_syncs(oa, stream, param); if (ret) goto err_free; + ret = xe_oa_stream_init(stream, param); + if (ret) { + while (param->num_syncs--) + xe_sync_entry_cleanup(¶m->syncs[param->num_syncs]); + kfree(param->syncs); + goto err_free; + } + if (!param->disabled) { ret = xe_oa_enable_locked(stream); if (ret) @@ -1838,6 +1895,8 @@ err_destroy: xe_oa_stream_destroy(stream); err_free: kfree(stream); +err_syncobj: + drm_syncobj_put(ufence_syncobj); exit: return ret; } @@ -1854,7 +1913,7 @@ u32 xe_oa_timestamp_frequency(struct xe_gt *gt) { u32 reg, shift; - if (XE_WA(gt, 18013179988) || XE_WA(gt, 14015568240)) { + if (XE_GT_WA(gt, 18013179988) || XE_GT_WA(gt, 14015568240)) { xe_pm_runtime_get(gt_to_xe(gt)); reg = xe_mmio_read32(>->mmio, RPM_CONFIG0); xe_pm_runtime_put(gt_to_xe(gt)); @@ -1874,13 +1933,14 @@ static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent) return div_u64(nom + den - 1, den); } -static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type) +static bool oa_unit_supports_oa_format(struct xe_oa_open_param *param, int type) { - switch (hwe->oa_unit->type) { + switch (param->oa_unit->type) { case DRM_XE_OA_UNIT_TYPE_OAG: return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR || type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; case DRM_XE_OA_UNIT_TYPE_OAM: + case DRM_XE_OA_UNIT_TYPE_OAM_SAG: return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; default: return false; @@ -1899,37 +1959,48 @@ u16 xe_oa_unit_id(struct xe_hw_engine *hwe) hwe->oa_unit->oa_unit_id : U16_MAX; } +/* A hwe must be assigned to stream/oa_unit for batch submissions */ static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) { - struct xe_gt *gt; - int i, ret = 0; + 
struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int ret = 0; + + /* If not provided, OA unit defaults to OA unit 0 as per uapi */ + if (!param->oa_unit) + param->oa_unit = &xe_root_mmio_gt(oa->xe)->oa.oa_unit[0]; + /* When we have an exec_q, get hwe from the exec_q */ if (param->exec_q) { - /* When we have an exec_q, get hwe from the exec_q */ param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class, param->engine_instance, true); - } else { - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - - /* Else just get the first hwe attached to the oa unit */ - for_each_gt(gt, oa->xe, i) { - for_each_hw_engine(hwe, gt, id) { - if (xe_oa_unit_id(hwe) == param->oa_unit_id) { - param->hwe = hwe; - goto out; - } - } - } + if (!param->hwe || param->hwe->oa_unit != param->oa_unit) + goto err; + goto out; } -out: - if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) { - drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", - param->exec_q ? param->exec_q->class : -1, - param->engine_instance, param->oa_unit_id); - ret = -EINVAL; + + /* Else just get the first hwe attached to the oa unit */ + for_each_hw_engine(hwe, param->oa_unit->gt, id) { + if (hwe->oa_unit == param->oa_unit) { + param->hwe = hwe; + goto out; + } } + /* If we still didn't find a hwe, just get one with a valid oa_unit from the same gt */ + for_each_hw_engine(hwe, param->oa_unit->gt, id) { + if (!hwe->oa_unit) + continue; + + param->hwe = hwe; + goto out; + } +err: + drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", + param->exec_q ? 
param->exec_q->class : -1, + param->engine_instance, param->oa_unit->oa_unit_id); + ret = -EINVAL; +out: return ret; } @@ -2007,7 +2078,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f f = &oa->oa_formats[param.oa_format]; if (!param.oa_format || !f->size || - !engine_supports_oa_format(param.hwe, f->type)) { + !oa_unit_supports_oa_format(&param, f->type)) { drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n", param.oa_format, f->type, f->size, param.hwe->class); ret = -EINVAL; @@ -2039,22 +2110,14 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f goto err_exec_q; } - ret = xe_oa_parse_syncs(oa, &param); - if (ret) - goto err_exec_q; - mutex_lock(&param.hwe->gt->oa.gt_lock); ret = xe_oa_stream_open_ioctl_locked(oa, &param); mutex_unlock(&param.hwe->gt->oa.gt_lock); if (ret < 0) - goto err_sync_cleanup; + goto err_exec_q; return ret; -err_sync_cleanup: - while (param.num_syncs--) - xe_sync_entry_cleanup(&param.syncs[param.num_syncs]); - kfree(param.syncs); err_exec_q: if (param.exec_q) xe_exec_queue_put(param.exec_q); @@ -2155,6 +2218,7 @@ static const struct xe_mmio_range gen12_oa_mux_regs[] = { static const struct xe_mmio_range xe2_oa_mux_regs[] = { { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ + { .start = 0xB01C, .end = 0xB01C }, /* LNCF_MISC_CONFIG_REGISTER0 */ { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ { .start = 0xD0E0, .end = 0xD0F4 }, /* VISACTL */ { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ @@ -2343,11 +2407,13 @@ int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *fi goto sysfs_err; } - mutex_unlock(&oa->metrics_lock); + id = oa_config->id; - drm_dbg(&oa->xe->drm, "Added config %s id=%i\n", oa_config->uuid, oa_config->id); + drm_dbg(&oa->xe->drm, "Added config %s id=%i\n", oa_config->uuid, id); - return oa_config->id; +
mutex_unlock(&oa->metrics_lock); + + return id; sysfs_err: mutex_unlock(&oa->metrics_lock); @@ -2448,20 +2514,38 @@ int xe_oa_register(struct xe_device *xe) static u32 num_oa_units_per_gt(struct xe_gt *gt) { - return 1; + if (xe_gt_is_main_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) + return 1; + else if (!IS_DGFX(gt_to_xe(gt))) + return XE_OAM_UNIT_SCMI_0 + 1; /* SAG + SCMI_0 */ + else + return XE_OAM_UNIT_SCMI_1 + 1; /* SAG + SCMI_0 + SCMI_1 */ } static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) { - if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) { - /* - * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices - * within the gt use the same OAM. All MTL/LNL SKUs list 1 SA MEDIA - */ - xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA); + if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) < 1270) + return XE_OA_UNIT_INVALID; + + xe_gt_WARN_ON(hwe->gt, xe_gt_is_main_type(hwe->gt)); + if (GRAPHICS_VER(gt_to_xe(hwe->gt)) < 20) return 0; - } + /* + * XE_OAM_UNIT_SAG has only GSCCS attached to it, but only on some platforms. Also + * GSCCS cannot be used to submit batches to program the OAM unit. Therefore we don't + * assign an OA unit to GSCCS. This means that XE_OAM_UNIT_SAG is exposed as an OA + * unit without attached engines. Fused off engines can also result in oa_unit's with + * num_engines == 0. OA streams can be opened on all OA units. 
+ */ + else if (hwe->engine_id == XE_HW_ENGINE_GSCCS0) + return XE_OA_UNIT_INVALID; + else if (!IS_DGFX(gt_to_xe(hwe->gt))) + return XE_OAM_UNIT_SCMI_0; + else if (hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE) + return (hwe->instance / 2 & 0x1) + 1; + else if (hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE) + return (hwe->instance & 0x1) + 1; return XE_OA_UNIT_INVALID; } @@ -2475,6 +2559,7 @@ static u32 __hwe_oa_unit(struct xe_hw_engine *hwe) case XE_ENGINE_CLASS_VIDEO_DECODE: case XE_ENGINE_CLASS_VIDEO_ENHANCE: + case XE_ENGINE_CLASS_OTHER: return __hwe_oam_unit(hwe); default: @@ -2514,20 +2599,29 @@ static struct xe_oa_regs __oag_regs(void) static void __xe_oa_init_oa_units(struct xe_gt *gt) { - const u32 mtl_oa_base[] = { 0x13000 }; + /* Actual address is MEDIA_GT_GSI_OFFSET + oam_base_addr[i] */ + const u32 oam_base_addr[] = { + [XE_OAM_UNIT_SAG] = 0x13000, + [XE_OAM_UNIT_SCMI_0] = 0x14000, + [XE_OAM_UNIT_SCMI_1] = 0x14800, + }; int i, num_units = gt->oa.num_oa_units; for (i = 0; i < num_units; i++) { struct xe_oa_unit *u = &gt->oa.oa_unit[i]; - if (gt->info.type != XE_GT_TYPE_MEDIA) { + if (xe_gt_is_main_type(gt)) { u->regs = __oag_regs(); u->type = DRM_XE_OA_UNIT_TYPE_OAG; - } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { - u->regs = __oam_regs(mtl_oa_base[i]); - u->type = DRM_XE_OA_UNIT_TYPE_OAM; + } else { + xe_gt_assert(gt, GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270); + u->regs = __oam_regs(oam_base_addr[i]); + u->type = i == XE_OAM_UNIT_SAG && GRAPHICS_VER(gt_to_xe(gt)) >= 20 ? + DRM_XE_OA_UNIT_TYPE_OAM_SAG : DRM_XE_OA_UNIT_TYPE_OAM; } + u->gt = gt; + xe_mmio_write32(&gt->mmio, u->regs.oa_ctrl, 0); /* Ensure MMIO trigger remains disabled till there is a stream */ @@ -2560,10 +2654,6 @@ static int xe_oa_init_gt(struct xe_gt *gt) } } - /* - * Fused off engines can result in oa_unit's with num_engines == 0. These units - * will appear in OA unit query, but no OA streams can be opened on them.
- */ gt->oa.num_oa_units = num_oa_units; gt->oa.oa_unit = u; @@ -2574,17 +2664,54 @@ static int xe_oa_init_gt(struct xe_gt *gt) return 0; } +static void xe_oa_print_gt_oa_units(struct xe_gt *gt) +{ + enum xe_hw_engine_id hwe_id; + struct xe_hw_engine *hwe; + struct xe_oa_unit *u; + char buf[256]; + int i, n; + + for (i = 0; i < gt->oa.num_oa_units; i++) { + u = &gt->oa.oa_unit[i]; + buf[0] = '\0'; + n = 0; + + for_each_hw_engine(hwe, gt, hwe_id) + if (xe_oa_unit_id(hwe) == u->oa_unit_id) + n += scnprintf(buf + n, sizeof(buf) - n, "%s ", hwe->name); + + xe_gt_dbg(gt, "oa_unit %d, type %d, Engines: %s\n", u->oa_unit_id, u->type, buf); + } +} + +static void xe_oa_print_oa_units(struct xe_oa *oa) +{ + struct xe_gt *gt; + int gt_id; + + for_each_gt(gt, oa->xe, gt_id) + xe_oa_print_gt_oa_units(gt); +} + static int xe_oa_init_oa_units(struct xe_oa *oa) { struct xe_gt *gt; int i, ret; + /* Needed for OAM implementation here */ + BUILD_BUG_ON(XE_OAM_UNIT_SAG != 0); + BUILD_BUG_ON(XE_OAM_UNIT_SCMI_0 != 1); + BUILD_BUG_ON(XE_OAM_UNIT_SCMI_1 != 2); + for_each_gt(gt, oa->xe, i) { ret = xe_oa_init_gt(gt); if (ret) return ret; } + xe_oa_print_oa_units(oa); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 52e33c37d5ee..cf080f412189 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -15,6 +15,8 @@ #include "regs/xe_reg_defs.h" #include "xe_hw_engine_types.h" +struct drm_syncobj; + #define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M enum xe_oa_report_header { @@ -95,6 +97,9 @@ struct xe_oa_unit { /** @oa_unit_id: identifier for the OA unit */ u16 oa_unit_id; + /** @gt: gt associated with the OA unit */ + struct xe_gt *gt; + /** @type: Type of OA unit - OAM, OAG etc.
*/ enum drm_xe_oa_unit_type type; @@ -182,6 +187,9 @@ struct xe_oa_stream { /** @gt: gt associated with the oa stream */ struct xe_gt *gt; + /** @oa_unit: oa unit for this stream */ + struct xe_oa_unit *oa_unit; + /** @hwe: hardware engine associated with this oa stream */ struct xe_hw_engine *hwe; @@ -242,6 +250,12 @@ struct xe_oa_stream { /** @xef: xe_file with which the stream was opened */ struct xe_file *xef; + /** @ufence_syncobj: User fence syncobj */ + struct drm_syncobj *ufence_syncobj; + + /** @ufence_timeline_value: User fence timeline value */ + u64 ufence_timeline_value; + /** @last_fence: fence to use in stream destroy when needed */ struct dma_fence *last_fence; @@ -250,5 +264,8 @@ struct xe_oa_stream { /** @syncs: syncs to wait on and to signal */ struct xe_sync_entry *syncs; + + /** @fw_ref: Forcewake reference */ + unsigned int fw_ref; }; #endif diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c new file mode 100644 index 000000000000..afb06598b6e1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pagefault.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/circ_buf.h> + +#include <drm/drm_exec.h> +#include <drm/drm_managed.h> + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt_printk.h" +#include "xe_gt_types.h" +#include "xe_gt_stats.h" +#include "xe_hw_engine.h" +#include "xe_pagefault.h" +#include "xe_pagefault_types.h" +#include "xe_svm.h" +#include "xe_trace_bo.h" +#include "xe_vm.h" + +/** + * DOC: Xe page faults + * + * Xe page faults are handled in two layers. The producer layer interacts with + * hardware or firmware to receive and parse faults into struct xe_pagefault, + * then forwards them to the consumer. The consumer layer services the faults + * (e.g., memory migration, page table updates) and acknowledges the result back + * to the producer, which then forwards the results to the hardware or firmware. 
+ * The consumer uses a page fault queue sized to absorb all potential faults and + * a multi-threaded worker to process them. Multiple producers are supported, + * with a single shared consumer. + * + * xe_pagefault.c implements the consumer layer. + */ + +static int xe_pagefault_entry_size(void) +{ + /* + * Power of two alignment is not a hardware requirement, rather a + * software restriction which makes the math for page fault queue + * management simpler. + */ + return roundup_pow_of_two(sizeof(struct xe_pagefault)); +} + +static int xe_pagefault_begin(struct drm_exec *exec, struct xe_vma *vma, + struct xe_vram_region *vram, bool need_vram_move) +{ + struct xe_bo *bo = xe_vma_bo(vma); + struct xe_vm *vm = xe_vma_vm(vma); + int err; + + err = xe_vm_lock_vma(exec, vma); + if (err) + return err; + + if (!bo) + return 0; + + return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) : + xe_bo_validate(bo, vm, true, exec); +} + +static int xe_pagefault_handle_vma(struct xe_gt *gt, struct xe_vma *vma, + bool atomic) +{ + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_validation_ctx ctx; + struct drm_exec exec; + struct dma_fence *fence; + int err, needs_vram; + + lockdep_assert_held_write(&vm->lock); + + needs_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic); + if (needs_vram < 0 || (needs_vram && xe_vma_is_userptr(vma))) + return needs_vram < 0 ?
needs_vram : -EACCES; + + xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1); + xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, + xe_vma_size(vma) / SZ_1K); + + trace_xe_vma_pagefault(vma); + + /* Check if VMA is valid, opportunistic check only */ + if (xe_vm_has_valid_gpu_mapping(tile, vma->tile_present, + vma->tile_invalidated) && !atomic) + return 0; + +retry_userptr: + if (xe_vma_is_userptr(vma) && + xe_vma_userptr_check_repin(to_userptr_vma(vma))) { + struct xe_userptr_vma *uvma = to_userptr_vma(vma); + + err = xe_vma_userptr_pin_pages(uvma); + if (err) + return err; + } + + /* Lock VM and BOs dma-resv */ + xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {}); + drm_exec_until_all_locked(&exec) { + err = xe_pagefault_begin(&exec, vma, tile->mem.vram, + needs_vram == 1); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + if (err) + goto unlock_dma_resv; + + /* Bind VMA only to the GT that has faulted */ + trace_xe_vma_pf_bind(vma); + xe_vm_set_validation_exec(vm, &exec); + fence = xe_vma_rebind(vm, vma, BIT(tile->id)); + xe_vm_set_validation_exec(vm, NULL); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + xe_validation_retry_on_oom(&ctx, &err); + goto unlock_dma_resv; + } + } + + dma_fence_wait(fence, false); + dma_fence_put(fence); + +unlock_dma_resv: + xe_validation_ctx_fini(&ctx); + if (err == -EAGAIN) + goto retry_userptr; + + return err; +} + +static bool +xe_pagefault_access_is_atomic(enum xe_pagefault_access_type access_type) +{ + return access_type == XE_PAGEFAULT_ACCESS_TYPE_ATOMIC; +} + +static struct xe_vm *xe_pagefault_asid_to_vm(struct xe_device *xe, u32 asid) +{ + struct xe_vm *vm; + + down_read(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, asid); + if (vm && xe_vm_in_fault_mode(vm)) + xe_vm_get(vm); + else + vm = ERR_PTR(-EINVAL); + up_read(&xe->usm.lock); + + return vm; +} + +static int xe_pagefault_service(struct xe_pagefault *pf) +{ + struct xe_gt *gt = pf->gt; + 
struct xe_device *xe = gt_to_xe(gt); + struct xe_vm *vm; + struct xe_vma *vma = NULL; + int err; + bool atomic; + + /* Producer flagged this fault to be nacked */ + if (pf->consumer.fault_level == XE_PAGEFAULT_LEVEL_NACK) + return -EFAULT; + + vm = xe_pagefault_asid_to_vm(xe, pf->consumer.asid); + if (IS_ERR(vm)) + return PTR_ERR(vm); + + /* + * TODO: Change to read lock? Using write lock for simplicity. + */ + down_write(&vm->lock); + + if (xe_vm_is_closed(vm)) { + err = -ENOENT; + goto unlock_vm; + } + + vma = xe_vm_find_vma_by_addr(vm, pf->consumer.page_addr); + if (!vma) { + err = -EINVAL; + goto unlock_vm; + } + + atomic = xe_pagefault_access_is_atomic(pf->consumer.access_type); + + if (xe_vma_is_cpu_addr_mirror(vma)) + err = xe_svm_handle_pagefault(vm, vma, gt, + pf->consumer.page_addr, atomic); + else + err = xe_pagefault_handle_vma(gt, vma, atomic); + +unlock_vm: + if (!err) + vm->usm.last_fault_vma = vma; + up_write(&vm->lock); + xe_vm_put(vm); + + return err; +} + +static bool xe_pagefault_queue_pop(struct xe_pagefault_queue *pf_queue, + struct xe_pagefault *pf) +{ + bool found_fault = false; + + spin_lock_irq(&pf_queue->lock); + if (pf_queue->tail != pf_queue->head) { + memcpy(pf, pf_queue->data + pf_queue->tail, sizeof(*pf)); + pf_queue->tail = (pf_queue->tail + xe_pagefault_entry_size()) % + pf_queue->size; + found_fault = true; + } + spin_unlock_irq(&pf_queue->lock); + + return found_fault; +} + +static void xe_pagefault_print(struct xe_pagefault *pf) +{ + xe_gt_dbg(pf->gt, "\n\tASID: %d\n" + "\tFaulted Address: 0x%08x%08x\n" + "\tFaultType: %d\n" + "\tAccessType: %d\n" + "\tFaultLevel: %d\n" + "\tEngineClass: %d %s\n" + "\tEngineInstance: %d\n", + pf->consumer.asid, + upper_32_bits(pf->consumer.page_addr), + lower_32_bits(pf->consumer.page_addr), + pf->consumer.fault_type, + pf->consumer.access_type, + pf->consumer.fault_level, + pf->consumer.engine_class, + xe_hw_engine_class_to_str(pf->consumer.engine_class), + pf->consumer.engine_instance); +} + 
+static void xe_pagefault_queue_work(struct work_struct *w) +{ + struct xe_pagefault_queue *pf_queue = + container_of(w, typeof(*pf_queue), worker); + struct xe_pagefault pf; + unsigned long threshold; + +#define USM_QUEUE_MAX_RUNTIME_MS 20 + threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); + + while (xe_pagefault_queue_pop(pf_queue, &pf)) { + int err; + + if (!pf.gt) /* Fault squashed during reset */ + continue; + + err = xe_pagefault_service(&pf); + if (err) { + xe_pagefault_print(&pf); + xe_gt_dbg(pf.gt, "Fault response: Unsuccessful %pe\n", + ERR_PTR(err)); + } + + pf.producer.ops->ack_fault(&pf, err); + + if (time_after(jiffies, threshold)) { + queue_work(gt_to_xe(pf.gt)->usm.pf_wq, w); + break; + } + } +#undef USM_QUEUE_MAX_RUNTIME_MS +} + +static int xe_pagefault_queue_init(struct xe_device *xe, + struct xe_pagefault_queue *pf_queue) +{ + struct xe_gt *gt; + int total_num_eus = 0; + u8 id; + + for_each_gt(gt, xe, id) { + xe_dss_mask_t all_dss; + int num_dss, num_eus; + + bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, + gt->fuse_topo.c_dss_mask, XE_MAX_DSS_FUSE_BITS); + + num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS); + num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, + XE_MAX_EU_FUSE_BITS) * num_dss; + + total_num_eus += num_eus; + } + + xe_assert(xe, total_num_eus); + + /* + * user can issue separate page faults per EU and per CS + * + * XXX: Multiplier required as compute UMD are getting PF queue errors + * without it. Follow on why this multiplier is required. 
+ */ +#define PF_MULTIPLIER 8 + pf_queue->size = (total_num_eus + XE_NUM_HW_ENGINES) * + xe_pagefault_entry_size() * PF_MULTIPLIER; + pf_queue->size = roundup_pow_of_two(pf_queue->size); +#undef PF_MULTIPLIER + + drm_dbg(&xe->drm, "xe_pagefault_entry_size=%d, total_num_eus=%d, pf_queue->size=%u", + xe_pagefault_entry_size(), total_num_eus, pf_queue->size); + + spin_lock_init(&pf_queue->lock); + INIT_WORK(&pf_queue->worker, xe_pagefault_queue_work); + + pf_queue->data = drmm_kzalloc(&xe->drm, pf_queue->size, GFP_KERNEL); + if (!pf_queue->data) + return -ENOMEM; + + return 0; +} + +static void xe_pagefault_fini(void *arg) +{ + struct xe_device *xe = arg; + + destroy_workqueue(xe->usm.pf_wq); +} + +/** + * xe_pagefault_init() - Page fault init + * @xe: xe device instance + * + * Initialize Xe page fault state. Must be done after reading fuses. + * + * Return: 0 on Success, errno on failure + */ +int xe_pagefault_init(struct xe_device *xe) +{ + int err, i; + + if (!xe->info.has_usm) + return 0; + + xe->usm.pf_wq = alloc_workqueue("xe_page_fault_work_queue", + WQ_UNBOUND | WQ_HIGHPRI, + XE_PAGEFAULT_QUEUE_COUNT); + if (!xe->usm.pf_wq) + return -ENOMEM; + + for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i) { + err = xe_pagefault_queue_init(xe, xe->usm.pf_queue + i); + if (err) + goto err_out; + } + + return devm_add_action_or_reset(xe->drm.dev, xe_pagefault_fini, xe); + +err_out: + destroy_workqueue(xe->usm.pf_wq); + return err; +} + +static void xe_pagefault_queue_reset(struct xe_device *xe, struct xe_gt *gt, + struct xe_pagefault_queue *pf_queue) +{ + u32 i; + + /* Driver load failure guard / USM not enabled guard */ + if (!pf_queue->data) + return; + + /* Squash all pending faults on the GT */ + + spin_lock_irq(&pf_queue->lock); + for (i = pf_queue->tail; i != pf_queue->head; + i = (i + xe_pagefault_entry_size()) % pf_queue->size) { + struct xe_pagefault *pf = pf_queue->data + i; + + if (pf->gt == gt) + pf->gt = NULL; + } + spin_unlock_irq(&pf_queue->lock); +} + +/** + * 
xe_pagefault_reset() - Page fault reset for a GT + * @xe: xe device instance + * @gt: GT being reset + * + * Reset the Xe page fault state for a GT; that is, squash any pending faults on + * the GT. + */ +void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt) +{ + int i; + + for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i) + xe_pagefault_queue_reset(xe, gt, xe->usm.pf_queue + i); +} + +static bool xe_pagefault_queue_full(struct xe_pagefault_queue *pf_queue) +{ + lockdep_assert_held(&pf_queue->lock); + + return CIRC_SPACE(pf_queue->head, pf_queue->tail, pf_queue->size) <= + xe_pagefault_entry_size(); +} + +/** + * xe_pagefault_handler() - Page fault handler + * @xe: xe device instance + * @pf: Page fault + * + * Sink the page fault to a queue (i.e., a memory buffer) and queue a worker to + * service it. Safe to be called from IRQ or process context. Reclaim safe. + * + * Return: 0 on success, errno on failure + */ +int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf) +{ + struct xe_pagefault_queue *pf_queue = xe->usm.pf_queue + + (pf->consumer.asid % XE_PAGEFAULT_QUEUE_COUNT); + unsigned long flags; + bool full; + + spin_lock_irqsave(&pf_queue->lock, flags); + full = xe_pagefault_queue_full(pf_queue); + if (!full) { + memcpy(pf_queue->data + pf_queue->head, pf, sizeof(*pf)); + pf_queue->head = (pf_queue->head + xe_pagefault_entry_size()) % + pf_queue->size; + queue_work(xe->usm.pf_wq, &pf_queue->worker); + } else { + drm_warn(&xe->drm, + "PageFault Queue (%d) full, shouldn't be possible\n", + pf->consumer.asid % XE_PAGEFAULT_QUEUE_COUNT); + } + spin_unlock_irqrestore(&pf_queue->lock, flags); + + return full ? 
-ENOSPC : 0; +} diff --git a/drivers/gpu/drm/xe/xe_pagefault.h b/drivers/gpu/drm/xe/xe_pagefault.h new file mode 100644 index 000000000000..bd0cdf9ed37f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pagefault.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PAGEFAULT_H_ +#define _XE_PAGEFAULT_H_ + +struct xe_device; +struct xe_gt; +struct xe_pagefault; + +int xe_pagefault_init(struct xe_device *xe); + +void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt); + +int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf); + +#endif diff --git a/drivers/gpu/drm/xe/xe_pagefault_types.h b/drivers/gpu/drm/xe/xe_pagefault_types.h new file mode 100644 index 000000000000..d3b516407d60 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pagefault_types.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PAGEFAULT_TYPES_H_ +#define _XE_PAGEFAULT_TYPES_H_ + +#include <linux/workqueue.h> + +struct xe_gt; +struct xe_pagefault; + +/** enum xe_pagefault_access_type - Xe page fault access type */ +enum xe_pagefault_access_type { + /** @XE_PAGEFAULT_ACCESS_TYPE_READ: Read access type */ + XE_PAGEFAULT_ACCESS_TYPE_READ = 0, + /** @XE_PAGEFAULT_ACCESS_TYPE_WRITE: Write access type */ + XE_PAGEFAULT_ACCESS_TYPE_WRITE = 1, + /** @XE_PAGEFAULT_ACCESS_TYPE_ATOMIC: Atomic access type */ + XE_PAGEFAULT_ACCESS_TYPE_ATOMIC = 2, +}; + +/** enum xe_pagefault_type - Xe page fault type */ +enum xe_pagefault_type { + /** @XE_PAGEFAULT_TYPE_NOT_PRESENT: Not present */ + XE_PAGEFAULT_TYPE_NOT_PRESENT = 0, + /** @XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION: Write access violation */ + XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION = 1, + /** @XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION: Atomic access violation */ + XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION = 2, +}; + +/** struct xe_pagefault_ops - Xe pagefault ops (producer) */ +struct xe_pagefault_ops { + /** + * 
@ack_fault: Ack fault + * @pf: Page fault + * @err: Error state of fault + * + * Page fault producer receives acknowledgment from the consumer and + * sends the result to the HW/FW interface. + */ + void (*ack_fault)(struct xe_pagefault *pf, int err); +}; + +/** + * struct xe_pagefault - Xe page fault + * + * Generic page fault structure for communication between producer and consumer. + * Carefully sized to be 64 bytes. Upon a device page fault, the producer + * populates this structure, and the consumer copies it into the page-fault + * queue for deferred handling. + */ +struct xe_pagefault { + /** + * @gt: GT of fault + */ + struct xe_gt *gt; + /** + * @consumer: State for the software handling the fault. Populated by + * the producer and may be modified by the consumer to communicate + * information back to the producer upon fault acknowledgment. + */ + struct { + /** @consumer.page_addr: address of page fault */ + u64 page_addr; + /** @consumer.asid: address space ID */ + u32 asid; + /** + * @consumer.access_type: access type, u8 rather than enum to + * keep size compact + */ + u8 access_type; + /** + * @consumer.fault_type: fault type, u8 rather than enum to + * keep size compact + */ + u8 fault_type; +#define XE_PAGEFAULT_LEVEL_NACK 0xff /* Producer indicates nack fault */ + /** @consumer.fault_level: fault level */ + u8 fault_level; + /** @consumer.engine_class: engine class */ + u8 engine_class; + /** @consumer.engine_instance: engine instance */ + u8 engine_instance; + /** @consumer.reserved: reserved bits for future expansion */ + u8 reserved[7]; + } consumer; + /** + * @producer: State for the producer (i.e., HW/FW interface). Populated + * by the producer and should not be modified—or even inspected—by the + * consumer, except for calling operations.
+ */ + struct { + /** @producer.private: private pointer */ + void *private; + /** @producer.ops: operations */ + const struct xe_pagefault_ops *ops; +#define XE_PAGEFAULT_PRODUCER_MSG_LEN_DW 4 + /** + * @producer.msg: page fault message, used by producer in fault + * acknowledgment to formulate response to HW/FW interface. + * Included in the page-fault message because the producer + * typically receives the fault in a context where memory cannot + * be allocated (e.g., atomic context or the reclaim path). + */ + u32 msg[XE_PAGEFAULT_PRODUCER_MSG_LEN_DW]; + } producer; +}; + +/** + * struct xe_pagefault_queue: Xe pagefault queue (consumer) + * + * Used to capture all device page faults for deferred processing. Size this + * queue to absorb the device’s worst-case number of outstanding faults. + */ +struct xe_pagefault_queue { + /** + * @data: Data in queue containing struct xe_pagefault, protected by + * @lock + */ + void *data; + /** @size: Size of queue in bytes */ + u32 size; + /** @head: Head pointer in bytes, moved by producer, protected by @lock */ + u32 head; + /** @tail: Tail pointer in bytes, moved by consumer, protected by @lock */ + u32 tail; + /** @lock: protects page fault queue */ + spinlock_t lock; + /** @worker: to process page faults */ + struct work_struct worker; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 30fdbdb9341e..68171cceea18 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -57,7 +57,7 @@ struct xe_pat_ops { int n_entries); void (*program_media)(struct xe_gt *gt, const struct xe_pat_table_entry table[], int n_entries); - void (*dump)(struct xe_gt *gt, struct drm_printer *p); + int (*dump)(struct xe_gt *gt, struct drm_printer *p); }; static const struct xe_pat_table_entry xelp_pat_table[] = { @@ -103,7 +103,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { * * Note: There is an implicit assumption in the driver that compression and * coh_1way+ are 
mutually exclusive. If this is ever not true then userptr - * and imported dma-buf from external device will have uncleared ccs state. + * and imported dma-buf from external device will have uncleared ccs state. See + * also xe_bo_needs_ccs_pages(). */ #define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \ { \ @@ -114,7 +115,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \ REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \ .coh_mode = (BUILD_BUG_ON_ZERO(__coh_mode && comp_en) || __coh_mode) ? \ - XE_COH_AT_LEAST_1WAY : XE_COH_NONE \ + XE_COH_AT_LEAST_1WAY : XE_COH_NONE, \ + .valid = 1 \ } static const struct xe_pat_table_entry xe2_pat_table[] = { @@ -153,6 +155,41 @@ static const struct xe_pat_table_entry xe2_pat_table[] = { static const struct xe_pat_table_entry xe2_pat_ats = XE2_PAT( 0, 0, 0, 0, 3, 3 ); static const struct xe_pat_table_entry xe2_pat_pta = XE2_PAT( 0, 0, 0, 0, 3, 0 ); +/* + * Xe3p_XPC PAT table uses the same layout as Xe2/Xe3, except that there's no + * option for compression. Also note that the "L3" and "L4" register fields + * actually control L2 and L3 cache respectively on this platform. 
+ */ +#define XE3P_XPC_PAT(no_promote, l3clos, l3_policy, l4_policy, __coh_mode) \ + XE2_PAT(no_promote, 0, l3clos, l3_policy, l4_policy, __coh_mode) + +static const struct xe_pat_table_entry xe3p_xpc_pat_ats = XE3P_XPC_PAT( 0, 0, 0, 0, 3 ); +static const struct xe_pat_table_entry xe3p_xpc_pat_pta = XE3P_XPC_PAT( 0, 0, 0, 0, 0 ); + +static const struct xe_pat_table_entry xe3p_xpc_pat_table[] = { + [ 0] = XE3P_XPC_PAT( 0, 0, 0, 0, 0 ), + [ 1] = XE3P_XPC_PAT( 0, 0, 0, 0, 2 ), + [ 2] = XE3P_XPC_PAT( 0, 0, 0, 0, 3 ), + [ 3] = XE3P_XPC_PAT( 0, 0, 3, 3, 0 ), + [ 4] = XE3P_XPC_PAT( 0, 0, 3, 3, 2 ), + [ 5] = XE3P_XPC_PAT( 0, 0, 3, 0, 0 ), + [ 6] = XE3P_XPC_PAT( 0, 0, 3, 0, 2 ), + [ 7] = XE3P_XPC_PAT( 0, 0, 3, 0, 3 ), + [ 8] = XE3P_XPC_PAT( 0, 0, 0, 3, 0 ), + [ 9] = XE3P_XPC_PAT( 0, 0, 0, 3, 2 ), + [10] = XE3P_XPC_PAT( 0, 0, 0, 3, 3 ), + /* 11..22 are reserved; leave set to all 0's */ + [23] = XE3P_XPC_PAT( 0, 1, 0, 0, 0 ), + [24] = XE3P_XPC_PAT( 0, 1, 0, 0, 2 ), + [25] = XE3P_XPC_PAT( 0, 1, 0, 0, 3 ), + [26] = XE3P_XPC_PAT( 0, 2, 0, 0, 0 ), + [27] = XE3P_XPC_PAT( 0, 2, 0, 0, 2 ), + [28] = XE3P_XPC_PAT( 0, 2, 0, 0, 3 ), + [29] = XE3P_XPC_PAT( 0, 3, 0, 0, 0 ), + [30] = XE3P_XPC_PAT( 0, 3, 0, 0, 2 ), + [31] = XE3P_XPC_PAT( 0, 3, 0, 0, 3 ), +}; + u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index) { WARN_ON(pat_index >= xe->pat.n_entries); @@ -162,24 +199,38 @@ u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index) static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], int n_entries) { + struct xe_device *xe = gt_to_xe(gt); + for (int i = 0; i < n_entries; i++) { struct xe_reg reg = XE_REG(_PAT_INDEX(i)); xe_mmio_write32(&gt->mmio, reg, table[i].value); } + + if (xe->pat.pat_ats) + xe_mmio_write32(&gt->mmio, XE_REG(_PAT_ATS), xe->pat.pat_ats->value); + if (xe->pat.pat_pta) + xe_mmio_write32(&gt->mmio, XE_REG(_PAT_PTA), xe->pat.pat_pta->value); } static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry
table[], int n_entries) { + struct xe_device *xe = gt_to_xe(gt); + for (int i = 0; i < n_entries; i++) { struct xe_reg_mcr reg_mcr = XE_REG_MCR(_PAT_INDEX(i)); xe_gt_mcr_multicast_write(gt, reg_mcr, table[i].value); } + + if (xe->pat.pat_ats) + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe->pat.pat_ats->value); + if (xe->pat.pat_pta) + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe->pat.pat_pta->value); } -static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) +static int xelp_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); unsigned int fw_ref; @@ -187,7 +238,7 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (!fw_ref) - return; + return -ETIMEDOUT; drm_printf(p, "PAT table:\n"); @@ -200,6 +251,7 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) } xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } static const struct xe_pat_ops xelp_pat_ops = { @@ -207,7 +259,7 @@ static const struct xe_pat_ops xelp_pat_ops = { .dump = xelp_dump, }; -static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) +static int xehp_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); unsigned int fw_ref; @@ -215,7 +267,7 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (!fw_ref) - return; + return -ETIMEDOUT; drm_printf(p, "PAT table:\n"); @@ -230,6 +282,7 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) } xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } static const struct xe_pat_ops xehp_pat_ops = { @@ -237,7 +290,7 @@ static const struct xe_pat_ops xehp_pat_ops = { .dump = xehp_dump, }; -static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p) +static int xehpc_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); unsigned int fw_ref; @@ -245,7 +298,7 @@ static void
xehpc_dump(struct xe_gt *gt, struct drm_printer *p) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (!fw_ref) - return; + return -ETIMEDOUT; drm_printf(p, "PAT table:\n"); @@ -258,6 +311,7 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p) } xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } static const struct xe_pat_ops xehpc_pat_ops = { @@ -265,7 +319,7 @@ static const struct xe_pat_ops xehpc_pat_ops = { .dump = xehpc_dump, }; -static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) +static int xelpg_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); unsigned int fw_ref; @@ -273,7 +327,7 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (!fw_ref) - return; + return -ETIMEDOUT; drm_printf(p, "PAT table:\n"); @@ -291,6 +345,7 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) } xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } /* @@ -303,27 +358,7 @@ static const struct xe_pat_ops xelpg_pat_ops = { .dump = xelpg_dump, }; -static void xe2lpg_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], - int n_entries) -{ - program_pat_mcr(gt, table, n_entries); - xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe2_pat_ats.value); - - if (IS_DGFX(gt_to_xe(gt))) - xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe2_pat_pta.value); -} - -static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], - int n_entries) -{ - program_pat(gt, table, n_entries); - xe_mmio_write32(&gt->mmio, XE_REG(_PAT_ATS), xe2_pat_ats.value); - - if (IS_DGFX(gt_to_xe(gt))) - xe_mmio_write32(&gt->mmio, XE_REG(_PAT_PTA), xe2_pat_pta.value); -} - -static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) +static int xe2_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); unsigned int fw_ref; @@ -332,9 +367,9 @@ static void xe2_dump(struct xe_gt *gt, struct 
drm_printer *p) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (!fw_ref) - return; + return -ETIMEDOUT; - drm_printf(p, "PAT table:\n"); + drm_printf(p, "PAT table: (* = reserved entry)\n"); for (i = 0; i < xe->pat.n_entries; i++) { if (xe_gt_is_media_type(gt)) @@ -342,14 +377,14 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) else pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); - drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ] (%#8x)\n", i, + drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ] (%#8x)%s\n", i, !!(pat & XE2_NO_PROMOTE), !!(pat & XE2_COMP_EN), REG_FIELD_GET(XE2_L3_CLOS, pat), REG_FIELD_GET(XE2_L3_POLICY, pat), REG_FIELD_GET(XE2_L4_POLICY, pat), REG_FIELD_GET(XE2_COH_MODE, pat), - pat); + pat, xe->pat.table[i].valid ? "" : " *"); } /* @@ -372,19 +407,82 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) pat); xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } static const struct xe_pat_ops xe2_pat_ops = { - .program_graphics = xe2lpg_program_pat, - .program_media = xe2lpm_program_pat, + .program_graphics = program_pat_mcr, + .program_media = program_pat, .dump = xe2_dump, }; +static int xe3p_xpc_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + unsigned int fw_ref; + u32 pat; + int i; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; + + drm_printf(p, "PAT table: (* = reserved entry)\n"); + + for (i = 0; i < xe->pat.n_entries; i++) { + pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); + + drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u ] (%#8x)%s\n", i, + !!(pat & XE2_NO_PROMOTE), + REG_FIELD_GET(XE2_L3_CLOS, pat), + REG_FIELD_GET(XE2_L3_POLICY, pat), + REG_FIELD_GET(XE2_L4_POLICY, pat), + REG_FIELD_GET(XE2_COH_MODE, pat), + pat, xe->pat.table[i].valid ? "" : " *"); + } + + /* + * Also print PTA_MODE, which describes how the hardware accesses + * PPGTT entries. 
+ */ + pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA)); + + drm_printf(p, "Page Table Access:\n"); + drm_printf(p, "PTA_MODE= [ %u, %u, %u, %u, %u ] (%#8x)\n", + !!(pat & XE2_NO_PROMOTE), + REG_FIELD_GET(XE2_L3_CLOS, pat), + REG_FIELD_GET(XE2_L3_POLICY, pat), + REG_FIELD_GET(XE2_L4_POLICY, pat), + REG_FIELD_GET(XE2_COH_MODE, pat), + pat); + + xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; +} + +static const struct xe_pat_ops xe3p_xpc_pat_ops = { + .program_graphics = program_pat_mcr, + .program_media = program_pat, + .dump = xe3p_xpc_dump, +}; + void xe_pat_init_early(struct xe_device *xe) { - if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) { + if (GRAPHICS_VERx100(xe) == 3511) { + xe->pat.ops = &xe3p_xpc_pat_ops; + xe->pat.table = xe3p_xpc_pat_table; + xe->pat.pat_ats = &xe3p_xpc_pat_ats; + xe->pat.pat_pta = &xe3p_xpc_pat_pta; + xe->pat.n_entries = ARRAY_SIZE(xe3p_xpc_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 3; + xe->pat.idx[XE_CACHE_WT] = 3; /* N/A (no display); use UC */ + xe->pat.idx[XE_CACHE_WB] = 2; + } else if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) { xe->pat.ops = &xe2_pat_ops; xe->pat.table = xe2_pat_table; + xe->pat.pat_ats = &xe2_pat_ats; + if (IS_DGFX(xe)) + xe->pat.pat_pta = &xe2_pat_pta; /* Wa_16023588340. XXX: Should use XE_WA */ if (GRAPHICS_VERx100(xe) == 2001) @@ -464,12 +562,19 @@ void xe_pat_init(struct xe_gt *gt) xe->pat.ops->program_graphics(gt, xe->pat.table, xe->pat.n_entries); } -void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p) +/** + * xe_pat_dump() - Dump GT PAT table into a drm printer. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); if (!xe->pat.ops) - return; + return -EOPNOTSUPP; - xe->pat.ops->dump(gt, p); + return xe->pat.ops->dump(gt, p); } diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h index fa0dfbe525cd..05dae03a5f54 100644 --- a/drivers/gpu/drm/xe/xe_pat.h +++ b/drivers/gpu/drm/xe/xe_pat.h @@ -29,6 +29,11 @@ struct xe_pat_table_entry { #define XE_COH_NONE 1 #define XE_COH_AT_LEAST_1WAY 2 u16 coh_mode; + + /** + * @valid: Set to 1 if the entry is valid, 0 if it's reserved. + */ + u16 valid; }; /** @@ -43,12 +48,7 @@ void xe_pat_init_early(struct xe_device *xe); */ void xe_pat_init(struct xe_gt *gt); -/** - * xe_pat_dump - Dump PAT table - * @gt: GT structure - * @p: Printer to dump info to - */ -void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p); +int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p); /** * xe_pat_index_get_coh_mode - Extract the coherency mode for the given diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 024175cfe61e..9c9ea10d994c 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -17,6 +17,8 @@ #include "display/xe_display.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" +#include "xe_configfs.h" #include "xe_device.h" #include "xe_drv.h" #include "xe_gt.h" @@ -28,6 +30,7 @@ #include "xe_pci_sriov.h" #include "xe_pci_types.h" #include "xe_pm.h" +#include "xe_printk.h" #include "xe_sriov.h" #include "xe_step.h" #include "xe_survivability_mode.h" @@ -38,42 +41,6 @@ enum toggle_d3cold { D3COLD_ENABLE, }; -struct xe_subplatform_desc { - enum xe_subplatform subplatform; - const char *name; - const u16 *pciidlist; -}; - -struct xe_device_desc { - /* Should only ever be set for platforms without GMD_ID */ - const struct xe_ip *pre_gmdid_graphics_ip; - /* Should only ever be set for platforms without GMD_ID */ - const struct xe_ip *pre_gmdid_media_ip; - - const char 
*platform_name; - const struct xe_subplatform_desc *subplatforms; - - enum xe_platform platform; - - u8 dma_mask_size; - u8 max_remote_tiles:2; - - u8 require_force_probe:1; - u8 is_dgfx:1; - - u8 has_display:1; - u8 has_fan_control:1; - u8 has_heci_gscfi:1; - u8 has_heci_cscfi:1; - u8 has_llc:1; - u8 has_pxp:1; - u8 has_sriov:1; - u8 needs_scratch:1; - u8 skip_guc_pc:1; - u8 skip_mtcfg:1; - u8 skip_pcode:1; -}; - __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); @@ -85,15 +52,10 @@ __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); static const struct xe_graphics_desc graphics_xelp = { .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0), - - .va_bits = 48, - .vm_max_level = 3, }; #define XE_HP_FEATURES \ - .has_range_tlb_invalidation = true, \ - .va_bits = 48, \ - .vm_max_level = 3 + .has_range_tlb_inval = true static const struct xe_graphics_desc graphics_xehpg = { .hw_engine_mask = @@ -102,9 +64,6 @@ static const struct xe_graphics_desc graphics_xehpg = { BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), XE_HP_FEATURES, - .vram_flags = XE_VRAM_FLAGS_NEED64K, - - .has_flat_ccs = 1, }; static const struct xe_graphics_desc graphics_xehpc = { @@ -118,9 +77,6 @@ static const struct xe_graphics_desc graphics_xehpc = { BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), XE_HP_FEATURES, - .va_bits = 57, - .vm_max_level = 4, - .vram_flags = XE_VRAM_FLAGS_NEED64K, .has_asid = 1, .has_atomic_enable_pte_bit = 1, @@ -138,13 +94,9 @@ static const struct xe_graphics_desc graphics_xelpg = { #define XE2_GFX_FEATURES \ .has_asid = 1, \ .has_atomic_enable_pte_bit = 1, \ - .has_flat_ccs = 1, \ - .has_indirect_ring_state = 1, \ - .has_range_tlb_invalidation = 1, \ + .has_range_tlb_inval = 1, \ .has_usm = 1, \ .has_64bit_timestamp = 1, \ - .va_bits = 48, \ - .vm_max_level = 4, \ .hw_engine_mask = \ BIT(XE_HW_ENGINE_RCS0) | \ BIT(XE_HW_ENGINE_BCS8) | BIT(XE_HW_ENGINE_BCS0) | \ @@ -154,6 +106,13 @@ static const struct 
xe_graphics_desc graphics_xe2 = { XE2_GFX_FEATURES, }; +static const struct xe_graphics_desc graphics_xe3p_xpc = { + XE2_GFX_FEATURES, + .hw_engine_mask = + GENMASK(XE_HW_ENGINE_BCS8, XE_HW_ENGINE_BCS1) | + GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0), +}; + static const struct xe_media_desc media_xem = { .hw_engine_mask = GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) | @@ -179,9 +138,14 @@ static const struct xe_ip graphics_ips[] = { { 1271, "Xe_LPG", &graphics_xelpg }, { 1274, "Xe_LPG+", &graphics_xelpg }, { 2001, "Xe2_HPG", &graphics_xe2 }, + { 2002, "Xe2_HPG", &graphics_xe2 }, { 2004, "Xe2_LPG", &graphics_xe2 }, { 3000, "Xe3_LPG", &graphics_xe2 }, { 3001, "Xe3_LPG", &graphics_xe2 }, + { 3003, "Xe3_LPG", &graphics_xe2 }, + { 3004, "Xe3_LPG", &graphics_xe2 }, + { 3005, "Xe3_LPG", &graphics_xe2 }, + { 3511, "Xe3p_XPC", &graphics_xe3p_xpc }, }; /* Pre-GMDID Media IPs */ @@ -194,6 +158,9 @@ static const struct xe_ip media_ips[] = { { 1301, "Xe2_HPM", &media_xelpmp }, { 2000, "Xe2_LPM", &media_xelpmp }, { 3000, "Xe3_LPM", &media_xelpmp }, + { 3002, "Xe3_LPM", &media_xelpmp }, + { 3500, "Xe3p_LPM", &media_xelpmp }, + { 3503, "Xe3p_HPM", &media_xelpmp }, }; static const struct xe_device_desc tgl_desc = { @@ -203,7 +170,11 @@ static const struct xe_device_desc tgl_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .has_sriov = true, + .max_gt_per_tile = 1, .require_force_probe = true, + .va_bits = 48, + .vm_max_level = 3, }; static const struct xe_device_desc rkl_desc = { @@ -213,7 +184,10 @@ static const struct xe_device_desc rkl_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, + .va_bits = 48, + .vm_max_level = 3, }; static const u16 adls_rpls_ids[] = { INTEL_RPLS_IDS(NOP), 0 }; @@ -225,11 +199,15 @@ static const struct xe_device_desc adl_s_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .has_sriov = true, + .max_gt_per_tile = 1, .require_force_probe = 
true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids }, {}, }, + .va_bits = 48, + .vm_max_level = 3, }; static const u16 adlp_rplu_ids[] = { INTEL_RPLU_IDS(NOP), 0 }; @@ -241,11 +219,15 @@ static const struct xe_device_desc adl_p_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .has_sriov = true, + .max_gt_per_tile = 1, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids }, {}, }, + .va_bits = 48, + .vm_max_level = 3, }; static const struct xe_device_desc adl_n_desc = { @@ -255,7 +237,11 @@ static const struct xe_device_desc adl_n_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .has_sriov = true, + .max_gt_per_tile = 1, .require_force_probe = true, + .va_bits = 48, + .vm_max_level = 3, }; #define DGFX_FEATURES \ @@ -268,8 +254,12 @@ static const struct xe_device_desc dg1_desc = { PLATFORM(DG1), .dma_mask_size = 39, .has_display = true, + .has_gsc_nvm = 1, .has_heci_gscfi = 1, + .max_gt_per_tile = 1, .require_force_probe = true, + .va_bits = 48, + .vm_max_level = 3, }; static const u16 dg2_g10_ids[] = { INTEL_DG2_G10_IDS(NOP), INTEL_ATS_M150_IDS(NOP), 0 }; @@ -279,33 +269,42 @@ static const u16 dg2_g12_ids[] = { INTEL_DG2_G12_IDS(NOP), 0 }; #define DG2_FEATURES \ DGFX_FEATURES, \ PLATFORM(DG2), \ + .has_flat_ccs = 1, \ + .has_gsc_nvm = 1, \ .has_heci_gscfi = 1, \ .subplatforms = (const struct xe_subplatform_desc[]) { \ { XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \ { XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \ { XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \ { } \ - } + }, \ + .va_bits = 48, \ + .vm_max_level = 3, \ + .vram_flags = XE_VRAM_FLAGS_NEED64K static const struct xe_device_desc ats_m_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xehpg, .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, + .max_gt_per_tile = 1, .require_force_probe = true, 
DG2_FEATURES, .has_display = false, + .has_sriov = true, }; static const struct xe_device_desc dg2_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xehpg, .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, + .max_gt_per_tile = 1, .require_force_probe = true, DG2_FEATURES, .has_display = true, .has_fan_control = true, + .has_mbx_power_limits = false, }; static const __maybe_unused struct xe_device_desc pvc_desc = { @@ -314,9 +313,15 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { PLATFORM(PVC), .dma_mask_size = 52, .has_display = false, + .has_gsc_nvm = 1, .has_heci_gscfi = 1, + .max_gt_per_tile = 1, .max_remote_tiles = 1, .require_force_probe = true, + .va_bits = 57, + .vm_max_level = 4, + .vram_flags = XE_VRAM_FLAGS_NEED64K, + .has_mbx_power_limits = false, }; static const struct xe_device_desc mtl_desc = { @@ -326,33 +331,87 @@ static const struct xe_device_desc mtl_desc = { .dma_mask_size = 46, .has_display = true, .has_pxp = true, + .max_gt_per_tile = 2, + .va_bits = 48, + .vm_max_level = 3, }; static const struct xe_device_desc lnl_desc = { PLATFORM(LUNARLAKE), .dma_mask_size = 46, .has_display = true, + .has_flat_ccs = 1, .has_pxp = true, + .has_mem_copy_instr = true, + .max_gt_per_tile = 2, .needs_scratch = true, + .va_bits = 48, + .vm_max_level = 4, }; +static const u16 bmg_g21_ids[] = { INTEL_BMG_G21_IDS(NOP), 0 }; + static const struct xe_device_desc bmg_desc = { DGFX_FEATURES, PLATFORM(BATTLEMAGE), .dma_mask_size = 46, .has_display = true, .has_fan_control = true, + .has_flat_ccs = 1, + .has_mbx_power_limits = true, + .has_gsc_nvm = 1, .has_heci_cscfi = 1, + .has_late_bind = true, + .has_sriov = true, + .has_mem_copy_instr = true, + .max_gt_per_tile = 2, .needs_scratch = true, + .subplatforms = (const struct xe_subplatform_desc[]) { + { XE_SUBPLATFORM_BATTLEMAGE_G21, "G21", bmg_g21_ids }, + { } + }, + .va_bits = 48, + .vm_max_level = 4, }; static const struct xe_device_desc ptl_desc = { PLATFORM(PANTHERLAKE), .dma_mask_size = 46, 
.has_display = true, + .has_flat_ccs = 1, .has_sriov = true, - .require_force_probe = true, + .has_mem_copy_instr = true, + .max_gt_per_tile = 2, .needs_scratch = true, + .needs_shared_vf_gt_wq = true, + .va_bits = 48, + .vm_max_level = 4, +}; + +static const struct xe_device_desc nvls_desc = { + PLATFORM(NOVALAKE_S), + .dma_mask_size = 46, + .has_display = true, + .has_flat_ccs = 1, + .has_mem_copy_instr = true, + .max_gt_per_tile = 2, + .require_force_probe = true, + .va_bits = 48, + .vm_max_level = 4, +}; + +static const struct xe_device_desc cri_desc = { + DGFX_FEATURES, + PLATFORM(CRESCENTISLAND), + .dma_mask_size = 52, + .has_display = false, + .has_flat_ccs = false, + .has_mbx_power_limits = true, + .has_sriov = true, + .max_gt_per_tile = 2, + .require_force_probe = true, + .va_bits = 57, + .vm_max_level = 4, }; #undef PLATFORM @@ -381,6 +440,9 @@ static const struct pci_device_id pciidlist[] = { INTEL_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc), INTEL_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc), INTEL_PTL_IDS(INTEL_VGA_DEVICE, &ptl_desc), + INTEL_WCL_IDS(INTEL_VGA_DEVICE, &ptl_desc), + INTEL_NVLS_IDS(INTEL_VGA_DEVICE, &nvls_desc), + INTEL_CRI_IDS(INTEL_PCI_DEVICE, &cri_desc), { } }; MODULE_DEVICE_TABLE(pci, pciidlist); @@ -453,7 +515,7 @@ enum xe_gmdid_type { GMDID_MEDIA }; -static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid) +static int read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid) { struct xe_mmio *mmio = xe_root_tile_mmio(xe); struct xe_reg gmdid_reg = GMD_ID; @@ -462,22 +524,24 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, KUNIT_STATIC_STUB_REDIRECT(read_gmdid, xe, type, ver, revid); if (IS_SRIOV_VF(xe)) { - struct xe_gt *gt = xe_root_mmio_gt(xe); - /* * To get the value of the GMDID register, VFs must obtain it * from the GuC using MMIO communication. 
* - * Note that at this point the xe_gt is not fully uninitialized - * and only basic access to MMIO registers is possible. To use - * our existing GuC communication functions we must perform at - * least basic xe_gt and xe_guc initialization. - * - * Since to obtain the value of GMDID_MEDIA we need to use the - * media GuC, temporarily tweak the gt type. + * Note that at this point the GTs are not initialized and only + * tile-level access to MMIO registers is possible. To use our + * existing GuC communication functions we must create a dummy + * GT structure and perform at least basic xe_gt and xe_guc + * initialization. */ - xe_gt_assert(gt, gt->info.type == XE_GT_TYPE_UNINITIALIZED); + struct xe_gt *gt __free(kfree) = NULL; + int err; + gt = kzalloc(sizeof(*gt), GFP_KERNEL); + if (!gt) + return -ENOMEM; + + gt->tile = &xe->tiles[0]; if (type == GMDID_MEDIA) { gt->info.id = 1; gt->info.type = XE_GT_TYPE_MEDIA; @@ -489,15 +553,11 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, xe_gt_mmio_init(gt); xe_guc_comm_init_early(&gt->uc.guc); - /* Don't bother with GMDID if failed to negotiate the GuC ABI */ - val = xe_gt_sriov_vf_bootstrap(gt) ? 0 : xe_gt_sriov_vf_gmdid(gt); + err = xe_gt_sriov_vf_bootstrap(gt); + if (err) + return err; - /* - * Only undo xe_gt.info here, the remaining changes made above - * will be overwritten as part of the regular initialization. 
- */ - gt->info.id = 0; - gt->info.type = XE_GT_TYPE_UNINITIALIZED; + val = xe_gt_sriov_vf_gmdid(gt); } else { /* * GMD_ID is a GT register, but at this point in the driver @@ -515,55 +575,71 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, *ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 + REG_FIELD_GET(GMD_ID_RELEASE_MASK, val); *revid = REG_FIELD_GET(GMD_ID_REVID, val); + + return 0; +} + +static const struct xe_ip *find_graphics_ip(unsigned int verx100) +{ + KUNIT_STATIC_STUB_REDIRECT(find_graphics_ip, verx100); + + for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++) + if (graphics_ips[i].verx100 == verx100) + return &graphics_ips[i]; + return NULL; +} + +static const struct xe_ip *find_media_ip(unsigned int verx100) +{ + KUNIT_STATIC_STUB_REDIRECT(find_media_ip, verx100); + + for (int i = 0; i < ARRAY_SIZE(media_ips); i++) + if (media_ips[i].verx100 == verx100) + return &media_ips[i]; + return NULL; } /* * Read IP version from hardware and select graphics/media IP descriptors * based on the result. 
*/ -static void handle_gmdid(struct xe_device *xe, - const struct xe_ip **graphics_ip, - const struct xe_ip **media_ip, - u32 *graphics_revid, - u32 *media_revid) +static int handle_gmdid(struct xe_device *xe, + const struct xe_ip **graphics_ip, + const struct xe_ip **media_ip, + u32 *graphics_revid, + u32 *media_revid) { u32 ver; + int ret; *graphics_ip = NULL; *media_ip = NULL; - read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid); - - for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++) { - if (ver == graphics_ips[i].verx100) { - *graphics_ip = &graphics_ips[i]; - - break; - } - } + ret = read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid); + if (ret) + return ret; + *graphics_ip = find_graphics_ip(ver); if (!*graphics_ip) { drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n", ver / 100, ver % 100); } - read_gmdid(xe, GMDID_MEDIA, &ver, media_revid); + ret = read_gmdid(xe, GMDID_MEDIA, &ver, media_revid); + if (ret) + return ret; + /* Media may legitimately be fused off / not present */ if (ver == 0) - return; - - for (int i = 0; i < ARRAY_SIZE(media_ips); i++) { - if (ver == media_ips[i].verx100) { - *media_ip = &media_ips[i]; - - break; - } - } + return 0; + *media_ip = find_media_ip(ver); if (!*media_ip) { drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n", ver / 100, ver % 100); } + + return 0; } /* @@ -582,21 +658,37 @@ static int xe_info_init_early(struct xe_device *xe, subplatform_desc->subplatform : XE_SUBPLATFORM_NONE; xe->info.dma_mask_size = desc->dma_mask_size; + xe->info.va_bits = desc->va_bits; + xe->info.vm_max_level = desc->vm_max_level; + xe->info.vram_flags = desc->vram_flags; + xe->info.is_dgfx = desc->is_dgfx; xe->info.has_fan_control = desc->has_fan_control; + /* runtime fusing may force flat_ccs to disabled later */ + xe->info.has_flat_ccs = desc->has_flat_ccs; + xe->info.has_mbx_power_limits = desc->has_mbx_power_limits; + xe->info.has_gsc_nvm = desc->has_gsc_nvm; xe->info.has_heci_gscfi = 
desc->has_heci_gscfi; xe->info.has_heci_cscfi = desc->has_heci_cscfi; + xe->info.has_late_bind = desc->has_late_bind; xe->info.has_llc = desc->has_llc; xe->info.has_pxp = desc->has_pxp; - xe->info.has_sriov = desc->has_sriov; + xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) && + desc->has_sriov; + xe->info.has_mem_copy_instr = desc->has_mem_copy_instr; xe->info.skip_guc_pc = desc->skip_guc_pc; xe->info.skip_mtcfg = desc->skip_mtcfg; xe->info.skip_pcode = desc->skip_pcode; xe->info.needs_scratch = desc->needs_scratch; + xe->info.needs_shared_vf_gt_wq = desc->needs_shared_vf_gt_wq; xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && xe_modparam.probe_display && desc->has_display; + + xe_assert(xe, desc->max_gt_per_tile > 0); + xe_assert(xe, desc->max_gt_per_tile <= XE_MAX_GT_PER_TILE); + xe->info.max_gt_per_tile = desc->max_gt_per_tile; xe->info.tile_count = 1 + desc->max_remote_tiles; err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); @@ -607,6 +699,101 @@ static int xe_info_init_early(struct xe_device *xe, } /* + * Possibly override number of tile based on configuration register. + */ +static void xe_info_probe_tile_count(struct xe_device *xe) +{ + struct xe_mmio *mmio; + u8 tile_count; + u32 mtcfg; + + KUNIT_STATIC_STUB_REDIRECT(xe_info_probe_tile_count, xe); + + /* + * Probe for tile count only for platforms that support multiple + * tiles. 
+ */ + if (xe->info.tile_count == 1) + return; + + if (xe->info.skip_mtcfg) + return; + + mmio = xe_root_tile_mmio(xe); + + /* + * Although the per-tile mmio regs are not yet initialized, this + * is fine as it's going to the root tile's mmio, that's + * guaranteed to be initialized earlier in xe_mmio_probe_early() + */ + mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR); + tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; + + if (tile_count < xe->info.tile_count) { + drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", + xe->info.tile_count, tile_count); + xe->info.tile_count = tile_count; + } +} + +static struct xe_gt *alloc_primary_gt(struct xe_tile *tile, + const struct xe_graphics_desc *graphics_desc, + const struct xe_media_desc *media_desc) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_gt *gt; + + if (!xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev))) { + xe_info(xe, "Primary GT disabled via configfs\n"); + return NULL; + } + + gt = xe_gt_alloc(tile); + if (IS_ERR(gt)) + return gt; + + gt->info.type = XE_GT_TYPE_MAIN; + gt->info.id = tile->id * xe->info.max_gt_per_tile; + gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; + gt->info.engine_mask = graphics_desc->hw_engine_mask; + + /* + * Before media version 13, the media IP was part of the primary GT + * so we need to add the media engines to the primary GT's engine list. 
+ */ + if (MEDIA_VER(xe) < 13 && media_desc) + gt->info.engine_mask |= media_desc->hw_engine_mask; + + return gt; +} + +static struct xe_gt *alloc_media_gt(struct xe_tile *tile, + const struct xe_media_desc *media_desc) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_gt *gt; + + if (!xe_configfs_media_gt_allowed(to_pci_dev(xe->drm.dev))) { + xe_info(xe, "Media GT disabled via configfs\n"); + return NULL; + } + + if (MEDIA_VER(xe) < 13 || !media_desc) + return NULL; + + gt = xe_gt_alloc(tile); + if (IS_ERR(gt)) + return gt; + + gt->info.type = XE_GT_TYPE_MEDIA; + gt->info.id = tile->id * xe->info.max_gt_per_tile + 1; + gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state; + gt->info.engine_mask = media_desc->hw_engine_mask; + + return gt; +} + +/* * Initialize device info content that does require knowledge about * graphics / media IP version. * Make sure that GT / tile structures allocated by the driver match the data @@ -622,6 +809,7 @@ static int xe_info_init(struct xe_device *xe, const struct xe_media_desc *media_desc; struct xe_tile *tile; struct xe_gt *gt; + int ret; u8 id; /* @@ -637,8 +825,11 @@ static int xe_info_init(struct xe_device *xe, xe->info.step = xe_step_pre_gmdid_get(xe); } else { xe_assert(xe, !desc->pre_gmdid_media_ip); - handle_gmdid(xe, &graphics_ip, &media_ip, - &graphics_gmdid_revid, &media_gmdid_revid); + ret = handle_gmdid(xe, &graphics_ip, &media_ip, + &graphics_gmdid_revid, &media_gmdid_revid); + if (ret) + return ret; + xe->info.step = xe_step_gmdid_get(xe, graphics_gmdid_revid, media_gmdid_revid); @@ -665,21 +856,17 @@ static int xe_info_init(struct xe_device *xe, media_desc = NULL; } - xe->info.vram_flags = graphics_desc->vram_flags; - xe->info.va_bits = graphics_desc->va_bits; - xe->info.vm_max_level = graphics_desc->vm_max_level; xe->info.has_asid = graphics_desc->has_asid; xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit; if (xe->info.platform != XE_PVC) 
xe->info.has_device_atomics_on_smem = 1; - /* Runtime detection may change this later */ - xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; - - xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; + xe->info.has_range_tlb_inval = graphics_desc->has_range_tlb_inval; xe->info.has_usm = graphics_desc->has_usm; xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp; + xe_info_probe_tile_count(xe); + for_each_remote_tile(tile, xe, id) { int err; @@ -688,48 +875,42 @@ static int xe_info_init(struct xe_device *xe, return err; } - /* - * All platforms have at least one primary GT. Any platform with media - * version 13 or higher has an additional dedicated media GT. And - * depending on the graphics IP there may be additional "remote tiles." - * All of these together determine the overall GT count. - */ + /* Allocate any GT and VRAM structures necessary for the platform. */ for_each_tile(tile, xe, id) { - gt = tile->primary_gt; - gt->info.id = xe->info.gt_count++; - gt->info.type = XE_GT_TYPE_MAIN; - gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; - gt->info.engine_mask = graphics_desc->hw_engine_mask; + int err; - if (MEDIA_VER(xe) < 13 && media_desc) - gt->info.engine_mask |= media_desc->hw_engine_mask; + err = xe_tile_alloc_vram(tile); + if (err) + return err; - if (MEDIA_VER(xe) < 13 || !media_desc) - continue; + tile->primary_gt = alloc_primary_gt(tile, graphics_desc, media_desc); + if (IS_ERR(tile->primary_gt)) + return PTR_ERR(tile->primary_gt); /* - * Allocate and setup media GT for platforms with standalone - * media. + * It's not currently possible to probe a device with the + * primary GT disabled. With some work, this may be future in + * the possible for igpu platforms (although probably not for + * dgpu's since access to the primary GT's BCS engines is + * required for VRAM management). 
*/ - tile->media_gt = xe_gt_alloc(tile); + if (!tile->primary_gt) { + drm_err(&xe->drm, "Cannot probe device with without a primary GT\n"); + return -ENODEV; + } + + tile->media_gt = alloc_media_gt(tile, media_desc); if (IS_ERR(tile->media_gt)) return PTR_ERR(tile->media_gt); - - gt = tile->media_gt; - gt->info.type = XE_GT_TYPE_MEDIA; - gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state; - gt->info.engine_mask = media_desc->hw_engine_mask; - - /* - * FIXME: At the moment multi-tile and standalone media are - * mutually exclusive on current platforms. We'll need to - * come up with a better way to number GTs if we ever wind - * up with platforms that support both together. - */ - drm_WARN_ON(&xe->drm, id != 0); - gt->info.id = xe->info.gt_count++; } + /* + * Now that we have tiles and GTs defined, let's loop over valid GTs + * in order to define gt_count. + */ + for_each_gt(gt, xe, id) + xe->info.gt_count++; + return 0; } @@ -740,7 +921,7 @@ static void xe_pci_remove(struct pci_dev *pdev) if (IS_SRIOV_PF(xe)) xe_pci_sriov_configure(pdev, 0); - if (xe_survivability_mode_is_enabled(xe)) + if (xe_survivability_mode_is_boot_enabled(xe)) return; xe_device_remove(xe); @@ -773,6 +954,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct xe_device *xe; int err; + xe_configfs_check_device(pdev); + if (desc->require_force_probe && !id_forced(pdev->device)) { dev_info(&pdev->dev, "Your graphics device %04x is not officially supported\n" @@ -813,6 +996,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; + xe_vram_resize_bar(xe); + err = xe_device_probe_early(xe); /* * In Boot Survivability mode, no drm card is exposed and driver @@ -820,7 +1005,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * flashed through mei. 
Return success, if survivability mode * is enabled due to pcode failure or configfs being set */ - if (xe_survivability_mode_is_enabled(xe)) + if (xe_survivability_mode_is_boot_enabled(xe)) return 0; if (err) @@ -914,7 +1099,7 @@ static int xe_pci_suspend(struct device *dev) struct xe_device *xe = pdev_to_xe_device(pdev); int err; - if (xe_survivability_mode_is_enabled(xe)) + if (xe_survivability_mode_is_boot_enabled(xe)) return -EBUSY; err = xe_pm_suspend(xe); @@ -1038,6 +1223,23 @@ static struct pci_driver xe_pci_driver = { #endif }; +/** + * xe_pci_to_pf_device() - Get PF &xe_device. + * @pdev: the VF &pci_dev device + * + * Return: pointer to PF &xe_device, NULL otherwise. + */ +struct xe_device *xe_pci_to_pf_device(struct pci_dev *pdev) +{ + struct drm_device *drm; + + drm = pci_iov_get_pf_drvdata(pdev, &xe_pci_driver); + if (IS_ERR(drm)) + return NULL; + + return to_xe_device(drm); +} + int xe_register_pci_driver(void) { return pci_register_driver(&xe_pci_driver); diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h index 611c1209b14c..11bcc5fe2c5b 100644 --- a/drivers/gpu/drm/xe/xe_pci.h +++ b/drivers/gpu/drm/xe/xe_pci.h @@ -6,7 +6,10 @@ #ifndef _XE_PCI_H_ #define _XE_PCI_H_ +struct pci_dev; + int xe_register_pci_driver(void); void xe_unregister_pci_driver(void); +struct xe_device *xe_pci_to_pf_device(struct pci_dev *pdev); #endif diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 8813efdcafbb..9ff69c4843b0 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -3,6 +3,10 @@ * Copyright © 2023-2024 Intel Corporation */ +#include <linux/bitops.h> +#include <linux/pci.h> + +#include "regs/xe_bars.h" #include "xe_assert.h" #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" @@ -12,68 +16,19 @@ #include "xe_pci_sriov.h" #include "xe_pm.h" #include "xe_sriov.h" +#include "xe_sriov_pf.h" +#include "xe_sriov_pf_control.h" #include "xe_sriov_pf_helpers.h" +#include 
"xe_sriov_pf_provision.h" +#include "xe_sriov_pf_sysfs.h" #include "xe_sriov_printk.h" -static int pf_needs_provisioning(struct xe_gt *gt, unsigned int num_vfs) -{ - unsigned int n; - - for (n = 1; n <= num_vfs; n++) - if (!xe_gt_sriov_pf_config_is_empty(gt, n)) - return false; - - return true; -} - -static int pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) -{ - struct xe_gt *gt; - unsigned int id; - int result = 0, err; - - for_each_gt(gt, xe, id) { - if (!pf_needs_provisioning(gt, num_vfs)) - continue; - err = xe_gt_sriov_pf_config_set_fair(gt, VFID(1), num_vfs); - result = result ?: err; - } - - return result; -} - -static void pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs) -{ - struct xe_gt *gt; - unsigned int id; - unsigned int n; - - for_each_gt(gt, xe, id) - for (n = 1; n <= num_vfs; n++) - xe_gt_sriov_pf_config_release(gt, n, true); -} - static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs) { - struct xe_gt *gt; - unsigned int id; unsigned int n; - for_each_gt(gt, xe, id) - for (n = 1; n <= num_vfs; n++) - xe_gt_sriov_pf_control_trigger_flr(gt, n); -} - -static struct pci_dev *xe_pci_pf_get_vf_dev(struct xe_device *xe, unsigned int vf_id) -{ - struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - - xe_assert(xe, IS_SRIOV_PF(xe)); - - /* caller must use pci_dev_put() */ - return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus), - pdev->bus->number, - pci_iov_virtfn_devfn(pdev, vf_id)); + for (n = 1; n <= num_vfs; n++) + xe_sriov_pf_control_reset_vf(xe, n); } static void pf_link_vfs(struct xe_device *xe, int num_vfs) @@ -94,7 +49,7 @@ static void pf_link_vfs(struct xe_device *xe, int num_vfs) * enforce correct resume order. 
*/ for (n = 1; n <= num_vfs; n++) { - pdev_vf = xe_pci_pf_get_vf_dev(xe, n - 1); + pdev_vf = xe_pci_sriov_get_vf_pdev(pdev_pf, n); /* unlikely, something weird is happening, abort */ if (!pdev_vf) { @@ -127,6 +82,32 @@ static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs, } } +static int resize_vf_vram_bar(struct xe_device *xe, int num_vfs) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + u32 sizes; + + sizes = pci_iov_vf_bar_get_sizes(pdev, VF_LMEM_BAR, num_vfs); + if (!sizes) + return 0; + + return pci_iov_vf_bar_set_size(pdev, VF_LMEM_BAR, __fls(sizes)); +} + +static int pf_prepare_vfs_enabling(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + /* make sure we are not locked-down by other components */ + return xe_sriov_pf_arm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, false, NULL); +} + +static void pf_finish_vfs_enabling(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + /* allow other components to lockdown VFs enabling */ + xe_sriov_pf_disarm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, false, NULL); +} + static int pf_enable_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -138,6 +119,14 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) xe_assert(xe, num_vfs <= total_vfs); xe_sriov_dbg(xe, "enabling %u VF%s\n", num_vfs, str_plural(num_vfs)); + err = xe_sriov_pf_wait_ready(xe); + if (err) + goto out; + + err = pf_prepare_vfs_enabling(xe); + if (err) + goto out; + /* * We must hold additional reference to the runtime PM to keep PF in D0 * during VFs lifetime, as our VFs do not implement the PM capability. 
@@ -149,10 +138,16 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) */ xe_pm_runtime_get_noresume(xe); - err = pf_provision_vfs(xe, num_vfs); + err = xe_sriov_pf_provision_vfs(xe, num_vfs); if (err < 0) goto failed; + if (IS_DGFX(xe)) { + err = resize_vf_vram_bar(xe, num_vfs); + if (err) + xe_sriov_info(xe, "Failed to set VF LMEM BAR size: %d\n", err); + } + err = pci_enable_sriov(pdev, num_vfs); if (err < 0) goto failed; @@ -162,14 +157,17 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) xe_sriov_info(xe, "Enabled %u of %u VF%s\n", num_vfs, total_vfs, str_plural(total_vfs)); + xe_sriov_pf_sysfs_link_vfs(xe, num_vfs); + pf_engine_activity_stats(xe, num_vfs, true); return num_vfs; failed: - pf_unprovision_vfs(xe, num_vfs); + xe_sriov_pf_unprovision_vfs(xe, num_vfs); xe_pm_runtime_put(xe); - + pf_finish_vfs_enabling(xe); +out: xe_sriov_notice(xe, "Failed to enable %u VF%s (%pe)\n", num_vfs, str_plural(num_vfs), ERR_PTR(err)); return err; @@ -189,15 +187,19 @@ static int pf_disable_vfs(struct xe_device *xe) pf_engine_activity_stats(xe, num_vfs, false); + xe_sriov_pf_sysfs_unlink_vfs(xe, num_vfs); + pci_disable_sriov(pdev); pf_reset_vfs(xe, num_vfs); - pf_unprovision_vfs(xe, num_vfs); + xe_sriov_pf_unprovision_vfs(xe, num_vfs); /* not needed anymore - see pf_enable_vfs() */ xe_pm_runtime_put(xe); + pf_finish_vfs_enabling(xe); + xe_sriov_info(xe, "Disabled %u VF%s\n", num_vfs, str_plural(num_vfs)); return 0; } @@ -240,3 +242,25 @@ int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) return ret; } + +/** + * xe_pci_sriov_get_vf_pdev() - Lookup the VF's PCI device using the VF identifier. + * @pdev: the PF's &pci_dev + * @vfid: VF identifier (1-based) + * + * The caller must decrement the reference count by calling pci_dev_put(). + * + * Return: the VF's &pci_dev or NULL if the VF device was not found. 
+ */ +struct pci_dev *xe_pci_sriov_get_vf_pdev(struct pci_dev *pdev, unsigned int vfid) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + + xe_assert(xe, dev_is_pf(&pdev->dev)); + xe_assert(xe, vfid); + xe_assert(xe, vfid <= pci_sriov_get_totalvfs(pdev)); + + return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus), + pdev->bus->number, + pci_iov_virtfn_devfn(pdev, vfid - 1)); +} diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.h b/drivers/gpu/drm/xe/xe_pci_sriov.h index c76dd0d90495..b9105d71dbb1 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.h +++ b/drivers/gpu/drm/xe/xe_pci_sriov.h @@ -10,6 +10,7 @@ struct pci_dev; #ifdef CONFIG_PCI_IOV int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs); +struct pci_dev *xe_pci_sriov_get_vf_pdev(struct pci_dev *pdev, unsigned int vfid); #else static inline int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) { diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index ca6b10d35573..9892c063a9c5 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -8,18 +8,61 @@ #include <linux/types.h> -struct xe_graphics_desc { +#include "xe_platform_types.h" + +struct xe_subplatform_desc { + enum xe_subplatform subplatform; + const char *name; + const u16 *pciidlist; +}; + +struct xe_device_desc { + /* Should only ever be set for platforms without GMD_ID */ + const struct xe_ip *pre_gmdid_graphics_ip; + /* Should only ever be set for platforms without GMD_ID */ + const struct xe_ip *pre_gmdid_media_ip; + + const char *platform_name; + const struct xe_subplatform_desc *subplatforms; + + enum xe_platform platform; + + u8 dma_mask_size; + u8 max_remote_tiles:2; + u8 max_gt_per_tile:2; u8 va_bits; u8 vm_max_level; u8 vram_flags; + u8 require_force_probe:1; + u8 is_dgfx:1; + + u8 has_display:1; + u8 has_fan_control:1; + u8 has_flat_ccs:1; + u8 has_gsc_nvm:1; + u8 has_heci_gscfi:1; + u8 has_heci_cscfi:1; + u8 has_late_bind:1; + u8 has_llc:1; + u8 
has_mbx_power_limits:1; + u8 has_mem_copy_instr:1; + u8 has_pxp:1; + u8 has_sriov:1; + u8 needs_scratch:1; + u8 skip_guc_pc:1; + u8 skip_mtcfg:1; + u8 skip_pcode:1; + u8 needs_shared_vf_gt_wq:1; +}; + +struct xe_graphics_desc { u64 hw_engine_mask; /* hardware engines provided by graphics IP */ u8 has_asid:1; u8 has_atomic_enable_pte_bit:1; - u8 has_flat_ccs:1; u8 has_indirect_ring_state:1; - u8 has_range_tlb_invalidation:1; + u8 has_range_tlb_inval:1; u8 has_usm:1; u8 has_64bit_timestamp:1; }; diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index cf955b3ed52c..0d33c14ea0cf 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -32,27 +32,39 @@ static int pcode_mailbox_status(struct xe_tile *tile) { + const char *err_str; + int err_decode; u32 err; - static const struct pcode_err_decode err_decode[] = { - [PCODE_ILLEGAL_CMD] = {-ENXIO, "Illegal Command"}, - [PCODE_TIMEOUT] = {-ETIMEDOUT, "Timed out"}, - [PCODE_ILLEGAL_DATA] = {-EINVAL, "Illegal Data"}, - [PCODE_ILLEGAL_SUBCOMMAND] = {-ENXIO, "Illegal Subcommand"}, - [PCODE_LOCKED] = {-EBUSY, "PCODE Locked"}, - [PCODE_GT_RATIO_OUT_OF_RANGE] = {-EOVERFLOW, - "GT ratio out of range"}, - [PCODE_REJECTED] = {-EACCES, "PCODE Rejected"}, - [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"}, - }; + +#define CASE_ERR(_err, _err_decode, _err_str) \ + case _err: \ + err_decode = _err_decode; \ + err_str = _err_str; \ + break err = xe_mmio_read32(&tile->mmio, PCODE_MAILBOX) & PCODE_ERROR_MASK; + switch (err) { + CASE_ERR(PCODE_ILLEGAL_CMD, -ENXIO, "Illegal Command"); + CASE_ERR(PCODE_TIMEOUT, -ETIMEDOUT, "Timed out"); + CASE_ERR(PCODE_ILLEGAL_DATA, -EINVAL, "Illegal Data"); + CASE_ERR(PCODE_ILLEGAL_SUBCOMMAND, -ENXIO, "Illegal Subcommand"); + CASE_ERR(PCODE_LOCKED, -EBUSY, "PCODE Locked"); + CASE_ERR(PCODE_GT_RATIO_OUT_OF_RANGE, -EOVERFLOW, "GT ratio out of range"); + CASE_ERR(PCODE_REJECTED, -EACCES, "PCODE Rejected"); + default: + err_decode = -EPROTO; + err_str = "Unknown"; + } + 
if (err) { - drm_err(&tile_to_xe(tile)->drm, "PCODE Mailbox failed: %d %s", err, - err_decode[err].str ?: "Unknown"); - return err_decode[err].errno ?: -EPROTO; + drm_err(&tile_to_xe(tile)->drm, "PCODE Mailbox failed: %d %s", + err_decode, err_str); + + return err_decode; } return 0; +#undef CASE_ERR } static int __pcode_mailbox_rw(struct xe_tile *tile, u32 mbox, u32 *data0, u32 *data1, @@ -109,6 +121,17 @@ int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 data, int timeout return err; } +int xe_pcode_write64_timeout(struct xe_tile *tile, u32 mbox, u32 data0, u32 data1, int timeout) +{ + int err; + + mutex_lock(&tile->pcode.lock); + err = pcode_mailbox_rw(tile, mbox, &data0, &data1, timeout, false, false); + mutex_unlock(&tile->pcode.lock); + + return err; +} + int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1) { int err; @@ -325,3 +348,33 @@ int xe_pcode_probe_early(struct xe_device *xe) return xe_pcode_ready(xe, false); } ALLOW_ERROR_INJECTION(xe_pcode_probe_early, ERRNO); /* See xe_pci_probe */ + +/* Helpers with drm device. 
These should only be called by the display side */ +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) + +int intel_pcode_read(struct drm_device *drm, u32 mbox, u32 *val, u32 *val1) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_tile *tile = xe_device_get_root_tile(xe); + + return xe_pcode_read(tile, mbox, val, val1); +} + +int intel_pcode_write_timeout(struct drm_device *drm, u32 mbox, u32 val, int timeout_ms) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_tile *tile = xe_device_get_root_tile(xe); + + return xe_pcode_write_timeout(tile, mbox, val, timeout_ms); +} + +int intel_pcode_request(struct drm_device *drm, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_tile *tile = xe_device_get_root_tile(xe); + + return xe_pcode_request(tile, mbox, request, reply_mask, reply, timeout_base_ms); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h index ba33991d72a7..a5584c1c75f9 100644 --- a/drivers/gpu/drm/xe/xe_pcode.h +++ b/drivers/gpu/drm/xe/xe_pcode.h @@ -7,8 +7,10 @@ #define _XE_PCODE_H_ #include <linux/types.h> -struct xe_tile; + +struct drm_device; struct xe_device; +struct xe_tile; void xe_pcode_init(struct xe_tile *tile); int xe_pcode_probe_early(struct xe_device *xe); @@ -18,6 +20,9 @@ int xe_pcode_init_min_freq_table(struct xe_tile *tile, u32 min_gt_freq, int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1); int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 val, int timeout_ms); +int xe_pcode_write64_timeout(struct xe_tile *tile, u32 mbox, u32 data0, + u32 data1, int timeout); + #define xe_pcode_write(tile, mbox, val) \ xe_pcode_write_timeout(tile, mbox, val, 1) @@ -29,4 +34,12 @@ int xe_pcode_request(struct xe_tile *tile, u32 mbox, u32 request, | FIELD_PREP(PCODE_MB_PARAM1, param1)\ | FIELD_PREP(PCODE_MB_PARAM2, param2)) +/* Helpers with drm device */ +int intel_pcode_read(struct drm_device *drm, u32 
mbox, u32 *val, u32 *val1); +int intel_pcode_write_timeout(struct drm_device *drm, u32 mbox, u32 val, int timeout_ms); +#define intel_pcode_write(drm, mbox, val) \ + intel_pcode_write_timeout((drm), (mbox), (val), 1) +int intel_pcode_request(struct drm_device *drm, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms); + #endif diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 127d4d26c4cf..70dcd6625680 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -43,6 +43,28 @@ #define POWER_SETUP_I1_SHIFT 6 /* 10.6 fixed point format */ #define POWER_SETUP_I1_DATA_MASK REG_GENMASK(15, 0) +#define READ_PSYSGPU_POWER_LIMIT 0x6 +#define WRITE_PSYSGPU_POWER_LIMIT 0x7 +#define READ_PACKAGE_POWER_LIMIT 0x8 +#define WRITE_PACKAGE_POWER_LIMIT 0x9 +#define READ_PL_FROM_FW 0x1 +#define READ_PL_FROM_PCODE 0x0 + +#define PCODE_LATE_BINDING 0x5C +#define GET_CAPABILITY_STATUS 0x0 +#define V1_FAN_SUPPORTED REG_BIT(0) +#define VR_PARAMS_SUPPORTED REG_BIT(3) +#define V1_FAN_PROVISIONED REG_BIT(16) +#define VR_PARAMS_PROVISIONED REG_BIT(19) +#define GET_VERSION_LOW 0x1 +#define GET_VERSION_HIGH 0x2 +#define MAJOR_VERSION_MASK REG_GENMASK(31, 16) +#define MINOR_VERSION_MASK REG_GENMASK(15, 0) +#define HOTFIX_VERSION_MASK REG_GENMASK(31, 16) +#define BUILD_VERSION_MASK REG_GENMASK(15, 0) +#define FAN_TABLE 1 +#define VR_CONFIG 2 + #define PCODE_FREQUENCY_CONFIG 0x6e /* Frequency Config Sub Commands (param1) */ #define PCODE_MBOX_FC_SC_READ_FUSED_P0 0x0 @@ -70,9 +92,3 @@ #define BMG_PCIE_CAP XE_REG(0x138340) #define LINK_DOWNGRADE REG_GENMASK(1, 0) #define DOWNGRADE_CAPABLE 2 - -struct pcode_err_decode { - int errno; - const char *str; -}; - diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h index d08574c4cdb8..f516dbddfd88 100644 --- a/drivers/gpu/drm/xe/xe_platform_types.h +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -24,6 +24,8 @@ enum xe_platform { 
XE_LUNARLAKE, XE_BATTLEMAGE, XE_PANTHERLAKE, + XE_NOVALAKE_S, + XE_CRESCENTISLAND, }; enum xe_subplatform { @@ -34,6 +36,7 @@ enum xe_subplatform { XE_SUBPLATFORM_DG2_G10, XE_SUBPLATFORM_DG2_G11, XE_SUBPLATFORM_DG2_G12, + XE_SUBPLATFORM_BATTLEMAGE_G21, }; #endif diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index ff749edc005b..766922530265 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -18,11 +18,15 @@ #include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" -#include "xe_guc.h" +#include "xe_gt_idle.h" +#include "xe_i2c.h" #include "xe_irq.h" +#include "xe_late_bind_fw.h" #include "xe_pcode.h" #include "xe_pxp.h" +#include "xe_sriov_vf_ccs.h" #include "xe_trace.h" +#include "xe_vm.h" #include "xe_wa.h" /** @@ -79,8 +83,58 @@ static struct lockdep_map xe_pm_runtime_d3cold_map = { static struct lockdep_map xe_pm_runtime_nod3cold_map = { .name = "xe_rpm_nod3cold_map" }; + +static struct lockdep_map xe_pm_block_lockdep_map = { + .name = "xe_pm_block_map", +}; #endif +static void xe_pm_block_begin_signalling(void) +{ + lock_acquire_shared_recursive(&xe_pm_block_lockdep_map, 0, 1, NULL, _RET_IP_); +} + +static void xe_pm_block_end_signalling(void) +{ + lock_release(&xe_pm_block_lockdep_map, _RET_IP_); +} + +/** + * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend + * + * Annotation to use where the code might block or cease to make + * progress pending resume completion. + */ +void xe_pm_might_block_on_suspend(void) +{ + lock_map_acquire(&xe_pm_block_lockdep_map); + lock_map_release(&xe_pm_block_lockdep_map); +} + +/** + * xe_pm_block_on_suspend() - Block pending suspend. + * @xe: The xe device about to be suspended. + * + * Block if the pm notifier has started evicting bos, to avoid + * racing and validating those bos back. The function is + * annotated to ensure no locks are held that are also grabbed + * in the pm notifier or the device suspend / resume.
+ * This is intended to be used by freezable tasks only. + * (Not freezable workqueues), with the intention that the function + * returns %-ERESTARTSYS when tasks are frozen during suspend, + * and allows the task to freeze. The caller must be able to + * handle the %-ERESTARTSYS. + * + * Return: %0 on success, %-ERESTARTSYS on signal pending or + * if freezing requested. + */ +int xe_pm_block_on_suspend(struct xe_device *xe) +{ + xe_pm_might_block_on_suspend(); + + return wait_for_completion_interruptible(&xe->pm_block); +} + /** * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context * @xe: The xe device. @@ -120,12 +174,15 @@ int xe_pm_suspend(struct xe_device *xe) int err; drm_dbg(&xe->drm, "Suspending device\n"); + xe_pm_block_begin_signalling(); trace_xe_pm_suspend(xe, __builtin_return_address(0)); err = xe_pxp_pm_suspend(xe->pxp); if (err) goto err; + xe_late_bind_wait_for_worker_completion(&xe->late_bind); + for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); @@ -134,7 +191,7 @@ int xe_pm_suspend(struct xe_device *xe) /* FIXME: Super racey... 
*/ err = xe_bo_evict_all(xe); if (err) - goto err_pxp; + goto err_display; for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); @@ -146,15 +203,19 @@ int xe_pm_suspend(struct xe_device *xe) xe_display_pm_suspend_late(xe); + xe_i2c_pm_suspend(xe); + drm_dbg(&xe->drm, "Device suspended\n"); + xe_pm_block_end_signalling(); + return 0; err_display: xe_display_pm_resume(xe); -err_pxp: xe_pxp_pm_resume(xe->pxp); err: drm_dbg(&xe->drm, "Device suspend failed %d\n", err); + xe_pm_block_end_signalling(); return err; } @@ -171,9 +232,13 @@ int xe_pm_resume(struct xe_device *xe) u8 id; int err; + xe_pm_block_begin_signalling(); drm_dbg(&xe->drm, "Resuming device\n"); trace_xe_pm_resume(xe, __builtin_return_address(0)); + for_each_gt(gt, xe, id) + xe_gt_idle_disable_c6(gt); + for_each_tile(tile, xe, id) xe_wa_apply_tile_workarounds(tile); @@ -191,6 +256,8 @@ int xe_pm_resume(struct xe_device *xe) if (err) goto err; + xe_i2c_pm_resume(xe, true); + xe_irq_resume(xe); for_each_gt(gt, xe, id) @@ -204,10 +271,17 @@ int xe_pm_resume(struct xe_device *xe) xe_pxp_pm_resume(xe->pxp); + if (IS_VF_CCS_READY(xe)) + xe_sriov_vf_ccs_register_context(xe); + + xe_late_bind_fw_load(&xe->late_bind); + drm_dbg(&xe->drm, "Device resumed\n"); + xe_pm_block_end_signalling(); return 0; err: drm_dbg(&xe->drm, "Device resume failed %d\n", err); + xe_pm_block_end_signalling(); return err; } @@ -239,6 +313,10 @@ static void xe_pm_runtime_init(struct xe_device *xe) { struct device *dev = xe->drm.dev; + /* Our current VFs do not support RPM. so, disable it */ + if (IS_SRIOV_VF(xe)) + return; + /* * Disable the system suspend direct complete optimization. 
* We need to ensure that the regular device suspend/resume functions @@ -286,6 +364,19 @@ static u32 vram_threshold_value(struct xe_device *xe) return DEFAULT_VRAM_THRESHOLD; } +static void xe_pm_wake_rebind_workers(struct xe_device *xe) +{ + struct xe_vm *vm, *next; + + mutex_lock(&xe->rebind_resume_lock); + list_for_each_entry_safe(vm, next, &xe->rebind_resume_list, + preempt.pm_activate_link) { + list_del_init(&vm->preempt.pm_activate_link); + xe_vm_resume_rebind_worker(vm); + } + mutex_unlock(&xe->rebind_resume_lock); +} + static int xe_pm_notifier_callback(struct notifier_block *nb, unsigned long action, void *data) { @@ -295,30 +386,39 @@ static int xe_pm_notifier_callback(struct notifier_block *nb, switch (action) { case PM_HIBERNATION_PREPARE: case PM_SUSPEND_PREPARE: + { + struct xe_validation_ctx ctx; + + reinit_completion(&xe->pm_block); + xe_pm_block_begin_signalling(); xe_pm_runtime_get(xe); + (void)xe_validation_ctx_init(&ctx, &xe->val, NULL, + (struct xe_val_flags) {.exclusive = true}); err = xe_bo_evict_all_user(xe); - if (err) { + xe_validation_ctx_fini(&ctx); + if (err) drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err); - xe_pm_runtime_put(xe); - break; - } err = xe_bo_notifier_prepare_all_pinned(xe); - if (err) { + if (err) drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err); - xe_pm_runtime_put(xe); - } + /* + * Keep the runtime pm reference until post hibernation / post suspend to + * avoid a runtime suspend interfering with evicted objects or backup + * allocations. 
+ */ + xe_pm_block_end_signalling(); break; + } case PM_POST_HIBERNATION: case PM_POST_SUSPEND: + complete_all(&xe->pm_block); + xe_pm_wake_rebind_workers(xe); xe_bo_notifier_unprepare_all_pinned(xe); xe_pm_runtime_put(xe); break; } - if (err) - return NOTIFY_BAD; - return NOTIFY_DONE; } @@ -340,6 +440,14 @@ int xe_pm_init(struct xe_device *xe) if (err) return err; + err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock); + if (err) + goto err_unregister; + + init_completion(&xe->pm_block); + complete_all(&xe->pm_block); + INIT_LIST_HEAD(&xe->rebind_resume_list); + /* For now suspend/resume is only allowed with GuC */ if (!xe_device_uc_enabled(xe)) return 0; @@ -363,6 +471,10 @@ static void xe_pm_runtime_fini(struct xe_device *xe) { struct device *dev = xe->drm.dev; + /* Our current VFs do not support RPM. so, disable it */ + if (IS_SRIOV_VF(xe)) + return; + pm_runtime_get_sync(dev); pm_runtime_forbid(dev); } @@ -488,6 +600,8 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_display_pm_runtime_suspend_late(xe); + xe_i2c_pm_suspend(xe); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return 0; @@ -519,6 +633,9 @@ int xe_pm_runtime_resume(struct xe_device *xe) xe_rpm_lockmap_acquire(xe); + for_each_gt(gt, xe, id) + xe_gt_idle_disable_c6(gt); + if (xe->d3cold.allowed) { err = xe_pcode_ready(xe, true); if (err) @@ -535,6 +652,8 @@ int xe_pm_runtime_resume(struct xe_device *xe) goto out; } + xe_i2c_pm_resume(xe, xe->d3cold.allowed); + xe_irq_resume(xe); for_each_gt(gt, xe, id) @@ -550,6 +669,12 @@ int xe_pm_runtime_resume(struct xe_device *xe) xe_pxp_pm_resume(xe->pxp); + if (IS_VF_CCS_READY(xe)) + xe_sriov_vf_ccs_register_context(xe); + + if (xe->d3cold.allowed) + xe_late_bind_fw_load(&xe->late_bind); + out: xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); @@ -601,6 +726,13 @@ static void xe_pm_runtime_lockdep_prime(void) /** * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously * @xe: xe device instance + * 
+ * When possible, scope-based runtime PM (through guard(xe_pm_runtime)) is + * preferred over direct usage of this function. Manual get/put handling + * should only be used when the function contains goto-based logic which + * can break scope-based handling, or when the lifetime of the runtime PM + * reference does not match a specific scope (e.g., runtime PM obtained in one + * function and released in a different one). */ void xe_pm_runtime_get(struct xe_device *xe) { @@ -633,6 +765,13 @@ void xe_pm_runtime_put(struct xe_device *xe) * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl * @xe: xe device instance * + * When possible, scope-based runtime PM (through + * ACQUIRE(xe_pm_runtime_ioctl, ...)) is preferred over direct usage of this + * function. Manual get/put handling should only be used when the function + * contains goto-based logic which can break scope-based handling, or when the + * lifetime of the runtime PM reference does not match a specific scope (e.g., + * runtime PM obtained in one function and released in a different one). + * * Returns: Any number greater than or equal to 0 for success, negative error * code otherwise. */ @@ -702,6 +841,13 @@ static bool xe_pm_suspending_or_resuming(struct xe_device *xe) * It will warn if not protected. * The reference should be put back after this function regardless, since it * will always bump the usage counter, regardless. + * + * When possible, scope-based runtime PM (through guard(xe_pm_runtime_noresume)) + * is preferred over direct usage of this function. Manual get/put handling + * should only be used when the function contains goto-based logic which can + * break scope-based handling, or when the lifetime of the runtime PM reference + * does not match a specific scope (e.g., runtime PM obtained in one function + * and released in a different one).
*/ void xe_pm_runtime_get_noresume(struct xe_device *xe) { @@ -753,11 +899,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) } /** - * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold + * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold * @xe: xe device instance - * @threshold: VRAM size in bites for the D3cold threshold + * @threshold: VRAM size in MiB for the D3cold threshold * - * Returns 0 for success, negative error code otherwise. + * Return: + * * 0 - success + * * -EINVAL - invalid argument */ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) { diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 59678b310e55..6b27039e7b2d 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -6,6 +6,7 @@ #ifndef _XE_PM_H_ #define _XE_PM_H_ +#include <linux/cleanup.h> #include <linux/pm_runtime.h> #define DEFAULT_VRAM_THRESHOLD 300 /* in MB */ @@ -33,6 +34,24 @@ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); void xe_pm_d3cold_allowed_toggle(struct xe_device *xe); bool xe_rpm_reclaim_safe(const struct xe_device *xe); struct task_struct *xe_pm_read_callback_task(struct xe_device *xe); +int xe_pm_block_on_suspend(struct xe_device *xe); +void xe_pm_might_block_on_suspend(void); int xe_pm_module_init(void); +static inline void __xe_pm_runtime_noop(struct xe_device *xe) {} + +DEFINE_GUARD(xe_pm_runtime, struct xe_device *, + xe_pm_runtime_get(_T), xe_pm_runtime_put(_T)) +DEFINE_GUARD(xe_pm_runtime_noresume, struct xe_device *, + xe_pm_runtime_get_noresume(_T), xe_pm_runtime_put(_T)) +DEFINE_GUARD_COND(xe_pm_runtime, _ioctl, xe_pm_runtime_get_ioctl(_T), _RET >= 0) + +/* + * Used when a function needs to release runtime PM in all possible cases + * and error paths, but the wakeref was already acquired by a different + * function (i.e., get() has already happened so only a put() is needed). 
+ */ +DEFINE_GUARD(xe_pm_runtime_release_only, struct xe_device *, + __xe_pm_runtime_noop(_T), xe_pm_runtime_put(_T)); + #endif diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c index 69df0e3520a5..c63335eb69e5 100644 --- a/drivers/gpu/drm/xe/xe_pmu.c +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -157,10 +157,13 @@ static bool event_gt_forcewake(struct perf_event *event) return true; } -static bool event_supported(struct xe_pmu *pmu, unsigned int gt, +static bool event_supported(struct xe_pmu *pmu, unsigned int gt_id, unsigned int id) { - if (gt >= XE_MAX_GT_PER_TILE) + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); + struct xe_gt *gt = xe_device_get_gt(xe, gt_id); + + if (!gt) return false; return id < sizeof(pmu->supported_events) * BITS_PER_BYTE && @@ -494,7 +497,12 @@ static const struct attribute_group *pmu_events_attr_update[] = { static void set_supported_events(struct xe_pmu *pmu) { struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); - struct xe_gt *gt = xe_device_get_gt(xe, 0); + struct xe_gt *gt; + int id; + + /* If there are no GTs, don't support any GT-related events */ + if (xe->info.gt_count == 0) + return; if (!xe->info.skip_guc_pc) { pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_C6_RESIDENCY); @@ -502,6 +510,10 @@ static void set_supported_events(struct xe_pmu *pmu) pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_REQUESTED_FREQUENCY); } + /* Find the first available GT to query engine event capabilities */ + for_each_gt(gt, xe, id) + break; + if (xe_guc_engine_activity_supported(&gt->uc.guc)) { pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_ACTIVE_TICKS); pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_TOTAL_TICKS); diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 83fbeea5aa20..7f587ca3947d 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -8,6 +8,8 @@ #include <linux/slab.h> #include "xe_exec_queue.h"
+#include "xe_gt_printk.h" +#include "xe_guc_exec_queue_types.h" #include "xe_vm.h" static void preempt_fence_work_func(struct work_struct *w) @@ -22,6 +24,15 @@ static void preempt_fence_work_func(struct work_struct *w) } else if (!q->ops->reset_status(q)) { int err = q->ops->suspend_wait(q); + if (err == -EAGAIN) { + xe_gt_dbg(q->gt, "PREEMPT FENCE RETRY guc_id=%d", + q->guc->id); + queue_work(q->vm->xe->preempt_fence_wq, + &pfence->preempt_work); + dma_fence_end_signalling(cookie); + return; + } + if (err) dma_fence_set_error(&pfence->base, err); } else { diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h index 312c3372a49f..ac125c697a41 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h +++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h @@ -12,7 +12,7 @@ struct xe_exec_queue; /** - * struct xe_preempt_fence - XE preempt fence + * struct xe_preempt_fence - Xe preempt fence * * hardware and triggers a callback once the xe_engine is complete. */ diff --git a/drivers/gpu/drm/xe/xe_printk.h b/drivers/gpu/drm/xe/xe_printk.h new file mode 100644 index 000000000000..c5be2385aa95 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_printk.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PRINTK_H_ +#define _XE_PRINTK_H_ + +#include <drm/drm_print.h> + +#include "xe_device_types.h" + +#define __XE_PRINTK_FMT(_xe, _fmt, _args...) _fmt, ##_args + +#define xe_printk(_xe, _level, _fmt, ...) \ + drm_##_level(&(_xe)->drm, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__)) + +#define xe_err(_xe, _fmt, ...) \ + xe_printk((_xe), err, _fmt, ##__VA_ARGS__) + +#define xe_err_once(_xe, _fmt, ...) \ + xe_printk((_xe), err_once, _fmt, ##__VA_ARGS__) + +#define xe_err_ratelimited(_xe, _fmt, ...) \ + xe_printk((_xe), err_ratelimited, _fmt, ##__VA_ARGS__) + +#define xe_warn(_xe, _fmt, ...) \ + xe_printk((_xe), warn, _fmt, ##__VA_ARGS__) + +#define xe_notice(_xe, _fmt, ...) 
\ + xe_printk((_xe), notice, _fmt, ##__VA_ARGS__) + +#define xe_info(_xe, _fmt, ...) \ + xe_printk((_xe), info, _fmt, ##__VA_ARGS__) + +#define xe_dbg(_xe, _fmt, ...) \ + xe_printk((_xe), dbg, _fmt, ##__VA_ARGS__) + +#define xe_WARN_type(_xe, _type, _condition, _fmt, ...) \ + drm_WARN##_type(&(_xe)->drm, _condition, _fmt, ## __VA_ARGS__) + +#define xe_WARN(_xe, _condition, _fmt, ...) \ + xe_WARN_type((_xe),, _condition, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__)) + +#define xe_WARN_ONCE(_xe, _condition, _fmt, ...) \ + xe_WARN_type((_xe), _ONCE, _condition, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__)) + +#define xe_WARN_ON(_xe, _condition) \ + xe_WARN((_xe), _condition, "%s(%s)", "WARN_ON", __stringify(_condition)) + +#define xe_WARN_ON_ONCE(_xe, _condition) \ + xe_WARN_ONCE((_xe), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition)) + +static inline void __xe_printfn_err(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_device *xe = p->arg; + + xe_err(xe, "%pV", vaf); +} + +static inline void __xe_printfn_info(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_device *xe = p->arg; + + xe_info(xe, "%pV", vaf); +} + +static inline void __xe_printfn_dbg(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_device *xe = p->arg; + struct drm_printer ddp; + + /* + * The original xe_dbg() callsite annotations are useless here, + * redirect to the tweaked drm_dbg_printer() instead. + */ + ddp = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL); + ddp.origin = p->origin; + + drm_printf(&ddp, __XE_PRINTK_FMT(xe, "%pV", vaf)); +} + +/** + * xe_err_printer - Construct a &drm_printer that outputs to xe_err() + * @xe: the &xe_device pointer to use in xe_err() + * + * Return: The &drm_printer object. 
+ */ +static inline struct drm_printer xe_err_printer(struct xe_device *xe) +{ + struct drm_printer p = { + .printfn = __xe_printfn_err, + .arg = xe, + }; + return p; +} + +/** + * xe_info_printer - Construct a &drm_printer that outputs to xe_info() + * @xe: the &xe_device pointer to use in xe_info() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_info_printer(struct xe_device *xe) +{ + struct drm_printer p = { + .printfn = __xe_printfn_info, + .arg = xe, + }; + return p; +} + +/** + * xe_dbg_printer - Construct a &drm_printer that outputs like xe_dbg() + * @xe: the &xe_device pointer to use in xe_dbg() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_dbg_printer(struct xe_device *xe) +{ + struct drm_printer p = { + .printfn = __xe_printfn_dbg, + .arg = xe, + .origin = (const void *)_THIS_IP_, + }; + return p; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_psmi.c b/drivers/gpu/drm/xe/xe_psmi.c new file mode 100644 index 000000000000..6a54e38b81ba --- /dev/null +++ b/drivers/gpu/drm/xe/xe_psmi.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/debugfs.h> + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_configfs.h" +#include "xe_psmi.h" + +/* + * PSMI capture support + * + * Requirement for PSMI capture is to have a physically contiguous buffer. The + * PSMI tool owns doing all necessary configuration (MMIO register writes are + * done from user-space). However, KMD needs to provide the PSMI tool with the + * required physical address of the base of PSMI buffer in case of VRAM. + * + * VRAM backed PSMI buffer: + * Buffer is allocated as GEM object and with XE_BO_CREATE_PINNED_BIT flag which + * creates a contiguous allocation. The physical address is returned from + * psmi_debugfs_capture_addr_show(). PSMI tool can mmap the buffer via the + * PCIBAR through sysfs. 
+ * + * SYSTEM memory backed PSMI buffer: + * Interface here does not support allocating from SYSTEM memory region. The + * PSMI tool needs to allocate memory themselves using hugetlbfs. In order to + * get the physical address, user-space can query /proc/[pid]/pagemap. As an + * alternative, CMA debugfs could also be used to allocate reserved CMA memory. + */ + +static bool psmi_enabled(struct xe_device *xe) +{ + return xe_configfs_get_psmi_enabled(to_pci_dev(xe->drm.dev)); +} + +static void psmi_free_object(struct xe_bo *bo) +{ + xe_bo_lock(bo, NULL); + xe_bo_unpin(bo); + xe_bo_unlock(bo); + xe_bo_put(bo); +} + +/* + * Free PSMI capture buffer objects. + */ +static void psmi_cleanup(struct xe_device *xe) +{ + unsigned long id, region_mask = xe->psmi.region_mask; + struct xe_bo *bo; + + for_each_set_bit(id, &region_mask, + ARRAY_SIZE(xe->psmi.capture_obj)) { + /* smem should never be set */ + xe_assert(xe, id); + + bo = xe->psmi.capture_obj[id]; + if (bo) { + psmi_free_object(bo); + xe->psmi.capture_obj[id] = NULL; + } + } +} + +static struct xe_bo *psmi_alloc_object(struct xe_device *xe, + unsigned int id, size_t bo_size) +{ + struct xe_tile *tile; + + xe_assert(xe, id); + xe_assert(xe, bo_size); + + tile = &xe->tiles[id - 1]; + + /* VRAM: Allocate GEM object for the capture buffer */ + return xe_bo_create_pin_range_novm(xe, tile, bo_size, 0, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED | + XE_BO_FLAG_PINNED_LATE_RESTORE | + XE_BO_FLAG_NEEDS_CPU_ACCESS); +} + +/* + * Allocate PSMI capture buffer objects (via debugfs set function), based on + * which regions the user has selected in region_mask. @size: size in bytes + * (should be power of 2) + * + * Always release/free the current buffer objects before attempting to allocate + * new ones. Size == 0 will free all current buffers. + * + * Note, we don't write any registers as the capture tool is already configuring + * all PSMI registers itself via mmio space.
+ */ +static int psmi_resize_object(struct xe_device *xe, size_t size) +{ + unsigned long id, region_mask = xe->psmi.region_mask; + struct xe_bo *bo = NULL; + int err = 0; + + /* if resizing, free currently allocated buffers first */ + psmi_cleanup(xe); + + /* can set size to 0, in which case, now done */ + if (!size) + return 0; + + for_each_set_bit(id, &region_mask, + ARRAY_SIZE(xe->psmi.capture_obj)) { + /* smem should never be set */ + xe_assert(xe, id); + + bo = psmi_alloc_object(xe, id, size); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + break; + } + xe->psmi.capture_obj[id] = bo; + + drm_info(&xe->drm, + "PSMI capture size requested: %zu bytes, allocated: %lu:%zu\n", + size, id, bo ? xe_bo_size(bo) : 0); + } + + /* on error, reverse what was allocated */ + if (err) + psmi_cleanup(xe); + + return err; +} + +/* + * Returns an address for the capture tool to use to find start of capture + * buffer. Capture tool requires the capability to have a buffer allocated per + * each tile (VRAM region), thus we return an address for each region. + */ +static int psmi_debugfs_capture_addr_show(struct seq_file *m, void *data) +{ + struct xe_device *xe = m->private; + unsigned long id, region_mask; + struct xe_bo *bo; + u64 val; + + region_mask = xe->psmi.region_mask; + for_each_set_bit(id, &region_mask, + ARRAY_SIZE(xe->psmi.capture_obj)) { + /* smem should never be set */ + xe_assert(xe, id); + + /* VRAM region */ + bo = xe->psmi.capture_obj[id]; + if (!bo) + continue; + + /* pinned, so don't need bo_lock */ + val = __xe_bo_addr(bo, 0, PAGE_SIZE); + seq_printf(m, "%ld: 0x%llx\n", id, val); + } + + return 0; +} + +/* + * Return capture buffer size, using the size from first allocated object that + * is found. This works because all objects must be of the same size. 
+ */ +static int psmi_debugfs_capture_size_get(void *data, u64 *val) +{ + unsigned long id, region_mask; + struct xe_device *xe = data; + struct xe_bo *bo; + + region_mask = xe->psmi.region_mask; + for_each_set_bit(id, &region_mask, + ARRAY_SIZE(xe->psmi.capture_obj)) { + /* smem should never be set */ + xe_assert(xe, id); + + bo = xe->psmi.capture_obj[id]; + if (bo) { + *val = xe_bo_size(bo); + return 0; + } + } + + /* no capture objects are allocated */ + *val = 0; + + return 0; +} + +/* + * Set size of PSMI capture buffer. This triggers the allocation of capture + * buffer in each memory region as specified with prior write to + * psmi_capture_region_mask. + */ +static int psmi_debugfs_capture_size_set(void *data, u64 val) +{ + struct xe_device *xe = data; + + /* user must have specified at least one region */ + if (!xe->psmi.region_mask) + return -EINVAL; + + return psmi_resize_object(xe, val); +} + +static int psmi_debugfs_capture_region_mask_get(void *data, u64 *val) +{ + struct xe_device *xe = data; + + *val = xe->psmi.region_mask; + + return 0; +} + +/* + * Select VRAM regions for multi-tile devices, only allowed when buffer is not + * currently allocated. 
+ */ +static int psmi_debugfs_capture_region_mask_set(void *data, u64 region_mask) +{ + struct xe_device *xe = data; + u64 size = 0; + + /* SMEM is not supported (see comments at top of file) */ + if (region_mask & 0x1) + return -EOPNOTSUPP; + + /* input bitmask should contain only valid TTM regions */ + if (!region_mask || region_mask & ~xe->info.mem_region_mask) + return -EINVAL; + + /* only allow setting mask if buffer is not yet allocated */ + psmi_debugfs_capture_size_get(xe, &size); + if (size) + return -EBUSY; + + xe->psmi.region_mask = region_mask; + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(psmi_debugfs_capture_addr); + +DEFINE_DEBUGFS_ATTRIBUTE(psmi_debugfs_capture_region_mask_fops, + psmi_debugfs_capture_region_mask_get, + psmi_debugfs_capture_region_mask_set, + "0x%llx\n"); + +DEFINE_DEBUGFS_ATTRIBUTE(psmi_debugfs_capture_size_fops, + psmi_debugfs_capture_size_get, + psmi_debugfs_capture_size_set, + "%lld\n"); + +void xe_psmi_debugfs_register(struct xe_device *xe) +{ + struct drm_minor *minor; + + if (!psmi_enabled(xe)) + return; + + minor = xe->drm.primary; + if (!minor->debugfs_root) + return; + + debugfs_create_file("psmi_capture_addr", + 0400, minor->debugfs_root, xe, + &psmi_debugfs_capture_addr_fops); + + debugfs_create_file("psmi_capture_region_mask", + 0600, minor->debugfs_root, xe, + &psmi_debugfs_capture_region_mask_fops); + + debugfs_create_file("psmi_capture_size", + 0600, minor->debugfs_root, xe, + &psmi_debugfs_capture_size_fops); +} + +static void psmi_fini(void *arg) +{ + psmi_cleanup(arg); +} + +int xe_psmi_init(struct xe_device *xe) +{ + if (!psmi_enabled(xe)) + return 0; + + return devm_add_action(xe->drm.dev, psmi_fini, xe); +} diff --git a/drivers/gpu/drm/xe/xe_psmi.h b/drivers/gpu/drm/xe/xe_psmi.h new file mode 100644 index 000000000000..b1dfba80d893 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_psmi.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PSMI_H_ +#define 
_XE_PSMI_H_ + +struct xe_device; + +int xe_psmi_init(struct xe_device *xe); +void xe_psmi_debugfs_register(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index b04756a97cdc..884127b4d97d 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -3,8 +3,6 @@ * Copyright © 2022 Intel Corporation */ -#include <linux/dma-fence-array.h> - #include "xe_pt.h" #include "regs/xe_gtt_defs.h" @@ -13,16 +11,17 @@ #include "xe_drm_client.h" #include "xe_exec_queue.h" #include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_pt_types.h" #include "xe_pt_walk.h" #include "xe_res_cursor.h" #include "xe_sched_job.h" -#include "xe_sync.h" #include "xe_svm.h" +#include "xe_sync.h" +#include "xe_tlb_inval_job.h" #include "xe_trace.h" #include "xe_ttm_stolen_mgr.h" +#include "xe_userptr.h" #include "xe_vm.h" struct xe_pt_dir { @@ -69,7 +68,7 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, if (level > MAX_HUGEPTE_LEVEL) return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); + 0); return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | XE_PTE_NULL; @@ -88,6 +87,7 @@ static void xe_pt_free(struct xe_pt *pt) * @vm: The vm to create for. * @tile: The tile to create for. * @level: The page-table level. + * @exec: The drm_exec object used to lock the vm. * * Allocate and initialize a single struct xe_pt metadata structure. Also * create the corresponding page-table bo, but don't initialize it. If the @@ -99,7 +99,7 @@ static void xe_pt_free(struct xe_pt *pt) * error. 
*/ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) + unsigned int level, struct drm_exec *exec) { struct xe_pt *pt; struct xe_bo *bo; @@ -120,12 +120,14 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | XE_BO_FLAG_NO_RESV_EVICT | XE_BO_FLAG_PAGETABLE; if (vm->xef) /* userspace */ - bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; + bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE | XE_BO_FLAG_FORCE_USER_VRAM; pt->level = level; + + drm_WARN_ON(&vm->xe->drm, IS_ERR_OR_NULL(exec)); bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, ttm_bo_type_kernel, - bo_flags); + bo_flags, exec); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto err_kfree; @@ -518,7 +520,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, { struct xe_pt_stage_bind_walk *xe_walk = container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; + u16 pat_index = xe_walk->vma->attr.pat_index; struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); struct xe_vm *vm = xe_walk->vm; struct xe_pt *xe_child; @@ -589,7 +591,8 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, if (covers || !*child) { u64 flags = 0; - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); + xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1, + xe_vm_validation_exec(vm)); if (IS_ERR(xe_child)) return PTR_ERR(xe_child); @@ -616,7 +619,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, xe_child->is_compact = true; } - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; + pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0) | flags; ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, pte); } @@ -640,28 +643,31 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { * - In all other cases device atomics will be disabled with AE=0 until an application * request differently using a ioctl like madvise. 
*/ -static bool xe_atomic_for_vram(struct xe_vm *vm) +static bool xe_atomic_for_vram(struct xe_vm *vm, struct xe_vma *vma) { + if (vma->attr.atomic_access == DRM_XE_ATOMIC_CPU) + return false; + return true; } -static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_bo *bo) +static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_vma *vma) { struct xe_device *xe = vm->xe; + struct xe_bo *bo = xe_vma_bo(vma); - if (!xe->info.has_device_atomics_on_smem) + if (!xe->info.has_device_atomics_on_smem || + vma->attr.atomic_access == DRM_XE_ATOMIC_CPU) return false; + if (vma->attr.atomic_access == DRM_XE_ATOMIC_DEVICE) + return true; + /* * If a SMEM+LMEM allocation is backed by SMEM, a device * atomics will cause a gpu page fault and which then * gets migrated to LMEM, bind such allocations with * device atomics enabled. - * - * TODO: Revisit this. Perhaps add something like a - * fault_on_atomics_in_system UAPI flag. - * Note that this also prohibits GPU atomics in LR mode for - * userptr and system memory on DGFX. */ return (!IS_DGFX(xe) || (!xe_vm_in_lr_mode(vm) || (bo && xe_bo_has_single_placement(bo)))); @@ -707,7 +713,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, .vm = vm, .tile = tile, .curs = &curs, - .va_curs_start = range ? range->base.itree.start : + .va_curs_start = range ? xe_svm_range_start(range) : xe_vma_start(vma), .vma = vma, .wupd.entries = entries, @@ -725,8 +731,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, return -EAGAIN; } if (xe_svm_range_has_dma_mapping(range)) { - xe_res_first_dma(range->base.dma_addr, 0, - range->base.itree.last + 1 - range->base.itree.start, + xe_res_first_dma(range->base.pages.dma_addr, 0, + xe_svm_range_size(range), &curs); xe_svm_range_debug(range, "BIND PREPARE - MIXED"); } else { @@ -744,8 +750,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, goto walk_pt; if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? 
XE_USM_PPGTT_PTE_AE : 0; - xe_walk.default_system_pte = xe_atomic_for_system(vm, bo) ? + xe_walk.default_vram_pte = xe_atomic_for_vram(vm, vma) ? XE_USM_PPGTT_PTE_AE : 0; + xe_walk.default_system_pte = xe_atomic_for_system(vm, vma) ? XE_USM_PPGTT_PTE_AE : 0; } @@ -756,8 +762,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, if (!xe_vma_is_null(vma) && !range) { if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); + xe_res_first_dma(to_userptr_vma(vma)->userptr.pages.dma_addr, 0, + xe_vma_size(vma), &curs); else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), xe_vma_size(vma), &curs); @@ -770,8 +776,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, walk_pt: ret = xe_pt_walk_range(&pt->base, pt->level, - range ? range->base.itree.start : xe_vma_start(vma), - range ? range->base.itree.last + 1 : xe_vma_end(vma), + range ? xe_svm_range_start(range) : xe_vma_start(vma), + range ? xe_svm_range_end(range) : xe_vma_end(vma), &xe_walk.base); *num_entries = xe_walk.wupd.num_used_entries; @@ -907,6 +913,11 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); + if (xe_vma_bo(vma)) + xe_bo_assert_held(xe_vma_bo(vma)); + else if (xe_vma_is_userptr(vma)) + lockdep_assert_held(&xe_vma_vm(vma)->svm.gpusvm.notifier_lock); + if (!(pt_mask & BIT(tile->id))) return false; @@ -945,13 +956,25 @@ bool xe_pt_zap_ptes_range(struct xe_tile *tile, struct xe_vm *vm, struct xe_pt *pt = vm->pt_root[tile->id]; u8 pt_mask = (range->tile_present & ~range->tile_invalidated); - xe_svm_assert_in_notifier(vm); + /* + * Locking rules: + * + * - notifier_lock (write): full protection against page table changes + * and MMU notifier invalidations. 
+ * + * - notifier_lock (read) + vm_lock (write): combined protection against + * invalidations and concurrent page table modifications. (e.g., madvise) + * + */ + lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || + (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && + lockdep_is_held_type(&vm->lock, 0))); if (!(pt_mask & BIT(tile->id))) return false; - (void)xe_pt_walk_shared(&pt->base, pt->level, range->base.itree.start, - range->base.itree.last + 1, &xe_walk.base); + (void)xe_pt_walk_shared(&pt->base, pt->level, xe_svm_range_start(range), + xe_svm_range_end(range), &xe_walk.base); return xe_walk.needs_invalidate; } @@ -1028,7 +1051,7 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma) xe_pt_commit_prepare_locks_assert(vma); if (xe_vma_is_userptr(vma)) - lockdep_assert_held_read(&vm->userptr.notifier_lock); + xe_svm_assert_held_read(vm); } static void xe_pt_commit(struct xe_vma *vma, @@ -1256,6 +1279,8 @@ static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op, } static int xe_pt_vm_dependencies(struct xe_sched_job *job, + struct xe_tlb_inval_job *ijob, + struct xe_tlb_inval_job *mjob, struct xe_vm *vm, struct xe_vma_ops *vops, struct xe_vm_pgtable_update_ops *pt_update_ops, @@ -1313,16 +1338,23 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job, return err; } - if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) { - if (job) - err = xe_sched_job_last_fence_add_dep(job, vm); - else - err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); - } - for (i = 0; job && !err && i < vops->num_syncs; i++) err = xe_sync_entry_add_deps(&vops->syncs[i], job); + if (job) { + if (ijob) { + err = xe_tlb_inval_job_alloc_dep(ijob); + if (err) + return err; + } + + if (mjob) { + err = xe_tlb_inval_job_alloc_dep(mjob); + if (err) + return err; + } + } + return err; } @@ -1334,10 +1366,12 @@ static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) struct xe_vm_pgtable_update_ops *pt_update_ops = 
&vops->pt_update_ops[pt_update->tile_id]; - return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops, + return xe_pt_vm_dependencies(pt_update->job, pt_update->ijob, + pt_update->mjob, vm, pt_update->vops, pt_update_ops, rftree); } +#if IS_ENABLED(CONFIG_DRM_GPUSVM) #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) @@ -1368,7 +1402,7 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, struct xe_userptr_vma *uvma; unsigned long notifier_seq; - lockdep_assert_held_read(&vm->userptr.notifier_lock); + xe_svm_assert_held_read(vm); if (!xe_vma_is_userptr(vma)) return 0; @@ -1377,7 +1411,7 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, if (xe_pt_userptr_inject_eagain(uvma)) xe_vma_userptr_force_invalidate(uvma); - notifier_seq = uvma->userptr.notifier_seq; + notifier_seq = uvma->userptr.pages.notifier_seq; if (!mmu_interval_read_retry(&uvma->userptr.notifier, notifier_seq)) @@ -1393,12 +1427,12 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, return 0; } -static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_vm_pgtable_update_ops *pt_update) +static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op, + struct xe_vm_pgtable_update_ops *pt_update) { int err = 0; - lockdep_assert_held_read(&vm->userptr.notifier_lock); + xe_svm_assert_held_read(vm); switch (op->base.op) { case DRM_GPUVA_OP_MAP: @@ -1416,9 +1450,40 @@ static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, case DRM_GPUVA_OP_UNMAP: break; case DRM_GPUVA_OP_PREFETCH: - err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), - pt_update); + if (xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va))) { + struct xe_svm_range *range = op->map_range.range; + unsigned long i; + + xe_assert(vm->xe, + xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va))); + xa_for_each(&op->prefetch_range.range, i, range) { + 
xe_svm_range_debug(range, "PRE-COMMIT"); + + if (!xe_svm_range_pages_valid(range)) { + xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); + return -ENODATA; + } + } + } else { + err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), pt_update); + } + break; +#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) + case DRM_GPUVA_OP_DRIVER: + if (op->subop == XE_VMA_SUBOP_MAP_RANGE) { + struct xe_svm_range *range = op->map_range.range; + + xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma)); + + xe_svm_range_debug(range, "PRE-COMMIT"); + + if (!xe_svm_range_pages_valid(range)) { + xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); + return -EAGAIN; + } + } break; +#endif default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } @@ -1426,7 +1491,7 @@ static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, return err; } -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) +static int xe_pt_svm_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) { struct xe_vm *vm = pt_update->vops->vm; struct xe_vma_ops *vops = pt_update->vops; @@ -1439,124 +1504,20 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) if (err) return err; - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); list_for_each_entry(op, &vops->list, link) { - err = op_check_userptr(vm, op, pt_update_ops); + err = op_check_svm_userptr(vm, op, pt_update_ops); if (err) { - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); break; } } return err; } - -#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) -static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vm *vm = pt_update->vops->vm; - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vma_op *op; - int err; - - err = xe_pt_pre_commit(pt_update); - if (err) - return err; - - xe_svm_notifier_lock(vm); - - list_for_each_entry(op, &vops->list, link) { - struct xe_svm_range *range = op->map_range.range; - - if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) 
- continue; - - xe_svm_range_debug(range, "PRE-COMMIT"); - - xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma)); - xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE); - - if (!xe_svm_range_pages_valid(range)) { - xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); - xe_svm_notifier_unlock(vm); - return -EAGAIN; - } - } - - return 0; -} #endif -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - xe_gt_tlb_invalidation_fence_signal(&ifence->base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); -} - -static void invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, 
&ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - xe_gt_assert(gt, !ret || ret == -ENOENT); -} - struct xe_pt_stage_unbind_walk { /** @base: The pagewalk base-class. */ struct xe_pt_walk base; @@ -1691,8 +1652,8 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_svm_range *range, struct xe_vm_pgtable_update *entries) { - u64 start = range ? range->base.itree.start : xe_vma_start(vma); - u64 end = range ? range->base.itree.last + 1 : xe_vma_end(vma); + u64 start = range ? xe_svm_range_start(range) : xe_vma_start(vma); + u64 end = range ? xe_svm_range_end(range) : xe_vma_end(vma); struct xe_pt_stage_unbind_walk xe_walk = { .base = { .ops = &xe_pt_stage_unbind_ops, @@ -1858,7 +1819,7 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, xe_vma_start(vma), xe_vma_end(vma)); ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); + pt_update_ops->needs_svm_lock |= xe_vma_is_userptr(vma); /* * If rebind, we have to invalidate TLB on !LR vms to invalidate @@ -1902,7 +1863,7 @@ static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile, vm_dbg(&xe_vma_vm(vma)->xe->drm, "Preparing bind, with range [%lx...%lx)\n", - range->base.itree.start, range->base.itree.last); + xe_svm_range_start(range), xe_svm_range_end(range) - 1); pt_op->vma = NULL; pt_op->bind = true; @@ -1917,8 +1878,8 @@ static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile, pt_op->num_entries, true); xe_pt_update_ops_rfence_interval(pt_update_ops, - range->base.itree.start, - range->base.itree.last + 1); + xe_svm_range_start(range), + xe_svm_range_end(range)); ++pt_update_ops->current_op; pt_update_ops->needs_svm_lock = true; @@ -1966,7 +1927,7 @@ static int 
unbind_op_prepare(struct xe_tile *tile, xe_pt_update_ops_rfence_interval(pt_update_ops, xe_vma_start(vma), xe_vma_end(vma)); ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); + pt_update_ops->needs_svm_lock |= xe_vma_is_userptr(vma); pt_update_ops->needs_invalidation = true; xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries); @@ -1974,6 +1935,32 @@ static int unbind_op_prepare(struct xe_tile *tile, return 0; } +static bool +xe_pt_op_check_range_skip_invalidation(struct xe_vm_pgtable_update_op *pt_op, + struct xe_svm_range *range) +{ + struct xe_vm_pgtable_update *update = pt_op->entries; + + XE_WARN_ON(!pt_op->num_entries); + + /* + * We can't skip the invalidation if we are removing PTEs that span more + * than the range, do some checks to ensure we are removing PTEs that + * are invalid. + */ + + if (pt_op->num_entries > 1) + return false; + + if (update->pt->level == 0) + return true; + + if (update->pt->level == 1) + return xe_svm_range_size(range) >= SZ_2M; + + return false; +} + static int unbind_range_prepare(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, @@ -1987,7 +1974,7 @@ static int unbind_range_prepare(struct xe_vm *vm, vm_dbg(&vm->xe->drm, "Preparing unbind, with range [%lx...%lx)\n", - range->base.itree.start, range->base.itree.last); + xe_svm_range_start(range), xe_svm_range_end(range) - 1); pt_op->vma = XE_INVALID_VMA; pt_op->bind = false; @@ -1998,11 +1985,14 @@ static int unbind_range_prepare(struct xe_vm *vm, xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, pt_op->num_entries, false); - xe_pt_update_ops_rfence_interval(pt_update_ops, range->base.itree.start, - range->base.itree.last + 1); + xe_pt_update_ops_rfence_interval(pt_update_ops, xe_svm_range_start(range), + xe_svm_range_end(range)); ++pt_update_ops->current_op; pt_update_ops->needs_svm_lock = true; - pt_update_ops->needs_invalidation = true; + pt_update_ops->needs_invalidation 
|= xe_vm_has_scratch(vm) || + xe_vm_has_valid_gpu_mapping(tile, range->tile_present, + range->tile_invalidated) || + !xe_pt_op_check_range_skip_invalidation(pt_op, range); xe_pt_commit_prepare_unbind(XE_INVALID_VMA, pt_op->entries, pt_op->num_entries); @@ -2023,7 +2013,7 @@ static int op_prepare(struct xe_vm *vm, case DRM_GPUVA_OP_MAP: if ((!op->map.immediate && xe_vm_in_fault_mode(vm) && !op->map.invalidate_on_bind) || - op->map.is_cpu_addr_mirror) + (op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) break; err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma, @@ -2065,11 +2055,20 @@ static int op_prepare(struct xe_vm *vm, { struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); - if (xe_vma_is_cpu_addr_mirror(vma)) - break; + if (xe_vma_is_cpu_addr_mirror(vma)) { + struct xe_svm_range *range; + unsigned long i; - err = bind_op_prepare(vm, tile, pt_update_ops, vma, false); - pt_update_ops->wait_vm_kernel = true; + xa_for_each(&op->prefetch_range.range, i, range) { + err = bind_range_prepare(vm, tile, pt_update_ops, + vma, range); + if (err) + return err; + } + } else { + err = bind_op_prepare(vm, tile, pt_update_ops, vma, false); + pt_update_ops->wait_vm_kernel = true; + } break; } case DRM_GPUVA_OP_DRIVER: @@ -2166,12 +2165,17 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); } - vma->tile_present |= BIT(tile->id); - vma->tile_staged &= ~BIT(tile->id); + /* All WRITE_ONCE pair with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ + WRITE_ONCE(vma->tile_present, vma->tile_present | BIT(tile->id)); if (invalidate_on_bind) - vma->tile_invalidated |= BIT(tile->id); + WRITE_ONCE(vma->tile_invalidated, + vma->tile_invalidated | BIT(tile->id)); + else + WRITE_ONCE(vma->tile_invalidated, + vma->tile_invalidated & ~BIT(tile->id)); + vma->tile_staged &= ~BIT(tile->id); if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); + xe_svm_assert_held_read(vm); 
to_userptr_vma(vma)->userptr.initial_bind = true; } @@ -2207,7 +2211,7 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, if (!vma->tile_present) { list_del_init(&vma->combined_links.rebind); if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); + xe_svm_assert_held_read(vm); spin_lock(&vm->userptr.invalidated_lock); list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); @@ -2216,6 +2220,18 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, } } +static void range_present_and_invalidated_tile(struct xe_vm *vm, + struct xe_svm_range *range, + u8 tile_id) +{ + /* All WRITE_ONCE pair with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ + + lockdep_assert_held(&vm->svm.gpusvm.notifier_lock); + + WRITE_ONCE(range->tile_present, range->tile_present | BIT(tile_id)); + WRITE_ONCE(range->tile_invalidated, range->tile_invalidated & ~BIT(tile_id)); +} + static void op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, @@ -2227,7 +2243,7 @@ static void op_commit(struct xe_vm *vm, switch (op->base.op) { case DRM_GPUVA_OP_MAP: if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) || - op->map.is_cpu_addr_mirror) + (op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) break; bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, @@ -2263,27 +2279,28 @@ static void op_commit(struct xe_vm *vm, { struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); - if (!xe_vma_is_cpu_addr_mirror(vma)) + if (xe_vma_is_cpu_addr_mirror(vma)) { + struct xe_svm_range *range = NULL; + unsigned long i; + + xa_for_each(&op->prefetch_range.range, i, range) + range_present_and_invalidated_tile(vm, range, tile->id); + } else { bind_op_commit(vm, tile, pt_update_ops, vma, fence, fence2, false); + } break; } case DRM_GPUVA_OP_DRIVER: { - /* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */ - - if (op->subop == XE_VMA_SUBOP_MAP_RANGE) { - WRITE_ONCE(op->map_range.range->tile_present, - 
op->map_range.range->tile_present | - BIT(tile->id)); - WRITE_ONCE(op->map_range.range->tile_invalidated, - op->map_range.range->tile_invalidated & - ~BIT(tile->id)); - } else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) { + /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ + if (op->subop == XE_VMA_SUBOP_MAP_RANGE) + range_present_and_invalidated_tile(vm, op->map_range.range, tile->id); + else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) WRITE_ONCE(op->unmap_range.range->tile_present, op->unmap_range.range->tile_present & ~BIT(tile->id)); - } + break; } default: @@ -2297,22 +2314,25 @@ static const struct xe_migrate_pt_update_ops migrate_ops = { .pre_commit = xe_pt_pre_commit, }; -static const struct xe_migrate_pt_update_ops userptr_migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) -static const struct xe_migrate_pt_update_ops svm_migrate_ops = { +#if IS_ENABLED(CONFIG_DRM_GPUSVM) +static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops = { .populate = xe_vm_populate_pgtable, .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_svm_pre_commit, + .pre_commit = xe_pt_svm_userptr_pre_commit, }; #else -static const struct xe_migrate_pt_update_ops svm_migrate_ops; +static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops; #endif +static struct xe_dep_scheduler *to_dep_scheduler(struct xe_exec_queue *q, + struct xe_gt *gt) +{ + if (xe_gt_is_media_type(gt)) + return q->tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT].dep_scheduler; + + return q->tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT].dep_scheduler; +} + /** * xe_pt_update_ops_run() - Run PT update operations * @tile: Tile of PT update operations @@ -2330,18 +2350,15 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) struct xe_vm *vm = vops->vm; struct xe_vm_pgtable_update_ops *pt_update_ops = 
&vops->pt_update_ops[tile->id]; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL, *mfence = NULL; - struct dma_fence **fences = NULL; - struct dma_fence_array *cf = NULL; + struct xe_exec_queue *q = pt_update_ops->q; + struct dma_fence *fence, *ifence = NULL, *mfence = NULL; + struct xe_tlb_inval_job *ijob = NULL, *mjob = NULL; struct xe_range_fence *rfence; struct xe_vma_op *op; int err = 0, i; struct xe_migrate_pt_update update = { .ops = pt_update_ops->needs_svm_lock ? - &svm_migrate_ops : - pt_update_ops->needs_userptr_lock ? - &userptr_migrate_ops : + &svm_userptr_migrate_ops : &migrate_ops, .vops = vops, .tile_id = tile->id, @@ -2363,34 +2380,41 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) #endif if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) { - err = -ENOMEM; + struct xe_dep_scheduler *dep_scheduler = + to_dep_scheduler(q, tile->primary_gt); + + ijob = xe_tlb_inval_job_create(q, &tile->primary_gt->tlb_inval, + dep_scheduler, vm, + pt_update_ops->start, + pt_update_ops->last, + XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + if (IS_ERR(ijob)) { + err = PTR_ERR(ijob); goto kill_vm_tile1; } + update.ijob = ijob; + if (tile->media_gt) { - mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!mfence) { - err = -ENOMEM; - goto free_ifence; - } - fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL); - if (!fences) { - err = -ENOMEM; - goto free_ifence; - } - cf = dma_fence_array_alloc(2); - if (!cf) { - err = -ENOMEM; - goto free_ifence; + dep_scheduler = to_dep_scheduler(q, tile->media_gt); + + mjob = xe_tlb_inval_job_create(q, + &tile->media_gt->tlb_inval, + dep_scheduler, vm, + pt_update_ops->start, + pt_update_ops->last, + XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT); + if (IS_ERR(mjob)) { + err = PTR_ERR(mjob); + goto free_ijob; } + update.mjob = mjob; } } rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); if (!rfence) { err = -ENOMEM; - goto free_ifence; + goto free_ijob; } 
fence = xe_migrate_update_pgtables(tile->migrate, &update); @@ -2414,30 +2438,12 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) pt_update_ops->last, fence)) dma_fence_wait(fence, false); - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { - if (mfence) - dma_fence_get(fence); - invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - if (mfence) { - invalidation_fence_init(tile->media_gt, mfence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - fences[0] = &ifence->base.base; - fences[1] = &mfence->base.base; - dma_fence_array_init(cf, 2, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - fence = &cf->base; - } else { - fence = &ifence->base.base; - } - } + if (ijob) + ifence = xe_tlb_inval_job_push(ijob, tile->migrate, fence); + if (mjob) + mfence = xe_tlb_inval_job_push(mjob, tile->migrate, fence); - if (!mfence) { + if (!mjob && !ijob) { dma_resv_add_fence(xe_vm_resv(vm), fence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : @@ -2445,38 +2451,54 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) list_for_each_entry(op, &vops->list, link) op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); + } else if (ijob && !mjob) { + dma_resv_add_fence(xe_vm_resv(vm), ifence, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + + list_for_each_entry(op, &vops->list, link) + op_commit(vops->vm, tile, pt_update_ops, op, ifence, NULL); } else { - dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, + dma_resv_add_fence(xe_vm_resv(vm), ifence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); - dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, + dma_resv_add_fence(xe_vm_resv(vm), mfence, pt_update_ops->wait_vm_bookkeep ? 
DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, - &ifence->base.base, &mfence->base.base); + op_commit(vops->vm, tile, pt_update_ops, op, ifence, + mfence); } if (pt_update_ops->needs_svm_lock) xe_svm_notifier_unlock(vm); - if (pt_update_ops->needs_userptr_lock) - up_read(&vm->userptr.notifier_lock); + + /* + * The last fence is only used for zero bind queue idling; migrate + * queues are not exposed to user space. + */ + if (!(q->flags & EXEC_QUEUE_FLAG_MIGRATE)) + xe_exec_queue_last_fence_set(q, vm, fence); + + xe_tlb_inval_job_put(mjob); + xe_tlb_inval_job_put(ijob); + dma_fence_put(ifence); + dma_fence_put(mfence); return fence; free_rfence: kfree(rfence); -free_ifence: - kfree(cf); - kfree(fences); - kfree(mfence); - kfree(ifence); +free_ijob: + xe_tlb_inval_job_put(mjob); + xe_tlb_inval_job_put(ijob); kill_vm_tile1: - if (err != -EAGAIN && tile->id) + if (err != -EAGAIN && err != -ENODATA && tile->id) xe_vm_kill(vops->vm, false); return ERR_PTR(err); diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h index 5ecf003d513c..4daeebaab5a1 100644 --- a/drivers/gpu/drm/xe/xe_pt.h +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -10,6 +10,7 @@ #include "xe_pt_types.h" struct dma_fence; +struct drm_exec; struct xe_bo; struct xe_device; struct xe_exec_queue; @@ -29,7 +30,7 @@ struct xe_vma_ops; unsigned int xe_pt_shift(unsigned int level); struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level); + unsigned int level, struct drm_exec *exec); void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, struct xe_pt *pt); diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h index 69eab6f37cfe..881f01e14db8 100644 --- a/drivers/gpu/drm/xe/xe_pt_types.h +++ b/drivers/gpu/drm/xe/xe_pt_types.h @@ -45,8 +45,7 @@ struct xe_pt_ops { u64 (*pte_encode_addr)(struct xe_device *xe, u64 addr, u16 pat_index, u32 pt_level, bool 
devmem, u64 flags); - u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset, - u16 pat_index); + u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset); }; struct xe_pt_entry { @@ -106,8 +105,6 @@ struct xe_vm_pgtable_update_ops { u32 current_op; /** @needs_svm_lock: Needs SVM lock */ bool needs_svm_lock; - /** @needs_userptr_lock: Needs userptr lock */ - bool needs_userptr_lock; /** @needs_invalidation: Needs invalidation */ bool needs_invalidation; /** diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c index 454ea7dc08ac..bdbdbbf6a678 100644 --- a/drivers/gpu/drm/xe/xe_pxp.c +++ b/drivers/gpu/drm/xe/xe_pxp.c @@ -504,65 +504,62 @@ int xe_pxp_exec_queue_set_type(struct xe_pxp *pxp, struct xe_exec_queue *q, u8 t return 0; } -static void __exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) +static int __exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) { - spin_lock_irq(&pxp->queues.lock); - list_add_tail(&q->pxp.link, &pxp->queues.list); - spin_unlock_irq(&pxp->queues.lock); + int ret = 0; + + /* + * A queue can be added to the list only if the PXP is in active status, + * otherwise the termination might not handle it correctly. + */ + mutex_lock(&pxp->mutex); + + if (pxp->status == XE_PXP_ACTIVE) { + spin_lock_irq(&pxp->queues.lock); + list_add_tail(&q->pxp.link, &pxp->queues.list); + spin_unlock_irq(&pxp->queues.lock); + } else if (pxp->status == XE_PXP_ERROR || pxp->status == XE_PXP_SUSPENDED) { + ret = -EIO; + } else { + ret = -EBUSY; /* try again later */ + } + + mutex_unlock(&pxp->mutex); + + return ret; } -/** - * xe_pxp_exec_queue_add - add a queue to the PXP list - * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) - * @q: the queue to add to the list - * - * If PXP is enabled and the prerequisites are done, start the PXP ARB - * session (if not already running) and add the queue to the PXP list. Note - * that the queue must have previously been marked as using PXP with - * xe_pxp_exec_queue_set_type. 
- * - * Returns 0 if the PXP ARB session is running and the queue is in the list, - * -ENODEV if PXP is disabled, -EBUSY if the PXP prerequisites are not done, - * other errno value if something goes wrong during the session start. - */ -int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) +static int pxp_start(struct xe_pxp *pxp, u8 type) { int ret = 0; + bool restart = false; if (!xe_pxp_is_enabled(pxp)) return -ENODEV; /* we only support HWDRM sessions right now */ - xe_assert(pxp->xe, q->pxp.type == DRM_XE_PXP_TYPE_HWDRM); + xe_assert(pxp->xe, type == DRM_XE_PXP_TYPE_HWDRM); - /* - * Runtime suspend kills PXP, so we take a reference to prevent it from - * happening while we have active queues that use PXP - */ - xe_pm_runtime_get(pxp->xe); + /* get_readiness_status() returns 0 for in-progress and 1 for done */ + ret = xe_pxp_get_readiness_status(pxp); + if (ret <= 0) + return ret ?: -EBUSY; - if (!pxp_prerequisites_done(pxp)) { - ret = -EBUSY; - goto out; - } + ret = 0; wait_for_idle: /* * if there is an action in progress, wait for it. We need to wait * outside the lock because the completion is done from within the lock. - * Note that the two action should never be pending at the same time. + * Note that the two actions should never be pending at the same time. 
*/ if (!wait_for_completion_timeout(&pxp->termination, - msecs_to_jiffies(PXP_TERMINATION_TIMEOUT_MS))) { - ret = -ETIMEDOUT; - goto out; - } + msecs_to_jiffies(PXP_TERMINATION_TIMEOUT_MS))) + return -ETIMEDOUT; if (!wait_for_completion_timeout(&pxp->activation, - msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS))) { - ret = -ETIMEDOUT; - goto out; - } + msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS))) + return -ETIMEDOUT; mutex_lock(&pxp->mutex); @@ -570,11 +567,9 @@ wait_for_idle: switch (pxp->status) { case XE_PXP_ERROR: ret = -EIO; - break; + goto out_unlock; case XE_PXP_ACTIVE: - __exec_queue_add(pxp, q); - mutex_unlock(&pxp->mutex); - goto out; + goto out_unlock; case XE_PXP_READY_TO_START: pxp->status = XE_PXP_START_IN_PROGRESS; reinit_completion(&pxp->activation); @@ -582,8 +577,8 @@ wait_for_idle: case XE_PXP_START_IN_PROGRESS: /* If a start is in progress then the completion must not be done */ XE_WARN_ON(completion_done(&pxp->activation)); - mutex_unlock(&pxp->mutex); - goto wait_for_idle; + restart = true; + goto out_unlock; case XE_PXP_NEEDS_TERMINATION: mark_termination_in_progress(pxp); break; @@ -591,29 +586,25 @@ wait_for_idle: case XE_PXP_NEEDS_ADDITIONAL_TERMINATION: /* If a termination is in progress then the completion must not be done */ XE_WARN_ON(completion_done(&pxp->termination)); - mutex_unlock(&pxp->mutex); - goto wait_for_idle; + restart = true; + goto out_unlock; case XE_PXP_SUSPENDED: default: drm_err(&pxp->xe->drm, "unexpected state during PXP start: %u\n", pxp->status); ret = -EIO; - break; + goto out_unlock; } mutex_unlock(&pxp->mutex); - if (ret) - goto out; - if (!completion_done(&pxp->termination)) { ret = pxp_terminate_hw(pxp); if (ret) { drm_err(&pxp->xe->drm, "PXP termination failed before start\n"); mutex_lock(&pxp->mutex); pxp->status = XE_PXP_ERROR; - mutex_unlock(&pxp->mutex); - goto out; + goto out_unlock; } goto wait_for_idle; @@ -635,21 +626,59 @@ wait_for_idle: if (pxp->status != XE_PXP_START_IN_PROGRESS) { 
drm_err(&pxp->xe->drm, "unexpected state after PXP start: %u\n", pxp->status); pxp->status = XE_PXP_NEEDS_TERMINATION; - mutex_unlock(&pxp->mutex); - goto wait_for_idle; + restart = true; + goto out_unlock; } /* If everything went ok, update the status and add the queue to the list */ - if (!ret) { + if (!ret) pxp->status = XE_PXP_ACTIVE; - __exec_queue_add(pxp, q); - } else { + else pxp->status = XE_PXP_ERROR; - } +out_unlock: mutex_unlock(&pxp->mutex); -out: + if (restart) + goto wait_for_idle; + + return ret; +} + +/** + * xe_pxp_exec_queue_add - add a queue to the PXP list + * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) + * @q: the queue to add to the list + * + * If PXP is enabled and the prerequisites are done, start the PXP default + * session (if not already running) and add the queue to the PXP list. + * + * Returns 0 if the PXP session is running and the queue is in the list, + * -ENODEV if PXP is disabled, -EBUSY if the PXP prerequisites are not done, + * other errno value if something goes wrong during the session start. 
+ */ +int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) +{ + int ret; + + if (!xe_pxp_is_enabled(pxp)) + return -ENODEV; + + /* + * Runtime suspend kills PXP, so we take a reference to prevent it from + * happening while we have active queues that use PXP + */ + xe_pm_runtime_get(pxp->xe); + +start: + ret = pxp_start(pxp, q->pxp.type); + + if (!ret) { + ret = __exec_queue_add(pxp, q); + if (ret == -EBUSY) + goto start; + } + /* * in the successful case the PM ref is released from * xe_pxp_exec_queue_remove @@ -659,6 +688,7 @@ out: return ret; } +ALLOW_ERROR_INJECTION(xe_pxp_exec_queue_add, ERRNO); static void __pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q, bool lock) { diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c index d92ec0f515b0..e60526e30030 100644 --- a/drivers/gpu/drm/xe/xe_pxp_submit.c +++ b/drivers/gpu/drm/xe/xe_pxp_submit.c @@ -54,8 +54,9 @@ static int allocate_vcs_execution_resources(struct xe_pxp *pxp) * Each termination is 16 DWORDS, so 4K is enough to contain a * termination for each sessions. 
*/ - bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT); + bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT, + false); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out_queue; @@ -87,7 +88,9 @@ static int allocate_gsc_client_resources(struct xe_gt *gt, { struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = tile_to_xe(tile); + struct xe_validation_ctx ctx; struct xe_hw_engine *hwe; + struct drm_exec exec; struct xe_vm *vm; struct xe_bo *bo; struct xe_exec_queue *q; @@ -101,20 +104,31 @@ static int allocate_gsc_client_resources(struct xe_gt *gt, xe_assert(xe, hwe); /* PXP instructions must be issued from PPGTT */ - vm = xe_vm_create(xe, XE_VM_FLAG_GSC); + vm = xe_vm_create(xe, XE_VM_FLAG_GSC, NULL); if (IS_ERR(vm)) return PTR_ERR(vm); /* We allocate a single object for the batch and the in/out memory */ - xe_vm_lock(vm, false); - bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_NEEDS_UC); - xe_vm_unlock(vm); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto vm_out; + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags){}, err) { + err = xe_vm_drm_exec_lock(vm, &exec); + drm_exec_retry_on_contention(&exec); + if (err) + break; + + bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2, + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | + XE_BO_FLAG_NEEDS_UC, &exec); + drm_exec_retry_on_contention(&exec); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &err); + break; + } } + if (err) + goto vm_out; fence = xe_vm_bind_kernel_bo(vm, bo, NULL, 0, XE_CACHE_WB); if (IS_ERR(fence)) { diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 2dbf4066d86f..1c0915e2cc16 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ 
b/drivers/gpu/drm/xe/xe_query.c @@ -21,12 +21,14 @@ #include "xe_force_wake.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_gt_topology.h" #include "xe_guc_hwconfig.h" #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" #include "xe_pxp.h" #include "xe_ttm_vram_mgr.h" +#include "xe_vram_types.h" #include "xe_wa.h" static const u16 xe_to_user_engine_class[] = { @@ -141,7 +143,7 @@ query_engine_cycles(struct xe_device *xe, return -EINVAL; eci = &resp.eci; - if (eci->gt_id >= XE_MAX_GT_PER_TILE) + if (eci->gt_id >= xe->info.max_gt_per_tile) return -EINVAL; gt = xe_device_get_gt(xe, eci->gt_id); @@ -274,8 +276,7 @@ static int query_mem_regions(struct xe_device *xe, mem_regions->mem_regions[0].instance = 0; mem_regions->mem_regions[0].min_page_size = PAGE_SIZE; mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT; - if (perfmon_capable()) - mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man); + mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man); mem_regions->num_mem_regions = 1; for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { @@ -291,13 +292,11 @@ static int query_mem_regions(struct xe_device *xe, mem_regions->mem_regions[mem_regions->num_mem_regions].total_size = man->size; - if (perfmon_capable()) { - xe_ttm_vram_get_used(man, - &mem_regions->mem_regions - [mem_regions->num_mem_regions].used, - &mem_regions->mem_regions - [mem_regions->num_mem_regions].cpu_visible_used); - } + xe_ttm_vram_get_used(man, + &mem_regions->mem_regions + [mem_regions->num_mem_regions].used, + &mem_regions->mem_regions + [mem_regions->num_mem_regions].cpu_visible_used); mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size = xe_ttm_vram_get_cpu_visible_size(man); @@ -337,7 +336,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) config->num_params = num_params; config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] = xe->info.devid | (xe->info.revid << 16); - if 
(xe_device_get_root_tile(xe)->mem.vram.usable_size) + if (xe->mem.vram) config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM; if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_XE_GPUSVM)) @@ -368,6 +367,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query struct drm_xe_query_gt_list __user *query_ptr = u64_to_user_ptr(query->data); struct drm_xe_query_gt_list *gt_list; + int iter = 0; u8 id; if (query->size == 0) { @@ -385,12 +385,12 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) - gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA; + gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MEDIA; else - gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN; - gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id; - gt_list->gt_list[id].gt_id = gt->info.id; - gt_list->gt_list[id].reference_clock = gt->info.reference_clock; + gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MAIN; + gt_list->gt_list[iter].tile_id = gt_to_tile(gt)->id; + gt_list->gt_list[iter].gt_id = gt->info.id; + gt_list->gt_list[iter].reference_clock = gt->info.reference_clock; /* * The mem_regions indexes in the mask below need to * directly identify the struct @@ -406,19 +406,21 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query * assumption. 
*/ if (!IS_DGFX(xe)) - gt_list->gt_list[id].near_mem_regions = 0x1; + gt_list->gt_list[iter].near_mem_regions = 0x1; else - gt_list->gt_list[id].near_mem_regions = - BIT(gt_to_tile(gt)->id) << 1; - gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^ - gt_list->gt_list[id].near_mem_regions; + gt_list->gt_list[iter].near_mem_regions = + BIT(gt_to_tile(gt)->mem.vram->id) << 1; + gt_list->gt_list[iter].far_mem_regions = xe->info.mem_region_mask ^ + gt_list->gt_list[iter].near_mem_regions; - gt_list->gt_list[id].ip_ver_major = + gt_list->gt_list[iter].ip_ver_major = REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid); - gt_list->gt_list[id].ip_ver_minor = + gt_list->gt_list[iter].ip_ver_minor = REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid); - gt_list->gt_list[id].ip_ver_rev = + gt_list->gt_list[iter].ip_ver_rev = REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid); + + iter++; } if (copy_to_user(query_ptr, gt_list, size)) { @@ -434,7 +436,7 @@ static int query_hwconfig(struct xe_device *xe, struct drm_xe_device_query *query) { struct xe_gt *gt = xe_root_mmio_gt(xe); - size_t size = xe_guc_hwconfig_size(&gt->uc.guc); + size_t size = gt ? xe_guc_hwconfig_size(&gt->uc.guc) : 0; void __user *query_ptr = u64_to_user_ptr(query->data); void *hwconfig; @@ -473,7 +475,7 @@ static size_t calc_topo_query_size(struct xe_device *xe) sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss); /* L3bank mask may not be available for some GTs */ - if (!XE_WA(gt, no_media_l3)) + if (xe_gt_topology_report_l3(gt)) query_size += sizeof(struct drm_xe_query_topology_mask) + sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask); } @@ -536,7 +538,7 @@ static int query_gt_topology(struct xe_device *xe, * mask, then it's better to omit L3 from the query rather than * reporting bogus or zeroed information to userspace. 
*/ - if (!XE_WA(gt, no_media_l3)) { + if (xe_gt_topology_report_l3(gt)) { topo.type = DRM_XE_TOPO_L3_BANK; err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask, sizeof(gt->fuse_topo.l3_bank_mask)); @@ -683,8 +685,8 @@ static int query_oa_units(struct xe_device *xe, du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS | DRM_XE_OA_CAPS_OA_BUFFER_SIZE | - DRM_XE_OA_CAPS_WAIT_NUM_REPORTS; - + DRM_XE_OA_CAPS_WAIT_NUM_REPORTS | + DRM_XE_OA_CAPS_OAM; j = 0; for_each_hw_engine(hwe, gt, hwe_id) { if (!xe_hw_engine_is_reserved(hwe) && @@ -745,10 +747,8 @@ static int query_eu_stall(struct xe_device *xe, u32 num_rates; int ret; - if (!xe_eu_stall_supported_on_platform(xe)) { - drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n"); + if (!xe_eu_stall_supported_on_platform(xe)) return -ENODEV; - } array_size = xe_eu_stall_get_sampling_rates(&num_rates, &rates); size = sizeof(struct drm_xe_query_eu_stall) + array_size; diff --git a/drivers/gpu/drm/xe/xe_range_fence.h b/drivers/gpu/drm/xe/xe_range_fence.h index edd58b34f5c0..4934729dd904 100644 --- a/drivers/gpu/drm/xe/xe_range_fence.h +++ b/drivers/gpu/drm/xe/xe_range_fence.h @@ -13,13 +13,13 @@ struct xe_range_fence_tree; struct xe_range_fence; -/** struct xe_range_fence_ops - XE range fence ops */ +/** struct xe_range_fence_ops - Xe range fence ops */ struct xe_range_fence_ops { /** @free: free range fence op */ void (*free)(struct xe_range_fence *rfence); }; -/** struct xe_range_fence - XE range fence (address conflict tracking) */ +/** struct xe_range_fence - Xe range fence (address conflict tracking) */ struct xe_range_fence { /** @rb: RB tree node inserted into interval tree */ struct rb_node rb; diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 23f6c81d9994..7ca360b2c20d 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -19,7 +19,8 @@ 
#undef XE_REG_MCR #define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1) -static bool match_not_render(const struct xe_gt *gt, +static bool match_not_render(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { return hwe->class != XE_ENGINE_CLASS_RENDER; @@ -88,6 +89,13 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4)) }, + { XE_RTP_NAME("14024997852"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(WHITELIST(FF_MODE, + RING_FORCE_TO_NONPRIV_ACCESS_RW), + WHITELIST(VFLSKPD, + RING_FORCE_TO_NONPRIV_ACCESS_RW)) + }, }; static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe) diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index d1a403cfb628..4e00008b7081 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -55,8 +55,8 @@ struct xe_res_cursor { u32 mem_type; /** @sgl: Scatterlist for cursor */ struct scatterlist *sgl; - /** @dma_addr: Current element in a struct drm_pagemap_device_addr array */ - const struct drm_pagemap_device_addr *dma_addr; + /** @dma_addr: Current element in a struct drm_pagemap_addr array */ + const struct drm_pagemap_addr *dma_addr; /** @mm: Buddy allocator for VRAM cursor */ struct drm_buddy *mm; /** @@ -170,7 +170,7 @@ static inline void __xe_res_sg_next(struct xe_res_cursor *cur) */ static inline void __xe_res_dma_next(struct xe_res_cursor *cur) { - const struct drm_pagemap_device_addr *addr = cur->dma_addr; + const struct drm_pagemap_addr *addr = cur->dma_addr; u64 start = cur->start; while (start >= cur->dma_seg_size) { @@ -222,14 +222,14 @@ static inline void xe_res_first_sg(const struct sg_table *sg, /** * xe_res_first_dma - initialize a xe_res_cursor with dma_addr array * - * @dma_addr: struct drm_pagemap_device_addr array to walk + * @dma_addr: struct drm_pagemap_addr array to walk * @start: Start of 
the range * @size: Size of the range * @cur: cursor object to initialize * * Start walking over the range of allocations between @start and @size. */ -static inline void xe_res_first_dma(const struct drm_pagemap_device_addr *dma_addr, +static inline void xe_res_first_dma(const struct drm_pagemap_addr *dma_addr, u64 start, u64 size, struct xe_res_cursor *cur) { diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index bc1689db4cd7..ac0c6dcffe15 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -110,13 +110,14 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) return i; } -static int emit_flush_invalidate(u32 *dw, int i) +static int emit_flush_invalidate(u32 addr, u32 val, u32 flush_flags, u32 *dw, int i) { - dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | - MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX; - dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR; - dw[i++] = 0; + dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | + MI_FLUSH_IMM_DW | (flush_flags & MI_INVALIDATE_TLB) ?: 0; + + dw[i++] = addr | MI_FLUSH_DW_USE_GTT; dw[i++] = 0; + dw[i++] = val; return i; } @@ -178,7 +179,7 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i) bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); u32 flags; - if (XE_WA(gt, 14016712196)) + if (XE_GT_WA(gt, 14016712196)) i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0); @@ -189,7 +190,7 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE); - if (XE_WA(gt, 1409600907)) + if (XE_GT_WA(gt, 1409600907)) flags |= PIPE_CONTROL_DEPTH_STALL; if (lacks_render) @@ -205,7 +206,7 @@ static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int if (hwe->class != XE_ENGINE_CLASS_RENDER) return i; - if (XE_WA(hwe->gt, 16020292621)) + if 
(XE_GT_WA(hwe->gt, 16020292621)) i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_LRI_POST_SYNC, RING_NOPID(hwe->mmio_base).addr, 0); @@ -244,12 +245,14 @@ static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) /* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc, - u64 batch_addr, u32 seqno) + u64 batch_addr, u32 *head, u32 seqno) { u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); struct xe_gt *gt = job->q->gt; + *head = lrc->ring.tail; + i = emit_copy_timestamp(lrc, dw, i); if (job->ring_ops_flush_tlb) { @@ -295,7 +298,7 @@ static bool has_aux_ccs(struct xe_device *xe) } static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, - u64 batch_addr, u32 seqno) + u64 batch_addr, u32 *head, u32 seqno) { u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); @@ -303,6 +306,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, struct xe_device *xe = gt_to_xe(gt); bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; + *head = lrc->ring.tail; + i = emit_copy_timestamp(lrc, dw, i); dw[i++] = preparser_disable(true); @@ -345,7 +350,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, static void __emit_job_gen12_render_compute(struct xe_sched_job *job, struct xe_lrc *lrc, - u64 batch_addr, u32 seqno) + u64 batch_addr, u32 *head, + u32 seqno) { u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); @@ -354,6 +360,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); u32 mask_flags = 0; + *head = lrc->ring.tail; + i = emit_copy_timestamp(lrc, dw, i); dw[i++] = preparser_disable(true); @@ -395,33 +403,31 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, } static void emit_migration_job_gen12(struct 
xe_sched_job *job, - struct xe_lrc *lrc, u32 seqno) + struct xe_lrc *lrc, u32 *head, + u32 seqno) { + u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc); u32 dw[MAX_JOB_SIZE_DW], i = 0; + *head = lrc->ring.tail; + i = emit_copy_timestamp(lrc, dw, i); - i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), - seqno, dw, i); + i = emit_store_imm_ggtt(saddr, seqno, dw, i); dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */ i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i); - if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) { - /* XXX: Do we need this? Leaving for now. */ - dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(dw, i); - dw[i++] = preparser_disable(false); - } + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(saddr, seqno, job->migrate_flush_flags, dw, i); + dw[i++] = preparser_disable(false); i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i); - dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags | - MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW; - dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT; - dw[i++] = 0; - dw[i++] = seqno; /* value */ + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, + job->migrate_flush_flags, + dw, i); i = emit_user_interrupt(dw, i); @@ -438,6 +444,7 @@ static void emit_job_gen12_gsc(struct xe_sched_job *job) __emit_job_gen12_simple(job, job->q->lrc[0], job->ptrs[0].batch_addr, + &job->ptrs[0].head, xe_sched_job_lrc_seqno(job)); } @@ -447,6 +454,7 @@ static void emit_job_gen12_copy(struct xe_sched_job *job) if (xe_sched_job_is_migration(job->q)) { emit_migration_job_gen12(job, job->q->lrc[0], + &job->ptrs[0].head, xe_sched_job_lrc_seqno(job)); return; } @@ -454,6 +462,7 @@ static void emit_job_gen12_copy(struct xe_sched_job *job) for (i = 0; i < job->q->width; ++i) __emit_job_gen12_simple(job, job->q->lrc[i], job->ptrs[i].batch_addr, + &job->ptrs[i].head, xe_sched_job_lrc_seqno(job)); } @@ -465,6 +474,7 @@ static void emit_job_gen12_video(struct 
xe_sched_job *job) for (i = 0; i < job->q->width; ++i) __emit_job_gen12_video(job, job->q->lrc[i], job->ptrs[i].batch_addr, + &job->ptrs[i].head, xe_sched_job_lrc_seqno(job)); } @@ -475,6 +485,7 @@ static void emit_job_gen12_render_compute(struct xe_sched_job *job) for (i = 0; i < job->q->width; ++i) __emit_job_gen12_render_compute(job, job->q->lrc[i], job->ptrs[i].batch_addr, + &job->ptrs[i].head, xe_sched_job_lrc_seqno(job)); } diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 29e694bb1219..ed509b1c8cfc 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -9,6 +9,7 @@ #include <uapi/drm/xe_drm.h> +#include "xe_configfs.h" #include "xe_gt.h" #include "xe_gt_topology.h" #include "xe_macros.h" @@ -56,37 +57,61 @@ static bool rule_matches(const struct xe_device *xe, xe->info.subplatform == r->subplatform; break; case XE_RTP_MATCH_GRAPHICS_VERSION: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 == r->ver_start && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 >= r->ver_start && xe->info.graphics_verx100 <= r->ver_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 == r->ver_start; break; case XE_RTP_MATCH_GRAPHICS_STEP: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.step.graphics >= r->step_start && xe->info.step.graphics < r->step_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 == r->ver_start && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION_RANGE: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = 
xe->info.media_verx100 >= r->ver_start && xe->info.media_verx100 <= r->ver_end && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_STEP: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.step.media >= r->step_start && xe->info.step.media < r->step_end && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION_ANY_GT: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 == r->ver_start; break; case XE_RTP_MATCH_INTEGRATED: @@ -108,7 +133,7 @@ static bool rule_matches(const struct xe_device *xe, match = hwe->class != r->engine_class; break; case XE_RTP_MATCH_FUNC: - match = r->match_func(gt, hwe); + match = r->match_func(xe, gt, hwe); break; default: drm_warn(&xe->drm, "Invalid RTP match %u\n", @@ -186,6 +211,11 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx, struct xe_device **xe) { switch (ctx->type) { + case XE_RTP_PROCESS_TYPE_DEVICE: + *hwe = NULL; + *gt = NULL; + *xe = ctx->xe; + break; case XE_RTP_PROCESS_TYPE_GT: *hwe = NULL; *gt = ctx->gt; @@ -310,13 +340,15 @@ void xe_rtp_process(struct xe_rtp_process_ctx *ctx, } EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process); -bool xe_rtp_match_even_instance(const struct xe_gt *gt, +bool xe_rtp_match_even_instance(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { return hwe->instance % 2 == 0; } -bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, +bool xe_rtp_match_first_render_or_compute(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { u64 render_compute_mask = gt->info.engine_mask & @@ -326,23 +358,30 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, hwe->engine_id == __ffs(render_compute_mask); } -bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, - const struct xe_hw_engine *hwe) +bool xe_rtp_match_not_sriov_vf(const struct xe_device *xe, + const struct xe_gt *gt, + const struct 
xe_hw_engine *hwe) { - unsigned int dss_per_gslice = 4; - unsigned int dss; - - if (drm_WARN(>_to_xe(gt)->drm, xe_dss_mask_empty(gt->fuse_topo.g_dss_mask), - "Checking gslice for platform without geometry pipeline\n")) - return false; + return !IS_SRIOV_VF(xe); +} - dss = xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0); +bool xe_rtp_match_psmi_enabled(const struct xe_device *xe, + const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return xe_configfs_get_psmi_enabled(to_pci_dev(xe->drm.dev)); +} - return dss >= dss_per_gslice; +bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_device *xe, + const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return xe_gt_has_discontiguous_dss_groups(gt); } -bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, +bool xe_rtp_match_has_flat_ccs(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { - return !IS_SRIOV_VF(gt_to_xe(gt)); + return xe->info.has_flat_ccs; } diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 4fe736a11c42..ba5f940c0a96 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -422,7 +422,8 @@ struct xe_reg_sr; #define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__), \ struct xe_hw_engine * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \ - struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }) + struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }, \ + struct xe_device * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_DEVICE }) void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, unsigned long *active_entries, @@ -439,18 +440,21 @@ void xe_rtp_process(struct xe_rtp_process_ctx *ctx, /** * xe_rtp_match_even_instance - Match if engine instance is even + * @xe: Device structure * @gt: GT structure * @hwe: Engine instance * * 
Returns: true if engine instance is even, false otherwise */ -bool xe_rtp_match_even_instance(const struct xe_gt *gt, +bool xe_rtp_match_even_instance(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe); /* * xe_rtp_match_first_render_or_compute - Match if it's first render or compute * engine in the GT * + * @xe: Device structure * @gt: GT structure * @hwe: Engine instance * @@ -462,29 +466,41 @@ bool xe_rtp_match_even_instance(const struct xe_gt *gt, * Returns: true if engine id is the first to match the render reset domain, * false otherwise. */ -bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, +bool xe_rtp_match_first_render_or_compute(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe); /* - * xe_rtp_match_first_gslice_fused_off - Match when first gslice is fused off + * xe_rtp_match_not_sriov_vf - Match when not on SR-IOV VF device * + * @xe: Device structure * @gt: GT structure * @hwe: Engine instance * - * Returns: true if first gslice is fused off, false otherwise. + * Returns: true if device is not VF, false otherwise. */ -bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, - const struct xe_hw_engine *hwe); +bool xe_rtp_match_not_sriov_vf(const struct xe_device *xe, + const struct xe_gt *gt, + const struct xe_hw_engine *hwe); -/* - * xe_rtp_match_not_sriov_vf - Match when not on SR-IOV VF device - * +bool xe_rtp_match_psmi_enabled(const struct xe_device *xe, + const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + +bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_device *xe, + const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + +/** + * xe_rtp_match_has_flat_ccs - Match when platform has FlatCCS compression + * @xe: Device structure * @gt: GT structure * @hwe: Engine instance * - * Returns: true if device is not VF, false otherwise. 
+ * Returns: true if platform has FlatCCS compression, false otherwise */ -bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, +bool xe_rtp_match_has_flat_ccs(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe); #endif diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 1b76b947c706..6ba7f226c227 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -10,6 +10,7 @@ #include "regs/xe_reg_defs.h" +struct xe_device; struct xe_hw_engine; struct xe_gt; @@ -86,7 +87,8 @@ struct xe_rtp_rule { u8 engine_class; }; /* MATCH_FUNC */ - bool (*match_func)(const struct xe_gt *gt, + bool (*match_func)(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe); }; }; @@ -110,12 +112,14 @@ struct xe_rtp_entry { }; enum xe_rtp_process_type { + XE_RTP_PROCESS_TYPE_DEVICE, XE_RTP_PROCESS_TYPE_GT, XE_RTP_PROCESS_TYPE_ENGINE, }; struct xe_rtp_process_ctx { union { + struct xe_device *xe; struct xe_gt *gt; struct xe_hw_engine *hwe; }; diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index 1d43e183ca21..63a5263dcf1b 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -69,7 +69,6 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3 } sa_manager->bo = bo; sa_manager->is_iomem = bo->vmap.is_iomem; - sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); if (bo->vmap.is_iomem) { sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL); @@ -111,6 +110,10 @@ struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, return drm_suballoc_new(&sa_manager->base, size, gfp, true, 0); } +/** + * xe_sa_bo_flush_write() - Copy the data from the sub-allocation to the GPU memory. 
+ * @sa_bo: the &drm_suballoc to flush + */ void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo) { struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager); @@ -124,6 +127,23 @@ void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo) drm_suballoc_size(sa_bo)); } +/** + * xe_sa_bo_sync_read() - Copy the data from GPU memory to the sub-allocation. + * @sa_bo: the &drm_suballoc to sync + */ +void xe_sa_bo_sync_read(struct drm_suballoc *sa_bo) +{ + struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager); + struct xe_device *xe = tile_to_xe(sa_manager->bo->tile); + + if (!sa_manager->bo->vmap.is_iomem) + return; + + xe_map_memcpy_from(xe, xe_sa_bo_cpu_addr(sa_bo), &sa_manager->bo->vmap, + drm_suballoc_soffset(sa_bo), + drm_suballoc_size(sa_bo)); +} + void xe_sa_bo_free(struct drm_suballoc *sa_bo, struct dma_fence *fence) { diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h index 1170ee5a81a8..1be744350836 100644 --- a/drivers/gpu/drm/xe/xe_sa.h +++ b/drivers/gpu/drm/xe/xe_sa.h @@ -7,6 +7,8 @@ #include <linux/sizes.h> #include <linux/types.h> + +#include "xe_bo.h" #include "xe_sa_types.h" struct dma_fence; @@ -35,6 +37,7 @@ static inline struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager } void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo); +void xe_sa_bo_sync_read(struct drm_suballoc *sa_bo); void xe_sa_bo_free(struct drm_suballoc *sa_bo, struct dma_fence *fence); static inline struct xe_sa_manager * @@ -43,9 +46,20 @@ to_xe_sa_manager(struct drm_suballoc_manager *mng) return container_of(mng, struct xe_sa_manager, base); } +/** + * xe_sa_manager_gpu_addr - Retrieve GPU address of a back storage BO + * within suballocator. + * @sa_manager: the &xe_sa_manager struct instance + * Return: GGTT address of the back storage BO. 
+ */ +static inline u64 xe_sa_manager_gpu_addr(struct xe_sa_manager *sa_manager) +{ + return xe_bo_ggtt_addr(sa_manager->bo); +} + static inline u64 xe_sa_bo_gpu_addr(struct drm_suballoc *sa) { - return to_xe_sa_manager(sa->manager)->gpu_addr + + return xe_sa_manager_gpu_addr(to_xe_sa_manager(sa->manager)) + drm_suballoc_soffset(sa); } diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h index 2b070ff1292e..cb7238799dcb 100644 --- a/drivers/gpu/drm/xe/xe_sa_types.h +++ b/drivers/gpu/drm/xe/xe_sa_types.h @@ -12,7 +12,6 @@ struct xe_bo; struct xe_sa_manager { struct drm_suballoc_manager base; struct xe_bo *bo; - u64 gpu_addr; void *cpu_ptr; bool is_iomem; }; diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 1905ca590965..cb674a322113 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -113,7 +113,8 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, kref_init(&job->refcount); xe_exec_queue_get(job->q); - err = drm_sched_job_init(&job->drm, q->entity, 1, NULL); + err = drm_sched_job_init(&job->drm, q->entity, 1, NULL, + q->xef ? q->xef->drm->client_id : 0); if (err) goto err_free; @@ -145,6 +146,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, for (i = 0; i < width; ++i) job->ptrs[i].batch_addr = batch_addr[i]; + atomic_inc(&q->job_cnt); xe_pm_runtime_get_noresume(job_to_xe(job)); trace_xe_sched_job_create(job); return job; @@ -159,11 +161,11 @@ err_free: } /** - * xe_sched_job_destroy - Destroy XE schedule job - * @ref: reference to XE schedule job + * xe_sched_job_destroy - Destroy Xe schedule job + * @ref: reference to Xe schedule job * * Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup - * base DRM schedule job, and free memory for XE schedule job. + * base DRM schedule job, and free memory for Xe schedule job. 
*/ void xe_sched_job_destroy(struct kref *ref) { @@ -176,6 +178,7 @@ void xe_sched_job_destroy(struct kref *ref) dma_fence_put(job->fence); drm_sched_job_cleanup(&job->drm); job_free(job); + atomic_dec(&q->job_cnt); xe_exec_queue_put(q); xe_pm_runtime_put(xe); } @@ -216,15 +219,17 @@ void xe_sched_job_set_error(struct xe_sched_job *job, int error) bool xe_sched_job_started(struct xe_sched_job *job) { + struct dma_fence *fence = dma_fence_chain_contained(job->fence); struct xe_lrc *lrc = job->q->lrc[0]; - return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job), - xe_lrc_start_seqno(lrc), - dma_fence_chain_contained(job->fence)->ops); + return !__dma_fence_is_later(fence, + xe_sched_job_lrc_seqno(job), + xe_lrc_start_seqno(lrc)); } bool xe_sched_job_completed(struct xe_sched_job *job) { + struct dma_fence *fence = dma_fence_chain_contained(job->fence); struct xe_lrc *lrc = job->q->lrc[0]; /* @@ -232,9 +237,9 @@ bool xe_sched_job_completed(struct xe_sched_job *job) * parallel handshake is done. */ - return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job), - xe_lrc_seqno(lrc), - dma_fence_chain_contained(job->fence)->ops); + return !__dma_fence_is_later(fence, + xe_sched_job_lrc_seqno(job), + xe_lrc_seqno(lrc)); } void xe_sched_job_arm(struct xe_sched_job *job) @@ -293,23 +298,6 @@ void xe_sched_job_push(struct xe_sched_job *job) } /** - * xe_sched_job_last_fence_add_dep - Add last fence dependency to job - * @job:job to add the last fence dependency to - * @vm: virtual memory job belongs to - * - * Returns: - * 0 on success, or an error on failing to expand the array. 
- */ -int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm) -{ - struct dma_fence *fence; - - fence = xe_exec_queue_last_fence_get(job->q, vm); - - return drm_sched_job_add_dependency(&job->drm, fence); -} - -/** * xe_sched_job_init_user_fence - Initialize user_fence for the job * @job: job whose user_fence needs an init * @sync: sync to be use to init user_fence diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h index 3dc72c5c1f13..1c1cb44216c3 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.h +++ b/drivers/gpu/drm/xe/xe_sched_job.h @@ -23,10 +23,10 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, void xe_sched_job_destroy(struct kref *ref); /** - * xe_sched_job_get - get reference to XE schedule job - * @job: XE schedule job object + * xe_sched_job_get - get reference to Xe schedule job + * @job: Xe schedule job object * - * Increment XE schedule job's reference count + * Increment Xe schedule job's reference count */ static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job) { @@ -35,10 +35,10 @@ static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job) } /** - * xe_sched_job_put - put reference to XE schedule job - * @job: XE schedule job object + * xe_sched_job_put - put reference to Xe schedule job + * @job: Xe schedule job object * - * Decrement XE schedule job's reference count, call xe_sched_job_destroy when + * Decrement Xe schedule job's reference count, call xe_sched_job_destroy when * reference count == 0. 
*/ static inline void xe_sched_job_put(struct xe_sched_job *job) @@ -58,7 +58,6 @@ bool xe_sched_job_completed(struct xe_sched_job *job); void xe_sched_job_arm(struct xe_sched_job *job); void xe_sched_job_push(struct xe_sched_job *job); -int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm); void xe_sched_job_init_user_fence(struct xe_sched_job *job, struct xe_sync_entry *sync); diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index dbf260dded8d..7c4c54fe920a 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -24,10 +24,15 @@ struct xe_job_ptrs { struct dma_fence_chain *chain_fence; /** @batch_addr: Batch buffer address. */ u64 batch_addr; + /** + * @head: The tail pointer of the LRC (so head pointer of job) when the + * job was submitted + */ + u32 head; }; /** - * struct xe_sched_job - XE schedule job (batch buffer tracking) + * struct xe_sched_job - Xe schedule job (batch buffer tracking) */ struct xe_sched_job { /** @drm: base DRM scheduler job */ @@ -58,6 +63,10 @@ struct xe_sched_job { bool ring_ops_flush_tlb; /** @ggtt: mapped in ggtt. */ bool ggtt; + /** @restore_replay: job being replayed for restore */ + bool restore_replay; + /** @last_replay: last job being replayed */ + bool last_replay; /** @ptrs: per instance pointers. 
*/ struct xe_job_ptrs ptrs[]; }; diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 86d47aaf0358..90244fe59b59 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -5,6 +5,7 @@ #include <linux/shrinker.h> +#include <drm/drm_managed.h> #include <drm/ttm/ttm_backup.h> #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_tt.h> @@ -53,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgea write_unlock(&shrinker->lock); } -static s64 xe_shrinker_walk(struct xe_device *xe, - struct ttm_operation_ctx *ctx, - const struct xe_bo_shrink_flags flags, - unsigned long to_scan, unsigned long *scanned) +static s64 __xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) { unsigned int mem_type; s64 freed = 0, lret; @@ -65,11 +66,15 @@ static s64 xe_shrinker_walk(struct xe_device *xe, struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, mem_type); struct ttm_bo_lru_cursor curs; struct ttm_buffer_object *ttm_bo; + struct ttm_lru_walk_arg arg = { + .ctx = ctx, + .trylock_only = true, + }; if (!man || !man->use_tt) continue; - ttm_bo_lru_for_each_reserved_guarded(&curs, man, ctx, ttm_bo) { + ttm_bo_lru_for_each_reserved_guarded(&curs, man, &arg, ttm_bo) { if (!ttm_bo_shrink_suitable(ttm_bo, ctx)) continue; @@ -81,6 +86,50 @@ static s64 xe_shrinker_walk(struct xe_device *xe, if (*scanned >= to_scan) break; } + /* Trylocks should never error, just fail. */ + xe_assert(xe, !IS_ERR(ttm_bo)); + } + + return freed; +} + +/* + * Try shrinking idle objects without writeback first, then if not sufficient, + * try also non-idle objects and finally if that's not sufficient either, + * add writeback. This avoids stalls and explicit writebacks with light or + * moderate memory pressure. 
+ */ +static s64 xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) +{ + bool no_wait_gpu = true; + struct xe_bo_shrink_flags save_flags = flags; + s64 lret, freed; + + swap(no_wait_gpu, ctx->no_wait_gpu); + save_flags.writeback = false; + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + swap(no_wait_gpu, ctx->no_wait_gpu); + if (lret < 0 || *scanned >= to_scan) + return lret; + + freed = lret; + if (!ctx->no_wait_gpu) { + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; + if (*scanned >= to_scan) + return freed; + } + + if (flags.writeback) { + lret = __xe_shrinker_walk(xe, ctx, flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; } return freed; @@ -192,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup); shrink_flags.purge = false; + lret = xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags, nr_to_scan, &nr_scanned); if (lret >= 0) @@ -213,24 +263,34 @@ static void xe_shrinker_pm(struct work_struct *work) xe_pm_runtime_put(shrinker->xe); } +static void xe_shrinker_fini(struct drm_device *drm, void *arg) +{ + struct xe_shrinker *shrinker = arg; + + xe_assert(shrinker->xe, !shrinker->shrinkable_pages); + xe_assert(shrinker->xe, !shrinker->purgeable_pages); + shrinker_free(shrinker->shrink); + flush_work(&shrinker->pm_worker); + kfree(shrinker); +} + /** * xe_shrinker_create() - Create an xe per-device shrinker * @xe: Pointer to the xe device. * - * Returns: A pointer to the created shrinker on success, - * Negative error code on failure. + * Return: %0 on success. Negative error code on failure. 
*/ -struct xe_shrinker *xe_shrinker_create(struct xe_device *xe) +int xe_shrinker_create(struct xe_device *xe) { struct xe_shrinker *shrinker = kzalloc(sizeof(*shrinker), GFP_KERNEL); if (!shrinker) - return ERR_PTR(-ENOMEM); + return -ENOMEM; shrinker->shrink = shrinker_alloc(0, "drm-xe_gem:%s", xe->drm.unique); if (!shrinker->shrink) { kfree(shrinker); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } INIT_WORK(&shrinker->pm_worker, xe_shrinker_pm); @@ -240,19 +300,7 @@ struct xe_shrinker *xe_shrinker_create(struct xe_device *xe) shrinker->shrink->scan_objects = xe_shrinker_scan; shrinker->shrink->private_data = shrinker; shrinker_register(shrinker->shrink); + xe->mem.shrinker = shrinker; - return shrinker; -} - -/** - * xe_shrinker_destroy() - Destroy an xe per-device shrinker - * @shrinker: Pointer to the shrinker to destroy. - */ -void xe_shrinker_destroy(struct xe_shrinker *shrinker) -{ - xe_assert(shrinker->xe, !shrinker->shrinkable_pages); - xe_assert(shrinker->xe, !shrinker->purgeable_pages); - shrinker_free(shrinker->shrink); - flush_work(&shrinker->pm_worker); - kfree(shrinker); + return drmm_add_action_or_reset(&xe->drm, xe_shrinker_fini, shrinker); } diff --git a/drivers/gpu/drm/xe/xe_shrinker.h b/drivers/gpu/drm/xe/xe_shrinker.h index 28a038f4fcbf..5132ae5192e1 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.h +++ b/drivers/gpu/drm/xe/xe_shrinker.h @@ -11,8 +11,6 @@ struct xe_device; void xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgeable); -struct xe_shrinker *xe_shrinker_create(struct xe_device *xe); - -void xe_shrinker_destroy(struct xe_shrinker *shrinker); +int xe_shrinker_create(struct xe_device *xe); #endif diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index a0eab44c0e76..ea411944609b 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -15,6 +15,7 @@ #include "xe_sriov.h" #include "xe_sriov_pf.h" #include "xe_sriov_vf.h" +#include "xe_sriov_vf_ccs.h" /** * 
xe_sriov_mode_to_string - Convert enum value to string. @@ -157,3 +158,19 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t size) strscpy(buf, "PF", size); return buf; } + +/** + * xe_sriov_init_late() - SR-IOV late initialization functions. + * @xe: the &xe_device to initialize + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_init_late(struct xe_device *xe) +{ + if (IS_SRIOV_PF(xe)) + return xe_sriov_pf_init_late(xe); + if (IS_SRIOV_VF(xe)) + return xe_sriov_vf_init_late(xe); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h index 688fbabf08f1..6db45df55615 100644 --- a/drivers/gpu/drm/xe/xe_sriov.h +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -18,6 +18,7 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t len); void xe_sriov_probe_early(struct xe_device *xe); void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p); int xe_sriov_init(struct xe_device *xe); +int xe_sriov_init_late(struct xe_device *xe); static inline enum xe_sriov_mode xe_device_sriov_mode(const struct xe_device *xe) { diff --git a/drivers/gpu/drm/xe/xe_sriov_packet.c b/drivers/gpu/drm/xe/xe_sriov_packet.c new file mode 100644 index 000000000000..bab994696896 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_packet.c @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_guc_klv_helpers.h" +#include "xe_printk.h" +#include "xe_sriov_packet.h" +#include "xe_sriov_packet_types.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_migration.h" +#include "xe_sriov_printk.h" + +static struct mutex *pf_migration_mutex(struct xe_device *xe, unsigned int vfid) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe)); + + return &xe->sriov.pf.vfs[vfid].migration.lock; +} + +static struct xe_sriov_packet **pf_pick_pending(struct xe_device *xe, 
unsigned int vfid)
+{
+	struct xe_sriov_packet **slot;
+
+	/* PF-only state, indexed by VF id and guarded by that VF's migration lock. */
+	xe_assert(xe, IS_SRIOV_PF(xe));
+	xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe));
+	lockdep_assert_held(pf_migration_mutex(xe, vfid));
+
+	slot = &xe->sriov.pf.vfs[vfid].migration.pending;
+
+	return slot;
+}
+
+/* Return the slot holding the VF's descriptor packet (migration lock must be held). */
+static struct xe_sriov_packet **
+pf_pick_descriptor(struct xe_device *xe, unsigned int vfid)
+{
+	struct xe_sriov_packet **slot;
+
+	xe_assert(xe, IS_SRIOV_PF(xe));
+	xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe));
+	lockdep_assert_held(pf_migration_mutex(xe, vfid));
+
+	slot = &xe->sriov.pf.vfs[vfid].migration.descriptor;
+
+	return slot;
+}
+
+/* Return the slot holding the VF's trailer packet (migration lock must be held). */
+static struct xe_sriov_packet **pf_pick_trailer(struct xe_device *xe, unsigned int vfid)
+{
+	struct xe_sriov_packet **slot;
+
+	xe_assert(xe, IS_SRIOV_PF(xe));
+	xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe));
+	lockdep_assert_held(pf_migration_mutex(xe, vfid));
+
+	slot = &xe->sriov.pf.vfs[vfid].migration.trailer;
+
+	return slot;
+}
+
+/*
+ * Select the slot to read from next: the descriptor if one is still queued,
+ * otherwise the pending packet (refilled from the save path when empty),
+ * otherwise the trailer. Returns NULL when none of the slots holds a packet.
+ * Note that the pending slot may end up holding whatever
+ * xe_sriov_pf_migration_save_consume() returned, callers check it with IS_ERR().
+ */
+static struct xe_sriov_packet **pf_pick_read_packet(struct xe_device *xe,
+						    unsigned int vfid)
+{
+	struct xe_sriov_packet **slot = pf_pick_descriptor(xe, vfid);
+
+	if (*slot)
+		return slot;
+
+	slot = pf_pick_pending(xe, vfid);
+	if (!*slot)
+		*slot = xe_sriov_pf_migration_save_consume(xe, vfid);
+	if (*slot)
+		return slot;
+
+	slot = pf_pick_trailer(xe, vfid);
+
+	return *slot ? slot : NULL;
+}
+
+/* Only VRAM packets keep their payload in a buffer object. */
+static bool pkt_needs_bo(struct xe_sriov_packet *data)
+{
+	return data->hdr.type == XE_SRIOV_PACKET_TYPE_VRAM;
+}
+
+/**
+ * xe_sriov_packet_alloc() - Allocate migration data packet
+ * @xe: the &xe_device
+ *
+ * Allocates just the zeroed packet structure and marks its whole header as
+ * outstanding; the payload backing storage is not set up here.
+ *
+ * Return: Pointer to &xe_sriov_packet on success,
+ *         NULL in case of error.
+ */
+struct xe_sriov_packet *xe_sriov_packet_alloc(struct xe_device *xe)
+{
+	struct xe_sriov_packet *pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
+
+	if (pkt) {
+		pkt->xe = xe;
+		pkt->hdr_remaining = sizeof(pkt->hdr);
+	}
+
+	return pkt;
+}
+
+/**
+ * xe_sriov_packet_free() - Free migration data packet.
+ * @data: the &xe_sriov_packet, may be NULL or an ERR_PTR (both are ignored)
+ *
+ * Releases the payload backing storage, if any was set up, and then the
+ * packet structure itself.
+ */
+void xe_sriov_packet_free(struct xe_sriov_packet *data)
+{
+	if (IS_ERR_OR_NULL(data))
+		return;
+
+	if (pkt_needs_bo(data)) {
+		/*
+		 * pkt_init() leaves the BO unset for zero-sized payloads and
+		 * when it fails, so a VRAM packet may legitimately have no BO.
+		 * NOTE(review): xe_bo_unpin_map_no_vm() is assumed not to
+		 * tolerate NULL - confirm.
+		 */
+		if (data->bo)
+			xe_bo_unpin_map_no_vm(data->bo);
+	} else {
+		kvfree(data->buff);
+	}
+
+	kfree(data);
+}
+
+/*
+ * Set up the payload backing storage described by the (already filled) header.
+ * Returns -EINVAL if the header names a GT that does not exist, 0 without
+ * allocating anything for a zero-sized payload, or the allocation error.
+ */
+static int pkt_init(struct xe_sriov_packet *data)
+{
+	struct xe_gt *gt = xe_device_get_gt(data->xe, data->hdr.gt_id);
+
+	if (!gt)
+		return -EINVAL;
+
+	if (data->hdr.size == 0)
+		return 0;
+
+	if (pkt_needs_bo(data)) {
+		struct xe_bo *bo;
+
+		/* Pinned, CPU-mapped BO; size is rounded up to whole pages. */
+		bo = xe_bo_create_pin_map_novm(data->xe, gt->tile, PAGE_ALIGN(data->hdr.size),
+					       ttm_bo_type_kernel,
+					       XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED, false);
+		if (IS_ERR(bo))
+			return PTR_ERR(bo);
+
+		data->bo = bo;
+		data->vaddr = bo->vmap.vaddr;
+	} else {
+		void *buff = kvzalloc(data->hdr.size, GFP_KERNEL);
+
+		if (!buff)
+			return -ENOMEM;
+
+		data->buff = buff;
+		data->vaddr = buff;
+	}
+
+	return 0;
+}
+
+#define XE_SRIOV_PACKET_SUPPORTED_VERSION 1
+
+/**
+ * xe_sriov_packet_init() - Initialize migration packet header and backing storage.
+ * @data: the &xe_sriov_packet
+ * @tile_id: tile identifier
+ * @gt_id: GT identifier
+ * @type: &xe_sriov_packet_type
+ * @offset: offset of data packet payload (within wider resource)
+ * @size: size of data packet payload
+ *
+ * The header is stamped with the supported protocol version and the whole
+ * payload is marked as remaining.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_packet_init(struct xe_sriov_packet *data, u8 tile_id, u8 gt_id,
+			 enum xe_sriov_packet_type type, loff_t offset, size_t size)
+{
+	data->hdr.version = XE_SRIOV_PACKET_SUPPORTED_VERSION;
+	data->hdr.type = type;
+	data->hdr.tile_id = tile_id;
+	data->hdr.gt_id = gt_id;
+	data->hdr.offset = offset;
+	data->hdr.size = size;
+	data->remaining = size;
+
+	return pkt_init(data);
+}
+
+/**
+ * xe_sriov_packet_init_from_hdr() - Initialize migration packet backing storage based on header.
+ * @data: the &xe_sriov_packet
+ *
+ * Header data is expected to be filled prior to calling this function.
+ *
+ * Return: 0 on success, -EINVAL if the header carries an unsupported protocol
+ *	   version, or another negative error code on failure.
+ */
+int xe_sriov_packet_init_from_hdr(struct xe_sriov_packet *data)
+{
+	xe_assert(data->xe, !data->hdr_remaining);
+
+	if (data->hdr.version == XE_SRIOV_PACKET_SUPPORTED_VERSION) {
+		data->remaining = data->hdr.size;
+		return pkt_init(data);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Copy the not-yet-consumed part of the header to userspace, at most @len
+ * bytes. Returns the number of bytes copied, -EINVAL if the header was
+ * already fully consumed, -EFAULT if the copy to userspace fails.
+ */
+static ssize_t pkt_hdr_read(struct xe_sriov_packet *data,
+			    char __user *buf, size_t len)
+{
+	loff_t done = sizeof(data->hdr) - data->hdr_remaining;
+	const void *src = (void *)&data->hdr + done;
+
+	if (!data->hdr_remaining)
+		return -EINVAL;
+
+	if (len > data->hdr_remaining)
+		len = data->hdr_remaining;
+
+	if (copy_to_user(buf, src, len))
+		return -EFAULT;
+
+	data->hdr_remaining -= len;
+
+	return len;
+}
+
+/*
+ * Copy the not-yet-consumed part of the payload to userspace, at most @len
+ * bytes. Returns the number of bytes copied or -EFAULT.
+ */
+static ssize_t pkt_data_read(struct xe_sriov_packet *data,
+			     char __user *buf, size_t len)
+{
+	const void *src = data->vaddr + (data->hdr.size - data->remaining);
+
+	if (len > data->remaining)
+		len = data->remaining;
+
+	if (copy_to_user(buf, src, len))
+		return -EFAULT;
+
+	data->remaining -= len;
+
+	return len;
+}
+
+/*
+ * Read from the packet in slot @data: header bytes first, payload afterwards.
+ * Once both are fully consumed the packet is freed and the slot is cleared.
+ */
+static ssize_t pkt_read_single(struct xe_sriov_packet **data,
+			       unsigned int vfid, char __user *buf, size_t len)
+{
+	struct xe_sriov_packet *pkt = *data;
+	ssize_t copied;
+
+	copied = pkt->hdr_remaining ? pkt_hdr_read(pkt, buf, len) :
+				      pkt_data_read(pkt, buf, len);
+
+	if (!pkt->remaining && !pkt->hdr_remaining) {
+		xe_sriov_packet_free(pkt);
+		*data = NULL;
+	}
+
+	return copied;
+}
+
+/**
+ * xe_sriov_packet_read_single() - Read migration data from a single packet.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ * @buf: start address of userspace buffer
+ * @len: requested read size from userspace
+ *
+ * Return: number of bytes that has been successfully read,
+ *	   -ENODATA if there is no packet left to read,
+ *	   other -errno on failure.
+ */
+ssize_t xe_sriov_packet_read_single(struct xe_device *xe, unsigned int vfid,
+				    char __user *buf, size_t len)
+{
+	struct xe_sriov_packet **slot = pf_pick_read_packet(xe, vfid);
+
+	/* No slot holds a packet. */
+	if (!slot)
+		return -ENODATA;
+
+	/* The slot may carry an error pointer instead of a packet. */
+	if (IS_ERR(*slot))
+		return PTR_ERR(*slot);
+
+	return pkt_read_single(slot, vfid, buf, len);
+}
+
+/*
+ * Fill the not-yet-received part of the header from userspace, at most @len
+ * bytes. When the header becomes complete, the backing storage is initialized
+ * from it. Returns the number of bytes consumed, -EFAULT if the copy from
+ * userspace fails, or the error from xe_sriov_packet_init_from_hdr().
+ */
+static ssize_t pkt_hdr_write(struct xe_sriov_packet *data,
+			     const char __user *buf, size_t len)
+{
+	loff_t done = sizeof(data->hdr) - data->hdr_remaining;
+	void *dst = (void *)&data->hdr + done;
+
+	if (len > data->hdr_remaining)
+		len = data->hdr_remaining;
+
+	if (copy_from_user(dst, buf, len))
+		return -EFAULT;
+
+	data->hdr_remaining -= len;
+	if (data->hdr_remaining)
+		return len;
+
+	/* Header complete. */
+	{
+		int ret = xe_sriov_packet_init_from_hdr(data);
+
+		if (ret)
+			return ret;
+	}
+
+	return len;
+}
+
+/*
+ * Fill the not-yet-received part of the payload from userspace, at most @len
+ * bytes. Returns the number of bytes consumed or -EFAULT.
+ */
+static ssize_t pkt_data_write(struct xe_sriov_packet *data,
+			      const char __user *buf, size_t len)
+{
+	void *dst = data->vaddr + (data->hdr.size - data->remaining);
+
+	if (len > data->remaining)
+		len = data->remaining;
+
+	if (copy_from_user(dst, buf, len))
+		return -EFAULT;
+
+	data->remaining -= len;
+
+	return len;
+}
+
+/**
+ * xe_sriov_packet_write_single() - Write migration data to a single packet.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ * @buf: start address of userspace buffer
+ * @len: requested write size from userspace
+ *
+ * Return: number of bytes that has been successfully written,
+ *	   -errno on failure.
+ */
+ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid,
+				     const char __user *buf, size_t len)
+{
+	struct xe_sriov_packet **data = pf_pick_pending(xe, vfid);
+	int ret;
+	ssize_t copied;
+
+	/* Start a new packet when the pending slot is empty (or holds an error). */
+	if (IS_ERR_OR_NULL(*data)) {
+		*data = xe_sriov_packet_alloc(xe);
+		if (!*data)
+			return -ENOMEM;
+	}
+
+	/* The header is received first, the payload afterwards. */
+	if ((*data)->hdr_remaining)
+		copied = pkt_hdr_write(*data, buf, len);
+	else
+		copied = pkt_data_write(*data, buf, len);
+
+	/*
+	 * NOTE(review): this is also reached when the write above failed; a
+	 * header rejected after being fully received has hdr_remaining == 0,
+	 * so such a packet may still be handed to restore - confirm intent.
+	 */
+	if ((*data)->hdr_remaining == 0 && (*data)->remaining == 0) {
+		/* Packet complete: hand it over to the restore path. */
+		ret = xe_sriov_pf_migration_restore_produce(xe, vfid, *data);
+		if (ret) {
+			xe_sriov_packet_free(*data);
+			/* Never leave a pointer to the freed packet in the slot. */
+			*data = NULL;
+			return ret;
+		}
+
+		*data = NULL;
+	}
+
+	return copied;
+}
+
+#define MIGRATION_KLV_DEVICE_DEVID_KEY 0xf001u
+#define MIGRATION_KLV_DEVICE_DEVID_LEN 1u
+#define MIGRATION_KLV_DEVICE_REVID_KEY 0xf002u
+#define MIGRATION_KLV_DEVICE_REVID_LEN 1u
+
+#define MIGRATION_DESCRIPTOR_DWORDS (GUC_KLV_LEN_MIN + MIGRATION_KLV_DEVICE_DEVID_LEN + \
+				      GUC_KLV_LEN_MIN + MIGRATION_KLV_DEVICE_REVID_LEN)
+/*
+ * Build the descriptor packet (device id and revision id KLVs) and store it in
+ * the VF's descriptor slot. Returns 0 on success or a negative error code, so
+ * the return type must be signed.
+ */
+static int pf_descriptor_init(struct xe_device *xe, unsigned int vfid)
+{
+	struct xe_sriov_packet **desc = pf_pick_descriptor(xe, vfid);
+	struct xe_sriov_packet *data;
+	unsigned int len = 0;
+	u32 *klvs;
+	int ret;
+
+	data = xe_sriov_packet_alloc(xe);
+	if (!data)
+		return -ENOMEM;
+
+	ret = xe_sriov_packet_init(data, 0, 0, XE_SRIOV_PACKET_TYPE_DESCRIPTOR,
+				   0, MIGRATION_DESCRIPTOR_DWORDS * sizeof(u32));
+	if (ret) {
+		xe_sriov_packet_free(data);
+		return ret;
+	}
+
+	/* Each KLV is one header dword followed by its value dword(s). */
+	klvs = data->vaddr;
+	klvs[len++] = PREP_GUC_KLV_CONST(MIGRATION_KLV_DEVICE_DEVID_KEY,
+					 MIGRATION_KLV_DEVICE_DEVID_LEN);
+	klvs[len++] = xe->info.devid;
+	klvs[len++] = PREP_GUC_KLV_CONST(MIGRATION_KLV_DEVICE_REVID_KEY,
+					 MIGRATION_KLV_DEVICE_REVID_LEN);
+	klvs[len++] = xe->info.revid;
+
+	xe_assert(xe, len == MIGRATION_DESCRIPTOR_DWORDS);
+
+	*desc = data;
+
+	return 0;
+}
+
+/**
+ * xe_sriov_packet_process_descriptor() - Process migration data descriptor packet.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ * @data: the &xe_sriov_packet containing the descriptor
+ *
+ * The descriptor uses the same KLV format as GuC, and contains metadata used for
+ * checking migration data compatibility. The device id and revision id KLVs
+ * are compared against this device; KLVs with unknown keys are skipped.
+ *
+ * Return: 0 on success, -EINVAL if the descriptor is malformed, -ENODEV if the
+ *	   device id or revision id does not match.
+ */
+int xe_sriov_packet_process_descriptor(struct xe_device *xe, unsigned int vfid,
+				       struct xe_sriov_packet *data)
+{
+	u32 num_dwords = data->hdr.size / sizeof(u32);
+	u32 *klvs = data->vaddr;
+
+	xe_assert(xe, data->hdr.type == XE_SRIOV_PACKET_TYPE_DESCRIPTOR);
+
+	if (data->hdr.size % sizeof(u32)) {
+		xe_sriov_warn(xe, "Aborting migration, descriptor not in KLV format (size=%llu)\n",
+			      data->hdr.size);
+		return -EINVAL;
+	}
+
+	while (num_dwords >= GUC_KLV_LEN_MIN) {
+		u32 key = FIELD_GET(GUC_KLV_0_KEY, klvs[0]);
+		u32 len = FIELD_GET(GUC_KLV_0_LEN, klvs[0]);
+
+		/* Step over the KLV header; klvs now points at the value. */
+		klvs += GUC_KLV_LEN_MIN;
+		num_dwords -= GUC_KLV_LEN_MIN;
+
+		if (len > num_dwords) {
+			xe_sriov_warn(xe, "Aborting migration, truncated KLV %#x, len %u\n",
+				      key, len);
+			return -EINVAL;
+		}
+
+		switch (key) {
+		case MIGRATION_KLV_DEVICE_DEVID_KEY:
+			/*
+			 * The value dword is read below, so it must be present:
+			 * a zero-length KLV at the end of the descriptor would
+			 * otherwise be read past the end of the payload.
+			 */
+			if (len < MIGRATION_KLV_DEVICE_DEVID_LEN) {
+				xe_sriov_warn(xe, "Aborting migration, truncated KLV %#x, len %u\n",
+					      key, len);
+				return -EINVAL;
+			}
+			if (*klvs != xe->info.devid) {
+				xe_sriov_warn(xe,
+					      "Aborting migration, devid mismatch %#06x!=%#06x\n",
+					      *klvs, xe->info.devid);
+				return -ENODEV;
+			}
+			break;
+		case MIGRATION_KLV_DEVICE_REVID_KEY:
+			/* Same as above: the value dword must be present. */
+			if (len < MIGRATION_KLV_DEVICE_REVID_LEN) {
+				xe_sriov_warn(xe, "Aborting migration, truncated KLV %#x, len %u\n",
+					      key, len);
+				return -EINVAL;
+			}
+			if (*klvs != xe->info.revid) {
+				xe_sriov_warn(xe,
+					      "Aborting migration, revid mismatch %#06x!=%#06x\n",
+					      *klvs, xe->info.revid);
+				return -ENODEV;
+			}
+			break;
+		default:
+			xe_sriov_dbg(xe,
+				     "Skipping unknown migration KLV %#x, len=%u\n",
+				     key, len);
+			/* Dump at most 64 bytes of the unknown value. */
+			print_hex_dump_bytes("desc: ", DUMP_PREFIX_OFFSET, klvs,
+					     min(SZ_64, len * sizeof(u32)));
+			break;
+		}
+
+		/* Advance to the next KLV header. */
+		klvs += len;
+		num_dwords -= len;
+	}
+
+	return 0;
+}
+
+/* Reset the VF's pending packet slot to empty. */
+static void pf_pending_init(struct xe_device *xe, unsigned int vfid)
+{
+	struct xe_sriov_packet **data = pf_pick_pending(xe, vfid);
+
+	*data = NULL;
+}
+
+#define MIGRATION_TRAILER_SIZE 0
+static int pf_trailer_init(struct xe_device *xe, unsigned int
vfid) +{ + struct xe_sriov_packet **trailer = pf_pick_trailer(xe, vfid); + struct xe_sriov_packet *data; + int ret; + + data = xe_sriov_packet_alloc(xe); + if (!data) + return -ENOMEM; + + ret = xe_sriov_packet_init(data, 0, 0, XE_SRIOV_PACKET_TYPE_TRAILER, + 0, MIGRATION_TRAILER_SIZE); + if (ret) { + xe_sriov_packet_free(data); + return ret; + } + + *trailer = data; + + return 0; +} + +/** + * xe_sriov_packet_save_init() - Initialize the pending save migration packets. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * Return: 0 on success, -errno on failure. + */ +int xe_sriov_packet_save_init(struct xe_device *xe, unsigned int vfid) +{ + int ret; + + scoped_cond_guard(mutex_intr, return -EINTR, pf_migration_mutex(xe, vfid)) { + ret = pf_descriptor_init(xe, vfid); + if (ret) + return ret; + + ret = pf_trailer_init(xe, vfid); + if (ret) + return ret; + + pf_pending_init(xe, vfid); + } + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_packet.h b/drivers/gpu/drm/xe/xe_sriov_packet.h new file mode 100644 index 000000000000..2731e52cf7ef --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_packet.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PACKET_H_ +#define _XE_SRIOV_PACKET_H_ + +#include <linux/types.h> + +struct xe_device; +struct xe_sriov_packet; +enum xe_sriov_packet_type; + +struct xe_sriov_packet *xe_sriov_packet_alloc(struct xe_device *xe); +void xe_sriov_packet_free(struct xe_sriov_packet *data); + +int xe_sriov_packet_init(struct xe_sriov_packet *data, u8 tile_id, u8 gt_id, + enum xe_sriov_packet_type, loff_t offset, size_t size); +int xe_sriov_packet_init_from_hdr(struct xe_sriov_packet *data); + +ssize_t xe_sriov_packet_read_single(struct xe_device *xe, unsigned int vfid, + char __user *buf, size_t len); +ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid, + const char __user *buf, size_t len); +int xe_sriov_packet_save_init(struct 
xe_device *xe, unsigned int vfid); +int xe_sriov_packet_process_descriptor(struct xe_device *xe, unsigned int vfid, + struct xe_sriov_packet *data); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_packet_types.h b/drivers/gpu/drm/xe/xe_sriov_packet_types.h new file mode 100644 index 000000000000..078a1c95e786 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_packet_types.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PACKET_TYPES_H_ +#define _XE_SRIOV_PACKET_TYPES_H_ + +#include <linux/types.h> + +/** + * enum xe_sriov_packet_type - Xe SR-IOV VF migration data packet type + * @XE_SRIOV_PACKET_TYPE_DESCRIPTOR: Descriptor with VF device metadata + * @XE_SRIOV_PACKET_TYPE_TRAILER: Trailer indicating end-of-stream + * @XE_SRIOV_PACKET_TYPE_GGTT: Global GTT migration data + * @XE_SRIOV_PACKET_TYPE_MMIO: MMIO registers migration data + * @XE_SRIOV_PACKET_TYPE_GUC: GuC firmware migration data + * @XE_SRIOV_PACKET_TYPE_VRAM: VRAM migration data + */ +enum xe_sriov_packet_type { + /* Skipping 0 to catch uninitialized data */ + XE_SRIOV_PACKET_TYPE_DESCRIPTOR = 1, + XE_SRIOV_PACKET_TYPE_TRAILER, + XE_SRIOV_PACKET_TYPE_GGTT, + XE_SRIOV_PACKET_TYPE_MMIO, + XE_SRIOV_PACKET_TYPE_GUC, + XE_SRIOV_PACKET_TYPE_VRAM, +}; + +/** + * struct xe_sriov_packet_hdr - Xe SR-IOV VF migration data packet header + */ +struct xe_sriov_packet_hdr { + /** @version: migration data protocol version */ + u8 version; + /** @type: migration data type */ + u8 type; + /** @tile_id: migration data tile id */ + u8 tile_id; + /** @gt_id: migration data gt id */ + u8 gt_id; + /** @flags: migration data flags */ + u32 flags; + /** + * @offset: offset into the resource; + * used when multiple packets of given type are used for migration + */ + u64 offset; + /** @size: migration data size */ + u64 size; +} __packed; + +/** + * struct xe_sriov_packet - Xe SR-IOV VF migration data packet + */ +struct xe_sriov_packet { + /** @xe: the PF 
&xe_device this data packet belongs to */ + struct xe_device *xe; + /** @vaddr: CPU pointer to payload data */ + void *vaddr; + /** @remaining: payload data remaining */ + size_t remaining; + /** @hdr_remaining: header data remaining */ + size_t hdr_remaining; + union { + /** @bo: Buffer object with migration data */ + struct xe_bo *bo; + /** @buff: Buffer with migration data */ + void *buff; + }; + /** @hdr: data packet header */ + struct xe_sriov_packet_hdr hdr; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 0f721ae17b26..7c779d63179f 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -3,17 +3,27 @@ * Copyright © 2023-2024 Intel Corporation */ +#include <linux/debugfs.h> +#include <drm/drm_debugfs.h> #include <drm/drm_managed.h> #include "xe_assert.h" +#include "xe_configfs.h" #include "xe_device.h" +#include "xe_gt_sriov_pf.h" #include "xe_module.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_migration.h" +#include "xe_sriov_pf_service.h" +#include "xe_sriov_pf_sysfs.h" #include "xe_sriov_printk.h" static unsigned int wanted_max_vfs(struct xe_device *xe) { + if (IS_ENABLED(CONFIG_CONFIGFS_FS)) + return xe_configfs_get_max_vfs(to_pci_dev(xe->drm.dev)); return xe_modparam.max_vfs; } @@ -80,9 +90,178 @@ bool xe_sriov_pf_readiness(struct xe_device *xe) */ int xe_sriov_pf_init_early(struct xe_device *xe) { + int err; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + xe->sriov.pf.vfs = drmm_kcalloc(&xe->drm, 1 + xe_sriov_pf_get_totalvfs(xe), + sizeof(*xe->sriov.pf.vfs), GFP_KERNEL); + if (!xe->sriov.pf.vfs) + return -ENOMEM; + + err = drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); + if (err) + return err; + + err = xe_sriov_pf_migration_init(xe); + if (err) + return err; + + xe_guard_init(&xe->sriov.pf.guard_vfs_enabling, "vfs_enabling"); + + xe_sriov_pf_service_init(xe); + + return 0; +} + +/** + * xe_sriov_pf_init_late() 
- Late initialization of the SR-IOV PF. + * @xe: the &xe_device to initialize + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_init_late(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + int err; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_init(gt); + if (err) + return err; + } + + err = xe_sriov_pf_sysfs_init(xe); + if (err) + return err; + + return 0; +} + +/** + * xe_sriov_pf_wait_ready() - Wait until PF is ready to operate. + * @xe: the &xe_device to test + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_wait_ready(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + int err; + + if (xe_device_wedged(xe)) + return -ECANCELED; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_wait_ready(gt); + if (err) + return err; + } + + return 0; +} + +/** + * xe_sriov_pf_arm_guard() - Arm the guard for exclusive/lockdown mode. + * @xe: the PF &xe_device + * @guard: the &xe_guard to arm + * @lockdown: arm for lockdown(true) or exclusive(false) mode + * @who: the address of the new owner, or NULL if it's a caller + * + * This function can only be called on PF. + * + * It is a simple wrapper for xe_guard_arm() with additional debug + * messages. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_arm_guard(struct xe_device *xe, struct xe_guard *guard, + bool lockdown, void *who) +{ + void *new_owner = who ?: __builtin_return_address(0); + int err; + + err = xe_guard_arm(guard, lockdown, new_owner); + if (err) { + xe_sriov_dbg(xe, "%s/%s mode denied (%pe) last owner %ps\n", + guard->name, xe_guard_mode_str(lockdown), + ERR_PTR(err), guard->owner); + return err; + } + + xe_sriov_dbg_verbose(xe, "%s/%s by %ps\n", + guard->name, xe_guard_mode_str(lockdown), + new_owner); + return 0; +} + +/** + * xe_sriov_pf_disarm_guard() - Disarm the guard. + * @xe: the PF &xe_device + * @guard: the &xe_guard to disarm + * @lockdown: disarm from lockdown(true) or exclusive(false) mode + * @who: the address of the indirect owner, or NULL if it's a caller + * + * This function can only be called on PF. + * + * It is a simple wrapper for xe_guard_disarm() with additional debug + * messages and xe_assert() to easily catch any illegal calls. + */ +void xe_sriov_pf_disarm_guard(struct xe_device *xe, struct xe_guard *guard, + bool lockdown, void *who) +{ + bool disarmed; + + xe_sriov_dbg_verbose(xe, "%s/%s by %ps\n", + guard->name, xe_guard_mode_str(lockdown), + who ?: __builtin_return_address(0)); + + disarmed = xe_guard_disarm(guard, lockdown); + xe_assert_msg(xe, disarmed, "%s/%s not armed? last owner %ps", + guard->name, xe_guard_mode_str(lockdown), guard->owner); +} + +/** + * xe_sriov_pf_lockdown() - Lockdown the PF to prevent VFs enabling. + * @xe: the PF &xe_device + * + * This function can only be called on PF. + * + * Once the PF is locked down, it will not enable VFs. + * If VFs are already enabled, the -EBUSY will be returned. + * To allow the PF enable VFs again call xe_sriov_pf_end_lockdown(). + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_lockdown(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + + return xe_sriov_pf_arm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, true, + __builtin_return_address(0)); +} + +/** + * xe_sriov_pf_end_lockdown() - Allow the PF to enable VFs again. + * @xe: the PF &xe_device + * + * This function can only be called on PF. + * See xe_sriov_pf_lockdown() for details. + */ +void xe_sriov_pf_end_lockdown(struct xe_device *xe) +{ xe_assert(xe, IS_SRIOV_PF(xe)); - return drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); + xe_sriov_pf_disarm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, true, + __builtin_return_address(0)); } /** diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h index d1220e70e1c0..b4d050ad5b7c 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf.h @@ -8,23 +8,24 @@ #include <linux/types.h> +struct dentry; struct drm_printer; struct xe_device; #ifdef CONFIG_PCI_IOV bool xe_sriov_pf_readiness(struct xe_device *xe); int xe_sriov_pf_init_early(struct xe_device *xe); +int xe_sriov_pf_init_late(struct xe_device *xe); +int xe_sriov_pf_wait_ready(struct xe_device *xe); +int xe_sriov_pf_lockdown(struct xe_device *xe); +void xe_sriov_pf_end_lockdown(struct xe_device *xe); void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p); #else -static inline bool xe_sriov_pf_readiness(struct xe_device *xe) -{ - return false; -} - -static inline int xe_sriov_pf_init_early(struct xe_device *xe) -{ - return 0; -} +static inline bool xe_sriov_pf_readiness(struct xe_device *xe) { return false; } +static inline int xe_sriov_pf_init_early(struct xe_device *xe) { return 0; } +static inline int xe_sriov_pf_init_late(struct xe_device *xe) { return 0; } +static inline int xe_sriov_pf_lockdown(struct xe_device *xe) { return 0; } +static inline void xe_sriov_pf_end_lockdown(struct xe_device *xe) { } #endif #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.c 
b/drivers/gpu/drm/xe/xe_sriov_pf_control.c new file mode 100644 index 000000000000..ed4b9820b06e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.c @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_device.h" +#include "xe_gt_sriov_pf_control.h" +#include "xe_gt_sriov_pf_migration.h" +#include "xe_sriov_packet.h" +#include "xe_sriov_pf_control.h" +#include "xe_sriov_printk.h" + +/** + * xe_sriov_pf_control_pause_vf() - Pause a VF on all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier (can't be 0 == PFID) + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_control_pause_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_control_pause_vf(gt, vfid); + result = result ? -EUCLEAN : err; + } + + if (result) + return result; + + xe_sriov_info(xe, "VF%u paused!\n", vfid); + return 0; +} + +/** + * xe_sriov_pf_control_resume_vf() - Resume a VF on all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_control_resume_vf(gt, vfid); + result = result ? -EUCLEAN : err; + } + + if (result) + return result; + + xe_sriov_info(xe, "VF%u resumed!\n", vfid); + return 0; +} + +/** + * xe_sriov_pf_control_stop_vf - Stop a VF on all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_control_stop_vf(gt, vfid); + result = result ? -EUCLEAN : err; + } + + if (result) + return result; + + xe_sriov_info(xe, "VF%u stopped!\n", vfid); + return 0; +} + +/** + * xe_sriov_pf_control_reset_vf() - Perform a VF reset (FLR). + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_control_trigger_flr(gt, vfid); + result = result ? -EUCLEAN : err; + } + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_control_wait_flr(gt, vfid); + result = result ? -EUCLEAN : err; + } + + return result; +} + +/** + * xe_sriov_pf_control_wait_flr() - Wait for a VF reset (FLR) to complete. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_control_wait_flr(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_control_wait_flr(gt, vfid); + result = result ? -EUCLEAN : err; + } + + return result; +} + +/** + * xe_sriov_pf_control_sync_flr() - Synchronize a VF FLR between all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int ret; + + for_each_gt(gt, xe, id) { + ret = xe_gt_sriov_pf_control_sync_flr(gt, vfid, false); + if (ret < 0) + return ret; + } + for_each_gt(gt, xe, id) { + ret = xe_gt_sriov_pf_control_sync_flr(gt, vfid, true); + if (ret < 0) + return ret; + } + + return 0; +} + +/** + * xe_sriov_pf_control_trigger_save_vf() - Start VF migration data SAVE sequence on all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int ret; + + ret = xe_sriov_packet_save_init(xe, vfid); + if (ret) + return ret; + + for_each_gt(gt, xe, id) { + xe_gt_sriov_pf_migration_save_init(gt, vfid); + + ret = xe_gt_sriov_pf_control_trigger_save_vf(gt, vfid); + if (ret) + return ret; + } + + return 0; +} + +/** + * xe_sriov_pf_control_finish_save_vf() - Complete VF migration data SAVE sequence on all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_control_finish_save_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int ret; + + for_each_gt(gt, xe, id) { + ret = xe_gt_sriov_pf_control_finish_save_vf(gt, vfid); + if (ret) + break; + } + + return ret; +} + +/** + * xe_sriov_pf_control_trigger_restore_vf() - Start VF migration data RESTORE sequence on all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_control_trigger_restore_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int ret; + + for_each_gt(gt, xe, id) { + ret = xe_gt_sriov_pf_control_trigger_restore_vf(gt, vfid); + if (ret) + return ret; + } + + return ret; +} + +/** + * xe_sriov_pf_control_finish_restore_vf() - Complete VF migration data RESTORE sequence on all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_control_finish_restore_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int ret; + + for_each_gt(gt, xe, id) { + ret = xe_gt_sriov_pf_control_finish_restore_vf(gt, vfid); + if (ret) + break; + } + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_sriov_pf_control.h new file mode 100644 index 000000000000..ef9f219b2109 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_CONTROL_H_ +#define _XE_SRIOV_PF_CONTROL_H_ + +struct xe_device; + +int xe_sriov_pf_control_pause_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_wait_flr(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_finish_save_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_trigger_restore_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_finish_restore_vf(struct xe_device *xe, 
unsigned int vfid); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c new file mode 100644 index 000000000000..bad751217e1e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c @@ -0,0 +1,395 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/debugfs.h> +#include <drm/drm_debugfs.h> + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_pm.h" +#include "xe_sriov_pf.h" +#include "xe_sriov_pf_control.h" +#include "xe_sriov_pf_debugfs.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_migration.h" +#include "xe_sriov_pf_provision.h" +#include "xe_sriov_pf_service.h" +#include "xe_sriov_printk.h" +#include "xe_tile_sriov_pf_debugfs.h" + +/* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov # d_inode->i_private = (xe_device*) + * │ ├── pf # d_inode->i_private = (xe_device*) + * │ ├── vf1 # d_inode->i_private = VFID(1) + * : : + * │ ├── vfN # d_inode->i_private = VFID(N) + */ + +static void *extract_priv(struct dentry *d) +{ + return d->d_inode->i_private; +} + +static struct xe_device *extract_xe(struct dentry *d) +{ + return extract_priv(d->d_parent); +} + +static unsigned int extract_vfid(struct dentry *d) +{ + void *p = extract_priv(d); + + return p == extract_xe(d) ? PFID : (uintptr_t)p; +} + +/* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * │ ├── restore_auto_provisioning + * │ : + * │ ├── pf/ + * │ ├── vf1 + * │ │ ├── ... 
+ */ + +static ssize_t from_file_write_to_xe_call(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos, + int (*call)(struct xe_device *)) +{ + struct dentry *dent = file_dentry(file); + struct xe_device *xe = extract_xe(dent); + bool yes; + int ret; + + if (*ppos) + return -EINVAL; + ret = kstrtobool_from_user(userbuf, count, &yes); + if (ret < 0) + return ret; + if (yes) { + xe_pm_runtime_get(xe); + ret = call(xe); + xe_pm_runtime_put(xe); + } + if (ret < 0) + return ret; + return count; +} + +#define DEFINE_SRIOV_ATTRIBUTE(OP) \ +static int OP##_show(struct seq_file *s, void *unused) \ +{ \ + return 0; \ +} \ +static ssize_t OP##_write(struct file *file, const char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + return from_file_write_to_xe_call(file, userbuf, count, ppos, \ + xe_sriov_pf_##OP); \ +} \ +DEFINE_SHOW_STORE_ATTRIBUTE(OP) + +static inline int xe_sriov_pf_restore_auto_provisioning(struct xe_device *xe) +{ + return xe_sriov_pf_provision_set_mode(xe, XE_SRIOV_PROVISIONING_MODE_AUTO); +} + +DEFINE_SRIOV_ATTRIBUTE(restore_auto_provisioning); + +static int lockdown_vfs_enabling_open(struct inode *inode, struct file *file) +{ + struct dentry *dent = file_dentry(file); + struct xe_device *xe = extract_xe(dent); + ssize_t ret; + + ret = xe_sriov_pf_lockdown(xe); + if (ret < 0) + return ret; + + file->private_data = xe; + return nonseekable_open(inode, file); +} + +static int lockdown_vfs_enabling_release(struct inode *inode, struct file *file) +{ + struct xe_device *xe = file->private_data; + + xe_sriov_pf_end_lockdown(xe); + return 0; +} + +static const struct file_operations lockdown_vfs_enabling_fops = { + .owner = THIS_MODULE, + .open = lockdown_vfs_enabling_open, + .release = lockdown_vfs_enabling_release, +}; + +static void pf_populate_root(struct xe_device *xe, struct dentry *dent) +{ + debugfs_create_file("restore_auto_provisioning", 0200, dent, xe, + &restore_auto_provisioning_fops); + 
debugfs_create_file("lockdown_vfs_enabling", 0400, dent, xe, + &lockdown_vfs_enabling_fops); +} + +static int simple_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct dentry *parent = node->dent->d_parent; + struct xe_device *xe = parent->d_inode->i_private; + void (*print)(struct xe_device *, struct drm_printer *) = node->info_ent->data; + + print(xe, &p); + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + { .name = "vfs", .show = simple_show, .data = xe_sriov_pf_print_vfs_summary }, + { .name = "versions", .show = simple_show, .data = xe_sriov_pf_service_print_versions }, +}; + +static void pf_populate_pf(struct xe_device *xe, struct dentry *pfdent) +{ + struct drm_minor *minor = xe->drm.primary; + + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), pfdent, minor); +} + +/* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * │ ├── vf1 + * │ │ ├── migration_data + * │ │ ├── pause + * │ │ ├── reset + * │ │ ├── resume + * │ │ ├── stop + * │ │ ├── save + * │ │ ├── restore + * │ │ : + * │ ├── vf2 + * │ │ ├── ... 
+ */ + +static int from_file_read_to_vf_call(struct seq_file *s, + int (*call)(struct xe_device *, unsigned int)) +{ + struct dentry *dent = file_dentry(s->file)->d_parent; + struct xe_device *xe = extract_xe(dent); + unsigned int vfid = extract_vfid(dent); + int ret; + + xe_pm_runtime_get(xe); + ret = call(xe, vfid); + xe_pm_runtime_put(xe); + + if (ret < 0) + return ret; + + return 0; +} + +static ssize_t from_file_write_to_vf_call(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos, + int (*call)(struct xe_device *, unsigned int)) +{ + struct dentry *dent = file_dentry(file)->d_parent; + struct xe_device *xe = extract_xe(dent); + unsigned int vfid = extract_vfid(dent); + bool yes; + int ret; + + if (*ppos) + return -EINVAL; + ret = kstrtobool_from_user(userbuf, count, &yes); + if (ret < 0) + return ret; + if (yes) { + xe_pm_runtime_get(xe); + ret = call(xe, vfid); + xe_pm_runtime_put(xe); + } + if (ret < 0) + return ret; + return count; +} + +#define DEFINE_VF_CONTROL_ATTRIBUTE(OP) \ +static int OP##_show(struct seq_file *s, void *unused) \ +{ \ + return 0; \ +} \ +static ssize_t OP##_write(struct file *file, const char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + return from_file_write_to_vf_call(file, userbuf, count, ppos, \ + xe_sriov_pf_control_##OP); \ +} \ +DEFINE_SHOW_STORE_ATTRIBUTE(OP) + +#define DEFINE_VF_CONTROL_ATTRIBUTE_RW(OP) \ +static int OP##_show(struct seq_file *s, void *unused) \ +{ \ + return from_file_read_to_vf_call(s, \ + xe_sriov_pf_control_finish_##OP); \ +} \ +static ssize_t OP##_write(struct file *file, const char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + return from_file_write_to_vf_call(file, userbuf, count, ppos, \ + xe_sriov_pf_control_trigger_##OP); \ +} \ +DEFINE_SHOW_STORE_ATTRIBUTE(OP) + +DEFINE_VF_CONTROL_ATTRIBUTE(pause_vf); +DEFINE_VF_CONTROL_ATTRIBUTE(resume_vf); +DEFINE_VF_CONTROL_ATTRIBUTE(stop_vf); +DEFINE_VF_CONTROL_ATTRIBUTE(reset_vf); 
+DEFINE_VF_CONTROL_ATTRIBUTE_RW(save_vf); +DEFINE_VF_CONTROL_ATTRIBUTE_RW(restore_vf); + +static ssize_t data_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) +{ + struct dentry *dent = file_dentry(file)->d_parent; + struct xe_device *xe = extract_xe(dent); + unsigned int vfid = extract_vfid(dent); + + if (*pos) + return -ESPIPE; + + return xe_sriov_pf_migration_write(xe, vfid, buf, count); +} + +static ssize_t data_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + struct dentry *dent = file_dentry(file)->d_parent; + struct xe_device *xe = extract_xe(dent); + unsigned int vfid = extract_vfid(dent); + + if (*ppos) + return -ESPIPE; + + return xe_sriov_pf_migration_read(xe, vfid, buf, count); +} + +static const struct file_operations data_vf_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = data_write, + .read = data_read, + .llseek = default_llseek, +}; + +static ssize_t size_read(struct file *file, char __user *ubuf, size_t count, loff_t *ppos) +{ + struct dentry *dent = file_dentry(file)->d_parent; + struct xe_device *xe = extract_xe(dent); + unsigned int vfid = extract_vfid(dent); + char buf[21]; + ssize_t ret; + int len; + + xe_pm_runtime_get(xe); + ret = xe_sriov_pf_migration_size(xe, vfid); + xe_pm_runtime_put(xe); + if (ret < 0) + return ret; + + len = scnprintf(buf, sizeof(buf), "%zd\n", ret); + + return simple_read_from_buffer(ubuf, count, ppos, buf, len); +} + +static const struct file_operations size_vf_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = size_read, + .llseek = default_llseek, +}; + +static void pf_populate_vf(struct xe_device *xe, struct dentry *vfdent) +{ + debugfs_create_file("pause", 0200, vfdent, xe, &pause_vf_fops); + debugfs_create_file("resume", 0200, vfdent, xe, &resume_vf_fops); + debugfs_create_file("stop", 0200, vfdent, xe, &stop_vf_fops); + debugfs_create_file("reset", 0200, vfdent, xe, &reset_vf_fops); + debugfs_create_file("save", 0600, 
vfdent, xe, &save_vf_fops); + debugfs_create_file("restore", 0600, vfdent, xe, &restore_vf_fops); + debugfs_create_file("migration_data", 0600, vfdent, xe, &data_vf_fops); + debugfs_create_file("migration_size", 0400, vfdent, xe, &size_vf_fops); +} + +static void pf_populate_with_tiles(struct xe_device *xe, struct dentry *dent, unsigned int vfid) +{ + struct xe_tile *tile; + unsigned int id; + + for_each_tile(tile, xe, id) + xe_tile_sriov_pf_debugfs_populate(tile, dent, vfid); +} + +/** + * xe_sriov_pf_debugfs_register - Register PF debugfs attributes. + * @xe: the &xe_device + * @root: the root &dentry + * + * Create separate directory that will contain all SR-IOV related files, + * organized per each SR-IOV function (PF, VF1, VF2, ..., VFn). + */ +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ + int totalvfs = xe_sriov_pf_get_totalvfs(xe); + struct dentry *pfdent; + struct dentry *vfdent; + struct dentry *dent; + char vfname[16]; /* should be more than enough for "vf%u\0" and VFID(UINT_MAX) */ + unsigned int n; + + /* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov # d_inode->i_private = (xe_device*) + * │ ├── ... + */ + dent = debugfs_create_dir("sriov", root); + if (IS_ERR(dent)) + return; + dent->d_inode->i_private = xe; + + pf_populate_root(xe, dent); + + /* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov # d_inode->i_private = (xe_device*) + * │ ├── pf # d_inode->i_private = (xe_device*) + * │ │ ├── ... + */ + pfdent = debugfs_create_dir("pf", dent); + if (IS_ERR(pfdent)) + return; + pfdent->d_inode->i_private = xe; + + pf_populate_pf(xe, pfdent); + pf_populate_with_tiles(xe, pfdent, PFID); + + /* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov # d_inode->i_private = (xe_device*) + * │ ├── vf1 # d_inode->i_private = VFID(1) + * │ ├── vf2 # d_inode->i_private = VFID(2) + * │ ├── ... 
+ */ + for (n = 1; n <= totalvfs; n++) { + snprintf(vfname, sizeof(vfname), "vf%u", VFID(n)); + vfdent = debugfs_create_dir(vfname, dent); + if (IS_ERR(vfdent)) + return; + vfdent->d_inode->i_private = (void *)(uintptr_t)VFID(n); + + pf_populate_vf(xe, vfdent); + pf_populate_with_tiles(xe, vfdent, VFID(n)); + } +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.h b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.h new file mode 100644 index 000000000000..93db13585b82 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_DEBUGFS_H_ +#define _XE_SRIOV_PF_DEBUGFS_H_ + +struct dentry; +struct xe_device; + +#ifdef CONFIG_PCI_IOV +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root); +#else +static inline void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) { } +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h index dd1df950b021..9054fdc34597 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h @@ -37,10 +37,37 @@ static inline int xe_sriov_pf_get_totalvfs(struct xe_device *xe) return xe->sriov.pf.driver_max_vfs; } +/** + * xe_sriov_pf_num_vfs() - Number of enabled VFs on the PF. + * @xe: the PF &xe_device + * + * Return: Number of enabled VFs on the PF. + */ +static inline unsigned int xe_sriov_pf_num_vfs(const struct xe_device *xe) +{ + return pci_num_vf(to_pci_dev(xe->drm.dev)); +} + +/** + * xe_sriov_pf_admin_only() - Check if PF is mainly used for VFs administration. + * @xe: the PF &xe_device + * + * Return: True if PF is mainly used for VFs administration. 
+ */ +static inline bool xe_sriov_pf_admin_only(const struct xe_device *xe) +{ + return !xe->info.probe_display; +} + static inline struct mutex *xe_sriov_pf_master_mutex(struct xe_device *xe) { xe_assert(xe, IS_SRIOV_PF(xe)); return &xe->sriov.pf.master_lock; } +int xe_sriov_pf_arm_guard(struct xe_device *xe, struct xe_guard *guard, + bool write, void *who); +void xe_sriov_pf_disarm_guard(struct xe_device *xe, struct xe_guard *guard, + bool write, void *who); + #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c new file mode 100644 index 000000000000..6c4b16409cc9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c @@ -0,0 +1,365 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "xe_device.h" +#include "xe_gt_sriov_pf_control.h" +#include "xe_gt_sriov_pf_migration.h" +#include "xe_pm.h" +#include "xe_sriov.h" +#include "xe_sriov_packet.h" +#include "xe_sriov_packet_types.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_migration.h" +#include "xe_sriov_printk.h" + +static struct xe_sriov_migration_state *pf_pick_migration(struct xe_device *xe, unsigned int vfid) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe)); + + return &xe->sriov.pf.vfs[vfid].migration; +} + +/** + * xe_sriov_pf_migration_waitqueue() - Get waitqueue for migration. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * Return: pointer to the migration waitqueue. 
+ */ +wait_queue_head_t *xe_sriov_pf_migration_waitqueue(struct xe_device *xe, unsigned int vfid) +{ + return &pf_pick_migration(xe, vfid)->wq; +} + +/** + * xe_sriov_pf_migration_supported() - Check if SR-IOV VF migration is supported by the device + * @xe: the &xe_device + * + * Return: true if migration is supported, false otherwise + */ +bool xe_sriov_pf_migration_supported(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + + return IS_ENABLED(CONFIG_DRM_XE_DEBUG) || !xe->sriov.pf.migration.disabled; +} + +/** + * xe_sriov_pf_migration_disable() - Turn off SR-IOV VF migration support on PF. + * @xe: the &xe_device instance. + * @fmt: format string for the log message, to be combined with following VAs. + */ +void xe_sriov_pf_migration_disable(struct xe_device *xe, const char *fmt, ...) +{ + struct va_format vaf; + va_list va_args; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + va_start(va_args, fmt); + vaf.fmt = fmt; + vaf.va = &va_args; + xe_sriov_notice(xe, "migration %s: %pV\n", + IS_ENABLED(CONFIG_DRM_XE_DEBUG) ? + "missing prerequisite" : "disabled", + &vaf); + va_end(va_args); + + xe->sriov.pf.migration.disabled = true; +} + +static void pf_migration_check_support(struct xe_device *xe) +{ + if (!xe_device_has_memirq(xe)) + xe_sriov_pf_migration_disable(xe, "requires memory-based IRQ support"); +} + +static void pf_migration_cleanup(void *arg) +{ + struct xe_sriov_migration_state *migration = arg; + + xe_sriov_packet_free(migration->pending); + xe_sriov_packet_free(migration->trailer); + xe_sriov_packet_free(migration->descriptor); +} + +/** + * xe_sriov_pf_migration_init() - Initialize support for SR-IOV VF migration. + * @xe: the &xe_device + * + * Return: 0 on success or a negative error code on failure. 
+ */
+int xe_sriov_pf_migration_init(struct xe_device *xe)
+{
+	unsigned int n, totalvfs;
+	int err;
+
+	xe_assert(xe, IS_SRIOV_PF(xe));
+
+	pf_migration_check_support(xe);
+
+	if (!xe_sriov_pf_migration_supported(xe))
+		return 0;
+
+	/* VF identifiers start at 1; set up the per-VF lock, waitqueue and cleanup */
+	totalvfs = xe_sriov_pf_get_totalvfs(xe);
+	for (n = 1; n <= totalvfs; n++) {
+		struct xe_sriov_migration_state *migration = pf_pick_migration(xe, n);
+
+		err = drmm_mutex_init(&xe->drm, &migration->lock);
+		if (err)
+			return err;
+
+		init_waitqueue_head(&migration->wq);
+
+		err = devm_add_action_or_reset(xe->drm.dev, pf_migration_cleanup, migration);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Wait condition for the save-data consumer: true when, for any GT, the
+ * save-failed check or the save-data-done check passes, or the GT migration
+ * ring is not empty.
+ */
+static bool pf_migration_data_ready(struct xe_device *xe, unsigned int vfid)
+{
+	struct xe_gt *gt;
+	u8 gt_id;
+
+	for_each_gt(gt, xe, gt_id) {
+		if (xe_gt_sriov_pf_control_check_save_failed(gt, vfid) ||
+		    xe_gt_sriov_pf_control_check_save_data_done(gt, vfid) ||
+		    !xe_gt_sriov_pf_migration_ring_empty(gt, vfid))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Poll every GT for the next saved migration data packet of the given VF.
+ *
+ * Return: the first packet or hard error (any ERR_PTR other than -EAGAIN)
+ *         returned for a GT,
+ *         ERR_PTR(-EAGAIN) if there was no such result but at least one GT
+ *         returned -EAGAIN,
+ *         NULL otherwise.
+ */
+static struct xe_sriov_packet *
+pf_migration_consume(struct xe_device *xe, unsigned int vfid)
+{
+	struct xe_sriov_packet *data;
+	bool more_data = false;
+	struct xe_gt *gt;
+	u8 gt_id;
+
+	for_each_gt(gt, xe, gt_id) {
+		data = xe_gt_sriov_pf_migration_save_consume(gt, vfid);
+		/*
+		 * PTR_ERR() yields a negative errno, so the comparison must be
+		 * against -EAGAIN. A GT returning -EAGAIN must not end the
+		 * loop, as the remaining GTs still have to be polled.
+		 */
+		if (data && PTR_ERR(data) != -EAGAIN)
+			return data;
+		if (PTR_ERR(data) == -EAGAIN)
+			more_data = true;
+	}
+
+	if (!more_data)
+		return NULL;
+
+	return ERR_PTR(-EAGAIN);
+}
+
+/**
+ * xe_sriov_pf_migration_save_consume() - Consume a VF migration data packet from the device.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * Called by the save migration data consumer (userspace) when
+ * processing migration data.
+ * If there is no migration data to process, wait until more data is available.
+ *
+ * Return: Pointer to &xe_sriov_packet on success,
+ *         NULL if ring is empty and no more migration data is expected,
+ *         ERR_PTR value in case of error.
+ */ +struct xe_sriov_packet * +xe_sriov_pf_migration_save_consume(struct xe_device *xe, unsigned int vfid) +{ + struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid); + struct xe_sriov_packet *data; + int ret; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + for (;;) { + data = pf_migration_consume(xe, vfid); + if (PTR_ERR(data) != -EAGAIN) + break; + + ret = wait_event_interruptible(migration->wq, + pf_migration_data_ready(xe, vfid)); + if (ret) + return ERR_PTR(ret); + } + + return data; +} + +static int pf_handle_descriptor(struct xe_device *xe, unsigned int vfid, + struct xe_sriov_packet *data) +{ + int ret; + + if (data->hdr.tile_id != 0 || data->hdr.gt_id != 0) + return -EINVAL; + + ret = xe_sriov_packet_process_descriptor(xe, vfid, data); + if (ret) + return ret; + + xe_sriov_packet_free(data); + + return 0; +} + +static int pf_handle_trailer(struct xe_device *xe, unsigned int vfid, + struct xe_sriov_packet *data) +{ + struct xe_gt *gt; + u8 gt_id; + + if (data->hdr.tile_id != 0 || data->hdr.gt_id != 0) + return -EINVAL; + if (data->hdr.offset != 0 || data->hdr.size != 0 || data->buff || data->bo) + return -EINVAL; + + xe_sriov_packet_free(data); + + for_each_gt(gt, xe, gt_id) + xe_gt_sriov_pf_control_restore_data_done(gt, vfid); + + return 0; +} + +/** + * xe_sriov_pf_migration_restore_produce() - Produce a VF migration data packet to the device. + * @xe: the &xe_device + * @vfid: the VF identifier + * @data: Pointer to &xe_sriov_packet + * + * Called by the restore migration data producer (userspace) when processing + * migration data. + * If the underlying data structure is full, wait until there is space. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_migration_restore_produce(struct xe_device *xe, unsigned int vfid, + struct xe_sriov_packet *data) +{ + struct xe_gt *gt; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + if (data->hdr.type == XE_SRIOV_PACKET_TYPE_DESCRIPTOR) + return pf_handle_descriptor(xe, vfid, data); + if (data->hdr.type == XE_SRIOV_PACKET_TYPE_TRAILER) + return pf_handle_trailer(xe, vfid, data); + + gt = xe_device_get_gt(xe, data->hdr.gt_id); + if (!gt || data->hdr.tile_id != gt->tile->id || data->hdr.type == 0) { + xe_sriov_err_ratelimited(xe, "Received invalid restore packet for VF%u (type:%u, tile:%u, GT:%u)\n", + vfid, data->hdr.type, data->hdr.tile_id, data->hdr.gt_id); + return -EINVAL; + } + + return xe_gt_sriov_pf_migration_restore_produce(gt, vfid, data); +} + +/** + * xe_sriov_pf_migration_read() - Read migration data from the device. + * @xe: the &xe_device + * @vfid: the VF identifier + * @buf: start address of userspace buffer + * @len: requested read size from userspace + * + * Return: number of bytes that has been successfully read, + * 0 if no more migration data is available, + * -errno on failure. + */ +ssize_t xe_sriov_pf_migration_read(struct xe_device *xe, unsigned int vfid, + char __user *buf, size_t len) +{ + struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid); + ssize_t ret, consumed = 0; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + scoped_cond_guard(mutex_intr, return -EINTR, &migration->lock) { + while (consumed < len) { + ret = xe_sriov_packet_read_single(xe, vfid, buf, len - consumed); + if (ret == -ENODATA) + break; + if (ret < 0) + return ret; + + consumed += ret; + buf += ret; + } + } + + return consumed; +} + +/** + * xe_sriov_pf_migration_write() - Write migration data to the device. + * @xe: the &xe_device + * @vfid: the VF identifier + * @buf: start address of userspace buffer + * @len: requested write size from userspace + * + * Return: number of bytes that has been successfully written, + * -errno on failure. 
+ */ +ssize_t xe_sriov_pf_migration_write(struct xe_device *xe, unsigned int vfid, + const char __user *buf, size_t len) +{ + struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid); + ssize_t ret, produced = 0; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + scoped_cond_guard(mutex_intr, return -EINTR, &migration->lock) { + while (produced < len) { + ret = xe_sriov_packet_write_single(xe, vfid, buf, len - produced); + if (ret < 0) + return ret; + + produced += ret; + buf += ret; + } + } + + return produced; +} + +/** + * xe_sriov_pf_migration_size() - Total size of migration data from all components within a device + * @xe: the &xe_device + * @vfid: the VF identifier (can't be 0) + * + * This function is for PF only. + * + * Return: total migration data size in bytes or a negative error code on failure. + */ +ssize_t xe_sriov_pf_migration_size(struct xe_device *xe, unsigned int vfid) +{ + size_t size = 0; + struct xe_gt *gt; + ssize_t ret; + u8 gt_id; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, vfid); + + for_each_gt(gt, xe, gt_id) { + ret = xe_gt_sriov_pf_migration_size(gt, vfid); + if (ret < 0) + return ret; + + size += ret; + } + + return size; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_sriov_pf_migration.h new file mode 100644 index 000000000000..f8f408df8481 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_MIGRATION_H_ +#define _XE_SRIOV_PF_MIGRATION_H_ + +#include <linux/types.h> +#include <linux/wait.h> + +struct xe_device; +struct xe_sriov_packet; + +int xe_sriov_pf_migration_init(struct xe_device *xe); +bool xe_sriov_pf_migration_supported(struct xe_device *xe); +void xe_sriov_pf_migration_disable(struct xe_device *xe, const char *fmt, ...); +int xe_sriov_pf_migration_restore_produce(struct xe_device *xe, unsigned int vfid, + struct xe_sriov_packet *data); 
+struct xe_sriov_packet * +xe_sriov_pf_migration_save_consume(struct xe_device *xe, unsigned int vfid); +ssize_t xe_sriov_pf_migration_size(struct xe_device *xe, unsigned int vfid); +wait_queue_head_t *xe_sriov_pf_migration_waitqueue(struct xe_device *xe, unsigned int vfid); + +ssize_t xe_sriov_pf_migration_read(struct xe_device *xe, unsigned int vfid, + char __user *buf, size_t len); +ssize_t xe_sriov_pf_migration_write(struct xe_device *xe, unsigned int vfid, + const char __user *buf, size_t len); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_migration_types.h new file mode 100644 index 000000000000..7d9a8a278d91 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration_types.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_MIGRATION_TYPES_H_ +#define _XE_SRIOV_PF_MIGRATION_TYPES_H_ + +#include <linux/types.h> +#include <linux/mutex_types.h> +#include <linux/wait.h> + +/** + * struct xe_sriov_pf_migration - Xe device level VF migration data + */ +struct xe_sriov_pf_migration { + /** @disabled: indicates whether VF migration feature is disabled */ + bool disabled; +}; + +/** + * struct xe_sriov_migration_state - Per VF device-level migration related data + */ +struct xe_sriov_migration_state { + /** @wq: waitqueue used to avoid busy-waiting for snapshot production/consumption */ + wait_queue_head_t wq; + /** @lock: Mutex protecting the migration data */ + struct mutex lock; + /** @pending: currently processed data packet of VF resource */ + struct xe_sriov_packet *pending; + /** @trailer: data packet used to indicate the end of stream */ + struct xe_sriov_packet *trailer; + /** @descriptor: data packet containing the metadata describing the device */ + struct xe_sriov_packet *descriptor; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c new file mode 100644 index 
000000000000..01470c42e8a7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c @@ -0,0 +1,438 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_policy.h" +#include "xe_sriov.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_provision.h" +#include "xe_sriov_pf_provision_types.h" +#include "xe_sriov_printk.h" + +static const char *mode_to_string(enum xe_sriov_provisioning_mode mode) +{ + switch (mode) { + case XE_SRIOV_PROVISIONING_MODE_AUTO: + return "auto"; + case XE_SRIOV_PROVISIONING_MODE_CUSTOM: + return "custom"; + default: + return "<invalid>"; + } +} + +static bool pf_auto_provisioning_mode(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + + return xe->sriov.pf.provision.mode == XE_SRIOV_PROVISIONING_MODE_AUTO; +} + +static bool pf_needs_provisioning(struct xe_gt *gt, unsigned int num_vfs) +{ + unsigned int n; + + for (n = 1; n <= num_vfs; n++) + if (!xe_gt_sriov_pf_config_is_empty(gt, n)) + return false; + + return true; +} + +static int pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + for_each_gt(gt, xe, id) { + if (!pf_needs_provisioning(gt, num_vfs)) + return -EUCLEAN; + err = xe_gt_sriov_pf_config_set_fair(gt, VFID(1), num_vfs); + result = result ?: err; + } + + return result; +} + +static void pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + struct xe_gt *gt; + unsigned int id; + unsigned int n; + + for_each_gt(gt, xe, id) + for (n = 1; n <= num_vfs; n++) + xe_gt_sriov_pf_config_release(gt, n, true); +} + +static void pf_unprovision_all_vfs(struct xe_device *xe) +{ + pf_unprovision_vfs(xe, xe_sriov_pf_get_totalvfs(xe)); +} + +/** + * xe_sriov_pf_provision_vfs() - Provision VFs in auto-mode. 
+ * @xe: the PF &xe_device + * @num_vfs: the number of VFs to auto-provision + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + + if (!pf_auto_provisioning_mode(xe)) + return 0; + + return pf_provision_vfs(xe, num_vfs); +} + +/** + * xe_sriov_pf_unprovision_vfs() - Unprovision VFs in auto-mode. + * @xe: the PF &xe_device + * @num_vfs: the number of VFs to unprovision + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + + if (!pf_auto_provisioning_mode(xe)) + return 0; + + pf_unprovision_vfs(xe, num_vfs); + return 0; +} + +/** + * xe_sriov_pf_provision_set_mode() - Change VFs provision mode. + * @xe: the PF &xe_device + * @mode: the new VFs provisioning mode + * + * When changing from AUTO to CUSTOM mode, any already allocated VFs resources + * will remain allocated and will not be released upon VFs disabling. + * + * When changing back to AUTO mode, if VFs are not enabled, already allocated + * VFs resources will be immediately released. If VFs are still enabled, such + * mode change is rejected. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_provision_set_mode(struct xe_device *xe, enum xe_sriov_provisioning_mode mode) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + + if (mode == xe->sriov.pf.provision.mode) + return 0; + + if (mode == XE_SRIOV_PROVISIONING_MODE_AUTO) { + if (xe_sriov_pf_num_vfs(xe)) { + xe_sriov_dbg(xe, "can't restore %s: VFs must be disabled!\n", + mode_to_string(mode)); + return -EBUSY; + } + pf_unprovision_all_vfs(xe); + } + + xe_sriov_dbg(xe, "mode %s changed to %s by %ps\n", + mode_to_string(xe->sriov.pf.provision.mode), + mode_to_string(mode), __builtin_return_address(0)); + xe->sriov.pf.provision.mode = mode; + return 0; +} + +/** + * xe_sriov_pf_provision_bulk_apply_eq() - Change execution quantum for all VFs and PF. + * @xe: the PF &xe_device + * @eq: execution quantum in [ms] to set + * + * Change execution quantum (EQ) provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_bulk_apply_eq(struct xe_device *xe, u32 eq) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(gt, eq); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_apply_vf_eq() - Change VF's execution quantum. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @eq: execution quantum in [ms] to set + * + * Change VF's execution quantum (EQ) provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_provision_apply_vf_eq(struct xe_device *xe, unsigned int vfid, u32 eq) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_set_exec_quantum_locked(gt, vfid, eq); + result = result ?: err; + } + + return result; +} + +static int pf_report_unclean(struct xe_gt *gt, unsigned int vfid, + const char *what, u32 found, u32 expected) +{ + char name[8]; + + xe_sriov_dbg(gt_to_xe(gt), "%s on GT%u has %s=%u (expected %u)\n", + xe_sriov_function_name(vfid, name, sizeof(name)), + gt->info.id, what, found, expected); + return -EUCLEAN; +} + +/** + * xe_sriov_pf_provision_query_vf_eq() - Query VF's execution quantum. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @eq: placeholder for the returned execution quantum in [ms] + * + * Query VF's execution quantum (EQ) provisioning from all tiles/GTs. + * If values across tiles/GTs are inconsistent then -EUCLEAN error will be returned. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_query_vf_eq(struct xe_device *xe, unsigned int vfid, u32 *eq) +{ + struct xe_gt *gt; + unsigned int id; + int count = 0; + u32 value; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + value = xe_gt_sriov_pf_config_get_exec_quantum_locked(gt, vfid); + if (!count++) + *eq = value; + else if (value != *eq) + return pf_report_unclean(gt, vfid, "EQ", value, *eq); + } + + return !count ? -ENODATA : 0; +} + +/** + * xe_sriov_pf_provision_bulk_apply_pt() - Change preemption timeout for all VFs and PF. + * @xe: the PF &xe_device + * @pt: preemption timeout in [us] to set + * + * Change preemption timeout (PT) provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_provision_bulk_apply_pt(struct xe_device *xe, u32 pt) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(gt, pt); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_apply_vf_pt() - Change VF's preemption timeout. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @pt: preemption timeout in [us] to set + * + * Change VF's preemption timeout (PT) provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_apply_vf_pt(struct xe_device *xe, unsigned int vfid, u32 pt) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_set_preempt_timeout_locked(gt, vfid, pt); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_query_vf_pt() - Query VF's preemption timeout. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @pt: placeholder for the returned preemption timeout in [us] + * + * Query VF's preemption timeout (PT) provisioning from all tiles/GTs. + * If values across tiles/GTs are inconsistent then -EUCLEAN error will be returned. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_provision_query_vf_pt(struct xe_device *xe, unsigned int vfid, u32 *pt) +{ + struct xe_gt *gt; + unsigned int id; + int count = 0; + u32 value; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + value = xe_gt_sriov_pf_config_get_preempt_timeout_locked(gt, vfid); + if (!count++) + *pt = value; + else if (value != *pt) + return pf_report_unclean(gt, vfid, "PT", value, *pt); + } + + return !count ? -ENODATA : 0; +} + +/** + * xe_sriov_pf_provision_bulk_apply_priority() - Change scheduling priority of all VFs and PF. + * @xe: the PF &xe_device + * @prio: scheduling priority to set + * + * Change the scheduling priority provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_bulk_apply_priority(struct xe_device *xe, u32 prio) +{ + bool sched_if_idle; + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + /* + * Currently, priority changes that involves VFs are only allowed using + * the 'sched_if_idle' policy KLV, so only LOW and NORMAL are supported. + */ + xe_assert(xe, prio < GUC_SCHED_PRIORITY_HIGH); + sched_if_idle = prio == GUC_SCHED_PRIORITY_NORMAL; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_policy_set_sched_if_idle(gt, sched_if_idle); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_apply_vf_priority() - Change VF's scheduling priority. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @prio: scheduling priority to set + * + * Change VF's scheduling priority provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_provision_apply_vf_priority(struct xe_device *xe, unsigned int vfid, u32 prio) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_set_sched_priority(gt, vfid, prio); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_query_vf_priority() - Query VF's scheduling priority. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @prio: placeholder for the returned scheduling priority + * + * Query VF's scheduling priority provisioning from all tiles/GTs. + * If values across tiles/GTs are inconsistent then -EUCLEAN error will be returned. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_query_vf_priority(struct xe_device *xe, unsigned int vfid, u32 *prio) +{ + struct xe_gt *gt; + unsigned int id; + int count = 0; + u32 value; + + for_each_gt(gt, xe, id) { + value = xe_gt_sriov_pf_config_get_sched_priority(gt, vfid); + if (!count++) + *prio = value; + else if (value != *prio) + return pf_report_unclean(gt, vfid, "priority", value, *prio); + } + + return !count ? 
-ENODATA : 0; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.h b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h new file mode 100644 index 000000000000..bccf23d51396 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_PROVISION_H_ +#define _XE_SRIOV_PF_PROVISION_H_ + +#include <linux/types.h> + +#include "xe_sriov_pf_provision_types.h" + +struct xe_device; + +int xe_sriov_pf_provision_bulk_apply_eq(struct xe_device *xe, u32 eq); +int xe_sriov_pf_provision_apply_vf_eq(struct xe_device *xe, unsigned int vfid, u32 eq); +int xe_sriov_pf_provision_query_vf_eq(struct xe_device *xe, unsigned int vfid, u32 *eq); + +int xe_sriov_pf_provision_bulk_apply_pt(struct xe_device *xe, u32 pt); +int xe_sriov_pf_provision_apply_vf_pt(struct xe_device *xe, unsigned int vfid, u32 pt); +int xe_sriov_pf_provision_query_vf_pt(struct xe_device *xe, unsigned int vfid, u32 *pt); + +int xe_sriov_pf_provision_bulk_apply_priority(struct xe_device *xe, u32 prio); +int xe_sriov_pf_provision_apply_vf_priority(struct xe_device *xe, unsigned int vfid, u32 prio); +int xe_sriov_pf_provision_query_vf_priority(struct xe_device *xe, unsigned int vfid, u32 *prio); + +int xe_sriov_pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs); +int xe_sriov_pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs); + +int xe_sriov_pf_provision_set_mode(struct xe_device *xe, enum xe_sriov_provisioning_mode mode); + +/** + * xe_sriov_pf_provision_set_custom_mode() - Change VFs provision mode to custom. + * @xe: the PF &xe_device + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +static inline int xe_sriov_pf_provision_set_custom_mode(struct xe_device *xe) +{ + return xe_sriov_pf_provision_set_mode(xe, XE_SRIOV_PROVISIONING_MODE_CUSTOM); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h new file mode 100644 index 000000000000..a847b8a4c4da --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_PROVISION_TYPES_H_ +#define _XE_SRIOV_PF_PROVISION_TYPES_H_ + +#include <linux/build_bug.h> + +/** + * enum xe_sriov_provisioning_mode - SR-IOV provisioning mode. + * + * @XE_SRIOV_PROVISIONING_MODE_AUTO: VFs are provisioned during VFs enabling. + * Any resources allocated to the VFs will be + * automatically released when disabling VFs. + * This is the default mode. + * @XE_SRIOV_PROVISIONING_MODE_CUSTOM: Explicit VFs provisioning using uABI interfaces. + * VF resources remain allocated regardless of + * whether VFs are enabled or not. + */ +enum xe_sriov_provisioning_mode { + XE_SRIOV_PROVISIONING_MODE_AUTO, + XE_SRIOV_PROVISIONING_MODE_CUSTOM, +}; +static_assert(XE_SRIOV_PROVISIONING_MODE_AUTO == 0); + +/** + * struct xe_sriov_pf_provision - Data used by the PF provisioning. + */ +struct xe_sriov_pf_provision { + /** @mode: selected provisioning mode.
*/ + enum xe_sriov_provisioning_mode mode; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_sriov_pf_service.c new file mode 100644 index 000000000000..eee3b2a1ba41 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#include "abi/guc_relay_actions_abi.h" + +#include "xe_device_types.h" +#include "xe_sriov.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_printk.h" + +#include "xe_sriov_pf_service.h" +#include "xe_sriov_pf_service_types.h" + +/** + * xe_sriov_pf_service_init - Early initialization of the SR-IOV PF service. + * @xe: the &xe_device to initialize + * + * Performs early initialization of the SR-IOV PF service. + * + * This function can only be called on PF. + */ +void xe_sriov_pf_service_init(struct xe_device *xe) +{ + BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); + BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); + + xe_assert(xe, IS_SRIOV_PF(xe)); + + /* base versions may differ between platforms */ + xe->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; + xe->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; + + /* latest version is same for all platforms */ + xe->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; + xe->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; +} + +/* Return: 0 on success or a negative error code on failure. 
*/ +static int pf_negotiate_version(struct xe_device *xe, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + struct xe_sriov_pf_service_version base = xe->sriov.pf.service.version.base; + struct xe_sriov_pf_service_version latest = xe->sriov.pf.service.version.latest; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, base.major); + xe_assert(xe, base.major <= latest.major); + xe_assert(xe, (base.major < latest.major) || (base.minor <= latest.minor)); + + /* VF doesn't care - return our latest */ + if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && + wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants newer than our - return our latest */ + if (wanted_major > latest.major) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants older than min required - reject */ + if (wanted_major < base.major || + (wanted_major == base.major && wanted_minor < base.minor)) { + return -EPERM; + } + + /* previous major - return wanted, as we should still support it */ + if (wanted_major < latest.major) { + /* XXX: we are not prepared for multi-versions yet */ + xe_assert(xe, base.major == latest.major); + return -ENOPKG; + } + + /* same major - return common minor */ + *major = wanted_major; + *minor = min_t(u32, latest.minor, wanted_minor); + return 0; +} + +static void pf_connect(struct xe_device *xe, u32 vfid, u32 major, u32 minor) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + xe_assert(xe, major || minor); + + xe->sriov.pf.vfs[vfid].version.major = major; + xe->sriov.pf.vfs[vfid].version.minor = minor; +} + +static void pf_disconnect(struct xe_device *xe, u32 vfid) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + + xe->sriov.pf.vfs[vfid].version.major = 0; + xe->sriov.pf.vfs[vfid].version.minor = 0; +} + +/** + * xe_sriov_pf_service_is_negotiated - Check if VF has negotiated given ABI version. 
+ * @xe: the &xe_device + * @vfid: the VF identifier + * @major: the major version to check + * @minor: the minor version to check + * + * Check whether the ABI version recorded for the VF has the same major version + * and a minor version that is not lower than the given one. + * + * This function can only be called on PF. + * + * Returns: true if VF can use given ABI version functionality. + */ +bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + + return major == xe->sriov.pf.vfs[vfid].version.major && + minor <= xe->sriov.pf.vfs[vfid].version.minor; +} + +/** + * xe_sriov_pf_service_handshake_vf - Confirm a connection with the VF. + * @xe: the &xe_device + * @vfid: the VF identifier + * @wanted_major: the major service version expected by the VF + * @wanted_minor: the minor service version expected by the VF + * @major: the major service version to be used by the VF + * @minor: the minor service version to be used by the VF + * + * Negotiate a VF/PF ABI version to allow the VF to use the PF services. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + int err; + + xe_sriov_dbg_verbose(xe, "VF%u wants ABI version %u.%u\n", + vfid, wanted_major, wanted_minor); + + err = pf_negotiate_version(xe, wanted_major, wanted_minor, major, minor); + + if (err < 0) { + xe_sriov_notice(xe, "VF%u failed to negotiate ABI %u.%u (%pe)\n", + vfid, wanted_major, wanted_minor, ERR_PTR(err)); + pf_disconnect(xe, vfid); + } else { + xe_sriov_dbg(xe, "VF%u negotiated ABI version %u.%u\n", + vfid, *major, *minor); + pf_connect(xe, vfid, *major, *minor); + } + + return err; +} + +/** + * xe_sriov_pf_service_reset_vf - Reset a connection with the VF. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * Reset a VF driver negotiated VF/PF ABI version.
+ * + * After that point, the VF driver will have to perform new version handshake + * to continue use of the PF services again. + * + * This function can only be called on PF. + */ +void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid) +{ + pf_disconnect(xe, vfid); +} + +static void print_pf_version(struct drm_printer *p, const char *name, + const struct xe_sriov_pf_service_version *version) +{ + drm_printf(p, "%s:\t%u.%u\n", name, version->major, version->minor); +} + +/** + * xe_sriov_pf_service_print_versions - Print ABI versions negotiated with VFs. + * @xe: the &xe_device + * @p: the &drm_printer + * + * This function is for PF use only. + */ +void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); + struct xe_sriov_pf_service_version *version; + char name[8]; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + print_pf_version(p, "base", &xe->sriov.pf.service.version.base); + print_pf_version(p, "latest", &xe->sriov.pf.service.version.latest); + + for (n = 1; n <= total_vfs; n++) { + version = &xe->sriov.pf.vfs[n].version; + if (!version->major && !version->minor) + continue; + + print_pf_version(p, xe_sriov_function_name(n, name, sizeof(name)), version); + } +} + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_sriov_pf_service_kunit.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_sriov_pf_service.h new file mode 100644 index 000000000000..d38c18f5ed10 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_SERVICE_H_ +#define _XE_SRIOV_PF_SERVICE_H_ + +#include <linux/types.h> + +struct drm_printer; +struct xe_device; + +void xe_sriov_pf_service_init(struct xe_device *xe); +void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p); + +int 
xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor); +bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor); +void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h new file mode 100644 index 000000000000..0835dde358c1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_SERVICE_TYPES_H_ +#define _XE_SRIOV_PF_SERVICE_TYPES_H_ + +#include <linux/types.h> + +/** + * struct xe_sriov_pf_service_version - VF/PF ABI Version. + * @major: the major version of the VF/PF ABI + * @minor: the minor version of the VF/PF ABI + * + * See `GuC Relay Communication`_. + */ +struct xe_sriov_pf_service_version { + u16 major; + u16 minor; +}; + +/** + * struct xe_sriov_pf_service - Data used by the PF service. + * @version: information about VF/PF ABI versions for current platform. + * @version.base: lowest VF/PF ABI version that could be negotiated with VF. + * @version.latest: latest VF/PF ABI version supported by the PF driver. 
+ */ +struct xe_sriov_pf_service { + struct { + struct xe_sriov_pf_service_version base; + struct xe_sriov_pf_service_version latest; + } version; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c new file mode 100644 index 000000000000..c0b767ac735c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c @@ -0,0 +1,647 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/kobject.h> +#include <linux/sysfs.h> + +#include <drm/drm_managed.h> + +#include "xe_assert.h" +#include "xe_pci_sriov.h" +#include "xe_pm.h" +#include "xe_sriov.h" +#include "xe_sriov_pf.h" +#include "xe_sriov_pf_control.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_provision.h" +#include "xe_sriov_pf_sysfs.h" +#include "xe_sriov_printk.h" + +static int emit_choice(char *buf, int choice, const char * const *array, size_t size) +{ + int pos = 0; + int n; + + for (n = 0; n < size; n++) { + pos += sysfs_emit_at(buf, pos, "%s%s%s%s", + n ? " " : "", + n == choice ? "[" : "", + array[n], + n == choice ? "]" : ""); + } + pos += sysfs_emit_at(buf, pos, "\n"); + + return pos; +} + +/* + * /sys/bus/pci/drivers/xe/BDF/ + * : + * ├── sriov_admin/ + * ├── ... + * ├── .bulk_profile + * │ ├── exec_quantum_ms + * │ ├── preempt_timeout_us + * │ └── sched_priority + * ├── pf/ + * │ ├── ... + * │ ├── device -> ../../../BDF + * │ └── profile + * │ ├── exec_quantum_ms + * │ ├── preempt_timeout_us + * │ └── sched_priority + * ├── vf1/ + * │ ├── ... 
+ * │ ├── device -> ../../../BDF.1 + * │ ├── stop + * │ └── profile + * │ ├── exec_quantum_ms + * │ ├── preempt_timeout_us + * │ └── sched_priority + * ├── vf2/ + * : + * └── vfN/ + */ + +struct xe_sriov_kobj { + struct kobject base; + struct xe_device *xe; + unsigned int vfid; +}; +#define to_xe_sriov_kobj(p) container_of_const((p), struct xe_sriov_kobj, base) + +struct xe_sriov_dev_attr { + struct attribute attr; + ssize_t (*show)(struct xe_device *xe, char *buf); + ssize_t (*store)(struct xe_device *xe, const char *buf, size_t count); +}; +#define to_xe_sriov_dev_attr(p) container_of_const((p), struct xe_sriov_dev_attr, attr) + +#define XE_SRIOV_DEV_ATTR(NAME) \ +struct xe_sriov_dev_attr xe_sriov_dev_attr_##NAME = \ + __ATTR(NAME, 0644, xe_sriov_dev_attr_##NAME##_show, xe_sriov_dev_attr_##NAME##_store) + +#define XE_SRIOV_DEV_ATTR_RO(NAME) \ +struct xe_sriov_dev_attr xe_sriov_dev_attr_##NAME = \ + __ATTR(NAME, 0444, xe_sriov_dev_attr_##NAME##_show, NULL) + +#define XE_SRIOV_DEV_ATTR_WO(NAME) \ +struct xe_sriov_dev_attr xe_sriov_dev_attr_##NAME = \ + __ATTR(NAME, 0200, NULL, xe_sriov_dev_attr_##NAME##_store) + +struct xe_sriov_vf_attr { + struct attribute attr; + ssize_t (*show)(struct xe_device *xe, unsigned int vfid, char *buf); + ssize_t (*store)(struct xe_device *xe, unsigned int vfid, const char *buf, size_t count); +}; +#define to_xe_sriov_vf_attr(p) container_of_const((p), struct xe_sriov_vf_attr, attr) + +#define XE_SRIOV_VF_ATTR(NAME) \ +struct xe_sriov_vf_attr xe_sriov_vf_attr_##NAME = \ + __ATTR(NAME, 0644, xe_sriov_vf_attr_##NAME##_show, xe_sriov_vf_attr_##NAME##_store) + +#define XE_SRIOV_VF_ATTR_RO(NAME) \ +struct xe_sriov_vf_attr xe_sriov_vf_attr_##NAME = \ + __ATTR(NAME, 0444, xe_sriov_vf_attr_##NAME##_show, NULL) + +#define XE_SRIOV_VF_ATTR_WO(NAME) \ +struct xe_sriov_vf_attr xe_sriov_vf_attr_##NAME = \ + __ATTR(NAME, 0200, NULL, xe_sriov_vf_attr_##NAME##_store) + +/* device level attributes go here */ + +#define 
DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(NAME, ITEM, TYPE) \ + \ +static ssize_t xe_sriov_dev_attr_##NAME##_store(struct xe_device *xe, \ + const char *buf, size_t count) \ +{ \ + TYPE value; \ + int err; \ + \ + err = kstrto##TYPE(buf, 0, &value); \ + if (err) \ + return err; \ + \ + err = xe_sriov_pf_provision_bulk_apply_##ITEM(xe, value); \ + return err ?: count; \ +} \ + \ +static XE_SRIOV_DEV_ATTR_WO(NAME) + +DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(exec_quantum_ms, eq, u32); +DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(preempt_timeout_us, pt, u32); + +static const char * const sched_priority_names[] = { + [GUC_SCHED_PRIORITY_LOW] = "low", + [GUC_SCHED_PRIORITY_NORMAL] = "normal", + [GUC_SCHED_PRIORITY_HIGH] = "high", +}; + +static bool sched_priority_change_allowed(unsigned int vfid) +{ + /* As of today GuC FW allows to selectively change only the PF priority. */ + return vfid == PFID; +} + +static bool sched_priority_high_allowed(unsigned int vfid) +{ + /* As of today GuC FW allows to select 'high' priority only for the PF. 
*/ + return vfid == PFID; +} + +static bool sched_priority_bulk_high_allowed(struct xe_device *xe) +{ + /* all VFs are equal - it's sufficient to check VF1 only */ + return sched_priority_high_allowed(VFID(1)); +} + +static ssize_t xe_sriov_dev_attr_sched_priority_store(struct xe_device *xe, + const char *buf, size_t count) +{ + size_t num_priorities = ARRAY_SIZE(sched_priority_names); + int match; + int err; + + if (!sched_priority_bulk_high_allowed(xe)) + num_priorities--; + + match = __sysfs_match_string(sched_priority_names, num_priorities, buf); + if (match < 0) + return -EINVAL; + + err = xe_sriov_pf_provision_bulk_apply_priority(xe, match); + return err ?: count; +} + +static XE_SRIOV_DEV_ATTR_WO(sched_priority); + +static struct attribute *bulk_profile_dev_attrs[] = { + &xe_sriov_dev_attr_exec_quantum_ms.attr, + &xe_sriov_dev_attr_preempt_timeout_us.attr, + &xe_sriov_dev_attr_sched_priority.attr, + NULL +}; + +static const struct attribute_group bulk_profile_dev_attr_group = { + .name = ".bulk_profile", + .attrs = bulk_profile_dev_attrs, +}; + +static const struct attribute_group *xe_sriov_dev_attr_groups[] = { + &bulk_profile_dev_attr_group, + NULL +}; + +/* and VF-level attributes go here */ + +#define DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(NAME, ITEM, TYPE, FORMAT) \ +static ssize_t xe_sriov_vf_attr_##NAME##_show(struct xe_device *xe, unsigned int vfid, \ + char *buf) \ +{ \ + TYPE value = 0; \ + int err; \ + \ + err = xe_sriov_pf_provision_query_vf_##ITEM(xe, vfid, &value); \ + if (err) \ + return err; \ + \ + return sysfs_emit(buf, FORMAT, value); \ +} \ + \ +static ssize_t xe_sriov_vf_attr_##NAME##_store(struct xe_device *xe, unsigned int vfid, \ + const char *buf, size_t count) \ +{ \ + TYPE value; \ + int err; \ + \ + err = kstrto##TYPE(buf, 0, &value); \ + if (err) \ + return err; \ + \ + err = xe_sriov_pf_provision_apply_vf_##ITEM(xe, vfid, value); \ + return err ?: count; \ +} \ + \ +static XE_SRIOV_VF_ATTR(NAME) + 
+DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(exec_quantum_ms, eq, u32, "%u\n"); +DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(preempt_timeout_us, pt, u32, "%u\n"); + +static ssize_t xe_sriov_vf_attr_sched_priority_show(struct xe_device *xe, unsigned int vfid, + char *buf) +{ + size_t num_priorities = ARRAY_SIZE(sched_priority_names); + u32 priority; + int err; + + err = xe_sriov_pf_provision_query_vf_priority(xe, vfid, &priority); + if (err) + return err; + + if (!sched_priority_high_allowed(vfid)) + num_priorities--; + + xe_assert(xe, priority < num_priorities); + return emit_choice(buf, priority, sched_priority_names, num_priorities); +} + +static ssize_t xe_sriov_vf_attr_sched_priority_store(struct xe_device *xe, unsigned int vfid, + const char *buf, size_t count) +{ + size_t num_priorities = ARRAY_SIZE(sched_priority_names); + int match; + int err; + + if (!sched_priority_change_allowed(vfid)) + return -EOPNOTSUPP; + + if (!sched_priority_high_allowed(vfid)) + num_priorities--; + + match = __sysfs_match_string(sched_priority_names, num_priorities, buf); + if (match < 0) + return -EINVAL; + + err = xe_sriov_pf_provision_apply_vf_priority(xe, vfid, match); + return err ?: count; +} + +static XE_SRIOV_VF_ATTR(sched_priority); + +static struct attribute *profile_vf_attrs[] = { + &xe_sriov_vf_attr_exec_quantum_ms.attr, + &xe_sriov_vf_attr_preempt_timeout_us.attr, + &xe_sriov_vf_attr_sched_priority.attr, + NULL +}; + +static umode_t profile_vf_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int index) +{ + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + + if (attr == &xe_sriov_vf_attr_sched_priority.attr && + !sched_priority_change_allowed(vkobj->vfid)) + return attr->mode & 0444; + + return attr->mode; +} + +static const struct attribute_group profile_vf_attr_group = { + .name = "profile", + .attrs = profile_vf_attrs, + .is_visible = profile_vf_attr_is_visible, +}; + +#define DEFINE_SIMPLE_CONTROL_SRIOV_VF_ATTR(NAME) \ + \ +static ssize_t 
xe_sriov_vf_attr_##NAME##_store(struct xe_device *xe, unsigned int vfid, \ + const char *buf, size_t count) \ +{ \ + bool yes; \ + int err; \ + \ + if (!vfid) \ + return -EPERM; \ + \ + err = kstrtobool(buf, &yes); \ + if (err) \ + return err; \ + if (!yes) \ + return count; \ + \ + err = xe_sriov_pf_control_##NAME##_vf(xe, vfid); \ + return err ?: count; \ +} \ + \ +static XE_SRIOV_VF_ATTR_WO(NAME) + +DEFINE_SIMPLE_CONTROL_SRIOV_VF_ATTR(stop); + +static struct attribute *control_vf_attrs[] = { + &xe_sriov_vf_attr_stop.attr, + NULL +}; + +static umode_t control_vf_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int index) +{ + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + + if (vkobj->vfid == PFID) + return 0; + + return attr->mode; +} + +static const struct attribute_group control_vf_attr_group = { + .attrs = control_vf_attrs, + .is_visible = control_vf_attr_is_visible, +}; + +static const struct attribute_group *xe_sriov_vf_attr_groups[] = { + &profile_vf_attr_group, + &control_vf_attr_group, + NULL +}; + +/* no user serviceable parts below */ + +static struct kobject *create_xe_sriov_kobj(struct xe_device *xe, unsigned int vfid) +{ + struct xe_sriov_kobj *vkobj; + + xe_sriov_pf_assert_vfid(xe, vfid); + + vkobj = kzalloc(sizeof(*vkobj), GFP_KERNEL); + if (!vkobj) + return NULL; + + vkobj->xe = xe; + vkobj->vfid = vfid; + return &vkobj->base; +} + +static void release_xe_sriov_kobj(struct kobject *kobj) +{ + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + + kfree(vkobj); +} + +static ssize_t xe_sriov_dev_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct xe_sriov_dev_attr *vattr = to_xe_sriov_dev_attr(attr); + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + struct xe_device *xe = vkobj->xe; + + if (!vattr->show) + return -EPERM; + + return vattr->show(xe, buf); +} + +static ssize_t xe_sriov_dev_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct 
xe_sriov_dev_attr *vattr = to_xe_sriov_dev_attr(attr); + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + struct xe_device *xe = vkobj->xe; + ssize_t ret; + + if (!vattr->store) + return -EPERM; + + xe_pm_runtime_get(xe); + ret = xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, buf, count); + xe_pm_runtime_put(xe); + + return ret; +} + +static ssize_t xe_sriov_vf_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct xe_sriov_vf_attr *vattr = to_xe_sriov_vf_attr(attr); + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + struct xe_device *xe = vkobj->xe; + unsigned int vfid = vkobj->vfid; + + xe_sriov_pf_assert_vfid(xe, vfid); + + if (!vattr->show) + return -EPERM; + + return vattr->show(xe, vfid, buf); +} + +static ssize_t xe_sriov_vf_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct xe_sriov_vf_attr *vattr = to_xe_sriov_vf_attr(attr); + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + struct xe_device *xe = vkobj->xe; + unsigned int vfid = vkobj->vfid; + ssize_t ret; + + xe_sriov_pf_assert_vfid(xe, vfid); + + if (!vattr->store) + return -EPERM; + + xe_pm_runtime_get(xe); + ret = xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, vfid, buf, count); + xe_pm_runtime_put(xe); /* balance xe_pm_runtime_get() above */ + + return ret; +} + +static const struct sysfs_ops xe_sriov_dev_sysfs_ops = { + .show = xe_sriov_dev_attr_show, + .store = xe_sriov_dev_attr_store, +}; + +static const struct sysfs_ops xe_sriov_vf_sysfs_ops = { + .show = xe_sriov_vf_attr_show, + .store = xe_sriov_vf_attr_store, +}; + +static const struct kobj_type xe_sriov_dev_ktype = { + .release = release_xe_sriov_kobj, + .sysfs_ops = &xe_sriov_dev_sysfs_ops, + .default_groups = xe_sriov_dev_attr_groups, +}; + +static const struct kobj_type xe_sriov_vf_ktype = { + .release = release_xe_sriov_kobj, + .sysfs_ops = &xe_sriov_vf_sysfs_ops, + .default_groups = xe_sriov_vf_attr_groups, +}; + +static int pf_sysfs_error(struct xe_device *xe, int err, const char
*what) +{ + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) + xe_sriov_dbg(xe, "Failed to setup sysfs %s (%pe)\n", what, ERR_PTR(err)); + return err; +} + +static void pf_sysfs_note(struct xe_device *xe, int err, const char *what) +{ + xe_sriov_dbg(xe, "Failed to setup sysfs %s (%pe)\n", what, ERR_PTR(err)); +} + +static void action_put_kobject(void *arg) +{ + struct kobject *kobj = arg; + + kobject_put(kobj); +} + +static int pf_setup_root(struct xe_device *xe) +{ + struct kobject *parent = &xe->drm.dev->kobj; + struct kobject *root; + int err; + + root = create_xe_sriov_kobj(xe, PFID); + if (!root) + return pf_sysfs_error(xe, -ENOMEM, "root obj"); + + err = devm_add_action_or_reset(xe->drm.dev, action_put_kobject, root); + if (err) + return pf_sysfs_error(xe, err, "root action"); + + err = kobject_init_and_add(root, &xe_sriov_dev_ktype, parent, "sriov_admin"); + if (err) + return pf_sysfs_error(xe, err, "root init"); + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, !xe->sriov.pf.sysfs.root); + xe->sriov.pf.sysfs.root = root; + return 0; +} + +static int pf_setup_tree(struct xe_device *xe) +{ + unsigned int totalvfs = xe_sriov_pf_get_totalvfs(xe); + struct kobject *root, *kobj; + unsigned int n; + int err; + + xe_assert(xe, IS_SRIOV_PF(xe)); + root = xe->sriov.pf.sysfs.root; + + for (n = 0; n <= totalvfs; n++) { + kobj = create_xe_sriov_kobj(xe, VFID(n)); + if (!kobj) + return pf_sysfs_error(xe, -ENOMEM, "tree obj"); + + /* put the node created above; root has its own action from pf_setup_root() */ + err = devm_add_action_or_reset(xe->drm.dev, action_put_kobject, kobj); + if (err) + return pf_sysfs_error(xe, err, "tree action"); + + if (n) + err = kobject_init_and_add(kobj, &xe_sriov_vf_ktype, + root, "vf%u", n); + else + err = kobject_init_and_add(kobj, &xe_sriov_vf_ktype, + root, "pf"); + if (err) + return pf_sysfs_error(xe, err, "tree init"); + + xe_assert(xe, !xe->sriov.pf.vfs[n].kobj); + xe->sriov.pf.vfs[n].kobj = kobj; + } + + return 0; +} + +static void action_rm_device_link(void *arg) +{ + struct kobject *kobj = arg; + +
sysfs_remove_link(kobj, "device"); +} + +static int pf_link_pf_device(struct xe_device *xe) +{ + struct kobject *kobj = xe->sriov.pf.vfs[PFID].kobj; + int err; + + err = sysfs_create_link(kobj, &xe->drm.dev->kobj, "device"); + if (err) + return pf_sysfs_error(xe, err, "PF device link"); + + err = devm_add_action_or_reset(xe->drm.dev, action_rm_device_link, kobj); + if (err) + return pf_sysfs_error(xe, err, "PF unlink action"); + + return 0; +} + +/** + * xe_sriov_pf_sysfs_init() - Setup PF's SR-IOV sysfs tree. + * @xe: the PF &xe_device to setup sysfs + * + * This function will create additional nodes that will represent PF and VFs + * devices, each populated with SR-IOV Xe specific attributes. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_sysfs_init(struct xe_device *xe) +{ + int err; + + err = pf_setup_root(xe); + if (err) + return err; + + err = pf_setup_tree(xe); + if (err) + return err; + + err = pf_link_pf_device(xe); + if (err) + return err; + + return 0; +} + +/** + * xe_sriov_pf_sysfs_link_vfs() - Add VF's links in SR-IOV sysfs tree. + * @xe: the &xe_device where to update sysfs + * @num_vfs: number of enabled VFs to link + * + * This function is specific for the PF driver. + * + * This function will add symbolic links between VFs represented in the SR-IOV + * sysfs tree maintained by the PF and enabled VF PCI devices. + * + * The @xe_sriov_pf_sysfs_unlink_vfs() shall be used to remove those links. 
+ */ +void xe_sriov_pf_sysfs_link_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + unsigned int totalvfs = xe_sriov_pf_get_totalvfs(xe); + struct pci_dev *pf_pdev = to_pci_dev(xe->drm.dev); + struct pci_dev *vf_pdev = NULL; + unsigned int n; + int err; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, num_vfs <= totalvfs); + + for (n = 1; n <= num_vfs; n++) { + vf_pdev = xe_pci_sriov_get_vf_pdev(pf_pdev, VFID(n)); + if (!vf_pdev) + return pf_sysfs_note(xe, -ENOENT, "VF link"); + + err = sysfs_create_link(xe->sriov.pf.vfs[VFID(n)].kobj, + &vf_pdev->dev.kobj, "device"); + + /* must balance xe_pci_sriov_get_vf_pdev() */ + pci_dev_put(vf_pdev); + + if (err) + return pf_sysfs_note(xe, err, "VF link"); + } +} + +/** + * xe_sriov_pf_sysfs_unlink_vfs() - Remove VF's links from SR-IOV sysfs tree. + * @xe: the &xe_device where to update sysfs + * @num_vfs: number of VFs to unlink + * + * This function shall be called only on the PF. + * This function will remove "device" links added by @xe_sriov_pf_sysfs_link_vfs().
+ */ +void xe_sriov_pf_sysfs_unlink_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + unsigned int n; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, num_vfs <= xe_sriov_pf_get_totalvfs(xe)); + + for (n = 1; n <= num_vfs; n++) + sysfs_remove_link(xe->sriov.pf.vfs[VFID(n)].kobj, "device"); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h new file mode 100644 index 000000000000..ae92ed1766e7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_SYSFS_H_ +#define _XE_SRIOV_PF_SYSFS_H_ + +struct xe_device; + +int xe_sriov_pf_sysfs_init(struct xe_device *xe); + +void xe_sriov_pf_sysfs_link_vfs(struct xe_device *xe, unsigned int num_vfs); +void xe_sriov_pf_sysfs_unlink_vfs(struct xe_device *xe, unsigned int num_vfs); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_types.h new file mode 100644 index 000000000000..b0253e1ae5da --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_types.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_TYPES_H_ +#define _XE_SRIOV_PF_TYPES_H_ + +#include <linux/mutex.h> +#include <linux/types.h> + +#include "xe_guard.h" +#include "xe_sriov_pf_migration_types.h" +#include "xe_sriov_pf_provision_types.h" +#include "xe_sriov_pf_service_types.h" + +struct kobject; + +/** + * struct xe_sriov_metadata - per-VF device level metadata + */ +struct xe_sriov_metadata { + /** @kobj: kobject representing VF in PF's SR-IOV sysfs tree. 
*/ + struct kobject *kobj; + + /** @version: negotiated VF/PF ABI version */ + struct xe_sriov_pf_service_version version; + /** @migration: migration state */ + struct xe_sriov_migration_state migration; +}; + +/** + * struct xe_device_pf - Xe PF related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_PF mode. + */ +struct xe_device_pf { + /** @device_total_vfs: Maximum number of VFs supported by the device. */ + u16 device_total_vfs; + + /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ + u16 driver_max_vfs; + + /** @guard_vfs_enabling: guards VFs enabling */ + struct xe_guard guard_vfs_enabling; + + /** @master_lock: protects all VFs configurations across GTs */ + struct mutex master_lock; + + /** @provision: device level provisioning data. */ + struct xe_sriov_pf_provision provision; + + /** @migration: device level migration data. */ + struct xe_sriov_pf_migration migration; + + /** @service: device level service data. */ + struct xe_sriov_pf_service service; + + /** @sysfs: device level sysfs data. */ + struct { + /** @sysfs.root: the root kobject for all SR-IOV entries in sysfs. */ + struct kobject *root; + } sysfs; + + /** @vfs: metadata for all VFs. */ + struct xe_sriov_metadata *vfs; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_printk.h b/drivers/gpu/drm/xe/xe_sriov_printk.h index 117e1d541692..4c6b5c3d2190 100644 --- a/drivers/gpu/drm/xe/xe_sriov_printk.h +++ b/drivers/gpu/drm/xe/xe_sriov_printk.h @@ -1,22 +1,22 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright © 2023 Intel Corporation + * Copyright © 2023-2025 Intel Corporation */ #ifndef _XE_SRIOV_PRINTK_H_ #define _XE_SRIOV_PRINTK_H_ -#include <drm/drm_print.h> - -#include "xe_device_types.h" -#include "xe_sriov_types.h" +#include "xe_printk.h" #define xe_sriov_printk_prefix(xe) \ ((xe)->sriov.__mode == XE_SRIOV_MODE_PF ? "PF: " : \ (xe)->sriov.__mode == XE_SRIOV_MODE_VF ? 
"VF: " : "") +#define __XE_SRIOV_PRINTK_FMT(_xe, _fmt, _args...) \ + "%s" _fmt, xe_sriov_printk_prefix(_xe), ##_args + #define xe_sriov_printk(xe, _level, fmt, ...) \ - drm_##_level(&(xe)->drm, "%s" fmt, xe_sriov_printk_prefix(xe), ##__VA_ARGS__) + xe_##_level((xe), __XE_SRIOV_PRINTK_FMT((xe), fmt, ##__VA_ARGS__)) #define xe_sriov_err(xe, fmt, ...) \ xe_sriov_printk((xe), err, fmt, ##__VA_ARGS__) diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h index ca94382a721e..1a138108d139 100644 --- a/drivers/gpu/drm/xe/xe_sriov_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -7,9 +7,6 @@ #define _XE_SRIOV_TYPES_H_ #include <linux/build_bug.h> -#include <linux/mutex.h> -#include <linux/types.h> -#include <linux/workqueue_types.h> /** * VFID - Virtual Function Identifier @@ -40,37 +37,4 @@ enum xe_sriov_mode { }; static_assert(XE_SRIOV_MODE_NONE); -/** - * struct xe_device_pf - Xe PF related data - * - * The data in this structure is valid only if driver is running in the - * @XE_SRIOV_MODE_PF mode. - */ -struct xe_device_pf { - /** @device_total_vfs: Maximum number of VFs supported by the device. */ - u16 device_total_vfs; - - /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ - u16 driver_max_vfs; - - /** @master_lock: protects all VFs configurations across GTs */ - struct mutex master_lock; -}; - -/** - * struct xe_device_vf - Xe Virtual Function related data - * - * The data in this structure is valid only if driver is running in the - * @XE_SRIOV_MODE_VF mode. 
- */ -struct xe_device_vf { - /** @migration: VF Migration state data */ - struct { - /** @migration.worker: VF migration recovery worker */ - struct work_struct worker; - /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ - unsigned long gt_flags; - } migration; -}; - #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index c1275e64aa9c..284ce37ca92d 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -3,16 +3,15 @@ * Copyright © 2023-2024 Intel Corporation */ +#include <drm/drm_debugfs.h> #include <drm/drm_managed.h> -#include "xe_assert.h" -#include "xe_device.h" -#include "xe_gt_sriov_printk.h" +#include "xe_gt.h" #include "xe_gt_sriov_vf.h" -#include "xe_pm.h" -#include "xe_sriov.h" +#include "xe_guc.h" #include "xe_sriov_printk.h" #include "xe_sriov_vf.h" +#include "xe_sriov_vf_ccs.h" /** * DOC: VF restore procedure in PF KMD and VF KMD @@ -121,143 +120,92 @@ * | | | */ -static void migration_worker_func(struct work_struct *w); - /** - * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data. - * @xe: the &xe_device to initialize + * xe_sriov_vf_migration_supported - Report whether SR-IOV VF migration is + * supported or not. + * @xe: the &xe_device to check + * + * Returns: true if VF migration is supported, false otherwise. */ -void xe_sriov_vf_init_early(struct xe_device *xe) +bool xe_sriov_vf_migration_supported(struct xe_device *xe) { - INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); + xe_assert(xe, IS_SRIOV_VF(xe)); + return !xe->sriov.vf.migration.disabled; } /** - * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning. - * @xe: the &xe_device struct instance - * - * After migration, we need to re-query all VF configuration to make sure - * they match previous provisioning. Note that most of VF provisioning - * shall be the same, except GGTT range, since GGTT is not virtualized per-VF. 
- * - * Returns: 0 if the operation completed successfully, or a negative error - * code otherwise. + * xe_sriov_vf_migration_disable - Turn off VF migration with given log message. + * @xe: the &xe_device instance. + * @fmt: format string for the log message, to be combined with following VAs. */ -static int vf_post_migration_requery_guc(struct xe_device *xe) +void xe_sriov_vf_migration_disable(struct xe_device *xe, const char *fmt, ...) { - struct xe_gt *gt; - unsigned int id; - int err, ret = 0; + struct va_format vaf; + va_list va_args; - for_each_gt(gt, xe, id) { - err = xe_gt_sriov_vf_query_config(gt); - ret = ret ?: err; - } + xe_assert(xe, IS_SRIOV_VF(xe)); - return ret; -} + va_start(va_args, fmt); + vaf.fmt = fmt; + vaf.va = &va_args; + xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf); + va_end(va_args); -/* - * vf_post_migration_imminent - Check if post-restore recovery is coming. - * @xe: the &xe_device struct instance - * - * Return: True if migration recovery worker will soon be running. Any worker currently - * executing does not affect the result. - */ -static bool vf_post_migration_imminent(struct xe_device *xe) -{ - return xe->sriov.vf.migration.gt_flags != 0 || - work_pending(&xe->sriov.vf.migration.worker); + xe->sriov.vf.migration.disabled = true; } -/* - * Notify all GuCs about resource fixups apply finished. 
- */ -static void vf_post_migration_notify_resfix_done(struct xe_device *xe) +static void vf_migration_init_early(struct xe_device *xe) { - struct xe_gt *gt; - unsigned int id; + if (!xe_device_has_memirq(xe)) + return xe_sriov_vf_migration_disable(xe, "requires memory-based IRQ support"); - for_each_gt(gt, xe, id) { - if (vf_post_migration_imminent(xe)) - goto skip; - xe_gt_sriov_vf_notify_resfix_done(gt); - } - return; - -skip: - drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n"); } -static void vf_post_migration_recovery(struct xe_device *xe) +/** + * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data. + * @xe: the &xe_device to initialize + */ +void xe_sriov_vf_init_early(struct xe_device *xe) { - int err; - - drm_dbg(&xe->drm, "migration recovery in progress\n"); - xe_pm_runtime_get(xe); - err = vf_post_migration_requery_guc(xe); - if (vf_post_migration_imminent(xe)) - goto defer; - if (unlikely(err)) - goto fail; - - /* FIXME: add the recovery steps */ - vf_post_migration_notify_resfix_done(xe); - xe_pm_runtime_put(xe); - drm_notice(&xe->drm, "migration recovery ended\n"); - return; -defer: - xe_pm_runtime_put(xe); - drm_dbg(&xe->drm, "migration recovery deferred\n"); - return; -fail: - xe_pm_runtime_put(xe); - drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err)); - xe_device_declare_wedged(xe); + vf_migration_init_early(xe); } -static void migration_worker_func(struct work_struct *w) +/** + * xe_sriov_vf_init_late() - SR-IOV VF late initialization functions. + * @xe: the &xe_device to initialize + * + * This function initializes code for CCS migration. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_vf_init_late(struct xe_device *xe) { - struct xe_device *xe = container_of(w, struct xe_device, - sriov.vf.migration.worker); - - vf_post_migration_recovery(xe); + return xe_sriov_vf_ccs_init(xe); } -static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe) +static int sa_info_vf_ccs(struct seq_file *m, void *data) { - struct xe_gt *gt; - unsigned int id; + struct drm_info_node *node = m->private; + struct xe_device *xe = to_xe_device(node->minor->dev); + struct drm_printer p = drm_seq_file_printer(m); - for_each_gt(gt, xe, id) { - if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) { - xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n"); - return false; - } - } - return true; + xe_sriov_vf_ccs_print(xe, &p); + return 0; } +static const struct drm_info_list debugfs_list[] = { + { .name = "sa_info_vf_ccs", .show = sa_info_vf_ccs }, +}; + /** - * xe_sriov_vf_start_migration_recovery - Start VF migration recovery. - * @xe: the &xe_device to start recovery on + * xe_sriov_vf_debugfs_register - Register VF debugfs attributes. + * @xe: the &xe_device + * @root: the root &dentry * - * This function shall be called only by VF. + * Prepare debugfs attributes exposed by the VF. */ -void xe_sriov_vf_start_migration_recovery(struct xe_device *xe) +void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root) { - bool started; - - xe_assert(xe, IS_SRIOV_VF(xe)); - - if (!vf_ready_to_recovery_on_all_gts(xe)) - return; - - WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0); - /* Ensure other threads see that no flags are set now. */ - smp_mb(); - - started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker); - drm_info(&xe->drm, "VF migration recovery %s\n", started ? 
- "scheduled" : "already in progress"); + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), + root, xe->drm.primary); } diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h index 7b8622cff2b7..e967d4166a43 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf.h @@ -6,9 +6,15 @@ #ifndef _XE_SRIOV_VF_H_ #define _XE_SRIOV_VF_H_ +#include <linux/types.h> + +struct dentry; struct xe_device; void xe_sriov_vf_init_early(struct xe_device *xe); -void xe_sriov_vf_start_migration_recovery(struct xe_device *xe); +int xe_sriov_vf_init_late(struct xe_device *xe); +bool xe_sriov_vf_migration_supported(struct xe_device *xe); +void xe_sriov_vf_migration_disable(struct xe_device *xe, const char *fmt, ...); +void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root); #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c new file mode 100644 index 000000000000..797a4b866226 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c @@ -0,0 +1,480 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "instructions/xe_mi_commands.h" +#include "instructions/xe_gpu_commands.h" +#include "xe_bb.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_exec_queue.h" +#include "xe_exec_queue_types.h" +#include "xe_gt_sriov_vf.h" +#include "xe_guc.h" +#include "xe_guc_submit.h" +#include "xe_lrc.h" +#include "xe_migrate.h" +#include "xe_pm.h" +#include "xe_sa.h" +#include "xe_sriov_printk.h" +#include "xe_sriov_vf.h" +#include "xe_sriov_vf_ccs.h" +#include "xe_sriov_vf_ccs_types.h" + +/** + * DOC: VF save/restore of compression Meta Data + * + * VF KMD registers two special contexts/LRCAs. + * + * Save Context/LRCA: contain necessary cmds+page table to trigger Meta data / + * compression control surface (Aka CCS) save in regular System memory in VM. 
+ * + * Restore Context/LRCA: contain necessary cmds+page table to trigger Meta data / + * compression control surface (Aka CCS) Restore from regular System memory in + * VM to corresponding CCS pool. + * + * The diagram below explains the steps needed for VF save/Restore of compression Meta Data:: + * + * CCS Save CCS Restore VF KMD Guc BCS + * LRCA LRCA + * | | | | | + * | | | | | + * | Create Save LRCA | | | + * [ ]<----------------------------- [ ] | | + * | | | | | + * | | | | | + * | | | Register save LRCA | | + * | | | with Guc | | + * | | [ ]--------------------------->[ ] | + * | | | | | + * | | Create restore LRCA | | | + * | [ ]<------------------[ ] | | + * | | | | | + * | | | Register restore LRCA | | + * | | | with Guc | | + * | | [ ]--------------------------->[ ] | + * | | | | | + * | | | | | + * | | [ ]------------------------- | | + * | | [ ] Allocate main memory. | | | + * | | [ ] Allocate CCS memory. | | | + * | | [ ] Update Main memory & | | | + * [ ]<------------------------------[ ] CCS pages PPGTT + BB | | | + * | [ ]<------------------[ ] cmds to save & restore.| | | + * | | [ ]<------------------------ | | + * | | | | | + * | | | | | + * | | | | | + * : : : : : + * ---------------------------- VF Paused ------------------------------------- + * | | | | | + * | | | | | + * | | | |Schedule | + * | | | |CCS Save | + * | | | | LRCA | + * | | | [ ]------>[ ] + * | | | | | + * | | | | | + * | | | |CCS save | + * | | | |completed| + * | | | [ ]<------[ ] + * | | | | | + * : : : : : + * ---------------------------- VM Migrated ----------------------------------- + * | | | | | + * | | | | | + * : : : : : + * ---------------------------- VF Resumed ------------------------------------ + * | | | | | + * | | | | | + * | | [ ]-------------- | | + * | | [ ] Fix up GGTT | | | + * | | [ ]<------------- | | + * | | | | | + * | | | | | + * | | | Notify VF_RESFIX_DONE | | + * | | [ ]--------------------------->[ ] | + * | | | | | + * | | | |Schedule | + * | | | |CCS 
| + * | | | |Restore | + * | | | |LRCA | + * | | | [ ]------>[ ] + * | | | | | + * | | | | | + * | | | |CCS | + * | | | |restore | + * | | | |completed| + * | | | [ ]<------[ ] + * | | | | | + * | | | | | + * | | | VF_RESFIX_DONE complete | | + * | | | notification | | + * | | [ ]<---------------------------[ ] | + * | | | | | + * | | | | | + * : : : : : + * ------------------------- Continue VM restore ------------------------------ + */ + +static u64 get_ccs_bb_pool_size(struct xe_device *xe) +{ + u64 sys_mem_size, ccs_mem_size, ptes, bb_pool_size; + struct sysinfo si; + + si_meminfo(&si); + sys_mem_size = si.totalram * si.mem_unit; + ccs_mem_size = div64_u64(sys_mem_size, NUM_BYTES_PER_CCS_BYTE(xe)); + ptes = DIV_ROUND_UP_ULL(sys_mem_size + ccs_mem_size, XE_PAGE_SIZE); + + /** + * We need below BB size to hold PTE mappings and some DWs for copy + * command. In reality, we need space for many copy commands. So, let + * us allocate double the calculated size which is enough to hold GPU + * instructions for the whole region. + */ + bb_pool_size = ptes * sizeof(u32); + + return round_up(bb_pool_size * 2, SZ_1M); +} + +static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_sa_manager *sa_manager; + u64 bb_pool_size; + int offset, err; + + bb_pool_size = get_ccs_bb_pool_size(xe); + xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n", + ctx->ctx_id ? 
"Restore" : "Save", bb_pool_size / SZ_1M); + + sa_manager = xe_sa_bo_manager_init(tile, bb_pool_size, SZ_16); + + if (IS_ERR(sa_manager)) { + xe_sriov_err(xe, "Suballocator init failed with error: %pe\n", + sa_manager); + err = PTR_ERR(sa_manager); + return err; + } + + offset = 0; + xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP, + bb_pool_size); + + offset = bb_pool_size - sizeof(u32); + xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END); + + ctx->mem.ccs_bb_pool = sa_manager; + + return 0; +} + +static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx) +{ + u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool); + struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); + u32 dw[10], i = 0; + + /* + * XXX: Save/restore fixes — for some reason, the GuC only accepts the + * save/restore context if the LRC head pointer is zero. This is evident + * from repeated VF migrations failing when the LRC head pointer is + * non-zero. + */ + lrc->ring.tail = 0; + xe_lrc_set_ring_head(lrc, 0); + + dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE; + dw[i++] = MI_BATCH_BUFFER_START | XE_INSTR_NUM_DW(3); + dw[i++] = lower_32_bits(addr); + dw[i++] = upper_32_bits(addr); + dw[i++] = MI_NOOP; + dw[i++] = MI_NOOP; + + xe_lrc_write_ring(lrc, dw, i * sizeof(u32)); + xe_lrc_set_ring_tail(lrc, lrc->ring.tail); +} + +/** + * xe_sriov_vf_ccs_rebase - Rebase GGTT addresses for CCS save / restore + * @xe: the &xe_device. 
+ */ +void xe_sriov_vf_ccs_rebase(struct xe_device *xe) +{ + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + + if (!IS_VF_CCS_READY(xe)) + return; + + for_each_ccs_rw_ctx(ctx_id) { + struct xe_sriov_vf_ccs_ctx *ctx = + &xe->sriov.vf.ccs.contexts[ctx_id]; + + ccs_rw_update_ring(ctx); + } +} + +static int register_save_restore_context(struct xe_sriov_vf_ccs_ctx *ctx) +{ + int ctx_type; + + switch (ctx->ctx_id) { + case XE_SRIOV_VF_CCS_READ_CTX: + ctx_type = GUC_CONTEXT_COMPRESSION_SAVE; + break; + case XE_SRIOV_VF_CCS_WRITE_CTX: + ctx_type = GUC_CONTEXT_COMPRESSION_RESTORE; + break; + default: + return -EINVAL; + } + + xe_guc_register_vf_exec_queue(ctx->mig_q, ctx_type); + return 0; +} + +/** + * xe_sriov_vf_ccs_register_context - Register read/write contexts with guc. + * @xe: the &xe_device to register contexts on. + * + * This function registers read and write contexts with Guc. Re-registration + * is needed whenever resuming from pm runtime suspend. + * + * Return: 0 on success. Negative error code on failure. + */ +int xe_sriov_vf_ccs_register_context(struct xe_device *xe) +{ + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + struct xe_sriov_vf_ccs_ctx *ctx; + int err; + + xe_assert(xe, IS_VF_CCS_READY(xe)); + + for_each_ccs_rw_ctx(ctx_id) { + ctx = &xe->sriov.vf.ccs.contexts[ctx_id]; + err = register_save_restore_context(ctx); + if (err) + return err; + } + + return err; +} + +/* + * Whether GuC requires CCS copy BBs for VF migration. + * @xe: the &xe_device instance. + * + * Only selected platforms require VF KMD to maintain CCS copy BBs and linked LRCAs. + * + * Return: true if VF driver must participate in the CCS migration, false otherwise. + */ +static bool vf_migration_ccs_bb_needed(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_VF(xe)); + + return !IS_DGFX(xe) && xe_device_has_flat_ccs(xe); +} + +/* + * Check for disable migration due to no CCS BBs support in GuC FW. + * @xe: the &xe_device instance. 
+ * + * Performs late disable of VF migration feature in case GuC FW cannot support it. + * + * Returns: True if VF migration with CCS BBs is supported, false otherwise. + */ +static bool vf_migration_ccs_bb_support_check(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_uc_fw_version guc_version; + + xe_gt_sriov_vf_guc_versions(gt, NULL, &guc_version); + if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) { + xe_sriov_vf_migration_disable(xe, + "CCS migration requires GuC ABI >= 1.23 but only %u.%u found", + guc_version.major, guc_version.minor); + return false; + } + + return true; +} + +static void xe_sriov_vf_ccs_fini(void *arg) +{ + struct xe_sriov_vf_ccs_ctx *ctx = arg; + struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); + + /* + * Make TAIL = HEAD in the ring so that no issues are seen if Guc + * submits this context to HW on VF pause after unbinding device. + */ + xe_lrc_set_ring_tail(lrc, xe_lrc_ring_head(lrc)); + xe_exec_queue_put(ctx->mig_q); +} + +/** + * xe_sriov_vf_ccs_init - Setup LRCA for save & restore. + * @xe: the &xe_device to start recovery on + * + * This function shall be called only by VF. It initializes + * LRCA and suballocator needed for CCS save & restore. + * + * Return: 0 on success. Negative error code on failure. 
+ */ +int xe_sriov_vf_ccs_init(struct xe_device *xe) +{ + struct xe_tile *tile = xe_device_get_root_tile(xe); + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + struct xe_sriov_vf_ccs_ctx *ctx; + struct xe_exec_queue *q; + u32 flags; + int err; + + xe_assert(xe, IS_SRIOV_VF(xe)); + + if (!xe_sriov_vf_migration_supported(xe) || + !vf_migration_ccs_bb_needed(xe) || + !vf_migration_ccs_bb_support_check(xe)) + return 0; + + for_each_ccs_rw_ctx(ctx_id) { + ctx = &xe->sriov.vf.ccs.contexts[ctx_id]; + ctx->ctx_id = ctx_id; + + flags = EXEC_QUEUE_FLAG_KERNEL | + EXEC_QUEUE_FLAG_PERMANENT | + EXEC_QUEUE_FLAG_MIGRATE; + q = xe_exec_queue_create_bind(xe, tile, flags, 0); + if (IS_ERR(q)) { + err = PTR_ERR(q); + goto err_ret; + } + ctx->mig_q = q; + + err = alloc_bb_pool(tile, ctx); + if (err) + goto err_free_queue; + + ccs_rw_update_ring(ctx); + + err = register_save_restore_context(ctx); + if (err) + goto err_free_queue; + + err = devm_add_action_or_reset(xe->drm.dev, + xe_sriov_vf_ccs_fini, + ctx); + if (err) + goto err_ret; + } + + xe->sriov.vf.ccs.initialized = 1; + + return 0; + +err_free_queue: + xe_exec_queue_put(q); + +err_ret: + return err; +} + +/** + * xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO. + * @bo: the &buffer object to which batch buffer commands will be added. + * + * This function shall be called only by VF. It inserts the PTEs and copy + * command instructions in the BO by calling xe_migrate_ccs_rw_copy() + * function. + * + * Returns: 0 if successful, negative error code on failure. + */ +int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + struct xe_sriov_vf_ccs_ctx *ctx; + struct xe_tile *tile; + struct xe_bb *bb; + int err = 0; + + xe_assert(xe, IS_VF_CCS_READY(xe)); + + tile = xe_device_get_root_tile(xe); + + for_each_ccs_rw_ctx(ctx_id) { + bb = bo->bb_ccs[ctx_id]; + /* bb should be NULL here. 
Assert if not NULL */ + xe_assert(xe, !bb); + + ctx = &xe->sriov.vf.ccs.contexts[ctx_id]; + err = xe_migrate_ccs_rw_copy(tile, ctx->mig_q, bo, ctx_id); + } + return err; +} + +/** + * xe_sriov_vf_ccs_detach_bo - Remove CCS read write commands from the BO. + * @bo: the &buffer object from which batch buffer commands will be removed. + * + * This function shall be called only by VF. It removes the PTEs and copy + * command instructions from the BO. Make sure to update the BB with MI_NOOP + * before freeing. + * + * Returns: 0 if successful. + */ +int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + struct xe_bb *bb; + + xe_assert(xe, IS_VF_CCS_READY(xe)); + + if (!xe_bo_has_valid_ccs_bb(bo)) + return 0; + + for_each_ccs_rw_ctx(ctx_id) { + bb = bo->bb_ccs[ctx_id]; + if (!bb) + continue; + + memset(bb->cs, MI_NOOP, bb->len * sizeof(u32)); + xe_bb_free(bb, NULL); + bo->bb_ccs[ctx_id] = NULL; + } + return 0; +} + +/** + * xe_sriov_vf_ccs_print - Print VF CCS details. + * @xe: the &xe_device + * @p: the &drm_printer + * + * This function is for VF use only. + */ +void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p) +{ + struct xe_sa_manager *bb_pool; + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + + if (!IS_VF_CCS_READY(xe)) + return; + + xe_pm_runtime_get(xe); + + for_each_ccs_rw_ctx(ctx_id) { + bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool; + if (!bb_pool) + break; + + drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? 
"write" : "read"); + drm_printf(p, "-------------------------\n"); + drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool)); + drm_puts(p, "\n"); + } + + xe_pm_runtime_put(xe); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h new file mode 100644 index 000000000000..f8ca6efce9ee --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_CCS_H_ +#define _XE_SRIOV_VF_CCS_H_ + +#include "xe_device_types.h" +#include "xe_sriov.h" +#include "xe_sriov_vf_ccs_types.h" + +struct drm_printer; +struct xe_device; +struct xe_bo; + +int xe_sriov_vf_ccs_init(struct xe_device *xe); +int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo); +int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo); +int xe_sriov_vf_ccs_register_context(struct xe_device *xe); +void xe_sriov_vf_ccs_rebase(struct xe_device *xe); +void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p); + +static inline bool xe_sriov_vf_ccs_ready(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_VF(xe)); + return xe->sriov.vf.ccs.initialized; +} + +#define IS_VF_CCS_READY(xe) ({ \ + struct xe_device *xe__ = (xe); \ + IS_SRIOV_VF(xe__) && xe_sriov_vf_ccs_ready(xe__); \ + }) + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h new file mode 100644 index 000000000000..22c499943d2a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_CCS_TYPES_H_ +#define _XE_SRIOV_VF_CCS_TYPES_H_ + +#include <linux/types.h> + +#define for_each_ccs_rw_ctx(id__) \ + for ((id__) = 0; (id__) < XE_SRIOV_VF_CCS_CTX_COUNT; (id__)++) + +enum xe_sriov_vf_ccs_rw_ctxs { + XE_SRIOV_VF_CCS_READ_CTX, + XE_SRIOV_VF_CCS_WRITE_CTX, + XE_SRIOV_VF_CCS_CTX_COUNT +}; + +struct 
xe_migrate; +struct xe_sa_manager; + +/** + * struct xe_sriov_vf_ccs_ctx - VF CCS migration context data. + */ +struct xe_sriov_vf_ccs_ctx { + /** @ctx_id: Id to which context it belongs to */ + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + + /** @mig_q: exec queues used for migration */ + struct xe_exec_queue *mig_q; + + /** @mem: memory data */ + struct { + /** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */ + struct xe_sa_manager *ccs_bb_pool; + } mem; +}; + +/** + * struct xe_sriov_vf_ccs - The VF CCS migration support data. + */ +struct xe_sriov_vf_ccs { + /** @contexts: CCS read and write contexts for VF. */ + struct xe_sriov_vf_ccs_ctx contexts[XE_SRIOV_VF_CCS_CTX_COUNT]; + + /** @initialized: Initialization of VF CCS is completed or not. */ + bool initialized; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h new file mode 100644 index 000000000000..d5f72d667817 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_TYPES_H_ +#define _XE_SRIOV_VF_TYPES_H_ + +#include <linux/types.h> +#include <linux/workqueue_types.h> + +#include "xe_sriov_vf_ccs_types.h" + +/** + * struct xe_sriov_vf_relay_version - PF ABI version details. + */ +struct xe_sriov_vf_relay_version { + /** @major: major version. */ + u16 major; + /** @minor: minor version. */ + u16 minor; +}; + +/** + * struct xe_device_vf - Xe Virtual Function related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_VF mode. + */ +struct xe_device_vf { + /** @pf_version: negotiated VF/PF ABI version. 
*/ + struct xe_sriov_vf_relay_version pf_version; + + /** @migration: VF Migration state data */ + struct { + /** + * @migration.disabled: flag indicating if migration support + * was turned off due to missing prerequisites + */ + bool disabled; + } migration; + + /** @ccs: VF CCS state data */ + struct xe_sriov_vf_ccs ccs; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vfio.c b/drivers/gpu/drm/xe/xe_sriov_vfio.c new file mode 100644 index 000000000000..3da81af97b8b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vfio.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <drm/intel/xe_sriov_vfio.h> +#include <linux/cleanup.h> + +#include "xe_pci.h" +#include "xe_pm.h" +#include "xe_sriov_pf_control.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_migration.h" + +struct xe_device *xe_sriov_vfio_get_pf(struct pci_dev *pdev) +{ + return xe_pci_to_pf_device(pdev); +} +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_get_pf, "xe-vfio-pci"); + +bool xe_sriov_vfio_migration_supported(struct xe_device *xe) +{ + if (!IS_SRIOV_PF(xe)) + return false; + + return xe_sriov_pf_migration_supported(xe); +} +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_migration_supported, "xe-vfio-pci"); + +#define DEFINE_XE_SRIOV_VFIO_FUNCTION(_type, _func, _impl) \ +_type xe_sriov_vfio_##_func(struct xe_device *xe, unsigned int vfid) \ +{ \ + if (!IS_SRIOV_PF(xe)) \ + return -EPERM; \ + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe)) \ + return -EINVAL; \ + \ + guard(xe_pm_runtime_noresume)(xe); \ + \ + return xe_sriov_pf_##_impl(xe, vfid); \ +} \ +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_##_func, "xe-vfio-pci") + +DEFINE_XE_SRIOV_VFIO_FUNCTION(int, wait_flr_done, control_wait_flr); +DEFINE_XE_SRIOV_VFIO_FUNCTION(int, suspend_device, control_pause_vf); +DEFINE_XE_SRIOV_VFIO_FUNCTION(int, resume_device, control_resume_vf); +DEFINE_XE_SRIOV_VFIO_FUNCTION(int, stop_copy_enter, control_trigger_save_vf); 
+DEFINE_XE_SRIOV_VFIO_FUNCTION(int, stop_copy_exit, control_finish_save_vf); +DEFINE_XE_SRIOV_VFIO_FUNCTION(int, resume_data_enter, control_trigger_restore_vf); +DEFINE_XE_SRIOV_VFIO_FUNCTION(int, resume_data_exit, control_finish_restore_vf); +DEFINE_XE_SRIOV_VFIO_FUNCTION(int, error, control_stop_vf); +DEFINE_XE_SRIOV_VFIO_FUNCTION(ssize_t, stop_copy_size, migration_size); + +ssize_t xe_sriov_vfio_data_read(struct xe_device *xe, unsigned int vfid, + char __user *buf, size_t len) +{ + if (!IS_SRIOV_PF(xe)) + return -EPERM; + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe)) + return -EINVAL; + + guard(xe_pm_runtime_noresume)(xe); + + return xe_sriov_pf_migration_read(xe, vfid, buf, len); +} +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_data_read, "xe-vfio-pci"); + +ssize_t xe_sriov_vfio_data_write(struct xe_device *xe, unsigned int vfid, + const char __user *buf, size_t len) +{ + if (!IS_SRIOV_PF(xe)) + return -EPERM; + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe)) + return -EINVAL; + + guard(xe_pm_runtime_noresume)(xe); + + return xe_sriov_pf_migration_write(xe, vfid, buf, len); +} +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_data_write, "xe-vfio-pci"); diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index c77b5c317fa0..10e88f2c9615 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -5,6 +5,7 @@ #include "xe_step.h" +#include <kunit/visibility.h> #include <linux/bitfield.h> #include "xe_device.h" @@ -255,3 +256,4 @@ const char *xe_step_name(enum xe_step step) return "**"; } } +EXPORT_SYMBOL_IF_KUNIT(xe_step_name); diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index 1f710b3fc599..1662bfddd4bc 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -14,6 +14,7 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_heci_gsc.h" +#include "xe_i2c.h" #include "xe_mmio.h" #include "xe_pcode_api.h" #include 
"xe_vsec.h" @@ -21,15 +22,18 @@ #define MAX_SCRATCH_MMIO 8 /** - * DOC: Xe Boot Survivability + * DOC: Survivability Mode * - * Boot Survivability is a software based workflow for recovering a system in a failed boot state + * Survivability Mode is a software based workflow for recovering a system in a failed boot state * Here system recoverability is concerned with recovering the firmware responsible for boot. * - * This is implemented by loading the driver with bare minimum (no drm card) to allow the firmware - * to be flashed through mei and collect telemetry. The driver's probe flow is modified - * such that it enters survivability mode when pcode initialization is incomplete and boot status - * denotes a failure. + * Boot Survivability + * =================== + * + * Boot Survivability is implemented by loading the driver with bare minimum (no drm card) to allow + * the firmware to be flashed through mei driver and collect telemetry. The driver's probe flow is + * modified such that it enters survivability mode when pcode initialization is incomplete and boot + * status denotes a failure. * * Survivability mode can also be entered manually using the survivability mode attribute available * through configfs which is beneficial in several usecases. It can be used to address scenarios @@ -40,12 +44,14 @@ * * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode * + * It is the responsibility of the user to clear the mode once firmware flash is complete. 
+ * * Refer :ref:`xe_configfs` for more details on how to use configfs * * Survivability mode is indicated by the below admin-only readable sysfs which provides additional * debug information:: * - * /sys/bus/pci/devices/<device>/surivability_mode + * /sys/bus/pci/devices/<device>/survivability_mode * * Capability Information: * Provides boot status @@ -55,6 +61,22 @@ * Provides history of previous failures * Auxiliary Information * Certain failures may have information in addition to postcode information + * + * Runtime Survivability + * ===================== + * + * Certain runtime firmware errors can cause the device to enter a wedged state + * (:ref:`xe-device-wedging`) requiring a firmware flash to restore normal operation. + * Runtime Survivability Mode indicates that a firmware flash is necessary to recover the device and + * is indicated by the presence of survivability mode sysfs:: + * + * /sys/bus/pci/devices/<device>/survivability_mode + * + * Survivability mode sysfs provides information about the type of survivability mode. + * + * When such errors occur, userspace is notified with the drm device wedged uevent and runtime + * survivability mode. User can then initiate a firmware flash using userspace tools like fwupd + * to restore device to normal operation. 
*/ static u32 aux_history_offset(u32 reg_value) @@ -120,6 +142,14 @@ static void log_survivability_info(struct pci_dev *pdev) } } +static int check_boot_failure(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + + return survivability->boot_status == NON_CRITICAL_FAILURE || + survivability->boot_status == CRITICAL_FAILURE; +} + static ssize_t survivability_mode_show(struct device *dev, struct device_attribute *attr, char *buff) { @@ -129,6 +159,12 @@ static ssize_t survivability_mode_show(struct device *dev, struct xe_survivability_info *info = survivability->info; int index = 0, count = 0; + count += sysfs_emit_at(buff, count, "Survivability mode type: %s\n", + survivability->type ? "Runtime" : "Boot"); + + if (!check_boot_failure(xe)) + return count; + for (index = 0; index < MAX_SCRATCH_MMIO; index++) { if (info[index].reg) count += sysfs_emit_at(buff, count, "%s: 0x%x - 0x%x\n", info[index].name, @@ -146,16 +182,14 @@ static void xe_survivability_mode_fini(void *arg) struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct device *dev = &pdev->dev; - xe_configfs_clear_survivability_mode(pdev); sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr); } -static int enable_survivability_mode(struct pci_dev *pdev) +static int create_survivability_sysfs(struct pci_dev *pdev) { struct device *dev = &pdev->dev; struct xe_device *xe = pdev_to_xe_device(pdev); - struct xe_survivability *survivability = &xe->survivability; - int ret = 0; + int ret; /* create survivability mode sysfs */ ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr); @@ -169,35 +203,72 @@ static int enable_survivability_mode(struct pci_dev *pdev) if (ret) return ret; + return 0; +} + +static int enable_boot_survivability_mode(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct xe_device *xe = pdev_to_xe_device(pdev); + struct xe_survivability *survivability = &xe->survivability; + int ret = 0; + + ret = 
create_survivability_sysfs(pdev); + if (ret) + return ret; + /* Make sure xe_heci_gsc_init() knows about survivability mode */ survivability->mode = true; ret = xe_heci_gsc_init(xe); - if (ret) { - /* - * But if it fails, device can't enter survivability - * so move it back for correct error handling - */ - survivability->mode = false; - return ret; - } + if (ret) + goto err; xe_vsec_init(xe); + ret = xe_i2c_probe(xe); + if (ret) + goto err; + dev_err(dev, "In Survivability Mode\n"); return 0; + +err: + survivability->mode = false; + return ret; +} + +static int init_survivability_mode(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + struct xe_survivability_info *info; + + survivability->size = MAX_SCRATCH_MMIO; + + info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info), + GFP_KERNEL); + if (!info) + return -ENOMEM; + + survivability->info = info; + + populate_survivability_info(xe); + + return 0; } /** - * xe_survivability_mode_is_enabled - check if survivability mode is enabled + * xe_survivability_mode_is_boot_enabled - check if boot survivability mode is enabled * @xe: xe device instance * - * Returns true if in survivability mode, false otherwise + * Returns true if in boot survivability mode, false otherwise */ -bool xe_survivability_mode_is_enabled(struct xe_device *xe) +bool xe_survivability_mode_is_boot_enabled(struct xe_device *xe) { - return xe->survivability.mode; + struct xe_survivability *survivability = &xe->survivability; + + return survivability->mode && survivability->type == XE_SURVIVABILITY_TYPE_BOOT; } /** @@ -218,19 +289,10 @@ bool xe_survivability_mode_is_requested(struct xe_device *xe) u32 data; bool survivability_mode; - if (!IS_DGFX(xe) || IS_SRIOV_VF(xe)) + if (!IS_DGFX(xe) || IS_SRIOV_VF(xe) || xe->info.platform < XE_BATTLEMAGE) return false; survivability_mode = xe_configfs_get_survivability_mode(pdev); - - if (xe->info.platform < XE_BATTLEMAGE) { - if (survivability_mode) { - 
dev_err(&pdev->dev, "Survivability Mode is not supported on this card\n"); - xe_configfs_clear_survivability_mode(pdev); - } - return false; - } - /* Enable survivability mode if set via configfs */ if (survivability_mode) return true; @@ -238,44 +300,78 @@ bool xe_survivability_mode_is_requested(struct xe_device *xe) data = xe_mmio_read32(mmio, PCODE_SCRATCH(0)); survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data); - return survivability->boot_status == NON_CRITICAL_FAILURE || - survivability->boot_status == CRITICAL_FAILURE; + return check_boot_failure(xe); } /** - * xe_survivability_mode_enable - Initialize and enable the survivability mode + * xe_survivability_mode_runtime_enable - Initialize and enable runtime survivability mode * @xe: xe device instance * - * Initialize survivability information and enable survivability mode + * Initialize survivability information and enable runtime survivability mode. + * Runtime survivability mode is enabled when certain errors cause the device to be + * in non-recoverable state. The device is declared wedged with the appropriate + * recovery method and survivability mode sysfs exposed to userspace * - * Return: 0 if survivability mode is enabled or not requested; negative error - * code otherwise. + * Return: 0 if runtime survivability mode is enabled, negative error code otherwise. 
*/ -int xe_survivability_mode_enable(struct xe_device *xe) +int xe_survivability_mode_runtime_enable(struct xe_device *xe) { struct xe_survivability *survivability = &xe->survivability; - struct xe_survivability_info *info; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int ret; - if (!xe_survivability_mode_is_requested(xe)) - return 0; + if (!IS_DGFX(xe) || IS_SRIOV_VF(xe) || xe->info.platform < XE_BATTLEMAGE) { + dev_err(&pdev->dev, "Runtime Survivability Mode not supported\n"); + return -EINVAL; + } - survivability->size = MAX_SCRATCH_MMIO; + ret = init_survivability_mode(xe); + if (ret) + return ret; - info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info), - GFP_KERNEL); - if (!info) - return -ENOMEM; + ret = create_survivability_sysfs(pdev); + if (ret) + dev_err(&pdev->dev, "Failed to create survivability mode sysfs\n"); - survivability->info = info; + survivability->type = XE_SURVIVABILITY_TYPE_RUNTIME; + dev_err(&pdev->dev, "Runtime Survivability mode enabled\n"); - populate_survivability_info(xe); + xe_device_set_wedged_method(xe, DRM_WEDGE_RECOVERY_VENDOR); + xe_device_declare_wedged(xe); + dev_err(&pdev->dev, "Firmware flash required, Please refer to the userspace documentation for more details!\n"); + + return 0; +} + +/** + * xe_survivability_mode_boot_enable - Initialize and enable boot survivability mode + * @xe: xe device instance + * + * Initialize survivability information and enable boot survivability mode + * + * Return: 0 if boot survivability mode is enabled or not requested, negative error + * code otherwise. 
+ */ +int xe_survivability_mode_boot_enable(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int ret; + + if (!xe_survivability_mode_is_requested(xe)) + return 0; + + ret = init_survivability_mode(xe); + if (ret) + return ret; - /* Only log debug information and exit if it is a critical failure */ + /* Log breadcrumbs but do not enter survivability mode for Critical boot errors */ if (survivability->boot_status == CRITICAL_FAILURE) { log_survivability_info(pdev); return -ENXIO; } - return enable_survivability_mode(pdev); + survivability->type = XE_SURVIVABILITY_TYPE_BOOT; + + return enable_boot_survivability_mode(pdev); } diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h index 02231c2bf008..1cc94226aa82 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.h +++ b/drivers/gpu/drm/xe/xe_survivability_mode.h @@ -10,8 +10,9 @@ struct xe_device; -int xe_survivability_mode_enable(struct xe_device *xe); -bool xe_survivability_mode_is_enabled(struct xe_device *xe); +int xe_survivability_mode_boot_enable(struct xe_device *xe); +int xe_survivability_mode_runtime_enable(struct xe_device *xe); +bool xe_survivability_mode_is_boot_enabled(struct xe_device *xe); bool xe_survivability_mode_is_requested(struct xe_device *xe); #endif /* _XE_SURVIVABILITY_MODE_H_ */ diff --git a/drivers/gpu/drm/xe/xe_survivability_mode_types.h b/drivers/gpu/drm/xe/xe_survivability_mode_types.h index 19d433e253df..cd65a5d167c9 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode_types.h +++ b/drivers/gpu/drm/xe/xe_survivability_mode_types.h @@ -9,6 +9,11 @@ #include <linux/limits.h> #include <linux/types.h> +enum xe_survivability_type { + XE_SURVIVABILITY_TYPE_BOOT, + XE_SURVIVABILITY_TYPE_RUNTIME, +}; + struct xe_survivability_info { char name[NAME_MAX]; u32 reg; @@ -30,6 +35,9 @@ struct xe_survivability { /** @mode: boolean to indicate survivability mode */ 
bool mode; + + /** @type: survivability type */ + enum xe_survivability_type type; }; #endif /* _XE_SURVIVABILITY_MODE_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 6345896585de..f97e0af6a9b0 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -3,16 +3,21 @@ * Copyright © 2024 Intel Corporation */ +#include <drm/drm_drv.h> + #include "xe_bo.h" +#include "xe_exec_queue_types.h" #include "xe_gt_stats.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_module.h" +#include "xe_pm.h" #include "xe_pt.h" #include "xe_svm.h" +#include "xe_tile.h" #include "xe_ttm_vram_mgr.h" #include "xe_vm.h" #include "xe_vm_types.h" +#include "xe_vram_types.h" static bool xe_svm_range_in_vram(struct xe_svm_range *range) { @@ -21,9 +26,9 @@ static bool xe_svm_range_in_vram(struct xe_svm_range *range) * memory. */ - struct drm_gpusvm_range_flags flags = { + struct drm_gpusvm_pages_flags flags = { /* Pairs with WRITE_ONCE in drm_gpusvm.c */ - .__flags = READ_ONCE(range->base.flags.__flags), + .__flags = READ_ONCE(range->base.pages.flags.__flags), }; return flags.has_devmem_pages; @@ -45,30 +50,15 @@ static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r) return gpusvm_to_vm(r->gpusvm); } -static unsigned long xe_svm_range_start(struct xe_svm_range *range) -{ - return drm_gpusvm_range_start(&range->base); -} - -static unsigned long xe_svm_range_end(struct xe_svm_range *range) -{ - return drm_gpusvm_range_end(&range->base); -} - -static unsigned long xe_svm_range_size(struct xe_svm_range *range) -{ - return drm_gpusvm_range_size(&range->base); -} - -#define range_debug(r__, operaton__) \ +#define range_debug(r__, operation__) \ vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \ "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \ "start=0x%014lx, end=0x%014lx, size=%lu", \ - (operaton__), range_to_vm(&(r__)->base)->usm.asid, \ + (operation__), range_to_vm(&(r__)->base)->usm.asid, \ (r__)->base.gpusvm, 
\ xe_svm_range_in_vram((r__)) ? 1 : 0, \ xe_svm_range_has_vram_binding((r__)) ? 1 : 0, \ - (r__)->base.notifier_seq, \ + (r__)->base.pages.notifier_seq, \ xe_svm_range_start((r__)), xe_svm_range_end((r__)), \ xe_svm_range_size((r__))) @@ -77,11 +67,6 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) range_debug(range, operation); } -static void *xe_svm_devm_owner(struct xe_device *xe) -{ - return xe; -} - static struct drm_gpusvm_range * xe_svm_range_alloc(struct drm_gpusvm *gpusvm) { @@ -103,11 +88,6 @@ static void xe_svm_range_free(struct drm_gpusvm_range *range) kfree(range); } -static struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r) -{ - return container_of(r, struct xe_svm_range, base); -} - static void xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range, const struct mmu_notifier_range *mmu_range) @@ -124,8 +104,12 @@ xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range, &vm->svm.garbage_collector.range_list); spin_unlock(&vm->svm.garbage_collector.lock); - queue_work(xe_device_get_root_tile(xe)->primary_gt->usm.pf_wq, - &vm->svm.garbage_collector.work); + queue_work(xe->usm.pf_wq, &vm->svm.garbage_collector.work); +} + +static void xe_svm_tlb_inval_count_stats_incr(struct xe_gt *gt) +{ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT, 1); } static u8 @@ -144,7 +128,7 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r, range_debug(range, "NOTIFIER"); /* Skip if already unmapped or if no binding exist */ - if (range->base.flags.unmapped || !range->tile_present) + if (range->base.pages.flags.unmapped || !range->tile_present) return 0; range_debug(range, "NOTIFIER - EXECUTE"); @@ -160,8 +144,19 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r, */ for_each_tile(tile, xe, id) if (xe_pt_zap_ptes_range(tile, vm, range)) { - tile_mask |= BIT(id); - range->tile_invalidated |= BIT(id); + /* + * 
WRITE_ONCE pairs with READ_ONCE in + * xe_vm_has_valid_gpu_mapping() + */ + WRITE_ONCE(range->tile_invalidated, + range->tile_invalidated | BIT(id)); + + if (!(tile_mask & BIT(id))) { + xe_svm_tlb_inval_count_stats_incr(tile->primary_gt); + if (tile->media_gt) + xe_svm_tlb_inval_count_stats_incr(tile->media_gt); + tile_mask |= BIT(id); + } } return tile_mask; @@ -181,20 +176,35 @@ xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r, mmu_range); } +static s64 xe_svm_stats_ktime_us_delta(ktime_t start) +{ + return IS_ENABLED(CONFIG_DEBUG_FS) ? + ktime_us_delta(ktime_get(), start) : 0; +} + +static void xe_svm_tlb_inval_us_stats_incr(struct xe_gt *gt, ktime_t start) +{ + s64 us_delta = xe_svm_stats_ktime_us_delta(start); + + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_US, us_delta); +} + +static ktime_t xe_svm_stats_ktime_get(void) +{ + return IS_ENABLED(CONFIG_DEBUG_FS) ? ktime_get() : 0; +} + static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, struct drm_gpusvm_notifier *notifier, const struct mmu_notifier_range *mmu_range) { struct xe_vm *vm = gpusvm_to_vm(gpusvm); struct xe_device *xe = vm->xe; - struct xe_tile *tile; struct drm_gpusvm_range *r, *first; - struct xe_gt_tlb_invalidation_fence - fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; + struct xe_tile *tile; + ktime_t start = xe_svm_stats_ktime_get(); u64 adj_start = mmu_range->start, adj_end = mmu_range->end; - u8 tile_mask = 0; - u8 id; - u32 fence_id = 0; + u8 tile_mask = 0, id; long err; xe_svm_assert_in_notifier(vm); @@ -240,47 +250,20 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, xe_device_wmb(xe); - for_each_tile(tile, xe, id) { - if (tile_mask & BIT(id)) { - int err; - - xe_gt_tlb_invalidation_fence_init(tile->primary_gt, - &fence[fence_id], true); - - err = xe_gt_tlb_invalidation_range(tile->primary_gt, - &fence[fence_id], - adj_start, - adj_end, - vm->usm.asid); - if (WARN_ON_ONCE(err < 0)) - goto wait; - ++fence_id; - - if (!tile->media_gt) - 
continue; - - xe_gt_tlb_invalidation_fence_init(tile->media_gt, - &fence[fence_id], true); - - err = xe_gt_tlb_invalidation_range(tile->media_gt, - &fence[fence_id], - adj_start, - adj_end, - vm->usm.asid); - if (WARN_ON_ONCE(err < 0)) - goto wait; - ++fence_id; - } - } - -wait: - for (id = 0; id < fence_id; ++id) - xe_gt_tlb_invalidation_fence_wait(&fence[id]); + err = xe_vm_range_tilemask_tlb_inval(vm, adj_start, adj_end, tile_mask); + WARN_ON_ONCE(err); range_notifier_event_end: r = first; drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end) xe_svm_range_notifier_event_end(vm, r, mmu_range); + for_each_tile(tile, xe, id) { + if (tile_mask & BIT(id)) { + xe_svm_tlb_inval_us_stats_incr(tile->primary_gt, start); + if (tile->media_gt) + xe_svm_tlb_inval_us_stats_incr(tile->media_gt, start); + } + } } static int __xe_svm_garbage_collector(struct xe_vm *vm, @@ -302,24 +285,78 @@ static int __xe_svm_garbage_collector(struct xe_vm *vm, return 0; } +static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64 range_end) +{ + struct xe_vma *vma; + struct xe_vma_mem_attr default_attr = { + .preferred_loc = { + .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, + .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, + }, + .atomic_access = DRM_XE_ATOMIC_UNDEFINED, + }; + int err = 0; + + vma = xe_vm_find_vma_by_addr(vm, range_start); + if (!vma) + return -EINVAL; + + if (!(vma->gpuva.flags & XE_VMA_MADV_AUTORESET)) { + drm_dbg(&vm->xe->drm, "Skipping madvise reset for vma.\n"); + return 0; + } + + if (xe_vma_has_default_mem_attrs(vma)) + return 0; + + vm_dbg(&vm->xe->drm, "Existing VMA start=0x%016llx, vma_end=0x%016llx", + xe_vma_start(vma), xe_vma_end(vma)); + + if (xe_vma_start(vma) == range_start && xe_vma_end(vma) == range_end) { + default_attr.pat_index = vma->attr.default_pat_index; + default_attr.default_pat_index = vma->attr.default_pat_index; + vma->attr = default_attr; + } else { + vm_dbg(&vm->xe->drm, "Split VMA start=0x%016llx, 
vma_end=0x%016llx", + range_start, range_end); + err = xe_vm_alloc_cpu_addr_mirror_vma(vm, range_start, range_end - range_start); + if (err) { + drm_warn(&vm->xe->drm, "VMA SPLIT failed: %pe\n", ERR_PTR(err)); + xe_vm_kill(vm, true); + return err; + } + } + + /* + * On call from xe_svm_handle_pagefault original VMA might be changed + * signal this to lookup for VMA again. + */ + return -EAGAIN; +} + static int xe_svm_garbage_collector(struct xe_vm *vm) { struct xe_svm_range *range; - int err; + u64 range_start; + u64 range_end; + int err, ret = 0; lockdep_assert_held_write(&vm->lock); if (xe_vm_is_closed_or_banned(vm)) return -ENOENT; - spin_lock(&vm->svm.garbage_collector.lock); for (;;) { + spin_lock(&vm->svm.garbage_collector.lock); range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list, typeof(*range), garbage_collector_link); if (!range) break; + range_start = xe_svm_range_start(range); + range_end = xe_svm_range_end(range); + list_del(&range->garbage_collector_link); spin_unlock(&vm->svm.garbage_collector.lock); @@ -332,11 +369,17 @@ static int xe_svm_garbage_collector(struct xe_vm *vm) return err; } - spin_lock(&vm->svm.garbage_collector.lock); + err = xe_svm_range_set_default_attr(vm, range_start, range_end); + if (err) { + if (err == -EAGAIN) + ret = -EAGAIN; + else + return err; + } } spin_unlock(&vm->svm.garbage_collector.lock); - return 0; + return ret; } static void xe_svm_garbage_collector_work_func(struct work_struct *w) @@ -349,28 +392,22 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w) up_write(&vm->lock); } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) static struct xe_vram_region *page_to_vr(struct page *page) { return container_of(page_pgmap(page), struct xe_vram_region, pagemap); } -static struct xe_tile *vr_to_tile(struct xe_vram_region *vr) -{ - return container_of(vr, struct xe_tile, mem.vram); -} - static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr, 
struct page *page) { u64 dpa; - struct xe_tile *tile = vr_to_tile(vr); u64 pfn = page_to_pfn(page); u64 offset; - xe_tile_assert(tile, is_device_private_page(page)); - xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= vr->hpa_base); + xe_assert(vr->xe, is_device_private_page(page)); + xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= vr->hpa_base); offset = (pfn << PAGE_SHIFT) - vr->hpa_base; dpa = vr->dpa_base + offset; @@ -383,17 +420,75 @@ enum xe_svm_copy_dir { XE_SVM_COPY_TO_SRAM, }; -static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, - unsigned long npages, const enum xe_svm_copy_dir dir) +static void xe_svm_copy_kb_stats_incr(struct xe_gt *gt, + const enum xe_svm_copy_dir dir, + int kb) +{ + if (dir == XE_SVM_COPY_TO_VRAM) + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, kb); + else + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_KB, kb); +} + +static void xe_svm_copy_us_stats_incr(struct xe_gt *gt, + const enum xe_svm_copy_dir dir, + unsigned long npages, + ktime_t start) +{ + s64 us_delta = xe_svm_stats_ktime_us_delta(start); + + if (dir == XE_SVM_COPY_TO_VRAM) { + switch (npages) { + case 1: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US, + us_delta); + break; + case 16: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US, + us_delta); + break; + case 512: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US, + us_delta); + break; + } + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_US, + us_delta); + } else { + switch (npages) { + case 1: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_US, + us_delta); + break; + case 16: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_US, + us_delta); + break; + case 512: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_US, + us_delta); + break; + } + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_US, + us_delta); + } +} + +static int xe_svm_copy(struct page **pages, + struct drm_pagemap_addr *pagemap_addr, + unsigned long npages, const enum 
xe_svm_copy_dir dir, + struct dma_fence *pre_migrate_fence) { struct xe_vram_region *vr = NULL; - struct xe_tile *tile; + struct xe_gt *gt = NULL; + struct xe_device *xe; struct dma_fence *fence = NULL; unsigned long i; #define XE_VRAM_ADDR_INVALID ~0x0ull u64 vram_addr = XE_VRAM_ADDR_INVALID; int err = 0, pos = 0; bool sram = dir == XE_SVM_COPY_TO_SRAM; + ktime_t start = xe_svm_stats_ktime_get(); /* * This flow is complex: it locates physically contiguous device pages, @@ -415,12 +510,13 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, last = (i + 1) == npages; /* No CPU page and no device pages queue'd to copy */ - if (!dma_addr[i] && vram_addr == XE_VRAM_ADDR_INVALID) + if (!pagemap_addr[i].addr && vram_addr == XE_VRAM_ADDR_INVALID) continue; if (!vr && spage) { vr = page_to_vr(spage); - tile = vr_to_tile(vr); + gt = xe_migrate_exec_queue(vr->migrate)->gt; + xe = vr->xe; } XE_WARN_ON(spage && page_to_vr(spage) != vr); @@ -429,7 +525,7 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, * first device page, check if physical contiguous on subsequent * device pages. */ - if (dma_addr[i] && spage) { + if (pagemap_addr[i].addr && spage) { __vram_addr = xe_vram_region_page_to_dpa(vr, spage); if (vram_addr == XE_VRAM_ADDR_INVALID) { vram_addr = __vram_addr; @@ -437,6 +533,14 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, } match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr; + /* Expected with contiguous memory */ + xe_assert(vr->xe, match); + + if (pagemap_addr[i].order) { + i += NR_PAGES(pagemap_addr[i].order) - 1; + chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE); + last = (i + 1) == npages; + } } /* @@ -451,34 +555,41 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, int incr = (match && last) ? 
1 : 0; if (vram_addr != XE_VRAM_ADDR_INVALID) { + xe_svm_copy_kb_stats_incr(gt, dir, + (i - pos + incr) * + (PAGE_SIZE / SZ_1K)); if (sram) { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld", - vram_addr, (u64)dma_addr[pos], i - pos + incr); - __fence = xe_migrate_from_vram(tile->migrate, + vram_addr, + (u64)pagemap_addr[pos].addr, i - pos + incr); + __fence = xe_migrate_from_vram(vr->migrate, i - pos + incr, vram_addr, - dma_addr + pos); + &pagemap_addr[pos], + pre_migrate_fence); } else { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld", - (u64)dma_addr[pos], vram_addr, i - pos + incr); - __fence = xe_migrate_to_vram(tile->migrate, + (u64)pagemap_addr[pos].addr, vram_addr, + i - pos + incr); + __fence = xe_migrate_to_vram(vr->migrate, i - pos + incr, - dma_addr + pos, - vram_addr); + &pagemap_addr[pos], + vram_addr, + pre_migrate_fence); } if (IS_ERR(__fence)) { err = PTR_ERR(__fence); goto err_out; } - + pre_migrate_fence = NULL; dma_fence_put(fence); fence = __fence; } /* Setup physical address of next device page */ - if (dma_addr[i] && spage) { + if (pagemap_addr[i].addr && spage) { vram_addr = __vram_addr; pos = i; } else { @@ -487,26 +598,30 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, /* Extra mismatched device page, copy it */ if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) { + xe_svm_copy_kb_stats_incr(gt, dir, + (PAGE_SIZE / SZ_1K)); if (sram) { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d", - vram_addr, (u64)dma_addr[pos], 1); - __fence = xe_migrate_from_vram(tile->migrate, 1, + vram_addr, (u64)pagemap_addr[pos].addr, 1); + __fence = xe_migrate_from_vram(vr->migrate, 1, vram_addr, - dma_addr + pos); + &pagemap_addr[pos], + pre_migrate_fence); } else { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d", - (u64)dma_addr[pos], vram_addr, 
1); - __fence = xe_migrate_to_vram(tile->migrate, 1, - dma_addr + pos, - vram_addr); + (u64)pagemap_addr[pos].addr, vram_addr, 1); + __fence = xe_migrate_to_vram(vr->migrate, 1, + &pagemap_addr[pos], + vram_addr, + pre_migrate_fence); } if (IS_ERR(__fence)) { err = PTR_ERR(__fence); goto err_out; } - + pre_migrate_fence = NULL; dma_fence_put(fence); fence = __fence; } @@ -519,34 +634,53 @@ err_out: dma_fence_wait(fence, false); dma_fence_put(fence); } + if (pre_migrate_fence) + dma_fence_wait(pre_migrate_fence, false); + + /* + * XXX: We can't derive the GT here (or anywhere in this functions, but + * compute always uses the primary GT so accumulate stats on the likely + * GT of the fault. + */ + if (gt) + xe_svm_copy_us_stats_incr(gt, dir, npages, start); return err; #undef XE_MIGRATE_CHUNK_SIZE #undef XE_VRAM_ADDR_INVALID } -static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr, - unsigned long npages) +static int xe_svm_copy_to_devmem(struct page **pages, + struct drm_pagemap_addr *pagemap_addr, + unsigned long npages, + struct dma_fence *pre_migrate_fence) { - return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM); + return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_VRAM, + pre_migrate_fence); } -static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr, - unsigned long npages) +static int xe_svm_copy_to_ram(struct page **pages, + struct drm_pagemap_addr *pagemap_addr, + unsigned long npages, + struct dma_fence *pre_migrate_fence) { - return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM); + return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_SRAM, + pre_migrate_fence); } -static struct xe_bo *to_xe_bo(struct drm_gpusvm_devmem *devmem_allocation) +static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation) { return container_of(devmem_allocation, struct xe_bo, devmem_allocation); } -static void xe_svm_devmem_release(struct drm_gpusvm_devmem *devmem_allocation) +static 
void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation) { struct xe_bo *bo = to_xe_bo(devmem_allocation); + struct xe_device *xe = xe_bo_device(bo); + dma_fence_put(devmem_allocation->pre_migrate_fence); xe_bo_put_async(bo); + xe_pm_runtime_put(xe); } static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset) @@ -554,12 +688,12 @@ static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset) return PHYS_PFN(offset + vr->hpa_base); } -static struct drm_buddy *tile_to_buddy(struct xe_tile *tile) +static struct drm_buddy *vram_to_buddy(struct xe_vram_region *vram) { - return &tile->mem.vram.ttm.mm; + return &vram->ttm.mm; } -static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocation, +static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation, unsigned long npages, unsigned long *pfn) { struct xe_bo *bo = to_xe_bo(devmem_allocation); @@ -570,8 +704,7 @@ static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocatio list_for_each_entry(block, blocks, link) { struct xe_vram_region *vr = block->private; - struct xe_tile *tile = vr_to_tile(vr); - struct drm_buddy *buddy = tile_to_buddy(tile); + struct drm_buddy *buddy = vram_to_buddy(vr); u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block)); int i; @@ -582,7 +715,7 @@ static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocatio return 0; } -static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = { +static const struct drm_pagemap_devmem_ops dpagemap_devmem_ops = { .devmem_release = xe_svm_devmem_release, .populate_devmem_pfn = xe_svm_populate_devmem_pfn, .copy_to_devmem = xe_svm_copy_to_devmem, @@ -615,22 +748,25 @@ int xe_svm_init(struct xe_vm *vm) { int err; - spin_lock_init(&vm->svm.garbage_collector.lock); - INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list); - INIT_WORK(&vm->svm.garbage_collector.work, - xe_svm_garbage_collector_work_func); - - err = 
drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm, - current->mm, xe_svm_devm_owner(vm->xe), 0, - vm->size, xe_modparam.svm_notifier_size * SZ_1M, - &gpusvm_ops, fault_chunk_sizes, - ARRAY_SIZE(fault_chunk_sizes)); - if (err) - return err; - - drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock); + if (vm->flags & XE_VM_FLAG_FAULT_MODE) { + spin_lock_init(&vm->svm.garbage_collector.lock); + INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list); + INIT_WORK(&vm->svm.garbage_collector.work, + xe_svm_garbage_collector_work_func); + + err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm, + current->mm, 0, vm->size, + xe_modparam.svm_notifier_size * SZ_1M, + &gpusvm_ops, fault_chunk_sizes, + ARRAY_SIZE(fault_chunk_sizes)); + drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock); + } else { + err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", + &vm->xe->drm, NULL, 0, 0, 0, NULL, + NULL, 0); + } - return 0; + return err; } /** @@ -662,84 +798,144 @@ static bool xe_svm_range_is_valid(struct xe_svm_range *range, struct xe_tile *tile, bool devmem_only) { - /* - * Advisory only check whether the range currently has a valid mapping, - * READ_ONCE pairs with WRITE_ONCE in xe_pt.c - */ - return ((READ_ONCE(range->tile_present) & - ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) && - (!devmem_only || xe_svm_range_in_vram(range)); + return (xe_vm_has_valid_gpu_mapping(tile, range->tile_present, + range->tile_invalidated) && + (!devmem_only || xe_svm_range_in_vram(range))); } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) -static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) +/** xe_svm_range_migrate_to_smem() - Move range pages from VRAM to SMEM + * @vm: xe_vm pointer + * @range: Pointer to the SVM range structure + * + * The xe_svm_range_migrate_to_smem() checks range has pages in VRAM + * and migrates them to SMEM + */ +void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range) { - return &tile->mem.vram; + if 
(xe_svm_range_in_vram(range)) + drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base); } -static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, - struct xe_svm_range *range, - const struct drm_gpusvm_ctx *ctx) +/** + * xe_svm_range_validate() - Check if the SVM range is valid + * @vm: xe_vm pointer + * @range: Pointer to the SVM range structure + * @tile_mask: Mask representing the tiles to be checked + * @devmem_preferred : if true range needs to be in devmem + * + * The xe_svm_range_validate() function checks if a range is + * valid and located in the desired memory region. + * + * Return: true if the range is valid, false otherwise + */ +bool xe_svm_range_validate(struct xe_vm *vm, + struct xe_svm_range *range, + u8 tile_mask, bool devmem_preferred) { - struct mm_struct *mm = vm->svm.gpusvm.mm; - struct xe_vram_region *vr = tile_to_vr(tile); - struct drm_buddy_block *block; - struct list_head *blocks; - struct xe_bo *bo; - ktime_t end = 0; - int err; + bool ret; - range_debug(range, "ALLOCATE VRAM"); + xe_svm_notifier_lock(vm); - if (!mmget_not_zero(mm)) - return -EFAULT; - mmap_read_lock(mm); + ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask && + (devmem_preferred == range->base.pages.flags.has_devmem_pages); -retry: - bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL, - xe_svm_range_size(range), - ttm_bo_type_device, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_CPU_ADDR_MIRROR); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - if (xe_vm_validate_should_retry(NULL, err, &end)) - goto retry; - goto unlock; - } + xe_svm_notifier_unlock(vm); - drm_gpusvm_devmem_init(&bo->devmem_allocation, - vm->xe->drm.dev, mm, - &gpusvm_devmem_ops, - &tile->mem.vram.dpagemap, - xe_svm_range_size(range)); + return ret; +} - blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; - list_for_each_entry(block, blocks, link) - block->private = vr; +/** + * xe_svm_find_vma_start - Find start of CPU VMA + * @vm: xe_vm pointer + * 
@start: start address + * @end: end address + * @vma: Pointer to struct xe_vma + * + * + * This function searches for a cpu vma, within the specified + * range [start, end] in the given VM. It adjusts the range based on the + * xe_vma start and end addresses. If no cpu VMA is found, it returns ULONG_MAX. + * + * Return: The starting address of the VMA within the range, + * or ULONG_MAX if no VMA is found + */ +u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 start, u64 end, struct xe_vma *vma) +{ + return drm_gpusvm_find_vma_start(&vm->svm.gpusvm, + max(start, xe_vma_start(vma)), + min(end, xe_vma_end(vma))); +} - xe_bo_get(bo); - err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base, - &bo->devmem_allocation, ctx); - if (err) - xe_svm_devmem_release(&bo->devmem_allocation); +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, + unsigned long start, unsigned long end, + struct mm_struct *mm, + unsigned long timeslice_ms) +{ + struct xe_vram_region *vr = container_of(dpagemap, typeof(*vr), dpagemap); + struct dma_fence *pre_migrate_fence = NULL; + struct xe_device *xe = vr->xe; + struct device *dev = xe->drm.dev; + struct drm_buddy_block *block; + struct xe_validation_ctx vctx; + struct list_head *blocks; + struct drm_exec exec; + struct xe_bo *bo; + int err = 0, idx; - xe_bo_unlock(bo); - xe_bo_put(bo); + if (!drm_dev_enter(&xe->drm, &idx)) + return -ENODEV; -unlock: - mmap_read_unlock(mm); - mmput(mm); + xe_pm_runtime_get(xe); + + xe_validation_guard(&vctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { + bo = xe_bo_create_locked(xe, NULL, NULL, end - start, + ttm_bo_type_device, + (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) | + XE_BO_FLAG_CPU_ADDR_MIRROR, &exec); + drm_exec_retry_on_contention(&exec); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + xe_validation_retry_on_oom(&vctx, &err); + break; + } + + /* Ensure that any clearing or async eviction will complete before migration. 
*/ + if (!dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL)) { + err = dma_resv_get_singleton(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, + &pre_migrate_fence); + if (err) + dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + else if (pre_migrate_fence) + dma_fence_enable_sw_signaling(pre_migrate_fence); + } + + drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, + &dpagemap_devmem_ops, dpagemap, end - start, + pre_migrate_fence); + + blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; + list_for_each_entry(block, blocks, link) + block->private = vr; + + xe_bo_get(bo); + + /* Ensure the device has a pm ref while there are device pages active. */ + xe_pm_runtime_get_noresume(xe); + err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm, + start, end, timeslice_ms, + xe_svm_devm_owner(xe)); + if (err) + xe_svm_devmem_release(&bo->devmem_allocation); + xe_bo_unlock(bo); + xe_bo_put(bo); + } + xe_pm_runtime_put(xe); + drm_dev_exit(idx); return err; } -#else -static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, - struct xe_svm_range *range, - const struct drm_gpusvm_ctx *ctx) -{ - return -EOPNOTSUPP; -} #endif static bool supports_4K_migration(struct xe_device *xe) @@ -750,21 +946,31 @@ static bool supports_4K_migration(struct xe_device *xe) return true; } -static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, - struct xe_vma *vma) +/** + * xe_svm_range_needs_migrate_to_vram() - SVM range needs migrate to VRAM or not + * @range: SVM range for which migration needs to be decided + * @vma: vma which has range + * @preferred_region_is_vram: preferred region for range is vram + * + * Return: True for range needing migration and migration is supported else false + */ +bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, + bool preferred_region_is_vram) { struct xe_vm *vm = range_to_vm(&range->base); u64 range_size = 
xe_svm_range_size(range); - if (!range->base.flags.migrate_devmem) + if (!range->base.pages.flags.migrate_devmem || !preferred_region_is_vram) return false; + xe_assert(vm->xe, IS_DGFX(vm->xe)); + if (xe_svm_range_in_vram(range)) { drm_dbg(&vm->xe->drm, "Range is already in VRAM\n"); return false; } - if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) { + if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) { drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); return false; } @@ -772,41 +978,78 @@ static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, return true; } -/** - * xe_svm_handle_pagefault() - SVM handle page fault - * @vm: The VM. - * @vma: The CPU address mirror VMA. - * @gt: The gt upon the fault occurred. - * @fault_addr: The GPU fault address. - * @atomic: The fault atomic access bit. - * - * Create GPU bindings for a SVM page fault. Optionally migrate to device - * memory. - * - * Return: 0 on success, negative error code on error. 
- */ -int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, - struct xe_gt *gt, u64 fault_addr, - bool atomic) +#define DECL_SVM_RANGE_COUNT_STATS(elem, stat) \ +static void xe_svm_range_##elem##_count_stats_incr(struct xe_gt *gt, \ + struct xe_svm_range *range) \ +{ \ + switch (xe_svm_range_size(range)) { \ + case SZ_4K: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_COUNT, 1); \ + break; \ + case SZ_64K: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_COUNT, 1); \ + break; \ + case SZ_2M: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_COUNT, 1); \ + break; \ + } \ +} \ + +DECL_SVM_RANGE_COUNT_STATS(fault, PAGEFAULT) +DECL_SVM_RANGE_COUNT_STATS(valid_fault, VALID_PAGEFAULT) +DECL_SVM_RANGE_COUNT_STATS(migrate, MIGRATE) + +#define DECL_SVM_RANGE_US_STATS(elem, stat) \ +static void xe_svm_range_##elem##_us_stats_incr(struct xe_gt *gt, \ + struct xe_svm_range *range, \ + ktime_t start) \ +{ \ + s64 us_delta = xe_svm_stats_ktime_us_delta(start); \ +\ + switch (xe_svm_range_size(range)) { \ + case SZ_4K: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_US, \ + us_delta); \ + break; \ + case SZ_64K: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_US, \ + us_delta); \ + break; \ + case SZ_2M: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_US, \ + us_delta); \ + break; \ + } \ +} \ + +DECL_SVM_RANGE_US_STATS(migrate, MIGRATE) +DECL_SVM_RANGE_US_STATS(get_pages, GET_PAGES) +DECL_SVM_RANGE_US_STATS(bind, BIND) +DECL_SVM_RANGE_US_STATS(fault, PAGEFAULT) + +static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, + struct xe_gt *gt, u64 fault_addr, + bool need_vram) { + int devmem_possible = IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); struct drm_gpusvm_ctx ctx = { .read_only = xe_vma_read_only(vma), - .devmem_possible = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), - .check_pages_threshold = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 
SZ_64K : 0, - .devmem_only = atomic && IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), - .timeslice_ms = atomic && IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0, + .devmem_possible = devmem_possible, + .check_pages_threshold = devmem_possible ? SZ_64K : 0, + .devmem_only = need_vram && devmem_possible, + .timeslice_ms = need_vram && devmem_possible ? + vm->xe->atomic_svm_timeslice_ms : 0, + .device_private_page_owner = xe_svm_devm_owner(vm->xe), }; - struct xe_svm_range *range; - struct drm_gpusvm_range *r; + struct xe_validation_ctx vctx; struct drm_exec exec; + struct xe_svm_range *range; struct dma_fence *fence; - int migrate_try_count = ctx.devmem_only ? 3 : 1; + struct drm_pagemap *dpagemap; struct xe_tile *tile = gt_to_tile(gt); - ktime_t end = 0; + int migrate_try_count = ctx.devmem_only ? 3 : 1; + ktime_t start = xe_svm_stats_ktime_get(), bind_start, get_pages_start; int err; lockdep_assert_held_write(&vm->lock); @@ -820,31 +1063,58 @@ retry: if (err) return err; - r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, fault_addr, - xe_vma_start(vma), xe_vma_end(vma), - &ctx); - if (IS_ERR(r)) - return PTR_ERR(r); + dpagemap = xe_vma_resolve_pagemap(vma, tile); + if (!dpagemap && !ctx.devmem_only) + ctx.device_private_page_owner = NULL; + range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx); - if (ctx.devmem_only && !r->flags.migrate_devmem) - return -EACCES; + if (IS_ERR(range)) + return PTR_ERR(range); - range = to_xe_range(r); - if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) - return 0; + xe_svm_range_fault_count_stats_incr(gt, range); + + if (ctx.devmem_only && !range->base.pages.flags.migrate_devmem) { + err = -EACCES; + goto out; + } + + if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) { + xe_svm_range_valid_fault_count_stats_incr(gt, range); + range_debug(range, "PAGE FAULT - VALID"); + goto out; + } range_debug(range, "PAGE FAULT"); if (--migrate_try_count >= 0 && - 
xe_svm_range_needs_migrate_to_vram(range, vma)) { - err = xe_svm_alloc_vram(vm, tile, range, &ctx); + xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) { + ktime_t migrate_start = xe_svm_stats_ktime_get(); + + /* TODO : For multi-device dpagemap will be used to find the + * remote tile and remote device. Will need to modify + * xe_svm_alloc_vram to use dpagemap for future multi-device + * support. + */ + xe_svm_range_migrate_count_stats_incr(gt, range); + err = xe_svm_alloc_vram(tile, range, &ctx); + xe_svm_range_migrate_us_stats_incr(gt, range, migrate_start); ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (err) { if (migrate_try_count || !ctx.devmem_only) { drm_dbg(&vm->xe->drm, "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n", vm->usm.asid, ERR_PTR(err)); - goto retry; + + /* + * In the devmem-only case, mixed mappings may + * be found. The get_pages function will fix + * these up to a single location, allowing the + * page fault handler to make forward progress. 
+ */ + if (ctx.devmem_only) + goto get_pages; + else + goto retry; } else { drm_err(&vm->xe->drm, "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n", @@ -854,17 +1124,15 @@ retry: } } +get_pages: + get_pages_start = xe_svm_stats_ktime_get(); + range_debug(range, "GET PAGES"); - err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx); + err = xe_svm_range_get_pages(vm, range, &ctx); /* Corner where CPU mappings have changed */ if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) { ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (migrate_try_count > 0 || !ctx.devmem_only) { - if (err == -EOPNOTSUPP) { - range_debug(range, "PAGE FAULT - EVICT PAGES"); - drm_gpusvm_range_evict(&vm->svm.gpusvm, - &range->base); - } drm_dbg(&vm->xe->drm, "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n", vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); @@ -878,46 +1146,89 @@ retry: } if (err) { range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT"); - goto err_out; + goto out; } + xe_svm_range_get_pages_us_stats_incr(gt, range, get_pages_start); range_debug(range, "PAGE FAULT - BIND"); -retry_bind: - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - err = drm_exec_lock_obj(&exec, vm->gpuvm.r_obj); + bind_start = xe_svm_stats_ktime_get(); + xe_validation_guard(&vctx, &vm->xe->val, &exec, (struct xe_val_flags) {}, err) { + err = xe_vm_drm_exec_lock(vm, &exec); drm_exec_retry_on_contention(&exec); - if (err) { - drm_exec_fini(&exec); - goto err_out; - } + xe_vm_set_validation_exec(vm, &exec); fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id)); + xe_vm_set_validation_exec(vm, NULL); if (IS_ERR(fence)) { - drm_exec_fini(&exec); + drm_exec_retry_on_contention(&exec); err = PTR_ERR(fence); - if (err == -EAGAIN) { - ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ - range_debug(range, "PAGE FAULT - RETRY BIND"); - goto retry; - } - if (xe_vm_validate_should_retry(&exec, err, &end)) - 
goto retry_bind; - goto err_out; + xe_validation_retry_on_oom(&vctx, &err); + xe_svm_range_bind_us_stats_incr(gt, range, bind_start); + break; } } - drm_exec_fini(&exec); + if (err) + goto err_out; dma_fence_wait(fence, false); dma_fence_put(fence); + xe_svm_range_bind_us_stats_incr(gt, range, bind_start); + +out: + xe_svm_range_fault_us_stats_incr(gt, range, start); + return 0; err_out: + if (err == -EAGAIN) { + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ + range_debug(range, "PAGE FAULT - RETRY BIND"); + goto retry; + } return err; } /** + * xe_svm_handle_pagefault() - SVM handle page fault + * @vm: The VM. + * @vma: The CPU address mirror VMA. + * @gt: The gt upon the fault occurred. + * @fault_addr: The GPU fault address. + * @atomic: The fault atomic access bit. + * + * Create GPU bindings for a SVM page fault. Optionally migrate to device + * memory. + * + * Return: 0 on success, negative error code on error. + */ +int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, + struct xe_gt *gt, u64 fault_addr, + bool atomic) +{ + int need_vram, ret; +retry: + need_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic); + if (need_vram < 0) + return need_vram; + + ret = __xe_svm_handle_pagefault(vm, vma, gt, fault_addr, + need_vram ? true : false); + if (ret == -EAGAIN) { + /* + * Retry once on -EAGAIN to re-lookup the VMA, as the original VMA + * may have been split by xe_svm_range_set_default_attr. + */ + vma = xe_vm_find_vma_by_addr(vm, fault_addr); + if (!vma) + return -EINVAL; + + goto retry; + } + return ret; +} + +/** * xe_svm_has_mapping() - SVM has mappings * @vm: The VM. * @start: Start address. @@ -933,6 +1244,41 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end) } /** + * xe_svm_unmap_address_range - UNMAP SVM mappings and ranges + * @vm: The VM + * @start: start addr + * @end: end addr + * + * This function UNMAPS svm ranges if start or end address are inside them. 
+ */ +void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end) +{ + struct drm_gpusvm_notifier *notifier, *next; + + lockdep_assert_held_write(&vm->lock); + + drm_gpusvm_for_each_notifier_safe(notifier, next, &vm->svm.gpusvm, start, end) { + struct drm_gpusvm_range *range, *__next; + + drm_gpusvm_for_each_range_safe(range, __next, notifier, start, end) { + if (start > drm_gpusvm_range_start(range) || + end < drm_gpusvm_range_end(range)) { + if (IS_DGFX(vm->xe) && xe_svm_range_in_vram(to_xe_range(range))) + drm_gpusvm_range_evict(&vm->svm.gpusvm, range); + drm_gpusvm_range_get(range); + __xe_svm_garbage_collector(vm, to_xe_range(range)); + if (!list_empty(&to_xe_range(range)->garbage_collector_link)) { + spin_lock(&vm->svm.garbage_collector.lock); + list_del(&to_xe_range(range)->garbage_collector_link); + spin_unlock(&vm->svm.garbage_collector.lock); + } + drm_gpusvm_range_put(range); + } + } + } +} + +/** * xe_svm_bo_evict() - SVM evict BO to system memory * @bo: BO to evict * @@ -943,12 +1289,172 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end) */ int xe_svm_bo_evict(struct xe_bo *bo) { - return drm_gpusvm_evict_to_ram(&bo->devmem_allocation); + return drm_pagemap_evict_to_ram(&bo->devmem_allocation); } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +/** + * xe_svm_range_find_or_insert- Find or insert GPU SVM range + * @vm: xe_vm pointer + * @addr: address for which range needs to be found/inserted + * @vma: Pointer to struct xe_vma which mirrors CPU + * @ctx: GPU SVM context + * + * This function finds or inserts a newly allocated a SVM range based on the + * address. + * + * Return: Pointer to the SVM range on success, ERR_PTR() on failure. 
+ */ +struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, + struct xe_vma *vma, struct drm_gpusvm_ctx *ctx) +{ + struct drm_gpusvm_range *r; + + r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, max(addr, xe_vma_start(vma)), + xe_vma_start(vma), xe_vma_end(vma), ctx); + if (IS_ERR(r)) + return ERR_CAST(r); + + return to_xe_range(r); +} -static struct drm_pagemap_device_addr +/** + * xe_svm_range_get_pages() - Get pages for a SVM range + * @vm: Pointer to the struct xe_vm + * @range: Pointer to the xe SVM range structure + * @ctx: GPU SVM context + * + * This function gets pages for a SVM range and ensures they are mapped for + * DMA access. In case of failure with -EOPNOTSUPP, it evicts the range. + * + * Return: 0 on success, negative error code on failure. + */ +int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, + struct drm_gpusvm_ctx *ctx) +{ + int err = 0; + + err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, &range->base, ctx); + if (err == -EOPNOTSUPP) { + range_debug(range, "PAGE FAULT - EVICT PAGES"); + drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base); + } + + return err; +} + +/** + * xe_svm_ranges_zap_ptes_in_range - clear ptes of svm ranges in input range + * @vm: Pointer to the xe_vm structure + * @start: Start of the input range + * @end: End of the input range + * + * This function removes the page table entries (PTEs) associated + * with the svm ranges within the given input start and end + * + * Return: tile_mask for which gt's need to be tlb invalidated. 
+ */ +u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end) +{ + struct drm_gpusvm_notifier *notifier; + struct xe_svm_range *range; + u64 adj_start, adj_end; + struct xe_tile *tile; + u8 tile_mask = 0; + u8 id; + + lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && + lockdep_is_held_type(&vm->lock, 0)); + + drm_gpusvm_for_each_notifier(notifier, &vm->svm.gpusvm, start, end) { + struct drm_gpusvm_range *r = NULL; + + adj_start = max(start, drm_gpusvm_notifier_start(notifier)); + adj_end = min(end, drm_gpusvm_notifier_end(notifier)); + drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end) { + range = to_xe_range(r); + for_each_tile(tile, vm->xe, id) { + if (xe_pt_zap_ptes_range(tile, vm, range)) { + tile_mask |= BIT(id); + /* + * WRITE_ONCE pairs with READ_ONCE in + * xe_vm_has_valid_gpu_mapping(). + * Must not fail after setting + * tile_invalidated and before + * TLB invalidation. + */ + WRITE_ONCE(range->tile_invalidated, + range->tile_invalidated | BIT(id)); + } + } + } + } + + return tile_mask; +} + +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) + +static struct drm_pagemap *tile_local_pagemap(struct xe_tile *tile) +{ + return &tile->mem.vram->dpagemap; +} + +/** + * xe_vma_resolve_pagemap - Resolve the appropriate DRM pagemap for a VMA + * @vma: Pointer to the xe_vma structure containing memory attributes + * @tile: Pointer to the xe_tile structure used as fallback for VRAM mapping + * + * This function determines the correct DRM pagemap to use for a given VMA. + * It first checks if a valid devmem_fd is provided in the VMA's preferred + * location. If the devmem_fd is negative, it returns NULL, indicating no + * pagemap is available and smem to be used as preferred location. + * If the devmem_fd is equal to the default faulting + * GT identifier, it returns the VRAM pagemap associated with the tile. 
+ * + * Future support for multi-device configurations may use drm_pagemap_from_fd() + * to resolve pagemaps from arbitrary file descriptors. + * + * Return: A pointer to the resolved drm_pagemap, or NULL if none is applicable. + */ +struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile) +{ + s32 fd = (s32)vma->attr.preferred_loc.devmem_fd; + + if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM) + return NULL; + + if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE) + return IS_DGFX(tile_to_xe(tile)) ? tile_local_pagemap(tile) : NULL; + + /* TODO: Support multi-device with drm_pagemap_from_fd(fd) */ + return NULL; +} + +/** + * xe_svm_alloc_vram()- Allocate device memory pages for range, + * migrating existing data. + * @tile: tile to allocate vram from + * @range: SVM range + * @ctx: DRM GPU SVM context + * + * Return: 0 on success, error code on failure. + */ +int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) +{ + struct drm_pagemap *dpagemap; + + xe_assert(tile_to_xe(tile), range->base.pages.flags.migrate_devmem); + range_debug(range, "ALLOCATE VRAM"); + + dpagemap = tile_local_pagemap(tile); + return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range), + xe_svm_range_end(range), + range->base.gpusvm->mm, + ctx->timeslice_ms); +} + +static struct drm_pagemap_addr xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, struct device *dev, struct page *page, @@ -967,11 +1473,12 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, prot = 0; } - return drm_pagemap_device_addr_encode(addr, prot, order, dir); + return drm_pagemap_addr_encode(addr, prot, order, dir); } static const struct drm_pagemap_ops xe_drm_pagemap_ops = { .device_map = xe_drm_pagemap_device_map, + .populate_mm = xe_drm_pagemap_populate_mm, }; /** @@ -1003,7 +1510,7 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) vr->pagemap.range.start = res->start; vr->pagemap.range.end = res->end; 
vr->pagemap.nr_range = 1; - vr->pagemap.ops = drm_gpusvm_pagemap_ops_get(); + vr->pagemap.ops = drm_pagemap_pagemap_ops_get(); vr->pagemap.owner = xe_svm_devm_owner(xe); addr = devm_memremap_pages(dev, &vr->pagemap); @@ -1024,10 +1531,22 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) return 0; } #else +int xe_svm_alloc_vram(struct xe_tile *tile, + struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) +{ + return -EOPNOTSUPP; +} + int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) { return 0; } + +struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile) +{ + return NULL; +} #endif /** diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index 30fc78b85b30..fa757dd07954 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -6,6 +6,20 @@ #ifndef _XE_SVM_H_ #define _XE_SVM_H_ +struct xe_device; + +/** + * xe_svm_devm_owner() - Return the owner of device private memory + * @xe: The xe device. 
+ * + * Return: The owner of this device's device private memory to use in + * hmm_range_fault()- + */ +static inline void *xe_svm_devm_owner(struct xe_device *xe) +{ + return xe; +} + #if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) #include <drm/drm_pagemap.h> @@ -70,6 +84,32 @@ int xe_svm_bo_evict(struct xe_bo *bo); void xe_svm_range_debug(struct xe_svm_range *range, const char *operation); +int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx); + +struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, + struct xe_vma *vma, struct drm_gpusvm_ctx *ctx); + +int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, + struct drm_gpusvm_ctx *ctx); + +bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, + bool preferred_region_is_vram); + +void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range); + +bool xe_svm_range_validate(struct xe_vm *vm, + struct xe_svm_range *range, + u8 tile_mask, bool devmem_preferred); + +u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vma); + +void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end); + +u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end); + +struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile); + /** * xe_svm_range_has_dma_mapping() - SVM range has DMA mapping * @range: SVM range @@ -79,24 +119,65 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation); static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) { lockdep_assert_held(&range->base.gpusvm->notifier_lock); - return range->base.flags.has_dma_mapping; + return range->base.pages.flags.has_dma_mapping; } -#define xe_svm_assert_in_notifier(vm__) \ - lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) +/** + * to_xe_range - Convert a drm_gpusvm_range pointer to a xe_svm_range + * @r: Pointer 
to the drm_gpusvm_range structure + * + * This function takes a pointer to a drm_gpusvm_range structure and + * converts it to a pointer to the containing xe_svm_range structure. + * + * Return: Pointer to the xe_svm_range structure + */ +static inline struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r) +{ + return container_of(r, struct xe_svm_range, base); +} -#define xe_svm_notifier_lock(vm__) \ - drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) +/** + * xe_svm_range_start() - SVM range start address + * @range: SVM range + * + * Return: start address of range. + */ +static inline unsigned long xe_svm_range_start(struct xe_svm_range *range) +{ + return drm_gpusvm_range_start(&range->base); +} -#define xe_svm_notifier_unlock(vm__) \ - drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm) +/** + * xe_svm_range_end() - SVM range end address + * @range: SVM range + * + * Return: end address of range. + */ +static inline unsigned long xe_svm_range_end(struct xe_svm_range *range) +{ + return drm_gpusvm_range_end(&range->base); +} + +/** + * xe_svm_range_size() - SVM range size + * @range: SVM range + * + * Return: Size of range. 
+ */ +static inline unsigned long xe_svm_range_size(struct xe_svm_range *range) +{ + return drm_gpusvm_range_size(&range->base); +} void xe_svm_flush(struct xe_vm *vm); #else #include <linux/interval_tree.h> +#include "xe_vm.h" -struct drm_pagemap_device_addr; +struct drm_pagemap_addr; +struct drm_gpusvm_ctx; +struct drm_gpusvm_range; struct xe_bo; struct xe_gt; struct xe_vm; @@ -109,7 +190,9 @@ struct xe_vram_region; struct xe_svm_range { struct { struct interval_tree_node itree; - const struct drm_pagemap_device_addr *dma_addr; + struct { + const struct drm_pagemap_addr *dma_addr; + } pages; } base; u32 tile_present; u32 tile_invalidated; @@ -129,12 +212,21 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) static inline int xe_svm_init(struct xe_vm *vm) { +#if IS_ENABLED(CONFIG_DRM_GPUSVM) + return drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", &vm->xe->drm, + NULL, 0, 0, 0, NULL, NULL, 0); +#else return 0; +#endif } static inline void xe_svm_fini(struct xe_vm *vm) { +#if IS_ENABLED(CONFIG_DRM_GPUSVM) + xe_assert(vm->xe, xe_vm_is_closed(vm)); + drm_gpusvm_fini(&vm->svm.gpusvm); +#endif } static inline @@ -167,19 +259,131 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) { } -#define xe_svm_assert_in_notifier(...) 
do {} while (0) +static inline int +xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) +{ + return -EOPNOTSUPP; +} + +static inline +struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, + struct xe_vma *vma, struct drm_gpusvm_ctx *ctx) +{ + return ERR_PTR(-EINVAL); +} + +static inline +int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, + struct drm_gpusvm_ctx *ctx) +{ + return -EINVAL; +} + +static inline struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r) +{ + return NULL; +} + +static inline unsigned long xe_svm_range_start(struct xe_svm_range *range) +{ + return 0; +} + +static inline unsigned long xe_svm_range_end(struct xe_svm_range *range) +{ + return 0; +} + +static inline unsigned long xe_svm_range_size(struct xe_svm_range *range) +{ + return 0; +} + +static inline +bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, + u32 region) +{ + return false; +} + +static inline +void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range) +{ +} + +static inline +bool xe_svm_range_validate(struct xe_vm *vm, + struct xe_svm_range *range, + u8 tile_mask, bool devmem_preferred) +{ + return false; +} + +static inline +u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vma) +{ + return ULONG_MAX; +} + +static inline +void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end) +{ +} + +static inline +u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end) +{ + return 0; +} + +static inline +struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile) +{ + return NULL; +} + +static inline void xe_svm_flush(struct xe_vm *vm) +{ +} #define xe_svm_range_has_dma_mapping(...) 
false +#endif /* CONFIG_DRM_XE_GPUSVM */ + +#if IS_ENABLED(CONFIG_DRM_GPUSVM) /* Need to support userptr without XE_GPUSVM */ +#define xe_svm_assert_in_notifier(vm__) \ + lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) + +#define xe_svm_assert_held_read(vm__) \ + lockdep_assert_held_read(&(vm__)->svm.gpusvm.notifier_lock) + +#define xe_svm_notifier_lock(vm__) \ + drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) + +#define xe_svm_notifier_lock_interruptible(vm__) \ + down_read_interruptible(&(vm__)->svm.gpusvm.notifier_lock) + +#define xe_svm_notifier_unlock(vm__) \ + drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm) + +#else +#define xe_svm_assert_in_notifier(...) do {} while (0) + +static inline void xe_svm_assert_held_read(struct xe_vm *vm) +{ +} static inline void xe_svm_notifier_lock(struct xe_vm *vm) { } -static inline void xe_svm_notifier_unlock(struct xe_vm *vm) +static inline int xe_svm_notifier_lock_interruptible(struct xe_vm *vm) { + return 0; } -static inline void xe_svm_flush(struct xe_vm *vm) +static inline void xe_svm_notifier_unlock(struct xe_vm *vm) { } -#endif +#endif /* CONFIG_DRM_GPUSVM */ + #endif diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index f87276df18f2..ff74528ca0c6 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -14,7 +14,7 @@ #include <drm/drm_syncobj.h> #include <uapi/drm/xe_drm.h> -#include "xe_device_types.h" +#include "xe_device.h" #include "xe_exec_queue.h" #include "xe_macros.h" #include "xe_sched_job_types.h" @@ -77,6 +77,7 @@ static void user_fence_worker(struct work_struct *w) { struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker); + WRITE_ONCE(ufence->signalled, 1); if (mmget_not_zero(ufence->mm)) { kthread_use_mm(ufence->mm); if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value))) @@ -91,7 +92,6 @@ static void user_fence_worker(struct work_struct *w) * Wake up waiters only after updating the ufence state, allowing the 
UMD * to safely reuse the same ufence without encountering -EBUSY errors. */ - WRITE_ONCE(ufence->signalled, 1); wake_up_all(&ufence->xe->ufence_wq); user_fence_put(ufence); } @@ -113,6 +113,8 @@ static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, + struct drm_syncobj *ufence_syncobj, + u64 ufence_timeline_value, unsigned int flags) { struct drm_xe_sync sync_in; @@ -192,10 +194,15 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (exec) { sync->addr = sync_in.addr; } else { + sync->ufence_timeline_value = ufence_timeline_value; sync->ufence = user_fence_create(xe, sync_in.addr, sync_in.timeline_value); if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) return PTR_ERR(sync->ufence); + sync->ufence_chain_fence = dma_fence_chain_alloc(); + if (!sync->ufence_chain_fence) + return -ENOMEM; + sync->ufence_syncobj = ufence_syncobj; } break; @@ -239,7 +246,12 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence) } else if (sync->ufence) { int err; - dma_fence_get(fence); + drm_syncobj_add_point(sync->ufence_syncobj, + sync->ufence_chain_fence, + fence, sync->ufence_timeline_value); + sync->ufence_chain_fence = NULL; + + fence = drm_syncobj_fence_get(sync->ufence_syncobj); user_fence_get(sync->ufence); err = dma_fence_add_callback(fence, &sync->ufence->cb, user_fence_cb); @@ -259,7 +271,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) drm_syncobj_put(sync->syncobj); dma_fence_put(sync->fence); dma_fence_chain_free(sync->chain_fence); - if (sync->ufence) + dma_fence_chain_free(sync->ufence_chain_fence); + if (!IS_ERR_OR_NULL(sync->ufence)) user_fence_put(sync->ufence); } @@ -284,51 +297,59 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; struct dma_fence *fence; - int i, 
num_in_fence = 0, current_fence = 0; + int i, num_fence = 0, current_fence = 0; lockdep_assert_held(&vm->lock); - /* Count in-fences */ - for (i = 0; i < num_sync; ++i) { - if (sync[i].fence) { - ++num_in_fence; - fence = sync[i].fence; + /* Reject in fences */ + for (i = 0; i < num_sync; ++i) + if (sync[i].fence) + return ERR_PTR(-EOPNOTSUPP); + + if (q->flags & EXEC_QUEUE_FLAG_VM) { + struct xe_exec_queue *__q; + struct xe_tile *tile; + u8 id; + + for_each_tile(tile, vm->xe, id) + num_fence += (1 + XE_MAX_GT_PER_TILE); + + fences = kmalloc_array(num_fence, sizeof(*fences), + GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + + fences[current_fence++] = + xe_exec_queue_last_fence_get(q, vm); + for_each_tlb_inval(i) + fences[current_fence++] = + xe_exec_queue_tlb_inval_last_fence_get(q, vm, i); + list_for_each_entry(__q, &q->multi_gt_list, + multi_gt_link) { + fences[current_fence++] = + xe_exec_queue_last_fence_get(__q, vm); + for_each_tlb_inval(i) + fences[current_fence++] = + xe_exec_queue_tlb_inval_last_fence_get(__q, vm, i); } - } - /* Easy case... 
*/ - if (!num_in_fence) { - fence = xe_exec_queue_last_fence_get(q, vm); - return fence; - } + xe_assert(vm->xe, current_fence == num_fence); + cf = dma_fence_array_create(num_fence, fences, + dma_fence_context_alloc(1), + 1, false); + if (!cf) + goto err_out; - /* Create composite fence */ - fences = kmalloc_array(num_in_fence + 1, sizeof(*fences), GFP_KERNEL); - if (!fences) - return ERR_PTR(-ENOMEM); - for (i = 0; i < num_sync; ++i) { - if (sync[i].fence) { - dma_fence_get(sync[i].fence); - fences[current_fence++] = sync[i].fence; - } - } - fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm); - cf = dma_fence_array_create(num_in_fence, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - if (!cf) { - --vm->composite_fence_seqno; - goto err_out; + return &cf->base; } - return &cf->base; + fence = xe_exec_queue_last_fence_get(q, vm); + return fence; err_out: while (current_fence) dma_fence_put(fences[--current_fence]); kfree(fences); - kfree(cf); return ERR_PTR(-ENOMEM); } diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 256ffc1e54dc..51f2d803e977 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -8,6 +8,7 @@ #include "xe_sync_types.h" +struct drm_syncobj; struct xe_device; struct xe_exec_queue; struct xe_file; @@ -21,6 +22,8 @@ struct xe_vm; int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, + struct drm_syncobj *ufence_syncobj, + u64 ufence_timeline_value, unsigned int flags); int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job); diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h index 30ac3f51993b..b88f1833e28c 100644 --- a/drivers/gpu/drm/xe/xe_sync_types.h +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -18,9 +18,12 @@ struct xe_sync_entry { struct drm_syncobj *syncobj; struct dma_fence *fence; struct dma_fence_chain 
*chain_fence; + struct dma_fence_chain *ufence_chain_fence; + struct drm_syncobj *ufence_syncobj; struct xe_user_fence *ufence; u64 addr; u64 timeline_value; + u64 ufence_timeline_value; u32 type; u32 flags; }; diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 0771acbbf367..4f4f9a5c43af 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -7,9 +7,11 @@ #include <drm/drm_managed.h> +#include "xe_bo.h" #include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_memirq.h" #include "xe_migrate.h" #include "xe_pcode.h" #include "xe_sa.h" @@ -17,6 +19,8 @@ #include "xe_tile.h" #include "xe_tile_sysfs.h" #include "xe_ttm_vram_mgr.h" +#include "xe_vram.h" +#include "xe_vram_types.h" #include "xe_wa.h" /** @@ -87,13 +91,46 @@ */ static int xe_tile_alloc(struct xe_tile *tile) { - struct drm_device *drm = &tile_to_xe(tile)->drm; - - tile->mem.ggtt = drmm_kzalloc(drm, sizeof(*tile->mem.ggtt), - GFP_KERNEL); + tile->mem.ggtt = xe_ggtt_alloc(tile); if (!tile->mem.ggtt) return -ENOMEM; - tile->mem.ggtt->tile = tile; + + tile->migrate = xe_migrate_alloc(tile); + if (!tile->migrate) + return -ENOMEM; + + return 0; +} + +/** + * xe_tile_alloc_vram - Perform per-tile VRAM structs allocation + * @tile: Tile to perform allocations for + * + * Allocates VRAM per-tile data structures using DRM-managed allocations. + * Does not touch the hardware. + * + * Returns -ENOMEM if allocations fail, otherwise 0. + */ +int xe_tile_alloc_vram(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_vram_region *vram; + + if (!IS_DGFX(xe)) + return 0; + + vram = xe_vram_region_alloc(xe, tile->id, XE_PL_VRAM0 + tile->id); + if (!vram) + return -ENOMEM; + tile->mem.vram = vram; + + /* + * If the kernel_vram is not already allocated, + * it means that tile has common VRAM region for + * kernel and user space. 
+ */ + if (!tile->mem.kernel_vram) + tile->mem.kernel_vram = tile->mem.vram; return 0; } @@ -120,31 +157,12 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id) if (err) return err; - tile->primary_gt = xe_gt_alloc(tile); - if (IS_ERR(tile->primary_gt)) - return PTR_ERR(tile->primary_gt); - xe_pcode_init(tile); return 0; } ALLOW_ERROR_INJECTION(xe_tile_init_early, ERRNO); /* See xe_pci_probe() */ -static int tile_ttm_mgr_init(struct xe_tile *tile) -{ - struct xe_device *xe = tile_to_xe(tile); - int err; - - if (tile->mem.vram.usable_size) { - err = xe_ttm_vram_mgr_init(tile, &tile->mem.vram.ttm); - if (err) - return err; - xe->info.mem_region_mask |= BIT(tile->id) << 1; - } - - return 0; -} - /** * xe_tile_init_noalloc - Init tile up to the point where allocations can happen. * @tile: The tile to initialize. @@ -162,22 +180,31 @@ static int tile_ttm_mgr_init(struct xe_tile *tile) int xe_tile_init_noalloc(struct xe_tile *tile) { struct xe_device *xe = tile_to_xe(tile); - int err; - - err = tile_ttm_mgr_init(tile); - if (err) - return err; xe_wa_apply_tile_workarounds(tile); if (xe->info.has_usm && IS_DGFX(xe)) - xe_devm_add(tile, &tile->mem.vram); + xe_devm_add(tile, tile->mem.vram); + + if (IS_DGFX(xe) && !ttm_resource_manager_used(&tile->mem.vram->ttm.manager)) { + int err = xe_ttm_vram_mgr_init(xe, tile->mem.vram); + + if (err) + return err; + xe->info.mem_region_mask |= BIT(tile->mem.vram->id) << 1; + } return xe_tile_sysfs_init(tile); } int xe_tile_init(struct xe_tile *tile) { + int err; + + err = xe_memirq_init(&tile->memirq); + if (err) + return err; + tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); if (IS_ERR(tile->mem.kernel_bb_pool)) return PTR_ERR(tile->mem.kernel_bb_pool); diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index eb939316d55b..dceb6297aa01 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -14,6 +14,13 @@ int xe_tile_init_early(struct xe_tile 
*tile, struct xe_device *xe, u8 id); int xe_tile_init_noalloc(struct xe_tile *tile); int xe_tile_init(struct xe_tile *tile); +int xe_tile_alloc_vram(struct xe_tile *tile); + void xe_tile_migrate_wait(struct xe_tile *tile); +static inline bool xe_tile_is_root(struct xe_tile *tile) +{ + return tile->id == 0; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.c b/drivers/gpu/drm/xe/xe_tile_debugfs.c new file mode 100644 index 000000000000..fff242a5ae56 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_debugfs.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/debugfs.h> +#include <drm/drm_debugfs.h> + +#include "xe_ggtt.h" +#include "xe_pm.h" +#include "xe_sa.h" +#include "xe_tile_debugfs.h" + +static struct xe_tile *node_to_tile(struct drm_info_node *node) +{ + return node->dent->d_parent->d_inode->i_private; +} + +/** + * xe_tile_debugfs_simple_show() - A show callback for struct drm_info_list + * @m: the &seq_file + * @data: data used by the drm debugfs helpers + * + * This callback can be used in struct drm_info_list to describe debugfs + * files that are &xe_tile specific. + * + * It is assumed that those debugfs files will be created on directory entry + * which struct dentry d_inode->i_private points to &xe_tile. + * + * /sys/kernel/debug/dri/0/ + * ├── tile0/ # tile = dentry->d_inode->i_private + * │ │ ├── id # tile = dentry->d_parent->d_inode->i_private + * + * This function assumes that &m->private will be set to the &struct + * drm_info_node corresponding to the instance of the info on a given &struct + * drm_minor (see struct drm_info_list.show for details). 
+ * + * This function also assumes that struct drm_info_list.data will point to the + * function code that will actually print a file content:: + * + * int (*print)(struct xe_tile *, struct drm_printer *) + * + * Example:: + * + * int tile_id(struct xe_tile *tile, struct drm_printer *p) + * { + * drm_printf(p, "%u\n", tile->id); + * return 0; + * } + * + * static const struct drm_info_list info[] = { + * { .name = "id", .show = xe_tile_debugfs_simple_show, .data = tile_id }, + * }; + * + * dir = debugfs_create_dir("tile0", parent); + * dir->d_inode->i_private = tile; + * drm_debugfs_create_files(info, ARRAY_SIZE(info), dir, minor); + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_tile_debugfs_simple_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct xe_tile *tile = node_to_tile(node); + int (*print)(struct xe_tile *, struct drm_printer *) = node->info_ent->data; + + return print(tile, &p); +} + +/** + * xe_tile_debugfs_show_with_rpm() - A show callback for struct drm_info_list + * @m: the &seq_file + * @data: data used by the drm debugfs helpers + * + * Similar to xe_tile_debugfs_simple_show() but implicitly takes a RPM ref. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_tile_debugfs_show_with_rpm(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct xe_tile *tile = node_to_tile(node); + struct xe_device *xe = tile_to_xe(tile); + int ret; + + xe_pm_runtime_get(xe); + ret = xe_tile_debugfs_simple_show(m, data); + xe_pm_runtime_put(xe); + + return ret; +} + +static int ggtt(struct xe_tile *tile, struct drm_printer *p) +{ + return xe_ggtt_dump(tile->mem.ggtt, p); +} + +static int sa_info(struct xe_tile *tile, struct drm_printer *p) +{ + drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p, + xe_sa_manager_gpu_addr(tile->mem.kernel_bb_pool)); + + return 0; +} + +/* only for debugfs files which can be safely used on the VF */ +static const struct drm_info_list vf_safe_debugfs_list[] = { + { "ggtt", .show = xe_tile_debugfs_show_with_rpm, .data = ggtt }, + { "sa_info", .show = xe_tile_debugfs_show_with_rpm, .data = sa_info }, +}; + +/** + * xe_tile_debugfs_register - Register tile's debugfs attributes + * @tile: the &xe_tile to register + * + * Create debugfs sub-directory with a name that includes a tile ID and + * then creates set of debugfs files (attributes) specific to this tile. + */ +void xe_tile_debugfs_register(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + struct drm_minor *minor = xe->drm.primary; + struct dentry *root = minor->debugfs_root; + char name[8]; + + snprintf(name, sizeof(name), "tile%u", tile->id); + tile->debugfs = debugfs_create_dir(name, root); + if (IS_ERR(tile->debugfs)) + return; + + /* + * Store the xe_tile pointer as private data of the tile/ directory + * node so other tile specific attributes under that directory may + * refer to it by looking at its parent node private data. 
+ */ + tile->debugfs->d_inode->i_private = tile; + + drm_debugfs_create_files(vf_safe_debugfs_list, + ARRAY_SIZE(vf_safe_debugfs_list), + tile->debugfs, minor); +} diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.h b/drivers/gpu/drm/xe/xe_tile_debugfs.h new file mode 100644 index 000000000000..4429c22542f4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_debugfs.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TILE_DEBUGFS_H_ +#define _XE_TILE_DEBUGFS_H_ + +struct seq_file; +struct xe_tile; + +void xe_tile_debugfs_register(struct xe_tile *tile); +int xe_tile_debugfs_simple_show(struct seq_file *m, void *data); +int xe_tile_debugfs_show_with_rpm(struct seq_file *m, void *data); + +#endif diff --git a/drivers/gpu/drm/xe/xe_tile_printk.h b/drivers/gpu/drm/xe/xe_tile_printk.h new file mode 100644 index 000000000000..63640a42685d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_printk.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _xe_tile_printk_H_ +#define _xe_tile_printk_H_ + +#include "xe_printk.h" + +#define __XE_TILE_PRINTK_FMT(_tile, _fmt, _args...) "Tile%u: " _fmt, (_tile)->id, ##_args + +#define xe_tile_printk(_tile, _level, _fmt, ...) \ + xe_printk((_tile)->xe, _level, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__)) + +#define xe_tile_err(_tile, _fmt, ...) \ + xe_tile_printk((_tile), err, _fmt, ##__VA_ARGS__) + +#define xe_tile_err_once(_tile, _fmt, ...) \ + xe_tile_printk((_tile), err_once, _fmt, ##__VA_ARGS__) + +#define xe_tile_err_ratelimited(_tile, _fmt, ...) \ + xe_tile_printk((_tile), err_ratelimited, _fmt, ##__VA_ARGS__) + +#define xe_tile_warn(_tile, _fmt, ...) \ + xe_tile_printk((_tile), warn, _fmt, ##__VA_ARGS__) + +#define xe_tile_notice(_tile, _fmt, ...) \ + xe_tile_printk((_tile), notice, _fmt, ##__VA_ARGS__) + +#define xe_tile_info(_tile, _fmt, ...) 
\ + xe_tile_printk((_tile), info, _fmt, ##__VA_ARGS__) + +#define xe_tile_dbg(_tile, _fmt, ...) \ + xe_tile_printk((_tile), dbg, _fmt, ##__VA_ARGS__) + +#define xe_tile_WARN_type(_tile, _type, _condition, _fmt, ...) \ + xe_WARN##_type((_tile)->xe, _condition, _fmt, ## __VA_ARGS__) + +#define xe_tile_WARN(_tile, _condition, _fmt, ...) \ + xe_tile_WARN_type((_tile),, _condition, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__)) + +#define xe_tile_WARN_ONCE(_tile, _condition, _fmt, ...) \ + xe_tile_WARN_type((_tile), _ONCE, _condition, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__)) + +#define xe_tile_WARN_ON(_tile, _condition) \ + xe_tile_WARN((_tile), _condition, "%s(%s)", "WARN_ON", __stringify(_condition)) + +#define xe_tile_WARN_ON_ONCE(_tile, _condition) \ + xe_tile_WARN_ONCE((_tile), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition)) + +static inline void __xe_tile_printfn_err(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_tile *tile = p->arg; + + xe_tile_err(tile, "%pV", vaf); +} + +static inline void __xe_tile_printfn_info(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_tile *tile = p->arg; + + xe_tile_info(tile, "%pV", vaf); +} + +static inline void __xe_tile_printfn_dbg(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_tile *tile = p->arg; + struct drm_printer dbg; + + /* + * The original xe_tile_dbg() callsite annotations are useless here, + * redirect to the tweaked xe_dbg_printer() instead. + */ + dbg = xe_dbg_printer(tile->xe); + dbg.origin = p->origin; + + drm_printf(&dbg, __XE_TILE_PRINTK_FMT(tile, "%pV", vaf)); +} + +/** + * xe_tile_err_printer - Construct a &drm_printer that outputs to xe_tile_err() + * @tile: the &xe_tile pointer to use in xe_tile_err() + * + * Return: The &drm_printer object. 
+ */ +static inline struct drm_printer xe_tile_err_printer(struct xe_tile *tile) +{ + struct drm_printer p = { + .printfn = __xe_tile_printfn_err, + .arg = tile, + }; + return p; +} + +/** + * xe_tile_info_printer - Construct a &drm_printer that outputs to xe_tile_info() + * @tile: the &xe_tile pointer to use in xe_tile_info() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_tile_info_printer(struct xe_tile *tile) +{ + struct drm_printer p = { + .printfn = __xe_tile_printfn_info, + .arg = tile, + }; + return p; +} + +/** + * xe_tile_dbg_printer - Construct a &drm_printer that outputs like xe_tile_dbg() + * @tile: the &xe_tile pointer to use in xe_tile_dbg() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_tile_dbg_printer(struct xe_tile *tile) +{ + struct drm_printer p = { + .printfn = __xe_tile_printfn_dbg, + .arg = tile, + .origin = (const void *)_THIS_IP_, + }; + return p; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c new file mode 100644 index 000000000000..f3f478f14ff5 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/debugfs.h> +#include <drm/drm_debugfs.h> + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_debugfs.h" +#include "xe_pm.h" +#include "xe_tile_debugfs.h" +#include "xe_tile_sriov_pf_debugfs.h" +#include "xe_sriov.h" +#include "xe_sriov_pf.h" +#include "xe_sriov_pf_provision.h" + +/* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov # d_inode->i_private = (xe_device*) + * │ ├── pf # d_inode->i_private = (xe_device*) + * │ │ ├── tile0 # d_inode->i_private = (xe_tile*) + * │ │ ├── tile1 + * │ │ : : + * │ ├── vf1 # d_inode->i_private = VFID(1) + * │ │ ├── tile0 # d_inode->i_private = (xe_tile*) + * │ │ ├── tile1 + * │ │ 
: : + * │ ├── vfN # d_inode->i_private = VFID(N) + * │ │ ├── tile0 # d_inode->i_private = (xe_tile*) + * │ │ ├── tile1 + * : : : : + */ + +static void *extract_priv(struct dentry *d) +{ + return d->d_inode->i_private; +} + +__maybe_unused +static struct xe_tile *extract_tile(struct dentry *d) +{ + return extract_priv(d); +} + +static struct xe_device *extract_xe(struct dentry *d) +{ + return extract_priv(d->d_parent->d_parent); +} + +__maybe_unused +static unsigned int extract_vfid(struct dentry *d) +{ + void *pp = extract_priv(d->d_parent); + + return pp == extract_xe(d) ? PFID : (uintptr_t)pp; +} + +/* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── pf + * : ├── tile0 + * : ├── ggtt_available + * ├── ggtt_provisioned + */ + +static int pf_config_print_available_ggtt(struct xe_tile *tile, struct drm_printer *p) +{ + return xe_gt_sriov_pf_config_print_available_ggtt(tile->primary_gt, p); +} + +static int pf_config_print_ggtt(struct xe_tile *tile, struct drm_printer *p) +{ + return xe_gt_sriov_pf_config_print_ggtt(tile->primary_gt, p); +} + +static const struct drm_info_list pf_ggtt_info[] = { + { + "ggtt_available", + .show = xe_tile_debugfs_simple_show, + .data = pf_config_print_available_ggtt, + }, + { + "ggtt_provisioned", + .show = xe_tile_debugfs_simple_show, + .data = pf_config_print_ggtt, + }, +}; + +/* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── pf + * : ├── tile0 + * : ├── vram_provisioned + */ + +static int pf_config_print_vram(struct xe_tile *tile, struct drm_printer *p) +{ + return xe_gt_sriov_pf_config_print_lmem(tile->primary_gt, p); +} + +static const struct drm_info_list pf_vram_info[] = { + { + "vram_provisioned", + .show = xe_tile_debugfs_simple_show, + .data = pf_config_print_vram, + }, +}; + +/* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * │ ├── pf + * │ │ ├── tile0 + * │ │ │ ├── ggtt_spare + * │ │ │ ├── vram_spare + * │ │ ├── tile1 + * │ │ : : + * │ ├── vf1 + * │ : ├── tile0 + * │ │ ├── ggtt_quota + * │ │ ├── vram_quota + 
* │ ├── tile1 + * │ : : + */ + +#define DEFINE_SRIOV_TILE_CONFIG_DEBUGFS_ATTRIBUTE(NAME, CONFIG, TYPE, FORMAT) \ + \ +static int NAME##_set(void *data, u64 val) \ +{ \ + struct xe_tile *tile = extract_tile(data); \ + unsigned int vfid = extract_vfid(data); \ + struct xe_gt *gt = tile->primary_gt; \ + struct xe_device *xe = tile->xe; \ + int err; \ + \ + if (val > (TYPE)~0ull) \ + return -EOVERFLOW; \ + \ + xe_pm_runtime_get(xe); \ + err = xe_sriov_pf_wait_ready(xe) ?: \ + xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ + if (!err) \ + xe_sriov_pf_provision_set_custom_mode(xe); \ + xe_pm_runtime_put(xe); \ + \ + return err; \ +} \ + \ +static int NAME##_get(void *data, u64 *val) \ +{ \ + struct xe_tile *tile = extract_tile(data); \ + unsigned int vfid = extract_vfid(data); \ + struct xe_gt *gt = tile->primary_gt; \ + \ + *val = xe_gt_sriov_pf_config_get_##CONFIG(gt, vfid); \ + return 0; \ +} \ + \ +DEFINE_DEBUGFS_ATTRIBUTE(NAME##_fops, NAME##_get, NAME##_set, FORMAT) + +DEFINE_SRIOV_TILE_CONFIG_DEBUGFS_ATTRIBUTE(ggtt, ggtt, u64, "%llu\n"); +DEFINE_SRIOV_TILE_CONFIG_DEBUGFS_ATTRIBUTE(vram, lmem, u64, "%llu\n"); + +static void pf_add_config_attrs(struct xe_tile *tile, struct dentry *dent, unsigned int vfid) +{ + struct xe_device *xe = tile->xe; + + xe_tile_assert(tile, tile == extract_tile(dent)); + xe_tile_assert(tile, vfid == extract_vfid(dent)); + + debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare", + 0644, dent, dent, &ggtt_fops); + if (IS_DGFX(xe)) + debugfs_create_file_unsafe(vfid ? "vram_quota" : "vram_spare", + xe_device_has_lmtt(xe) ? 
0644 : 0444, + dent, dent, &vram_fops); +} + +static void pf_populate_tile(struct xe_tile *tile, struct dentry *dent, unsigned int vfid) +{ + struct xe_device *xe = tile->xe; + struct drm_minor *minor = xe->drm.primary; + struct xe_gt *gt; + unsigned int id; + + pf_add_config_attrs(tile, dent, vfid); + + if (!vfid) { + drm_debugfs_create_files(pf_ggtt_info, + ARRAY_SIZE(pf_ggtt_info), + dent, minor); + if (IS_DGFX(xe)) + drm_debugfs_create_files(pf_vram_info, + ARRAY_SIZE(pf_vram_info), + dent, minor); + } + + for_each_gt_on_tile(gt, tile, id) + xe_gt_sriov_pf_debugfs_populate(gt, dent, vfid); +} + +/** + * xe_tile_sriov_pf_debugfs_populate() - Populate SR-IOV debugfs tree with tile files. + * @tile: the &xe_tile to register + * @parent: the parent &dentry that represents the SR-IOV @vfid function + * @vfid: the VF identifier + * + * Add to the @parent directory new debugfs directory that will represent a @tile and + * populate it with files that are related to the SR-IOV @vfid function. + * + * This function can only be called on PF. 
+ */ +void xe_tile_sriov_pf_debugfs_populate(struct xe_tile *tile, struct dentry *parent, + unsigned int vfid) +{ + struct xe_device *xe = tile->xe; + struct dentry *dent; + char name[10]; /* should be enough up to "tile%u\0" for 2^16 - 1 */ + + xe_tile_assert(tile, IS_SRIOV_PF(xe)); + xe_tile_assert(tile, extract_priv(parent->d_parent) == xe); + xe_tile_assert(tile, extract_priv(parent) == tile->xe || + (uintptr_t)extract_priv(parent) == vfid); + + /* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * │ ├── pf # parent, d_inode->i_private = (xe_device*) + * │ │ ├── tile0 # d_inode->i_private = (xe_tile*) + * │ │ ├── tile1 + * │ │ : : + * │ ├── vf1 # parent, d_inode->i_private = VFID(1) + * │ │ ├── tile0 # d_inode->i_private = (xe_tile*) + * │ │ ├── tile1 + * : : : : + */ + snprintf(name, sizeof(name), "tile%u", tile->id); + dent = debugfs_create_dir(name, parent); + if (IS_ERR(dent)) + return; + dent->d_inode->i_private = tile; + + xe_tile_assert(tile, extract_tile(dent) == tile); + xe_tile_assert(tile, extract_vfid(dent) == vfid); + xe_tile_assert(tile, extract_xe(dent) == xe); + + pf_populate_tile(tile, dent, vfid); +} diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.h b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.h new file mode 100644 index 000000000000..55d179c44634 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TILE_SRIOV_PF_DEBUGFS_H_ +#define _XE_TILE_SRIOV_PF_DEBUGFS_H_ + +struct dentry; +struct xe_tile; + +void xe_tile_sriov_pf_debugfs_populate(struct xe_tile *tile, struct dentry *parent, + unsigned int vfid); + +#endif diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_printk.h b/drivers/gpu/drm/xe/xe_tile_sriov_printk.h new file mode 100644 index 000000000000..68323512872c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sriov_printk.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel 
Corporation + */ + +#ifndef _XE_TILE_SRIOV_PRINTK_H_ +#define _XE_TILE_SRIOV_PRINTK_H_ + +#include "xe_tile_printk.h" +#include "xe_sriov_printk.h" + +#define __XE_TILE_SRIOV_PRINTK_FMT(_tile, _fmt, ...) \ + __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__) + +#define xe_tile_sriov_printk(_tile, _level, _fmt, ...) \ + xe_sriov_##_level((_tile)->xe, __XE_TILE_SRIOV_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__)) + +#define xe_tile_sriov_err(_tile, _fmt, ...) \ + xe_tile_sriov_printk(_tile, err, _fmt, ##__VA_ARGS__) + +#define xe_tile_sriov_notice(_tile, _fmt, ...) \ + xe_tile_sriov_printk(_tile, notice, _fmt, ##__VA_ARGS__) + +#define xe_tile_sriov_info(_tile, _fmt, ...) \ + xe_tile_sriov_printk(_tile, info, _fmt, ##__VA_ARGS__) + +#define xe_tile_sriov_dbg(_tile, _fmt, ...) \ + xe_tile_sriov_printk(_tile, dbg, _fmt, ##__VA_ARGS__) + +#define xe_tile_sriov_dbg_verbose(_tile, _fmt, ...) \ + xe_tile_sriov_printk(_tile, dbg_verbose, _fmt, ##__VA_ARGS__) + +#endif diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.c b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c new file mode 100644 index 000000000000..c9bac2cfdd04 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "regs/xe_gtt_defs.h" + +#include "xe_assert.h" +#include "xe_ggtt.h" +#include "xe_sriov.h" +#include "xe_sriov_printk.h" +#include "xe_tile_sriov_vf.h" +#include "xe_wopcm.h" + +static int vf_init_ggtt_balloons(struct xe_tile *tile) +{ + struct xe_ggtt *ggtt = tile->mem.ggtt; + + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + tile->sriov.vf.ggtt_balloon[0] = xe_ggtt_node_init(ggtt); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) + return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); + + tile->sriov.vf.ggtt_balloon[1] = xe_ggtt_node_init(ggtt); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { + xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]); + return 
PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); + } + + return 0; +} + +/** + * xe_tile_sriov_vf_balloon_ggtt_locked - Insert balloon nodes to limit used GGTT address range. + * @tile: the &xe_tile struct instance + * + * Return: 0 on success or a negative error code on failure. + */ +static int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile) +{ + u64 ggtt_base = tile->sriov.vf.self_config.ggtt_base; + u64 ggtt_size = tile->sriov.vf.self_config.ggtt_size; + struct xe_device *xe = tile_to_xe(tile); + u64 wopcm = xe_wopcm_size(xe); + u64 start, end; + int err; + + xe_tile_assert(tile, IS_SRIOV_VF(xe)); + xe_tile_assert(tile, ggtt_size); + lockdep_assert_held(&tile->mem.ggtt->lock); + + /* + * VF can only use part of the GGTT as allocated by the PF: + * + * WOPCM GUC_GGTT_TOP + * |<------------ Total GGTT size ------------------>| + * + * VF GGTT base -->|<- size ->| + * + * +--------------------+----------+-----------------+ + * |////////////////////| block |\\\\\\\\\\\\\\\\\| + * +--------------------+----------+-----------------+ + * + * |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->| + */ + + if (ggtt_base < wopcm || ggtt_base > GUC_GGTT_TOP || + ggtt_size > GUC_GGTT_TOP - ggtt_base) { + xe_sriov_err(xe, "tile%u: Invalid GGTT configuration: %#llx-%#llx\n", + tile->id, ggtt_base, ggtt_base + ggtt_size - 1); + return -ERANGE; + } + + start = wopcm; + end = ggtt_base; + if (end != start) { + err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[0], + start, end); + if (err) + return err; + } + + start = ggtt_base + ggtt_size; + end = GUC_GGTT_TOP; + if (end != start) { + err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[1], + start, end); + if (err) { + xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]); + return err; + } + } + + return 0; +} + +static int vf_balloon_ggtt(struct xe_tile *tile) +{ + struct xe_ggtt *ggtt = tile->mem.ggtt; + int err; + + mutex_lock(&ggtt->lock); + err = 
xe_tile_sriov_vf_balloon_ggtt_locked(tile); + mutex_unlock(&ggtt->lock); + + return err; +} + +/** + * xe_tile_sriov_vf_deballoon_ggtt_locked - Remove balloon nodes. + * @tile: the &xe_tile struct instance + */ +void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile) +{ + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[1]); + xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]); +} + +static void vf_deballoon_ggtt(struct xe_tile *tile) +{ + mutex_lock(&tile->mem.ggtt->lock); + xe_tile_sriov_vf_deballoon_ggtt_locked(tile); + mutex_unlock(&tile->mem.ggtt->lock); +} + +static void vf_fini_ggtt_balloons(struct xe_tile *tile) +{ + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[1]); + xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]); +} + +static void cleanup_ggtt(struct drm_device *drm, void *arg) +{ + struct xe_tile *tile = arg; + + vf_deballoon_ggtt(tile); + vf_fini_ggtt_balloons(tile); +} + +/** + * xe_tile_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration. + * @tile: the &xe_tile + * + * This function is for VF use only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + int err; + + err = vf_init_ggtt_balloons(tile); + if (err) + return err; + + err = vf_balloon_ggtt(tile); + if (err) { + vf_fini_ggtt_balloons(tile); + return err; + } + + return drmm_add_action_or_reset(&xe->drm, cleanup_ggtt, tile); +} + +/** + * DOC: GGTT nodes shifting during VF post-migration recovery + * + * The first fixup applied to the VF KMD structures as part of post-migration + * recovery is shifting nodes within &xe_ggtt instance. The nodes are moved + * from range previously assigned to this VF, into newly provisioned area. + * The changes include balloons, which are resized accordingly. 
+ * + * The balloon nodes are there to eliminate unavailable ranges from use: one + * reserves the GGTT area below the range for current VF, and another one + * reserves area above. + * + * Below is a GGTT layout of example VF, with a certain address range assigned to + * said VF, and inaccessible areas above and below: + * + * 0 4GiB + * |<--------------------------- Total GGTT size ----------------------------->| + * WOPCM GUC_TOP + * |<-------------- Area mappable by xe_ggtt instance ---------------->| + * + * +---+---------------------------------+----------+----------------------+---+ + * |\\\|/////////////////////////////////| VF mem |//////////////////////|\\\| + * +---+---------------------------------+----------+----------------------+---+ + * + * Hardware enforced access rules before migration: + * + * |<------- inaccessible for VF ------->|<VF owned>|<-- inaccessible for VF ->| + * + * GGTT nodes used for tracking allocations: + * + * |<---------- balloon ------------>|<- nodes->|<----- balloon ------>| + * + * After the migration, GGTT area assigned to the VF might have shifted, either + * to lower or to higher address. But we expect the total size and extra areas to + * be identical, as migration can only happen between matching platforms. + * Below is an example of GGTT layout of the VF after migration. Content of the + * GGTT for VF has been moved to a new area, and we receive its address from GuC: + * + * +---+----------------------+----------+---------------------------------+---+ + * |\\\|//////////////////////| VF mem |/////////////////////////////////|\\\| + * +---+----------------------+----------+---------------------------------+---+ + * + * Hardware enforced access rules after migration: + * + * |<- inaccessible for VF -->|<VF owned>|<------- inaccessible for VF ------->| + * + * So the VF has a new slice of GGTT assigned, and during migration process, the + * memory content was copied to that new area. 
But the &xe_ggtt nodes are still + * tracking allocations using the old addresses. The nodes within VF owned area + * have to be shifted, and balloon nodes need to be resized to properly mask out + * areas not owned by the VF. + * + * Fixed &xe_ggtt nodes used for tracking allocations: + * + * |<------ balloon ------>|<- nodes->|<----------- balloon ----------->| + * + * Due to use of GPU profiles, we do not expect the old and new GGTT areas to + * overlap; but our node shifting will fix addresses properly regardless. + */ + +/** + * xe_tile_sriov_vf_fixup_ggtt_nodes_locked - Shift GGTT allocations to match assigned range. + * @tile: the &xe_tile struct instance + * @shift: the shift value + * + * Since Global GTT is not virtualized, each VF has an assigned range + * within the global space. This range might have changed during migration, + * which requires all memory addresses pointing to GGTT to be shifted. + */ +void xe_tile_sriov_vf_fixup_ggtt_nodes_locked(struct xe_tile *tile, s64 shift) +{ + struct xe_ggtt *ggtt = tile->mem.ggtt; + + lockdep_assert_held(&ggtt->lock); + + xe_tile_sriov_vf_deballoon_ggtt_locked(tile); + xe_ggtt_shift_nodes_locked(ggtt, shift); + xe_tile_sriov_vf_balloon_ggtt_locked(tile); +} + +/** + * xe_tile_sriov_vf_lmem - VF LMEM configuration. + * @tile: the &xe_tile + * + * This function is for VF use only. + * + * Return: size of the LMEM assigned to VF. + */ +u64 xe_tile_sriov_vf_lmem(struct xe_tile *tile) +{ + struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config; + + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + return config->lmem_size; +} + +/** + * xe_tile_sriov_vf_lmem_store - Store VF LMEM configuration + * @tile: the &xe_tile + * @lmem_size: VF LMEM size to store + * + * This function is for VF use only. 
+ */ +void xe_tile_sriov_vf_lmem_store(struct xe_tile *tile, u64 lmem_size) +{ + struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config; + + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + config->lmem_size = lmem_size; +} + +/** + * xe_tile_sriov_vf_ggtt - VF GGTT configuration. + * @tile: the &xe_tile + * + * This function is for VF use only. + * + * Return: size of the GGTT assigned to VF. + */ +u64 xe_tile_sriov_vf_ggtt(struct xe_tile *tile) +{ + struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config; + + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + return config->ggtt_size; +} + +/** + * xe_tile_sriov_vf_ggtt_store - Store VF GGTT configuration + * @tile: the &xe_tile + * @ggtt_size: VF GGTT size to store + * + * This function is for VF use only. + */ +void xe_tile_sriov_vf_ggtt_store(struct xe_tile *tile, u64 ggtt_size) +{ + struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config; + + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + config->ggtt_size = ggtt_size; +} + +/** + * xe_tile_sriov_vf_ggtt_base - VF GGTT base configuration. + * @tile: the &xe_tile + * + * This function is for VF use only. + * + * Return: base of the GGTT assigned to VF. + */ +u64 xe_tile_sriov_vf_ggtt_base(struct xe_tile *tile) +{ + struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config; + + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + return config->ggtt_base; +} + +/** + * xe_tile_sriov_vf_ggtt_base_store - Store VF GGTT base configuration + * @tile: the &xe_tile + * @ggtt_base: VF GGTT base to store + * + * This function is for VF use only. 
+ */ +void xe_tile_sriov_vf_ggtt_base_store(struct xe_tile *tile, u64 ggtt_base) +{ + struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config; + + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + config->ggtt_base = ggtt_base; +} diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.h b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h new file mode 100644 index 000000000000..749f41504883 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TILE_SRIOV_VF_H_ +#define _XE_TILE_SRIOV_VF_H_ + +#include <linux/types.h> + +struct xe_tile; + +int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile); +void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile); +void xe_tile_sriov_vf_fixup_ggtt_nodes_locked(struct xe_tile *tile, s64 shift); +u64 xe_tile_sriov_vf_ggtt(struct xe_tile *tile); +void xe_tile_sriov_vf_ggtt_store(struct xe_tile *tile, u64 ggtt_size); +u64 xe_tile_sriov_vf_ggtt_base(struct xe_tile *tile); +void xe_tile_sriov_vf_ggtt_base_store(struct xe_tile *tile, u64 ggtt_size); +u64 xe_tile_sriov_vf_lmem(struct xe_tile *tile); +void xe_tile_sriov_vf_lmem_store(struct xe_tile *tile, u64 lmem_size); + +#endif diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h new file mode 100644 index 000000000000..4807ca51614c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TILE_SRIOV_VF_TYPES_H_ +#define _XE_TILE_SRIOV_VF_TYPES_H_ + +#include <linux/types.h> + +/** + * struct xe_tile_sriov_vf_selfconfig - VF configuration data. + */ +struct xe_tile_sriov_vf_selfconfig { + /** @ggtt_base: assigned base offset of the GGTT region. */ + u64 ggtt_base; + /** @ggtt_size: assigned size of the GGTT region. */ + u64 ggtt_size; + /** @lmem_size: assigned size of the LMEM. 
*/ + u64 lmem_size; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c index b804234a6551..9e1236a9ec67 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -44,16 +44,18 @@ int xe_tile_sysfs_init(struct xe_tile *tile) kt->tile = tile; err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id); - if (err) { - kobject_put(&kt->base); - return err; - } + if (err) + goto err_object; tile->sysfs = &kt->base; err = xe_vram_freq_sysfs_init(tile); if (err) - return err; + goto err_object; return devm_add_action_or_reset(xe->drm.dev, tile_sysfs_fini, tile); + +err_object: + kobject_put(&kt->base); + return err; } diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c new file mode 100644 index 000000000000..918a59e686ea --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tlb_inval.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "abi/guc_actions_abi.h" +#include "xe_device.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_gt_printk.h" +#include "xe_gt_stats.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_tlb_inval.h" +#include "xe_mmio.h" +#include "xe_pm.h" +#include "xe_tlb_inval.h" +#include "xe_trace.h" + +/** + * DOC: Xe TLB invalidation + * + * Xe TLB invalidation is implemented in two layers. The first is the frontend + * API, which provides an interface for TLB invalidations to the driver code. + * The frontend handles seqno assignment, synchronization (fences), and the + * timeout mechanism. The frontend is implemented via an embedded structure + * xe_tlb_inval that includes a set of ops hooking into the backend. The backend + * interacts with the hardware (or firmware) to perform the actual invalidation. 
+ */ + +#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS + +static void xe_tlb_inval_fence_fini(struct xe_tlb_inval_fence *fence) +{ + if (WARN_ON_ONCE(!fence->tlb_inval)) + return; + + xe_pm_runtime_put(fence->tlb_inval->xe); + fence->tlb_inval = NULL; /* fini() should be called once */ +} + +static void +xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence) +{ + bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); + + lockdep_assert_held(&fence->tlb_inval->pending_lock); + + list_del(&fence->link); + trace_xe_tlb_inval_fence_signal(fence->tlb_inval->xe, fence); + xe_tlb_inval_fence_fini(fence); + dma_fence_signal(&fence->base); + if (!stack) + dma_fence_put(&fence->base); +} + +static void +xe_tlb_inval_fence_signal_unlocked(struct xe_tlb_inval_fence *fence) +{ + struct xe_tlb_inval *tlb_inval = fence->tlb_inval; + + spin_lock_irq(&tlb_inval->pending_lock); + xe_tlb_inval_fence_signal(fence); + spin_unlock_irq(&tlb_inval->pending_lock); +} + +static void xe_tlb_inval_fence_timeout(struct work_struct *work) +{ + struct xe_tlb_inval *tlb_inval = container_of(work, struct xe_tlb_inval, + fence_tdr.work); + struct xe_device *xe = tlb_inval->xe; + struct xe_tlb_inval_fence *fence, *next; + long timeout_delay = tlb_inval->ops->timeout_delay(tlb_inval); + + tlb_inval->ops->flush(tlb_inval); + + spin_lock_irq(&tlb_inval->pending_lock); + list_for_each_entry_safe(fence, next, + &tlb_inval->pending_fences, link) { + s64 since_inval_ms = ktime_ms_delta(ktime_get(), + fence->inval_time); + + if (msecs_to_jiffies(since_inval_ms) < timeout_delay) + break; + + trace_xe_tlb_inval_fence_timeout(xe, fence); + drm_err(&xe->drm, + "TLB invalidation fence timeout, seqno=%d recv=%d", + fence->seqno, tlb_inval->seqno_recv); + + fence->base.error = -ETIME; + xe_tlb_inval_fence_signal(fence); + } + if (!list_empty(&tlb_inval->pending_fences)) + queue_delayed_work(system_wq, &tlb_inval->fence_tdr, + timeout_delay); + spin_unlock_irq(&tlb_inval->pending_lock); +} + +/** + * 
tlb_inval_fini - Clean up TLB invalidation state + * @drm: @drm_device + * @arg: pointer to struct @xe_tlb_inval + * + * Cancel pending fence workers and clean up any additional + * TLB invalidation state. + */ +static void tlb_inval_fini(struct drm_device *drm, void *arg) +{ + struct xe_tlb_inval *tlb_inval = arg; + + xe_tlb_inval_reset(tlb_inval); +} + +/** + * xe_gt_tlb_inval_init_early - Initialize TLB invalidation state + * @gt: GT structure + * + * Initialize TLB invalidation state, purely software initialization, should + * be called once during driver load. + * + * Return: 0 on success, negative error code on error. + */ +int xe_gt_tlb_inval_init_early(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_tlb_inval *tlb_inval = &gt->tlb_inval; + int err; + + tlb_inval->xe = xe; + tlb_inval->seqno = 1; + INIT_LIST_HEAD(&tlb_inval->pending_fences); + spin_lock_init(&tlb_inval->pending_lock); + spin_lock_init(&tlb_inval->lock); + INIT_DELAYED_WORK(&tlb_inval->fence_tdr, xe_tlb_inval_fence_timeout); + + err = drmm_mutex_init(&xe->drm, &tlb_inval->seqno_lock); + if (err) + return err; + + tlb_inval->job_wq = drmm_alloc_ordered_workqueue(&xe->drm, + "gt-tbl-inval-job-wq", + WQ_MEM_RECLAIM); + if (IS_ERR(tlb_inval->job_wq)) + return PTR_ERR(tlb_inval->job_wq); + + /* XXX: Blindly setting up backend to GuC */ + xe_guc_tlb_inval_init_early(&gt->uc.guc, tlb_inval); + + return drmm_add_action_or_reset(&xe->drm, tlb_inval_fini, tlb_inval); +} + +/** + * xe_tlb_inval_reset() - TLB invalidation reset + * @tlb_inval: TLB invalidation client + * + * Signal any pending invalidation fences, should be called during a GT reset + */ +void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval) +{ + struct xe_tlb_inval_fence *fence, *next; + int pending_seqno; + + /* + * we can get here before the backends are even initialized if we're + * wedging very early, in which case there are not going to be any + * pending fences so we can bail immediately.
+ */ + if (!tlb_inval->ops->initialized(tlb_inval)) + return; + + /* + * Backend is already disabled at this point. No new TLB requests can + * appear. + */ + + mutex_lock(&tlb_inval->seqno_lock); + spin_lock_irq(&tlb_inval->pending_lock); + cancel_delayed_work(&tlb_inval->fence_tdr); + /* + * We might have various kworkers waiting for TLB flushes to complete + * which are not tracked with an explicit TLB fence, however at this + * stage that will never happen since the backend is already disabled, + * so make sure we signal them here under the assumption that we have + * completed a full GT reset. + */ + if (tlb_inval->seqno == 1) + pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1; + else + pending_seqno = tlb_inval->seqno - 1; + WRITE_ONCE(tlb_inval->seqno_recv, pending_seqno); + + list_for_each_entry_safe(fence, next, + &tlb_inval->pending_fences, link) + xe_tlb_inval_fence_signal(fence); + spin_unlock_irq(&tlb_inval->pending_lock); + mutex_unlock(&tlb_inval->seqno_lock); +} + +static bool xe_tlb_inval_seqno_past(struct xe_tlb_inval *tlb_inval, int seqno) +{ + int seqno_recv = READ_ONCE(tlb_inval->seqno_recv); + + lockdep_assert_held(&tlb_inval->pending_lock); + + if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2)) + return false; + + if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2)) + return true; + + return seqno_recv >= seqno; +} + +static void xe_tlb_inval_fence_prep(struct xe_tlb_inval_fence *fence) +{ + struct xe_tlb_inval *tlb_inval = fence->tlb_inval; + + fence->seqno = tlb_inval->seqno; + trace_xe_tlb_inval_fence_send(tlb_inval->xe, fence); + + spin_lock_irq(&tlb_inval->pending_lock); + fence->inval_time = ktime_get(); + list_add_tail(&fence->link, &tlb_inval->pending_fences); + + if (list_is_singular(&tlb_inval->pending_fences)) + queue_delayed_work(system_wq, &tlb_inval->fence_tdr, + tlb_inval->ops->timeout_delay(tlb_inval)); + spin_unlock_irq(&tlb_inval->pending_lock); + + tlb_inval->seqno = (tlb_inval->seqno + 1) % + 
TLB_INVALIDATION_SEQNO_MAX; + if (!tlb_inval->seqno) + tlb_inval->seqno = 1; +} + +#define xe_tlb_inval_issue(__tlb_inval, __fence, op, args...) \ +({ \ + int __ret; \ + \ + xe_assert((__tlb_inval)->xe, (__tlb_inval)->ops); \ + xe_assert((__tlb_inval)->xe, (__fence)); \ + \ + mutex_lock(&(__tlb_inval)->seqno_lock); \ + xe_tlb_inval_fence_prep((__fence)); \ + __ret = op((__tlb_inval), (__fence)->seqno, ##args); \ + if (__ret < 0) \ + xe_tlb_inval_fence_signal_unlocked((__fence)); \ + mutex_unlock(&(__tlb_inval)->seqno_lock); \ + \ + __ret == -ECANCELED ? 0 : __ret; \ +}) + +/** + * xe_tlb_inval_all() - Issue a TLB invalidation for all TLBs + * @tlb_inval: TLB invalidation client + * @fence: invalidation fence which will be signal on TLB invalidation + * completion + * + * Issue a TLB invalidation for all TLBs. Completion of TLB is asynchronous and + * caller can use the invalidation fence to wait for completion. + * + * Return: 0 on success, negative error code on error + */ +int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence) +{ + return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->all); +} + +/** + * xe_tlb_inval_ggtt() - Issue a TLB invalidation for the GGTT + * @tlb_inval: TLB invalidation client + * + * Issue a TLB invalidation for the GGTT. Completion of TLB is asynchronous and + * caller can use the invalidation fence to wait for completion. 
+ * + * Return: 0 on success, negative error code on error + */ +int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval) +{ + struct xe_tlb_inval_fence fence, *fence_ptr = &fence; + int ret; + + xe_tlb_inval_fence_init(tlb_inval, fence_ptr, true); + ret = xe_tlb_inval_issue(tlb_inval, fence_ptr, tlb_inval->ops->ggtt); + xe_tlb_inval_fence_wait(fence_ptr); + + return ret; +} + +/** + * xe_tlb_inval_range() - Issue a TLB invalidation for an address range + * @tlb_inval: TLB invalidation client + * @fence: invalidation fence which will be signal on TLB invalidation + * completion + * @start: start address + * @end: end address + * @asid: address space id + * + * Issue a range based TLB invalidation if supported, if not fallback to a full + * TLB invalidation. Completion of TLB is asynchronous and caller can use + * the invalidation fence to wait for completion. + * + * Return: Negative error code on error, 0 on success + */ +int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence, u64 start, u64 end, + u32 asid) +{ + return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->ppgtt, + start, end, asid); +} + +/** + * xe_tlb_inval_vm() - Issue a TLB invalidation for a VM + * @tlb_inval: TLB invalidation client + * @vm: VM to invalidate + * + * Invalidate entire VM's address space + */ +void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm) +{ + struct xe_tlb_inval_fence fence; + u64 range = 1ull << vm->xe->info.va_bits; + + xe_tlb_inval_fence_init(tlb_inval, &fence, true); + xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid); + xe_tlb_inval_fence_wait(&fence); +} + +/** + * xe_tlb_inval_done_handler() - TLB invalidation done handler + * @tlb_inval: TLB invalidation client + * @seqno: seqno of invalidation that is done + * + * Update recv seqno, signal any TLB invalidation fences, and restart TDR + */ +void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno) +{ + struct xe_device *xe = 
tlb_inval->xe; + struct xe_tlb_inval_fence *fence, *next; + unsigned long flags; + + /* + * This can also be run both directly from the IRQ handler and also in + * process_g2h_msg(). Only one may process any individual CT message, + * however the order they are processed here could result in skipping a + * seqno. To handle that we just process all the seqnos from the last + * seqno_recv up to and including the one in msg[0]. The delta should be + * very small so there shouldn't be much of pending_fences we actually + * need to iterate over here. + * + * From GuC POV we expect the seqnos to always appear in-order, so if we + * see something later in the timeline we can be sure that anything + * appearing earlier has already signalled, just that we have yet to + * officially process the CT message like if racing against + * process_g2h_msg(). + */ + spin_lock_irqsave(&tlb_inval->pending_lock, flags); + if (xe_tlb_inval_seqno_past(tlb_inval, seqno)) { + spin_unlock_irqrestore(&tlb_inval->pending_lock, flags); + return; + } + + WRITE_ONCE(tlb_inval->seqno_recv, seqno); + + list_for_each_entry_safe(fence, next, + &tlb_inval->pending_fences, link) { + trace_xe_tlb_inval_fence_recv(xe, fence); + + if (!xe_tlb_inval_seqno_past(tlb_inval, fence->seqno)) + break; + + xe_tlb_inval_fence_signal(fence); + } + + if (!list_empty(&tlb_inval->pending_fences)) + mod_delayed_work(system_wq, + &tlb_inval->fence_tdr, + tlb_inval->ops->timeout_delay(tlb_inval)); + else + cancel_delayed_work(&tlb_inval->fence_tdr); + + spin_unlock_irqrestore(&tlb_inval->pending_lock, flags); +} + +static const char * +xe_inval_fence_get_driver_name(struct dma_fence *dma_fence) +{ + return "xe"; +} + +static const char * +xe_inval_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + return "tlb_inval_fence"; +} + +static const struct dma_fence_ops inval_fence_ops = { + .get_driver_name = xe_inval_fence_get_driver_name, + .get_timeline_name = xe_inval_fence_get_timeline_name, +}; + +/** + * 
xe_tlb_inval_fence_init() - Initialize TLB invalidation fence + * @tlb_inval: TLB invalidation client + * @fence: TLB invalidation fence to initialize + * @stack: fence is stack variable + * + * Initialize TLB invalidation fence for use. xe_tlb_inval_fence_fini + * will be automatically called when fence is signalled (all fences must signal), + * even on error. + */ +void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence, + bool stack) +{ + xe_pm_runtime_get_noresume(tlb_inval->xe); + + spin_lock_irq(&tlb_inval->lock); + dma_fence_init(&fence->base, &inval_fence_ops, &tlb_inval->lock, + dma_fence_context_alloc(1), 1); + spin_unlock_irq(&tlb_inval->lock); + INIT_LIST_HEAD(&fence->link); + if (stack) + set_bit(FENCE_STACK_BIT, &fence->base.flags); + else + dma_fence_get(&fence->base); + fence->tlb_inval = tlb_inval; +} diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.h b/drivers/gpu/drm/xe/xe_tlb_inval.h new file mode 100644 index 000000000000..05614915463a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tlb_inval.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TLB_INVAL_H_ +#define _XE_TLB_INVAL_H_ + +#include <linux/types.h> + +#include "xe_tlb_inval_types.h" + +struct xe_gt; +struct xe_guc; +struct xe_vm; + +int xe_gt_tlb_inval_init_early(struct xe_gt *gt); + +void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval); +int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence); +int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval); +void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm); +int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence, + u64 start, u64 end, u32 asid); + +void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence, + bool stack); + +/** + * xe_tlb_inval_fence_wait() - TLB invalidation fence wait + * @fence: TLB invalidation
fence to wait on + * + * Wait on a TLB invalidation fence until it signals, non-interruptible + */ +static inline void +xe_tlb_inval_fence_wait(struct xe_tlb_inval_fence *fence) +{ + dma_fence_wait(&fence->base, false); +} + +void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno); + +#endif /* _XE_TLB_INVAL_H_ */ diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c new file mode 100644 index 000000000000..1ae0dec2cf31 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_assert.h" +#include "xe_dep_job_types.h" +#include "xe_dep_scheduler.h" +#include "xe_exec_queue.h" +#include "xe_gt_types.h" +#include "xe_tlb_inval.h" +#include "xe_tlb_inval_job.h" +#include "xe_migrate.h" +#include "xe_pm.h" +#include "xe_vm.h" + +/** struct xe_tlb_inval_job - TLB invalidation job */ +struct xe_tlb_inval_job { + /** @dep: base generic dependency Xe job */ + struct xe_dep_job dep; + /** @tlb_inval: TLB invalidation client */ + struct xe_tlb_inval *tlb_inval; + /** @q: exec queue issuing the invalidate */ + struct xe_exec_queue *q; + /** @vm: VM which TLB invalidation is being issued for */ + struct xe_vm *vm; + /** @refcount: ref count of this job */ + struct kref refcount; + /** + * @fence: dma fence to indicate completion. 1 way relationship - job + * can safely reference fence, fence cannot safely reference job.
+ */ + struct dma_fence *fence; + /** @start: Start address to invalidate */ + u64 start; + /** @end: End address to invalidate */ + u64 end; + /** @type: GT type */ + int type; + /** @fence_armed: Fence has been armed */ + bool fence_armed; +}; + +static struct dma_fence *xe_tlb_inval_job_run(struct xe_dep_job *dep_job) +{ + struct xe_tlb_inval_job *job = + container_of(dep_job, typeof(*job), dep); + struct xe_tlb_inval_fence *ifence = + container_of(job->fence, typeof(*ifence), base); + + xe_tlb_inval_range(job->tlb_inval, ifence, job->start, + job->end, job->vm->usm.asid); + + return job->fence; +} + +static void xe_tlb_inval_job_free(struct xe_dep_job *dep_job) +{ + struct xe_tlb_inval_job *job = + container_of(dep_job, typeof(*job), dep); + + /* Pairs with get in xe_tlb_inval_job_push */ + xe_tlb_inval_job_put(job); +} + +static const struct xe_dep_job_ops dep_job_ops = { + .run_job = xe_tlb_inval_job_run, + .free_job = xe_tlb_inval_job_free, +}; + +/** + * xe_tlb_inval_job_create() - TLB invalidation job create + * @q: exec queue issuing the invalidate + * @tlb_inval: TLB invalidation client + * @dep_scheduler: Dependency scheduler for job + * @vm: VM which TLB invalidation is being issued for + * @start: Start address to invalidate + * @end: End address to invalidate + * @type: GT type + * + * Create a TLB invalidation job and initialize internal fields. The caller is + * responsible for releasing the creation reference. 
+ * + * Return: TLB invalidation job object on success, ERR_PTR failure + */ +struct xe_tlb_inval_job * +xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, + struct xe_dep_scheduler *dep_scheduler, + struct xe_vm *vm, u64 start, u64 end, int type) +{ + struct xe_tlb_inval_job *job; + struct drm_sched_entity *entity = + xe_dep_scheduler_entity(dep_scheduler); + struct xe_tlb_inval_fence *ifence; + int err; + + xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + + job = kmalloc(sizeof(*job), GFP_KERNEL); + if (!job) + return ERR_PTR(-ENOMEM); + + job->q = q; + job->vm = vm; + job->tlb_inval = tlb_inval; + job->start = start; + job->end = end; + job->fence_armed = false; + job->dep.ops = &dep_job_ops; + job->type = type; + kref_init(&job->refcount); + xe_exec_queue_get(q); /* Pairs with put in xe_tlb_inval_job_destroy */ + xe_vm_get(vm); /* Pairs with put in xe_tlb_inval_job_destroy */ + + ifence = kmalloc(sizeof(*ifence), GFP_KERNEL); + if (!ifence) { + err = -ENOMEM; + goto err_job; + } + job->fence = &ifence->base; + + err = drm_sched_job_init(&job->dep.drm, entity, 1, NULL, + q->xef ? 
q->xef->drm->client_id : 0); + if (err) + goto err_fence; + + /* Pairs with put in xe_tlb_inval_job_destroy */ + xe_pm_runtime_get_noresume(gt_to_xe(q->gt)); + + return job; + +err_fence: + kfree(ifence); +err_job: + xe_vm_put(vm); + xe_exec_queue_put(q); + kfree(job); + + return ERR_PTR(err); +} + +static void xe_tlb_inval_job_destroy(struct kref *ref) +{ + struct xe_tlb_inval_job *job = container_of(ref, typeof(*job), + refcount); + struct xe_tlb_inval_fence *ifence = + container_of(job->fence, typeof(*ifence), base); + struct xe_exec_queue *q = job->q; + struct xe_device *xe = gt_to_xe(q->gt); + struct xe_vm *vm = job->vm; + + if (!job->fence_armed) + kfree(ifence); + else + /* Ref from xe_tlb_inval_fence_init */ + dma_fence_put(job->fence); + + drm_sched_job_cleanup(&job->dep.drm); + kfree(job); + xe_vm_put(vm); /* Pairs with get from xe_tlb_inval_job_create */ + xe_exec_queue_put(q); /* Pairs with get from xe_tlb_inval_job_create */ + xe_pm_runtime_put(xe); /* Pairs with get from xe_tlb_inval_job_create */ +} + +/** + * xe_tlb_inval_job_alloc_dep() - TLB invalidation job alloc dependency + * @job: TLB invalidation job to alloc dependency for + * + * Allocate storage for a dependency in the TLB invalidation fence. This + * function should be called at most once per job and must be paired with + * xe_tlb_inval_job_push being called with a real fence. + * + * Return: 0 on success, -errno on failure + */ +int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job) +{ + xe_assert(gt_to_xe(job->q->gt), !xa_load(&job->dep.drm.dependencies, 0)); + might_alloc(GFP_KERNEL); + + return drm_sched_job_add_dependency(&job->dep.drm, + dma_fence_get_stub()); +} + +/** + * xe_tlb_inval_job_push() - TLB invalidation job push + * @job: TLB invalidation job to push + * @m: The migration object being used + * @fence: Dependency for TLB invalidation job + * + * Pushes a TLB invalidation job for execution, using @fence as a dependency.
+ * Storage for @fence must be preallocated with xe_tlb_inval_job_alloc_dep + * prior to this call if @fence is not signaled. Takes a reference to the job’s + * finished fence, which the caller is responsible for releasing, and return it + * to the caller. This function is safe to be called in the path of reclaim. + * + * Return: Job's finished fence on success, cannot fail + */ +struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job, + struct xe_migrate *m, + struct dma_fence *fence) +{ + struct xe_tlb_inval_fence *ifence = + container_of(job->fence, typeof(*ifence), base); + + if (!dma_fence_is_signaled(fence)) { + void *ptr; + + /* + * Can be in path of reclaim, hence the preallocation of fence + * storage in xe_tlb_inval_job_alloc_dep. Verify caller did + * this correctly. + */ + xe_assert(gt_to_xe(job->q->gt), + xa_load(&job->dep.drm.dependencies, 0) == + dma_fence_get_stub()); + + dma_fence_get(fence); /* ref released once dependency processed by scheduler */ + ptr = xa_store(&job->dep.drm.dependencies, 0, fence, + GFP_ATOMIC); + xe_assert(gt_to_xe(job->q->gt), !xa_is_err(ptr)); + } + + xe_tlb_inval_job_get(job); /* Pairs with put in free_job */ + job->fence_armed = true; + + /* + * We need the migration lock to protect the job's seqno and the spsc + * queue, only taken on migration queue, user queues protected dma-resv + * VM lock. + */ + xe_migrate_job_lock(m, job->q); + + /* Creation ref pairs with put in xe_tlb_inval_job_destroy */ + xe_tlb_inval_fence_init(job->tlb_inval, ifence, false); + dma_fence_get(job->fence); /* Pairs with put in DRM scheduler */ + + drm_sched_job_arm(&job->dep.drm); + /* + * caller ref, get must be done before job push as it could immediately + * signal and free. 
+ */ + dma_fence_get(&job->dep.drm.s_fence->finished); + drm_sched_entity_push_job(&job->dep.drm); + + /* Let the upper layers fish this out */ + xe_exec_queue_tlb_inval_last_fence_set(job->q, job->vm, + &job->dep.drm.s_fence->finished, + job->type); + + xe_migrate_job_unlock(m, job->q); + + /* + * Not using job->fence, as it has its own dma-fence context, which does + * not allow TLB invalidation fences on the same queue, GT tuple to + * be squashed in dma-resv/DRM scheduler. Instead, we use the DRM scheduler + * context and job's finished fence, which enables squashing. + */ + return &job->dep.drm.s_fence->finished; +} + +/** + * xe_tlb_inval_job_get() - Get a reference to TLB invalidation job + * @job: TLB invalidation job object + * + * Increment the TLB invalidation job's reference count + */ +void xe_tlb_inval_job_get(struct xe_tlb_inval_job *job) +{ + kref_get(&job->refcount); +} + +/** + * xe_tlb_inval_job_put() - Put a reference to TLB invalidation job + * @job: TLB invalidation job object + * + * Decrement the TLB invalidation job's reference count, call + * xe_tlb_inval_job_destroy when reference count == 0. Skips decrement if + * input @job is NULL or IS_ERR. 
+ */ +void xe_tlb_inval_job_put(struct xe_tlb_inval_job *job) +{ + if (!IS_ERR_OR_NULL(job)) + kref_put(&job->refcount, xe_tlb_inval_job_destroy); +} diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.h b/drivers/gpu/drm/xe/xe_tlb_inval_job.h new file mode 100644 index 000000000000..4d6df1a6c6ca --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TLB_INVAL_JOB_H_ +#define _XE_TLB_INVAL_JOB_H_ + +#include <linux/types.h> + +struct dma_fence; +struct xe_dep_scheduler; +struct xe_exec_queue; +struct xe_migrate; +struct xe_tlb_inval; +struct xe_tlb_inval_job; +struct xe_vm; + +struct xe_tlb_inval_job * +xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, + struct xe_dep_scheduler *dep_scheduler, + struct xe_vm *vm, u64 start, u64 end, int type); + +int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job); + +struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job, + struct xe_migrate *m, + struct dma_fence *fence); + +void xe_tlb_inval_job_get(struct xe_tlb_inval_job *job); + +void xe_tlb_inval_job_put(struct xe_tlb_inval_job *job); + +#endif diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_types.h b/drivers/gpu/drm/xe/xe_tlb_inval_types.h new file mode 100644 index 000000000000..8f8b060e9005 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tlb_inval_types.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_TLB_INVAL_TYPES_H_ +#define _XE_TLB_INVAL_TYPES_H_ + +#include <linux/workqueue.h> +#include <linux/dma-fence.h> + +struct xe_tlb_inval; + +/** struct xe_tlb_inval_ops - TLB invalidation ops (backend) */ +struct xe_tlb_inval_ops { + /** + * @all: Invalidate all TLBs + * @tlb_inval: TLB invalidation client + * @seqno: Seqno of TLB invalidation + * + * Return 0 on success, -ECANCELED if backend is mid-reset, error on + * failure + */ + int 
(*all)(struct xe_tlb_inval *tlb_inval, u32 seqno); + + /** + * @ggtt: Invalidate global translation TLBs + * @tlb_inval: TLB invalidation client + * @seqno: Seqno of TLB invalidation + * + * Return 0 on success, -ECANCELED if backend is mid-reset, error on + * failure + */ + int (*ggtt)(struct xe_tlb_inval *tlb_inval, u32 seqno); + + /** + * @ppgtt: Invalidate per-process translation TLBs + * @tlb_inval: TLB invalidation client + * @seqno: Seqno of TLB invalidation + * @start: Start address + * @end: End address + * @asid: Address space ID + * + * Return 0 on success, -ECANCELED if backend is mid-reset, error on + * failure + */ + int (*ppgtt)(struct xe_tlb_inval *tlb_inval, u32 seqno, u64 start, + u64 end, u32 asid); + + /** + * @initialized: Backend is initialized + * @tlb_inval: TLB invalidation client + * + * Return: True if backend is initialized, False otherwise + */ + bool (*initialized)(struct xe_tlb_inval *tlb_inval); + + /** + * @flush: Flush pending TLB invalidations + * @tlb_inval: TLB invalidation client + */ + void (*flush)(struct xe_tlb_inval *tlb_inval); + + /** + * @timeout_delay: Timeout delay for TLB invalidation + * @tlb_inval: TLB invalidation client + * + * Return: Timeout delay for TLB invalidation in jiffies + */ + long (*timeout_delay)(struct xe_tlb_inval *tlb_inval); +}; + +/** struct xe_tlb_inval - TLB invalidation client (frontend) */ +struct xe_tlb_inval { + /** @private: Backend private pointer */ + void *private; + /** @xe: Pointer to Xe device */ + struct xe_device *xe; + /** @ops: TLB invalidation ops */ + const struct xe_tlb_inval_ops *ops; + /** @seqno: TLB invalidation seqno, protected by CT lock */ +#define TLB_INVALIDATION_SEQNO_MAX 0x100000 + int seqno; + /** @seqno_lock: protects @seqno */ + struct mutex seqno_lock; + /** + * @seqno_recv: last received TLB invalidation seqno, protected by + * CT lock + */ + int seqno_recv; + /** + * @pending_fences: list of pending fences waiting TLB
invalidations, + * protected by CT lock + */ + struct list_head pending_fences; + /** + * @pending_lock: protects @pending_fences and updating @seqno_recv. + */ + spinlock_t pending_lock; + /** + * @fence_tdr: schedules a delayed call to xe_tlb_inval_fence_timeout after + * the timeout interval is over. + */ + struct delayed_work fence_tdr; + /** @job_wq: schedules TLB invalidation jobs */ + struct workqueue_struct *job_wq; + /** @lock: protects TLB invalidation fences */ + spinlock_t lock; +}; + +/** + * struct xe_tlb_inval_fence - TLB invalidation fence + * + * Optionally passed to xe_tlb_inval* functions and will be signaled upon TLB + * invalidation completion. + */ +struct xe_tlb_inval_fence { + /** @base: dma fence base */ + struct dma_fence base; + /** @tlb_inval: TLB invalidation client which the fence belongs to */ + struct xe_tlb_inval *tlb_inval; + /** @link: link into list of pending tlb fences */ + struct list_head link; + /** @seqno: seqno of TLB invalidation to signal fence on */ + int seqno; + /** @inval_time: time of TLB invalidation */ + ktime_t inval_time; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index b4a3577df70c..79a97b086cb2 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -14,10 +14,10 @@ #include "xe_exec_queue_types.h" #include "xe_gpu_scheduler_types.h" -#include "xe_gt_tlb_invalidation_types.h" #include "xe_gt_types.h" #include "xe_guc_exec_queue_types.h" #include "xe_sched_job.h" +#include "xe_tlb_inval_types.h" #include "xe_vm.h" #define __dev_name_xe(xe) dev_name((xe)->drm.dev) @@ -25,13 +25,13 @@ #define __dev_name_gt(gt) __dev_name_xe(gt_to_xe((gt))) #define __dev_name_eq(q) __dev_name_gt((q)->gt) -DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), +DECLARE_EVENT_CLASS(xe_tlb_inval_fence, + TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence), TP_ARGS(xe, fence),
TP_STRUCT__entry( __string(dev, __dev_name_xe(xe)) - __field(struct xe_gt_tlb_invalidation_fence *, fence) + __field(struct xe_tlb_inval_fence *, fence) __field(int, seqno) ), @@ -45,39 +45,23 @@ DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, __get_str(dev), __entry->fence, __entry->seqno) ); -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_create, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), +DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_send, + TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence), TP_ARGS(xe, fence) ); -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, - xe_gt_tlb_invalidation_fence_work_func, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), +DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_recv, + TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence), TP_ARGS(xe, fence) ); -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_cb, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), +DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_signal, + TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence), TP_ARGS(xe, fence) ); -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_send, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(xe, fence) -); - -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_recv, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(xe, fence) -); - -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(xe, fence) -); - -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), +DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_timeout, 
+ TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence), TP_ARGS(xe, fence) ); @@ -457,6 +441,29 @@ TRACE_EVENT(xe_eu_stall_data_read, __entry->read_size, __entry->total_size) ); +TRACE_EVENT(xe_exec_queue_reach_max_job_count, + TP_PROTO(struct xe_exec_queue *q, int max_cnt), + TP_ARGS(q, max_cnt), + + TP_STRUCT__entry(__string(dev, __dev_name_eq(q)) + __field(enum xe_engine_class, class) + __field(u32, logical_mask) + __field(u16, guc_id) + __field(int, max_cnt) + ), + + TP_fast_assign(__assign_str(dev); + __entry->class = q->class; + __entry->logical_mask = q->logical_mask; + __entry->guc_id = q->guc->id; + __entry->max_cnt = max_cnt; + ), + + TP_printk("dev=%s, job count exceeded the maximum limit (%d) per exec queue. engine_class=0x%x, logical_mask=0x%x, guc_id=%d", + __get_str(dev), __entry->max_cnt, + __entry->class, __entry->logical_mask, __entry->guc_id) +); + #endif /* This part must be outside protection */ diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index ccebd5f0878e..86323cf3be2c 100644 --- a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -33,7 +33,7 @@ DECLARE_EVENT_CLASS(xe_bo, TP_fast_assign( __assign_str(dev); - __entry->size = bo->size; + __entry->size = xe_bo_size(bo); __entry->flags = bo->flags; __entry->vm = bo->vm; ), @@ -73,7 +73,7 @@ TRACE_EVENT(xe_bo_move, TP_fast_assign( __entry->bo = bo; - __entry->size = bo->size; + __entry->size = xe_bo_size(bo); __assign_str(new_placement_name); __assign_str(old_placement_name); __assign_str(device_id); diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index d9c9d2547aad..1bddecfb723a 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: MIT /* * Copyright © 2021-2023 Intel Corporation - * Copyright (C) 2021-2002 Red Hat + * Copyright (C) 2021-2022 Red Hat */ #include <drm/drm_managed.h> @@ -24,6 
+24,7 @@ #include "xe_sriov.h" #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_vram_mgr.h" +#include "xe_vram.h" #include "xe_wa.h" struct xe_ttm_stolen_mgr { @@ -80,17 +81,18 @@ static u32 get_wopcm_size(struct xe_device *xe) return wopcm_size; } -static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) +static u64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { - struct xe_tile *tile = xe_device_get_root_tile(xe); + struct xe_vram_region *tile_vram = xe_device_get_root_tile(xe)->mem.vram; + resource_size_t tile_io_start = xe_vram_region_io_start(tile_vram); struct xe_mmio *mmio = xe_root_tile_mmio(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); u64 stolen_size, wopcm_size; u64 tile_offset; u64 tile_size; - tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start; - tile_size = tile->mem.vram.actual_physical_size; + tile_offset = tile_io_start - xe_vram_region_io_start(xe->mem.vram); + tile_size = xe_vram_region_actual_physical_size(tile_vram); /* Use DSM base address instead for stolen memory */ mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset; @@ -103,11 +105,13 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) return 0; stolen_size = tile_size - mgr->stolen_base; + + xe_assert(xe, stolen_size >= wopcm_size); stolen_size -= wopcm_size; /* Verify usage fits in the actual resource available */ if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR)) - mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base; + mgr->io_base = tile_io_start + mgr->stolen_base; /* * There may be few KB of platform dependent reserved memory at the end @@ -164,7 +168,7 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr stolen_size -= wopcm_size; - if (media_gt && XE_WA(media_gt, 14019821291)) { + if (media_gt && XE_GT_WA(media_gt, 14019821291)) { u64 gscpsmi_base = xe_mmio_read64_2x32(&media_gt->mmio, GSCPSMI_BASE) 
& ~GENMASK_ULL(5, 0); diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c index d38b91872da3..3e404eb8d098 100644 --- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: MIT /* * Copyright © 2021-2022 Intel Corporation - * Copyright (C) 2021-2002 Red Hat + * Copyright (C) 2021-2022 Red Hat */ #include "xe_ttm_sys_mgr.h" @@ -85,7 +85,7 @@ static const struct ttm_resource_manager_func xe_ttm_sys_mgr_func = { .debug = xe_ttm_sys_mgr_debug }; -static void ttm_sys_mgr_fini(struct drm_device *drm, void *arg) +static void xe_ttm_sys_mgr_fini(struct drm_device *drm, void *arg) { struct xe_device *xe = (struct xe_device *)arg; struct ttm_resource_manager *man = &xe->mem.sys_mgr; @@ -116,5 +116,5 @@ int xe_ttm_sys_mgr_init(struct xe_device *xe) ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT); ttm_set_driver_manager(&xe->ttm, XE_PL_TT, man); ttm_resource_manager_set_used(man, true); - return drmm_add_action_or_reset(&xe->drm, ttm_sys_mgr_fini, xe); + return drmm_add_action_or_reset(&xe->drm, xe_ttm_sys_mgr_fini, xe); } diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 9e375a40aee9..9f70802fce92 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: MIT /* * Copyright © 2021-2022 Intel Corporation - * Copyright (C) 2021-2002 Red Hat + * Copyright (C) 2021-2022 Red Hat */ #include <drm/drm_managed.h> @@ -15,6 +15,7 @@ #include "xe_gt.h" #include "xe_res_cursor.h" #include "xe_ttm_vram_mgr.h" +#include "xe_vram_types.h" static inline struct drm_buddy_block * xe_ttm_vram_mgr_first_block(struct list_head *list) @@ -283,7 +284,7 @@ static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = { .debug = xe_ttm_vram_mgr_debug }; -static void ttm_vram_mgr_fini(struct drm_device *dev, void *arg) +static void 
xe_ttm_vram_mgr_fini(struct drm_device *dev, void *arg) { struct xe_device *xe = to_xe_device(dev); struct xe_ttm_vram_mgr *mgr = arg; @@ -334,16 +335,23 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, ttm_set_driver_manager(&xe->ttm, mem_type, &mgr->manager); ttm_resource_manager_set_used(&mgr->manager, true); - return drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr); + return drmm_add_action_or_reset(&xe->drm, xe_ttm_vram_mgr_fini, mgr); } -int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr) +/** + * xe_ttm_vram_mgr_init - initialize TTM VRAM region + * @xe: pointer to Xe device + * @vram: pointer to xe_vram_region that contains the memory region attributes + * + * Initialize the Xe TTM for given @vram region using the given parameters. + * + * Returns 0 for success, negative error code otherwise. + */ +int xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_vram_region *vram) { - struct xe_device *xe = tile_to_xe(tile); - struct xe_vram_region *vram = &tile->mem.vram; - - return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id, - vram->usable_size, vram->io_size, + return __xe_ttm_vram_mgr_init(xe, &vram->ttm, vram->placement, + xe_vram_region_usable_size(vram), + xe_vram_region_io_size(vram), PAGE_SIZE); } @@ -392,7 +400,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, */ xe_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { - phys_addr_t phys = cursor.start + tile->mem.vram.io_start; + phys_addr_t phys = cursor.start + xe_vram_region_io_start(tile->mem.vram); size_t size = min_t(u64, cursor.size, SZ_2G); dma_addr_t addr; diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h index cc76050e376d..87b7fae5edba 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h @@ -11,11 +11,12 @@ enum dma_data_direction; struct xe_device; struct xe_tile; +struct xe_vram_region; int 
__xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, u32 mem_type, u64 size, u64 io_size, u64 default_page_size); -int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr); +int xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_vram_region *vram); int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, struct ttm_resource *res, u64 offset, u64 length, diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h index 1144f9232ebb..a71e14818ec2 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h @@ -10,7 +10,7 @@ #include <drm/ttm/ttm_device.h> /** - * struct xe_ttm_vram_mgr - XE TTM VRAM manager + * struct xe_ttm_vram_mgr - Xe TTM VRAM manager * * Manages placement of TTM resource in VRAM. */ @@ -32,7 +32,7 @@ struct xe_ttm_vram_mgr { }; /** - * struct xe_ttm_vram_mgr_resource - XE TTM VRAM resource + * struct xe_ttm_vram_mgr_resource - Xe TTM VRAM resource */ struct xe_ttm_vram_mgr_resource { /** @base: Base TTM resource */ diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 49ddbda7cdef..5766fa7742d3 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -8,6 +8,7 @@ #include <kunit/visibility.h> #include <drm/drm_managed.h> +#include <drm/drm_print.h> #include "regs/xe_gt_regs.h" #include "xe_gt_types.h" @@ -40,7 +41,8 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, { XE_RTP_NAME("Tuning: Compression Overfetch"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), + FUNC(xe_rtp_match_has_flat_ccs)), XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), SET(CCCHKNREG1, L3CMPCTRL)) }, @@ -58,12 +60,14 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN)) }, { 
XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), + FUNC(xe_rtp_match_has_flat_ccs)), XE_RTP_ACTIONS(SET(L3SQCREG2, COMPMEMRD256BOVRFETCHEN)) }, { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"), - XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED), + FUNC(xe_rtp_match_has_flat_ccs)), XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2, COMPMEMRD256BOVRFETCHEN)) }, @@ -98,6 +102,11 @@ static const struct xe_rtp_entry_sr engine_tunings[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) }, + { XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) + }, }; static const struct xe_rtp_entry_sr lrc_tunings[] = { @@ -209,7 +218,14 @@ void xe_tuning_process_lrc(struct xe_hw_engine *hwe) xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc); } -void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p) +/** + * xe_tuning_dump() - Dump GT tuning info into a drm printer. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Return: always 0. 
+ */ +int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p) { size_t idx; @@ -217,11 +233,15 @@ void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p) for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings)) drm_printf_indent(p, 1, "%s\n", gt_tunings[idx].name); - drm_printf(p, "\nEngine Tunings\n"); + drm_puts(p, "\n"); + drm_printf(p, "Engine Tunings\n"); for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings)) drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name); - drm_printf(p, "\nLRC Tunings\n"); + drm_puts(p, "\n"); + drm_printf(p, "LRC Tunings\n"); for_each_set_bit(idx, gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings)) drm_printf_indent(p, 1, "%s\n", lrc_tunings[idx].name); + + return 0; } diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h index dd0d3ccc9c65..c1cc5927fda7 100644 --- a/drivers/gpu/drm/xe/xe_tuning.h +++ b/drivers/gpu/drm/xe/xe_tuning.h @@ -14,6 +14,6 @@ int xe_tuning_init(struct xe_gt *gt); void xe_tuning_process_gt(struct xe_gt *gt); void xe_tuning_process_engine(struct xe_hw_engine *hwe); void xe_tuning_process_lrc(struct xe_hw_engine *hwe); -void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p); +int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 3a8751a8b92d..465bda355443 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -33,6 +33,22 @@ uc_to_xe(struct xe_uc *uc) } /* Should be called once at driver load only */ +int xe_uc_init_noalloc(struct xe_uc *uc) +{ + int ret; + + ret = xe_guc_init_noalloc(&uc->guc); + if (ret) + goto err; + + /* HuC and GSC have no early dependencies and will be initialized during xe_uc_init(). 
*/ + return 0; + +err: + xe_gt_err(uc_to_gt(uc), "Failed to early initialize uC (%pe)\n", ERR_PTR(ret)); + return ret; +} + int xe_uc_init(struct xe_uc *uc) { int ret; @@ -56,15 +72,17 @@ int xe_uc_init(struct xe_uc *uc) if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; - if (IS_SRIOV_VF(uc_to_xe(uc))) - return 0; + if (!IS_SRIOV_VF(uc_to_xe(uc))) { + ret = xe_wopcm_init(&uc->wopcm); + if (ret) + goto err; + } - ret = xe_wopcm_init(&uc->wopcm); + ret = xe_guc_min_load_for_hwconfig(&uc->guc); if (ret) goto err; return 0; - err: xe_gt_err(uc_to_gt(uc), "Failed to initialize uC (%pe)\n", ERR_PTR(ret)); return ret; @@ -126,28 +144,7 @@ int xe_uc_sanitize_reset(struct xe_uc *uc) return uc_reset(uc); } -/** - * xe_uc_init_hwconfig - minimally init Uc, read and parse hwconfig - * @uc: The UC object - * - * Return: 0 on success, negative error code on error. - */ -int xe_uc_init_hwconfig(struct xe_uc *uc) -{ - int ret; - - /* GuC submission not enabled, nothing to do */ - if (!xe_device_uc_enabled(uc_to_xe(uc))) - return 0; - - ret = xe_guc_min_load_for_hwconfig(&uc->guc); - if (ret) - return ret; - - return 0; -} - -static int vf_uc_init_hw(struct xe_uc *uc) +static int vf_uc_load_hw(struct xe_uc *uc) { int err; @@ -161,22 +158,30 @@ static int vf_uc_init_hw(struct xe_uc *uc) err = xe_gt_sriov_vf_connect(uc_to_gt(uc)); if (err) - return err; + goto err_out; uc->guc.submission_state.enabled = true; + err = xe_guc_opt_in_features_enable(&uc->guc); + if (err) + goto err_out; + err = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (err) - return err; + goto err_out; return 0; + +err_out: + xe_guc_sanitize(&uc->guc); + return err; } /* * Should be called during driver load, after every GT reset, and after every * suspend to reload / auth the firmwares. 
*/ -int xe_uc_init_hw(struct xe_uc *uc) +int xe_uc_load_hw(struct xe_uc *uc) { int ret; @@ -185,7 +190,7 @@ int xe_uc_init_hw(struct xe_uc *uc) return 0; if (IS_SRIOV_VF(uc_to_xe(uc))) - return vf_uc_init_hw(uc); + return vf_uc_load_hw(uc); ret = xe_huc_upload(&uc->huc); if (ret) @@ -201,15 +206,15 @@ int xe_uc_init_hw(struct xe_uc *uc) ret = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (ret) - return ret; + goto err_out; ret = xe_guc_post_load_init(&uc->guc); if (ret) - return ret; + goto err_out; ret = xe_guc_pc_start(&uc->guc.pc); if (ret) - return ret; + goto err_out; xe_guc_engine_activity_enable_stats(&uc->guc); @@ -221,11 +226,10 @@ int xe_uc_init_hw(struct xe_uc *uc) xe_gsc_load_start(&uc->gsc); return 0; -} -int xe_uc_fini_hw(struct xe_uc *uc) -{ - return xe_uc_sanitize_reset(uc); +err_out: + xe_guc_sanitize(&uc->guc); + return ret; } int xe_uc_reset_prepare(struct xe_uc *uc) diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index c23e6f5e2514..21c9306098cf 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -8,11 +8,10 @@ struct xe_uc; +int xe_uc_init_noalloc(struct xe_uc *uc); int xe_uc_init(struct xe_uc *uc); -int xe_uc_init_hwconfig(struct xe_uc *uc); int xe_uc_init_post_hwconfig(struct xe_uc *uc); -int xe_uc_init_hw(struct xe_uc *uc); -int xe_uc_fini_hw(struct xe_uc *uc); +int xe_uc_load_hw(struct xe_uc *uc); void xe_uc_gucrc_disable(struct xe_uc *uc); int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 2741849bbf4d..622b76078567 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -16,6 +16,7 @@ #include "xe_gsc.h" #include "xe_gt.h" #include "xe_gt_printk.h" +#include "xe_gt_sriov_vf.h" #include "xe_guc.h" #include "xe_map.h" #include "xe_mmio.h" @@ -114,10 +115,11 @@ struct fw_blobs_by_type { #define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED #define 
XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \ - fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \ + fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 49, 4)) \ + fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 49, 4)) \ + fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \ fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ - fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \ + fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \ fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \ fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \ @@ -126,6 +128,7 @@ struct fw_blobs_by_type { fw_def(TIGERLAKE, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ + fw_def(PANTHERLAKE, GT_TYPE_ANY, no_ver(xe, huc, ptl)) \ fw_def(BATTLEMAGE, GT_TYPE_ANY, no_ver(xe, huc, bmg)) \ fw_def(LUNARLAKE, GT_TYPE_ANY, no_ver(xe, huc, lnl)) \ fw_def(METEORLAKE, GT_TYPE_ANY, no_ver(i915, huc_gsc, mtl)) \ @@ -325,7 +328,7 @@ static void uc_fw_fini(struct drm_device *drm, void *arg) xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED); } -static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) +static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_guc_info *guc_info) { struct xe_gt *gt = uc_fw_to_gt(uc_fw); struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; @@ -340,11 +343,12 @@ static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) return -EINVAL; } - compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->submission_version); - compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->submission_version); - compatibility->patch = 
FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->submission_version); + compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, guc_info->submission_version); + compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, guc_info->submission_version); + compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, guc_info->submission_version); - uc_fw->private_data_size = css->private_data_size; + uc_fw->build_type = FIELD_GET(CSS_UKERNEL_INFO_BUILDTYPE, guc_info->ukernel_info); + uc_fw->private_data_size = guc_info->private_data_size; return 0; } @@ -413,8 +417,8 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t css = (struct uc_css_header *)fw_data; /* Check integrity of size values inside CSS header */ - size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw - - css->exponent_size_dw) * sizeof(u32); + size = (css->header_size_dw - css->rsa_info.key_size_dw - css->rsa_info.modulus_size_dw - + css->rsa_info.exponent_size_dw) * sizeof(u32); if (unlikely(size != sizeof(struct uc_css_header))) { drm_warn(&xe->drm, "%s firmware %s: unexpected header size: %zu != %zu\n", @@ -427,7 +431,7 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); /* now RSA */ - uc_fw->rsa_size = css->key_size_dw * sizeof(u32); + uc_fw->rsa_size = css->rsa_info.key_size_dw * sizeof(u32); /* At least, it should have header, uCode and RSA. Size of all three. 
*/ size = sizeof(struct uc_css_header) + uc_fw->ucode_size + @@ -440,12 +444,12 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t } /* Get version numbers from the CSS header */ - release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->sw_version); - release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version); - release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version); + release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->guc_info.sw_version); + release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->guc_info.sw_version); + release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->guc_info.sw_version); if (uc_fw->type == XE_UC_FW_TYPE_GUC) - return guc_read_css_info(uc_fw, css); + return guc_read_css_info(uc_fw, &css->guc_info); return 0; } @@ -662,11 +666,39 @@ do { \ ver_->major, ver_->minor, ver_->patch); \ } while (0) +static void uc_fw_vf_override(struct xe_uc_fw *uc_fw) +{ + struct xe_uc_fw_version *compat = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY]; + struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted; + + /* Only GuC/HuC are supported */ + if (uc_fw->type != XE_UC_FW_TYPE_GUC && uc_fw->type != XE_UC_FW_TYPE_HUC) + uc_fw->path = NULL; + + /* VF will support only firmwares that driver can autoselect */ + xe_uc_fw_change_status(uc_fw, uc_fw->path ? 
+ XE_UC_FIRMWARE_PRELOADED : + XE_UC_FIRMWARE_NOT_SUPPORTED); + + if (!xe_uc_fw_is_supported(uc_fw)) + return; + + /* PF is doing the loading, so we don't need a path on the VF */ + uc_fw->path = "Loaded by PF"; + + /* The GuC versions are set up during the VF bootstrap */ + if (uc_fw->type == XE_UC_FW_TYPE_GUC) { + uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY; + xe_gt_sriov_vf_guc_versions(uc_fw_to_gt(uc_fw), wanted, compat); + } +} + static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmware_p) { struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct drm_printer p = xe_gt_info_printer(gt); struct device *dev = xe->drm.dev; - struct drm_printer p = drm_info_printer(dev); const struct firmware *fw = NULL; int err; @@ -675,20 +707,13 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar * before we're looked at the HW caps to see if we have uc support */ BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED); - xe_assert(xe, !uc_fw->status); - xe_assert(xe, !uc_fw->path); + xe_gt_assert(gt, !uc_fw->status); + xe_gt_assert(gt, !uc_fw->path); uc_fw_auto_select(xe, uc_fw); if (IS_SRIOV_VF(xe)) { - /* Only GuC/HuC are supported */ - if (uc_fw->type != XE_UC_FW_TYPE_GUC && - uc_fw->type != XE_UC_FW_TYPE_HUC) - uc_fw->path = NULL; - /* VF will support only firmwares that driver can autoselect */ - xe_uc_fw_change_status(uc_fw, uc_fw->path ? 
- XE_UC_FIRMWARE_PRELOADED : - XE_UC_FIRMWARE_NOT_SUPPORTED); + uc_fw_vf_override(uc_fw); return 0; } @@ -700,7 +725,7 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar if (!xe_uc_fw_is_supported(uc_fw)) { if (uc_fw->type == XE_UC_FW_TYPE_GUC) { - drm_err(&xe->drm, "No GuC firmware defined for platform\n"); + xe_gt_err(gt, "No GuC firmware defined for platform\n"); return -ENOENT; } return 0; @@ -709,7 +734,7 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar /* an empty path means the firmware is disabled */ if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) { xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED); - drm_dbg(&xe->drm, "%s disabled", xe_uc_fw_type_repr(uc_fw->type)); + xe_gt_dbg(gt, "%s disabled\n", xe_uc_fw_type_repr(uc_fw->type)); return 0; } @@ -742,10 +767,10 @@ fail: XE_UC_FIRMWARE_MISSING : XE_UC_FIRMWARE_ERROR); - drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n", - xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); - drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n", - xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL); + xe_gt_notice(gt, "%s firmware %s: fetch failed with error %pe\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, ERR_PTR(err)); + xe_gt_info(gt, "%s firmware(s) can be downloaded from %s\n", + xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL); release_firmware(fw); /* OK even if fw is NULL */ diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h index 87ade41209d0..3c9a63d13032 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h @@ -44,6 +44,39 @@ * in fw. So driver will load a truncated firmware in this case. 
*/ +struct uc_css_rsa_info { + u32 key_size_dw; + u32 modulus_size_dw; + u32 exponent_size_dw; +} __packed; + +struct uc_css_guc_info { + u32 time; +#define CSS_TIME_HOUR (0xFF << 0) +#define CSS_TIME_MIN (0xFF << 8) +#define CSS_TIME_SEC (0xFFFF << 16) + u32 reserved0[5]; + u32 sw_version; +#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) +#define CSS_SW_VERSION_UC_MINOR (0xFF << 8) +#define CSS_SW_VERSION_UC_PATCH (0xFF << 0) + u32 submission_version; + u32 reserved1[11]; + u32 header_info; +#define CSS_HEADER_INFO_SVN (0xFF) +#define CSS_HEADER_INFO_COPY_VALID (0x1 << 31) + u32 private_data_size; + u32 ukernel_info; +#define CSS_UKERNEL_INFO_DEVICEID (0xFFFF << 16) +#define CSS_UKERNEL_INFO_PRODKEY (0xFF << 8) +#define CSS_UKERNEL_INFO_BUILDTYPE (0x3 << 2) +#define CSS_UKERNEL_INFO_BUILDTYPE_PROD 0 +#define CSS_UKERNEL_INFO_BUILDTYPE_PREPROD 1 +#define CSS_UKERNEL_INFO_BUILDTYPE_DEBUG 2 +#define CSS_UKERNEL_INFO_ENCSTATUS (0x1 << 1) +#define CSS_UKERNEL_INFO_COPY_VALID (0x1 << 0) +} __packed; + struct uc_css_header { u32 module_type; /* @@ -52,36 +85,21 @@ struct uc_css_header { */ u32 header_size_dw; u32 header_version; - u32 module_id; + u32 reserved0; u32 module_vendor; u32 date; -#define CSS_DATE_DAY (0xFF << 0) -#define CSS_DATE_MONTH (0xFF << 8) -#define CSS_DATE_YEAR (0xFFFF << 16) +#define CSS_DATE_DAY (0xFF << 0) +#define CSS_DATE_MONTH (0xFF << 8) +#define CSS_DATE_YEAR (0xFFFF << 16) u32 size_dw; /* uCode plus header_size_dw */ - u32 key_size_dw; - u32 modulus_size_dw; - u32 exponent_size_dw; - u32 time; -#define CSS_TIME_HOUR (0xFF << 0) -#define CSS_DATE_MIN (0xFF << 8) -#define CSS_DATE_SEC (0xFFFF << 16) - char username[8]; - char buildnumber[12]; - u32 sw_version; -#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) -#define CSS_SW_VERSION_UC_MINOR (0xFF << 8) -#define CSS_SW_VERSION_UC_PATCH (0xFF << 0) union { - u32 submission_version; /* only applies to GuC */ - u32 reserved2; + u32 reserved1[3]; + struct uc_css_rsa_info rsa_info; }; - u32 reserved0[12]; 
union { - u32 private_data_size; /* only applies to GuC */ - u32 reserved1; + u32 reserved2[22]; + struct uc_css_guc_info guc_info; }; - u32 header_info; } __packed; static_assert(sizeof(struct uc_css_header) == 128); @@ -318,4 +336,70 @@ struct gsc_manifest_header { u32 exponent_size; /* in dwords */ } __packed; +/** + * DOC: Late binding Firmware Layout + * + * The Late binding binary starts with FPT header, which contains locations + * of various partitions of the binary. Here we're interested in finding out + * the manifest version. To find it, we need to locate the CPD header; + * one of the entries in the CPD header points to the manifest header, which + * contains the version. + * + * +================================================+ + * | FPT Header | + * +================================================+ + * | FPT entries[] | + * | entry1 | + * | ... | + * | entryX | + * | "LTES" | + * | ... | + * | offset >-----------------------------|------o + * +================================================+ | + * | + * +================================================+ | + * | CPD Header |<-----o + * +================================================+ + * | CPD entries[] | + * | entry1 | + * | ... | + * | entryX | + * | "LTES.man" | + * | ... | + * | offset >----------------------------|------o + * +================================================+ | + * | + * +================================================+ | + * | Manifest Header |<-----o + * | ... | + * | FW version | + * | ... 
| + * +================================================+ + */ + +/* FPT Headers */ +struct csc_fpt_header { + u32 header_marker; +#define CSC_FPT_HEADER_MARKER 0x54504624 + u32 num_of_entries; + u8 header_version; + u8 entry_version; + u8 header_length; /* in bytes */ + u8 flags; + u16 ticks_to_add; + u16 tokens_to_add; + u32 uma_size; + u32 crc32; + struct gsc_version fitc_version; +} __packed; + +struct csc_fpt_entry { + u8 name[4]; /* partition name */ + u32 reserved1; + u32 offset; /* offset from beginning of CSE region */ + u32 length; /* partition length in bytes */ + u32 reserved2[3]; + u32 partition_flags; +} __packed; + #endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index ad3b35a0e6eb..2ebe8c9db6ce 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -62,9 +62,11 @@ enum xe_uc_fw_type { }; /** - * struct xe_uc_fw_version - Version for XE micro controller firmware + * struct xe_uc_fw_version - Version for Xe micro controller firmware */ struct xe_uc_fw_version { + /** @branch: branch version of the FW (not always available) */ + u16 branch; /** @major: major version of the FW */ u16 major; /** @minor: minor version of the FW */ @@ -82,7 +84,7 @@ enum xe_uc_fw_version_types { }; /** - * struct xe_uc_fw - XE micro controller firmware + * struct xe_uc_fw - Xe micro controller firmware */ struct xe_uc_fw { /** @type: type uC firmware */ @@ -110,7 +112,7 @@ struct xe_uc_fw { /** @size: size of uC firmware including css header */ size_t size; - /** @bo: XE BO for uC firmware */ + /** @bo: Xe BO for uC firmware */ struct xe_bo *bo; /** @has_gsc_headers: whether the FW image starts with GSC headers */ @@ -145,6 +147,9 @@ struct xe_uc_fw { /** @private_data_size: size of private data found in uC css header */ u32 private_data_size; + + /** @build_type: Firmware build type (see CSS_UKERNEL_INFO_BUILDTYPE for definitions) */ + u32 build_type; }; #endif diff --git 
a/drivers/gpu/drm/xe/xe_uc_types.h b/drivers/gpu/drm/xe/xe_uc_types.h index 9924e4484866..1708379dc834 100644 --- a/drivers/gpu/drm/xe/xe_uc_types.h +++ b/drivers/gpu/drm/xe/xe_uc_types.h @@ -12,7 +12,7 @@ #include "xe_wopcm_types.h" /** - * struct xe_uc - XE micro controllers + * struct xe_uc - Xe micro controllers */ struct xe_uc { /** @guc: Graphics micro controller */ diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c new file mode 100644 index 000000000000..0d9130b1958a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_userptr.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_svm.h" +#include "xe_userptr.h" + +#include <linux/mm.h> + +#include "xe_trace_bo.h" + +/** + * xe_vma_userptr_check_repin() - Advisory check for repin needed + * @uvma: The userptr vma + * + * Check if the userptr vma has been invalidated since last successful + * repin. The check is advisory only and the function can be called + * without the vm->svm.gpusvm.notifier_lock held. There is no guarantee that the + * vma userptr will remain valid after a lockless check, so typically + * the call needs to be followed by a proper check under the notifier_lock. + * + * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. + */ +int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) +{ + return mmu_interval_check_retry(&uvma->userptr.notifier, + uvma->userptr.pages.notifier_seq) ? + -EAGAIN : 0; +} + +/** + * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs + * that need repinning. + * @vm: The VM. + * + * This function checks for whether the VM has userptrs that need repinning, + * and provides a release-type barrier on the svm.gpusvm.notifier_lock after + * checking. + * + * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
+ */ +int __xe_vm_userptr_needs_repin(struct xe_vm *vm) +{ + lockdep_assert_held_read(&vm->svm.gpusvm.notifier_lock); + + return (list_empty(&vm->userptr.repin_list) && + list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN; +} + +int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) +{ + struct xe_vma *vma = &uvma->vma; + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_device *xe = vm->xe; + struct drm_gpusvm_ctx ctx = { + .read_only = xe_vma_read_only(vma), + .device_private_page_owner = xe_svm_devm_owner(xe), + .allow_mixed = true, + }; + + lockdep_assert_held(&vm->lock); + xe_assert(xe, xe_vma_is_userptr(vma)); + + if (vma->gpuva.flags & XE_VMA_DESTROYED) + return 0; + + return drm_gpusvm_get_pages(&vm->svm.gpusvm, &uvma->userptr.pages, + uvma->userptr.notifier.mm, + &uvma->userptr.notifier, + xe_vma_userptr(vma), + xe_vma_userptr(vma) + xe_vma_size(vma), + &ctx); +} + +static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vma *vma = &uvma->vma; + struct dma_resv_iter cursor; + struct dma_fence *fence; + struct drm_gpusvm_ctx ctx = { + .in_notifier = true, + .read_only = xe_vma_read_only(vma), + }; + long err; + + /* + * Tell exec and rebind worker they need to repin and rebind this + * userptr. + */ + if (!xe_vm_in_fault_mode(vm) && + !(vma->gpuva.flags & XE_VMA_DESTROYED)) { + spin_lock(&vm->userptr.invalidated_lock); + list_move_tail(&userptr->invalidate_link, + &vm->userptr.invalidated); + spin_unlock(&vm->userptr.invalidated_lock); + } + + /* + * Preempt fences turn into schedule disables, pipeline these. + * Note that even in fault mode, we need to wait for binds and + * unbinds to complete, and those are attached as BOOKKEEP fences + * to the vm.
+ */ + dma_resv_iter_begin(&cursor, xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP); + dma_resv_for_each_fence_unlocked(&cursor, fence) + dma_fence_enable_sw_signaling(fence); + dma_resv_iter_end(&cursor); + + err = dma_resv_wait_timeout(xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + XE_WARN_ON(err <= 0); + + if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { + err = xe_vm_invalidate_vma(vma); + XE_WARN_ON(err); + } + + drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages, + xe_vma_size(vma) >> PAGE_SHIFT, &ctx); +} + +static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); + struct xe_vma *vma = &uvma->vma; + struct xe_vm *vm = xe_vma_vm(vma); + + xe_assert(vm->xe, xe_vma_is_userptr(vma)); + trace_xe_vma_userptr_invalidate(vma); + + if (!mmu_notifier_range_blockable(range)) + return false; + + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "NOTIFIER: addr=0x%016llx, range=0x%016llx", + xe_vma_start(vma), xe_vma_size(vma)); + + down_write(&vm->svm.gpusvm.notifier_lock); + mmu_interval_set_seq(mni, cur_seq); + + __vma_userptr_invalidate(vm, uvma); + up_write(&vm->svm.gpusvm.notifier_lock); + trace_xe_vma_userptr_invalidate_complete(vma); + + return true; +} + +static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { + .invalidate = vma_userptr_invalidate, +}; + +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +/** + * xe_vma_userptr_force_invalidate() - force invalidate a userptr + * @uvma: The userptr vma to invalidate + * + * Perform a forced userptr invalidation for testing purposes. 
+ */ +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + /* Protect against concurrent userptr pinning */ + lockdep_assert_held(&vm->lock); + /* Protect against concurrent notifiers */ + lockdep_assert_held(&vm->svm.gpusvm.notifier_lock); + /* + * Protect against concurrent instances of this function and + * the critical exec sections + */ + xe_vm_assert_held(vm); + + if (!mmu_interval_read_retry(&uvma->userptr.notifier, + uvma->userptr.pages.notifier_seq)) + uvma->userptr.pages.notifier_seq -= 2; + __vma_userptr_invalidate(vm, uvma); +} +#endif + +int xe_vm_userptr_pin(struct xe_vm *vm) +{ + struct xe_userptr_vma *uvma, *next; + int err = 0; + + xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); + lockdep_assert_held_write(&vm->lock); + + /* Collect invalidated userptrs */ + spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); + list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, + userptr.invalidate_link) { + list_del_init(&uvma->userptr.invalidate_link); + list_add_tail(&uvma->userptr.repin_link, + &vm->userptr.repin_list); + } + spin_unlock(&vm->userptr.invalidated_lock); + + /* Pin and move to bind list */ + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, + userptr.repin_link) { + err = xe_vma_userptr_pin_pages(uvma); + if (err == -EFAULT) { + list_del_init(&uvma->userptr.repin_link); + /* + * We might have done the pin once already, but + * then had to retry before the re-bind happened, due to + * some other condition in the caller, but in the + * meantime the userptr got dinged by the notifier such + * that we need to revalidate here, but this time we hit + * the EFAULT. In such a case make sure we remove + * ourselves from the rebind list to avoid going down in + * flames.
+ */ + if (!list_empty(&uvma->vma.combined_links.rebind)) + list_del_init(&uvma->vma.combined_links.rebind); + + /* Wait for pending binds */ + xe_vm_lock(vm, false); + dma_resv_wait_timeout(xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + + down_read(&vm->svm.gpusvm.notifier_lock); + err = xe_vm_invalidate_vma(&uvma->vma); + up_read(&vm->svm.gpusvm.notifier_lock); + xe_vm_unlock(vm); + if (err) + break; + } else { + if (err) + break; + + list_del_init(&uvma->userptr.repin_link); + list_move_tail(&uvma->vma.combined_links.rebind, + &vm->rebind_list); + } + } + + if (err) { + down_write(&vm->svm.gpusvm.notifier_lock); + spin_lock(&vm->userptr.invalidated_lock); + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, + userptr.repin_link) { + list_del_init(&uvma->userptr.repin_link); + list_move_tail(&uvma->userptr.invalidate_link, + &vm->userptr.invalidated); + } + spin_unlock(&vm->userptr.invalidated_lock); + up_write(&vm->svm.gpusvm.notifier_lock); + } + return err; +} + +/** + * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs + * that need repinning. + * @vm: The VM. + * + * This function does an advisory check for whether the VM has userptrs that + * need repinning. + * + * Return: 0 if there are no indications of userptrs needing repinning, + * -EAGAIN if there are. + */ +int xe_vm_userptr_check_repin(struct xe_vm *vm) +{ + return (list_empty_careful(&vm->userptr.repin_list) && + list_empty_careful(&vm->userptr.invalidated)) ? 
0 : -EAGAIN; +} + +int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start, + unsigned long range) +{ + struct xe_userptr *userptr = &uvma->userptr; + int err; + + INIT_LIST_HEAD(&userptr->invalidate_link); + INIT_LIST_HEAD(&userptr->repin_link); + + err = mmu_interval_notifier_insert(&userptr->notifier, current->mm, + start, range, + &vma_userptr_notifier_ops); + if (err) + return err; + + userptr->pages.notifier_seq = LONG_MAX; + + return 0; +} + +void xe_userptr_remove(struct xe_userptr_vma *uvma) +{ + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + struct xe_userptr *userptr = &uvma->userptr; + + drm_gpusvm_free_pages(&vm->svm.gpusvm, &uvma->userptr.pages, + xe_vma_size(&uvma->vma) >> PAGE_SHIFT); + + /* + * Since userptr pages are not pinned, we can't remove + * the notifier until we're sure the GPU is not accessing + * them anymore + */ + mmu_interval_notifier_remove(&userptr->notifier); +} + +void xe_userptr_destroy(struct xe_userptr_vma *uvma) +{ + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&uvma->userptr.repin_link)); + list_del(&uvma->userptr.invalidate_link); + spin_unlock(&vm->userptr.invalidated_lock); +} diff --git a/drivers/gpu/drm/xe/xe_userptr.h b/drivers/gpu/drm/xe/xe_userptr.h new file mode 100644 index 000000000000..ef801234991e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_userptr.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_USERPTR_H_ +#define _XE_USERPTR_H_ + +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/scatterlist.h> +#include <linux/spinlock.h> + +#include <drm/drm_gpusvm.h> + +struct xe_vm; +struct xe_vma; +struct xe_userptr_vma; + +/** struct xe_userptr_vm - User pointer VM level state */ +struct xe_userptr_vm { + /** + * @userptr.repin_list: list of VMAs which are user pointers, + * and needs repinning. Protected by @lock. 
+ */ + struct list_head repin_list; + /** + * @userptr.invalidated_lock: Protects the + * @userptr.invalidated list. + */ + spinlock_t invalidated_lock; + /** + * @userptr.invalidated: List of invalidated userptrs, not yet + * picked + * up for revalidation. Protected from access with the + * @invalidated_lock. Removing items from the list + * additionally requires @lock in write mode, and adding + * items to the list requires either the @svm.gpusvm.notifier_lock in + * write mode, OR @lock in write mode. + */ + struct list_head invalidated; +}; + +/** struct xe_userptr - User pointer */ +struct xe_userptr { + /** @invalidate_link: Link for the vm::userptr.invalidated list */ + struct list_head invalidate_link; + /** @repin_link: link into VM repin list if userptr. */ + struct list_head repin_link; + /** + * @pages: gpusvm pages for this user pointer. + */ + struct drm_gpusvm_pages pages; + /** + * @notifier: MMU notifier for user pointer (invalidation call back) + */ + struct mmu_interval_notifier notifier; + + /** + * @initial_bind: user pointer has been bound at least once. + * write: vm->svm.gpusvm.notifier_lock in read mode and vm->resv held. + * read: vm->svm.gpusvm.notifier_lock in write mode or vm->resv held.
+ */ + bool initial_bind; +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) + u32 divisor; +#endif +}; + +#if IS_ENABLED(CONFIG_DRM_GPUSVM) +void xe_userptr_remove(struct xe_userptr_vma *uvma); +int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start, + unsigned long range); +void xe_userptr_destroy(struct xe_userptr_vma *uvma); + +int xe_vm_userptr_pin(struct xe_vm *vm); +int __xe_vm_userptr_needs_repin(struct xe_vm *vm); +int xe_vm_userptr_check_repin(struct xe_vm *vm); +int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma); +int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma); +#else +static inline void xe_userptr_remove(struct xe_userptr_vma *uvma) {} + +static inline int xe_userptr_setup(struct xe_userptr_vma *uvma, + unsigned long start, unsigned long range) +{ + return -ENODEV; +} + +static inline void xe_userptr_destroy(struct xe_userptr_vma *uvma) {} + +static inline int xe_vm_userptr_pin(struct xe_vm *vm) { return 0; } +static inline int __xe_vm_userptr_needs_repin(struct xe_vm *vm) { return 0; } +static inline int xe_vm_userptr_check_repin(struct xe_vm *vm) { return 0; } +static inline int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) { return -ENODEV; } +static inline int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) { return -ENODEV; }; +#endif + +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); +#else +static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ +} +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_validation.c b/drivers/gpu/drm/xe/xe_validation.c new file mode 100644 index 000000000000..826cd09966ef --- /dev/null +++ b/drivers/gpu/drm/xe/xe_validation.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ +#include "xe_bo.h" +#include <drm/drm_exec.h> +#include <drm/drm_gem.h> +#include <drm/drm_gpuvm.h> + +#include "xe_assert.h" +#include 
"xe_validation.h" + +#ifdef CONFIG_DRM_XE_DEBUG +/** + * xe_validation_assert_exec() - Assert that the drm_exec pointer is suitable + * for validation. + * @xe: Pointer to the xe device. + * @exec: The drm_exec pointer to check. + * @obj: Pointer to the object subject to validation. + * + * NULL exec pointers are not allowed. + * For XE_VALIDATION_UNIMPLEMENTED, no checking. + * For XE_VLIDATION_OPT_OUT, check that the caller is a kunit test + * For XE_VALIDATION_UNSUPPORTED, check that the object subject to + * validation is a dma-buf, for which support for ww locking is + * not in place in the dma-buf layer. + */ +void xe_validation_assert_exec(const struct xe_device *xe, + const struct drm_exec *exec, + const struct drm_gem_object *obj) +{ + xe_assert(xe, exec); + if (IS_ERR(exec)) { + switch (PTR_ERR(exec)) { + case __XE_VAL_UNIMPLEMENTED: + break; + case __XE_VAL_UNSUPPORTED: + xe_assert(xe, !!obj->dma_buf); + break; +#if IS_ENABLED(CONFIG_KUNIT) + case __XE_VAL_OPT_OUT: + xe_assert(xe, current->kunit_test); + break; +#endif + default: + xe_assert(xe, false); + } + } +} +#endif + +static int xe_validation_lock(struct xe_validation_ctx *ctx) +{ + struct xe_validation_device *val = ctx->val; + int ret = 0; + + if (ctx->val_flags.interruptible) { + if (ctx->request_exclusive) + ret = down_write_killable(&val->lock); + else + ret = down_read_interruptible(&val->lock); + } else { + if (ctx->request_exclusive) + down_write(&val->lock); + else + down_read(&val->lock); + } + + if (!ret) { + ctx->lock_held = true; + ctx->lock_held_exclusive = ctx->request_exclusive; + } + + return ret; +} + +static int xe_validation_trylock(struct xe_validation_ctx *ctx) +{ + struct xe_validation_device *val = ctx->val; + bool locked; + + if (ctx->request_exclusive) + locked = down_write_trylock(&val->lock); + else + locked = down_read_trylock(&val->lock); + + if (locked) { + ctx->lock_held = true; + ctx->lock_held_exclusive = ctx->request_exclusive; + } + + return locked ? 
0 : -EWOULDBLOCK; +} + +static void xe_validation_unlock(struct xe_validation_ctx *ctx) +{ + if (!ctx->lock_held) + return; + + if (ctx->lock_held_exclusive) + up_write(&ctx->val->lock); + else + up_read(&ctx->val->lock); + + ctx->lock_held = false; +} + +/** + * xe_validation_ctx_init() - Initialize an xe_validation_ctx + * @ctx: The xe_validation_ctx to initialize. + * @val: The xe_validation_device representing the validation domain. + * @exec: The struct drm_exec to use for the transaction. May be NULL. + * @flags: The flags to use for initialization. + * + * Initialize and lock an xe_validation transaction using the validation domain + * represented by @val. Also initialize the drm_exec object forwarding parts of + * @flags to the drm_exec initialization. The @flags.exclusive flag should + * typically be set to false to avoid locking out other validators from the + * domain until an OOM is hit. For testing- or final attempt purposes it can, + * however, be set to true. + * + * Return: %0 on success, %-EINTR if interruptible initial locking failed with a + * signal pending. If @flags.no_block is set to true, a failed trylock + * returns %-EWOULDBLOCK.
+ */ +int xe_validation_ctx_init(struct xe_validation_ctx *ctx, struct xe_validation_device *val, + struct drm_exec *exec, const struct xe_val_flags flags) +{ + int ret; + + ctx->exec = exec; + ctx->val = val; + ctx->lock_held = false; + ctx->lock_held_exclusive = false; + ctx->request_exclusive = flags.exclusive; + ctx->val_flags = flags; + ctx->exec_flags = 0; + ctx->nr = 0; + + if (flags.no_block) + ret = xe_validation_trylock(ctx); + else + ret = xe_validation_lock(ctx); + if (ret) + return ret; + + if (exec) { + if (flags.interruptible) + ctx->exec_flags |= DRM_EXEC_INTERRUPTIBLE_WAIT; + if (flags.exec_ignore_duplicates) + ctx->exec_flags |= DRM_EXEC_IGNORE_DUPLICATES; + drm_exec_init(exec, ctx->exec_flags, ctx->nr); + } + + return 0; +} + +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH +/* + * This abuses both drm_exec and ww_mutex internals and should be + * replaced by checking for -EDEADLK when we can make TTM + * stop converting -EDEADLK to -ENOMEM. + * An alternative is to not have exhaustive eviction with + * CONFIG_DEBUG_WW_MUTEX_SLOWPATH until that happens. + */ +static bool xe_validation_contention_injected(struct drm_exec *exec) +{ + return !!exec->ticket.contending_lock; +} + +#else + +static bool xe_validation_contention_injected(struct drm_exec *exec) +{ + return false; +} + +#endif + +static bool __xe_validation_should_retry(struct xe_validation_ctx *ctx, int ret) +{ + if (ret == -ENOMEM && + ((ctx->request_exclusive && + xe_validation_contention_injected(ctx->exec)) || + !ctx->request_exclusive)) { + ctx->request_exclusive = true; + return true; + } + + return false; +} + +/** + * xe_validation_exec_lock() - Perform drm_gpuvm_exec_lock within a validation + * transaction. + * @ctx: An uninitialized xe_validation_ctx. + * @vm_exec: An initialized struct vm_exec. + * @val: The validation domain. 
+ * + * The drm_gpuvm_exec_lock() function internally initializes its drm_exec + * transaction and therefore doesn't lend itself very well to using + * xe_validation_ctx_init(). Provide a helper that takes an uninitialized + * xe_validation_ctx and calls drm_gpuvm_exec_lock() with OOM retry. + * + * Return: %0 on success, negative error code on failure. + */ +int xe_validation_exec_lock(struct xe_validation_ctx *ctx, + struct drm_gpuvm_exec *vm_exec, + struct xe_validation_device *val) +{ + int ret; + + memset(ctx, 0, sizeof(*ctx)); + ctx->exec = &vm_exec->exec; + ctx->exec_flags = vm_exec->flags; + ctx->val = val; + if (ctx->exec_flags & DRM_EXEC_INTERRUPTIBLE_WAIT) + ctx->val_flags.interruptible = 1; + if (ctx->exec_flags & DRM_EXEC_IGNORE_DUPLICATES) + ctx->val_flags.exec_ignore_duplicates = 1; +retry: + ret = xe_validation_lock(ctx); + if (ret) + return ret; + + ret = drm_gpuvm_exec_lock(vm_exec); + if (ret) { + xe_validation_unlock(ctx); + if (__xe_validation_should_retry(ctx, ret)) + goto retry; + } + + return ret; +} + +/** + * xe_validation_ctx_fini() - Finalize a validation transaction + * @ctx: The validation transaction to finalize. + * + * Finalize a validation transaction and its related drm_exec transaction. + */ +void xe_validation_ctx_fini(struct xe_validation_ctx *ctx) +{ + if (ctx->exec) + drm_exec_fini(ctx->exec); + xe_validation_unlock(ctx); +} + +/** + * xe_validation_should_retry() - Determine if a validation transaction should retry + * @ctx: The validation transaction. + * @ret: Pointer to a return value variable. + * + * Determines whether a validation transaction should retry based on the + * internal transaction state and the return value pointed to by @ret. + * If a validation should be retried, the transaction is prepared for that, + * and the validation lock might be re-locked in exclusive mode, and *@ret + * is set to %0.
If the re-locking errors, typically due to interruptible + * locking with signal pending, *@ret is instead set to -EINTR and the + * function returns %false. + * + * Return: %true if validation should be retried, %false otherwise. + */ +bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret) +{ + if (__xe_validation_should_retry(ctx, *ret)) { + drm_exec_fini(ctx->exec); + *ret = 0; + if (ctx->request_exclusive != ctx->lock_held_exclusive) { + xe_validation_unlock(ctx); + *ret = xe_validation_lock(ctx); + } + drm_exec_init(ctx->exec, ctx->exec_flags, ctx->nr); + return !*ret; + } + + return false; +} diff --git a/drivers/gpu/drm/xe/xe_validation.h b/drivers/gpu/drm/xe/xe_validation.h new file mode 100644 index 000000000000..a30e732c4d51 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_validation.h @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ +#ifndef _XE_VALIDATION_H_ +#define _XE_VALIDATION_H_ + +#include <linux/dma-resv.h> +#include <linux/types.h> +#include <linux/rwsem.h> + +struct drm_exec; +struct drm_gem_object; +struct drm_gpuvm_exec; +struct xe_device; + +#ifdef CONFIG_PROVE_LOCKING +/** + * xe_validation_lockdep() - Assert that a drm_exec locking transaction can + * be initialized at this point. + */ +static inline void xe_validation_lockdep(void) +{ + struct ww_acquire_ctx ticket; + + ww_acquire_init(&ticket, &reservation_ww_class); + ww_acquire_fini(&ticket); +} +#else +static inline void xe_validation_lockdep(void) +{ +} +#endif + +/* + * Various values of the drm_exec pointer where we've not (yet) + * implemented full ww locking. + * + * XE_VALIDATION_UNIMPLEMENTED means implementation is pending. + * A lockdep check is made to assure that a drm_exec locking + * transaction can actually take place where the macro is + * used. If this asserts, the exec pointer needs to be assigned + * higher up in the callchain and passed down. 
+ * + * XE_VALIDATION_UNSUPPORTED is for dma-buf code only where + * the dma-buf layer doesn't support WW locking. + * + * XE_VALIDATION_OPT_OUT is for simplification of kunit tests where + * exhaustive eviction isn't necessary. + */ +#define __XE_VAL_UNIMPLEMENTED -EINVAL +#define XE_VALIDATION_UNIMPLEMENTED (xe_validation_lockdep(), \ + (struct drm_exec *)ERR_PTR(__XE_VAL_UNIMPLEMENTED)) + +#define __XE_VAL_UNSUPPORTED -EOPNOTSUPP +#define XE_VALIDATION_UNSUPPORTED ((struct drm_exec *)ERR_PTR(__XE_VAL_UNSUPPORTED)) + +#define __XE_VAL_OPT_OUT -ENOMEM +#define XE_VALIDATION_OPT_OUT (xe_validation_lockdep(), \ + (struct drm_exec *)ERR_PTR(__XE_VAL_OPT_OUT)) +#ifdef CONFIG_DRM_XE_DEBUG +void xe_validation_assert_exec(const struct xe_device *xe, const struct drm_exec *exec, + const struct drm_gem_object *obj); +#else +#define xe_validation_assert_exec(_xe, _exec, _obj) \ + do { \ + (void)_xe; (void)_exec; (void)_obj; \ + } while (0) +#endif + +/** + * struct xe_validation_device - The domain for exhaustive eviction + * @lock: The lock used to exclude other processes from allocating graphics memory + * + * The struct xe_validation_device represents the domain for which we want to use + * exhaustive eviction. The @lock is typically grabbed in read mode for allocations + * but when graphics memory allocation fails, it is retried with the write mode held. + */ +struct xe_validation_device { + struct rw_semaphore lock; +}; + +/** + * struct xe_val_flags - Flags for xe_validation_ctx_init(). + * @exclusive: Start the validation transaction by locking out all other validators. + * @no_block: Don't block on initialization. + * @interruptible: Block interruptible if blocking. Implies initializing the drm_exec + * context with the DRM_EXEC_INTERRUPTIBLE_WAIT flag. + * @exec_ignore_duplicates: Initialize the drm_exec context with the + * DRM_EXEC_IGNORE_DUPLICATES flag. 
+ */ +struct xe_val_flags { + u32 exclusive :1; + u32 no_block :1; + u32 interruptible :1; + u32 exec_ignore_duplicates :1; +}; + +/** + * struct xe_validation_ctx - A struct drm_exec subclass with support for + * exhaustive eviction + * @exec: The drm_exec object base class. Note that we use a pointer instead of + * embedding to avoid diamond inheritance. + * @val: The exhaustive eviction domain. + * @val_flags: Copy of the struct xe_val_flags passed to xe_validation_ctx_init. + * @lock_held: Whether the domain lock is currently held. + * @lock_held_exclusive: Whether the domain lock is held in exclusive mode. + * @request_exclusive: Whether to lock exclusively (write mode) the next time + * the domain lock is locked. + * @exec_flags: The drm_exec flags used for drm_exec (re-)initialization. + * @nr: The drm_exec nr parameter used for drm_exec (re-)initialization. + */ +struct xe_validation_ctx { + struct drm_exec *exec; + struct xe_validation_device *val; + struct xe_val_flags val_flags; + bool lock_held; + bool lock_held_exclusive; + bool request_exclusive; + u32 exec_flags; + unsigned int nr; +}; + +int xe_validation_ctx_init(struct xe_validation_ctx *ctx, struct xe_validation_device *val, + struct drm_exec *exec, const struct xe_val_flags flags); + +int xe_validation_exec_lock(struct xe_validation_ctx *ctx, struct drm_gpuvm_exec *vm_exec, + struct xe_validation_device *val); + +void xe_validation_ctx_fini(struct xe_validation_ctx *ctx); + +bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret); + +/** + * xe_validation_retry_on_oom() - Retry on oom in an xe_validation transaction + * @_ctx: Pointer to the xe_validation_ctx + * @_ret: Pointer to the current error value, possibly holding -ENOMEM + * + * Use this in a way similar to drm_exec_retry_on_contention(). + * If *@_ret is -ENOMEM the transaction is restarted once in a way that + * blocks other transactions and allows exhaustive eviction.
If the transaction + * was already restarted once, just return the -ENOMEM. May also set + * _ret to -EINTR if not retrying and waits are interruptible. + * May only be used within a drm_exec_until_all_locked() loop. + */ +#define xe_validation_retry_on_oom(_ctx, _ret) \ + do { \ + if (xe_validation_should_retry(_ctx, _ret)) \ + goto *__drm_exec_retry_ptr; \ + } while (0) + +/** + * xe_validation_device_init() - Initialize a struct xe_validation_device + * @val: The xe_validation_device to init. + */ +static inline void +xe_validation_device_init(struct xe_validation_device *val) +{ + init_rwsem(&val->lock); +} + +/* + * Make guard() and scoped_guard() work with xe_validation_ctx + * so that we can exit transactions without caring about the + * cleanup. + */ +DEFINE_CLASS(xe_validation, struct xe_validation_ctx *, + if (_T) xe_validation_ctx_fini(_T);, + ({*_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags); + *_ret ? NULL : _ctx; }), + struct xe_validation_ctx *_ctx, struct xe_validation_device *_val, + struct drm_exec *_exec, const struct xe_val_flags _flags, int *_ret); +static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T) +{return *_T; } +#define class_xe_validation_is_conditional true + +/** + * xe_validation_guard() - An auto-cleanup xe_validation_ctx transaction + * @_ctx: The xe_validation_ctx. + * @_val: The xe_validation_device. + * @_exec: The struct drm_exec object + * @_flags: Flags for the xe_validation_ctx initialization. + * @_ret: Return in / out parameter. May be set by this macro. Typically 0 when called. + * + * This macro will initiate a drm_exec transaction with additional support for + * exhaustive eviction.
+ */ +#define xe_validation_guard(_ctx, _val, _exec, _flags, _ret) \ + scoped_guard(xe_validation, _ctx, _val, _exec, _flags, &_ret) \ + drm_exec_until_all_locked(_exec) + +#endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 79323c78130f..79ab6c512d3e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -27,8 +27,6 @@ #include "xe_device.h" #include "xe_drm_client.h" #include "xe_exec_queue.h" -#include "xe_gt_pagefault.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_pat.h" #include "xe_pm.h" @@ -36,11 +34,13 @@ #include "xe_pt.h" #include "xe_pxp.h" #include "xe_res_cursor.h" +#include "xe_sriov_vf.h" #include "xe_svm.h" #include "xe_sync.h" +#include "xe_tile.h" +#include "xe_tlb_inval.h" #include "xe_trace_bo.h" #include "xe_wa.h" -#include "xe_hmm.h" static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) { @@ -48,34 +48,17 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) } /** - * xe_vma_userptr_check_repin() - Advisory check for repin needed - * @uvma: The userptr vma + * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction + * @vm: The vm whose resv is to be locked. + * @exec: The drm_exec transaction. * - * Check if the userptr vma has been invalidated since last successful - * repin. The check is advisory only and can the function can be called - * without the vm->userptr.notifier_lock held. There is no guarantee that the - * vma userptr will remain valid after a lockless check, so typically - * the call needs to be followed by a proper check under the notifier_lock. + * Helper to lock the vm's resv as part of a drm_exec transaction. * - * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. + * Return: %0 on success. See drm_exec_lock_obj() for error codes. 
*/ -int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) +int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec) { - return mmu_interval_check_retry(&uvma->userptr.notifier, - uvma->userptr.notifier_seq) ? - -EAGAIN : 0; -} - -int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) -{ - struct xe_vma *vma = &uvma->vma; - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_device *xe = vm->xe; - - lockdep_assert_held(&vm->lock); - xe_assert(xe, xe_vma_is_userptr(vma)); - - return xe_hmm_userptr_populate_range(uvma, false); + return drm_exec_lock_obj(exec, xe_vm_obj(vm)); } static bool preempt_fences_waiting(struct xe_vm *vm) @@ -128,12 +111,22 @@ static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, static int wait_for_existing_preempt_fences(struct xe_vm *vm) { struct xe_exec_queue *q; + bool vf_migration = IS_SRIOV_VF(vm->xe) && + xe_sriov_vf_migration_supported(vm->xe); + signed long wait_time = vf_migration ? HZ / 5 : MAX_SCHEDULE_TIMEOUT; xe_vm_assert_held(vm); list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { if (q->lr.pfence) { - long timeout = dma_fence_wait(q->lr.pfence, false); + long timeout; + + timeout = dma_fence_wait_timeout(q->lr.pfence, false, + wait_time); + if (!timeout) { + xe_assert(vm->xe, vf_migration); + return -EAGAIN; + } /* Only -ETIME on fence indicates VM needs to be killed */ if (timeout < 0 || q->lr.pfence->error == -ETIME) @@ -227,6 +220,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) .num_fences = 1, }; struct drm_exec *exec = &vm_exec.exec; + struct xe_validation_ctx ctx; struct dma_fence *pfence; int err; bool wait; @@ -234,14 +228,14 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); down_write(&vm->lock); - err = drm_gpuvm_exec_lock(&vm_exec); + err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val); if (err) goto out_up_write; pfence = xe_preempt_fence_create(q, 
q->lr.context, ++q->lr.seqno); - if (!pfence) { - err = -ENOMEM; + if (IS_ERR(pfence)) { + err = PTR_ERR(pfence); goto out_fini; } @@ -249,7 +243,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) ++vm->preempt.num_exec_queues; q->lr.pfence = pfence; - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); @@ -263,10 +257,10 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) if (wait) dma_fence_enable_sw_signaling(pfence); - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); out_fini: - drm_exec_fini(exec); + xe_validation_ctx_fini(&ctx); out_up_write: up_write(&vm->lock); @@ -299,25 +293,6 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) up_write(&vm->lock); } -/** - * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs - * that need repinning. - * @vm: The VM. - * - * This function checks for whether the VM has userptrs that need repinning, - * and provides a release-type barrier on the userptr.notifier_lock after - * checking. - * - * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are. - */ -int __xe_vm_userptr_needs_repin(struct xe_vm *vm) -{ - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - return (list_empty(&vm->userptr.repin_list) && - list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN; -} - #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 /** @@ -349,39 +324,6 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked) /* TODO: Inform user the VM is banned */ } -/** - * xe_vm_validate_should_retry() - Whether to retry after a validate error. - * @exec: The drm_exec object used for locking before validation. - * @err: The error returned from ttm_bo_validate(). - * @end: A ktime_t cookie that should be set to 0 before first use and - * that should be reused on subsequent calls. 
- * - * With multiple active VMs, under memory pressure, it is possible that - * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM. - * Until ttm properly handles locking in such scenarios, best thing the - * driver can do is retry with a timeout. Check if that is necessary, and - * if so unlock the drm_exec's objects while keeping the ticket to prepare - * for a rerun. - * - * Return: true if a retry after drm_exec_init() is recommended; - * false otherwise. - */ -bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end) -{ - ktime_t cur; - - if (err != -ENOMEM) - return false; - - cur = ktime_get(); - *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS); - if (!ktime_before(cur, *end)) - return false; - - msleep(20); - return true; -} - static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) { struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); @@ -393,7 +335,10 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, &vm->rebind_list); - ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false); + if (!try_wait_for_completion(&vm->xe->pm_block)) + return -EAGAIN; + + ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec); if (ret) return ret; @@ -479,13 +424,40 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); } +static bool vm_suspend_rebind_worker(struct xe_vm *vm) +{ + struct xe_device *xe = vm->xe; + bool ret = false; + + mutex_lock(&xe->rebind_resume_lock); + if (!try_wait_for_completion(&vm->xe->pm_block)) { + ret = true; + list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list); + } + mutex_unlock(&xe->rebind_resume_lock); + + return ret; +} + +/** + * xe_vm_resume_rebind_worker() - Resume the rebind worker. + * @vm: The vm whose preempt worker to resume. 
+ * + * Resume a preempt worker that was previously suspended by + * vm_suspend_rebind_worker(). + */ +void xe_vm_resume_rebind_worker(struct xe_vm *vm) +{ + queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); +} + static void preempt_rebind_work_func(struct work_struct *w) { struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); + struct xe_validation_ctx ctx; struct drm_exec exec; unsigned int fence_count = 0; LIST_HEAD(preempt_fences); - ktime_t end = 0; int err = 0; long wait; int __maybe_unused tries = 0; @@ -502,24 +474,32 @@ static void preempt_rebind_work_func(struct work_struct *w) } retry: + if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) { + up_write(&vm->lock); + /* We don't actually block but don't make progress. */ + xe_pm_might_block_on_suspend(); + return; + } + if (xe_vm_userptr_check_repin(vm)) { err = xe_vm_userptr_pin(vm); if (err) goto out_unlock_outer; } - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, + (struct xe_val_flags) {.interruptible = true}); + if (err) + goto out_unlock_outer; drm_exec_until_all_locked(&exec) { bool done = false; err = xe_preempt_work_begin(&exec, vm, &done); drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); if (err || done) { - drm_exec_fini(&exec); - if (err && xe_vm_validate_should_retry(&exec, err, &end)) - err = -EAGAIN; - + xe_validation_ctx_fini(&ctx); goto out_unlock_outer; } } @@ -528,7 +508,9 @@ retry: if (err) goto out_unlock; + xe_vm_set_validation_exec(vm, &exec); err = xe_vm_rebind(vm, true); + xe_vm_set_validation_exec(vm, NULL); if (err) goto out_unlock; @@ -546,9 +528,9 @@ retry: (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ __xe_vm_userptr_needs_repin(__vm)) - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); if (retry_required(tries, vm)) { - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); err = -EAGAIN; 
goto out_unlock; } @@ -562,13 +544,26 @@ retry: /* Point of no return. */ arm_preempt_fences(vm, &preempt_fences); resume_and_reinstall_preempt_fences(vm, &exec); - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); out_unlock: - drm_exec_fini(&exec); + xe_validation_ctx_fini(&ctx); out_unlock_outer: if (err == -EAGAIN) { trace_xe_vm_rebind_worker_retry(vm); + + /* + * We can't block in workers on a VF which supports migration + * given this can block the VF post-migration workers from + * getting scheduled. + */ + if (IS_SRIOV_VF(vm->xe) && + xe_sriov_vf_migration_supported(vm->xe)) { + up_write(&vm->lock); + xe_vm_queue_rebind_worker(vm); + return; + } + goto retry; } @@ -583,201 +578,6 @@ out_unlock_outer: trace_xe_vm_rebind_worker_exit(vm); } -static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma) -{ - struct xe_userptr *userptr = &uvma->userptr; - struct xe_vma *vma = &uvma->vma; - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - /* - * Tell exec and rebind worker they need to repin and rebind this - * userptr. - */ - if (!xe_vm_in_fault_mode(vm) && - !(vma->gpuva.flags & XE_VMA_DESTROYED)) { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&userptr->invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - } - - /* - * Preempt fences turn into schedule disables, pipeline these. - * Note that even in fault mode, we need to wait for binds and - * unbinds to complete, and those are attached as BOOKMARK fences - * to the vm. 
- */ - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - - if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { - err = xe_vm_invalidate_vma(vma); - XE_WARN_ON(err); - } - - xe_hmm_userptr_unmap(uvma); -} - -static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, - const struct mmu_notifier_range *range, - unsigned long cur_seq) -{ - struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); - struct xe_vma *vma = &uvma->vma; - struct xe_vm *vm = xe_vma_vm(vma); - - xe_assert(vm->xe, xe_vma_is_userptr(vma)); - trace_xe_vma_userptr_invalidate(vma); - - if (!mmu_notifier_range_blockable(range)) - return false; - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "NOTIFIER: addr=0x%016llx, range=0x%016llx", - xe_vma_start(vma), xe_vma_size(vma)); - - down_write(&vm->userptr.notifier_lock); - mmu_interval_set_seq(mni, cur_seq); - - __vma_userptr_invalidate(vm, uvma); - up_write(&vm->userptr.notifier_lock); - trace_xe_vma_userptr_invalidate_complete(vma); - - return true; -} - -static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { - .invalidate = vma_userptr_invalidate, -}; - -#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) -/** - * xe_vma_userptr_force_invalidate() - force invalidate a userptr - * @uvma: The userptr vma to invalidate - * - * Perform a forced userptr invalidation for testing purposes. 
- */ -void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) -{ - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - - /* Protect against concurrent userptr pinning */ - lockdep_assert_held(&vm->lock); - /* Protect against concurrent notifiers */ - lockdep_assert_held(&vm->userptr.notifier_lock); - /* - * Protect against concurrent instances of this function and - * the critical exec sections - */ - xe_vm_assert_held(vm); - - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - uvma->userptr.notifier_seq)) - uvma->userptr.notifier_seq -= 2; - __vma_userptr_invalidate(vm, uvma); -} -#endif - -int xe_vm_userptr_pin(struct xe_vm *vm) -{ - struct xe_userptr_vma *uvma, *next; - int err = 0; - - xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); - lockdep_assert_held_write(&vm->lock); - - /* Collect invalidated userptrs */ - spin_lock(&vm->userptr.invalidated_lock); - xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); - list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, - userptr.invalidate_link) { - list_del_init(&uvma->userptr.invalidate_link); - list_add_tail(&uvma->userptr.repin_link, - &vm->userptr.repin_list); - } - spin_unlock(&vm->userptr.invalidated_lock); - - /* Pin and move to bind list */ - list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, - userptr.repin_link) { - err = xe_vma_userptr_pin_pages(uvma); - if (err == -EFAULT) { - list_del_init(&uvma->userptr.repin_link); - /* - * We might have already done the pin once already, but - * then had to retry before the re-bind happened, due - * some other condition in the caller, but in the - * meantime the userptr got dinged by the notifier such - * that we need to revalidate here, but this time we hit - * the EFAULT. In such a case make sure we remove - * ourselves from the rebind list to avoid going down in - * flames. 
- */ - if (!list_empty(&uvma->vma.combined_links.rebind)) - list_del_init(&uvma->vma.combined_links.rebind); - - /* Wait for pending binds */ - xe_vm_lock(vm, false); - dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - - err = xe_vm_invalidate_vma(&uvma->vma); - xe_vm_unlock(vm); - if (err) - break; - } else { - if (err) - break; - - list_del_init(&uvma->userptr.repin_link); - list_move_tail(&uvma->vma.combined_links.rebind, - &vm->rebind_list); - } - } - - if (err) { - down_write(&vm->userptr.notifier_lock); - spin_lock(&vm->userptr.invalidated_lock); - list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, - userptr.repin_link) { - list_del_init(&uvma->userptr.repin_link); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - } - spin_unlock(&vm->userptr.invalidated_lock); - up_write(&vm->userptr.notifier_lock); - } - return err; -} - -/** - * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs - * that need repinning. - * @vm: The VM. - * - * This function does an advisory check for whether the VM has userptrs that - * need repinning. - * - * Return: 0 if there are no indications of userptrs needing repinning, - * -EAGAIN if there are. - */ -int xe_vm_userptr_check_repin(struct xe_vm *vm) -{ - return (list_empty_careful(&vm->userptr.repin_list) && - list_empty_careful(&vm->userptr.invalidated)) ? 
0 : -EAGAIN; -} - static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) { int i; @@ -798,23 +598,56 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) } ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); +static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) +{ + struct xe_vma *vma; + + vma = gpuva_to_vma(op->base.prefetch.va); + + if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) + xa_destroy(&op->prefetch_range.range); +} + +static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) +{ + struct xe_vma_op *op; + + if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) + return; + + list_for_each_entry(op, &vops->list, link) + xe_vma_svm_prefetch_op_fini(op); +} + static void xe_vma_ops_fini(struct xe_vma_ops *vops) { int i; + xe_vma_svm_prefetch_ops_fini(vops); + for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) kfree(vops->pt_update_ops[i].ops); } -static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask) +static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) { int i; + if (!inc_val) + return; + for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) if (BIT(i) & tile_mask) - ++vops->pt_update_ops[i].num_ops; + vops->pt_update_ops[i].num_ops += inc_val; } +#define XE_VMA_CREATE_MASK ( \ + XE_VMA_READ_ONLY | \ + XE_VMA_DUMPABLE | \ + XE_VMA_SYSTEM_ALLOCATOR | \ + DRM_GPUVA_SPARSE | \ + XE_VMA_MADV_AUTORESET) + static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, u8 tile_mask) { @@ -827,8 +660,7 @@ static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, op->base.map.gem.offset = vma->gpuva.gem.offset; op->map.vma = vma; op->map.immediate = true; - op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; - op->map.is_null = xe_vma_is_null(vma); + op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK; } static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, @@ -842,7 
+674,7 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, xe_vm_populate_rebind(op, vma, tile_mask); list_add_tail(&op->link, &vops->list); - xe_vma_ops_incr_pt_update_ops(vops, tile_mask); + xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); return 0; } @@ -922,10 +754,11 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); xe_vma_ops_init(&vops, vm, NULL, NULL, 0); + vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT; for_each_tile(tile, vm->xe, id) { vops.pt_update_ops[id].wait_vm_bookkeep = true; vops.pt_update_ops[tile->id].q = - xe_tile_migrate_exec_queue(tile); + xe_migrate_exec_queue(tile->migrate); } err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); @@ -977,7 +810,7 @@ xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, xe_vm_populate_range_rebind(op, vma, range, tile_mask); list_add_tail(&op->link, &vops->list); - xe_vma_ops_incr_pt_update_ops(vops, tile_mask); + xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); return 0; } @@ -991,7 +824,7 @@ xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, * * (re)bind SVM range setting up GPU page tables for the range. 
* - * Return: dma fence for rebind to signal completion on succees, ERR_PTR on + * Return: dma fence for rebind to signal completion on success, ERR_PTR on * failure */ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, @@ -1012,10 +845,11 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma)); xe_vma_ops_init(&vops, vm, NULL, NULL, 0); + vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT; for_each_tile(tile, vm->xe, id) { vops.pt_update_ops[id].wait_vm_bookkeep = true; vops.pt_update_ops[tile->id].q = - xe_tile_migrate_exec_queue(tile); + xe_migrate_exec_queue(tile->migrate); } err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask); @@ -1062,7 +896,7 @@ xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops, xe_vm_populate_range_unbind(op, range); list_add_tail(&op->link, &vops->list); - xe_vma_ops_incr_pt_update_ops(vops, range->tile_present); + xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1); return 0; } @@ -1074,7 +908,7 @@ xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops, * * Unbind SVM range removing the GPU page tables for the range. 
* - * Return: dma fence for unbind to signal completion on succees, ERR_PTR on + * Return: dma fence for unbind to signal completion on success, ERR_PTR on * failure */ struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, @@ -1098,7 +932,7 @@ struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, for_each_tile(tile, vm->xe, id) { vops.pt_update_ops[id].wait_vm_bookkeep = true; vops.pt_update_ops[tile->id].q = - xe_tile_migrate_exec_queue(tile); + xe_migrate_exec_queue(tile->migrate); } err = xe_vm_ops_add_range_unbind(&vops, range); @@ -1131,25 +965,18 @@ static void xe_vma_free(struct xe_vma *vma) kfree(vma); } -#define VMA_CREATE_FLAG_READ_ONLY BIT(0) -#define VMA_CREATE_FLAG_IS_NULL BIT(1) -#define VMA_CREATE_FLAG_DUMPABLE BIT(2) -#define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR BIT(3) - static struct xe_vma *xe_vma_create(struct xe_vm *vm, struct xe_bo *bo, u64 bo_offset_or_userptr, u64 start, u64 end, - u16 pat_index, unsigned int flags) + struct xe_vma_mem_attr *attr, + unsigned int flags) { struct xe_vma *vma; struct xe_tile *tile; u8 id; - bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); - bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); - bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); - bool is_cpu_addr_mirror = - (flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR); + bool is_null = (flags & DRM_GPUVA_SPARSE); + bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR); xe_assert(vm->xe, start < end); xe_assert(vm->xe, end < vm->size); @@ -1170,10 +997,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, if (!vma) return ERR_PTR(-ENOMEM); - if (is_cpu_addr_mirror) - vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR; - if (is_null) - vma->gpuva.flags |= DRM_GPUVA_SPARSE; if (bo) vma->gpuva.gem.obj = &bo->ttm.base; } @@ -1184,10 +1007,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->gpuva.vm = &vm->gpuvm; vma->gpuva.va.addr = start; vma->gpuva.va.range = end - start + 1; - if (read_only) - vma->gpuva.flags |= XE_VMA_READ_ONLY; - if 
(dumpable) - vma->gpuva.flags |= XE_VMA_DUMPABLE; + vma->gpuva.flags = flags; for_each_tile(tile, vm->xe, id) vma->tile_mask |= 0x1 << id; @@ -1195,7 +1015,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, if (vm->xe->info.has_atomic_enable_pte_bit) vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; - vma->pat_index = pat_index; + vma->attr = *attr; if (bo) { struct drm_gpuvm_bo *vm_bo; @@ -1215,25 +1035,17 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, drm_gpuvm_bo_put(vm_bo); } else /* userptr or null */ { if (!is_null && !is_cpu_addr_mirror) { - struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; + struct xe_userptr_vma *uvma = to_userptr_vma(vma); u64 size = end - start + 1; int err; - INIT_LIST_HEAD(&userptr->invalidate_link); - INIT_LIST_HEAD(&userptr->repin_link); vma->gpuva.gem.offset = bo_offset_or_userptr; - mutex_init(&userptr->unmap_mutex); - err = mmu_interval_notifier_insert(&userptr->notifier, - current->mm, - xe_vma_userptr(vma), size, - &vma_userptr_notifier_ops); + err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size); if (err) { xe_vma_free(vma); return ERR_PTR(err); } - - userptr->notifier_seq = LONG_MAX; } xe_vm_get(vm); @@ -1253,18 +1065,8 @@ static void xe_vma_destroy_late(struct xe_vma *vma) if (xe_vma_is_userptr(vma)) { struct xe_userptr_vma *uvma = to_userptr_vma(vma); - struct xe_userptr *userptr = &uvma->userptr; - if (userptr->sg) - xe_hmm_userptr_free_sg(uvma); - - /* - * Since userptr pages are not pinned, we can't remove - * the notifier until we're sure the GPU is not accessing - * them anymore - */ - mmu_interval_notifier_remove(&userptr->notifier); - mutex_destroy(&userptr->unmap_mutex); + xe_userptr_remove(uvma); xe_vm_put(vm); } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { xe_vm_put(vm); @@ -1301,11 +1103,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) if (xe_vma_is_userptr(vma)) { xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); - - 
spin_lock(&vm->userptr.invalidated_lock); - xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link)); - list_del(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); + xe_userptr_destroy(to_userptr_vma(vma)); } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) { xe_bo_assert_held(xe_vma_bo(vma)); @@ -1353,20 +1151,19 @@ int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) static void xe_vma_destroy_unlocked(struct xe_vma *vma) { + struct xe_device *xe = xe_vma_vm(vma)->xe; + struct xe_validation_ctx ctx; struct drm_exec exec; - int err; + int err = 0; - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { err = xe_vm_lock_vma(&exec, vma); drm_exec_retry_on_contention(&exec); if (XE_WARN_ON(err)) break; + xe_vma_destroy(vma, NULL); } - - xe_vma_destroy(vma, NULL); - - drm_exec_fini(&exec); + xe_assert(xe, !err); } struct xe_vma * @@ -1484,14 +1281,39 @@ static u64 pte_encode_ps(u32 pt_level) return 0; } -static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, - const u16 pat_index) +static u16 pde_pat_index(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + u16 pat_index; + + /* + * We only have two bits to encode the PAT index in non-leaf nodes, but + * these only point to other paging structures so we only need a minimal + * selection of options. The user PAT index is only for encoding leaf + * nodes, where we have use of more bits to do the encoding. The + * non-leaf nodes are instead under driver control so the chosen index + * here should be distinct from the user PAT index. Also the + * corresponding coherency of the PAT index should be tied to the + * allocation type of the page table (or at least we should pick + * something which is always safe). 
+ */ + if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached) + pat_index = xe->pat.idx[XE_CACHE_WB]; + else + pat_index = xe->pat.idx[XE_CACHE_NONE]; + + xe_assert(xe, pat_index <= 3); + + return pat_index; +} + +static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset) { u64 pde; pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pde |= XE_PAGE_PRESENT | XE_PAGE_RW; - pde |= pde_encode_pat_index(pat_index); + pde |= pde_encode_pat_index(pde_pat_index(bo)); return pde; } @@ -1566,6 +1388,7 @@ static void vm_destroy_work_func(struct work_struct *w); * @xe: xe device. * @tile: tile to set up for. * @vm: vm to set up for. + * @exec: The struct drm_exec object used to lock the vm resv. * * Sets up a pagetable tree with one page-table per level and a single * leaf PTE. All pagetable entries point to the single page-table or, @@ -1575,16 +1398,19 @@ static void vm_destroy_work_func(struct work_struct *w); * Return: 0 on success, negative error code on error. */ static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm) + struct xe_vm *vm, struct drm_exec *exec) { u8 id = tile->id; int i; for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { - vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); - if (IS_ERR(vm->scratch_pt[id][i])) - return PTR_ERR(vm->scratch_pt[id][i]); + vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec); + if (IS_ERR(vm->scratch_pt[id][i])) { + int err = PTR_ERR(vm->scratch_pt[id][i]); + vm->scratch_pt[id][i] = NULL; + return err; + } xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); } @@ -1612,11 +1438,28 @@ static void xe_vm_free_scratch(struct xe_vm *vm) } } -struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) +static void xe_vm_pt_destroy(struct xe_vm *vm) +{ + struct xe_tile *tile; + u8 id; + + xe_vm_assert_held(vm); + + for_each_tile(tile, vm->xe, id) { + if (vm->pt_root[id]) { + xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); + vm->pt_root[id] = NULL; + } + } +} + +struct 
xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) { struct drm_gem_object *vm_resv_obj; + struct xe_validation_ctx ctx; + struct drm_exec exec; struct xe_vm *vm; - int err, number_tiles = 0; + int err; struct xe_tile *tile; u8 id; @@ -1633,9 +1476,10 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->xe = xe; vm->size = 1ull << xe->info.va_bits; - vm->flags = flags; + if (xef) + vm->xef = xe_file_get(xef); /** * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be * manipulated under the PXP mutex. However, the PXP mutex can be taken @@ -1657,7 +1501,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) INIT_LIST_HEAD(&vm->userptr.repin_list); INIT_LIST_HEAD(&vm->userptr.invalidated); - init_rwsem(&vm->userptr.notifier_lock); spin_lock_init(&vm->userptr.invalidated_lock); ttm_lru_bulk_move_init(&vm->lru_bulk_move); @@ -1665,7 +1508,10 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) INIT_WORK(&vm->destroy_work, vm_destroy_work_func); INIT_LIST_HEAD(&vm->preempt.exec_queues); - vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ + if (flags & XE_VM_FLAG_FAULT_MODE) + vm->preempt.min_run_period_ms = 0; + else + vm->preempt.min_run_period_ms = 5; for_each_tile(tile, xe, id) xe_range_fence_tree_init(&vm->rftree[id]); @@ -1678,13 +1524,20 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) * scheduler drops all the references of it, hence protecting the VM * for this case is necessary. 
*/ - if (flags & XE_VM_FLAG_LR_MODE) + if (flags & XE_VM_FLAG_LR_MODE) { + INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); xe_pm_runtime_get_noresume(xe); + INIT_LIST_HEAD(&vm->preempt.pm_activate_link); + } + + err = xe_svm_init(vm); + if (err) + goto err_no_resv; vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); if (!vm_resv_obj) { err = -ENOMEM; - goto err_no_resv; + goto err_svm_fini; } drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, @@ -1692,51 +1545,68 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) drm_gem_object_put(vm_resv_obj); - err = xe_vm_lock(vm, true); - if (err) - goto err_close; - - if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) - vm->flags |= XE_VM_FLAG_64K; - - for_each_tile(tile, xe, id) { - if (flags & XE_VM_FLAG_MIGRATION && - tile->id != XE_VM_FLAG_TILE_ID(flags)) - continue; + err = 0; + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, + err) { + err = xe_vm_drm_exec_lock(vm, &exec); + drm_exec_retry_on_contention(&exec); - vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); - if (IS_ERR(vm->pt_root[id])) { - err = PTR_ERR(vm->pt_root[id]); - vm->pt_root[id] = NULL; - goto err_unlock_close; - } - } + if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) + vm->flags |= XE_VM_FLAG_64K; - if (xe_vm_has_scratch(vm)) { for_each_tile(tile, xe, id) { - if (!vm->pt_root[id]) + if (flags & XE_VM_FLAG_MIGRATION && + tile->id != XE_VM_FLAG_TILE_ID(flags)) continue; - err = xe_vm_create_scratch(xe, tile, vm); + vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level, + &exec); + if (IS_ERR(vm->pt_root[id])) { + err = PTR_ERR(vm->pt_root[id]); + vm->pt_root[id] = NULL; + xe_vm_pt_destroy(vm); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + break; + } + } + if (err) + break; + + if (xe_vm_has_scratch(vm)) { + for_each_tile(tile, xe, id) { + if (!vm->pt_root[id]) + continue; + + 
err = xe_vm_create_scratch(xe, tile, vm, &exec); + if (err) { + xe_vm_free_scratch(vm); + xe_vm_pt_destroy(vm); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + break; + } + } if (err) - goto err_unlock_close; + break; + vm->batch_invalidate_tlb = true; } - vm->batch_invalidate_tlb = true; - } - if (vm->flags & XE_VM_FLAG_LR_MODE) { - INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); - vm->batch_invalidate_tlb = false; - } + if (vm->flags & XE_VM_FLAG_LR_MODE) { + INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); + vm->batch_invalidate_tlb = false; + } - /* Fill pt_root after allocating scratch tables */ - for_each_tile(tile, xe, id) { - if (!vm->pt_root[id]) - continue; + /* Fill pt_root after allocating scratch tables */ + for_each_tile(tile, xe, id) { + if (!vm->pt_root[id]) + continue; - xe_pt_populate_empty(tile, vm, vm->pt_root[id]); + xe_pt_populate_empty(tile, vm, vm->pt_root[id]); + } } - xe_vm_unlock(vm); + if (err) + goto err_close; /* Kernel migration VM shouldn't have a circular loop.. 
*/ if (!(flags & XE_VM_FLAG_MIGRATION)) { @@ -1753,34 +1623,43 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) goto err_close; } vm->q[id] = q; - number_tiles++; } } - if (flags & XE_VM_FLAG_FAULT_MODE) { - err = xe_svm_init(vm); - if (err) + if (xef && xe->info.has_asid) { + u32 asid; + + down_write(&xe->usm.lock); + err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, + XA_LIMIT(1, XE_MAX_ASID - 1), + &xe->usm.next_asid, GFP_KERNEL); + up_write(&xe->usm.lock); + if (err < 0) goto err_close; - } - if (number_tiles > 1) - vm->composite_fence_ctx = dma_fence_context_alloc(1); + vm->usm.asid = asid; + } trace_xe_vm_create(vm); return vm; -err_unlock_close: - xe_vm_unlock(vm); err_close: xe_vm_close_and_put(vm); return ERR_PTR(err); +err_svm_fini: + if (flags & XE_VM_FLAG_FAULT_MODE) { + vm->size = 0; /* close the vm */ + xe_svm_fini(vm); + } err_no_resv: mutex_destroy(&vm->snap_mutex); for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); + if (vm->xef) + xe_file_put(vm->xef); kfree(vm); if (flags & XE_VM_FLAG_LR_MODE) xe_pm_runtime_put(xe); @@ -1817,7 +1696,7 @@ static void xe_vm_close(struct xe_vm *vm) xe_pt_clear(xe, vm->pt_root[id]); for_each_gt(gt, xe, id) - xe_gt_tlb_invalidation_vm(gt, vm); + xe_tlb_inval_vm(&gt->tlb_inval, vm); } } @@ -1841,15 +1720,24 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_assert(xe, !vm->preempt.num_exec_queues); xe_vm_close(vm); - if (xe_vm_in_preempt_fence_mode(vm)) + if (xe_vm_in_preempt_fence_mode(vm)) { + mutex_lock(&xe->rebind_resume_lock); + list_del_init(&vm->preempt.pm_activate_link); + mutex_unlock(&xe->rebind_resume_lock); flush_work(&vm->preempt.rebind_work); + } if (xe_vm_in_fault_mode(vm)) xe_svm_close(vm); down_write(&vm->lock); for_each_tile(tile, xe, id) { - if (vm->q[id]) + if (vm->q[id]) { + int i; + xe_exec_queue_last_fence_put(vm->q[id], vm); + for_each_tlb_inval(i) + xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i); + 
} } up_write(&vm->lock); @@ -1867,9 +1755,9 @@ void xe_vm_close_and_put(struct xe_vm *vm) vma = gpuva_to_vma(gpuva); if (xe_vma_has_no_bo(vma)) { - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); vma->gpuva.flags |= XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); } xe_vm_remove_vma(vm, vma); @@ -1893,13 +1781,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) * destroy the pagetables immediately. */ xe_vm_free_scratch(vm); - - for_each_tile(tile, xe, id) { - if (vm->pt_root[id]) { - xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); - vm->pt_root[id] = NULL; - } - } + xe_vm_pt_destroy(vm); xe_vm_unlock(vm); /* @@ -1913,8 +1795,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_vma_destroy_unlocked(vma); } - if (xe_vm_in_fault_mode(vm)) - xe_svm_fini(vm); + xe_svm_fini(vm); up_write(&vm->lock); @@ -1991,8 +1872,7 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) { - return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, - tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); + return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0); } static struct xe_exec_queue * @@ -2026,16 +1906,16 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); struct drm_xe_vm_create *args = data; - struct xe_tile *tile; + struct xe_gt *wa_gt = xe_root_mmio_gt(xe); struct xe_vm *vm; - u32 id, asid; + u32 id; int err; u32 flags = 0; if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; - if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) + if (wa_gt && XE_GT_WA(wa_gt, 22014953428)) args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && @@ -2064,29 +1944,10 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) flags |= XE_VM_FLAG_FAULT_MODE; - vm = 
xe_vm_create(xe, flags); + vm = xe_vm_create(xe, flags, xef); if (IS_ERR(vm)) return PTR_ERR(vm); - if (xe->info.has_asid) { - down_write(&xe->usm.lock); - err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, - XA_LIMIT(1, XE_MAX_ASID - 1), - &xe->usm.next_asid, GFP_KERNEL); - up_write(&xe->usm.lock); - if (err < 0) - goto err_close_and_put; - - vm->usm.asid = asid; - } - - vm->xef = xe_file_get(xef); - - /* Record BO memory for VM pagetable created against client */ - for_each_tile(tile, xe, id) - if (vm->pt_root[id]) - xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); - #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) /* Warning: Security issue - never enable by default */ args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); @@ -2136,6 +1997,139 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, return err; } +static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end) +{ + struct drm_gpuva *gpuva; + u32 num_vmas = 0; + + lockdep_assert_held(&vm->lock); + drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) + num_vmas++; + + return num_vmas; +} + +static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start, + u64 end, struct drm_xe_mem_range_attr *attrs) +{ + struct drm_gpuva *gpuva; + int i = 0; + + lockdep_assert_held(&vm->lock); + + drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + + if (i == *num_vmas) + return -ENOSPC; + + attrs[i].start = xe_vma_start(vma); + attrs[i].end = xe_vma_end(vma); + attrs[i].atomic.val = vma->attr.atomic_access; + attrs[i].pat_index.val = vma->attr.pat_index; + attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd; + attrs[i].preferred_mem_loc.migration_policy = + vma->attr.preferred_loc.migration_policy; + + i++; + } + + *num_vmas = i; + return 0; +} + +int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct 
xe_file *xef = to_xe_file(file); + struct drm_xe_mem_range_attr *mem_attrs; + struct drm_xe_vm_query_mem_range_attr *args = data; + u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); + struct xe_vm *vm; + int err = 0; + + if (XE_IOCTL_DBG(xe, + ((args->num_mem_ranges == 0 && + (attrs_user || args->sizeof_mem_range_attr != 0)) || + (args->num_mem_ranges > 0 && + (!attrs_user || + args->sizeof_mem_range_attr != + sizeof(struct drm_xe_mem_range_attr)))))) + return -EINVAL; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) + return -EINVAL; + + err = down_read_interruptible(&vm->lock); + if (err) + goto put_vm; + + attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); + + if (args->num_mem_ranges == 0 && !attrs_user) { + args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range); + args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr); + goto unlock_vm; + } + + mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr, + GFP_KERNEL | __GFP_ACCOUNT | + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + if (!mem_attrs) { + err = args->num_mem_ranges > 1 ? 
-ENOBUFS : -ENOMEM; + goto unlock_vm; + } + + memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr); + err = get_mem_attrs(vm, &args->num_mem_ranges, args->start, + args->start + args->range, mem_attrs); + if (err) + goto free_mem_attrs; + + err = copy_to_user(attrs_user, mem_attrs, + args->sizeof_mem_range_attr * args->num_mem_ranges); + if (err) + err = -EFAULT; + +free_mem_attrs: + kvfree(mem_attrs); +unlock_vm: + up_read(&vm->lock); +put_vm: + xe_vm_put(vm); + return err; +} + +static bool vma_matches(struct xe_vma *vma, u64 page_addr) +{ + if (page_addr > xe_vma_end(vma) - 1 || + page_addr + SZ_4K - 1 < xe_vma_start(vma)) + return false; + + return true; +} + +/** + * xe_vm_find_vma_by_addr() - Find a VMA by its address + * + * @vm: the xe_vm the vma belongs to + * @page_addr: address to look up + */ +struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) +{ + struct xe_vma *vma = NULL; + + if (vm->usm.last_fault_vma) { /* Fast lookup */ + if (vma_matches(vm->usm.last_fault_vma, page_addr)) + vma = vm->usm.last_fault_vma; + } + if (!vma) + vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); + + return vma; +} + static const u32 region_to_mem_type[] = { XE_PL_TT, XE_PL_VRAM0, @@ -2145,9 +2139,9 @@ static const u32 region_to_mem_type[] = { static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, bool post_commit) { - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); vma->gpuva.flags |= XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); if (post_commit) xe_vm_remove_vma(vm, vma); } @@ -2216,13 +2210,25 @@ static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) return true; } +static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) +{ + struct drm_gpuva_op *__op; + + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + + xe_vma_svm_prefetch_op_fini(op); + } +} + /* * Create operations list 
from IOCTL arguments, setup operations fields so parse * and commit steps are decoupled from IOCTL arguments. This step can fail. */ static struct drm_gpuva_ops * -vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, - u64 bo_offset_or_userptr, u64 addr, u64 range, +vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, + struct xe_bo *bo, u64 bo_offset_or_userptr, + u64 addr, u64 range, u32 operation, u32 flags, u32 prefetch_region, u16 pat_index) { @@ -2230,6 +2236,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct drm_gpuva_ops *ops; struct drm_gpuva_op *__op; struct drm_gpuvm_bo *vm_bo; + u64 range_end = addr + range; int err; lockdep_assert_held_write(&vm->lock); @@ -2241,10 +2248,17 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, switch (operation) { case DRM_XE_VM_BIND_OP_MAP: - case DRM_XE_VM_BIND_OP_MAP_USERPTR: - ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, - obj, bo_offset_or_userptr); + case DRM_XE_VM_BIND_OP_MAP_USERPTR: { + struct drm_gpuvm_map_req map_req = { + .map.va.addr = addr, + .map.va.range = range, + .map.gem.obj = obj, + .map.gem.offset = bo_offset_or_userptr, + }; + + ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req); break; + } case DRM_XE_VM_BIND_OP_UNMAP: ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); break; @@ -2281,30 +2295,118 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, if (__op->op == DRM_GPUVA_OP_MAP) { op->map.immediate = flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; - op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; - op->map.is_cpu_addr_mirror = flags & - DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; - op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; + if (flags & DRM_XE_VM_BIND_FLAG_READONLY) + op->map.vma_flags |= XE_VMA_READ_ONLY; + if (flags & DRM_XE_VM_BIND_FLAG_NULL) + op->map.vma_flags |= DRM_GPUVA_SPARSE; + if (flags & 
DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) + op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; + if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE) + op->map.vma_flags |= XE_VMA_DUMPABLE; + if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) + op->map.vma_flags |= XE_VMA_MADV_AUTORESET; op->map.pat_index = pat_index; op->map.invalidate_on_bind = __xe_vm_needs_clear_scratch_pages(vm, flags); } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { - op->prefetch.region = prefetch_region; - } + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); + struct xe_tile *tile; + struct xe_svm_range *svm_range; + struct drm_gpusvm_ctx ctx = {}; + struct drm_pagemap *dpagemap; + u8 id, tile_mask = 0; + u32 i; + + if (!xe_vma_is_cpu_addr_mirror(vma)) { + op->prefetch.region = prefetch_region; + break; + } + + ctx.read_only = xe_vma_read_only(vma); + ctx.devmem_possible = IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); + + for_each_tile(tile, vm->xe, id) + tile_mask |= 0x1 << id; + + xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); + op->prefetch_range.ranges_count = 0; + tile = NULL; + + if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { + dpagemap = xe_vma_resolve_pagemap(vma, + xe_device_get_root_tile(vm->xe)); + /* + * TODO: Once multigpu support is enabled will need + * something to dereference tile from dpagemap. + */ + if (dpagemap) + tile = xe_device_get_root_tile(vm->xe); + } else if (prefetch_region) { + tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - + XE_PL_VRAM0]; + } + + op->prefetch_range.tile = tile; +alloc_next_range: + svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); + + if (PTR_ERR(svm_range) == -ENOENT) { + u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma); + addr = ret == ULONG_MAX ? 
0 : ret; + if (addr) + goto alloc_next_range; + else + goto print_op_label; + } + + if (IS_ERR(svm_range)) { + err = PTR_ERR(svm_range); + goto unwind_prefetch_ops; + } + + if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) { + xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); + goto check_next_range; + } + + err = xa_alloc(&op->prefetch_range.range, + &i, svm_range, xa_limit_32b, + GFP_KERNEL); + + if (err) + goto unwind_prefetch_ops; + + op->prefetch_range.ranges_count++; + vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; + xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); +check_next_range: + if (range_end > xe_svm_range_end(svm_range) && + xe_svm_range_end(svm_range) < xe_vma_end(vma)) { + addr = xe_svm_range_end(svm_range); + goto alloc_next_range; + } + } +print_op_label: print_op(vm->xe, __op); } return ops; + +unwind_prefetch_ops: + xe_svm_prefetch_gpuva_ops_fini(ops); + drm_gpuva_ops_free(&vm->gpuvm, ops); + return ERR_PTR(err); } + ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, - u16 pat_index, unsigned int flags) + struct xe_vma_mem_attr *attr, unsigned int flags) { struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; + struct xe_validation_ctx ctx; struct drm_exec exec; struct xe_vma *vma; int err = 0; @@ -2312,9 +2414,9 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, lockdep_assert_held_write(&vm->lock); if (bo) { - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); - drm_exec_until_all_locked(&exec) { - err = 0; + err = 0; + xe_validation_guard(&ctx, &vm->xe->val, &exec, + (struct xe_val_flags) {.interruptible = true}, err) { if (!bo->vm) { err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); drm_exec_retry_on_contention(&exec); @@ -2323,27 +2425,35 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, err = drm_exec_lock_obj(&exec, &bo->ttm.base); drm_exec_retry_on_contention(&exec); } - if (err) { - drm_exec_fini(&exec); + if (err) return ERR_PTR(err); + + vma = xe_vma_create(vm, bo, op->gem.offset, + op->va.addr, op->va.addr + + op->va.range - 1, attr, flags); + if (IS_ERR(vma)) + return vma; + + if (!bo->vm) { + err = add_preempt_fences(vm, bo); + if (err) { + prep_vma_destroy(vm, vma, false); + xe_vma_destroy(vma, NULL); + } } } + if (err) + return ERR_PTR(err); + } else { + vma = xe_vma_create(vm, NULL, op->gem.offset, + op->va.addr, op->va.addr + + op->va.range - 1, attr, flags); + if (IS_ERR(vma)) + return vma; + + if (xe_vma_is_userptr(vma)) + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); } - vma = xe_vma_create(vm, bo, op->gem.offset, - op->va.addr, op->va.addr + - op->va.range - 1, pat_index, flags); - if (IS_ERR(vma)) - goto err_unlock; - - if (xe_vma_is_userptr(vma)) - err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); - else if (!xe_vma_has_no_bo(vma) && !bo->vm) - err = add_preempt_fences(vm, bo); - -err_unlock: - if (bo) - drm_exec_fini(&exec); - if (err) { prep_vma_destroy(vm, vma, false); xe_vma_destroy_unlocked(vma); @@ -2448,6 +2558,29 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) return err; } +/** + * xe_vma_has_default_mem_attrs - 
Check if a VMA has default memory attributes + * @vma: Pointer to the xe_vma structure to check + * + * This function determines whether the given VMA (Virtual Memory Area) + * has its memory attributes set to their default values. Specifically, + * it checks the following conditions: + * + * - `atomic_access` is `DRM_XE_ATOMIC_UNDEFINED` + * - `pat_index` is equal to `default_pat_index` + * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` + * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` + * + * Return: true if all attributes are at their default values, false otherwise. + */ +bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) +{ + return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && + vma->attr.pat_index == vma->attr.default_pat_index && + vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && + vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); +} + static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, struct xe_vma_ops *vops) { @@ -2474,26 +2607,29 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { - flags |= op->map.read_only ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->map.is_null ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->map.dumpable ? - VMA_CREATE_FLAG_DUMPABLE : 0; - flags |= op->map.is_cpu_addr_mirror ?
- VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; - - vma = new_vma(vm, &op->base.map, op->map.pat_index, + struct xe_vma_mem_attr default_attr = { + .preferred_loc = { + .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, + .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, + }, + .atomic_access = DRM_XE_ATOMIC_UNDEFINED, + .default_pat_index = op->map.pat_index, + .pat_index = op->map.pat_index, + }; + + flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; + + vma = new_vma(vm, &op->base.map, &default_attr, flags); if (IS_ERR(vma)) return PTR_ERR(vma); op->map.vma = vma; if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && - !op->map.is_cpu_addr_mirror) || + !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) || op->map.invalidate_on_bind) xe_vma_ops_incr_pt_update_ops(vops, - op->tile_mask); + op->tile_mask, 1); break; } case DRM_GPUVA_OP_REMAP: @@ -2502,6 +2638,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, gpuva_to_vma(op->base.remap.unmap->va); bool skip = xe_vma_is_cpu_addr_mirror(old); u64 start = xe_vma_start(old), end = xe_vma_end(old); + int num_remap_ops = 0; if (op->base.remap.prev) start = op->base.remap.prev->va.addr + @@ -2510,27 +2647,20 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, end = op->base.remap.next->va.addr; if (xe_vma_is_cpu_addr_mirror(old) && - xe_svm_has_mapping(vm, start, end)) - return -EBUSY; + xe_svm_has_mapping(vm, start, end)) { + if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) + xe_svm_unmap_address_range(vm, start, end); + else + return -EBUSY; + } op->remap.start = xe_vma_start(old); op->remap.range = xe_vma_size(old); - flags |= op->base.remap.unmap->va->flags & - XE_VMA_READ_ONLY ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->base.remap.unmap->va->flags & - DRM_GPUVA_SPARSE ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->base.remap.unmap->va->flags & - XE_VMA_DUMPABLE ? - VMA_CREATE_FLAG_DUMPABLE : 0; - flags |= xe_vma_is_cpu_addr_mirror(old) ? 
- VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; - + flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; if (op->base.remap.prev) { vma = new_vma(vm, op->base.remap.prev, - old->pat_index, flags); + &old->attr, flags); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -2554,13 +2684,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, (ULL)op->remap.start, (ULL)op->remap.range); } else { - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + num_remap_ops++; } } if (op->base.remap.next) { vma = new_vma(vm, op->base.remap.next, - old->pat_index, flags); + &old->attr, flags); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -2583,11 +2713,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, (ULL)op->remap.start, (ULL)op->remap.range); } else { - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + num_remap_ops++; } } if (!skip) - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + num_remap_ops++; + + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); break; } case DRM_GPUVA_OP_UNMAP: @@ -2599,7 +2731,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, return -EBUSY; if (!xe_vma_is_cpu_addr_mirror(vma)) - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); break; case DRM_GPUVA_OP_PREFETCH: vma = gpuva_to_vma(op->base.prefetch.va); @@ -2610,8 +2742,12 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, return err; } - if (!xe_vma_is_cpu_addr_mirror(vma)) - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + if (xe_vma_is_cpu_addr_mirror(vma)) + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, + op->prefetch_range.ranges_count); + else + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); + break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); @@ -2643,9 +2779,9 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, struct xe_vma *vma = 
gpuva_to_vma(op->base.unmap.va); if (vma) { - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); vma->gpuva.flags &= ~XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); if (post_commit) xe_vm_insert_vma(vm, vma); } @@ -2664,9 +2800,9 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, xe_vma_destroy_unlocked(op->remap.next); } if (vma) { - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); vma->gpuva.flags &= ~XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); if (post_commit) xe_vm_insert_vma(vm, vma); } @@ -2705,7 +2841,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, } static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, - bool validate) + bool res_evict, bool validate) { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); @@ -2716,7 +2852,8 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, err = drm_exec_lock_obj(exec, &bo->ttm.base); if (!err && validate) err = xe_bo_validate(bo, vm, - !xe_vm_in_preempt_fence_mode(vm)); + !xe_vm_in_preempt_fence_mode(vm) && + res_evict, exec); } return err; @@ -2737,15 +2874,72 @@ static int check_ufence(struct xe_vma *vma) return 0; } +static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) +{ + bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); + struct xe_tile *tile = op->prefetch_range.tile; + int err = 0; + + struct xe_svm_range *svm_range; + struct drm_gpusvm_ctx ctx = {}; + unsigned long i; + + if (!xe_vma_is_cpu_addr_mirror(vma)) + return 0; + + ctx.read_only = xe_vma_read_only(vma); + ctx.devmem_possible = devmem_possible; + ctx.check_pages_threshold = devmem_possible ? 
SZ_64K : 0; + ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe); + + /* TODO: Threading the migration */ + xa_for_each(&op->prefetch_range.range, i, svm_range) { + if (!tile) + xe_svm_range_migrate_to_smem(vm, svm_range); + + if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) { + err = xe_svm_alloc_vram(tile, svm_range, &ctx); + if (err) { + drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); + return -ENODATA; + } + xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); + } + + err = xe_svm_range_get_pages(vm, svm_range, &ctx); + if (err) { + drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); + if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) + err = -ENODATA; + return err; + } + xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); + } + + return err; +} + static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, - struct xe_vma_op *op) + struct xe_vma_ops *vops, struct xe_vma_op *op) { int err = 0; + bool res_evict; + + /* + * We only allow evicting a BO within the VM if it is not part of an + * array of binds, as an array of binds can evict another BO within the + * bind. 
+ */ + res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS); switch (op->base.op) { case DRM_GPUVA_OP_MAP: if (!op->map.invalidate_on_bind) err = vma_lock_and_validate(exec, op->map.vma, + res_evict, !xe_vm_in_fault_mode(vm) || op->map.immediate); break; @@ -2756,11 +2950,13 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.remap.unmap->va), - false); + res_evict, false); if (!err && op->remap.prev) - err = vma_lock_and_validate(exec, op->remap.prev, true); + err = vma_lock_and_validate(exec, op->remap.prev, + res_evict, true); if (!err && op->remap.next) - err = vma_lock_and_validate(exec, op->remap.next, true); + err = vma_lock_and_validate(exec, op->remap.next, + res_evict, true); break; case DRM_GPUVA_OP_UNMAP: err = check_ufence(gpuva_to_vma(op->base.unmap.va)); @@ -2769,21 +2965,27 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.unmap.va), - false); + res_evict, false); break; case DRM_GPUVA_OP_PREFETCH: { struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); - u32 region = op->prefetch.region; + u32 region; - xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); + if (!xe_vma_is_cpu_addr_mirror(vma)) { + region = op->prefetch.region; + xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || + region <= ARRAY_SIZE(region_to_mem_type)); + } err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.prefetch.va), - false); + res_evict, false); if (!err && !xe_vma_has_no_bo(vma)) err = xe_bo_migrate(xe_vma_bo(vma), - region_to_mem_type[region]); + region_to_mem_type[region], + NULL, + exec); break; } default: @@ -2793,6 +2995,25 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, return err; } +static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) +{ + struct xe_vma_op *op; + int err; + + if (!(vops->flags & 
XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) + return 0; + + list_for_each_entry(op, &vops->list, link) { + if (op->base.op == DRM_GPUVA_OP_PREFETCH) { + err = prefetch_ranges(vm, op); + if (err) + return err; + } + } + + return 0; +} + static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, struct xe_vma_ops *vops) @@ -2805,7 +3026,7 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, return err; list_for_each_entry(op, &vops->list, link) { - err = op_lock_and_prep(exec, vm, op); + err = op_lock_and_prep(exec, vm, vops, op); if (err) return err; } @@ -2886,20 +3107,31 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, struct dma_fence *fence = NULL; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; - int number_tiles = 0, current_fence = 0, err; + int number_tiles = 0, current_fence = 0, n_fence = 0, err; u8 id; number_tiles = vm_ops_setup_tile_args(vm, vops); if (number_tiles == 0) return ERR_PTR(-ENODATA); - if (number_tiles > 1) { - fences = kmalloc_array(number_tiles, sizeof(*fences), - GFP_KERNEL); - if (!fences) { - fence = ERR_PTR(-ENOMEM); - goto err_trace; - } + if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) { + for_each_tile(tile, vm->xe, id) + ++n_fence; + } else { + for_each_tile(tile, vm->xe, id) + n_fence += (1 + XE_MAX_GT_PER_TILE); + } + + fences = kmalloc_array(n_fence, sizeof(*fences), GFP_KERNEL); + if (!fences) { + fence = ERR_PTR(-ENOMEM); + goto err_trace; + } + + cf = dma_fence_array_alloc(n_fence); + if (!cf) { + fence = ERR_PTR(-ENOMEM); + goto err_out; } for_each_tile(tile, vm->xe, id) { @@ -2916,30 +3148,34 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, trace_xe_vm_ops_execute(vops); for_each_tile(tile, vm->xe, id) { + struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q; + int i; + + fence = NULL; if (!vops->pt_update_ops[id].num_ops) - continue; + goto collect_fences; fence = xe_pt_update_ops_run(tile, vops); if (IS_ERR(fence)) goto err_out; - if (fences) - 
fences[current_fence++] = fence; - } +collect_fences: + fences[current_fence++] = fence ?: dma_fence_get_stub(); + if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) + continue; - if (fences) { - cf = dma_fence_array_create(number_tiles, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - if (!cf) { - --vm->composite_fence_seqno; - fence = ERR_PTR(-ENOMEM); - goto err_out; - } - fence = &cf->base; + xe_migrate_job_lock(tile->migrate, q); + for_each_tlb_inval(i) + fences[current_fence++] = + xe_exec_queue_tlb_inval_last_fence_get(q, vm, i); + xe_migrate_job_unlock(tile->migrate, q); } + xe_assert(vm->xe, current_fence == n_fence); + dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1), + 1, false); + fence = &cf->base; + for_each_tile(tile, vm->xe, id) { if (!vops->pt_update_ops[id].num_ops) continue; @@ -2999,7 +3235,6 @@ static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, struct dma_fence *fence) { - struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); struct xe_user_fence *ufence; struct xe_vma_op *op; int i; @@ -3020,42 +3255,43 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, if (fence) { for (i = 0; i < vops->num_syncs; i++) xe_sync_entry_signal(vops->syncs + i, fence); - xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); } } static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, struct xe_vma_ops *vops) { + struct xe_validation_ctx ctx; struct drm_exec exec; struct dma_fence *fence; - int err; + int err = 0; lockdep_assert_held_write(&vm->lock); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES, 0); - drm_exec_until_all_locked(&exec) { + xe_validation_guard(&ctx, &vm->xe->val, &exec, + ((struct xe_val_flags) { + .interruptible = true, + .exec_ignore_duplicates = true, + }), err) { err = vm_bind_ioctl_ops_lock_and_prep(&exec, 
vm, vops); drm_exec_retry_on_contention(&exec); - if (err) { - fence = ERR_PTR(err); - goto unlock; - } + xe_validation_retry_on_oom(&ctx, &err); + if (err) + return ERR_PTR(err); + xe_vm_set_validation_exec(vm, &exec); fence = ops_execute(vm, vops); + xe_vm_set_validation_exec(vm, NULL); if (IS_ERR(fence)) { if (PTR_ERR(fence) == -ENODATA) vm_bind_ioctl_ops_fini(vm, vops, NULL); - goto unlock; + return fence; } vm_bind_ioctl_ops_fini(vm, vops, fence); } -unlock: - drm_exec_fini(&exec); - return fence; + return err ? ERR_PTR(err) : fence; } ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); @@ -3065,7 +3301,8 @@ ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE | \ DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ - DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) + DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \ + DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) #ifdef TEST_VM_OPS_ERROR #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) @@ -3090,6 +3327,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; + if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS)) + return -EINVAL; + if (args->num_binds > 1) { u64 __user *bind_user = u64_to_user_ptr(args->vector_of_binds); @@ -3171,14 +3411,20 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || + XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR && + !IS_ENABLED(CONFIG_DRM_GPUSVM)) || XE_IOCTL_DBG(xe, obj && op == DRM_XE_VM_BIND_OP_PREFETCH) || XE_IOCTL_DBG(xe, prefetch_region && op != DRM_XE_VM_BIND_OP_PREFETCH) || - XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & - xe->info.mem_region_mask)) || + XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && + /* Guard against undefined shift in BIT(prefetch_region) */ + 
(prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) || + !(BIT(prefetch_region) & xe->info.mem_region_mask)))) || XE_IOCTL_DBG(xe, obj && - op == DRM_XE_VM_BIND_OP_UNMAP)) { + op == DRM_XE_VM_BIND_OP_UNMAP) || + XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) && + (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) { err = -EINVAL; goto free_bind_ops; } @@ -3198,6 +3444,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, free_bind_ops: if (args->num_binds > 1) kvfree(*bind_ops); + *bind_ops = NULL; return err; } @@ -3206,19 +3453,19 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, struct xe_sync_entry *syncs, int num_syncs) { - struct dma_fence *fence; + struct dma_fence *fence = NULL; int i, err = 0; - fence = xe_sync_in_fence_get(syncs, num_syncs, - to_wait_exec_queue(vm, q), vm); - if (IS_ERR(fence)) - return PTR_ERR(fence); + if (num_syncs) { + fence = xe_sync_in_fence_get(syncs, num_syncs, + to_wait_exec_queue(vm, q), vm); + if (IS_ERR(fence)) + return PTR_ERR(fence); - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], fence); + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], fence); + } - xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, - fence); dma_fence_put(fence); return err; @@ -3234,6 +3481,7 @@ static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, vops->q = q; vops->syncs = syncs; vops->num_syncs = num_syncs; + vops->flags = 0; } static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, @@ -3242,9 +3490,9 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, { u16 coh_mode; - if (XE_IOCTL_DBG(xe, range > bo->size) || + if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || XE_IOCTL_DBG(xe, obj_offset > - bo->size - range)) { + xe_bo_size(bo) - range)) { return -EINVAL; } @@ -3303,7 +3551,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct 
xe_exec_queue *q = NULL; u32 num_syncs, num_ufence = 0; struct xe_sync_entry *syncs = NULL; - struct drm_xe_vm_bind_op *bind_ops; + struct drm_xe_vm_bind_op *bind_ops = NULL; struct xe_vma_ops vops; struct dma_fence *fence; int err; @@ -3321,7 +3569,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) q = xe_exec_queue_lookup(xef, args->exec_queue_id); if (XE_IOCTL_DBG(xe, !q)) { err = -ENOENT; - goto put_vm; + goto free_bind_ops; } if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { @@ -3367,7 +3615,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!ops) { err = -ENOMEM; - goto release_vm_lock; + goto free_bos; } } @@ -3408,8 +3656,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) syncs_user = u64_to_user_ptr(args->syncs); for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { + struct xe_exec_queue *__q = q ?: vm->q[0]; + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], &syncs_user[num_syncs], + __q->ufence_syncobj, + ++__q->ufence_timeline_value, (xe_vm_in_lr_mode(vm) ? SYNC_PARSE_FLAG_LR_MODE : 0) | (!args->num_binds ? 
@@ -3432,6 +3684,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); + if (args->num_binds > 1) + vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS; for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; @@ -3441,7 +3695,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; u16 pat_index = bind_ops[i].pat_index; - ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, + ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, addr, range, op, flags, prefetch_region, pat_index); if (IS_ERR(ops[i])) { @@ -3474,6 +3728,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (err) goto unwind_ops; + err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); + if (err) + goto unwind_ops; + fence = vm_bind_ioctl_ops_execute(vm, &vops); if (IS_ERR(fence)) err = PTR_ERR(fence); @@ -3497,17 +3755,20 @@ free_syncs: put_obj: for (i = 0; i < args->num_binds; ++i) xe_bo_put(bos[i]); + + kvfree(ops); +free_bos: + kvfree(bos); release_vm_lock: up_write(&vm->lock); put_exec_queue: if (q) xe_exec_queue_put(q); -put_vm: - xe_vm_put(vm); - kvfree(bos); - kvfree(ops); +free_bind_ops: if (args->num_binds > 1) kvfree(bind_ops); +put_vm: + xe_vm_put(vm); return err; } @@ -3543,7 +3804,7 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, xe_vma_ops_init(&vops, vm, q, NULL, 0); - ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size, + ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), DRM_XE_VM_BIND_OP_MAP, 0, 0, vm->xe->pat.idx[cache_lvl]); if (IS_ERR(ops)) { @@ -3597,10 +3858,14 @@ release_vm_lock: */ int xe_vm_lock(struct xe_vm *vm, bool intr) { + int ret; + if (intr) - return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); + ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); + 
else + ret = dma_resv_lock(xe_vm_resv(vm), NULL); - return dma_resv_lock(xe_vm_resv(vm), NULL); + return ret; } /** @@ -3615,6 +3880,66 @@ void xe_vm_unlock(struct xe_vm *vm) } /** + * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an + * address range + * @vm: The VM + * @start: start address + * @end: end address + * @tile_mask: mask for which gt's issue tlb invalidation + * + * Issue a range based TLB invalidation for gt's in tilemask + * + * Returns 0 for success, negative error code otherwise. + */ +int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, + u64 end, u8 tile_mask) +{ + struct xe_tlb_inval_fence + fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; + struct xe_tile *tile; + u32 fence_id = 0; + u8 id; + int err; + + if (!tile_mask) + return 0; + + for_each_tile(tile, vm->xe, id) { + if (!(tile_mask & BIT(id))) + continue; + + xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval, + &fence[fence_id], true); + + err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval, + &fence[fence_id], start, end, + vm->usm.asid); + if (err) + goto wait; + ++fence_id; + + if (!tile->media_gt) + continue; + + xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval, + &fence[fence_id], true); + + err = xe_tlb_inval_range(&tile->media_gt->tlb_inval, + &fence[fence_id], start, end, + vm->usm.asid); + if (err) + goto wait; + ++fence_id; + } + +wait: + for (id = 0; id < fence_id; ++id) + xe_tlb_inval_fence_wait(&fence[id]); + + return err; +} + +/** * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock * @vma: VMA to invalidate * @@ -3627,28 +3952,34 @@ void xe_vm_unlock(struct xe_vm *vm) int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = xe_vma_vm(vma)->xe; + struct xe_vm *vm = xe_vma_vm(vma); struct xe_tile *tile; - struct xe_gt_tlb_invalidation_fence - fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; - u8 id; - u32 fence_id = 0; + u8 tile_mask = 0; int ret = 0; + u8 id; xe_assert(xe, 
!xe_vma_is_null(vma)); xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma)); trace_xe_vma_invalidate(vma); - vm_dbg(&xe_vma_vm(vma)->xe->drm, + vm_dbg(&vm->xe->drm, "INVALIDATE: addr=0x%016llx, range=0x%016llx", xe_vma_start(vma), xe_vma_size(vma)); - /* Check that we don't race with page-table updates */ + /* + * Check that we don't race with page-table updates, tile_invalidated + * update is safe + */ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { if (xe_vma_is_userptr(vma)) { + lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || + (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && + lockdep_is_held(&xe_vm_resv(vm)->lock.base))); + WARN_ON_ONCE(!mmu_interval_check_retry (&to_userptr_vma(vma)->userptr.notifier, - to_userptr_vma(vma)->userptr.notifier_seq)); - WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)), + to_userptr_vma(vma)->userptr.pages.notifier_seq)); + WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP)); } else { @@ -3656,39 +3987,17 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) } } - for_each_tile(tile, xe, id) { - if (xe_pt_zap_ptes(tile, vma)) { - xe_device_wmb(xe); - xe_gt_tlb_invalidation_fence_init(tile->primary_gt, - &fence[fence_id], - true); - - ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, - &fence[fence_id], vma); - if (ret) - goto wait; - ++fence_id; - - if (!tile->media_gt) - continue; - - xe_gt_tlb_invalidation_fence_init(tile->media_gt, - &fence[fence_id], - true); + for_each_tile(tile, xe, id) + if (xe_pt_zap_ptes(tile, vma)) + tile_mask |= BIT(id); - ret = xe_gt_tlb_invalidation_vma(tile->media_gt, - &fence[fence_id], vma); - if (ret) - goto wait; - ++fence_id; - } - } + xe_device_wmb(xe); -wait: - for (id = 0; id < fence_id; ++id) - xe_gt_tlb_invalidation_fence_wait(&fence[id]); + ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma), + xe_vma_end(vma), tile_mask); - vma->tile_invalidated = vma->tile_mask; + /* WRITE_ONCE pairs with READ_ONCE in 
xe_vm_has_valid_gpu_mapping() */ + WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); return ret; } @@ -3887,3 +4196,221 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) } kvfree(snap); } + +/** + * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations + * @xe: Pointer to the Xe device structure + * @vma: Pointer to the virtual memory area (VMA) structure + * @is_atomic: In pagefault path and atomic operation + * + * This function determines whether the given VMA needs to be migrated to + * VRAM in order to do atomic GPU operation. + * + * Return: + * 1 - Migration to VRAM is required + * 0 - Migration is not required + * -EACCES - Invalid access for atomic memory attr + * + */ +int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic) +{ + u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access : + vma->attr.atomic_access; + + if (!IS_DGFX(xe) || !is_atomic) + return false; + + /* + * NOTE: The checks implemented here are platform-specific. For + * instance, on a device supporting CXL atomics, these would ideally + * work universally without additional handling. 
+ */ + switch (atomic_access) { + case DRM_XE_ATOMIC_DEVICE: + return !xe->info.has_device_atomics_on_smem; + + case DRM_XE_ATOMIC_CPU: + return -EACCES; + + case DRM_XE_ATOMIC_UNDEFINED: + case DRM_XE_ATOMIC_GLOBAL: + default: + return 1; + } +} + +static int xe_vm_alloc_vma(struct xe_vm *vm, + struct drm_gpuvm_map_req *map_req, + bool is_madvise) +{ + struct xe_vma_ops vops; + struct drm_gpuva_ops *ops = NULL; + struct drm_gpuva_op *__op; + unsigned int vma_flags = 0; + bool remap_op = false; + struct xe_vma_mem_attr tmp_attr; + u16 default_pat; + int err; + + lockdep_assert_held_write(&vm->lock); + + if (is_madvise) + ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req); + else + ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req); + + if (IS_ERR(ops)) + return PTR_ERR(ops); + + if (list_empty(&ops->list)) { + err = 0; + goto free_ops; + } + + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + struct xe_vma *vma = NULL; + + if (!is_madvise) { + if (__op->op == DRM_GPUVA_OP_UNMAP) { + vma = gpuva_to_vma(op->base.unmap.va); + XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma)); + default_pat = vma->attr.default_pat_index; + vma_flags = vma->gpuva.flags; + } + + if (__op->op == DRM_GPUVA_OP_REMAP) { + vma = gpuva_to_vma(op->base.remap.unmap->va); + default_pat = vma->attr.default_pat_index; + vma_flags = vma->gpuva.flags; + } + + if (__op->op == DRM_GPUVA_OP_MAP) { + op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; + op->map.pat_index = default_pat; + } + } else { + if (__op->op == DRM_GPUVA_OP_REMAP) { + vma = gpuva_to_vma(op->base.remap.unmap->va); + xe_assert(vm->xe, !remap_op); + xe_assert(vm->xe, xe_vma_has_no_bo(vma)); + remap_op = true; + vma_flags = vma->gpuva.flags; + } + + if (__op->op == DRM_GPUVA_OP_MAP) { + xe_assert(vm->xe, remap_op); + remap_op = false; + /* + * In case of madvise ops DRM_GPUVA_OP_MAP is + * always after DRM_GPUVA_OP_REMAP, so ensure + * to propagate the flags from the vma we're + * 
unmapping. + */ + op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; + } + } + print_op(vm->xe, __op); + } + + xe_vma_ops_init(&vops, vm, NULL, NULL, 0); + + if (is_madvise) + vops.flags |= XE_VMA_OPS_FLAG_MADVISE; + + err = vm_bind_ioctl_ops_parse(vm, ops, &vops); + if (err) + goto unwind_ops; + + xe_vm_lock(vm, false); + + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + struct xe_vma *vma; + + if (__op->op == DRM_GPUVA_OP_UNMAP) { + vma = gpuva_to_vma(op->base.unmap.va); + /* There should be no unmap for madvise */ + if (is_madvise) + XE_WARN_ON("UNEXPECTED UNMAP"); + + xe_vma_destroy(vma, NULL); + } else if (__op->op == DRM_GPUVA_OP_REMAP) { + vma = gpuva_to_vma(op->base.remap.unmap->va); + /* In case of madvise ops Store attributes for REMAP UNMAPPED + * VMA, so they can be assigned to newly MAP created vma. + */ + if (is_madvise) + tmp_attr = vma->attr; + + xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL); + } else if (__op->op == DRM_GPUVA_OP_MAP) { + vma = op->map.vma; + /* In case of madvise call, MAP will always be followed by REMAP. + * Therefore temp_attr will always have sane values, making it safe to + * copy them to new vma. 
+ */ + if (is_madvise) + vma->attr = tmp_attr; + } + } + + xe_vm_unlock(vm); + drm_gpuva_ops_free(&vm->gpuvm, ops); + return 0; + +unwind_ops: + vm_bind_ioctl_ops_unwind(vm, &ops, 1); +free_ops: + drm_gpuva_ops_free(&vm->gpuvm, ops); + return err; +} + +/** + * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops + * @vm: Pointer to the xe_vm structure + * @start: Starting input address + * @range: Size of the input range + * + * This function splits existing vma to create new vma for user provided input range + * + * Return: 0 if success + */ +int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range) +{ + struct drm_gpuvm_map_req map_req = { + .map.va.addr = start, + .map.va.range = range, + }; + + lockdep_assert_held_write(&vm->lock); + + vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range); + + return xe_vm_alloc_vma(vm, &map_req, true); +} + +/** + * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma + * @vm: Pointer to the xe_vm structure + * @start: Starting input address + * @range: Size of the input range + * + * This function splits/merges existing vma to create new vma for user provided input range + * + * Return: 0 if success + */ +int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range) +{ + struct drm_gpuvm_map_req map_req = { + .map.va.addr = start, + .map.va.range = range, + }; + + lockdep_assert_held_write(&vm->lock); + + vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx", + start, range); + + return xe_vm_alloc_vma(vm, &map_req, false); +} diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 0ef811fc2bde..ef8a5019574e 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -26,7 +26,7 @@ struct xe_sync_entry; struct xe_svm_range; struct drm_exec; -struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags); +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, 
struct xe_file *xef); struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id); int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node); @@ -66,6 +66,8 @@ static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm) struct xe_vma * xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range); +bool xe_vma_has_default_mem_attrs(struct xe_vma *vma); + /** * xe_vm_has_scratch() - Whether the vm is configured for scratch PTEs * @vm: The vm @@ -169,6 +171,14 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma) !xe_vma_is_cpu_addr_mirror(vma); } +struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr); + +int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic); + +int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t addr, uint64_t size); + +int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t addr, uint64_t size); + /** * to_userptr_vma() - Return a pointer to an embedding userptr vma * @vma: Pointer to the embedded struct xe_vma @@ -189,7 +199,7 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file); - +int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file); void xe_vm_close_and_put(struct xe_vm *vm); static inline bool xe_vm_in_fault_mode(struct xe_vm *vm) @@ -210,12 +220,6 @@ static inline bool xe_vm_in_preempt_fence_mode(struct xe_vm *vm) int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); -int xe_vm_userptr_pin(struct xe_vm *vm); - -int __xe_vm_userptr_needs_repin(struct xe_vm *vm); - -int xe_vm_userptr_check_repin(struct xe_vm *vm); - int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask); @@ -226,6 +230,9 @@ struct dma_fence 
*xe_vm_range_rebind(struct xe_vm *vm, struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, struct xe_svm_range *range); +int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, + u64 end, u8 tile_mask); + int xe_vm_invalidate_vma(struct xe_vma *vma); int xe_vm_validate_protected(struct xe_vm *vm); @@ -253,12 +260,6 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm) } } -int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma); - -int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma); - -bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end); - int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma); int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, @@ -268,6 +269,8 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, struct xe_exec_queue *q, u64 addr, enum xe_cache_level cache_lvl); +void xe_vm_resume_rebind_worker(struct xe_vm *vm); + /** * xe_vm_resv() - Return's the vm's reservation object * @vm: The vm @@ -287,6 +290,8 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked); */ #define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm)) +int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec); + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) #define vm_dbg drm_dbg #else @@ -301,11 +306,109 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); -#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) -void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); -#else -static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +/** + * xe_vm_set_validating() - Register this task as currently making bos resident + * @allow_res_evict: Allow eviction of buffer objects bound to @vm when + * validating. + * @vm: Pointer to the vm or NULL. 
+ * + * Register this task as currently making bos resident for the vm. Intended + * to avoid eviction by the same task of shared bos bound to the vm. + * Call with the vm's resv lock held. + */ +static inline void xe_vm_set_validating(struct xe_vm *vm, bool allow_res_evict) { + if (vm && !allow_res_evict) { + xe_vm_assert_held(vm); + /* Pairs with READ_ONCE in xe_vm_is_validating() */ + WRITE_ONCE(vm->validation.validating, current); + } } -#endif + +/** + * xe_vm_clear_validating() - Unregister this task as currently making bos resident + * @vm: Pointer to the vm or NULL + * @allow_res_evict: Eviction from @vm was allowed. Must be set to the same + * value as for xe_vm_set_validation(). + * + * Register this task as currently making bos resident for the vm. Intended + * to avoid eviction by the same task of shared bos bound to the vm. + * Call with the vm's resv lock held. + */ +static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict) +{ + if (vm && !allow_res_evict) { + /* Pairs with READ_ONCE in xe_vm_is_validating() */ + WRITE_ONCE(vm->validation.validating, NULL); + } +} + +/** + * xe_vm_is_validating() - Whether bos bound to the vm are currently being made resident + * by the current task. + * @vm: Pointer to the vm. + * + * If this function returns %true, we should be in a vm resv locked region, since + * the current process is the same task that called xe_vm_set_validating(). + * The function asserts that that's indeed the case. + * + * Return: %true if the task is currently making bos resident, %false otherwise. + */ +static inline bool xe_vm_is_validating(struct xe_vm *vm) +{ + /* Pairs with WRITE_ONCE in xe_vm_is_validating() */ + if (READ_ONCE(vm->validation.validating) == current) { + xe_vm_assert_held(vm); + return true; + } + return false; +} + +/** + * xe_vm_set_validation_exec() - Accessor to set the drm_exec object + * @vm: The vm we want to register a drm_exec object with. + * @exec: The exec object we want to register. 
+ * + * Set the drm_exec object used to lock the vm's resv. + */ +static inline void xe_vm_set_validation_exec(struct xe_vm *vm, struct drm_exec *exec) +{ + xe_vm_assert_held(vm); + xe_assert(vm->xe, !!exec ^ !!vm->validation._exec); + vm->validation._exec = exec; +} + +/** + * xe_vm_set_validation_exec() - Accessor to read the drm_exec object + * @vm: The vm we want to register a drm_exec object with. + * + * Return: The drm_exec object used to lock the vm's resv. The value + * is a valid pointer, %NULL, or one of the special values defined in + * xe_validation.h. + */ +static inline struct drm_exec *xe_vm_validation_exec(struct xe_vm *vm) +{ + xe_vm_assert_held(vm); + return vm->validation._exec; +} + +/** + * xe_vm_has_valid_gpu_mapping() - Advisory helper to check if VMA or SVM range has + * a valid GPU mapping + * @tile: The tile which the GPU mapping belongs to + * @tile_present: Tile present mask + * @tile_invalidated: Tile invalidated mask + * + * The READ_ONCEs pair with WRITE_ONCEs in either the TLB invalidation paths + * (xe_vm.c, xe_svm.c) or the binding paths (xe_pt.c). These are not reliable + * without the notifier lock in userptr or SVM cases, and not reliable without + * the BO dma-resv lock in the BO case. As such, they should only be used in + * opportunistic cases (e.g., skipping a page fault fix or not skipping a TLB + * invalidation) where it is harmless. 
+ * + * Return: True is there are valid GPU pages, False otherwise + */ +#define xe_vm_has_valid_gpu_mapping(tile, tile_present, tile_invalidated) \ + ((READ_ONCE(tile_present) & ~READ_ONCE(tile_invalidated)) & BIT((tile)->id)) + #endif diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h index 1030ce214032..02e5288373c9 100644 --- a/drivers/gpu/drm/xe/xe_vm_doc.h +++ b/drivers/gpu/drm/xe/xe_vm_doc.h @@ -7,7 +7,7 @@ #define _XE_VM_DOC_H_ /** - * DOC: XE VM (user address space) + * DOC: Xe VM (user address space) * * VM creation * =========== @@ -202,13 +202,13 @@ * User pointers are user allocated memory (malloc'd, mmap'd, etc..) for which the * user wants to create a GPU mapping. Typically in other DRM drivers a dummy BO * was created and then a binding was created. We bypass creating a dummy BO in - * XE and simply create a binding directly from the userptr. + * Xe and simply create a binding directly from the userptr. * * Invalidation * ------------ * * Since this a core kernel managed memory the kernel can move this memory - * whenever it wants. We register an invalidation MMU notifier to alert XE when + * whenever it wants. We register an invalidation MMU notifier to alert Xe when * a user pointer is about to move. The invalidation notifier needs to block * until all pending users (jobs or compute mode engines) of the userptr are * idle to ensure no faults. This done by waiting on all of VM's dma-resv slots. @@ -419,7 +419,7 @@ * ======= * * VM locking protects all of the core data paths (bind operations, execs, - * evictions, and compute mode rebind worker) in XE. + * evictions, and compute mode rebind worker) in Xe. 
* * Locks * ----- diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c new file mode 100644 index 000000000000..cad3cf627c3f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -0,0 +1,431 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_vm_madvise.h" + +#include <linux/nospec.h> +#include <drm/xe_drm.h> + +#include "xe_bo.h" +#include "xe_pat.h" +#include "xe_pt.h" +#include "xe_svm.h" + +struct xe_vmas_in_madvise_range { + u64 addr; + u64 range; + struct xe_vma **vmas; + int num_vmas; + bool has_bo_vmas; + bool has_svm_userptr_vmas; +}; + +static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range) +{ + u64 addr = madvise_range->addr; + u64 range = madvise_range->range; + + struct xe_vma **__vmas; + struct drm_gpuva *gpuva; + int max_vmas = 8; + + lockdep_assert_held(&vm->lock); + + madvise_range->num_vmas = 0; + madvise_range->vmas = kmalloc_array(max_vmas, sizeof(*madvise_range->vmas), GFP_KERNEL); + if (!madvise_range->vmas) + return -ENOMEM; + + vm_dbg(&vm->xe->drm, "VMA's in range: start=0x%016llx, end=0x%016llx", addr, addr + range); + + drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + + if (xe_vma_bo(vma)) + madvise_range->has_bo_vmas = true; + else if (xe_vma_is_cpu_addr_mirror(vma) || xe_vma_is_userptr(vma)) + madvise_range->has_svm_userptr_vmas = true; + + if (madvise_range->num_vmas == max_vmas) { + max_vmas <<= 1; + __vmas = krealloc(madvise_range->vmas, + max_vmas * sizeof(*madvise_range->vmas), + GFP_KERNEL); + if (!__vmas) { + kfree(madvise_range->vmas); + return -ENOMEM; + } + madvise_range->vmas = __vmas; + } + + madvise_range->vmas[madvise_range->num_vmas] = vma; + (madvise_range->num_vmas)++; + } + + if (!madvise_range->num_vmas) + kfree(madvise_range->vmas); + + vm_dbg(&vm->xe->drm, "madvise_range-num_vmas = %d\n", madvise_range->num_vmas); + + return 0; +} + 
+static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op) +{ + int i; + + xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC); + + for (i = 0; i < num_vmas; i++) { + /*TODO: Extend attributes to bo based vmas */ + if ((vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd && + vmas[i]->attr.preferred_loc.migration_policy == + op->preferred_mem_loc.migration_policy) || + !xe_vma_is_cpu_addr_mirror(vmas[i])) { + vmas[i]->skip_invalidation = true; + } else { + vmas[i]->skip_invalidation = false; + vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd; + /* Till multi-device support is not added migration_policy + * is of no use and can be ignored. + */ + vmas[i]->attr.preferred_loc.migration_policy = + op->preferred_mem_loc.migration_policy; + } + } +} + +static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op) +{ + struct xe_bo *bo; + int i; + + xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC); + xe_assert(vm->xe, op->atomic.val <= DRM_XE_ATOMIC_CPU); + + for (i = 0; i < num_vmas; i++) { + if (xe_vma_is_userptr(vmas[i]) && + !(op->atomic.val == DRM_XE_ATOMIC_DEVICE && + xe->info.has_device_atomics_on_smem)) { + vmas[i]->skip_invalidation = true; + continue; + } + + if (vmas[i]->attr.atomic_access == op->atomic.val) { + vmas[i]->skip_invalidation = true; + } else { + vmas[i]->skip_invalidation = false; + vmas[i]->attr.atomic_access = op->atomic.val; + } + + bo = xe_vma_bo(vmas[i]); + if (!bo || bo->attr.atomic_access == op->atomic.val) + continue; + + vmas[i]->skip_invalidation = false; + xe_bo_assert_held(bo); + bo->attr.atomic_access = op->atomic.val; + + /* Invalidate cpu page table, so bo can migrate to smem in next access */ + if (xe_bo_is_vram(bo) && + (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU || + bo->attr.atomic_access == 
DRM_XE_ATOMIC_GLOBAL)) + ttm_bo_unmap_virtual(&bo->ttm); + } +} + +static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op) +{ + int i; + + xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PAT); + + for (i = 0; i < num_vmas; i++) { + if (vmas[i]->attr.pat_index == op->pat_index.val) { + vmas[i]->skip_invalidation = true; + } else { + vmas[i]->skip_invalidation = false; + vmas[i]->attr.pat_index = op->pat_index.val; + } + } +} + +typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op); + +static const madvise_func madvise_funcs[] = { + [DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc, + [DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic, + [DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index, +}; + +static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end) +{ + struct drm_gpuva *gpuva; + struct xe_tile *tile; + u8 id, tile_mask = 0; + + lockdep_assert_held_write(&vm->lock); + + /* Wait for pending binds */ + if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT) <= 0) + XE_WARN_ON(1); + + drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + + if (vma->skip_invalidation || xe_vma_is_null(vma)) + continue; + + if (xe_vma_is_cpu_addr_mirror(vma)) { + tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm, + xe_vma_start(vma), + xe_vma_end(vma)); + } else { + for_each_tile(tile, vm->xe, id) { + if (xe_pt_zap_ptes(tile, vma)) { + tile_mask |= BIT(id); + + /* + * WRITE_ONCE pairs with READ_ONCE + * in xe_vm_has_valid_gpu_mapping() + */ + WRITE_ONCE(vma->tile_invalidated, + vma->tile_invalidated | BIT(id)); + } + } + } + } + + return tile_mask; +} + +static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end) +{ + u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end); + + 
if (!tile_mask) + return 0; + + xe_device_wmb(vm->xe); + + return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask); +} + +static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args) +{ + if (XE_IOCTL_DBG(xe, !args)) + return false; + + if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K))) + return false; + + if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K))) + return false; + + if (XE_IOCTL_DBG(xe, args->range < SZ_4K)) + return false; + + switch (args->type) { + case DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC: + { + s32 fd = (s32)args->preferred_mem_loc.devmem_fd; + + if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM)) + return false; + + if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy > + DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES)) + return false; + + if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad)) + return false; + + if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved)) + return false; + break; + } + case DRM_XE_MEM_RANGE_ATTR_ATOMIC: + if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_ATOMIC_CPU)) + return false; + + if (XE_IOCTL_DBG(xe, args->atomic.pad)) + return false; + + if (XE_IOCTL_DBG(xe, args->atomic.reserved)) + return false; + + break; + case DRM_XE_MEM_RANGE_ATTR_PAT: + { + u16 coh_mode = xe_pat_index_get_coh_mode(xe, args->pat_index.val); + + if (XE_IOCTL_DBG(xe, !coh_mode)) + return false; + + if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) + return false; + + if (XE_IOCTL_DBG(xe, args->pat_index.pad)) + return false; + + if (XE_IOCTL_DBG(xe, args->pat_index.reserved)) + return false; + break; + } + default: + if (XE_IOCTL_DBG(xe, 1)) + return false; + } + + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return false; + + return true; +} + +static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas, + int num_vmas, u32 atomic_val) +{ + struct xe_device *xe = vm->xe; + struct xe_bo *bo; + int i; + + for (i = 0; i < num_vmas; i++) { + bo = xe_vma_bo(vmas[i]); + 
if (!bo) + continue; + /* + * NOTE: The following atomic checks are platform-specific. For example, + * if a device supports CXL atomics, these may not be necessary or + * may behave differently. + */ + if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU && + !(bo->flags & XE_BO_FLAG_SYSTEM))) + return false; + + if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE && + !(bo->flags & XE_BO_FLAG_VRAM0) && + !(bo->flags & XE_BO_FLAG_VRAM1) && + !(bo->flags & XE_BO_FLAG_SYSTEM && + xe->info.has_device_atomics_on_smem))) + return false; + + if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL && + (!(bo->flags & XE_BO_FLAG_SYSTEM) || + (!(bo->flags & XE_BO_FLAG_VRAM0) && + !(bo->flags & XE_BO_FLAG_VRAM1))))) + return false; + } + return true; +} +/** + * xe_vm_madvise_ioctl - Handle MADVise ioctl for a VM + * @dev: DRM device pointer + * @data: Pointer to ioctl data (drm_xe_madvise*) + * @file: DRM file pointer + * + * Handles the MADVISE ioctl to provide memory advice for vma's within + * input range. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_madvise *args = data; + struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start, + .range = args->range, }; + struct xe_vm *vm; + struct drm_exec exec; + int err, attr_type; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) + return -EINVAL; + + if (!madvise_args_are_sane(vm->xe, args)) { + err = -EINVAL; + goto put_vm; + } + + xe_svm_flush(vm); + + err = down_write_killable(&vm->lock); + if (err) + goto put_vm; + + if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { + err = -ENOENT; + goto unlock_vm; + } + + err = xe_vm_alloc_madvise_vma(vm, args->start, args->range); + if (err) + goto unlock_vm; + + err = get_vmas(vm, &madvise_range); + if (err || !madvise_range.num_vmas) + goto unlock_vm; + + if (madvise_range.has_bo_vmas) { + if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) { + if (!check_bo_args_are_sane(vm, madvise_range.vmas, + madvise_range.num_vmas, + args->atomic.val)) { + err = -EINVAL; + goto unlock_vm; + } + } + + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + for (int i = 0; i < madvise_range.num_vmas; i++) { + struct xe_bo *bo = xe_vma_bo(madvise_range.vmas[i]); + + if (!bo) + continue; + err = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + if (err) + goto err_fini; + } + } + } + + if (madvise_range.has_svm_userptr_vmas) { + err = xe_svm_notifier_lock_interruptible(vm); + if (err) + goto err_fini; + } + + attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs)); + madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args); + + err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range); + + if (madvise_range.has_svm_userptr_vmas) + xe_svm_notifier_unlock(vm); + 
+err_fini: + if (madvise_range.has_bo_vmas) + drm_exec_fini(&exec); + kfree(madvise_range.vmas); + madvise_range.vmas = NULL; +unlock_vm: + up_write(&vm->lock); +put_vm: + xe_vm_put(vm); + return err; +} diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h new file mode 100644 index 000000000000..b0e1fc445f23 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_madvise.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_VM_MADVISE_H_ +#define _XE_VM_MADVISE_H_ + +struct drm_device; +struct drm_file; + +int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 1662604c4486..2168ef052499 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -17,6 +17,7 @@ #include "xe_device_types.h" #include "xe_pt_types.h" #include "xe_range_fence.h" +#include "xe_userptr.h" struct xe_bo; struct xe_svm_range; @@ -45,36 +46,44 @@ struct xe_vm_pgtable_update_op; #define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 7) #define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 8) #define XE_VMA_SYSTEM_ALLOCATOR (DRM_GPUVA_USERBITS << 9) +#define XE_VMA_MADV_AUTORESET (DRM_GPUVA_USERBITS << 10) + +/** + * struct xe_vma_mem_attr - memory attributes associated with vma + */ +struct xe_vma_mem_attr { + /** @preferred_loc: preferred memory_location */ + struct { + /** @preferred_loc.migration_policy: Pages migration policy */ + u32 migration_policy; + + /** + * @preferred_loc.devmem_fd: used for determining pagemap_fd + * requested by user DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM and + * DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE mean system memory or + * closest device memory respectively. 
+ */ + u32 devmem_fd; + } preferred_loc; -/** struct xe_userptr - User pointer */ -struct xe_userptr { - /** @invalidate_link: Link for the vm::userptr.invalidated list */ - struct list_head invalidate_link; - /** @userptr: link into VM repin list if userptr. */ - struct list_head repin_link; /** - * @notifier: MMU notifier for user pointer (invalidation call back) + * @atomic_access: The atomic access type for the vma + * See %DRM_XE_VMA_ATOMIC_UNDEFINED, %DRM_XE_VMA_ATOMIC_DEVICE, + * %DRM_XE_VMA_ATOMIC_GLOBAL, and %DRM_XE_VMA_ATOMIC_CPU for possible + * values. These are defined in uapi/drm/xe_drm.h. */ - struct mmu_interval_notifier notifier; - /** @sgt: storage for a scatter gather table */ - struct sg_table sgt; - /** @sg: allocated scatter gather table */ - struct sg_table *sg; - /** @notifier_seq: notifier sequence number */ - unsigned long notifier_seq; - /** @unmap_mutex: Mutex protecting dma-unmapping */ - struct mutex unmap_mutex; + u32 atomic_access; + /** - * @initial_bind: user pointer has been bound at least once. - * write: vm->userptr.notifier_lock in read mode and vm->resv held. - * read: vm->userptr.notifier_lock in write mode or vm->resv held. + * @default_pat_index: The pat index for VMA set during first bind by user. */ - bool initial_bind; - /** @mapped: Whether the @sgt sg-table is dma-mapped. Protected by @unmap_mutex. */ - bool mapped; -#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) - u32 divisor; -#endif + u16 default_pat_index; + + /** + * @pat_index: The pat index to use when encoding the PTEs for this vma. + * same as default_pat_index unless overwritten by madvise. + */ + u16 pat_index; }; struct xe_vma { @@ -100,16 +109,23 @@ struct xe_vma { struct work_struct destroy_work; }; - /** @tile_invalidated: VMA has been invalidated */ + /** + * @tile_invalidated: Tile mask of binding are invalidated for this VMA. 
+ * protected by BO's resv and for userptrs, vm->svm.gpusvm.notifier_lock in + * write mode for writing or vm->svm.gpusvm.notifier_lock in read mode and + * the vm->resv. For stable reading, BO's resv or userptr + * vm->svm.gpusvm.notifier_lock in read mode is required. Can be + * opportunistically read with READ_ONCE outside of locks. + */ u8 tile_invalidated; /** @tile_mask: Tile mask of where to create binding for this VMA */ u8 tile_mask; /** - * @tile_present: GT mask of binding are present for this VMA. + * @tile_present: Tile mask of bindings that are present for this VMA. * protected by vm->lock, vm->resv and for userptrs, - * vm->userptr.notifier_lock for writing. Needs either for reading, + * vm->svm.gpusvm.notifier_lock for writing. Needs either for reading, * but if reading is done under the vm->lock only, it needs to be held * in write mode. */ @@ -119,15 +135,22 @@ struct xe_vma { u8 tile_staged; /** - * @pat_index: The pat index to use when encoding the PTEs for this vma. + * @skip_invalidation: Used in madvise to avoid invalidation + * if mem attributes don't change */ - u16 pat_index; + bool skip_invalidation; /** * @ufence: The user fence that was provided with MAP. * Needs to be signalled before UNMAP can be processed. */ struct xe_user_fence *ufence; + + /** + * @attr: The attributes of vma which determine the migration policy + * and encoding of the PTEs for this vma. 
+ */ + struct xe_vma_mem_attr attr; }; /** @@ -198,11 +221,6 @@ struct xe_vm { #define XE_VM_FLAG_GSC BIT(8) unsigned long flags; - /** @composite_fence_ctx: context composite fence */ - u64 composite_fence_ctx; - /** @composite_fence_seqno: seqno for composite fence */ - u32 composite_fence_seqno; - /** * @lock: outer most lock, protects objects of anything attached to this * VM @@ -237,33 +255,7 @@ struct xe_vm { const struct xe_pt_ops *pt_ops; /** @userptr: user pointer state */ - struct { - /** - * @userptr.repin_list: list of VMAs which are user pointers, - * and needs repinning. Protected by @lock. - */ - struct list_head repin_list; - /** - * @notifier_lock: protects notifier in write mode and - * submission in read mode. - */ - struct rw_semaphore notifier_lock; - /** - * @userptr.invalidated_lock: Protects the - * @userptr.invalidated list. - */ - spinlock_t invalidated_lock; - /** - * @userptr.invalidated: List of invalidated userptrs, not yet - * picked - * up for revalidation. Protected from access with the - * @invalidated_lock. Removing items from the list - * additionally requires @lock in write mode, and adding - * items to the list requires either the @userptr.notifer_lock in - * write mode, OR @lock in write mode. - */ - struct list_head invalidated; - } userptr; + struct xe_userptr_vm userptr; /** @preempt: preempt state */ struct { @@ -271,7 +263,7 @@ struct xe_vm { * @min_run_period_ms: The minimum run period before preempting * an engine again */ - s64 min_run_period_ms; + unsigned int min_run_period_ms; /** @exec_queues: list of exec queues attached to this VM */ struct list_head exec_queues; /** @num_exec_queues: number exec queues attached to this VM */ @@ -286,6 +278,11 @@ struct xe_vm { * BOs */ struct work_struct rebind_work; + /** + * @preempt.pm_activate_link: Link to list of rebind workers to be + * kicked on resume. 
+ */ + struct list_head pm_activate_link; } preempt; /** @um: unified memory state */ @@ -306,13 +303,37 @@ struct xe_vm { } error_capture; /** + * @validation: Validation data only valid with the vm resv held. + * Note: This is really task state of the task holding the vm resv, + * and moving forward we should + * come up with a better way of passing this down the call- + * chain. + */ + struct { + /** + * @validation.validating: The task that is currently making bos resident + * for this vm. + * Protected by the VM's resv for writing. Opportunistic reading can be done + * using READ_ONCE. Note: This is a workaround for the + * TTM eviction_valuable() callback not being passed a struct + * ttm_operation_context(). Future work might want to address this. + */ + struct task_struct *validating; + /** + * @validation._exec: The drm_exec context used when locking the vm resv. + * Protected by the vm's resv. + */ + struct drm_exec *_exec; + } validation; + + /** * @tlb_flush_seqno: Required TLB flush seqno for the next exec. * protected by the vm resv. */ u64 tlb_flush_seqno; /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ bool batch_invalidate_tlb; - /** @xef: XE file handle for tracking this VM's drm client */ + /** @xef: Xe file handle for tracking this VM's drm client */ struct xe_file *xef; }; @@ -320,17 +341,10 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; + unsigned int vma_flags; /** @immediate: Immediate bind */ bool immediate; /** @read_only: Read only */ - bool read_only; - /** @is_null: is NULL binding */ - bool is_null; - /** @is_cpu_addr_mirror: is CPU address mirror binding */ - bool is_cpu_addr_mirror; - /** @dumpable: whether BO is dumped on GPU hang */ - bool dumpable; - /** @invalidate: invalidate the VMA before bind */ bool invalidate_on_bind; /** @pat_index: The pat index to use for this operation. 
*/ u16 pat_index; @@ -374,6 +388,19 @@ struct xe_vma_op_unmap_range { struct xe_svm_range *range; }; +/** struct xe_vma_op_prefetch_range - VMA prefetch range operation */ +struct xe_vma_op_prefetch_range { + /** @range: xarray for SVM ranges data */ + struct xarray range; + /** @ranges_count: number of svm ranges to map */ + u32 ranges_count; + /** + * @tile: Pointer to the tile structure containing memory to prefetch. + * NULL if prefetch requested region is smem + */ + struct xe_tile *tile; +}; + /** enum xe_vma_op_flags - flags for VMA operation */ enum xe_vma_op_flags { /** @XE_VMA_OP_COMMITTED: VMA operation committed */ @@ -416,6 +443,8 @@ struct xe_vma_op { struct xe_vma_op_map_range map_range; /** @unmap_range: VMA unmap range operation specific data */ struct xe_vma_op_unmap_range unmap_range; + /** @prefetch_range: VMA prefetch range operation specific data */ + struct xe_vma_op_prefetch_range prefetch_range; }; }; @@ -433,6 +462,12 @@ struct xe_vma_ops { u32 num_syncs; /** @pt_update_ops: page table update operations */ struct xe_vm_pgtable_update_ops pt_update_ops[XE_MAX_TILES_PER_DEVICE]; + /** @flags: signify the properties within xe_vma_ops */ +#define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0) +#define XE_VMA_OPS_FLAG_MADVISE BIT(1) +#define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2) +#define XE_VMA_OPS_FLAG_SKIP_TLB_WAIT BIT(3) + u32 flags; #ifdef TEST_VM_OPS_ERROR /** @inject_error: inject error to test error handling */ bool inject_error; diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index e421a74fb87c..d50baefcd124 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -3,6 +3,7 @@ * Copyright © 2021-2024 Intel Corporation */ +#include <kunit/visibility.h> #include <linux/pci.h> #include <drm/drm_managed.h> @@ -12,28 +13,25 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_assert.h" +#include "xe_bo.h" #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt_mcr.h" -#include 
"xe_gt_sriov_vf.h" #include "xe_mmio.h" #include "xe_module.h" #include "xe_sriov.h" +#include "xe_tile_sriov_vf.h" +#include "xe_ttm_vram_mgr.h" #include "xe_vram.h" +#include "xe_vram_types.h" -#define BAR_SIZE_SHIFT 20 - -static void -_resize_bar(struct xe_device *xe, int resno, resource_size_t size) +static void resize_bar(struct xe_device *xe, int resno, resource_size_t size) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); int bar_size = pci_rebar_bytes_to_size(size); int ret; - if (pci_resource_len(pdev, resno)) - pci_release_resource(pdev, resno); - - ret = pci_resize_resource(pdev, resno, bar_size); + ret = pci_resize_resource(pdev, resno, bar_size, 0); if (ret) { drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n", resno, 1 << bar_size, ERR_PTR(ret)); @@ -47,7 +45,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size) * if force_vram_bar_size is set, attempt to set to the requested size * else set to maximum possible size */ -static void resize_vram_bar(struct xe_device *xe) +void xe_vram_resize_bar(struct xe_device *xe) { int force_vram_bar_size = xe_modparam.force_vram_bar_size; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -55,41 +53,37 @@ static void resize_vram_bar(struct xe_device *xe) resource_size_t current_size; resource_size_t rebar_size; struct resource *root_res; - u32 bar_size_mask; + int max_size, i; u32 pci_cmd; - int i; /* gather some relevant info */ current_size = pci_resource_len(pdev, LMEM_BAR); - bar_size_mask = pci_rebar_get_possible_sizes(pdev, LMEM_BAR); - - if (!bar_size_mask) - return; if (force_vram_bar_size < 0) return; /* set to a specific size? 
*/ if (force_vram_bar_size) { - u32 bar_size_bit; - - rebar_size = force_vram_bar_size * (resource_size_t)SZ_1M; - - bar_size_bit = bar_size_mask & BIT(pci_rebar_bytes_to_size(rebar_size)); + rebar_size = pci_rebar_bytes_to_size(force_vram_bar_size * + (resource_size_t)SZ_1M); - if (!bar_size_bit) { + if (!pci_rebar_size_supported(pdev, LMEM_BAR, rebar_size)) { drm_info(&xe->drm, - "Requested size: %lluMiB is not supported by rebar sizes: 0x%x. Leaving default: %lluMiB\n", - (u64)rebar_size >> 20, bar_size_mask, (u64)current_size >> 20); + "Requested size: %lluMiB is not supported by rebar sizes: 0x%llx. Leaving default: %lluMiB\n", + (u64)pci_rebar_size_to_bytes(rebar_size) >> 20, + pci_rebar_get_possible_sizes(pdev, LMEM_BAR), + (u64)current_size >> 20); return; } - rebar_size = 1ULL << (__fls(bar_size_bit) + BAR_SIZE_SHIFT); - + rebar_size = pci_rebar_size_to_bytes(rebar_size); if (rebar_size == current_size) return; } else { - rebar_size = 1ULL << (__fls(bar_size_mask) + BAR_SIZE_SHIFT); + max_size = pci_rebar_get_max_size(pdev, LMEM_BAR); + if (max_size < 0) + return; + rebar_size = pci_rebar_size_to_bytes(max_size); /* only resize if larger than current */ if (rebar_size <= current_size) @@ -116,7 +110,7 @@ static void resize_vram_bar(struct xe_device *xe) pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); - _resize_bar(xe, LMEM_BAR, rebar_size); + resize_bar(xe, LMEM_BAR, rebar_size); pci_assign_unassigned_bus_resources(pdev->bus); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); @@ -136,7 +130,7 @@ static bool resource_is_valid(struct pci_dev *pdev, int bar) return true; } -static int determine_lmem_bar_size(struct xe_device *xe) +static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region *lmem_bar) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -145,28 +139,31 @@ static int determine_lmem_bar_size(struct xe_device *xe) return -ENXIO; } - 
resize_vram_bar(xe); - - xe->mem.vram.io_start = pci_resource_start(pdev, LMEM_BAR); - xe->mem.vram.io_size = pci_resource_len(pdev, LMEM_BAR); - if (!xe->mem.vram.io_size) + lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR); + lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR); + if (!lmem_bar->io_size) return -EIO; /* XXX: Need to change when xe link code is ready */ - xe->mem.vram.dpa_base = 0; + lmem_bar->dpa_base = 0; /* set up a map to the total memory area. */ - xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size); + lmem_bar->mapping = devm_ioremap_wc(&pdev->dev, lmem_bar->io_start, lmem_bar->io_size); return 0; } -static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size) +static int get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size, u64 *poffset) { struct xe_device *xe = gt_to_xe(gt); + unsigned int fw_ref; u64 offset; u32 reg; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; + if (GRAPHICS_VER(xe) >= 20) { u64 ccs_size = tile_size / 512; u64 offset_hi, offset_lo; @@ -196,7 +193,10 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size) offset = (u64)REG_FIELD_GET(XEHP_FLAT_CCS_PTR, reg) * SZ_64K; } - return offset; + xe_force_wake_put(gt_to_fw(gt), fw_ref); + *poffset = offset; + + return 0; } /* @@ -223,7 +223,6 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size, { struct xe_device *xe = tile_to_xe(tile); struct xe_gt *gt = tile->primary_gt; - unsigned int fw_ref; u64 offset; u32 reg; @@ -234,32 +233,31 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size, offset = 0; for_each_tile(t, xe, id) for_each_if(t->id < tile->id) - offset += xe_gt_sriov_vf_lmem(t->primary_gt); + offset += xe_tile_sriov_vf_lmem(t); - *tile_size = xe_gt_sriov_vf_lmem(gt); + *tile_size = xe_tile_sriov_vf_lmem(tile); *vram_size = *tile_size; *tile_offset = offset; return 0; } - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) - 
return -ETIMEDOUT; - /* actual size */ if (unlikely(xe->info.platform == XE_DG1)) { *tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), LMEM_BAR); *tile_offset = 0; } else { - reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id)); + reg = xe_mmio_read32(&tile->mmio, SG_TILE_ADDR_RANGE(tile->id)); *tile_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G; *tile_offset = (u64)REG_FIELD_GET(GENMASK(7, 1), reg) * SZ_1G; } /* minus device usage */ if (xe->info.has_flat_ccs) { - offset = get_flat_ccs_offset(gt, *tile_size); + int ret = get_flat_ccs_offset(gt, *tile_size, &offset); + + if (ret) + return ret; } else { offset = xe_mmio_read64_2x32(&tile->mmio, GSMBASE); } @@ -267,8 +265,6 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size, /* remove the tile offset so we have just the available size */ *vram_size = offset - *tile_offset; - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return 0; } @@ -278,13 +274,74 @@ static void vram_fini(void *arg) struct xe_tile *tile; int id; - if (xe->mem.vram.mapping) - iounmap(xe->mem.vram.mapping); + xe->mem.vram->mapping = NULL; + + for_each_tile(tile, xe, id) { + tile->mem.vram->mapping = NULL; + if (tile->mem.kernel_vram) + tile->mem.kernel_vram->mapping = NULL; + } +} + +struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement) +{ + struct xe_vram_region *vram; + struct drm_device *drm = &xe->drm; + + xe_assert(xe, id < xe->info.tile_count); + + vram = drmm_kzalloc(drm, sizeof(*vram), GFP_KERNEL); + if (!vram) + return NULL; + + vram->xe = xe; + vram->id = id; + vram->placement = placement; +#if defined(CONFIG_DRM_XE_PAGEMAP) + vram->migrate = xe->tiles[id].migrate; +#endif + return vram; +} + +static void print_vram_region_info(struct xe_device *xe, struct xe_vram_region *vram) +{ + struct drm_device *drm = &xe->drm; - xe->mem.vram.mapping = NULL; + if (vram->io_size < vram->usable_size) + drm_info(drm, "Small BAR device\n"); - for_each_tile(tile, xe, id) - 
tile->mem.vram.mapping = NULL; + drm_info(drm, + "VRAM[%u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", + vram->id, &vram->actual_physical_size, &vram->usable_size, &vram->io_size); + drm_info(drm, "VRAM[%u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", + vram->id, &vram->dpa_base, vram->dpa_base + (u64)vram->actual_physical_size, + &vram->io_start, vram->io_start + (u64)vram->io_size); +} + +static int vram_region_init(struct xe_device *xe, struct xe_vram_region *vram, + struct xe_vram_region *lmem_bar, u64 offset, u64 usable_size, + u64 region_size, resource_size_t remain_io_size) +{ + /* Check if VRAM region is already initialized */ + if (vram->mapping) + return 0; + + vram->actual_physical_size = region_size; + vram->io_start = lmem_bar->io_start + offset; + vram->io_size = min_t(u64, usable_size, remain_io_size); + + if (!vram->io_size) { + drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n"); + return -ENODEV; + } + + vram->dpa_base = lmem_bar->dpa_base + offset; + vram->mapping = lmem_bar->mapping + offset; + vram->usable_size = usable_size; + + print_vram_region_info(xe, vram); + + return 0; } /** @@ -298,78 +355,108 @@ static void vram_fini(void *arg) int xe_vram_probe(struct xe_device *xe) { struct xe_tile *tile; - resource_size_t io_size; + struct xe_vram_region lmem_bar; + resource_size_t remain_io_size; u64 available_size = 0; u64 total_size = 0; - u64 tile_offset; - u64 tile_size; - u64 vram_size; int err; u8 id; if (!IS_DGFX(xe)) return 0; - /* Get the size of the root tile's vram for later accessibility comparison */ - tile = xe_device_get_root_tile(xe); - err = tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); - if (err) - return err; - - err = determine_lmem_bar_size(xe); + err = determine_lmem_bar_size(xe, &lmem_bar); if (err) return err; + drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &lmem_bar.io_start, &lmem_bar.io_size); - drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", 
&xe->mem.vram.io_start, - &xe->mem.vram.io_size); - - io_size = xe->mem.vram.io_size; + remain_io_size = lmem_bar.io_size; - /* tile specific ranges */ for_each_tile(tile, xe, id) { - err = tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); + u64 region_size; + u64 usable_size; + u64 tile_offset; + + err = tile_vram_size(tile, &usable_size, &region_size, &tile_offset); if (err) return err; - tile->mem.vram.actual_physical_size = tile_size; - tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset; - tile->mem.vram.io_size = min_t(u64, vram_size, io_size); + total_size += region_size; + available_size += usable_size; - if (!tile->mem.vram.io_size) { - drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n"); - return -ENODEV; + err = vram_region_init(xe, tile->mem.vram, &lmem_bar, tile_offset, usable_size, + region_size, remain_io_size); + if (err) + return err; + + if (total_size > lmem_bar.io_size) { + drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n", + &total_size, &lmem_bar.io_size); } - tile->mem.vram.dpa_base = xe->mem.vram.dpa_base + tile_offset; - tile->mem.vram.usable_size = vram_size; - tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset; + remain_io_size -= min_t(u64, tile->mem.vram->actual_physical_size, remain_io_size); + } - if (tile->mem.vram.io_size < tile->mem.vram.usable_size) - drm_info(&xe->drm, "Small BAR device\n"); - drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id, - tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size); - drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id, - &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + (u64)tile->mem.vram.actual_physical_size, - &tile->mem.vram.io_start, tile->mem.vram.io_start + (u64)tile->mem.vram.io_size); + err = vram_region_init(xe, xe->mem.vram, &lmem_bar, 0, available_size, total_size, + 
lmem_bar.io_size); + if (err) + return err; - /* calculate total size using tile size to get the correct HW sizing */ - total_size += tile_size; - available_size += vram_size; + return devm_add_action_or_reset(xe->drm.dev, vram_fini, xe); +} - if (total_size > xe->mem.vram.io_size) { - drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n", - &total_size, &xe->mem.vram.io_size); - } +/** + * xe_vram_region_io_start - Get the IO start of a VRAM region + * @vram: the VRAM region + * + * Return: the IO start of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_io_start(const struct xe_vram_region *vram) +{ + return vram ? vram->io_start : 0; +} - io_size -= min_t(u64, tile_size, io_size); - } +/** + * xe_vram_region_io_size - Get the IO size of a VRAM region + * @vram: the VRAM region + * + * Return: the IO size of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_io_size(const struct xe_vram_region *vram) +{ + return vram ? vram->io_size : 0; +} - xe->mem.vram.actual_physical_size = total_size; +/** + * xe_vram_region_dpa_base - Get the DPA base of a VRAM region + * @vram: the VRAM region + * + * Return: the DPA base of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_dpa_base(const struct xe_vram_region *vram) +{ + return vram ? vram->dpa_base : 0; +} - drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start, - &xe->mem.vram.actual_physical_size); - drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start, - &available_size); +/** + * xe_vram_region_usable_size - Get the usable size of a VRAM region + * @vram: the VRAM region + * + * Return: the usable size of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_usable_size(const struct xe_vram_region *vram) +{ + return vram ? 
vram->usable_size : 0; +} - return devm_add_action_or_reset(xe->drm.dev, vram_fini, xe); +/** + * xe_vram_region_actual_physical_size - Get the actual physical size of a VRAM region + * @vram: the VRAM region + * + * Return: the actual physical size of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_actual_physical_size(const struct xe_vram_region *vram) +{ + return vram ? vram->actual_physical_size : 0; } +EXPORT_SYMBOL_IF_KUNIT(xe_vram_region_actual_physical_size); diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h index e31cc04ec0db..13505cfb184d 100644 --- a/drivers/gpu/drm/xe/xe_vram.h +++ b/drivers/gpu/drm/xe/xe_vram.h @@ -6,8 +6,20 @@ #ifndef _XE_VRAM_H_ #define _XE_VRAM_H_ +#include <linux/types.h> + struct xe_device; +struct xe_vram_region; +void xe_vram_resize_bar(struct xe_device *xe); int xe_vram_probe(struct xe_device *xe); +struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement); + +resource_size_t xe_vram_region_io_start(const struct xe_vram_region *vram); +resource_size_t xe_vram_region_io_size(const struct xe_vram_region *vram); +resource_size_t xe_vram_region_dpa_base(const struct xe_vram_region *vram); +resource_size_t xe_vram_region_usable_size(const struct xe_vram_region *vram); +resource_size_t xe_vram_region_actual_physical_size(const struct xe_vram_region *vram); + #endif diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c index b26e26d73dae..17bc84da4cdc 100644 --- a/drivers/gpu/drm/xe/xe_vram_freq.c +++ b/drivers/gpu/drm/xe/xe_vram_freq.c @@ -34,7 +34,7 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_tile *tile = dev_to_tile(dev); - u32 val, mbox; + u32 val = 0, mbox; int err; mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG) @@ -56,7 +56,7 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_tile *tile = 
dev_to_tile(dev); - u32 val, mbox; + u32 val = 0, mbox; int err; mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG) diff --git a/drivers/gpu/drm/xe/xe_vram_types.h b/drivers/gpu/drm/xe/xe_vram_types.h new file mode 100644 index 000000000000..83772dcbf1af --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vram_types.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_VRAM_TYPES_H_ +#define _XE_VRAM_TYPES_H_ + +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +#include <drm/drm_pagemap.h> +#endif + +#include "xe_ttm_vram_mgr_types.h" + +struct xe_device; +struct xe_migrate; + +/** + * struct xe_vram_region - memory region structure + * This is used to describe a memory region in xe + * device, such as HBM memory or CXL extension memory. + */ +struct xe_vram_region { + /** @xe: Back pointer to xe device */ + struct xe_device *xe; + /** + * @id: VRAM region instance id + * + * The value should be unique for VRAM region. + */ + u8 id; + /** @io_start: IO start address of this VRAM instance */ + resource_size_t io_start; + /** + * @io_size: IO size of this VRAM instance + * + * This represents how much of this VRAM we can access + * via the CPU through the VRAM BAR. This can be smaller + * than @usable_size, in which case only part of VRAM is CPU + * accessible (typically the first 256M). This + * configuration is known as small-bar. 
+ */ + resource_size_t io_size; + /** @dpa_base: This memory region's DPA (device physical address) base */ + resource_size_t dpa_base; + /** + * @usable_size: usable size of VRAM + * + * Usable size of VRAM excluding reserved portions + * (e.g. stolen mem) + */ + resource_size_t usable_size; + /** + * @actual_physical_size: Actual VRAM size + * + * Actual VRAM size including reserved portions + * (e.g. stolen mem) + */ + resource_size_t actual_physical_size; + /** @mapping: pointer to VRAM mappable space */ + void __iomem *mapping; + /** @ttm: VRAM TTM manager */ + struct xe_ttm_vram_mgr ttm; + /** @placement: TTM placement dedicated for this region */ + u32 placement; +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) + /** @migrate: Back pointer to migrate */ + struct xe_migrate *migrate; + /** @pagemap: Used to remap device memory as ZONE_DEVICE */ + struct dev_pagemap pagemap; + /** + * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory + * pages of this tile. + */ + struct drm_pagemap dpagemap; + /** + * @hpa_base: base host physical address + * + * This is generated when remapping device memory as ZONE_DEVICE + */ + resource_size_t hpa_base; +#endif +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c index b378848d3b7b..8f23a27871b6 100644 --- a/drivers/gpu/drm/xe/xe_vsec.c +++ b/drivers/gpu/drm/xe/xe_vsec.c @@ -24,6 +24,7 @@ #define BMG_DEVICE_ID 0xE2F8 static struct intel_vsec_header bmg_telemetry = { + .rev = 1, .length = 0x10, .id = VSEC_ID_TELEMETRY, .num_entries = 2, @@ -32,28 +33,19 @@ static struct intel_vsec_header bmg_telemetry = { .offset = BMG_DISCOVERY_OFFSET, }; -static struct intel_vsec_header bmg_punit_crashlog = { +static struct intel_vsec_header bmg_crashlog = { + .rev = 1, .length = 0x10, .id = VSEC_ID_CRASHLOG, - .num_entries = 1, - .entry_size = 4, + .num_entries = 2, + .entry_size = 6, .tbir = 0, .offset = BMG_DISCOVERY_OFFSET + 0x60, }; -static struct intel_vsec_header bmg_oobmsm_crashlog = { - .length = 
0x10, - .id = VSEC_ID_CRASHLOG, - .num_entries = 1, - .entry_size = 4, - .tbir = 0, - .offset = BMG_DISCOVERY_OFFSET + 0x78, -}; - static struct intel_vsec_header *bmg_capabilities[] = { &bmg_telemetry, - &bmg_punit_crashlog, - &bmg_oobmsm_crashlog, + &bmg_crashlog, NULL }; @@ -149,8 +141,8 @@ static int xe_guid_decode(u32 guid, int *index, u32 *offset) return 0; } -static int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, - u32 count) +int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, + u32 count) { struct xe_device *xe = pdev_to_xe_device(pdev); void __iomem *telem_addr = xe->mmio.regs + BMG_TELEMETRY_OFFSET; diff --git a/drivers/gpu/drm/xe/xe_vsec.h b/drivers/gpu/drm/xe/xe_vsec.h index 5777c53faec2..dabfb4e02d70 100644 --- a/drivers/gpu/drm/xe/xe_vsec.h +++ b/drivers/gpu/drm/xe/xe_vsec.h @@ -4,8 +4,12 @@ #ifndef _XE_VSEC_H_ #define _XE_VSEC_H_ +#include <linux/types.h> + +struct pci_dev; struct xe_device; void xe_vsec_init(struct xe_device *xe); +int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, u32 count); #endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 67196baa4249..e32dd2fde6f1 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -10,6 +10,7 @@ #include <linux/compiler_types.h> #include <linux/fault-inject.h> +#include <generated/xe_device_wa_oob.h> #include <generated/xe_wa_oob.h> #include "regs/xe_engine_regs.h" @@ -38,7 +39,8 @@ * Register Immediate commands) once when initializing the device and saved in * the default context. That default context is then used on every context * creation to have a "primed golden context", i.e. a context image that - * already contains the changes needed to all the registers. + * already contains the changes needed to all the registers. See + * drivers/gpu/drm/xe/xe_lrc.c for default context handling. 
* * - Engine workarounds: the list of these WAs is applied whenever the specific * engine is reset. It's also possible that a set of engine classes share a @@ -47,10 +49,10 @@ * them need to keeep the workaround programming: the approach taken in the * driver is to tie those workarounds to the first compute/render engine that * is registered. When executing with GuC submission, engine resets are - * outside of kernel driver control, hence the list of registers involved in + * outside of kernel driver control, hence the list of registers involved is * written once, on engine initialization, and then passed to GuC, that * saves/restores their values before/after the reset takes place. See - * ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference. + * drivers/gpu/drm/xe/xe_guc_ads.c for reference. * * - GT workarounds: the list of these WAs is applied whenever these registers * revert to their default values: on GPU reset, suspend/resume [1]_, etc. @@ -65,21 +67,39 @@ * hardware on every HW context restore. These buffers are created and * programmed in the default context so the hardware always go through those * programming sequences when switching contexts. The support for workaround - * batchbuffers is enabled these hardware mechanisms: + * batchbuffers is enabled via these hardware mechanisms: * - * #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default - * context, pointing the hardware to jump to that location when that offset - * is reached in the context restore. Workaround batchbuffer in the driver - * currently uses this mechanism for all platforms. + * #. INDIRECT_CTX (also known as **mid context restore bb**): A batchbuffer + * and an offset are provided in the default context, pointing the hardware + * to jump to that location when that offset is reached in the context + * restore. When a context is being restored, this is executed after the + * ring context, in the middle (or beginning) of the engine context image. * - * #. 
BB_PER_CTX_PTR: A batchbuffer is provided in the default context, - * pointing the hardware to a buffer to continue executing after the - * engine registers are restored in a context restore sequence. This is - * currently not used in the driver. + * #. BB_PER_CTX_PTR (also known as **post context restore bb**): A + * batchbuffer is provided in the default context, pointing the hardware to + * a buffer to continue executing after the engine registers are restored + * in a context restore sequence. + * + * Below is the timeline for a context restore sequence: + * + * .. code:: + * + * INDIRECT_CTX_OFFSET + * |----------->| + * .------------.------------.-------------.------------.--------------.-----------. + * |Ring | Engine | Mid-context | Engine | Post-context | Ring | + * |Restore | Restore (1)| BB Restore | Restore (2)| BB Restore | Execution | + * `------------'------------'-------------'------------'--------------'-----------' * * - Other/OOB: There are WAs that, due to their nature, cannot be applied from * a central place. Those are peppered around the rest of the code, as needed. - * Workarounds related to the display IP are the main example. + * There's a central place to control which workarounds are enabled: + * drivers/gpu/drm/xe/xe_wa_oob.rules for GT workarounds and + * drivers/gpu/drm/xe/xe_device_wa_oob.rules for device/SoC workarounds. + * These files only record which workarounds are enabled: during early device + * initialization those rules are evaluated and recorded by the driver. Then + * later the driver checks with ``XE_GT_WA()`` and ``XE_DEVICE_WA()`` to + * implement them. * * .. [1] Technically, some registers are powercontext saved & restored, so they * survive a suspend/resume. 
In practice, writing them again is not too @@ -250,14 +270,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, - { XE_RTP_NAME("14020316580"), - XE_RTP_RULES(MEDIA_VERSION(1301)), - XE_RTP_ACTIONS(CLR(POWERGATE_ENABLE, - VDN_HCP_POWERGATE_ENABLE(0) | - VDN_MFXVDENC_POWERGATE_ENABLE(0) | - VDN_HCP_POWERGATE_ENABLE(2) | - VDN_MFXVDENC_POWERGATE_ENABLE(2))), - }, { XE_RTP_NAME("14019449301"), XE_RTP_RULES(MEDIA_VERSION(1301), ENGINE_CLASS(VIDEO_DECODE)), XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), @@ -285,6 +297,18 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + { XE_RTP_NAME("16021865536"), + XE_RTP_RULES(MEDIA_VERSION(3002), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + { XE_RTP_NAME("16021867713"), + XE_RTP_RULES(MEDIA_VERSION(3002), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, { XE_RTP_NAME("14021486841"), XE_RTP_RULES(MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), ENGINE_CLASS(VIDEO_DECODE)), @@ -503,10 +527,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) }, - { XE_RTP_NAME("16018737384"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) - }, /* * These two workarounds are the same, just applying to different * engines. 
Although Wa_18032095049 (for the RCS) isn't required on @@ -529,35 +549,47 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, + { XE_RTP_NAME("13012615864"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) + }, /* Xe2_HPG */ { XE_RTP_NAME("16018712365"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) }, { XE_RTP_NAME("16018737384"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) }, { XE_RTP_NAME("14019988906"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) }, { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) }, { XE_RTP_NAME("14020338487"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) }, { XE_RTP_NAME("18032247524"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + 
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE)) }, { XE_RTP_NAME("14018471104"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) }, /* @@ -566,7 +598,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { * apply this to all engines for simplicity. */ { XE_RTP_NAME("16021639441"), - XE_RTP_RULES(GRAPHICS_VERSION(2001)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002)), XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), GHWSP_CSB_REPORT_DIS | PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, @@ -578,13 +610,26 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS)) }, { XE_RTP_NAME("14021402888"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, - { XE_RTP_NAME("14021821874"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + { XE_RTP_NAME("14021821874, 14022954250"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) }, + { XE_RTP_NAME("13012615864"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) + }, + { XE_RTP_NAME("18041344222"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute), + FUNC(xe_rtp_match_not_sriov_vf), + FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), + XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) + }, /* Xe2_LPM */ 
@@ -626,11 +671,14 @@ static const struct xe_rtp_entry_sr engine_was[] = { }, { XE_RTP_NAME("14023061436"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + FUNC(xe_rtp_match_first_render_or_compute), OR, + GRAPHICS_VERSION_RANGE(3003, 3005), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE)) }, { XE_RTP_NAME("13012615864"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), OR, + GRAPHICS_VERSION_RANGE(3003, 3005), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) }, @@ -640,6 +688,17 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3003, 3005), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, + { XE_RTP_NAME("18041344222"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + FUNC(xe_rtp_match_first_render_or_compute), + FUNC(xe_rtp_match_not_sriov_vf), + FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), + XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) + }, }; static const struct xe_rtp_entry_sr lrc_was[] = { @@ -774,7 +833,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_ACTIONS(SET(INSTPM(RENDER_RING_BASE), ENABLE_SEMAPHORE_POLL_BIT)) }, { XE_RTP_NAME("18033852989"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) }, { XE_RTP_NAME("14021567978"), @@ -807,7 +866,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN)) }, { XE_RTP_NAME("14019386621"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), 
ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) }, { XE_RTP_NAME("14020756599"), @@ -824,13 +883,17 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_AUTOSTRIP)) }, { XE_RTP_NAME("15016589081"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) }, { XE_RTP_NAME("22021007897"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) }, + { XE_RTP_NAME("18033852989"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) + }, /* Xe3_LPG */ { XE_RTP_NAME("14021490052"), @@ -843,6 +906,19 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, + { XE_RTP_NAME("22021007897"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) + }, + { XE_RTP_NAME("14024681466"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1, FAST_CLEAR_VALIGN_FIX)) + }, + { XE_RTP_NAME("15016589081"), + XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) + }, }; static __maybe_unused const struct xe_rtp_entry oob_was[] = { @@ -852,16 +928,41 @@ static __maybe_unused const struct xe_rtp_entry oob_was[] = { static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT); +static __maybe_unused const struct xe_rtp_entry device_oob_was[] = { +#include 
<generated/xe_device_wa_oob.c> + {} +}; + +static_assert(ARRAY_SIZE(device_oob_was) - 1 == _XE_DEVICE_WA_OOB_COUNT); + __diag_pop(); /** - * xe_wa_process_oob - process OOB workaround table + * xe_wa_process_device_oob - process OOB workaround table + * @xe: device instance to process workarounds for + * + * process OOB workaround table for this device, marking in @xe the + * workarounds that are active. + */ + +void xe_wa_process_device_oob(struct xe_device *xe) +{ + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(xe); + + xe_rtp_process_ctx_enable_active_tracking(&ctx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was)); + + xe->wa_active.oob_initialized = true; + xe_rtp_process(&ctx, device_oob_was); +} + +/** + * xe_wa_process_gt_oob - process GT OOB workaround table * @gt: GT instance to process workarounds for * * Process OOB workaround table for this platform, marking in @gt the * workarounds that are active. */ -void xe_wa_process_oob(struct xe_gt *gt) +void xe_wa_process_gt_oob(struct xe_gt *gt) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); @@ -923,12 +1024,34 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe) } /** - * xe_wa_init - initialize gt with workaround bookkeeping + * xe_wa_device_init - initialize device with workaround oob bookkeeping + * @xe: Xe device instance to initialize + * + * Returns 0 for success, negative with error code otherwise + */ +int xe_wa_device_init(struct xe_device *xe) +{ + unsigned long *p; + + p = drmm_kzalloc(&xe->drm, + sizeof(*p) * BITS_TO_LONGS(ARRAY_SIZE(device_oob_was)), + GFP_KERNEL); + + if (!p) + return -ENOMEM; + + xe->wa_active.oob = p; + + return 0; +} + +/** + * xe_wa_gt_init - initialize gt with workaround bookkeeping * @gt: GT instance to initialize * * Returns 0 for success, negative error code otherwise. 
*/ -int xe_wa_init(struct xe_gt *gt) +int xe_wa_gt_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); size_t n_oob, n_lrc, n_engine, n_gt, total; @@ -954,9 +1077,26 @@ int xe_wa_init(struct xe_gt *gt) return 0; } -ALLOW_ERROR_INJECTION(xe_wa_init, ERRNO); /* See xe_pci_probe() */ +ALLOW_ERROR_INJECTION(xe_wa_gt_init, ERRNO); /* See xe_pci_probe() */ + +void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p) +{ + size_t idx; + + drm_printf(p, "Device OOB Workarounds\n"); + for_each_set_bit(idx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was)) + if (device_oob_was[idx].name) + drm_printf_indent(p, 1, "%s\n", device_oob_was[idx].name); +} -void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p) +/** + * xe_wa_gt_dump() - Dump GT workarounds into a drm printer. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Return: always 0. + */ +int xe_wa_gt_dump(struct xe_gt *gt, struct drm_printer *p) { size_t idx; @@ -964,18 +1104,22 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p) for_each_set_bit(idx, gt->wa_active.gt, ARRAY_SIZE(gt_was)) drm_printf_indent(p, 1, "%s\n", gt_was[idx].name); - drm_printf(p, "\nEngine Workarounds\n"); + drm_puts(p, "\n"); + drm_printf(p, "Engine Workarounds\n"); for_each_set_bit(idx, gt->wa_active.engine, ARRAY_SIZE(engine_was)) drm_printf_indent(p, 1, "%s\n", engine_was[idx].name); - drm_printf(p, "\nLRC Workarounds\n"); + drm_puts(p, "\n"); + drm_printf(p, "LRC Workarounds\n"); for_each_set_bit(idx, gt->wa_active.lrc, ARRAY_SIZE(lrc_was)) drm_printf_indent(p, 1, "%s\n", lrc_was[idx].name); - drm_printf(p, "\nOOB Workarounds\n"); + drm_puts(p, "\n"); + drm_printf(p, "OOB Workarounds\n"); for_each_set_bit(idx, gt->wa_active.oob, ARRAY_SIZE(oob_was)) if (oob_was[idx].name) drm_printf_indent(p, 1, "%s\n", oob_was[idx].name); + return 0; } /* @@ -997,6 +1141,6 @@ void xe_wa_apply_tile_workarounds(struct xe_tile *tile) if (IS_SRIOV_VF(tile->xe)) return; - if (XE_WA(tile->primary_gt, 22010954014)) + if 
(XE_DEVICE_WA(tile->xe, 22010954014)) xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS); } diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h index 52337405b5bc..8fd6a5af0910 100644 --- a/drivers/gpu/drm/xe/xe_wa.h +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -13,23 +13,41 @@ struct xe_gt; struct xe_hw_engine; struct xe_tile; -int xe_wa_init(struct xe_gt *gt); -void xe_wa_process_oob(struct xe_gt *gt); +int xe_wa_device_init(struct xe_device *xe); +int xe_wa_gt_init(struct xe_gt *gt); +void xe_wa_process_device_oob(struct xe_device *xe); +void xe_wa_process_gt_oob(struct xe_gt *gt); void xe_wa_process_gt(struct xe_gt *gt); void xe_wa_process_engine(struct xe_hw_engine *hwe); void xe_wa_process_lrc(struct xe_hw_engine *hwe); void xe_wa_apply_tile_workarounds(struct xe_tile *tile); -void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); +void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p); +int xe_wa_gt_dump(struct xe_gt *gt, struct drm_printer *p); /** - * XE_WA - Out-of-band workarounds, that don't fit the lifecycle any - * other more specific type + * XE_GT_WA - Out-of-band GT workarounds, to be queried and called as needed. * @gt__: gt instance * @id__: XE_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h */ -#define XE_WA(gt__, id__) ({ \ +#define XE_GT_WA(gt__, id__) ({ \ xe_gt_assert(gt__, (gt__)->wa_active.oob_initialized); \ test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob); \ }) +/** + * XE_DEVICE_WA - Out-of-band Device workarounds, to be queried and called + * as needed. 
+ * @xe__: xe_device + * @id__: XE_DEVICE_WA_OOB_<id__>, as generated by build system in generated/xe_device_wa_oob.h + */ +#define XE_DEVICE_WA(xe__, id__) ({ \ + xe_assert(xe__, (xe__)->wa_active.oob_initialized); \ + test_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \ +}) + +#define XE_DEVICE_WA_DISABLE(xe__, id__) ({ \ + xe_assert(xe__, (xe__)->wa_active.oob_initialized); \ + clear_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \ +}) + #endif diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 9efc5accd43d..7ca7258eb5d8 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -1,4 +1,6 @@ 1607983814 GRAPHICS_VERSION_RANGE(1200, 1210) +16010904313 GRAPHICS_VERSION_RANGE(1200, 1210) +18022495364 GRAPHICS_VERSION_RANGE(1200, 1210) 22012773006 GRAPHICS_VERSION_RANGE(1200, 1250) 14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0) PLATFORM(DG2) @@ -9,10 +11,9 @@ 18020744125 PLATFORM(PVC) 1509372804 PLATFORM(PVC), GRAPHICS_STEP(A0, C0) 1409600907 GRAPHICS_VERSION_RANGE(1200, 1250) -14016763929 SUBPLATFORM(DG2, G10) +22014953428 SUBPLATFORM(DG2, G10) SUBPLATFORM(DG2, G12) 16017236439 PLATFORM(PVC) -22010954014 PLATFORM(DG2) 14019821291 MEDIA_VERSION_RANGE(1300, 2000) 14015076503 MEDIA_VERSION(1300) 16020292621 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) @@ -21,7 +22,8 @@ GRAPHICS_VERSION_RANGE(1270, 1274) MEDIA_VERSION(1300) PLATFORM(DG2) -14018094691 GRAPHICS_VERSION(2004) +14018094691 GRAPHICS_VERSION_RANGE(2001, 2002) + GRAPHICS_VERSION(2004) 14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) 18024947630 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) @@ -29,21 +31,23 @@ 16022287689 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) 13011645652 GRAPHICS_VERSION(2004) - GRAPHICS_VERSION(3001) -14022293748 GRAPHICS_VERSION(2001) - GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) -22019794406 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(3003) + 
GRAPHICS_VERSION_RANGE(3004, 3005) +14022293748 GRAPHICS_VERSION_RANGE(2001, 2002) + GRAPHICS_VERSION(2004) + GRAPHICS_VERSION_RANGE(3000, 3005) +22019794406 GRAPHICS_VERSION_RANGE(2001, 2002) GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) + GRAPHICS_VERSION(3003) + GRAPHICS_VERSION_RANGE(3004, 3005) 22019338487 MEDIA_VERSION(2000) - GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) -22019338487_display PLATFORM(LUNARLAKE) -16023588340 GRAPHICS_VERSION(2001) +16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) 14019789679 GRAPHICS_VERSION(1255) GRAPHICS_VERSION_RANGE(1270, 2004) -no_media_l3 MEDIA_VERSION(3000) 14022866841 GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0) 16021333562 GRAPHICS_VERSION_RANGE(1200, 1274) @@ -57,5 +61,19 @@ no_media_l3 MEDIA_VERSION(3000) GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0) 16023105232 GRAPHICS_VERSION_RANGE(2001, 3001) MEDIA_VERSION_RANGE(1301, 3000) + MEDIA_VERSION(3002) + GRAPHICS_VERSION_RANGE(3003, 3005) 16026508708 GRAPHICS_VERSION_RANGE(1200, 3001) MEDIA_VERSION_RANGE(1300, 3000) + MEDIA_VERSION(3002) + GRAPHICS_VERSION_RANGE(3003, 3005) +14020001231 GRAPHICS_VERSION_RANGE(2001,2004), FUNC(xe_rtp_match_psmi_enabled) + MEDIA_VERSION(2000), FUNC(xe_rtp_match_psmi_enabled) + MEDIA_VERSION(3000), FUNC(xe_rtp_match_psmi_enabled) + MEDIA_VERSION(3002), FUNC(xe_rtp_match_psmi_enabled) +16023683509 MEDIA_VERSION(2000), FUNC(xe_rtp_match_psmi_enabled) + MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_psmi_enabled) + +15015404425_disable PLATFORM(PANTHERLAKE), MEDIA_STEP(B0, FOREVER) +16026007364 MEDIA_VERSION(3000) +14020316580 MEDIA_VERSION(1301) |
