diff options
Diffstat (limited to 'drivers/gpu/drm/xe')
-rw-r--r-- | drivers/gpu/drm/xe/Kconfig | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/regs/xe_mchbar_regs.h | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/regs/xe_pcode_regs.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_bo.c | 48 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_device_sysfs.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_device_types.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_exec_queue.c | 15 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_gpu_scheduler.h | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_gt_freq.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_guc_submit.c | 11 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_hwmon.c | 384 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_lrc.c | 23 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_pci.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_pcode.c | 11 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_pcode.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_pcode_api.h | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_pxp.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_vm.c | 27 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_vm.h | 69 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_vm_types.h | 8 |
20 files changed, 481 insertions, 176 deletions
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 2169bc969ea1..fcc2677a4229 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -2,6 +2,8 @@ config DRM_XE tristate "Intel Xe Graphics" depends on DRM && PCI && (m || (y && KUNIT=y)) + depends on INTEL_VSEC || !INTEL_VSEC + depends on X86_PLATFORM_DEVICES || !(X86 && ACPI) select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular # the shmem_readpage() which depends upon tmpfs @@ -27,7 +29,6 @@ config DRM_XE select BACKLIGHT_CLASS_DEVICE if ACPI select INPUT if ACPI select ACPI_VIDEO if X86 && ACPI - select X86_PLATFORM_DEVICES if X86 && ACPI select ACPI_WMI if X86 && ACPI select SYNC_FILE select IOSF_MBI diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h index f5e5234857c1..5394a1373a6b 100644 --- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h @@ -38,10 +38,10 @@ #define TEMP_MASK REG_GENMASK(7, 0) #define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0) -#define PKG_PWR_LIM_1 REG_GENMASK(14, 0) -#define PKG_PWR_LIM_1_EN REG_BIT(15) -#define PKG_PWR_LIM_1_TIME REG_GENMASK(23, 17) -#define PKG_PWR_LIM_1_TIME_X REG_GENMASK(23, 22) -#define PKG_PWR_LIM_1_TIME_Y REG_GENMASK(21, 17) +#define PWR_LIM_VAL REG_GENMASK(14, 0) +#define PWR_LIM_EN REG_BIT(15) +#define PWR_LIM_TIME REG_GENMASK(23, 17) +#define PWR_LIM_TIME_X REG_GENMASK(23, 22) +#define PWR_LIM_TIME_Y REG_GENMASK(21, 17) #endif /* _XE_MCHBAR_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h index c7d5d782e3f9..c556a04670ee 100644 --- a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h @@ -18,16 +18,12 @@ #define PVC_GT0_PLATFORM_ENERGY_STATUS XE_REG(0x28106c) #define PVC_GT0_PACKAGE_POWER_SKU XE_REG(0x281080) -#define BMG_PACKAGE_POWER_SKU XE_REG(0x138098) -#define BMG_PACKAGE_POWER_SKU_UNIT XE_REG(0x1380dc) #define BMG_PACKAGE_ENERGY_STATUS XE_REG(0x138120) #define BMG_FAN_1_SPEED XE_REG(0x138140) #define BMG_FAN_2_SPEED XE_REG(0x138170) #define BMG_FAN_3_SPEED XE_REG(0x1381a0) #define BMG_VRAM_TEMPERATURE XE_REG(0x1382c0) #define BMG_PACKAGE_TEMPERATURE XE_REG(0x138434) -#define BMG_PACKAGE_RAPL_LIMIT XE_REG(0x138440) #define BMG_PLATFORM_ENERGY_STATUS XE_REG(0x138458) -#define BMG_PLATFORM_POWER_LIMIT XE_REG(0x138460) #endif /* _XE_PCODE_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index d99d91fe8aa9..7aa2c17825da 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -841,21 +841,6 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, goto out; } - /* Reject BO eviction if BO is bound to current VM. */ - if (evict && ctx->resv) { - struct drm_gpuvm_bo *vm_bo; - - drm_gem_for_each_gpuvm_bo(vm_bo, &bo->ttm.base) { - struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); - - if (xe_vm_resv(vm) == ctx->resv && - xe_vm_in_preempt_fence_mode(vm)) { - ret = -EBUSY; - goto out; - } - } - } - /* * Failed multi-hop where the old_mem is still marked as * TTM_PL_FLAG_TEMPORARY, should just be a dummy move. @@ -1013,6 +998,25 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx, return lret; } +static bool +xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) +{ + struct drm_gpuvm_bo *vm_bo; + + if (!ttm_bo_eviction_valuable(bo, place)) + return false; + + if (!xe_bo_is_xe_bo(bo)) + return true; + + drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) { + if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm))) + return false; + } + + return true; +} + /** * xe_bo_shrink() - Try to shrink an xe bo. * @ctx: The struct ttm_operation_ctx used for shrinking. @@ -1047,7 +1051,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, (flags.purge && !xe_tt->purgeable)) return -EBUSY; - if (!ttm_bo_eviction_valuable(bo, &place)) + if (!xe_bo_eviction_valuable(bo, &place)) return -EBUSY; if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo)) @@ -1588,7 +1592,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { .io_mem_pfn = xe_ttm_io_mem_pfn, .access_memory = xe_ttm_access_memory, .release_notify = xe_ttm_bo_release_notify, - .eviction_valuable = ttm_bo_eviction_valuable, + .eviction_valuable = xe_bo_eviction_valuable, .delete_mem_notify = xe_ttm_bo_delete_mem_notify, .swap_notify = xe_ttm_bo_swap_notify, }; @@ -2431,6 +2435,8 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) .no_wait_gpu = false, .gfp_retry_mayfail = true, }; + struct pin_cookie cookie; + int ret; if (vm) { lockdep_assert_held(&vm->lock); @@ -2440,8 +2446,12 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) ctx.resv = xe_vm_resv(vm); } + cookie = xe_vm_set_validating(vm, allow_res_evict); trace_xe_bo_validate(bo); - return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); + ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); + xe_vm_clear_validating(vm, allow_res_evict, cookie); + + return ret; } bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo) @@ -2557,7 +2567,7 @@ typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe, u64 value); static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = { - [DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_set_pxp_type, + [DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE] = gem_create_set_pxp_type, }; static int gem_create_user_ext_set_property(struct xe_device *xe, diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index 2e657692e5b5..b9440f8c781e 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -115,7 +115,7 @@ auto_link_downgrade_capable_show(struct device *dev, struct device_attribute *at xe_pm_runtime_put(xe); cap = REG_FIELD_GET(LINK_DOWNGRADE, val); - return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE ? true : false); + return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE); } static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_capable); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index c8fa2c011666..6383a1c0d478 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -325,6 +325,10 @@ struct xe_device { u8 has_heci_gscfi:1; /** @info.has_llc: Device has a shared CPU+GPU last level cache */ u8 has_llc:1; + /** @info.has_mbx_power_limits: Device has support to manage power limits using + * pcode mailbox commands. + */ + u8 has_mbx_power_limits:1; /** @info.has_pxp: Device has PXP support */ u8 has_pxp:1; /** @info.has_range_tlb_invalidation: Has range based TLB invalidations */ diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index ce78cee5dec6..fee22358cc09 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -114,7 +114,6 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, static int __xe_exec_queue_init(struct xe_exec_queue *q) { - struct xe_vm *vm = q->vm; int i, err; u32 flags = 0; @@ -132,32 +131,20 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q) flags |= XE_LRC_CREATE_RUNALONE; } - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - return err; - } - for (i = 0; i < q->width; ++i) { q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec, flags); if (IS_ERR(q->lrc[i])) { err = PTR_ERR(q->lrc[i]); - goto err_unlock; + goto err_lrc; } } - if (vm) - xe_vm_unlock(vm); - err = q->ops->init(q); if (err) goto err_lrc; return 0; -err_unlock: - if (vm) - xe_vm_unlock(vm); err_lrc: for (i = i - 1; i >= 0; --i) xe_lrc_put(q->lrc[i]); diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index c250ea773491..308061f0cf37 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -51,7 +51,15 @@ static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched) static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched) { - drm_sched_resubmit_jobs(&sched->base); + struct drm_sched_job *s_job; + + list_for_each_entry(s_job, &sched->base.pending_list, list) { + struct drm_sched_fence *s_fence = s_job->s_fence; + struct dma_fence *hw_fence = s_fence->parent; + + if (hw_fence && !dma_fence_is_signaled(hw_fence)) + sched->base.ops->run_job(s_job); + } } static inline bool diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 868a5d2c1a52..60d9354e7dbf 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -32,13 +32,18 @@ * Xe's Freq provides a sysfs API for frequency management: * * device/tile#/gt#/freq0/<item>_freq *read-only* files: + * * - act_freq: The actual resolved frequency decided by PCODE. * - cur_freq: The current one requested by GuC PC to the PCODE. * - rpn_freq: The Render Performance (RP) N level, which is the minimal one. + * - rpa_freq: The Render Performance (RP) A level, which is the achiveable one. + * Calculated by PCODE at runtime based on multiple running conditions * - rpe_freq: The Render Performance (RP) E level, which is the efficient one. + * Calculated by PCODE at runtime based on multiple running conditions * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one. * * device/tile#/gt#/freq0/<item>_freq *read-write* files: + * * - min_freq: Min frequency request. * - max_freq: Max frequency request. * If max <= min, then freq_min becomes a fixed frequency request. diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 2ad38f6b103e..6d84a52b660a 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -229,6 +229,17 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) static void guc_submit_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + ret = wait_event_timeout(guc->submission_state.fini_wq, + xa_empty(&guc->submission_state.exec_queue_lookup), + HZ * 5); + + drain_workqueue(xe->destroy_wq); + + xe_gt_assert(gt, ret); xa_destroy(&guc->submission_state.exec_queue_lookup); } diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index eb293aec36a0..74f31639b37f 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -52,6 +52,14 @@ enum xe_fan_channel { }; /* + * For platforms that support mailbox commands for power limits, REG_PKG_POWER_SKU_UNIT is + * not supported and below are SKU units to be used. + */ +#define PWR_UNIT 0x3 +#define ENERGY_UNIT 0xe +#define TIME_UNIT 0xa + +/* * SF_* - scale factors for particular quantities according to hwmon spec. */ #define SF_POWER 1000000 /* microwatts */ @@ -60,6 +68,18 @@ enum xe_fan_channel { #define SF_ENERGY 1000000 /* microjoules */ #define SF_TIME 1000 /* milliseconds */ +/* + * PL*_HWMON_ATTR - mapping of hardware power limits to corresponding hwmon power attribute. + */ +#define PL1_HWMON_ATTR hwmon_power_max + +#define PWR_ATTR_TO_STR(attr) (((attr) == hwmon_power_max) ? "PL1" : "Invalid") + +/* + * Timeout for power limit write mailbox command. + */ +#define PL_WRITE_MBX_TIMEOUT_MS (1) + /** * struct xe_hwmon_energy_info - to accumulate energy */ @@ -100,8 +120,80 @@ struct xe_hwmon { struct xe_hwmon_energy_info ei[CHANNEL_MAX]; /** @fi: Fan info for fanN_input */ struct xe_hwmon_fan_info fi[FAN_MAX]; + /** @boot_power_limit_read: is boot power limits read */ + bool boot_power_limit_read; + /** @pl1_on_boot: power limit PL1 on boot */ + u32 pl1_on_boot[CHANNEL_MAX]; }; +static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 attr, int channel, + u32 *uval) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); + u32 val0 = 0, val1 = 0; + int ret = 0; + + ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, + (channel == CHANNEL_CARD) ? + READ_PSYSGPU_POWER_LIMIT : + READ_PACKAGE_POWER_LIMIT, + hwmon->boot_power_limit_read ? + READ_PL_FROM_PCODE : READ_PL_FROM_FW), + &val0, &val1); + + if (ret) { + drm_dbg(&hwmon->xe->drm, "read failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n", + channel, val0, val1, ret); + *uval = 0; + return ret; + } + + /* return the value only if limit is enabled */ + if (attr == PL1_HWMON_ATTR) + *uval = (val0 & PWR_LIM_EN) ? val0 : 0; + else if (attr == hwmon_power_label) + *uval = (val0 & PWR_LIM_EN) ? 1 : 0; + else + *uval = 0; + + return ret; +} + +static int xe_hwmon_pcode_write_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, + u32 uval) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); + u32 val0, val1; + int ret = 0; + + ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, + (channel == CHANNEL_CARD) ? + READ_PSYSGPU_POWER_LIMIT : + READ_PACKAGE_POWER_LIMIT, + hwmon->boot_power_limit_read ? + READ_PL_FROM_PCODE : READ_PL_FROM_FW), + &val0, &val1); + + if (ret) + drm_dbg(&hwmon->xe->drm, "read failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n", + channel, val0, val1, ret); + + if (attr == PL1_HWMON_ATTR) + val0 = uval; + else + return -EIO; + + ret = xe_pcode_write64_timeout(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, + (channel == CHANNEL_CARD) ? + WRITE_PSYSGPU_POWER_LIMIT : + WRITE_PACKAGE_POWER_LIMIT, 0), + val0, val1, PL_WRITE_MBX_TIMEOUT_MS); + if (ret) + drm_dbg(&hwmon->xe->drm, "write failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n", + channel, val0, val1, ret); + return ret; +} + static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, int channel) { @@ -122,29 +214,19 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg } break; case REG_PKG_RAPL_LIMIT: - if (xe->info.platform == XE_BATTLEMAGE) { - if (channel == CHANNEL_PKG) - return BMG_PACKAGE_RAPL_LIMIT; - else - return BMG_PLATFORM_POWER_LIMIT; - } else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) return PVC_GT0_PACKAGE_RAPL_LIMIT; - } else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) { + else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) return PCU_CR_PACKAGE_RAPL_LIMIT; - } break; case REG_PKG_POWER_SKU: - if (xe->info.platform == XE_BATTLEMAGE) - return BMG_PACKAGE_POWER_SKU; - else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) return PVC_GT0_PACKAGE_POWER_SKU; else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) return PCU_CR_PACKAGE_POWER_SKU; break; case REG_PKG_POWER_SKU_UNIT: - if (xe->info.platform == XE_BATTLEMAGE) - return BMG_PACKAGE_POWER_SKU_UNIT; - else if (xe->info.platform == XE_PVC) + if (xe->info.platform == XE_PVC) return PVC_GT0_PACKAGE_POWER_SKU_UNIT; else if (xe->info.platform == XE_DG2) return PCU_CR_PACKAGE_POWER_SKU_UNIT; @@ -181,7 +263,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg return XE_REG(0); } -#define PL1_DISABLE 0 +#define PL_DISABLE 0 /* * HW allows arbitrary PL1 limits to be set but silently clamps these values to @@ -189,67 +271,83 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg * same pattern for sysfs, allow arbitrary PL1 limits to be set but display * clamped values when read. */ -static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *value) +static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value) { u64 reg_val, min, max; struct xe_device *xe = hwmon->xe; struct xe_reg rapl_limit, pkg_power_sku; struct xe_mmio *mmio = xe_root_tile_mmio(xe); - rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); - pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + mutex_lock(&hwmon->hwmon_lock); - /* - * Valid check of REG_PKG_RAPL_LIMIT is already done in xe_hwmon_power_is_visible. - * So not checking it again here. - */ - if (!xe_reg_is_valid(pkg_power_sku)) { - drm_warn(&xe->drm, "pkg_power_sku invalid\n"); - *value = 0; - return; + if (hwmon->xe->info.has_mbx_power_limits) { + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, (u32 *)®_val); + } else { + rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); + pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + + /* + * Valid check of REG_PKG_RAPL_LIMIT is already done in xe_hwmon_power_is_visible. + * So not checking it again here. + */ + if (!xe_reg_is_valid(pkg_power_sku)) { + drm_warn(&xe->drm, "pkg_power_sku invalid\n"); + *value = 0; + goto unlock; + } + reg_val = xe_mmio_read32(mmio, rapl_limit); } - mutex_lock(&hwmon->hwmon_lock); - - reg_val = xe_mmio_read32(mmio, rapl_limit); - /* Check if PL1 limit is disabled */ - if (!(reg_val & PKG_PWR_LIM_1_EN)) { - *value = PL1_DISABLE; + /* Check if PL limits are disabled. */ + if (!(reg_val & PWR_LIM_EN)) { + *value = PL_DISABLE; + drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%016llx\n", + PWR_ATTR_TO_STR(attr), channel, reg_val); goto unlock; } - reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); + reg_val = REG_FIELD_GET(PWR_LIM_VAL, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); - reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku); - min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); - min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); - max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); - max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); - - if (min && max) - *value = clamp_t(u64, *value, min, max); + /* For platforms with mailbox power limit support clamping would be done by pcode. */ + if (!hwmon->xe->info.has_mbx_power_limits) { + reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku); + min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); + max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); + min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); + max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); + if (min && max) + *value = clamp_t(u64, *value, min, max); + } unlock: mutex_unlock(&hwmon->hwmon_lock); } -static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value) +static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channel, long value) { struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); int ret = 0; - u64 reg_val; + u32 reg_val; struct xe_reg rapl_limit; + mutex_lock(&hwmon->hwmon_lock); + rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); - mutex_lock(&hwmon->hwmon_lock); + /* Disable Power Limit and verify, as limit cannot be disabled on all platforms. */ + if (value == PL_DISABLE) { + if (hwmon->xe->info.has_mbx_power_limits) { + drm_dbg(&hwmon->xe->drm, "disabling %s on channel %d\n", + PWR_ATTR_TO_STR(attr), channel); + xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, 0); + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, ®_val); + } else { + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN, 0); + reg_val = xe_mmio_read32(mmio, rapl_limit); + } - /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ - if (value == PL1_DISABLE) { - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN, 0); - reg_val = xe_mmio_read32(mmio, rapl_limit); - if (reg_val & PKG_PWR_LIM_1_EN) { - drm_warn(&hwmon->xe->drm, "PL1 disable is not supported!\n"); + if (reg_val & PWR_LIM_EN) { + drm_warn(&hwmon->xe->drm, "Power limit disable is not supported!\n"); ret = -EOPNOTSUPP; } goto unlock; @@ -257,26 +355,50 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va /* Computation in 64-bits to avoid overflow. Round to nearest. */ reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); - reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val); - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); + reg_val = PWR_LIM_EN | REG_FIELD_PREP(PWR_LIM_VAL, reg_val); + /* + * Clamp power limit to card-firmware default as maximum, as an additional protection to + * pcode clamp. + */ + if (hwmon->xe->info.has_mbx_power_limits) { + if (reg_val > REG_FIELD_GET(PWR_LIM_VAL, hwmon->pl1_on_boot[channel])) { + reg_val = REG_FIELD_GET(PWR_LIM_VAL, hwmon->pl1_on_boot[channel]); + drm_dbg(&hwmon->xe->drm, "Clamping power limit to firmware default 0x%x\n", + reg_val); + } + } + + if (hwmon->xe->info.has_mbx_power_limits) + ret = xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, reg_val); + else + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN | PWR_LIM_VAL, + reg_val); unlock: mutex_unlock(&hwmon->hwmon_lock); return ret; } -static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value) +static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, + long *value) { struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); - struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); - u64 reg_val; + u32 reg_val; + + if (hwmon->xe->info.has_mbx_power_limits) { + /* PL1 is rated max if supported. */ + xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, channel, ®_val); + } else { + /* + * This sysfs file won't be visible if REG_PKG_POWER_SKU is invalid, so valid check + * for this register can be skipped. + * See xe_hwmon_power_is_visible. + */ + struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + + reg_val = xe_mmio_read32(mmio, reg); + } - /* - * This sysfs file won't be visible if REG_PKG_POWER_SKU is invalid, so valid check - * for this register can be skipped. - * See xe_hwmon_power_is_visible. - */ - reg_val = xe_mmio_read32(mmio, reg); reg_val = REG_FIELD_GET(PKG_TDP, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); } @@ -330,23 +452,35 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); u32 x, y, x_w = 2; /* 2 bits */ u64 r, tau4, out; - int sensor_index = to_sensor_dev_attr(attr)->index; + int channel = to_sensor_dev_attr(attr)->index; + u32 power_attr = PL1_HWMON_ATTR; + int ret = 0; xe_pm_runtime_get(hwmon->xe); mutex_lock(&hwmon->hwmon_lock); - r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index)); + if (hwmon->xe->info.has_mbx_power_limits) { + ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r); + if (ret) { + drm_err(&hwmon->xe->drm, + "power interval read fail, ch %d, attr %d, r 0%llx, ret %d\n", + channel, power_attr, r, ret); + r = 0; + } + } else { + r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel)); + } mutex_unlock(&hwmon->hwmon_lock); xe_pm_runtime_put(hwmon->xe); - x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r); - y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r); + x = REG_FIELD_GET(PWR_LIM_TIME_X, r); + y = REG_FIELD_GET(PWR_LIM_TIME_Y, r); /* - * tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17) + * tau = (1 + (x / 4)) * power(2,y), x = bits(23:22), y = bits(21:17) * = (4 | x) << (y - 2) * * Here (y - 2) ensures a 1.x fixed point representation of 1.x @@ -373,14 +507,15 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a u64 tau4, r, max_win; unsigned long val; int ret; - int sensor_index = to_sensor_dev_attr(attr)->index; + int channel = to_sensor_dev_attr(attr)->index; + u32 power_attr = PL1_HWMON_ATTR; ret = kstrtoul(buf, 0, &val); if (ret) return ret; /* - * Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12. + * Max HW supported tau in '(1 + (x / 4)) * power(2,y)' format, x = 0, y = 0x12. * The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds. * * The ideal scenario is for PKG_MAX_WIN to be read from the PKG_PWR_SKU register. @@ -400,11 +535,13 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a tau4 = (u64)((1 << x_w) | x) << y; max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); - if (val > max_win) + if (val > max_win) { + drm_warn(&hwmon->xe->drm, "power_interval invalid val 0x%lx\n", val); return -EINVAL; + } /* val in hw units */ - val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME); + val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME) + 1; /* * Convert val to 1.x * power(2,y) @@ -419,14 +556,21 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a x = (val - (1ul << y)) << x_w >> y; } - rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y); + rxy = REG_FIELD_PREP(PWR_LIM_TIME_X, x) | + REG_FIELD_PREP(PWR_LIM_TIME_Y, y); xe_pm_runtime_get(hwmon->xe); mutex_lock(&hwmon->hwmon_lock); - r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index), - PKG_PWR_LIM_1_TIME, rxy); + if (hwmon->xe->info.has_mbx_power_limits) { + ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r); + r = (r & ~PWR_LIM_TIME) | rxy; + xe_hwmon_pcode_write_power_limit(hwmon, power_attr, channel, r); + } else { + r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel), + PWR_LIM_TIME, rxy); + } mutex_unlock(&hwmon->hwmon_lock); @@ -435,6 +579,7 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a return count; } +/* PSYS PL1 */ static SENSOR_DEVICE_ATTR(power1_max_interval, 0664, xe_hwmon_power_max_interval_show, xe_hwmon_power_max_interval_store, CHANNEL_CARD); @@ -455,10 +600,19 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret = 0; + int channel = index ? CHANNEL_PKG : CHANNEL_CARD; + u32 power_attr = PL1_HWMON_ATTR; + u32 uval; xe_pm_runtime_get(hwmon->xe); - ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index)) ? attr->mode : 0; + if (hwmon->xe->info.has_mbx_power_limits) { + xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, &uval); + ret = (uval & PWR_LIM_EN) ? attr->mode : 0; + } else { + ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, + channel)) ? attr->mode : 0; + } xe_pm_runtime_put(hwmon->xe); @@ -478,8 +632,8 @@ static const struct attribute_group *hwmon_groups[] = { static const struct hwmon_channel_info * const hwmon_info[] = { HWMON_CHANNEL_INFO(temp, HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL), - HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL, - HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT | HWMON_P_LABEL), + HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT, + HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL), HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL), @@ -604,19 +758,27 @@ xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) switch (attr) { case hwmon_power_max: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, + if (hwmon->xe->info.has_mbx_power_limits) { + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &uval); + return (uval) ? 0664 : 0; + } else { + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel)) ? 0664 : 0; + } case hwmon_power_rated_max: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, - channel)) ? 0444 : 0; + if (hwmon->xe->info.has_mbx_power_limits) + return 0; + else + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, + channel)) ? 0444 : 0; case hwmon_power_crit: - if (channel == CHANNEL_PKG) - return (xe_hwmon_pcode_read_i1(hwmon, &uval) || - !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; - break; case hwmon_power_label: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, - channel)) ? 0444 : 0; + if (channel == CHANNEL_CARD) { + xe_hwmon_pcode_read_i1(hwmon, &uval); + return (uval & POWER_SETUP_I1_WATTS) ? (attr == hwmon_power_label) ? + 0444 : 0644 : 0; + } + break; default: return 0; } @@ -628,10 +790,10 @@ xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) { switch (attr) { case hwmon_power_max: - xe_hwmon_power_max_read(hwmon, channel, val); + xe_hwmon_power_max_read(hwmon, attr, channel, val); return 0; case hwmon_power_rated_max: - xe_hwmon_power_rated_max_read(hwmon, channel, val); + xe_hwmon_power_rated_max_read(hwmon, attr, channel, val); return 0; case hwmon_power_crit: return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_POWER); @@ -645,7 +807,7 @@ xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) { switch (attr) { case hwmon_power_max: - return xe_hwmon_power_max_write(hwmon, channel, val); + return xe_hwmon_power_max_write(hwmon, attr, channel, val); case hwmon_power_crit: return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_POWER); default: @@ -965,18 +1127,42 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) int channel; struct xe_reg pkg_power_sku_unit; - /* - * The contents of register PKG_POWER_SKU_UNIT do not change, - * so read it once and store the shift values. - */ - pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0); - if (xe_reg_is_valid(pkg_power_sku_unit)) { - val_sku_unit = xe_mmio_read32(mmio, pkg_power_sku_unit); - hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); - hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); - hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); + if (hwmon->xe->info.has_mbx_power_limits) { + /* Check if card firmware support mailbox power limits commands. */ + if (xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_CARD, + &hwmon->pl1_on_boot[CHANNEL_CARD]) | + xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_PKG, + &hwmon->pl1_on_boot[CHANNEL_PKG])) { + drm_warn(&hwmon->xe->drm, + "Failed to read power limits, check card firmware !\n"); + } else { + drm_info(&hwmon->xe->drm, "Using mailbox commands for power limits\n"); + /* Write default limits to read from pcode from now on. */ + xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_CARD, + hwmon->pl1_on_boot[CHANNEL_CARD]); + xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_PKG, + hwmon->pl1_on_boot[CHANNEL_PKG]); + hwmon->scl_shift_power = PWR_UNIT; + hwmon->scl_shift_energy = ENERGY_UNIT; + hwmon->scl_shift_time = TIME_UNIT; + hwmon->boot_power_limit_read = true; + } + } else { + drm_info(&hwmon->xe->drm, "Using register for power limits\n"); + /* + * The contents of register PKG_POWER_SKU_UNIT do not change, + * so read it once and store the shift values. + */ + pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0); + if (xe_reg_is_valid(pkg_power_sku_unit)) { + val_sku_unit = xe_mmio_read32(mmio, pkg_power_sku_unit); + hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); + hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); + hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); + } } - /* * Initialize 'struct xe_hwmon_energy_info', i.e. set fields to the * first value of the energy register read diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 61a2e87990a9..63d74e27f54c 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -909,10 +909,7 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) static void xe_lrc_finish(struct xe_lrc *lrc) { xe_hw_fence_ctx_finish(&lrc->fence_ctx); - xe_bo_lock(lrc->bo, false); - xe_bo_unpin(lrc->bo); - xe_bo_unlock(lrc->bo); - xe_bo_put(lrc->bo); + xe_bo_unpin_map_no_vm(lrc->bo); xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo); } @@ -1007,7 +1004,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * via VM bind calls. */ - lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size, + lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, lrc_size, ttm_bo_type_kernel, bo_flags); if (IS_ERR(lrc->bo)) @@ -1795,9 +1792,6 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; - if (lrc->bo->vm) - xe_vm_get(lrc->bo->vm); - snapshot->context_desc = xe_lrc_ggtt_addr(lrc); snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc); snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); @@ -1819,14 +1813,12 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) { struct xe_bo *bo; - struct xe_vm *vm; struct iosys_map src; if (!snapshot) return; bo = snapshot->lrc_bo; - vm = bo->vm; snapshot->lrc_bo = NULL; snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); @@ -1846,8 +1838,6 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) xe_bo_unlock(bo); put_bo: xe_bo_put(bo); - if (vm) - xe_vm_put(vm); } void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) @@ -1900,14 +1890,9 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) return; kvfree(snapshot->lrc_snapshot); - if (snapshot->lrc_bo) { - struct xe_vm *vm; - - vm = snapshot->lrc_bo->vm; + if (snapshot->lrc_bo) xe_bo_put(snapshot->lrc_bo); - if (vm) - xe_vm_put(vm); - } + kfree(snapshot); } diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 024175cfe61e..ac4beaed58ff 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -66,6 +66,7 @@ struct xe_device_desc { u8 has_heci_gscfi:1; u8 has_heci_cscfi:1; u8 has_llc:1; + u8 has_mbx_power_limits:1; u8 has_pxp:1; u8 has_sriov:1; u8 needs_scratch:1; @@ -306,6 +307,7 @@ static const struct xe_device_desc dg2_desc = { DG2_FEATURES, .has_display = true, .has_fan_control = true, + .has_mbx_power_limits = false, }; static const __maybe_unused struct xe_device_desc pvc_desc = { @@ -317,6 +319,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { .has_heci_gscfi = 1, .max_remote_tiles = 1, .require_force_probe = true, + .has_mbx_power_limits = false, }; static const struct xe_device_desc mtl_desc = { @@ -342,6 +345,7 @@ static const struct xe_device_desc bmg_desc = { .dma_mask_size = 46, .has_display = true, .has_fan_control = true, + .has_mbx_power_limits = true, .has_heci_cscfi = 1, .needs_scratch = true, }; @@ -584,6 +588,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.dma_mask_size = desc->dma_mask_size; xe->info.is_dgfx = desc->is_dgfx; xe->info.has_fan_control = desc->has_fan_control; + xe->info.has_mbx_power_limits = desc->has_mbx_power_limits; xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.has_heci_cscfi = desc->has_heci_cscfi; xe->info.has_llc = desc->has_llc; diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index cf955b3ed52c..9189117fe825 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -109,6 +109,17 @@ int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 data, int timeout return err; } +int xe_pcode_write64_timeout(struct xe_tile *tile, u32 mbox, u32 data0, u32 data1, int timeout) +{ + int err; + + mutex_lock(&tile->pcode.lock); + err = pcode_mailbox_rw(tile, mbox, &data0, &data1, timeout, false, false); + mutex_unlock(&tile->pcode.lock); + + return err; +} + int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1) { int err; diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h index ba33991d72a7..de38f44f3201 100644 --- a/drivers/gpu/drm/xe/xe_pcode.h +++ b/drivers/gpu/drm/xe/xe_pcode.h @@ -18,6 +18,9 @@ int xe_pcode_init_min_freq_table(struct xe_tile *tile, u32 min_gt_freq, int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1); int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 val, int timeout_ms); +int xe_pcode_write64_timeout(struct xe_tile *tile, u32 mbox, u32 data0, + u32 data1, int timeout); + #define xe_pcode_write(tile, mbox, val) \ xe_pcode_write_timeout(tile, mbox, val, 1) diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 127d4d26c4cf..0befdea77db1 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -43,6 +43,13 @@ #define POWER_SETUP_I1_SHIFT 6 /* 10.6 fixed point format */ #define POWER_SETUP_I1_DATA_MASK REG_GENMASK(15, 0) +#define READ_PSYSGPU_POWER_LIMIT 0x6 +#define WRITE_PSYSGPU_POWER_LIMIT 0x7 +#define READ_PACKAGE_POWER_LIMIT 0x8 +#define WRITE_PACKAGE_POWER_LIMIT 0x9 +#define READ_PL_FROM_FW 0x1 +#define READ_PL_FROM_PCODE 0x0 + #define PCODE_FREQUENCY_CONFIG 0x6e /* Frequency Config Sub Commands (param1) */ #define PCODE_MBOX_FC_SC_READ_FUSED_P0 0x0 diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c index 454ea7dc08ac..b5bc15f436fa 100644 --- a/drivers/gpu/drm/xe/xe_pxp.c +++ b/drivers/gpu/drm/xe/xe_pxp.c @@ -541,10 +541,14 @@ int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) */ xe_pm_runtime_get(pxp->xe); - if (!pxp_prerequisites_done(pxp)) { - ret = -EBUSY; + /* get_readiness_status() returns 0 for in-progress and 1 for done */ + ret = xe_pxp_get_readiness_status(pxp); + if (ret <= 0) { + if (!ret) + ret = -EBUSY; goto out; } + ret = 0; wait_for_idle: /* diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 79323c78130f..861577746929 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1678,13 +1678,21 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) * scheduler drops all the references of it, hence protecting the VM * for this case is necessary. */ - if (flags & XE_VM_FLAG_LR_MODE) + if (flags & XE_VM_FLAG_LR_MODE) { + INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); xe_pm_runtime_get_noresume(xe); + } + + if (flags & XE_VM_FLAG_FAULT_MODE) { + err = xe_svm_init(vm); + if (err) + goto err_no_resv; + } vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); if (!vm_resv_obj) { err = -ENOMEM; - goto err_no_resv; + goto err_svm_fini; } drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, @@ -1724,10 +1732,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->batch_invalidate_tlb = true; } - if (vm->flags & XE_VM_FLAG_LR_MODE) { - INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); + if (vm->flags & XE_VM_FLAG_LR_MODE) vm->batch_invalidate_tlb = false; - } /* Fill pt_root after allocating scratch tables */ for_each_tile(tile, xe, id) { @@ -1757,12 +1763,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) } } - if (flags & XE_VM_FLAG_FAULT_MODE) { - err = xe_svm_init(vm); - if (err) - goto err_close; - } - if (number_tiles > 1) vm->composite_fence_ctx = dma_fence_context_alloc(1); @@ -1776,6 +1776,11 @@ err_close: xe_vm_close_and_put(vm); return ERR_PTR(err); +err_svm_fini: + if (flags & XE_VM_FLAG_FAULT_MODE) { + vm->size = 0; /* close the vm */ + xe_svm_fini(vm); + } err_no_resv: mutex_destroy(&vm->snap_mutex); for_each_tile(tile, xe, id) diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 0ef811fc2bde..494af6bdc646 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -301,6 +301,75 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); +/** + * xe_vm_set_validating() - Register this task as currently making bos resident + * @allow_res_evict: Allow eviction of buffer objects bound to @vm when + * validating. + * @vm: Pointer to the vm or NULL. + * + * Register this task as currently making bos resident for the vm. Intended + * to avoid eviction by the same task of shared bos bound to the vm. + * Call with the vm's resv lock held. + * + * Return: A pin cookie that should be used for xe_vm_clear_validating(). + */ +static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm, + bool allow_res_evict) +{ + struct pin_cookie cookie = {}; + + if (vm && !allow_res_evict) { + xe_vm_assert_held(vm); + cookie = lockdep_pin_lock(&xe_vm_resv(vm)->lock.base); + /* Pairs with READ_ONCE in xe_vm_is_validating() */ + WRITE_ONCE(vm->validating, current); + } + + return cookie; +} + +/** + * xe_vm_clear_validating() - Unregister this task as currently making bos resident + * @vm: Pointer to the vm or NULL + * @allow_res_evict: Eviction from @vm was allowed. Must be set to the same + * value as for xe_vm_set_validation(). + * @cookie: Cookie obtained from xe_vm_set_validating(). + * + * Register this task as currently making bos resident for the vm. Intended + * to avoid eviction by the same task of shared bos bound to the vm. + * Call with the vm's resv lock held. + */ +static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict, + struct pin_cookie cookie) +{ + if (vm && !allow_res_evict) { + lockdep_unpin_lock(&xe_vm_resv(vm)->lock.base, cookie); + /* Pairs with READ_ONCE in xe_vm_is_validating() */ + WRITE_ONCE(vm->validating, NULL); + } +} + +/** + * xe_vm_is_validating() - Whether bos bound to the vm are currently being made resident + * by the current task. + * @vm: Pointer to the vm. + * + * If this function returns %true, we should be in a vm resv locked region, since + * the current process is the same task that called xe_vm_set_validating(). + * The function asserts that that's indeed the case. + * + * Return: %true if the task is currently making bos resident, %false otherwise. + */ +static inline bool xe_vm_is_validating(struct xe_vm *vm) +{ + /* Pairs with WRITE_ONCE in xe_vm_is_validating() */ + if (READ_ONCE(vm->validating) == current) { + xe_vm_assert_held(vm); + return true; + } + return false; +} + #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); #else diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 1662604c4486..1979e9bdbdf3 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -310,6 +310,14 @@ struct xe_vm { * protected by the vm resv. */ u64 tlb_flush_seqno; + /** + * @validating: The task that is currently making bos resident for this vm. + * Protected by the VM's resv for writing. Opportunistic reading can be done + * using READ_ONCE. Note: This is a workaround for the + * TTM eviction_valuable() callback not being passed a struct + * ttm_operation_context(). Future work might want to address this. + */ + struct task_struct *validating; /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ bool batch_invalidate_tlb; /** @xef: XE file handle for tracking this VM's drm client */ |