32 files changed, 756 insertions, 244 deletions
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index adbb9bce15a5..4ca917ac6382 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -111,7 +111,7 @@ Description: RO. Package current voltage in millivolt. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp2_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.15 Contact: intel-xe@lists.freedesktop.org Description: RO. Package temperature in millidegree Celsius. @@ -119,7 +119,7 @@ Description: RO. Package temperature in millidegree Celsius. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp3_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.15 Contact: intel-xe@lists.freedesktop.org Description: RO. VRAM temperature in millidegree Celsius. @@ -127,7 +127,7 @@ Description: RO. VRAM temperature in millidegree Celsius. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan1_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.16 Contact: intel-xe@lists.freedesktop.org Description: RO. Fan 1 speed in RPM. @@ -135,7 +135,7 @@ Description: RO. Fan 1 speed in RPM. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan2_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.16 Contact: intel-xe@lists.freedesktop.org Description: RO. Fan 2 speed in RPM. @@ -143,7 +143,7 @@ Description: RO. Fan 2 speed in RPM. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan3_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.16 Contact: intel-xe@lists.freedesktop.org Description: RO. Fan 3 speed in RPM. diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 230eb824550f..378dcd0fb414 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -60,7 +60,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, } /* Evict to system. CCS data should be copied. */ - ret = xe_bo_evict(bo, true); + ret = xe_bo_evict(bo); if (ret) { KUNIT_FAIL(test, "Failed to evict bo.\n"); return ret; diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index cedd3e88a6fb..c53f67ce4b0a 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -65,7 +65,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, * the exporter and the importer should be the same bo. */ swap(exported->ttm.base.dma_buf, dmabuf); - ret = xe_bo_evict(exported, true); + ret = xe_bo_evict(exported); swap(exported->ttm.base.dma_buf, dmabuf); if (ret) { if (ret != -EINTR && ret != -ERESTARTSYS) diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 52f89476bf62..4a65e3103f77 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -509,7 +509,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, dma_fence_put(fence); kunit_info(test, "Evict vram buffer object\n"); - ret = xe_bo_evict(vram_bo, true); + ret = xe_bo_evict(vram_bo); if (ret) { KUNIT_FAIL(test, "Failed to evict bo.\n"); return; diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 89ab502a3849..3a84a9d92c48 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1085,6 +1085,80 @@ out_unref: } /** + * xe_bo_notifier_prepare_pinned() - Prepare a pinned VRAM object to be backed + * up in system memory. + * @bo: The buffer object to prepare. 
+ * + * On successful completion, the object backup pages are allocated. Expectation + * is that this is called from the PM notifier, prior to suspend/hibernation. + * + * Return: 0 on success. Negative error code on failure. + */ +int xe_bo_notifier_prepare_pinned(struct xe_bo *bo) +{ + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); + struct xe_bo *backup; + int ret = 0; + + xe_bo_lock(bo, false); + + xe_assert(xe, !bo->backup_obj); + + /* + * Since this is called from the PM notifier we might have raced with + * someone unpinning this after we dropped the pinned list lock and + * grabbing the above bo lock. + */ + if (!xe_bo_is_pinned(bo)) + goto out_unlock_bo; + + if (!xe_bo_is_vram(bo)) + goto out_unlock_bo; + + if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) + goto out_unlock_bo; + + backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); + if (IS_ERR(backup)) { + ret = PTR_ERR(backup); + goto out_unlock_bo; + } + + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ + ttm_bo_pin(&backup->ttm); + bo->backup_obj = backup; + +out_unlock_bo: + xe_bo_unlock(bo); + return ret; +} + +/** + * xe_bo_notifier_unprepare_pinned() - Undo the previous prepare operation. + * @bo: The buffer object to undo the prepare for. + * + * Always returns 0. The backup object is removed, if still present. Expectation + * is that this is called from the PM notifier when undoing the prepare step. + * + * Return: Always returns 0. + */ +int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo) +{ + xe_bo_lock(bo, false); + if (bo->backup_obj) { + ttm_bo_unpin(&bo->backup_obj->ttm); + xe_bo_put(bo->backup_obj); + bo->backup_obj = NULL; + } + xe_bo_unlock(bo); + + return 0; +} + +/** + * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory + * @bo: The buffer object to move.
* @@ -1098,7 +1172,8 @@ out_unref: int xe_bo_evict_pinned(struct xe_bo *bo) { struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); - struct xe_bo *backup; + struct xe_bo *backup = bo->backup_obj; + bool backup_created = false; bool unmap = false; int ret = 0; @@ -1120,12 +1195,17 @@ int xe_bo_evict_pinned(struct xe_bo *bo) if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) goto out_unlock_bo; - backup = xe_bo_create_locked(xe, NULL, NULL, bo->size, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); - if (IS_ERR(backup)) { - ret = PTR_ERR(backup); - goto out_unlock_bo; + if (!backup) { + backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); + if (IS_ERR(backup)) { + ret = PTR_ERR(backup); + goto out_unlock_bo; + } + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ + backup_created = true; } if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) { @@ -1173,12 +1253,12 @@ int xe_bo_evict_pinned(struct xe_bo *bo) bo->size); } - bo->backup_obj = backup; + if (!bo->backup_obj) + bo->backup_obj = backup; out_backup: xe_bo_vunmap(backup); - xe_bo_unlock(backup); - if (ret) + if (ret && backup_created) xe_bo_put(backup); out_unlock_bo: if (unmap) @@ -1212,15 +1292,12 @@ int xe_bo_restore_pinned(struct xe_bo *bo) if (!backup) return 0; - xe_bo_lock(backup, false); - - ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx); - if (ret) - goto out_backup; + xe_bo_lock(bo, false); - if (WARN_ON(!dma_resv_trylock(bo->ttm.base.resv))) { - ret = -EBUSY; - goto out_backup; + if (!xe_bo_is_pinned(backup)) { + ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx); + if (ret) + goto out_unlock_bo; } if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) { @@ -1261,7 +1338,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo) if (iosys_map_is_null(&bo->vmap)) { ret = xe_bo_vmap(bo); if (ret) - goto out_unlock_bo; + goto out_backup; unmap = true; } @@ -1271,15 +1348,17 @@ int xe_bo_restore_pinned(struct xe_bo *bo) bo->backup_obj = NULL; +out_backup: + xe_bo_vunmap(backup); + if (!bo->backup_obj) { + if (xe_bo_is_pinned(backup)) + ttm_bo_unpin(&backup->ttm); + xe_bo_put(backup); + } out_unlock_bo: if (unmap) xe_bo_vunmap(bo); xe_bo_unlock(bo); -out_backup: - xe_bo_vunmap(backup); - xe_bo_unlock(backup); - if (!bo->backup_obj) - xe_bo_put(backup); return ret; } @@ -1455,6 +1534,7 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, struct xe_res_cursor cursor; struct xe_vram_region *vram; int bytes_left = len; + int err = 0; xe_bo_assert_held(bo); xe_device_assert_mem_access(xe); @@ -1462,9 +1542,14 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, if (!mem_type_is_vram(ttm_bo->resource->mem_type)) return -EIO; - /* FIXME: Use GPU for non-visible VRAM */ - if (!xe_ttm_resource_visible(ttm_bo->resource)) - return -EIO; + if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) { + struct xe_migrate *migrate = + mem_type_to_migrate(xe, ttm_bo->resource->mem_type); + + err = xe_migrate_access_memory(migrate, bo, offset, buf, len, + write); + goto out; + } vram = res_to_mem_region(ttm_bo->resource); xe_res_first(ttm_bo->resource, offset & PAGE_MASK, @@ -1488,7 +1573,8 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, xe_res_next(&cursor, PAGE_SIZE); } while (bytes_left); - return len; +out: + return err ?: len; } 
const struct ttm_device_funcs xe_ttm_funcs = { @@ -1532,6 +1618,9 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) if (bo->vm && xe_bo_is_user(bo)) xe_vm_put(bo->vm); + if (bo->parent_obj) + xe_bo_put(bo->parent_obj); + mutex_lock(&xe->mem_access.vram_userfault.lock); if (!list_empty(&bo->vram_userfault_link)) list_del(&bo->vram_userfault_link); @@ -2306,6 +2395,13 @@ void xe_bo_unpin(struct xe_bo *bo) xe_assert(xe, !list_empty(&bo->pinned_link)); list_del_init(&bo->pinned_link); spin_unlock(&xe->pinned.lock); + + if (bo->backup_obj) { + if (xe_bo_is_pinned(bo->backup_obj)) + ttm_bo_unpin(&bo->backup_obj->ttm); + xe_bo_put(bo->backup_obj); + bo->backup_obj = NULL; + } } ttm_bo_unpin(&bo->ttm); if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index b39b46570808..02ada1fb8a23 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -277,6 +277,8 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type); int xe_bo_evict(struct xe_bo *bo); int xe_bo_evict_pinned(struct xe_bo *bo); +int xe_bo_notifier_prepare_pinned(struct xe_bo *bo); +int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo); int xe_bo_restore_pinned(struct xe_bo *bo); int xe_bo_dma_unmap_pinned(struct xe_bo *bo); diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 2bf74eb7f281..ed3746d32b27 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -34,7 +34,13 @@ static int xe_bo_apply_to_pinned(struct xe_device *xe, ret = pinned_fn(bo); if (ret && pinned_list != new_list) { spin_lock(&xe->pinned.lock); - list_move(&bo->pinned_link, pinned_list); + /* + * We might no longer be pinned, since PM notifier can + * call this. If the pinned link is now empty, keep it + * that way. + */ + if (!list_empty(&bo->pinned_link)) + list_move(&bo->pinned_link, pinned_list); spin_unlock(&xe->pinned.lock); } xe_bo_put(bo); @@ -47,25 +53,60 @@ static int xe_bo_apply_to_pinned(struct xe_device *xe, } /** - * xe_bo_evict_all - evict all BOs from VRAM + * xe_bo_notifier_prepare_all_pinned() - Pre-allocate the backing pages for all + * pinned VRAM objects which need to be saved. + * @xe: xe device * + * Should be called from PM notifier when preparing for s3/s4. + * + * Return: 0 on success, negative error code on error. + */ +int xe_bo_notifier_prepare_all_pinned(struct xe_device *xe) +{ + int ret; + + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present, + &xe->pinned.early.kernel_bo_present, + xe_bo_notifier_prepare_pinned); + if (!ret) + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, + &xe->pinned.late.kernel_bo_present, + xe_bo_notifier_prepare_pinned); + + return ret; +} + +/** + * xe_bo_notifier_unprepare_all_pinned() - Remove the backing pages for all + * pinned VRAM objects which have been restored. * @xe: xe device * - * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next - * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU. - * All eviction magic done via TTM calls. + * Should be called from PM notifier after exiting s3/s4 (either on success or + * failure). 
+ */ +void xe_bo_notifier_unprepare_all_pinned(struct xe_device *xe) +{ + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present, + &xe->pinned.early.kernel_bo_present, + xe_bo_notifier_unprepare_pinned); + + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, + &xe->pinned.late.kernel_bo_present, + xe_bo_notifier_unprepare_pinned); +} + +/** + * xe_bo_evict_all_user - evict all non-pinned user BOs from VRAM + * @xe: xe device * - * Evict == move VRAM BOs to temporary (typically system) memory. + * Evict non-pinned user BOs (via GPU). * - * This function should be called before the device goes into a suspend state - * where the VRAM loses power. + * Evict == move VRAM BOs to temporary (typically system) memory. */ -int xe_bo_evict_all(struct xe_device *xe) +int xe_bo_evict_all_user(struct xe_device *xe) { struct ttm_device *bdev = &xe->ttm; - struct xe_tile *tile; u32 mem_type; - u8 id; int ret; /* User memory */ @@ -91,9 +132,34 @@ int xe_bo_evict_all(struct xe_device *xe) } } - ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external, - &xe->pinned.late.external, - xe_bo_evict_pinned); + return 0; +} + +/** + * xe_bo_evict_all - evict all BOs from VRAM + * @xe: xe device + * + * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next + * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU. + * All eviction magic done via TTM calls. + * + * Evict == move VRAM BOs to temporary (typically system) memory. + * + * This function should be called before the device goes into a suspend state + * where the VRAM loses power. + */ +int xe_bo_evict_all(struct xe_device *xe) +{ + struct xe_tile *tile; + u8 id; + int ret; + + ret = xe_bo_evict_all_user(xe); + if (ret) + return ret; + + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, + &xe->pinned.late.evicted, xe_bo_evict_pinned); if (!ret) ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h index d63eb3fc5cc9..e8385cb7f5e9 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.h +++ b/drivers/gpu/drm/xe/xe_bo_evict.h @@ -9,6 +9,9 @@ struct xe_device; int xe_bo_evict_all(struct xe_device *xe); +int xe_bo_evict_all_user(struct xe_device *xe); +int xe_bo_notifier_prepare_all_pinned(struct xe_device *xe); +void xe_bo_notifier_unprepare_all_pinned(struct xe_device *xe); int xe_bo_restore_early(struct xe_device *xe); int xe_bo_restore_late(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 81396181aaea..eb5e83c5f233 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -30,6 +30,8 @@ struct xe_bo { struct ttm_buffer_object ttm; /** @backup_obj: The backup object when pinned and suspended (vram only) */ struct xe_bo *backup_obj; + /** @parent_obj: Ref to parent bo if this a backup_obj */ + struct xe_bo *parent_obj; /** @size: Size of this buffer object */ size_t size; /** @flags: flags for this buffer object */ diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 81b9d9bb3f57..a9e618abf8ac 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -80,7 +80,8 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q) return &q->gt->uc.guc; } -static ssize_t __xe_devcoredump_read(char *buffer, size_t count, +static ssize_t __xe_devcoredump_read(char *buffer, ssize_t count, + ssize_t start, struct xe_devcoredump 
*coredump) { struct xe_device *xe; @@ -94,7 +95,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, ss = &coredump->snapshot; iter.data = buffer; - iter.start = 0; + iter.start = start; iter.remain = count; p = drm_coredump_printer(&iter); @@ -168,6 +169,8 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) ss->vm = NULL; } +#define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G) + static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { @@ -183,6 +186,9 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + xe_pm_runtime_get(gt_to_xe(ss->gt)); + mutex_lock(&coredump->lock); if (!ss->read.buffer) { @@ -195,12 +201,26 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, return 0; } + if (offset >= ss->read.chunk_position + XE_DEVCOREDUMP_CHUNK_MAX || + offset < ss->read.chunk_position) { + ss->read.chunk_position = + ALIGN_DOWN(offset, XE_DEVCOREDUMP_CHUNK_MAX); + + __xe_devcoredump_read(ss->read.buffer, + XE_DEVCOREDUMP_CHUNK_MAX, + ss->read.chunk_position, coredump); + } + byte_copied = count < ss->read.size - offset ? count : ss->read.size - offset; - memcpy(buffer, ss->read.buffer + offset, byte_copied); + memcpy(buffer, ss->read.buffer + + (offset % XE_DEVCOREDUMP_CHUNK_MAX), byte_copied); mutex_unlock(&coredump->lock); + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + xe_pm_runtime_put(gt_to_xe(ss->gt)); + return byte_copied; } @@ -254,17 +274,32 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); - xe_pm_runtime_put(xe); + ss->read.chunk_position = 0; /* Calculate devcoredump size */ - ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); - - ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); - if (!ss->read.buffer) - return; + ss->read.size = __xe_devcoredump_read(NULL, LONG_MAX, 0, coredump); + + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) { + ss->read.buffer = kvmalloc(XE_DEVCOREDUMP_CHUNK_MAX, + GFP_USER); + if (!ss->read.buffer) + goto put_pm; + + __xe_devcoredump_read(ss->read.buffer, + XE_DEVCOREDUMP_CHUNK_MAX, + 0, coredump); + } else { + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); + if (!ss->read.buffer) + goto put_pm; + + __xe_devcoredump_read(ss->read.buffer, ss->read.size, 0, + coredump); + xe_devcoredump_snapshot_free(ss); + } - __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); - xe_devcoredump_snapshot_free(ss); +put_pm: + xe_pm_runtime_put(xe); } static void devcoredump_snapshot(struct xe_devcoredump *coredump, @@ -425,7 +460,7 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffi if (offset & 3) drm_printf(p, "Offset not word aligned: %zu", offset); - line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL); + line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_ATOMIC); if (!line_buff) { drm_printf(p, "Failed to allocate line buffer\n"); return; diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 1a1d16a96b2d..a174385a6d83 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -66,6 +66,8 @@ struct xe_devcoredump_snapshot { struct { /** @read.size: size of devcoredump in human readable format */ ssize_t size; + /** @read.chunk_position: position of 
devcoredump chunk */ + ssize_t chunk_position; /** @read.buffer: buffer of devcoredump in human readable format */ char *buffer; } read; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 0369fc09c9da..495bc00ebed4 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -522,6 +522,9 @@ struct xe_device { struct mutex lock; } d3cold; + /** @pm_notifier: Our PM notifier to perform actions in response to various PM events. */ + struct notifier_block pm_notifier; + /** @pmt: Support the PMT driver callback interface */ struct { /** @pmt.lock: protect access for telemetry data */ diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index f2bb9168967c..e2bb156c71fb 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -52,6 +52,8 @@ struct xe_eu_stall_data_stream { struct xe_gt *gt; struct xe_bo *bo; + /* Lock to protect data buffer pointers */ + struct mutex xecore_buf_lock; struct per_xecore_buf *xecore_buf; struct { bool reported_to_user; @@ -208,6 +210,9 @@ int xe_eu_stall_init(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); int ret; + if (!xe_eu_stall_supported_on_platform(xe)) + return 0; + gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL); if (!gt->eu_stall) { ret = -ENOMEM; @@ -378,7 +383,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) u16 group, instance; unsigned int xecore; - mutex_lock(>->eu_stall->stream_lock); + mutex_lock(&stream->xecore_buf_lock); for_each_dss_steering(xecore, gt, group, instance) { xecore_buf = &stream->xecore_buf[xecore]; read_ptr = xecore_buf->read; @@ -396,7 +401,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) set_bit(xecore, stream->data_drop.mask); xecore_buf->write = write_ptr; } - mutex_unlock(>->eu_stall->stream_lock); + mutex_unlock(&stream->xecore_buf_lock); return min_data_present; } @@ -511,11 +516,13 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st unsigned int xecore; int ret = 0; + mutex_lock(&stream->xecore_buf_lock); if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) { if (!stream->data_drop.reported_to_user) { stream->data_drop.reported_to_user = true; xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n", XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask); + mutex_unlock(&stream->xecore_buf_lock); return -EIO; } stream->data_drop.reported_to_user = false; @@ -527,6 +534,7 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st if (ret || count == total_size) break; } + mutex_unlock(&stream->xecore_buf_lock); return total_size ?: (ret ?: -EAGAIN); } @@ -583,6 +591,7 @@ static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream) { struct xe_gt *gt = stream->gt; + mutex_destroy(&stream->xecore_buf_lock); gt->eu_stall->stream = NULL; kfree(stream); } @@ -718,6 +727,7 @@ static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream, } init_waitqueue_head(&stream->poll_wq); + mutex_init(&stream->xecore_buf_lock); INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn); stream->per_xecore_buf_size = per_xecore_buf_size; stream->sampling_rate_mult = props->sampling_rate_mult; diff --git a/drivers/gpu/drm/xe/xe_eu_stall.h b/drivers/gpu/drm/xe/xe_eu_stall.h index ed9d0f233566..d1c76e503799 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.h +++ b/drivers/gpu/drm/xe/xe_eu_stall.h @@ -7,6 +7,7 @@ #define 
__XE_EU_STALL_H__ #include "xe_gt_types.h" +#include "xe_sriov.h" size_t xe_eu_stall_get_per_xecore_buf_size(void); size_t xe_eu_stall_data_record_size(struct xe_device *xe); @@ -19,6 +20,6 @@ int xe_eu_stall_stream_open(struct drm_device *dev, static inline bool xe_eu_stall_supported_on_platform(struct xe_device *xe) { - return xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20; + return !IS_SRIOV_VF(xe) && (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20); } #endif diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 604bdc7c8173..868a5d2c1a52 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -56,9 +56,10 @@ dev_to_xe(struct device *dev) return gt_to_xe(kobj_to_gt(dev->kobj.parent)); } -static ssize_t act_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t act_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; @@ -68,11 +69,12 @@ static ssize_t act_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static DEVICE_ATTR_RO(act_freq); +static struct kobj_attribute attr_act_freq = __ATTR_RO(act_freq); -static ssize_t cur_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t cur_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; ssize_t ret; @@ -85,11 +87,12 @@ static ssize_t cur_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static DEVICE_ATTR_RO(cur_freq); +static struct kobj_attribute attr_cur_freq = __ATTR_RO(cur_freq); -static ssize_t rp0_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t rp0_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; @@ -99,11 +102,12 @@ static ssize_t rp0_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static DEVICE_ATTR_RO(rp0_freq); +static struct kobj_attribute attr_rp0_freq = __ATTR_RO(rp0_freq); -static ssize_t rpe_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t rpe_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; @@ -113,11 +117,12 @@ static ssize_t rpe_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static DEVICE_ATTR_RO(rpe_freq); +static struct kobj_attribute attr_rpe_freq = __ATTR_RO(rpe_freq); -static ssize_t rpa_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t rpa_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; @@ -127,20 +132,22 @@ static ssize_t rpa_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static DEVICE_ATTR_RO(rpa_freq); +static struct kobj_attribute attr_rpa_freq = __ATTR_RO(rpa_freq); -static ssize_t rpn_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t rpn_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); return sysfs_emit(buf, "%d\n", 
xe_guc_pc_get_rpn_freq(pc)); } -static DEVICE_ATTR_RO(rpn_freq); +static struct kobj_attribute attr_rpn_freq = __ATTR_RO(rpn_freq); -static ssize_t min_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t min_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; ssize_t ret; @@ -154,9 +161,10 @@ static ssize_t min_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, - const char *buff, size_t count) +static ssize_t min_freq_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buff, size_t count) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; ssize_t ret; @@ -173,11 +181,12 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, return count; } -static DEVICE_ATTR_RW(min_freq); +static struct kobj_attribute attr_min_freq = __ATTR_RW(min_freq); -static ssize_t max_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t max_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; ssize_t ret; @@ -191,9 +200,10 @@ static ssize_t max_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, - const char *buff, size_t count) +static ssize_t max_freq_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buff, size_t count) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; ssize_t ret; @@ -210,17 +220,17 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, return count; } -static DEVICE_ATTR_RW(max_freq); +static struct kobj_attribute attr_max_freq = __ATTR_RW(max_freq); static const struct attribute *freq_attrs[] = { - &dev_attr_act_freq.attr, - &dev_attr_cur_freq.attr, - &dev_attr_rp0_freq.attr, - &dev_attr_rpa_freq.attr, - &dev_attr_rpe_freq.attr, - &dev_attr_rpn_freq.attr, - &dev_attr_min_freq.attr, - &dev_attr_max_freq.attr, + &attr_act_freq.attr, + &attr_cur_freq.attr, + &attr_rp0_freq.attr, + &attr_rpa_freq.attr, + &attr_rpe_freq.attr, + &attr_rpn_freq.attr, + &attr_min_freq.attr, + &attr_max_freq.attr, NULL }; diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index fbbace7b0b12..c11206410a4d 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -249,9 +249,10 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) return 0; } -static ssize_t name_show(struct device *dev, - struct device_attribute *attr, char *buff) +static ssize_t name_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt_idle *gtidle = dev_to_gtidle(dev); struct xe_guc_pc *pc = gtidle_to_pc(gtidle); ssize_t ret; @@ -262,11 +263,12 @@ static ssize_t name_show(struct device *dev, return ret; } -static DEVICE_ATTR_RO(name); +static struct kobj_attribute name_attr = __ATTR_RO(name); -static ssize_t idle_status_show(struct device *dev, - struct device_attribute *attr, char *buff) +static ssize_t idle_status_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct 
xe_gt_idle *gtidle = dev_to_gtidle(dev); struct xe_guc_pc *pc = gtidle_to_pc(gtidle); enum xe_gt_idle_state state; @@ -277,6 +279,7 @@ static ssize_t idle_status_show(struct device *dev, return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state)); } +static struct kobj_attribute idle_status_attr = __ATTR_RO(idle_status); u64 xe_gt_idle_residency_msec(struct xe_gt_idle *gtidle) { @@ -291,10 +294,11 @@ u64 xe_gt_idle_residency_msec(struct xe_gt_idle *gtidle) return residency; } -static DEVICE_ATTR_RO(idle_status); -static ssize_t idle_residency_ms_show(struct device *dev, - struct device_attribute *attr, char *buff) + +static ssize_t idle_residency_ms_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt_idle *gtidle = dev_to_gtidle(dev); struct xe_guc_pc *pc = gtidle_to_pc(gtidle); u64 residency; @@ -305,12 +309,12 @@ static ssize_t idle_residency_ms_show(struct device *dev, return sysfs_emit(buff, "%llu\n", residency); } -static DEVICE_ATTR_RO(idle_residency_ms); +static struct kobj_attribute idle_residency_attr = __ATTR_RO(idle_residency_ms); static const struct attribute *gt_idle_attrs[] = { - &dev_attr_name.attr, - &dev_attr_idle_status.attr, - &dev_attr_idle_residency_ms.attr, + &name_attr.attr, + &idle_status_attr.attr, + &idle_residency_attr.attr, NULL, }; diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c index 8db78d616b6f..aa962c783cdf 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle.c +++ b/drivers/gpu/drm/xe/xe_gt_throttle.c @@ -114,115 +114,115 @@ static u32 read_reason_vr_tdc(struct xe_gt *gt) return tdc; } -static ssize_t status_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t status_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool status = !!read_status(gt); return sysfs_emit(buff, "%u\n", status); } -static DEVICE_ATTR_RO(status); +static struct kobj_attribute attr_status = __ATTR_RO(status); -static ssize_t reason_pl1_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_pl1_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool pl1 = !!read_reason_pl1(gt); return sysfs_emit(buff, "%u\n", pl1); } -static DEVICE_ATTR_RO(reason_pl1); +static struct kobj_attribute attr_reason_pl1 = __ATTR_RO(reason_pl1); -static ssize_t reason_pl2_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_pl2_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool pl2 = !!read_reason_pl2(gt); return sysfs_emit(buff, "%u\n", pl2); } -static DEVICE_ATTR_RO(reason_pl2); +static struct kobj_attribute attr_reason_pl2 = __ATTR_RO(reason_pl2); -static ssize_t reason_pl4_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_pl4_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool pl4 = !!read_reason_pl4(gt); return sysfs_emit(buff, "%u\n", pl4); } -static DEVICE_ATTR_RO(reason_pl4); +static struct kobj_attribute attr_reason_pl4 = __ATTR_RO(reason_pl4); -static ssize_t reason_thermal_show(struct device *dev, - struct device_attribute *attr, - 
char *buff) +static ssize_t reason_thermal_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool thermal = !!read_reason_thermal(gt); return sysfs_emit(buff, "%u\n", thermal); } -static DEVICE_ATTR_RO(reason_thermal); +static struct kobj_attribute attr_reason_thermal = __ATTR_RO(reason_thermal); -static ssize_t reason_prochot_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_prochot_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool prochot = !!read_reason_prochot(gt); return sysfs_emit(buff, "%u\n", prochot); } -static DEVICE_ATTR_RO(reason_prochot); +static struct kobj_attribute attr_reason_prochot = __ATTR_RO(reason_prochot); -static ssize_t reason_ratl_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_ratl_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool ratl = !!read_reason_ratl(gt); return sysfs_emit(buff, "%u\n", ratl); } -static DEVICE_ATTR_RO(reason_ratl); +static struct kobj_attribute attr_reason_ratl = __ATTR_RO(reason_ratl); -static ssize_t reason_vr_thermalert_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_vr_thermalert_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool thermalert = !!read_reason_vr_thermalert(gt); return sysfs_emit(buff, "%u\n", thermalert); } -static DEVICE_ATTR_RO(reason_vr_thermalert); +static struct kobj_attribute attr_reason_vr_thermalert = __ATTR_RO(reason_vr_thermalert); -static ssize_t reason_vr_tdc_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_vr_tdc_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool tdc = !!read_reason_vr_tdc(gt); return sysfs_emit(buff, "%u\n", tdc); } -static DEVICE_ATTR_RO(reason_vr_tdc); +static struct kobj_attribute attr_reason_vr_tdc = __ATTR_RO(reason_vr_tdc); static struct attribute *throttle_attrs[] = { - &dev_attr_status.attr, - &dev_attr_reason_pl1.attr, - &dev_attr_reason_pl2.attr, - &dev_attr_reason_pl4.attr, - &dev_attr_reason_thermal.attr, - &dev_attr_reason_prochot.attr, - &dev_attr_reason_ratl.attr, - &dev_attr_reason_vr_thermalert.attr, - &dev_attr_reason_vr_tdc.attr, + &attr_status.attr, + &attr_reason_pl1.attr, + &attr_reason_pl2.attr, + &attr_reason_pl4.attr, + &attr_reason_thermal.attr, + &attr_reason_prochot.attr, + &attr_reason_ratl.attr, + &attr_reason_vr_thermalert.attr, + &attr_reason_vr_tdc.attr, NULL }; diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 38866135c019..c5aace59b62c 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1394,6 +1394,7 @@ proto: /* Use data from the GuC response as our return value */ return FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header); } +ALLOW_ERROR_INJECTION(xe_guc_mmio_send_recv, ERRNO); int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len) { diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index f6d523e4c5fe..859a3ba91be5 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ 
b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -105,49 +105,49 @@ struct __guc_capture_parsed_output { * 3. Incorrect order will trigger XE_WARN. */ #define COMMON_XELP_BASE_GLOBAL \ - { FORCEWAKE_GT, REG_32BIT, 0, 0, "FORCEWAKE_GT"} + { FORCEWAKE_GT, REG_32BIT, 0, 0, 0, "FORCEWAKE_GT"} #define COMMON_BASE_ENGINE_INSTANCE \ - { RING_HWSTAM(0), REG_32BIT, 0, 0, "HWSTAM"}, \ - { RING_HWS_PGA(0), REG_32BIT, 0, 0, "RING_HWS_PGA"}, \ - { RING_HEAD(0), REG_32BIT, 0, 0, "RING_HEAD"}, \ - { RING_TAIL(0), REG_32BIT, 0, 0, "RING_TAIL"}, \ - { RING_CTL(0), REG_32BIT, 0, 0, "RING_CTL"}, \ - { RING_MI_MODE(0), REG_32BIT, 0, 0, "RING_MI_MODE"}, \ - { RING_MODE(0), REG_32BIT, 0, 0, "RING_MODE"}, \ - { RING_ESR(0), REG_32BIT, 0, 0, "RING_ESR"}, \ - { RING_EMR(0), REG_32BIT, 0, 0, "RING_EMR"}, \ - { RING_EIR(0), REG_32BIT, 0, 0, "RING_EIR"}, \ - { RING_IMR(0), REG_32BIT, 0, 0, "RING_IMR"}, \ - { RING_IPEHR(0), REG_32BIT, 0, 0, "IPEHR"}, \ - { RING_INSTDONE(0), REG_32BIT, 0, 0, "RING_INSTDONE"}, \ - { INDIRECT_RING_STATE(0), REG_32BIT, 0, 0, "INDIRECT_RING_STATE"}, \ - { RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, "ACTHD"}, \ - { RING_BBADDR(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_BBADDR_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_BBADDR"}, \ - { RING_START(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_START_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_START"}, \ - { RING_DMA_FADD(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_DMA_FADD_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_DMA_FADD"}, \ - { RING_EXECLIST_STATUS_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_EXECLIST_STATUS_HI(0), REG_64BIT_HI_DW, 0, 0, "RING_EXECLIST_STATUS"}, \ - { RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0, 0, "RING_EXECLIST_SQ_CONTENTS"} + { RING_HWSTAM(0), REG_32BIT, 0, 0, 0, "HWSTAM"}, \ + { RING_HWS_PGA(0), REG_32BIT, 0, 0, 0, "RING_HWS_PGA"}, \ + { RING_HEAD(0), REG_32BIT, 0, 0, 0, "RING_HEAD"}, \ + { RING_TAIL(0), REG_32BIT, 0, 0, 0, "RING_TAIL"}, \ + { RING_CTL(0), REG_32BIT, 0, 0, 0, "RING_CTL"}, \ + { RING_MI_MODE(0), REG_32BIT, 0, 0, 0, "RING_MI_MODE"}, \ + { RING_MODE(0), REG_32BIT, 0, 0, 0, "RING_MODE"}, \ + { RING_ESR(0), REG_32BIT, 0, 0, 0, "RING_ESR"}, \ + { RING_EMR(0), REG_32BIT, 0, 0, 0, "RING_EMR"}, \ + { RING_EIR(0), REG_32BIT, 0, 0, 0, "RING_EIR"}, \ + { RING_IMR(0), REG_32BIT, 0, 0, 0, "RING_IMR"}, \ + { RING_IPEHR(0), REG_32BIT, 0, 0, 0, "IPEHR"}, \ + { RING_INSTDONE(0), REG_32BIT, 0, 0, 0, "RING_INSTDONE"}, \ + { INDIRECT_RING_STATE(0), REG_32BIT, 0, 0, 0, "INDIRECT_RING_STATE"}, \ + { RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "ACTHD"}, \ + { RING_BBADDR(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_BBADDR_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_BBADDR"}, \ + { RING_START(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_START_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_START"}, \ + { RING_DMA_FADD(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_DMA_FADD_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_DMA_FADD"}, \ + { RING_EXECLIST_STATUS_LO(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_EXECLIST_STATUS_HI(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_EXECLIST_STATUS"}, \ + { RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_EXECLIST_SQ_CONTENTS"} #define COMMON_XELP_RC_CLASS \ - { RCU_MODE, REG_32BIT, 0, 0, "RCU_MODE"} + { RCU_MODE, REG_32BIT, 0, 0, 0, "RCU_MODE"} #define 
COMMON_XELP_RC_CLASS_INSTDONE \ - { SC_INSTDONE, REG_32BIT, 0, 0, "SC_INSTDONE"}, \ - { SC_INSTDONE_EXTRA, REG_32BIT, 0, 0, "SC_INSTDONE_EXTRA"}, \ - { SC_INSTDONE_EXTRA2, REG_32BIT, 0, 0, "SC_INSTDONE_EXTRA2"} + { SC_INSTDONE, REG_32BIT, 0, 0, 0, "SC_INSTDONE"}, \ + { SC_INSTDONE_EXTRA, REG_32BIT, 0, 0, 0, "SC_INSTDONE_EXTRA"}, \ + { SC_INSTDONE_EXTRA2, REG_32BIT, 0, 0, 0, "SC_INSTDONE_EXTRA2"} #define XELP_VEC_CLASS_REGS \ - { SFC_DONE(0), 0, 0, 0, "SFC_DONE[0]"}, \ - { SFC_DONE(1), 0, 0, 0, "SFC_DONE[1]"}, \ - { SFC_DONE(2), 0, 0, 0, "SFC_DONE[2]"}, \ - { SFC_DONE(3), 0, 0, 0, "SFC_DONE[3]"} + { SFC_DONE(0), 0, 0, 0, 0, "SFC_DONE[0]"}, \ + { SFC_DONE(1), 0, 0, 0, 0, "SFC_DONE[1]"}, \ + { SFC_DONE(2), 0, 0, 0, 0, "SFC_DONE[2]"}, \ + { SFC_DONE(3), 0, 0, 0, 0, "SFC_DONE[3]"} /* XE_LP Global */ static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = { @@ -352,15 +352,16 @@ static const struct __ext_steer_reg xehpg_extregs[] = { static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, const struct __ext_steer_reg *extlist, - int slice_id, int subslice_id) + u32 dss_id, u16 slice_id, u16 subslice_id) { if (!ext || !extlist) return; ext->reg = XE_REG(extlist->reg.__reg.addr); ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1); - ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); + ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id); + ext->dss_id = dss_id; ext->regname = extlist->name; } @@ -397,7 +398,7 @@ static void guc_capture_alloc_steered_lists(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); u16 slice, subslice; - int iter, i, total = 0; + int dss, i, total = 0; const struct __guc_mmio_reg_descr_group *lists = guc->capture->reglists; const struct __guc_mmio_reg_descr_group *list; struct __guc_mmio_reg_descr_group *extlists; @@ -454,15 +455,15 @@ static void guc_capture_alloc_steered_lists(struct xe_guc *guc) /* For steering registers, the list is generated at run-time */ extarray = (struct __guc_mmio_reg_descr *)extlists[0].list; - for_each_dss_steering(iter, gt, slice, subslice) { + for_each_dss_steering(dss, gt, slice, subslice) { for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) { - __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice); + __fill_ext_reg(extarray, &xe_extregs[i], dss, slice, subslice); ++extarray; } if (has_xehpg_extregs) for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) { - __fill_ext_reg(extarray, &xehpg_extregs[i], slice, subslice); + __fill_ext_reg(extarray, &xehpg_extregs[i], dss, slice, subslice); ++extarray; } } @@ -1672,18 +1673,16 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_ { struct xe_gt *gt = snapshot->hwe->gt; struct xe_device *xe = gt_to_xe(gt); - struct xe_guc *guc = >->uc.guc; struct xe_devcoredump *devcoredump = &xe->devcoredump; struct xe_devcoredump_snapshot *devcore_snapshot = &devcoredump->snapshot; struct gcap_reg_list_info *reginfo = NULL; u32 i, last_value = 0; - bool is_ext, low32_ready = false; + bool low32_ready = false; if (!list || !list->list || list->num_regs == 0) return; XE_WARN_ON(!devcore_snapshot->matched_node); - is_ext = list == guc->capture->extlists; reginfo = &devcore_snapshot->matched_node->reginfo[type]; /* @@ -1749,17 +1748,12 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_ */ XE_WARN_ON(low32_ready); - if (is_ext) { - int dss, group, instance; - - group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags); - instance = 
FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags); - dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance); - - drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value); - } else { + if (FIELD_GET(GUC_REGSET_STEERING_NEEDED, reg_desc->flags)) + drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, + reg_desc->dss_id, value); + else drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value); - } + break; } } diff --git a/drivers/gpu/drm/xe/xe_guc_capture_types.h b/drivers/gpu/drm/xe/xe_guc_capture_types.h index ca2d390ccbee..6cb439115597 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture_types.h +++ b/drivers/gpu/drm/xe/xe_guc_capture_types.h @@ -39,6 +39,8 @@ struct __guc_mmio_reg_descr { u32 flags; /** @mask: The mask to apply */ u32 mask; + /** @dss_id: Cached index for steered registers */ + u32 dss_id; /** @regname: Name of the register */ const char *regname; }; diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 0a4fef7d7225..2447de0ebedf 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1089,6 +1089,7 @@ int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, KUNIT_STATIC_STUB_REDIRECT(xe_guc_ct_send_recv, ct, action, len, response_buffer); return guc_ct_send_recv(ct, action, len, response_buffer, false); } +ALLOW_ERROR_INJECTION(xe_guc_ct_send_recv, ERRNO); int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 *response_buffer) diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.c b/drivers/gpu/drm/xe/xe_guc_engine_activity.c index b96fea78df8b..0fb48f8f05d8 100644 --- a/drivers/gpu/drm/xe/xe_guc_engine_activity.c +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.c @@ -304,6 +304,8 @@ static void engine_activity_set_cpu_ts(struct xe_guc *guc, unsigned int index) struct engine_activity_group *eag = &engine_activity->eag[index]; int i, j; + xe_gt_assert(guc_to_gt(guc), index < engine_activity->num_activity_group); + for (i = 0; i < GUC_MAX_ENGINE_CLASSES; i++) for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; j++) eag->engine[i][j].last_cpu_ts = ktime_get(); @@ -374,8 +376,9 @@ static int engine_activity_enable_function_stats(struct xe_guc *guc, int num_vfs return ret; } - for (i = 0; i < engine_activity->num_functions; i++) - engine_activity_set_cpu_ts(guc, i + 1); + /* skip PF as it was already setup */ + for (i = 1; i < engine_activity->num_functions; i++) + engine_activity_set_cpu_ts(guc, i); return 0; } diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 3777cc30d688..8f8e9fdfb2a8 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -669,6 +669,7 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, u32 mocs = 0; u32 tile_y = 0; + xe_gt_assert(gt, !(pitch & 3)); xe_gt_assert(gt, size / pitch <= S16_MAX); xe_gt_assert(gt, pitch / 4 <= S16_MAX); xe_gt_assert(gt, pitch <= U16_MAX); @@ -1546,7 +1547,6 @@ void xe_migrate_wait(struct xe_migrate *m) dma_fence_wait(m->fence, false); } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) static u32 pte_update_cmd_size(u64 size) { u32 num_dword; @@ -1604,8 +1604,12 @@ enum xe_migrate_copy_dir { XE_MIGRATE_COPY_TO_SRAM, }; +#define XE_CACHELINE_BYTES 64ull +#define XE_CACHELINE_MASK (XE_CACHELINE_BYTES - 1) + static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, - unsigned long npages, + unsigned long len, + unsigned long sram_offset, dma_addr_t *sram_addr, u64 vram_addr, const enum xe_migrate_copy_dir dir) { @@ -1615,17 +1619,21 @@ 
static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, struct dma_fence *fence = NULL; u32 batch_size = 2; u64 src_L0_ofs, dst_L0_ofs; - u64 round_update_size; struct xe_sched_job *job; struct xe_bb *bb; u32 update_idx, pt_slot = 0; + unsigned long npages = DIV_ROUND_UP(len + sram_offset, PAGE_SIZE); + unsigned int pitch = len >= PAGE_SIZE && !(len & ~PAGE_MASK) ? + PAGE_SIZE : 4; int err; - if (npages * PAGE_SIZE > MAX_PREEMPTDISABLE_TRANSFER) - return ERR_PTR(-EINVAL); + if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) || + (sram_offset | vram_addr) & XE_CACHELINE_MASK)) + return ERR_PTR(-EOPNOTSUPP); - round_update_size = npages * PAGE_SIZE; - batch_size += pte_update_cmd_size(round_update_size); + xe_assert(xe, npages * PAGE_SIZE <= MAX_PREEMPTDISABLE_TRANSFER); + + batch_size += pte_update_cmd_size(len); batch_size += EMIT_COPY_DW; bb = xe_bb_new(gt, batch_size, use_usm_batch); @@ -1635,22 +1643,21 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, } build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE, - sram_addr, round_update_size); + sram_addr, len + sram_offset); if (dir == XE_MIGRATE_COPY_TO_VRAM) { - src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0); + src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false); } else { src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false); - dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0); + dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; } bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, round_update_size, - XE_PAGE_SIZE); + emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, len, pitch); job = xe_bb_create_migration_job(m->q, bb, xe_migrate_batch_base(m, use_usm_batch), @@ -1698,7 +1705,7 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, dma_addr_t *src_addr, u64 dst_addr) { - return xe_migrate_vram(m, npages, src_addr, dst_addr, + return xe_migrate_vram(m, npages * PAGE_SIZE, 0, src_addr, dst_addr, XE_MIGRATE_COPY_TO_VRAM); } @@ -1719,11 +1726,192 @@ struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, u64 src_addr, dma_addr_t *dst_addr) { - return xe_migrate_vram(m, npages, dst_addr, src_addr, + return xe_migrate_vram(m, npages * PAGE_SIZE, 0, dst_addr, src_addr, XE_MIGRATE_COPY_TO_SRAM); } -#endif +static void xe_migrate_dma_unmap(struct xe_device *xe, dma_addr_t *dma_addr, + int len, int write) +{ + unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE); + + for (i = 0; i < npages; ++i) { + if (!dma_addr[i]) + break; + + dma_unmap_page(xe->drm.dev, dma_addr[i], PAGE_SIZE, + write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); + } + kfree(dma_addr); +} + +static dma_addr_t *xe_migrate_dma_map(struct xe_device *xe, + void *buf, int len, int write) +{ + dma_addr_t *dma_addr; + unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE); + + dma_addr = kcalloc(npages, sizeof(*dma_addr), GFP_KERNEL); + if (!dma_addr) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < npages; ++i) { + dma_addr_t addr; + struct page *page; + + if (is_vmalloc_addr(buf)) + page = vmalloc_to_page(buf); + else + page = virt_to_page(buf); + + addr = dma_map_page(xe->drm.dev, + page, 0, PAGE_SIZE, + write ? 
DMA_TO_DEVICE : + DMA_FROM_DEVICE); + if (dma_mapping_error(xe->drm.dev, addr)) + goto err_fault; + + dma_addr[i] = addr; + buf += PAGE_SIZE; + } + + return dma_addr; + +err_fault: + xe_migrate_dma_unmap(xe, dma_addr, len, write); + return ERR_PTR(-EFAULT); +} + +/** + * xe_migrate_access_memory - Access memory of a BO via GPU + * + * @m: The migration context. + * @bo: buffer object + * @offset: access offset into buffer object + * @buf: pointer to caller memory to read into or write from + * @len: length of access + * @write: write access + * + * Access memory of a BO via GPU either reading in or writing from a passed in + * pointer. Pointer is dma mapped for GPU access and GPU commands are issued to + * read to or write from pointer. + * + * Returns: + * 0 if successful, negative error code on failure. + */ +int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, + unsigned long offset, void *buf, int len, + int write) +{ + struct xe_tile *tile = m->tile; + struct xe_device *xe = tile_to_xe(tile); + struct xe_res_cursor cursor; + struct dma_fence *fence = NULL; + dma_addr_t *dma_addr; + unsigned long page_offset = (unsigned long)buf & ~PAGE_MASK; + int bytes_left = len, current_page = 0; + void *orig_buf = buf; + + xe_bo_assert_held(bo); + + /* Use bounce buffer for small access and unaligned access */ + if (len & XE_CACHELINE_MASK || + ((uintptr_t)buf | offset) & XE_CACHELINE_MASK) { + int buf_offset = 0; + + /* + * Less than ideal for large unaligned access but this should be + * fairly rare, can fixup if this becomes common. + */ + do { + u8 bounce[XE_CACHELINE_BYTES]; + void *ptr = (void *)bounce; + int err; + int copy_bytes = min_t(int, bytes_left, + XE_CACHELINE_BYTES - + (offset & XE_CACHELINE_MASK)); + int ptr_offset = offset & XE_CACHELINE_MASK; + + err = xe_migrate_access_memory(m, bo, + offset & + ~XE_CACHELINE_MASK, + (void *)ptr, + sizeof(bounce), 0); + if (err) + return err; + + if (write) { + memcpy(ptr + ptr_offset, buf + buf_offset, copy_bytes); + + err = xe_migrate_access_memory(m, bo, + offset & ~XE_CACHELINE_MASK, + (void *)ptr, + sizeof(bounce), 0); + if (err) + return err; + } else { + memcpy(buf + buf_offset, ptr + ptr_offset, + copy_bytes); + } + + bytes_left -= copy_bytes; + buf_offset += copy_bytes; + offset += copy_bytes; + } while (bytes_left); + + return 0; + } + + dma_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write); + if (IS_ERR(dma_addr)) + return PTR_ERR(dma_addr); + + xe_res_first(bo->ttm.resource, offset, bo->size - offset, &cursor); + + do { + struct dma_fence *__fence; + u64 vram_addr = vram_region_gpu_offset(bo->ttm.resource) + + cursor.start; + int current_bytes; + + if (cursor.size > MAX_PREEMPTDISABLE_TRANSFER) + current_bytes = min_t(int, bytes_left, + MAX_PREEMPTDISABLE_TRANSFER); + else + current_bytes = min_t(int, bytes_left, cursor.size); + + if (fence) + dma_fence_put(fence); + + __fence = xe_migrate_vram(m, current_bytes, + (unsigned long)buf & ~PAGE_MASK, + dma_addr + current_page, + vram_addr, write ? 
+ XE_MIGRATE_COPY_TO_VRAM : + XE_MIGRATE_COPY_TO_SRAM); + if (IS_ERR(__fence)) { + if (fence) + dma_fence_wait(fence, false); + fence = __fence; + goto out_err; + } + fence = __fence; + + buf += current_bytes; + offset += current_bytes; + current_page = (int)(buf - orig_buf) / PAGE_SIZE; + bytes_left -= current_bytes; + if (bytes_left) + xe_res_next(&cursor, current_bytes); + } while (bytes_left); + + dma_fence_wait(fence, false); + dma_fence_put(fence); + +out_err: + xe_migrate_dma_unmap(xe, dma_addr, len + page_offset, write); + return IS_ERR(fence) ? PTR_ERR(fence) : 0; +} #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) #include "tests/xe_migrate.c" diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 6ff9a963425c..fb9839c1bae0 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -112,6 +112,10 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct ttm_resource *dst, bool copy_only_ccs); +int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, + unsigned long offset, void *buf, int len, + int write); + #define XE_MIGRATE_CLEAR_FLAG_BO_DATA BIT(0) #define XE_MIGRATE_CLEAR_FLAG_CCS_DATA BIT(1) #define XE_MIGRATE_CLEAR_FLAG_FULL (XE_MIGRATE_CLEAR_FLAG_BO_DATA | \ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 07fe994f2a80..882398e09b7e 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -742,7 +742,7 @@ static void xe_pci_remove(struct pci_dev *pdev) return; xe_device_remove(xe); - xe_pm_runtime_fini(xe); + xe_pm_fini(xe); } /* diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index d69b6b2a3061..8813efdcafbb 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -7,6 +7,7 @@ #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" #include "xe_gt_sriov_pf_control.h" +#include "xe_gt_sriov_printk.h" #include "xe_guc_engine_activity.h" #include "xe_pci_sriov.h" #include "xe_pm.h" @@ -121,8 +122,8 @@ static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs, for_each_gt(gt, xe, id) { ret = xe_guc_engine_activity_function_stats(>->uc.guc, num_vfs, enable); if (ret) - xe_sriov_info(xe, "Failed to %s engine activity function stats (%pe)\n", - str_enable_disable(enable), ERR_PTR(ret)); + xe_gt_sriov_info(gt, "Failed to %s engine activity function stats (%pe)\n", + str_enable_disable(enable), ERR_PTR(ret)); } } diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 4e112fbacada..38514cef817e 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -286,6 +286,42 @@ static u32 vram_threshold_value(struct xe_device *xe) return DEFAULT_VRAM_THRESHOLD; } +static int xe_pm_notifier_callback(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier); + int err = 0; + + switch (action) { + case PM_HIBERNATION_PREPARE: + case PM_SUSPEND_PREPARE: + xe_pm_runtime_get(xe); + err = xe_bo_evict_all_user(xe); + if (err) { + drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err); + xe_pm_runtime_put(xe); + break; + } + + err = xe_bo_notifier_prepare_all_pinned(xe); + if (err) { + drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err); + xe_pm_runtime_put(xe); + } + break; + case PM_POST_HIBERNATION: + case PM_POST_SUSPEND: + xe_bo_notifier_unprepare_all_pinned(xe); + xe_pm_runtime_put(xe); + break; + } + + if (err) + return NOTIFY_BAD; + + return NOTIFY_DONE; 
+} + /** * xe_pm_init - Initialize Xe Power Management * @xe: xe device instance @@ -299,6 +335,11 @@ int xe_pm_init(struct xe_device *xe) u32 vram_threshold; int err; + xe->pm_notifier.notifier_call = xe_pm_notifier_callback; + err = register_pm_notifier(&xe->pm_notifier); + if (err) + return err; + /* For now suspend/resume is only allowed with GuC */ if (!xe_device_uc_enabled(xe)) return 0; @@ -308,24 +349,23 @@ int xe_pm_init(struct xe_device *xe) if (xe->d3cold.capable) { err = xe_device_sysfs_init(xe); if (err) - return err; + goto err_unregister; vram_threshold = vram_threshold_value(xe); err = xe_pm_set_vram_threshold(xe, vram_threshold); if (err) - return err; + goto err_unregister; } xe_pm_runtime_init(xe); - return 0; + +err_unregister: + unregister_pm_notifier(&xe->pm_notifier); + return err; } -/** - * xe_pm_runtime_fini - Finalize Runtime PM - * @xe: xe device instance - */ -void xe_pm_runtime_fini(struct xe_device *xe) +static void xe_pm_runtime_fini(struct xe_device *xe) { struct device *dev = xe->drm.dev; @@ -333,6 +373,18 @@ void xe_pm_runtime_fini(struct xe_device *xe) pm_runtime_forbid(dev); } +/** + * xe_pm_fini - Finalize PM + * @xe: xe device instance + */ +void xe_pm_fini(struct xe_device *xe) +{ + if (xe_device_uc_enabled(xe)) + xe_pm_runtime_fini(xe); + + unregister_pm_notifier(&xe->pm_notifier); +} + static void xe_pm_write_callback_task(struct xe_device *xe, struct task_struct *task) { diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 998d1ed64556..59678b310e55 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -17,7 +17,7 @@ int xe_pm_resume(struct xe_device *xe); int xe_pm_init_early(struct xe_device *xe); int xe_pm_init(struct xe_device *xe); -void xe_pm_runtime_fini(struct xe_device *xe); +void xe_pm_fini(struct xe_device *xe); bool xe_pm_runtime_suspended(struct xe_device *xe); int xe_pm_runtime_suspend(struct xe_device *xe); int xe_pm_runtime_resume(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_pxp_debugfs.c b/drivers/gpu/drm/xe/xe_pxp_debugfs.c index ccfbacf08efc..525a2f6bb076 100644 --- a/drivers/gpu/drm/xe/xe_pxp_debugfs.c +++ b/drivers/gpu/drm/xe/xe_pxp_debugfs.c @@ -66,9 +66,18 @@ static int pxp_terminate(struct seq_file *m, void *data) { struct xe_pxp *pxp = node_to_pxp(m->private); struct drm_printer p = drm_seq_file_printer(m); + int ready = xe_pxp_get_readiness_status(pxp); - if (!xe_pxp_is_enabled(pxp)) - return -ENODEV; + if (ready < 0) + return ready; /* disabled or error occurred */ + else if (!ready) + return -EBUSY; /* init still in progress */ + + /* no need for a termination if PXP is not active */ + if (pxp->status != XE_PXP_ACTIVE) { + drm_printf(&p, "PXP not active\n"); + return 0; + } /* simulate a termination interrupt */ spin_lock_irq(&pxp->xe->irq.lock); diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 56b18a293bbc..890f6b2f40e9 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -80,7 +80,7 @@ xe_svm_range_alloc(struct drm_gpusvm *gpusvm) range = kzalloc(sizeof(*range), GFP_KERNEL); if (!range) - return ERR_PTR(-ENOMEM); + return NULL; INIT_LIST_HEAD(&range->garbage_collector_link); xe_vm_get(gpusvm_to_vm(gpusvm)); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 0c69ef6b5ec5..80e56e232685 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3866,6 +3866,9 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) } drm_puts(p, "\n"); + + if 
(drm_coredump_printer_is_full(p)) + return; } } diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index f31eba1c7cab..ab017b05e175 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -345,6 +345,26 @@ drm_coredump_printer(struct drm_print_iterator *iter) } /** + * drm_coredump_printer_is_full() - check if DRM coredump printer output is full + * @p: DRM coredump printer + * + * Checks whether the DRM coredump printer output is full; useful to + * short-circuit coredump printing once the printer is full. + * + * RETURNS: + * True if DRM coredump printer output buffer is full, False otherwise + */ +static inline bool drm_coredump_printer_is_full(struct drm_printer *p) +{ + struct drm_print_iterator *iterator = p->arg; + + if (p->printfn != __drm_printfn_coredump) + return true; + + return !iterator->remain; +} + +/** * drm_seq_file_printer - construct a &drm_printer that outputs to &seq_file * @f: the &struct seq_file to output to
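For context on the xe_vm_snapshot_print() hunk above, here is a user-space model of how the new helper gates further printing: the coredump printer's drm_print_iterator counts output bytes still wanted in remain, and the helper simply tests it for zero (the real helper additionally reports "full" for any printer that is not a coredump printer). All names below are illustrative stand-ins, not the kernel API:

/* User-space model of the drm_coredump_printer_is_full() short-circuit:
 * once the remaining output window hits zero, further formatting is
 * wasted work, so callers bail out early. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct print_iterator {
	size_t remain;	/* bytes of output still wanted */
};

/* Mirrors: return !iterator->remain; */
static bool printer_is_full(const struct print_iterator *it)
{
	return !it->remain;
}

static void emit(struct print_iterator *it, const char *line)
{
	size_t n = strlen(line);

	it->remain -= n < it->remain ? n : it->remain;
	fputs(line, stdout);
}

int main(void)
{
	struct print_iterator it = { .remain = 16 };	/* small window */
	const char *rows[] = { "vma 0\n", "vma 1\n", "vma 2\n", "vma 3\n" };

	for (size_t i = 0; i < sizeof(rows) / sizeof(rows[0]); i++) {
		emit(&it, rows[i]);
		if (printer_is_full(&it)) {	/* stop early, as the xe_vm hunk does */
			puts("(printer full, stopping early)");
			break;
		}
	}
	return 0;
}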