path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
author    Linus Torvalds <torvalds@linux-foundation.org>  2025-10-02 12:47:25 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2025-10-02 12:47:25 -0700
commit    58809f614e0e3f4e12b489bddf680bfeb31c0a20 (patch)
tree      6b1468e6c1fbed9e04b0701ae49b634add62f794 /drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
parent    05a54fa773284d1a7923cdfdd8f0c8dabb98bd26 (diff)
parent    b2ec5ca9d5c2c019e2316f7ba447596d1dcd8fde (diff)
Merge tag 'drm-next-2025-10-01' of https://gitlab.freedesktop.org/drm/kernel
Pull drm updates from Dave Airlie:
 "cross-subsystem:
   - i2c-hid: Make elan touch controllers power on after panel is enabled
   - dt bindings for STM32MP25 SoC
   - pci vgaarb: use screen_info helpers
   - rust pin-init updates
   - add MEI driver for late binding firmware update/load

  uapi:
   - add ioctl for reassigning GEM handles
   - provide boot_display attribute on boot-up devices

  core:
   - document DRM_MODE_PAGE_FLIP_EVENT
   - add vendor specific recovery method to drm device wedged uevent

  gem:
   - Simplify gpuvm locking

  ttm:
   - add interface to populate buffers

  sched:
   - Fix race condition in trace code

  atomic:
   - Reallow no-op async page flips

  display:
   - dp: Fix command length

  video:
   - Improve pixel-format handling for struct screen_info

  rust:
   - drop Opaque<> from ioctl args
   - Alloc:
      - BorrowedPage type and AsPageIter traits
      - Implement Vmalloc::to_page() and VmallocPageIter
   - DMA/Scatterlist:
      - Add dma::DataDirection and type alias for dma_addr_t
      - Abstraction for struct scatterlist and sg_table
   - DRM:
      - simplify use of generics
      - add DriverFile type alias
      - drop Object::SIZE
   - Rust:
      - pin-init tree merge
      - Various methods for AsBytes and FromBytes traits

  gpuvm:
   - Support madvise in Xe driver

  gpusvm:
   - fix hmm_pfn_to_map_order usage in gpusvm

  bridge:
   - Improve and fix ref counting on bridge management
   - cdns-dsi: Various improvements to mode setting
   - Support Solomon SSD2825 plus DT bindings
   - Support Waveshare DSI2DPI plus DT bindings
   - Support Content Protection property
   - display-connector: Improve DP display detection
   - Add support for Radxa Ra620 plus DT bindings
   - adv7511: Provide SPD and HDMI infoframes
   - it6505: Replace crypto_shash with sha()
   - synopsys: Add support for DW DPTX Controller plus DT bindings
   - adv7511: Write full Audio infoframe
   - ite6263: Support vendor-specific infoframes
   - simple: Add support for Realtek RTD2171 DP-to-HDMI plus DT bindings

  panel:
   - panel-edp: Support mt8189 Chromebooks; Support BOE NV140WUM-N64;
     Support SHP LQ134Z1; Fixes
   - panel-simple: Support Olimex LCD-OLinuXino-5CTS plus DT bindings
   - Support Samsung AMS561RA01
   - Support Hydis HV101HD1 plus DT bindings
   - ilitek-ili9881c: Refactor mode setting; Add support for Bestar
     BSD1218-A101KL68 LCD plus DT bindings
   - lvds: Add support for Ampire AMP19201200B5TZQW-T03 to DT bindings
   - edp: Add support for additional mt8189 Chromebook panels
   - lvds: Add DT bindings for EDT ETML0700Z8DHA

  amdgpu:
   - add CRIU support for gem objects
   - RAS updates
   - VCN SRAM load fixes
   - EDID read fixes
   - eDP ALPM support
   - Documentation updates
   - Rework PTE flag generation
   - DCE6 fixes
   - VCN devcoredump cleanup
   - MMHUB client id fixes
   - VCN 5.0.1 RAS support
   - SMU 13.0.x updates
   - Expanded PCIe DPC support
   - Expanded VCN reset support
   - VPE per queue reset support
   - give kernel jobs unique id for tracing
   - pre-populate exported buffers
   - cyan skillfish updates
   - make vbios build number available in sysfs
   - userq updates
   - HDCP updates
   - support MMIO remap page as ttm pool
   - JPEG parser updates
   - DCE6 DC updates
   - use devm for i2c buses
   - GPUVM locking updates
   - Drop non-DC DCE11 code
   - improve fallback handling for pixel encoding

  amdkfd:
   - SVM/page migration fixes
   - debugfs fixes
   - add CRIU support for gem objects
   - SVM updates

  radeon:
   - use dev_warn_once in CS parsers

  xe:
   - add madvise interface
   - add DRM_IOCTL_XE_VM_QUERY_MEMORY_RANGE_ATTRS to query VMA count and
     memory attributes
   - drop L# bank mask reporting from media GT3 on Xe3+
   - add SLPC power_profile sysfs interface
   - add configs attribs to add post/mid context-switch commands
   - handle firmware reported hardware errors notifying userspace with
     device wedged uevent
   - use same dir structure across sysfs/debugfs
   - cleanup and future proof vram region init
   - add G-states and PCI link states to debugfs
   - Add SRIOV support for CCS surfaces on Xe2+
   - Enable SRIOV PF mode by default on supported platforms
   - move flush to common code
   - extended core workarounds for Xe2/3
   - use DRM scheduler for delayed GT TLB invalidations
   - configs improvements and allow VF device enablement
   - prep work to expose mmio regions to userspace
   - VF migration support added
   - prepare GPU SVM for THP migration
   - start fixing XE_PAGE_SIZE vs PAGE_SIZE
   - add PSMI support for hw validation
   - resize VF bars to max possible size according to number of VFs
   - Ensure GT is in C0 during resume
   - pre-populate exported buffers
   - replace xe_hmm with gpusvm
   - add more SVM GT stats to debugfs
   - improve fake pci and WA kunit handle for new platform testing
   - Test GuC to GuC comms to add debugging
   - use attribute groups to simplify sysfs registration
   - add Late Binding firmware code to interact with MEI

  i915:
   - apply multiple JSL/EHL/Gen7/Gen6 workarounds properly
   - protect against overflow in active_engine()
   - Use try_cmpxchg64() in __active_lookup()
   - include GuC registers in error state
   - get rid of dev->struct_mutex
   - iopoll: generalize read_poll_timeout
   - lots more display refactoring
   - Reject HBR3 in any eDP Panel
   - Prune modes for YUV420
   - Display Wa fix, additions, and updates
   - DP: Fix 2.7 Gbps link training on g4x
   - DP: Adjust the idle pattern handling
   - DP: Shuffle the link training code a bit
   - Don't set/read the DSI C clock divider on GLK
   - Enable_psr kernel parameter changes
   - Type-C enabled/disconnected dp-alt sink
   - Wildcat Lake enabling
   - DP HDR updates
   - DRAM detection
   - wait PSR idle on dsb commit
   - Remove FBC modulo 4 restriction for ADL-P+
   - panic: refactor framebuffer allocation

  habanalabs:
   - debug/visibility improvements
   - vmalloc-backed coherent mmap support
   - HLDIO infrastructure

  nova-core:
   - various register!() macro improvements
   - minor vbios/firmware fixes/refactoring
   - advance firmware boot stages; process Booter and patch signatures
   - process GSP and GSP bootloader
   - Add r570.144 firmware bindings and update to it
   - Move GSP boot code to own module
   - Use new pin-init features to store driver's private data in a
     single allocation
   - Update ARef import from sync::aref

  nova-drm:
   - Update ARef import from sync::aref

  tyr:
   - initial driver skeleton for a rust driver for ARM Mali GPUs
   - capable of powering up, querying metadata and providing it to
     userspace

  msm:
   - GPU and Core:
      - in DT bindings describe clocks per GPU type
      - GMU bandwidth voting for x1-85
      - a623/a663 speedbins
      - cleanup some remaining no-iommu leftovers after VM_BIND conversion
      - fix GEM obj 32b size truncation
      - add missing VM_BIND param validation
      - IFPC for x1-85 and a750
      - register xml and gen_header.py sync from mesa
   - Display:
      - add missing bindings for display on SC8180X
      - added DisplayPort MST bindings
      - conversion from round_rate() to determine_rate()

  amdxdna:
   - add IOCTL_AMDXDNA_GET_ARRAY
   - support user space allocated buffers
   - streamline PM interfaces
   - Refactoring wrt. hardware contexts
   - improve error reporting

  nouveau:
   - use GSP firmware by default
   - improve error reporting
   - Pre-populate exported buffers

  ast:
   - Clean up detection of DRAM config

  exynos:
   - add DSIM bridge driver support for Exynos7870
   - Document Exynos7870 DSIM compatible in dt-binding

  panthor:
   - Print task/pid on errors
   - Add support for Mali G710, G510, G310, Gx15, Gx20, Gx25
   - Improve cache flushing
   - Fail VM bind if BO has offset

  renesas:
   - convert to RUNTIME_PM_OPS

  rcar-du:
   - Make number of lanes configurable
   - Use RUNTIME_PM_OPS
   - Add support for DSI commands

  rocket:
   - Add driver for Rockchip NPU plus DT bindings
   - Use kfree() and sizeof() correctly
   - Test DMA status

  rockchip:
   - dsi2: Add support for RK3576 plus DT bindings
   - Add support for RK3588 DPTX output

  tidss:
   - Use crtc_ fields for programming display mode
   - Remove other drivers from aperture

  pixpaper:
   - Add support for Mayqueen Pixpaper plus DT bindings

  v3d:
   - Support querying number of GPU resets for KHR_robustness

  stm:
   - Clean up logging
   - ltdc: Add support for STM32MP257F-EV1 plus DT bindings

  sitronix:
   - st7571-i2c: Add support for inverted displays and 2-bit grayscale

  tidss:
   - Convert to kernel's FIELD_ macros

  vesadrm:
   - Support 8-bit palette mode

  imagination:
   - Improve power management
   - Add support for TH1520 GPU
   - Support Risc-V architectures

  v3d:
   - Improve job management and locking

  vkms:
   - Support variants of ARGB8888, ARGB16161616, RGB565, RGB888 and P01x
   - Support YUV with 16-bit components"

* tag 'drm-next-2025-10-01' of https://gitlab.freedesktop.org/drm/kernel: (1455 commits)
  drm/amd: Add name to modes from amdgpu_connector_add_common_modes()
  drm/amd: Drop some common modes from amdgpu_connector_add_common_modes()
  drm/amdgpu: update MODULE_PARM_DESC for freesync_video
  drm/amd: Use dynamic array size declaration for amdgpu_connector_add_common_modes()
  drm/amd/display: Share dce100_validate_global with DCE6-8
  drm/amd/display: Share dce100_validate_bandwidth with DCE6-8
  drm/amdgpu: Fix fence signaling race condition in userqueue
  amd/amdkfd: enhance kfd process check in switch partition
  amd/amdkfd: resolve a race in amdgpu_amdkfd_device_fini_sw
  drm/amd/display: Reject modes with too high pixel clock on DCE6-10
  drm/amd: Drop unnecessary check in amdgpu_connector_add_common_modes()
  drm/amd/display: Only enable common modes for eDP and LVDS
  drm/amdgpu: remove the redeclaration of variable i
  drm/amdgpu/userq: assign an error code for invalid userq va
  drm/amdgpu: revert "rework reserved VMID handling" v2
  drm/amdgpu: remove leftover from enforcing isolation by VMID
  drm/amdgpu: Add fallback to pipe reset if KCQ ring reset fails
  accel/habanalabs: add Infineon version check
  accel/habanalabs/gaudi2: read preboot status after recovering from dirty state
  accel/habanalabs: add HL_GET_P_STATE passthrough type
  ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 358
1 file changed, 233 insertions, 125 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 8190c24a649a..48e0932f5b62 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -44,8 +44,41 @@ u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
return userq_ip_mask;
}
+int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
+ u64 expected_size)
+{
+ struct amdgpu_bo_va_mapping *va_map;
+ u64 user_addr;
+ u64 size;
+ int r = 0;
+
+ user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
+ size = expected_size >> AMDGPU_GPU_PAGE_SHIFT;
+
+ r = amdgpu_bo_reserve(vm->root.bo, false);
+ if (r)
+ return r;
+
+ va_map = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
+ if (!va_map) {
+ r = -EINVAL;
+ goto out_err;
+ }
+ /* Check that the userq address range is fully contained in the VM mapping */
+ if (user_addr >= va_map->start &&
+ va_map->last - user_addr + 1 >= size) {
+ amdgpu_bo_unreserve(vm->root.bo);
+ return 0;
+ }
+
+ r = -EINVAL;
+out_err:
+ amdgpu_bo_unreserve(vm->root.bo);
+ return r;
+}
+
static int
-amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
+amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_queue *queue)
{
struct amdgpu_device *adev = uq_mgr->adev;
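The hunk above adds amdgpu_userq_input_va_validate(), which reserves the root PD BO, looks up the VM mapping that contains the queue's virtual address, and checks that the mapping has room for the requested size. Both the address and the size are compared in GPU-page units. The following user-space sketch models only that containment arithmetic; the page shift, struct and helper names here are illustrative stand-ins (the real code also masks the address with AMDGPU_GMC_HOLE_MASK and operates on a reserved amdgpu_bo_va_mapping).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define GPU_PAGE_SHIFT 12  /* stand-in for AMDGPU_GPU_PAGE_SHIFT (4 KiB GPU pages) */

/* Hypothetical stand-in for an amdgpu_bo_va_mapping: [start, last] in GPU pages. */
struct va_mapping {
	uint64_t start;
	uint64_t last;
};

/*
 * Model of the containment test: the queue must start inside the mapping and
 * the mapping must still have at least 'bytes' worth of pages from that start.
 */
static bool userq_va_fits(const struct va_mapping *m, uint64_t addr, uint64_t bytes)
{
	uint64_t page  = addr >> GPU_PAGE_SHIFT;
	uint64_t pages = bytes >> GPU_PAGE_SHIFT;

	return page >= m->start && m->last - page + 1 >= pages;
}

int main(void)
{
	struct va_mapping m = { .start = 0x100, .last = 0x1ff }; /* 256 pages */

	printf("%d\n", userq_va_fits(&m, 0x100000, 0x1000)); /* one page at the start: fits */
	printf("%d\n", userq_va_fits(&m, 0x1ff000, 0x2000)); /* two pages at the end: does not fit */
	return 0;
}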
@@ -54,6 +87,49 @@ amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
int r = 0;
if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
+ r = userq_funcs->preempt(uq_mgr, queue);
+ if (r) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ } else {
+ queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
+ }
+ }
+
+ return r;
+}
+
+static int
+amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ int r = 0;
+
+ if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) {
+ r = userq_funcs->restore(uq_mgr, queue);
+ if (r) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ } else {
+ queue->state = AMDGPU_USERQ_STATE_MAPPED;
+ }
+ }
+
+ return r;
+}
+
+static int
+amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ int r = 0;
+
+ if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
+ (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
r = userq_funcs->unmap(uq_mgr, queue);
if (r)
queue->state = AMDGPU_USERQ_STATE_HUNG;
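Taken together, the helpers above form a small per-queue state machine: preempt moves a MAPPED queue to PREEMPTED, restore moves it back to MAPPED, unmap now accepts either MAPPED or PREEMPTED, and any hardware failure parks the queue in HUNG. A stand-alone model of those transitions follows; this is a user-space sketch, not kernel code, and hw_err stands in for the return value of the IP-specific ->preempt()/->restore() callbacks.

#include <stdio.h>

/* Simplified model of the userq states involved in the new helpers. */
enum userq_state { UQ_UNMAPPED, UQ_MAPPED, UQ_PREEMPTED, UQ_HUNG };

struct userq { enum userq_state state; };

/* Mirrors amdgpu_userq_preempt_helper(): only MAPPED queues are preempted. */
static int userq_preempt(struct userq *q, int hw_err)
{
	if (q->state != UQ_MAPPED)
		return 0;
	q->state = hw_err ? UQ_HUNG : UQ_PREEMPTED;
	return hw_err;
}

/* Mirrors amdgpu_userq_restore_helper(): only PREEMPTED queues are restored. */
static int userq_restore(struct userq *q, int hw_err)
{
	if (q->state != UQ_PREEMPTED)
		return 0;
	q->state = hw_err ? UQ_HUNG : UQ_MAPPED;
	return hw_err;
}

int main(void)
{
	struct userq q = { .state = UQ_MAPPED };

	userq_preempt(&q, 0);   /* MAPPED -> PREEMPTED */
	userq_restore(&q, 0);   /* PREEMPTED -> MAPPED */
	userq_preempt(&q, -5);  /* hardware error: MAPPED -> HUNG */
	printf("final state: %d\n", q.state);
	return 0;
}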
@@ -112,22 +188,6 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
kfree(queue);
}
-int
-amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr)
-{
- struct amdgpu_usermode_queue *queue;
- int queue_id;
- int ret = 0;
-
- mutex_lock(&uq_mgr->userq_mutex);
- /* Resume all the queues for this process */
- idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id)
- ret += queue->state == AMDGPU_USERQ_STATE_MAPPED;
-
- mutex_unlock(&uq_mgr->userq_mutex);
- return ret;
-}
-
static struct amdgpu_usermode_queue *
amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
{
@@ -323,6 +383,11 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
debugfs_remove_recursive(queue->debugfs_queue);
#endif
r = amdgpu_userq_unmap_helper(uq_mgr, queue);
+ /* TODO: a userq HW unmap error requires a reset to recover */
+ if (unlikely(r != AMDGPU_USERQ_STATE_UNMAPPED)) {
+ drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n");
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ }
amdgpu_userq_cleanup(uq_mgr, queue, queue_id);
mutex_unlock(&uq_mgr->userq_mutex);
@@ -364,7 +429,7 @@ static int amdgpu_mqd_info_read(struct seq_file *m, void *unused)
return -EINVAL;
}
- seq_printf(m, "queue_type %d\n", queue->queue_type);
+ seq_printf(m, "queue_type: %d\n", queue->queue_type);
seq_printf(m, "mqd_gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(queue->mqd.obj));
amdgpu_bo_unreserve(bo);
@@ -404,27 +469,10 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
(args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
- /* Usermode queues are only supported for GFX IP as of now */
- if (args->in.ip_type != AMDGPU_HW_IP_GFX &&
- args->in.ip_type != AMDGPU_HW_IP_DMA &&
- args->in.ip_type != AMDGPU_HW_IP_COMPUTE) {
- drm_file_err(uq_mgr->file, "Usermode queue doesn't support IP type %u\n",
- args->in.ip_type);
- return -EINVAL;
- }
-
r = amdgpu_userq_priority_permit(filp, priority);
if (r)
return r;
- if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) &&
- (args->in.ip_type != AMDGPU_HW_IP_GFX) &&
- (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) &&
- !amdgpu_is_tmz(adev)) {
- drm_file_err(uq_mgr->file, "Secure only supported on GFX/Compute queues\n");
- return -EINVAL;
- }
-
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n");
@@ -456,6 +504,15 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
r = -ENOMEM;
goto unlock;
}
+
+ /* Validate the userq virtual addresses. */
+ if (amdgpu_userq_input_va_validate(&fpriv->vm, args->in.queue_va, args->in.queue_size) ||
+ amdgpu_userq_input_va_validate(&fpriv->vm, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
+ amdgpu_userq_input_va_validate(&fpriv->vm, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
+ r = -EINVAL;
+ kfree(queue);
+ goto unlock;
+ }
queue->doorbell_handle = args->in.doorbell_handle;
queue->queue_type = args->in.ip_type;
queue->vm = &fpriv->vm;
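With this hunk, queue creation rejects a request whose ring, rptr or wptr virtual addresses are not already mapped in the process VM, so userspace has to allocate and map those buffers before issuing the create request. The sketch below models only the front-end argument checks that the new code applies; the struct is a mock whose field names follow the ones referenced in this file, not the real uapi layout from amdgpu_drm.h, and BO_INVALID_OFFSET is a stand-in for AMDGPU_BO_INVALID_OFFSET.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BO_INVALID_OFFSET ((uint64_t)-1) /* stand-in for AMDGPU_BO_INVALID_OFFSET */

/* Mock of the create arguments validated before any queue is allocated. */
struct userq_create_req {
	uint64_t queue_va;
	uint64_t queue_size;
	uint64_t rptr_va;
	uint64_t wptr_va;
};

/* Front-end checks modelled on amdgpu_userq_input_args_validate(). */
static bool userq_create_args_ok(const struct userq_create_req *req)
{
	if (req->queue_va == 0 || req->queue_va == BO_INVALID_OFFSET ||
	    req->queue_size == 0)
		return false;            /* invalid ring VA or size */
	if (req->rptr_va == 0 || req->wptr_va == 0)
		return false;            /* rptr/wptr must point at real mappings */
	return true;
}

int main(void)
{
	struct userq_create_req req = {
		.queue_va = 0x400000, .queue_size = 0x1000,
		.rptr_va  = 0x401000, .wptr_va   = 0x402000,
	};

	printf("args ok: %d\n", userq_create_args_ok(&req));
	return 0;
}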
@@ -543,22 +600,45 @@ unlock:
return r;
}
-int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp)
+static int amdgpu_userq_input_args_validate(struct drm_device *dev,
+ union drm_amdgpu_userq *args,
+ struct drm_file *filp)
{
- union drm_amdgpu_userq *args = data;
- int r;
+ struct amdgpu_device *adev = drm_to_adev(dev);
switch (args->in.op) {
case AMDGPU_USERQ_OP_CREATE:
if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK |
AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE))
return -EINVAL;
- r = amdgpu_userq_create(filp, args);
- if (r)
- drm_file_err(filp, "Failed to create usermode queue\n");
- break;
+ /* Usermode queues are only supported for GFX IP as of now */
+ if (args->in.ip_type != AMDGPU_HW_IP_GFX &&
+ args->in.ip_type != AMDGPU_HW_IP_DMA &&
+ args->in.ip_type != AMDGPU_HW_IP_COMPUTE) {
+ drm_file_err(filp, "Usermode queue doesn't support IP type %u\n",
+ args->in.ip_type);
+ return -EINVAL;
+ }
+
+ if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) &&
+ (args->in.ip_type != AMDGPU_HW_IP_GFX) &&
+ (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) &&
+ !amdgpu_is_tmz(adev)) {
+ drm_file_err(filp, "Secure only supported on GFX/Compute queues\n");
+ return -EINVAL;
+ }
+ if (args->in.queue_va == AMDGPU_BO_INVALID_OFFSET ||
+ args->in.queue_va == 0 ||
+ args->in.queue_size == 0) {
+ drm_file_err(filp, "invalidate userq queue va or size\n");
+ return -EINVAL;
+ }
+ if (!args->in.wptr_va || !args->in.rptr_va) {
+ drm_file_err(filp, "invalidate userq queue rptr or wptr\n");
+ return -EINVAL;
+ }
+ break;
case AMDGPU_USERQ_OP_FREE:
if (args->in.ip_type ||
args->in.doorbell_handle ||
@@ -568,10 +648,34 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
args->in.queue_size ||
args->in.rptr_va ||
args->in.wptr_va ||
- args->in.wptr_va ||
args->in.mqd ||
args->in.mqd_size)
return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ union drm_amdgpu_userq *args = data;
+ int r;
+
+ if (amdgpu_userq_input_args_validate(dev, args, filp) < 0)
+ return -EINVAL;
+
+ switch (args->in.op) {
+ case AMDGPU_USERQ_OP_CREATE:
+ r = amdgpu_userq_create(filp, args);
+ if (r)
+ drm_file_err(filp, "Failed to create usermode queue\n");
+ break;
+
+ case AMDGPU_USERQ_OP_FREE:
r = amdgpu_userq_destroy(filp, args->in.queue_id);
if (r)
drm_file_err(filp, "Failed to destroy usermode queue\n");
@@ -594,7 +698,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
/* Resume all the queues for this process */
idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
- r = amdgpu_userq_map_helper(uq_mgr, queue);
+ r = amdgpu_userq_restore_helper(uq_mgr, queue);
if (r)
ret = r;
}
@@ -604,108 +708,106 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
return ret;
}
+static int amdgpu_userq_validate_vm(void *param, struct amdgpu_bo *bo)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+/* Handle all BOs on the invalidated list, validate them and update the PTs */
static int
-amdgpu_userq_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
+amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
+ struct amdgpu_vm *vm)
{
struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
int ret;
- amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ spin_lock(&vm->invalidated_lock);
+ while (!list_empty(&vm->invalidated)) {
+ bo_va = list_first_entry(&vm->invalidated,
+ struct amdgpu_bo_va,
+ base.vm_status);
+ spin_unlock(&vm->invalidated_lock);
- ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (ret)
- DRM_ERROR("Fail to validate\n");
+ bo = bo_va->base.bo;
+ ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2);
+ if (unlikely(ret))
+ return ret;
- return ret;
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ return ret;
+
+ /* This moves the bo_va to the done list */
+ ret = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (ret)
+ return ret;
+
+ spin_lock(&vm->invalidated_lock);
+ }
+ spin_unlock(&vm->invalidated_lock);
+
+ return 0;
}
+/* Make sure the whole VM is ready to be used */
static int
-amdgpu_userq_validate_bos(struct amdgpu_userq_mgr *uq_mgr)
+amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
{
struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
- struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va *bo_va;
- struct ww_acquire_ctx *ticket;
struct drm_exec exec;
- struct amdgpu_bo *bo;
- struct dma_resv *resv;
- bool clear, unlock;
- int ret = 0;
+ int ret;
drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
- ret = amdgpu_vm_lock_pd(vm, &exec, 2);
+ ret = amdgpu_vm_lock_pd(vm, &exec, 1);
drm_exec_retry_on_contention(&exec);
- if (unlikely(ret)) {
- drm_file_err(uq_mgr->file, "Failed to lock PD\n");
+ if (unlikely(ret))
goto unlock_all;
- }
-
- /* Lock the done list */
- list_for_each_entry(bo_va, &vm->done, base.vm_status) {
- bo = bo_va->base.bo;
- if (!bo)
- continue;
-
- ret = drm_exec_lock_obj(&exec, &bo->tbo.base);
- drm_exec_retry_on_contention(&exec);
- if (unlikely(ret))
- goto unlock_all;
- }
- }
- spin_lock(&vm->status_lock);
- while (!list_empty(&vm->moved)) {
- bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
- base.vm_status);
- spin_unlock(&vm->status_lock);
-
- /* Per VM BOs never need to bo cleared in the page tables */
- ret = amdgpu_vm_bo_update(adev, bo_va, false);
- if (ret)
+ ret = amdgpu_vm_lock_done_list(vm, &exec, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
goto unlock_all;
- spin_lock(&vm->status_lock);
- }
-
- ticket = &exec.ticket;
- while (!list_empty(&vm->invalidated)) {
- bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
- base.vm_status);
- resv = bo_va->base.bo->tbo.base.resv;
- spin_unlock(&vm->status_lock);
- bo = bo_va->base.bo;
- ret = amdgpu_userq_validate_vm_bo(NULL, bo);
- if (ret) {
- drm_file_err(uq_mgr->file, "Failed to validate BO\n");
+ /* This validates PDs, PTs and per VM BOs */
+ ret = amdgpu_vm_validate(adev, vm, NULL,
+ amdgpu_userq_validate_vm,
+ NULL);
+ if (unlikely(ret))
goto unlock_all;
- }
- /* Try to reserve the BO to avoid clearing its ptes */
- if (!adev->debug_vm && dma_resv_trylock(resv)) {
- clear = false;
- unlock = true;
- /* The caller is already holding the reservation lock */
- } else if (dma_resv_locking_ctx(resv) == ticket) {
- clear = false;
- unlock = false;
- /* Somebody else is using the BO right now */
- } else {
- clear = true;
- unlock = false;
- }
+ /* This locks and validates the remaining evicted BOs */
+ ret = amdgpu_userq_bo_validate(adev, &exec, vm);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unlock_all;
+ }
- ret = amdgpu_vm_bo_update(adev, bo_va, clear);
+ ret = amdgpu_vm_handle_moved(adev, vm, NULL);
+ if (ret)
+ goto unlock_all;
- if (unlock)
- dma_resv_unlock(resv);
- if (ret)
- goto unlock_all;
+ ret = amdgpu_vm_update_pdes(adev, vm, false);
+ if (ret)
+ goto unlock_all;
- spin_lock(&vm->status_lock);
- }
- spin_unlock(&vm->status_lock);
+ /*
+ * We need to wait for all VM updates to finish before restarting the
+ * queues. Using the done list like that is now ok since everything is
+ * locked in place.
+ */
+ list_for_each_entry(bo_va, &vm->done, base.vm_status)
+ dma_fence_wait(bo_va->last_pt_update, false);
+ dma_fence_wait(vm->last_update, false);
ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
if (ret)
@@ -726,7 +828,7 @@ static void amdgpu_userq_restore_worker(struct work_struct *work)
mutex_lock(&uq_mgr->userq_mutex);
- ret = amdgpu_userq_validate_bos(uq_mgr);
+ ret = amdgpu_userq_vm_validate(uq_mgr);
if (ret) {
drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n");
goto unlock;
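The reworked amdgpu_userq_vm_validate(), called from the restore worker above, replaces the hand-rolled walk of the moved/invalidated lists with a common flow: lock the PD and the done list under drm_exec, let amdgpu_vm_validate() handle PDs, PTs and per-VM BOs, validate the remaining invalidated BOs, update the page tables, and only then wait for the updates before the queues are restored. The following compact sketch captures just that ordering; the stub functions are illustrative stand-ins, with the real kernel helpers named in the comments.

#include <stdio.h>

/* Stubs standing in for the kernel helpers used by amdgpu_userq_vm_validate(). */
static int lock_pd_and_done_list(void)    { return 0; } /* amdgpu_vm_lock_pd() + amdgpu_vm_lock_done_list() */
static int validate_pds_pts_pervm(void)   { return 0; } /* amdgpu_vm_validate() */
static int validate_invalidated_bos(void) { return 0; } /* amdgpu_userq_bo_validate() */
static int handle_moved(void)             { return 0; } /* amdgpu_vm_handle_moved() */
static int update_pdes(void)              { return 0; } /* amdgpu_vm_update_pdes() */
static void wait_for_pt_updates(void)     { }           /* dma_fence_wait() on done-list BOs and vm->last_update */

/*
 * Ordering model of the new restore path: lock and validate everything first,
 * update the page tables, then wait for the updates to land before restoring
 * the queues.
 */
static int userq_vm_validate_model(void)
{
	int r;

	if ((r = lock_pd_and_done_list()))
		return r;
	if ((r = validate_pds_pts_pervm()))
		return r;
	if ((r = validate_invalidated_bos()))
		return r;
	if ((r = handle_moved()))
		return r;
	if ((r = update_pdes()))
		return r;
	wait_for_pt_updates();
	return 0;
}

int main(void)
{
	printf("restore validation result: %d\n", userq_vm_validate_model());
	return 0;
}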
@@ -751,7 +853,7 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
/* Try to unmap all the queues in this process ctx */
idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
- r = amdgpu_userq_unmap_helper(uq_mgr, queue);
+ r = amdgpu_userq_preempt_helper(uq_mgr, queue);
if (r)
ret = r;
}
@@ -877,7 +979,10 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev)
cancel_delayed_work_sync(&uqm->resume_work);
mutex_lock(&uqm->userq_mutex);
idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
- r = amdgpu_userq_unmap_helper(uqm, queue);
+ if (adev->in_s0ix)
+ r = amdgpu_userq_preempt_helper(uqm, queue);
+ else
+ r = amdgpu_userq_unmap_helper(uqm, queue);
if (r)
ret = r;
}
@@ -902,7 +1007,10 @@ int amdgpu_userq_resume(struct amdgpu_device *adev)
list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
mutex_lock(&uqm->userq_mutex);
idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
- r = amdgpu_userq_map_helper(uqm, queue);
+ if (adev->in_s0ix)
+ r = amdgpu_userq_restore_helper(uqm, queue);
+ else
+ r = amdgpu_userq_map_helper(uqm, queue);
if (r)
ret = r;
}
@@ -936,7 +1044,7 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
(queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
(queue->xcp_id == idx)) {
- r = amdgpu_userq_unmap_helper(uqm, queue);
+ r = amdgpu_userq_preempt_helper(uqm, queue);
if (r)
ret = r;
}
@@ -970,7 +1078,7 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
(queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
(queue->xcp_id == idx)) {
- r = amdgpu_userq_map_helper(uqm, queue);
+ r = amdgpu_userq_restore_helper(uqm, queue);
if (r)
ret = r;
}
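The final hunks switch the suspend/resume and enforce-isolation paths from unmap/map to preempt/restore where the queue context should survive (S0ix and isolation), while a full suspend still unmaps and a full resume remaps. A trivial sketch of that selection, purely illustrative:

#include <stdbool.h>
#include <stdio.h>

enum helper { HELPER_UNMAP, HELPER_PREEMPT, HELPER_MAP, HELPER_RESTORE };

/* Mirrors the suspend-path choice: S0ix keeps queue state and only preempts. */
static enum helper suspend_helper(bool in_s0ix)
{
	return in_s0ix ? HELPER_PREEMPT : HELPER_UNMAP;
}

/* Mirrors the resume-path choice: S0ix restores, a full resume remaps. */
static enum helper resume_helper(bool in_s0ix)
{
	return in_s0ix ? HELPER_RESTORE : HELPER_MAP;
}

int main(void)
{
	printf("s0ix suspend -> %d, full suspend -> %d\n",
	       suspend_helper(true), suspend_helper(false));
	printf("s0ix resume  -> %d, full resume  -> %d\n",
	       resume_helper(true), resume_helper(false));
	return 0;
}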