author    Linus Torvalds <torvalds@linux-foundation.org>  2025-10-02 12:47:25 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2025-10-02 12:47:25 -0700
commit    58809f614e0e3f4e12b489bddf680bfeb31c0a20 (patch)
tree      6b1468e6c1fbed9e04b0701ae49b634add62f794 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent    05a54fa773284d1a7923cdfdd8f0c8dabb98bd26 (diff)
parent    b2ec5ca9d5c2c019e2316f7ba447596d1dcd8fde (diff)
Merge tag 'drm-next-2025-10-01' of https://gitlab.freedesktop.org/drm/kernel
Pull drm updates from Dave Airlie:
 "cross-subsystem:
   - i2c-hid: Make elan touch controllers power on after panel is enabled
   - dt bindings for STM32MP25 SoC
   - pci vgaarb: use screen_info helpers
   - rust pin-init updates
   - add MEI driver for late binding firmware update/load

  uapi:
   - add ioctl for reassigning GEM handles
   - provide boot_display attribute on boot-up devices

  core:
   - document DRM_MODE_PAGE_FLIP_EVENT
   - add vendor specific recovery method to drm device wedged uevent

  gem:
   - Simplify gpuvm locking

  ttm:
   - add interface to populate buffers

  sched:
   - Fix race condition in trace code

  atomic:
   - Reallow no-op async page flips

  display:
   - dp: Fix command length

  video:
   - Improve pixel-format handling for struct screen_info

  rust:
   - drop Opaque<> from ioctl args
   - Alloc:
      - BorrowedPage type and AsPageIter traits
      - Implement Vmalloc::to_page() and VmallocPageIter
   - DMA/Scatterlist:
      - Add dma::DataDirection and type alias for dma_addr_t
      - Abstraction for struct scatterlist and sg_table
   - DRM:
      - simplify use of generics
      - add DriverFile type alias
      - drop Object::SIZE
   - Rust:
      - pin-init tree merge
      - Various methods for AsBytes and FromBytes traits

  gpuvm:
   - Support madvise in Xe driver

  gpusvm:
   - fix hmm_pfn_to_map_order usage in gpusvm

  bridge:
   - Improve and fix ref counting on bridge management
   - cdns-dsi: Various improvements to mode setting
   - Support Solomon SSD2825 plus DT bindings
   - Support Waveshare DSI2DPI plus DT bindings
   - Support Content Protection property
   - display-connector: Improve DP display detection
   - Add support for Radxa Ra620 plus DT bindings
   - adv7511: Provide SPD and HDMI infoframes
   - it6505: Replace crypto_shash with sha()
   - synopsys: Add support for DW DPTX Controller plus DT bindings
   - adv7511: Write full Audio infoframe
   - ite6263: Support vendor-specific infoframes
   - simple: Add support for Realtek RTD2171 DP-to-HDMI plus DT bindings

  panel:
   - panel-edp: Support mt8189 Chromebooks; Support BOE NV140WUM-N64;
     Support SHP LQ134Z1; Fixes
   - panel-simple: Support Olimex LCD-OLinuXino-5CTS plus DT bindings
   - Support Samsung AMS561RA01
   - Support Hydis HV101HD1 plus DT bindings
   - ilitek-ili9881c: Refactor mode setting; Add support for Bestar
     BSD1218-A101KL68 LCD plus DT bindings
   - lvds: Add support for Ampire AMP19201200B5TZQW-T03 to DT bindings
   - edp: Add support for additional mt8189 Chromebook panels
   - lvds: Add DT bindings for EDT ETML0700Z8DHA

  amdgpu:
   - add CRIU support for gem objects
   - RAS updates
   - VCN SRAM load fixes
   - EDID read fixes
   - eDP ALPM support
   - Documentation updates
   - Rework PTE flag generation
   - DCE6 fixes
   - VCN devcoredump cleanup
   - MMHUB client id fixes
   - VCN 5.0.1 RAS support
   - SMU 13.0.x updates
   - Expanded PCIe DPC support
   - Expanded VCN reset support
   - VPE per queue reset support
   - give kernel jobs unique id for tracing
   - pre-populate exported buffers
   - cyan skillfish updates
   - make vbios build number available in sysfs
   - userq updates
   - HDCP updates
   - support MMIO remap page as ttm pool
   - JPEG parser updates
   - DCE6 DC updates
   - use devm for i2c buses
   - GPUVM locking updates
   - Drop non-DC DCE11 code
   - improve fallback handling for pixel encoding

  amdkfd:
   - SVM/page migration fixes
   - debugfs fixes
   - add CRIU support for gem objects
   - SVM updates

  radeon:
   - use dev_warn_once in CS parsers

  xe:
   - add madvise interface
   - add DRM_IOCTL_XE_VM_QUERY_MEMORY_RANGE_ATTRS to query VMA count and
     memory attributes
   - drop L# bank mask reporting from media GT3 on Xe3+
   - add SLPC power_profile sysfs interface
   - add configs attribs to add post/mid context-switch commands
   - handle firmware reported hardware errors notifying userspace with
     device wedged uevent
   - use same dir structure across sysfs/debugfs
   - cleanup and future proof vram region init
   - add G-states and PCI link states to debugfs
   - Add SRIOV support for CCS surfaces on Xe2+
   - Enable SRIOV PF mode by default on supported platforms
   - move flush to common code
   - extended core workarounds for Xe2/3
   - use DRM scheduler for delayed GT TLB invalidations
   - configs improvements and allow VF device enablement
   - prep work to expose mmio regions to userspace
   - VF migration support added
   - prepare GPU SVM for THP migration
   - start fixing XE_PAGE_SIZE vs PAGE_SIZE
   - add PSMI support for hw validation
   - resize VF bars to max possible size according to number of VFs
   - Ensure GT is in C0 during resume
   - pre-populate exported buffers
   - replace xe_hmm with gpusvm
   - add more SVM GT stats to debugfs
   - improve fake pci and WA kunit handling for new platform testing
   - Test GuC to GuC comms to add debugging
   - use attribute groups to simplify sysfs registration
   - add Late Binding firmware code to interact with MEI

  i915:
   - apply multiple JSL/EHL/Gen7/Gen6 workarounds properly
   - protect against overflow in active_engine()
   - Use try_cmpxchg64() in __active_lookup()
   - include GuC registers in error state
   - get rid of dev->struct_mutex
   - iopoll: generalize read_poll_timeout
   - lots more display refactoring
   - Reject HBR3 in any eDP Panel
   - Prune modes for YUV420
   - Display Wa fixes, additions, and updates
   - DP: Fix 2.7 Gbps link training on g4x
   - DP: Adjust the idle pattern handling
   - DP: Shuffle the link training code a bit
   - Don't set/read the DSI C clock divider on GLK
   - enable_psr kernel parameter changes
   - Type-C enabled/disconnected dp-alt sink
   - Wildcat Lake enabling
   - DP HDR updates
   - DRAM detection
   - wait PSR idle on dsb commit
   - Remove FBC modulo 4 restriction for ADL-P+
   - panic: refactor framebuffer allocation

  habanalabs:
   - debug/visibility improvements
   - vmalloc-backed coherent mmap support
   - HLDIO infrastructure

  nova-core:
   - various register!() macro improvements
   - minor vbios/firmware fixes/refactoring
   - advance firmware boot stages; process Booter and patch signatures
   - process GSP and GSP bootloader
   - Add r570.144 firmware bindings and update to it
   - Move GSP boot code to own module
   - Use new pin-init features to store driver's private data in a single
     allocation
   - Update ARef import from sync::aref

  nova-drm:
   - Update ARef import from sync::aref

  tyr:
   - initial driver skeleton for a rust driver for ARM Mali GPUs
   - capable of powering up, querying metadata and providing it to
     userspace

  msm:
   - GPU and Core:
      - in DT bindings describe clocks per GPU type
      - GMU bandwidth voting for x1-85
      - a623/a663 speedbins
      - cleanup some remaining no-iommu leftovers after VM_BIND conversion
      - fix GEM obj 32b size truncation
      - add missing VM_BIND param validation
      - IFPC for x1-85 and a750
      - register xml and gen_header.py sync from mesa
   - Display:
      - add missing bindings for display on SC8180X
      - added DisplayPort MST bindings
      - conversion from round_rate() to determine_rate()

  amdxdna:
   - add IOCTL_AMDXDNA_GET_ARRAY
   - support user space allocated buffers
   - streamline PM interfaces
   - Refactoring wrt. hardware contexts
   - improve error reporting

  nouveau:
   - use GSP firmware by default
   - improve error reporting
   - Pre-populate exported buffers

  ast:
   - Clean up detection of DRAM config

  exynos:
   - add DSIM bridge driver support for Exynos7870
   - Document Exynos7870 DSIM compatible in dt-binding

  panthor:
   - Print task/pid on errors
   - Add support for Mali G710, G510, G310, Gx15, Gx20, Gx25
   - Improve cache flushing
   - Fail VM bind if BO has offset

  renesas:
   - convert to RUNTIME_PM_OPS

  rcar-du:
   - Make number of lanes configurable
   - Use RUNTIME_PM_OPS
   - Add support for DSI commands

  rocket:
   - Add driver for Rockchip NPU plus DT bindings
   - Use kfree() and sizeof() correctly
   - Test DMA status

  rockchip:
   - dsi2: Add support for RK3576 plus DT bindings
   - Add support for RK3588 DPTX output

  tidss:
   - Use crtc_ fields for programming display mode
   - Remove other drivers from aperture

  pixpaper:
   - Add support for Mayqueen Pixpaper plus DT bindings

  v3d:
   - Support querying number of GPU resets for KHR_robustness

  stm:
   - Clean up logging
   - ltdc: Add support for STM32MP257F-EV1 plus DT bindings

  sitronix:
   - st7571-i2c: Add support for inverted displays and 2-bit grayscale

  tidss:
   - Convert to kernel's FIELD_ macros

  vesadrm:
   - Support 8-bit palette mode

  imagination:
   - Improve power management
   - Add support for TH1520 GPU
   - Support RISC-V architectures

  v3d:
   - Improve job management and locking

  vkms:
   - Support variants of ARGB8888, ARGB16161616, RGB565, RGB888 and P01x
   - Support YUV with 16-bit components"

* tag 'drm-next-2025-10-01' of https://gitlab.freedesktop.org/drm/kernel: (1455 commits)
  drm/amd: Add name to modes from amdgpu_connector_add_common_modes()
  drm/amd: Drop some common modes from amdgpu_connector_add_common_modes()
  drm/amdgpu: update MODULE_PARM_DESC for freesync_video
  drm/amd: Use dynamic array size declaration for amdgpu_connector_add_common_modes()
  drm/amd/display: Share dce100_validate_global with DCE6-8
  drm/amd/display: Share dce100_validate_bandwidth with DCE6-8
  drm/amdgpu: Fix fence signaling race condition in userqueue
  amd/amdkfd: enhance kfd process check in switch partition
  amd/amdkfd: resolve a race in amdgpu_amdkfd_device_fini_sw
  drm/amd/display: Reject modes with too high pixel clock on DCE6-10
  drm/amd: Drop unnecessary check in amdgpu_connector_add_common_modes()
  drm/amd/display: Only enable common modes for eDP and LVDS
  drm/amdgpu: remove the redeclaration of variable i
  drm/amdgpu/userq: assign an error code for invalid userq va
  drm/amdgpu: revert "rework reserved VMID handling" v2
  drm/amdgpu: remove leftover from enforcing isolation by VMID
  drm/amdgpu: Add fallback to pipe reset if KCQ ring reset fails
  accel/habanalabs: add Infineon version check
  accel/habanalabs/gaudi2: read preboot status after recovering from dirty state
  accel/habanalabs: add HL_GET_P_STATE passthrough type
  ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
 -rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 245
 1 file changed, 128 insertions(+), 117 deletions(-)
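
As context for the diff that follows (a reader's sketch, not code from the patch): the change is part of the GPUVM locking updates listed above and replaces amdgpu's single per-VM status_lock with dma_resv assertions on the root page-directory BO plus two narrower spinlocks, stats_lock for memory statistics and invalidated_lock for the invalidated/done lists. A minimal, simplified C illustration of that split, using hypothetical demo_* names and trimmed stand-in structures rather than the real amdgpu types:

/*
 * Locking sketch distilled from the diff below; not the actual driver code.
 * The root PD reservation protects the reservation-held state lists, while
 * statistics and the invalidated list keep their own spinlocks so they can
 * be updated without holding the reservation.
 */
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/dma-resv.h>

struct demo_vm {                        /* hypothetical stand-in for amdgpu_vm */
        struct dma_resv *root_resv;     /* vm->root.bo->tbo.base.resv */
        spinlock_t stats_lock;          /* protects memory statistics */
        spinlock_t invalidated_lock;    /* protects invalidated/done lists */
        struct list_head moved;         /* protected by root_resv */
        struct list_head invalidated;   /* protected by invalidated_lock */
};

static void demo_vm_init_locks(struct demo_vm *vm)
{
        /* Mirrors the two spin_lock_init() calls added to amdgpu_vm_init(). */
        spin_lock_init(&vm->stats_lock);
        spin_lock_init(&vm->invalidated_lock);
        INIT_LIST_HEAD(&vm->moved);
        INIT_LIST_HEAD(&vm->invalidated);
}

static void demo_vm_assert_locked(struct demo_vm *vm)
{
        /* Same idea as the new amdgpu_vm_assert_locked() helper. */
        dma_resv_assert_held(vm->root_resv);
}

static void demo_vm_bo_moved(struct demo_vm *vm, struct list_head *vm_status)
{
        /* Reservation-protected list: assert instead of taking a spinlock. */
        demo_vm_assert_locked(vm);
        list_move(vm_status, &vm->moved);
}

static void demo_vm_bo_invalidated(struct demo_vm *vm, struct list_head *vm_status)
{
        /* Invalidated list may be touched without the reservation held. */
        spin_lock(&vm->invalidated_lock);
        list_move(vm_status, &vm->invalidated);
        spin_unlock(&vm->invalidated_lock);
}

The real functions in the patch (amdgpu_vm_bo_moved(), amdgpu_vm_bo_invalidated(), amdgpu_vm_update_stats() and friends) apply the same pattern to the actual amdgpu_vm structures.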
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c39bb06ebda1..8c28e8923f02 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -128,6 +128,17 @@ struct amdgpu_vm_tlb_seq_struct {
};
/**
+ * amdgpu_vm_assert_locked - check if VM is correctly locked
+ * @vm: the VM which schould be tested
+ *
+ * Asserts that the VM root PD is locked.
+ */
+static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm)
+{
+ dma_resv_assert_held(vm->root.bo->tbo.base.resv);
+}
+
+/**
* amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
*
* @adev: amdgpu_device pointer
@@ -143,6 +154,8 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
{
int r;
+ amdgpu_vm_assert_locked(vm);
+
if (vm->pasid == pasid)
return 0;
@@ -181,12 +194,11 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
struct amdgpu_bo *bo = vm_bo->bo;
vm_bo->moved = true;
- spin_lock(&vm_bo->vm->status_lock);
+ amdgpu_vm_assert_locked(vm);
if (bo->tbo.type == ttm_bo_type_kernel)
list_move(&vm_bo->vm_status, &vm->evicted);
else
list_move_tail(&vm_bo->vm_status, &vm->evicted);
- spin_unlock(&vm_bo->vm->status_lock);
}
/**
* amdgpu_vm_bo_moved - vm_bo is moved
@@ -198,9 +210,8 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
{
- spin_lock(&vm_bo->vm->status_lock);
+ amdgpu_vm_assert_locked(vm_bo->vm);
list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
- spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -213,9 +224,8 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
{
- spin_lock(&vm_bo->vm->status_lock);
+ amdgpu_vm_assert_locked(vm_bo->vm);
list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
- spin_unlock(&vm_bo->vm->status_lock);
vm_bo->moved = false;
}
@@ -229,9 +239,9 @@ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
{
- spin_lock(&vm_bo->vm->status_lock);
+ spin_lock(&vm_bo->vm->invalidated_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
- spin_unlock(&vm_bo->vm->status_lock);
+ spin_unlock(&vm_bo->vm->invalidated_lock);
}
/**
@@ -244,10 +254,9 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
{
+ amdgpu_vm_assert_locked(vm_bo->vm);
vm_bo->moved = true;
- spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user);
- spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -260,13 +269,11 @@ static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
{
- if (vm_bo->bo->parent) {
- spin_lock(&vm_bo->vm->status_lock);
+ amdgpu_vm_assert_locked(vm_bo->vm);
+ if (vm_bo->bo->parent)
list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
- spin_unlock(&vm_bo->vm->status_lock);
- } else {
+ else
amdgpu_vm_bo_idle(vm_bo);
- }
}
/**
@@ -279,9 +286,8 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
{
- spin_lock(&vm_bo->vm->status_lock);
+ amdgpu_vm_assert_locked(vm_bo->vm);
list_move(&vm_bo->vm_status, &vm_bo->vm->done);
- spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -295,10 +301,13 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
{
struct amdgpu_vm_bo_base *vm_bo, *tmp;
- spin_lock(&vm->status_lock);
+ spin_lock(&vm->invalidated_lock);
list_splice_init(&vm->done, &vm->invalidated);
list_for_each_entry(vm_bo, &vm->invalidated, vm_status)
vm_bo->moved = true;
+ spin_unlock(&vm->invalidated_lock);
+
+ amdgpu_vm_assert_locked(vm_bo->vm);
list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
struct amdgpu_bo *bo = vm_bo->bo;
@@ -308,14 +317,13 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
else if (bo->parent)
list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
}
- spin_unlock(&vm->status_lock);
}
/**
* amdgpu_vm_update_shared - helper to update shared memory stat
* @base: base structure for tracking BO usage in a VM
*
- * Takes the vm status_lock and updates the shared memory stat. If the basic
+ * Takes the vm stats_lock and updates the shared memory stat. If the basic
* stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need to be called
* as well.
*/
@@ -327,7 +335,8 @@ static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
bool shared;
- spin_lock(&vm->status_lock);
+ dma_resv_assert_held(bo->tbo.base.resv);
+ spin_lock(&vm->stats_lock);
shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
if (base->shared != shared) {
base->shared = shared;
@@ -339,7 +348,7 @@ static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
vm->stats[bo_memtype].drm.private += size;
}
}
- spin_unlock(&vm->status_lock);
+ spin_unlock(&vm->stats_lock);
}
/**
@@ -364,11 +373,11 @@ void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
* be bo->tbo.resource
* @sign: if we should add (+1) or subtract (-1) from the stat
*
- * Caller need to have the vm status_lock held. Useful for when multiple update
+ * Caller need to have the vm stats_lock held. Useful for when multiple update
* need to happen at the same time.
*/
static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
- struct ttm_resource *res, int sign)
+ struct ttm_resource *res, int sign)
{
struct amdgpu_vm *vm = base->vm;
struct amdgpu_bo *bo = base->bo;
@@ -392,7 +401,8 @@ static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
*/
if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
vm->stats[res_memtype].drm.purgeable += size;
- if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype)))
+ if (!(bo->preferred_domains &
+ amdgpu_mem_type_to_domain(res_memtype)))
vm->stats[bo_memtype].evicted += size;
}
}
@@ -411,9 +421,9 @@ void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
{
struct amdgpu_vm *vm = base->vm;
- spin_lock(&vm->status_lock);
+ spin_lock(&vm->stats_lock);
amdgpu_vm_update_stats_locked(base, res, sign);
- spin_unlock(&vm->status_lock);
+ spin_unlock(&vm->stats_lock);
}
/**
@@ -439,10 +449,10 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
base->next = bo->vm_bo;
bo->vm_bo = base;
- spin_lock(&vm->status_lock);
+ spin_lock(&vm->stats_lock);
base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
- spin_unlock(&vm->status_lock);
+ spin_unlock(&vm->stats_lock);
if (!amdgpu_vm_is_bo_always_valid(vm, bo))
return;
@@ -485,6 +495,42 @@ int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
}
/**
+ * amdgpu_vm_lock_done_list - lock all BOs on the done list
+ * @vm: vm providing the BOs
+ * @exec: drm execution context
+ * @num_fences: number of extra fences to reserve
+ *
+ * Lock the BOs on the done list in the DRM execution context.
+ */
+int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences)
+{
+ struct list_head *prev = &vm->done;
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
+ int ret;
+
+ /* We can only trust prev->next while holding the lock */
+ spin_lock(&vm->invalidated_lock);
+ while (!list_is_head(prev->next, &vm->done)) {
+ bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status);
+ spin_unlock(&vm->invalidated_lock);
+
+ bo = bo_va->base.bo;
+ if (bo) {
+ ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1);
+ if (unlikely(ret))
+ return ret;
+ }
+ spin_lock(&vm->invalidated_lock);
+ prev = prev->next;
+ }
+ spin_unlock(&vm->invalidated_lock);
+
+ return 0;
+}
+
+/**
* amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
*
* @adev: amdgpu device pointer
@@ -575,7 +621,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
void *param)
{
uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
- struct amdgpu_vm_bo_base *bo_base;
+ struct amdgpu_vm_bo_base *bo_base, *tmp;
struct amdgpu_bo *bo;
int r;
@@ -588,13 +634,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return r;
}
- spin_lock(&vm->status_lock);
- while (!list_empty(&vm->evicted)) {
- bo_base = list_first_entry(&vm->evicted,
- struct amdgpu_vm_bo_base,
- vm_status);
- spin_unlock(&vm->status_lock);
-
+ list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
bo = bo_base->bo;
r = validate(param, bo);
@@ -607,37 +647,21 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
amdgpu_vm_bo_relocated(bo_base);
}
- spin_lock(&vm->status_lock);
}
- while (ticket && !list_empty(&vm->evicted_user)) {
- bo_base = list_first_entry(&vm->evicted_user,
- struct amdgpu_vm_bo_base,
- vm_status);
- spin_unlock(&vm->status_lock);
-
- bo = bo_base->bo;
- if (dma_resv_locking_ctx(bo->tbo.base.resv) != ticket) {
- struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm);
+ if (ticket) {
+ list_for_each_entry_safe(bo_base, tmp, &vm->evicted_user,
+ vm_status) {
+ bo = bo_base->bo;
+ dma_resv_assert_held(bo->tbo.base.resv);
- pr_warn_ratelimited("Evicted user BO is not reserved\n");
- if (ti) {
- pr_warn_ratelimited("pid %d\n", ti->task.pid);
- amdgpu_vm_put_task_info(ti);
- }
+ r = validate(param, bo);
+ if (r)
+ return r;
- return -EINVAL;
+ amdgpu_vm_bo_invalidated(bo_base);
}
-
- r = validate(param, bo);
- if (r)
- return r;
-
- amdgpu_vm_bo_invalidated(bo_base);
-
- spin_lock(&vm->status_lock);
}
- spin_unlock(&vm->status_lock);
amdgpu_vm_eviction_lock(vm);
vm->evicting = false;
@@ -660,13 +684,13 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
bool ret;
+ amdgpu_vm_assert_locked(vm);
+
amdgpu_vm_eviction_lock(vm);
ret = !vm->evicting;
amdgpu_vm_eviction_unlock(vm);
- spin_lock(&vm->status_lock);
ret &= list_empty(&vm->evicted);
- spin_unlock(&vm->status_lock);
spin_lock(&vm->immediate.lock);
ret &= !vm->immediate.stopped;
@@ -957,16 +981,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm *vm, bool immediate)
{
struct amdgpu_vm_update_params params;
- struct amdgpu_vm_bo_base *entry;
+ struct amdgpu_vm_bo_base *entry, *tmp;
bool flush_tlb_needed = false;
- LIST_HEAD(relocated);
int r, idx;
- spin_lock(&vm->status_lock);
- list_splice_init(&vm->relocated, &relocated);
- spin_unlock(&vm->status_lock);
+ amdgpu_vm_assert_locked(vm);
- if (list_empty(&relocated))
+ if (list_empty(&vm->relocated))
return 0;
if (!drm_dev_enter(adev_to_drm(adev), &idx))
@@ -977,11 +998,12 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
params.vm = vm;
params.immediate = immediate;
- r = vm->update_funcs->prepare(&params, NULL);
+ r = vm->update_funcs->prepare(&params, NULL,
+ AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES);
if (r)
goto error;
- list_for_each_entry(entry, &relocated, vm_status) {
+ list_for_each_entry(entry, &vm->relocated, vm_status) {
/* vm_flush_needed after updating moved PDEs */
flush_tlb_needed |= entry->moved;
@@ -997,9 +1019,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
if (flush_tlb_needed)
atomic64_inc(&vm->tlb_seq);
- while (!list_empty(&relocated)) {
- entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base,
- vm_status);
+ list_for_each_entry_safe(entry, tmp, &vm->relocated, vm_status) {
amdgpu_vm_bo_idle(entry);
}
@@ -1146,7 +1166,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
dma_fence_put(tmp);
}
- r = vm->update_funcs->prepare(&params, sync);
+ r = vm->update_funcs->prepare(&params, sync,
+ AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE);
if (r)
goto error_free;
@@ -1225,9 +1246,9 @@ error_free:
void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
{
- spin_lock(&vm->status_lock);
+ spin_lock(&vm->stats_lock);
memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
- spin_unlock(&vm->status_lock);
+ spin_unlock(&vm->stats_lock);
}
/**
@@ -1339,13 +1360,14 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
* but in case of something, we filter the flags in first place
*/
- if (!(mapping->flags & AMDGPU_PTE_READABLE))
+ if (!(mapping->flags & AMDGPU_VM_PAGE_READABLE))
update_flags &= ~AMDGPU_PTE_READABLE;
- if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
+ if (!(mapping->flags & AMDGPU_VM_PAGE_WRITEABLE))
update_flags &= ~AMDGPU_PTE_WRITEABLE;
/* Apply ASIC specific mapping flags */
- amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags);
+ amdgpu_gmc_get_vm_pte(adev, vm, bo, mapping->flags,
+ &update_flags);
trace_amdgpu_vm_bo_update(mapping);
@@ -1486,7 +1508,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping,
struct dma_fence *fence)
{
- if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev))
+ if (mapping->flags & AMDGPU_VM_PAGE_PRT)
amdgpu_vm_add_prt_cb(adev, fence);
kfree(mapping);
}
@@ -1593,29 +1615,24 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct ww_acquire_ctx *ticket)
{
- struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo_va *bo_va, *tmp;
struct dma_resv *resv;
bool clear, unlock;
int r;
- spin_lock(&vm->status_lock);
- while (!list_empty(&vm->moved)) {
- bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
- base.vm_status);
- spin_unlock(&vm->status_lock);
-
+ list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
/* Per VM BOs never need to bo cleared in the page tables */
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
- spin_lock(&vm->status_lock);
}
+ spin_lock(&vm->invalidated_lock);
while (!list_empty(&vm->invalidated)) {
bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
base.vm_status);
resv = bo_va->base.bo->tbo.base.resv;
- spin_unlock(&vm->status_lock);
+ spin_unlock(&vm->invalidated_lock);
/* Try to reserve the BO to avoid clearing its ptes */
if (!adev->debug_vm && dma_resv_trylock(resv)) {
@@ -1647,9 +1664,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM))
amdgpu_vm_bo_evicted_user(&bo_va->base);
- spin_lock(&vm->status_lock);
+ spin_lock(&vm->invalidated_lock);
}
- spin_unlock(&vm->status_lock);
+ spin_unlock(&vm->invalidated_lock);
return 0;
}
@@ -1765,7 +1782,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
list_add(&mapping->list, &bo_va->invalids);
amdgpu_vm_it_insert(mapping, &vm->va);
- if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev))
+ if (mapping->flags & AMDGPU_VM_PAGE_PRT)
amdgpu_vm_prt_get(adev);
if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
@@ -1825,7 +1842,7 @@ static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev,
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t saddr, uint64_t offset,
- uint64_t size, uint64_t flags)
+ uint64_t size, uint32_t flags)
{
struct amdgpu_bo_va_mapping *mapping, *tmp;
struct amdgpu_bo *bo = bo_va->base.bo;
@@ -1884,7 +1901,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t saddr, uint64_t offset,
- uint64_t size, uint64_t flags)
+ uint64_t size, uint32_t flags)
{
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_bo *bo = bo_va->base.bo;
@@ -2178,9 +2195,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
}
}
- spin_lock(&vm->status_lock);
+ spin_lock(&vm->invalidated_lock);
list_del(&bo_va->base.vm_status);
- spin_unlock(&vm->status_lock);
+ spin_unlock(&vm->invalidated_lock);
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
list_del(&mapping->list);
@@ -2288,10 +2305,10 @@ void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
struct amdgpu_vm *vm = bo_base->vm;
- spin_lock(&vm->status_lock);
+ spin_lock(&vm->stats_lock);
amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1);
amdgpu_vm_update_stats_locked(bo_base, new_mem, +1);
- spin_unlock(&vm->status_lock);
+ spin_unlock(&vm->stats_lock);
}
amdgpu_vm_bo_invalidate(bo, evicted);
@@ -2558,11 +2575,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
INIT_LIST_HEAD(&vm->relocated);
INIT_LIST_HEAD(&vm->moved);
INIT_LIST_HEAD(&vm->idle);
+ spin_lock_init(&vm->invalidated_lock);
INIT_LIST_HEAD(&vm->invalidated);
- spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->freed);
INIT_LIST_HEAD(&vm->done);
INIT_KFIFO(vm->faults);
+ spin_lock_init(&vm->stats_lock);
r = amdgpu_vm_init_entities(adev, vm);
if (r)
@@ -2741,7 +2759,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
dma_fence_put(vm->last_tlb_flush);
list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
- if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev) && prt_fini_needed) {
+ if (mapping->flags & AMDGPU_VM_PAGE_PRT && prt_fini_needed) {
amdgpu_vm_prt_fini(adev, vm);
prt_fini_needed = false;
}
@@ -2772,10 +2790,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
dma_fence_put(vm->last_update);
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
- if (vm->reserved_vmid[i]) {
- amdgpu_vmid_free_reserved(adev, i);
- vm->reserved_vmid[i] = false;
- }
+ amdgpu_vmid_free_reserved(adev, vm, i);
}
ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
@@ -2871,6 +2886,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
union drm_amdgpu_vm *args = data;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
/* No valid flags defined yet */
if (args->in.flags)
@@ -2879,17 +2895,10 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
switch (args->in.op) {
case AMDGPU_VM_OP_RESERVE_VMID:
/* We only have requirement to reserve vmid from gfxhub */
- if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
- amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
- fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true;
- }
-
+ amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
break;
case AMDGPU_VM_OP_UNRESERVE_VMID:
- if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
- amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0));
- fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false;
- }
+ amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0));
break;
default:
return -EINVAL;
@@ -3027,7 +3036,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
unsigned int total_done_objs = 0;
unsigned int id = 0;
- spin_lock(&vm->status_lock);
+ amdgpu_vm_assert_locked(vm);
+
seq_puts(m, "\tIdle BOs:\n");
list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
if (!bo_va->base.bo)
@@ -3065,11 +3075,13 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
id = 0;
seq_puts(m, "\tInvalidated BOs:\n");
+ spin_lock(&vm->invalidated_lock);
list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
if (!bo_va->base.bo)
continue;
total_invalidated += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
}
+ spin_unlock(&vm->invalidated_lock);
total_invalidated_objs = id;
id = 0;
@@ -3079,7 +3091,6 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
continue;
total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
}
- spin_unlock(&vm->status_lock);
total_done_objs = id;
seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle,