summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c211
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c2
10 files changed, 173 insertions, 146 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 7c54fe6b0f5d..83020963dfde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2586,12 +2586,17 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
* from the KFD, trigger a segmentation fault in VM debug mode.
*/
if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
+ struct kfd_process *p;
+
pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n",
pid_nr(process_info->pid), mem->va);
// Send GPU VM fault to user space
- kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid),
- mem->va);
+ p = kfd_lookup_process_by_pid(process_info->pid);
+ if (p) {
+ kfd_signal_vm_fault_event_with_userptr(p, mem->va);
+ kfd_unref_process(p);
+ }
}
ret = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a77000c2e0bb..7a899fb4de29 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -6389,23 +6389,28 @@ static int amdgpu_device_sched_resume(struct list_head *device_list,
if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
- if (tmp_adev->asic_reset_res)
- r = tmp_adev->asic_reset_res;
-
- tmp_adev->asic_reset_res = 0;
-
- if (r) {
+ if (tmp_adev->asic_reset_res) {
/* bad news, how to tell it to userspace ?
* for ras error, we should report GPU bad status instead of
* reset failure
*/
if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
!amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
- dev_info(tmp_adev->dev, "GPU reset(%d) failed\n",
- atomic_read(&tmp_adev->gpu_reset_counter));
- amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
+ dev_info(
+ tmp_adev->dev,
+ "GPU reset(%d) failed with error %d \n",
+ atomic_read(
+ &tmp_adev->gpu_reset_counter),
+ tmp_adev->asic_reset_res);
+ amdgpu_vf_error_put(tmp_adev,
+ AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
+ tmp_adev->asic_reset_res);
+ if (!r)
+ r = tmp_adev->asic_reset_res;
+ tmp_adev->asic_reset_res = 0;
} else {
- dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
+ dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
+ atomic_read(&tmp_adev->gpu_reset_counter));
if (amdgpu_acpi_smart_shift_update(tmp_adev,
AMDGPU_SS_DEV_D0))
dev_warn(tmp_adev->dev,
@@ -7157,28 +7162,35 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
{
- struct pci_dev *parent = pci_upstream_bridge(adev->pdev);
+ struct pci_dev *swus, *swds;
int r;
- if (!parent || parent->vendor != PCI_VENDOR_ID_ATI)
+ swds = pci_upstream_bridge(adev->pdev);
+ if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
+ pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
+ return;
+ swus = pci_upstream_bridge(swds);
+ if (!swus ||
+ (swus->vendor != PCI_VENDOR_ID_ATI &&
+ swus->vendor != PCI_VENDOR_ID_AMD) ||
+ pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
return;
/* If already saved, return */
if (adev->pcie_reset_ctx.swus)
return;
/* Upstream bridge is ATI, assume it's SWUS/DS architecture */
- r = pci_save_state(parent);
+ r = pci_save_state(swds);
if (r)
return;
- adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(parent);
+ adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);
- parent = pci_upstream_bridge(parent);
- r = pci_save_state(parent);
+ r = pci_save_state(swus);
if (r)
return;
- adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(parent);
+ adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);
- adev->pcie_reset_ctx.swus = parent;
+ adev->pcie_reset_ctx.swus = swus;
}
static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a09ccf7d8aa2..ebe2b4c68b0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1102,6 +1102,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_read;
+
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
@@ -1171,6 +1174,8 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_write;
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 8676400834fc..a9327472c651 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1421,14 +1421,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
amdgpu_debugfs_vm_init(file_priv);
- r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
+ r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, pasid);
if (r)
goto error_pasid;
- r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid);
- if (r)
- goto error_vm;
-
fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
if (!fpriv->prt_va) {
r = -ENOMEM;
@@ -1468,10 +1464,8 @@ error_vm:
amdgpu_vm_fini(adev, &fpriv->vm);
error_pasid:
- if (pasid) {
+ if (pasid)
amdgpu_pasid_free(pasid);
- amdgpu_vm_set_pasid(adev, &fpriv->vm, 0);
- }
kfree(fpriv);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 1578e4e2bf84..8c0e5d03de50 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2352,7 +2352,7 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
}
ret = psp_ta_load(psp, &psp->securedisplay_context.context);
- if (!ret) {
+ if (!ret && !psp->securedisplay_context.context.resp_status) {
psp->securedisplay_context.context.initialized = true;
mutex_init(&psp->securedisplay_context.mutex);
} else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 48e0932f5b62..1add21160d21 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -726,12 +726,12 @@ amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
struct amdgpu_bo *bo;
int ret;
- spin_lock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
while (!list_empty(&vm->invalidated)) {
bo_va = list_first_entry(&vm->invalidated,
struct amdgpu_bo_va,
base.vm_status);
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
bo = bo_va->base.bo;
ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2);
@@ -748,9 +748,9 @@ amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
if (ret)
return ret;
- spin_lock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
}
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8c28e8923f02..c1a801203949 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -139,48 +139,6 @@ static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm)
}
/**
- * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
- *
- * @adev: amdgpu_device pointer
- * @vm: amdgpu_vm pointer
- * @pasid: the pasid the VM is using on this GPU
- *
- * Set the pasid this VM is using on this GPU, can also be used to remove the
- * pasid by passing in zero.
- *
- */
-int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- u32 pasid)
-{
- int r;
-
- amdgpu_vm_assert_locked(vm);
-
- if (vm->pasid == pasid)
- return 0;
-
- if (vm->pasid) {
- r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid));
- if (r < 0)
- return r;
-
- vm->pasid = 0;
- }
-
- if (pasid) {
- r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm,
- GFP_KERNEL));
- if (r < 0)
- return r;
-
- vm->pasid = pasid;
- }
-
-
- return 0;
-}
-
-/**
* amdgpu_vm_bo_evicted - vm_bo is evicted
*
* @vm_bo: vm_bo which is evicted
@@ -195,10 +153,12 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
vm_bo->moved = true;
amdgpu_vm_assert_locked(vm);
+ spin_lock(&vm_bo->vm->status_lock);
if (bo->tbo.type == ttm_bo_type_kernel)
list_move(&vm_bo->vm_status, &vm->evicted);
else
list_move_tail(&vm_bo->vm_status, &vm->evicted);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
* amdgpu_vm_bo_moved - vm_bo is moved
@@ -211,7 +171,9 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
{
amdgpu_vm_assert_locked(vm_bo->vm);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -225,7 +187,9 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
{
amdgpu_vm_assert_locked(vm_bo->vm);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
+ spin_unlock(&vm_bo->vm->status_lock);
vm_bo->moved = false;
}
@@ -239,9 +203,9 @@ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
{
- spin_lock(&vm_bo->vm->invalidated_lock);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
- spin_unlock(&vm_bo->vm->invalidated_lock);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -254,9 +218,10 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
{
- amdgpu_vm_assert_locked(vm_bo->vm);
vm_bo->moved = true;
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -270,10 +235,13 @@ static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
{
amdgpu_vm_assert_locked(vm_bo->vm);
- if (vm_bo->bo->parent)
+ if (vm_bo->bo->parent) {
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
- else
+ spin_unlock(&vm_bo->vm->status_lock);
+ } else {
amdgpu_vm_bo_idle(vm_bo);
+ }
}
/**
@@ -287,7 +255,9 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
{
amdgpu_vm_assert_locked(vm_bo->vm);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->done);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -301,13 +271,13 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
{
struct amdgpu_vm_bo_base *vm_bo, *tmp;
- spin_lock(&vm->invalidated_lock);
+ amdgpu_vm_assert_locked(vm);
+
+ spin_lock(&vm->status_lock);
list_splice_init(&vm->done, &vm->invalidated);
list_for_each_entry(vm_bo, &vm->invalidated, vm_status)
vm_bo->moved = true;
- spin_unlock(&vm->invalidated_lock);
- amdgpu_vm_assert_locked(vm_bo->vm);
list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
struct amdgpu_bo *bo = vm_bo->bo;
@@ -317,13 +287,14 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
else if (bo->parent)
list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
}
+ spin_unlock(&vm->status_lock);
}
/**
* amdgpu_vm_update_shared - helper to update shared memory stat
* @base: base structure for tracking BO usage in a VM
*
- * Takes the vm stats_lock and updates the shared memory stat. If the basic
+ * Takes the vm status_lock and updates the shared memory stat. If the basic
* stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need to be called
* as well.
*/
@@ -336,7 +307,7 @@ static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
bool shared;
dma_resv_assert_held(bo->tbo.base.resv);
- spin_lock(&vm->stats_lock);
+ spin_lock(&vm->status_lock);
shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
if (base->shared != shared) {
base->shared = shared;
@@ -348,7 +319,7 @@ static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
vm->stats[bo_memtype].drm.private += size;
}
}
- spin_unlock(&vm->stats_lock);
+ spin_unlock(&vm->status_lock);
}
/**
@@ -373,11 +344,11 @@ void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
* be bo->tbo.resource
* @sign: if we should add (+1) or subtract (-1) from the stat
*
- * Caller need to have the vm stats_lock held. Useful for when multiple update
+ * Caller need to have the vm status_lock held. Useful for when multiple update
* need to happen at the same time.
*/
static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
- struct ttm_resource *res, int sign)
+ struct ttm_resource *res, int sign)
{
struct amdgpu_vm *vm = base->vm;
struct amdgpu_bo *bo = base->bo;
@@ -401,8 +372,7 @@ static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
*/
if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
vm->stats[res_memtype].drm.purgeable += size;
- if (!(bo->preferred_domains &
- amdgpu_mem_type_to_domain(res_memtype)))
+ if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype)))
vm->stats[bo_memtype].evicted += size;
}
}
@@ -421,9 +391,9 @@ void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
{
struct amdgpu_vm *vm = base->vm;
- spin_lock(&vm->stats_lock);
+ spin_lock(&vm->status_lock);
amdgpu_vm_update_stats_locked(base, res, sign);
- spin_unlock(&vm->stats_lock);
+ spin_unlock(&vm->status_lock);
}
/**
@@ -449,10 +419,10 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
base->next = bo->vm_bo;
bo->vm_bo = base;
- spin_lock(&vm->stats_lock);
+ spin_lock(&vm->status_lock);
base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
- spin_unlock(&vm->stats_lock);
+ spin_unlock(&vm->status_lock);
if (!amdgpu_vm_is_bo_always_valid(vm, bo))
return;
@@ -511,10 +481,10 @@ int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
int ret;
/* We can only trust prev->next while holding the lock */
- spin_lock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
while (!list_is_head(prev->next, &vm->done)) {
bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status);
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
bo = bo_va->base.bo;
if (bo) {
@@ -522,10 +492,10 @@ int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
if (unlikely(ret))
return ret;
}
- spin_lock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
prev = prev->next;
}
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
return 0;
}
@@ -621,7 +591,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
void *param)
{
uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
- struct amdgpu_vm_bo_base *bo_base, *tmp;
+ struct amdgpu_vm_bo_base *bo_base;
struct amdgpu_bo *bo;
int r;
@@ -634,7 +604,13 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return r;
}
- list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->evicted)) {
+ bo_base = list_first_entry(&vm->evicted,
+ struct amdgpu_vm_bo_base,
+ vm_status);
+ spin_unlock(&vm->status_lock);
+
bo = bo_base->bo;
r = validate(param, bo);
@@ -647,21 +623,26 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
amdgpu_vm_bo_relocated(bo_base);
}
+ spin_lock(&vm->status_lock);
}
+ while (ticket && !list_empty(&vm->evicted_user)) {
+ bo_base = list_first_entry(&vm->evicted_user,
+ struct amdgpu_vm_bo_base,
+ vm_status);
+ spin_unlock(&vm->status_lock);
+
+ bo = bo_base->bo;
+ dma_resv_assert_held(bo->tbo.base.resv);
- if (ticket) {
- list_for_each_entry_safe(bo_base, tmp, &vm->evicted_user,
- vm_status) {
- bo = bo_base->bo;
- dma_resv_assert_held(bo->tbo.base.resv);
+ r = validate(param, bo);
+ if (r)
+ return r;
- r = validate(param, bo);
- if (r)
- return r;
+ amdgpu_vm_bo_invalidated(bo_base);
- amdgpu_vm_bo_invalidated(bo_base);
- }
+ spin_lock(&vm->status_lock);
}
+ spin_unlock(&vm->status_lock);
amdgpu_vm_eviction_lock(vm);
vm->evicting = false;
@@ -690,7 +671,9 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
ret = !vm->evicting;
amdgpu_vm_eviction_unlock(vm);
+ spin_lock(&vm->status_lock);
ret &= list_empty(&vm->evicted);
+ spin_unlock(&vm->status_lock);
spin_lock(&vm->immediate.lock);
ret &= !vm->immediate.stopped;
@@ -981,13 +964,18 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm *vm, bool immediate)
{
struct amdgpu_vm_update_params params;
- struct amdgpu_vm_bo_base *entry, *tmp;
+ struct amdgpu_vm_bo_base *entry;
bool flush_tlb_needed = false;
+ LIST_HEAD(relocated);
int r, idx;
amdgpu_vm_assert_locked(vm);
- if (list_empty(&vm->relocated))
+ spin_lock(&vm->status_lock);
+ list_splice_init(&vm->relocated, &relocated);
+ spin_unlock(&vm->status_lock);
+
+ if (list_empty(&relocated))
return 0;
if (!drm_dev_enter(adev_to_drm(adev), &idx))
@@ -1003,7 +991,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
if (r)
goto error;
- list_for_each_entry(entry, &vm->relocated, vm_status) {
+ list_for_each_entry(entry, &relocated, vm_status) {
/* vm_flush_needed after updating moved PDEs */
flush_tlb_needed |= entry->moved;
@@ -1019,7 +1007,9 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
if (flush_tlb_needed)
atomic64_inc(&vm->tlb_seq);
- list_for_each_entry_safe(entry, tmp, &vm->relocated, vm_status) {
+ while (!list_empty(&relocated)) {
+ entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base,
+ vm_status);
amdgpu_vm_bo_idle(entry);
}
@@ -1246,9 +1236,9 @@ error_free:
void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
{
- spin_lock(&vm->stats_lock);
+ spin_lock(&vm->status_lock);
memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
- spin_unlock(&vm->stats_lock);
+ spin_unlock(&vm->status_lock);
}
/**
@@ -1615,24 +1605,29 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct ww_acquire_ctx *ticket)
{
- struct amdgpu_bo_va *bo_va, *tmp;
+ struct amdgpu_bo_va *bo_va;
struct dma_resv *resv;
bool clear, unlock;
int r;
- list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->moved)) {
+ bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
+ base.vm_status);
+ spin_unlock(&vm->status_lock);
+
/* Per VM BOs never need to bo cleared in the page tables */
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
+ spin_lock(&vm->status_lock);
}
- spin_lock(&vm->invalidated_lock);
while (!list_empty(&vm->invalidated)) {
bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
base.vm_status);
resv = bo_va->base.bo->tbo.base.resv;
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
/* Try to reserve the BO to avoid clearing its ptes */
if (!adev->debug_vm && dma_resv_trylock(resv)) {
@@ -1664,9 +1659,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM))
amdgpu_vm_bo_evicted_user(&bo_va->base);
- spin_lock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
}
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
return 0;
}
@@ -2195,9 +2190,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
}
}
- spin_lock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
list_del(&bo_va->base.vm_status);
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
list_del(&mapping->list);
@@ -2305,10 +2300,10 @@ void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
struct amdgpu_vm *vm = bo_base->vm;
- spin_lock(&vm->stats_lock);
+ spin_lock(&vm->status_lock);
amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1);
amdgpu_vm_update_stats_locked(bo_base, new_mem, +1);
- spin_unlock(&vm->stats_lock);
+ spin_unlock(&vm->status_lock);
}
amdgpu_vm_bo_invalidate(bo, evicted);
@@ -2554,6 +2549,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
* @adev: amdgpu_device pointer
* @vm: requested vm
* @xcp_id: GPU partition selection id
+ * @pasid: the pasid the VM is using on this GPU
*
* Init @vm fields.
*
@@ -2561,7 +2557,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
* 0 for success, error for failure.
*/
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int32_t xcp_id)
+ int32_t xcp_id, uint32_t pasid)
{
struct amdgpu_bo *root_bo;
struct amdgpu_bo_vm *root;
@@ -2575,12 +2571,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
INIT_LIST_HEAD(&vm->relocated);
INIT_LIST_HEAD(&vm->moved);
INIT_LIST_HEAD(&vm->idle);
- spin_lock_init(&vm->invalidated_lock);
INIT_LIST_HEAD(&vm->invalidated);
+ spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->freed);
INIT_LIST_HEAD(&vm->done);
INIT_KFIFO(vm->faults);
- spin_lock_init(&vm->stats_lock);
r = amdgpu_vm_init_entities(adev, vm);
if (r)
@@ -2638,12 +2633,26 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (r)
dev_dbg(adev->dev, "Failed to create task info for VM\n");
+ /* Store new PASID in XArray (if non-zero) */
+ if (pasid != 0) {
+ r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, GFP_KERNEL));
+ if (r < 0)
+ goto error_free_root;
+
+ vm->pasid = pasid;
+ }
+
amdgpu_bo_unreserve(vm->root.bo);
amdgpu_bo_unref(&root_bo);
return 0;
error_free_root:
+ /* If PASID was partially set, erase it from XArray before failing */
+ if (vm->pasid != 0) {
+ xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
+ vm->pasid = 0;
+ }
amdgpu_vm_pt_free_root(adev, vm);
amdgpu_bo_unreserve(vm->root.bo);
amdgpu_bo_unref(&root_bo);
@@ -2749,7 +2758,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
root = amdgpu_bo_ref(vm->root.bo);
amdgpu_bo_reserve(root, true);
- amdgpu_vm_set_pasid(adev, vm, 0);
+ /* Remove PASID mapping before destroying VM */
+ if (vm->pasid != 0) {
+ xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
+ vm->pasid = 0;
+ }
dma_fence_wait(vm->last_unlocked, false);
dma_fence_put(vm->last_unlocked);
dma_fence_wait(vm->last_tlb_flush, false);
@@ -3038,6 +3051,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
amdgpu_vm_assert_locked(vm);
+ spin_lock(&vm->status_lock);
seq_puts(m, "\tIdle BOs:\n");
list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
if (!bo_va->base.bo)
@@ -3075,13 +3089,11 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
id = 0;
seq_puts(m, "\tInvalidated BOs:\n");
- spin_lock(&vm->invalidated_lock);
list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
if (!bo_va->base.bo)
continue;
total_invalidated += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
}
- spin_unlock(&vm->invalidated_lock);
total_invalidated_objs = id;
id = 0;
@@ -3091,6 +3103,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
continue;
total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
}
+ spin_unlock(&vm->status_lock);
total_done_objs = id;
seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index adc5c9161fa8..cf0ec94e8a07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -203,11 +203,11 @@ struct amdgpu_vm_bo_base {
/* protected by bo being reserved */
struct amdgpu_vm_bo_base *next;
- /* protected by vm reservation and invalidated_lock */
+ /* protected by vm status_lock */
struct list_head vm_status;
/* if the bo is counted as shared in mem stats
- * protected by vm BO being reserved */
+ * protected by vm status_lock */
bool shared;
/* protected by the BO being reserved */
@@ -343,8 +343,10 @@ struct amdgpu_vm {
bool evicting;
unsigned int saved_flags;
- /* Memory statistics for this vm, protected by stats_lock */
- spinlock_t stats_lock;
+ /* Lock to protect vm_bo add/del/move on all lists of vm */
+ spinlock_t status_lock;
+
+ /* Memory statistics for this vm, protected by status_lock */
struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
/*
@@ -352,8 +354,6 @@ struct amdgpu_vm {
* PDs, PTs or per VM BOs. The state transits are:
*
* evicted -> relocated (PDs, PTs) or moved (per VM BOs) -> idle
- *
- * Lists are protected by the root PD dma_resv lock.
*/
/* Per-VM and PT BOs who needs a validation */
@@ -374,10 +374,7 @@ struct amdgpu_vm {
* state transits are:
*
* evicted_user or invalidated -> done
- *
- * Lists are protected by the invalidated_lock.
*/
- spinlock_t invalidated_lock;
/* BOs for user mode queues that need a validation */
struct list_head evicted_user;
@@ -503,11 +500,8 @@ extern const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs;
void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
-int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- u32 pasid);
-
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id, uint32_t pasid);
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 7a4c12ff9b18..f794fb1cc06e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -543,7 +543,9 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
entry->bo->vm_bo = NULL;
ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
+ spin_lock(&entry->vm->status_lock);
list_del(&entry->vm_status);
+ spin_unlock(&entry->vm->status_lock);
amdgpu_bo_unref(&entry->bo);
}
@@ -587,6 +589,7 @@ static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params,
struct amdgpu_vm_pt_cursor seek;
struct amdgpu_vm_bo_base *entry;
+ spin_lock(&params->vm->status_lock);
for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) {
if (entry && entry->bo)
list_move(&entry->vm_status, &params->tlb_flush_waitlist);
@@ -594,6 +597,7 @@ static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params,
/* enter start node now */
list_move(&cursor->entry->vm_status, &params->tlb_flush_waitlist);
+ spin_unlock(&params->vm->status_lock);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index 404cc8c2ff2c..f4a19357ccbc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -337,7 +337,7 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
int vmid, i;
if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready &&
- (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x83) {
+ (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x84) {
struct mes_inv_tlbs_pasid_input input = {0};
input.pasid = pasid;
input.flush_type = flush_type;