diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-11-29 13:06:06 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-11-29 13:06:06 -0800 |
commit | 2ba9f676d0a2e408aef14d679984c26373bf37b7 (patch) | |
tree | d84af8f6b6be035d3a013fbd2b6ec9b61c2e29bd /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |
parent | 517363b4949e4442dfe54b281ef5a8bbfafa3bbb (diff) | |
parent | 9794b89c50f7fc972c6b4ddc69693c9f9d1ae7d7 (diff) |
Merge tag 'drm-next-2024-11-29' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie:
"Merge window fixes, mostly amdgpu and xe, with a few other minor ones,
all looks fairly normal,
i915:
- hdcp: Fix when the first read and write are retried
xe:
- Wake up waiters after wait condition set to true
- Mark the preempt fence workqueue as reclaim
- Update xe2 graphics name string
- Fix a couple of guc submit races
- Fix pat index usage in migrate
- Ensure non-cached migrate pagetable bo mappings
- Take a PM ref in the delayed snapshot capture worker
amdgpu:
- SMU 13.0.6 fixes
- XGMI fixes
- SMU 13.0.7 fixes
- Misc code cleanups
- Plane refcount fixes
- DCN 4.0.1 fixes
- DC power fixes
- DTO fixes
- NBIO 7.11 fixes
- SMU 14.0.x fixes
- Reset fixes
- Enable DC on LoongArch
- Sysfs hotplug warning fix
- Misc small fixes
- VCN 4.0.3 fix
- Slab usage fix
- Jpeg delayed work fix
amdkfd:
- wptr handling fixes
radeon:
- Use ttm_bo_move_null()
- Constify struct pci_device_id
- Fix spurious hotplug
- HPD fix
rockchip
- fix 32-bit build"
* tag 'drm-next-2024-11-29' of https://gitlab.freedesktop.org/drm/kernel: (48 commits)
drm/xe: Take PM ref in delayed snapshot capture worker
drm/xe/migrate: use XE_BO_FLAG_PAGETABLE
drm/xe/migrate: fix pat index usage
drm/xe/guc_submit: fix race around suspend_pending
drm/xe/guc_submit: fix race around pending_disable
drm/xe: Update xe2_graphics name string
drm/rockchip: avoid 64-bit division
Revert "drm/radeon: Delay Connector detecting when HPD singals is unstable"
drm/amdgpu/jpeg: cancel the jpeg worker
drm/amdgpu: fix usage slab after free
drm/amdgpu/vcn: reset fw_shared when VCPU buffers corrupted on vcn v4.0.3
drm/amdgpu: Fix sysfs warning when hotplugging
drm/amdgpu: Add sysfs interface for vcn reset mask
drm/amdgpu/gmc7: fix wait_for_idle callers
drm/amd/pm: Remove arcturus min power limit
drm/amd/pm: skip setting the power source on smu v14.0.2/3
drm/amd/pm: disable pcie speed switching on Intel platform for smu v14.0.2/3
drm/amdkfd: Use the correct wptr size
drm/xe: Mark preempt fence workqueue as reclaim
drm/xe/ufence: Wake up waiters after setting ufence->signalled
...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 29 |
1 files changed, 24 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0171d240fcb0..9095c05e0269 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -156,6 +156,11 @@ struct amdgpu_init_level amdgpu_init_default = { .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL, }; +struct amdgpu_init_level amdgpu_init_recovery = { + .level = AMDGPU_INIT_LEVEL_RESET_RECOVERY, + .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL, +}; + /* * Minimal blocks needed to be initialized before a XGMI hive can be reset. This * is used for cases like reset on initialization where the entire hive needs to @@ -182,6 +187,9 @@ void amdgpu_set_init_level(struct amdgpu_device *adev, case AMDGPU_INIT_LEVEL_MINIMAL_XGMI: adev->init_lvl = &amdgpu_init_minimal_xgmi; break; + case AMDGPU_INIT_LEVEL_RESET_RECOVERY: + adev->init_lvl = &amdgpu_init_recovery; + break; case AMDGPU_INIT_LEVEL_DEFAULT: fallthrough; default: @@ -3250,7 +3258,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) return r; } - if (!amdgpu_in_reset(adev)) + if (!amdgpu_reset_in_recovery(adev)) amdgpu_ras_set_error_query_ready(adev, true); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); @@ -4669,8 +4677,8 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) int idx; bool px; - amdgpu_fence_driver_sw_fini(adev); amdgpu_device_ip_fini(adev); + amdgpu_fence_driver_sw_fini(adev); amdgpu_ucode_release(&adev->firmware.gpu_info_fw); adev->accel_working = false; dma_fence_put(rcu_dereference_protected(adev->gang_submit, true)); @@ -5419,7 +5427,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) struct list_head *device_list_handle; bool full_reset, vram_lost = false; struct amdgpu_device *tmp_adev; - int r; + int r, init_level; device_list_handle = reset_context->reset_device_list; @@ -5428,10 +5436,18 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); + /** + * If it's reset on init, it's default init level, otherwise keep level + * as recovery level. + */ + if (reset_context->method == AMD_RESET_METHOD_ON_INIT) + init_level = AMDGPU_INIT_LEVEL_DEFAULT; + else + init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY; + r = 0; list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - /* After reset, it's default init level */ - amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT); + amdgpu_set_init_level(tmp_adev, init_level); if (full_reset) { /* post card */ amdgpu_ras_set_fed(tmp_adev, false); @@ -5518,6 +5534,9 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) out: if (!r) { + /* IP init is complete now, set level as default */ + amdgpu_set_init_level(tmp_adev, + AMDGPU_INIT_LEVEL_DEFAULT); amdgpu_irq_gpu_reset_resume_helper(tmp_adev); r = amdgpu_ib_ring_tests(tmp_adev); if (r) { |