summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-01-30 08:04:01 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-01-30 08:04:01 -0800
commit9f68e3655aae6d49d6ba05dd263f99f33c2567af (patch)
tree42c2c4579c4acbbb456695326af4f4ad8f402813 /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
parent4cadc60d6bcfee9c626d4b55e9dc1475d21ad3bb (diff)
parentd47c7f06268082bc0082a15297a07c0da59b0fc4 (diff)
Merge tag 'drm-next-2020-01-30' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Davbe Airlie: "This is the main pull request for graphics for 5.6. Usual selection of changes all over. I've got one outstanding vmwgfx pull that touches mm so kept it separate until after all of this lands. I'll try and get it to you soon after this, but it might be early next week (nothing wrong with code, just my schedule is messy) This also hits a lot of fbdev drivers with some cleanups. Other notables: - vulkan timeline semaphore support added to syncobjs - nouveau turing secureboot/graphics support - Displayport MST display stream compression support Detailed summary: uapi: - dma-buf heaps added (and fixed) - command line add support for panel oreientation - command line allow overriding penguin count drm: - mipi dsi definition updates - lockdep annotations for dma_resv - remove dma-buf kmap/kunmap support - constify fb_ops in all fbdev drivers - MST fix for daisy chained hotplug- - CTA-861-G modes with VIC >= 193 added - fix drm_panel_of_backlight export - LVDS decoder support - more device based logging support - scanline alighment for dumb buffers - MST DSC helpers scheduler: - documentation fixes - job distribution improvements panel: - Logic PD type 28 panel support - Jimax8729d MIPI-DSI - igenic JZ4770 - generic DSI devicetree bindings - sony acx424AKP panel - Leadtek LTK500HD1829 - xinpeng XPP055C272 - AUO B116XAK01 - GiantPlus GPM940B0 - BOE NV140FHM-N49 - Satoz SAT050AT40H12R2 - Sharp LS020B1DD01D panels. ttm: - use blocking WW lock i915: - hw/uapi state separation - Lock annotation improvements - selftest improvements - ICL/TGL DSI VDSC support - VBT parsing improvments - Display refactoring - DSI updates + fixes - HDCP 2.2 for CFL - CML PCI ID fixes - GLK+ fbc fix - PSR fixes - GEN/GT refactor improvments - DP MST fixes - switch context id alloc to xarray - workaround updates - LMEM debugfs support - tiled monitor fixes - ICL+ clock gating programming removed - DP MST disable sequence fixed - LMEM discontiguous object maps - prefaulting for discontiguous objects - use LMEM for dumb buffers if possible - add LMEM mmap support amdgpu: - enable sync object timelines for vulkan - MST atomic routines - enable MST DSC support - add DMCUB display microengine support - DC OEM i2c support - Renoir DC fixes - Initial HDCP 2.x support - BACO support for Arcturus - Use BACO for runtime PM power save - gfxoff on navi10 - gfx10 golden updates and fixes - DCN support on POWER - GFXOFF for raven1 refresh - MM engine idle handlers cleanup - 10bpc EDP panel fixes - renoir watermark fixes - SR-IOV fixes - Arcturus VCN fixes - GDDR6 training fixes - freesync fixes - Pollock support amdkfd: - unify more codepath with amdgpu - use KIQ to setup HIQ rather than MMIO radeon: - fix vma fault handler race - PPC DMA fix - register check fixes for r100/r200 nouveau: - mmap_sem vs dma_resv fix - rewrite the ACR secure boot code for Turing - TU10x graphics engine support (TU11x pending) - Page kind mapping for turing - 10-bit LUT support - GP10B Tegra fixes - HD audio regression fix hisilicon/hibmc: - use generic fbdev code and helpers rockchip: - dsi/px30 support virtio: - fb damage support - static some functions vc4: - use dma_resv lock wrappers msm: - use dma_resv lock wrappers - sc7180 display + DSI support - a618 support - UBWC support improvements vmwgfx: - updates + new logging uapi exynos: - enable/disable callback cleanups etnaviv: - use dma_resv lock wrappers atmel-hlcdc: - clock fixes mediatek: - cmdq support - non-smooth cursor fixes - ctm property support sun4i: - suspend support - A64 mipi dsi support rcar-du: - Color management module support - LVDS encoder dual-link support - R8A77980 support analogic: - add support for an6345 ast: - atomic modeset support - primary plane garbage fix arcgpu: - fixes for fourcc handling tegra: - minor fixes and improvments mcde: - vblank support meson: - OSD1 plane AFBC commit gma500: - add pageflip support - reomve global drm_dev komeda: - tweak debugfs output - d32 support - runtime PM suppotr udl: - use generic shmem helpers - cleanup and fixes" * tag 'drm-next-2020-01-30' of git://anongit.freedesktop.org/drm/drm: (1998 commits) drm/nouveau/fb/gp102-: allow module to load even when scrubber binary is missing drm/nouveau/acr: return error when registering LSF if ACR not supported drm/nouveau/disp/gv100-: not all channel types support reporting error codes drm/nouveau/disp/nv50-: prevent oops when no channel method map provided drm/nouveau: support synchronous pushbuf submission drm/nouveau: signal pending fences when channel has been killed drm/nouveau: reject attempts to submit to dead channels drm/nouveau: zero vma pointer even if we only unreference it rather than free drm/nouveau: Add HD-audio component notifier support drm/nouveau: fix build error without CONFIG_IOMMU_API drm/nouveau/kms/nv04: remove set but not used variable 'width' drm/nouveau/kms/nv50: remove set but not unused variable 'nv_connector' drm/nouveau/mmu: fix comptag memory leak drm/nouveau/gr/gp10b: Use gp100_grctx and gp100_gr_zbc drm/nouveau/pmu/gm20b,gp10b: Fix Falcon bootstrapping drm/exynos: Rename Exynos to lowercase drm/exynos: change callback names drm/mst: Don't do atomic checks over disabled managers drm/amdgpu: add the lost mutex_init back drm/amd/display: skip opp blank or unblank if test pattern enabled ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c258
1 files changed, 174 insertions, 84 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c17505fba988..53d882000101 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -66,6 +66,7 @@
#include "amdgpu_pmu.h"
#include <linux/suspend.h>
+#include <drm/task_barrier.h>
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
@@ -137,14 +138,14 @@ static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
/**
- * amdgpu_device_is_px - Is the device is a dGPU with HG/PX power control
+ * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
*
* @dev: drm_device pointer
*
* Returns true if the device is a dGPU with HG/PX power control,
* otherwise return false.
*/
-bool amdgpu_device_is_px(struct drm_device *dev)
+bool amdgpu_device_supports_boco(struct drm_device *dev)
{
struct amdgpu_device *adev = dev->dev_private;
@@ -154,6 +155,21 @@ bool amdgpu_device_is_px(struct drm_device *dev)
}
/**
+ * amdgpu_device_supports_baco - Does the device support BACO
+ *
+ * @dev: drm_device pointer
+ *
+ * Returns true if the device supporte BACO,
+ * otherwise return false.
+ */
+bool amdgpu_device_supports_baco(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = dev->dev_private;
+
+ return amdgpu_asic_supports_baco(adev);
+}
+
+/**
* VRAM access helper functions.
*
* amdgpu_device_vram_access - read/write a buffer in vram
@@ -1016,8 +1032,6 @@ def_value:
*/
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
- int ret = 0;
-
if (amdgpu_sched_jobs < 4) {
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
amdgpu_sched_jobs);
@@ -1057,7 +1071,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
- return ret;
+ return 0;
}
/**
@@ -1072,8 +1086,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
struct drm_device *dev = pci_get_drvdata(pdev);
+ int r;
- if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
+ if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
return;
if (state == VGA_SWITCHEROO_ON) {
@@ -1081,7 +1096,12 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
/* don't suspend or resume card normally */
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
- amdgpu_device_resume(dev, true, true);
+ pci_set_power_state(dev->pdev, PCI_D0);
+ pci_restore_state(dev->pdev);
+ r = pci_enable_device(dev->pdev);
+ if (r)
+ DRM_WARN("pci_enable_device failed (%d)\n", r);
+ amdgpu_device_resume(dev, true);
dev->switch_power_state = DRM_SWITCH_POWER_ON;
drm_kms_helper_poll_enable(dev);
@@ -1089,7 +1109,11 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
pr_info("amdgpu: switched off\n");
drm_kms_helper_poll_disable(dev);
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
- amdgpu_device_suspend(dev, true, true);
+ amdgpu_device_suspend(dev, true);
+ pci_save_state(dev->pdev);
+ /* Shut down the device */
+ pci_disable_device(dev->pdev);
+ pci_set_power_state(dev->pdev, PCI_D3cold);
dev->switch_power_state = DRM_SWITCH_POWER_OFF;
}
}
@@ -1527,7 +1551,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
}
parse_soc_bounding_box:
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
/*
* soc bounding box info is not integrated in disocovery table,
* we always need to parse it from gpu info firmware.
@@ -1538,7 +1561,6 @@ parse_soc_bounding_box:
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
}
-#endif
break;
}
default:
@@ -1787,7 +1809,8 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
}
}
- r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
+ if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
+ r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
return r;
}
@@ -1854,6 +1877,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
}
}
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_init_data_exchange(adev);
+
r = amdgpu_ib_pool_init(adev);
if (r) {
dev_err(adev->dev, "IB initialization failed (%d).\n", r);
@@ -1895,11 +1921,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
amdgpu_amdkfd_device_init(adev);
init_failed:
- if (amdgpu_sriov_vf(adev)) {
- if (!r)
- amdgpu_virt_init_data_exchange(adev);
+ if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, true);
- }
return r;
}
@@ -1938,6 +1961,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
* amdgpu_device_set_cg_state - set clockgating for amdgpu device
*
* @adev: amdgpu_device pointer
+ * @state: clockgating state (gate or ungate)
*
* The list of all the hardware IPs that make up the asic is walked and the
* set_clockgating_state callbacks are run.
@@ -1962,6 +1986,7 @@ static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* enable clockgating to save power */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@@ -1992,6 +2017,7 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_powergating_state) {
/* enable powergating to save power */
r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
@@ -2319,14 +2345,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
adev->ip_blocks[i].status.hw = false;
/* handle putting the SMC in the appropriate state */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
- if (is_support_sw_smu(adev)) {
- r = smu_set_mp1_state(&adev->smu, adev->mp1_state);
- } else if (adev->powerplay.pp_funcs &&
- adev->powerplay.pp_funcs->set_mp1_state) {
- r = adev->powerplay.pp_funcs->set_mp1_state(
- adev->powerplay.pp_handle,
- adev->mp1_state);
- }
+ r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
if (r) {
DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
adev->mp1_state, r);
@@ -2413,7 +2432,8 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
AMD_IP_BLOCK_TYPE_GFX,
AMD_IP_BLOCK_TYPE_SDMA,
AMD_IP_BLOCK_TYPE_UVD,
- AMD_IP_BLOCK_TYPE_VCE
+ AMD_IP_BLOCK_TYPE_VCE,
+ AMD_IP_BLOCK_TYPE_VCN
};
for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
@@ -2428,7 +2448,11 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
block->status.hw)
continue;
- r = block->version->funcs->hw_init(adev);
+ if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
+ r = block->version->funcs->resume(adev);
+ else
+ r = block->version->funcs->hw_init(adev);
+
DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
if (r)
return r;
@@ -2600,20 +2624,19 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
case CHIP_RAVEN:
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
case CHIP_RENOIR:
#endif
return amdgpu_dc != 0;
#endif
default:
+ if (amdgpu_dc > 0)
+ DRM_INFO("Display Core has been requested via kernel parameter "
+ "but isn't supported by ASIC, ignoring\n");
return false;
}
}
@@ -2638,8 +2661,38 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
{
struct amdgpu_device *adev =
container_of(__work, struct amdgpu_device, xgmi_reset_work);
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+
+ /* It's a bug to not have a hive within this function */
+ if (WARN_ON(!hive))
+ return;
+
+ /*
+ * Use task barrier to synchronize all xgmi reset works across the
+ * hive. task_barrier_enter and task_barrier_exit will block
+ * until all the threads running the xgmi reset works reach
+ * those points. task_barrier_full will do both blocks.
+ */
+ if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+
+ task_barrier_enter(&hive->tb);
+ adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
+
+ if (adev->asic_reset_res)
+ goto fail;
+
+ task_barrier_exit(&hive->tb);
+ adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
+
+ if (adev->asic_reset_res)
+ goto fail;
+ } else {
+
+ task_barrier_full(&hive->tb);
+ adev->asic_reset_res = amdgpu_asic_reset(adev);
+ }
- adev->asic_reset_res = amdgpu_asic_reset(adev);
+fail:
if (adev->asic_reset_res)
DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
adev->asic_reset_res, adev->ddev->unique);
@@ -2731,7 +2784,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
uint32_t flags)
{
int r, i;
- bool runtime = false;
+ bool boco = false;
u32 max_MBps;
adev->shutdown = false;
@@ -2754,7 +2807,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->mman.buffer_funcs = NULL;
adev->mman.buffer_funcs_ring = NULL;
adev->vm_manager.vm_pte_funcs = NULL;
- adev->vm_manager.vm_pte_num_rqs = 0;
+ adev->vm_manager.vm_pte_num_scheds = 0;
adev->gmc.gmc_funcs = NULL;
adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
@@ -2794,9 +2847,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
mutex_init(&adev->lock_reset);
- mutex_init(&adev->notifier_lock);
- mutex_init(&adev->virt.dpm_mutex);
mutex_init(&adev->psp.mutex);
+ mutex_init(&adev->notifier_lock);
r = amdgpu_device_check_arguments(adev);
if (r)
@@ -2902,12 +2954,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
* ignore it */
vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
- if (amdgpu_device_is_px(ddev))
- runtime = true;
- if (!pci_is_thunderbolt_attached(adev->pdev))
+ if (amdgpu_device_supports_boco(ddev))
+ boco = true;
+ if (amdgpu_has_atpx() &&
+ (amdgpu_is_atpx_hybrid() ||
+ amdgpu_has_atpx_dgpu_power_cntl()) &&
+ !pci_is_thunderbolt_attached(adev->pdev))
vga_switcheroo_register_client(adev->pdev,
- &amdgpu_switcheroo_ops, runtime);
- if (runtime)
+ &amdgpu_switcheroo_ops, boco);
+ if (boco)
vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
if (amdgpu_emu_mode == 1) {
@@ -2994,11 +3049,17 @@ fence_driver_init:
}
dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
- if (amdgpu_virt_request_full_gpu(adev, false))
- amdgpu_virt_release_full_gpu(adev, false);
goto failed;
}
+ DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
+ adev->gfx.config.max_shader_engines,
+ adev->gfx.config.max_sh_per_se,
+ adev->gfx.config.max_cu_per_sh,
+ adev->gfx.cu_info.number);
+
+ amdgpu_ctx_init_sched(adev);
+
adev->accel_working = true;
amdgpu_vm_check_compute_bug(adev);
@@ -3013,16 +3074,19 @@ fence_driver_init:
amdgpu_fbdev_init(adev);
- if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
- amdgpu_pm_virt_sysfs_init(adev);
-
r = amdgpu_pm_sysfs_init(adev);
- if (r)
+ if (r) {
+ adev->pm_sysfs_en = false;
DRM_ERROR("registering pm debugfs failed (%d).\n", r);
+ } else
+ adev->pm_sysfs_en = true;
r = amdgpu_ucode_sysfs_init(adev);
- if (r)
+ if (r) {
+ adev->ucode_sysfs_en = false;
DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
+ } else
+ adev->ucode_sysfs_en = true;
r = amdgpu_debugfs_gem_init(adev);
if (r)
@@ -3091,7 +3155,7 @@ fence_driver_init:
failed:
amdgpu_vf_error_trans_all(adev);
- if (runtime)
+ if (boco)
vga_switcheroo_fini_domain_pm_ops(adev->dev);
return r;
@@ -3122,7 +3186,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
drm_atomic_helper_shutdown(adev->ddev);
}
amdgpu_fence_driver_fini(adev);
- amdgpu_pm_sysfs_fini(adev);
+ if (adev->pm_sysfs_en)
+ amdgpu_pm_sysfs_fini(adev);
amdgpu_fbdev_fini(adev);
r = amdgpu_device_ip_fini(adev);
if (adev->firmware.gpu_info_fw) {
@@ -3139,9 +3204,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
kfree(adev->bios);
adev->bios = NULL;
- if (!pci_is_thunderbolt_attached(adev->pdev))
+ if (amdgpu_has_atpx() &&
+ (amdgpu_is_atpx_hybrid() ||
+ amdgpu_has_atpx_dgpu_power_cntl()) &&
+ !pci_is_thunderbolt_attached(adev->pdev))
vga_switcheroo_unregister_client(adev->pdev);
- if (adev->flags & AMD_IS_PX)
+ if (amdgpu_device_supports_boco(adev->ddev))
vga_switcheroo_fini_domain_pm_ops(adev->dev);
vga_client_register(adev->pdev, NULL, NULL, NULL);
if (adev->rio_mem)
@@ -3150,12 +3218,11 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
iounmap(adev->rmmio);
adev->rmmio = NULL;
amdgpu_device_doorbell_fini(adev);
- if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
- amdgpu_pm_virt_sysfs_fini(adev);
amdgpu_debugfs_regs_cleanup(adev);
device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
- amdgpu_ucode_sysfs_fini(adev);
+ if (adev->ucode_sysfs_en)
+ amdgpu_ucode_sysfs_fini(adev);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
amdgpu_debugfs_preempt_cleanup(adev);
@@ -3178,7 +3245,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
* Returns 0 for success or an error on failure.
* Called at driver suspend.
*/
-int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
+int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
{
struct amdgpu_device *adev;
struct drm_crtc *crtc;
@@ -3261,13 +3328,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
*/
amdgpu_bo_evict_vram(adev);
- if (suspend) {
- pci_save_state(dev->pdev);
- /* Shut down the device */
- pci_disable_device(dev->pdev);
- pci_set_power_state(dev->pdev, PCI_D3hot);
- }
-
return 0;
}
@@ -3282,7 +3342,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
* Returns 0 for success or an error on failure.
* Called at driver resume.
*/
-int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
+int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
{
struct drm_connector *connector;
struct drm_connector_list_iter iter;
@@ -3293,14 +3353,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
- if (resume) {
- pci_set_power_state(dev->pdev, PCI_D0);
- pci_restore_state(dev->pdev);
- r = pci_enable_device(dev->pdev);
- if (r)
- return r;
- }
-
/* post card */
if (amdgpu_device_need_post(adev)) {
r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -3639,13 +3691,12 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
if (r)
return r;
- amdgpu_amdkfd_pre_reset(adev);
-
/* Resume IP prior to SMC */
r = amdgpu_device_ip_reinit_early_sriov(adev);
if (r)
goto error;
+ amdgpu_virt_init_data_exchange(adev);
/* we need recover gart prior to run SMC/CP/SDMA resume */
amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
@@ -3663,7 +3714,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
amdgpu_amdkfd_post_reset(adev);
error:
- amdgpu_virt_init_data_exchange(adev);
amdgpu_virt_release_full_gpu(adev, true);
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
amdgpu_inc_vram_lost(adev);
@@ -3709,6 +3759,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_RAVEN:
+ case CHIP_ARCTURUS:
break;
default:
goto disabled;
@@ -3785,7 +3836,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
/* For XGMI run all resets in parallel to speed up the process */
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
- if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
+ if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
r = -EALREADY;
} else
r = amdgpu_asic_reset(tmp_adev);
@@ -3797,7 +3848,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
}
}
- /* For XGMI wait for all PSP resets to complete before proceed */
+ /* For XGMI wait for all resets to complete before proceed */
if (!r) {
list_for_each_entry(tmp_adev, device_list_handle,
gmc.xgmi.head) {
@@ -3811,6 +3862,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
}
}
+ if (!r && amdgpu_ras_intr_triggered())
+ amdgpu_ras_intr_cleared();
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
if (need_full_reset) {
@@ -3899,7 +3952,7 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
mutex_lock(&adev->lock_reset);
atomic_inc(&adev->gpu_reset_counter);
- adev->in_gpu_reset = 1;
+ adev->in_gpu_reset = true;
switch (amdgpu_asic_reset_method(adev)) {
case AMD_RESET_METHOD_MODE1:
adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
@@ -3919,7 +3972,7 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
{
amdgpu_vf_error_trans_all(adev);
adev->mp1_state = PP_MP1_STATE_NONE;
- adev->in_gpu_reset = 0;
+ adev->in_gpu_reset = false;
mutex_unlock(&adev->lock_reset);
}
@@ -3943,12 +3996,15 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_device *tmp_adev = NULL;
int i, r = 0;
bool in_ras_intr = amdgpu_ras_intr_triggered();
+ bool use_baco =
+ (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
+ true : false;
/*
* Flush RAM to disk so that after reboot
* the user can read log and see why the system rebooted.
*/
- if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) {
+ if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
DRM_WARN("Emergency reboot.");
@@ -3959,7 +4015,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
need_full_reset = job_signaled = false;
INIT_LIST_HEAD(&device_list);
- dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs stop":"reset");
+ dev_info(adev->dev, "GPU %s begin!\n",
+ (in_ras_intr && !use_baco) ? "jobs stop":"reset");
cancel_delayed_work_sync(&adev->delayed_init_work);
@@ -4026,7 +4083,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
amdgpu_unregister_gpu_instance(tmp_adev);
/* disable ras on ALL IPs */
- if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev))
+ if (!(in_ras_intr && !use_baco) &&
+ amdgpu_device_ip_need_full_reset(tmp_adev))
amdgpu_ras_suspend(tmp_adev);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -4037,13 +4095,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
- if (in_ras_intr)
+ if (in_ras_intr && !use_baco)
amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
}
}
- if (in_ras_intr)
+ if (in_ras_intr && !use_baco)
goto skip_sched_resume;
/*
@@ -4136,7 +4194,7 @@ skip_hw_reset:
skip_sched_resume:
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
/*unlock kfd: SRIOV would do it separately */
- if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev))
+ if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_post_reset(tmp_adev);
amdgpu_device_unlock_adev(tmp_adev);
}
@@ -4285,3 +4343,35 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
}
}
+int amdgpu_device_baco_enter(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = dev->dev_private;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (!amdgpu_device_supports_baco(adev->ddev))
+ return -ENOTSUPP;
+
+ if (ras && ras->supported)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
+
+ return amdgpu_dpm_baco_enter(adev);
+}
+
+int amdgpu_device_baco_exit(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = dev->dev_private;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ if (!amdgpu_device_supports_baco(adev->ddev))
+ return -ENOTSUPP;
+
+ ret = amdgpu_dpm_baco_exit(adev);
+ if (ret)
+ return ret;
+
+ if (ras && ras->supported)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
+
+ return 0;
+}