Diffstat (limited to 'drivers/gpu')
104 files changed, 1056 insertions, 492 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 2cba2b6ebe1c..f01925ed8176 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -188,7 +188,7 @@ config DRM_DEBUG_DP_MST_TOPOLOGY_REFS bool "Enable refcount backtrace history in the DP MST helpers" depends on STACKTRACE_SUPPORT select STACKDEPOT - depends on DRM_KMS_HELPER + select DRM_KMS_HELPER depends on DEBUG_KERNEL depends on EXPERT help diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index c98c647f981d..54cde090c3f4 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -121,7 +121,6 @@ struct adp_drv_private { dma_addr_t mask_iova; int be_irq; int fe_irq; - spinlock_t irq_lock; struct drm_pending_vblank_event *event; }; @@ -288,6 +287,7 @@ static void adp_crtc_atomic_enable(struct drm_crtc *crtc, writel(BIT(0), adp->be + ADBE_BLEND_EN3); writel(BIT(0), adp->be + ADBE_BLEND_BYPASS); writel(BIT(0), adp->be + ADBE_BLEND_EN4); + drm_crtc_vblank_on(crtc); } static void adp_crtc_atomic_disable(struct drm_crtc *crtc, @@ -310,6 +310,7 @@ static void adp_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state) { u32 frame_num = 1; + unsigned long flags; struct adp_drv_private *adp = crtc_to_adp(crtc); struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state, crtc); u64 new_size = ALIGN(new_state->mode.hdisplay * @@ -330,13 +331,19 @@ static void adp_crtc_atomic_flush(struct drm_crtc *crtc, } writel(ADBE_FIFO_SYNC | frame_num, adp->be + ADBE_FIFO); //FIXME: use adbe flush interrupt - spin_lock_irq(&crtc->dev->event_lock); if (crtc->state->event) { - drm_crtc_vblank_get(crtc); - adp->event = crtc->state->event; + struct drm_pending_vblank_event *event = crtc->state->event; + + crtc->state->event = NULL; + spin_lock_irqsave(&crtc->dev->event_lock, flags); + + if (drm_crtc_vblank_get(crtc) != 0) + drm_crtc_send_vblank_event(crtc, event); + else + adp->event = event; + + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } - crtc->state->event = NULL; - spin_unlock_irq(&crtc->dev->event_lock); } static const struct drm_crtc_funcs adp_crtc_funcs = { @@ -482,8 +489,6 @@ static irqreturn_t adp_fe_irq(int irq, void *arg) u32 int_status; u32 int_ctl; - spin_lock(&adp->irq_lock); - int_status = readl(adp->fe + ADP_INT_STATUS); if (int_status & ADP_INT_STATUS_VBLANK) { drm_crtc_handle_vblank(&adp->crtc); @@ -501,7 +506,6 @@ static irqreturn_t adp_fe_irq(int irq, void *arg) writel(int_status, adp->fe + ADP_INT_STATUS); - spin_unlock(&adp->irq_lock); return IRQ_HANDLED; } @@ -512,8 +516,7 @@ static int adp_drm_bind(struct device *dev) struct adp_drv_private *adp = to_adp(drm); int err; - adp_disable_vblank(adp); - writel(ADP_CTRL_FIFO_ON | ADP_CTRL_VBLANK_ON, adp->fe + ADP_CTRL); + writel(ADP_CTRL_FIFO_ON, adp->fe + ADP_CTRL); adp->next_bridge = drmm_of_get_bridge(&adp->drm, dev->of_node, 0, 0); if (IS_ERR(adp->next_bridge)) { @@ -567,8 +570,6 @@ static int adp_probe(struct platform_device *pdev) if (IS_ERR(adp)) return PTR_ERR(adp); - spin_lock_init(&adp->irq_lock); - dev_set_drvdata(&pdev->dev, &adp->drm); err = adp_parse_of(pdev, adp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ef6e78224fdf..c3641331d4de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1614,11 +1614,9 @@ static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_cap #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct 
amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); -void amdgpu_choose_low_power_state(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } -static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif void amdgpu_register_gpu_instance(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index b7f8f2ff143d..707e131f89d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1533,22 +1533,4 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) #endif /* CONFIG_AMD_PMC */ } -/** - * amdgpu_choose_low_power_state - * - * @adev: amdgpu_device_pointer - * - * Choose the target low power state for the GPU - */ -void amdgpu_choose_low_power_state(struct amdgpu_device *adev) -{ - if (adev->in_runpm) - return; - - if (amdgpu_acpi_is_s0ix_active(adev)) - adev->in_s0ix = true; - else if (amdgpu_acpi_is_s3_active(adev)) - adev->in_s3 = true; -} - #endif /* CONFIG_SUSPEND */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index cfdf558b48b6..02138aa55793 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -109,7 +109,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; int r; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7f354cd532dc..f8b3e04d71ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4907,28 +4907,20 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) * @data: data * * This function is called when the system is about to suspend or hibernate. - * It is used to evict resources from the device before the system goes to - * sleep while there is still access to swap. + * It is used to set the appropriate flags so that eviction can be optimized + * in the pm prepare callback. */ static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, void *data) { struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb); - int r; switch (mode) { case PM_HIBERNATION_PREPARE: adev->in_s4 = true; - fallthrough; - case PM_SUSPEND_PREPARE: - r = amdgpu_device_evict_resources(adev); - /* - * This is considered non-fatal at this time because - * amdgpu_device_prepare() will also fatally evict resources. 
- * See https://gitlab.freedesktop.org/drm/amd/-/issues/3781 - */ - if (r) - drm_warn(adev_to_drm(adev), "Failed to evict resources, freeze active processes if problems occur: %d\n", r); + break; + case PM_POST_HIBERNATION: + adev->in_s4 = false; break; } @@ -4949,15 +4941,13 @@ int amdgpu_device_prepare(struct drm_device *dev) struct amdgpu_device *adev = drm_to_adev(dev); int i, r; - amdgpu_choose_low_power_state(adev); - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; /* Evict the majority of BOs before starting suspend sequence */ r = amdgpu_device_evict_resources(adev); if (r) - goto unprepare; + return r; flush_delayed_work(&adev->gfx.gfx_off_delay_work); @@ -4968,15 +4958,10 @@ int amdgpu_device_prepare(struct drm_device *dev) continue; r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]); if (r) - goto unprepare; + return r; } return 0; - -unprepare: - adev->in_s0ix = adev->in_s3 = adev->in_s4 = false; - - return r; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 667080cc9ae1..44e120f9f764 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -43,6 +43,29 @@ #include <linux/dma-fence-array.h> #include <linux/pci-p2pdma.h> +static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops; + +/** + * dma_buf_attach_adev - Helper to get adev of an attachment + * + * @attach: attachment + * + * Returns: + * A struct amdgpu_device * if the attaching device is an amdgpu device or + * partition, NULL otherwise. + */ +static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach) +{ + if (attach->importer_ops == &amdgpu_dma_buf_attach_ops) { + struct drm_gem_object *obj = attach->importer_priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + return amdgpu_ttm_adev(bo->tbo.bdev); + } + + return NULL; +} + /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation * @@ -54,11 +77,13 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) { + struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach); struct drm_gem_object *obj = dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) + if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) && + pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; amdgpu_vm_bo_update_shared(bo); @@ -77,22 +102,32 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { struct dma_buf *dmabuf = attach->dmabuf; struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv); - u32 domains = bo->preferred_domains; + u32 domains = bo->allowed_domains; dma_resv_assert_held(dmabuf->resv); - /* - * Try pinning into VRAM to allow P2P with RDMA NICs without ODP + /* Try pinning into VRAM to allow P2P with RDMA NICs without ODP * support if all attachments can do P2P. If any attachment can't do * P2P just pin into GTT instead. + * + * To avoid with conflicting pinnings between GPUs and RDMA when move + * notifiers are disabled, only allow pinning in VRAM when move + * notiers are enabled. 
*/ - list_for_each_entry(attach, &dmabuf->attachments, node) - if (!attach->peer2peer) - domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) { + domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + } else { + list_for_each_entry(attach, &dmabuf->attachments, node) + if (!attach->peer2peer) + domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + } if (domains & AMDGPU_GEM_DOMAIN_VRAM) bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + if (WARN_ON(!domains)) + return -EINVAL; + return amdgpu_bo_pin(bo, domains); } @@ -164,6 +199,11 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, break; case TTM_PL_VRAM: + /* XGMI-accessible memory should never be DMA-mapped */ + if (WARN_ON(amdgpu_dmabuf_is_xgmi_accessible( + dma_buf_attach_adev(attach), bo))) + return ERR_PTR(-EINVAL); + r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0, bo->tbo.base.size, attach->dev, dir, &sgt); @@ -470,6 +510,9 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, struct drm_gem_object *obj = &bo->tbo.base; struct drm_gem_object *gobj; + if (!adev) + return false; + if (obj->import_attach) { struct dma_buf *dma_buf = obj->import_attach->dmabuf; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 24ee4710f807..72c807f5822e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2615,13 +2615,8 @@ static int amdgpu_pmops_freeze(struct device *dev) static int amdgpu_pmops_thaw(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(drm_dev); - int r; - - r = amdgpu_device_resume(drm_dev, true); - adev->in_s4 = false; - return r; + return amdgpu_device_resume(drm_dev, true); } static int amdgpu_pmops_poweroff(struct device *dev) @@ -2634,9 +2629,6 @@ static int amdgpu_pmops_poweroff(struct device *dev) static int amdgpu_pmops_restore(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(drm_dev); - - adev->in_s4 = false; return amdgpu_device_resume(drm_dev, true); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index cdcdae7f71ce..83adf81defc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -66,7 +66,6 @@ #define VCN_ENC_CMD_REG_WAIT 0x0000000c #define VCN_AON_SOC_ADDRESS_2_0 0x1f800 -#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_VID_IP_ADDRESS_2_0 0x0 #define VCN_AON_IP_ADDRESS_2_0 0x30000 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index e74e26b6a4f2..fec9a007533a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -752,6 +752,18 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gmc.vram_type = vram_type; adev->gmc.vram_vendor = vram_vendor; + /* The mall_size is already calculated as mall_size_per_umc * num_umc. + * However, for gfx1151, which features a 2-to-1 UMC mapping, + * the result must be multiplied by 2 to determine the actual mall size. 
+ */ + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 5, 1): + adev->gmc.mall_size *= 2; + break; + default: + break; + } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index f1dc13b3ab38..cbbeadeb53f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -41,7 +41,12 @@ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 43195c079748..086a647308df 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -32,7 +32,12 @@ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index fcb8dd2876bc..40940b4ab400 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -33,7 +33,17 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev, if (!ring || !ring->funcs->emit_wreg) { WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + if (amdgpu_sriov_vf(adev)) { + /* this is fine because SR_IOV doesn't remap the register */ + RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + } else { + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. 
+ */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); + } } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index a88d25a06c29..6ccd31c8bc69 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -35,7 +35,12 @@ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 49f7eb4fbd11..2c9239a22f39 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -32,7 +32,12 @@ static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 2ece3ae75ec1..bed5ef4d8788 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -360,7 +360,7 @@ static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_BIF_LS; } -#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) +#define MMIO_REG_HOLE_OFFSET 0x44000 static void nbio_v7_11_set_reg_remap(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 8e7a36f26e9c..b8d835c9e17e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -39,6 +39,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index d716510b8dd6..3eec1b8feaee 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -39,6 +39,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 22ae1939476f..0b19f0ab4480 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -40,6 +40,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index c6f6392c1c20..1f777c125b00 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_HARVEST_MMSCH 0 @@ -614,7 +615,8 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, /* VCN global tiling registers */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 3e176b4b7c69..012f6ea928ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index ba603b2246e2..f11df9c2ec13 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 (0x48300 + 0x38000) +#define VCN1_AON_SOC_ADDRESS_3_0 (0x48000 + 0x38000) #define VCN_HARVEST_MMSCH 0 @@ -1022,6 +1023,10 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | VCN_RB1_DB_CTRL__EN_MASK); + /* Keeping one read-back to ensure all register writes are done, otherwise + * it may introduce race conditions */ + RREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL); + return 0; } @@ -1204,6 +1209,10 @@ static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst) WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + /* Keeping one read-back to ensure all register writes are done, otherwise + * it may introduce race conditions */ + RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index d99d05f42f1d..b90da3d3e140 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -533,7 +533,8 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, /* VCN global tiling registers */ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 581d8629b9d9..e0e84ef7f568 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -503,6 +503,52 @@ 
static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_vcn_inst *vinst) } /** + * vcn_v5_0_1_pause_dpg_mode - VCN pause with dpg mode + * + * @vinst: VCN instance + * @new_state: pause state + * + * Pause dpg mode for VCN block + */ +static int vcn_v5_0_1_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) +{ + struct amdgpu_device *adev = vinst->adev; + uint32_t reg_data = 0; + int vcn_inst; + + vcn_inst = GET_INST(VCN, vinst->inst); + + /* pause/unpause if state is changed */ + if (vinst->pause_state.fw_based != new_state->fw_based) { + DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d %s\n", + vinst->pause_state.fw_based, new_state->fw_based, + new_state->fw_based ? "VCN_DPG_STATE__PAUSE" : "VCN_DPG_STATE__UNPAUSE"); + reg_data = RREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + } else { + /* unpause DPG, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + } + vinst->pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + + +/** * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode * * @vinst: VCN instance @@ -518,6 +564,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE}; int vcn_inst; uint32_t tmp; @@ -582,6 +629,9 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, if (indirect) amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + /* Pause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr)); @@ -775,9 +825,13 @@ static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) int inst_idx = vinst->inst; uint32_t tmp; int vcn_inst; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; vcn_inst = GET_INST(VCN, inst_idx); + /* Unpause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9fed4471405f..a187cdb43e7e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -372,6 +372,8 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev, static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state, struct dm_crtc_state *new_state) { + if (new_state->stream->adjust.timing_adjust_pending) + return true; if (new_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED) return true; else if (amdgpu_dm_crtc_vrr_active(old_state) != amdgpu_dm_crtc_vrr_active(new_state)) @@ -673,15 +675,21 @@ static void dm_crtc_high_irq(void *interrupt_params) spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); if (acrtc->dm_irq_params.stream && - 
acrtc->dm_irq_params.vrr_params.supported && - acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_VARIABLE) { + acrtc->dm_irq_params.vrr_params.supported) { + bool replay_en = acrtc->dm_irq_params.stream->link->replay_settings.replay_feature_enabled; + bool psr_en = acrtc->dm_irq_params.stream->link->psr_settings.psr_feature_enabled; + bool fs_active_var_en = acrtc->dm_irq_params.freesync_config.state == VRR_STATE_ACTIVE_VARIABLE; + mod_freesync_handle_v_update(adev->dm.freesync_module, acrtc->dm_irq_params.stream, &acrtc->dm_irq_params.vrr_params); - dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc->dm_irq_params.stream, - &acrtc->dm_irq_params.vrr_params.adjust); + /* update vmin_vmax only if freesync is enabled, or only if PSR and REPLAY are disabled */ + if (fs_active_var_en || (!fs_active_var_en && !replay_en && !psr_en)) { + dc_stream_adjust_vmin_vmax(adev->dm.dc, + acrtc->dm_irq_params.stream, + &acrtc->dm_irq_params.vrr_params.adjust); + } } /* @@ -1920,26 +1928,6 @@ static enum dmub_ips_disable_type dm_get_default_ips_mode( switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { case IP_VERSION(3, 5, 0): case IP_VERSION(3, 6, 0): - /* - * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to - * cause a hard hang. A fix exists for newer PMFW. - * - * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest - * IPS state in all cases, except for s0ix and all displays off (DPMS), - * where IPS2 is allowed. - * - * When checking pmfw version, use the major and minor only. - */ - if ((adev->pm.fw_version & 0x00FFFF00) < 0x005D6300) - ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; - else if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(11, 5, 0)) - /* - * Other ASICs with DCN35 that have residency issues with - * IPS2 in idle. - * We want them to use IPS2 only in display off cases. - */ - ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; - break; case IP_VERSION(3, 5, 1): ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; break; @@ -3355,16 +3343,16 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state, for (k = 0; k < dc_state->stream_count; k++) { bundle->stream_update.stream = dc_state->streams[k]; - for (m = 0; m < dc_state->stream_status->plane_count; m++) { + for (m = 0; m < dc_state->stream_status[k].plane_count; m++) { bundle->surface_updates[m].surface = - dc_state->stream_status->plane_states[m]; + dc_state->stream_status[k].plane_states[m]; bundle->surface_updates[m].surface->force_full_update = true; } update_planes_and_stream_adapter(dm->dc, UPDATE_TYPE_FULL, - dc_state->stream_status->plane_count, + dc_state->stream_status[k].plane_count, dc_state->streams[k], &bundle->stream_update, bundle->surface_updates); @@ -3481,11 +3469,6 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) return 0; } - - /* leave display off for S4 sequence */ - if (adev->in_s4) - return 0; - /* Recreate dc_state - DC invalidates it when setting power state to S3. 
*/ dc_state_release(dm_state->context); dm_state->context = dc_state_create(dm->dc, NULL); @@ -6521,12 +6504,12 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode, const struct drm_display_mode *native_mode, bool scale_enabled) { - if (scale_enabled) { - copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode); - } else if (native_mode->clock == drm_mode->clock && - native_mode->htotal == drm_mode->htotal && - native_mode->vtotal == drm_mode->vtotal) { - copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode); + if (scale_enabled || ( + native_mode->clock == drm_mode->clock && + native_mode->htotal == drm_mode->htotal && + native_mode->vtotal == drm_mode->vtotal)) { + if (native_mode->crtc_clock) + copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode); } else { /* no scaling nor amdgpu inserted, no need to patch */ } @@ -11043,6 +11026,9 @@ static bool should_reset_plane(struct drm_atomic_state *state, state->allow_modeset) return true; + if (amdgpu_in_reset(adev) && state->allow_modeset) + return true; + /* Exit early if we know that we're adding or removing the plane. */ if (old_plane_state->crtc != new_plane_state->crtc) return true; @@ -12760,7 +12746,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( * Transient states before tunneling is enabled could * lead to this error. We can ignore this for now. */ - if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) { + if (p_notify->result == AUX_RET_ERROR_PROTOCOL_ERROR) { DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n", payload->address, payload->length, p_notify->result); @@ -12769,22 +12755,15 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( goto out; } + payload->reply[0] = adev->dm.dmub_notify->aux_reply.command & 0xF; + if (adev->dm.dmub_notify->aux_reply.command & 0xF0) + /* The reply is stored in the top nibble of the command. 
*/ + payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; - payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; - if (!payload->write && p_notify->aux_reply.length && - (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { - - if (payload->length != p_notify->aux_reply.length) { - DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n", - p_notify->aux_reply.length, - payload->address, payload->length); - *operation_result = AUX_RET_ERROR_INVALID_REPLY; - goto out; - } - + /*write req may receive a byte indicating partially written number as well*/ + if (p_notify->aux_reply.length) memcpy(payload->data, p_notify->aux_reply.data, p_notify->aux_reply.length); - } /* success */ ret = p_notify->aux_reply.length; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 5198a079b463..8f22ad966543 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -173,6 +173,9 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = aconnector->base.index; guard(mutex)(&hdcp_w->mutex); + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); hdcp_w->aconnector[conn_index] = aconnector; memset(&link_adjust, 0, sizeof(link_adjust)); @@ -220,7 +223,6 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = aconnector->base.index; guard(mutex)(&hdcp_w->mutex); - hdcp_w->aconnector[conn_index] = aconnector; /* the removal of display will invoke auth reset -> hdcp destroy and * we'd expect the Content Protection (CP) property changed back to @@ -236,7 +238,10 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, } mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); - + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } process_output(hdcp_w); } @@ -254,6 +259,10 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) { hdcp_w->encryption_status[conn_index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } } process_output(hdcp_w); @@ -488,6 +497,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) struct hdcp_workqueue *hdcp_work = handle; struct amdgpu_dm_connector *aconnector = config->dm_stream_ctx; int link_index = aconnector->dc_link->link_index; + unsigned int conn_index = aconnector->base.index; struct mod_hdcp_display *display = &hdcp_work[link_index].display; struct mod_hdcp_link *link = &hdcp_work[link_index].link; struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; @@ -544,7 +554,10 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) guard(mutex)(&hdcp_w->mutex); mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output); - + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = aconnector; process_output(hdcp_w); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 2cd35392e2da..1395a748d726 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -918,7 +918,7 @@ dm_helpers_probe_acpi_edid(void *data, u8 *buf, unsigned int block, size_t len) { struct drm_connector *connector = data; struct acpi_device *acpidev = ACPI_COMPANION(connector->dev->dev); - unsigned char start = block * EDID_LENGTH; + unsigned short start = block * EDID_LENGTH; struct edid *edid; int r; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 7ceedf626d23..5cdbc86ef8f5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -51,6 +51,9 @@ #define PEAK_FACTOR_X1000 1006 +/* + * This function handles both native AUX and I2C-Over-AUX transactions. + */ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { @@ -59,6 +62,7 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, enum aux_return_code_type operation_result; struct amdgpu_device *adev; struct ddc_service *ddc; + uint8_t copy[16]; if (WARN_ON(msg->size > 16)) return -E2BIG; @@ -74,6 +78,11 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, (msg->request & DP_AUX_I2C_WRITE_STATUS_UPDATE) != 0; payload.defer_delay = 0; + if (payload.write) { + memcpy(copy, msg->buffer, msg->size); + payload.data = copy; + } + result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload, &operation_result); @@ -87,15 +96,25 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, if (adev->dm.aux_hpd_discon_quirk) { if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE && operation_result == AUX_RET_ERROR_HPD_DISCON) { - result = 0; + result = msg->size; operation_result = AUX_RET_SUCCESS; } } - if (payload.write && result >= 0) - result = msg->size; + /* + * result equals to 0 includes the cases of AUX_DEFER/I2C_DEFER + */ + if (payload.write && result >= 0) { + if (result) { + /*one byte indicating partially written bytes*/ + drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX partially written\n"); + result = payload.data[0]; + } else if (!payload.reply[0]) + /*I2C_ACK|AUX_ACK*/ + result = msg->size; + } - if (result < 0) + if (result < 0) { switch (operation_result) { case AUX_RET_SUCCESS: break; @@ -114,6 +133,13 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, break; } + drm_dbg_dp(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result); + } + + if (payload.reply[0]) + drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.", + payload.reply[0]); + return result; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 28d1353f403d..ba4ce8a63158 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -439,9 +439,12 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, * Don't adjust DRR while there's bandwidth optimizations pending to * avoid conflicting with firmware updates. 
*/ - if (dc->ctx->dce_version > DCE_VERSION_MAX) - if (dc->optimized_required || dc->wm_optimized_required) + if (dc->ctx->dce_version > DCE_VERSION_MAX) { + if (dc->optimized_required || dc->wm_optimized_required) { + stream->adjust.timing_adjust_pending = true; return false; + } + } dc_exit_ips_for_hw_access(dc); @@ -3168,7 +3171,8 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->crtc_timing_adjust) { if (stream->adjust.v_total_min != update->crtc_timing_adjust->v_total_min || - stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max) + stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max || + stream->adjust.timing_adjust_pending) update->crtc_timing_adjust->timing_adjust_pending = true; stream->adjust = *update->crtc_timing_adjust; update->crtc_timing_adjust->timing_adjust_pending = false; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 0c8ec30ea672..731fbd4bc600 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -910,7 +910,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm } //TODO : Could be possibly moved to a common helper layer. -static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const struct dc_plane_state *plane, unsigned int *plane_id) +static bool dml21_wrapper_get_plane_id(const struct dc_state *context, unsigned int stream_id, const struct dc_plane_state *plane, unsigned int *plane_id) { int i, j; @@ -918,10 +918,12 @@ static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const str return false; for (i = 0; i < context->stream_count; i++) { - for (j = 0; j < context->stream_status[i].plane_count; j++) { - if (context->stream_status[i].plane_states[j] == plane) { - *plane_id = (i << 16) | j; - return true; + if (context->streams[i]->stream_id == stream_id) { + for (j = 0; j < context->stream_status[i].plane_count; j++) { + if (context->stream_status[i].plane_states[j] == plane) { + *plane_id = (i << 16) | j; + return true; + } } } } @@ -944,14 +946,14 @@ static unsigned int map_stream_to_dml21_display_cfg(const struct dml2_context *d return location; } -static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, +static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, const struct dc_plane_state *plane, const struct dc_state *context) { unsigned int plane_id; int i = 0; int location = -1; - if (!dml21_wrapper_get_plane_id(context, plane, &plane_id)) { + if (!dml21_wrapper_get_plane_id(context, stream_id, plane, &plane_id)) { ASSERT(false); return -1; } @@ -1037,7 +1039,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; } else { for (plane_index = 0; plane_index < context->stream_status[stream_index].plane_count; plane_index++) { - disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->stream_status[stream_index].plane_states[plane_index], context); + disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], context); if (disp_cfg_plane_location < 0) disp_cfg_plane_location = 
dml_dispcfg->num_planes++; @@ -1048,7 +1050,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index); dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; - if (dml21_wrapper_get_plane_id(context, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) + if (dml21_wrapper_get_plane_id(context, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true; /* apply forced pstate policy */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 5d16f36ec95c..ed6584535e89 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -234,7 +234,9 @@ static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_s if (!result) return false; + DC_FP_START(); result = dml2_build_mode_programming(mode_programming); + DC_FP_END(); if (!result) return false; @@ -277,7 +279,9 @@ static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *co mode_support->dml2_instance = dml_init->dml2_instance; dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming; + DC_FP_START(); is_supported = dml2_check_mode_supported(mode_support); + DC_FP_END(); if (!is_supported) return false; @@ -288,16 +292,12 @@ bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml { bool out = false; - DC_FP_START(); - /* Use dml_validate_only for fast_validate path */ if (fast_validate) out = dml21_check_mode_support(in_dc, context, dml_ctx); else out = dml21_mode_check_and_programming(in_dc, context, dml_ctx); - DC_FP_END(); - return out; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 2061d43b92e1..ab6baf269801 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -973,7 +973,9 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p } } -static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc_state *context, struct scaler_data *out) +static struct scaler_data *get_scaler_data_for_plane( + const struct dc_plane_state *in, + struct dc_state *context) { int i; struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe; @@ -994,7 +996,7 @@ static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc } ASSERT(i < MAX_PIPES); - memcpy(out, &temp_pipe->plane_res.scl_data, sizeof(*out)); + return &temp_pipe->plane_res.scl_data; } static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, @@ -1057,11 +1059,7 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st 
*out const struct dc_plane_state *in, struct dc_state *context, const struct soc_bounding_box_st *soc) { - struct scaler_data *scaler_data = kzalloc(sizeof(*scaler_data), GFP_KERNEL); - if (!scaler_data) - return; - - get_scaler_data_for_plane(in, context, scaler_data); + struct scaler_data *scaler_data = get_scaler_data_for_plane(in, context); out->CursorBPP[location] = dml_cur_32bit; out->CursorWidth[location] = 256; @@ -1126,8 +1124,6 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out out->DynamicMetadataTransmittedBytes[location] = 0; out->NumberOfCursors[location] = 1; - - kfree(scaler_data); } static unsigned int map_stream_to_dml_display_cfg(const struct dml2_context *dml2, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index 1236e0f9a256..712aff7e17f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -120,10 +120,11 @@ void dpp401_set_cursor_attributes( enum dc_cursor_color_format color_format = cursor_attributes->color_format; int cur_rom_en = 0; - // DCN4 should always do Cursor degamma for Cursor Color modes if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA || color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { - cur_rom_en = 1; + if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) { + cur_rom_en = 1; + } } REG_UPDATE_3(CURSOR0_CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 5489f3d431f6..3af6a3402b89 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1980,9 +1980,9 @@ void dcn401_program_pipe( dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size); } - if (pipe_ctx->update_flags.raw || - (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) || - pipe_ctx->stream->update_flags.raw) + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.raw || + pipe_ctx->plane_state->update_flags.raw || + pipe_ctx->stream->update_flags.raw)) dc->hwss.update_dchubp_dpp(dc, pipe_ctx, context); if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 268626e73c54..53c961f86d43 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -148,6 +148,7 @@ void link_blank_dp_stream(struct dc_link *link, bool hw_init) void link_set_all_streams_dpms_off_for_link(struct dc_link *link) { struct pipe_ctx *pipes[MAX_PIPES]; + struct dc_stream_state *streams[MAX_PIPES]; struct dc_state *state = link->dc->current_state; uint8_t count; int i; @@ -160,10 +161,18 @@ void link_set_all_streams_dpms_off_for_link(struct dc_link *link) link_get_master_pipes_with_dpms_on(link, state, &count, pipes); + /* The subsequent call to dc_commit_updates_for_stream for a full update + * will release the current state and swap to a new state. Releasing the + * current state results in the stream pointers in the pipe_ctx structs + * to be zero'd. Hence, cache all streams prior to dc_commit_updates_for_stream. 
+ */ + for (i = 0; i < count; i++) + streams[i] = pipes[i]->stream; + for (i = 0; i < count; i++) { - stream_update.stream = pipes[i]->stream; + stream_update.stream = streams[i]; dc_commit_updates_for_stream(link->ctx->dc, NULL, 0, - pipes[i]->stream, &stream_update, + streams[i], &stream_update, state); } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c index 34d2e097ca2e..5a5d48fadbf2 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c @@ -35,6 +35,17 @@ #define DC_LOGGER \ link->ctx->logger +static void get_default_8b_10b_lttpr_aux_rd_interval( + union training_aux_rd_interval *training_rd_interval) +{ + /* LTTPR are required to program DPCD 0000Eh to 0x4 (16ms) upon AUX + * read reply to this register. Since old sinks with DPCD rev 1.1 + * and earlier may not support this register, assume the mandatory + * value is programmed by the LTTPR to avoid AUX timeout issues. + */ + training_rd_interval->raw = 0x4; +} + static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, const struct dc_link_settings *link_settings, enum lttpr_mode lttpr_mode) @@ -43,17 +54,22 @@ static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, uint32_t wait_in_micro_secs = 100; memset(&training_rd_interval, 0, sizeof(training_rd_interval)); - if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING && - link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) { - core_link_read_dpcd( - link, - DP_TRAINING_AUX_RD_INTERVAL, - (uint8_t *)&training_rd_interval, - sizeof(training_rd_interval)); - if (lttpr_mode != LTTPR_MODE_NON_TRANSPARENT) - wait_in_micro_secs = 400; - if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) - wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000; + if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) { + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) + core_link_read_dpcd( + link, + DP_TRAINING_AUX_RD_INTERVAL, + (uint8_t *)&training_rd_interval, + sizeof(training_rd_interval)); + else if (dp_is_lttpr_present(link)) + get_default_8b_10b_lttpr_aux_rd_interval(&training_rd_interval); + + if (training_rd_interval.raw != 0) { + if (lttpr_mode != LTTPR_MODE_NON_TRANSPARENT) + wait_in_micro_secs = 400; + if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) + wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000; + } } return wait_in_micro_secs; } @@ -71,13 +87,15 @@ static uint32_t get_eq_training_aux_rd_interval( DP_128B132B_TRAINING_AUX_RD_INTERVAL, (uint8_t *)&training_rd_interval, sizeof(training_rd_interval)); - } else if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING && - link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) { - core_link_read_dpcd( - link, - DP_TRAINING_AUX_RD_INTERVAL, - (uint8_t *)&training_rd_interval, - sizeof(training_rd_interval)); + } else if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) { + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) + core_link_read_dpcd( + link, + DP_TRAINING_AUX_RD_INTERVAL, + (uint8_t *)&training_rd_interval, + sizeof(training_rd_interval)); + else if (dp_is_lttpr_present(link)) + get_default_8b_10b_lttpr_aux_rd_interval(&training_rd_interval); } switch (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 2a59cc61ed8c..944650cb13de 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -2114,8 +2114,6 @@ static bool dcn32_resource_construct( #define REG_STRUCT dccg_regs dccg_regs_init(); - DC_FP_START(); - ctx->dc_bios->regs = &bios_regs; pool->base.res_cap = &res_cap_dcn32; @@ -2501,14 +2499,10 @@ static bool dcn32_resource_construct( if (ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev) && (dc->config.sdpif_request_limit_words_per_umc == 0)) dc->config.sdpif_request_limit_words_per_umc = 16; - DC_FP_END(); - return true; create_fail: - DC_FP_END(); - dcn32_resource_destruct(pool); return false; diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 17fc5dc708f4..60e5ac179c15 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -549,7 +549,7 @@ int drm_dev_wedged_event(struct drm_device *dev, unsigned long method) if (drm_WARN_ONCE(dev, !recovery, "invalid recovery method %u\n", opt)) break; - len += scnprintf(event_string + len, sizeof(event_string), "%s,", recovery); + len += scnprintf(event_string + len, sizeof(event_string) - len, "%s,", recovery); } if (recovery) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 13bc4c290b17..9edb3247c767 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -6596,6 +6596,7 @@ static void drm_reset_display_info(struct drm_connector *connector) info->has_hdmi_infoframe = false; info->rgb_quant_range_selectable = false; memset(&info->hdmi, 0, sizeof(info->hdmi)); + memset(&connector->hdr_sink_metadata, 0, sizeof(connector->hdr_sink_metadata)); info->edid_hdmi_rgb444_dc_modes = 0; info->edid_hdmi_ycbcr444_dc_modes = 0; diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index c299cd94d3f7..cf2463090d3a 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -964,6 +964,10 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) struct drm_file *file = f->private_data; struct drm_device *dev = file->minor->dev; struct drm_printer p = drm_seq_file_printer(m); + int idx; + + if (!drm_dev_enter(dev, &idx)) + return; drm_printf(&p, "drm-driver:\t%s\n", dev->driver->name); drm_printf(&p, "drm-client-id:\t%llu\n", file->client_id); @@ -983,6 +987,8 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) if (dev->driver->show_fdinfo) dev->driver->show_fdinfo(&p, file); + + drm_dev_exit(idx); } EXPORT_SYMBOL(drm_show_fdinfo); diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 38431e8360e7..4b2f32889f00 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1118,6 +1118,10 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, lockdep_assert_held(&gpusvm->notifier_lock); if (range->flags.has_dma_mapping) { + struct drm_gpusvm_range_flags flags = { + .__flags = range->flags.__flags, + }; + for (i = 0, j = 0; i < npages; j++) { struct drm_pagemap_device_addr *addr = &range->dma_addr[j]; @@ -1131,8 +1135,12 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, dev, *addr); i += 1 << addr->order; } - range->flags.has_devmem_pages = false; - range->flags.has_dma_mapping = false; + + /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ + flags.has_devmem_pages = false; + flags.has_dma_mapping = false; + WRITE_ONCE(range->flags.__flags, flags.__flags); + range->dpagemap = NULL; 
} } @@ -1334,6 +1342,7 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, int err = 0; struct dev_pagemap *pagemap; struct drm_pagemap *dpagemap; + struct drm_gpusvm_range_flags flags; retry: hmm_range.notifier_seq = mmu_interval_read_begin(notifier); @@ -1378,7 +1387,8 @@ map_pages: */ drm_gpusvm_notifier_lock(gpusvm); - if (range->flags.unmapped) { + flags.__flags = range->flags.__flags; + if (flags.unmapped) { drm_gpusvm_notifier_unlock(gpusvm); err = -EFAULT; goto err_free; @@ -1454,6 +1464,11 @@ map_pages: goto err_unmap; } + if (ctx->devmem_only) { + err = -EFAULT; + goto err_unmap; + } + addr = dma_map_page(gpusvm->drm->dev, page, 0, PAGE_SIZE << order, @@ -1469,14 +1484,17 @@ map_pages: } i += 1 << order; num_dma_mapped = i; + flags.has_dma_mapping = true; } - range->flags.has_dma_mapping = true; if (zdd) { - range->flags.has_devmem_pages = true; + flags.has_devmem_pages = true; range->dpagemap = dpagemap; } + /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ + WRITE_ONCE(range->flags.__flags, flags.__flags); + drm_gpusvm_notifier_unlock(gpusvm); kvfree(pfns); set_seqno: @@ -1765,6 +1783,8 @@ int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm, goto err_finalize; /* Upon success bind devmem allocation to range and zdd */ + devmem_allocation->timeslice_expiration = get_jiffies_64() + + msecs_to_jiffies(ctx->timeslice_ms); zdd->devmem_allocation = devmem_allocation; /* Owns ref */ err_finalize: @@ -1985,6 +2005,13 @@ static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas, void *buf; int i, err = 0; + if (page) { + zdd = page->zone_device_data; + if (time_before64(get_jiffies_64(), + zdd->devmem_allocation->timeslice_expiration)) + return 0; + } + start = ALIGN_DOWN(fault_addr, size); end = ALIGN(fault_addr + 1, size); diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index 89e05a5bed1d..a4cd476f9b30 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -404,12 +404,16 @@ static void mipi_dbi_blank(struct mipi_dbi_dev *dbidev) u16 height = drm->mode_config.min_height; u16 width = drm->mode_config.min_width; struct mipi_dbi *dbi = &dbidev->dbi; - size_t len = width * height * 2; + const struct drm_format_info *dst_format; + size_t len; int idx; if (!drm_dev_enter(drm, &idx)) return; + dst_format = drm_format_info(dbidev->pixel_format); + len = drm_format_info_min_pitch(dst_format, 0, width) * height; + memset(dbidev->tx_buf, 0, len); mipi_dbi_set_window_address(dbidev, 0, width - 1, 0, height - 1); diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index 5170f72b0830..f91daefa9d2b 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -43,13 +43,13 @@ struct decon_data { unsigned int wincon_burstlen_shift; }; -static struct decon_data exynos7_decon_data = { +static const struct decon_data exynos7_decon_data = { .vidw_buf_start_base = 0x80, .shadowcon_win_protect_shift = 10, .wincon_burstlen_shift = 11, }; -static struct decon_data exynos7870_decon_data = { +static const struct decon_data exynos7870_decon_data = { .vidw_buf_start_base = 0x880, .shadowcon_win_protect_shift = 8, .wincon_burstlen_shift = 10, diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index f313ae7bc3a3..6cc7bf77bcac 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -355,8 +355,7 @@ static void 
exynos_drm_platform_shutdown(struct platform_device *pdev) { struct drm_device *drm = platform_get_drvdata(pdev); - if (drm) - drm_atomic_helper_shutdown(drm); + drm_atomic_helper_shutdown(drm); } static struct platform_driver exynos_drm_platform_driver = { diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index b150cfd92f6e..09e33a26caaf 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -908,7 +908,7 @@ static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id, u32 buf_num; u32 cfg; - DRM_DEV_DEBUG_KMS(ctx->dev, "buf_id[%d]enqueu[%d]\n", buf_id, enqueue); + DRM_DEV_DEBUG_KMS(ctx->dev, "buf_id[%d]enqueue[%d]\n", buf_id, enqueue); spin_lock_irqsave(&ctx->lock, flags); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 1ad87584b1c2..c394cc702d7d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -731,7 +731,7 @@ static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win, /* * Setting dma-burst to 16Word causes permanent tearing for very small * buffers, e.g. cursor buffer. Burst Mode switching which based on - * plane size is not recommended as plane size varies alot towards the + * plane size is not recommended as plane size varies a lot towards the * end of the screen and rapid movement causes unstable DMA, but it is * still better to change dma-burst than displaying garbage. */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index 08cf79a62025..e644e2382d77 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -312,9 +312,6 @@ static int vidi_get_modes(struct drm_connector *connector) else drm_edid = drm_edid_alloc(fake_edid_info, sizeof(fake_edid_info)); - if (!drm_edid) - return 0; - drm_edid_connector_update(connector, drm_edid); count = drm_edid_connector_add_modes(connector); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 02f95108c637..6dc2d31ccb5a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -242,7 +242,7 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp, to_intel_connector(conn_state->connector); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - bool is_mst = intel_dp->is_mst; + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); int bpp_x16, slots = -EINVAL; int dsc_slice_count = 0; int max_dpt_bpp_x16; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 64e9317f58fb..71ee01d9ef64 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1001,6 +1001,10 @@ void intel_rps_dec_waiters(struct intel_rps *rps) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + /* Don't decrement num_waiters for req where increment was skipped */ + if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) + return; + intel_guc_slpc_dec_waiters(slpc); } else { atomic_dec(&rps->num_waiters); @@ -1029,11 +1033,15 @@ void intel_rps_boost(struct i915_request *rq) if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) return; - if (slpc->min_freq_softlimit >= slpc->boost_freq) - return; - /* Return if old value is non zero */ if (!atomic_fetch_inc(&slpc->num_waiters)) { + /* + * Skip queuing boost work if 
frequency is already boosted, + * but still increment num_waiters. + */ + if (slpc->min_freq_softlimit >= slpc->boost_freq) + return; + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", rq->fence.context, rq->fence.seqno); queue_work(rps_to_gt(rps)->i915->unordered_wq, diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h index 9aae779c4da3..4969d3de2bac 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h @@ -23,6 +23,7 @@ int intel_pxp_gsccs_init(struct intel_pxp *pxp); int intel_pxp_gsccs_create_session(struct intel_pxp *pxp, int arb_session_id); void intel_pxp_gsccs_end_arb_fw_session(struct intel_pxp *pxp, u32 arb_session_id); +bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); #else static inline void intel_pxp_gsccs_fini(struct intel_pxp *pxp) @@ -34,8 +35,11 @@ static inline int intel_pxp_gsccs_init(struct intel_pxp *pxp) return 0; } -#endif +static inline bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp) +{ + return false; +} -bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); +#endif #endif /*__INTEL_PXP_GSCCS_H__ */ diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 81d2ee37e773..49ff9f1f16d3 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -169,7 +169,7 @@ static const struct meson_drm_soc_attr meson_drm_soc_attrs[] = { /* S805X/S805Y HDMI PLL won't lock for HDMI PHY freq > 1,65GHz */ { .limits = { - .max_hdmi_phy_freq = 1650000, + .max_hdmi_phy_freq = 1650000000, }, .attrs = (const struct soc_device_attribute []) { { .soc_id = "GXL (S805*)", }, diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h index 3f9345c14f31..be4b0e4df6e1 100644 --- a/drivers/gpu/drm/meson/meson_drv.h +++ b/drivers/gpu/drm/meson/meson_drv.h @@ -37,7 +37,7 @@ struct meson_drm_match_data { }; struct meson_drm_soc_limits { - unsigned int max_hdmi_phy_freq; + unsigned long long max_hdmi_phy_freq; }; struct meson_drm { diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 6d1c9262a2cf..c08fa93e50a3 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -70,12 +70,12 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, { struct meson_drm *priv = encoder_hdmi->priv; int vic = drm_match_cea_mode(mode); - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int hdmi_freq; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long hdmi_freq; - vclk_freq = mode->clock; + vclk_freq = mode->clock * 1000ULL; /* For 420, pixel clock is half unlike venc clock */ if (encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24) @@ -107,7 +107,8 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, if (mode->flags & DRM_MODE_FLAG_DBLCLK) venc_freq /= 2; - dev_dbg(priv->dev, "vclk:%d phy=%d venc=%d hdmi=%d enci=%d\n", + dev_dbg(priv->dev, + "vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n", phy_freq, vclk_freq, venc_freq, hdmi_freq, priv->venc.hdmi_use_enci); @@ -122,10 +123,11 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge); struct meson_drm *priv = encoder_hdmi->priv; bool is_hdmi2_sink = 
display_info->hdmi.scdc.supported; - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int hdmi_freq; + unsigned long long clock = mode->clock * 1000ULL; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long hdmi_freq; int vic = drm_match_cea_mode(mode); enum drm_mode_status status; @@ -144,12 +146,12 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri if (status != MODE_OK) return status; - return meson_vclk_dmt_supported_freq(priv, mode->clock); + return meson_vclk_dmt_supported_freq(priv, clock); /* Check against supported VIC modes */ } else if (!meson_venc_hdmi_supported_vic(vic)) return MODE_BAD; - vclk_freq = mode->clock; + vclk_freq = clock; /* For 420, pixel clock is half unlike venc clock */ if (drm_mode_is_420_only(display_info, mode) || @@ -179,7 +181,8 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri if (mode->flags & DRM_MODE_FLAG_DBLCLK) venc_freq /= 2; - dev_dbg(priv->dev, "%s: vclk:%d phy=%d venc=%d hdmi=%d\n", + dev_dbg(priv->dev, + "%s: vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz\n", __func__, phy_freq, vclk_freq, venc_freq, hdmi_freq); return meson_vclk_vic_supported_freq(priv, phy_freq, vclk_freq); diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index 2a942dc6a6dc..3325580d885d 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -110,7 +110,10 @@ #define HDMI_PLL_LOCK BIT(31) #define HDMI_PLL_LOCK_G12A (3 << 30) -#define FREQ_1000_1001(_freq) DIV_ROUND_CLOSEST(_freq * 1000, 1001) +#define PIXEL_FREQ_1000_1001(_freq) \ + DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL) +#define PHY_FREQ_1000_1001(_freq) \ + (PIXEL_FREQ_1000_1001(DIV_ROUND_DOWN_ULL(_freq, 10ULL)) * 10) /* VID PLL Dividers */ enum { @@ -360,11 +363,11 @@ enum { }; struct meson_vclk_params { - unsigned int pll_freq; - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int pixel_freq; + unsigned long long pll_freq; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long pixel_freq; unsigned int pll_od1; unsigned int pll_od2; unsigned int pll_od3; @@ -372,11 +375,11 @@ struct meson_vclk_params { unsigned int vclk_div; } params[] = { [MESON_VCLK_HDMI_ENCI_54000] = { - .pll_freq = 4320000, - .phy_freq = 270000, - .vclk_freq = 54000, - .venc_freq = 54000, - .pixel_freq = 54000, + .pll_freq = 4320000000, + .phy_freq = 270000000, + .vclk_freq = 54000000, + .venc_freq = 54000000, + .pixel_freq = 54000000, .pll_od1 = 4, .pll_od2 = 4, .pll_od3 = 1, @@ -384,11 +387,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_DDR_54000] = { - .pll_freq = 4320000, - .phy_freq = 270000, - .vclk_freq = 54000, - .venc_freq = 54000, - .pixel_freq = 27000, + .pll_freq = 4320000000, + .phy_freq = 270000000, + .vclk_freq = 54000000, + .venc_freq = 54000000, + .pixel_freq = 27000000, .pll_od1 = 4, .pll_od2 = 4, .pll_od3 = 1, @@ -396,11 +399,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_DDR_148500] = { - .pll_freq = 2970000, - .phy_freq = 742500, - .vclk_freq = 148500, - .venc_freq = 148500, - .pixel_freq = 74250, + .pll_freq = 2970000000, + .phy_freq = 742500000, + .vclk_freq = 148500000, + .venc_freq = 148500000, + .pixel_freq = 74250000, .pll_od1 = 4, .pll_od2 = 1, .pll_od3 = 1, @@ -408,11 +411,11 @@ struct meson_vclk_params { .vclk_div = 1, }, 
[MESON_VCLK_HDMI_74250] = { - .pll_freq = 2970000, - .phy_freq = 742500, - .vclk_freq = 74250, - .venc_freq = 74250, - .pixel_freq = 74250, + .pll_freq = 2970000000, + .phy_freq = 742500000, + .vclk_freq = 74250000, + .venc_freq = 74250000, + .pixel_freq = 74250000, .pll_od1 = 2, .pll_od2 = 2, .pll_od3 = 2, @@ -420,11 +423,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_148500] = { - .pll_freq = 2970000, - .phy_freq = 1485000, - .vclk_freq = 148500, - .venc_freq = 148500, - .pixel_freq = 148500, + .pll_freq = 2970000000, + .phy_freq = 1485000000, + .vclk_freq = 148500000, + .venc_freq = 148500000, + .pixel_freq = 148500000, .pll_od1 = 1, .pll_od2 = 2, .pll_od3 = 2, @@ -432,11 +435,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_297000] = { - .pll_freq = 5940000, - .phy_freq = 2970000, - .venc_freq = 297000, - .vclk_freq = 297000, - .pixel_freq = 297000, + .pll_freq = 5940000000, + .phy_freq = 2970000000, + .venc_freq = 297000000, + .vclk_freq = 297000000, + .pixel_freq = 297000000, .pll_od1 = 2, .pll_od2 = 1, .pll_od3 = 1, @@ -444,11 +447,11 @@ struct meson_vclk_params { .vclk_div = 2, }, [MESON_VCLK_HDMI_594000] = { - .pll_freq = 5940000, - .phy_freq = 5940000, - .venc_freq = 594000, - .vclk_freq = 594000, - .pixel_freq = 594000, + .pll_freq = 5940000000, + .phy_freq = 5940000000, + .venc_freq = 594000000, + .vclk_freq = 594000000, + .pixel_freq = 594000000, .pll_od1 = 1, .pll_od2 = 1, .pll_od3 = 2, @@ -456,11 +459,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_594000_YUV420] = { - .pll_freq = 5940000, - .phy_freq = 2970000, - .venc_freq = 594000, - .vclk_freq = 594000, - .pixel_freq = 297000, + .pll_freq = 5940000000, + .phy_freq = 2970000000, + .venc_freq = 594000000, + .vclk_freq = 594000000, + .pixel_freq = 297000000, .pll_od1 = 2, .pll_od2 = 1, .pll_od3 = 1, @@ -617,16 +620,16 @@ static void meson_hdmi_pll_set_params(struct meson_drm *priv, unsigned int m, 3 << 20, pll_od_to_reg(od3) << 20); } -#define XTAL_FREQ 24000 +#define XTAL_FREQ (24 * 1000 * 1000) static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv, - unsigned int pll_freq) + unsigned long long pll_freq) { /* The GXBB PLL has a /2 pre-multiplier */ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) - pll_freq /= 2; + pll_freq = DIV_ROUND_DOWN_ULL(pll_freq, 2); - return pll_freq / XTAL_FREQ; + return DIV_ROUND_DOWN_ULL(pll_freq, XTAL_FREQ); } #define HDMI_FRAC_MAX_GXBB 4096 @@ -635,12 +638,13 @@ static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv, static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, unsigned int m, - unsigned int pll_freq) + unsigned long long pll_freq) { - unsigned int parent_freq = XTAL_FREQ; + unsigned long long parent_freq = XTAL_FREQ; unsigned int frac_max = HDMI_FRAC_MAX_GXL; unsigned int frac_m; unsigned int frac; + u32 remainder; /* The GXBB PLL has a /2 pre-multiplier and a larger FRAC width */ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { @@ -652,11 +656,11 @@ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, frac_max = HDMI_FRAC_MAX_G12A; /* We can have a perfect match !*/ - if (pll_freq / m == parent_freq && - pll_freq % m == 0) + if (div_u64_rem(pll_freq, m, &remainder) == parent_freq && + remainder == 0) return 0; - frac = div_u64((u64)pll_freq * (u64)frac_max, parent_freq); + frac = mul_u64_u64_div_u64(pll_freq, frac_max, parent_freq); frac_m = m * frac_max; if (frac_m > frac) return frac_max; @@ -666,7 +670,7 @@ static unsigned int meson_hdmi_pll_get_frac(struct 
meson_drm *priv, } static bool meson_hdmi_pll_validate_params(struct meson_drm *priv, - unsigned int m, + unsigned long long m, unsigned int frac) { if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { @@ -694,7 +698,7 @@ static bool meson_hdmi_pll_validate_params(struct meson_drm *priv, } static bool meson_hdmi_pll_find_params(struct meson_drm *priv, - unsigned int freq, + unsigned long long freq, unsigned int *m, unsigned int *frac, unsigned int *od) @@ -706,7 +710,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv, continue; *frac = meson_hdmi_pll_get_frac(priv, *m, freq * *od); - DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d\n", + DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d\n", freq, *m, *frac, *od); if (meson_hdmi_pll_validate_params(priv, *m, *frac)) @@ -718,7 +722,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv, /* pll_freq is the frequency after the OD dividers */ enum drm_mode_status -meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq) +meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq) { unsigned int od, m, frac; @@ -741,7 +745,7 @@ EXPORT_SYMBOL_GPL(meson_vclk_dmt_supported_freq); /* pll_freq is the frequency after the OD dividers */ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, - unsigned int pll_freq) + unsigned long long pll_freq) { unsigned int od, m, frac, od1, od2, od3; @@ -756,7 +760,7 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, od1 = od / od2; } - DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d/%d/%d\n", + DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d/%d/%d\n", pll_freq, m, frac, od1, od2, od3); meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3); @@ -764,17 +768,18 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, return; } - DRM_ERROR("Fatal, unable to find parameters for PLL freq %d\n", + DRM_ERROR("Fatal, unable to find parameters for PLL freq %lluHz\n", pll_freq); } enum drm_mode_status -meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, - unsigned int vclk_freq) +meson_vclk_vic_supported_freq(struct meson_drm *priv, + unsigned long long phy_freq, + unsigned long long vclk_freq) { int i; - DRM_DEBUG_DRIVER("phy_freq = %d vclk_freq = %d\n", + DRM_DEBUG_DRIVER("phy_freq = %lluHz vclk_freq = %lluHz\n", phy_freq, vclk_freq); /* Check against soc revision/package limits */ @@ -785,19 +790,19 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, } for (i = 0 ; params[i].pixel_freq ; ++i) { - DRM_DEBUG_DRIVER("i = %d pixel_freq = %d alt = %d\n", + DRM_DEBUG_DRIVER("i = %d pixel_freq = %lluHz alt = %lluHz\n", i, params[i].pixel_freq, - FREQ_1000_1001(params[i].pixel_freq)); - DRM_DEBUG_DRIVER("i = %d phy_freq = %d alt = %d\n", + PIXEL_FREQ_1000_1001(params[i].pixel_freq)); + DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", i, params[i].phy_freq, - FREQ_1000_1001(params[i].phy_freq/1000)*1000); + PHY_FREQ_1000_1001(params[i].phy_freq)); /* Match strict frequency */ if (phy_freq == params[i].phy_freq && vclk_freq == params[i].vclk_freq) return MODE_OK; /* Match 1000/1001 variant */ - if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/1000)*1000) && - vclk_freq == FREQ_1000_1001(params[i].vclk_freq)) + if (phy_freq == PHY_FREQ_1000_1001(params[i].phy_freq) && + vclk_freq == PIXEL_FREQ_1000_1001(params[i].vclk_freq)) return MODE_OK; } @@ -805,8 +810,9 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned 
int phy_freq, } EXPORT_SYMBOL_GPL(meson_vclk_vic_supported_freq); -static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, - unsigned int od1, unsigned int od2, unsigned int od3, +static void meson_vclk_set(struct meson_drm *priv, + unsigned long long pll_base_freq, unsigned int od1, + unsigned int od2, unsigned int od3, unsigned int vid_pll_div, unsigned int vclk_div, unsigned int hdmi_tx_div, unsigned int venc_div, bool hdmi_use_enci, bool vic_alternate_clock) @@ -826,15 +832,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, meson_hdmi_pll_generic_set(priv, pll_base_freq); } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x3d; frac = vic_alternate_clock ? 0xd02 : 0xe00; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0x59 : 0x5a; frac = vic_alternate_clock ? 0xe8f : 0; break; - case 5940000: + case 5940000000: m = 0x7b; frac = vic_alternate_clock ? 0xa05 : 0xc00; break; @@ -844,15 +850,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) || meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXL)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x7b; frac = vic_alternate_clock ? 0x281 : 0x300; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0xb3 : 0xb4; frac = vic_alternate_clock ? 0x347 : 0; break; - case 5940000: + case 5940000000: m = 0xf7; frac = vic_alternate_clock ? 0x102 : 0x200; break; @@ -861,15 +867,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3); } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x7b; frac = vic_alternate_clock ? 0x140b4 : 0x18000; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0xb3 : 0xb4; frac = vic_alternate_clock ? 0x1a3ee : 0; break; - case 5940000: + case 5940000000: m = 0xf7; frac = vic_alternate_clock ? 
0x8148 : 0x10000; break; @@ -1025,14 +1031,14 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, } void meson_vclk_setup(struct meson_drm *priv, unsigned int target, - unsigned int phy_freq, unsigned int vclk_freq, - unsigned int venc_freq, unsigned int dac_freq, + unsigned long long phy_freq, unsigned long long vclk_freq, + unsigned long long venc_freq, unsigned long long dac_freq, bool hdmi_use_enci) { bool vic_alternate_clock = false; - unsigned int freq; - unsigned int hdmi_tx_div; - unsigned int venc_div; + unsigned long long freq; + unsigned long long hdmi_tx_div; + unsigned long long venc_div; if (target == MESON_VCLK_TARGET_CVBS) { meson_venci_cvbs_clock_config(priv); @@ -1052,27 +1058,27 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, return; } - hdmi_tx_div = vclk_freq / dac_freq; + hdmi_tx_div = DIV_ROUND_DOWN_ULL(vclk_freq, dac_freq); if (hdmi_tx_div == 0) { - pr_err("Fatal Error, invalid HDMI-TX freq %d\n", + pr_err("Fatal Error, invalid HDMI-TX freq %lluHz\n", dac_freq); return; } - venc_div = vclk_freq / venc_freq; + venc_div = DIV_ROUND_DOWN_ULL(vclk_freq, venc_freq); if (venc_div == 0) { - pr_err("Fatal Error, invalid HDMI venc freq %d\n", + pr_err("Fatal Error, invalid HDMI venc freq %lluHz\n", venc_freq); return; } for (freq = 0 ; params[freq].pixel_freq ; ++freq) { if ((phy_freq == params[freq].phy_freq || - phy_freq == FREQ_1000_1001(params[freq].phy_freq/1000)*1000) && + phy_freq == PHY_FREQ_1000_1001(params[freq].phy_freq)) && (vclk_freq == params[freq].vclk_freq || - vclk_freq == FREQ_1000_1001(params[freq].vclk_freq))) { + vclk_freq == PIXEL_FREQ_1000_1001(params[freq].vclk_freq))) { if (vclk_freq != params[freq].vclk_freq) vic_alternate_clock = true; else @@ -1098,7 +1104,8 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, } if (!params[freq].pixel_freq) { - pr_err("Fatal Error, invalid HDMI vclk freq %d\n", vclk_freq); + pr_err("Fatal Error, invalid HDMI vclk freq %lluHz\n", + vclk_freq); return; } diff --git a/drivers/gpu/drm/meson/meson_vclk.h b/drivers/gpu/drm/meson/meson_vclk.h index 60617aaf18dd..7ac55744e574 100644 --- a/drivers/gpu/drm/meson/meson_vclk.h +++ b/drivers/gpu/drm/meson/meson_vclk.h @@ -20,17 +20,18 @@ enum { }; /* 27MHz is the CVBS Pixel Clock */ -#define MESON_VCLK_CVBS 27000 +#define MESON_VCLK_CVBS (27 * 1000 * 1000) enum drm_mode_status -meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq); +meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq); enum drm_mode_status -meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, - unsigned int vclk_freq); +meson_vclk_vic_supported_freq(struct meson_drm *priv, + unsigned long long phy_freq, + unsigned long long vclk_freq); void meson_vclk_setup(struct meson_drm *priv, unsigned int target, - unsigned int phy_freq, unsigned int vclk_freq, - unsigned int venc_freq, unsigned int dac_freq, + unsigned long long phy_freq, unsigned long long vclk_freq, + unsigned long long venc_freq, unsigned long long dac_freq, bool hdmi_use_enci); #endif /* __MESON_VCLK_H */ diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 7cc84472cece..edddfc036c6d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -90,7 +90,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) while (!list_empty(&fctx->pending)) { fence = list_entry(fctx->pending.next, typeof(*fence), head); 
- if (error) + if (error && !dma_fence_is_signaled_locked(&fence->base)) dma_fence_set_error(&fence->base, error); if (nouveau_fence_signal(fence)) diff --git a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c index 7d68a8acfe2e..eb0f8373258c 100644 --- a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c +++ b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c @@ -129,11 +129,11 @@ static int jadard_unprepare(struct drm_panel *panel) { struct jadard *jadard = panel_to_jadard(panel); - gpiod_set_value(jadard->reset, 1); + gpiod_set_value(jadard->reset, 0); msleep(120); if (jadard->desc->reset_before_power_off_vcioo) { - gpiod_set_value(jadard->reset, 0); + gpiod_set_value(jadard->reset, 1); usleep_range(1000, 2000); } diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 232b03c1a259..33a37539de57 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -1027,27 +1027,28 @@ static const struct panel_desc auo_g070vvn01 = { }, }; -static const struct drm_display_mode auo_g101evn010_mode = { - .clock = 68930, - .hdisplay = 1280, - .hsync_start = 1280 + 82, - .hsync_end = 1280 + 82 + 2, - .htotal = 1280 + 82 + 2 + 84, - .vdisplay = 800, - .vsync_start = 800 + 8, - .vsync_end = 800 + 8 + 2, - .vtotal = 800 + 8 + 2 + 6, +static const struct display_timing auo_g101evn010_timing = { + .pixelclock = { 64000000, 68930000, 85000000 }, + .hactive = { 1280, 1280, 1280 }, + .hfront_porch = { 8, 64, 256 }, + .hback_porch = { 8, 64, 256 }, + .hsync_len = { 40, 168, 767 }, + .vactive = { 800, 800, 800 }, + .vfront_porch = { 4, 8, 100 }, + .vback_porch = { 4, 8, 100 }, + .vsync_len = { 8, 16, 223 }, }; static const struct panel_desc auo_g101evn010 = { - .modes = &auo_g101evn010_mode, - .num_modes = 1, + .timings = &auo_g101evn010_timing, + .num_timings = 1, .bpc = 6, .size = { .width = 216, .height = 135, }, .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, .connector_type = DRM_MODE_CONNECTOR_LVDS, }; diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c index fd4215e2f982..925fbc2cda70 100644 --- a/drivers/gpu/drm/tests/drm_gem_shmem_test.c +++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c @@ -216,6 +216,9 @@ static void drm_gem_shmem_test_get_pages_sgt(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sgt); KUNIT_EXPECT_NULL(test, shmem->sgt); + ret = kunit_add_action_or_reset(test, kfree_wrapper, sgt); + KUNIT_ASSERT_EQ(test, ret, 0); + ret = kunit_add_action_or_reset(test, sg_free_table_wrapper, sgt); KUNIT_ASSERT_EQ(test, ret, 0); diff --git a/drivers/gpu/drm/tiny/panel-mipi-dbi.c b/drivers/gpu/drm/tiny/panel-mipi-dbi.c index 0460ecaef4bd..23914a9f7fd3 100644 --- a/drivers/gpu/drm/tiny/panel-mipi-dbi.c +++ b/drivers/gpu/drm/tiny/panel-mipi-dbi.c @@ -390,7 +390,10 @@ static int panel_mipi_dbi_spi_probe(struct spi_device *spi) spi_set_drvdata(spi, drm); - drm_client_setup(drm, NULL); + if (bpp == 16) + drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB565); + else + drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB888); return 0; } diff --git a/drivers/gpu/drm/ttm/ttm_backup.c b/drivers/gpu/drm/ttm/ttm_backup.c index 93c007f18855..9e2d72c447ee 100644 --- a/drivers/gpu/drm/ttm/ttm_backup.c +++ b/drivers/gpu/drm/ttm/ttm_backup.c @@ -8,20 +8,6 @@ #include <linux/swap.h> /* - * Casting from randomized struct file * to struct ttm_backup * is fine since - * struct ttm_backup is never defined nor 
dereferenced. - */ -static struct file *ttm_backup_to_file(struct ttm_backup *backup) -{ - return (void *)backup; -} - -static struct ttm_backup *ttm_file_to_backup(struct file *file) -{ - return (void *)file; -} - -/* * Need to map shmem indices to handle since a handle value * of 0 means error, following the swp_entry_t convention. */ @@ -40,12 +26,12 @@ static pgoff_t ttm_backup_handle_to_shmem_idx(pgoff_t handle) * @backup: The struct backup pointer used to obtain the handle * @handle: The handle obtained from the @backup_page function. */ -void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle) +void ttm_backup_drop(struct file *backup, pgoff_t handle) { loff_t start = ttm_backup_handle_to_shmem_idx(handle); start <<= PAGE_SHIFT; - shmem_truncate_range(file_inode(ttm_backup_to_file(backup)), start, + shmem_truncate_range(file_inode(backup), start, start + PAGE_SIZE - 1); } @@ -55,16 +41,15 @@ void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle) * @backup: The struct backup pointer used to back up the page. * @dst: The struct page to copy into. * @handle: The handle returned when the page was backed up. - * @intr: Try to perform waits interruptable or at least killable. + * @intr: Try to perform waits interruptible or at least killable. * * Return: 0 on success, Negative error code on failure, notably * -EINTR if @intr was set to true and a signal is pending. */ -int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, +int ttm_backup_copy_page(struct file *backup, struct page *dst, pgoff_t handle, bool intr) { - struct file *filp = ttm_backup_to_file(backup); - struct address_space *mapping = filp->f_mapping; + struct address_space *mapping = backup->f_mapping; struct folio *from_folio; pgoff_t idx = ttm_backup_handle_to_shmem_idx(handle); @@ -106,12 +91,11 @@ int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, * the folio size- and usage. */ s64 -ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, +ttm_backup_backup_page(struct file *backup, struct page *page, bool writeback, pgoff_t idx, gfp_t page_gfp, gfp_t alloc_gfp) { - struct file *filp = ttm_backup_to_file(backup); - struct address_space *mapping = filp->f_mapping; + struct address_space *mapping = backup->f_mapping; unsigned long handle = 0; struct folio *to_folio; int ret; @@ -161,9 +145,9 @@ ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, * * After a call to this function, it's illegal to use the @backup pointer. */ -void ttm_backup_fini(struct ttm_backup *backup) +void ttm_backup_fini(struct file *backup) { - fput(ttm_backup_to_file(backup)); + fput(backup); } /** @@ -194,14 +178,10 @@ EXPORT_SYMBOL_GPL(ttm_backup_bytes_avail); * * Create a backup utilizing shmem objects. * - * Return: A pointer to a struct ttm_backup on success, + * Return: A pointer to a struct file on success, * an error pointer on error. 
*/ -struct ttm_backup *ttm_backup_shmem_create(loff_t size) +struct file *ttm_backup_shmem_create(loff_t size) { - struct file *filp; - - filp = shmem_file_setup("ttm shmem backup", size, 0); - - return ttm_file_to_backup(filp); + return shmem_file_setup("ttm shmem backup", size, 0); } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 95b86003c50d..5bf3c969907c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1093,7 +1093,8 @@ struct ttm_bo_swapout_walk { struct ttm_lru_walk walk; /** @gfp_flags: The gfp flags to use for ttm_tt_swapout() */ gfp_t gfp_flags; - + /** @hit_low: Whether we should attempt to swap BO's with low watermark threshold */ + /** @evict_low: If we cannot swap a bo when @try_low is false (first pass) */ bool hit_low, evict_low; }; diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 83b10706ba89..c2ea865be657 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -506,7 +506,7 @@ static void ttm_pool_allocated_page_commit(struct page *allocated, * if successful, populate the page-table and dma-address arrays. */ static int ttm_pool_restore_commit(struct ttm_pool_tt_restore *restore, - struct ttm_backup *backup, + struct file *backup, const struct ttm_operation_ctx *ctx, struct ttm_pool_alloc_state *alloc) @@ -655,7 +655,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pgoff_t start_page, pgoff_t end_page) { struct page **pages = &tt->pages[start_page]; - struct ttm_backup *backup = tt->backup; + struct file *backup = tt->backup; pgoff_t i, nr; for (i = start_page; i < end_page; i += nr, pages += nr) { @@ -963,7 +963,7 @@ void ttm_pool_drop_backed_up(struct ttm_tt *tt) long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, const struct ttm_backup_flags *flags) { - struct ttm_backup *backup = tt->backup; + struct file *backup = tt->backup; struct page *page; unsigned long handle; gfp_t alloc_gfp; diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index df0aa6c4b8b8..698cd4bf5e46 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -544,7 +544,7 @@ EXPORT_SYMBOL(ttm_tt_pages_limit); */ int ttm_tt_setup_backup(struct ttm_tt *tt) { - struct ttm_backup *backup = + struct file *backup = ttm_backup_shmem_create(((loff_t)tt->num_pages) << PAGE_SHIFT); if (WARN_ON_ONCE(!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))) diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 4a7701a33cf8..eb35482f6fb5 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -744,11 +744,16 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) return DRM_GPU_SCHED_STAT_NOMINAL; } -/* If the current address or return address have changed, then the GPU - * has probably made progress and we should delay the reset. This - * could fail if the GPU got in an infinite loop in the CL, but that - * is pretty unlikely outside of an i-g-t testcase. 
- */ +static void +v3d_sched_skip_reset(struct drm_sched_job *sched_job) +{ + struct drm_gpu_scheduler *sched = sched_job->sched; + + spin_lock(&sched->job_list_lock); + list_add(&sched_job->list, &sched->pending_list); + spin_unlock(&sched->job_list_lock); +} + static enum drm_gpu_sched_stat v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, u32 *timedout_ctca, u32 *timedout_ctra) @@ -758,9 +763,16 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q)); u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q)); + /* If the current address or return address have changed, then the GPU + * has probably made progress and we should delay the reset. This + * could fail if the GPU got in an infinite loop in the CL, but that + * is pretty unlikely outside of an i-g-t testcase. + */ if (*timedout_ctca != ctca || *timedout_ctra != ctra) { *timedout_ctca = ctca; *timedout_ctra = ctra; + + v3d_sched_skip_reset(sched_job); return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -800,11 +812,13 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job) struct v3d_dev *v3d = job->base.v3d; u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4(v3d->ver)); - /* If we've made progress, skip reset and let the timer get - * rearmed. + /* If we've made progress, skip reset, add the job to the pending + * list, and let the timer get rearmed. */ if (job->timedout_batches != batches) { job->timedout_batches = batches; + + v3d_sched_skip_reset(sched_job); return DRM_GPU_SCHED_STAT_NOMINAL; } diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 2d88e390feb4..e32e680c7197 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -128,6 +128,14 @@ static void virtio_gpu_remove(struct virtio_device *vdev) drm_dev_put(dev); } +static void virtio_gpu_shutdown(struct virtio_device *vdev) +{ + /* + * drm does its own synchronization on shutdown. + * Do nothing here, opt out of device reset. 
+ */ +} + static void virtio_gpu_config_changed(struct virtio_device *vdev) { struct drm_device *dev = vdev->priv; @@ -162,6 +170,7 @@ static struct virtio_driver virtio_gpu_driver = { .id_table = id_table, .probe = virtio_gpu_probe, .remove = virtio_gpu_remove, + .shutdown = virtio_gpu_shutdown, .config_changed = virtio_gpu_config_changed }; diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index 167fb0f742de..5a47991b4b81 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -47,6 +47,10 @@ #define MI_LRI_FORCE_POSTED REG_BIT(12) #define MI_LRI_LEN(x) (((x) & 0xff) + 1) +#define MI_STORE_REGISTER_MEM (__MI_INSTR(0x24) | XE_INSTR_NUM_DW(4)) +#define MI_SRM_USE_GGTT REG_BIT(22) +#define MI_SRM_ADD_CS_OFFSET REG_BIT(19) + #define MI_FLUSH_DW __MI_INSTR(0x26) #define MI_FLUSH_DW_PROTECTED_MEM_EN REG_BIT(22) #define MI_FLUSH_DW_STORE_INDEX REG_BIT(21) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index fb8ec317b6ee..891f928d80ce 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -43,6 +43,10 @@ #define XEHPC_BCS8_RING_BASE 0x3ee000 #define GSCCS_RING_BASE 0x11a000 +#define ENGINE_ID(base) XE_REG((base) + 0x8c) +#define ENGINE_INSTANCE_ID REG_GENMASK(9, 4) +#define ENGINE_CLASS_ID REG_GENMASK(2, 0) + #define RING_TAIL(base) XE_REG((base) + 0x30) #define TAIL_ADDR REG_GENMASK(20, 3) @@ -154,6 +158,7 @@ #define STOP_RING REG_BIT(8) #define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8) +#define RING_CTX_TIMESTAMP_UDW(base) XE_REG((base) + 0x3ac) #define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc) #define RING_FORCE_TO_NONPRIV(base, i) XE_REG(((base) + 0x4d0) + (i) * 4) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index da1f198ac107..181913967ac9 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -157,6 +157,7 @@ #define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108) #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) +#define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) #define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 57944f90bbf6..994af591a2e8 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -11,7 +11,9 @@ #define CTX_RING_TAIL (0x06 + 1) #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) +#define CTX_BB_PER_CTX_PTR (0x12 + 1) #define CTX_TIMESTAMP (0x22 + 1) +#define CTX_TIMESTAMP_UDW (0x24 + 1) #define CTX_INDIRECT_RING_STATE (0x26 + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index ef1e5256c56a..0e502feaca81 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -46,8 +46,11 @@ static void read_l3cc_table(struct xe_gt *gt, unsigned int fw_ref, i; u32 reg_val; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n"); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + xe_force_wake_put(gt_to_fw(gt), fw_ref); + KUNIT_ASSERT_TRUE_MSG(test, true, 
"Forcewake Failed.\n"); + } for (i = 0; i < info->num_mocs_regs; i++) { if (!(i & 1)) { diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 9f8667ebba85..0482f26aa480 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -330,6 +330,8 @@ struct xe_device { u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ u8 has_usm:1; + /** @info.has_64bit_timestamp: Device supports 64-bit timestamps */ + u8 has_64bit_timestamp:1; /** @info.is_dgfx: is discrete device */ u8 is_dgfx:1; /** diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index f2bb9168967c..e2bb156c71fb 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -52,6 +52,8 @@ struct xe_eu_stall_data_stream { struct xe_gt *gt; struct xe_bo *bo; + /* Lock to protect data buffer pointers */ + struct mutex xecore_buf_lock; struct per_xecore_buf *xecore_buf; struct { bool reported_to_user; @@ -208,6 +210,9 @@ int xe_eu_stall_init(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); int ret; + if (!xe_eu_stall_supported_on_platform(xe)) + return 0; + gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL); if (!gt->eu_stall) { ret = -ENOMEM; @@ -378,7 +383,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) u16 group, instance; unsigned int xecore; - mutex_lock(>->eu_stall->stream_lock); + mutex_lock(&stream->xecore_buf_lock); for_each_dss_steering(xecore, gt, group, instance) { xecore_buf = &stream->xecore_buf[xecore]; read_ptr = xecore_buf->read; @@ -396,7 +401,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) set_bit(xecore, stream->data_drop.mask); xecore_buf->write = write_ptr; } - mutex_unlock(>->eu_stall->stream_lock); + mutex_unlock(&stream->xecore_buf_lock); return min_data_present; } @@ -511,11 +516,13 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st unsigned int xecore; int ret = 0; + mutex_lock(&stream->xecore_buf_lock); if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) { if (!stream->data_drop.reported_to_user) { stream->data_drop.reported_to_user = true; xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n", XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask); + mutex_unlock(&stream->xecore_buf_lock); return -EIO; } stream->data_drop.reported_to_user = false; @@ -527,6 +534,7 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st if (ret || count == total_size) break; } + mutex_unlock(&stream->xecore_buf_lock); return total_size ?: (ret ?: -EAGAIN); } @@ -583,6 +591,7 @@ static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream) { struct xe_gt *gt = stream->gt; + mutex_destroy(&stream->xecore_buf_lock); gt->eu_stall->stream = NULL; kfree(stream); } @@ -718,6 +727,7 @@ static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream, } init_waitqueue_head(&stream->poll_wq); + mutex_init(&stream->xecore_buf_lock); INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn); stream->per_xecore_buf_size = per_xecore_buf_size; stream->sampling_rate_mult = props->sampling_rate_mult; diff --git a/drivers/gpu/drm/xe/xe_eu_stall.h b/drivers/gpu/drm/xe/xe_eu_stall.h index ed9d0f233566..d1c76e503799 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.h +++ b/drivers/gpu/drm/xe/xe_eu_stall.h @@ -7,6 +7,7 @@ #define __XE_EU_STALL_H__ #include "xe_gt_types.h" +#include "xe_sriov.h" size_t 
xe_eu_stall_get_per_xecore_buf_size(void); size_t xe_eu_stall_data_record_size(struct xe_device *xe); @@ -19,6 +20,6 @@ int xe_eu_stall_stream_open(struct drm_device *dev, static inline bool xe_eu_stall_supported_on_platform(struct xe_device *xe) { - return xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20; + return !IS_SRIOV_VF(xe) && (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20); } #endif diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 606922d9dd73..cd9b1c32f30f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -830,7 +830,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) { struct xe_device *xe = gt_to_xe(q->gt); struct xe_lrc *lrc; - u32 old_ts, new_ts; + u64 old_ts, new_ts; int idx; /* diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index fd41113f8572..0bcf97063ff6 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -555,6 +555,28 @@ void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) flush_work(&gsc->work); } +void xe_gsc_stop_prepare(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + int ret; + + if (!xe_uc_fw_is_loadable(&gsc->fw) || xe_uc_fw_is_in_error_state(&gsc->fw)) + return; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GSC); + + /* + * If the GSC FW load or the proxy init are interrupted, the only way + * to recover it is to do an FLR and reload the GSC from scratch. + * Therefore, let's wait for the init to complete before stopping + * operations. The proxy init is the last step, so we can just wait on + * that + */ + ret = xe_gsc_wait_for_proxy_init_done(gsc); + if (ret) + xe_gt_err(gt, "failed to wait for GSC init completion before uc stop\n"); +} + /* * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a * GSC engine reset by writing a notification bit in the GS1 register and then diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index d99f66c38075..b8b8e0810ad9 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -16,6 +16,7 @@ struct xe_hw_engine; int xe_gsc_init(struct xe_gsc *gsc); int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); +void xe_gsc_stop_prepare(struct xe_gsc *gsc); void xe_gsc_load_start(struct xe_gsc *gsc); void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 8cf70b228ff3..d0519cd6704a 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -71,6 +71,17 @@ bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) HECI1_FWSTS1_PROXY_STATE_NORMAL; } +int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + + /* Proxy init can take up to 500ms, so wait double that for safety */ + return xe_mmio_wait32(>->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE), + HECI1_FWSTS1_CURRENT_STATE, + HECI1_FWSTS1_PROXY_STATE_NORMAL, + USEC_PER_SEC, NULL, false); +} + static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set) { struct xe_gt *gt = gsc_to_gt(gsc); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h index fdef56995cd4..765602221dbc 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.h +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h @@ -12,6 +12,7 @@ struct xe_gsc; int xe_gsc_proxy_init(struct xe_gsc *gsc); bool xe_gsc_proxy_init_done(struct xe_gsc *gsc); +int 
xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc); int xe_gsc_proxy_start(struct xe_gsc *gsc); int xe_gsc_proxy_request_handler(struct xe_gsc *gsc); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 10a9e3c72b36..66198cf2662c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -857,7 +857,7 @@ void xe_gt_suspend_prepare(struct xe_gt *gt) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_uc_stop_prepare(>->uc); + xe_uc_suspend_prepare(>->uc); xe_force_wake_put(gt_to_fw(gt), fw_ref); } diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 2d63a69cbfa3..f7005a3643e6 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -92,22 +92,23 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p) struct xe_hw_engine *hwe; enum xe_hw_engine_id id; unsigned int fw_ref; + int ret = 0; xe_pm_runtime_get(xe); fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - xe_pm_runtime_put(xe); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto fw_put; } for_each_hw_engine(hwe, gt, id) xe_hw_engine_print(hwe, p); +fw_put: xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(xe); - return 0; + return ret; } static int powergate_info(struct xe_gt *gt, struct drm_printer *p) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index c5ad9a0a89c2..0c22b3a36655 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -435,9 +435,16 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, XE_MAX_EU_FUSE_BITS) * num_dss; - /* user can issue separate page faults per EU and per CS */ + /* + * user can issue separate page faults per EU and per CS + * + * XXX: Multiplier required as compute UMD are getting PF queue errors + * without it. Follow on why this multiplier is required. 
+ */ +#define PF_MULTIPLIER 8 pf_queue->num_dw = - (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW; + (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; +#undef PF_MULTIPLIER pf_queue->gt = gt; pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw, diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index f6d523e4c5fe..9095618648bc 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -359,7 +359,7 @@ static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, ext->reg = XE_REG(extlist->reg.__reg.addr); ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1); - ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); + ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id); ext->regname = extlist->name; } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 31bc2022bfc2..769781d577df 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -941,7 +941,7 @@ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) return xe_sched_invalidate_job(job, 2); } - ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]); + ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0])); ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); /* diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index df3ceddede07..03bfba696b37 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -24,6 +24,7 @@ #include "xe_hw_fence.h" #include "xe_map.h" #include "xe_memirq.h" +#include "xe_mmio.h" #include "xe_sriov.h" #include "xe_trace_lrc.h" #include "xe_vm.h" @@ -650,6 +651,7 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) #define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) #define LRC_PARALLEL_PPHWSP_OFFSET 2048 +#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096 #define LRC_PPHWSP_SIZE SZ_4K u32 xe_lrc_regs_offset(struct xe_lrc *lrc) @@ -684,7 +686,7 @@ static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) { - /* The start seqno is stored in the driver-defined portion of PPHWSP */ + /* This is stored in the driver-defined portion of PPHWSP */ return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; } @@ -694,11 +696,21 @@ static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; } +static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc) +{ + return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET; +} + static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc) { return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32); } +static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc) +{ + return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32); +} + static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) { /* Indirect ring state page is at the very end of LRC */ @@ -726,8 +738,10 @@ DECL_MAP_ADDR_HELPERS(regs) DECL_MAP_ADDR_HELPERS(start_seqno) DECL_MAP_ADDR_HELPERS(ctx_job_timestamp) DECL_MAP_ADDR_HELPERS(ctx_timestamp) +DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw) DECL_MAP_ADDR_HELPERS(parallel) DECL_MAP_ADDR_HELPERS(indirect_ring) +DECL_MAP_ADDR_HELPERS(engine_id) #undef DECL_MAP_ADDR_HELPERS @@ -743,18 +757,37 @@ u32 
xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc) } /** + * xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address + * @lrc: Pointer to the lrc. + * + * Returns: ctx timestamp udw GGTT address + */ +u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc); +} + +/** * xe_lrc_ctx_timestamp() - Read ctx timestamp value * @lrc: Pointer to the lrc. * * Returns: ctx timestamp value */ -u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) +u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) { struct xe_device *xe = lrc_to_xe(lrc); struct iosys_map map; + u32 ldw, udw = 0; map = __xe_lrc_ctx_timestamp_map(lrc); - return xe_map_read32(xe, &map); + ldw = xe_map_read32(xe, &map); + + if (xe->info.has_64bit_timestamp) { + map = __xe_lrc_ctx_timestamp_udw_map(lrc); + udw = xe_map_read32(xe, &map); + } + + return (u64)udw << 32 | ldw; } /** @@ -864,7 +897,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe) static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) { - u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile); + u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt)); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); @@ -877,6 +910,65 @@ static void xe_lrc_finish(struct xe_lrc *lrc) xe_bo_unpin(lrc->bo); xe_bo_unlock(lrc->bo); xe_bo_put(lrc->bo); + xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo); +} + +/* + * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active + * context run ticks. + * @lrc: Pointer to the lrc. + * + * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the + * context, but only gets updated when the context switches out. In order to + * check how long a context has been active before it switches out, two things + * are required: + * + * (1) Determine if the context is running: + * To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in + * the LRC. The value chosen is 1 since 0 is the initial value when the LRC is + * initialized. During a query, we just check for this value to determine if the + * context is active. If the context switched out, it would overwrite this + * location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as + * the last part of context restore, so reusing this LRC location will not + * clobber anything. + * + * (2) Calculate the time that the context has been active for: + * The CTX_TIMESTAMP ticks only when the context is active. If a context is + * active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization. + * While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific + * engine instance. Since we do not know which instance the context is running + * on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and + * store it in the PPHSWP. 
+ */ +#define CONTEXT_ACTIVE 1ULL +static void xe_lrc_setup_utilization(struct xe_lrc *lrc) +{ + u32 *cmd; + + cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + + *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; + *cmd++ = ENGINE_ID(0).addr; + *cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc); + *cmd++ = 0; + + *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); + *cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc); + *cmd++ = 0; + *cmd++ = lower_32_bits(CONTEXT_ACTIVE); + + if (lrc_to_xe(lrc)->info.has_64bit_timestamp) { + *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); + *cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc); + *cmd++ = 0; + *cmd++ = upper_32_bits(CONTEXT_ACTIVE); + } + + *cmd++ = MI_BATCH_BUFFER_END; + + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, + xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + } #define PVC_CTX_ASID (0x2e + 1) @@ -893,31 +985,40 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, void *init_data = NULL; u32 arb_enable; u32 lrc_size; + u32 bo_flags; int err; kref_init(&lrc->refcount); + lrc->gt = gt; lrc->flags = 0; lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); if (xe_gt_has_indirect_ring_state(gt)) lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; + bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE; + /* * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * via VM bind calls. */ lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + bo_flags); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); + lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, + ttm_bo_type_kernel, + bo_flags); + if (IS_ERR(lrc->bb_per_ctx_bo)) { + err = PTR_ERR(lrc->bb_per_ctx_bo); + goto err_lrc_finish; + } + lrc->size = lrc_size; - lrc->tile = gt_to_tile(hwe->gt); lrc->ring.size = ring_size; lrc->ring.tail = 0; - lrc->ctx_timestamp = 0; xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, hwe->fence_irq, hwe->name); @@ -990,7 +1091,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) | _MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE)); + lrc->ctx_timestamp = 0; xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0); + if (lrc_to_xe(lrc)->info.has_64bit_timestamp) + xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0); if (xe->info.has_asid && vm) xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); @@ -1019,6 +1123,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, map = __xe_lrc_start_seqno_map(lrc); xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); + xe_lrc_setup_utilization(lrc); + return 0; err_lrc_finish: @@ -1238,6 +1344,21 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) return __xe_lrc_parallel_map(lrc); } +/** + * xe_lrc_engine_id() - Read engine id value + * @lrc: Pointer to the lrc. 
+ * + * Returns: engine id value + */ +static u32 xe_lrc_engine_id(struct xe_lrc *lrc) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_engine_id_map(lrc); + return xe_map_read32(xe, &map); +} + static int instr_dw(u32 cmd_header) { /* GFXPIPE "SINGLE_DW" opcodes are a single dword */ @@ -1684,7 +1805,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; snapshot->lrc_snapshot = NULL; - snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); + snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); return snapshot; } @@ -1784,22 +1905,74 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) kfree(snapshot); } +static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts) +{ + u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id); + u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id); + struct xe_hw_engine *hwe; + u64 val; + + hwe = xe_gt_hw_engine(lrc->gt, class, instance, false); + if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe), + "Unexpected engine class:instance %d:%d for context utilization\n", + class, instance)) + return -1; + + if (lrc_to_xe(lrc)->info.has_64bit_timestamp) + val = xe_mmio_read64_2x32(&hwe->gt->mmio, + RING_CTX_TIMESTAMP(hwe->mmio_base)); + else + val = xe_mmio_read32(&hwe->gt->mmio, + RING_CTX_TIMESTAMP(hwe->mmio_base)); + + *reg_ctx_ts = val; + + return 0; +} + /** * xe_lrc_update_timestamp() - Update ctx timestamp * @lrc: Pointer to the lrc. * @old_ts: Old timestamp value * * Populate @old_ts with the current saved ctx timestamp, read new ctx timestamp and - * update saved value. + * update saved value. With support for active contexts, the calculation may be + * slightly racy, so follow a read-again logic to ensure that the context is + * still active before returning the right timestamp. * * Returns: New ctx timestamp value */ -u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) +u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts) { + u64 lrc_ts, reg_ts; + u32 engine_id; + *old_ts = lrc->ctx_timestamp; - lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); + lrc_ts = xe_lrc_ctx_timestamp(lrc); + /* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */ + if (IS_SRIOV_VF(lrc_to_xe(lrc))) { + lrc->ctx_timestamp = lrc_ts; + goto done; + } + + if (lrc_ts == CONTEXT_ACTIVE) { + engine_id = xe_lrc_engine_id(lrc); + if (!get_ctx_timestamp(lrc, engine_id, &reg_ts)) + lrc->ctx_timestamp = reg_ts; + + /* read lrc again to ensure context is still active */ + lrc_ts = xe_lrc_ctx_timestamp(lrc); + } + + /* + * If context switched out, just use the lrc_ts. Note that this needs to + * be a separate if condition.
+ */ + if (lrc_ts != CONTEXT_ACTIVE) + lrc->ctx_timestamp = lrc_ts; +done: trace_xe_lrc_update_timestamp(lrc, *old_ts); return lrc->ctx_timestamp; diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 0b40f349ab95..eb6e8de8c939 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -120,7 +120,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot); u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc); -u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); +u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc); +u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); @@ -136,6 +137,6 @@ u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); * * Returns the current LRC timestamp */ -u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts); +u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts); #endif diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 71ecb453f811..ae24cf6f8dd9 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -25,8 +25,8 @@ struct xe_lrc { /** @size: size of lrc including any indirect ring state page */ u32 size; - /** @tile: tile which this LRC belongs to */ - struct xe_tile *tile; + /** @gt: gt which this LRC belongs to */ + struct xe_gt *gt; /** @flags: LRC flags */ #define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 @@ -52,7 +52,10 @@ struct xe_lrc { struct xe_hw_fence_ctx fence_ctx; /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ - u32 ctx_timestamp; + u64 ctx_timestamp; + + /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */ + struct xe_bo *bb_per_ctx_bo; }; struct xe_lrc_snapshot; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 70a36e777546..46301f341773 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -75,12 +75,12 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) * is fine as it's going to the root tile's mmio, that's * guaranteed to be initialized earlier in xe_mmio_probe_early() */ - mtcfg = xe_mmio_read64_2x32(mmio, XEHP_MTCFG_ADDR); + mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR); tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; if (tile_count < xe->info.tile_count) { drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", - xe->info.tile_count, tile_count); + xe->info.tile_count, tile_count); xe->info.tile_count = tile_count; /* @@ -128,7 +128,7 @@ int xe_mmio_probe_early(struct xe_device *xe) */ xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR); xe->mmio.regs = pci_iomap(pdev, GTTMMADR_BAR, 0); - if (xe->mmio.regs == NULL) { + if (!xe->mmio.regs) { drm_err(&xe->drm, "failed to map registers\n"); return -EIO; } @@ -309,8 +309,8 @@ u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg) return (u64)udw << 32 | ldw; } -static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, - u32 *out_val, bool atomic, bool expect_match) +static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, + u32 timeout_us, u32 *out_val, bool atomic, bool expect_match) { ktime_t cur = ktime_get_raw(); const ktime_t end = ktime_add_us(cur, timeout_us); diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 31dade91a089..0c737413fcb6 
100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -775,22 +775,23 @@ void xe_mocs_init(struct xe_gt *gt) void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); + enum xe_force_wake_domains domain; struct xe_mocs_info table; unsigned int fw_ref, flags; flags = get_mocs_settings(xe, &table); + domain = flags & HAS_LNCF_MOCS ? XE_FORCEWAKE_ALL : XE_FW_GT; xe_pm_runtime_get_noresume(xe); - fw_ref = xe_force_wake_get(gt_to_fw(gt), - flags & HAS_LNCF_MOCS ? - XE_FORCEWAKE_ALL : XE_FW_GT); - if (!fw_ref) + fw_ref = xe_force_wake_get(gt_to_fw(gt), domain); + + if (!xe_force_wake_ref_has_domain(fw_ref, domain)) goto err_fw; table.ops->dump(&table, flags, gt, p); - xe_force_wake_put(gt_to_fw(gt), fw_ref); err_fw: + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(xe); } diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 9f4632e39a1a..e861c694f336 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -29,9 +29,6 @@ struct xe_modparam xe_modparam = { module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600); MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2"); -module_param_named(always_migrate_to_vram, xe_modparam.always_migrate_to_vram, bool, 0444); -MODULE_PARM_DESC(always_migrate_to_vram, "Always migrate to VRAM on GPU fault"); - module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 84339e509c80..5a3bfea8b7b4 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -12,7 +12,6 @@ struct xe_modparam { bool force_execlist; bool probe_display; - bool always_migrate_to_vram; u32 force_vram_bar_size; int guc_log_level; char *guc_firmware_path; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 818f023166d5..f4d108dc49b1 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -140,6 +140,7 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ + .has_64bit_timestamp = 1, \ .va_bits = 48, \ .vm_max_level = 4, \ .hw_engine_mask = \ @@ -668,6 +669,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; xe->info.has_usm = graphics_desc->has_usm; + xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp; for_each_remote_tile(tile, xe, id) { int err; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index e9b9bbc138d3..ca6b10d35573 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -21,6 +21,7 @@ struct xe_graphics_desc { u8 has_indirect_ring_state:1; u8 has_range_tlb_invalidation:1; u8 has_usm:1; + u8 has_64bit_timestamp:1; }; struct xe_media_desc { diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ffaf0d02dc7d..856038553b81 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -2232,11 +2232,19 @@ static void op_commit(struct xe_vm *vm, } case DRM_GPUVA_OP_DRIVER: { + /* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */ + if (op->subop == XE_VMA_SUBOP_MAP_RANGE) { - op->map_range.range->tile_present |= BIT(tile->id); - op->map_range.range->tile_invalidated &= 
~BIT(tile->id); + WRITE_ONCE(op->map_range.range->tile_present, + op->map_range.range->tile_present | + BIT(tile->id)); + WRITE_ONCE(op->map_range.range->tile_invalidated, + op->map_range.range->tile_invalidated & + ~BIT(tile->id)); } else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) { - op->unmap_range.range->tile_present &= ~BIT(tile->id); + WRITE_ONCE(op->unmap_range.range->tile_present, + op->unmap_range.range->tile_present & + ~BIT(tile->id)); } break; } diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index a7582b097ae6..bc1689db4cd7 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -234,13 +234,10 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job) static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) { - dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT | - MI_COPY_MEM_MEM_DST_GGTT; + dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; + dw[i++] = RING_CTX_TIMESTAMP(0).addr; dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); dw[i++] = 0; - dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc); - dw[i++] = 0; - dw[i++] = MI_NOOP; return i; } diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 8184390f9c7b..86d47aaf0358 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -227,7 +227,7 @@ struct xe_shrinker *xe_shrinker_create(struct xe_device *xe) if (!shrinker) return ERR_PTR(-ENOMEM); - shrinker->shrink = shrinker_alloc(0, "xe system shrinker"); + shrinker->shrink = shrinker_alloc(0, "drm-xe_gem:%s", xe->drm.unique); if (!shrinker->shrink) { kfree(shrinker); return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index f8c128524d9f..975094c1a582 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -15,8 +15,17 @@ static bool xe_svm_range_in_vram(struct xe_svm_range *range) { - /* Not reliable without notifier lock */ - return range->base.flags.has_devmem_pages; + /* + * Advisory only check whether the range is currently backed by VRAM + * memory. 
+ */ + + struct drm_gpusvm_range_flags flags = { + /* Pairs with WRITE_ONCE in drm_gpusvm.c */ + .__flags = READ_ONCE(range->base.flags.__flags), + }; + + return flags.has_devmem_pages; } static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range) @@ -79,7 +88,7 @@ xe_svm_range_alloc(struct drm_gpusvm *gpusvm) range = kzalloc(sizeof(*range), GFP_KERNEL); if (!range) - return ERR_PTR(-ENOMEM); + return NULL; INIT_LIST_HEAD(&range->garbage_collector_link); xe_vm_get(gpusvm_to_vm(gpusvm)); @@ -645,9 +654,16 @@ void xe_svm_fini(struct xe_vm *vm) } static bool xe_svm_range_is_valid(struct xe_svm_range *range, - struct xe_tile *tile) + struct xe_tile *tile, + bool devmem_only) { - return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id); + /* + * Advisory only check whether the range currently has a valid mapping, + * READ_ONCE pairs with WRITE_ONCE in xe_pt.c + */ + return ((READ_ONCE(range->tile_present) & + ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) && + (!devmem_only || xe_svm_range_in_vram(range)); } static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) @@ -712,6 +728,36 @@ unlock: return err; } +static bool supports_4K_migration(struct xe_device *xe) +{ + if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) + return false; + + return true; +} + +static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, + struct xe_vma *vma) +{ + struct xe_vm *vm = range_to_vm(&range->base); + u64 range_size = xe_svm_range_size(range); + + if (!range->base.flags.migrate_devmem) + return false; + + if (xe_svm_range_in_vram(range)) { + drm_dbg(&vm->xe->drm, "Range is already in VRAM\n"); + return false; + } + + if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) { + drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); + return false; + } + + return true; +} + /** * xe_svm_handle_pagefault() - SVM handle page fault * @vm: The VM. @@ -735,11 +781,16 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), .check_pages_threshold = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0, + .devmem_only = atomic && IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), + .timeslice_ms = atomic && IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0, }; struct xe_svm_range *range; struct drm_gpusvm_range *r; struct drm_exec exec; struct dma_fence *fence; + int migrate_try_count = ctx.devmem_only ? 
3 : 1; ktime_t end = 0; int err; @@ -758,24 +809,31 @@ retry: if (IS_ERR(r)) return PTR_ERR(r); + if (ctx.devmem_only && !r->flags.migrate_devmem) + return -EACCES; + range = to_xe_range(r); - if (xe_svm_range_is_valid(range, tile)) + if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) return 0; range_debug(range, "PAGE FAULT"); - /* XXX: Add migration policy, for now migrate range once */ - if (!range->skip_migrate && range->base.flags.migrate_devmem && - xe_svm_range_size(range) >= SZ_64K) { - range->skip_migrate = true; - + if (--migrate_try_count >= 0 && + xe_svm_range_needs_migrate_to_vram(range, vma)) { err = xe_svm_alloc_vram(vm, tile, range, &ctx); + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (err) { - drm_dbg(&vm->xe->drm, - "VRAM allocation failed, falling back to " - "retrying fault, asid=%u, errno=%pe\n", - vm->usm.asid, ERR_PTR(err)); - goto retry; + if (migrate_try_count || !ctx.devmem_only) { + drm_dbg(&vm->xe->drm, + "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n", + vm->usm.asid, ERR_PTR(err)); + goto retry; + } else { + drm_err(&vm->xe->drm, + "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n", + vm->usm.asid, ERR_PTR(err)); + return err; + } } } @@ -783,15 +841,23 @@ retry: err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx); /* Corner where CPU mappings have changed */ if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) { - if (err == -EOPNOTSUPP) { - range_debug(range, "PAGE FAULT - EVICT PAGES"); - drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base); + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ + if (migrate_try_count > 0 || !ctx.devmem_only) { + if (err == -EOPNOTSUPP) { + range_debug(range, "PAGE FAULT - EVICT PAGES"); + drm_gpusvm_range_evict(&vm->svm.gpusvm, + &range->base); + } + drm_dbg(&vm->xe->drm, + "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); + range_debug(range, "PAGE FAULT - RETRY PAGES"); + goto retry; + } else { + drm_err(&vm->xe->drm, + "Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); } - drm_dbg(&vm->xe->drm, - "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n", - vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); - range_debug(range, "PAGE FAULT - RETRY PAGES"); - goto retry; } if (err) { range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT"); @@ -815,6 +881,7 @@ retry_bind: drm_exec_fini(&exec); err = PTR_ERR(fence); if (err == -EAGAIN) { + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ range_debug(range, "PAGE FAULT - RETRY BIND"); goto retry; } @@ -825,9 +892,6 @@ retry_bind: } drm_exec_fini(&exec); - if (xe_modparam.always_migrate_to_vram) - range->skip_migrate = false; - dma_fence_wait(fence, false); dma_fence_put(fence); @@ -947,3 +1011,15 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) return 0; } #endif + +/** + * xe_svm_flush() - SVM flush + * @vm: The VM. + * + * Flush all SVM actions. + */ +void xe_svm_flush(struct xe_vm *vm) +{ + if (xe_vm_in_fault_mode(vm)) + flush_work(&vm->svm.garbage_collector.work); +} diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index e059590e5076..fe58ac2f4baa 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -36,11 +36,6 @@ struct xe_svm_range { * range. Protected by GPU SVM notifier lock. 
*/ u8 tile_invalidated; - /** - * @skip_migrate: Skip migration to VRAM, protected by GPU fault handler - * locking. - */ - u8 skip_migrate :1; }; #if IS_ENABLED(CONFIG_DRM_GPUSVM) @@ -72,6 +67,9 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end); int xe_svm_bo_evict(struct xe_bo *bo); void xe_svm_range_debug(struct xe_svm_range *range, const char *operation); + +void xe_svm_flush(struct xe_vm *vm); + #else static inline bool xe_svm_range_pages_valid(struct xe_svm_range *range) { @@ -124,6 +122,11 @@ static inline void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) { } + +static inline void xe_svm_flush(struct xe_vm *vm) +{ +} + #endif /** diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.h b/drivers/gpu/drm/xe/xe_trace_lrc.h index 5c669a0b2180..d525cbee1e34 100644 --- a/drivers/gpu/drm/xe/xe_trace_lrc.h +++ b/drivers/gpu/drm/xe/xe_trace_lrc.h @@ -19,12 +19,12 @@ #define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev) TRACE_EVENT(xe_lrc_update_timestamp, - TP_PROTO(struct xe_lrc *lrc, uint32_t old), + TP_PROTO(struct xe_lrc *lrc, uint64_t old), TP_ARGS(lrc, old), TP_STRUCT__entry( __field(struct xe_lrc *, lrc) - __field(u32, old) - __field(u32, new) + __field(u64, old) + __field(u64, new) __string(name, lrc->fence_ctx.name) __string(device_id, __dev_name_lrc(lrc)) ), @@ -36,7 +36,7 @@ TRACE_EVENT(xe_lrc_update_timestamp, __assign_str(name); __assign_str(device_id); ), - TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s", + TP_printk("lrc=:%p lrc->name=%s old=%llu new=%llu device_id:%s", __entry->lrc, __get_str(name), __entry->old, __entry->new, __get_str(device_id)) diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index c14bd2282044..3a8751a8b92d 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -244,7 +244,7 @@ void xe_uc_gucrc_disable(struct xe_uc *uc) void xe_uc_stop_prepare(struct xe_uc *uc) { - xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_gsc_stop_prepare(&uc->gsc); xe_guc_stop_prepare(&uc->guc); } @@ -278,6 +278,12 @@ again: goto again; } +void xe_uc_suspend_prepare(struct xe_uc *uc) +{ + xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_guc_stop_prepare(&uc->guc); +} + int xe_uc_suspend(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 3813c1ede450..c23e6f5e2514 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -18,6 +18,7 @@ int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); void xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); +void xe_uc_suspend_prepare(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); void xe_uc_declare_wedged(struct xe_uc *uc); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 60303998bd61..367c84b90e9e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3312,8 +3312,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } /* Ensure all UNMAPs visible */ - if (xe_vm_in_fault_mode(vm)) - flush_work(&vm->svm.garbage_collector.work); + xe_svm_flush(vm); err = down_write_killable(&vm->lock); if (err) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 24f644c0a673..2f833f0d575f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -815,6 +815,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { 
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) }, + { XE_RTP_NAME("22021007897"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) + }, /* Xe3_LPG */ { XE_RTP_NAME("14021490052"), diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 17c9660da450..ab0e5a72a059 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -93,7 +93,7 @@ impl Chipset { // For now, redirect to fmt::Debug for convenience. impl fmt::Display for Chipset { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{:?}", self) + write!(f, "{self:?}") } } |
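
The xe_lrc.c changes above split the context-utilization accounting into two halves: the per-context WA BB seeds the CTX_TIMESTAMP slot in the LRC with CONTEXT_ACTIVE (1) as the last step of context restore, and xe_lrc_update_timestamp() later checks for that marker, samples the engine's RING_CTX_TIMESTAMP over MMIO, and re-reads the LRC slot to catch a context switch that raced with the sample. The standalone sketch below models only that read-again decision, with plain variables standing in for the LRC slot and the MMIO counter; none of the names are driver API.

#include <stdint.h>
#include <stdio.h>

#define CONTEXT_ACTIVE 1ULL

static uint64_t lrc_ctx_timestamp;	/* stands in for the CTX_TIMESTAMP slot in the LRC */
static uint64_t ring_ctx_timestamp;	/* stands in for the per-engine RING_CTX_TIMESTAMP counter */

static uint64_t update_timestamp(uint64_t *ctx_timestamp)
{
	uint64_t lrc_ts = lrc_ctx_timestamp;

	if (lrc_ts == CONTEXT_ACTIVE) {
		/* Looks active: sample the live engine counter ... */
		*ctx_timestamp = ring_ctx_timestamp;
		/* ... then re-read the LRC slot to see if it switched out meanwhile. */
		lrc_ts = lrc_ctx_timestamp;
	}

	/* A switched-out context has written its final, authoritative value. */
	if (lrc_ts != CONTEXT_ACTIVE)
		*ctx_timestamp = lrc_ts;

	return *ctx_timestamp;
}

int main(void)
{
	uint64_t ctx_timestamp = 0;

	lrc_ctx_timestamp = CONTEXT_ACTIVE;	/* WA BB seeded the slot: context is running */
	ring_ctx_timestamp = 12345;
	printf("active context: %llu ticks\n",
	       (unsigned long long)update_timestamp(&ctx_timestamp));

	lrc_ctx_timestamp = 67890;		/* context switched out, LRC holds the real value */
	printf("idle context:   %llu ticks\n",
	       (unsigned long long)update_timestamp(&ctx_timestamp));
	return 0;
}

The second LRC read is what keeps the race benign: if the context switched out between the MMIO sample and the check, the LRC now holds the final tick count and overrides the sample.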
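
get_ctx_timestamp() goes through xe_mmio_read64_2x32() on platforms with has_64bit_timestamp because a 64-bit counter sampled as two 32-bit reads can tear if the low half wraps between them. The sketch below shows the usual re-read pattern for that problem in isolation; the fake counter and read_lo()/read_hi() are placeholders, not the driver's accessors.

#include <stdint.h>
#include <stdio.h>

/* A 64-bit counter just below a low-half wrap; every access advances it. */
static uint64_t fake_counter = 0x00000001fffffffeULL;

static uint32_t read_lo(void) { return (uint32_t)fake_counter++; }
static uint32_t read_hi(void) { return (uint32_t)(fake_counter++ >> 32); }

static uint64_t read64_2x32(void)
{
	uint32_t hi, lo, old_hi;

	hi = read_hi();
	do {
		old_hi = hi;
		lo = read_lo();
		hi = read_hi();	/* a change here means the low half wrapped under us */
	} while (hi != old_hi);

	return (uint64_t)hi << 32 | lo;
}

int main(void)
{
	printf("sample: 0x%016llx\n", (unsigned long long)read64_2x32());
	return 0;
}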
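
The xe_pt.c and xe_svm.c changes pair WRITE_ONCE() on range->tile_present and range->tile_invalidated with lockless READ_ONCE() checks that the comments call advisory: a stale answer only costs an extra fault retry, but a torn or compiler-reloaded value would not be tolerable. The userspace model below uses relaxed C11 atomics to play the same role; the struct and function names are made up for illustration.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct range_state {
	atomic_uchar tile_present;
	atomic_uchar tile_invalidated;
};

/* Updater side (holds the real locks in the driver): mark one tile bound. */
static void commit_tile(struct range_state *r, unsigned int tile_id)
{
	atomic_fetch_or_explicit(&r->tile_present, 1u << tile_id,
				 memory_order_relaxed);
	atomic_fetch_and_explicit(&r->tile_invalidated, ~(1u << tile_id),
				  memory_order_relaxed);
}

/* Reader side: advisory only, a stale answer is fine but a torn one is not. */
static bool range_is_valid(struct range_state *r, unsigned int tile_id)
{
	unsigned char present = atomic_load_explicit(&r->tile_present,
						     memory_order_relaxed);
	unsigned char invalidated = atomic_load_explicit(&r->tile_invalidated,
							 memory_order_relaxed);

	return (present & ~invalidated) & (1u << tile_id);
}

int main(void)
{
	struct range_state r = { 0, 0 };

	printf("before commit: %d\n", range_is_valid(&r, 0));
	commit_tile(&r, 0);
	printf("after commit:  %d\n", range_is_valid(&r, 0));
	return 0;
}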
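
xe_svm_handle_pagefault() drops the old skip_migrate flag in favor of a bounded retry: up to three migration attempts when VRAM placement is mandatory (ctx.devmem_only), a single attempt otherwise, with ctx.timeslice_ms doubled on every retry so a range that keeps bouncing between CPU and GPU eventually settles. The sketch below reduces that control flow to its shape; try_migrate() stands in for xe_svm_alloc_vram() and the rest of the fault path is elided.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the VRAM allocation step: fail once, then succeed. */
static bool try_migrate(unsigned int timeslice_ms)
{
	static int calls;

	printf("migrate attempt, timeslice %u ms\n", timeslice_ms);
	return ++calls > 1;
}

static int handle_fault(bool devmem_only)
{
	int migrate_try_count = devmem_only ? 3 : 1;
	unsigned int timeslice_ms = devmem_only ? 5 : 0;

retry:
	if (--migrate_try_count >= 0 && !try_migrate(timeslice_ms)) {
		timeslice_ms <<= 1;	/* double the timeslice before retrying */
		if (migrate_try_count || !devmem_only)
			goto retry;
		return -1;	/* retries exhausted and VRAM placement is mandatory */
	}

	return 0;	/* get pages and bind the range (elided) */
}

int main(void)
{
	printf("result: %d\n", handle_fault(true));
	return 0;
}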