diff options
Diffstat (limited to 'drivers')
68 files changed, 813 insertions, 308 deletions
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index f0dad0c9ce33..cd24ccd20ba6 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -455,7 +455,7 @@ priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t if (ret < 0) return ret; - buf[size] = '\0'; + buf[ret] = '\0'; ret = sscanf(buf, "%u %u %u %u", &band, &grace_period, &process_grace_period, &process_quantum); if (ret != 4) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index f73ce6e13065..54676e3d82dd 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -231,16 +231,18 @@ static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr, sizeof(struct acpi_table_pptt)); proc_sz = sizeof(struct acpi_pptt_processor); - while ((unsigned long)entry + proc_sz < table_end) { + /* ignore subtable types that are smaller than a processor node */ + while ((unsigned long)entry + proc_sz <= table_end) { cpu_node = (struct acpi_pptt_processor *)entry; + if (entry->type == ACPI_PPTT_TYPE_PROCESSOR && cpu_node->parent == node_entry) return 0; if (entry->length == 0) return 0; + entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry, entry->length); - } return 1; } @@ -273,15 +275,18 @@ static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_he proc_sz = sizeof(struct acpi_pptt_processor); /* find the processor structure associated with this cpuid */ - while ((unsigned long)entry + proc_sz < table_end) { + while ((unsigned long)entry + proc_sz <= table_end) { cpu_node = (struct acpi_pptt_processor *)entry; if (entry->length == 0) { pr_warn("Invalid zero length subtable\n"); break; } + /* entry->length may not equal proc_sz, revalidate the processor structure length */ if (entry->type == ACPI_PPTT_TYPE_PROCESSOR && acpi_cpu_id == cpu_node->acpi_processor_id && + (unsigned long)entry + entry->length <= table_end && + entry->length == proc_sz + cpu_node->number_of_priv_resources * sizeof(u32) && acpi_pptt_leaf_node(table_hdr, cpu_node)) { return (struct acpi_pptt_processor *)entry; } diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index f9032076bc06..dc104c025cd5 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1708,7 +1708,7 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, * that ublk_dispatch_req() is always called */ req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); - if (req && blk_mq_request_started(req)) + if (req && blk_mq_request_started(req) && req->tag == tag) return; spin_lock(&ubq->cancel_lock); diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 5f8d010516f0..b1ef4546346d 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -320,8 +320,9 @@ void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, count++; dma_resv_list_set(fobj, i, fence, usage); - /* pointer update must be visible before we extend the num_fences */ - smp_store_mb(fobj->num_fences, count); + /* fence update must be visible before we extend the num_fences */ + smp_wmb(); + fobj->num_fences = count; } EXPORT_SYMBOL(dma_resv_add_fence); diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c index 715ac3ae067b..81339664036f 100644 --- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c +++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c @@ -342,6 +342,9 @@ static void pt_cmd_callback_work(void *data, int err) struct pt_dma_chan *chan; unsigned long flags; + if (!desc) + return; + dma_chan = desc->vd.tx.chan; chan = to_pt_chan(dma_chan); @@ -355,16 +358,14 @@ static void pt_cmd_callback_work(void *data, int err) desc->status = DMA_ERROR; spin_lock_irqsave(&chan->vc.lock, flags); - if (desc) { - if (desc->status != DMA_COMPLETE) { - if (desc->status != DMA_ERROR) - desc->status = DMA_COMPLETE; + if (desc->status != DMA_COMPLETE) { + if (desc->status != DMA_ERROR) + desc->status = DMA_COMPLETE; - dma_cookie_complete(tx_desc); - dma_descriptor_unmap(tx_desc); - } else { - tx_desc = NULL; - } + dma_cookie_complete(tx_desc); + dma_descriptor_unmap(tx_desc); + } else { + tx_desc = NULL; } spin_unlock_irqrestore(&chan->vc.lock, flags); diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index d891dfca358e..91b2fbc0b864 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -841,9 +841,9 @@ static int dmatest_func(void *data) } else { dma_async_issue_pending(chan); - wait_event_timeout(thread->done_wait, - done->done, - msecs_to_jiffies(params->timeout)); + wait_event_freezable_timeout(thread->done_wait, + done->done, + msecs_to_jiffies(params->timeout)); status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 756d67325db5..66bfa28d984e 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -57,7 +57,7 @@ static irqreturn_t fsl_edma3_tx_handler(int irq, void *dev_id) intr = edma_readl_chreg(fsl_chan, ch_int); if (!intr) - return IRQ_HANDLED; + return IRQ_NONE; edma_writel_chreg(fsl_chan, 1, ch_int); diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index ff94ee892339..6d12033649f8 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -222,7 +222,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) struct idxd_wq *wq; struct device *dev, *fdev; int rc = 0; - struct iommu_sva *sva; + struct iommu_sva *sva = NULL; unsigned int pasid; struct idxd_cdev *idxd_cdev; @@ -317,7 +317,7 @@ failed_set_pasid: if (device_user_pasid_enabled(idxd)) idxd_xa_pasid_remove(ctx); failed_get_pasid: - if (device_user_pasid_enabled(idxd)) + if (device_user_pasid_enabled(idxd) && !IS_ERR_OR_NULL(sva)) iommu_sva_unbind_device(sva); failed: mutex_unlock(&wq->wq_lock); @@ -407,6 +407,9 @@ static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma) if (!idxd->user_submission_safe && !capable(CAP_SYS_RAWIO)) return -EPERM; + if (current->mm != ctx->mm) + return -EPERM; + rc = check_vma(wq, vma, __func__); if (rc < 0) return rc; @@ -473,6 +476,9 @@ static ssize_t idxd_cdev_write(struct file *filp, const char __user *buf, size_t ssize_t written = 0; int i; + if (current->mm != ctx->mm) + return -EPERM; + for (i = 0; i < len/sizeof(struct dsa_hw_desc); i++) { int rc = idxd_submit_user_descriptor(ctx, udesc + i); @@ -493,6 +499,9 @@ static __poll_t idxd_cdev_poll(struct file *filp, struct idxd_device *idxd = wq->idxd; __poll_t out = 0; + if (current->mm != ctx->mm) + return POLLNVAL; + poll_wait(filp, &wq->err_queue, wait); spin_lock(&idxd->dev_lock); if (idxd->sw_err.valid) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index fca1d2924999..760b7d81fcd8 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -155,6 +155,25 @@ static void idxd_cleanup_interrupts(struct idxd_device *idxd) pci_free_irq_vectors(pdev); } +static void idxd_clean_wqs(struct idxd_device *idxd) +{ + struct idxd_wq *wq; + struct device *conf_dev; + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + wq = idxd->wqs[i]; + if (idxd->hw.wq_cap.op_config) + bitmap_free(wq->opcap_bmap); + kfree(wq->wqcfg); + conf_dev = wq_confdev(wq); + put_device(conf_dev); + kfree(wq); + } + bitmap_free(idxd->wq_enable_map); + kfree(idxd->wqs); +} + static int idxd_setup_wqs(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -169,8 +188,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) idxd->wq_enable_map = bitmap_zalloc_node(idxd->max_wqs, GFP_KERNEL, dev_to_node(dev)); if (!idxd->wq_enable_map) { - kfree(idxd->wqs); - return -ENOMEM; + rc = -ENOMEM; + goto err_bitmap; } for (i = 0; i < idxd->max_wqs; i++) { @@ -189,10 +208,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) conf_dev->bus = &dsa_bus_type; conf_dev->type = &idxd_wq_device_type; rc = dev_set_name(conf_dev, "wq%d.%d", idxd->id, wq->id); - if (rc < 0) { - put_device(conf_dev); + if (rc < 0) goto err; - } mutex_init(&wq->wq_lock); init_waitqueue_head(&wq->err_queue); @@ -203,7 +220,6 @@ static int idxd_setup_wqs(struct idxd_device *idxd) wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); if (!wq->wqcfg) { - put_device(conf_dev); rc = -ENOMEM; goto err; } @@ -211,9 +227,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) if (idxd->hw.wq_cap.op_config) { wq->opcap_bmap = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL); if (!wq->opcap_bmap) { - put_device(conf_dev); rc = -ENOMEM; - goto err; + goto err_opcap_bmap; } bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); } @@ -224,15 +239,46 @@ static int idxd_setup_wqs(struct idxd_device *idxd) return 0; - err: +err_opcap_bmap: + kfree(wq->wqcfg); + +err: + put_device(conf_dev); + kfree(wq); + while (--i >= 0) { wq = idxd->wqs[i]; + if (idxd->hw.wq_cap.op_config) + bitmap_free(wq->opcap_bmap); + kfree(wq->wqcfg); conf_dev = wq_confdev(wq); put_device(conf_dev); + kfree(wq); + } + bitmap_free(idxd->wq_enable_map); + +err_bitmap: + kfree(idxd->wqs); + return rc; } +static void idxd_clean_engines(struct idxd_device *idxd) +{ + struct idxd_engine *engine; + struct device *conf_dev; + int i; + + for (i = 0; i < idxd->max_engines; i++) { + engine = idxd->engines[i]; + conf_dev = engine_confdev(engine); + put_device(conf_dev); + kfree(engine); + } + kfree(idxd->engines); +} + static int idxd_setup_engines(struct idxd_device *idxd) { struct idxd_engine *engine; @@ -263,6 +309,7 @@ static int idxd_setup_engines(struct idxd_device *idxd) rc = dev_set_name(conf_dev, "engine%d.%d", idxd->id, engine->id); if (rc < 0) { put_device(conf_dev); + kfree(engine); goto err; } @@ -276,10 +323,26 @@ static int idxd_setup_engines(struct idxd_device *idxd) engine = idxd->engines[i]; conf_dev = engine_confdev(engine); put_device(conf_dev); + kfree(engine); } + kfree(idxd->engines); + return rc; } +static void idxd_clean_groups(struct idxd_device *idxd) +{ + struct idxd_group *group; + int i; + + for (i = 0; i < idxd->max_groups; i++) { + group = idxd->groups[i]; + put_device(group_confdev(group)); + kfree(group); + } + kfree(idxd->groups); +} + static int idxd_setup_groups(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -310,6 +373,7 @@ static int idxd_setup_groups(struct idxd_device *idxd) rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id); if (rc < 0) { put_device(conf_dev); + kfree(group); goto err; } @@ -334,20 +398,18 @@ static int idxd_setup_groups(struct idxd_device *idxd) while (--i >= 0) { group = idxd->groups[i]; put_device(group_confdev(group)); + kfree(group); } + kfree(idxd->groups); + return rc; } static void idxd_cleanup_internals(struct idxd_device *idxd) { - int i; - - for (i = 0; i < idxd->max_groups; i++) - put_device(group_confdev(idxd->groups[i])); - for (i = 0; i < idxd->max_engines; i++) - put_device(engine_confdev(idxd->engines[i])); - for (i = 0; i < idxd->max_wqs; i++) - put_device(wq_confdev(idxd->wqs[i])); + idxd_clean_groups(idxd); + idxd_clean_engines(idxd); + idxd_clean_wqs(idxd); destroy_workqueue(idxd->wq); } @@ -390,7 +452,7 @@ static int idxd_init_evl(struct idxd_device *idxd) static int idxd_setup_internals(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; - int rc, i; + int rc; init_waitqueue_head(&idxd->cmd_waitq); @@ -421,14 +483,11 @@ static int idxd_setup_internals(struct idxd_device *idxd) err_evl: destroy_workqueue(idxd->wq); err_wkq_create: - for (i = 0; i < idxd->max_groups; i++) - put_device(group_confdev(idxd->groups[i])); + idxd_clean_groups(idxd); err_group: - for (i = 0; i < idxd->max_engines; i++) - put_device(engine_confdev(idxd->engines[i])); + idxd_clean_engines(idxd); err_engine: - for (i = 0; i < idxd->max_wqs; i++) - put_device(wq_confdev(idxd->wqs[i])); + idxd_clean_wqs(idxd); err_wqs: return rc; } @@ -528,6 +587,17 @@ static void idxd_read_caps(struct idxd_device *idxd) idxd->hw.iaa_cap.bits = ioread64(idxd->reg_base + IDXD_IAACAP_OFFSET); } +static void idxd_free(struct idxd_device *idxd) +{ + if (!idxd) + return; + + put_device(idxd_confdev(idxd)); + bitmap_free(idxd->opcap_bmap); + ida_free(&idxd_ida, idxd->id); + kfree(idxd); +} + static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data) { struct device *dev = &pdev->dev; @@ -545,28 +615,34 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d idxd_dev_set_type(&idxd->idxd_dev, idxd->data->type); idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL); if (idxd->id < 0) - return NULL; + goto err_ida; idxd->opcap_bmap = bitmap_zalloc_node(IDXD_MAX_OPCAP_BITS, GFP_KERNEL, dev_to_node(dev)); - if (!idxd->opcap_bmap) { - ida_free(&idxd_ida, idxd->id); - return NULL; - } + if (!idxd->opcap_bmap) + goto err_opcap; device_initialize(conf_dev); conf_dev->parent = dev; conf_dev->bus = &dsa_bus_type; conf_dev->type = idxd->data->dev_type; rc = dev_set_name(conf_dev, "%s%d", idxd->data->name_prefix, idxd->id); - if (rc < 0) { - put_device(conf_dev); - return NULL; - } + if (rc < 0) + goto err_name; spin_lock_init(&idxd->dev_lock); spin_lock_init(&idxd->cmd_lock); return idxd; + +err_name: + put_device(conf_dev); + bitmap_free(idxd->opcap_bmap); +err_opcap: + ida_free(&idxd_ida, idxd->id); +err_ida: + kfree(idxd); + + return NULL; } static int idxd_enable_system_pasid(struct idxd_device *idxd) @@ -1190,7 +1266,7 @@ int idxd_pci_probe_alloc(struct idxd_device *idxd, struct pci_dev *pdev, err: pci_iounmap(pdev, idxd->reg_base); err_iomap: - put_device(idxd_confdev(idxd)); + idxd_free(idxd); err_idxd_alloc: pci_disable_device(pdev); return rc; @@ -1232,7 +1308,6 @@ static void idxd_shutdown(struct pci_dev *pdev) static void idxd_remove(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); - struct idxd_irq_entry *irq_entry; idxd_unregister_devices(idxd); /* @@ -1245,20 +1320,12 @@ static void idxd_remove(struct pci_dev *pdev) get_device(idxd_confdev(idxd)); device_unregister(idxd_confdev(idxd)); idxd_shutdown(pdev); - if (device_pasid_enabled(idxd)) - idxd_disable_system_pasid(idxd); idxd_device_remove_debugfs(idxd); - - irq_entry = idxd_get_ie(idxd, 0); - free_irq(irq_entry->vector, irq_entry); - pci_free_irq_vectors(pdev); + idxd_cleanup(idxd); pci_iounmap(pdev, idxd->reg_base); - if (device_user_pasid_enabled(idxd)) - idxd_disable_sva(pdev); - pci_disable_device(pdev); - destroy_workqueue(idxd->wq); - perfmon_pmu_remove(idxd); put_device(idxd_confdev(idxd)); + idxd_free(idxd); + pci_disable_device(pdev); } static struct pci_driver idxd_pci_driver = { diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c index d5ddb4e30e71..47c8adfdc155 100644 --- a/drivers/dma/mediatek/mtk-cqdma.c +++ b/drivers/dma/mediatek/mtk-cqdma.c @@ -420,15 +420,11 @@ static struct virt_dma_desc *mtk_cqdma_find_active_desc(struct dma_chan *c, { struct mtk_cqdma_vchan *cvc = to_cqdma_vchan(c); struct virt_dma_desc *vd; - unsigned long flags; - spin_lock_irqsave(&cvc->pc->lock, flags); list_for_each_entry(vd, &cvc->pc->queue, node) if (vd->tx.cookie == cookie) { - spin_unlock_irqrestore(&cvc->pc->lock, flags); return vd; } - spin_unlock_irqrestore(&cvc->pc->lock, flags); list_for_each_entry(vd, &cvc->vc.desc_issued, node) if (vd->tx.cookie == cookie) @@ -452,9 +448,11 @@ static enum dma_status mtk_cqdma_tx_status(struct dma_chan *c, if (ret == DMA_COMPLETE || !txstate) return ret; + spin_lock_irqsave(&cvc->pc->lock, flags); spin_lock_irqsave(&cvc->vc.lock, flags); vd = mtk_cqdma_find_active_desc(c, cookie); spin_unlock_irqrestore(&cvc->vc.lock, flags); + spin_unlock_irqrestore(&cvc->pc->lock, flags); if (vd) { cvd = to_cqdma_vdesc(vd); diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index b223a7aacb0c..b6255c0601bb 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -1091,8 +1091,11 @@ static void udma_check_tx_completion(struct work_struct *work) u32 residue_diff; ktime_t time_diff; unsigned long delay; + unsigned long flags; while (1) { + spin_lock_irqsave(&uc->vc.lock, flags); + if (uc->desc) { /* Get previous residue and time stamp */ residue_diff = uc->tx_drain.residue; @@ -1127,6 +1130,8 @@ static void udma_check_tx_completion(struct work_struct *work) break; } + spin_unlock_irqrestore(&uc->vc.lock, flags); + usleep_range(ktime_to_us(delay), ktime_to_us(delay) + 10); continue; @@ -1143,6 +1148,8 @@ static void udma_check_tx_completion(struct work_struct *work) break; } + + spin_unlock_irqrestore(&uc->vc.lock, flags); } static irqreturn_t udma_ring_irq_handler(int irq, void *data) @@ -4246,7 +4253,6 @@ static struct dma_chan *udma_of_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma) { struct udma_dev *ud = ofdma->of_dma_data; - dma_cap_mask_t mask = ud->ddev.cap_mask; struct udma_filter_param filter_param; struct dma_chan *chan; @@ -4278,7 +4284,7 @@ static struct dma_chan *udma_of_xlate(struct of_phandle_args *dma_spec, } } - chan = __dma_request_channel(&mask, udma_dma_filter_fn, &filter_param, + chan = __dma_request_channel(&ud->ddev.cap_mask, udma_dma_filter_fn, &filter_param, ofdma->of_node); if (!chan) { dev_err(ud->dev, "get channel fail in %s.\n", __func__); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index cfdf558b48b6..02138aa55793 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -109,7 +109,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; int r; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index e74e26b6a4f2..fec9a007533a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -752,6 +752,18 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gmc.vram_type = vram_type; adev->gmc.vram_vendor = vram_vendor; + /* The mall_size is already calculated as mall_size_per_umc * num_umc. + * However, for gfx1151, which features a 2-to-1 UMC mapping, + * the result must be multiplied by 2 to determine the actual mall size. + */ + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 5, 1): + adev->gmc.mall_size *= 2; + break; + default: + break; + } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index a1171e6152ed..f11df9c2ec13 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1023,6 +1023,10 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | VCN_RB1_DB_CTRL__EN_MASK); + /* Keeping one read-back to ensure all register writes are done, otherwise + * it may introduce race conditions */ + RREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL); + return 0; } @@ -1205,6 +1209,10 @@ static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst) WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + /* Keeping one read-back to ensure all register writes are done, otherwise + * it may introduce race conditions */ + RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + return 0; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 64df8ca448b3..cc01b9c68b47 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -372,6 +372,8 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev, static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state, struct dm_crtc_state *new_state) { + if (new_state->stream->adjust.timing_adjust_pending) + return true; if (new_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED) return true; else if (amdgpu_dm_crtc_vrr_active(old_state) != amdgpu_dm_crtc_vrr_active(new_state)) @@ -12763,7 +12765,8 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( /* The reply is stored in the top nibble of the command. */ payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; - if (!payload->write && p_notify->aux_reply.length) + /*write req may receive a byte indicating partially written number as well*/ + if (p_notify->aux_reply.length) memcpy(payload->data, p_notify->aux_reply.data, p_notify->aux_reply.length); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 074b79fd5822..5cdbc86ef8f5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -62,6 +62,7 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, enum aux_return_code_type operation_result; struct amdgpu_device *adev; struct ddc_service *ddc; + uint8_t copy[16]; if (WARN_ON(msg->size > 16)) return -E2BIG; @@ -77,6 +78,11 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, (msg->request & DP_AUX_I2C_WRITE_STATUS_UPDATE) != 0; payload.defer_delay = 0; + if (payload.write) { + memcpy(copy, msg->buffer, msg->size); + payload.data = copy; + } + result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload, &operation_result); @@ -100,9 +106,9 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, */ if (payload.write && result >= 0) { if (result) { - /*one byte indicating partially written bytes. Force 0 to retry*/ - drm_info(adev_to_drm(adev), "amdgpu: AUX partially written\n"); - result = 0; + /*one byte indicating partially written bytes*/ + drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX partially written\n"); + result = payload.data[0]; } else if (!payload.reply[0]) /*I2C_ACK|AUX_ACK*/ result = msg->size; @@ -127,11 +133,11 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, break; } - drm_info(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result); + drm_dbg_dp(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result); } if (payload.reply[0]) - drm_info(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.", + drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.", payload.reply[0]); return result; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 28d1353f403d..ba4ce8a63158 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -439,9 +439,12 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, * Don't adjust DRR while there's bandwidth optimizations pending to * avoid conflicting with firmware updates. */ - if (dc->ctx->dce_version > DCE_VERSION_MAX) - if (dc->optimized_required || dc->wm_optimized_required) + if (dc->ctx->dce_version > DCE_VERSION_MAX) { + if (dc->optimized_required || dc->wm_optimized_required) { + stream->adjust.timing_adjust_pending = true; return false; + } + } dc_exit_ips_for_hw_access(dc); @@ -3168,7 +3171,8 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->crtc_timing_adjust) { if (stream->adjust.v_total_min != update->crtc_timing_adjust->v_total_min || - stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max) + stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max || + stream->adjust.timing_adjust_pending) update->crtc_timing_adjust->timing_adjust_pending = true; stream->adjust = *update->crtc_timing_adjust; update->crtc_timing_adjust->timing_adjust_pending = false; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index d9159ca55412..92f0a099d089 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -195,9 +195,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .dcn_downspread_percent = 0.5, .gpuvm_min_page_size_bytes = 4096, .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = 1, + .do_urgent_latency_adjustment = 0, .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, }; void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 0c8ec30ea672..731fbd4bc600 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -910,7 +910,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm } //TODO : Could be possibly moved to a common helper layer. -static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const struct dc_plane_state *plane, unsigned int *plane_id) +static bool dml21_wrapper_get_plane_id(const struct dc_state *context, unsigned int stream_id, const struct dc_plane_state *plane, unsigned int *plane_id) { int i, j; @@ -918,10 +918,12 @@ static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const str return false; for (i = 0; i < context->stream_count; i++) { - for (j = 0; j < context->stream_status[i].plane_count; j++) { - if (context->stream_status[i].plane_states[j] == plane) { - *plane_id = (i << 16) | j; - return true; + if (context->streams[i]->stream_id == stream_id) { + for (j = 0; j < context->stream_status[i].plane_count; j++) { + if (context->stream_status[i].plane_states[j] == plane) { + *plane_id = (i << 16) | j; + return true; + } } } } @@ -944,14 +946,14 @@ static unsigned int map_stream_to_dml21_display_cfg(const struct dml2_context *d return location; } -static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, +static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, const struct dc_plane_state *plane, const struct dc_state *context) { unsigned int plane_id; int i = 0; int location = -1; - if (!dml21_wrapper_get_plane_id(context, plane, &plane_id)) { + if (!dml21_wrapper_get_plane_id(context, stream_id, plane, &plane_id)) { ASSERT(false); return -1; } @@ -1037,7 +1039,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; } else { for (plane_index = 0; plane_index < context->stream_status[stream_index].plane_count; plane_index++) { - disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->stream_status[stream_index].plane_states[plane_index], context); + disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], context); if (disp_cfg_plane_location < 0) disp_cfg_plane_location = dml_dispcfg->num_planes++; @@ -1048,7 +1050,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index); dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; - if (dml21_wrapper_get_plane_id(context, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) + if (dml21_wrapper_get_plane_id(context, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true; /* apply forced pstate policy */ diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index 1236e0f9a256..712aff7e17f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -120,10 +120,11 @@ void dpp401_set_cursor_attributes( enum dc_cursor_color_format color_format = cursor_attributes->color_format; int cur_rom_en = 0; - // DCN4 should always do Cursor degamma for Cursor Color modes if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA || color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { - cur_rom_en = 1; + if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) { + cur_rom_en = 1; + } } REG_UPDATE_3(CURSOR0_CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 5489f3d431f6..3af6a3402b89 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1980,9 +1980,9 @@ void dcn401_program_pipe( dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size); } - if (pipe_ctx->update_flags.raw || - (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) || - pipe_ctx->stream->update_flags.raw) + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.raw || + pipe_ctx->plane_state->update_flags.raw || + pipe_ctx->stream->update_flags.raw)) dc->hwss.update_dchubp_dpp(dc, pipe_ctx, context); if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 268626e73c54..53c961f86d43 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -148,6 +148,7 @@ void link_blank_dp_stream(struct dc_link *link, bool hw_init) void link_set_all_streams_dpms_off_for_link(struct dc_link *link) { struct pipe_ctx *pipes[MAX_PIPES]; + struct dc_stream_state *streams[MAX_PIPES]; struct dc_state *state = link->dc->current_state; uint8_t count; int i; @@ -160,10 +161,18 @@ void link_set_all_streams_dpms_off_for_link(struct dc_link *link) link_get_master_pipes_with_dpms_on(link, state, &count, pipes); + /* The subsequent call to dc_commit_updates_for_stream for a full update + * will release the current state and swap to a new state. Releasing the + * current state results in the stream pointers in the pipe_ctx structs + * to be zero'd. Hence, cache all streams prior to dc_commit_updates_for_stream. + */ + for (i = 0; i < count; i++) + streams[i] = pipes[i]->stream; + for (i = 0; i < count; i++) { - stream_update.stream = pipes[i]->stream; + stream_update.stream = streams[i]; dc_commit_updates_for_stream(link->ctx->dc, NULL, 0, - pipes[i]->stream, &stream_update, + streams[i], &stream_update, state); } diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index de424e670995..4b2f32889f00 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1118,6 +1118,10 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, lockdep_assert_held(&gpusvm->notifier_lock); if (range->flags.has_dma_mapping) { + struct drm_gpusvm_range_flags flags = { + .__flags = range->flags.__flags, + }; + for (i = 0, j = 0; i < npages; j++) { struct drm_pagemap_device_addr *addr = &range->dma_addr[j]; @@ -1131,8 +1135,12 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, dev, *addr); i += 1 << addr->order; } - range->flags.has_devmem_pages = false; - range->flags.has_dma_mapping = false; + + /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ + flags.has_devmem_pages = false; + flags.has_dma_mapping = false; + WRITE_ONCE(range->flags.__flags, flags.__flags); + range->dpagemap = NULL; } } @@ -1334,6 +1342,7 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, int err = 0; struct dev_pagemap *pagemap; struct drm_pagemap *dpagemap; + struct drm_gpusvm_range_flags flags; retry: hmm_range.notifier_seq = mmu_interval_read_begin(notifier); @@ -1378,7 +1387,8 @@ map_pages: */ drm_gpusvm_notifier_lock(gpusvm); - if (range->flags.unmapped) { + flags.__flags = range->flags.__flags; + if (flags.unmapped) { drm_gpusvm_notifier_unlock(gpusvm); err = -EFAULT; goto err_free; @@ -1454,6 +1464,11 @@ map_pages: goto err_unmap; } + if (ctx->devmem_only) { + err = -EFAULT; + goto err_unmap; + } + addr = dma_map_page(gpusvm->drm->dev, page, 0, PAGE_SIZE << order, @@ -1469,14 +1484,17 @@ map_pages: } i += 1 << order; num_dma_mapped = i; - range->flags.has_dma_mapping = true; + flags.has_dma_mapping = true; } if (zdd) { - range->flags.has_devmem_pages = true; + flags.has_devmem_pages = true; range->dpagemap = dpagemap; } + /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ + WRITE_ONCE(range->flags.__flags, flags.__flags); + drm_gpusvm_notifier_unlock(gpusvm); kvfree(pfns); set_seqno: @@ -1765,6 +1783,8 @@ int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm, goto err_finalize; /* Upon success bind devmem allocation to range and zdd */ + devmem_allocation->timeslice_expiration = get_jiffies_64() + + msecs_to_jiffies(ctx->timeslice_ms); zdd->devmem_allocation = devmem_allocation; /* Owns ref */ err_finalize: @@ -1985,6 +2005,13 @@ static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas, void *buf; int i, err = 0; + if (page) { + zdd = page->zone_device_data; + if (time_before64(get_jiffies_64(), + zdd->devmem_allocation->timeslice_expiration)) + return 0; + } + start = ALIGN_DOWN(fault_addr, size); end = ALIGN(fault_addr + 1, size); diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 7752d8ac85f0..c08fa93e50a3 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -75,7 +75,7 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, unsigned long long venc_freq; unsigned long long hdmi_freq; - vclk_freq = mode->clock * 1000; + vclk_freq = mode->clock * 1000ULL; /* For 420, pixel clock is half unlike venc clock */ if (encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24) @@ -123,7 +123,7 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge); struct meson_drm *priv = encoder_hdmi->priv; bool is_hdmi2_sink = display_info->hdmi.scdc.supported; - unsigned long long clock = mode->clock * 1000; + unsigned long long clock = mode->clock * 1000ULL; unsigned long long phy_freq; unsigned long long vclk_freq; unsigned long long venc_freq; diff --git a/drivers/gpu/drm/tiny/panel-mipi-dbi.c b/drivers/gpu/drm/tiny/panel-mipi-dbi.c index 0460ecaef4bd..23914a9f7fd3 100644 --- a/drivers/gpu/drm/tiny/panel-mipi-dbi.c +++ b/drivers/gpu/drm/tiny/panel-mipi-dbi.c @@ -390,7 +390,10 @@ static int panel_mipi_dbi_spi_probe(struct spi_device *spi) spi_set_drvdata(spi, drm); - drm_client_setup(drm, NULL); + if (bpp == 16) + drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB565); + else + drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB888); return 0; } diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index 167fb0f742de..5a47991b4b81 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -47,6 +47,10 @@ #define MI_LRI_FORCE_POSTED REG_BIT(12) #define MI_LRI_LEN(x) (((x) & 0xff) + 1) +#define MI_STORE_REGISTER_MEM (__MI_INSTR(0x24) | XE_INSTR_NUM_DW(4)) +#define MI_SRM_USE_GGTT REG_BIT(22) +#define MI_SRM_ADD_CS_OFFSET REG_BIT(19) + #define MI_FLUSH_DW __MI_INSTR(0x26) #define MI_FLUSH_DW_PROTECTED_MEM_EN REG_BIT(22) #define MI_FLUSH_DW_STORE_INDEX REG_BIT(21) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index fb8ec317b6ee..891f928d80ce 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -43,6 +43,10 @@ #define XEHPC_BCS8_RING_BASE 0x3ee000 #define GSCCS_RING_BASE 0x11a000 +#define ENGINE_ID(base) XE_REG((base) + 0x8c) +#define ENGINE_INSTANCE_ID REG_GENMASK(9, 4) +#define ENGINE_CLASS_ID REG_GENMASK(2, 0) + #define RING_TAIL(base) XE_REG((base) + 0x30) #define TAIL_ADDR REG_GENMASK(20, 3) @@ -154,6 +158,7 @@ #define STOP_RING REG_BIT(8) #define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8) +#define RING_CTX_TIMESTAMP_UDW(base) XE_REG((base) + 0x3ac) #define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc) #define RING_FORCE_TO_NONPRIV(base, i) XE_REG(((base) + 0x4d0) + (i) * 4) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index da1f198ac107..181913967ac9 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -157,6 +157,7 @@ #define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108) #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) +#define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) #define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 57944f90bbf6..994af591a2e8 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -11,7 +11,9 @@ #define CTX_RING_TAIL (0x06 + 1) #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) +#define CTX_BB_PER_CTX_PTR (0x12 + 1) #define CTX_TIMESTAMP (0x22 + 1) +#define CTX_TIMESTAMP_UDW (0x24 + 1) #define CTX_INDIRECT_RING_STATE (0x26 + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 9f8667ebba85..0482f26aa480 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -330,6 +330,8 @@ struct xe_device { u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ u8 has_usm:1; + /** @info.has_64bit_timestamp: Device supports 64-bit timestamps */ + u8 has_64bit_timestamp:1; /** @info.is_dgfx: is discrete device */ u8 is_dgfx:1; /** diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 606922d9dd73..cd9b1c32f30f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -830,7 +830,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) { struct xe_device *xe = gt_to_xe(q->gt); struct xe_lrc *lrc; - u32 old_ts, new_ts; + u64 old_ts, new_ts; int idx; /* diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 31bc2022bfc2..769781d577df 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -941,7 +941,7 @@ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) return xe_sched_invalidate_job(job, 2); } - ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]); + ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0])); ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); /* diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index df3ceddede07..03bfba696b37 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -24,6 +24,7 @@ #include "xe_hw_fence.h" #include "xe_map.h" #include "xe_memirq.h" +#include "xe_mmio.h" #include "xe_sriov.h" #include "xe_trace_lrc.h" #include "xe_vm.h" @@ -650,6 +651,7 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) #define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) #define LRC_PARALLEL_PPHWSP_OFFSET 2048 +#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096 #define LRC_PPHWSP_SIZE SZ_4K u32 xe_lrc_regs_offset(struct xe_lrc *lrc) @@ -684,7 +686,7 @@ static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) { - /* The start seqno is stored in the driver-defined portion of PPHWSP */ + /* This is stored in the driver-defined portion of PPHWSP */ return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; } @@ -694,11 +696,21 @@ static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; } +static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc) +{ + return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET; +} + static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc) { return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32); } +static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc) +{ + return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32); +} + static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) { /* Indirect ring state page is at the very end of LRC */ @@ -726,8 +738,10 @@ DECL_MAP_ADDR_HELPERS(regs) DECL_MAP_ADDR_HELPERS(start_seqno) DECL_MAP_ADDR_HELPERS(ctx_job_timestamp) DECL_MAP_ADDR_HELPERS(ctx_timestamp) +DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw) DECL_MAP_ADDR_HELPERS(parallel) DECL_MAP_ADDR_HELPERS(indirect_ring) +DECL_MAP_ADDR_HELPERS(engine_id) #undef DECL_MAP_ADDR_HELPERS @@ -743,18 +757,37 @@ u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc) } /** + * xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address + * @lrc: Pointer to the lrc. + * + * Returns: ctx timestamp udw GGTT address + */ +u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc); +} + +/** * xe_lrc_ctx_timestamp() - Read ctx timestamp value * @lrc: Pointer to the lrc. * * Returns: ctx timestamp value */ -u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) +u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) { struct xe_device *xe = lrc_to_xe(lrc); struct iosys_map map; + u32 ldw, udw = 0; map = __xe_lrc_ctx_timestamp_map(lrc); - return xe_map_read32(xe, &map); + ldw = xe_map_read32(xe, &map); + + if (xe->info.has_64bit_timestamp) { + map = __xe_lrc_ctx_timestamp_udw_map(lrc); + udw = xe_map_read32(xe, &map); + } + + return (u64)udw << 32 | ldw; } /** @@ -864,7 +897,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe) static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) { - u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile); + u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt)); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); @@ -877,6 +910,65 @@ static void xe_lrc_finish(struct xe_lrc *lrc) xe_bo_unpin(lrc->bo); xe_bo_unlock(lrc->bo); xe_bo_put(lrc->bo); + xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo); +} + +/* + * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active + * context run ticks. + * @lrc: Pointer to the lrc. + * + * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the + * context, but only gets updated when the context switches out. In order to + * check how long a context has been active before it switches out, two things + * are required: + * + * (1) Determine if the context is running: + * To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in + * the LRC. The value chosen is 1 since 0 is the initial value when the LRC is + * initialized. During a query, we just check for this value to determine if the + * context is active. If the context switched out, it would overwrite this + * location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as + * the last part of context restore, so reusing this LRC location will not + * clobber anything. + * + * (2) Calculate the time that the context has been active for: + * The CTX_TIMESTAMP ticks only when the context is active. If a context is + * active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization. + * While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific + * engine instance. Since we do not know which instance the context is running + * on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and + * store it in the PPHSWP. + */ +#define CONTEXT_ACTIVE 1ULL +static void xe_lrc_setup_utilization(struct xe_lrc *lrc) +{ + u32 *cmd; + + cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + + *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; + *cmd++ = ENGINE_ID(0).addr; + *cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc); + *cmd++ = 0; + + *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); + *cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc); + *cmd++ = 0; + *cmd++ = lower_32_bits(CONTEXT_ACTIVE); + + if (lrc_to_xe(lrc)->info.has_64bit_timestamp) { + *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); + *cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc); + *cmd++ = 0; + *cmd++ = upper_32_bits(CONTEXT_ACTIVE); + } + + *cmd++ = MI_BATCH_BUFFER_END; + + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, + xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + } #define PVC_CTX_ASID (0x2e + 1) @@ -893,31 +985,40 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, void *init_data = NULL; u32 arb_enable; u32 lrc_size; + u32 bo_flags; int err; kref_init(&lrc->refcount); + lrc->gt = gt; lrc->flags = 0; lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); if (xe_gt_has_indirect_ring_state(gt)) lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; + bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE; + /* * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * via VM bind calls. */ lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + bo_flags); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); + lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, + ttm_bo_type_kernel, + bo_flags); + if (IS_ERR(lrc->bb_per_ctx_bo)) { + err = PTR_ERR(lrc->bb_per_ctx_bo); + goto err_lrc_finish; + } + lrc->size = lrc_size; - lrc->tile = gt_to_tile(hwe->gt); lrc->ring.size = ring_size; lrc->ring.tail = 0; - lrc->ctx_timestamp = 0; xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, hwe->fence_irq, hwe->name); @@ -990,7 +1091,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) | _MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE)); + lrc->ctx_timestamp = 0; xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0); + if (lrc_to_xe(lrc)->info.has_64bit_timestamp) + xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0); if (xe->info.has_asid && vm) xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); @@ -1019,6 +1123,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, map = __xe_lrc_start_seqno_map(lrc); xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); + xe_lrc_setup_utilization(lrc); + return 0; err_lrc_finish: @@ -1238,6 +1344,21 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) return __xe_lrc_parallel_map(lrc); } +/** + * xe_lrc_engine_id() - Read engine id value + * @lrc: Pointer to the lrc. + * + * Returns: context id value + */ +static u32 xe_lrc_engine_id(struct xe_lrc *lrc) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_engine_id_map(lrc); + return xe_map_read32(xe, &map); +} + static int instr_dw(u32 cmd_header) { /* GFXPIPE "SINGLE_DW" opcodes are a single dword */ @@ -1684,7 +1805,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; snapshot->lrc_snapshot = NULL; - snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); + snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); return snapshot; } @@ -1784,22 +1905,74 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) kfree(snapshot); } +static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts) +{ + u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id); + u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id); + struct xe_hw_engine *hwe; + u64 val; + + hwe = xe_gt_hw_engine(lrc->gt, class, instance, false); + if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe), + "Unexpected engine class:instance %d:%d for context utilization\n", + class, instance)) + return -1; + + if (lrc_to_xe(lrc)->info.has_64bit_timestamp) + val = xe_mmio_read64_2x32(&hwe->gt->mmio, + RING_CTX_TIMESTAMP(hwe->mmio_base)); + else + val = xe_mmio_read32(&hwe->gt->mmio, + RING_CTX_TIMESTAMP(hwe->mmio_base)); + + *reg_ctx_ts = val; + + return 0; +} + /** * xe_lrc_update_timestamp() - Update ctx timestamp * @lrc: Pointer to the lrc. * @old_ts: Old timestamp value * * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and - * update saved value. + * update saved value. With support for active contexts, the calculation may be + * slightly racy, so follow a read-again logic to ensure that the context is + * still active before returning the right timestamp. * * Returns: New ctx timestamp value */ -u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) +u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts) { + u64 lrc_ts, reg_ts; + u32 engine_id; + *old_ts = lrc->ctx_timestamp; - lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); + lrc_ts = xe_lrc_ctx_timestamp(lrc); + /* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */ + if (IS_SRIOV_VF(lrc_to_xe(lrc))) { + lrc->ctx_timestamp = lrc_ts; + goto done; + } + + if (lrc_ts == CONTEXT_ACTIVE) { + engine_id = xe_lrc_engine_id(lrc); + if (!get_ctx_timestamp(lrc, engine_id, ®_ts)) + lrc->ctx_timestamp = reg_ts; + + /* read lrc again to ensure context is still active */ + lrc_ts = xe_lrc_ctx_timestamp(lrc); + } + + /* + * If context switched out, just use the lrc_ts. Note that this needs to + * be a separate if condition. + */ + if (lrc_ts != CONTEXT_ACTIVE) + lrc->ctx_timestamp = lrc_ts; +done: trace_xe_lrc_update_timestamp(lrc, *old_ts); return lrc->ctx_timestamp; diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 0b40f349ab95..eb6e8de8c939 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -120,7 +120,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot); u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc); -u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); +u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc); +u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); @@ -136,6 +137,6 @@ u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); * * Returns the current LRC timestamp */ -u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts); +u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts); #endif diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 71ecb453f811..ae24cf6f8dd9 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -25,8 +25,8 @@ struct xe_lrc { /** @size: size of lrc including any indirect ring state page */ u32 size; - /** @tile: tile which this LRC belongs to */ - struct xe_tile *tile; + /** @gt: gt which this LRC belongs to */ + struct xe_gt *gt; /** @flags: LRC flags */ #define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 @@ -52,7 +52,10 @@ struct xe_lrc { struct xe_hw_fence_ctx fence_ctx; /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ - u32 ctx_timestamp; + u64 ctx_timestamp; + + /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */ + struct xe_bo *bb_per_ctx_bo; }; struct xe_lrc_snapshot; diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 9f4632e39a1a..e861c694f336 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -29,9 +29,6 @@ struct xe_modparam xe_modparam = { module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600); MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2"); -module_param_named(always_migrate_to_vram, xe_modparam.always_migrate_to_vram, bool, 0444); -MODULE_PARM_DESC(always_migrate_to_vram, "Always migrate to VRAM on GPU fault"); - module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 84339e509c80..5a3bfea8b7b4 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -12,7 +12,6 @@ struct xe_modparam { bool force_execlist; bool probe_display; - bool always_migrate_to_vram; u32 force_vram_bar_size; int guc_log_level; char *guc_firmware_path; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 818f023166d5..f4d108dc49b1 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -140,6 +140,7 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ + .has_64bit_timestamp = 1, \ .va_bits = 48, \ .vm_max_level = 4, \ .hw_engine_mask = \ @@ -668,6 +669,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; xe->info.has_usm = graphics_desc->has_usm; + xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp; for_each_remote_tile(tile, xe, id) { int err; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index e9b9bbc138d3..ca6b10d35573 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -21,6 +21,7 @@ struct xe_graphics_desc { u8 has_indirect_ring_state:1; u8 has_range_tlb_invalidation:1; u8 has_usm:1; + u8 has_64bit_timestamp:1; }; struct xe_media_desc { diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ffaf0d02dc7d..856038553b81 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -2232,11 +2232,19 @@ static void op_commit(struct xe_vm *vm, } case DRM_GPUVA_OP_DRIVER: { + /* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */ + if (op->subop == XE_VMA_SUBOP_MAP_RANGE) { - op->map_range.range->tile_present |= BIT(tile->id); - op->map_range.range->tile_invalidated &= ~BIT(tile->id); + WRITE_ONCE(op->map_range.range->tile_present, + op->map_range.range->tile_present | + BIT(tile->id)); + WRITE_ONCE(op->map_range.range->tile_invalidated, + op->map_range.range->tile_invalidated & + ~BIT(tile->id)); } else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) { - op->unmap_range.range->tile_present &= ~BIT(tile->id); + WRITE_ONCE(op->unmap_range.range->tile_present, + op->unmap_range.range->tile_present & + ~BIT(tile->id)); } break; } diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index a7582b097ae6..bc1689db4cd7 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -234,13 +234,10 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job) static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) { - dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT | - MI_COPY_MEM_MEM_DST_GGTT; + dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; + dw[i++] = RING_CTX_TIMESTAMP(0).addr; dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); dw[i++] = 0; - dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc); - dw[i++] = 0; - dw[i++] = MI_NOOP; return i; } diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 8184390f9c7b..86d47aaf0358 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -227,7 +227,7 @@ struct xe_shrinker *xe_shrinker_create(struct xe_device *xe) if (!shrinker) return ERR_PTR(-ENOMEM); - shrinker->shrink = shrinker_alloc(0, "xe system shrinker"); + shrinker->shrink = shrinker_alloc(0, "drm-xe_gem:%s", xe->drm.unique); if (!shrinker->shrink) { kfree(shrinker); return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 24c578e1170e..975094c1a582 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -15,8 +15,17 @@ static bool xe_svm_range_in_vram(struct xe_svm_range *range) { - /* Not reliable without notifier lock */ - return range->base.flags.has_devmem_pages; + /* + * Advisory only check whether the range is currently backed by VRAM + * memory. + */ + + struct drm_gpusvm_range_flags flags = { + /* Pairs with WRITE_ONCE in drm_gpusvm.c */ + .__flags = READ_ONCE(range->base.flags.__flags), + }; + + return flags.has_devmem_pages; } static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range) @@ -645,9 +654,16 @@ void xe_svm_fini(struct xe_vm *vm) } static bool xe_svm_range_is_valid(struct xe_svm_range *range, - struct xe_tile *tile) + struct xe_tile *tile, + bool devmem_only) { - return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id); + /* + * Advisory only check whether the range currently has a valid mapping, + * READ_ONCE pairs with WRITE_ONCE in xe_pt.c + */ + return ((READ_ONCE(range->tile_present) & + ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) && + (!devmem_only || xe_svm_range_in_vram(range)); } static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) @@ -712,6 +728,36 @@ unlock: return err; } +static bool supports_4K_migration(struct xe_device *xe) +{ + if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) + return false; + + return true; +} + +static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, + struct xe_vma *vma) +{ + struct xe_vm *vm = range_to_vm(&range->base); + u64 range_size = xe_svm_range_size(range); + + if (!range->base.flags.migrate_devmem) + return false; + + if (xe_svm_range_in_vram(range)) { + drm_dbg(&vm->xe->drm, "Range is already in VRAM\n"); + return false; + } + + if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) { + drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); + return false; + } + + return true; +} + /** * xe_svm_handle_pagefault() - SVM handle page fault * @vm: The VM. @@ -735,11 +781,16 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), .check_pages_threshold = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0, + .devmem_only = atomic && IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), + .timeslice_ms = atomic && IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0, }; struct xe_svm_range *range; struct drm_gpusvm_range *r; struct drm_exec exec; struct dma_fence *fence; + int migrate_try_count = ctx.devmem_only ? 3 : 1; ktime_t end = 0; int err; @@ -758,24 +809,31 @@ retry: if (IS_ERR(r)) return PTR_ERR(r); + if (ctx.devmem_only && !r->flags.migrate_devmem) + return -EACCES; + range = to_xe_range(r); - if (xe_svm_range_is_valid(range, tile)) + if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) return 0; range_debug(range, "PAGE FAULT"); - /* XXX: Add migration policy, for now migrate range once */ - if (!range->skip_migrate && range->base.flags.migrate_devmem && - xe_svm_range_size(range) >= SZ_64K) { - range->skip_migrate = true; - + if (--migrate_try_count >= 0 && + xe_svm_range_needs_migrate_to_vram(range, vma)) { err = xe_svm_alloc_vram(vm, tile, range, &ctx); + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (err) { - drm_dbg(&vm->xe->drm, - "VRAM allocation failed, falling back to " - "retrying fault, asid=%u, errno=%pe\n", - vm->usm.asid, ERR_PTR(err)); - goto retry; + if (migrate_try_count || !ctx.devmem_only) { + drm_dbg(&vm->xe->drm, + "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n", + vm->usm.asid, ERR_PTR(err)); + goto retry; + } else { + drm_err(&vm->xe->drm, + "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n", + vm->usm.asid, ERR_PTR(err)); + return err; + } } } @@ -783,15 +841,23 @@ retry: err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx); /* Corner where CPU mappings have changed */ if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) { - if (err == -EOPNOTSUPP) { - range_debug(range, "PAGE FAULT - EVICT PAGES"); - drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base); + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ + if (migrate_try_count > 0 || !ctx.devmem_only) { + if (err == -EOPNOTSUPP) { + range_debug(range, "PAGE FAULT - EVICT PAGES"); + drm_gpusvm_range_evict(&vm->svm.gpusvm, + &range->base); + } + drm_dbg(&vm->xe->drm, + "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); + range_debug(range, "PAGE FAULT - RETRY PAGES"); + goto retry; + } else { + drm_err(&vm->xe->drm, + "Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); } - drm_dbg(&vm->xe->drm, - "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n", - vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); - range_debug(range, "PAGE FAULT - RETRY PAGES"); - goto retry; } if (err) { range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT"); @@ -815,6 +881,7 @@ retry_bind: drm_exec_fini(&exec); err = PTR_ERR(fence); if (err == -EAGAIN) { + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ range_debug(range, "PAGE FAULT - RETRY BIND"); goto retry; } @@ -825,9 +892,6 @@ retry_bind: } drm_exec_fini(&exec); - if (xe_modparam.always_migrate_to_vram) - range->skip_migrate = false; - dma_fence_wait(fence, false); dma_fence_put(fence); diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index be306fe7aaa4..fe58ac2f4baa 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -36,11 +36,6 @@ struct xe_svm_range { * range. Protected by GPU SVM notifier lock. */ u8 tile_invalidated; - /** - * @skip_migrate: Skip migration to VRAM, protected by GPU fault handler - * locking. - */ - u8 skip_migrate :1; }; #if IS_ENABLED(CONFIG_DRM_GPUSVM) diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.h b/drivers/gpu/drm/xe/xe_trace_lrc.h index 5c669a0b2180..d525cbee1e34 100644 --- a/drivers/gpu/drm/xe/xe_trace_lrc.h +++ b/drivers/gpu/drm/xe/xe_trace_lrc.h @@ -19,12 +19,12 @@ #define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev) TRACE_EVENT(xe_lrc_update_timestamp, - TP_PROTO(struct xe_lrc *lrc, uint32_t old), + TP_PROTO(struct xe_lrc *lrc, uint64_t old), TP_ARGS(lrc, old), TP_STRUCT__entry( __field(struct xe_lrc *, lrc) - __field(u32, old) - __field(u32, new) + __field(u64, old) + __field(u64, new) __string(name, lrc->fence_ctx.name) __string(device_id, __dev_name_lrc(lrc)) ), @@ -36,7 +36,7 @@ TRACE_EVENT(xe_lrc_update_timestamp, __assign_str(name); __assign_str(device_id); ), - TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s", + TP_printk("lrc=:%p lrc->name=%s old=%llu new=%llu device_id:%s", __entry->lrc, __get_str(name), __entry->old, __entry->new, __get_str(device_id)) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 24f644c0a673..2f833f0d575f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -815,6 +815,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) }, + { XE_RTP_NAME("22021007897"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) + }, /* Xe3_LPG */ { XE_RTP_NAME("14021490052"), diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c index 8e0267c7cc29..f21f9877c040 100644 --- a/drivers/i2c/busses/i2c-designware-pcidrv.c +++ b/drivers/i2c/busses/i2c-designware-pcidrv.c @@ -278,9 +278,11 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev, if ((dev->flags & MODEL_MASK) == MODEL_AMD_NAVI_GPU) { dev->slave = i2c_new_ccgx_ucsi(&dev->adapter, dev->irq, &dgpu_node); - if (IS_ERR(dev->slave)) + if (IS_ERR(dev->slave)) { + i2c_del_adapter(&dev->adapter); return dev_err_probe(device, PTR_ERR(dev->slave), "register UCSI failed\n"); + } } pm_runtime_set_autosuspend_delay(device, 1000); diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index dc98c39d2b20..cc6a6c1585d2 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -252,7 +252,7 @@ static void __init gicv2m_teardown(void) static struct msi_parent_ops gicv2m_msi_parent_ops = { .supported_flags = GICV2M_MSI_FLAGS_SUPPORTED, .required_flags = GICV2M_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, .prefix = "GICv2m-", diff --git a/drivers/irqchip/irq-gic-v3-its-msi-parent.c b/drivers/irqchip/irq-gic-v3-its-msi-parent.c index bdb04c808148..c5a7eb1c0419 100644 --- a/drivers/irqchip/irq-gic-v3-its-msi-parent.c +++ b/drivers/irqchip/irq-gic-v3-its-msi-parent.c @@ -203,7 +203,7 @@ static bool its_init_dev_msi_info(struct device *dev, struct irq_domain *domain, const struct msi_parent_ops gic_v3_its_msi_parent_ops = { .supported_flags = ITS_MSI_FLAGS_SUPPORTED, .required_flags = ITS_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, .prefix = "ITS-", diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c index 34e9ca77a8c3..647b18e24e0c 100644 --- a/drivers/irqchip/irq-gic-v3-mbi.c +++ b/drivers/irqchip/irq-gic-v3-mbi.c @@ -197,7 +197,7 @@ static bool mbi_init_dev_msi_info(struct device *dev, struct irq_domain *domain, static const struct msi_parent_ops gic_v3_mbi_msi_parent_ops = { .supported_flags = MBI_MSI_FLAGS_SUPPORTED, .required_flags = MBI_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, .prefix = "MBI-", diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c index d67f93f6d750..60b976286636 100644 --- a/drivers/irqchip/irq-mvebu-gicp.c +++ b/drivers/irqchip/irq-mvebu-gicp.c @@ -161,7 +161,7 @@ static const struct irq_domain_ops gicp_domain_ops = { static const struct msi_parent_ops gicp_msi_parent_ops = { .supported_flags = GICP_MSI_FLAGS_SUPPORTED, .required_flags = GICP_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_GENERIC_MSI, .bus_select_mask = MATCH_PLATFORM_MSI, .prefix = "GICP-", diff --git a/drivers/irqchip/irq-mvebu-odmi.c b/drivers/irqchip/irq-mvebu-odmi.c index 28f7e81df94f..54f6f0811573 100644 --- a/drivers/irqchip/irq-mvebu-odmi.c +++ b/drivers/irqchip/irq-mvebu-odmi.c @@ -157,7 +157,7 @@ static const struct irq_domain_ops odmi_domain_ops = { static const struct msi_parent_ops odmi_msi_parent_ops = { .supported_flags = ODMI_MSI_FLAGS_SUPPORTED, .required_flags = ODMI_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_GENERIC_MSI, .bus_select_mask = MATCH_PLATFORM_MSI, .prefix = "ODMI-", diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c index bdf5cd2037f2..62f76950a113 100644 --- a/drivers/irqchip/irq-riscv-imsic-state.c +++ b/drivers/irqchip/irq-riscv-imsic-state.c @@ -208,17 +208,17 @@ skip: } #ifdef CONFIG_SMP -static void __imsic_local_timer_start(struct imsic_local_priv *lpriv) +static void __imsic_local_timer_start(struct imsic_local_priv *lpriv, unsigned int cpu) { lockdep_assert_held(&lpriv->lock); if (!timer_pending(&lpriv->timer)) { lpriv->timer.expires = jiffies + 1; - add_timer_on(&lpriv->timer, smp_processor_id()); + add_timer_on(&lpriv->timer, cpu); } } #else -static inline void __imsic_local_timer_start(struct imsic_local_priv *lpriv) +static inline void __imsic_local_timer_start(struct imsic_local_priv *lpriv, unsigned int cpu) { } #endif @@ -233,7 +233,7 @@ void imsic_local_sync_all(bool force_all) if (force_all) bitmap_fill(lpriv->dirty_bitmap, imsic->global.nr_ids + 1); if (!__imsic_local_sync(lpriv)) - __imsic_local_timer_start(lpriv); + __imsic_local_timer_start(lpriv, smp_processor_id()); raw_spin_unlock_irqrestore(&lpriv->lock, flags); } @@ -278,7 +278,7 @@ static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu return; } - __imsic_local_timer_start(lpriv); + __imsic_local_timer_start(lpriv, cpu); } } #else diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ac53629fce68..6b04473c0ab7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2059,7 +2059,21 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id, if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; else - atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; + atomic_bs = (1 + ns->ctrl->awupf) * bs; + + /* + * Set subsystem atomic bs. + */ + if (ns->ctrl->subsys->atomic_bs) { + if (atomic_bs != ns->ctrl->subsys->atomic_bs) { + dev_err_ratelimited(ns->ctrl->device, + "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n", + ns->disk ? ns->disk->disk_name : "?", + ns->ctrl->subsys->atomic_bs, + atomic_bs); + } + } else + ns->ctrl->subsys->atomic_bs = atomic_bs; nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs); } @@ -2201,6 +2215,17 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, nvme_set_chunk_sectors(ns, id, &lim); if (!nvme_update_disk_info(ns, id, &lim)) capacity = 0; + + /* + * Validate the max atomic write size fits within the subsystem's + * atomic write capabilities. + */ + if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) { + blk_mq_unfreeze_queue(ns->disk->queue, memflags); + ret = -ENXIO; + goto out; + } + nvme_config_discard(ns, &lim); if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && ns->head->ids.csi == NVME_CSI_ZNS) @@ -3031,7 +3056,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) kfree(subsys); return -EINVAL; } - subsys->awupf = le16_to_cpu(id->awupf); nvme_mpath_default_iopolicy(subsys); subsys->dev.class = &nvme_subsys_class; @@ -3441,7 +3465,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) dev_pm_qos_expose_latency_tolerance(ctrl->device); else if (!ctrl->apst_enabled && prev_apst_enabled) dev_pm_qos_hide_latency_tolerance(ctrl->device); - + ctrl->awupf = le16_to_cpu(id->awupf); out_free: kfree(id); return ret; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 250f3da67cc9..cf0ef4745564 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -638,7 +638,8 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) blk_set_stacking_limits(&lim); lim.dma_alignment = 3; - lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL; + lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | + BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES; if (head->ids.csi == NVME_CSI_ZNS) lim.features |= BLK_FEAT_ZONED; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 51e078642127..8fc4683418a3 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -410,6 +410,7 @@ struct nvme_ctrl { enum nvme_ctrl_type cntrltype; enum nvme_dctype dctype; + u16 awupf; /* 0's based value. */ }; static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) @@ -442,11 +443,11 @@ struct nvme_subsystem { u8 cmic; enum nvme_subsys_type subtype; u16 vendor_id; - u16 awupf; /* 0's based awupf value. */ struct ida ns_ida; #ifdef CONFIG_NVME_MULTIPATH enum nvme_iopolicy iopolicy; #endif + u32 atomic_bs; }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2e30e9be7408..f1dd804151b1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -390,7 +390,7 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db, * as it only leads to a small amount of wasted memory for the lifetime of * the I/O. */ -static int nvme_pci_npages_prp(void) +static __always_inline int nvme_pci_npages_prp(void) { unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE; unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE); @@ -1202,7 +1202,9 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq) WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags)); disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + spin_lock(&nvmeq->cq_poll_lock); nvme_poll_cq(nvmeq, NULL); + spin_unlock(&nvmeq->cq_poll_lock); enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); } @@ -3737,6 +3739,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0x1e49, 0x0041), /* ZHITAI TiPro7000 NVMe SSD */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, + { PCI_DEVICE(0x025e, 0xf1ac), /* SOLIDIGM P44 pro SSDPFKKW020X7 */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0xc0a9, 0x540a), /* Crucial P2 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1d97, 0x2263), /* Lexar NM610 */ diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index 7fab7f3d79b7..7123c855b5a6 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -62,8 +62,7 @@ static DEFINE_MUTEX(nvmet_pci_epf_ports_mutex); #define NVMET_PCI_EPF_CQ_RETRY_INTERVAL msecs_to_jiffies(1) enum nvmet_pci_epf_queue_flags { - NVMET_PCI_EPF_Q_IS_SQ = 0, /* The queue is a submission queue */ - NVMET_PCI_EPF_Q_LIVE, /* The queue is live */ + NVMET_PCI_EPF_Q_LIVE = 0, /* The queue is live */ NVMET_PCI_EPF_Q_IRQ_ENABLED, /* IRQ is enabled for this queue */ }; @@ -596,9 +595,6 @@ static bool nvmet_pci_epf_should_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, struct nvmet_pci_epf_irq_vector *iv = cq->iv; bool ret; - if (!test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) - return false; - /* IRQ coalescing for the admin queue is not allowed. */ if (!cq->qid) return true; @@ -625,7 +621,8 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, struct pci_epf *epf = nvme_epf->epf; int ret = 0; - if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags)) + if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) || + !test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) return; mutex_lock(&ctrl->irq_lock); @@ -636,14 +633,16 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, switch (nvme_epf->irq_type) { case PCI_IRQ_MSIX: case PCI_IRQ_MSI: + /* + * If we fail to raise an MSI or MSI-X interrupt, it is likely + * because the host is using legacy INTX IRQs (e.g. BIOS, + * grub), but we can fallback to the INTX type only if the + * endpoint controller supports this type. + */ ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no, nvme_epf->irq_type, cq->vector + 1); - if (!ret) + if (!ret || !nvme_epf->epc_features->intx_capable) break; - /* - * If we got an error, it is likely because the host is using - * legacy IRQs (e.g. BIOS, grub). - */ fallthrough; case PCI_IRQ_INTX: ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no, @@ -656,7 +655,9 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, } if (ret) - dev_err(ctrl->dev, "Failed to raise IRQ (err=%d)\n", ret); + dev_err_ratelimited(ctrl->dev, + "CQ[%u]: Failed to raise IRQ (err=%d)\n", + cq->qid, ret); unlock: mutex_unlock(&ctrl->irq_lock); @@ -1319,8 +1320,14 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags); - dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n", - cqid, qsize, cq->qes, cq->vector); + if (test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) + dev_dbg(ctrl->dev, + "CQ[%u]: %u entries of %zu B, IRQ vector %u\n", + cqid, qsize, cq->qes, cq->vector); + else + dev_dbg(ctrl->dev, + "CQ[%u]: %u entries of %zu B, IRQ disabled\n", + cqid, qsize, cq->qes); return NVME_SC_SUCCESS; @@ -1344,7 +1351,8 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid) cancel_delayed_work_sync(&cq->work); nvmet_pci_epf_drain_queue(cq); - nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector); + if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) + nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector); nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map); return NVME_SC_SUCCESS; @@ -1533,7 +1541,6 @@ static void nvmet_pci_epf_init_queue(struct nvmet_pci_epf_ctrl *ctrl, if (sq) { queue = &ctrl->sq[qid]; - set_bit(NVMET_PCI_EPF_Q_IS_SQ, &queue->flags); } else { queue = &ctrl->cq[qid]; INIT_DELAYED_WORK(&queue->work, nvmet_pci_epf_cq_work); diff --git a/drivers/phy/phy-can-transceiver.c b/drivers/phy/phy-can-transceiver.c index 2bec70615449..f59caff4b3d4 100644 --- a/drivers/phy/phy-can-transceiver.c +++ b/drivers/phy/phy-can-transceiver.c @@ -93,6 +93,16 @@ static const struct of_device_id can_transceiver_phy_ids[] = { }; MODULE_DEVICE_TABLE(of, can_transceiver_phy_ids); +/* Temporary wrapper until the multiplexer subsystem supports optional muxes */ +static inline struct mux_state * +devm_mux_state_get_optional(struct device *dev, const char *mux_name) +{ + if (!of_property_present(dev->of_node, "mux-states")) + return NULL; + + return devm_mux_state_get(dev, mux_name); +} + static int can_transceiver_phy_probe(struct platform_device *pdev) { struct phy_provider *phy_provider; @@ -114,13 +124,11 @@ static int can_transceiver_phy_probe(struct platform_device *pdev) match = of_match_node(can_transceiver_phy_ids, pdev->dev.of_node); drvdata = match->data; - mux_state = devm_mux_state_get(dev, NULL); - if (IS_ERR(mux_state)) { - if (PTR_ERR(mux_state) == -EPROBE_DEFER) - return PTR_ERR(mux_state); - } else { - can_transceiver_phy->mux_state = mux_state; - } + mux_state = devm_mux_state_get_optional(dev, NULL); + if (IS_ERR(mux_state)) + return PTR_ERR(mux_state); + + can_transceiver_phy->mux_state = mux_state; phy = devm_phy_create(dev, dev->of_node, &can_transceiver_phy_ops); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 45b3b792696e..b33e2e2b5014 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1754,7 +1754,8 @@ static void qmp_ufs_init_registers(struct qmp_ufs *qmp, const struct qmp_phy_cfg qmp_ufs_init_all(qmp, &cfg->tbls_hs_overlay[i]); } - qmp_ufs_init_all(qmp, &cfg->tbls_hs_b); + if (qmp->mode == PHY_MODE_UFS_HS_B) + qmp_ufs_init_all(qmp, &cfg->tbls_hs_b); } static int qmp_ufs_com_init(struct qmp_ufs *qmp) diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index 775f4f973a6c..9fdf17e0848a 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -9,6 +9,7 @@ * Copyright (C) 2014 Cogent Embedded, Inc. */ +#include <linux/cleanup.h> #include <linux/extcon-provider.h> #include <linux/interrupt.h> #include <linux/io.h> @@ -107,7 +108,6 @@ struct rcar_gen3_phy { struct rcar_gen3_chan *ch; u32 int_enable_bits; bool initialized; - bool otg_initialized; bool powered; }; @@ -119,9 +119,8 @@ struct rcar_gen3_chan { struct regulator *vbus; struct reset_control *rstc; struct work_struct work; - struct mutex lock; /* protects rphys[...].powered */ + spinlock_t lock; /* protects access to hardware and driver data structure. */ enum usb_dr_mode dr_mode; - int irq; u32 obint_enable_bits; bool extcon_host; bool is_otg_channel; @@ -320,16 +319,15 @@ static bool rcar_gen3_is_any_rphy_initialized(struct rcar_gen3_chan *ch) return false; } -static bool rcar_gen3_needs_init_otg(struct rcar_gen3_chan *ch) +static bool rcar_gen3_is_any_otg_rphy_initialized(struct rcar_gen3_chan *ch) { - int i; - - for (i = 0; i < NUM_OF_PHYS; i++) { - if (ch->rphys[i].otg_initialized) - return false; + for (enum rcar_gen3_phy_index i = PHY_INDEX_BOTH_HC; i <= PHY_INDEX_EHCI; + i++) { + if (ch->rphys[i].initialized) + return true; } - return true; + return false; } static bool rcar_gen3_are_all_rphys_power_off(struct rcar_gen3_chan *ch) @@ -351,7 +349,9 @@ static ssize_t role_store(struct device *dev, struct device_attribute *attr, bool is_b_device; enum phy_mode cur_mode, new_mode; - if (!ch->is_otg_channel || !rcar_gen3_is_any_rphy_initialized(ch)) + guard(spinlock_irqsave)(&ch->lock); + + if (!ch->is_otg_channel || !rcar_gen3_is_any_otg_rphy_initialized(ch)) return -EIO; if (sysfs_streq(buf, "host")) @@ -389,7 +389,7 @@ static ssize_t role_show(struct device *dev, struct device_attribute *attr, { struct rcar_gen3_chan *ch = dev_get_drvdata(dev); - if (!ch->is_otg_channel || !rcar_gen3_is_any_rphy_initialized(ch)) + if (!ch->is_otg_channel || !rcar_gen3_is_any_otg_rphy_initialized(ch)) return -EIO; return sprintf(buf, "%s\n", rcar_gen3_is_host(ch) ? "host" : @@ -402,6 +402,9 @@ static void rcar_gen3_init_otg(struct rcar_gen3_chan *ch) void __iomem *usb2_base = ch->base; u32 val; + if (!ch->is_otg_channel || rcar_gen3_is_any_otg_rphy_initialized(ch)) + return; + /* Should not use functions of read-modify-write a register */ val = readl(usb2_base + USB2_LINECTRL1); val = (val & ~USB2_LINECTRL1_DP_RPD) | USB2_LINECTRL1_DPRPD_EN | @@ -415,7 +418,7 @@ static void rcar_gen3_init_otg(struct rcar_gen3_chan *ch) val = readl(usb2_base + USB2_ADPCTRL); writel(val | USB2_ADPCTRL_IDPULLUP, usb2_base + USB2_ADPCTRL); } - msleep(20); + mdelay(20); writel(0xffffffff, usb2_base + USB2_OBINTSTA); writel(ch->obint_enable_bits, usb2_base + USB2_OBINTEN); @@ -427,16 +430,27 @@ static irqreturn_t rcar_gen3_phy_usb2_irq(int irq, void *_ch) { struct rcar_gen3_chan *ch = _ch; void __iomem *usb2_base = ch->base; - u32 status = readl(usb2_base + USB2_OBINTSTA); + struct device *dev = ch->dev; irqreturn_t ret = IRQ_NONE; + u32 status; + + pm_runtime_get_noresume(dev); + + if (pm_runtime_suspended(dev)) + goto rpm_put; - if (status & ch->obint_enable_bits) { - dev_vdbg(ch->dev, "%s: %08x\n", __func__, status); - writel(ch->obint_enable_bits, usb2_base + USB2_OBINTSTA); - rcar_gen3_device_recognition(ch); - ret = IRQ_HANDLED; + scoped_guard(spinlock, &ch->lock) { + status = readl(usb2_base + USB2_OBINTSTA); + if (status & ch->obint_enable_bits) { + dev_vdbg(dev, "%s: %08x\n", __func__, status); + writel(ch->obint_enable_bits, usb2_base + USB2_OBINTSTA); + rcar_gen3_device_recognition(ch); + ret = IRQ_HANDLED; + } } +rpm_put: + pm_runtime_put_noidle(dev); return ret; } @@ -446,32 +460,23 @@ static int rcar_gen3_phy_usb2_init(struct phy *p) struct rcar_gen3_chan *channel = rphy->ch; void __iomem *usb2_base = channel->base; u32 val; - int ret; - if (!rcar_gen3_is_any_rphy_initialized(channel) && channel->irq >= 0) { - INIT_WORK(&channel->work, rcar_gen3_phy_usb2_work); - ret = request_irq(channel->irq, rcar_gen3_phy_usb2_irq, - IRQF_SHARED, dev_name(channel->dev), channel); - if (ret < 0) { - dev_err(channel->dev, "No irq handler (%d)\n", channel->irq); - return ret; - } - } + guard(spinlock_irqsave)(&channel->lock); /* Initialize USB2 part */ val = readl(usb2_base + USB2_INT_ENABLE); val |= USB2_INT_ENABLE_UCOM_INTEN | rphy->int_enable_bits; writel(val, usb2_base + USB2_INT_ENABLE); - writel(USB2_SPD_RSM_TIMSET_INIT, usb2_base + USB2_SPD_RSM_TIMSET); - writel(USB2_OC_TIMSET_INIT, usb2_base + USB2_OC_TIMSET); - - /* Initialize otg part */ - if (channel->is_otg_channel) { - if (rcar_gen3_needs_init_otg(channel)) - rcar_gen3_init_otg(channel); - rphy->otg_initialized = true; + + if (!rcar_gen3_is_any_rphy_initialized(channel)) { + writel(USB2_SPD_RSM_TIMSET_INIT, usb2_base + USB2_SPD_RSM_TIMSET); + writel(USB2_OC_TIMSET_INIT, usb2_base + USB2_OC_TIMSET); } + /* Initialize otg part (only if we initialize a PHY with IRQs). */ + if (rphy->int_enable_bits) + rcar_gen3_init_otg(channel); + rphy->initialized = true; return 0; @@ -484,10 +489,9 @@ static int rcar_gen3_phy_usb2_exit(struct phy *p) void __iomem *usb2_base = channel->base; u32 val; - rphy->initialized = false; + guard(spinlock_irqsave)(&channel->lock); - if (channel->is_otg_channel) - rphy->otg_initialized = false; + rphy->initialized = false; val = readl(usb2_base + USB2_INT_ENABLE); val &= ~rphy->int_enable_bits; @@ -495,9 +499,6 @@ static int rcar_gen3_phy_usb2_exit(struct phy *p) val &= ~USB2_INT_ENABLE_UCOM_INTEN; writel(val, usb2_base + USB2_INT_ENABLE); - if (channel->irq >= 0 && !rcar_gen3_is_any_rphy_initialized(channel)) - free_irq(channel->irq, channel); - return 0; } @@ -509,16 +510,17 @@ static int rcar_gen3_phy_usb2_power_on(struct phy *p) u32 val; int ret = 0; - mutex_lock(&channel->lock); - if (!rcar_gen3_are_all_rphys_power_off(channel)) - goto out; - if (channel->vbus) { ret = regulator_enable(channel->vbus); if (ret) - goto out; + return ret; } + guard(spinlock_irqsave)(&channel->lock); + + if (!rcar_gen3_are_all_rphys_power_off(channel)) + goto out; + val = readl(usb2_base + USB2_USBCTR); val |= USB2_USBCTR_PLL_RST; writel(val, usb2_base + USB2_USBCTR); @@ -528,7 +530,6 @@ static int rcar_gen3_phy_usb2_power_on(struct phy *p) out: /* The powered flag should be set for any other phys anyway */ rphy->powered = true; - mutex_unlock(&channel->lock); return 0; } @@ -539,18 +540,20 @@ static int rcar_gen3_phy_usb2_power_off(struct phy *p) struct rcar_gen3_chan *channel = rphy->ch; int ret = 0; - mutex_lock(&channel->lock); - rphy->powered = false; + scoped_guard(spinlock_irqsave, &channel->lock) { + rphy->powered = false; - if (!rcar_gen3_are_all_rphys_power_off(channel)) - goto out; + if (rcar_gen3_are_all_rphys_power_off(channel)) { + u32 val = readl(channel->base + USB2_USBCTR); + + val |= USB2_USBCTR_PLL_RST; + writel(val, channel->base + USB2_USBCTR); + } + } if (channel->vbus) ret = regulator_disable(channel->vbus); -out: - mutex_unlock(&channel->lock); - return ret; } @@ -703,7 +706,7 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct rcar_gen3_chan *channel; struct phy_provider *provider; - int ret = 0, i; + int ret = 0, i, irq; if (!dev->of_node) { dev_err(dev, "This driver needs device tree\n"); @@ -719,8 +722,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) return PTR_ERR(channel->base); channel->obint_enable_bits = USB2_OBINT_BITS; - /* get irq number here and request_irq for OTG in phy_init */ - channel->irq = platform_get_irq_optional(pdev, 0); channel->dr_mode = rcar_gen3_get_dr_mode(dev->of_node); if (channel->dr_mode != USB_DR_MODE_UNKNOWN) { channel->is_otg_channel = true; @@ -763,7 +764,7 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) if (phy_data->no_adp_ctrl) channel->obint_enable_bits = USB2_OBINT_IDCHG_EN; - mutex_init(&channel->lock); + spin_lock_init(&channel->lock); for (i = 0; i < NUM_OF_PHYS; i++) { channel->rphys[i].phy = devm_phy_create(dev, NULL, phy_data->phy_usb2_ops); @@ -789,6 +790,20 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) channel->vbus = NULL; } + irq = platform_get_irq_optional(pdev, 0); + if (irq < 0 && irq != -ENXIO) { + ret = irq; + goto error; + } else if (irq > 0) { + INIT_WORK(&channel->work, rcar_gen3_phy_usb2_work); + ret = devm_request_irq(dev, irq, rcar_gen3_phy_usb2_irq, + IRQF_SHARED, dev_name(dev), channel); + if (ret < 0) { + dev_err(dev, "Failed to request irq (%d)\n", irq); + goto error; + } + } + provider = devm_of_phy_provider_register(dev, rcar_gen3_phy_usb2_xlate); if (IS_ERR(provider)) { dev_err(dev, "Failed to register PHY provider\n"); diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c b/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c index 08c78c1bafc9..28a052e17366 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c @@ -1653,7 +1653,7 @@ static __maybe_unused int samsung_mipi_dcphy_runtime_resume(struct device *dev) return ret; } - clk_prepare_enable(samsung->ref_clk); + ret = clk_prepare_enable(samsung->ref_clk); if (ret) { dev_err(samsung->dev, "Failed to enable reference clock, %d\n", ret); clk_disable_unprepare(samsung->pclk); diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c index fe7c05748356..77236f012a1f 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c @@ -476,6 +476,8 @@ static const struct ropll_config ropll_tmds_cfg[] = { 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, { 650000, 162, 162, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 54, 0, 16, 4, 1, 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, + { 502500, 84, 84, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 11, 1, 4, 5, + 4, 11, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, { 337500, 0x70, 0x70, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 0x2, 0, 0x01, 5, 1, 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, { 400000, 100, 100, 1, 1, 11, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, diff --git a/drivers/phy/starfive/phy-jh7110-usb.c b/drivers/phy/starfive/phy-jh7110-usb.c index cb5454fbe2c8..b505d89860b4 100644 --- a/drivers/phy/starfive/phy-jh7110-usb.c +++ b/drivers/phy/starfive/phy-jh7110-usb.c @@ -18,6 +18,8 @@ #include <linux/usb/of.h> #define USB_125M_CLK_RATE 125000000 +#define USB_CLK_MODE_OFF 0x0 +#define USB_CLK_MODE_RX_NORMAL_PWR BIT(1) #define USB_LS_KEEPALIVE_OFF 0x4 #define USB_LS_KEEPALIVE_ENABLE BIT(4) @@ -78,6 +80,7 @@ static int jh7110_usb2_phy_init(struct phy *_phy) { struct jh7110_usb2_phy *phy = phy_get_drvdata(_phy); int ret; + unsigned int val; ret = clk_set_rate(phy->usb_125m_clk, USB_125M_CLK_RATE); if (ret) @@ -87,6 +90,10 @@ static int jh7110_usb2_phy_init(struct phy *_phy) if (ret) return ret; + val = readl(phy->regs + USB_CLK_MODE_OFF); + val |= USB_CLK_MODE_RX_NORMAL_PWR; + writel(val, phy->regs + USB_CLK_MODE_OFF); + return 0; } diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c index fae6242aa730..23a23f2d64e5 100644 --- a/drivers/phy/tegra/xusb-tegra186.c +++ b/drivers/phy/tegra/xusb-tegra186.c @@ -237,6 +237,8 @@ #define DATA0_VAL_PD BIT(1) #define USE_XUSB_AO BIT(4) +#define TEGRA_UTMI_PAD_MAX 4 + #define TEGRA186_LANE(_name, _offset, _shift, _mask, _type) \ { \ .name = _name, \ @@ -269,7 +271,7 @@ struct tegra186_xusb_padctl { /* UTMI bias and tracking */ struct clk *usb2_trk_clk; - unsigned int bias_pad_enable; + DECLARE_BITMAP(utmi_pad_enabled, TEGRA_UTMI_PAD_MAX); /* padctl context */ struct tegra186_xusb_padctl_context context; @@ -603,12 +605,8 @@ static void tegra186_utmi_bias_pad_power_on(struct tegra_xusb_padctl *padctl) u32 value; int err; - mutex_lock(&padctl->lock); - - if (priv->bias_pad_enable++ > 0) { - mutex_unlock(&padctl->lock); + if (!bitmap_empty(priv->utmi_pad_enabled, TEGRA_UTMI_PAD_MAX)) return; - } err = clk_prepare_enable(priv->usb2_trk_clk); if (err < 0) @@ -658,8 +656,6 @@ static void tegra186_utmi_bias_pad_power_on(struct tegra_xusb_padctl *padctl) } else { clk_disable_unprepare(priv->usb2_trk_clk); } - - mutex_unlock(&padctl->lock); } static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl) @@ -667,17 +663,8 @@ static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl) struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl); u32 value; - mutex_lock(&padctl->lock); - - if (WARN_ON(priv->bias_pad_enable == 0)) { - mutex_unlock(&padctl->lock); - return; - } - - if (--priv->bias_pad_enable > 0) { - mutex_unlock(&padctl->lock); + if (!bitmap_empty(priv->utmi_pad_enabled, TEGRA_UTMI_PAD_MAX)) return; - } value = padctl_readl(padctl, XUSB_PADCTL_USB2_BIAS_PAD_CTL1); value |= USB2_PD_TRK; @@ -690,13 +677,13 @@ static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl) clk_disable_unprepare(priv->usb2_trk_clk); } - mutex_unlock(&padctl->lock); } static void tegra186_utmi_pad_power_on(struct phy *phy) { struct tegra_xusb_lane *lane = phy_get_drvdata(phy); struct tegra_xusb_padctl *padctl = lane->pad->padctl; + struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl); struct tegra_xusb_usb2_port *port; struct device *dev = padctl->dev; unsigned int index = lane->index; @@ -705,9 +692,16 @@ static void tegra186_utmi_pad_power_on(struct phy *phy) if (!phy) return; + mutex_lock(&padctl->lock); + if (test_bit(index, priv->utmi_pad_enabled)) { + mutex_unlock(&padctl->lock); + return; + } + port = tegra_xusb_find_usb2_port(padctl, index); if (!port) { dev_err(dev, "no port found for USB2 lane %u\n", index); + mutex_unlock(&padctl->lock); return; } @@ -724,18 +718,28 @@ static void tegra186_utmi_pad_power_on(struct phy *phy) value = padctl_readl(padctl, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index)); value &= ~USB2_OTG_PD_DR; padctl_writel(padctl, value, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index)); + + set_bit(index, priv->utmi_pad_enabled); + mutex_unlock(&padctl->lock); } static void tegra186_utmi_pad_power_down(struct phy *phy) { struct tegra_xusb_lane *lane = phy_get_drvdata(phy); struct tegra_xusb_padctl *padctl = lane->pad->padctl; + struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl); unsigned int index = lane->index; u32 value; if (!phy) return; + mutex_lock(&padctl->lock); + if (!test_bit(index, priv->utmi_pad_enabled)) { + mutex_unlock(&padctl->lock); + return; + } + dev_dbg(padctl->dev, "power down UTMI pad %u\n", index); value = padctl_readl(padctl, XUSB_PADCTL_USB2_OTG_PADX_CTL0(index)); @@ -748,7 +752,11 @@ static void tegra186_utmi_pad_power_down(struct phy *phy) udelay(2); + clear_bit(index, priv->utmi_pad_enabled); + tegra186_utmi_bias_pad_power_off(padctl); + + mutex_unlock(&padctl->lock); } static int tegra186_xusb_padctl_vbus_override(struct tegra_xusb_padctl *padctl, diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 79d4814d758d..c89df95aa6ca 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -548,16 +548,16 @@ static int tegra_xusb_port_init(struct tegra_xusb_port *port, err = dev_set_name(&port->dev, "%s-%u", name, index); if (err < 0) - goto unregister; + goto put_device; err = device_add(&port->dev); if (err < 0) - goto unregister; + goto put_device; return 0; -unregister: - device_unregister(&port->dev); +put_device: + put_device(&port->dev); return err; } diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 7a447ff600d2..a8db66428f80 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -169,6 +169,7 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, unsigned int nr_zones, size_t *buflen) { struct request_queue *q = sdkp->disk->queue; + unsigned int max_segments; size_t bufsize; void *buf; @@ -180,12 +181,15 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, * Furthermore, since the report zone command cannot be split, make * sure that the allocated buffer can always be mapped by limiting the * number of pages allocated to the HBA max segments limit. + * Since max segments can be larger than the max inline bio vectors, + * further limit the allocated buffer to BIO_MAX_INLINE_VECS. */ nr_zones = min(nr_zones, sdkp->zone_info.nr_zones); bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE); bufsize = min_t(size_t, bufsize, queue_max_hw_sectors(q) << SECTOR_SHIFT); - bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT); + max_segments = min(BIO_MAX_INLINE_VECS, queue_max_segments(q)); + bufsize = min_t(size_t, bufsize, max_segments << PAGE_SHIFT); while (bufsize >= SECTOR_SIZE) { buf = kvzalloc(bufsize, GFP_KERNEL | __GFP_NORETRY); diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 6f8a20014e76..39aecd34c641 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -122,6 +122,10 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, set_bit(SDW_GROUP13_DEV_NUM, bus->assigned); set_bit(SDW_MASTER_DEV_NUM, bus->assigned); + ret = sdw_irq_create(bus, fwnode); + if (ret) + return ret; + /* * SDW is an enumerable bus, but devices can be powered off. So, * they won't be able to report as present. @@ -138,6 +142,7 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, if (ret < 0) { dev_err(bus->dev, "Finding slaves failed:%d\n", ret); + sdw_irq_delete(bus); return ret; } @@ -156,10 +161,6 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, bus->params.curr_bank = SDW_BANK0; bus->params.next_bank = SDW_BANK1; - ret = sdw_irq_create(bus, fwnode); - if (ret) - return ret; - return 0; } EXPORT_SYMBOL(sdw_bus_master_add); |