Diffstat (limited to 'drivers/accel')
-rw-r--r--  drivers/accel/amdxdna/TODO                           |   1
-rw-r--r--  drivers/accel/amdxdna/aie2_ctx.c                     |  65
-rw-r--r--  drivers/accel/amdxdna/aie2_message.c                 |   6
-rw-r--r--  drivers/accel/amdxdna/aie2_msg_priv.h                |  10
-rw-r--r--  drivers/accel/amdxdna/aie2_pci.c                     |  13
-rw-r--r--  drivers/accel/amdxdna/amdxdna_ctx.c                  |  22
-rw-r--r--  drivers/accel/amdxdna/amdxdna_gem.c                  | 411
-rw-r--r--  drivers/accel/amdxdna/amdxdna_gem.h                  |  24
-rw-r--r--  drivers/accel/amdxdna/amdxdna_pci_drv.c              |  11
-rw-r--r--  drivers/accel/amdxdna/amdxdna_pci_drv.h              |   2
-rw-r--r--  drivers/accel/habanalabs/Kconfig                     |   2
-rw-r--r--  drivers/accel/habanalabs/common/habanalabs_ioctl.c  |   2
-rw-r--r--  drivers/accel/ivpu/ivpu_debugfs.c                    |   6
-rw-r--r--  drivers/accel/ivpu/ivpu_drv.c                        |  14
-rw-r--r--  drivers/accel/ivpu/ivpu_drv.h                        |   1
-rw-r--r--  drivers/accel/ivpu/ivpu_fw.c                         |  21
-rw-r--r--  drivers/accel/ivpu/ivpu_fw.h                         |   1
-rw-r--r--  drivers/accel/ivpu/ivpu_gem.c                        |  12
-rw-r--r--  drivers/accel/ivpu/ivpu_hw.c                         |   2
-rw-r--r--  drivers/accel/ivpu/ivpu_hw.h                         |  14
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_btrs.c                    | 134
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_btrs.h                    |   9
-rw-r--r--  drivers/accel/ivpu/ivpu_ipc.c                        |   3
-rw-r--r--  drivers/accel/ivpu/ivpu_job.c                        |  50
-rw-r--r--  drivers/accel/ivpu/ivpu_ms.c                         |  24
-rw-r--r--  drivers/accel/ivpu/ivpu_pm.c                         |  38
-rw-r--r--  drivers/accel/ivpu/ivpu_sysfs.c                      |  49
-rw-r--r--  drivers/accel/ivpu/vpu_boot_api.h                    |  13
-rw-r--r--  drivers/accel/ivpu/vpu_jsm_api.h                     |  53
-rw-r--r--  drivers/accel/qaic/qaic_data.c                       |   8
-rw-r--r--  drivers/accel/qaic/qaic_debugfs.c                    |   2
31 files changed, 725 insertions, 298 deletions
diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
index 5119bccd1917..ad8ac6e315b6 100644
--- a/drivers/accel/amdxdna/TODO
+++ b/drivers/accel/amdxdna/TODO
@@ -1,3 +1,2 @@
-- Add import and export BO support
- Add debugfs support
- Add debug BO support
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 00d215ac866e..e04549f64d69 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -758,27 +758,42 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu
static int aie2_populate_range(struct amdxdna_gem_obj *abo)
{
struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
- struct mm_struct *mm = abo->mem.notifier.mm;
- struct hmm_range range = { 0 };
+ struct amdxdna_umap *mapp;
unsigned long timeout;
+ struct mm_struct *mm;
+ bool found;
int ret;
- XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx",
- abo->mem.userptr, abo->mem.size);
- range.notifier = &abo->mem.notifier;
- range.start = abo->mem.userptr;
- range.end = abo->mem.userptr + abo->mem.size;
- range.hmm_pfns = abo->mem.pfns;
- range.default_flags = HMM_PFN_REQ_FAULT;
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+again:
+ found = false;
+ down_write(&xdna->notifier_lock);
+ list_for_each_entry(mapp, &abo->mem.umap_list, node) {
+ if (mapp->invalid) {
+ found = true;
+ break;
+ }
+ }
- if (!mmget_not_zero(mm))
+ if (!found) {
+ abo->mem.map_invalid = false;
+ up_write(&xdna->notifier_lock);
+ return 0;
+ }
+ kref_get(&mapp->refcnt);
+ up_write(&xdna->notifier_lock);
+
+ XDNA_DBG(xdna, "populate memory range %lx %lx",
+ mapp->vma->vm_start, mapp->vma->vm_end);
+ mm = mapp->notifier.mm;
+ if (!mmget_not_zero(mm)) {
+ amdxdna_umap_put(mapp);
return -EFAULT;
+ }
- timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
-again:
- range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier);
+ mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier);
mmap_read_lock(mm);
- ret = hmm_range_fault(&range);
+ ret = hmm_range_fault(&mapp->range);
mmap_read_unlock(mm);
if (ret) {
if (time_after(jiffies, timeout)) {
@@ -786,21 +801,27 @@ again:
goto put_mm;
}
- if (ret == -EBUSY)
+ if (ret == -EBUSY) {
+ amdxdna_umap_put(mapp);
goto again;
+ }
goto put_mm;
}
- down_read(&xdna->notifier_lock);
- if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) {
- up_read(&xdna->notifier_lock);
+ down_write(&xdna->notifier_lock);
+ if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) {
+ up_write(&xdna->notifier_lock);
+ amdxdna_umap_put(mapp);
goto again;
}
- abo->mem.map_invalid = false;
- up_read(&xdna->notifier_lock);
+ mapp->invalid = false;
+ up_write(&xdna->notifier_lock);
+ amdxdna_umap_put(mapp);
+ goto again;
put_mm:
+ amdxdna_umap_put(mapp);
mmput(mm);
return ret;
}
@@ -908,10 +929,6 @@ void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
struct drm_gem_object *gobj = to_gobj(abo);
long ret;
- down_write(&xdna->notifier_lock);
- abo->mem.map_invalid = true;
- mmu_interval_set_seq(&abo->mem.notifier, cur_seq);
- up_write(&xdna->notifier_lock);
ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
true, MAX_SCHEDULE_TIMEOUT);
if (!ret || ret == -ERESTARTSYS)
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index bf4219e32cc1..82412eec9a4b 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -525,7 +525,7 @@ aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
if (!payload)
return -EINVAL;
- if (!slot_cf_has_space(offset, payload_len))
+ if (!slot_has_space(*buf, offset, payload_len))
return -ENOSPC;
buf->cu_idx = cu_idx;
@@ -558,7 +558,7 @@ aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
return -EINVAL;
- if (!slot_dpu_has_space(offset, arg_sz))
+ if (!slot_has_space(*buf, offset, arg_sz))
return -ENOSPC;
buf->inst_buf_addr = sn->buffer;
@@ -569,7 +569,7 @@ aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
memcpy(buf->args, sn->prop_args, arg_sz);
/* Accurate buf size to hint firmware to do necessary copy */
- *size += sizeof(*buf) + arg_sz;
+ *size = sizeof(*buf) + arg_sz;
return 0;
}
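
For context (not part of the patch): a minimal sketch of the bound that the unified slot_has_space() macro, introduced in aie2_msg_priv.h below, enforces at these two call sites. MAX_CHAIN_CMDBUF_SIZE and the struct layout are taken from that header; the helper name slot_fits() is invented for illustration.

#include <stdbool.h>
#include <stddef.h>

#define MAX_CHAIN_CMDBUF_SIZE 4096	/* SZ_4K */

struct cmd_chain_slot_execbuf_cf {
	unsigned int cu_idx;
	unsigned int arg_cnt;
	unsigned int args[];		/* payload follows the fixed header */
};

/* Mirrors slot_has_space(*buf, offset, payload_size): the fixed slot
 * header plus the payload must still fit inside the 4K chain buffer. */
static bool slot_fits(size_t offset, size_t payload_size)
{
	return MAX_CHAIN_CMDBUF_SIZE >=
	       offset + payload_size + sizeof(struct cmd_chain_slot_execbuf_cf);
}

Compared with the removed slot_cf_has_space()/slot_dpu_has_space() macros, the additive form cannot wrap when offset + payload_size already exceeds the buffer (the old form subtracted first, in unsigned arithmetic), and one macro covers both slot types because the header size is taken from the slot's own type.
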
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
index 4e02e744b470..6df9065b13f6 100644
--- a/drivers/accel/amdxdna/aie2_msg_priv.h
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -319,18 +319,16 @@ struct async_event_msg_resp {
} __packed;
#define MAX_CHAIN_CMDBUF_SIZE SZ_4K
-#define slot_cf_has_space(offset, payload_size) \
- (MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \
- offsetof(struct cmd_chain_slot_execbuf_cf, args[0]))
+#define slot_has_space(slot, offset, payload_size) \
+ (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \
+ sizeof(typeof(slot)))
+
struct cmd_chain_slot_execbuf_cf {
__u32 cu_idx;
__u32 arg_cnt;
__u32 args[] __counted_by(arg_cnt);
};
-#define slot_dpu_has_space(offset, payload_size) \
- (MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \
- offsetof(struct cmd_chain_slot_dpu, args[0]))
struct cmd_chain_slot_dpu {
__u64 inst_buf_addr;
__u32 inst_size;
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 5a058e565b01..c6cf7068d23c 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -512,12 +512,6 @@ static int aie2_init(struct amdxdna_dev *xdna)
goto release_fw;
}
- ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
- if (ret) {
- XDNA_ERR(xdna, "Enable PASID failed, ret %d", ret);
- goto free_irq;
- }
-
psp_conf.fw_size = fw->size;
psp_conf.fw_buf = fw->data;
for (i = 0; i < PSP_MAX_REGS; i++)
@@ -526,14 +520,14 @@ static int aie2_init(struct amdxdna_dev *xdna)
if (!ndev->psp_hdl) {
XDNA_ERR(xdna, "failed to create psp");
ret = -ENOMEM;
- goto disable_sva;
+ goto free_irq;
}
xdna->dev_handle = ndev;
ret = aie2_hw_start(xdna);
if (ret) {
XDNA_ERR(xdna, "start npu failed, ret %d", ret);
- goto disable_sva;
+ goto free_irq;
}
ret = aie2_mgmt_fw_query(ndev);
@@ -584,8 +578,6 @@ async_event_free:
aie2_error_async_events_free(ndev);
stop_hw:
aie2_hw_stop(xdna);
-disable_sva:
- iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
free_irq:
pci_free_irq_vectors(pdev);
release_fw:
@@ -601,7 +593,6 @@ static void aie2_fini(struct amdxdna_dev *xdna)
aie2_hw_stop(xdna);
aie2_error_async_events_free(ndev);
- iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
pci_free_irq_vectors(pdev);
}
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index 43442b9e273b..be073224bd69 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -496,11 +496,11 @@ static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
struct amdxdna_drm_exec_cmd *args)
{
struct amdxdna_dev *xdna = client->xdna;
- u32 *arg_bo_hdls;
+ u32 *arg_bo_hdls = NULL;
u32 cmd_bo_hdl;
int ret;
- if (!args->arg_count || args->arg_count > MAX_ARG_COUNT) {
+ if (args->arg_count > MAX_ARG_COUNT) {
XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count);
return -EINVAL;
}
@@ -512,14 +512,16 @@ static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
}
cmd_bo_hdl = (u32)args->cmd_handles;
- arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL);
- if (!arg_bo_hdls)
- return -ENOMEM;
- ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args),
- args->arg_count * sizeof(u32));
- if (ret) {
- ret = -EFAULT;
- goto free_cmd_bo_hdls;
+ if (args->arg_count) {
+ arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL);
+ if (!arg_bo_hdls)
+ return -ENOMEM;
+ ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args),
+ args->arg_count * sizeof(u32));
+ if (ret) {
+ ret = -EFAULT;
+ goto free_cmd_bo_hdls;
+ }
}
ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls,
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index 606433d73236..26831ec69f89 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -9,7 +9,10 @@
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/gpu_scheduler.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-direct.h>
#include <linux/iosys-map.h>
+#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include "amdxdna_ctx.h"
@@ -18,6 +21,8 @@
#define XDNA_MAX_CMD_BO_SIZE SZ_32K
+MODULE_IMPORT_NS("DMA_BUF");
+
static int
amdxdna_gem_insert_node_locked(struct amdxdna_gem_obj *abo, bool use_vmap)
{
@@ -55,57 +60,38 @@ amdxdna_gem_insert_node_locked(struct amdxdna_gem_obj *abo, bool use_vmap)
return 0;
}
-static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
-{
- struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
- struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
- struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva);
-
- XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr);
- if (abo->pinned)
- amdxdna_gem_unpin(abo);
-
- if (abo->type == AMDXDNA_BO_DEV) {
- mutex_lock(&abo->client->mm_lock);
- drm_mm_remove_node(&abo->mm_node);
- mutex_unlock(&abo->client->mm_lock);
-
- vunmap(abo->mem.kva);
- drm_gem_object_put(to_gobj(abo->dev_heap));
- drm_gem_object_release(gobj);
- mutex_destroy(&abo->lock);
- kfree(abo);
- return;
- }
-
- if (abo->type == AMDXDNA_BO_DEV_HEAP)
- drm_mm_takedown(&abo->mm);
-
- drm_gem_vunmap_unlocked(gobj, &map);
- mutex_destroy(&abo->lock);
- drm_gem_shmem_free(&abo->base);
-}
-
-static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
- .free = amdxdna_gem_obj_free,
-};
-
static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
{
- struct amdxdna_gem_obj *abo = container_of(mni, struct amdxdna_gem_obj,
- mem.notifier);
- struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ struct amdxdna_umap *mapp = container_of(mni, struct amdxdna_umap, notifier);
+ struct amdxdna_gem_obj *abo = mapp->abo;
+ struct amdxdna_dev *xdna;
- XDNA_DBG(xdna, "Invalid range 0x%llx, 0x%lx, type %d",
- abo->mem.userptr, abo->mem.size, abo->type);
+ xdna = to_xdna_dev(to_gobj(abo)->dev);
+ XDNA_DBG(xdna, "Invalidating range 0x%lx, 0x%lx, type %d",
+ mapp->vma->vm_start, mapp->vma->vm_end, abo->type);
if (!mmu_notifier_range_blockable(range))
return false;
+ down_write(&xdna->notifier_lock);
+ abo->mem.map_invalid = true;
+ mapp->invalid = true;
+ mmu_interval_set_seq(&mapp->notifier, cur_seq);
+ up_write(&xdna->notifier_lock);
+
xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
+ if (range->event == MMU_NOTIFY_UNMAP) {
+ down_write(&xdna->notifier_lock);
+ if (!mapp->unmapped) {
+ queue_work(xdna->notifier_wq, &mapp->hmm_unreg_work);
+ mapp->unmapped = true;
+ }
+ up_write(&xdna->notifier_lock);
+ }
+
return true;
}
@@ -113,102 +99,310 @@ static const struct mmu_interval_notifier_ops amdxdna_hmm_ops = {
.invalidate = amdxdna_hmm_invalidate,
};
-static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo)
+static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo,
+ struct vm_area_struct *vma)
{
struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ struct amdxdna_umap *mapp;
+
+ down_read(&xdna->notifier_lock);
+ list_for_each_entry(mapp, &abo->mem.umap_list, node) {
+ if (!vma || mapp->vma == vma) {
+ if (!mapp->unmapped) {
+ queue_work(xdna->notifier_wq, &mapp->hmm_unreg_work);
+ mapp->unmapped = true;
+ }
+ if (vma)
+ break;
+ }
+ }
+ up_read(&xdna->notifier_lock);
+}
- if (!xdna->dev_info->ops->hmm_invalidate)
- return;
+static void amdxdna_umap_release(struct kref *ref)
+{
+ struct amdxdna_umap *mapp = container_of(ref, struct amdxdna_umap, refcnt);
+ struct vm_area_struct *vma = mapp->vma;
+ struct amdxdna_dev *xdna;
+
+ mmu_interval_notifier_remove(&mapp->notifier);
+ if (is_import_bo(mapp->abo) && vma->vm_file && vma->vm_file->f_mapping)
+ mapping_clear_unevictable(vma->vm_file->f_mapping);
+
+ xdna = to_xdna_dev(to_gobj(mapp->abo)->dev);
+ down_write(&xdna->notifier_lock);
+ list_del(&mapp->node);
+ up_write(&xdna->notifier_lock);
+
+ kvfree(mapp->range.hmm_pfns);
+ kfree(mapp);
+}
+
+void amdxdna_umap_put(struct amdxdna_umap *mapp)
+{
+ kref_put(&mapp->refcnt, amdxdna_umap_release);
+}
+
+static void amdxdna_hmm_unreg_work(struct work_struct *work)
+{
+ struct amdxdna_umap *mapp = container_of(work, struct amdxdna_umap,
+ hmm_unreg_work);
- mmu_interval_notifier_remove(&abo->mem.notifier);
- kvfree(abo->mem.pfns);
- abo->mem.pfns = NULL;
+ amdxdna_umap_put(mapp);
}
-static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo, unsigned long addr,
- size_t len)
+static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo,
+ struct vm_area_struct *vma)
{
struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ unsigned long len = vma->vm_end - vma->vm_start;
+ unsigned long addr = vma->vm_start;
+ struct amdxdna_umap *mapp;
u32 nr_pages;
int ret;
if (!xdna->dev_info->ops->hmm_invalidate)
return 0;
- if (abo->mem.pfns)
- return -EEXIST;
+ mapp = kzalloc(sizeof(*mapp), GFP_KERNEL);
+ if (!mapp)
+ return -ENOMEM;
nr_pages = (PAGE_ALIGN(addr + len) - (addr & PAGE_MASK)) >> PAGE_SHIFT;
- abo->mem.pfns = kvcalloc(nr_pages, sizeof(*abo->mem.pfns),
- GFP_KERNEL);
- if (!abo->mem.pfns)
- return -ENOMEM;
+ mapp->range.hmm_pfns = kvcalloc(nr_pages, sizeof(*mapp->range.hmm_pfns),
+ GFP_KERNEL);
+ if (!mapp->range.hmm_pfns) {
+ ret = -ENOMEM;
+ goto free_map;
+ }
- ret = mmu_interval_notifier_insert_locked(&abo->mem.notifier,
+ ret = mmu_interval_notifier_insert_locked(&mapp->notifier,
current->mm,
addr,
len,
&amdxdna_hmm_ops);
if (ret) {
XDNA_ERR(xdna, "Insert mmu notifier failed, ret %d", ret);
- kvfree(abo->mem.pfns);
+ goto free_pfns;
}
- abo->mem.userptr = addr;
+ mapp->range.notifier = &mapp->notifier;
+ mapp->range.start = vma->vm_start;
+ mapp->range.end = vma->vm_end;
+ mapp->range.default_flags = HMM_PFN_REQ_FAULT;
+ mapp->vma = vma;
+ mapp->abo = abo;
+ kref_init(&mapp->refcnt);
+
+ if (abo->mem.userptr == AMDXDNA_INVALID_ADDR)
+ abo->mem.userptr = addr;
+ INIT_WORK(&mapp->hmm_unreg_work, amdxdna_hmm_unreg_work);
+ if (is_import_bo(abo) && vma->vm_file && vma->vm_file->f_mapping)
+ mapping_set_unevictable(vma->vm_file->f_mapping);
+
+ down_write(&xdna->notifier_lock);
+ list_add_tail(&mapp->node, &abo->mem.umap_list);
+ up_write(&xdna->notifier_lock);
+
+ return 0;
+
+free_pfns:
+ kvfree(mapp->range.hmm_pfns);
+free_map:
+ kfree(mapp);
return ret;
}
+static int amdxdna_insert_pages(struct amdxdna_gem_obj *abo,
+ struct vm_area_struct *vma)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ unsigned long num_pages = vma_pages(vma);
+ unsigned long offset = 0;
+ int ret;
+
+ if (!is_import_bo(abo)) {
+ ret = drm_gem_shmem_mmap(&abo->base, vma);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed shmem mmap %d", ret);
+ return ret;
+ }
+
+ /* The buffer is based on memory pages. Fix the flag. */
+ vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP);
+ ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages,
+ &num_pages);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed insert pages %d", ret);
+ vma->vm_ops->close(vma);
+ return ret;
+ }
+
+ return 0;
+ }
+
+ vma->vm_private_data = NULL;
+ vma->vm_ops = NULL;
+ ret = dma_buf_mmap(to_gobj(abo)->dma_buf, vma, 0);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to mmap dma buf %d", ret);
+ return ret;
+ }
+
+ do {
+ vm_fault_t fault_ret;
+
+ fault_ret = handle_mm_fault(vma, vma->vm_start + offset,
+ FAULT_FLAG_WRITE, NULL);
+ if (fault_ret & VM_FAULT_ERROR) {
+ vma->vm_ops->close(vma);
+ XDNA_ERR(xdna, "Fault in page failed");
+ return -EFAULT;
+ }
+
+ offset += PAGE_SIZE;
+ } while (--num_pages);
+
+ /* Drop the reference drm_gem_mmap_obj() acquired.*/
+ drm_gem_object_put(to_gobj(abo));
+
+ return 0;
+}
+
static int amdxdna_gem_obj_mmap(struct drm_gem_object *gobj,
struct vm_area_struct *vma)
{
+ struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
- unsigned long num_pages;
int ret;
- ret = amdxdna_hmm_register(abo, vma->vm_start, gobj->size);
+ ret = amdxdna_hmm_register(abo, vma);
if (ret)
return ret;
+ ret = amdxdna_insert_pages(abo, vma);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed insert pages, ret %d", ret);
+ goto hmm_unreg;
+ }
+
+ XDNA_DBG(xdna, "BO map_offset 0x%llx type %d userptr 0x%lx size 0x%lx",
+ drm_vma_node_offset_addr(&gobj->vma_node), abo->type,
+ vma->vm_start, gobj->size);
+ return 0;
+
+hmm_unreg:
+ amdxdna_hmm_unregister(abo, vma);
+ return ret;
+}
+
+static int amdxdna_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
+{
+ struct drm_gem_object *gobj = dma_buf->priv;
+ struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+ unsigned long num_pages = vma_pages(vma);
+ int ret;
+
+ vma->vm_ops = &drm_gem_shmem_vm_ops;
+ vma->vm_private_data = gobj;
+
+ drm_gem_object_get(gobj);
ret = drm_gem_shmem_mmap(&abo->base, vma);
if (ret)
- goto hmm_unreg;
+ goto put_obj;
- num_pages = gobj->size >> PAGE_SHIFT;
- /* Try to insert the pages */
+ /* The buffer is based on memory pages. Fix the flag. */
vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP);
- ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages, &num_pages);
+ ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages,
+ &num_pages);
if (ret)
- XDNA_ERR(abo->client->xdna, "Failed insert pages, ret %d", ret);
+ goto close_vma;
return 0;
-hmm_unreg:
- amdxdna_hmm_unregister(abo);
+close_vma:
+ vma->vm_ops->close(vma);
+put_obj:
+ drm_gem_object_put(gobj);
return ret;
}
-static vm_fault_t amdxdna_gem_vm_fault(struct vm_fault *vmf)
+static const struct dma_buf_ops amdxdna_dmabuf_ops = {
+ .attach = drm_gem_map_attach,
+ .detach = drm_gem_map_detach,
+ .map_dma_buf = drm_gem_map_dma_buf,
+ .unmap_dma_buf = drm_gem_unmap_dma_buf,
+ .release = drm_gem_dmabuf_release,
+ .mmap = amdxdna_gem_dmabuf_mmap,
+ .vmap = drm_gem_dmabuf_vmap,
+ .vunmap = drm_gem_dmabuf_vunmap,
+};
+
+static struct dma_buf *amdxdna_gem_prime_export(struct drm_gem_object *gobj, int flags)
{
- return drm_gem_shmem_vm_ops.fault(vmf);
+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+ exp_info.ops = &amdxdna_dmabuf_ops;
+ exp_info.size = gobj->size;
+ exp_info.flags = flags;
+ exp_info.priv = gobj;
+ exp_info.resv = gobj->resv;
+
+ return drm_gem_dmabuf_export(gobj->dev, &exp_info);
}
-static void amdxdna_gem_vm_open(struct vm_area_struct *vma)
+static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo)
{
- drm_gem_shmem_vm_ops.open(vma);
+ dma_buf_unmap_attachment_unlocked(abo->attach, abo->base.sgt, DMA_BIDIRECTIONAL);
+ dma_buf_detach(abo->dma_buf, abo->attach);
+ dma_buf_put(abo->dma_buf);
+ drm_gem_object_release(to_gobj(abo));
+ kfree(abo);
}
-static void amdxdna_gem_vm_close(struct vm_area_struct *vma)
+static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
{
- struct drm_gem_object *gobj = vma->vm_private_data;
+ struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
+ struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+ struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva);
+
+ XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr);
+
+ amdxdna_hmm_unregister(abo, NULL);
+ flush_workqueue(xdna->notifier_wq);
+
+ if (abo->pinned)
+ amdxdna_gem_unpin(abo);
- amdxdna_hmm_unregister(to_xdna_obj(gobj));
- drm_gem_shmem_vm_ops.close(vma);
+ if (abo->type == AMDXDNA_BO_DEV) {
+ mutex_lock(&abo->client->mm_lock);
+ drm_mm_remove_node(&abo->mm_node);
+ mutex_unlock(&abo->client->mm_lock);
+
+ vunmap(abo->mem.kva);
+ drm_gem_object_put(to_gobj(abo->dev_heap));
+ drm_gem_object_release(gobj);
+ mutex_destroy(&abo->lock);
+ kfree(abo);
+ return;
+ }
+
+ if (abo->type == AMDXDNA_BO_DEV_HEAP)
+ drm_mm_takedown(&abo->mm);
+
+ drm_gem_vunmap(gobj, &map);
+ mutex_destroy(&abo->lock);
+
+ if (is_import_bo(abo)) {
+ amdxdna_imported_obj_free(abo);
+ return;
+ }
+
+ drm_gem_shmem_free(&abo->base);
}
-static const struct vm_operations_struct amdxdna_gem_vm_ops = {
- .fault = amdxdna_gem_vm_fault,
- .open = amdxdna_gem_vm_open,
- .close = amdxdna_gem_vm_close,
+static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
+ .free = amdxdna_gem_obj_free,
};
static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
@@ -220,7 +414,8 @@ static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
.vmap = drm_gem_shmem_object_vmap,
.vunmap = drm_gem_shmem_object_vunmap,
.mmap = amdxdna_gem_obj_mmap,
- .vm_ops = &amdxdna_gem_vm_ops,
+ .vm_ops = &drm_gem_shmem_vm_ops,
+ .export = amdxdna_gem_prime_export,
};
static struct amdxdna_gem_obj *
@@ -239,6 +434,7 @@ amdxdna_gem_create_obj(struct drm_device *dev, size_t size)
abo->mem.userptr = AMDXDNA_INVALID_ADDR;
abo->mem.dev_addr = AMDXDNA_INVALID_ADDR;
abo->mem.size = size;
+ INIT_LIST_HEAD(&abo->mem.umap_list);
return abo;
}
@@ -258,6 +454,51 @@ amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size)
return to_gobj(abo);
}
+struct drm_gem_object *
+amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
+{
+ struct dma_buf_attachment *attach;
+ struct amdxdna_gem_obj *abo;
+ struct drm_gem_object *gobj;
+ struct sg_table *sgt;
+ int ret;
+
+ get_dma_buf(dma_buf);
+
+ attach = dma_buf_attach(dma_buf, dev->dev);
+ if (IS_ERR(attach)) {
+ ret = PTR_ERR(attach);
+ goto put_buf;
+ }
+
+ sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt)) {
+ ret = PTR_ERR(sgt);
+ goto fail_detach;
+ }
+
+ gobj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt);
+ if (IS_ERR(gobj)) {
+ ret = PTR_ERR(gobj);
+ goto fail_unmap;
+ }
+
+ abo = to_xdna_obj(gobj);
+ abo->attach = attach;
+ abo->dma_buf = dma_buf;
+
+ return gobj;
+
+fail_unmap:
+ dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL);
+fail_detach:
+ dma_buf_detach(dma_buf, attach);
+put_buf:
+ dma_buf_put(dma_buf);
+
+ return ERR_PTR(ret);
+}
+
static struct amdxdna_gem_obj *
amdxdna_drm_alloc_shmem(struct drm_device *dev,
struct amdxdna_drm_create_bo *args,
@@ -417,7 +658,7 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev,
abo->type = AMDXDNA_BO_CMD;
abo->client = filp->driver_priv;
- ret = drm_gem_vmap_unlocked(to_gobj(abo), &map);
+ ret = drm_gem_vmap(to_gobj(abo), &map);
if (ret) {
XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret);
goto release_obj;
@@ -483,6 +724,9 @@ int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo)
struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
int ret;
+ if (is_import_bo(abo))
+ return 0;
+
switch (abo->type) {
case AMDXDNA_BO_SHMEM:
case AMDXDNA_BO_DEV_HEAP:
@@ -515,6 +759,9 @@ int amdxdna_gem_pin(struct amdxdna_gem_obj *abo)
void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo)
{
+ if (is_import_bo(abo))
+ return;
+
if (abo->type == AMDXDNA_BO_DEV)
abo = abo->dev_heap;
@@ -606,7 +853,9 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
goto put_obj;
}
- if (abo->type == AMDXDNA_BO_DEV)
+ if (is_import_bo(abo))
+ drm_clflush_sg(abo->base.sgt);
+ else if (abo->type == AMDXDNA_BO_DEV)
drm_clflush_pages(abo->mem.pages, abo->mem.nr_pages);
else
drm_clflush_pages(abo->base.pages, gobj->size >> PAGE_SHIFT);
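
Not part of the patch: a hedged userspace sketch of how the new export/import support is exercised through the generic DRM PRIME ioctls, which land in amdxdna_gem_prime_export() and amdxdna_gem_prime_import() above. The handle and fd values are hypothetical.

#include <stdint.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <drm/drm.h>

/* Export an amdxdna BO handle as a dma-buf file descriptor. */
static int bo_export(int drm_fd, uint32_t bo_handle, int *dmabuf_fd)
{
	struct drm_prime_handle req = {
		.handle = bo_handle,
		.flags = DRM_CLOEXEC | DRM_RDWR,
	};

	if (ioctl(drm_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &req))
		return -1;
	*dmabuf_fd = req.fd;
	return 0;
}

/* Import a dma-buf fd (e.g. exported by another device) as an amdxdna BO. */
static int bo_import(int drm_fd, int dmabuf_fd, uint32_t *bo_handle)
{
	struct drm_prime_handle req = { .fd = dmabuf_fd };

	if (ioctl(drm_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &req))
		return -1;
	*bo_handle = req.handle;
	return 0;
}

On the kernel side the import path pins the attachment's sg_table and marks the object so that is_import_bo() steers mmap, pin and sync through the dma-buf rather than the shmem backing.
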
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
index 8ccc0375dd9d..aee97e971d6d 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.h
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -6,6 +6,20 @@
#ifndef _AMDXDNA_GEM_H_
#define _AMDXDNA_GEM_H_
+#include <linux/hmm.h>
+
+struct amdxdna_umap {
+ struct vm_area_struct *vma;
+ struct mmu_interval_notifier notifier;
+ struct hmm_range range;
+ struct work_struct hmm_unreg_work;
+ struct amdxdna_gem_obj *abo;
+ struct list_head node;
+ struct kref refcnt;
+ bool invalid;
+ bool unmapped;
+};
+
struct amdxdna_mem {
u64 userptr;
void *kva;
@@ -13,8 +27,7 @@ struct amdxdna_mem {
size_t size;
struct page **pages;
u32 nr_pages;
- struct mmu_interval_notifier notifier;
- unsigned long *pfns;
+ struct list_head umap_list;
bool map_invalid;
};
@@ -31,9 +44,12 @@ struct amdxdna_gem_obj {
struct amdxdna_gem_obj *dev_heap; /* For AMDXDNA_BO_DEV */
struct drm_mm_node mm_node; /* For AMDXDNA_BO_DEV */
u32 assigned_hwctx;
+ struct dma_buf *dma_buf;
+ struct dma_buf_attachment *attach;
};
#define to_gobj(obj) (&(obj)->base.base)
+#define is_import_bo(obj) ((obj)->attach)
static inline struct amdxdna_gem_obj *to_xdna_obj(struct drm_gem_object *gobj)
{
@@ -47,8 +63,12 @@ static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo)
drm_gem_object_put(to_gobj(abo));
}
+void amdxdna_umap_put(struct amdxdna_umap *mapp);
+
struct drm_gem_object *
amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size);
+struct drm_gem_object *
+amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
struct amdxdna_gem_obj *
amdxdna_drm_alloc_dev_bo(struct drm_device *dev,
struct amdxdna_drm_create_bo *args,
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index f5b8497cf5ad..f2bf1d374cc7 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -226,6 +226,7 @@ const struct drm_driver amdxdna_drm_drv = {
.num_ioctls = ARRAY_SIZE(amdxdna_drm_ioctls),
.gem_create_object = amdxdna_gem_create_object_cb,
+ .gem_prime_import = amdxdna_gem_prime_import,
};
static const struct amdxdna_dev_info *
@@ -266,12 +267,16 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
fs_reclaim_release(GFP_KERNEL);
}
+ xdna->notifier_wq = alloc_ordered_workqueue("notifier_wq", 0);
+ if (!xdna->notifier_wq)
+ return -ENOMEM;
+
mutex_lock(&xdna->dev_lock);
ret = xdna->dev_info->ops->init(xdna);
mutex_unlock(&xdna->dev_lock);
if (ret) {
XDNA_ERR(xdna, "Hardware init failed, ret %d", ret);
- return ret;
+ goto destroy_notifier_wq;
}
ret = amdxdna_sysfs_init(xdna);
@@ -301,6 +306,8 @@ failed_dev_fini:
mutex_lock(&xdna->dev_lock);
xdna->dev_info->ops->fini(xdna);
mutex_unlock(&xdna->dev_lock);
+destroy_notifier_wq:
+ destroy_workqueue(xdna->notifier_wq);
return ret;
}
@@ -310,6 +317,8 @@ static void amdxdna_remove(struct pci_dev *pdev)
struct device *dev = &pdev->dev;
struct amdxdna_client *client;
+ destroy_workqueue(xdna->notifier_wq);
+
pm_runtime_get_noresume(dev);
pm_runtime_forbid(dev);
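
A condensed, illustrative sketch (not from the patch; names and fields simplified) of the deferred-release pattern the new notifier_wq serves: the invalidate/unmap paths queue per-mapping work, the work drops a kref, and the final put performs mmu_interval_notifier_remove() outside notifier context.

#include <linux/kref.h>
#include <linux/mmu_notifier.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct umap_like {
	struct mmu_interval_notifier notifier;
	struct work_struct unreg_work;
	struct kref refcnt;
};

static void umap_release(struct kref *ref)
{
	struct umap_like *m = container_of(ref, struct umap_like, refcnt);

	mmu_interval_notifier_remove(&m->notifier);	/* may sleep */
	kfree(m);
}

static void umap_unreg_work(struct work_struct *work)
{
	struct umap_like *m = container_of(work, struct umap_like, unreg_work);

	kref_put(&m->refcnt, umap_release);	/* drop the registration ref */
}

/*
 * probe:      wq = alloc_ordered_workqueue("notifier_wq", 0);
 * invalidate: queue_work(wq, &m->unreg_work);   (once per mapping)
 * obj free:   flush_workqueue(wq);              (wait for pending puts)
 * remove:     destroy_workqueue(wq);
 */
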
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index 37848a8d8031..ab79600911aa 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -6,6 +6,7 @@
#ifndef _AMDXDNA_PCI_DRV_H_
#define _AMDXDNA_PCI_DRV_H_
+#include <linux/workqueue.h>
#include <linux/xarray.h>
#define XDNA_INFO(xdna, fmt, args...) drm_info(&(xdna)->ddev, fmt, ##args)
@@ -98,6 +99,7 @@ struct amdxdna_dev {
struct list_head client_list;
struct amdxdna_fw_ver fw_ver;
struct rw_semaphore notifier_lock; /* for mmu notifier*/
+ struct workqueue_struct *notifier_wq;
};
/*
diff --git a/drivers/accel/habanalabs/Kconfig b/drivers/accel/habanalabs/Kconfig
index be85336107f9..1919fbb169c7 100644
--- a/drivers/accel/habanalabs/Kconfig
+++ b/drivers/accel/habanalabs/Kconfig
@@ -6,7 +6,7 @@
config DRM_ACCEL_HABANALABS
tristate "HabanaLabs AI accelerators"
depends on DRM_ACCEL
- depends on X86_64
+ depends on X86 && X86_64
depends on PCI && HAS_IOMEM
select GENERIC_ALLOCATOR
select HWMON
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 8729a0c57d78..dc80ca921d90 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -17,8 +17,6 @@
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
-#include <asm/msr.h>
-
/* make sure there is space for all the signed info */
static_assert(sizeof(struct cpucp_info) <= SEC_DEV_INFO_BUF_SZ);
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index 0825851656a2..cd24ccd20ba6 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -332,7 +332,7 @@ ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t si
return -EINVAL;
ret = ivpu_rpm_get(vdev);
- if (ret)
+ if (ret < 0)
return ret;
ivpu_pm_trigger_recovery(vdev, "debugfs");
@@ -383,7 +383,7 @@ static int dct_active_set(void *data, u64 active_percent)
return -EINVAL;
ret = ivpu_rpm_get(vdev);
- if (ret)
+ if (ret < 0)
return ret;
if (active_percent)
@@ -455,7 +455,7 @@ priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t
if (ret < 0)
return ret;
- buf[size] = '\0';
+ buf[ret] = '\0';
ret = sscanf(buf, "%u %u %u %u", &band, &grace_period, &process_grace_period,
&process_quantum);
if (ret != 4)
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 4fa73189502e..0e7748c5e117 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#include <linux/firmware.h>
@@ -164,7 +164,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
args->value = vdev->platform;
break;
case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
- args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio);
+ args->value = ivpu_hw_dpu_max_freq_get(vdev);
break;
case DRM_IVPU_PARAM_NUM_CONTEXTS:
args->value = ivpu_get_context_count(vdev);
@@ -374,6 +374,9 @@ int ivpu_boot(struct ivpu_device *vdev)
{
int ret;
+ drm_WARN_ON(&vdev->drm, atomic_read(&vdev->job_timeout_counter));
+ drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
+
/* Update boot params located at first 4KB of FW memory */
ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem));
@@ -421,9 +424,9 @@ void ivpu_prepare_for_reset(struct ivpu_device *vdev)
{
ivpu_hw_irq_disable(vdev);
disable_irq(vdev->irq);
- cancel_work_sync(&vdev->irq_ipc_work);
- cancel_work_sync(&vdev->irq_dct_work);
- cancel_work_sync(&vdev->context_abort_work);
+ flush_work(&vdev->irq_ipc_work);
+ flush_work(&vdev->irq_dct_work);
+ flush_work(&vdev->context_abort_work);
ivpu_ipc_disable(vdev);
ivpu_mmu_disable(vdev);
}
@@ -573,6 +576,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID;
vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID;
atomic64_set(&vdev->unique_id_counter, 0);
+ atomic_set(&vdev->job_timeout_counter, 0);
xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 92753effb1c9..5497e7030e91 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -154,6 +154,7 @@ struct ivpu_device {
struct mutex submitted_jobs_lock; /* Protects submitted_jobs */
struct xarray submitted_jobs_xa;
struct ivpu_ipc_consumer job_done_consumer;
+ atomic_t job_timeout_counter;
atomic64_t unique_id_counter;
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 7a1bb92d8c81..ccaaf6c100c0 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#include <linux/firmware.h>
@@ -233,10 +233,20 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
fw->dvfs_mode = 0;
fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr);
- fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
- fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS");
+ if (fw_hdr->preemption_buffer_1_max_size)
+ fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size;
+ else
+ fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
+
+ if (fw_hdr->preemption_buffer_2_max_size)
+ fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size;
+ else
+ fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
+ ivpu_dbg(vdev, FW_BOOT, "Preemption buffer sizes: primary %u, secondary %u\n",
+ fw->primary_preempt_buf_size, fw->secondary_preempt_buf_size);
+
if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address,
fw_hdr->ro_section_size,
fw_hdr->image_load_address,
@@ -534,7 +544,7 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
boot_params->d0i3_entry_vpu_ts);
ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n",
boot_params->system_time_us);
- ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = %u\n",
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = 0x%x\n",
boot_params->power_profile);
}
@@ -566,7 +576,6 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->magic = VPU_BOOT_PARAMS_MAGIC;
boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number;
- boot_params->frequency = ivpu_hw_pll_freq_get(vdev);
/*
* This param is a debug firmware feature. It switches default clock
@@ -637,7 +646,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->d0i3_residency_time_us = 0;
boot_params->d0i3_entry_vpu_ts = 0;
if (IVPU_WA(disable_d0i2))
- boot_params->power_profile = 1;
+ boot_params->power_profile |= BIT(1);
boot_params->system_time_us = ktime_to_us(ktime_get_real());
wmb(); /* Flush WC buffers after writing bootparams */
diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h
index 1d0b2bd9d65c..9a3935be1c05 100644
--- a/drivers/accel/ivpu/ivpu_fw.h
+++ b/drivers/accel/ivpu/ivpu_fw.h
@@ -39,6 +39,7 @@ struct ivpu_fw_info {
u64 read_only_addr;
u32 read_only_size;
u32 sched_mode;
+ u64 last_heartbeat;
};
int ivpu_fw_init(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index 8741c73b92ce..e0d242d9f3e5 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -30,7 +30,7 @@ static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, con
"%6s: bo %8p vpu_addr %9llx size %8zu ctx %d has_pages %d dma_mapped %d mmu_mapped %d wc %d imported %d\n",
action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx ? bo->ctx->id : 0,
(bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc,
- (bool)bo->base.base.import_attach);
+ (bool)drm_gem_is_imported(&bo->base.base));
}
/*
@@ -122,7 +122,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
bo->ctx = NULL;
}
- if (bo->base.base.import_attach)
+ if (drm_gem_is_imported(&bo->base.base))
return;
dma_resv_lock(bo->base.base.resv, NULL);
@@ -282,7 +282,7 @@ static void ivpu_gem_bo_free(struct drm_gem_object *obj)
ivpu_bo_unbind_locked(bo);
mutex_destroy(&bo->lock);
- drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1);
+ drm_WARN_ON(obj->dev, refcount_read(&bo->base.pages_use_count) > 1);
drm_gem_shmem_free(&bo->base);
}
@@ -362,7 +362,7 @@ ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
if (flags & DRM_IVPU_BO_MAPPABLE) {
dma_resv_lock(bo->base.base.resv, NULL);
- ret = drm_gem_shmem_vmap(&bo->base, &map);
+ ret = drm_gem_shmem_vmap_locked(&bo->base, &map);
dma_resv_unlock(bo->base.base.resv);
if (ret)
@@ -387,7 +387,7 @@ void ivpu_bo_free(struct ivpu_bo *bo)
if (bo->flags & DRM_IVPU_BO_MAPPABLE) {
dma_resv_lock(bo->base.base.resv, NULL);
- drm_gem_shmem_vunmap(&bo->base, &map);
+ drm_gem_shmem_vunmap_locked(&bo->base, &map);
dma_resv_unlock(bo->base.base.resv);
}
@@ -461,7 +461,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
if (bo->mmu_mapped)
drm_printf(p, " mmu_mapped");
- if (bo->base.base.import_attach)
+ if (drm_gem_is_imported(&bo->base.base))
drm_printf(p, " imported");
drm_printf(p, "\n");
diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c
index ec9a3629da3a..633160470c93 100644
--- a/drivers/accel/ivpu/ivpu_hw.c
+++ b/drivers/accel/ivpu/ivpu_hw.c
@@ -119,7 +119,7 @@ static void timeouts_init(struct ivpu_device *vdev)
else
vdev->timeout.autosuspend = 100;
vdev->timeout.d0i3_entry_msg = 5;
- vdev->timeout.state_dump_msg = 10;
+ vdev->timeout.state_dump_msg = 100;
}
}
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index 16435f2756d0..d79668fe1609 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_HW_H__
@@ -82,19 +82,19 @@ static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range)
return range->end - range->start;
}
-static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev)
{
- return ivpu_hw_btrs_ratio_to_freq(vdev, ratio);
+ return ivpu_hw_btrs_dpu_max_freq_get(vdev);
}
-static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
+static inline u32 ivpu_hw_dpu_freq_get(struct ivpu_device *vdev)
{
- ivpu_hw_ip_irq_clear(vdev);
+ return ivpu_hw_btrs_dpu_freq_get(vdev);
}
-static inline u32 ivpu_hw_pll_freq_get(struct ivpu_device *vdev)
+static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
{
- return ivpu_hw_btrs_pll_freq_get(vdev);
+ ivpu_hw_ip_irq_clear(vdev);
}
static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c
index 56c56012b980..b236c7234daa 100644
--- a/drivers/accel/ivpu/ivpu_hw_btrs.c
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
+#include <linux/units.h>
+
#include "ivpu_drv.h"
#include "ivpu_hw.h"
#include "ivpu_hw_btrs.h"
@@ -28,17 +30,13 @@
#define BTRS_LNL_ALL_IRQ_MASK ((u32)-1)
-#define BTRS_MTL_WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_5_3)
-#define BTRS_MTL_WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_4_3)
-#define BTRS_MTL_WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_5_3)
-#define BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3)
-#define BTRS_MTL_WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0)
#define PLL_CDYN_DEFAULT 0x80
#define PLL_EPP_DEFAULT 0x80
#define PLL_CONFIG_DEFAULT 0x0
-#define PLL_SIMULATION_FREQ 10000000
-#define PLL_REF_CLK_FREQ 50000000
+#define PLL_REF_CLK_FREQ 50000000ull
+#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ)
+
#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC)
#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC)
#define TIMEOUT_US (150 * USEC_PER_MSEC)
@@ -62,6 +60,8 @@
#define DCT_ENABLE 0x1
#define DCT_DISABLE 0x0
+static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio);
+
int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev)
{
REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, BTRS_MTL_ALL_IRQ_MASK);
@@ -156,7 +156,7 @@ static int info_init_mtl(struct ivpu_device *vdev)
hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH;
hw->sku = BTRS_MTL_TILE_SKU_BOTH;
- hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO;
+ hw->config = WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3);
return 0;
}
@@ -334,8 +334,8 @@ int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable)
prepare_wp_request(vdev, &wp, enable);
- ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n",
- PLL_RATIO_TO_FREQ(wp.target), wp.cfg, wp.epp, wp.cdyn);
+ ivpu_dbg(vdev, PM, "PLL workpoint request: %lu MHz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n",
+ pll_ratio_to_dpu_freq(vdev, wp.target) / HZ_PER_MHZ, wp.cfg, wp.epp, wp.cdyn);
ret = wp_request_send(vdev, &wp);
if (ret) {
@@ -573,6 +573,47 @@ int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev)
return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
}
+static u32 pll_config_get_mtl(struct ivpu_device *vdev)
+{
+ return REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL);
+}
+
+static u32 pll_config_get_lnl(struct ivpu_device *vdev)
+{
+ return REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ);
+}
+
+static u32 pll_ratio_to_dpu_freq_mtl(u16 ratio)
+{
+ return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3;
+}
+
+static u32 pll_ratio_to_dpu_freq_lnl(u16 ratio)
+{
+ return PLL_RATIO_TO_FREQ(ratio) / 2;
+}
+
+static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return pll_ratio_to_dpu_freq_mtl(ratio);
+ else
+ return pll_ratio_to_dpu_freq_lnl(ratio);
+}
+
+u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev)
+{
+ return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio);
+}
+
+u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return pll_ratio_to_dpu_freq_mtl(pll_config_get_mtl(vdev));
+ else
+ return pll_ratio_to_dpu_freq_lnl(pll_config_get_lnl(vdev));
+}
+
/* Handler for IRQs from Buttress core (irqB) */
bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq)
{
@@ -582,9 +623,12 @@ bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq)
if (!status)
return false;
- if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status))
- ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x",
- REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL));
+ if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) {
+ u32 pll = pll_config_get_mtl(vdev);
+
+ ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz",
+ pll, pll_ratio_to_dpu_freq_mtl(pll) / HZ_PER_MHZ);
+ }
if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) {
ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0));
@@ -633,8 +677,12 @@ bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq)
queue_work(system_wq, &vdev->irq_dct_work);
}
- if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status))
- ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ));
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) {
+ u32 pll = pll_config_get_lnl(vdev);
+
+ ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz",
+ pll, pll_ratio_to_dpu_freq_lnl(pll) / HZ_PER_MHZ);
+ }
if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) {
ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n",
@@ -717,60 +765,6 @@ void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 acti
REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val);
}
-static u32 pll_ratio_to_freq_mtl(u32 ratio, u32 config)
-{
- u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
- u32 cpu_clock;
-
- if ((config & 0xff) == MTL_PLL_RATIO_4_3)
- cpu_clock = pll_clock * 2 / 4;
- else
- cpu_clock = pll_clock * 2 / 5;
-
- return cpu_clock;
-}
-
-u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
-{
- struct ivpu_hw_info *hw = vdev->hw;
-
- if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
- return pll_ratio_to_freq_mtl(ratio, hw->config);
- else
- return PLL_RATIO_TO_FREQ(ratio);
-}
-
-static u32 pll_freq_get_mtl(struct ivpu_device *vdev)
-{
- u32 pll_curr_ratio;
-
- pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL);
- pll_curr_ratio &= VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK;
-
- if (!ivpu_is_silicon(vdev))
- return PLL_SIMULATION_FREQ;
-
- return pll_ratio_to_freq_mtl(pll_curr_ratio, vdev->hw->config);
-}
-
-static u32 pll_freq_get_lnl(struct ivpu_device *vdev)
-{
- u32 pll_curr_ratio;
-
- pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ);
- pll_curr_ratio &= VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK;
-
- return PLL_RATIO_TO_FREQ(pll_curr_ratio);
-}
-
-u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev)
-{
- if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
- return pll_freq_get_mtl(vdev);
- else
- return pll_freq_get_lnl(vdev);
-}
-
u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev)
{
if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h
index 1fd71b4d4ab0..d2d82651976d 100644
--- a/drivers/accel/ivpu/ivpu_hw_btrs.h
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_HW_BTRS_H__
@@ -13,9 +13,8 @@
#define PLL_PROFILING_FREQ_DEFAULT 38400000
#define PLL_PROFILING_FREQ_HIGH 400000000
-#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ)
-#define DCT_DEFAULT_ACTIVE_PERCENT 15u
+#define DCT_DEFAULT_ACTIVE_PERCENT 30u
#define DCT_PERIOD_US 35300u
int ivpu_hw_btrs_info_init(struct ivpu_device *vdev);
@@ -32,12 +31,12 @@ int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev);
void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev);
void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev);
void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev);
+u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev);
+u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev);
bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq);
bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq);
int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable);
void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent);
-u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev);
-u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio);
u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev);
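
Worked example (not part of the patch) of what the ratio-to-frequency helpers in ivpu_hw_btrs.c above compute: the PLL clock is ratio * 50 MHz, scaled by 2/3 on MTL and by 1/2 on LNL. The ratio value 0x20 is illustrative only.

#include <stdint.h>
#include <stdio.h>

#define PLL_REF_CLK_FREQ	50000000ull	/* 50 MHz */
#define HZ_PER_MHZ		1000000ull

/* Same arithmetic as pll_ratio_to_dpu_freq_mtl()/_lnl() above. */
static uint64_t dpu_freq_mtl(uint16_t ratio) { return ratio * PLL_REF_CLK_FREQ * 2 / 3; }
static uint64_t dpu_freq_lnl(uint16_t ratio) { return ratio * PLL_REF_CLK_FREQ / 2; }

int main(void)
{
	uint16_t ratio = 0x20;	/* example PLL ratio read from the buttress */

	/* prints "MTL: 1066 MHz, LNL: 800 MHz" for ratio 0x20 */
	printf("MTL: %llu MHz, LNL: %llu MHz\n",
	       (unsigned long long)(dpu_freq_mtl(ratio) / HZ_PER_MHZ),
	       (unsigned long long)(dpu_freq_lnl(ratio) / HZ_PER_MHZ));
	return 0;
}
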
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 0e096fd9b95d..39f83225c181 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -302,7 +302,8 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req
struct ivpu_ipc_consumer cons;
int ret;
- drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev));
+ drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev) &&
+ pm_runtime_enabled(vdev->drm.dev));
ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 004059e4f1e8..b28da35c30b6 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -470,8 +470,8 @@ static void ivpu_job_destroy(struct ivpu_job *job)
struct ivpu_device *vdev = job->vdev;
u32 i;
- ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d",
- job->job_id, job->file_priv->ctx.id, job->engine_idx);
+ ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d cmdq_id %u engine %d",
+ job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx);
for (i = 0; i < job->bo_count; i++)
if (job->bos[i])
@@ -564,8 +564,8 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32
dma_fence_signal(job->done_fence);
trace_job("done", job);
- ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n",
- job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
+ ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n",
+ job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, job_status);
ivpu_job_destroy(job);
ivpu_stop_job_timeout_detection(vdev);
@@ -664,8 +664,8 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
}
trace_job("submit", job);
- ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n",
- job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->priority,
+ ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d cmdq_id %u engine %d prio %d addr 0x%llx next %d\n",
+ job->job_id, file_priv->ctx.id, cmdq->id, job->engine_idx, cmdq->priority,
job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
mutex_unlock(&file_priv->lock);
@@ -681,8 +681,8 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
err_erase_xa:
xa_erase(&vdev->submitted_jobs_xa, job->job_id);
err_unlock:
- mutex_unlock(&vdev->submitted_jobs_lock);
mutex_unlock(&file_priv->lock);
+ mutex_unlock(&vdev->submitted_jobs_lock);
ivpu_rpm_put(vdev);
return ret;
}
@@ -777,7 +777,8 @@ static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv,
goto err_free_handles;
}
- ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", file_priv->ctx.id, buffer_count);
+ ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u cmdq_id %u buf_count %u\n",
+ file_priv->ctx.id, cmdq_id, buffer_count);
job = ivpu_job_create(file_priv, engine, buffer_count);
if (!job) {
@@ -873,15 +874,21 @@ int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *
int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = file_priv->vdev;
struct drm_ivpu_cmdq_create *args = data;
struct ivpu_cmdq *cmdq;
+ int ret;
- if (!ivpu_is_capable(file_priv->vdev, DRM_IVPU_CAP_MANAGE_CMDQ))
+ if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ))
return -ENODEV;
if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME)
return -EINVAL;
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
mutex_lock(&file_priv->lock);
cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), false);
@@ -890,6 +897,8 @@ int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *
mutex_unlock(&file_priv->lock);
+ ivpu_rpm_put(vdev);
+
return cmdq ? 0 : -ENOMEM;
}
@@ -899,28 +908,35 @@ int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file
struct ivpu_device *vdev = file_priv->vdev;
struct drm_ivpu_cmdq_destroy *args = data;
struct ivpu_cmdq *cmdq;
- u32 cmdq_id;
+ u32 cmdq_id = 0;
int ret;
if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ))
return -ENODEV;
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
mutex_lock(&file_priv->lock);
cmdq = xa_load(&file_priv->cmdq_xa, args->cmdq_id);
if (!cmdq || cmdq->is_legacy) {
ret = -ENOENT;
- goto err_unlock;
+ } else {
+ cmdq_id = cmdq->id;
+ ivpu_cmdq_destroy(file_priv, cmdq);
+ ret = 0;
}
- cmdq_id = cmdq->id;
- ivpu_cmdq_destroy(file_priv, cmdq);
mutex_unlock(&file_priv->lock);
- ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id);
- return 0;
-err_unlock:
- mutex_unlock(&file_priv->lock);
+ /* Abort any pending jobs only if cmdq was destroyed */
+ if (!ret)
+ ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id);
+
+ ivpu_rpm_put(vdev);
+
return ret;
}
diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c
index ffe7b10f8a76..2a043baf10ca 100644
--- a/drivers/accel/ivpu/ivpu_ms.c
+++ b/drivers/accel/ivpu/ivpu_ms.c
@@ -4,6 +4,7 @@
*/
#include <drm/drm_file.h>
+#include <linux/pm_runtime.h>
#include "ivpu_drv.h"
#include "ivpu_gem.h"
@@ -44,6 +45,10 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
args->sampling_period_ns < MS_MIN_SAMPLE_PERIOD_NS)
return -EINVAL;
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
mutex_lock(&file_priv->ms_lock);
if (get_instance_by_mask(file_priv, args->metric_group_mask)) {
@@ -96,6 +101,8 @@ err_free_ms:
kfree(ms);
unlock:
mutex_unlock(&file_priv->ms_lock);
+
+ ivpu_rpm_put(vdev);
return ret;
}
@@ -160,6 +167,10 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *
if (!args->metric_group_mask)
return -EINVAL;
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
mutex_lock(&file_priv->ms_lock);
ms = get_instance_by_mask(file_priv, args->metric_group_mask);
@@ -187,6 +198,7 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *
unlock:
mutex_unlock(&file_priv->ms_lock);
+ ivpu_rpm_put(vdev);
return ret;
}
@@ -204,11 +216,17 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file
{
struct ivpu_file_priv *file_priv = file->driver_priv;
struct drm_ivpu_metric_streamer_stop *args = data;
+ struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_ms_instance *ms;
+ int ret;
if (!args->metric_group_mask)
return -EINVAL;
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
mutex_lock(&file_priv->ms_lock);
ms = get_instance_by_mask(file_priv, args->metric_group_mask);
@@ -217,6 +235,7 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file
mutex_unlock(&file_priv->ms_lock);
+ ivpu_rpm_put(vdev);
return ms ? 0 : -EINVAL;
}
@@ -281,6 +300,9 @@ unlock:
void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
{
struct ivpu_ms_instance *ms, *tmp;
+ struct ivpu_device *vdev = file_priv->vdev;
+
+ pm_runtime_get_sync(vdev->drm.dev);
mutex_lock(&file_priv->ms_lock);
@@ -293,6 +315,8 @@ void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
free_instance(file_priv, ms);
mutex_unlock(&file_priv->ms_lock);
+
+ pm_runtime_put_autosuspend(vdev->drm.dev);
}
void ivpu_ms_cleanup_all(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index b5891e91f7ab..ea30db181cd7 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -34,6 +34,7 @@ module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
#define PM_RESCHEDULE_LIMIT 5
+#define PM_TDR_HEARTBEAT_LIMIT 30
static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
{
@@ -44,6 +45,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
ivpu_fw_log_reset(vdev);
ivpu_fw_load(vdev);
fw->entry_point = fw->cold_boot_entry_point;
+ fw->last_heartbeat = 0;
}
static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev)
@@ -189,7 +191,24 @@ static void ivpu_job_timeout_work(struct work_struct *work)
{
struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
struct ivpu_device *vdev = pm->vdev;
+ u64 heartbeat;
+ if (ivpu_jsm_get_heartbeat(vdev, 0, &heartbeat) || heartbeat <= vdev->fw->last_heartbeat) {
+ ivpu_err(vdev, "Job timeout detected, heartbeat not progressed\n");
+ goto recovery;
+ }
+
+ if (atomic_fetch_inc(&vdev->job_timeout_counter) > PM_TDR_HEARTBEAT_LIMIT) {
+ ivpu_err(vdev, "Job timeout detected, heartbeat limit exceeded\n");
+ goto recovery;
+ }
+
+ vdev->fw->last_heartbeat = heartbeat;
+ ivpu_start_job_timeout_detection(vdev);
+ return;
+
+recovery:
+ atomic_set(&vdev->job_timeout_counter, 0);
ivpu_pm_trigger_recovery(vdev, "TDR");
}
@@ -204,6 +223,7 @@ void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
{
cancel_delayed_work_sync(&vdev->pm->job_timeout_work);
+ atomic_set(&vdev->job_timeout_counter, 0);
}
int ivpu_pm_suspend_cb(struct device *dev)
@@ -428,16 +448,17 @@ int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent)
active_us = (DCT_PERIOD_US * active_percent) / 100;
inactive_us = DCT_PERIOD_US - active_us;
+ vdev->pm->dct_active_percent = active_percent;
+
+ ivpu_dbg(vdev, PM, "DCT requested %u%% (D0: %uus, D0i2: %uus)\n",
+ active_percent, active_us, inactive_us);
+
ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us);
if (ret) {
ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret);
return ret;
}
- vdev->pm->dct_active_percent = active_percent;
-
- ivpu_dbg(vdev, PM, "DCT set to %u%% (D0: %uus, D0i2: %uus)\n",
- active_percent, active_us, inactive_us);
return 0;
}
@@ -445,15 +466,16 @@ int ivpu_pm_dct_disable(struct ivpu_device *vdev)
{
int ret;
+ vdev->pm->dct_active_percent = 0;
+
+ ivpu_dbg(vdev, PM, "DCT requested to be disabled\n");
+
ret = ivpu_jsm_dct_disable(vdev);
if (ret) {
ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret);
return ret;
}
- vdev->pm->dct_active_percent = 0;
-
- ivpu_dbg(vdev, PM, "DCT disabled\n");
return 0;
}
@@ -466,7 +488,7 @@ void ivpu_pm_irq_dct_work_fn(struct work_struct *work)
if (ivpu_hw_btrs_dct_get_request(vdev, &enable))
return;
- if (vdev->pm->dct_active_percent)
+ if (enable)
ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT);
else
ret = ivpu_pm_dct_disable(vdev);
diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c
index 97102feaf8dd..268ab7744a8b 100644
--- a/drivers/accel/ivpu/ivpu_sysfs.c
+++ b/drivers/accel/ivpu/ivpu_sysfs.c
@@ -1,10 +1,12 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2024 Intel Corporation
+ * Copyright (C) 2024-2025 Intel Corporation
*/
#include <linux/device.h>
#include <linux/err.h>
+#include <linux/pm_runtime.h>
+#include <linux/units.h>
#include "ivpu_drv.h"
#include "ivpu_gem.h"
@@ -90,10 +92,55 @@ sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf)
static DEVICE_ATTR_RO(sched_mode);
+/**
+ * DOC: npu_max_frequency_mhz
+ *
+ * The npu_max_frequency_mhz attribute shows the maximum frequency in MHz of
+ * the NPU's data processing unit.
+ */
+static ssize_t
+npu_max_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct ivpu_device *vdev = to_ivpu_device(drm);
+ u32 freq = ivpu_hw_dpu_max_freq_get(vdev);
+
+ return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ);
+}
+
+static DEVICE_ATTR_RO(npu_max_frequency_mhz);
+
+/**
+ * DOC: npu_current_frequency_mhz
+ *
+ * The npu_current_frequency_mhz attribute shows the current frequency in MHz
+ * of the NPU's data processing unit.
+ */
+static ssize_t
+npu_current_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct ivpu_device *vdev = to_ivpu_device(drm);
+ u32 freq = 0;
+
+ /* Read frequency only if device is active, otherwise frequency is 0 */
+ if (pm_runtime_get_if_active(vdev->drm.dev) > 0) {
+ freq = ivpu_hw_dpu_freq_get(vdev);
+
+ pm_runtime_put_autosuspend(vdev->drm.dev);
+ }
+
+ return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ);
+}
+
+static DEVICE_ATTR_RO(npu_current_frequency_mhz);
+
static struct attribute *ivpu_dev_attrs[] = {
&dev_attr_npu_busy_time_us.attr,
&dev_attr_npu_memory_utilization.attr,
&dev_attr_sched_mode.attr,
+ &dev_attr_npu_max_frequency_mhz.attr,
+ &dev_attr_npu_current_frequency_mhz.attr,
NULL,
};
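
For reference, a minimal userspace sketch (not part of the patch) showing how the new attributes could be read. The sysfs path is an assumption: the attributes are attached to the NPU's parent device, so the exact location depends on the device's PCI address; the accel class device link used here is one plausible route.

/* Hypothetical userspace reader for the new frequency attributes (sketch only). */
#include <stdio.h>

int main(void)
{
	unsigned long mhz = 0;
	/* Assumed path: the attribute lives on the device the accel node points at. */
	FILE *f = fopen("/sys/class/accel/accel0/device/npu_current_frequency_mhz", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%lu", &mhz) == 1)
		printf("NPU DPU frequency: %lu MHz\n", mhz);
	fclose(f);
	return 0;
}
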
diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h
index 908e68ea1c39..218468bbbcad 100644
--- a/drivers/accel/ivpu/vpu_boot_api.h
+++ b/drivers/accel/ivpu/vpu_boot_api.h
@@ -26,7 +26,7 @@
* Minor version changes when API backward compatibility is preserved.
* Resets to 0 if Major version is incremented.
*/
-#define VPU_BOOT_API_VER_MINOR 26
+#define VPU_BOOT_API_VER_MINOR 28
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
@@ -76,8 +76,15 @@ struct vpu_firmware_header {
* submission queue size and device capabilities.
*/
u32 preemption_buffer_2_size;
+ /*
+	 * Maximum preemption buffer size that the FW can use: the host driver
+	 * does not need to allocate more space than specified by these fields.
+ * A value of 0 means no declared limit.
+ */
+ u32 preemption_buffer_1_max_size;
+ u32 preemption_buffer_2_max_size;
/* Space reserved for future preemption-related fields. */
- u32 preemption_reserved[6];
+ u32 preemption_reserved[4];
/* FW image read only section start address, 4KB aligned */
u64 ro_section_start_address;
/* FW image read only section size, 4KB aligned */
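
A hedged host-side sketch of how the new *_max_size fields might be honoured when sizing the secondary preemption buffer; only the struct and field names come from the header above, the helper itself is an assumption.

/* Hypothetical sizing helper (assumption, not part of this patch). */
#include <linux/minmax.h>
#include <linux/types.h>
#include "vpu_boot_api.h"

static u32 preempt_buf_2_size(const struct vpu_firmware_header *fw_hdr, u32 requested)
{
	/* A max size of 0 means the firmware declares no limit. */
	if (!fw_hdr->preemption_buffer_2_max_size)
		return requested;

	/* Never allocate more than the firmware says it can use. */
	return min(requested, fw_hdr->preemption_buffer_2_max_size);
}
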
@@ -134,7 +141,7 @@ enum vpu_trace_destination {
/*
* Processor bit shifts (for loggable HW components).
*/
-#define VPU_TRACE_PROC_BIT_ARM 0
+#define VPU_TRACE_PROC_BIT_RESERVED 0
#define VPU_TRACE_PROC_BIT_LRT 1
#define VPU_TRACE_PROC_BIT_LNN 2
#define VPU_TRACE_PROC_BIT_SHV_0 3
diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h
index 7215c144158c..4b6b2b3d2583 100644
--- a/drivers/accel/ivpu/vpu_jsm_api.h
+++ b/drivers/accel/ivpu/vpu_jsm_api.h
@@ -22,7 +22,7 @@
/*
* Minor version changes when API backward compatibility is preserved.
*/
-#define VPU_JSM_API_VER_MINOR 25
+#define VPU_JSM_API_VER_MINOR 29
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
@@ -53,8 +53,7 @@
* Engine indexes.
*/
#define VPU_ENGINE_COMPUTE 0
-#define VPU_ENGINE_COPY 1
-#define VPU_ENGINE_NB 2
+#define VPU_ENGINE_NB 1
/*
* VPU status values.
@@ -126,11 +125,13 @@ enum {
* When set, indicates that job queue uses native fences (as inline commands
* in job queue). Such queues may also use legacy fences (as commands in batch buffers).
* When cleared, indicates the job queue only uses legacy fences.
- * NOTE: For queues using native fences, VPU expects that all jobs in the queue
- * are immediately followed by an inline command object. This object is expected
- * to be a fence signal command in most cases, but can also be a NOP in case the host
- * does not need per-job fence signalling. Other inline commands objects can be
- * inserted between "job and inline command" pairs.
+ * NOTES:
+ * 1. For queues using native fences, VPU expects that all jobs in the queue
+ * are immediately followed by an inline command object. This object is expected
+ * to be a fence signal command in most cases, but can also be a NOP in case the host
+ *    does not need per-job fence signalling. Other inline command objects can be
+ * inserted between "job and inline command" pairs.
+ * 2. Native fence queues are only supported on VPU 40xx onwards.
*/
VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U),
@@ -275,6 +276,8 @@ struct vpu_inline_cmd {
u64 value;
/* User VA of the log buffer in which to add log entry on completion. */
u64 log_buffer_va;
+ /* NPU private data. */
+ u64 npu_private_data;
} fence;
/* Other commands do not have a payload. */
/* Payload definition for future inline commands can be inserted here. */
@@ -791,12 +794,22 @@ struct vpu_jsm_metric_streamer_update {
/** Metric group mask that identifies metric streamer instance. */
u64 metric_group_mask;
/**
- * Address and size of the buffer where the VPU will write metric data. If
- * the buffer address is 0 or same as the currently used buffer the VPU will
- * continue writing metric data to the current buffer. In this case the
- * buffer size is ignored and the size of the current buffer is unchanged.
- * If the address is non-zero and differs from the current buffer address the
- * VPU will immediately switch data collection to the new buffer.
+ * Address and size of the buffer where the VPU will write metric data.
+	 * These fields determine how the update operation is performed:
+	 * 1. the client needs information about the number of collected samples and
+	 *    the amount of data written to the current buffer
+	 * 2. the client wants to switch to a new buffer
+ *
+ * Case 1. is identified by the buffer address being 0 or the same as the
+ * currently used buffer address. In this case the buffer size is ignored and
+ * the size of the current buffer is unchanged. The VPU will return an update
+ * in the vpu_jsm_metric_streamer_done structure. The internal writing position
+ * into the buffer is not changed.
+ *
+	 * Case 2. is identified by the address being non-zero and different from the
+ * current buffer address. The VPU will immediately switch data collection to
+ * the new buffer. Then the VPU will return an update in the
+ * vpu_jsm_metric_streamer_done structure.
*/
u64 buffer_addr;
u64 buffer_size;
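
To make the two update cases concrete, a hedged sketch of how a host driver might fill this structure; the helper names are assumptions, and only metric_group_mask, buffer_addr and buffer_size come from the structure documented above.

#include <linux/types.h>
#include "vpu_jsm_api.h"

/* Case 1 (assumed helper): query progress only, keep the current buffer. */
static void ms_update_query(struct vpu_jsm_metric_streamer_update *upd, u64 group_mask)
{
	upd->metric_group_mask = group_mask;
	upd->buffer_addr = 0;	/* 0 (or the current address) means "do not switch" */
	upd->buffer_size = 0;	/* ignored in this case */
}

/* Case 2 (assumed helper): a new, non-zero address switches collection immediately. */
static void ms_update_switch(struct vpu_jsm_metric_streamer_update *upd, u64 group_mask,
			     u64 new_buffer_va, u64 new_buffer_size)
{
	upd->metric_group_mask = group_mask;
	upd->buffer_addr = new_buffer_va;
	upd->buffer_size = new_buffer_size;
}
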
@@ -934,6 +947,7 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
/*
* Default quantum in 100ns units for scheduling across processes
* within a priority band
+ * Minimum value supported by NPU is 1ms (10000 in 100ns units).
*/
u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
/*
@@ -946,8 +960,10 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
* in situations when it's starved by the focus band.
*/
u32 normal_band_percentage;
- /* Reserved */
- u32 reserved_0;
+ /*
+	 * TDR timeout value in milliseconds. The default value of 0 means no timeout.
+ */
+ u32 tdr_timeout;
};
/*
@@ -1024,7 +1040,10 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
s32 in_process_priority;
/* Zero padding / Reserved */
u32 reserved_1;
- /* Context quantum relative to other contexts of same priority in the same process */
+ /*
+ * Context quantum relative to other contexts of same priority in the same process
+ * Minimum value supported by NPU is 1ms (10000 in 100ns units).
+ */
u64 context_quantum;
/* Grace period when preempting context of the same priority within the same process */
u64 grace_period_same_priority;
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index 43aba57b48f0..1bce1af7c72c 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -609,7 +609,7 @@ static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struc
struct scatterlist *sg;
int ret = 0;
- if (obj->import_attach)
+ if (drm_gem_is_imported(obj))
return -EINVAL;
for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) {
@@ -630,7 +630,7 @@ static void qaic_free_object(struct drm_gem_object *obj)
{
struct qaic_bo *bo = to_qaic_bo(obj);
- if (obj->import_attach) {
+ if (drm_gem_is_imported(obj)) {
/* DMABUF/PRIME Path */
drm_prime_gem_destroy(obj, NULL);
} else {
@@ -870,7 +870,7 @@ static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo,
{
int ret;
- if (bo->base.import_attach)
+ if (drm_gem_is_imported(&bo->base))
ret = qaic_prepare_import_bo(bo, hdr);
else
ret = qaic_prepare_export_bo(qdev, bo, hdr);
@@ -894,7 +894,7 @@ static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *b
static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo)
{
- if (bo->base.import_attach)
+ if (drm_gem_is_imported(&bo->base))
qaic_unprepare_import_bo(bo);
else
qaic_unprepare_export_bo(qdev, bo);
diff --git a/drivers/accel/qaic/qaic_debugfs.c b/drivers/accel/qaic/qaic_debugfs.c
index ba0cf2f94732..a991b8198dc4 100644
--- a/drivers/accel/qaic/qaic_debugfs.c
+++ b/drivers/accel/qaic/qaic_debugfs.c
@@ -240,7 +240,6 @@ static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_d
mhi_unprepare:
mhi_unprepare_from_transfer(mhi_dev);
destroy_workqueue:
- flush_workqueue(qdev->bootlog_wq);
destroy_workqueue(qdev->bootlog_wq);
out:
return ret;
@@ -253,7 +252,6 @@ static void qaic_bootlog_mhi_remove(struct mhi_device *mhi_dev)
qdev = dev_get_drvdata(&mhi_dev->dev);
mhi_unprepare_from_transfer(qdev->bootlog_ch);
- flush_workqueue(qdev->bootlog_wq);
destroy_workqueue(qdev->bootlog_wq);
qdev->bootlog_ch = NULL;
}