Diffstat (limited to 'drivers/accel')
-rw-r--r--  drivers/accel/Kconfig | 1
-rw-r--r--  drivers/accel/Makefile | 1
-rw-r--r--  drivers/accel/amdxdna/Makefile | 1
-rw-r--r--  drivers/accel/amdxdna/TODO | 1
-rw-r--r--  drivers/accel/amdxdna/aie2_ctx.c | 195
-rw-r--r--  drivers/accel/amdxdna/aie2_error.c | 95
-rw-r--r--  drivers/accel/amdxdna/aie2_message.c | 647
-rw-r--r--  drivers/accel/amdxdna/aie2_msg_priv.h | 88
-rw-r--r--  drivers/accel/amdxdna/aie2_pci.c | 269
-rw-r--r--  drivers/accel/amdxdna/aie2_pci.h | 54
-rw-r--r--  drivers/accel/amdxdna/aie2_smu.c | 49
-rw-r--r--  drivers/accel/amdxdna/amdxdna_ctx.c | 104
-rw-r--r--  drivers/accel/amdxdna/amdxdna_ctx.h | 45
-rw-r--r--  drivers/accel/amdxdna/amdxdna_error.h | 59
-rw-r--r--  drivers/accel/amdxdna/amdxdna_gem.c | 51
-rw-r--r--  drivers/accel/amdxdna/amdxdna_gem.h | 6
-rw-r--r--  drivers/accel/amdxdna/amdxdna_mailbox.c | 14
-rw-r--r--  drivers/accel/amdxdna/amdxdna_mailbox_helper.h | 6
-rw-r--r--  drivers/accel/amdxdna/amdxdna_pci_drv.c | 63
-rw-r--r--  drivers/accel/amdxdna/amdxdna_pci_drv.h | 3
-rw-r--r--  drivers/accel/amdxdna/amdxdna_pm.c | 94
-rw-r--r--  drivers/accel/amdxdna/amdxdna_pm.h | 18
-rw-r--r--  drivers/accel/amdxdna/npu1_regs.c | 8
-rw-r--r--  drivers/accel/amdxdna/npu2_regs.c | 2
-rw-r--r--  drivers/accel/amdxdna/npu4_regs.c | 12
-rw-r--r--  drivers/accel/amdxdna/npu5_regs.c | 2
-rw-r--r--  drivers/accel/amdxdna/npu6_regs.c | 2
-rw-r--r--  drivers/accel/ethosu/Kconfig | 11
-rw-r--r--  drivers/accel/ethosu/Makefile | 4
-rw-r--r--  drivers/accel/ethosu/ethosu_device.h | 197
-rw-r--r--  drivers/accel/ethosu/ethosu_drv.c | 403
-rw-r--r--  drivers/accel/ethosu/ethosu_drv.h | 15
-rw-r--r--  drivers/accel/ethosu/ethosu_gem.c | 704
-rw-r--r--  drivers/accel/ethosu/ethosu_gem.h | 46
-rw-r--r--  drivers/accel/ethosu/ethosu_job.c | 497
-rw-r--r--  drivers/accel/ethosu/ethosu_job.h | 40
-rw-r--r--  drivers/accel/ivpu/Makefile | 1
-rw-r--r--  drivers/accel/ivpu/ivpu_debugfs.c | 38
-rw-r--r--  drivers/accel/ivpu/ivpu_drv.c | 18
-rw-r--r--  drivers/accel/ivpu/ivpu_drv.h | 5
-rw-r--r--  drivers/accel/ivpu/ivpu_fw.c | 229
-rw-r--r--  drivers/accel/ivpu/ivpu_fw.h | 14
-rw-r--r--  drivers/accel/ivpu/ivpu_gem.c | 161
-rw-r--r--  drivers/accel/ivpu/ivpu_gem.h | 22
-rw-r--r--  drivers/accel/ivpu/ivpu_gem_userptr.c | 213
-rw-r--r--  drivers/accel/ivpu/ivpu_hw.c | 59
-rw-r--r--  drivers/accel/ivpu/ivpu_hw.h | 10
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_btrs.c | 20
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_btrs.h | 2
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h | 3
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_ip.c | 10
-rw-r--r--  drivers/accel/ivpu/ivpu_ipc.c | 2
-rw-r--r--  drivers/accel/ivpu/ivpu_job.c | 257
-rw-r--r--  drivers/accel/ivpu/ivpu_job.h | 49
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu.c | 2
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu_context.c | 9
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu_context.h | 2
-rw-r--r--  drivers/accel/ivpu/ivpu_ms.c | 25
-rw-r--r--  drivers/accel/ivpu/ivpu_pm.c | 18
-rw-r--r--  drivers/accel/ivpu/ivpu_sysfs.c | 3
-rw-r--r--  drivers/accel/ivpu/vpu_jsm_api.h | 653
-rw-r--r--  drivers/accel/qaic/Kconfig | 1
-rw-r--r--  drivers/accel/qaic/Makefile | 2
-rw-r--r--  drivers/accel/qaic/qaic.h | 40
-rw-r--r--  drivers/accel/qaic/qaic_control.c | 25
-rw-r--r--  drivers/accel/qaic/qaic_data.c | 164
-rw-r--r--  drivers/accel/qaic/qaic_drv.c | 116
-rw-r--r--  drivers/accel/qaic/qaic_ras.c | 6
-rw-r--r--  drivers/accel/qaic/qaic_ssr.c | 815
-rw-r--r--  drivers/accel/qaic/qaic_ssr.h | 17
-rw-r--r--  drivers/accel/qaic/qaic_sysfs.c | 109
-rw-r--r--  drivers/accel/qaic/qaic_timesync.c | 9
-rw-r--r--  drivers/accel/qaic/qaic_timesync.h | 3
-rw-r--r--  drivers/accel/qaic/sahara.c | 164
-rw-r--r--  drivers/accel/rocket/rocket_gem.c | 1
75 files changed, 6070 insertions, 1034 deletions
diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig
index bb01cebc42bf..bdf48ccafcf2 100644
--- a/drivers/accel/Kconfig
+++ b/drivers/accel/Kconfig
@@ -25,6 +25,7 @@ menuconfig DRM_ACCEL
and debugfs).
source "drivers/accel/amdxdna/Kconfig"
+source "drivers/accel/ethosu/Kconfig"
source "drivers/accel/habanalabs/Kconfig"
source "drivers/accel/ivpu/Kconfig"
source "drivers/accel/qaic/Kconfig"
diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile
index ffc3fa588666..1d3a7251b950 100644
--- a/drivers/accel/Makefile
+++ b/drivers/accel/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_DRM_ACCEL_AMDXDNA) += amdxdna/
+obj-$(CONFIG_DRM_ACCEL_ARM_ETHOSU) += ethosu/
obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/
obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/
obj-$(CONFIG_DRM_ACCEL_QAIC) += qaic/
diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
index 6797dac65efa..6344aaf523fa 100644
--- a/drivers/accel/amdxdna/Makefile
+++ b/drivers/accel/amdxdna/Makefile
@@ -14,6 +14,7 @@ amdxdna-y := \
amdxdna_mailbox.o \
amdxdna_mailbox_helper.o \
amdxdna_pci_drv.o \
+ amdxdna_pm.o \
amdxdna_sysfs.o \
amdxdna_ubuf.o \
npu1_regs.o \
diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
index ad8ac6e315b6..0e4bbebeaedf 100644
--- a/drivers/accel/amdxdna/TODO
+++ b/drivers/accel/amdxdna/TODO
@@ -1,2 +1 @@
- Add debugfs support
-- Add debug BO support
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index e9f9b1fa5dc1..42d876a427c5 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -21,6 +21,7 @@
#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
static bool force_cmdlist;
module_param(force_cmdlist, bool, 0600);
@@ -88,7 +89,7 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw
goto out;
}
- ret = aie2_config_cu(hwctx);
+ ret = aie2_config_cu(hwctx, NULL);
if (ret) {
XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
goto out;
@@ -167,14 +168,11 @@ static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg)
int aie2_hwctx_resume(struct amdxdna_client *client)
{
- struct amdxdna_dev *xdna = client->xdna;
-
/*
* The resume path cannot guarantee that mailbox channel can be
* regenerated. If this happen, when submit message to this
* mailbox channel, error will return.
*/
- drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
}
@@ -184,12 +182,13 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
struct dma_fence *fence = job->fence;
trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
+
+ amdxdna_pm_suspend_put(job->hwctx->client->xdna);
job->hwctx->priv->completed++;
dma_fence_signal(fence);
up(&job->hwctx->priv->job_sem);
job->job_done = true;
- dma_fence_put(fence);
mmput_async(job->mm);
aie2_job_put(job);
}
@@ -204,10 +203,13 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
cmd_abo = job->cmd_bo;
- if (unlikely(!data))
+ if (unlikely(job->job_timeout)) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
+ ret = -EINVAL;
goto out;
+ }
- if (unlikely(size != sizeof(u32))) {
+ if (unlikely(!data) || unlikely(size != sizeof(u32))) {
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
ret = -EINVAL;
goto out;
@@ -226,11 +228,10 @@ out:
}
static int
-aie2_sched_nocmd_resp_handler(void *handle, void __iomem *data, size_t size)
+aie2_sched_drvcmd_resp_handler(void *handle, void __iomem *data, size_t size)
{
struct amdxdna_sched_job *job = handle;
int ret = 0;
- u32 status;
if (unlikely(!data))
goto out;
@@ -240,8 +241,7 @@ aie2_sched_nocmd_resp_handler(void *handle, void __iomem *data, size_t size)
goto out;
}
- status = readl(data);
- XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+ job->drv_cmd->result = readl(data);
out:
aie2_sched_notify(job);
@@ -260,6 +260,13 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
int ret = 0;
cmd_abo = job->cmd_bo;
+
+ if (unlikely(job->job_timeout)) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
+ ret = -EINVAL;
+ goto out;
+ }
+
if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
ret = -EINVAL;
@@ -314,8 +321,18 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
kref_get(&job->refcnt);
fence = dma_fence_get(job->fence);
- if (unlikely(!cmd_abo)) {
- ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
+ if (job->drv_cmd) {
+ switch (job->drv_cmd->opcode) {
+ case SYNC_DEBUG_BO:
+ ret = aie2_sync_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
+ break;
+ case ATTACH_DEBUG_BO:
+ ret = aie2_config_debug_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
goto out;
}
@@ -362,6 +379,7 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job)
xdna = hwctx->client->xdna;
trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
+ job->job_timeout = true;
mutex_lock(&xdna->dev_lock);
aie2_hwctx_stop(xdna, hwctx, sched_job);
@@ -531,13 +549,12 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
.num_rqs = DRM_SCHED_PRIORITY_COUNT,
.credit_limit = HWCTX_MAX_CMDS,
.timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
- .name = hwctx->name,
+ .name = "amdxdna_js",
.dev = xdna->ddev.dev,
};
struct drm_gpu_scheduler *sched;
struct amdxdna_hwctx_priv *priv;
struct amdxdna_gem_obj *heap;
- struct amdxdna_dev_hdl *ndev;
int i, ret;
priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
@@ -610,10 +627,14 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
goto free_entity;
}
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto free_col_list;
+
ret = aie2_alloc_resource(hwctx);
if (ret) {
XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
- goto free_col_list;
+ goto suspend_put;
}
ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
@@ -628,10 +649,9 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
goto release_resource;
}
+ amdxdna_pm_suspend_put(xdna);
hwctx->status = HWCTX_STAT_INIT;
- ndev = xdna->dev_handle;
- ndev->hwctx_num++;
init_waitqueue_head(&priv->job_free_wq);
XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
@@ -640,6 +660,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
release_resource:
aie2_release_resource(hwctx);
+suspend_put:
+ amdxdna_pm_suspend_put(xdna);
free_col_list:
kfree(hwctx->col_list);
free_entity:
@@ -662,26 +684,25 @@ free_priv:
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
{
- struct amdxdna_dev_hdl *ndev;
struct amdxdna_dev *xdna;
int idx;
xdna = hwctx->client->xdna;
- ndev = xdna->dev_handle;
- ndev->hwctx_num--;
XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
- drm_sched_entity_destroy(&hwctx->priv->entity);
-
aie2_hwctx_wait_for_idle(hwctx);
/* Request fw to destroy hwctx and cancel the rest pending requests */
aie2_release_resource(hwctx);
+ mutex_unlock(&xdna->dev_lock);
+ drm_sched_entity_destroy(&hwctx->priv->entity);
+
/* Wait for all submitted jobs to be completed or canceled */
wait_event(hwctx->priv->job_free_wq,
atomic64_read(&hwctx->job_submit_cnt) ==
atomic64_read(&hwctx->job_free_cnt));
+ mutex_lock(&xdna->dev_lock);
drm_sched_fini(&hwctx->priv->sched);
aie2_ctx_syncobj_destroy(hwctx);
@@ -697,6 +718,14 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
kfree(hwctx->cus);
}
+static int aie2_config_cu_resp_handler(void *handle, void __iomem *data, size_t size)
+{
+ struct amdxdna_hwctx *hwctx = handle;
+
+ amdxdna_pm_suspend_put(hwctx->client->xdna);
+ return 0;
+}
+
static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
{
struct amdxdna_hwctx_param_config_cu *config = buf;
@@ -728,10 +757,14 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
if (!hwctx->cus)
return -ENOMEM;
- ret = aie2_config_cu(hwctx);
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto free_cus;
+
+ ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler);
if (ret) {
XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
- goto free_cus;
+ goto pm_suspend_put;
}
wmb(); /* To avoid locking in command submit when check status */
@@ -739,12 +772,82 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
return 0;
+pm_suspend_put:
+ amdxdna_pm_suspend_put(xdna);
free_cus:
kfree(hwctx->cus);
hwctx->cus = NULL;
return ret;
}
+static void aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq)
+{
+ struct dma_fence *out_fence = aie2_cmd_get_out_fence(hwctx, seq);
+
+ if (!out_fence) {
+ XDNA_ERR(hwctx->client->xdna, "Failed to get fence");
+ return;
+ }
+
+ dma_fence_wait_timeout(out_fence, false, MAX_SCHEDULE_TIMEOUT);
+ dma_fence_put(out_fence);
+}
+
+static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl,
+ bool attach)
+{
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_drv_cmd cmd = { 0 };
+ struct amdxdna_gem_obj *abo;
+ u64 seq;
+ int ret;
+
+ abo = amdxdna_gem_get_obj(client, bo_hdl, AMDXDNA_BO_DEV);
+ if (!abo) {
+ XDNA_ERR(xdna, "Get bo %d failed", bo_hdl);
+ return -EINVAL;
+ }
+
+ if (attach) {
+ if (abo->assigned_hwctx != AMDXDNA_INVALID_CTX_HANDLE) {
+ ret = -EBUSY;
+ goto put_obj;
+ }
+ cmd.opcode = ATTACH_DEBUG_BO;
+ } else {
+ if (abo->assigned_hwctx != hwctx->id) {
+ ret = -EINVAL;
+ goto put_obj;
+ }
+ cmd.opcode = DETACH_DEBUG_BO;
+ }
+
+ ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
+ &bo_hdl, 1, hwctx->id, &seq);
+ if (ret) {
+ XDNA_ERR(xdna, "Submit command failed");
+ goto put_obj;
+ }
+
+ aie2_cmd_wait(hwctx, seq);
+ if (cmd.result) {
+ XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
+ goto put_obj;
+ }
+
+ if (attach)
+ abo->assigned_hwctx = hwctx->id;
+ else
+ abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE;
+
+ XDNA_DBG(xdna, "Config debug BO %d to %s", bo_hdl, hwctx->name);
+
+put_obj:
+ amdxdna_gem_put_obj(abo);
+ return ret;
+}
+
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
{
struct amdxdna_dev *xdna = hwctx->client->xdna;
@@ -754,14 +857,40 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu
case DRM_AMDXDNA_HWCTX_CONFIG_CU:
return aie2_hwctx_cu_config(hwctx, buf, size);
case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
+ return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, true);
case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
- return -EOPNOTSUPP;
+ return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, false);
default:
XDNA_DBG(xdna, "Not supported type %d", type);
return -EOPNOTSUPP;
}
}
+int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl)
+{
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_drv_cmd cmd = { 0 };
+ u64 seq;
+ int ret;
+
+ cmd.opcode = SYNC_DEBUG_BO;
+ ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
+ &debug_bo_hdl, 1, hwctx->id, &seq);
+ if (ret) {
+ XDNA_ERR(xdna, "Submit command failed");
+ return ret;
+ }
+
+ aie2_cmd_wait(hwctx, seq);
+ if (cmd.result) {
+ XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int aie2_populate_range(struct amdxdna_gem_obj *abo)
{
struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
@@ -862,11 +991,15 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
goto free_chain;
}
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto cleanup_job;
+
retry:
ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
if (ret) {
XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
- goto cleanup_job;
+ goto suspend_put;
}
for (i = 0; i < job->bo_cnt; i++) {
@@ -874,7 +1007,7 @@ retry:
if (ret) {
XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
- goto cleanup_job;
+ goto suspend_put;
}
}
@@ -889,12 +1022,12 @@ retry:
msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
} else if (time_after(jiffies, timeout)) {
ret = -ETIME;
- goto cleanup_job;
+ goto suspend_put;
}
ret = aie2_populate_range(abo);
if (ret)
- goto cleanup_job;
+ goto suspend_put;
goto retry;
}
}
@@ -920,6 +1053,8 @@ retry:
return 0;
+suspend_put:
+ amdxdna_pm_suspend_put(xdna);
cleanup_job:
drm_sched_job_cleanup(&job->base);
free_chain:
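
A pattern worth noting in the aie2_ctx.c hunks above: every path that talks to firmware now takes a runtime power-management reference with amdxdna_pm_resume_get() and releases it with amdxdna_pm_suspend_put() on both the success path and every error path, which is what the new suspend_put labels implement. The stand-alone sketch below shows that goto-unwind shape in user-space C; only the two pm helper names come from the patch, while their bodies here, together with alloc_resource() and example_hwctx_init(), are invented stubs for illustration.

#include <stdio.h>

/* Stub bodies: the real helpers live in amdxdna_pm.c. */
static int amdxdna_pm_resume_get(void *xdna)   { (void)xdna; return 0; }
static void amdxdna_pm_suspend_put(void *xdna) { (void)xdna; }
static int alloc_resource(void *hwctx)         { (void)hwctx; return 0; }

static int example_hwctx_init(void *xdna, void *hwctx)
{
	int ret;

	ret = amdxdna_pm_resume_get(xdna);	/* wake the device before any firmware traffic */
	if (ret)
		return ret;

	ret = alloc_resource(hwctx);		/* stands in for aie2_alloc_resource() etc. */
	if (ret)
		goto suspend_put;		/* error: drop the reference we just took */

	amdxdna_pm_suspend_put(xdna);		/* success: allow the device to idle again */
	return 0;

suspend_put:
	amdxdna_pm_suspend_put(xdna);
	return ret;
}

int main(void)
{
	printf("init returned %d\n", example_hwctx_init(NULL, NULL));
	return 0;
}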
diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c
index 5ee905632a39..d452008ec4f4 100644
--- a/drivers/accel/amdxdna/aie2_error.c
+++ b/drivers/accel/amdxdna/aie2_error.c
@@ -13,6 +13,7 @@
#include "aie2_msg_priv.h"
#include "aie2_pci.h"
+#include "amdxdna_error.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"
@@ -46,6 +47,7 @@ enum aie_module_type {
AIE_MEM_MOD = 0,
AIE_CORE_MOD,
AIE_PL_MOD,
+ AIE_UNKNOWN_MOD,
};
enum aie_error_category {
@@ -143,6 +145,31 @@ static const struct aie_event_category aie_ml_shim_tile_event_cat[] = {
EVENT_CATEGORY(74U, AIE_ERROR_LOCK),
};
+static const enum amdxdna_error_num aie_cat_err_num_map[] = {
+ [AIE_ERROR_SATURATION] = AMDXDNA_ERROR_NUM_AIE_SATURATION,
+ [AIE_ERROR_FP] = AMDXDNA_ERROR_NUM_AIE_FP,
+ [AIE_ERROR_STREAM] = AMDXDNA_ERROR_NUM_AIE_STREAM,
+ [AIE_ERROR_ACCESS] = AMDXDNA_ERROR_NUM_AIE_ACCESS,
+ [AIE_ERROR_BUS] = AMDXDNA_ERROR_NUM_AIE_BUS,
+ [AIE_ERROR_INSTRUCTION] = AMDXDNA_ERROR_NUM_AIE_INSTRUCTION,
+ [AIE_ERROR_ECC] = AMDXDNA_ERROR_NUM_AIE_ECC,
+ [AIE_ERROR_LOCK] = AMDXDNA_ERROR_NUM_AIE_LOCK,
+ [AIE_ERROR_DMA] = AMDXDNA_ERROR_NUM_AIE_DMA,
+ [AIE_ERROR_MEM_PARITY] = AMDXDNA_ERROR_NUM_AIE_MEM_PARITY,
+ [AIE_ERROR_UNKNOWN] = AMDXDNA_ERROR_NUM_UNKNOWN,
+};
+
+static_assert(ARRAY_SIZE(aie_cat_err_num_map) == AIE_ERROR_UNKNOWN + 1);
+
+static const enum amdxdna_error_module aie_err_mod_map[] = {
+ [AIE_MEM_MOD] = AMDXDNA_ERROR_MODULE_AIE_MEMORY,
+ [AIE_CORE_MOD] = AMDXDNA_ERROR_MODULE_AIE_CORE,
+ [AIE_PL_MOD] = AMDXDNA_ERROR_MODULE_AIE_PL,
+ [AIE_UNKNOWN_MOD] = AMDXDNA_ERROR_MODULE_UNKNOWN,
+};
+
+static_assert(ARRAY_SIZE(aie_err_mod_map) == AIE_UNKNOWN_MOD + 1);
+
static enum aie_error_category
aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type)
{
@@ -176,12 +203,40 @@ aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type)
if (event_id != lut[i].event_id)
continue;
+ if (lut[i].category > AIE_ERROR_UNKNOWN)
+ return AIE_ERROR_UNKNOWN;
+
return lut[i].category;
}
return AIE_ERROR_UNKNOWN;
}
+static void aie2_update_last_async_error(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err)
+{
+ struct aie_error *errs = err_info;
+ enum amdxdna_error_module err_mod;
+ enum aie_error_category aie_err;
+ enum amdxdna_error_num err_num;
+ struct aie_error *last_err;
+
+ last_err = &errs[num_err - 1];
+ if (last_err->mod_type >= AIE_UNKNOWN_MOD) {
+ err_num = aie_cat_err_num_map[AIE_ERROR_UNKNOWN];
+ err_mod = aie_err_mod_map[AIE_UNKNOWN_MOD];
+ } else {
+ aie_err = aie_get_error_category(last_err->row,
+ last_err->event_id,
+ last_err->mod_type);
+ err_num = aie_cat_err_num_map[aie_err];
+ err_mod = aie_err_mod_map[last_err->mod_type];
+ }
+
+ ndev->last_async_err.err_code = AMDXDNA_ERROR_ENCODE(err_num, err_mod);
+ ndev->last_async_err.ts_us = ktime_to_us(ktime_get_real());
+ ndev->last_async_err.ex_err_code = AMDXDNA_EXTRA_ERR_ENCODE(last_err->row, last_err->col);
+}
+
static u32 aie2_error_backtrack(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err)
{
struct aie_error *errs = err_info;
@@ -264,29 +319,14 @@ static void aie2_error_worker(struct work_struct *err_work)
}
mutex_lock(&xdna->dev_lock);
+ aie2_update_last_async_error(e->ndev, info->payload, info->err_cnt);
+
/* Re-sent this event to firmware */
if (aie2_error_event_send(e))
XDNA_WARN(xdna, "Unable to register async event");
mutex_unlock(&xdna->dev_lock);
}
-int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev)
-{
- struct amdxdna_dev *xdna = ndev->xdna;
- struct async_event *e;
- int i, ret;
-
- drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
- for (i = 0; i < ndev->async_events->event_cnt; i++) {
- e = &ndev->async_events->event[i];
- ret = aie2_error_event_send(e);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev)
{
struct amdxdna_dev *xdna = ndev->xdna;
@@ -341,6 +381,10 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
e->size = ASYNC_BUF_SIZE;
e->resp.status = MAX_AIE2_STATUS_CODE;
INIT_WORK(&e->work, aie2_error_worker);
+
+ ret = aie2_error_event_send(e);
+ if (ret)
+ goto free_wq;
}
ndev->async_events = events;
@@ -349,6 +393,8 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
events->event_cnt, events->size);
return 0;
+free_wq:
+ destroy_workqueue(events->wq);
free_buf:
dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf,
events->addr, DMA_FROM_DEVICE);
@@ -356,3 +402,18 @@ free_events:
kfree(events);
return ret;
}
+
+int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev, struct amdxdna_drm_get_array *args)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ args->num_element = 1;
+ args->element_size = sizeof(ndev->last_async_err);
+ if (copy_to_user(u64_to_user_ptr(args->buffer),
+ &ndev->last_async_err, args->element_size))
+ return -EFAULT;
+
+ return 0;
+}
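
The async-error handling above turns a firmware error record into a stable error code by clamping the module and category to an UNKNOWN fallback, indexing two lookup tables whose sizes are pinned with static_assert(), and packing the result with AMDXDNA_ERROR_ENCODE(). The self-contained sketch below shows the same bounded-lookup shape; the enum values and the ENCODE() macro are invented stand-ins, not the driver's real definitions.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

enum cat { CAT_BUS, CAT_ECC, CAT_UNKNOWN };
enum num { NUM_UNKNOWN = 0, NUM_BUS = 10, NUM_ECC = 11 };

static const enum num cat_to_num[] = {
	[CAT_BUS]     = NUM_BUS,
	[CAT_ECC]     = NUM_ECC,
	[CAT_UNKNOWN] = NUM_UNKNOWN,
};

/* Every category, including the UNKNOWN fallback, must have a mapping. */
static_assert(sizeof(cat_to_num) / sizeof(cat_to_num[0]) == CAT_UNKNOWN + 1,
	      "map must cover all categories");

#define ENCODE(n, m) (((uint64_t)(m) << 32) | (uint32_t)(n))	/* stand-in macro */

int main(void)
{
	enum cat c = CAT_ECC;

	/* Clamp out-of-range input to the UNKNOWN slot before indexing. */
	if (c > CAT_UNKNOWN)
		c = CAT_UNKNOWN;

	printf("err code 0x%llx\n",
	       (unsigned long long)ENCODE(cat_to_num[c], 1));
	return 0;
}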
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 9caad083543d..d493bb1c3360 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -27,6 +27,8 @@
#define DECLARE_AIE2_MSG(name, op) \
DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)
+#define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops)
+
static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
struct xdna_mailbox_msg *msg)
{
@@ -37,7 +39,7 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
if (!ndev->mgmt_chann)
return -ENODEV;
- drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ drm_WARN_ON(&xdna->ddev, xdna->rpm_on && !mutex_is_locked(&xdna->dev_lock));
ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
if (ret == -ETIME) {
xdna_mailbox_stop_channel(ndev->mgmt_chann);
@@ -45,7 +47,7 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
ndev->mgmt_chann = NULL;
}
- if (!ret && *hdl->data != AIE2_STATUS_SUCCESS) {
+ if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) {
XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
msg->opcode, *hdl->data);
ret = -EINVAL;
@@ -208,6 +210,14 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
hwctx->fw_ctx_id = resp.context_id;
WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id");
+ if (ndev->force_preempt_enabled) {
+ ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, &hwctx->fw_ctx_id);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to enable force preempt %d", ret);
+ return ret;
+ }
+ }
+
cq_pair = &resp.cq_pair[0];
x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr);
x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr);
@@ -233,6 +243,7 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
ret = -EINVAL;
goto out_destroy_context;
}
+ ndev->hwctx_num++;
XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d",
hwctx->name, ret, resp.msix_id);
@@ -267,6 +278,7 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc
hwctx->fw_ctx_id);
hwctx->priv->mbox_chann = NULL;
hwctx->fw_ctx_id = -1;
+ ndev->hwctx_num--;
return ret;
}
@@ -332,11 +344,6 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
goto fail;
}
- if (resp.status != AIE2_STATUS_SUCCESS) {
- XDNA_ERR(xdna, "Query NPU status failed, status 0x%x", resp.status);
- ret = -EINVAL;
- goto fail;
- }
XDNA_DBG(xdna, "Query NPU status completed");
if (size < resp.size) {
@@ -358,6 +365,55 @@ fail:
return ret;
}
+int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
+ char __user *buf, u32 size,
+ struct amdxdna_drm_query_telemetry_header *header)
+{
+ DECLARE_AIE2_MSG(get_telemetry, MSG_OP_GET_TELEMETRY);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ dma_addr_t dma_addr;
+ u8 *addr;
+ int ret;
+
+ if (header->type >= MAX_TELEMETRY_TYPE)
+ return -EINVAL;
+
+ addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr,
+ DMA_FROM_DEVICE, GFP_KERNEL);
+ if (!addr)
+ return -ENOMEM;
+
+ req.buf_addr = dma_addr;
+ req.buf_size = size;
+ req.type = header->type;
+
+ drm_clflush_virt_range(addr, size); /* device can access */
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret) {
+ XDNA_ERR(xdna, "Query telemetry failed, status %d", ret);
+ goto free_buf;
+ }
+
+ if (size < resp.size) {
+ ret = -EINVAL;
+ XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size);
+ goto free_buf;
+ }
+
+ if (copy_to_user(buf, addr, resp.size)) {
+ ret = -EFAULT;
+ XDNA_ERR(xdna, "Failed to copy telemetry to user space");
+ goto free_buf;
+ }
+
+ header->major = resp.major;
+ header->minor = resp.minor;
+
+free_buf:
+ dma_free_noncoherent(xdna->ddev.dev, size, addr, dma_addr, DMA_FROM_DEVICE);
+ return ret;
+}
+
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
void *handle, int (*cb)(void*, void __iomem *, size_t))
{
@@ -377,15 +433,17 @@ int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr,
return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT);
}
-int aie2_config_cu(struct amdxdna_hwctx *hwctx)
+int aie2_config_cu(struct amdxdna_hwctx *hwctx,
+ int (*notify_cb)(void *, void __iomem *, size_t))
{
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
struct amdxdna_dev *xdna = hwctx->client->xdna;
u32 shift = xdna->dev_info->dev_mem_buf_shift;
- DECLARE_AIE2_MSG(config_cu, MSG_OP_CONFIG_CU);
+ struct config_cu_req req = { 0 };
+ struct xdna_mailbox_msg msg;
struct drm_gem_object *gobj;
struct amdxdna_gem_obj *abo;
- int ret, i;
+ int i;
if (!chann)
return -ENODEV;
@@ -423,191 +481,386 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx)
}
req.num_cus = hwctx->cus->num_cus;
- ret = xdna_send_msg_wait(xdna, chann, &msg);
- if (ret == -ETIME)
- aie2_destroy_context(xdna->dev_handle, hwctx);
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ msg.handle = hwctx;
+ msg.opcode = MSG_OP_CONFIG_CU;
+ msg.notify_cb = notify_cb;
+ return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+}
- if (resp.status == AIE2_STATUS_SUCCESS) {
- XDNA_DBG(xdna, "Configure %d CUs, ret %d", req.num_cus, ret);
- return 0;
- }
+static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op)
+{
+ struct execute_buffer_req *cu_req = req;
+ u32 cmd_len;
+ void *cmd;
- XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret %d",
- msg.opcode, resp.status, ret);
- return ret;
+ cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (cmd_len > sizeof(cu_req->payload))
+ return -EINVAL;
+
+ cu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (cu_req->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ memcpy(cu_req->payload, cmd, cmd_len);
+
+ *size = sizeof(*cu_req);
+ *msg_op = MSG_OP_EXECUTE_BUFFER_CF;
+ return 0;
}
-int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
- int (*notify_cb)(void *, void __iomem *, size_t))
+static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op)
{
- struct mailbox_channel *chann = hwctx->priv->mbox_chann;
- struct amdxdna_dev *xdna = hwctx->client->xdna;
- struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
- union {
- struct execute_buffer_req ebuf;
- struct exec_dpu_req dpu;
- } req;
- struct xdna_mailbox_msg msg;
- u32 payload_len;
- void *payload;
- int cu_idx;
- int ret;
- u32 op;
+ struct exec_dpu_req *dpu_req = req;
+ struct amdxdna_cmd_start_npu *sn;
+ u32 cmd_len;
- if (!chann)
- return -ENODEV;
+ sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (cmd_len - sizeof(*sn) > sizeof(dpu_req->payload))
+ return -EINVAL;
- payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
- if (!payload) {
- XDNA_ERR(xdna, "Invalid command, cannot get payload");
+ dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (dpu_req->cu_idx == INVALID_CU_IDX)
return -EINVAL;
- }
- cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo);
- if (cu_idx < 0) {
- XDNA_DBG(xdna, "Invalid cu idx");
+ dpu_req->inst_buf_addr = sn->buffer;
+ dpu_req->inst_size = sn->buffer_size;
+ dpu_req->inst_prop_cnt = sn->prop_count;
+ memcpy(dpu_req->payload, sn->prop_args, cmd_len - sizeof(*sn));
+
+ *size = sizeof(*dpu_req);
+ *msg_op = MSG_OP_EXEC_DPU;
+ return 0;
+}
+
+static void aie2_init_exec_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
+{
+ struct cmd_chain_req *chain_req = req;
+
+ chain_req->buf_addr = slot_addr;
+ chain_req->buf_size = size;
+ chain_req->count = cmd_cnt;
+}
+
+static void aie2_init_npu_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
+{
+ struct cmd_chain_npu_req *npu_chain_req = req;
+
+ npu_chain_req->flags = 0;
+ npu_chain_req->reserved = 0;
+ npu_chain_req->buf_addr = slot_addr;
+ npu_chain_req->buf_size = size;
+ npu_chain_req->count = cmd_cnt;
+}
+
+static int
+aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_execbuf_cf *cf_slot = slot;
+ u32 cmd_len;
+ void *cmd;
+
+ cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (*size < sizeof(*cf_slot) + cmd_len)
return -EINVAL;
- }
- op = amdxdna_cmd_get_op(cmd_abo);
- switch (op) {
+ cf_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (cf_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ cf_slot->arg_cnt = cmd_len / sizeof(u32);
+ memcpy(cf_slot->args, cmd, cmd_len);
+ /* Accurate slot size to hint firmware to do necessary copy */
+ *size = sizeof(*cf_slot) + cmd_len;
+ return 0;
+}
+
+static int
+aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_dpu *dpu_slot = slot;
+ struct amdxdna_cmd_start_npu *sn;
+ u32 cmd_len;
+ u32 arg_sz;
+
+ sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*sn);
+ if (cmd_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
+ return -EINVAL;
+
+ if (*size < sizeof(*dpu_slot) + arg_sz)
+ return -EINVAL;
+
+ dpu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (dpu_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ dpu_slot->inst_buf_addr = sn->buffer;
+ dpu_slot->inst_size = sn->buffer_size;
+ dpu_slot->inst_prop_cnt = sn->prop_count;
+ dpu_slot->arg_cnt = arg_sz / sizeof(u32);
+ memcpy(dpu_slot->args, sn->prop_args, arg_sz);
+
+ /* Accurate slot size to hint firmware to do necessary copy */
+ *size = sizeof(*dpu_slot) + arg_sz;
+ return 0;
+}
+
+static int aie2_cmdlist_unsupp(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ return -EOPNOTSUPP;
+}
+
+static u32 aie2_get_chain_msg_op(u32 cmd_op)
+{
+ switch (cmd_op) {
case ERT_START_CU:
- if (unlikely(payload_len > sizeof(req.ebuf.payload)))
- XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len);
- req.ebuf.cu_idx = cu_idx;
- memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload));
- msg.send_size = sizeof(req.ebuf);
- msg.opcode = MSG_OP_EXECUTE_BUFFER_CF;
- break;
- case ERT_START_NPU: {
- struct amdxdna_cmd_start_npu *sn = payload;
-
- if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload)))
- XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len);
- req.dpu.inst_buf_addr = sn->buffer;
- req.dpu.inst_size = sn->buffer_size;
- req.dpu.inst_prop_cnt = sn->prop_count;
- req.dpu.cu_idx = cu_idx;
- memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload));
- msg.send_size = sizeof(req.dpu);
- msg.opcode = MSG_OP_EXEC_DPU;
+ return MSG_OP_CHAIN_EXEC_BUFFER_CF;
+ case ERT_START_NPU:
+ return MSG_OP_CHAIN_EXEC_DPU;
+ default:
break;
}
- default:
- XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op);
+
+ return MSG_OP_MAX_OPCODE;
+}
+
+static struct aie2_exec_msg_ops legacy_exec_message_ops = {
+ .init_cu_req = aie2_init_exec_cu_req,
+ .init_dpu_req = aie2_init_exec_dpu_req,
+ .init_chain_req = aie2_init_exec_chain_req,
+ .fill_cf_slot = aie2_cmdlist_fill_cf,
+ .fill_dpu_slot = aie2_cmdlist_fill_dpu,
+ .fill_preempt_slot = aie2_cmdlist_unsupp,
+ .fill_elf_slot = aie2_cmdlist_unsupp,
+ .get_chain_msg_op = aie2_get_chain_msg_op,
+};
+
+static int
+aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ u32 cmd_len;
+ void *cmd;
+
+ cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (*size < sizeof(*npu_slot) + cmd_len)
return -EINVAL;
- }
- msg.handle = job;
- msg.notify_cb = notify_cb;
- msg.send_data = (u8 *)&req;
- print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
- 0x40, false);
- ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
- if (ret) {
- XDNA_ERR(xdna, "Send message failed");
- return ret;
- }
+ npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (npu_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->type = EXEC_NPU_TYPE_NON_ELF;
+ npu_slot->arg_cnt = cmd_len / sizeof(u32);
+ memcpy(npu_slot->args, cmd, cmd_len);
+
+ *size = sizeof(*npu_slot) + cmd_len;
return 0;
}
static int
-aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
- struct amdxdna_gem_obj *abo, u32 *size)
+aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
- struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset;
- int cu_idx = amdxdna_cmd_get_cu_idx(abo);
- u32 payload_len;
- void *payload;
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ struct amdxdna_cmd_start_npu *sn;
+ u32 cmd_len;
+ u32 arg_sz;
- if (cu_idx < 0)
+ sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*sn);
+ if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE)
return -EINVAL;
- payload = amdxdna_cmd_get_payload(abo, &payload_len);
- if (!payload)
+ if (*size < sizeof(*npu_slot) + arg_sz)
return -EINVAL;
- if (!slot_has_space(*buf, offset, payload_len))
- return -ENOSPC;
+ npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (npu_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF;
+ npu_slot->inst_buf_addr = sn->buffer;
+ npu_slot->inst_size = sn->buffer_size;
+ npu_slot->inst_prop_cnt = sn->prop_count;
+ npu_slot->arg_cnt = arg_sz / sizeof(u32);
+ memcpy(npu_slot->args, sn->prop_args, arg_sz);
- buf->cu_idx = cu_idx;
- buf->arg_cnt = payload_len / sizeof(u32);
- memcpy(buf->args, payload, payload_len);
- /* Accurate buf size to hint firmware to do necessary copy */
- *size = sizeof(*buf) + payload_len;
+ *size = sizeof(*npu_slot) + arg_sz;
return 0;
}
static int
-aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
- struct amdxdna_gem_obj *abo, u32 *size)
+aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
- struct cmd_chain_slot_dpu *buf = cmd_buf + offset;
- int cu_idx = amdxdna_cmd_get_cu_idx(abo);
- struct amdxdna_cmd_start_npu *sn;
- u32 payload_len;
- void *payload;
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ struct amdxdna_cmd_preempt_data *pd;
+ u32 cmd_len;
u32 arg_sz;
- if (cu_idx < 0)
+ pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*pd);
+ if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
return -EINVAL;
- payload = amdxdna_cmd_get_payload(abo, &payload_len);
- if (!payload)
+ if (*size < sizeof(*npu_slot) + arg_sz)
return -EINVAL;
- sn = payload;
- arg_sz = payload_len - sizeof(*sn);
- if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
+
+ npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (npu_slot->cu_idx == INVALID_CU_IDX)
return -EINVAL;
- if (!slot_has_space(*buf, offset, arg_sz))
- return -ENOSPC;
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->type = EXEC_NPU_TYPE_PREEMPT;
+ npu_slot->inst_buf_addr = pd->inst_buf;
+ npu_slot->save_buf_addr = pd->save_buf;
+ npu_slot->restore_buf_addr = pd->restore_buf;
+ npu_slot->inst_size = pd->inst_size;
+ npu_slot->save_size = pd->save_size;
+ npu_slot->restore_size = pd->restore_size;
+ npu_slot->inst_prop_cnt = pd->inst_prop_cnt;
+ npu_slot->arg_cnt = arg_sz / sizeof(u32);
+ memcpy(npu_slot->args, pd->prop_args, arg_sz);
+
+ *size = sizeof(*npu_slot) + arg_sz;
+ return 0;
+}
+
+static int
+aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ struct amdxdna_cmd_preempt_data *pd;
+ u32 cmd_len;
+ u32 arg_sz;
+
+ pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*pd);
+ if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
+ return -EINVAL;
- buf->inst_buf_addr = sn->buffer;
- buf->inst_size = sn->buffer_size;
- buf->inst_prop_cnt = sn->prop_count;
- buf->cu_idx = cu_idx;
- buf->arg_cnt = arg_sz / sizeof(u32);
- memcpy(buf->args, sn->prop_args, arg_sz);
+ if (*size < sizeof(*npu_slot) + arg_sz)
+ return -EINVAL;
- /* Accurate buf size to hint firmware to do necessary copy */
- *size = sizeof(*buf) + arg_sz;
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->type = EXEC_NPU_TYPE_ELF;
+ npu_slot->inst_buf_addr = pd->inst_buf;
+ npu_slot->save_buf_addr = pd->save_buf;
+ npu_slot->restore_buf_addr = pd->restore_buf;
+ npu_slot->inst_size = pd->inst_size;
+ npu_slot->save_size = pd->save_size;
+ npu_slot->restore_size = pd->restore_size;
+ npu_slot->inst_prop_cnt = pd->inst_prop_cnt;
+ npu_slot->arg_cnt = 1;
+ npu_slot->args[0] = AIE2_EXEC_BUFFER_KERNEL_OP_TXN;
+
+ *size = struct_size(npu_slot, args, npu_slot->arg_cnt);
return 0;
}
-static int
-aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset,
- struct amdxdna_gem_obj *abo, u32 *size)
+static u32 aie2_get_npu_chain_msg_op(u32 cmd_op)
+{
+ return MSG_OP_CHAIN_EXEC_NPU;
+}
+
+static struct aie2_exec_msg_ops npu_exec_message_ops = {
+ .init_cu_req = aie2_init_exec_cu_req,
+ .init_dpu_req = aie2_init_exec_dpu_req,
+ .init_chain_req = aie2_init_npu_chain_req,
+ .fill_cf_slot = aie2_cmdlist_fill_npu_cf,
+ .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu,
+ .fill_preempt_slot = aie2_cmdlist_fill_npu_preempt,
+ .fill_elf_slot = aie2_cmdlist_fill_npu_elf,
+ .get_chain_msg_op = aie2_get_npu_chain_msg_op,
+};
+
+static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj *cmd_abo,
+ size_t *size, u32 *msg_op)
{
- u32 this_op = amdxdna_cmd_get_op(abo);
- void *cmd_buf = cmdbuf_abo->mem.kva;
+ struct amdxdna_dev *xdna = cmd_abo->client->xdna;
int ret;
+ u32 op;
- if (this_op != op) {
- ret = -EINVAL;
- goto done;
- }
+ op = amdxdna_cmd_get_op(cmd_abo);
switch (op) {
case ERT_START_CU:
- ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size);
+ ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size, msg_op);
+ if (ret) {
+ XDNA_DBG(xdna, "Init CU req failed ret %d", ret);
+ return ret;
+ }
break;
case ERT_START_NPU:
- ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size);
+ ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size, msg_op);
+ if (ret) {
+ XDNA_DBG(xdna, "Init DPU req failed ret %d", ret);
+ return ret;
+ }
+
break;
default:
+ XDNA_ERR(xdna, "Unsupported op %d", op);
ret = -EOPNOTSUPP;
+ break;
}
-done:
- if (ret) {
- XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d",
- op, ret);
+ return ret;
+}
+
+static int
+aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo,
+ size_t *size, u32 *cmd_op)
+{
+ struct amdxdna_dev *xdna = cmd_abo->client->xdna;
+ int ret;
+ u32 op;
+
+ op = amdxdna_cmd_get_op(cmd_abo);
+ if (*cmd_op == ERT_INVALID_CMD)
+ *cmd_op = op;
+ else if (op != *cmd_op)
+ return -EINVAL;
+
+ switch (op) {
+ case ERT_START_CU:
+ ret = EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size);
+ break;
+ case ERT_START_NPU:
+ ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size);
+ break;
+ case ERT_START_NPU_PREEMPT:
+ if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT))
+ return -EOPNOTSUPP;
+ ret = EXEC_MSG_OPS(xdna)->fill_preempt_slot(cmd_abo, slot, size);
+ break;
+ case ERT_START_NPU_PREEMPT_ELF:
+ if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT))
+ return -EOPNOTSUPP;
+ ret = EXEC_MSG_OPS(xdna)->fill_elf_slot(cmd_abo, slot, size);
+ break;
+ default:
+ XDNA_INFO(xdna, "Unsupported op %d", op);
+ ret = -EOPNOTSUPP;
+ break;
}
+
return ret;
}
+void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
+{
+ if (AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND))
+ ndev->exec_msg_ops = &npu_exec_message_ops;
+ else
+ ndev->exec_msg_ops = &legacy_exec_message_ops;
+}
+
static inline struct amdxdna_gem_obj *
aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
{
@@ -616,29 +869,36 @@ aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
return job->hwctx->priv->cmd_buf[idx];
}
-static void
-aie2_cmdlist_prepare_request(struct cmd_chain_req *req,
- struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt)
+int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t))
{
- req->buf_addr = cmdbuf_abo->mem.dev_addr;
- req->buf_size = size;
- req->count = cnt;
- drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
- XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d",
- req->buf_addr, size, cnt);
-}
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct xdna_mailbox_msg msg;
+ union exec_req req;
+ int ret;
-static inline u32
-aie2_cmd_op_to_msg_op(u32 op)
-{
- switch (op) {
- case ERT_START_CU:
- return MSG_OP_CHAIN_EXEC_BUFFER_CF;
- case ERT_START_NPU:
- return MSG_OP_CHAIN_EXEC_DPU;
- default:
- return MSG_OP_MAX_OPCODE;
+ if (!chann)
+ return -ENODEV;
+
+ ret = aie2_init_exec_req(&req, cmd_abo, &msg.send_size, &msg.opcode);
+ if (ret)
+ return ret;
+
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
+ 0x40, false);
+
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(xdna, "Send message failed");
+ return ret;
}
+
+ return 0;
}
int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
@@ -649,12 +909,13 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
struct amdxdna_client *client = hwctx->client;
struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct amdxdna_dev *xdna = client->xdna;
struct amdxdna_cmd_chain *payload;
struct xdna_mailbox_msg msg;
- struct cmd_chain_req req;
+ union exec_chain_req req;
u32 payload_len;
u32 offset = 0;
- u32 size;
+ size_t size;
int ret;
u32 op;
u32 i;
@@ -665,41 +926,42 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
payload_len < struct_size(payload, data, payload->command_count))
return -EINVAL;
+ op = ERT_INVALID_CMD;
for (i = 0; i < payload->command_count; i++) {
u32 boh = (u32)(payload->data[i]);
struct amdxdna_gem_obj *abo;
abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
if (!abo) {
- XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh);
+ XDNA_ERR(xdna, "Failed to find cmd BO %d", boh);
return -ENOENT;
}
- /* All sub-cmd should have same op, use the first one. */
- if (i == 0)
- op = amdxdna_cmd_get_op(abo);
-
- ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size);
+ size = cmdbuf_abo->mem.size - offset;
+ ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva + offset,
+ abo, &size, &op);
amdxdna_gem_put_obj(abo);
if (ret)
- return -EINVAL;
+ return ret;
offset += size;
}
+ msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
+ if (msg.opcode == MSG_OP_MAX_OPCODE)
+ return -EOPNOTSUPP;
/* The offset is the accumulated total size of the cmd buffer */
- aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count);
+ EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
+ offset, payload->command_count);
+ drm_clflush_virt_range(cmdbuf_abo->mem.kva, offset);
- msg.opcode = aie2_cmd_op_to_msg_op(op);
- if (msg.opcode == MSG_OP_MAX_OPCODE)
- return -EOPNOTSUPP;
msg.handle = job;
msg.notify_cb = notify_cb;
msg.send_data = (u8 *)&req;
msg.send_size = sizeof(req);
ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
if (ret) {
- XDNA_ERR(hwctx->client->xdna, "Send message failed");
+ XDNA_ERR(xdna, "Send message failed");
return ret;
}
@@ -712,23 +974,27 @@ int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
{
struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
struct xdna_mailbox_msg msg;
- struct cmd_chain_req req;
- u32 size;
+ union exec_chain_req req;
+ u32 op = ERT_INVALID_CMD;
+ size_t size;
int ret;
- u32 op;
- op = amdxdna_cmd_get_op(cmd_abo);
- ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size);
+ size = cmdbuf_abo->mem.size;
+ ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva, cmd_abo, &size, &op);
if (ret)
return ret;
- aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1);
-
- msg.opcode = aie2_cmd_op_to_msg_op(op);
+ msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
if (msg.opcode == MSG_OP_MAX_OPCODE)
return -EOPNOTSUPP;
+
+ EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
+ size, 1);
+ drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
+
msg.handle = job;
msg.notify_cb = notify_cb;
msg.send_data = (u8 *)&req;
@@ -753,7 +1019,7 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
int ret = 0;
req.src_addr = 0;
- req.dst_addr = abo->mem.dev_addr - hwctx->client->dev_heap->mem.dev_addr;
+ req.dst_addr = amdxdna_dev_bo_offset(abo);
req.size = abo->mem.size;
/* Device to Host */
@@ -777,3 +1043,32 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
return 0;
}
+
+int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t))
+{
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct config_debug_bo_req req;
+ struct xdna_mailbox_msg msg;
+
+ if (job->drv_cmd->opcode == ATTACH_DEBUG_BO)
+ req.config = DEBUG_BO_REGISTER;
+ else
+ req.config = DEBUG_BO_UNREGISTER;
+
+ req.offset = amdxdna_dev_bo_offset(abo);
+ req.size = abo->mem.size;
+
+ XDNA_DBG(xdna, "offset 0x%llx size 0x%llx config %d",
+ req.offset, req.size, req.config);
+
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ msg.opcode = MSG_OP_CONFIG_DEBUG_BO;
+
+ return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+}
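
The largest change in aie2_message.c replaces per-opcode request building with a small ops table: aie2_msg_init() selects either legacy_exec_message_ops or npu_exec_message_ops once, based on the AIE2_NPU_COMMAND feature bit, and the execbuf and cmdlist paths then dispatch through EXEC_MSG_OPS() without caring which wire format is in use. A minimal user-space sketch of that dispatch shape follows; the struct layout, the function bodies, and the npu_command_supported flag are illustrative assumptions, not the driver's definitions.

#include <stddef.h>
#include <stdio.h>

struct exec_msg_ops {
	int (*fill_cf_slot)(const void *cmd, void *slot, size_t *size);
};

static int legacy_fill_cf(const void *cmd, void *slot, size_t *size)
{
	(void)cmd; (void)slot;
	printf("legacy cf slot, %zu bytes available\n", *size);
	return 0;
}

static int npu_fill_cf(const void *cmd, void *slot, size_t *size)
{
	(void)cmd; (void)slot;
	printf("npu cf slot, %zu bytes available\n", *size);
	return 0;
}

static const struct exec_msg_ops legacy_ops = { .fill_cf_slot = legacy_fill_cf };
static const struct exec_msg_ops npu_ops    = { .fill_cf_slot = npu_fill_cf };

int main(void)
{
	int npu_command_supported = 1;	/* in the driver this comes from the feature mask */
	const struct exec_msg_ops *ops =
		npu_command_supported ? &npu_ops : &legacy_ops;
	char slot[64];
	size_t size = sizeof(slot);

	/* Callers never switch on the format again; they just use the chosen ops. */
	return ops->fill_cf_slot(NULL, slot, &size);
}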
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
index 6df9065b13f6..1c957a6298d3 100644
--- a/drivers/accel/amdxdna/aie2_msg_priv.h
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -9,7 +9,8 @@
enum aie2_msg_opcode {
MSG_OP_CREATE_CONTEXT = 0x2,
MSG_OP_DESTROY_CONTEXT = 0x3,
- MSG_OP_SYNC_BO = 0x7,
+ MSG_OP_GET_TELEMETRY = 0x4,
+ MSG_OP_SYNC_BO = 0x7,
MSG_OP_EXECUTE_BUFFER_CF = 0xC,
MSG_OP_QUERY_COL_STATUS = 0xD,
MSG_OP_QUERY_AIE_TILE_INFO = 0xE,
@@ -18,6 +19,8 @@ enum aie2_msg_opcode {
MSG_OP_CONFIG_CU = 0x11,
MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12,
MSG_OP_CHAIN_EXEC_DPU = 0x13,
+ MSG_OP_CONFIG_DEBUG_BO = 0x14,
+ MSG_OP_CHAIN_EXEC_NPU = 0x18,
MSG_OP_MAX_XRT_OPCODE,
MSG_OP_SUSPEND = 0x101,
MSG_OP_RESUME = 0x102,
@@ -135,6 +138,28 @@ struct destroy_ctx_resp {
enum aie2_msg_status status;
} __packed;
+enum telemetry_type {
+ TELEMETRY_TYPE_DISABLED,
+ TELEMETRY_TYPE_HEALTH,
+ TELEMETRY_TYPE_ERROR_INFO,
+ TELEMETRY_TYPE_PROFILING,
+ TELEMETRY_TYPE_DEBUG,
+ MAX_TELEMETRY_TYPE
+};
+
+struct get_telemetry_req {
+ enum telemetry_type type;
+ __u64 buf_addr;
+ __u32 buf_size;
+} __packed;
+
+struct get_telemetry_resp {
+ __u32 major;
+ __u32 minor;
+ __u32 size;
+ enum aie2_msg_status status;
+} __packed;
+
struct execute_buffer_req {
__u32 cu_idx;
__u32 payload[19];
@@ -148,6 +173,18 @@ struct exec_dpu_req {
__u32 payload[35];
} __packed;
+enum exec_npu_type {
+ EXEC_NPU_TYPE_NON_ELF = 0x1,
+ EXEC_NPU_TYPE_PARTIAL_ELF = 0x2,
+ EXEC_NPU_TYPE_PREEMPT = 0x3,
+ EXEC_NPU_TYPE_ELF = 0x4,
+};
+
+union exec_req {
+ struct execute_buffer_req ebuf;
+ struct exec_dpu_req dpu_req;
+};
+
struct execute_buffer_resp {
enum aie2_msg_status status;
} __packed;
@@ -319,9 +356,6 @@ struct async_event_msg_resp {
} __packed;
#define MAX_CHAIN_CMDBUF_SIZE SZ_4K
-#define slot_has_space(slot, offset, payload_size) \
- (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \
- sizeof(typeof(slot)))
struct cmd_chain_slot_execbuf_cf {
__u32 cu_idx;
@@ -339,12 +373,41 @@ struct cmd_chain_slot_dpu {
__u32 args[] __counted_by(arg_cnt);
};
+#define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32))
+#define AIE2_EXEC_BUFFER_KERNEL_OP_TXN 3
+struct cmd_chain_slot_npu {
+ enum exec_npu_type type;
+ u64 inst_buf_addr;
+ u64 save_buf_addr;
+ u64 restore_buf_addr;
+ u32 inst_size;
+ u32 save_size;
+ u32 restore_size;
+ u32 inst_prop_cnt;
+ u32 cu_idx;
+ u32 arg_cnt;
+ u32 args[] __counted_by(arg_cnt);
+} __packed;
+
struct cmd_chain_req {
__u64 buf_addr;
__u32 buf_size;
__u32 count;
} __packed;
+struct cmd_chain_npu_req {
+ u32 flags;
+ u32 reserved;
+ u64 buf_addr;
+ u32 buf_size;
+ u32 count;
+} __packed;
+
+union exec_chain_req {
+ struct cmd_chain_npu_req npu_req;
+ struct cmd_chain_req req;
+};
+
struct cmd_chain_resp {
enum aie2_msg_status status;
__u32 fail_cmd_idx;
@@ -365,4 +428,21 @@ struct sync_bo_req {
struct sync_bo_resp {
enum aie2_msg_status status;
} __packed;
+
+#define DEBUG_BO_UNREGISTER 0
+#define DEBUG_BO_REGISTER 1
+struct config_debug_bo_req {
+ __u64 offset;
+ __u64 size;
+ /*
+ * config operations.
+ * DEBUG_BO_REGISTER: Register debug buffer
+ * DEBUG_BO_UNREGISTER: Unregister debug buffer
+ */
+ __u32 config;
+} __packed;
+
+struct config_debug_bo_resp {
+ enum aie2_msg_status status;
+} __packed;
#endif /* _AIE2_MSG_PRIV_H_ */
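
The new cmd_chain_slot_npu follows the same convention as the existing slot structures: a fixed header followed by a flexible args[] array annotated with __counted_by(arg_cnt), where the filler reports back the exact header-plus-payload size so the firmware copies only what was written. A minimal user-space sketch of that sizing arithmetic is below; demo_slot is an invented stand-in, and the kernel code uses struct_size(), which additionally guards against overflow.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct demo_slot {
	uint32_t cu_idx;
	uint32_t arg_cnt;
	uint32_t args[];	/* flexible array member, arg_cnt entries */
};

int main(void)
{
	uint32_t payload[3] = { 1, 2, 3 };
	size_t sz = sizeof(struct demo_slot) + sizeof(payload);
	struct demo_slot *slot = calloc(1, sz);

	if (!slot)
		return 1;

	slot->arg_cnt = sizeof(payload) / sizeof(payload[0]);
	memcpy(slot->args, payload, sizeof(payload));

	/* sz is the "accurate slot size" reported back so only the used bytes are copied. */
	printf("slot size %zu bytes, %u args\n", sz, slot->arg_cnt);
	free(slot);
	return 0;
}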
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 87c425e3d2b9..ceef1c502e9e 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -25,6 +25,7 @@
#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
static int aie2_max_col = XRS_MAX_COL;
module_param(aie2_max_col, uint, 0600);
@@ -54,6 +55,7 @@ struct mgmt_mbox_chann_info {
static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor)
{
+ const struct aie2_fw_feature_tbl *feature;
struct amdxdna_dev *xdna = ndev->xdna;
/*
@@ -77,6 +79,17 @@ static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 f
XDNA_ERR(xdna, "Firmware minor version smaller than supported");
return -EINVAL;
}
+
+ for (feature = ndev->priv->fw_feature_tbl; feature && feature->min_minor;
+ feature++) {
+ if (fw_minor < feature->min_minor)
+ continue;
+ if (feature->max_minor > 0 && fw_minor > feature->max_minor)
+ continue;
+
+ set_bit(feature->feature, &ndev->feature_mask);
+ }
+
return 0;
}
@@ -170,6 +183,10 @@ int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
if (cfg->category != category)
continue;
+ if (cfg->feature_mask &&
+ bitmap_subset(&cfg->feature_mask, &ndev->feature_mask, AIE2_FEATURE_MAX))
+ continue;
+
value = val ? *val : cfg->value;
ret = aie2_set_runtime_cfg(ndev, cfg->type, value);
if (ret) {
@@ -223,15 +240,6 @@ static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
return ret;
}
- if (!ndev->async_events)
- return 0;
-
- ret = aie2_error_async_events_send(ndev);
- if (ret) {
- XDNA_ERR(ndev->xdna, "Send async events failed");
- return ret;
- }
-
return 0;
}
@@ -257,6 +265,8 @@ static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev)
return ret;
}
+ ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
+
return 0;
}
@@ -338,6 +348,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
ndev->mbox = NULL;
aie2_psp_stop(ndev->psp_hdl);
aie2_smu_fini(ndev);
+ aie2_error_async_events_free(ndev);
pci_disable_device(pdev);
ndev->dev_status = AIE2_DEV_INIT;
@@ -424,6 +435,18 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
goto destroy_mgmt_chann;
}
+ ret = aie2_mgmt_fw_query(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to query fw, ret %d", ret);
+ goto destroy_mgmt_chann;
+ }
+
+ ret = aie2_error_async_events_alloc(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
+ goto destroy_mgmt_chann;
+ }
+
ndev->dev_status = AIE2_DEV_START;
return 0;
@@ -459,7 +482,6 @@ static int aie2_hw_resume(struct amdxdna_dev *xdna)
struct amdxdna_client *client;
int ret;
- guard(mutex)(&xdna->dev_lock);
ret = aie2_hw_start(xdna);
if (ret) {
XDNA_ERR(xdna, "Start hardware failed, %d", ret);
@@ -565,13 +587,6 @@ static int aie2_init(struct amdxdna_dev *xdna)
goto release_fw;
}
- ret = aie2_mgmt_fw_query(ndev);
- if (ret) {
- XDNA_ERR(xdna, "Query firmware failed, ret %d", ret);
- goto stop_hw;
- }
- ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
-
xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
@@ -587,30 +602,11 @@ static int aie2_init(struct amdxdna_dev *xdna)
goto stop_hw;
}
- ret = aie2_error_async_events_alloc(ndev);
- if (ret) {
- XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
- goto stop_hw;
- }
-
- ret = aie2_error_async_events_send(ndev);
- if (ret) {
- XDNA_ERR(xdna, "Send async events failed, ret %d", ret);
- goto async_event_free;
- }
-
- /* Issue a command to make sure firmware handled async events */
- ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
- if (ret) {
- XDNA_ERR(xdna, "Re-query firmware version failed");
- goto async_event_free;
- }
-
release_firmware(fw);
+ aie2_msg_init(ndev);
+ amdxdna_pm_init(xdna);
return 0;
-async_event_free:
- aie2_error_async_events_free(ndev);
stop_hw:
aie2_hw_stop(xdna);
release_fw:
@@ -621,10 +617,8 @@ release_fw:
static void aie2_fini(struct amdxdna_dev *xdna)
{
- struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
-
+ amdxdna_pm_fini(xdna);
aie2_hw_stop(xdna);
- aie2_error_async_events_free(ndev);
}
static int aie2_get_aie_status(struct amdxdna_client *client,
@@ -845,7 +839,120 @@ static int aie2_get_hwctx_status(struct amdxdna_client *client,
}
args->buffer_size -= (u32)(array_args.buffer - args->buffer);
- return ret;
+ return 0;
+}
+
+static int aie2_query_resource_info(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_get_resource_info res_info;
+ const struct amdxdna_dev_priv *priv;
+ struct amdxdna_dev_hdl *ndev;
+ struct amdxdna_dev *xdna;
+
+ xdna = client->xdna;
+ ndev = xdna->dev_handle;
+ priv = ndev->priv;
+
+ res_info.npu_clk_max = priv->dpm_clk_tbl[ndev->max_dpm_level].hclk;
+ res_info.npu_tops_max = ndev->max_tops;
+ res_info.npu_task_max = priv->hwctx_limit;
+ res_info.npu_tops_curr = ndev->curr_tops;
+ res_info.npu_task_curr = ndev->hwctx_num;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &res_info, sizeof(res_info)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int aie2_fill_hwctx_map(struct amdxdna_hwctx *hwctx, void *arg)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ u32 *map = arg;
+
+ if (hwctx->fw_ctx_id >= xdna->dev_handle->priv->hwctx_limit) {
+ XDNA_ERR(xdna, "Invalid fw ctx id %d/%d ", hwctx->fw_ctx_id,
+ xdna->dev_handle->priv->hwctx_limit);
+ return -EINVAL;
+ }
+
+ map[hwctx->fw_ctx_id] = hwctx->id;
+ return 0;
+}
+
+static int aie2_get_telemetry(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_telemetry_header *header __free(kfree) = NULL;
+ u32 telemetry_data_sz, header_sz, elem_num;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_client *tmp_client;
+ int ret;
+
+ elem_num = xdna->dev_handle->priv->hwctx_limit;
+ header_sz = struct_size(header, map, elem_num);
+ if (args->buffer_size <= header_sz) {
+ XDNA_ERR(xdna, "Invalid buffer size");
+ return -EINVAL;
+ }
+
+ telemetry_data_sz = args->buffer_size - header_sz;
+ if (telemetry_data_sz > SZ_4M) {
+ XDNA_ERR(xdna, "Buffer size is too big, %d", telemetry_data_sz);
+ return -EINVAL;
+ }
+
+ header = kzalloc(header_sz, GFP_KERNEL);
+ if (!header)
+ return -ENOMEM;
+
+ if (copy_from_user(header, u64_to_user_ptr(args->buffer), sizeof(*header))) {
+ XDNA_ERR(xdna, "Failed to copy telemetry header from user");
+ return -EFAULT;
+ }
+
+ header->map_num_elements = elem_num;
+ list_for_each_entry(tmp_client, &xdna->client_list, node) {
+ ret = amdxdna_hwctx_walk(tmp_client, &header->map,
+ aie2_fill_hwctx_map);
+ if (ret)
+ return ret;
+ }
+
+ ret = aie2_query_telemetry(xdna->dev_handle,
+ u64_to_user_ptr(args->buffer + header_sz),
+ telemetry_data_sz, header);
+ if (ret) {
+ XDNA_ERR(xdna, "Query telemetry failed ret %d", ret);
+ return ret;
+ }
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), header, header_sz)) {
+ XDNA_ERR(xdna, "Copy header failed");
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int aie2_get_preempt_state(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_attribute_state state = {};
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+
+ ndev = xdna->dev_handle;
+ if (args->param == DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE)
+ state.state = ndev->force_preempt_enabled;
+ else if (args->param == DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE)
+ state.state = ndev->frame_boundary_preempt;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &state, sizeof(state)))
+ return -EFAULT;
+
+ return 0;
}
static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
@@ -856,6 +963,10 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
if (!drm_dev_enter(&xdna->ddev, &idx))
return -ENODEV;
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto dev_exit;
+
switch (args->param) {
case DRM_AMDXDNA_QUERY_AIE_STATUS:
ret = aie2_get_aie_status(client, args);
@@ -878,12 +989,25 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
case DRM_AMDXDNA_GET_POWER_MODE:
ret = aie2_get_power_mode(client, args);
break;
+ case DRM_AMDXDNA_QUERY_TELEMETRY:
+ ret = aie2_get_telemetry(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_RESOURCE_INFO:
+ ret = aie2_query_resource_info(client, args);
+ break;
+ case DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE:
+ case DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE:
+ ret = aie2_get_preempt_state(client, args);
+ break;
default:
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
ret = -EOPNOTSUPP;
}
+
+ amdxdna_pm_suspend_put(xdna);
XDNA_DBG(xdna, "Got param %d", args->param);
+dev_exit:
drm_dev_exit(idx);
return ret;
}
@@ -898,6 +1022,12 @@ static int aie2_query_ctx_status_array(struct amdxdna_client *client,
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ if (args->element_size > SZ_4K || args->num_element > SZ_1K) {
+ XDNA_DBG(xdna, "Invalid element size %d or number of element %d",
+ args->element_size, args->num_element);
+ return -EINVAL;
+ }
+
array_args.element_size = min(args->element_size,
sizeof(struct amdxdna_drm_hwctx_entry));
array_args.buffer = args->buffer;
@@ -914,7 +1044,7 @@ static int aie2_query_ctx_status_array(struct amdxdna_client *client,
args->num_element = (u32)((array_args.buffer - args->buffer) /
args->element_size);
- return ret;
+ return 0;
}
static int aie2_get_array(struct amdxdna_client *client,
@@ -926,16 +1056,26 @@ static int aie2_get_array(struct amdxdna_client *client,
if (!drm_dev_enter(&xdna->ddev, &idx))
return -ENODEV;
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto dev_exit;
+
switch (args->param) {
case DRM_AMDXDNA_HW_CONTEXT_ALL:
ret = aie2_query_ctx_status_array(client, args);
break;
+ case DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
+ ret = aie2_get_array_async_error(xdna->dev_handle, args);
+ break;
default:
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
ret = -EOPNOTSUPP;
}
+
+ amdxdna_pm_suspend_put(xdna);
XDNA_DBG(xdna, "Got param %d", args->param);
+dev_exit:
drm_dev_exit(idx);
return ret;
}
@@ -965,6 +1105,38 @@ static int aie2_set_power_mode(struct amdxdna_client *client,
return aie2_pm_set_mode(xdna->dev_handle, power_mode);
}
+static int aie2_set_preempt_state(struct amdxdna_client *client,
+ struct amdxdna_drm_set_state *args)
+{
+ struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle;
+ struct amdxdna_drm_attribute_state state;
+ u32 val;
+ int ret;
+
+ if (copy_from_user(&state, u64_to_user_ptr(args->buffer), sizeof(state)))
+ return -EFAULT;
+
+ if (state.state > 1)
+ return -EINVAL;
+
+ if (XDNA_MBZ_DBG(client->xdna, state.pad, sizeof(state.pad)))
+ return -EINVAL;
+
+ if (args->param == DRM_AMDXDNA_SET_FORCE_PREEMPT) {
+ ndev->force_preempt_enabled = state.state;
+ } else if (args->param == DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT) {
+ val = state.state;
+ ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT,
+ &val);
+ if (ret)
+ return ret;
+
+ ndev->frame_boundary_preempt = state.state;
+ }
+
+ return 0;
+}
+
static int aie2_set_state(struct amdxdna_client *client,
struct amdxdna_drm_set_state *args)
{
@@ -974,16 +1146,26 @@ static int aie2_set_state(struct amdxdna_client *client,
if (!drm_dev_enter(&xdna->ddev, &idx))
return -ENODEV;
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto dev_exit;
+
switch (args->param) {
case DRM_AMDXDNA_SET_POWER_MODE:
ret = aie2_set_power_mode(client, args);
break;
+ case DRM_AMDXDNA_SET_FORCE_PREEMPT:
+ case DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT:
+ ret = aie2_set_preempt_state(client, args);
+ break;
default:
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
ret = -EOPNOTSUPP;
break;
}
+ amdxdna_pm_suspend_put(xdna);
+dev_exit:
drm_dev_exit(idx);
return ret;
}
@@ -998,6 +1180,7 @@ const struct amdxdna_dev_ops aie2_ops = {
.hwctx_init = aie2_hwctx_init,
.hwctx_fini = aie2_hwctx_fini,
.hwctx_config = aie2_hwctx_config,
+ .hwctx_sync_debug_bo = aie2_hwctx_sync_debug_bo,
.cmd_submit = aie2_cmd_submit,
.hmm_invalidate = aie2_hmm_invalidate,
.get_array = aie2_get_array,
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 91a8e948f82a..a5f9c42155d1 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -110,12 +110,15 @@ struct aie_metadata {
enum rt_config_category {
AIE2_RT_CFG_INIT,
AIE2_RT_CFG_CLK_GATING,
+ AIE2_RT_CFG_FORCE_PREEMPT,
+ AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT,
};
struct rt_config {
u32 type;
u32 value;
u32 category;
+ unsigned long feature_mask;
};
struct dpm_clk_freq {
@@ -156,6 +159,19 @@ enum aie2_dev_status {
AIE2_DEV_START,
};
+struct aie2_exec_msg_ops {
+ int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op);
+ int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op);
+ void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt);
+ int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ u32 (*get_chain_msg_op)(u32 cmd_op);
+};
+
struct amdxdna_dev_hdl {
struct amdxdna_dev *xdna;
const struct amdxdna_dev_priv *priv;
@@ -173,6 +189,8 @@ struct amdxdna_dev_hdl {
u32 total_col;
struct aie_version version;
struct aie_metadata metadata;
+ unsigned long feature_mask;
+ struct aie2_exec_msg_ops *exec_msg_ops;
/* power management and clock*/
enum amdxdna_power_mode_type pw_mode;
@@ -182,6 +200,10 @@ struct amdxdna_dev_hdl {
u32 clk_gating;
u32 npuclk_freq;
u32 hclk_freq;
+ u32 max_tops;
+ u32 curr_tops;
+ u32 force_preempt_enabled;
+ u32 frame_boundary_preempt;
/* Mailbox and the management channel */
struct mailbox *mbox;
@@ -190,6 +212,8 @@ struct amdxdna_dev_hdl {
enum aie2_dev_status dev_status;
u32 hwctx_num;
+
+ struct amdxdna_async_error last_async_err;
};
#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
@@ -204,12 +228,27 @@ struct aie2_hw_ops {
int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
};
+enum aie2_fw_feature {
+ AIE2_NPU_COMMAND,
+ AIE2_PREEMPT,
+ AIE2_FEATURE_MAX
+};
+
+struct aie2_fw_feature_tbl {
+ enum aie2_fw_feature feature;
+ u32 max_minor;
+ u32 min_minor;
+};
+
+#define AIE2_FEATURE_ON(ndev, feature) test_bit(feature, &(ndev)->feature_mask)
+
struct amdxdna_dev_priv {
const char *fw_path;
u64 protocol_major;
u64 protocol_minor;
const struct rt_config *rt_config;
const struct dpm_clk_freq *dpm_clk_tbl;
+ const struct aie2_fw_feature_tbl *fw_feature_tbl;
#define COL_ALIGN_NONE 0
#define COL_ALIGN_NATURE 1
@@ -217,6 +256,7 @@ struct amdxdna_dev_priv {
u32 mbox_dev_addr;
/* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */
u32 mbox_size;
+ u32 hwctx_limit;
u32 sram_dev_addr;
struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX];
struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS];
@@ -234,6 +274,7 @@ extern const struct dpm_clk_freq npu1_dpm_clk_table[];
extern const struct dpm_clk_freq npu4_dpm_clk_table[];
extern const struct rt_config npu1_default_rt_cfg[];
extern const struct rt_config npu4_default_rt_cfg[];
+extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[];
/* aie2_smu.c */
int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
@@ -253,10 +294,12 @@ void aie2_psp_stop(struct psp_device *psp);
/* aie2_error.c */
int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev);
void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev);
-int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev);
int aie2_error_async_msg_thread(void *data);
+int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
+ struct amdxdna_drm_get_array *args);
/* aie2_message.c */
+void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
@@ -270,9 +313,13 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
+int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
+ char __user *buf, u32 size,
+ struct amdxdna_drm_query_telemetry_header *header);
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
void *handle, int (*cb)(void*, void __iomem *, size_t));
-int aie2_config_cu(struct amdxdna_hwctx *hwctx);
+int aie2_config_cu(struct amdxdna_hwctx *hwctx,
+ int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
@@ -283,11 +330,14 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t));
/* aie2_hwctx.c */
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
+int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
void aie2_hwctx_suspend(struct amdxdna_client *client);
int aie2_hwctx_resume(struct amdxdna_client *client);
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
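The firmware feature plumbing added above (fw_feature_tbl, feature_mask, AIE2_FEATURE_ON) only declares the protocol-minor window in which a feature is usable; the code that folds the table into ndev->feature_mask is not part of this hunk. A minimal sketch of one plausible consumer, assuming a hypothetical helper name, that the terminating { 0 } entry is detected through its zero min_minor, and that max_minor == 0 means "no upper bound":

static void aie2_example_set_feature_mask(struct amdxdna_dev_hdl *ndev, u32 fw_minor)
{
	const struct aie2_fw_feature_tbl *e = ndev->priv->fw_feature_tbl;

	ndev->feature_mask = 0;
	for (; e && e->min_minor; e++) {
		/* Skip features outside the firmware's protocol-minor window */
		if (fw_minor < e->min_minor)
			continue;
		if (e->max_minor && fw_minor > e->max_minor)
			continue;
		set_bit(e->feature, &ndev->feature_mask);
	}

	/* Call sites then gate behavior on the resulting bits */
	if (AIE2_FEATURE_ON(ndev, AIE2_PREEMPT))
		XDNA_DBG(ndev->xdna, "Firmware supports preemption");
}

With a mask populated this way, rt_config entries that carry a feature_mask (see npu4_default_rt_cfg) would only be applied when the matching bit is set.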
diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c
index d303701b0ded..bd94ee96c2bc 100644
--- a/drivers/accel/amdxdna/aie2_smu.c
+++ b/drivers/accel/amdxdna/aie2_smu.c
@@ -11,6 +11,7 @@
#include "aie2_pci.h"
#include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
#define SMU_RESULT_OK 1
@@ -22,6 +23,13 @@
#define AIE2_SMU_SET_SOFT_DPMLEVEL 0x7
#define AIE2_SMU_SET_HARD_DPMLEVEL 0x8
+#define NPU4_DPM_TOPS(ndev, dpm_level) \
+({ \
+ typeof(ndev) _ndev = ndev; \
+ (4096 * (_ndev)->total_col * \
+ (_ndev)->priv->dpm_clk_tbl[dpm_level].hclk / 1000000); \
+})
+
static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd,
u32 reg_arg, u32 *out)
{
@@ -59,12 +67,16 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
u32 freq;
int ret;
+ ret = amdxdna_pm_resume_get(ndev->xdna);
+ if (ret)
+ return ret;
+
ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
if (ret) {
XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
- return ret;
+ goto suspend_put;
}
ndev->npuclk_freq = freq;
@@ -73,49 +85,78 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
if (ret) {
XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
- return ret;
+ goto suspend_put;
}
+
+ amdxdna_pm_suspend_put(ndev->xdna);
ndev->hclk_freq = freq;
ndev->dpm_level = dpm_level;
+ ndev->max_tops = 2 * ndev->total_col;
+ ndev->curr_tops = ndev->max_tops * freq / 1028;
XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
ndev->npuclk_freq, ndev->hclk_freq);
return 0;
+
+suspend_put:
+ amdxdna_pm_suspend_put(ndev->xdna);
+ return ret;
}
int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
{
int ret;
+ ret = amdxdna_pm_resume_get(ndev->xdna);
+ if (ret)
+ return ret;
+
ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL);
if (ret) {
XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
dpm_level, ret);
- return ret;
+ goto suspend_put;
}
ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL);
if (ret) {
XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
dpm_level, ret);
- return ret;
+ goto suspend_put;
}
+ amdxdna_pm_suspend_put(ndev->xdna);
ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
ndev->dpm_level = dpm_level;
+ ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->max_dpm_level);
+ ndev->curr_tops = NPU4_DPM_TOPS(ndev, dpm_level);
XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
ndev->npuclk_freq, ndev->hclk_freq);
return 0;
+
+suspend_put:
+ amdxdna_pm_suspend_put(ndev->xdna);
+ return ret;
}
int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
{
int ret;
+ /*
+ * Failing to set power off indicates an unrecoverable hardware or
+ * firmware error.
+ */
+ ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Access power failed, ret %d", ret);
+ return ret;
+ }
+
ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL);
if (ret) {
XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index 4bfe4ef20550..d17aef89a0ad 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -113,14 +113,14 @@ void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
return &cmd->data[num_masks];
}
-int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
+u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
{
struct amdxdna_cmd *cmd = abo->mem.kva;
u32 num_masks, i;
u32 *cu_mask;
if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
- return -1;
+ return INVALID_CU_IDX;
num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
cu_mask = cmd->data;
@@ -129,7 +129,7 @@ int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
return ffs(cu_mask[i]) - 1;
}
- return -1;
+ return INVALID_CU_IDX;
}
/*
@@ -161,19 +161,14 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
if (args->ext || args->ext_flags)
return -EINVAL;
- if (!drm_dev_enter(dev, &idx))
- return -ENODEV;
-
hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
- if (!hwctx) {
- ret = -ENOMEM;
- goto exit;
- }
+ if (!hwctx)
+ return -ENOMEM;
if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) {
XDNA_ERR(xdna, "Access QoS info failed");
- ret = -EFAULT;
- goto free_hwctx;
+ kfree(hwctx);
+ return -EFAULT;
}
hwctx->client = client;
@@ -181,30 +176,36 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
hwctx->num_tiles = args->num_tiles;
hwctx->mem_size = args->mem_size;
hwctx->max_opc = args->max_opc;
- ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
- XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
- &client->next_hwctxid, GFP_KERNEL);
- if (ret < 0) {
- XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
+
+ guard(mutex)(&xdna->dev_lock);
+
+ if (!drm_dev_enter(dev, &idx)) {
+ ret = -ENODEV;
goto free_hwctx;
}
- hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id);
+ ret = xdna->dev_info->ops->hwctx_init(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
+ goto dev_exit;
+ }
+
+ hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->fw_ctx_id);
if (!hwctx->name) {
ret = -ENOMEM;
- goto rm_id;
+ goto fini_hwctx;
}
- mutex_lock(&xdna->dev_lock);
- ret = xdna->dev_info->ops->hwctx_init(hwctx);
- if (ret) {
- mutex_unlock(&xdna->dev_lock);
- XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
+ ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
+ XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
+ &client->next_hwctxid, GFP_KERNEL);
+ if (ret < 0) {
+ XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
goto free_name;
}
+
args->handle = hwctx->id;
args->syncobj_handle = hwctx->syncobj_hdl;
- mutex_unlock(&xdna->dev_lock);
atomic64_set(&hwctx->job_submit_cnt, 0);
atomic64_set(&hwctx->job_free_cnt, 0);
@@ -214,12 +215,12 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
free_name:
kfree(hwctx->name);
-rm_id:
- xa_erase(&client->hwctx_xa, hwctx->id);
+fini_hwctx:
+ xdna->dev_info->ops->hwctx_fini(hwctx);
+dev_exit:
+ drm_dev_exit(idx);
free_hwctx:
kfree(hwctx);
-exit:
- drm_dev_exit(idx);
return ret;
}
@@ -327,6 +328,38 @@ unlock_srcu:
return ret;
}
+int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_hwctx *hwctx;
+ struct amdxdna_gem_obj *abo;
+ struct drm_gem_object *gobj;
+ int ret, idx;
+
+ if (!xdna->dev_info->ops->hwctx_sync_debug_bo)
+ return -EOPNOTSUPP;
+
+ gobj = drm_gem_object_lookup(client->filp, debug_bo_hdl);
+ if (!gobj)
+ return -EINVAL;
+
+ abo = to_xdna_obj(gobj);
+ guard(mutex)(&xdna->dev_lock);
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ hwctx = xa_load(&client->hwctx_xa, abo->assigned_hwctx);
+ if (!hwctx) {
+ ret = -EINVAL;
+ goto unlock_srcu;
+ }
+
+ ret = xdna->dev_info->ops->hwctx_sync_debug_bo(hwctx, debug_bo_hdl);
+
+unlock_srcu:
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ drm_gem_object_put(gobj);
+ return ret;
+}
+
static void
amdxdna_arg_bos_put(struct amdxdna_sched_job *job)
{
@@ -389,9 +422,11 @@ void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job)
trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release");
amdxdna_arg_bos_put(job);
amdxdna_gem_put_obj(job->cmd_bo);
+ dma_fence_put(job->fence);
}
int amdxdna_cmd_submit(struct amdxdna_client *client,
+ struct amdxdna_drv_cmd *drv_cmd,
u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt,
u32 hwctx_hdl, u64 *seq)
{
@@ -405,6 +440,8 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
if (!job)
return -ENOMEM;
+ job->drv_cmd = drv_cmd;
+
if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) {
job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD);
if (!job->cmd_bo) {
@@ -412,8 +449,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
ret = -EINVAL;
goto free_job;
}
- } else {
- job->cmd_bo = NULL;
}
ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt);
@@ -431,11 +466,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
goto unlock_srcu;
}
- if (hwctx->status != HWCTX_STAT_READY) {
- XDNA_ERR(xdna, "HW Context is not ready");
- ret = -EINVAL;
- goto unlock_srcu;
- }
job->hwctx = hwctx;
job->mm = current->mm;
@@ -512,7 +542,7 @@ static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
}
}
- ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls,
+ ret = amdxdna_cmd_submit(client, NULL, cmd_bo_hdl, arg_bo_hdls,
args->arg_count, args->hwctx, &args->seq);
if (ret)
XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index 7cd7a55936f0..b6151244d64f 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -13,9 +13,12 @@
struct amdxdna_hwctx_priv;
enum ert_cmd_opcode {
- ERT_START_CU = 0,
- ERT_CMD_CHAIN = 19,
- ERT_START_NPU = 20,
+ ERT_START_CU = 0,
+ ERT_CMD_CHAIN = 19,
+ ERT_START_NPU = 20,
+ ERT_START_NPU_PREEMPT = 21,
+ ERT_START_NPU_PREEMPT_ELF = 22,
+ ERT_INVALID_CMD = ~0U,
};
enum ert_cmd_state {
@@ -54,6 +57,21 @@ struct amdxdna_cmd_chain {
u64 data[] __counted_by(command_count);
};
+/*
+ * Interpretation of the beginning of data payload for ERT_START_NPU_PREEMPT in
+ * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args.
+ */
+struct amdxdna_cmd_preempt_data {
+ u64 inst_buf; /* instruction buffer address */
+ u64 save_buf; /* save buffer address */
+ u64 restore_buf; /* restore buffer address */
+ u32 inst_size; /* size of instruction buffer in bytes */
+ u32 save_size; /* size of save buffer in bytes */
+ u32 restore_size; /* size of restore buffer in bytes */
+ u32 inst_prop_cnt; /* properties count */
+ u32 prop_args[]; /* properties and regular kernel arguments */
+};
+
/* Exec buffer command header format */
#define AMDXDNA_CMD_STATE GENMASK(3, 0)
#define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10)
@@ -64,6 +82,8 @@ struct amdxdna_cmd {
u32 data[];
};
+#define INVALID_CU_IDX (~0U)
+
struct amdxdna_hwctx {
struct amdxdna_client *client;
struct amdxdna_hwctx_priv *priv;
@@ -95,6 +115,17 @@ struct amdxdna_hwctx {
#define drm_job_to_xdna_job(j) \
container_of(j, struct amdxdna_sched_job, base)
+enum amdxdna_job_opcode {
+ SYNC_DEBUG_BO,
+ ATTACH_DEBUG_BO,
+ DETACH_DEBUG_BO,
+};
+
+struct amdxdna_drv_cmd {
+ enum amdxdna_job_opcode opcode;
+ u32 result;
+};
+
struct amdxdna_sched_job {
struct drm_sched_job base;
struct kref refcnt;
@@ -105,7 +136,9 @@ struct amdxdna_sched_job {
/* user can wait on this fence */
struct dma_fence *out_fence;
bool job_done;
+ bool job_timeout;
u64 seq;
+ struct amdxdna_drv_cmd *drv_cmd;
struct amdxdna_gem_obj *cmd_bo;
size_t bo_cnt;
struct drm_gem_object *bos[] __counted_by(bo_cnt);
@@ -137,15 +170,17 @@ amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
}
void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size);
-int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
+u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
int amdxdna_hwctx_walk(struct amdxdna_client *client, void *arg,
int (*walk)(struct amdxdna_hwctx *hwctx, void *arg));
+int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl);
int amdxdna_cmd_submit(struct amdxdna_client *client,
- u32 cmd_bo_hdls, u32 *arg_bo_hdls, u32 arg_bo_cnt,
+ struct amdxdna_drv_cmd *drv_cmd, u32 cmd_bo_hdls,
+ u32 *arg_bo_hdls, u32 arg_bo_cnt,
u32 hwctx_hdl, u64 *seq);
int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
diff --git a/drivers/accel/amdxdna/amdxdna_error.h b/drivers/accel/amdxdna/amdxdna_error.h
new file mode 100644
index 000000000000..c51de86ec12b
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_error.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_ERROR_H_
+#define _AMDXDNA_ERROR_H_
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+
+#define AMDXDNA_ERR_DRV_AIE 4
+#define AMDXDNA_ERR_SEV_CRITICAL 3
+#define AMDXDNA_ERR_CLASS_AIE 2
+
+#define AMDXDNA_ERR_NUM_MASK GENMASK_U64(15, 0)
+#define AMDXDNA_ERR_DRV_MASK GENMASK_U64(23, 16)
+#define AMDXDNA_ERR_SEV_MASK GENMASK_U64(31, 24)
+#define AMDXDNA_ERR_MOD_MASK GENMASK_U64(39, 32)
+#define AMDXDNA_ERR_CLASS_MASK GENMASK_U64(47, 40)
+
+enum amdxdna_error_num {
+ AMDXDNA_ERROR_NUM_AIE_SATURATION = 3,
+ AMDXDNA_ERROR_NUM_AIE_FP,
+ AMDXDNA_ERROR_NUM_AIE_STREAM,
+ AMDXDNA_ERROR_NUM_AIE_ACCESS,
+ AMDXDNA_ERROR_NUM_AIE_BUS,
+ AMDXDNA_ERROR_NUM_AIE_INSTRUCTION,
+ AMDXDNA_ERROR_NUM_AIE_ECC,
+ AMDXDNA_ERROR_NUM_AIE_LOCK,
+ AMDXDNA_ERROR_NUM_AIE_DMA,
+ AMDXDNA_ERROR_NUM_AIE_MEM_PARITY,
+ AMDXDNA_ERROR_NUM_UNKNOWN = 15,
+};
+
+enum amdxdna_error_module {
+ AMDXDNA_ERROR_MODULE_AIE_CORE = 3,
+ AMDXDNA_ERROR_MODULE_AIE_MEMORY,
+ AMDXDNA_ERROR_MODULE_AIE_SHIM,
+ AMDXDNA_ERROR_MODULE_AIE_NOC,
+ AMDXDNA_ERROR_MODULE_AIE_PL,
+ AMDXDNA_ERROR_MODULE_UNKNOWN = 8,
+};
+
+#define AMDXDNA_ERROR_ENCODE(err_num, err_mod) \
+ (FIELD_PREP(AMDXDNA_ERR_NUM_MASK, err_num) | \
+ FIELD_PREP_CONST(AMDXDNA_ERR_DRV_MASK, AMDXDNA_ERR_DRV_AIE) | \
+ FIELD_PREP_CONST(AMDXDNA_ERR_SEV_MASK, AMDXDNA_ERR_SEV_CRITICAL) | \
+ FIELD_PREP(AMDXDNA_ERR_MOD_MASK, err_mod) | \
+ FIELD_PREP_CONST(AMDXDNA_ERR_CLASS_MASK, AMDXDNA_ERR_CLASS_AIE))
+
+#define AMDXDNA_EXTRA_ERR_COL_MASK GENMASK_U64(7, 0)
+#define AMDXDNA_EXTRA_ERR_ROW_MASK GENMASK_U64(15, 8)
+
+#define AMDXDNA_EXTRA_ERR_ENCODE(row, col) \
+ (FIELD_PREP(AMDXDNA_EXTRA_ERR_COL_MASK, col) | \
+ FIELD_PREP(AMDXDNA_EXTRA_ERR_ROW_MASK, row))
+
+#endif /* _AMDXDNA_ERROR_H_ */
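The new header is declarative only; no call site appears in this hunk. As a hedged illustration of how the encode macros compose (the function and variables below are made up for the example), an AIE DMA error reported against the memory module of the tile at row 2, column 5 would be packed as:

#include <linux/types.h>

#include "amdxdna_error.h"

/* Example only -- not a call site from this patch */
static void amdxdna_error_encode_example(u64 *err, u64 *extra)
{
	/* bits 0-15 error number, 16-23 driver, 24-31 severity,
	 * bits 32-39 module, 40-47 class
	 */
	*err = AMDXDNA_ERROR_ENCODE(AMDXDNA_ERROR_NUM_AIE_DMA,
				    AMDXDNA_ERROR_MODULE_AIE_MEMORY);
	*extra = AMDXDNA_EXTRA_ERR_ENCODE(2, 5);	/* row 2, column 5 */
}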
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index d407a36eb412..dfa916eeb2d9 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -8,6 +8,7 @@
#include <drm/drm_device.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <linux/dma-buf.h>
#include <linux/dma-direct.h>
@@ -392,35 +393,33 @@ static const struct dma_buf_ops amdxdna_dmabuf_ops = {
.vunmap = drm_gem_dmabuf_vunmap,
};
-static int amdxdna_gem_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
+static int amdxdna_gem_obj_vmap(struct amdxdna_gem_obj *abo, void **vaddr)
{
- struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
-
- iosys_map_clear(map);
-
- dma_resv_assert_held(obj->resv);
+ struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL);
+ int ret;
if (is_import_bo(abo))
- dma_buf_vmap(abo->dma_buf, map);
+ ret = dma_buf_vmap_unlocked(abo->dma_buf, &map);
else
- drm_gem_shmem_object_vmap(obj, map);
+ ret = drm_gem_vmap(to_gobj(abo), &map);
- if (!map->vaddr)
- return -ENOMEM;
-
- return 0;
+ *vaddr = map.vaddr;
+ return ret;
}
-static void amdxdna_gem_obj_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
+static void amdxdna_gem_obj_vunmap(struct amdxdna_gem_obj *abo)
{
- struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
+ struct iosys_map map;
+
+ if (!abo->mem.kva)
+ return;
- dma_resv_assert_held(obj->resv);
+ iosys_map_set_vaddr(&map, abo->mem.kva);
if (is_import_bo(abo))
- dma_buf_vunmap(abo->dma_buf, map);
+ dma_buf_vunmap_unlocked(abo->dma_buf, &map);
else
- drm_gem_shmem_object_vunmap(obj, map);
+ drm_gem_vunmap(to_gobj(abo), &map);
}
static struct dma_buf *amdxdna_gem_prime_export(struct drm_gem_object *gobj, int flags)
@@ -455,7 +454,6 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
{
struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
- struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva);
XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr);
@@ -468,7 +466,7 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
if (abo->type == AMDXDNA_BO_DEV_HEAP)
drm_mm_takedown(&abo->mm);
- drm_gem_vunmap(gobj, &map);
+ amdxdna_gem_obj_vunmap(abo);
mutex_destroy(&abo->lock);
if (is_import_bo(abo)) {
@@ -489,8 +487,8 @@ static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
.pin = drm_gem_shmem_object_pin,
.unpin = drm_gem_shmem_object_unpin,
.get_sg_table = drm_gem_shmem_object_get_sg_table,
- .vmap = amdxdna_gem_obj_vmap,
- .vunmap = amdxdna_gem_obj_vunmap,
+ .vmap = drm_gem_shmem_object_vmap,
+ .vunmap = drm_gem_shmem_object_vunmap,
.mmap = amdxdna_gem_obj_mmap,
.vm_ops = &drm_gem_shmem_vm_ops,
.export = amdxdna_gem_prime_export,
@@ -663,7 +661,6 @@ amdxdna_drm_create_dev_heap(struct drm_device *dev,
struct drm_file *filp)
{
struct amdxdna_client *client = filp->driver_priv;
- struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL);
struct amdxdna_dev *xdna = to_xdna_dev(dev);
struct amdxdna_gem_obj *abo;
int ret;
@@ -692,12 +689,11 @@ amdxdna_drm_create_dev_heap(struct drm_device *dev,
abo->mem.dev_addr = client->xdna->dev_info->dev_mem_base;
drm_mm_init(&abo->mm, abo->mem.dev_addr, abo->mem.size);
- ret = drm_gem_vmap(to_gobj(abo), &map);
+ ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva);
if (ret) {
XDNA_ERR(xdna, "Vmap heap bo failed, ret %d", ret);
goto release_obj;
}
- abo->mem.kva = map.vaddr;
client->dev_heap = abo;
drm_gem_object_get(to_gobj(abo));
@@ -748,7 +744,6 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev,
struct amdxdna_drm_create_bo *args,
struct drm_file *filp)
{
- struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL);
struct amdxdna_dev *xdna = to_xdna_dev(dev);
struct amdxdna_gem_obj *abo;
int ret;
@@ -770,12 +765,11 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev,
abo->type = AMDXDNA_BO_CMD;
abo->client = filp->driver_priv;
- ret = drm_gem_vmap(to_gobj(abo), &map);
+ ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva);
if (ret) {
XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret);
goto release_obj;
}
- abo->mem.kva = map.vaddr;
return abo;
@@ -969,6 +963,9 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
XDNA_DBG(xdna, "Sync bo %d offset 0x%llx, size 0x%llx\n",
args->handle, args->offset, args->size);
+ if (args->direction == SYNC_DIRECT_FROM_DEVICE)
+ ret = amdxdna_hwctx_sync_debug_bo(abo->client, args->handle);
+
put_obj:
drm_gem_object_put(gobj);
return ret;
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
index ae29db94a9d3..f79fc7f3c93b 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.h
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -7,6 +7,7 @@
#define _AMDXDNA_GEM_H_
#include <linux/hmm.h>
+#include "amdxdna_pci_drv.h"
struct amdxdna_umap {
struct vm_area_struct *vma;
@@ -62,6 +63,11 @@ static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo)
drm_gem_object_put(to_gobj(abo));
}
+static inline u64 amdxdna_dev_bo_offset(struct amdxdna_gem_obj *abo)
+{
+ return abo->mem.dev_addr - abo->client->dev_heap->mem.dev_addr;
+}
+
void amdxdna_umap_put(struct amdxdna_umap *mapp);
struct drm_gem_object *
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
index da1ac89bb78f..858df97cd3fb 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -194,7 +194,8 @@ static void mailbox_release_msg(struct mailbox_channel *mb_chann,
{
MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x",
mb_msg->pkg.header.id, mb_msg->pkg.header.opcode);
- mb_msg->notify_cb(mb_msg->handle, NULL, 0);
+ if (mb_msg->notify_cb)
+ mb_msg->notify_cb(mb_msg->handle, NULL, 0);
kfree(mb_msg);
}
@@ -248,7 +249,7 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade
{
struct mailbox_msg *mb_msg;
int msg_id;
- int ret;
+ int ret = 0;
msg_id = header->id;
if (!mailbox_validate_msgid(msg_id)) {
@@ -265,9 +266,11 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade
MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
header->opcode, header->total_size, header->id);
- ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
- if (unlikely(ret))
- MB_ERR(mb_chann, "Message callback ret %d", ret);
+ if (mb_msg->notify_cb) {
+ ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
+ if (unlikely(ret))
+ MB_ERR(mb_chann, "Message callback ret %d", ret);
+ }
kfree(mb_msg);
return ret;
@@ -513,6 +516,7 @@ xdna_mailbox_create_channel(struct mailbox *mb,
}
mb_chann->bad_state = false;
+ mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0);
MB_DBG(mb_chann, "Mailbox channel created (irq: %d)", mb_chann->msix_irq);
return mb_chann;
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.h b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
index 710ff8873d61..556c712cad0a 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
+++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
@@ -16,16 +16,18 @@ struct xdna_notify {
u32 *data;
size_t size;
int error;
+ u32 *status;
};
-#define DECLARE_XDNA_MSG_COMMON(name, op, status) \
+#define DECLARE_XDNA_MSG_COMMON(name, op, s) \
struct name##_req req = { 0 }; \
- struct name##_resp resp = { status }; \
+ struct name##_resp resp = { .status = s }; \
struct xdna_notify hdl = { \
.error = 0, \
.data = (u32 *)&resp, \
.size = sizeof(resp), \
.comp = COMPLETION_INITIALIZER_ONSTACK(hdl.comp), \
+ .status = (u32 *)&resp.status, \
}; \
struct xdna_mailbox_msg msg = { \
.send_data = (u8 *)&req, \
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 569cd703729d..1973ab67721b 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -13,13 +13,11 @@
#include <drm/gpu_scheduler.h>
#include <linux/iommu.h>
#include <linux/pci.h>
-#include <linux/pm_runtime.h>
#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_pci_drv.h"
-
-#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
+#include "amdxdna_pm.h"
MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
@@ -29,9 +27,14 @@ MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin");
/*
* 0.0: Initial version
* 0.1: Support getting all hardware contexts by DRM_IOCTL_AMDXDNA_GET_ARRAY
+ * 0.2: Support getting the last hardware error
+ * 0.3: Support firmware debug buffer
+ * 0.4: Support getting resource information
+ * 0.5: Support getting telemetry data
+ * 0.6: Support preemption
*/
#define AMDXDNA_DRIVER_MAJOR 0
-#define AMDXDNA_DRIVER_MINOR 1
+#define AMDXDNA_DRIVER_MINOR 6
/*
* Bind the driver based on the (vendor_id, device_id) pair and later use the
@@ -61,17 +64,9 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
struct amdxdna_client *client;
int ret;
- ret = pm_runtime_resume_and_get(ddev->dev);
- if (ret) {
- XDNA_ERR(xdna, "Failed to get rpm, ret %d", ret);
- return ret;
- }
-
client = kzalloc(sizeof(*client), GFP_KERNEL);
- if (!client) {
- ret = -ENOMEM;
- goto put_rpm;
- }
+ if (!client)
+ return -ENOMEM;
client->pid = pid_nr(rcu_access_pointer(filp->pid));
client->xdna = xdna;
@@ -106,9 +101,6 @@ unbind_sva:
iommu_sva_unbind_device(client->sva);
failed:
kfree(client);
-put_rpm:
- pm_runtime_mark_last_busy(ddev->dev);
- pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
@@ -130,8 +122,6 @@ static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp)
XDNA_DBG(xdna, "pid %d closed", client->pid);
kfree(client);
- pm_runtime_mark_last_busy(ddev->dev);
- pm_runtime_put_autosuspend(ddev->dev);
}
static int amdxdna_flush(struct file *f, fl_owner_t id)
@@ -310,19 +300,12 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto failed_dev_fini;
}
- pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
- pm_runtime_use_autosuspend(dev);
- pm_runtime_allow(dev);
-
ret = drm_dev_register(&xdna->ddev, 0);
if (ret) {
XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
- pm_runtime_forbid(dev);
goto failed_sysfs_fini;
}
- pm_runtime_mark_last_busy(dev);
- pm_runtime_put_autosuspend(dev);
return 0;
failed_sysfs_fini:
@@ -339,14 +322,10 @@ destroy_notifier_wq:
static void amdxdna_remove(struct pci_dev *pdev)
{
struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
- struct device *dev = &pdev->dev;
struct amdxdna_client *client;
destroy_workqueue(xdna->notifier_wq);
- pm_runtime_get_noresume(dev);
- pm_runtime_forbid(dev);
-
drm_dev_unplug(&xdna->ddev);
amdxdna_sysfs_fini(xdna);
@@ -365,29 +344,9 @@ static void amdxdna_remove(struct pci_dev *pdev)
mutex_unlock(&xdna->dev_lock);
}
-static int amdxdna_pmops_suspend(struct device *dev)
-{
- struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
-
- if (!xdna->dev_info->ops->suspend)
- return -EOPNOTSUPP;
-
- return xdna->dev_info->ops->suspend(xdna);
-}
-
-static int amdxdna_pmops_resume(struct device *dev)
-{
- struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
-
- if (!xdna->dev_info->ops->resume)
- return -EOPNOTSUPP;
-
- return xdna->dev_info->ops->resume(xdna);
-}
-
static const struct dev_pm_ops amdxdna_pm_ops = {
- SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume)
- RUNTIME_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume, NULL)
+ SYSTEM_SLEEP_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume)
+ RUNTIME_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume, NULL)
};
static struct pci_driver amdxdna_pci_driver = {
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index 72d6696d49da..c99477f5e454 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -6,6 +6,7 @@
#ifndef _AMDXDNA_PCI_DRV_H_
#define _AMDXDNA_PCI_DRV_H_
+#include <drm/drm_print.h>
#include <linux/workqueue.h>
#include <linux/xarray.h>
@@ -54,6 +55,7 @@ struct amdxdna_dev_ops {
int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
+ int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
@@ -99,6 +101,7 @@ struct amdxdna_dev {
struct amdxdna_fw_ver fw_ver;
struct rw_semaphore notifier_lock; /* for mmu notifier*/
struct workqueue_struct *notifier_wq;
+ bool rpm_on;
};
/*
diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/amdxdna/amdxdna_pm.c
new file mode 100644
index 000000000000..fa38e65d617c
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pm.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_drv.h>
+#include <linux/pm_runtime.h>
+
+#include "amdxdna_pm.h"
+
+#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
+
+int amdxdna_pm_suspend(struct device *dev)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
+ int ret = -EOPNOTSUPP;
+ bool rpm;
+
+ if (xdna->dev_info->ops->suspend) {
+ rpm = xdna->rpm_on;
+ xdna->rpm_on = false;
+ ret = xdna->dev_info->ops->suspend(xdna);
+ xdna->rpm_on = rpm;
+ }
+
+ XDNA_DBG(xdna, "Suspend done ret %d", ret);
+ return ret;
+}
+
+int amdxdna_pm_resume(struct device *dev)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
+ int ret = -EOPNOTSUPP;
+ bool rpm;
+
+ if (xdna->dev_info->ops->resume) {
+ rpm = xdna->rpm_on;
+ xdna->rpm_on = false;
+ ret = xdna->dev_info->ops->resume(xdna);
+ xdna->rpm_on = rpm;
+ }
+
+ XDNA_DBG(xdna, "Resume done ret %d", ret);
+ return ret;
+}
+
+int amdxdna_pm_resume_get(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+ int ret;
+
+ if (!xdna->rpm_on)
+ return 0;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret) {
+ XDNA_ERR(xdna, "Resume failed: %d", ret);
+ pm_runtime_set_suspended(dev);
+ }
+
+ return ret;
+}
+
+void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+
+ if (!xdna->rpm_on)
+ return;
+
+ pm_runtime_put_autosuspend(dev);
+}
+
+void amdxdna_pm_init(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+
+ pm_runtime_set_active(dev);
+ pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_allow(dev);
+ pm_runtime_put_autosuspend(dev);
+ xdna->rpm_on = true;
+}
+
+void amdxdna_pm_fini(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+
+ xdna->rpm_on = false;
+ pm_runtime_get_noresume(dev);
+ pm_runtime_forbid(dev);
+}
diff --git a/drivers/accel/amdxdna/amdxdna_pm.h b/drivers/accel/amdxdna/amdxdna_pm.h
new file mode 100644
index 000000000000..77b2d6e45570
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pm.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_PM_H_
+#define _AMDXDNA_PM_H_
+
+#include "amdxdna_pci_drv.h"
+
+int amdxdna_pm_suspend(struct device *dev);
+int amdxdna_pm_resume(struct device *dev);
+int amdxdna_pm_resume_get(struct amdxdna_dev *xdna);
+void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna);
+void amdxdna_pm_init(struct amdxdna_dev *xdna);
+void amdxdna_pm_fini(struct amdxdna_dev *xdna);
+
+#endif /* _AMDXDNA_PM_H_ */
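These helpers replace the open-coded pm_runtime calls removed from amdxdna_pci_drv.c: amdxdna_pm_init() arms runtime PM after bring-up, and every hardware-touching path then brackets its work with a resume_get()/suspend_put() pair, as the reworked aie2_get_info(), aie2_get_array() and aie2_set_state() above do. A condensed sketch of that pattern (the function name is hypothetical; the calls are the ones declared here):

static int example_hw_access(struct amdxdna_dev *xdna)
{
	int ret;

	ret = amdxdna_pm_resume_get(xdna);	/* no-op until amdxdna_pm_init() sets rpm_on */
	if (ret)
		return ret;

	/* ... access mailbox/SMU/registers while the device is resumed ... */

	amdxdna_pm_suspend_put(xdna);		/* drops the reference, re-arms autosuspend */
	return 0;
}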
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index e4f6dac7d00f..ec407f3b48fc 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -46,6 +46,7 @@
const struct rt_config npu1_default_rt_cfg[] = {
{ 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 4, 1, AIE2_RT_CFG_INIT }, /* Debug BO */
{ 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
{ 0 },
};
@@ -62,16 +63,23 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = {
{ 0 }
};
+static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = {
+ { .feature = AIE2_NPU_COMMAND, .min_minor = 8 },
+ { 0 }
+};
+
static const struct amdxdna_dev_priv npu1_dev_priv = {
.fw_path = "amdnpu/1502_00/npu.sbin",
.protocol_major = 0x5,
.protocol_minor = 0x7,
.rt_config = npu1_default_rt_cfg,
.dpm_clk_tbl = npu1_dpm_clk_table,
+ .fw_feature_tbl = npu1_fw_feature_table,
.col_align = COL_ALIGN_NONE,
.mbox_dev_addr = NPU1_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
.sram_dev_addr = NPU1_SRAM_BAR_BASE,
+ .hwctx_limit = 6,
.sram_offs = {
DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU1_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU1_SRAM, MPNPU_SRAM_I2X_MAILBOX_15),
diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
index a081cac75ee0..86f87d0d1354 100644
--- a/drivers/accel/amdxdna/npu2_regs.c
+++ b/drivers/accel/amdxdna/npu2_regs.c
@@ -67,10 +67,12 @@ static const struct amdxdna_dev_priv npu2_dev_priv = {
.protocol_minor = 0x6,
.rt_config = npu4_default_rt_cfg,
.dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU2_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
.sram_dev_addr = NPU2_SRAM_BAR_BASE,
+ .hwctx_limit = 16,
.sram_offs = {
DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index 9f2e33182ec6..986a5f28ba24 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -63,10 +63,14 @@
const struct rt_config npu4_default_rt_cfg[] = {
{ 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 10, 1, AIE2_RT_CFG_INIT }, /* DEBUG BUF */
+ { 14, 0, AIE2_RT_CFG_INIT, BIT_U64(AIE2_PREEMPT) }, /* Frame boundary preemption */
{ 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
{ 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
{ 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
{ 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 13, 0, AIE2_RT_CFG_FORCE_PREEMPT },
+ { 14, 0, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT },
{ 0 },
};
@@ -82,16 +86,24 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
{ 0 }
};
+const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
+ { .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
+ { .feature = AIE2_PREEMPT, .min_minor = 12 },
+ { 0 }
+};
+
static const struct amdxdna_dev_priv npu4_dev_priv = {
.fw_path = "amdnpu/17f0_10/npu.sbin",
.protocol_major = 0x6,
.protocol_minor = 12,
.rt_config = npu4_default_rt_cfg,
.dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU4_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
.sram_dev_addr = NPU4_SRAM_BAR_BASE,
+ .hwctx_limit = 16,
.sram_offs = {
DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
index 5f1cf83461c4..75ad97f0b937 100644
--- a/drivers/accel/amdxdna/npu5_regs.c
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -67,10 +67,12 @@ static const struct amdxdna_dev_priv npu5_dev_priv = {
.protocol_minor = 12,
.rt_config = npu4_default_rt_cfg,
.dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU5_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
.sram_dev_addr = NPU5_SRAM_BAR_BASE,
+ .hwctx_limit = 16,
.sram_offs = {
DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
index 94a7005685a7..758dc013fe13 100644
--- a/drivers/accel/amdxdna/npu6_regs.c
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -67,10 +67,12 @@ static const struct amdxdna_dev_priv npu6_dev_priv = {
.protocol_minor = 12,
.rt_config = npu4_default_rt_cfg,
.dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU6_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
.sram_dev_addr = NPU6_SRAM_BAR_BASE,
+ .hwctx_limit = 16,
.sram_offs = {
DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
diff --git a/drivers/accel/ethosu/Kconfig b/drivers/accel/ethosu/Kconfig
new file mode 100644
index 000000000000..d25f9b3eb317
--- /dev/null
+++ b/drivers/accel/ethosu/Kconfig
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config DRM_ACCEL_ARM_ETHOSU
+ tristate "Arm Ethos-U65/U85 NPU"
+ depends on HAS_IOMEM
+ depends on DRM_ACCEL
+ select DRM_GEM_DMA_HELPER
+ select DRM_SCHED
+ select GENERIC_ALLOCATOR
+ help
+ Enables the driver for Arm Ethos-U65 and Ethos-U85 NPUs.
diff --git a/drivers/accel/ethosu/Makefile b/drivers/accel/ethosu/Makefile
new file mode 100644
index 000000000000..17db5a600416
--- /dev/null
+++ b/drivers/accel/ethosu/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_DRM_ACCEL_ARM_ETHOSU) := ethosu.o
+ethosu-y += ethosu_drv.o ethosu_gem.o ethosu_job.o
diff --git a/drivers/accel/ethosu/ethosu_device.h b/drivers/accel/ethosu/ethosu_device.h
new file mode 100644
index 000000000000..b189fa783d6a
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_device.h
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright 2025 Arm, Ltd. */
+
+#ifndef __ETHOSU_DEVICE_H__
+#define __ETHOSU_DEVICE_H__
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/types.h>
+
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+
+#include <drm/ethosu_accel.h>
+
+struct clk;
+struct gen_pool;
+
+#define NPU_REG_ID 0x0000
+#define NPU_REG_STATUS 0x0004
+#define NPU_REG_CMD 0x0008
+#define NPU_REG_RESET 0x000c
+#define NPU_REG_QBASE 0x0010
+#define NPU_REG_QBASE_HI 0x0014
+#define NPU_REG_QREAD 0x0018
+#define NPU_REG_QCONFIG 0x001c
+#define NPU_REG_QSIZE 0x0020
+#define NPU_REG_PROT 0x0024
+#define NPU_REG_CONFIG 0x0028
+#define NPU_REG_REGIONCFG 0x003c
+#define NPU_REG_AXILIMIT0 0x0040 // U65
+#define NPU_REG_AXILIMIT1 0x0044 // U65
+#define NPU_REG_AXILIMIT2 0x0048 // U65
+#define NPU_REG_AXILIMIT3 0x004c // U65
+#define NPU_REG_MEM_ATTR0 0x0040 // U85
+#define NPU_REG_MEM_ATTR1 0x0044 // U85
+#define NPU_REG_MEM_ATTR2 0x0048 // U85
+#define NPU_REG_MEM_ATTR3 0x004c // U85
+#define NPU_REG_AXI_SRAM 0x0050 // U85
+#define NPU_REG_AXI_EXT 0x0054 // U85
+
+#define NPU_REG_BASEP(x) (0x0080 + (x) * 8)
+#define NPU_REG_BASEP_HI(x) (0x0084 + (x) * 8)
+#define NPU_BASEP_REGION_MAX 8
+
+#define ID_ARCH_MAJOR_MASK GENMASK(31, 28)
+#define ID_ARCH_MINOR_MASK GENMASK(27, 20)
+#define ID_ARCH_PATCH_MASK GENMASK(19, 16)
+#define ID_VER_MAJOR_MASK GENMASK(11, 8)
+#define ID_VER_MINOR_MASK GENMASK(7, 4)
+
+#define CONFIG_MACS_PER_CC_MASK GENMASK(3, 0)
+#define CONFIG_CMD_STREAM_VER_MASK GENMASK(7, 4)
+
+#define STATUS_STATE_RUNNING BIT(0)
+#define STATUS_IRQ_RAISED BIT(1)
+#define STATUS_BUS_STATUS BIT(2)
+#define STATUS_RESET_STATUS BIT(3)
+#define STATUS_CMD_PARSE_ERR BIT(4)
+#define STATUS_CMD_END_REACHED BIT(5)
+
+#define CMD_CLEAR_IRQ BIT(1)
+#define CMD_TRANSITION_TO_RUN BIT(0)
+
+#define RESET_PENDING_CSL BIT(1)
+#define RESET_PENDING_CPL BIT(0)
+
+#define PROT_ACTIVE_CSL BIT(1)
+
+enum ethosu_cmds {
+ NPU_OP_CONV = 0x2,
+ NPU_OP_DEPTHWISE = 0x3,
+ NPU_OP_POOL = 0x5,
+ NPU_OP_ELEMENTWISE = 0x6,
+ NPU_OP_RESIZE = 0x7, // U85 only
+ NPU_OP_DMA_START = 0x10,
+ NPU_SET_IFM_PAD_TOP = 0x100,
+ NPU_SET_IFM_PAD_LEFT = 0x101,
+ NPU_SET_IFM_PAD_RIGHT = 0x102,
+ NPU_SET_IFM_PAD_BOTTOM = 0x103,
+ NPU_SET_IFM_DEPTH_M1 = 0x104,
+ NPU_SET_IFM_PRECISION = 0x105,
+ NPU_SET_IFM_BROADCAST = 0x108,
+ NPU_SET_IFM_WIDTH0_M1 = 0x10a,
+ NPU_SET_IFM_HEIGHT0_M1 = 0x10b,
+ NPU_SET_IFM_HEIGHT1_M1 = 0x10c,
+ NPU_SET_IFM_REGION = 0x10f,
+ NPU_SET_OFM_WIDTH_M1 = 0x111,
+ NPU_SET_OFM_HEIGHT_M1 = 0x112,
+ NPU_SET_OFM_DEPTH_M1 = 0x113,
+ NPU_SET_OFM_PRECISION = 0x114,
+ NPU_SET_OFM_WIDTH0_M1 = 0x11a,
+ NPU_SET_OFM_HEIGHT0_M1 = 0x11b,
+ NPU_SET_OFM_HEIGHT1_M1 = 0x11c,
+ NPU_SET_OFM_REGION = 0x11f,
+ NPU_SET_KERNEL_WIDTH_M1 = 0x120,
+ NPU_SET_KERNEL_HEIGHT_M1 = 0x121,
+ NPU_SET_KERNEL_STRIDE = 0x122,
+ NPU_SET_WEIGHT_REGION = 0x128,
+ NPU_SET_SCALE_REGION = 0x129,
+ NPU_SET_DMA0_SRC_REGION = 0x130,
+ NPU_SET_DMA0_DST_REGION = 0x131,
+ NPU_SET_DMA0_SIZE0 = 0x132,
+ NPU_SET_DMA0_SIZE1 = 0x133,
+ NPU_SET_IFM2_BROADCAST = 0x180,
+ NPU_SET_IFM2_PRECISION = 0x185,
+ NPU_SET_IFM2_WIDTH0_M1 = 0x18a,
+ NPU_SET_IFM2_HEIGHT0_M1 = 0x18b,
+ NPU_SET_IFM2_HEIGHT1_M1 = 0x18c,
+ NPU_SET_IFM2_REGION = 0x18f,
+ NPU_SET_IFM_BASE0 = 0x4000,
+ NPU_SET_IFM_BASE1 = 0x4001,
+ NPU_SET_IFM_BASE2 = 0x4002,
+ NPU_SET_IFM_BASE3 = 0x4003,
+ NPU_SET_IFM_STRIDE_X = 0x4004,
+ NPU_SET_IFM_STRIDE_Y = 0x4005,
+ NPU_SET_IFM_STRIDE_C = 0x4006,
+ NPU_SET_OFM_BASE0 = 0x4010,
+ NPU_SET_OFM_BASE1 = 0x4011,
+ NPU_SET_OFM_BASE2 = 0x4012,
+ NPU_SET_OFM_BASE3 = 0x4013,
+ NPU_SET_OFM_STRIDE_X = 0x4014,
+ NPU_SET_OFM_STRIDE_Y = 0x4015,
+ NPU_SET_OFM_STRIDE_C = 0x4016,
+ NPU_SET_WEIGHT_BASE = 0x4020,
+ NPU_SET_WEIGHT_LENGTH = 0x4021,
+ NPU_SET_SCALE_BASE = 0x4022,
+ NPU_SET_SCALE_LENGTH = 0x4023,
+ NPU_SET_DMA0_SRC = 0x4030,
+ NPU_SET_DMA0_DST = 0x4031,
+ NPU_SET_DMA0_LEN = 0x4032,
+ NPU_SET_DMA0_SRC_STRIDE0 = 0x4033,
+ NPU_SET_DMA0_SRC_STRIDE1 = 0x4034,
+ NPU_SET_DMA0_DST_STRIDE0 = 0x4035,
+ NPU_SET_DMA0_DST_STRIDE1 = 0x4036,
+ NPU_SET_IFM2_BASE0 = 0x4080,
+ NPU_SET_IFM2_BASE1 = 0x4081,
+ NPU_SET_IFM2_BASE2 = 0x4082,
+ NPU_SET_IFM2_BASE3 = 0x4083,
+ NPU_SET_IFM2_STRIDE_X = 0x4084,
+ NPU_SET_IFM2_STRIDE_Y = 0x4085,
+ NPU_SET_IFM2_STRIDE_C = 0x4086,
+ NPU_SET_WEIGHT1_BASE = 0x4090,
+ NPU_SET_WEIGHT1_LENGTH = 0x4091,
+ NPU_SET_SCALE1_BASE = 0x4092,
+ NPU_SET_WEIGHT2_BASE = 0x4092,
+ NPU_SET_SCALE1_LENGTH = 0x4093,
+ NPU_SET_WEIGHT2_LENGTH = 0x4093,
+ NPU_SET_WEIGHT3_BASE = 0x4094,
+ NPU_SET_WEIGHT3_LENGTH = 0x4095,
+};
+
+#define ETHOSU_SRAM_REGION 2 /* Matching Vela compiler */
+
+/**
+ * struct ethosu_device - Ethosu device
+ */
+struct ethosu_device {
+ /** @base: Base drm_device. */
+ struct drm_device base;
+
+ /** @regs: CPU mapping of the registers. */
+ void __iomem *regs;
+
+ void __iomem *sram;
+ struct gen_pool *srampool;
+ dma_addr_t sramphys;
+
+ struct clk_bulk_data *clks;
+ int num_clks;
+ int irq;
+
+ struct drm_ethosu_npu_info npu_info;
+
+ struct ethosu_job *in_flight_job;
+ /* For in_flight_job and ethosu_job_hw_submit() */
+ struct mutex job_lock;
+
+ /* For dma_fence */
+ spinlock_t fence_lock;
+
+ struct drm_gpu_scheduler sched;
+ /* For ethosu_job_do_push() */
+ struct mutex sched_lock;
+ u64 fence_context;
+ u64 emit_seqno;
+};
+
+#define to_ethosu_device(drm_dev) \
+ ((struct ethosu_device *)container_of(drm_dev, struct ethosu_device, base))
+
+static inline bool ethosu_is_u65(const struct ethosu_device *ethosudev)
+{
+ return FIELD_GET(ID_ARCH_MAJOR_MASK, ethosudev->npu_info.id) == 1;
+}
+
+#endif
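As a worked example of the ID/CONFIG bitfields defined above (not code from this patch; the log2 interpretation of macs_per_cc is an assumption carried over from other Ethos-U documentation), decoding the architecture version and MAC throughput could look like:

/* Illustrative decode of the cached NPU_REG_ID / NPU_REG_CONFIG values */
static void ethosu_example_decode(struct ethosu_device *ethosudev)
{
	u32 id = ethosudev->npu_info.id;
	u32 cfg = ethosudev->npu_info.config;
	u32 arch_major = FIELD_GET(ID_ARCH_MAJOR_MASK, id);	/* 1 on U65, per ethosu_is_u65() */
	u32 arch_minor = FIELD_GET(ID_ARCH_MINOR_MASK, id);
	u32 macs = 1u << FIELD_GET(CONFIG_MACS_PER_CC_MASK, cfg); /* assumed log2 encoding */

	dev_info(ethosudev->base.dev, "Ethos-U arch %u.%u, %u MACs/cc\n",
		 arch_major, arch_minor, macs);
}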
diff --git a/drivers/accel/ethosu/ethosu_drv.c b/drivers/accel/ethosu/ethosu_drv.c
new file mode 100644
index 000000000000..e05a69bf5574
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_drv.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+// Copyright (C) 2025 Arm, Ltd.
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/genalloc.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_utils.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_accel.h>
+#include <drm/ethosu_accel.h>
+
+#include "ethosu_drv.h"
+#include "ethosu_device.h"
+#include "ethosu_gem.h"
+#include "ethosu_job.h"
+
+static int ethosu_ioctl_dev_query(struct drm_device *ddev, void *data,
+ struct drm_file *file)
+{
+ struct ethosu_device *ethosudev = to_ethosu_device(ddev);
+ struct drm_ethosu_dev_query *args = data;
+
+ if (!args->pointer) {
+ switch (args->type) {
+ case DRM_ETHOSU_DEV_QUERY_NPU_INFO:
+ args->size = sizeof(ethosudev->npu_info);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ switch (args->type) {
+ case DRM_ETHOSU_DEV_QUERY_NPU_INFO:
+ if (args->size < offsetofend(struct drm_ethosu_npu_info, sram_size))
+ return -EINVAL;
+ return copy_struct_to_user(u64_to_user_ptr(args->pointer),
+ args->size,
+ &ethosudev->npu_info,
+ sizeof(ethosudev->npu_info), NULL);
+ default:
+ return -EINVAL;
+ }
+}
+
+#define ETHOSU_BO_FLAGS DRM_ETHOSU_BO_NO_MMAP
+
+static int ethosu_ioctl_bo_create(struct drm_device *ddev, void *data,
+ struct drm_file *file)
+{
+ struct drm_ethosu_bo_create *args = data;
+ int cookie, ret;
+
+ if (!drm_dev_enter(ddev, &cookie))
+ return -ENODEV;
+
+ if (!args->size || (args->flags & ~ETHOSU_BO_FLAGS)) {
+ ret = -EINVAL;
+ goto out_dev_exit;
+ }
+
+ ret = ethosu_gem_create_with_handle(file, ddev, &args->size,
+ args->flags, &args->handle);
+
+out_dev_exit:
+ drm_dev_exit(cookie);
+ return ret;
+}
+
+static int ethosu_ioctl_bo_wait(struct drm_device *ddev, void *data,
+ struct drm_file *file)
+{
+ struct drm_ethosu_bo_wait *args = data;
+ int cookie, ret;
+ unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
+
+ if (args->pad)
+ return -EINVAL;
+
+ if (!drm_dev_enter(ddev, &cookie))
+ return -ENODEV;
+
+ ret = drm_gem_dma_resv_wait(file, args->handle, true, timeout);
+
+ drm_dev_exit(cookie);
+ return ret;
+}
+
+static int ethosu_ioctl_bo_mmap_offset(struct drm_device *ddev, void *data,
+ struct drm_file *file)
+{
+ struct drm_ethosu_bo_mmap_offset *args = data;
+ struct drm_gem_object *obj;
+
+ if (args->pad)
+ return -EINVAL;
+
+ obj = drm_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ args->offset = drm_vma_node_offset_addr(&obj->vma_node);
+ drm_gem_object_put(obj);
+ return 0;
+}
+
+static int ethosu_ioctl_cmdstream_bo_create(struct drm_device *ddev, void *data,
+ struct drm_file *file)
+{
+ struct drm_ethosu_cmdstream_bo_create *args = data;
+ int cookie, ret;
+
+ if (!drm_dev_enter(ddev, &cookie))
+ return -ENODEV;
+
+ if (!args->size || !args->data || args->pad || args->flags) {
+ ret = -EINVAL;
+ goto out_dev_exit;
+ }
+
+ args->flags |= DRM_ETHOSU_BO_NO_MMAP;
+
+ ret = ethosu_gem_cmdstream_create(file, ddev, args->size, args->data,
+ args->flags, &args->handle);
+
+out_dev_exit:
+ drm_dev_exit(cookie);
+ return ret;
+}
+
+static int ethosu_open(struct drm_device *ddev, struct drm_file *file)
+{
+ int ret = 0;
+
+ if (!try_module_get(THIS_MODULE))
+ return -EINVAL;
+
+ struct ethosu_file_priv __free(kfree) *priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ ret = -ENOMEM;
+ goto err_put_mod;
+ }
+ priv->edev = to_ethosu_device(ddev);
+
+ ret = ethosu_job_open(priv);
+ if (ret)
+ goto err_put_mod;
+
+ file->driver_priv = no_free_ptr(priv);
+ return 0;
+
+err_put_mod:
+ module_put(THIS_MODULE);
+ return ret;
+}
+
+static void ethosu_postclose(struct drm_device *ddev, struct drm_file *file)
+{
+ ethosu_job_close(file->driver_priv);
+ kfree(file->driver_priv);
+ module_put(THIS_MODULE);
+}
+
+static const struct drm_ioctl_desc ethosu_drm_driver_ioctls[] = {
+#define ETHOSU_IOCTL(n, func, flags) \
+ DRM_IOCTL_DEF_DRV(ETHOSU_##n, ethosu_ioctl_##func, flags)
+
+ ETHOSU_IOCTL(DEV_QUERY, dev_query, 0),
+ ETHOSU_IOCTL(BO_CREATE, bo_create, 0),
+ ETHOSU_IOCTL(BO_WAIT, bo_wait, 0),
+ ETHOSU_IOCTL(BO_MMAP_OFFSET, bo_mmap_offset, 0),
+ ETHOSU_IOCTL(CMDSTREAM_BO_CREATE, cmdstream_bo_create, 0),
+ ETHOSU_IOCTL(SUBMIT, submit, 0),
+};
+
+DEFINE_DRM_ACCEL_FOPS(ethosu_drm_driver_fops);
+
+/*
+ * Ethosu driver version:
+ * - 1.0 - initial interface
+ */
+static const struct drm_driver ethosu_drm_driver = {
+ .driver_features = DRIVER_COMPUTE_ACCEL | DRIVER_GEM,
+ .open = ethosu_open,
+ .postclose = ethosu_postclose,
+ .ioctls = ethosu_drm_driver_ioctls,
+ .num_ioctls = ARRAY_SIZE(ethosu_drm_driver_ioctls),
+ .fops = &ethosu_drm_driver_fops,
+ .name = "ethosu",
+ .desc = "Arm Ethos-U Accel driver",
+ .major = 1,
+ .minor = 0,
+
+ .gem_create_object = ethosu_gem_create_object,
+};
+
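+/*
+ * AXI limit and memory attribute values programmed after reset: the U65 uses
+ * the AXILIMIT registers, the U85 the AXI_SRAM/AXI_EXT and MEM_ATTR registers.
+ */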
+#define U65_DRAM_AXI_LIMIT_CFG 0x1f3f0002
+#define U65_SRAM_AXI_LIMIT_CFG 0x1f3f00b0
+#define U85_AXI_EXT_CFG 0x00021f3f
+#define U85_AXI_SRAM_CFG 0x00021f3f
+#define U85_MEM_ATTR0_CFG 0x00000000
+#define U85_MEM_ATTR2_CFG 0x000000b7
+
+static int ethosu_reset(struct ethosu_device *ethosudev)
+{
+ int ret;
+ u32 reg;
+
+ writel_relaxed(RESET_PENDING_CSL, ethosudev->regs + NPU_REG_RESET);
+ ret = readl_poll_timeout(ethosudev->regs + NPU_REG_STATUS, reg,
+ !FIELD_GET(STATUS_RESET_STATUS, reg),
+ USEC_PER_MSEC, USEC_PER_SEC);
+ if (ret)
+ return ret;
+
+ if (!FIELD_GET(PROT_ACTIVE_CSL, readl_relaxed(ethosudev->regs + NPU_REG_PROT))) {
+ dev_warn(ethosudev->base.dev, "Could not reset to non-secure mode (PROT = %x)\n",
+ readl_relaxed(ethosudev->regs + NPU_REG_PROT));
+ }
+
+ /*
+ * Assign region 2 (SRAM) to AXI M0 (AXILIMIT0),
+ * everything else to AXI M1 (AXILIMIT2)
+ */
+ writel_relaxed(0x0000aa8a, ethosudev->regs + NPU_REG_REGIONCFG);
+ if (ethosu_is_u65(ethosudev)) {
+ writel_relaxed(U65_SRAM_AXI_LIMIT_CFG, ethosudev->regs + NPU_REG_AXILIMIT0);
+ writel_relaxed(U65_DRAM_AXI_LIMIT_CFG, ethosudev->regs + NPU_REG_AXILIMIT2);
+ } else {
+ writel_relaxed(U85_AXI_SRAM_CFG, ethosudev->regs + NPU_REG_AXI_SRAM);
+ writel_relaxed(U85_AXI_EXT_CFG, ethosudev->regs + NPU_REG_AXI_EXT);
+ writel_relaxed(U85_MEM_ATTR0_CFG, ethosudev->regs + NPU_REG_MEM_ATTR0); // SRAM
+ writel_relaxed(U85_MEM_ATTR2_CFG, ethosudev->regs + NPU_REG_MEM_ATTR2); // DRAM
+ }
+
+ if (ethosudev->sram)
+ memset_io(ethosudev->sram, 0, ethosudev->npu_info.sram_size);
+
+ return 0;
+}
+
+static int ethosu_device_resume(struct device *dev)
+{
+ struct ethosu_device *ethosudev = dev_get_drvdata(dev);
+ int ret;
+
+ ret = clk_bulk_prepare_enable(ethosudev->num_clks, ethosudev->clks);
+ if (ret)
+ return ret;
+
+ ret = ethosu_reset(ethosudev);
+ if (!ret)
+ return 0;
+
+ clk_bulk_disable_unprepare(ethosudev->num_clks, ethosudev->clks);
+ return ret;
+}
+
+static int ethosu_device_suspend(struct device *dev)
+{
+ struct ethosu_device *ethosudev = dev_get_drvdata(dev);
+
+ clk_bulk_disable_unprepare(ethosudev->num_clks, ethosudev->clks);
+ return 0;
+}
+
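+/* Pick up the optional on-chip SRAM pool referenced by the "sram" phandle. */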
+static int ethosu_sram_init(struct ethosu_device *ethosudev)
+{
+ ethosudev->npu_info.sram_size = 0;
+
+ ethosudev->srampool = of_gen_pool_get(ethosudev->base.dev->of_node, "sram", 0);
+ if (!ethosudev->srampool)
+ return 0;
+
+ ethosudev->npu_info.sram_size = gen_pool_size(ethosudev->srampool);
+
+ ethosudev->sram = (void __iomem *)gen_pool_dma_alloc(ethosudev->srampool,
+ ethosudev->npu_info.sram_size,
+ &ethosudev->sramphys);
+ if (!ethosudev->sram) {
+ dev_err(ethosudev->base.dev, "failed to allocate from SRAM pool\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int ethosu_init(struct ethosu_device *ethosudev)
+{
+ int ret;
+ u32 id, config;
+
+ ret = ethosu_device_resume(ethosudev->base.dev);
+ if (ret)
+ return ret;
+
+ pm_runtime_set_autosuspend_delay(ethosudev->base.dev, 50);
+ pm_runtime_use_autosuspend(ethosudev->base.dev);
+ ret = devm_pm_runtime_set_active_enabled(ethosudev->base.dev);
+ if (ret)
+ return ret;
+ pm_runtime_get_noresume(ethosudev->base.dev);
+
+ ethosudev->npu_info.id = id = readl_relaxed(ethosudev->regs + NPU_REG_ID);
+ ethosudev->npu_info.config = config = readl_relaxed(ethosudev->regs + NPU_REG_CONFIG);
+
+ ethosu_sram_init(ethosudev);
+
+ dev_info(ethosudev->base.dev,
+ "Ethos-U NPU, arch v%ld.%ld.%ld, rev r%ldp%ld, cmd stream ver%ld, %d MACs, %dKB SRAM\n",
+ FIELD_GET(ID_ARCH_MAJOR_MASK, id),
+ FIELD_GET(ID_ARCH_MINOR_MASK, id),
+ FIELD_GET(ID_ARCH_PATCH_MASK, id),
+ FIELD_GET(ID_VER_MAJOR_MASK, id),
+ FIELD_GET(ID_VER_MINOR_MASK, id),
+ FIELD_GET(CONFIG_CMD_STREAM_VER_MASK, config),
+ 1 << FIELD_GET(CONFIG_MACS_PER_CC_MASK, config),
+ ethosudev->npu_info.sram_size / 1024);
+
+ return 0;
+}
+
+static int ethosu_probe(struct platform_device *pdev)
+{
+ int ret;
+ struct ethosu_device *ethosudev;
+
+ ethosudev = devm_drm_dev_alloc(&pdev->dev, &ethosu_drm_driver,
+ struct ethosu_device, base);
+ if (IS_ERR(ethosudev))
+		return PTR_ERR(ethosudev);
+ platform_set_drvdata(pdev, ethosudev);
+
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
+	if (ret)
+		return ret;
+
+	ethosudev->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(ethosudev->regs))
+		return PTR_ERR(ethosudev->regs);
+
+ ethosudev->num_clks = devm_clk_bulk_get_all(&pdev->dev, &ethosudev->clks);
+ if (ethosudev->num_clks < 0)
+ return ethosudev->num_clks;
+
+ ret = ethosu_job_init(ethosudev);
+ if (ret)
+ return ret;
+
+ ret = ethosu_init(ethosudev);
+ if (ret)
+ return ret;
+
+ ret = drm_dev_register(&ethosudev->base, 0);
+ if (ret)
+ pm_runtime_dont_use_autosuspend(ethosudev->base.dev);
+
+ pm_runtime_put_autosuspend(ethosudev->base.dev);
+ return ret;
+}
+
+static void ethosu_remove(struct platform_device *pdev)
+{
+ struct ethosu_device *ethosudev = dev_get_drvdata(&pdev->dev);
+
+ drm_dev_unregister(&ethosudev->base);
+ ethosu_job_fini(ethosudev);
+ if (ethosudev->sram)
+ gen_pool_free(ethosudev->srampool, (unsigned long)ethosudev->sram,
+ ethosudev->npu_info.sram_size);
+}
+
+static const struct of_device_id dt_match[] = {
+ { .compatible = "arm,ethos-u65" },
+ { .compatible = "arm,ethos-u85" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, dt_match);
+
+static DEFINE_RUNTIME_DEV_PM_OPS(ethosu_pm_ops,
+ ethosu_device_suspend,
+ ethosu_device_resume,
+ NULL);
+
+static struct platform_driver ethosu_driver = {
+ .probe = ethosu_probe,
+ .remove = ethosu_remove,
+ .driver = {
+ .name = "ethosu",
+ .pm = pm_ptr(&ethosu_pm_ops),
+ .of_match_table = dt_match,
+ },
+};
+module_platform_driver(ethosu_driver);
+
+MODULE_AUTHOR("Rob Herring <robh@kernel.org>");
+MODULE_DESCRIPTION("Arm Ethos-U Accel Driver");
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/accel/ethosu/ethosu_drv.h b/drivers/accel/ethosu/ethosu_drv.h
new file mode 100644
index 000000000000..9e21dfe94184
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_drv.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright 2025 Arm, Ltd. */
+#ifndef __ETHOSU_DRV_H__
+#define __ETHOSU_DRV_H__
+
+#include <drm/gpu_scheduler.h>
+
+struct ethosu_device;
+
+struct ethosu_file_priv {
+ struct ethosu_device *edev;
+ struct drm_sched_entity sched_entity;
+};
+
+#endif
diff --git a/drivers/accel/ethosu/ethosu_gem.c b/drivers/accel/ethosu/ethosu_gem.c
new file mode 100644
index 000000000000..473b5f5d7514
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_gem.c
@@ -0,0 +1,704 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright 2025 Arm, Ltd. */
+
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <drm/ethosu_accel.h>
+
+#include "ethosu_device.h"
+#include "ethosu_gem.h"
+
+static void ethosu_gem_free_object(struct drm_gem_object *obj)
+{
+ struct ethosu_gem_object *bo = to_ethosu_bo(obj);
+
+ kfree(bo->info);
+ drm_gem_free_mmap_offset(&bo->base.base);
+ drm_gem_dma_free(&bo->base);
+}
+
+static int ethosu_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ struct ethosu_gem_object *bo = to_ethosu_bo(obj);
+
+ /* Don't allow mmap on objects that have the NO_MMAP flag set. */
+ if (bo->flags & DRM_ETHOSU_BO_NO_MMAP)
+ return -EINVAL;
+
+ return drm_gem_dma_object_mmap(obj, vma);
+}
+
+static const struct drm_gem_object_funcs ethosu_gem_funcs = {
+ .free = ethosu_gem_free_object,
+ .print_info = drm_gem_dma_object_print_info,
+ .get_sg_table = drm_gem_dma_object_get_sg_table,
+ .vmap = drm_gem_dma_object_vmap,
+ .mmap = ethosu_gem_mmap,
+ .vm_ops = &drm_gem_dma_vm_ops,
+};
+
+/**
+ * ethosu_gem_create_object - Implementation of driver->gem_create_object.
+ * @ddev: DRM device
+ * @size: Size in bytes of the memory the object will reference
+ *
+ * This lets the GEM helpers allocate the driver-specific object struct for
+ * us and hook up the Ethos-U GEM object functions.
+ */
+struct drm_gem_object *ethosu_gem_create_object(struct drm_device *ddev, size_t size)
+{
+ struct ethosu_gem_object *obj;
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ obj->base.base.funcs = &ethosu_gem_funcs;
+ return &obj->base.base;
+}
+
+/**
+ * ethosu_gem_create_with_handle() - Create a GEM object and attach it to a handle.
+ * @file: DRM file.
+ * @ddev: DRM device.
+ * @size: Size of the GEM object to allocate; updated with the actual
+ *        (page-aligned) size on success.
+ * @flags: Combination of drm_ethosu_bo_flags flags.
+ * @handle: Pointer used to return the handle of the new GEM object.
+ *
+ * Return: Zero on success, negative errno on failure.
+ */
+int ethosu_gem_create_with_handle(struct drm_file *file,
+ struct drm_device *ddev,
+ u64 *size, u32 flags, u32 *handle)
+{
+ struct drm_gem_dma_object *mem;
+ struct ethosu_gem_object *bo;
+ int ret;
+
+ mem = drm_gem_dma_create(ddev, *size);
+ if (IS_ERR(mem))
+ return PTR_ERR(mem);
+
+ bo = to_ethosu_bo(&mem->base);
+ bo->flags = flags;
+
+	/*
+	 * Register the object in the file's handle table; the returned
+	 * handle is what userspace uses to refer to it.
+	 */
+ ret = drm_gem_handle_create(file, &mem->base, handle);
+ if (!ret)
+ *size = bo->base.base.size;
+
+ /* drop reference from allocate - handle holds it now. */
+ drm_gem_object_put(&mem->base);
+
+ return ret;
+}
+
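+/*
+ * Decoded command stream state. The validator walks the stream and mirrors
+ * the register state the NPU would build up, so the extent of each region
+ * access can be computed and checked against the backing BOs.
+ */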
+struct dma {
+ s8 region;
+ u64 len;
+ u64 offset;
+ s64 stride[2];
+};
+
+struct dma_state {
+ u16 size0;
+ u16 size1;
+ s8 mode;
+ struct dma src;
+ struct dma dst;
+};
+
+struct buffer {
+ u64 base;
+ u32 length;
+ s8 region;
+};
+
+struct feat_matrix {
+ u64 base[4];
+ s64 stride_x;
+ s64 stride_y;
+ s64 stride_c;
+ s8 region;
+ u8 broadcast;
+ u16 stride_kernel;
+ u16 precision;
+ u16 depth;
+ u16 width;
+ u16 width0;
+ u16 height[3];
+ u8 pad_top;
+ u8 pad_left;
+ u8 pad_bottom;
+ u8 pad_right;
+};
+
+struct cmd_state {
+ struct dma_state dma;
+ struct buffer scale[2];
+ struct buffer weight[4];
+ struct feat_matrix ofm;
+ struct feat_matrix ifm;
+ struct feat_matrix ifm2;
+};
+
+static void cmd_state_init(struct cmd_state *st)
+{
+ /* Initialize to all 1s to detect missing setup */
+ memset(st, 0xff, sizeof(*st));
+}
+
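+/* Assemble a 40-bit address from the payload word and bits 16-23 of word 0. */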
+static u64 cmd_to_addr(u32 *cmd)
+{
+	return ((u64)(cmd[0] & 0xff0000) << 16) | cmd[1];
+}
+
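+/*
+ * Compute the number of bytes a DMA descriptor touches, including the outer
+ * strides in 2D/3D modes, and update the high-water mark of its region.
+ */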
+static u64 dma_length(struct ethosu_validated_cmdstream_info *info,
+ struct dma_state *dma_st, struct dma *dma)
+{
+ s8 mode = dma_st->mode;
+ u64 len = dma->len;
+
+ if (mode >= 1) {
+ len += dma->stride[0];
+ len *= dma_st->size0;
+ }
+ if (mode == 2) {
+ len += dma->stride[1];
+ len *= dma_st->size1;
+ }
+ if (dma->region >= 0)
+ info->region_size[dma->region] = max(info->region_size[dma->region],
+ len + dma->offset);
+
+ return len;
+}
+
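+/*
+ * Compute the offset of element (x, y, c) of a feature map, accounting for
+ * the tile layout and the NHWC/NHCWB16 formats, and update the high-water
+ * mark of the feature map's region.
+ */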
+static u64 feat_matrix_length(struct ethosu_validated_cmdstream_info *info,
+ struct feat_matrix *fm,
+ u32 x, u32 y, u32 c)
+{
+ u32 element_size, storage = fm->precision >> 14;
+ int tile = 0;
+ u64 addr;
+
+ if (fm->region < 0)
+ return U64_MAX;
+
+ switch (storage) {
+ case 0:
+ if (x >= fm->width0 + 1) {
+ x -= fm->width0 + 1;
+ tile += 1;
+ }
+ if (y >= fm->height[tile] + 1) {
+ y -= fm->height[tile] + 1;
+ tile += 2;
+ }
+ break;
+ case 1:
+ if (y >= fm->height[1] + 1) {
+ y -= fm->height[1] + 1;
+ tile = 2;
+ } else if (y >= fm->height[0] + 1) {
+ y -= fm->height[0] + 1;
+ tile = 1;
+ }
+ break;
+ }
+ if (fm->base[tile] == U64_MAX)
+ return U64_MAX;
+
+ addr = fm->base[tile] + y * fm->stride_y;
+
+ switch ((fm->precision >> 6) & 0x3) { // format
+ case 0: //nhwc:
+ addr += x * fm->stride_x + c;
+ break;
+ case 1: //nhcwb16:
+ element_size = BIT((fm->precision >> 1) & 0x3);
+
+ addr += (c / 16) * fm->stride_c + (16 * x + (c & 0xf)) * element_size;
+ break;
+ }
+
+ info->region_size[fm->region] = max(info->region_size[fm->region], addr + 1);
+
+ return addr;
+}
+
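+/*
+ * Account for the IFM, IFM2, weight, scale and OFM accesses of a
+ * convolution, depthwise or pooling operation and mark the OFM region as an
+ * output.
+ */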
+static int calc_sizes(struct drm_device *ddev,
+ struct ethosu_validated_cmdstream_info *info,
+ u16 op, struct cmd_state *st,
+ bool ifm, bool ifm2, bool weight, bool scale)
+{
+ u64 len;
+
+ if (ifm) {
+ if (st->ifm.stride_kernel == U16_MAX)
+ return -EINVAL;
+ u32 stride_y = ((st->ifm.stride_kernel >> 8) & 0x2) +
+ ((st->ifm.stride_kernel >> 1) & 0x1) + 1;
+ u32 stride_x = ((st->ifm.stride_kernel >> 5) & 0x2) +
+ (st->ifm.stride_kernel & 0x1) + 1;
+ u32 ifm_height = st->ofm.height[2] * stride_y +
+ st->ifm.height[2] - (st->ifm.pad_top + st->ifm.pad_bottom);
+ u32 ifm_width = st->ofm.width * stride_x +
+ st->ifm.width - (st->ifm.pad_left + st->ifm.pad_right);
+
+ len = feat_matrix_length(info, &st->ifm, ifm_width,
+ ifm_height, st->ifm.depth);
+ dev_dbg(ddev->dev, "op %d: IFM:%d:0x%llx-0x%llx\n",
+ op, st->ifm.region, st->ifm.base[0], len);
+ if (len == U64_MAX)
+ return -EINVAL;
+ }
+
+ if (ifm2) {
+ len = feat_matrix_length(info, &st->ifm2, st->ifm.depth,
+ 0, st->ofm.depth);
+ dev_dbg(ddev->dev, "op %d: IFM2:%d:0x%llx-0x%llx\n",
+ op, st->ifm2.region, st->ifm2.base[0], len);
+ if (len == U64_MAX)
+ return -EINVAL;
+ }
+
+ if (weight) {
+ dev_dbg(ddev->dev, "op %d: W:%d:0x%llx-0x%llx\n",
+ op, st->weight[0].region, st->weight[0].base,
+ st->weight[0].base + st->weight[0].length - 1);
+ if (st->weight[0].region < 0 || st->weight[0].base == U64_MAX ||
+ st->weight[0].length == U32_MAX)
+ return -EINVAL;
+ info->region_size[st->weight[0].region] =
+ max(info->region_size[st->weight[0].region],
+ st->weight[0].base + st->weight[0].length);
+ }
+
+ if (scale) {
+ dev_dbg(ddev->dev, "op %d: S:%d:0x%llx-0x%llx\n",
+ op, st->scale[0].region, st->scale[0].base,
+ st->scale[0].base + st->scale[0].length - 1);
+ if (st->scale[0].region < 0 || st->scale[0].base == U64_MAX ||
+ st->scale[0].length == U32_MAX)
+ return -EINVAL;
+ info->region_size[st->scale[0].region] =
+ max(info->region_size[st->scale[0].region],
+ st->scale[0].base + st->scale[0].length);
+ }
+
+ len = feat_matrix_length(info, &st->ofm, st->ofm.width,
+ st->ofm.height[2], st->ofm.depth);
+ dev_dbg(ddev->dev, "op %d: OFM:%d:0x%llx-0x%llx\n",
+ op, st->ofm.region, st->ofm.base[0], len);
+ if (len == U64_MAX)
+ return -EINVAL;
+ info->output_region[st->ofm.region] = true;
+
+ return 0;
+}
+
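+/* Like calc_sizes(), but for elementwise operations with broadcasting. */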
+static int calc_sizes_elemwise(struct drm_device *ddev,
+ struct ethosu_validated_cmdstream_info *info,
+ u16 op, struct cmd_state *st,
+ bool ifm, bool ifm2)
+{
+ u32 height, width, depth;
+ u64 len;
+
+ if (ifm) {
+ height = st->ifm.broadcast & 0x1 ? 0 : st->ofm.height[2];
+ width = st->ifm.broadcast & 0x2 ? 0 : st->ofm.width;
+ depth = st->ifm.broadcast & 0x4 ? 0 : st->ofm.depth;
+
+ len = feat_matrix_length(info, &st->ifm, width,
+ height, depth);
+ dev_dbg(ddev->dev, "op %d: IFM:%d:0x%llx-0x%llx\n",
+ op, st->ifm.region, st->ifm.base[0], len);
+ if (len == U64_MAX)
+ return -EINVAL;
+ }
+
+ if (ifm2) {
+ height = st->ifm2.broadcast & 0x1 ? 0 : st->ofm.height[2];
+ width = st->ifm2.broadcast & 0x2 ? 0 : st->ofm.width;
+ depth = st->ifm2.broadcast & 0x4 ? 0 : st->ofm.depth;
+
+ len = feat_matrix_length(info, &st->ifm2, width,
+ height, depth);
+ dev_dbg(ddev->dev, "op %d: IFM2:%d:0x%llx-0x%llx\n",
+ op, st->ifm2.region, st->ifm2.base[0], len);
+ if (len == U64_MAX)
+ return -EINVAL;
+ }
+
+ len = feat_matrix_length(info, &st->ofm, st->ofm.width,
+ st->ofm.height[2], st->ofm.depth);
+ dev_dbg(ddev->dev, "op %d: OFM:%d:0x%llx-0x%llx\n",
+ op, st->ofm.region, st->ofm.base[0], len);
+ if (len == U64_MAX)
+ return -EINVAL;
+ info->output_region[st->ofm.region] = true;
+
+ return 0;
+}
+
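+/*
+ * Copy the command stream from userspace into the BO while decoding it, so
+ * that the size accessed in each region is known before jobs are submitted.
+ */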
+static int ethosu_gem_cmdstream_copy_and_validate(struct drm_device *ddev,
+ u32 __user *ucmds,
+ struct ethosu_gem_object *bo,
+ u32 size)
+{
+ struct ethosu_validated_cmdstream_info __free(kfree) *info = kzalloc(sizeof(*info), GFP_KERNEL);
+ struct ethosu_device *edev = to_ethosu_device(ddev);
+ u32 *bocmds = bo->base.vaddr;
+ struct cmd_state st;
+ int i, ret;
+
+ if (!info)
+ return -ENOMEM;
+ info->cmd_size = size;
+
+ cmd_state_init(&st);
+
+ for (i = 0; i < size / 4; i++) {
+ bool use_ifm, use_ifm2, use_scale;
+ u64 dstlen, srclen;
+ u16 cmd, param;
+ u32 cmds[2];
+ u64 addr;
+
+ if (get_user(cmds[0], ucmds++))
+ return -EFAULT;
+
+ bocmds[i] = cmds[0];
+
+ cmd = cmds[0];
+ param = cmds[0] >> 16;
+
+		if (cmd & 0x4000) {
+			/* 64-bit command: the payload word must be inside the stream */
+			if (i + 1 >= size / 4)
+				return -EINVAL;
+
+			if (get_user(cmds[1], ucmds++))
+				return -EFAULT;
+
+			i++;
+			bocmds[i] = cmds[1];
+			addr = cmd_to_addr(cmds);
+		}
+
+ switch (cmd) {
+ case NPU_OP_DMA_START:
+ srclen = dma_length(info, &st.dma, &st.dma.src);
+ dstlen = dma_length(info, &st.dma, &st.dma.dst);
+
+ if (st.dma.dst.region >= 0)
+ info->output_region[st.dma.dst.region] = true;
+ dev_dbg(ddev->dev, "cmd: DMA SRC:%d:0x%llx+0x%llx DST:%d:0x%llx+0x%llx\n",
+ st.dma.src.region, st.dma.src.offset, srclen,
+ st.dma.dst.region, st.dma.dst.offset, dstlen);
+ break;
+ case NPU_OP_CONV:
+ case NPU_OP_DEPTHWISE:
+ use_ifm2 = param & 0x1; // weights_ifm2
+ use_scale = !(st.ofm.precision & 0x100);
+ ret = calc_sizes(ddev, info, cmd, &st, true, use_ifm2,
+ !use_ifm2, use_scale);
+ if (ret)
+ return ret;
+ break;
+ case NPU_OP_POOL:
+ use_ifm = param != 0x4; // pooling mode
+ use_scale = !(st.ofm.precision & 0x100);
+ ret = calc_sizes(ddev, info, cmd, &st, use_ifm, false,
+ false, use_scale);
+ if (ret)
+ return ret;
+ break;
+ case NPU_OP_ELEMENTWISE:
+ use_ifm2 = !((st.ifm2.broadcast == 8) || (param == 5) ||
+ (param == 6) || (param == 7) || (param == 0x24));
+ use_ifm = st.ifm.broadcast != 8;
+ ret = calc_sizes_elemwise(ddev, info, cmd, &st, use_ifm, use_ifm2);
+ if (ret)
+ return ret;
+ break;
+ case NPU_OP_RESIZE: // U85 only
+ WARN_ON(1); // TODO
+ break;
+ case NPU_SET_KERNEL_WIDTH_M1:
+ st.ifm.width = param;
+ break;
+ case NPU_SET_KERNEL_HEIGHT_M1:
+ st.ifm.height[2] = param;
+ break;
+ case NPU_SET_KERNEL_STRIDE:
+ st.ifm.stride_kernel = param;
+ break;
+ case NPU_SET_IFM_PAD_TOP:
+ st.ifm.pad_top = param & 0x7f;
+ break;
+ case NPU_SET_IFM_PAD_LEFT:
+ st.ifm.pad_left = param & 0x7f;
+ break;
+ case NPU_SET_IFM_PAD_RIGHT:
+ st.ifm.pad_right = param & 0xff;
+ break;
+ case NPU_SET_IFM_PAD_BOTTOM:
+ st.ifm.pad_bottom = param & 0xff;
+ break;
+ case NPU_SET_IFM_DEPTH_M1:
+ st.ifm.depth = param;
+ break;
+ case NPU_SET_IFM_PRECISION:
+ st.ifm.precision = param;
+ break;
+ case NPU_SET_IFM_BROADCAST:
+ st.ifm.broadcast = param;
+ break;
+ case NPU_SET_IFM_REGION:
+ st.ifm.region = param & 0x7f;
+ break;
+ case NPU_SET_IFM_WIDTH0_M1:
+ st.ifm.width0 = param;
+ break;
+ case NPU_SET_IFM_HEIGHT0_M1:
+ st.ifm.height[0] = param;
+ break;
+ case NPU_SET_IFM_HEIGHT1_M1:
+ st.ifm.height[1] = param;
+ break;
+ case NPU_SET_IFM_BASE0:
+ case NPU_SET_IFM_BASE1:
+ case NPU_SET_IFM_BASE2:
+ case NPU_SET_IFM_BASE3:
+ st.ifm.base[cmd & 0x3] = addr;
+ break;
+ case NPU_SET_IFM_STRIDE_X:
+ st.ifm.stride_x = addr;
+ break;
+ case NPU_SET_IFM_STRIDE_Y:
+ st.ifm.stride_y = addr;
+ break;
+ case NPU_SET_IFM_STRIDE_C:
+ st.ifm.stride_c = addr;
+ break;
+
+ case NPU_SET_OFM_WIDTH_M1:
+ st.ofm.width = param;
+ break;
+ case NPU_SET_OFM_HEIGHT_M1:
+ st.ofm.height[2] = param;
+ break;
+ case NPU_SET_OFM_DEPTH_M1:
+ st.ofm.depth = param;
+ break;
+ case NPU_SET_OFM_PRECISION:
+ st.ofm.precision = param;
+ break;
+ case NPU_SET_OFM_REGION:
+ st.ofm.region = param & 0x7;
+ break;
+ case NPU_SET_OFM_WIDTH0_M1:
+ st.ofm.width0 = param;
+ break;
+ case NPU_SET_OFM_HEIGHT0_M1:
+ st.ofm.height[0] = param;
+ break;
+ case NPU_SET_OFM_HEIGHT1_M1:
+ st.ofm.height[1] = param;
+ break;
+ case NPU_SET_OFM_BASE0:
+ case NPU_SET_OFM_BASE1:
+ case NPU_SET_OFM_BASE2:
+ case NPU_SET_OFM_BASE3:
+ st.ofm.base[cmd & 0x3] = addr;
+ break;
+ case NPU_SET_OFM_STRIDE_X:
+ st.ofm.stride_x = addr;
+ break;
+ case NPU_SET_OFM_STRIDE_Y:
+ st.ofm.stride_y = addr;
+ break;
+ case NPU_SET_OFM_STRIDE_C:
+ st.ofm.stride_c = addr;
+ break;
+
+ case NPU_SET_IFM2_BROADCAST:
+ st.ifm2.broadcast = param;
+ break;
+ case NPU_SET_IFM2_PRECISION:
+ st.ifm2.precision = param;
+ break;
+ case NPU_SET_IFM2_REGION:
+ st.ifm2.region = param & 0x7;
+ break;
+ case NPU_SET_IFM2_WIDTH0_M1:
+ st.ifm2.width0 = param;
+ break;
+ case NPU_SET_IFM2_HEIGHT0_M1:
+ st.ifm2.height[0] = param;
+ break;
+ case NPU_SET_IFM2_HEIGHT1_M1:
+ st.ifm2.height[1] = param;
+ break;
+ case NPU_SET_IFM2_BASE0:
+ case NPU_SET_IFM2_BASE1:
+ case NPU_SET_IFM2_BASE2:
+ case NPU_SET_IFM2_BASE3:
+ st.ifm2.base[cmd & 0x3] = addr;
+ break;
+ case NPU_SET_IFM2_STRIDE_X:
+ st.ifm2.stride_x = addr;
+ break;
+ case NPU_SET_IFM2_STRIDE_Y:
+ st.ifm2.stride_y = addr;
+ break;
+ case NPU_SET_IFM2_STRIDE_C:
+ st.ifm2.stride_c = addr;
+ break;
+
+ case NPU_SET_WEIGHT_REGION:
+ st.weight[0].region = param & 0x7;
+ break;
+ case NPU_SET_SCALE_REGION:
+ st.scale[0].region = param & 0x7;
+ break;
+ case NPU_SET_WEIGHT_BASE:
+ st.weight[0].base = addr;
+ break;
+ case NPU_SET_WEIGHT_LENGTH:
+ st.weight[0].length = cmds[1];
+ break;
+ case NPU_SET_SCALE_BASE:
+ st.scale[0].base = addr;
+ break;
+ case NPU_SET_SCALE_LENGTH:
+ st.scale[0].length = cmds[1];
+ break;
+ case NPU_SET_WEIGHT1_BASE:
+ st.weight[1].base = addr;
+ break;
+ case NPU_SET_WEIGHT1_LENGTH:
+ st.weight[1].length = cmds[1];
+ break;
+ case NPU_SET_SCALE1_BASE: // NPU_SET_WEIGHT2_BASE (U85)
+ if (ethosu_is_u65(edev))
+ st.scale[1].base = addr;
+ else
+ st.weight[2].base = addr;
+ break;
+ case NPU_SET_SCALE1_LENGTH: // NPU_SET_WEIGHT2_LENGTH (U85)
+ if (ethosu_is_u65(edev))
+ st.scale[1].length = cmds[1];
+ else
+				st.weight[2].length = cmds[1];
+ break;
+ case NPU_SET_WEIGHT3_BASE:
+ st.weight[3].base = addr;
+ break;
+ case NPU_SET_WEIGHT3_LENGTH:
+ st.weight[3].length = cmds[1];
+ break;
+
+ case NPU_SET_DMA0_SRC_REGION:
+ if (param & 0x100)
+ st.dma.src.region = -1;
+ else
+ st.dma.src.region = param & 0x7;
+ st.dma.mode = (param >> 9) & 0x3;
+ break;
+ case NPU_SET_DMA0_DST_REGION:
+ if (param & 0x100)
+ st.dma.dst.region = -1;
+ else
+ st.dma.dst.region = param & 0x7;
+ break;
+ case NPU_SET_DMA0_SIZE0:
+ st.dma.size0 = param;
+ break;
+ case NPU_SET_DMA0_SIZE1:
+ st.dma.size1 = param;
+ break;
+ case NPU_SET_DMA0_SRC_STRIDE0:
+ st.dma.src.stride[0] = ((s64)addr << 24) >> 24;
+ break;
+ case NPU_SET_DMA0_SRC_STRIDE1:
+ st.dma.src.stride[1] = ((s64)addr << 24) >> 24;
+ break;
+ case NPU_SET_DMA0_DST_STRIDE0:
+ st.dma.dst.stride[0] = ((s64)addr << 24) >> 24;
+ break;
+ case NPU_SET_DMA0_DST_STRIDE1:
+ st.dma.dst.stride[1] = ((s64)addr << 24) >> 24;
+ break;
+ case NPU_SET_DMA0_SRC:
+ st.dma.src.offset = addr;
+ break;
+ case NPU_SET_DMA0_DST:
+ st.dma.dst.offset = addr;
+ break;
+ case NPU_SET_DMA0_LEN:
+ st.dma.src.len = st.dma.dst.len = addr;
+ break;
+ default:
+ break;
+ }
+ }
+
+ for (i = 0; i < NPU_BASEP_REGION_MAX; i++) {
+ if (!info->region_size[i])
+ continue;
+ dev_dbg(ddev->dev, "region %d max size: 0x%llx\n",
+ i, info->region_size[i]);
+ }
+
+ bo->info = no_free_ptr(info);
+ return 0;
+}
+
+/**
+ * ethosu_gem_cmdstream_create() - Create a GEM object and attach it to a handle.
+ * @file: DRM file.
+ * @ddev: DRM device.
+ * @size: Size of the command stream GEM object to allocate.
+ * @data: User pointer to the command stream to copy and validate.
+ * @flags: Combination of drm_ethosu_bo_flags flags.
+ * @handle: Pointer used to return the handle of the new GEM object.
+ *
+ * Return: Zero on success, negative errno on failure.
+ */
+int ethosu_gem_cmdstream_create(struct drm_file *file,
+ struct drm_device *ddev,
+ u32 size, u64 data, u32 flags, u32 *handle)
+{
+ int ret;
+ struct drm_gem_dma_object *mem;
+ struct ethosu_gem_object *bo;
+
+ mem = drm_gem_dma_create(ddev, size);
+ if (IS_ERR(mem))
+ return PTR_ERR(mem);
+
+ bo = to_ethosu_bo(&mem->base);
+ bo->flags = flags;
+
+ ret = ethosu_gem_cmdstream_copy_and_validate(ddev,
+ (void __user *)(uintptr_t)data,
+ bo, size);
+ if (ret)
+ goto fail;
+
+	/*
+	 * Register the object in the file's handle table; the returned
+	 * handle is what userspace uses to refer to it.
+	 */
+ ret = drm_gem_handle_create(file, &mem->base, handle);
+
+fail:
+ /* drop reference from allocate - handle holds it now. */
+ drm_gem_object_put(&mem->base);
+
+ return ret;
+}
diff --git a/drivers/accel/ethosu/ethosu_gem.h b/drivers/accel/ethosu/ethosu_gem.h
new file mode 100644
index 000000000000..3922895a60fb
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_gem.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright 2025 Arm, Ltd. */
+
+#ifndef __ETHOSU_GEM_H__
+#define __ETHOSU_GEM_H__
+
+#include "ethosu_device.h"
+#include <drm/drm_gem_dma_helper.h>
+
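+/*
+ * Result of command stream validation: the stream size plus, per region, the
+ * largest offset accessed and whether the region is written.
+ */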
+struct ethosu_validated_cmdstream_info {
+ u32 cmd_size;
+ u64 region_size[NPU_BASEP_REGION_MAX];
+ bool output_region[NPU_BASEP_REGION_MAX];
+};
+
+/**
+ * struct ethosu_gem_object - Driver specific GEM object.
+ */
+struct ethosu_gem_object {
+	/** @base: Inherit from drm_gem_dma_object. */
+ struct drm_gem_dma_object base;
+
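+	/** @info: Validated command stream info; NULL for ordinary BOs. */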
+ struct ethosu_validated_cmdstream_info *info;
+
+ /** @flags: Combination of drm_ethosu_bo_flags flags. */
+ u32 flags;
+};
+
+static inline
+struct ethosu_gem_object *to_ethosu_bo(struct drm_gem_object *obj)
+{
+ return container_of(to_drm_gem_dma_obj(obj), struct ethosu_gem_object, base);
+}
+
+struct drm_gem_object *ethosu_gem_create_object(struct drm_device *ddev,
+ size_t size);
+
+int ethosu_gem_create_with_handle(struct drm_file *file,
+ struct drm_device *ddev,
+				  u64 *size, u32 flags, u32 *handle);
+
+int ethosu_gem_cmdstream_create(struct drm_file *file,
+ struct drm_device *ddev,
+ u32 size, u64 data, u32 flags, u32 *handle);
+
+#endif /* __ETHOSU_GEM_H__ */
diff --git a/drivers/accel/ethosu/ethosu_job.c b/drivers/accel/ethosu/ethosu_job.c
new file mode 100644
index 000000000000..26e7a2f64d71
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_job.c
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+/* Copyright 2025 Arm, Ltd. */
+
+#include <linux/bitfield.h>
+#include <linux/genalloc.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_dma_helper.h>
+#include <drm/drm_print.h>
+#include <drm/ethosu_accel.h>
+
+#include "ethosu_device.h"
+#include "ethosu_drv.h"
+#include "ethosu_gem.h"
+#include "ethosu_job.h"
+
+#define JOB_TIMEOUT_MS 500
+
+static struct ethosu_job *to_ethosu_job(struct drm_sched_job *sched_job)
+{
+ return container_of(sched_job, struct ethosu_job, base);
+}
+
+static const char *ethosu_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "ethosu";
+}
+
+static const char *ethosu_fence_get_timeline_name(struct dma_fence *fence)
+{
+ return "ethosu-npu";
+}
+
+static const struct dma_fence_ops ethosu_fence_ops = {
+ .get_driver_name = ethosu_fence_get_driver_name,
+ .get_timeline_name = ethosu_fence_get_timeline_name,
+};
+
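+/*
+ * Program the region base pointers (BASEP), the command queue base and size,
+ * and kick the NPU. Called from the scheduler's run_job path.
+ */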
+static void ethosu_job_hw_submit(struct ethosu_device *dev, struct ethosu_job *job)
+{
+ struct drm_gem_dma_object *cmd_bo = to_drm_gem_dma_obj(job->cmd_bo);
+ struct ethosu_validated_cmdstream_info *cmd_info = to_ethosu_bo(job->cmd_bo)->info;
+
+ for (int i = 0; i < job->region_cnt; i++) {
+ struct drm_gem_dma_object *bo;
+ int region = job->region_bo_num[i];
+
+ bo = to_drm_gem_dma_obj(job->region_bo[i]);
+ writel_relaxed(lower_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP(region));
+ writel_relaxed(upper_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP_HI(region));
+ dev_dbg(dev->base.dev, "Region %d base addr = %pad\n", region, &bo->dma_addr);
+ }
+
+ if (job->sram_size) {
+ writel_relaxed(lower_32_bits(dev->sramphys),
+ dev->regs + NPU_REG_BASEP(ETHOSU_SRAM_REGION));
+ writel_relaxed(upper_32_bits(dev->sramphys),
+ dev->regs + NPU_REG_BASEP_HI(ETHOSU_SRAM_REGION));
+ dev_dbg(dev->base.dev, "Region %d base addr = %pad (SRAM)\n",
+ ETHOSU_SRAM_REGION, &dev->sramphys);
+ }
+
+ writel_relaxed(lower_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE);
+ writel_relaxed(upper_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE_HI);
+ writel_relaxed(cmd_info->cmd_size, dev->regs + NPU_REG_QSIZE);
+
+ writel(CMD_TRANSITION_TO_RUN, dev->regs + NPU_REG_CMD);
+
+ dev_dbg(dev->base.dev,
+ "Submitted cmd at %pad to core\n", &cmd_bo->dma_addr);
+}
+
+static int ethosu_acquire_object_fences(struct ethosu_job *job)
+{
+ int i, ret;
+ struct drm_gem_object **bos = job->region_bo;
+ struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;
+
+ for (i = 0; i < job->region_cnt; i++) {
+ bool is_write;
+
+ if (!bos[i])
+ break;
+
+ ret = dma_resv_reserve_fences(bos[i]->resv, 1);
+ if (ret)
+ return ret;
+
+ is_write = info->output_region[job->region_bo_num[i]];
+ ret = drm_sched_job_add_implicit_dependencies(&job->base, bos[i],
+ is_write);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void ethosu_attach_object_fences(struct ethosu_job *job)
+{
+ int i;
+ struct dma_fence *fence = job->inference_done_fence;
+ struct drm_gem_object **bos = job->region_bo;
+ struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;
+
+ for (i = 0; i < job->region_cnt; i++)
+ if (info->output_region[job->region_bo_num[i]])
+ dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
+}
+
+static int ethosu_job_push(struct ethosu_job *job)
+{
+ struct ww_acquire_ctx acquire_ctx;
+ int ret;
+
+ ret = drm_gem_lock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
+ if (ret)
+ return ret;
+
+ ret = ethosu_acquire_object_fences(job);
+ if (ret)
+ goto out;
+
+ ret = pm_runtime_resume_and_get(job->dev->base.dev);
+ if (!ret) {
+ guard(mutex)(&job->dev->sched_lock);
+
+ drm_sched_job_arm(&job->base);
+ job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished);
+ kref_get(&job->refcount); /* put by scheduler job completion */
+ drm_sched_entity_push_job(&job->base);
+ ethosu_attach_object_fences(job);
+ }
+
+out:
+ drm_gem_unlock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
+ return ret;
+}
+
+static void ethosu_job_cleanup(struct kref *ref)
+{
+ struct ethosu_job *job = container_of(ref, struct ethosu_job,
+ refcount);
+ unsigned int i;
+
+ pm_runtime_put_autosuspend(job->dev->base.dev);
+
+ dma_fence_put(job->done_fence);
+ dma_fence_put(job->inference_done_fence);
+
+ for (i = 0; i < job->region_cnt; i++)
+ drm_gem_object_put(job->region_bo[i]);
+
+ drm_gem_object_put(job->cmd_bo);
+
+ kfree(job);
+}
+
+static void ethosu_job_put(struct ethosu_job *job)
+{
+ kref_put(&job->refcount, ethosu_job_cleanup);
+}
+
+static void ethosu_job_free(struct drm_sched_job *sched_job)
+{
+ struct ethosu_job *job = to_ethosu_job(sched_job);
+
+ drm_sched_job_cleanup(sched_job);
+ ethosu_job_put(job);
+}
+
+static struct dma_fence *ethosu_job_run(struct drm_sched_job *sched_job)
+{
+ struct ethosu_job *job = to_ethosu_job(sched_job);
+ struct ethosu_device *dev = job->dev;
+ struct dma_fence *fence = job->done_fence;
+
+ if (unlikely(job->base.s_fence->finished.error))
+ return NULL;
+
+ dma_fence_init(fence, &ethosu_fence_ops, &dev->fence_lock,
+ dev->fence_context, ++dev->emit_seqno);
+ dma_fence_get(fence);
+
+ scoped_guard(mutex, &dev->job_lock) {
+ dev->in_flight_job = job;
+ ethosu_job_hw_submit(dev, job);
+ }
+
+ return fence;
+}
+
+static void ethosu_job_handle_irq(struct ethosu_device *dev)
+{
+ u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);
+
+ if (status & (STATUS_BUS_STATUS | STATUS_CMD_PARSE_ERR)) {
+ dev_err(dev->base.dev, "Error IRQ - %x\n", status);
+ drm_sched_fault(&dev->sched);
+ return;
+ }
+
+ scoped_guard(mutex, &dev->job_lock) {
+ if (dev->in_flight_job) {
+ dma_fence_signal(dev->in_flight_job->done_fence);
+ dev->in_flight_job = NULL;
+ }
+ }
+}
+
+static irqreturn_t ethosu_job_irq_handler_thread(int irq, void *data)
+{
+ struct ethosu_device *dev = data;
+
+ ethosu_job_handle_irq(dev);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ethosu_job_irq_handler(int irq, void *data)
+{
+ struct ethosu_device *dev = data;
+ u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);
+
+ if (!(status & STATUS_IRQ_RAISED))
+ return IRQ_NONE;
+
+ writel_relaxed(CMD_CLEAR_IRQ, dev->regs + NPU_REG_CMD);
+ return IRQ_WAKE_THREAD;
+}
+
+static enum drm_gpu_sched_stat ethosu_job_timedout(struct drm_sched_job *bad)
+{
+ struct ethosu_job *job = to_ethosu_job(bad);
+ struct ethosu_device *dev = job->dev;
+ bool running;
+ u32 *bocmds = to_drm_gem_dma_obj(job->cmd_bo)->vaddr;
+ u32 cmdaddr;
+
+ cmdaddr = readl_relaxed(dev->regs + NPU_REG_QREAD);
+ running = FIELD_GET(STATUS_STATE_RUNNING, readl_relaxed(dev->regs + NPU_REG_STATUS));
+
+ if (running) {
+ int ret;
+ u32 reg;
+
+ ret = readl_relaxed_poll_timeout(dev->regs + NPU_REG_QREAD,
+ reg,
+ reg != cmdaddr,
+ USEC_PER_MSEC, 100 * USEC_PER_MSEC);
+
+ /* If still running and progress is being made, just return */
+ if (!ret)
+ return DRM_GPU_SCHED_STAT_NO_HANG;
+ }
+
+ dev_err(dev->base.dev, "NPU sched timed out: NPU %s, cmdstream offset 0x%x: 0x%x\n",
+ running ? "running" : "stopped",
+ cmdaddr, bocmds[cmdaddr / 4]);
+
+ drm_sched_stop(&dev->sched, bad);
+
+ scoped_guard(mutex, &dev->job_lock)
+ dev->in_flight_job = NULL;
+
+ /* Proceed with reset now. */
+ pm_runtime_force_suspend(dev->base.dev);
+ pm_runtime_force_resume(dev->base.dev);
+
+ /* Restart the scheduler */
+ drm_sched_start(&dev->sched, 0);
+
+ return DRM_GPU_SCHED_STAT_RESET;
+}
+
+static const struct drm_sched_backend_ops ethosu_sched_ops = {
+ .run_job = ethosu_job_run,
+ .timedout_job = ethosu_job_timedout,
+ .free_job = ethosu_job_free
+};
+
+int ethosu_job_init(struct ethosu_device *edev)
+{
+ struct device *dev = edev->base.dev;
+ struct drm_sched_init_args args = {
+ .ops = &ethosu_sched_ops,
+ .num_rqs = DRM_SCHED_PRIORITY_COUNT,
+ .credit_limit = 1,
+ .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
+ .name = dev_name(dev),
+ .dev = dev,
+ };
+ int ret;
+
+ spin_lock_init(&edev->fence_lock);
+ ret = devm_mutex_init(dev, &edev->job_lock);
+ if (ret)
+ return ret;
+ ret = devm_mutex_init(dev, &edev->sched_lock);
+ if (ret)
+ return ret;
+
+ edev->irq = platform_get_irq(to_platform_device(dev), 0);
+ if (edev->irq < 0)
+ return edev->irq;
+
+ ret = devm_request_threaded_irq(dev, edev->irq,
+ ethosu_job_irq_handler,
+ ethosu_job_irq_handler_thread,
+ IRQF_SHARED, KBUILD_MODNAME,
+ edev);
+ if (ret) {
+ dev_err(dev, "failed to request irq\n");
+ return ret;
+ }
+
+ edev->fence_context = dma_fence_context_alloc(1);
+
+ ret = drm_sched_init(&edev->sched, &args);
+ if (ret) {
+ dev_err(dev, "Failed to create scheduler: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+void ethosu_job_fini(struct ethosu_device *dev)
+{
+ drm_sched_fini(&dev->sched);
+}
+
+int ethosu_job_open(struct ethosu_file_priv *ethosu_priv)
+{
+ struct ethosu_device *dev = ethosu_priv->edev;
+ struct drm_gpu_scheduler *sched = &dev->sched;
+ int ret;
+
+	ret = drm_sched_entity_init(&ethosu_priv->sched_entity,
+				    DRM_SCHED_PRIORITY_NORMAL,
+				    &sched, 1, NULL);
+	WARN_ON(ret);
+	return ret;
+}
+
+void ethosu_job_close(struct ethosu_file_priv *ethosu_priv)
+{
+ struct drm_sched_entity *entity = &ethosu_priv->sched_entity;
+
+ drm_sched_entity_destroy(entity);
+}
+
+static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file,
+ struct drm_ethosu_job *job)
+{
+ struct ethosu_device *edev = to_ethosu_device(dev);
+ struct ethosu_file_priv *file_priv = file->driver_priv;
+ struct ethosu_job *ejob = NULL;
+ struct ethosu_validated_cmdstream_info *cmd_info;
+ int ret = 0;
+
+ /* BO region 2 is reserved if SRAM is used */
+ if (job->region_bo_handles[ETHOSU_SRAM_REGION] && job->sram_size)
+ return -EINVAL;
+
+ if (edev->npu_info.sram_size < job->sram_size)
+ return -EINVAL;
+
+ ejob = kzalloc(sizeof(*ejob), GFP_KERNEL);
+ if (!ejob)
+ return -ENOMEM;
+
+ kref_init(&ejob->refcount);
+
+ ejob->dev = edev;
+ ejob->sram_size = job->sram_size;
+
+ ejob->done_fence = kzalloc(sizeof(*ejob->done_fence), GFP_KERNEL);
+ if (!ejob->done_fence) {
+ ret = -ENOMEM;
+		goto out_put_job;
+ }
+
+ ret = drm_sched_job_init(&ejob->base,
+ &file_priv->sched_entity,
+ 1, NULL, file->client_id);
+ if (ret)
+ goto out_put_job;
+
+ ejob->cmd_bo = drm_gem_object_lookup(file, job->cmd_bo);
+ if (!ejob->cmd_bo) {
+ ret = -ENOENT;
+ goto out_cleanup_job;
+ }
+ cmd_info = to_ethosu_bo(ejob->cmd_bo)->info;
+ if (!cmd_info) {
+ ret = -EINVAL;
+ goto out_cleanup_job;
+ }
+
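+	/*
+	 * Look up the BO backing each region referenced by the command stream
+	 * and check that the accesses decoded at validation time fit in it.
+	 */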
+ for (int i = 0; i < NPU_BASEP_REGION_MAX; i++) {
+ struct drm_gem_object *gem;
+
+		/* A BO handle may only be omitted if the region is unused or backed by SRAM */
+ if (!job->region_bo_handles[i] &&
+ (!cmd_info->region_size[i] || (i == ETHOSU_SRAM_REGION && job->sram_size)))
+ continue;
+
+ if (job->region_bo_handles[i] && !cmd_info->region_size[i]) {
+ dev_err(dev->dev,
+ "Cmdstream BO handle %d set for unused region %d\n",
+ job->region_bo_handles[i], i);
+ ret = -EINVAL;
+ goto out_cleanup_job;
+ }
+
+ gem = drm_gem_object_lookup(file, job->region_bo_handles[i]);
+ if (!gem) {
+ dev_err(dev->dev,
+ "Invalid BO handle %d for region %d\n",
+ job->region_bo_handles[i], i);
+ ret = -ENOENT;
+ goto out_cleanup_job;
+ }
+
+ ejob->region_bo[ejob->region_cnt] = gem;
+ ejob->region_bo_num[ejob->region_cnt] = i;
+ ejob->region_cnt++;
+
+ if (to_ethosu_bo(gem)->info) {
+ dev_err(dev->dev,
+ "Cmdstream BO handle %d used for region %d\n",
+ job->region_bo_handles[i], i);
+ ret = -EINVAL;
+ goto out_cleanup_job;
+ }
+
+ /* Verify the command stream doesn't have accesses outside the BO */
+ if (cmd_info->region_size[i] > gem->size) {
+ dev_err(dev->dev,
+ "cmd stream region %d size greater than BO size (%llu > %zu)\n",
+ i, cmd_info->region_size[i], gem->size);
+ ret = -EOVERFLOW;
+ goto out_cleanup_job;
+ }
+ }
+ ret = ethosu_job_push(ejob);
+
+out_cleanup_job:
+ if (ret)
+ drm_sched_job_cleanup(&ejob->base);
+out_put_job:
+ ethosu_job_put(ejob);
+
+ return ret;
+}
+
+int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct drm_ethosu_submit *args = data;
+ int ret = 0;
+ unsigned int i = 0;
+
+ if (args->pad) {
+ drm_dbg(dev, "Reserved field in drm_ethosu_submit struct should be 0.\n");
+ return -EINVAL;
+ }
+
+ struct drm_ethosu_job __free(kvfree) *jobs =
+ kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL);
+ if (!jobs)
+ return -ENOMEM;
+
+ if (copy_from_user(jobs,
+ (void __user *)(uintptr_t)args->jobs,
+ args->job_count * sizeof(*jobs))) {
+ drm_dbg(dev, "Failed to copy incoming job array\n");
+ return -EFAULT;
+ }
+
+ for (i = 0; i < args->job_count; i++) {
+ ret = ethosu_ioctl_submit_job(dev, file, &jobs[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/drivers/accel/ethosu/ethosu_job.h b/drivers/accel/ethosu/ethosu_job.h
new file mode 100644
index 000000000000..ff1cf448d094
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_job.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+/* Copyright 2025 Arm, Ltd. */
+
+#ifndef __ETHOSU_JOB_H__
+#define __ETHOSU_JOB_H__
+
+#include <linux/kref.h>
+#include <drm/gpu_scheduler.h>
+
+struct ethosu_device;
+struct ethosu_file_priv;
+
+struct ethosu_job {
+ struct drm_sched_job base;
+ struct ethosu_device *dev;
+
+ struct drm_gem_object *cmd_bo;
+ struct drm_gem_object *region_bo[NPU_BASEP_REGION_MAX];
+ u8 region_bo_num[NPU_BASEP_REGION_MAX];
+ u8 region_cnt;
+ u32 sram_size;
+
+	/* Fence to be signaled by drm-sched once it's done with the job */
+ struct dma_fence *inference_done_fence;
+
+ /* Fence to be signaled by IRQ handler when the job is complete. */
+ struct dma_fence *done_fence;
+
+ struct kref refcount;
+};
+
+int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file);
+
+int ethosu_job_init(struct ethosu_device *dev);
+void ethosu_job_fini(struct ethosu_device *dev);
+int ethosu_job_open(struct ethosu_file_priv *ethosu_priv);
+void ethosu_job_close(struct ethosu_file_priv *ethosu_priv);
+
+#endif
diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile
index 1029e0bab061..dbf76b8a5b4c 100644
--- a/drivers/accel/ivpu/Makefile
+++ b/drivers/accel/ivpu/Makefile
@@ -6,6 +6,7 @@ intel_vpu-y := \
ivpu_fw.o \
ivpu_fw_log.o \
ivpu_gem.o \
+ ivpu_gem_userptr.o \
ivpu_hw.o \
ivpu_hw_btrs.o \
ivpu_hw_ip.o \
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index cd24ccd20ba6..3bd85ee6c26b 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -398,35 +398,25 @@ static int dct_active_set(void *data, u64 active_percent)
DEFINE_DEBUGFS_ATTRIBUTE(ivpu_dct_fops, dct_active_get, dct_active_set, "%llu\n");
+static void print_priority_band(struct seq_file *s, struct ivpu_hw_info *hw,
+ int band, const char *name)
+{
+ seq_printf(s, "%-9s: grace_period %9u process_grace_period %9u process_quantum %9u\n",
+ name,
+ hw->hws.grace_period[band],
+ hw->hws.process_grace_period[band],
+ hw->hws.process_quantum[band]);
+}
+
static int priority_bands_show(struct seq_file *s, void *v)
{
struct ivpu_device *vdev = s->private;
struct ivpu_hw_info *hw = vdev->hw;
- for (int band = VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE;
- band < VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT; band++) {
- switch (band) {
- case VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE:
- seq_puts(s, "Idle: ");
- break;
-
- case VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL:
- seq_puts(s, "Normal: ");
- break;
-
- case VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS:
- seq_puts(s, "Focus: ");
- break;
-
- case VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME:
- seq_puts(s, "Realtime: ");
- break;
- }
-
- seq_printf(s, "grace_period %9u process_grace_period %9u process_quantum %9u\n",
- hw->hws.grace_period[band], hw->hws.process_grace_period[band],
- hw->hws.process_quantum[band]);
- }
+ print_priority_band(s, hw, VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE, "Idle");
+ print_priority_band(s, hw, VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL, "Normal");
+ print_priority_band(s, hw, VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS, "Focus");
+ print_priority_band(s, hw, VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME, "Realtime");
return 0;
}
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 3289751b4757..3d6fccdefdd6 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -57,7 +57,7 @@ MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set NPU frequency");
int ivpu_sched_mode = IVPU_SCHED_MODE_AUTO;
module_param_named(sched_mode, ivpu_sched_mode, int, 0444);
-MODULE_PARM_DESC(sched_mode, "Scheduler mode: -1 - Use default scheduler, 0 - Use OS scheduler, 1 - Use HW scheduler");
+MODULE_PARM_DESC(sched_mode, "Scheduler mode: -1 - Use default scheduler, 0 - Use OS scheduler (supported on 27XX - 50XX), 1 - Use HW scheduler");
bool ivpu_disable_mmu_cont_pages;
module_param_named(disable_mmu_cont_pages, ivpu_disable_mmu_cont_pages, bool, 0444);
@@ -134,6 +134,8 @@ bool ivpu_is_capable(struct ivpu_device *vdev, u32 capability)
return true;
case DRM_IVPU_CAP_DMA_MEMORY_RANGE:
return true;
+ case DRM_IVPU_CAP_BO_CREATE_FROM_USERPTR:
+ return true;
case DRM_IVPU_CAP_MANAGE_CMDQ:
return vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW;
default:
@@ -200,6 +202,9 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
case DRM_IVPU_PARAM_CAPABILITIES:
args->value = ivpu_is_capable(vdev, args->index);
break;
+ case DRM_IVPU_PARAM_PREEMPT_BUFFER_SIZE:
+ args->value = ivpu_fw_preempt_buf_size(vdev);
+ break;
default:
ret = -EINVAL;
break;
@@ -310,6 +315,7 @@ static const struct drm_ioctl_desc ivpu_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(IVPU_CMDQ_CREATE, ivpu_cmdq_create_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_CMDQ_DESTROY, ivpu_cmdq_destroy_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_CMDQ_SUBMIT, ivpu_cmdq_submit_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(IVPU_BO_CREATE_FROM_USERPTR, ivpu_bo_create_from_userptr_ioctl, 0),
};
static int ivpu_wait_for_ready(struct ivpu_device *vdev)
@@ -377,8 +383,7 @@ int ivpu_boot(struct ivpu_device *vdev)
drm_WARN_ON(&vdev->drm, atomic_read(&vdev->job_timeout_counter));
drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
- /* Update boot params located at first 4KB of FW memory */
- ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem));
+ ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem_bp));
ret = ivpu_hw_boot_fw(vdev);
if (ret) {
@@ -450,6 +455,9 @@ int ivpu_shutdown(struct ivpu_device *vdev)
static const struct file_operations ivpu_fops = {
.owner = THIS_MODULE,
DRM_ACCEL_FOPS,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = drm_show_fdinfo,
+#endif
};
static const struct drm_driver driver = {
@@ -464,6 +472,9 @@ static const struct drm_driver driver = {
.ioctls = ivpu_drm_ioctls,
.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
.fops = &ivpu_fops,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = drm_show_memory_stats,
+#endif
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
@@ -705,6 +716,7 @@ static struct pci_device_id ivpu_pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_LNL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PTL_P) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_WCL) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_NVL) },
{ }
};
MODULE_DEVICE_TABLE(pci, ivpu_pci_ids);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 62ab1c654e63..5b34b6f50e69 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -27,6 +27,7 @@
#define PCI_DEVICE_ID_LNL 0x643e
#define PCI_DEVICE_ID_PTL_P 0xb03e
#define PCI_DEVICE_ID_WCL 0xfd3e
+#define PCI_DEVICE_ID_NVL 0xd71d
#define IVPU_HW_IP_37XX 37
#define IVPU_HW_IP_40XX 40
@@ -78,6 +79,7 @@
#define IVPU_DBG_KREF BIT(11)
#define IVPU_DBG_RPM BIT(12)
#define IVPU_DBG_MMU_MAP BIT(13)
+#define IVPU_DBG_IOCTL BIT(14)
#define ivpu_err(vdev, fmt, ...) \
drm_err(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__)
@@ -245,6 +247,8 @@ static inline int ivpu_hw_ip_gen(struct ivpu_device *vdev)
case PCI_DEVICE_ID_PTL_P:
case PCI_DEVICE_ID_WCL:
return IVPU_HW_IP_50XX;
+ case PCI_DEVICE_ID_NVL:
+ return IVPU_HW_IP_60XX;
default:
dump_stack();
ivpu_err(vdev, "Unknown NPU IP generation\n");
@@ -261,6 +265,7 @@ static inline int ivpu_hw_btrs_gen(struct ivpu_device *vdev)
case PCI_DEVICE_ID_LNL:
case PCI_DEVICE_ID_PTL_P:
case PCI_DEVICE_ID_WCL:
+ case PCI_DEVICE_ID_NVL:
return IVPU_HW_BTRS_LNL;
default:
dump_stack();
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 9db741695401..48386d2cddbb 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -17,15 +17,10 @@
#include "ivpu_ipc.h"
#include "ivpu_pm.h"
-#define FW_GLOBAL_MEM_START (2ull * SZ_1G)
-#define FW_GLOBAL_MEM_END (3ull * SZ_1G)
-#define FW_SHARED_MEM_SIZE SZ_256M /* Must be aligned to FW_SHARED_MEM_ALIGNMENT */
-#define FW_SHARED_MEM_ALIGNMENT SZ_128K /* VPU MTRR limitation */
-#define FW_RUNTIME_MAX_SIZE SZ_512M
#define FW_SHAVE_NN_MAX_SIZE SZ_2M
-#define FW_RUNTIME_MIN_ADDR (FW_GLOBAL_MEM_START)
-#define FW_RUNTIME_MAX_ADDR (FW_GLOBAL_MEM_END - FW_SHARED_MEM_SIZE)
#define FW_FILE_IMAGE_OFFSET (VPU_FW_HEADER_SIZE + FW_VERSION_HEADER_SIZE)
+#define FW_PREEMPT_BUF_MIN_SIZE SZ_4K
+#define FW_PREEMPT_BUF_MAX_SIZE SZ_32M
#define WATCHDOG_MSS_REDIRECT 32
#define WATCHDOG_NCE_REDIRECT 33
@@ -61,12 +56,14 @@ static struct {
{ IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v0.0.bin" },
{ IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v1.bin" },
{ IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v0.0.bin" },
+ { IVPU_HW_IP_60XX, "intel/vpu/vpu_60xx_v1.bin" },
};
/* Production fw_names from the table above */
MODULE_FIRMWARE("intel/vpu/vpu_37xx_v1.bin");
MODULE_FIRMWARE("intel/vpu/vpu_40xx_v1.bin");
MODULE_FIRMWARE("intel/vpu/vpu_50xx_v1.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_60xx_v1.bin");
static int ivpu_fw_request(struct ivpu_device *vdev)
{
@@ -131,9 +128,14 @@ ivpu_fw_check_api_ver_lt(struct ivpu_device *vdev, const struct vpu_firmware_hea
return false;
}
-static bool is_within_range(u64 addr, size_t size, u64 range_start, size_t range_size)
+bool ivpu_is_within_range(u64 addr, size_t size, struct ivpu_addr_range *range)
{
- if (addr < range_start || addr + size > range_start + range_size)
+ u64 addr_end;
+
+ if (!range || check_add_overflow(addr, size, &addr_end))
+ return false;
+
+ if (addr < range->start || addr_end > range->end)
return false;
return true;
@@ -142,6 +144,12 @@ static bool is_within_range(u64 addr, size_t size, u64 range_start, size_t range
static u32
ivpu_fw_sched_mode_select(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr)
{
+ if (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_60XX &&
+ ivpu_sched_mode == VPU_SCHEDULING_MODE_OS) {
+ ivpu_warn(vdev, "OS sched mode is not supported, using HW mode\n");
+ return VPU_SCHEDULING_MODE_HW;
+ }
+
if (ivpu_sched_mode != IVPU_SCHED_MODE_AUTO)
return ivpu_sched_mode;
@@ -151,11 +159,56 @@ ivpu_fw_sched_mode_select(struct ivpu_device *vdev, const struct vpu_firmware_he
return VPU_SCHEDULING_MODE_HW;
}
+static void
+ivpu_preemption_config_parse(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr)
+{
+ struct ivpu_fw_info *fw = vdev->fw;
+ u32 primary_preempt_buf_size, secondary_preempt_buf_size;
+
+ if (fw_hdr->preemption_buffer_1_max_size)
+ primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size;
+ else
+ primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
+
+ if (fw_hdr->preemption_buffer_2_max_size)
+ secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size;
+ else
+ secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
+
+ ivpu_dbg(vdev, FW_BOOT, "Preemption buffer size, primary: %u, secondary: %u\n",
+ primary_preempt_buf_size, secondary_preempt_buf_size);
+
+ if (primary_preempt_buf_size < FW_PREEMPT_BUF_MIN_SIZE ||
+ secondary_preempt_buf_size < FW_PREEMPT_BUF_MIN_SIZE) {
+ ivpu_warn(vdev, "Preemption buffers size too small\n");
+ return;
+ }
+
+ if (primary_preempt_buf_size > FW_PREEMPT_BUF_MAX_SIZE ||
+ secondary_preempt_buf_size > FW_PREEMPT_BUF_MAX_SIZE) {
+ ivpu_warn(vdev, "Preemption buffers size too big\n");
+ return;
+ }
+
+ if (fw->sched_mode != VPU_SCHEDULING_MODE_HW)
+ return;
+
+ if (ivpu_test_mode & IVPU_TEST_MODE_MIP_DISABLE)
+ return;
+
+ vdev->fw->primary_preempt_buf_size = ALIGN(primary_preempt_buf_size, PAGE_SIZE);
+ vdev->fw->secondary_preempt_buf_size = ALIGN(secondary_preempt_buf_size, PAGE_SIZE);
+}
+
static int ivpu_fw_parse(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
const struct vpu_firmware_header *fw_hdr = (const void *)fw->file->data;
- u64 runtime_addr, image_load_addr, runtime_size, image_size;
+ struct ivpu_addr_range fw_image_range;
+ u64 boot_params_addr, boot_params_size;
+ u64 fw_version_addr, fw_version_size;
+ u64 runtime_addr, runtime_size;
+ u64 image_load_addr, image_size;
if (fw->file->size <= FW_FILE_IMAGE_OFFSET) {
ivpu_err(vdev, "Firmware file is too small: %zu\n", fw->file->size);
@@ -167,18 +220,37 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
return -EINVAL;
}
- runtime_addr = fw_hdr->boot_params_load_address;
- runtime_size = fw_hdr->runtime_size;
- image_load_addr = fw_hdr->image_load_address;
- image_size = fw_hdr->image_size;
+ boot_params_addr = fw_hdr->boot_params_load_address;
+ boot_params_size = SZ_4K;
- if (runtime_addr < FW_RUNTIME_MIN_ADDR || runtime_addr > FW_RUNTIME_MAX_ADDR) {
- ivpu_err(vdev, "Invalid firmware runtime address: 0x%llx\n", runtime_addr);
+ if (!ivpu_is_within_range(boot_params_addr, boot_params_size, &vdev->hw->ranges.runtime)) {
+ ivpu_err(vdev, "Invalid boot params address: 0x%llx\n", boot_params_addr);
return -EINVAL;
}
- if (runtime_size < fw->file->size || runtime_size > FW_RUNTIME_MAX_SIZE) {
- ivpu_err(vdev, "Invalid firmware runtime size: %llu\n", runtime_size);
+ fw_version_addr = fw_hdr->firmware_version_load_address;
+ fw_version_size = ALIGN(fw_hdr->firmware_version_size, SZ_4K);
+
+ if (fw_version_size != SZ_4K) {
+ ivpu_err(vdev, "Invalid firmware version size: %u\n",
+ fw_hdr->firmware_version_size);
+ return -EINVAL;
+ }
+
+ if (!ivpu_is_within_range(fw_version_addr, fw_version_size, &vdev->hw->ranges.runtime)) {
+ ivpu_err(vdev, "Invalid firmware version address: 0x%llx\n", fw_version_addr);
+ return -EINVAL;
+ }
+
+ runtime_addr = fw_hdr->image_load_address;
+ runtime_size = fw_hdr->runtime_size - boot_params_size - fw_version_size;
+
+ image_load_addr = fw_hdr->image_load_address;
+ image_size = fw_hdr->image_size;
+
+ if (!ivpu_is_within_range(runtime_addr, runtime_size, &vdev->hw->ranges.runtime)) {
+ ivpu_err(vdev, "Invalid firmware runtime address: 0x%llx and size %llu\n",
+ runtime_addr, runtime_size);
return -EINVAL;
}
@@ -187,23 +259,25 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
return -EINVAL;
}
- if (image_load_addr < runtime_addr ||
- image_load_addr + image_size > runtime_addr + runtime_size) {
- ivpu_err(vdev, "Invalid firmware load address size: 0x%llx and size %llu\n",
+ if (!ivpu_is_within_range(image_load_addr, image_size, &vdev->hw->ranges.runtime)) {
+ ivpu_err(vdev, "Invalid firmware load address: 0x%llx and size %llu\n",
image_load_addr, image_size);
return -EINVAL;
}
- if (fw_hdr->shave_nn_fw_size > FW_SHAVE_NN_MAX_SIZE) {
- ivpu_err(vdev, "SHAVE NN firmware is too big: %u\n", fw_hdr->shave_nn_fw_size);
+ if (ivpu_hw_range_init(vdev, &fw_image_range, image_load_addr, image_size))
return -EINVAL;
- }
- if (fw_hdr->entry_point < image_load_addr ||
- fw_hdr->entry_point >= image_load_addr + image_size) {
+ if (!ivpu_is_within_range(fw_hdr->entry_point, SZ_4K, &fw_image_range)) {
ivpu_err(vdev, "Invalid entry point: 0x%llx\n", fw_hdr->entry_point);
return -EINVAL;
}
+
+ if (fw_hdr->shave_nn_fw_size > FW_SHAVE_NN_MAX_SIZE) {
+ ivpu_err(vdev, "SHAVE NN firmware is too big: %u\n", fw_hdr->shave_nn_fw_size);
+ return -EINVAL;
+ }
+
ivpu_dbg(vdev, FW_BOOT, "Header version: 0x%x, format 0x%x\n",
fw_hdr->header_version, fw_hdr->image_format);
@@ -217,6 +291,10 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, JSM, 3))
return -EINVAL;
+ fw->boot_params_addr = boot_params_addr;
+ fw->boot_params_size = boot_params_size;
+ fw->fw_version_addr = fw_version_addr;
+ fw->fw_version_size = fw_version_size;
fw->runtime_addr = runtime_addr;
fw->runtime_size = runtime_size;
fw->image_load_offset = image_load_addr - runtime_addr;
@@ -235,22 +313,13 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr);
ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS");
- if (fw_hdr->preemption_buffer_1_max_size)
- fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size;
- else
- fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
+ ivpu_preemption_config_parse(vdev, fw_hdr);
+ ivpu_dbg(vdev, FW_BOOT, "Mid-inference preemption %s supported\n",
+ ivpu_fw_preempt_buf_size(vdev) ? "is" : "is not");
- if (fw_hdr->preemption_buffer_2_max_size)
- fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size;
- else
- fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
- ivpu_dbg(vdev, FW_BOOT, "Preemption buffer sizes: primary %u, secondary %u\n",
- fw->primary_preempt_buf_size, fw->secondary_preempt_buf_size);
-
- if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address,
- fw_hdr->ro_section_size,
- fw_hdr->image_load_address,
- fw_hdr->image_size)) {
+ if (fw_hdr->ro_section_start_address &&
+ !ivpu_is_within_range(fw_hdr->ro_section_start_address, fw_hdr->ro_section_size,
+ &fw_image_range)) {
ivpu_err(vdev, "Invalid read-only section: start address 0x%llx, size %u\n",
fw_hdr->ro_section_start_address, fw_hdr->ro_section_size);
return -EINVAL;
@@ -259,12 +328,18 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
fw->read_only_addr = fw_hdr->ro_section_start_address;
fw->read_only_size = fw_hdr->ro_section_size;
- ivpu_dbg(vdev, FW_BOOT, "Size: file %lu image %u runtime %u shavenn %u\n",
- fw->file->size, fw->image_size, fw->runtime_size, fw->shave_nn_size);
- ivpu_dbg(vdev, FW_BOOT, "Address: runtime 0x%llx, load 0x%llx, entry point 0x%llx\n",
- fw->runtime_addr, image_load_addr, fw->entry_point);
+ ivpu_dbg(vdev, FW_BOOT, "Boot params: address 0x%llx, size %llu\n",
+ fw->boot_params_addr, fw->boot_params_size);
+ ivpu_dbg(vdev, FW_BOOT, "FW version: address 0x%llx, size %llu\n",
+ fw->fw_version_addr, fw->fw_version_size);
+ ivpu_dbg(vdev, FW_BOOT, "Runtime: address 0x%llx, size %u\n",
+ fw->runtime_addr, fw->runtime_size);
+ ivpu_dbg(vdev, FW_BOOT, "Image load offset: 0x%llx, size %u\n",
+ fw->image_load_offset, fw->image_size);
ivpu_dbg(vdev, FW_BOOT, "Read-only section: address 0x%llx, size %u\n",
fw->read_only_addr, fw->read_only_size);
+ ivpu_dbg(vdev, FW_BOOT, "FW entry point: 0x%llx\n", fw->entry_point);
+ ivpu_dbg(vdev, FW_BOOT, "SHAVE NN size: %u\n", fw->shave_nn_size);
return 0;
}
@@ -291,39 +366,33 @@ ivpu_fw_init_wa(struct ivpu_device *vdev)
IVPU_PRINT_WA(disable_d0i3_msg);
}
-static int ivpu_fw_update_global_range(struct ivpu_device *vdev)
-{
- struct ivpu_fw_info *fw = vdev->fw;
- u64 start = ALIGN(fw->runtime_addr + fw->runtime_size, FW_SHARED_MEM_ALIGNMENT);
- u64 size = FW_SHARED_MEM_SIZE;
-
- if (start + size > FW_GLOBAL_MEM_END) {
- ivpu_err(vdev, "No space for shared region, start %lld, size %lld\n", start, size);
- return -EINVAL;
- }
-
- ivpu_hw_range_init(&vdev->hw->ranges.global, start, size);
- return 0;
-}
-
static int ivpu_fw_mem_init(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
- struct ivpu_addr_range fw_range;
int log_verb_size;
int ret;
- ret = ivpu_fw_update_global_range(vdev);
- if (ret)
- return ret;
+ fw->mem_bp = ivpu_bo_create_runtime(vdev, fw->boot_params_addr, fw->boot_params_size,
+ DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
+ if (!fw->mem_bp) {
+ ivpu_err(vdev, "Failed to create firmware boot params memory buffer\n");
+ return -ENOMEM;
+ }
- fw_range.start = fw->runtime_addr;
- fw_range.end = fw->runtime_addr + fw->runtime_size;
- fw->mem = ivpu_bo_create(vdev, &vdev->gctx, &fw_range, fw->runtime_size,
- DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
+ fw->mem_fw_ver = ivpu_bo_create_runtime(vdev, fw->fw_version_addr, fw->fw_version_size,
+ DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
+ if (!fw->mem_fw_ver) {
+ ivpu_err(vdev, "Failed to create firmware version memory buffer\n");
+ ret = -ENOMEM;
+ goto err_free_bp;
+ }
+
+ fw->mem = ivpu_bo_create_runtime(vdev, fw->runtime_addr, fw->runtime_size,
+ DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
if (!fw->mem) {
ivpu_err(vdev, "Failed to create firmware runtime memory buffer\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err_free_fw_ver;
}
ret = ivpu_mmu_context_set_pages_ro(vdev, &vdev->gctx, fw->read_only_addr,
@@ -372,6 +441,10 @@ err_free_log_crit:
ivpu_bo_free(fw->mem_log_crit);
err_free_fw_mem:
ivpu_bo_free(fw->mem);
+err_free_fw_ver:
+ ivpu_bo_free(fw->mem_fw_ver);
+err_free_bp:
+ ivpu_bo_free(fw->mem_bp);
return ret;
}
@@ -387,10 +460,14 @@ static void ivpu_fw_mem_fini(struct ivpu_device *vdev)
ivpu_bo_free(fw->mem_log_verb);
ivpu_bo_free(fw->mem_log_crit);
ivpu_bo_free(fw->mem);
+ ivpu_bo_free(fw->mem_fw_ver);
+ ivpu_bo_free(fw->mem_bp);
fw->mem_log_verb = NULL;
fw->mem_log_crit = NULL;
fw->mem = NULL;
+ fw->mem_fw_ver = NULL;
+ fw->mem_bp = NULL;
}
int ivpu_fw_init(struct ivpu_device *vdev)
@@ -483,11 +560,6 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
ivpu_dbg(vdev, FW_BOOT, "boot_params.cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg = 0x%x\n",
boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg);
- ivpu_dbg(vdev, FW_BOOT, "boot_params.global_memory_allocator_base = 0x%llx\n",
- boot_params->global_memory_allocator_base);
- ivpu_dbg(vdev, FW_BOOT, "boot_params.global_memory_allocator_size = 0x%x\n",
- boot_params->global_memory_allocator_size);
-
ivpu_dbg(vdev, FW_BOOT, "boot_params.shave_nn_fw_base = 0x%llx\n",
boot_params->shave_nn_fw_base);
@@ -495,10 +567,6 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
boot_params->watchdog_irq_mss);
ivpu_dbg(vdev, FW_BOOT, "boot_params.watchdog_irq_nce = 0x%x\n",
boot_params->watchdog_irq_nce);
- ivpu_dbg(vdev, FW_BOOT, "boot_params.host_to_vpu_irq = 0x%x\n",
- boot_params->host_to_vpu_irq);
- ivpu_dbg(vdev, FW_BOOT, "boot_params.job_done_irq = 0x%x\n",
- boot_params->job_done_irq);
ivpu_dbg(vdev, FW_BOOT, "boot_params.host_version_id = 0x%x\n",
boot_params->host_version_id);
@@ -546,6 +614,8 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
boot_params->system_time_us);
ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = 0x%x\n",
boot_params->power_profile);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_uses_ecc_mca_signal = 0x%x\n",
+ boot_params->vpu_uses_ecc_mca_signal);
}
void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
@@ -572,6 +642,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
return;
}
+ memset(boot_params, 0, sizeof(*boot_params));
vdev->pm->is_warmboot = false;
boot_params->magic = VPU_BOOT_PARAMS_MAGIC;
@@ -647,6 +718,8 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->d0i3_entry_vpu_ts = 0;
if (IVPU_WA(disable_d0i2))
boot_params->power_profile |= BIT(1);
+ boot_params->vpu_uses_ecc_mca_signal =
+ ivpu_hw_uses_ecc_mca_signal(vdev) ? VPU_BOOT_MCA_ECC_BOTH : 0;
boot_params->system_time_us = ktime_to_us(ktime_get_real());
wmb(); /* Flush WC buffers after writing bootparams */
diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h
index 7081913fb0dd..00945892b55e 100644
--- a/drivers/accel/ivpu/ivpu_fw.h
+++ b/drivers/accel/ivpu/ivpu_fw.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_FW_H__
@@ -19,10 +19,16 @@ struct ivpu_fw_info {
const struct firmware *file;
const char *name;
char version[FW_VERSION_STR_SIZE];
+ struct ivpu_bo *mem_bp;
+ struct ivpu_bo *mem_fw_ver;
struct ivpu_bo *mem;
struct ivpu_bo *mem_shave_nn;
struct ivpu_bo *mem_log_crit;
struct ivpu_bo *mem_log_verb;
+ u64 boot_params_addr;
+ u64 boot_params_size;
+ u64 fw_version_addr;
+ u64 fw_version_size;
u64 runtime_addr;
u32 runtime_size;
u64 image_load_offset;
@@ -42,6 +48,7 @@ struct ivpu_fw_info {
u64 last_heartbeat;
};
+bool ivpu_is_within_range(u64 addr, size_t size, struct ivpu_addr_range *range);
int ivpu_fw_init(struct ivpu_device *vdev);
void ivpu_fw_fini(struct ivpu_device *vdev);
void ivpu_fw_load(struct ivpu_device *vdev);
@@ -52,4 +59,9 @@ static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev)
return vdev->fw->entry_point == vdev->fw->cold_boot_entry_point;
}
+static inline u32 ivpu_fw_preempt_buf_size(struct ivpu_device *vdev)
+{
+ return vdev->fw->primary_preempt_buf_size + vdev->fw->secondary_preempt_buf_size;
+}
+
#endif /* __IVPU_FW_H__ */
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index 59cfcf3eaded..ece68f570b7e 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -15,6 +15,7 @@
#include <drm/drm_utils.h>
#include "ivpu_drv.h"
+#include "ivpu_fw.h"
#include "ivpu_gem.h"
#include "ivpu_hw.h"
#include "ivpu_mmu.h"
@@ -27,8 +28,8 @@ static const struct drm_gem_object_funcs ivpu_gem_funcs;
static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, const char *action)
{
ivpu_dbg(vdev, BO,
- "%6s: bo %8p vpu_addr %9llx size %8zu ctx %d has_pages %d dma_mapped %d mmu_mapped %d wc %d imported %d\n",
- action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx_id,
+ "%6s: bo %8p size %9zu ctx %d vpu_addr %9llx pages %d sgt %d mmu_mapped %d wc %d imported %d\n",
+ action, bo, ivpu_bo_size(bo), bo->ctx_id, bo->vpu_addr,
(bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc,
(bool)drm_gem_is_imported(&bo->base.base));
}
@@ -43,22 +44,47 @@ static inline void ivpu_bo_unlock(struct ivpu_bo *bo)
dma_resv_unlock(bo->base.base.resv);
}
+static struct sg_table *ivpu_bo_map_attachment(struct ivpu_device *vdev, struct ivpu_bo *bo)
+{
+ struct sg_table *sgt;
+
+ drm_WARN_ON(&vdev->drm, !bo->base.base.import_attach);
+
+ ivpu_bo_lock(bo);
+
+ sgt = bo->base.sgt;
+ if (!sgt) {
+ sgt = dma_buf_map_attachment(bo->base.base.import_attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt))
+ ivpu_err(vdev, "Failed to map BO in IOMMU: %ld\n", PTR_ERR(sgt));
+ else
+ bo->base.sgt = sgt;
+ }
+
+ ivpu_bo_unlock(bo);
+
+ return sgt;
+}
+
/*
- * ivpu_bo_pin() - pin the backing physical pages and map them to VPU.
+ * ivpu_bo_bind() - pin the backing physical pages and map them to VPU.
*
* This function pins physical memory pages, then maps the physical pages
* to IOMMU address space and finally updates the VPU MMU page tables
* to allow the VPU to translate VPU address to IOMMU address.
*/
-int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
+int __must_check ivpu_bo_bind(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
struct sg_table *sgt;
int ret = 0;
- ivpu_dbg_bo(vdev, bo, "pin");
+ ivpu_dbg_bo(vdev, bo, "bind");
- sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
+ if (bo->base.base.import_attach)
+ sgt = ivpu_bo_map_attachment(vdev, bo);
+ else
+ sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
if (IS_ERR(sgt)) {
ret = PTR_ERR(sgt);
ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
@@ -70,7 +96,7 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
if (!bo->mmu_mapped) {
drm_WARN_ON(&vdev->drm, !bo->ctx);
ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt,
- ivpu_bo_is_snooped(bo));
+ ivpu_bo_is_snooped(bo), ivpu_bo_is_read_only(bo));
if (ret) {
ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret);
goto unlock;
@@ -99,9 +125,9 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
ret = ivpu_mmu_context_insert_node(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
if (!ret) {
bo->ctx = ctx;
+ bo->ctx_id = ctx->id;
bo->vpu_addr = bo->mm_node.start;
- } else {
- ivpu_err(vdev, "Failed to add BO to context %u: %d\n", ctx->id, ret);
+ ivpu_dbg_bo(vdev, bo, "vaddr");
}
ivpu_bo_unlock(bo);
@@ -115,7 +141,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
- lockdep_assert(dma_resv_held(bo->base.base.resv) || !kref_read(&bo->base.base.refcount));
+ dma_resv_assert_held(bo->base.base.resv);
if (bo->mmu_mapped) {
drm_WARN_ON(&vdev->drm, !bo->ctx);
@@ -130,13 +156,15 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
bo->ctx = NULL;
}
- if (drm_gem_is_imported(&bo->base.base))
- return;
-
if (bo->base.sgt) {
- dma_unmap_sgtable(vdev->drm.dev, bo->base.sgt, DMA_BIDIRECTIONAL, 0);
- sg_free_table(bo->base.sgt);
- kfree(bo->base.sgt);
+ if (bo->base.base.import_attach) {
+ dma_buf_unmap_attachment(bo->base.base.import_attach,
+ bo->base.sgt, DMA_BIDIRECTIONAL);
+ } else {
+ dma_unmap_sgtable(vdev->drm.dev, bo->base.sgt, DMA_BIDIRECTIONAL, 0);
+ sg_free_table(bo->base.sgt);
+ kfree(bo->base.sgt);
+ }
bo->base.sgt = NULL;
}
}
@@ -182,10 +210,11 @@ struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t siz
struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf)
{
+ struct ivpu_device *vdev = to_ivpu_device(dev);
struct device *attach_dev = dev->dev;
struct dma_buf_attachment *attach;
- struct sg_table *sgt;
struct drm_gem_object *obj;
+ struct ivpu_bo *bo;
int ret;
attach = dma_buf_attach(dma_buf, attach_dev);
@@ -194,25 +223,25 @@ struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev,
get_dma_buf(dma_buf);
- sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL);
- if (IS_ERR(sgt)) {
- ret = PTR_ERR(sgt);
- goto fail_detach;
- }
-
- obj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt);
+ obj = drm_gem_shmem_prime_import_sg_table(dev, attach, NULL);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
- goto fail_unmap;
+ goto fail_detach;
}
obj->import_attach = attach;
obj->resv = dma_buf->resv;
+ bo = to_ivpu_bo(obj);
+
+ mutex_lock(&vdev->bo_list_lock);
+ list_add_tail(&bo->bo_list_node, &vdev->bo_list);
+ mutex_unlock(&vdev->bo_list_lock);
+
+ ivpu_dbg(vdev, BO, "import: bo %8p size %9zu\n", bo, ivpu_bo_size(bo));
+
return obj;
-fail_unmap:
- dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL);
fail_detach:
dma_buf_detach(dma_buf, attach);
dma_buf_put(dma_buf);
@@ -220,7 +249,7 @@ fail_detach:
return ERR_PTR(ret);
}
-static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags, u32 ctx_id)
+static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags)
{
struct drm_gem_shmem_object *shmem;
struct ivpu_bo *bo;
@@ -238,7 +267,6 @@ static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 fla
return ERR_CAST(shmem);
bo = to_ivpu_bo(&shmem->base);
- bo->ctx_id = ctx_id;
bo->base.map_wc = flags & DRM_IVPU_BO_WC;
bo->flags = flags;
@@ -246,7 +274,7 @@ static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 fla
list_add_tail(&bo->bo_list_node, &vdev->bo_list);
mutex_unlock(&vdev->bo_list_lock);
- ivpu_dbg_bo(vdev, bo, "alloc");
+ ivpu_dbg(vdev, BO, " alloc: bo %8p size %9llu\n", bo, size);
return bo;
}
@@ -259,8 +287,8 @@ static int ivpu_gem_bo_open(struct drm_gem_object *obj, struct drm_file *file)
struct ivpu_addr_range *range;
if (bo->ctx) {
- ivpu_warn(vdev, "Can't add BO to ctx %u: already in ctx %u\n",
- file_priv->ctx.id, bo->ctx->id);
+ ivpu_dbg(vdev, IOCTL, "Can't add BO %pe to ctx %u: already in ctx %u\n",
+ bo, file_priv->ctx.id, bo->ctx->id);
return -EALREADY;
}
@@ -281,23 +309,41 @@ static void ivpu_gem_bo_free(struct drm_gem_object *obj)
ivpu_dbg_bo(vdev, bo, "free");
+ drm_WARN_ON(&vdev->drm, list_empty(&bo->bo_list_node));
+
mutex_lock(&vdev->bo_list_lock);
list_del(&bo->bo_list_node);
- mutex_unlock(&vdev->bo_list_lock);
drm_WARN_ON(&vdev->drm, !drm_gem_is_imported(&bo->base.base) &&
!dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
drm_WARN_ON(&vdev->drm, ivpu_bo_size(bo) == 0);
drm_WARN_ON(&vdev->drm, bo->base.vaddr);
+ ivpu_bo_lock(bo);
ivpu_bo_unbind_locked(bo);
+ ivpu_bo_unlock(bo);
+
+ mutex_unlock(&vdev->bo_list_lock);
+
drm_WARN_ON(&vdev->drm, bo->mmu_mapped);
drm_WARN_ON(&vdev->drm, bo->ctx);
drm_WARN_ON(obj->dev, refcount_read(&bo->base.pages_use_count) > 1);
+ drm_WARN_ON(obj->dev, bo->base.base.vma_node.vm_files.rb_node);
drm_gem_shmem_free(&bo->base);
}
+static enum drm_gem_object_status ivpu_gem_status(struct drm_gem_object *obj)
+{
+ struct ivpu_bo *bo = to_ivpu_bo(obj);
+ enum drm_gem_object_status status = 0;
+
+ if (ivpu_bo_is_resident(bo))
+ status |= DRM_GEM_OBJECT_RESIDENT;
+
+ return status;
+}
+
static const struct drm_gem_object_funcs ivpu_gem_funcs = {
.free = ivpu_gem_bo_free,
.open = ivpu_gem_bo_open,
@@ -308,6 +354,7 @@ static const struct drm_gem_object_funcs ivpu_gem_funcs = {
.vmap = drm_gem_shmem_object_vmap,
.vunmap = drm_gem_shmem_object_vunmap,
.mmap = drm_gem_shmem_object_mmap,
+ .status = ivpu_gem_status,
.vm_ops = &drm_gem_shmem_vm_ops,
};
@@ -320,25 +367,33 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
struct ivpu_bo *bo;
int ret;
- if (args->flags & ~DRM_IVPU_BO_FLAGS)
+ if (args->flags & ~DRM_IVPU_BO_FLAGS) {
+ ivpu_dbg(vdev, IOCTL, "Invalid BO flags 0x%x\n", args->flags);
return -EINVAL;
+ }
- if (size == 0)
+ if (size == 0) {
+ ivpu_dbg(vdev, IOCTL, "Invalid BO size %llu\n", args->size);
return -EINVAL;
+ }
- bo = ivpu_bo_alloc(vdev, size, args->flags, file_priv->ctx.id);
+ bo = ivpu_bo_alloc(vdev, size, args->flags);
if (IS_ERR(bo)) {
- ivpu_err(vdev, "Failed to allocate BO: %pe (ctx %u size %llu flags 0x%x)",
+ ivpu_dbg(vdev, IOCTL, "Failed to allocate BO: %pe ctx %u size %llu flags 0x%x\n",
bo, file_priv->ctx.id, args->size, args->flags);
return PTR_ERR(bo);
}
+ drm_WARN_ON(&vdev->drm, bo->base.base.handle_count != 0);
+
ret = drm_gem_handle_create(file, &bo->base.base, &args->handle);
- if (ret)
- ivpu_err(vdev, "Failed to create handle for BO: %pe (ctx %u size %llu flags 0x%x)",
+ if (ret) {
+ ivpu_dbg(vdev, IOCTL, "Failed to create handle for BO: %pe ctx %u size %llu flags 0x%x\n",
bo, file_priv->ctx.id, args->size, args->flags);
- else
+ } else {
args->vpu_addr = bo->vpu_addr;
+ drm_WARN_ON(&vdev->drm, bo->base.base.handle_count != 1);
+ }
drm_gem_object_put(&bo->base.base);
@@ -360,18 +415,21 @@ ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(range->end));
drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size));
- bo = ivpu_bo_alloc(vdev, size, flags, IVPU_GLOBAL_CONTEXT_MMU_SSID);
+ bo = ivpu_bo_alloc(vdev, size, flags);
if (IS_ERR(bo)) {
- ivpu_err(vdev, "Failed to allocate BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
+ ivpu_err(vdev, "Failed to allocate BO: %pe vpu_addr 0x%llx size %llu flags 0x%x\n",
bo, range->start, size, flags);
return NULL;
}
ret = ivpu_bo_alloc_vpu_addr(bo, ctx, range);
- if (ret)
+ if (ret) {
+ ivpu_err(vdev, "Failed to allocate NPU address for BO: %pe ctx %u size %llu: %d\n",
+ bo, ctx->id, size, ret);
goto err_put;
+ }
- ret = ivpu_bo_pin(bo);
+ ret = ivpu_bo_bind(bo);
if (ret)
goto err_put;
@@ -391,6 +449,21 @@ err_put:
return NULL;
}
+struct ivpu_bo *ivpu_bo_create_runtime(struct ivpu_device *vdev, u64 addr, u64 size, u32 flags)
+{
+ struct ivpu_addr_range range;
+
+ if (!ivpu_is_within_range(addr, size, &vdev->hw->ranges.runtime)) {
+ ivpu_err(vdev, "Invalid runtime BO address 0x%llx size %llu\n", addr, size);
+ return NULL;
+ }
+
+ if (ivpu_hw_range_init(vdev, &range, addr, size))
+ return NULL;
+
+ return ivpu_bo_create(vdev, &vdev->gctx, &range, size, flags);
+}
+
struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags)
{
return ivpu_bo_create(vdev, &vdev->gctx, &vdev->hw->ranges.global, size, flags);
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index aa8ff14f7aae..0c3350f22b55 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_GEM_H__
#define __IVPU_GEM_H__
@@ -24,19 +24,22 @@ struct ivpu_bo {
bool mmu_mapped;
};
-int ivpu_bo_pin(struct ivpu_bo *bo);
+int ivpu_bo_bind(struct ivpu_bo *bo);
void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size);
struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
struct ivpu_bo *ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
struct ivpu_addr_range *range, u64 size, u32 flags);
+struct ivpu_bo *ivpu_bo_create_runtime(struct ivpu_device *vdev, u64 addr, u64 size, u32 flags);
struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags);
void ivpu_bo_free(struct ivpu_bo *bo);
int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+int ivpu_bo_create_from_userptr_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
void ivpu_bo_list_print(struct drm_device *dev);
@@ -74,6 +77,16 @@ static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo)
return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED;
}
+static inline bool ivpu_bo_is_read_only(struct ivpu_bo *bo)
+{
+ return bo->flags & DRM_IVPU_BO_READ_ONLY;
+}
+
+static inline bool ivpu_bo_is_resident(struct ivpu_bo *bo)
+{
+ return !!bo->base.pages;
+}
+
static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr)
{
if (vpu_addr < bo->vpu_addr)
@@ -96,4 +109,9 @@ static inline u32 cpu_to_vpu_addr(struct ivpu_bo *bo, void *cpu_addr)
return bo->vpu_addr + (cpu_addr - ivpu_bo_vaddr(bo));
}
+static inline bool ivpu_bo_is_mappable(struct ivpu_bo *bo)
+{
+ return bo->flags & DRM_IVPU_BO_MAPPABLE;
+}
+
#endif /* __IVPU_GEM_H__ */
diff --git a/drivers/accel/ivpu/ivpu_gem_userptr.c b/drivers/accel/ivpu/ivpu_gem_userptr.c
new file mode 100644
index 000000000000..25ba606164c0
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_gem_userptr.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2025 Intel Corporation
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/err.h>
+#include <linux/highmem.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/capability.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_gem.h"
+
+static struct sg_table *
+ivpu_gem_userptr_dmabuf_map(struct dma_buf_attachment *attachment,
+ enum dma_data_direction direction)
+{
+ struct sg_table *sgt = attachment->dmabuf->priv;
+ int ret;
+
+ ret = dma_map_sgtable(attachment->dev, sgt, direction, DMA_ATTR_SKIP_CPU_SYNC);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return sgt;
+}
+
+static void ivpu_gem_userptr_dmabuf_unmap(struct dma_buf_attachment *attachment,
+ struct sg_table *sgt,
+ enum dma_data_direction direction)
+{
+ dma_unmap_sgtable(attachment->dev, sgt, direction, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
+static void ivpu_gem_userptr_dmabuf_release(struct dma_buf *dma_buf)
+{
+ struct sg_table *sgt = dma_buf->priv;
+ struct sg_page_iter page_iter;
+ struct page *page;
+
+ for_each_sgtable_page(sgt, &page_iter, 0) {
+ page = sg_page_iter_page(&page_iter);
+ unpin_user_page(page);
+ }
+
+ sg_free_table(sgt);
+ kfree(sgt);
+}
+
+static const struct dma_buf_ops ivpu_gem_userptr_dmabuf_ops = {
+ .map_dma_buf = ivpu_gem_userptr_dmabuf_map,
+ .unmap_dma_buf = ivpu_gem_userptr_dmabuf_unmap,
+ .release = ivpu_gem_userptr_dmabuf_release,
+};
+
+static struct dma_buf *
+ivpu_create_userptr_dmabuf(struct ivpu_device *vdev, void __user *user_ptr,
+ size_t size, uint32_t flags)
+{
+ struct dma_buf_export_info exp_info = {};
+ struct dma_buf *dma_buf;
+ struct sg_table *sgt;
+ struct page **pages;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+ unsigned int gup_flags = FOLL_LONGTERM;
+ int ret, i, pinned;
+
+ /* Add FOLL_WRITE only if the BO is not read-only */
+ if (!(flags & DRM_IVPU_BO_READ_ONLY))
+ gup_flags |= FOLL_WRITE;
+
+ pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return ERR_PTR(-ENOMEM);
+
+ pinned = pin_user_pages_fast((unsigned long)user_ptr, nr_pages, gup_flags, pages);
+ if (pinned < 0) {
+ ret = pinned;
+ ivpu_dbg(vdev, IOCTL, "Failed to pin user pages: %d\n", ret);
+ goto free_pages_array;
+ }
+
+ if (pinned != nr_pages) {
+ ivpu_dbg(vdev, IOCTL, "Pinned %d pages, expected %lu\n", pinned, nr_pages);
+ ret = -EFAULT;
+ goto unpin_pages;
+ }
+
+ sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt) {
+ ret = -ENOMEM;
+ goto unpin_pages;
+ }
+
+ ret = sg_alloc_table_from_pages(sgt, pages, nr_pages, 0, size, GFP_KERNEL);
+ if (ret) {
+ ivpu_dbg(vdev, IOCTL, "Failed to create sg table: %d\n", ret);
+ goto free_sgt;
+ }
+
+ exp_info.exp_name = "ivpu_userptr_dmabuf";
+ exp_info.owner = THIS_MODULE;
+ exp_info.ops = &ivpu_gem_userptr_dmabuf_ops;
+ exp_info.size = size;
+ exp_info.flags = O_RDWR | O_CLOEXEC;
+ exp_info.priv = sgt;
+
+ dma_buf = dma_buf_export(&exp_info);
+ if (IS_ERR(dma_buf)) {
+ ret = PTR_ERR(dma_buf);
+ ivpu_dbg(vdev, IOCTL, "Failed to export userptr dma-buf: %d\n", ret);
+ goto free_sg_table;
+ }
+
+ kvfree(pages);
+ return dma_buf;
+
+free_sg_table:
+ sg_free_table(sgt);
+free_sgt:
+ kfree(sgt);
+unpin_pages:
+ for (i = 0; i < pinned; i++)
+ unpin_user_page(pages[i]);
+free_pages_array:
+ kvfree(pages);
+ return ERR_PTR(ret);
+}
+
+static struct ivpu_bo *
+ivpu_bo_create_from_userptr(struct ivpu_device *vdev, void __user *user_ptr,
+ size_t size, uint32_t flags)
+{
+ struct dma_buf *dma_buf;
+ struct drm_gem_object *obj;
+ struct ivpu_bo *bo;
+
+ dma_buf = ivpu_create_userptr_dmabuf(vdev, user_ptr, size, flags);
+ if (IS_ERR(dma_buf))
+ return ERR_CAST(dma_buf);
+
+ obj = ivpu_gem_prime_import(&vdev->drm, dma_buf);
+ if (IS_ERR(obj)) {
+ dma_buf_put(dma_buf);
+ return ERR_CAST(obj);
+ }
+
+ dma_buf_put(dma_buf);
+
+ bo = to_ivpu_bo(obj);
+ bo->flags = flags;
+
+ return bo;
+}
+
+int ivpu_bo_create_from_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct drm_ivpu_bo_create_from_userptr *args = data;
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = to_ivpu_device(dev);
+ void __user *user_ptr = u64_to_user_ptr(args->user_ptr);
+ struct ivpu_bo *bo;
+ int ret;
+
+ if (args->flags & ~(DRM_IVPU_BO_HIGH_MEM | DRM_IVPU_BO_DMA_MEM | DRM_IVPU_BO_READ_ONLY)) {
+ ivpu_dbg(vdev, IOCTL, "Invalid BO flags: 0x%x\n", args->flags);
+ return -EINVAL;
+ }
+
+ if (!args->user_ptr || !args->size) {
+ ivpu_dbg(vdev, IOCTL, "Userptr or size are zero: ptr %llx size %llu\n",
+ args->user_ptr, args->size);
+ return -EINVAL;
+ }
+
+ if (!PAGE_ALIGNED(args->user_ptr) || !PAGE_ALIGNED(args->size)) {
+ ivpu_dbg(vdev, IOCTL, "Userptr or size not page aligned: ptr %llx size %llu\n",
+ args->user_ptr, args->size);
+ return -EINVAL;
+ }
+
+ if (!access_ok(user_ptr, args->size)) {
+ ivpu_dbg(vdev, IOCTL, "Userptr is not accessible: ptr %llx size %llu\n",
+ args->user_ptr, args->size);
+ return -EFAULT;
+ }
+
+ bo = ivpu_bo_create_from_userptr(vdev, user_ptr, args->size, args->flags);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ ret = drm_gem_handle_create(file, &bo->base.base, &args->handle);
+ if (ret) {
+ ivpu_dbg(vdev, IOCTL, "Failed to create handle for BO: %pe ctx %u size %llu flags 0x%x\n",
+ bo, file_priv->ctx.id, args->size, args->flags);
+ } else {
+ ivpu_dbg(vdev, BO, "Created userptr BO: handle=%u vpu_addr=0x%llx size=%llu flags=0x%x\n",
+ args->handle, bo->vpu_addr, args->size, bo->flags);
+ args->vpu_addr = bo->vpu_addr;
+ }
+
+ drm_gem_object_put(&bo->base.base);
+
+ return ret;
+}
diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c
index 08dcc31b56f4..d69cd0d93569 100644
--- a/drivers/accel/ivpu/ivpu_hw.c
+++ b/drivers/accel/ivpu/ivpu_hw.c
@@ -8,6 +8,8 @@
#include "ivpu_hw_btrs.h"
#include "ivpu_hw_ip.h"
+#include <asm/msr-index.h>
+#include <asm/msr.h>
#include <linux/dmi.h>
#include <linux/fault-inject.h>
#include <linux/pm_runtime.h>
@@ -20,6 +22,10 @@ module_param_named_unsafe(fail_hw, ivpu_fail_hw, charp, 0444);
MODULE_PARM_DESC(fail_hw, "<interval>,<probability>,<space>,<times>");
#endif
+#define FW_SHARED_MEM_ALIGNMENT SZ_512K /* VPU MTRR limitation */
+
+#define ECC_MCA_SIGNAL_ENABLE_MASK 0xff
+
static char *platform_to_str(u32 platform)
{
switch (platform) {
@@ -147,19 +153,39 @@ static void priority_bands_init(struct ivpu_device *vdev)
vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 200000;
}
+int ivpu_hw_range_init(struct ivpu_device *vdev, struct ivpu_addr_range *range, u64 start, u64 size)
+{
+ u64 end;
+
+ if (!range || check_add_overflow(start, size, &end)) {
+ ivpu_err(vdev, "Invalid range: start 0x%llx size %llu\n", start, size);
+ return -EINVAL;
+ }
+
+ range->start = start;
+ range->end = end;
+
+ return 0;
+}
+
static void memory_ranges_init(struct ivpu_device *vdev)
{
if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
- ivpu_hw_range_init(&vdev->hw->ranges.global, 0x80000000, SZ_512M);
- ivpu_hw_range_init(&vdev->hw->ranges.user, 0x88000000, 511 * SZ_1M);
- ivpu_hw_range_init(&vdev->hw->ranges.shave, 0x180000000, SZ_2G);
- ivpu_hw_range_init(&vdev->hw->ranges.dma, 0x200000000, SZ_128G);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.runtime, 0x84800000, SZ_64M);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.global, 0x90000000, SZ_256M);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.user, 0xa0000000, 511 * SZ_1M);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.shave, 0x180000000, SZ_2G);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.dma, 0x200000000, SZ_128G);
} else {
- ivpu_hw_range_init(&vdev->hw->ranges.global, 0x80000000, SZ_512M);
- ivpu_hw_range_init(&vdev->hw->ranges.shave, 0x80000000, SZ_2G);
- ivpu_hw_range_init(&vdev->hw->ranges.user, 0x100000000, SZ_256G);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.runtime, 0x80000000, SZ_64M);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.global, 0x90000000, SZ_256M);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.shave, 0x80000000, SZ_2G);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.user, 0x100000000, SZ_256G);
vdev->hw->ranges.dma = vdev->hw->ranges.user;
}
+
+ drm_WARN_ON(&vdev->drm, !IS_ALIGNED(vdev->hw->ranges.global.start,
+ FW_SHARED_MEM_ALIGNMENT));
}
static int wp_enable(struct ivpu_device *vdev)
@@ -373,3 +399,22 @@ irqreturn_t ivpu_hw_irq_handler(int irq, void *ptr)
pm_runtime_mark_last_busy(vdev->drm.dev);
return IRQ_HANDLED;
}
+
+bool ivpu_hw_uses_ecc_mca_signal(struct ivpu_device *vdev)
+{
+ unsigned long long msr_integrity_caps;
+ int ret;
+
+ if (ivpu_hw_ip_gen(vdev) < IVPU_HW_IP_50XX)
+ return false;
+
+ ret = rdmsrq_safe(MSR_INTEGRITY_CAPS, &msr_integrity_caps);
+ if (ret) {
+ ivpu_warn(vdev, "Error reading MSR_INTEGRITY_CAPS: %d", ret);
+ return false;
+ }
+
+ ivpu_dbg(vdev, MISC, "MSR_INTEGRITY_CAPS: 0x%llx\n", msr_integrity_caps);
+
+ return msr_integrity_caps & ECC_MCA_SIGNAL_ENABLE_MASK;
+}
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index d79668fe1609..b6d0f0d0dccc 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -21,6 +21,7 @@ struct ivpu_hw_info {
bool (*ip_irq_handler)(struct ivpu_device *vdev, int irq);
} irq;
struct {
+ struct ivpu_addr_range runtime;
struct ivpu_addr_range global;
struct ivpu_addr_range user;
struct ivpu_addr_range shave;
@@ -51,6 +52,8 @@ struct ivpu_hw_info {
};
int ivpu_hw_init(struct ivpu_device *vdev);
+int ivpu_hw_range_init(struct ivpu_device *vdev, struct ivpu_addr_range *range, u64 start,
+ u64 size);
int ivpu_hw_power_up(struct ivpu_device *vdev);
int ivpu_hw_power_down(struct ivpu_device *vdev);
int ivpu_hw_reset(struct ivpu_device *vdev);
@@ -60,6 +63,7 @@ void ivpu_irq_handlers_init(struct ivpu_device *vdev);
void ivpu_hw_irq_enable(struct ivpu_device *vdev);
void ivpu_hw_irq_disable(struct ivpu_device *vdev);
irqreturn_t ivpu_hw_irq_handler(int irq, void *ptr);
+bool ivpu_hw_uses_ecc_mca_signal(struct ivpu_device *vdev);
static inline u32 ivpu_hw_btrs_irq_handler(struct ivpu_device *vdev, int irq)
{
@@ -71,12 +75,6 @@ static inline u32 ivpu_hw_ip_irq_handler(struct ivpu_device *vdev, int irq)
return vdev->hw->irq.ip_irq_handler(vdev, irq);
}
-static inline void ivpu_hw_range_init(struct ivpu_addr_range *range, u64 start, u64 size)
-{
- range->start = start;
- range->end = start + size;
-}
-
static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range)
{
return range->end - range->start;
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c
index afdb3b2aa72a..06e65c592618 100644
--- a/drivers/accel/ivpu/ivpu_hw_btrs.c
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.c
@@ -321,6 +321,14 @@ static int wait_for_pll_lock(struct ivpu_device *vdev, bool enable)
return REGB_POLL_FLD(VPU_HW_BTRS_MTL_PLL_STATUS, LOCK, exp_val, PLL_TIMEOUT_US);
}
+static int wait_for_cdyn_deassert(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return 0;
+
+ return REGB_POLL_FLD(VPU_HW_BTRS_LNL_CDYN, CDYN, 0, PLL_TIMEOUT_US);
+}
+
int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable)
{
struct wp_request wp;
@@ -354,6 +362,14 @@ int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable)
return ret;
}
+ if (!enable) {
+ ret = wait_for_cdyn_deassert(vdev);
+ if (ret) {
+ ivpu_err(vdev, "Timed out waiting for CDYN deassert\n");
+ return ret;
+ }
+ }
+
return 0;
}
@@ -673,7 +689,7 @@ bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq)
if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, SURV_ERR, status)) {
ivpu_dbg(vdev, IRQ, "Survivability IRQ\n");
- queue_work(system_wq, &vdev->irq_dct_work);
+ queue_work(system_percpu_wq, &vdev->irq_dct_work);
}
if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) {
@@ -752,7 +768,7 @@ int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable)
}
}
-void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 active_percent)
+void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u8 active_percent)
{
u32 val = 0;
u32 cmd = enable ? DCT_ENABLE : DCT_DISABLE;
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h
index 032c384ac3d4..c4c10e22f30f 100644
--- a/drivers/accel/ivpu/ivpu_hw_btrs.h
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.h
@@ -36,7 +36,7 @@ u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev);
bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq);
bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq);
int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable);
-void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 active_percent);
+void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u8 active_percent);
u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h b/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h
index fff2ef2cada6..a81a9ba540fa 100644
--- a/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h
+++ b/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h
@@ -74,6 +74,9 @@
#define VPU_HW_BTRS_LNL_PLL_FREQ 0x00000148u
#define VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK GENMASK(15, 0)
+#define VPU_HW_BTRS_LNL_CDYN 0x0000014cu
+#define VPU_HW_BTRS_LNL_CDYN_CDYN_MASK GENMASK(15, 0)
+
#define VPU_HW_BTRS_LNL_TILE_FUSE 0x00000150u
#define VPU_HW_BTRS_LNL_TILE_FUSE_VALID_MASK BIT_MASK(0)
#define VPU_HW_BTRS_LNL_TILE_FUSE_CONFIG_MASK GENMASK(6, 1)
diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c
index 2bf9882ab52e..06aa1e7dc50b 100644
--- a/drivers/accel/ivpu/ivpu_hw_ip.c
+++ b/drivers/accel/ivpu/ivpu_hw_ip.c
@@ -691,6 +691,13 @@ static void pwr_island_delay_set(struct ivpu_device *vdev)
status = high ? 46 : 3;
break;
+ case PCI_DEVICE_ID_NVL:
+ post = high ? 198 : 17;
+ post1 = 0;
+ post2 = high ? 198 : 17;
+ status = 0;
+ break;
+
default:
dump_stack();
ivpu_err(vdev, "Unknown device ID\n");
@@ -889,6 +896,9 @@ static int soc_cpu_drive_40xx(struct ivpu_device *vdev, bool enable)
static int soc_cpu_enable(struct ivpu_device *vdev)
{
+ if (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_60XX)
+ return 0;
+
return soc_cpu_drive_40xx(vdev, true);
}
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 5f00809d448a..1f13bf95b2b3 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -459,7 +459,7 @@ void ivpu_ipc_irq_handler(struct ivpu_device *vdev)
}
}
- queue_work(system_wq, &vdev->irq_ipc_work);
+ queue_work(system_percpu_wq, &vdev->irq_ipc_work);
}
void ivpu_ipc_irq_work_fn(struct work_struct *work)
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 060f1fc031d3..4f8564e2878a 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -34,22 +34,20 @@ static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
static int ivpu_preemption_buffers_create(struct ivpu_device *vdev,
struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
- u64 primary_size = ALIGN(vdev->fw->primary_preempt_buf_size, PAGE_SIZE);
- u64 secondary_size = ALIGN(vdev->fw->secondary_preempt_buf_size, PAGE_SIZE);
-
- if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW ||
- ivpu_test_mode & IVPU_TEST_MODE_MIP_DISABLE)
+ if (ivpu_fw_preempt_buf_size(vdev) == 0)
return 0;
cmdq->primary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.user,
- primary_size, DRM_IVPU_BO_WC);
+ vdev->fw->primary_preempt_buf_size,
+ DRM_IVPU_BO_WC);
if (!cmdq->primary_preempt_buf) {
ivpu_err(vdev, "Failed to create primary preemption buffer\n");
return -ENOMEM;
}
cmdq->secondary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.dma,
- secondary_size, DRM_IVPU_BO_WC);
+ vdev->fw->secondary_preempt_buf_size,
+ DRM_IVPU_BO_WC);
if (!cmdq->secondary_preempt_buf) {
ivpu_err(vdev, "Failed to create secondary preemption buffer\n");
goto err_free_primary;
@@ -66,20 +64,39 @@ err_free_primary:
static void ivpu_preemption_buffers_free(struct ivpu_device *vdev,
struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
- if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW)
- return;
-
if (cmdq->primary_preempt_buf)
ivpu_bo_free(cmdq->primary_preempt_buf);
if (cmdq->secondary_preempt_buf)
ivpu_bo_free(cmdq->secondary_preempt_buf);
}
+static int ivpu_preemption_job_init(struct ivpu_device *vdev, struct ivpu_file_priv *file_priv,
+ struct ivpu_cmdq *cmdq, struct ivpu_job *job)
+{
+ int ret;
+
+	/* Use the preemption buffer provided by user space */
+ if (job->primary_preempt_buf)
+ return 0;
+
+ if (!cmdq->primary_preempt_buf) {
+ /* Allocate per command queue preemption buffers */
+ ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq);
+ if (ret)
+ return ret;
+ }
+
+ /* Use preemption buffers allocated by the kernel */
+ job->primary_preempt_buf = cmdq->primary_preempt_buf;
+ job->secondary_preempt_buf = cmdq->secondary_preempt_buf;
+
+ return 0;
+}
+
static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv)
{
struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_cmdq *cmdq;
- int ret;
cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL);
if (!cmdq)
@@ -89,10 +106,6 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv)
if (!cmdq->mem)
goto err_free_cmdq;
- ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq);
- if (ret)
- ivpu_warn(vdev, "Failed to allocate preemption buffers, preemption limited\n");
-
return cmdq;
err_free_cmdq:
@@ -219,11 +232,13 @@ static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *
ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id,
cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
- if (!ret)
+ if (!ret) {
ivpu_dbg(vdev, JOB, "DB %d registered to cmdq %d ctx %d priority %d\n",
cmdq->db_id, cmdq->id, file_priv->ctx.id, cmdq->priority);
- else
+ } else {
xa_erase(&vdev->db_xa, cmdq->db_id);
+ cmdq->db_id = 0;
+ }
return ret;
}
@@ -333,7 +348,7 @@ static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u32
cmdq = xa_load(&file_priv->cmdq_xa, cmdq_id);
if (!cmdq) {
- ivpu_warn_ratelimited(vdev, "Failed to find command queue with ID: %u\n", cmdq_id);
+ ivpu_dbg(vdev, IOCTL, "Failed to find command queue with ID: %u\n", cmdq_id);
return NULL;
}
@@ -427,17 +442,14 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION))
entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK;
- if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
- if (cmdq->primary_preempt_buf) {
- entry->primary_preempt_buf_addr = cmdq->primary_preempt_buf->vpu_addr;
- entry->primary_preempt_buf_size = ivpu_bo_size(cmdq->primary_preempt_buf);
- }
+ if (job->primary_preempt_buf) {
+ entry->primary_preempt_buf_addr = job->primary_preempt_buf->vpu_addr;
+ entry->primary_preempt_buf_size = ivpu_bo_size(job->primary_preempt_buf);
+ }
- if (cmdq->secondary_preempt_buf) {
- entry->secondary_preempt_buf_addr = cmdq->secondary_preempt_buf->vpu_addr;
- entry->secondary_preempt_buf_size =
- ivpu_bo_size(cmdq->secondary_preempt_buf);
- }
+ if (job->secondary_preempt_buf) {
+ entry->secondary_preempt_buf_addr = job->secondary_preempt_buf->vpu_addr;
+ entry->secondary_preempt_buf_size = ivpu_bo_size(job->secondary_preempt_buf);
}
wmb(); /* Ensure that tail is updated after filling entry */
@@ -522,7 +534,7 @@ ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
job->bo_count = bo_count;
job->done_fence = ivpu_fence_create(vdev);
if (!job->done_fence) {
- ivpu_warn_ratelimited(vdev, "Failed to create a fence\n");
+ ivpu_err(vdev, "Failed to create a fence\n");
goto err_free_job;
}
@@ -552,21 +564,26 @@ static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device *
return job;
}
-static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status)
+bool ivpu_job_handle_engine_error(struct ivpu_device *vdev, u32 job_id, u32 job_status)
{
- struct ivpu_job *job;
-
lockdep_assert_held(&vdev->submitted_jobs_lock);
- job = xa_load(&vdev->submitted_jobs_xa, job_id);
- if (!job)
- return -ENOENT;
+ switch (job_status) {
+ case VPU_JSM_STATUS_PROCESSING_ERR:
+ case VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MIN ... VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MAX:
+ {
+ struct ivpu_job *job = xa_load(&vdev->submitted_jobs_xa, job_id);
- if (job_status == VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW) {
+ if (!job)
+ return false;
+
+ /* Trigger an engine reset */
guard(mutex)(&job->file_priv->lock);
+ job->job_status = job_status;
+
if (job->file_priv->has_mmu_faults)
- return 0;
+ return false;
/*
* Mark context as faulty and defer destruction of the job to jobs abort thread
@@ -574,23 +591,43 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32
* status and ensure both are handled in the same way
*/
job->file_priv->has_mmu_faults = true;
- queue_work(system_wq, &vdev->context_abort_work);
- return 0;
+ queue_work(system_percpu_wq, &vdev->context_abort_work);
+ return true;
}
+ default:
+ /* Complete job with error status, engine reset not required */
+ break;
+ }
+
+ return false;
+}
- job = ivpu_job_remove_from_submitted_jobs(vdev, job_id);
+static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status)
+{
+ struct ivpu_job *job;
+
+ lockdep_assert_held(&vdev->submitted_jobs_lock);
+
+ job = xa_load(&vdev->submitted_jobs_xa, job_id);
if (!job)
return -ENOENT;
- if (job->file_priv->has_mmu_faults)
- job_status = DRM_IVPU_JOB_STATUS_ABORTED;
+ ivpu_job_remove_from_submitted_jobs(vdev, job_id);
- job->bos[CMD_BUF_IDX]->job_status = job_status;
+ if (job->job_status == VPU_JSM_STATUS_SUCCESS) {
+ if (job->file_priv->has_mmu_faults)
+ job->job_status = DRM_IVPU_JOB_STATUS_ABORTED;
+ else
+ job->job_status = job_status;
+ }
+
+ job->bos[CMD_BUF_IDX]->job_status = job->job_status;
dma_fence_signal(job->done_fence);
trace_job("done", job);
ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n",
- job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, job_status);
+ job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx,
+ job->job_status);
ivpu_job_destroy(job);
ivpu_stop_job_timeout_detection(vdev);
@@ -650,7 +687,6 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
else
cmdq = ivpu_cmdq_acquire(file_priv, cmdq_id);
if (!cmdq) {
- ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d\n", file_priv->ctx.id);
ret = -EINVAL;
goto err_unlock;
}
@@ -661,6 +697,13 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
goto err_unlock;
}
+ ret = ivpu_preemption_job_init(vdev, file_priv, cmdq, job);
+ if (ret) {
+ ivpu_err(vdev, "Failed to initialize preemption buffers for job %d: %d\n",
+ job->job_id, ret);
+ goto err_unlock;
+ }
+
job->cmdq_id = cmdq->id;
is_first_job = xa_empty(&vdev->submitted_jobs_xa);
@@ -714,7 +757,7 @@ err_unlock:
static int
ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles,
- u32 buf_count, u32 commands_offset)
+ u32 buf_count, u32 commands_offset, u32 preempt_buffer_index)
{
struct ivpu_file_priv *file_priv = job->file_priv;
struct ivpu_device *vdev = file_priv->vdev;
@@ -727,40 +770,58 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
for (i = 0; i < buf_count; i++) {
struct drm_gem_object *obj = drm_gem_object_lookup(file, buf_handles[i]);
- if (!obj)
+ if (!obj) {
+ ivpu_dbg(vdev, IOCTL, "Failed to lookup GEM object with handle %u\n",
+ buf_handles[i]);
return -ENOENT;
+ }
job->bos[i] = to_ivpu_bo(obj);
- ret = ivpu_bo_pin(job->bos[i]);
+ ret = ivpu_bo_bind(job->bos[i]);
if (ret)
return ret;
}
bo = job->bos[CMD_BUF_IDX];
if (!dma_resv_test_signaled(bo->base.base.resv, DMA_RESV_USAGE_READ)) {
- ivpu_warn(vdev, "Buffer is already in use\n");
+ ivpu_dbg(vdev, IOCTL, "Buffer is already in use by another job\n");
return -EBUSY;
}
if (commands_offset >= ivpu_bo_size(bo)) {
- ivpu_warn(vdev, "Invalid command buffer offset %u\n", commands_offset);
+ ivpu_dbg(vdev, IOCTL, "Invalid commands offset %u for buffer size %zu\n",
+ commands_offset, ivpu_bo_size(bo));
return -EINVAL;
}
job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset;
+ if (preempt_buffer_index) {
+ struct ivpu_bo *preempt_bo = job->bos[preempt_buffer_index];
+
+ if (ivpu_bo_size(preempt_bo) < ivpu_fw_preempt_buf_size(vdev)) {
+ ivpu_dbg(vdev, IOCTL, "Preemption buffer is too small\n");
+ return -EINVAL;
+ }
+ if (ivpu_bo_is_mappable(preempt_bo)) {
+ ivpu_dbg(vdev, IOCTL, "Preemption buffer cannot be mappable\n");
+ return -EINVAL;
+ }
+ job->primary_preempt_buf = preempt_bo;
+ }
+
ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count,
&acquire_ctx);
if (ret) {
- ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret);
+ ivpu_warn_ratelimited(vdev, "Failed to lock reservations: %d\n", ret);
return ret;
}
for (i = 0; i < buf_count; i++) {
ret = dma_resv_reserve_fences(job->bos[i]->base.base.resv, 1);
if (ret) {
- ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
+ ivpu_warn_ratelimited(vdev, "Failed to reserve fences: %d\n", ret);
goto unlock_reservations;
}
}
@@ -780,7 +841,7 @@ unlock_reservations:
static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv, u32 cmdq_id,
u32 buffer_count, u32 engine, void __user *buffers_ptr, u32 cmds_offset,
- u8 priority)
+ u32 preempt_buffer_index, u8 priority)
{
struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_job *job;
@@ -807,16 +868,14 @@ static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv,
job = ivpu_job_create(file_priv, engine, buffer_count);
if (!job) {
- ivpu_err(vdev, "Failed to create job\n");
ret = -ENOMEM;
goto err_exit_dev;
}
- ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, buffer_count, cmds_offset);
- if (ret) {
- ivpu_err(vdev, "Failed to prepare job: %d\n", ret);
+ ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, buffer_count, cmds_offset,
+ preempt_buffer_index);
+ if (ret)
goto err_destroy_job;
- }
down_read(&vdev->pm->reset_lock);
ret = ivpu_job_submit(job, priority, cmdq_id);
@@ -842,58 +901,91 @@ err_free_handles:
int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = file_priv->vdev;
struct drm_ivpu_submit *args = data;
u8 priority;
- if (args->engine != DRM_IVPU_ENGINE_COMPUTE)
+ if (args->engine != DRM_IVPU_ENGINE_COMPUTE) {
+ ivpu_dbg(vdev, IOCTL, "Invalid engine %d\n", args->engine);
return -EINVAL;
+ }
- if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME)
+ if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) {
+ ivpu_dbg(vdev, IOCTL, "Invalid priority %d\n", args->priority);
return -EINVAL;
+ }
- if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT)
+ if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT) {
+ ivpu_dbg(vdev, IOCTL, "Invalid buffer count %u\n", args->buffer_count);
return -EINVAL;
+ }
- if (!IS_ALIGNED(args->commands_offset, 8))
+ if (!IS_ALIGNED(args->commands_offset, 8)) {
+ ivpu_dbg(vdev, IOCTL, "Invalid commands offset %u\n", args->commands_offset);
return -EINVAL;
+ }
- if (!file_priv->ctx.id)
+ if (!file_priv->ctx.id) {
+ ivpu_dbg(vdev, IOCTL, "Context not initialized\n");
return -EINVAL;
+ }
- if (file_priv->has_mmu_faults)
+ if (file_priv->has_mmu_faults) {
+ ivpu_dbg(vdev, IOCTL, "Context %u has MMU faults\n", file_priv->ctx.id);
return -EBADFD;
+ }
priority = ivpu_job_to_jsm_priority(args->priority);
return ivpu_submit(file, file_priv, 0, args->buffer_count, args->engine,
- (void __user *)args->buffers_ptr, args->commands_offset, priority);
+ (void __user *)args->buffers_ptr, args->commands_offset, 0, priority);
}
int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = file_priv->vdev;
struct drm_ivpu_cmdq_submit *args = data;
- if (!ivpu_is_capable(file_priv->vdev, DRM_IVPU_CAP_MANAGE_CMDQ))
+ if (!ivpu_is_capable(file_priv->vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) {
+ ivpu_dbg(vdev, IOCTL, "Command queue management not supported\n");
return -ENODEV;
+ }
- if (args->cmdq_id < IVPU_CMDQ_MIN_ID || args->cmdq_id > IVPU_CMDQ_MAX_ID)
+ if (args->cmdq_id < IVPU_CMDQ_MIN_ID || args->cmdq_id > IVPU_CMDQ_MAX_ID) {
+ ivpu_dbg(vdev, IOCTL, "Invalid command queue ID %u\n", args->cmdq_id);
return -EINVAL;
+ }
- if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT)
+ if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT) {
+ ivpu_dbg(vdev, IOCTL, "Invalid buffer count %u\n", args->buffer_count);
return -EINVAL;
+ }
- if (!IS_ALIGNED(args->commands_offset, 8))
+ if (args->preempt_buffer_index >= args->buffer_count) {
+ ivpu_dbg(vdev, IOCTL, "Invalid preemption buffer index %u\n",
+ args->preempt_buffer_index);
return -EINVAL;
+ }
- if (!file_priv->ctx.id)
+ if (!IS_ALIGNED(args->commands_offset, 8)) {
+ ivpu_dbg(vdev, IOCTL, "Invalid commands offset %u\n", args->commands_offset);
return -EINVAL;
+ }
- if (file_priv->has_mmu_faults)
+ if (!file_priv->ctx.id) {
+ ivpu_dbg(vdev, IOCTL, "Context not initialized\n");
+ return -EINVAL;
+ }
+
+ if (file_priv->has_mmu_faults) {
+ ivpu_dbg(vdev, IOCTL, "Context %u has MMU faults\n", file_priv->ctx.id);
return -EBADFD;
+ }
return ivpu_submit(file, file_priv, args->cmdq_id, args->buffer_count, VPU_ENGINE_COMPUTE,
- (void __user *)args->buffers_ptr, args->commands_offset, 0);
+ (void __user *)args->buffers_ptr, args->commands_offset,
+ args->preempt_buffer_index, 0);
}
int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -904,11 +996,15 @@ int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *
struct ivpu_cmdq *cmdq;
int ret;
- if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ))
+ if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) {
+ ivpu_dbg(vdev, IOCTL, "Command queue management not supported\n");
return -ENODEV;
+ }
- if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME)
+ if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) {
+ ivpu_dbg(vdev, IOCTL, "Invalid priority %d\n", args->priority);
return -EINVAL;
+ }
ret = ivpu_rpm_get(vdev);
if (ret < 0)
@@ -936,8 +1032,10 @@ int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file
u32 cmdq_id = 0;
int ret;
- if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ))
+ if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) {
+ ivpu_dbg(vdev, IOCTL, "Command queue management not supported\n");
return -ENODEV;
+ }
ret = ivpu_rpm_get(vdev);
if (ret < 0)
@@ -984,7 +1082,9 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;
mutex_lock(&vdev->submitted_jobs_lock);
- ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
+ if (!ivpu_job_handle_engine_error(vdev, payload->job_id, payload->job_status))
+ /* No engine error, complete the job normally */
+ ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
mutex_unlock(&vdev->submitted_jobs_lock);
}
@@ -1012,7 +1112,7 @@ void ivpu_context_abort_work_fn(struct work_struct *work)
if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
if (ivpu_jsm_reset_engine(vdev, 0))
- return;
+ goto runtime_put;
mutex_lock(&vdev->context_list_lock);
xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
@@ -1036,7 +1136,7 @@ void ivpu_context_abort_work_fn(struct work_struct *work)
goto runtime_put;
if (ivpu_jsm_hws_resume_engine(vdev, 0))
- return;
+ goto runtime_put;
/*
* In hardware scheduling mode NPU already has stopped processing jobs
* and won't send us any further notifications, thus we have to free job related resources
@@ -1049,6 +1149,5 @@ void ivpu_context_abort_work_fn(struct work_struct *work)
mutex_unlock(&vdev->submitted_jobs_lock);
runtime_put:
- pm_runtime_mark_last_busy(vdev->drm.dev);
pm_runtime_put_autosuspend(vdev->drm.dev);
}
diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h
index 2e301c2eea7b..3ab61e6a5616 100644
--- a/drivers/accel/ivpu/ivpu_job.h
+++ b/drivers/accel/ivpu/ivpu_job.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_JOB_H__
@@ -15,12 +15,17 @@ struct ivpu_device;
struct ivpu_file_priv;
/**
- * struct ivpu_cmdq - Object representing device queue used to send jobs.
- * @jobq: Pointer to job queue memory shared with the device
- * @mem: Memory allocated for the job queue, shared with device
- * @entry_count Number of job entries in the queue
- * @db_id: Doorbell assigned to this job queue
- * @db_registered: True if doorbell is registered in device
+ * struct ivpu_cmdq - Represents a command queue for submitting jobs to the VPU.
+ * Tracks queue memory, preemption buffers, and metadata for job management.
+ * @jobq: Pointer to job queue memory shared with the device
+ * @primary_preempt_buf: Primary preemption buffer for this queue (optional)
+ * @secondary_preempt_buf: Secondary preemption buffer for this queue (optional)
+ * @mem: Memory allocated for the job queue, shared with device
+ * @entry_count: Number of job entries in the queue
+ * @id: Unique command queue ID
+ * @db_id: Doorbell ID assigned to this job queue
+ * @priority: Priority level of the command queue
+ * @is_legacy: True if this is a legacy command queue
*/
struct ivpu_cmdq {
struct vpu_job_queue *jobq;
@@ -35,16 +40,22 @@ struct ivpu_cmdq {
};
/**
- * struct ivpu_job - KMD object that represents batchbuffer / DMA buffer.
- * Each batch / DMA buffer is a job to be submitted and executed by the VPU FW.
- * This is a unit of execution, and be tracked by the job_id for
- * any status reporting from VPU FW through IPC JOB RET/DONE message.
- * @file_priv: The client that submitted this job
- * @job_id: Job ID for KMD tracking and job status reporting from VPU FW
- * @status: Status of the Job from IPC JOB RET/DONE message
- * @batch_buffer: CPU vaddr points to the batch buffer memory allocated for the job
- * @submit_status_offset: Offset within batch buffer where job completion handler
- will update the job status
+ * struct ivpu_job - Represents a batch or DMA buffer submitted to the VPU.
+ * Each job is a unit of execution, tracked by job_id for status reporting from VPU FW.
+ * The structure holds all resources and metadata needed for job submission, execution,
+ * and completion handling.
+ * @vdev: Pointer to the VPU device
+ * @file_priv: The client context that submitted this job
+ * @done_fence: Fence signaled when job completes
+ * @cmd_buf_vpu_addr: VPU address of the command buffer for this job
+ * @cmdq_id: Command queue ID used for submission
+ * @job_id: Unique job ID for tracking and status reporting
+ * @engine_idx: Engine index for job execution
+ * @job_status: Status reported by firmware for this job
+ * @primary_preempt_buf: Primary preemption buffer for the job
+ * @secondary_preempt_buf: Secondary preemption buffer for the job (optional)
+ * @bo_count: Number of buffer objects associated with this job
+ * @bos: Array of buffer objects used by the job (batch buffer is at index 0)
*/
struct ivpu_job {
struct ivpu_device *vdev;
@@ -54,6 +65,9 @@ struct ivpu_job {
u32 cmdq_id;
u32 job_id;
u32 engine_idx;
+ u32 job_status;
+ struct ivpu_bo *primary_preempt_buf;
+ struct ivpu_bo *secondary_preempt_buf;
size_t bo_count;
struct ivpu_bo *bos[] __counted_by(bo_count);
};
@@ -71,6 +85,7 @@ void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id)
void ivpu_job_done_consumer_init(struct ivpu_device *vdev);
void ivpu_job_done_consumer_fini(struct ivpu_device *vdev);
+bool ivpu_job_handle_engine_error(struct ivpu_device *vdev, u32 job_id, u32 job_status);
void ivpu_context_abort_work_fn(struct work_struct *work);
void ivpu_jobs_abort_all(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index 5ea010568faa..e1baf6b64935 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -970,7 +970,7 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
}
}
- queue_work(system_wq, &vdev->context_abort_work);
+ queue_work(system_percpu_wq, &vdev->context_abort_work);
}
void ivpu_mmu_evtq_dump(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index f0267efa55aa..87ad593ef47d 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -430,7 +430,7 @@ static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_a
int
ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
- u64 vpu_addr, struct sg_table *sgt, bool llc_coherent)
+ u64 vpu_addr, struct sg_table *sgt, bool llc_coherent, bool read_only)
{
size_t start_vpu_addr = vpu_addr;
struct scatterlist *sg;
@@ -450,6 +450,8 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
prot = IVPU_MMU_ENTRY_MAPPED;
if (llc_coherent)
prot |= IVPU_MMU_ENTRY_FLAG_LLC_COHERENT;
+ if (read_only)
+ prot |= IVPU_MMU_ENTRY_FLAG_RO;
mutex_lock(&ctx->lock);
@@ -527,7 +529,8 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
if (ret)
- ivpu_warn(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
+ ivpu_warn_ratelimited(vdev, "Failed to invalidate TLB for ctx %u: %d\n",
+ ctx->id, ret);
}
int
@@ -568,7 +571,7 @@ void ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
mutex_init(&ctx->lock);
if (!context_id) {
- start = vdev->hw->ranges.global.start;
+ start = vdev->hw->ranges.runtime.start;
end = vdev->hw->ranges.shave.end;
} else {
start = min_t(u64, vdev->hw->ranges.user.start, vdev->hw->ranges.shave.start);
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h
index f255310968cf..663a11a9db11 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.h
+++ b/drivers/accel/ivpu/ivpu_mmu_context.h
@@ -42,7 +42,7 @@ int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu
void ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node);
int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
- u64 vpu_addr, struct sg_table *sgt, bool llc_coherent);
+ u64 vpu_addr, struct sg_table *sgt, bool llc_coherent, bool read_only);
void ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, struct sg_table *sgt);
int ivpu_mmu_context_set_pages_ro(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
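
ivpu_mmu_context_map_sgt() now takes a read_only flag that ORs IVPU_MMU_ENTRY_FLAG_RO into the page protection bits alongside the existing LLC-coherent flag. A standalone sketch of that bit composition, using illustrative flag values rather than the driver's real definitions:

#include <stdbool.h>
#include <stdint.h>

#define ENTRY_MAPPED        (1ULL << 0)   /* placeholder for IVPU_MMU_ENTRY_MAPPED */
#define ENTRY_LLC_COHERENT  (1ULL << 1)   /* placeholder flag value */
#define ENTRY_RO            (1ULL << 2)   /* placeholder flag value */

static uint64_t build_prot(bool llc_coherent, bool read_only)
{
        uint64_t prot = ENTRY_MAPPED;

        if (llc_coherent)
                prot |= ENTRY_LLC_COHERENT;
        if (read_only)
                prot |= ENTRY_RO;
        return prot;
}
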
diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c
index 2a043baf10ca..1d9c1cb17924 100644
--- a/drivers/accel/ivpu/ivpu_ms.c
+++ b/drivers/accel/ivpu/ivpu_ms.c
@@ -8,6 +8,7 @@
#include "ivpu_drv.h"
#include "ivpu_gem.h"
+#include "ivpu_hw.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_ms.h"
#include "ivpu_pm.h"
@@ -37,8 +38,8 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
struct drm_ivpu_metric_streamer_start *args = data;
struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_ms_instance *ms;
- u64 single_buff_size;
u32 sample_size;
+ u64 buf_size;
int ret;
if (!args->metric_group_mask || !args->read_period_samples ||
@@ -52,7 +53,8 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
mutex_lock(&file_priv->ms_lock);
if (get_instance_by_mask(file_priv, args->metric_group_mask)) {
- ivpu_err(vdev, "Instance already exists (mask %#llx)\n", args->metric_group_mask);
+ ivpu_dbg(vdev, IOCTL, "Instance already exists (mask %#llx)\n",
+ args->metric_group_mask);
ret = -EALREADY;
goto unlock;
}
@@ -69,12 +71,18 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
if (ret)
goto err_free_ms;
- single_buff_size = sample_size *
- ((u64)args->read_period_samples * MS_READ_PERIOD_MULTIPLIER);
- ms->bo = ivpu_bo_create_global(vdev, PAGE_ALIGN(single_buff_size * MS_NUM_BUFFERS),
- DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE);
+ buf_size = PAGE_ALIGN((u64)args->read_period_samples * sample_size *
+ MS_READ_PERIOD_MULTIPLIER * MS_NUM_BUFFERS);
+ if (buf_size > ivpu_hw_range_size(&vdev->hw->ranges.global)) {
+ ivpu_dbg(vdev, IOCTL, "Requested MS buffer size %llu exceeds range size %llu\n",
+ buf_size, ivpu_hw_range_size(&vdev->hw->ranges.global));
+ ret = -EINVAL;
+ goto err_free_ms;
+ }
+
+ ms->bo = ivpu_bo_create_global(vdev, buf_size, DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE);
if (!ms->bo) {
- ivpu_err(vdev, "Failed to allocate MS buffer (size %llu)\n", single_buff_size);
+ ivpu_dbg(vdev, IOCTL, "Failed to allocate MS buffer (size %llu)\n", buf_size);
ret = -ENOMEM;
goto err_free_ms;
}
@@ -175,7 +183,8 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *
ms = get_instance_by_mask(file_priv, args->metric_group_mask);
if (!ms) {
- ivpu_err(vdev, "Instance doesn't exist for mask: %#llx\n", args->metric_group_mask);
+ ivpu_dbg(vdev, IOCTL, "Instance doesn't exist for mask: %#llx\n",
+ args->metric_group_mask);
ret = -EINVAL;
goto unlock;
}
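
The metric-streamer start path now computes a single page-aligned allocation covering all buffers and rejects requests larger than the global range. A small host-side sketch of the same arithmetic; the multiplier, buffer count, and range size below are assumed values, not the driver's constants:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE     4096ULL
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
        uint64_t samples = 1000, sample_size = 64;   /* assumed inputs */
        uint64_t multiplier = 2, num_buffers = 2;    /* assumed constants */
        uint64_t range_size = 64ULL << 20;           /* assumed 64 MiB range */
        uint64_t buf_size = PAGE_ALIGN(samples * sample_size * multiplier * num_buffers);

        printf("buf_size=%llu fits=%s\n", (unsigned long long)buf_size,
               buf_size <= range_size ? "yes" : "no (would return -EINVAL)");
        return 0;
}
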
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 475ddc94f1cf..480c075d87f6 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -54,7 +54,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
- struct vpu_boot_params *bp = ivpu_bo_vaddr(fw->mem);
+ struct vpu_boot_params *bp = ivpu_bo_vaddr(fw->mem_bp);
if (!bp->save_restore_ret_address) {
ivpu_pm_prepare_cold_boot(vdev);
@@ -186,7 +186,7 @@ void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason)
if (atomic_cmpxchg(&vdev->pm->reset_pending, 0, 1) == 0) {
ivpu_hw_diagnose_failure(vdev);
ivpu_hw_irq_disable(vdev); /* Disable IRQ early to protect from IRQ storm */
- queue_work(system_unbound_wq, &vdev->pm->recovery_work);
+ queue_work(system_dfl_wq, &vdev->pm->recovery_work);
}
}
@@ -226,7 +226,8 @@ void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
/* No-op if already queued */
- queue_delayed_work(system_wq, &vdev->pm->job_timeout_work, msecs_to_jiffies(timeout_ms));
+ queue_delayed_work(system_percpu_wq, &vdev->pm->job_timeout_work,
+ msecs_to_jiffies(timeout_ms));
}
void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
@@ -359,7 +360,6 @@ int ivpu_rpm_get(struct ivpu_device *vdev)
void ivpu_rpm_put(struct ivpu_device *vdev)
{
- pm_runtime_mark_last_busy(vdev->drm.dev);
pm_runtime_put_autosuspend(vdev->drm.dev);
}
@@ -428,7 +428,6 @@ void ivpu_pm_enable(struct ivpu_device *vdev)
struct device *dev = vdev->drm.dev;
pm_runtime_allow(dev);
- pm_runtime_mark_last_busy(dev);
pm_runtime_put_autosuspend(dev);
}
@@ -502,6 +501,11 @@ void ivpu_pm_irq_dct_work_fn(struct work_struct *work)
else
ret = ivpu_pm_dct_disable(vdev);
- if (!ret)
- ivpu_hw_btrs_dct_set_status(vdev, enable, vdev->pm->dct_active_percent);
+ if (!ret) {
+ /* Convert percent to U1.7 format */
+ u8 val = DIV_ROUND_CLOSEST(vdev->pm->dct_active_percent * 128, 100);
+
+ ivpu_hw_btrs_dct_set_status(vdev, enable, val);
+ }
+
}
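
The DCT work function now converts the active percentage to U1.7 fixed point (128 == 100%) before reporting it. A standalone sketch of that conversion, reimplementing DIV_ROUND_CLOSEST() for illustration:

#include <stdint.h>
#include <stdio.h>

static uint8_t percent_to_u1_7(unsigned int percent)
{
        /* U1.7 fixed point: 128 represents 1.0, rounded to nearest */
        return (uint8_t)((percent * 128 + 50) / 100);
}

int main(void)
{
        /* 30% -> 38, 85% -> 109, 100% -> 128 */
        printf("%u %u %u\n", percent_to_u1_7(30), percent_to_u1_7(85),
               percent_to_u1_7(100));
        return 0;
}
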
diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c
index 268ab7744a8b..d250a10caca9 100644
--- a/drivers/accel/ivpu/ivpu_sysfs.c
+++ b/drivers/accel/ivpu/ivpu_sysfs.c
@@ -63,7 +63,8 @@ npu_memory_utilization_show(struct device *dev, struct device_attribute *attr, c
mutex_lock(&vdev->bo_list_lock);
list_for_each_entry(bo, &vdev->bo_list, bo_list_node)
- total_npu_memory += bo->base.base.size;
+ if (ivpu_bo_is_resident(bo))
+ total_npu_memory += ivpu_bo_size(bo);
mutex_unlock(&vdev->bo_list_lock);
return sysfs_emit(buf, "%lld\n", total_npu_memory);
diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h
index 4b6b2b3d2583..bca6a44dc041 100644
--- a/drivers/accel/ivpu/vpu_jsm_api.h
+++ b/drivers/accel/ivpu/vpu_jsm_api.h
@@ -1,15 +1,16 @@
/* SPDX-License-Identifier: MIT */
/*
- * Copyright (c) 2020-2024, Intel Corporation.
+ * Copyright (c) 2020-2025, Intel Corporation.
+ */
+
+/**
+ * @addtogroup Jsm
+ * @{
*/
/**
* @file
* @brief JSM shared definitions
- *
- * @ingroup Jsm
- * @brief JSM shared definitions
- * @{
*/
#ifndef VPU_JSM_API_H
#define VPU_JSM_API_H
@@ -22,7 +23,7 @@
/*
* Minor version changes when API backward compatibility is preserved.
*/
-#define VPU_JSM_API_VER_MINOR 29
+#define VPU_JSM_API_VER_MINOR 33
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
@@ -71,9 +72,15 @@
#define VPU_JSM_STATUS_MVNCI_OUT_OF_RESOURCES 0xAU
#define VPU_JSM_STATUS_MVNCI_NOT_IMPLEMENTED 0xBU
#define VPU_JSM_STATUS_MVNCI_INTERNAL_ERROR 0xCU
-/* Job status returned when the job was preempted mid-inference */
+/* @deprecated (use VPU_JSM_STATUS_PREEMPTED_MID_COMMAND instead) */
#define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE 0xDU
+/* Job status returned when the job was preempted mid-command */
+#define VPU_JSM_STATUS_PREEMPTED_MID_COMMAND 0xDU
+/* Range of status codes that require engine reset */
+#define VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MIN 0xEU
#define VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW 0xEU
+#define VPU_JSM_STATUS_MVNCI_PREEMPTION_TIMED_OUT 0xFU
+#define VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MAX 0x1FU
/*
* Host <-> VPU IPC channels.
@@ -134,11 +141,21 @@ enum {
* 2. Native fence queues are only supported on VPU 40xx onwards.
*/
VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U),
-
/*
* Enable turbo mode for testing NPU performance; not recommended for regular usage.
*/
- VPU_JOB_QUEUE_FLAGS_TURBO_MODE = (1 << 2U)
+ VPU_JOB_QUEUE_FLAGS_TURBO_MODE = (1 << 2U),
+ /*
+ * Queue error detection mode flag.
+ * For 'interactive' queues (this bit not set), the FW will mark queues that have not
+ * completed a job within the TDR timeout as in error, as part of the engine reset
+ * sequence.
+ * For 'non-interactive' queues (this bit set), the FW will mark queues that have not
+ * progressed their heartbeat within the non-interactive no-progress timeout as in
+ * error, as part of the engine reset sequence. Additionally, an upper limit applies to
+ * these queues: even if they progress the heartbeat, the FW will also mark them as in
+ * error once they run longer than the non-interactive timeout.
+ */
+ VPU_JOB_QUEUE_FLAGS_NON_INTERACTIVE = (1 << 3U)
};
/*
@@ -209,7 +226,7 @@ enum {
*/
#define VPU_INLINE_CMD_TYPE_FENCE_SIGNAL 0x2
-/*
+/**
* Job scheduling priority bands for both hardware scheduling and OS scheduling.
*/
enum vpu_job_scheduling_priority_band {
@@ -220,16 +237,16 @@ enum vpu_job_scheduling_priority_band {
VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT = 4,
};
-/*
+/**
* Job format.
* Jobs defines the actual workloads to be executed by a given engine.
*/
struct vpu_job_queue_entry {
- /**< Address of VPU commands batch buffer */
+ /** Address of VPU commands batch buffer */
u64 batch_buf_addr;
- /**< Job ID */
+ /** Job ID */
u32 job_id;
- /**< Flags bit field, see VPU_JOB_FLAGS_* above */
+ /** Flags bit field, see VPU_JOB_FLAGS_* above */
u32 flags;
/**
* Doorbell ring timestamp taken by KMD from SoC's global system clock, in
@@ -237,20 +254,20 @@ struct vpu_job_queue_entry {
* to match other profiling timestamps.
*/
u64 doorbell_timestamp;
- /**< Extra id for job tracking, used only in the firmware perf traces */
+ /** Extra id for job tracking, used only in the firmware perf traces */
u64 host_tracking_id;
- /**< Address of the primary preemption buffer to use for this job */
+ /** Address of the primary preemption buffer to use for this job */
u64 primary_preempt_buf_addr;
- /**< Size of the primary preemption buffer to use for this job */
+ /** Size of the primary preemption buffer to use for this job */
u32 primary_preempt_buf_size;
- /**< Size of secondary preemption buffer to use for this job */
+ /** Size of secondary preemption buffer to use for this job */
u32 secondary_preempt_buf_size;
- /**< Address of secondary preemption buffer to use for this job */
+ /** Address of secondary preemption buffer to use for this job */
u64 secondary_preempt_buf_addr;
u64 reserved_0;
};
-/*
+/**
* Inline command format.
* Inline commands are the commands executed at scheduler level (typically,
* synchronization directives). Inline command and job objects must be of
@@ -258,34 +275,36 @@ struct vpu_job_queue_entry {
*/
struct vpu_inline_cmd {
u64 reserved_0;
- /* Inline command type, see VPU_INLINE_CMD_TYPE_* defines. */
+ /** Inline command type, see VPU_INLINE_CMD_TYPE_* defines. */
u32 type;
- /* Flags bit field, see VPU_JOB_FLAGS_* above. */
+ /** Flags bit field, see VPU_JOB_FLAGS_* above. */
u32 flags;
- /* Inline command payload. Depends on inline command type. */
- union {
- /* Fence (wait and signal) commands' payload. */
- struct {
- /* Fence object handle. */
+ /** Inline command payload. Depends on inline command type. */
+ union payload {
+ /** Fence (wait and signal) commands' payload. */
+ struct fence {
+ /** Fence object handle. */
u64 fence_handle;
- /* User VA of the current fence value. */
+ /** User VA of the current fence value. */
u64 current_value_va;
- /* User VA of the monitored fence value (read-only). */
+ /** User VA of the monitored fence value (read-only). */
u64 monitored_value_va;
- /* Value to wait for or write in fence location. */
+ /** Value to wait for or write in fence location. */
u64 value;
- /* User VA of the log buffer in which to add log entry on completion. */
+ /** User VA of the log buffer in which to add log entry on completion. */
u64 log_buffer_va;
- /* NPU private data. */
+ /** NPU private data. */
u64 npu_private_data;
} fence;
- /* Other commands do not have a payload. */
- /* Payload definition for future inline commands can be inserted here. */
+ /**
+ * Other commands do not have a payload:
+ * Payload definition for future inline commands can be inserted here.
+ */
u64 reserved_1[6];
} payload;
};
-/*
+/**
* Job queue slots can be populated either with job objects or inline command objects.
*/
union vpu_jobq_slot {
@@ -293,7 +312,7 @@ union vpu_jobq_slot {
struct vpu_inline_cmd inline_cmd;
};
-/*
+/**
* Job queue control registers.
*/
struct vpu_job_queue_header {
@@ -301,18 +320,18 @@ struct vpu_job_queue_header {
u32 head;
u32 tail;
u32 flags;
- /* Set to 1 to indicate priority_band field is valid */
+ /** Set to 1 to indicate priority_band field is valid */
u32 priority_band_valid;
- /*
+ /**
* Priority for the work of this job queue, valid only if the HWS is NOT used
- * and the `priority_band_valid` is set to 1. It is applied only during
- * the VPU_JSM_MSG_REGISTER_DB message processing.
- * The device firmware might use the `priority_band` to optimize the power
+ * and the @ref priority_band_valid is set to 1. It is applied only during
+ * the @ref VPU_JSM_MSG_REGISTER_DB message processing.
+ * The device firmware might use the priority_band to optimize the power
* management logic, but it will not affect the order of jobs.
* Available priority bands: @see enum vpu_job_scheduling_priority_band
*/
u32 priority_band;
- /* Inside realtime band assigns a further priority, limited to 0..31 range */
+ /** Within the realtime band, assigns a further priority, limited to the 0..31 range */
u32 realtime_priority_level;
u32 reserved_0[9];
};
@@ -337,16 +356,16 @@ enum vpu_trace_entity_type {
VPU_TRACE_ENTITY_TYPE_HW_COMPONENT = 2,
};
-/*
+/**
* HWS specific log buffer header details.
* Total size is 32 bytes.
*/
struct vpu_hws_log_buffer_header {
- /* Written by VPU after adding a log entry. Initialised by host to 0. */
+ /** Written by VPU after adding a log entry. Initialised by host to 0. */
u32 first_free_entry_index;
- /* Incremented by VPU every time the VPU writes the 0th entry; initialised by host to 0. */
+ /** Incremented by VPU every time the VPU writes the 0th entry; initialised by host to 0. */
u32 wraparound_count;
- /*
+ /**
* This is the number of buffers that can be stored in the log buffer provided by the host.
* It is written by host before passing buffer to VPU. VPU should consider it read-only.
*/
@@ -354,14 +373,14 @@ struct vpu_hws_log_buffer_header {
u64 reserved[2];
};
-/*
+/**
* HWS specific log buffer entry details.
* Total size is 32 bytes.
*/
struct vpu_hws_log_buffer_entry {
- /* VPU timestamp must be an invariant timer tick (not impacted by DVFS) */
+ /** VPU timestamp must be an invariant timer tick (not impacted by DVFS) */
u64 vpu_timestamp;
- /*
+ /**
* Operation type:
* 0 - context state change
* 1 - queue new work
@@ -371,7 +390,7 @@ struct vpu_hws_log_buffer_entry {
*/
u32 operation_type;
u32 reserved;
- /* Operation data depends on operation type */
+ /** Operation data depends on operation type */
u64 operation_data[2];
};
@@ -381,51 +400,54 @@ enum vpu_hws_native_fence_log_type {
VPU_HWS_NATIVE_FENCE_LOG_TYPE_SIGNALS = 2
};
-/* HWS native fence log buffer header. */
+/** HWS native fence log buffer header. */
struct vpu_hws_native_fence_log_header {
union {
struct {
- /* Index of the first free entry in buffer. */
+ /** Index of the first free entry in buffer. */
u32 first_free_entry_idx;
- /* Incremented each time NPU wraps around the buffer to write next entry. */
+ /**
+ * Incremented whenever the NPU wraps around the buffer and writes
+ * to the first entry again.
+ */
u32 wraparound_count;
};
- /* Field allowing atomic update of both fields above. */
+ /** Field allowing atomic update of both fields above. */
u64 atomic_wraparound_and_entry_idx;
};
- /* Log buffer type, see enum vpu_hws_native_fence_log_type. */
+ /** Log buffer type, see enum vpu_hws_native_fence_log_type. */
u64 type;
- /* Allocated number of entries in the log buffer. */
+ /** Allocated number of entries in the log buffer. */
u64 entry_nb;
u64 reserved[2];
};
-/* Native fence log operation types. */
+/** Native fence log operation types. */
enum vpu_hws_native_fence_log_op {
VPU_HWS_NATIVE_FENCE_LOG_OP_SIGNAL_EXECUTED = 0,
VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED = 1
};
-/* HWS native fence log entry. */
+/** HWS native fence log entry. */
struct vpu_hws_native_fence_log_entry {
- /* Newly signaled/unblocked fence value. */
+ /** Newly signaled/unblocked fence value. */
u64 fence_value;
- /* Native fence object handle to which this operation belongs. */
+ /** Native fence object handle to which this operation belongs. */
u64 fence_handle;
- /* Operation type, see enum vpu_hws_native_fence_log_op. */
+ /** Operation type, see enum vpu_hws_native_fence_log_op. */
u64 op_type;
u64 reserved_0;
- /*
+ /**
* VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED only: Timestamp at which fence
* wait was started (in NPU SysTime).
*/
u64 fence_wait_start_ts;
u64 reserved_1;
- /* Timestamp at which fence operation was completed (in NPU SysTime). */
+ /** Timestamp at which fence operation was completed (in NPU SysTime). */
u64 fence_end_ts;
};
-/* Native fence log buffer. */
+/** Native fence log buffer. */
struct vpu_hws_native_fence_log_buffer {
struct vpu_hws_native_fence_log_header header;
struct vpu_hws_native_fence_log_entry entry[];
@@ -435,10 +457,17 @@ struct vpu_hws_native_fence_log_buffer {
* Host <-> VPU IPC messages types.
*/
enum vpu_ipc_msg_type {
+ /** Unsupported command */
VPU_JSM_MSG_UNKNOWN = 0xFFFFFFFF,
- /* IPC Host -> Device, Async commands */
+ /** IPC Host -> Device, base id for async commands */
VPU_JSM_MSG_ASYNC_CMD = 0x1100,
+ /**
+ * Reset engine. The NPU cancels all the jobs currently executing on the target
+ * engine so that the engine becomes idle, and then performs a HW reset before
+ * returning to the host.
+ * @see struct vpu_ipc_msg_payload_engine_reset
+ */
VPU_JSM_MSG_ENGINE_RESET = VPU_JSM_MSG_ASYNC_CMD,
/**
* Preempt engine. The NPU stops (preempts) all the jobs currently
@@ -448,10 +477,24 @@ enum vpu_ipc_msg_type {
* the target engine, but it stops processing them (until the queue doorbell
* is rung again); the host is responsible to reset the job queue, either
* after preemption or when resubmitting jobs to the queue.
+ * @see vpu_ipc_msg_payload_engine_preempt
*/
VPU_JSM_MSG_ENGINE_PREEMPT = 0x1101,
+ /**
+ * OS scheduling doorbell register command
+ * @see vpu_ipc_msg_payload_register_db
+ */
VPU_JSM_MSG_REGISTER_DB = 0x1102,
+ /**
+ * OS scheduling doorbell unregister command
+ * @see vpu_ipc_msg_payload_unregister_db
+ */
VPU_JSM_MSG_UNREGISTER_DB = 0x1103,
+ /**
+ * Query engine heartbeat. The heartbeat is expected to increase monotonically
+ * and to advance while work is being progressed by the NPU.
+ * @see vpu_ipc_msg_payload_query_engine_hb
+ */
VPU_JSM_MSG_QUERY_ENGINE_HB = 0x1104,
VPU_JSM_MSG_GET_POWER_LEVEL_COUNT = 0x1105,
VPU_JSM_MSG_GET_POWER_LEVEL = 0x1106,
@@ -477,6 +520,7 @@ enum vpu_ipc_msg_type {
* aborted and removed from internal scheduling queues. All doorbells assigned
* to the host_ssid are unregistered and any internal FW resources belonging to
* the host_ssid are released.
+ * @see vpu_ipc_msg_payload_ssid_release
*/
VPU_JSM_MSG_SSID_RELEASE = 0x110e,
/**
@@ -504,43 +548,78 @@ enum vpu_ipc_msg_type {
* @see vpu_jsm_metric_streamer_start
*/
VPU_JSM_MSG_METRIC_STREAMER_INFO = 0x1112,
- /** Control command: Priority band setup */
+ /**
+ * Control command: Priority band setup
+ * @see vpu_ipc_msg_payload_hws_priority_band_setup
+ */
VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP = 0x1113,
- /** Control command: Create command queue */
+ /**
+ * Control command: Create command queue
+ * @see vpu_ipc_msg_payload_hws_create_cmdq
+ */
VPU_JSM_MSG_CREATE_CMD_QUEUE = 0x1114,
- /** Control command: Destroy command queue */
+ /**
+ * Control command: Destroy command queue
+ * @see vpu_ipc_msg_payload_hws_destroy_cmdq
+ */
VPU_JSM_MSG_DESTROY_CMD_QUEUE = 0x1115,
- /** Control command: Set context scheduling properties */
+ /**
+ * Control command: Set context scheduling properties
+ * @see vpu_ipc_msg_payload_hws_set_context_sched_properties
+ */
VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES = 0x1116,
- /*
+ /**
* Register a doorbell to notify VPU of new work. The doorbell may later be
* deallocated or reassigned to another context.
+ * @see vpu_jsm_hws_register_db
*/
VPU_JSM_MSG_HWS_REGISTER_DB = 0x1117,
- /** Control command: Log buffer setting */
+ /**
+ * Control command: Log buffer setting
+ * @see vpu_ipc_msg_payload_hws_set_scheduling_log
+ */
VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG = 0x1118,
- /* Control command: Suspend command queue. */
+ /**
+ * Control command: Suspend command queue.
+ * @see vpu_ipc_msg_payload_hws_suspend_cmdq
+ */
VPU_JSM_MSG_HWS_SUSPEND_CMDQ = 0x1119,
- /* Control command: Resume command queue */
+ /**
+ * Control command: Resume command queue
+ * @see vpu_ipc_msg_payload_hws_resume_cmdq
+ */
VPU_JSM_MSG_HWS_RESUME_CMDQ = 0x111a,
- /* Control command: Resume engine after reset */
+ /**
+ * Control command: Resume engine after reset
+ * @see vpu_ipc_msg_payload_hws_resume_engine
+ */
VPU_JSM_MSG_HWS_ENGINE_RESUME = 0x111b,
- /* Control command: Enable survivability/DCT mode */
+ /**
+ * Control command: Enable survivability/DCT mode
+ * @see vpu_ipc_msg_payload_pwr_dct_control
+ */
VPU_JSM_MSG_DCT_ENABLE = 0x111c,
- /* Control command: Disable survivability/DCT mode */
+ /**
+ * Control command: Disable survivability/DCT mode
+ * This command has no payload
+ */
VPU_JSM_MSG_DCT_DISABLE = 0x111d,
/**
* Dump VPU state. To be used for debug purposes only.
- * NOTE: Please introduce new ASYNC commands before this one. *
+ * This command has no payload.
+ * NOTE: Please introduce new ASYNC commands before this one.
*/
VPU_JSM_MSG_STATE_DUMP = 0x11FF,
- /* IPC Host -> Device, General commands */
+ /** IPC Host -> Device, base id for general commands */
VPU_JSM_MSG_GENERAL_CMD = 0x1200,
+ /** Unsupported command */
VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED = VPU_JSM_MSG_GENERAL_CMD,
/**
* Control dyndbg behavior by executing a dyndbg command; equivalent to
- * Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`.
+ * Linux command:
+ * @verbatim echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control @endverbatim
+ * @see vpu_ipc_msg_payload_dyndbg_control
*/
VPU_JSM_MSG_DYNDBG_CONTROL = 0x1201,
/**
@@ -548,17 +627,35 @@ enum vpu_ipc_msg_type {
*/
VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202,
- /* IPC Device -> Host, Job completion */
+ /**
+ * IPC Device -> Host, Job completion
+ * @see struct vpu_ipc_msg_payload_job_done
+ */
VPU_JSM_MSG_JOB_DONE = 0x2100,
- /* IPC Device -> Host, Fence signalled */
+ /**
+ * IPC Device -> Host, Fence signalled
+ * @see vpu_ipc_msg_payload_native_fence_signalled
+ */
VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED = 0x2101,
/* IPC Device -> Host, Async command completion */
VPU_JSM_MSG_ASYNC_CMD_DONE = 0x2200,
+ /**
+ * IPC Device -> Host, engine reset complete
+ * @see vpu_ipc_msg_payload_engine_reset_done
+ */
VPU_JSM_MSG_ENGINE_RESET_DONE = VPU_JSM_MSG_ASYNC_CMD_DONE,
+ /**
+ * Preempt complete message
+ * @see vpu_ipc_msg_payload_engine_preempt_done
+ */
VPU_JSM_MSG_ENGINE_PREEMPT_DONE = 0x2201,
VPU_JSM_MSG_REGISTER_DB_DONE = 0x2202,
VPU_JSM_MSG_UNREGISTER_DB_DONE = 0x2203,
+ /**
+ * Response to query engine heartbeat.
+ * @see vpu_ipc_msg_payload_query_engine_hb_done
+ */
VPU_JSM_MSG_QUERY_ENGINE_HB_DONE = 0x2204,
VPU_JSM_MSG_GET_POWER_LEVEL_COUNT_DONE = 0x2205,
VPU_JSM_MSG_GET_POWER_LEVEL_DONE = 0x2206,
@@ -575,7 +672,10 @@ enum vpu_ipc_msg_type {
VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP = 0x220c,
/** Response to VPU_JSM_MSG_TRACE_GET_NAME. */
VPU_JSM_MSG_TRACE_GET_NAME_RSP = 0x220d,
- /** Response to VPU_JSM_MSG_SSID_RELEASE. */
+ /**
+ * Response to VPU_JSM_MSG_SSID_RELEASE.
+ * @see vpu_ipc_msg_payload_ssid_release
+ */
VPU_JSM_MSG_SSID_RELEASE_DONE = 0x220e,
/**
* Response to VPU_JSM_MSG_METRIC_STREAMER_START.
@@ -605,37 +705,71 @@ enum vpu_ipc_msg_type {
/**
* Asynchronous event sent from the VPU to the host either when the current
* metric buffer is full or when the VPU has collected a multiple of
- * @notify_sample_count samples as indicated through the start command
- * (VPU_JSM_MSG_METRIC_STREAMER_START). Returns information about collected
- * metric data.
+ * @ref vpu_jsm_metric_streamer_start::notify_sample_count samples as indicated
+ * through the start command (VPU_JSM_MSG_METRIC_STREAMER_START). Returns
+ * information about collected metric data.
* @see vpu_jsm_metric_streamer_done
*/
VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION = 0x2213,
- /** Response to control command: Priority band setup */
+ /**
+ * Response to control command: Priority band setup
+ * @see vpu_ipc_msg_payload_hws_priority_band_setup
+ */
VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP = 0x2214,
- /** Response to control command: Create command queue */
+ /**
+ * Response to control command: Create command queue
+ * @see vpu_ipc_msg_payload_hws_create_cmdq_rsp
+ */
VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP = 0x2215,
- /** Response to control command: Destroy command queue */
+ /**
+ * Response to control command: Destroy command queue
+ * @see vpu_ipc_msg_payload_hws_destroy_cmdq
+ */
VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP = 0x2216,
- /** Response to control command: Set context scheduling properties */
+ /**
+ * Response to control command: Set context scheduling properties
+ * @see vpu_ipc_msg_payload_hws_set_context_sched_properties
+ */
VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP = 0x2217,
- /** Response to control command: Log buffer setting */
+ /**
+ * Response to control command: Log buffer setting
+ * @see vpu_ipc_msg_payload_hws_set_scheduling_log
+ */
VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP = 0x2218,
- /* IPC Device -> Host, HWS notify index entry of log buffer written */
+ /**
+ * IPC Device -> Host, HWS notify index entry of log buffer written
+ * @see vpu_ipc_msg_payload_hws_scheduling_log_notification
+ */
VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION = 0x2219,
- /* IPC Device -> Host, HWS completion of a context suspend request */
+ /**
+ * IPC Device -> Host, HWS completion of a context suspend request
+ * @see vpu_ipc_msg_payload_hws_suspend_cmdq
+ */
VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE = 0x221a,
- /* Response to control command: Resume command queue */
+ /**
+ * Response to control command: Resume command queue
+ * @see vpu_ipc_msg_payload_hws_resume_cmdq
+ */
VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP = 0x221b,
- /* Response to control command: Resume engine command response */
+ /**
+ * Response to control command: Resume engine command response
+ * @see vpu_ipc_msg_payload_hws_resume_engine
+ */
VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE = 0x221c,
- /* Response to control command: Enable survivability/DCT mode */
+ /**
+ * Response to control command: Enable survivability/DCT mode
+ * This command has no payload
+ */
VPU_JSM_MSG_DCT_ENABLE_DONE = 0x221d,
- /* Response to control command: Disable survivability/DCT mode */
+ /**
+ * Response to control command: Disable survivability/DCT mode
+ * This command has no payload
+ */
VPU_JSM_MSG_DCT_DISABLE_DONE = 0x221e,
/**
* Response to state dump control command.
- * NOTE: Please introduce new ASYNC responses before this one. *
+ * This command has no payload.
+ * NOTE: Please introduce new ASYNC responses before this one.
*/
VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF,
@@ -653,57 +787,66 @@ enum vpu_ipc_msg_type {
enum vpu_ipc_msg_status { VPU_JSM_MSG_FREE, VPU_JSM_MSG_ALLOCATED };
-/*
- * Host <-> LRT IPC message payload definitions
+/**
+ * Engine reset request payload
+ * @see VPU_JSM_MSG_ENGINE_RESET
*/
struct vpu_ipc_msg_payload_engine_reset {
- /* Engine to be reset. */
+ /** Engine to be reset. */
u32 engine_idx;
- /* Reserved */
+ /** Reserved */
u32 reserved_0;
};
+/**
+ * Engine preemption request struct
+ * @see VPU_JSM_MSG_ENGINE_PREEMPT
+ */
struct vpu_ipc_msg_payload_engine_preempt {
- /* Engine to be preempted. */
+ /** Engine to be preempted. */
u32 engine_idx;
- /* ID of the preemption request. */
+ /** ID of the preemption request. */
u32 preempt_id;
};
-/*
- * @brief Register doorbell command structure.
+/**
+ * Register doorbell command structure.
* This structure supports doorbell registration for only OS scheduling.
* @see VPU_JSM_MSG_REGISTER_DB
*/
struct vpu_ipc_msg_payload_register_db {
- /* Index of the doorbell to register. */
+ /** Index of the doorbell to register. */
u32 db_idx;
- /* Reserved */
+ /** Reserved */
u32 reserved_0;
- /* Virtual address in Global GTT pointing to the start of job queue. */
+ /** Virtual address in Global GTT pointing to the start of job queue. */
u64 jobq_base;
- /* Size of the job queue in bytes. */
+ /** Size of the job queue in bytes. */
u32 jobq_size;
- /* Host sub-stream ID for the context assigned to the doorbell. */
+ /** Host sub-stream ID for the context assigned to the doorbell. */
u32 host_ssid;
};
/**
- * @brief Unregister doorbell command structure.
+ * Unregister doorbell command structure.
* Request structure to unregister a doorbell for both HW and OS scheduling.
* @see VPU_JSM_MSG_UNREGISTER_DB
*/
struct vpu_ipc_msg_payload_unregister_db {
- /* Index of the doorbell to unregister. */
+ /** Index of the doorbell to unregister. */
u32 db_idx;
- /* Reserved */
+ /** Reserved */
u32 reserved_0;
};
+/**
+ * Heartbeat request structure
+ * @see VPU_JSM_MSG_QUERY_ENGINE_HB
+ */
struct vpu_ipc_msg_payload_query_engine_hb {
- /* Engine to return heartbeat value. */
+ /** Engine to return heartbeat value. */
u32 engine_idx;
- /* Reserved */
+ /** Reserved */
u32 reserved_0;
};
@@ -723,10 +866,14 @@ struct vpu_ipc_msg_payload_power_level {
u32 reserved_0;
};
+/**
+ * Structure for requesting ssid release
+ * @see VPU_JSM_MSG_SSID_RELEASE
+ */
struct vpu_ipc_msg_payload_ssid_release {
- /* Host sub-stream ID for the context to be released. */
+ /** Host sub-stream ID for the context to be released. */
u32 host_ssid;
- /* Reserved */
+ /** Reserved */
u32 reserved_0;
};
@@ -752,7 +899,7 @@ struct vpu_jsm_metric_streamer_start {
u64 sampling_rate;
/**
* If > 0 the VPU will send a VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION message
- * after every @notify_sample_count samples is collected or dropped by the VPU.
+ * after every @ref notify_sample_count samples is collected or dropped by the VPU.
* If set to UINT_MAX the VPU will only generate a notification when the metric
* buffer is full. If set to 0 the VPU will never generate a notification.
*/
@@ -762,9 +909,9 @@ struct vpu_jsm_metric_streamer_start {
* Address and size of the buffer where the VPU will write metric data. The
* VPU writes all counters from enabled metric groups one after another. If
* there is no space left to write data at the next sample period the VPU
- * will switch to the next buffer (@see next_buffer_addr) and will optionally
- * send a notification to the host driver if @notify_sample_count is non-zero.
- * If @next_buffer_addr is NULL the VPU will stop collecting metric data.
+ * will switch to the next buffer (@ref next_buffer_addr) and will optionally
+ * send a notification to the host driver if @ref notify_sample_count is non-zero.
+ * If @ref next_buffer_addr is NULL the VPU will stop collecting metric data.
*/
u64 buffer_addr;
u64 buffer_size;
@@ -827,63 +974,80 @@ struct vpu_jsm_metric_streamer_update {
u64 next_buffer_size;
};
+/**
+ * Device -> host job completion message.
+ * @see VPU_JSM_MSG_JOB_DONE
+ */
struct vpu_ipc_msg_payload_job_done {
- /* Engine to which the job was submitted. */
+ /** Engine to which the job was submitted. */
u32 engine_idx;
- /* Index of the doorbell to which the job was submitted */
+ /** Index of the doorbell to which the job was submitted */
u32 db_idx;
- /* ID of the completed job */
+ /** ID of the completed job */
u32 job_id;
- /* Status of the completed job */
+ /** Status of the completed job */
u32 job_status;
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved_0;
- /* Command queue id */
+ /** Command queue id */
u64 cmdq_id;
};
-/*
+/**
* Notification message upon native fence signalling.
* @see VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED
*/
struct vpu_ipc_msg_payload_native_fence_signalled {
- /* Engine ID. */
+ /** Engine ID. */
u32 engine_idx;
- /* Host SSID. */
+ /** Host SSID. */
u32 host_ssid;
- /* CMDQ ID */
+ /** CMDQ ID */
u64 cmdq_id;
- /* Fence object handle. */
+ /** Fence object handle. */
u64 fence_handle;
};
+/**
+ * vpu_ipc_msg_payload_engine_reset_done contains an array of this structure,
+ * identifying which queues caused the reset if the FW was able to detect an error.
+ * @see vpu_ipc_msg_payload_engine_reset_done
+ */
struct vpu_jsm_engine_reset_context {
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved_0;
- /* Command queue id */
+ /** Command queue id */
u64 cmdq_id;
- /* See VPU_ENGINE_RESET_CONTEXT_* defines */
+ /** See VPU_ENGINE_RESET_CONTEXT_* defines */
u64 flags;
};
+/**
+ * Engine reset response.
+ * @see VPU_JSM_MSG_ENGINE_RESET_DONE
+ */
struct vpu_ipc_msg_payload_engine_reset_done {
- /* Engine ordinal */
+ /** Engine ordinal */
u32 engine_idx;
- /* Number of impacted contexts */
+ /** Number of impacted contexts */
u32 num_impacted_contexts;
- /* Array of impacted command queue ids and their flags */
+ /** Array of impacted command queue ids and their flags */
struct vpu_jsm_engine_reset_context
impacted_contexts[VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS];
};
+/**
+ * Preemption response struct
+ * @see VPU_JSM_MSG_ENGINE_PREEMPT_DONE
+ */
struct vpu_ipc_msg_payload_engine_preempt_done {
- /* Engine preempted. */
+ /** Engine preempted. */
u32 engine_idx;
- /* ID of the preemption request. */
+ /** ID of the preemption request. */
u32 preempt_id;
};
@@ -912,12 +1076,16 @@ struct vpu_ipc_msg_payload_unregister_db_done {
u32 reserved_0;
};
+/**
+ * Structure for heartbeat response
+ * @see VPU_JSM_MSG_QUERY_ENGINE_HB_DONE
+ */
struct vpu_ipc_msg_payload_query_engine_hb_done {
- /* Engine returning heartbeat value. */
+ /** Engine returning heartbeat value. */
u32 engine_idx;
- /* Reserved */
+ /** Reserved */
u32 reserved_0;
- /* Heartbeat value. */
+ /** Heartbeat value. */
u64 heartbeat;
};
@@ -937,7 +1105,10 @@ struct vpu_ipc_msg_payload_get_power_level_count_done {
u8 power_limit[16];
};
-/* HWS priority band setup request / response */
+/**
+ * HWS priority band setup request / response
+ * @see VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP
+ */
struct vpu_ipc_msg_payload_hws_priority_band_setup {
/*
* Grace period in 100ns units when preempting another priority band for
@@ -964,15 +1135,23 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
* TDR timeout value in milliseconds. Default value of 0 meaning no timeout.
*/
u32 tdr_timeout;
+ /*
+ * Non-interactive queue timeout for no progress of the heartbeat, in milliseconds.
+ * Default value of 0 meaning no timeout.
+ */
+ u32 non_interactive_no_progress_timeout;
+ /*
+ * Non-interactive queue upper limit timeout value in milliseconds. Default
+ * value of 0 meaning no timeout.
+ */
+ u32 non_interactive_timeout;
};
-/*
+/**
* @brief HWS create command queue request.
* Host will create a command queue via this command.
* Note: Cmdq group is a handle of an object which
* may contain one or more command queues.
* @see VPU_JSM_MSG_CREATE_CMD_QUEUE
- * @see VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP
*/
struct vpu_ipc_msg_payload_hws_create_cmdq {
/* Process id */
@@ -993,66 +1172,73 @@ struct vpu_ipc_msg_payload_hws_create_cmdq {
u32 reserved_0;
};
-/*
- * @brief HWS create command queue response.
- * @see VPU_JSM_MSG_CREATE_CMD_QUEUE
+/**
+ * HWS create command queue response.
* @see VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP
*/
struct vpu_ipc_msg_payload_hws_create_cmdq_rsp {
- /* Process id */
+ /** Process id */
u64 process_id;
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /* Engine for which queue is being created */
+ /** Engine for which queue is being created */
u32 engine_idx;
- /* Command queue group */
+ /** Command queue group */
u64 cmdq_group;
- /* Command queue id */
+ /** Command queue id */
u64 cmdq_id;
};
-/* HWS destroy command queue request / response */
+/**
+ * HWS destroy command queue request / response
+ * @see VPU_JSM_MSG_DESTROY_CMD_QUEUE
+ * @see VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP
+ */
struct vpu_ipc_msg_payload_hws_destroy_cmdq {
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved;
- /* Command queue id */
+ /** Command queue id */
u64 cmdq_id;
};
-/* HWS set context scheduling properties request / response */
+/**
+ * HWS set context scheduling properties request / response
+ * @see VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES
+ * @see VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP
+ */
struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved_0;
- /* Command queue id */
+ /** Command queue id */
u64 cmdq_id;
- /*
+ /**
* Priority band to assign to work of this context.
* Available priority bands: @see enum vpu_job_scheduling_priority_band
*/
u32 priority_band;
- /* Inside realtime band assigns a further priority */
+ /** Within the realtime band, assigns a further priority */
u32 realtime_priority_level;
- /* Priority relative to other contexts in the same process */
+ /** Priority relative to other contexts in the same process */
s32 in_process_priority;
- /* Zero padding / Reserved */
+ /** Zero padding / Reserved */
u32 reserved_1;
- /*
+ /**
* Context quantum relative to other contexts of same priority in the same process
* Minimum value supported by NPU is 1ms (10000 in 100ns units).
*/
u64 context_quantum;
- /* Grace period when preempting context of the same priority within the same process */
+ /** Grace period when preempting context of the same priority within the same process */
u64 grace_period_same_priority;
- /* Grace period when preempting context of a lower priority within the same process */
+ /** Grace period when preempting context of a lower priority within the same process */
u64 grace_period_lower_priority;
};
-/*
- * @brief Register doorbell command structure.
+/**
+ * Register doorbell command structure.
* This structure supports doorbell registration for both HW and OS scheduling.
* Note: Queue base and size are added here so that the same structure can be used for
* OS scheduling and HW scheduling. For OS scheduling, cmdq_id will be ignored
@@ -1061,27 +1247,27 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
* @see VPU_JSM_MSG_HWS_REGISTER_DB
*/
struct vpu_jsm_hws_register_db {
- /* Index of the doorbell to register. */
+ /** Index of the doorbell to register. */
u32 db_id;
- /* Host sub-stream ID for the context assigned to the doorbell. */
+ /** Host sub-stream ID for the context assigned to the doorbell. */
u32 host_ssid;
- /* ID of the command queue associated with the doorbell. */
+ /** ID of the command queue associated with the doorbell. */
u64 cmdq_id;
- /* Virtual address pointing to the start of command queue. */
+ /** Virtual address pointing to the start of command queue. */
u64 cmdq_base;
- /* Size of the command queue in bytes. */
+ /** Size of the command queue in bytes. */
u64 cmdq_size;
};
-/*
- * @brief Structure to set another buffer to be used for scheduling-related logging.
+/**
+ * Structure to set another buffer to be used for scheduling-related logging.
* The size of the logging buffer and the number of entries is defined as part of the
* buffer itself as described next.
* The log buffer received from the host is made up of;
- * - header: 32 bytes in size, as shown in 'struct vpu_hws_log_buffer_header'.
+ * - header: 32 bytes in size, as shown in @ref vpu_hws_log_buffer_header.
* The header contains the number of log entries in the buffer.
* - log entry: 0 to n-1, each log entry is 32 bytes in size, as shown in
- * 'struct vpu_hws_log_buffer_entry'.
+ * @ref vpu_hws_log_buffer_entry.
* The entry contains the VPU timestamp, operation type and data.
* The host should provide the notify index value of log buffer to VPU. This is a
* value defined within the log buffer and when written to will generate the
@@ -1095,30 +1281,30 @@ struct vpu_jsm_hws_register_db {
* @see VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
*/
struct vpu_ipc_msg_payload_hws_set_scheduling_log {
- /* Engine ordinal */
+ /** Engine ordinal */
u32 engine_idx;
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /*
+ /**
* VPU log buffer virtual address.
* Set to 0 to disable logging for this engine.
*/
u64 vpu_log_buffer_va;
- /*
+ /**
* Notify index of log buffer. VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
* is generated when an event log is written to this index.
*/
u64 notify_index;
- /*
+ /**
* Field is now deprecated, will be removed when KMD is updated to support removal
*/
u32 enable_extra_events;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved_0;
};
-/*
- * @brief The scheduling log notification is generated by VPU when it writes
+/**
+ * The scheduling log notification is generated by VPU when it writes
* an event into the log buffer at the notify_index. VPU notifies host with
* VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION. This is an asynchronous
* message from VPU to host.
@@ -1126,14 +1312,14 @@ struct vpu_ipc_msg_payload_hws_set_scheduling_log {
* @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG
*/
struct vpu_ipc_msg_payload_hws_scheduling_log_notification {
- /* Engine ordinal */
+ /** Engine ordinal */
u32 engine_idx;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved_0;
};
-/*
- * @brief HWS suspend command queue request and done structure.
+/**
+ * HWS suspend command queue request and done structure.
* Host will request the suspend of contexts and VPU will;
* - Suspend all work on this context
* - Preempt any running work
@@ -1152,21 +1338,21 @@ struct vpu_ipc_msg_payload_hws_scheduling_log_notification {
* @see VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE
*/
struct vpu_ipc_msg_payload_hws_suspend_cmdq {
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved_0;
- /* Command queue id */
+ /** Command queue id */
u64 cmdq_id;
- /*
+ /**
* Suspend fence value - reported by the VPU suspend context
* completed once suspend is complete.
*/
u64 suspend_fence_value;
};
-/*
- * @brief HWS Resume command queue request / response structure.
+/**
+ * HWS Resume command queue request / response structure.
* Host will request the resume of a context;
* - VPU will resume all work on this context
* - Scheduler will allow this context to be scheduled
@@ -1174,25 +1360,25 @@ struct vpu_ipc_msg_payload_hws_suspend_cmdq {
* @see VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP
*/
struct vpu_ipc_msg_payload_hws_resume_cmdq {
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved_0;
- /* Command queue id */
+ /** Command queue id */
u64 cmdq_id;
};
-/*
- * @brief HWS Resume engine request / response structure.
- * After a HWS engine reset, all scheduling is stopped on VPU until a engine resume.
+/**
+ * HWS Resume engine request / response structure.
+ * After a HWS engine reset, all scheduling is stopped on VPU until an engine resume.
* Host shall send this command to resume scheduling of any valid queue.
- * @see VPU_JSM_MSG_HWS_RESUME_ENGINE
+ * @see VPU_JSM_MSG_HWS_ENGINE_RESUME
* @see VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE
*/
struct vpu_ipc_msg_payload_hws_resume_engine {
- /* Engine to be resumed */
+ /** Engine to be resumed */
u32 engine_idx;
- /* Reserved */
+ /** Reserved */
u32 reserved_0;
};
@@ -1326,7 +1512,7 @@ struct vpu_jsm_metric_streamer_done {
/**
* Metric group description placed in the metric buffer after successful completion
* of the VPU_JSM_MSG_METRIC_STREAMER_INFO command. This is followed by one or more
- * @vpu_jsm_metric_counter_descriptor records.
+ * @ref vpu_jsm_metric_counter_descriptor records.
* @see VPU_JSM_MSG_METRIC_STREAMER_INFO
*/
struct vpu_jsm_metric_group_descriptor {
@@ -1413,29 +1599,24 @@ struct vpu_jsm_metric_counter_descriptor {
};
/**
- * Payload for VPU_JSM_MSG_DYNDBG_CONTROL requests.
+ * Payload for @ref VPU_JSM_MSG_DYNDBG_CONTROL requests.
*
- * VPU_JSM_MSG_DYNDBG_CONTROL are used to control the VPU FW Dynamic Debug
- * feature, which allows developers to selectively enable / disable MVLOG_DEBUG
- * messages. This is equivalent to the Dynamic Debug functionality provided by
- * Linux
- * (https://www.kernel.org/doc/html/latest/admin-guide/dynamic-debug-howto.html)
- * The host can control Dynamic Debug behavior by sending dyndbg commands, which
- * have the same syntax as Linux
- * dyndbg commands.
+ * VPU_JSM_MSG_DYNDBG_CONTROL requests are used to control the VPU FW dynamic debug
+ * feature, which allows developers to selectively enable/disable code to obtain
+ * additional FW information. This is equivalent to the dynamic debug functionality
+ * provided by Linux. The host can control dynamic debug behavior by sending dyndbg
+ * commands, using the same syntax as for Linux dynamic debug commands.
*
- * NOTE: in order for MVLOG_DEBUG messages to be actually printed, the host
- * still has to set the logging level to MVLOG_DEBUG, using the
- * VPU_JSM_MSG_TRACE_SET_CONFIG command.
+ * @see https://www.kernel.org/doc/html/latest/admin-guide/dynamic-debug-howto.html.
*
- * The host can see the current dynamic debug configuration by executing a
- * special 'show' command. The dyndbg configuration will be printed to the
- * configured logging destination using MVLOG_INFO logging level.
+ * NOTE:
+ * As the dynamic debug feature uses MVLOG messages to provide information, the host
+ * must first set the logging level to MVLOG_DEBUG, using the @ref VPU_JSM_MSG_TRACE_SET_CONFIG
+ * command.
*/
struct vpu_ipc_msg_payload_dyndbg_control {
/**
- * Dyndbg command (same format as Linux dyndbg); must be a NULL-terminated
- * string.
+ * Dyndbg command to be executed.
*/
char dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN];
};
@@ -1456,7 +1637,7 @@ struct vpu_ipc_msg_payload_pwr_d0i3_enter {
};
/**
- * Payload for VPU_JSM_MSG_DCT_ENABLE message.
+ * Payload for @ref VPU_JSM_MSG_DCT_ENABLE message.
*
* Default values for DCT active/inactive times are 5.3ms and 30ms respectively,
* corresponding to a 85% duty cycle. This payload allows the host to tune these
@@ -1513,28 +1694,28 @@ union vpu_ipc_msg_payload {
struct vpu_ipc_msg_payload_pwr_dct_control pwr_dct_control;
};
-/*
- * Host <-> LRT IPC message base structure.
+/**
+ * Host <-> NPU IPC message base structure.
*
* NOTE: All instances of this object must be aligned on a 64B boundary
* to allow proper handling of VPU cache operations.
*/
struct vpu_jsm_msg {
- /* Reserved */
+ /** Reserved */
u64 reserved_0;
- /* Message type, see vpu_ipc_msg_type enum. */
+ /** Message type, see @ref vpu_ipc_msg_type. */
u32 type;
- /* Buffer status, see vpu_ipc_msg_status enum. */
+ /** Buffer status, see @ref vpu_ipc_msg_status. */
u32 status;
- /*
+ /**
* Request ID, provided by the host in a request message and passed
* back by VPU in the response message.
*/
u32 request_id;
- /* Request return code set by the VPU, see VPU_JSM_STATUS_* defines. */
+ /** Request return code set by the VPU, see VPU_JSM_STATUS_* defines. */
u32 result;
u64 reserved_1;
- /* Message payload depending on message type, see vpu_ipc_msg_payload union. */
+ /** Message payload depending on message type, see vpu_ipc_msg_payload union. */
union vpu_ipc_msg_payload payload;
};
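
struct vpu_jsm_msg must be 64-byte aligned so the NPU's cache operations cover whole messages. One way a host-side allocation could honour that requirement; this is illustrative only and not how the driver allocates its IPC buffers:

#define _POSIX_C_SOURCE 200112L
#include <stdlib.h>

struct jsm_msg_slot {
        unsigned char raw[192];   /* placeholder size, not sizeof(struct vpu_jsm_msg) */
};

static struct jsm_msg_slot *alloc_jsm_msg_slot(void)
{
        void *p = NULL;

        /* posix_memalign() guarantees the 64-byte boundary the header requires */
        if (posix_memalign(&p, 64, sizeof(struct jsm_msg_slot)))
                return NULL;
        return p;
}
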
diff --git a/drivers/accel/qaic/Kconfig b/drivers/accel/qaic/Kconfig
index 5e405a19c157..116e42d152ca 100644
--- a/drivers/accel/qaic/Kconfig
+++ b/drivers/accel/qaic/Kconfig
@@ -9,6 +9,7 @@ config DRM_ACCEL_QAIC
depends on PCI && HAS_IOMEM
depends on MHI_BUS
select CRC32
+ select WANT_DEV_COREDUMP
help
Enables driver for Qualcomm's Cloud AI accelerator PCIe cards that are
designed to accelerate Deep Learning inference workloads.
diff --git a/drivers/accel/qaic/Makefile b/drivers/accel/qaic/Makefile
index 1106b876f737..71f727b74da3 100644
--- a/drivers/accel/qaic/Makefile
+++ b/drivers/accel/qaic/Makefile
@@ -11,6 +11,8 @@ qaic-y := \
qaic_data.o \
qaic_drv.o \
qaic_ras.o \
+ qaic_ssr.o \
+ qaic_sysfs.o \
qaic_timesync.o \
sahara.o
diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
index 820d133236dd..fa7a8155658c 100644
--- a/drivers/accel/qaic/qaic.h
+++ b/drivers/accel/qaic/qaic.h
@@ -21,6 +21,7 @@
#define QAIC_DBC_BASE SZ_128K
#define QAIC_DBC_SIZE SZ_4K
+#define QAIC_SSR_DBC_SENTINEL U32_MAX /* No ongoing SSR sentinel */
#define QAIC_NO_PARTITION -1
@@ -47,6 +48,22 @@ enum __packed dev_states {
QAIC_ONLINE,
};
+enum dbc_states {
+ /* DBC is free and can be activated */
+ DBC_STATE_IDLE,
+ /* DBC is activated and a workload is running on the device */
+ DBC_STATE_ASSIGNED,
+ /* Sub-system associated with this workload has crashed and will shut down soon */
+ DBC_STATE_BEFORE_SHUTDOWN,
+ /* Sub-system associated with this workload has crashed and has shut down */
+ DBC_STATE_AFTER_SHUTDOWN,
+ /* Sub-system associated with this workload is shut down and will be powered up soon */
+ DBC_STATE_BEFORE_POWER_UP,
+ /* Sub-system associated with this workload is now powered up */
+ DBC_STATE_AFTER_POWER_UP,
+ DBC_STATE_MAX,
+};
+
extern bool datapath_polling;
struct qaic_user {
@@ -114,6 +131,8 @@ struct dma_bridge_chan {
unsigned int irq;
/* Polling work item to simulate interrupts */
struct work_struct poll_work;
+ /* Represents various states of this DBC from enum dbc_states */
+ unsigned int state;
};
struct qaic_device {
@@ -161,6 +180,8 @@ struct qaic_device {
struct mhi_device *qts_ch;
/* Work queue for tasks related to MHI "QAIC_TIMESYNC" channel */
struct workqueue_struct *qts_wq;
+ /* MHI "QAIC_TIMESYNC_PERIODIC" channel device */
+ struct mhi_device *mqts_ch;
/* Head of list of page allocated by MHI bootlog device */
struct list_head bootlog;
/* MHI bootlog channel device */
@@ -177,6 +198,14 @@ struct qaic_device {
unsigned int ue_count;
/* Un-correctable non-fatal error count */
unsigned int ue_nf_count;
+ /* MHI SSR channel device */
+ struct mhi_device *ssr_ch;
+ /* Work queue for tasks related to MHI SSR device */
+ struct workqueue_struct *ssr_wq;
+ /* Buffer to collect SSR crashdump via SSR MHI channel */
+ void *ssr_mhi_buf;
+ /* DBC which is under SSR. The sentinel U32_MAX means no SSR is in progress */
+ u32 ssr_dbc;
};
struct qaic_drm_device {
@@ -195,6 +224,8 @@ struct qaic_drm_device {
struct list_head users;
/* Synchronizes access to users list */
struct mutex users_mutex;
+ /* Pointer to array of DBC sysfs attributes */
+ void *sysfs_attrs;
};
struct qaic_bo {
@@ -317,6 +348,13 @@ int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm
int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
-void irq_polling_work(struct work_struct *work);
+void qaic_irq_polling_work(struct work_struct *work);
+void qaic_dbc_enter_ssr(struct qaic_device *qdev, u32 dbc_id);
+void qaic_dbc_exit_ssr(struct qaic_device *qdev);
+
+/* qaic_sysfs.c */
+int qaic_sysfs_init(struct qaic_drm_device *qddev);
+void qaic_sysfs_remove(struct qaic_drm_device *qddev);
+void set_dbc_state(struct qaic_device *qdev, u32 dbc_id, unsigned int state);
#endif /* _QAIC_H_ */
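
qaic_dbc_enter_ssr()/qaic_dbc_exit_ssr() track the DBC undergoing subsystem restart via ssr_dbc, with U32_MAX as the "no SSR" sentinel; the qaic_data.c hunk below rejects slice attach on that DBC with -EPIPE. A minimal sketch of the sentinel check, using placeholder names:

#include <errno.h>
#include <stdint.h>

#define SSR_DBC_SENTINEL UINT32_MAX   /* mirrors QAIC_SSR_DBC_SENTINEL */

/* Hypothetical helper: refuse new work on a DBC that is mid-SSR */
static int check_dbc_usable(uint32_t dbc_id, uint32_t ssr_dbc)
{
        if (dbc_id == ssr_dbc)
                return -EPIPE;
        return 0;
}
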
diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c
index b86a8e48e731..428d8f65bff3 100644
--- a/drivers/accel/qaic/qaic_control.c
+++ b/drivers/accel/qaic/qaic_control.c
@@ -17,6 +17,7 @@
#include <linux/overflow.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>
+#include <linux/sched/signal.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/workqueue.h>
@@ -30,7 +31,7 @@
#define MANAGE_MAGIC_NUMBER ((__force __le32)0x43494151) /* "QAIC" in little endian */
#define QAIC_DBC_Q_GAP SZ_256
#define QAIC_DBC_Q_BUF_ALIGN SZ_4K
-#define QAIC_MANAGE_EXT_MSG_LENGTH SZ_64K /* Max DMA message length */
+#define QAIC_MANAGE_WIRE_MSG_LENGTH SZ_64K /* Max DMA message length */
#define QAIC_WRAPPER_MAX_SIZE SZ_4K
#define QAIC_MHI_RETRY_WAIT_MS 100
#define QAIC_MHI_RETRY_MAX 20
@@ -309,6 +310,7 @@ static void save_dbc_buf(struct qaic_device *qdev, struct ioctl_resources *resou
enable_dbc(qdev, dbc_id, usr);
qdev->dbc[dbc_id].in_use = true;
resources->buf = NULL;
+ set_dbc_state(qdev, dbc_id, DBC_STATE_ASSIGNED);
}
}
@@ -367,7 +369,7 @@ static int encode_passthrough(struct qaic_device *qdev, void *trans, struct wrap
if (in_trans->hdr.len % 8 != 0)
return -EINVAL;
- if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_EXT_MSG_LENGTH)
+ if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_WIRE_MSG_LENGTH)
return -ENOSPC;
trans_wrapper = add_wrapper(wrappers,
@@ -495,7 +497,7 @@ static int encode_addr_size_pairs(struct dma_xfer *xfer, struct wrapper_list *wr
nents = sgt->nents;
nents_dma = nents;
- *size = QAIC_MANAGE_EXT_MSG_LENGTH - msg_hdr_len - sizeof(**out_trans);
+ *size = QAIC_MANAGE_WIRE_MSG_LENGTH - msg_hdr_len - sizeof(**out_trans);
for_each_sgtable_dma_sg(sgt, sg, i) {
*size -= sizeof(*asp);
/* Save 1K for possible follow-up transactions. */
@@ -576,7 +578,7 @@ static int encode_dma(struct qaic_device *qdev, void *trans, struct wrapper_list
/* There should be enough space to hold at least one ASP entry. */
if (size_add(msg_hdr_len, sizeof(*out_trans) + sizeof(struct wire_addr_size_pair)) >
- QAIC_MANAGE_EXT_MSG_LENGTH)
+ QAIC_MANAGE_WIRE_MSG_LENGTH)
return -ENOMEM;
xfer = kmalloc(sizeof(*xfer), GFP_KERNEL);
@@ -645,7 +647,7 @@ static int encode_activate(struct qaic_device *qdev, void *trans, struct wrapper
msg = &wrapper->msg;
msg_hdr_len = le32_to_cpu(msg->hdr.len);
- if (size_add(msg_hdr_len, sizeof(*out_trans)) > QAIC_MANAGE_MAX_MSG_LENGTH)
+ if (size_add(msg_hdr_len, sizeof(*out_trans)) > QAIC_MANAGE_WIRE_MSG_LENGTH)
return -ENOSPC;
if (!in_trans->queue_size)
@@ -655,8 +657,9 @@ static int encode_activate(struct qaic_device *qdev, void *trans, struct wrapper
return -EINVAL;
nelem = in_trans->queue_size;
- size = (get_dbc_req_elem_size() + get_dbc_rsp_elem_size()) * nelem;
- if (size / nelem != get_dbc_req_elem_size() + get_dbc_rsp_elem_size())
+ if (check_mul_overflow((u32)(get_dbc_req_elem_size() + get_dbc_rsp_elem_size()),
+ nelem,
+ &size))
return -EINVAL;
if (size + QAIC_DBC_Q_GAP + QAIC_DBC_Q_BUF_ALIGN < size)
@@ -729,7 +732,7 @@ static int encode_status(struct qaic_device *qdev, void *trans, struct wrapper_l
msg = &wrapper->msg;
msg_hdr_len = le32_to_cpu(msg->hdr.len);
- if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_MAX_MSG_LENGTH)
+ if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_WIRE_MSG_LENGTH)
return -ENOSPC;
trans_wrapper = add_wrapper(wrappers, sizeof(*trans_wrapper));
@@ -810,7 +813,7 @@ static int encode_message(struct qaic_device *qdev, struct manage_msg *user_msg,
}
if (ret)
- break;
+ goto out;
}
if (user_len != user_msg->len)
@@ -921,6 +924,7 @@ static int decode_deactivate(struct qaic_device *qdev, void *trans, u32 *msg_len
}
release_dbc(qdev, dbc_id);
+ set_dbc_state(qdev, dbc_id, DBC_STATE_IDLE);
*msg_len += sizeof(*in_trans);
return 0;
@@ -1052,7 +1056,7 @@ static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u
init_completion(&elem.xfer_done);
if (likely(!qdev->cntl_lost_buf)) {
/*
- * The max size of request to device is QAIC_MANAGE_EXT_MSG_LENGTH.
+ * The max size of request to device is QAIC_MANAGE_WIRE_MSG_LENGTH.
* The max size of response from device is QAIC_MANAGE_MAX_MSG_LENGTH.
*/
out_buf = kmalloc(QAIC_MANAGE_MAX_MSG_LENGTH, GFP_KERNEL);
@@ -1079,7 +1083,6 @@ static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u
list_for_each_entry(w, &wrappers->list, list) {
kref_get(&w->ref_count);
- retry_count = 0;
ret = mhi_queue_buf(qdev->cntl_ch, DMA_TO_DEVICE, &w->msg, w->len,
list_is_last(&w->list, &wrappers->list) ? MHI_EOT : MHI_CHAIN);
if (ret) {
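
Several sizing calculations in this series switch from a multiply-then-divide overflow test to check_mul_overflow(). A standalone sketch of the same pattern using the compiler builtin that the kernel helper wraps on GCC/Clang:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t elem_size = 64, nelem = UINT32_MAX, size;

        if (__builtin_mul_overflow(elem_size, nelem, &size))
                puts("overflow detected: reject the request (-EINVAL)");
        else
                printf("size=%u\n", size);
        return 0;
}
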
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index c4f117edb266..60cb4d65d48e 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -18,6 +18,7 @@
#include <linux/scatterlist.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>
+#include <linux/string.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/wait.h>
@@ -165,7 +166,7 @@ static void free_slice(struct kref *kref)
drm_gem_object_put(&slice->bo->base);
sg_free_table(slice->sgt);
kfree(slice->sgt);
- kfree(slice->reqs);
+ kvfree(slice->reqs);
kfree(slice);
}
@@ -404,7 +405,7 @@ static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo,
goto free_sgt;
}
- slice->reqs = kcalloc(sgt->nents, sizeof(*slice->reqs), GFP_KERNEL);
+ slice->reqs = kvcalloc(sgt->nents, sizeof(*slice->reqs), GFP_KERNEL);
if (!slice->reqs) {
ret = -ENOMEM;
goto free_slice;
@@ -430,7 +431,7 @@ static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo,
return 0;
free_req:
- kfree(slice->reqs);
+ kvfree(slice->reqs);
free_slice:
kfree(slice);
free_sgt:
@@ -643,8 +644,36 @@ static void qaic_free_object(struct drm_gem_object *obj)
kfree(bo);
}
+static struct sg_table *qaic_get_sg_table(struct drm_gem_object *obj)
+{
+ struct qaic_bo *bo = to_qaic_bo(obj);
+ struct scatterlist *sg, *sg_in;
+ struct sg_table *sgt, *sgt_in;
+ int i;
+
+ sgt_in = bo->sgt;
+
+ sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt)
+ return ERR_PTR(-ENOMEM);
+
+ if (sg_alloc_table(sgt, sgt_in->orig_nents, GFP_KERNEL)) {
+ kfree(sgt);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ sg = sgt->sgl;
+ for_each_sgtable_sg(sgt_in, sg_in, i) {
+ memcpy(sg, sg_in, sizeof(*sg));
+ sg = sg_next(sg);
+ }
+
+ return sgt;
+}
+
static const struct drm_gem_object_funcs qaic_gem_funcs = {
.free = qaic_free_object,
+ .get_sg_table = qaic_get_sg_table,
.print_info = qaic_gem_print_info,
.mmap = qaic_gem_object_mmap,
.vm_ops = &drm_vm_ops,
@@ -953,8 +982,9 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
if (args->hdr.count == 0)
return -EINVAL;
- arg_size = args->hdr.count * sizeof(*slice_ent);
- if (arg_size / args->hdr.count != sizeof(*slice_ent))
+ if (check_mul_overflow((unsigned long)args->hdr.count,
+ (unsigned long)sizeof(*slice_ent),
+ &arg_size))
return -EINVAL;
if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE))
@@ -984,18 +1014,12 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
user_data = u64_to_user_ptr(args->data);
- slice_ent = kzalloc(arg_size, GFP_KERNEL);
- if (!slice_ent) {
- ret = -EINVAL;
+ slice_ent = memdup_user(user_data, arg_size);
+ if (IS_ERR(slice_ent)) {
+ ret = PTR_ERR(slice_ent);
goto unlock_dev_srcu;
}
- ret = copy_from_user(slice_ent, user_data, arg_size);
- if (ret) {
- ret = -EFAULT;
- goto free_slice_ent;
- }
-
obj = drm_gem_object_lookup(file_priv, args->hdr.handle);
if (!obj) {
ret = -ENOENT;
@@ -1023,6 +1047,11 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
goto unlock_ch_srcu;
}
+ if (dbc->id == qdev->ssr_dbc) {
+ ret = -EPIPE;
+ goto unlock_ch_srcu;
+ }
+
ret = qaic_prepare_bo(qdev, bo, &args->hdr);
if (ret)
goto unlock_ch_srcu;
@@ -1300,8 +1329,6 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
int usr_rcu_id, qdev_rcu_id;
struct qaic_device *qdev;
struct qaic_user *usr;
- u8 __user *user_data;
- unsigned long n;
u64 received_ts;
u32 queue_level;
u64 submit_ts;
@@ -1314,20 +1341,12 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
received_ts = ktime_get_ns();
size = is_partial ? sizeof(struct qaic_partial_execute_entry) : sizeof(*exec);
- n = (unsigned long)size * args->hdr.count;
- if (args->hdr.count == 0 || n / args->hdr.count != size)
+ if (args->hdr.count == 0)
return -EINVAL;
- user_data = u64_to_user_ptr(args->data);
-
- exec = kcalloc(args->hdr.count, size, GFP_KERNEL);
- if (!exec)
- return -ENOMEM;
-
- if (copy_from_user(exec, user_data, n)) {
- ret = -EFAULT;
- goto free_exec;
- }
+ exec = memdup_array_user(u64_to_user_ptr(args->data), args->hdr.count, size);
+ if (IS_ERR(exec))
+ return PTR_ERR(exec);
usr = file_priv->driver_priv;
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
@@ -1356,6 +1375,11 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
goto release_ch_rcu;
}
+ if (dbc->id == qdev->ssr_dbc) {
+ ret = -EPIPE;
+ goto release_ch_rcu;
+ }
+
ret = mutex_lock_interruptible(&dbc->req_lock);
if (ret)
goto release_ch_rcu;
@@ -1396,7 +1420,6 @@ unlock_dev_srcu:
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
unlock_usr_srcu:
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
-free_exec:
kfree(exec);
return ret;
}
@@ -1491,7 +1514,7 @@ irqreturn_t dbc_irq_handler(int irq, void *data)
return IRQ_WAKE_THREAD;
}
-void irq_polling_work(struct work_struct *work)
+void qaic_irq_polling_work(struct work_struct *work)
{
struct dma_bridge_chan *dbc = container_of(work, struct dma_bridge_chan, poll_work);
unsigned long flags;
@@ -1709,6 +1732,11 @@ int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
goto unlock_ch_srcu;
}
+ if (dbc->id == qdev->ssr_dbc) {
+ ret = -EPIPE;
+ goto unlock_ch_srcu;
+ }
+
obj = drm_gem_object_lookup(file_priv, args->handle);
if (!obj) {
ret = -ENOENT;
@@ -1729,6 +1757,9 @@ int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
if (!dbc->usr)
ret = -EPERM;
+ if (dbc->id == qdev->ssr_dbc)
+ ret = -EPIPE;
+
put_obj:
drm_gem_object_put(obj);
unlock_ch_srcu:
@@ -1749,7 +1780,8 @@ int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file
struct qaic_device *qdev;
struct qaic_user *usr;
struct qaic_bo *bo;
- int ret, i;
+ int ret = 0;
+ int i;
usr = file_priv->driver_priv;
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
@@ -1770,18 +1802,12 @@ int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file
goto unlock_dev_srcu;
}
- ent = kcalloc(args->hdr.count, sizeof(*ent), GFP_KERNEL);
- if (!ent) {
- ret = -EINVAL;
+ ent = memdup_array_user(u64_to_user_ptr(args->data), args->hdr.count, sizeof(*ent));
+ if (IS_ERR(ent)) {
+ ret = PTR_ERR(ent);
goto unlock_dev_srcu;
}
- ret = copy_from_user(ent, u64_to_user_ptr(args->data), args->hdr.count * sizeof(*ent));
- if (ret) {
- ret = -EFAULT;
- goto free_ent;
- }
-
for (i = 0; i < args->hdr.count; i++) {
obj = drm_gem_object_lookup(file_priv, ent[i].handle);
if (!obj) {
@@ -1789,6 +1815,16 @@ int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file
goto free_ent;
}
bo = to_qaic_bo(obj);
+ if (!bo->sliced) {
+ drm_gem_object_put(obj);
+ ret = -EINVAL;
+ goto free_ent;
+ }
+ if (bo->dbc->id != args->hdr.dbc_id) {
+ drm_gem_object_put(obj);
+ ret = -EINVAL;
+ goto free_ent;
+ }
/*
* perf stats ioctl is called before wait ioctl is complete then
* the latency information is invalid.
@@ -1927,6 +1963,17 @@ static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *db
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
}
+static void sync_empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc)
+{
+ empty_xfer_list(qdev, dbc);
+ synchronize_srcu(&dbc->ch_lock);
+ /*
+	 * Threads holding the channel lock may still add elements to the
+	 * xfer_list. Flush those elements out as well.
+ */
+ empty_xfer_list(qdev, dbc);
+}
+
int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
{
if (!qdev->dbc[dbc_id].usr || qdev->dbc[dbc_id].usr->handle != usr->handle)
@@ -1941,7 +1988,7 @@ int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
* enable_dbc - Enable the DBC. DBCs are disabled by removing the context of
* user. Add user context back to DBC to enable it. This function trusts the
* DBC ID passed and expects the DBC to be disabled.
- * @qdev: Qranium device handle
+ * @qdev: qaic device handle
* @dbc_id: ID of the DBC
* @usr: User context
*/
@@ -1955,13 +2002,7 @@ void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id)
struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];
dbc->usr = NULL;
- empty_xfer_list(qdev, dbc);
- synchronize_srcu(&dbc->ch_lock);
- /*
- * Threads holding channel lock, may add more elements in the xfer_list.
- * Flush out these elements from xfer_list.
- */
- empty_xfer_list(qdev, dbc);
+ sync_empty_xfer_list(qdev, dbc);
}
void release_dbc(struct qaic_device *qdev, u32 dbc_id)
@@ -2002,3 +2043,30 @@ void qaic_data_get_fifo_info(struct dma_bridge_chan *dbc, u32 *head, u32 *tail)
*head = readl(dbc->dbc_base + REQHP_OFF);
*tail = readl(dbc->dbc_base + REQTP_OFF);
}
+
+/*
+ * qaic_dbc_enter_ssr - Prepare to enter subsystem reset (SSR) for the given DBC ID.
+ * @qdev: qaic device handle
+ * @dbc_id: ID of the DBC which will enter SSR
+ *
+ * The device automatically deactivates the workload because not all
+ * errors can be recovered silently. The user is notified and must
+ * decide which recovery action to take.
+ */
+void qaic_dbc_enter_ssr(struct qaic_device *qdev, u32 dbc_id)
+{
+ qdev->ssr_dbc = dbc_id;
+ release_dbc(qdev, dbc_id);
+}
+
+/*
+ * qaic_dbc_exit_ssr - Prepare to exit from subsystem reset (SSR).
+ * @qdev: qaic device handle
+ *
+ * The DBC returns to an operational state and begins accepting work after exiting SSR.
+ */
+void qaic_dbc_exit_ssr(struct qaic_device *qdev)
+{
+ qdev->ssr_dbc = QAIC_SSR_DBC_SENTINEL;
+}
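+
+/*
+ * Note: while ssr_dbc holds a DBC ID (i.e. is not QAIC_SSR_DBC_SENTINEL), the
+ * data path ioctls that target that DBC (attach slice, execute, wait) return
+ * -EPIPE so userspace can tell a workload lost to SSR apart from other
+ * failures.
+ */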
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index e162f4b8a262..4c70bd949d53 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -30,6 +30,7 @@
#include "qaic.h"
#include "qaic_debugfs.h"
#include "qaic_ras.h"
+#include "qaic_ssr.h"
#include "qaic_timesync.h"
#include "sahara.h"
@@ -270,6 +271,13 @@ static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id)
return ret;
}
+ ret = qaic_sysfs_init(qddev);
+ if (ret) {
+ drm_dev_unregister(drm);
+ pci_dbg(qdev->pdev, "qaic_sysfs_init failed %d\n", ret);
+ return ret;
+ }
+
qaic_debugfs_init(qddev);
return ret;
@@ -281,6 +289,7 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
struct drm_device *drm = to_drm(qddev);
struct qaic_user *usr;
+ qaic_sysfs_remove(qddev);
drm_dev_unregister(drm);
qddev->partition_id = 0;
/*
@@ -382,6 +391,7 @@ void qaic_dev_reset_clean_local_state(struct qaic_device *qdev)
qaic_notify_reset(qdev);
/* start tearing things down */
+ qaic_clean_up_ssr(qdev);
for (i = 0; i < qdev->num_dbc; ++i)
release_dbc(qdev, i);
}
@@ -431,11 +441,18 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev,
qdev->qts_wq = qaicm_wq_init(drm, "qaic_ts");
if (IS_ERR(qdev->qts_wq))
return NULL;
+ qdev->ssr_wq = qaicm_wq_init(drm, "qaic_ssr");
+ if (IS_ERR(qdev->ssr_wq))
+ return NULL;
ret = qaicm_srcu_init(drm, &qdev->dev_lock);
if (ret)
return NULL;
+ ret = qaic_ssr_init(qdev, drm);
+ if (ret)
+ pci_info(pdev, "QAIC SSR crashdump collection not supported.\n");
+
qdev->qddev = qddev;
qdev->pdev = pdev;
qddev->qdev = qdev;
@@ -545,7 +562,7 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
qdev->dbc[i].irq = pci_irq_vector(pdev, qdev->single_msi ? 0 : i + 1);
if (!qdev->single_msi)
disable_irq_nosync(qdev->dbc[i].irq);
- INIT_WORK(&qdev->dbc[i].poll_work, irq_polling_work);
+ INIT_WORK(&qdev->dbc[i].poll_work, qaic_irq_polling_work);
}
}
@@ -660,6 +677,92 @@ static const struct pci_error_handlers qaic_pci_err_handler = {
.reset_done = qaic_pci_reset_done,
};
+static bool qaic_is_under_reset(struct qaic_device *qdev)
+{
+ int rcu_id;
+ bool ret;
+
+ rcu_id = srcu_read_lock(&qdev->dev_lock);
+ ret = qdev->dev_state != QAIC_ONLINE;
+ srcu_read_unlock(&qdev->dev_lock, rcu_id);
+ return ret;
+}
+
+static bool qaic_data_path_busy(struct qaic_device *qdev)
+{
+ bool ret = false;
+ int dev_rcu_id;
+ int i;
+
+ dev_rcu_id = srcu_read_lock(&qdev->dev_lock);
+ if (qdev->dev_state != QAIC_ONLINE) {
+ srcu_read_unlock(&qdev->dev_lock, dev_rcu_id);
+ return false;
+ }
+ for (i = 0; i < qdev->num_dbc; i++) {
+ struct dma_bridge_chan *dbc = &qdev->dbc[i];
+ unsigned long flags;
+ int ch_rcu_id;
+
+ ch_rcu_id = srcu_read_lock(&dbc->ch_lock);
+ if (!dbc->usr || !dbc->in_use) {
+ srcu_read_unlock(&dbc->ch_lock, ch_rcu_id);
+ continue;
+ }
+ spin_lock_irqsave(&dbc->xfer_lock, flags);
+ ret = !list_empty(&dbc->xfer_list);
+ spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+ srcu_read_unlock(&dbc->ch_lock, ch_rcu_id);
+ if (ret)
+ break;
+ }
+ srcu_read_unlock(&qdev->dev_lock, dev_rcu_id);
+ return ret;
+}
+
+static int qaic_pm_suspend(struct device *dev)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
+
+ dev_dbg(dev, "Suspending..\n");
+ if (qaic_data_path_busy(qdev)) {
+ dev_dbg(dev, "Device's datapath is busy. Aborting suspend..\n");
+ return -EBUSY;
+ }
+ if (qaic_is_under_reset(qdev)) {
+ dev_dbg(dev, "Device is under reset. Aborting suspend..\n");
+ return -EBUSY;
+ }
+ qaic_mqts_ch_stop_timer(qdev->mqts_ch);
+ qaic_pci_reset_prepare(qdev->pdev);
+ pci_save_state(qdev->pdev);
+ pci_disable_device(qdev->pdev);
+ pci_set_power_state(qdev->pdev, PCI_D3hot);
+ return 0;
+}
+
+static int qaic_pm_resume(struct device *dev)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
+ int ret;
+
+ dev_dbg(dev, "Resuming..\n");
+ pci_set_power_state(qdev->pdev, PCI_D0);
+ pci_restore_state(qdev->pdev);
+ ret = pci_enable_device(qdev->pdev);
+ if (ret) {
+ dev_err(dev, "pci_enable_device failed on resume %d\n", ret);
+ return ret;
+ }
+ pci_set_master(qdev->pdev);
+ qaic_pci_reset_done(qdev->pdev);
+ return 0;
+}
+
+static const struct dev_pm_ops qaic_pm_ops = {
+ SYSTEM_SLEEP_PM_OPS(qaic_pm_suspend, qaic_pm_resume)
+};
+
static struct pci_driver qaic_pci_driver = {
.name = QAIC_NAME,
.id_table = qaic_ids,
@@ -667,6 +770,9 @@ static struct pci_driver qaic_pci_driver = {
.remove = qaic_pci_remove,
.shutdown = qaic_pci_shutdown,
.err_handler = &qaic_pci_err_handler,
+ .driver = {
+ .pm = pm_sleep_ptr(&qaic_pm_ops),
+ },
};
static int __init qaic_init(void)
@@ -702,9 +808,16 @@ static int __init qaic_init(void)
ret = qaic_ras_register();
if (ret)
pr_debug("qaic: qaic_ras_register failed %d\n", ret);
+ ret = qaic_ssr_register();
+ if (ret) {
+ pr_debug("qaic: qaic_ssr_register failed %d\n", ret);
+ goto free_bootlog;
+ }
return 0;
+free_bootlog:
+ qaic_bootlog_unregister();
free_mhi:
mhi_driver_unregister(&qaic_mhi_driver);
free_pci:
@@ -730,6 +843,7 @@ static void __exit qaic_exit(void)
* reinitializing the link_up state after the cleanup is done.
*/
link_up = true;
+ qaic_ssr_unregister();
qaic_ras_unregister();
qaic_bootlog_unregister();
qaic_timesync_deinit();
diff --git a/drivers/accel/qaic/qaic_ras.c b/drivers/accel/qaic/qaic_ras.c
index 914ffc4a9970..f1d52a710136 100644
--- a/drivers/accel/qaic/qaic_ras.c
+++ b/drivers/accel/qaic/qaic_ras.c
@@ -514,21 +514,21 @@ static ssize_t ce_count_show(struct device *dev, struct device_attribute *attr,
{
struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
- return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ce_count);
+ return sysfs_emit(buf, "%d\n", qdev->ce_count);
}
static ssize_t ue_count_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
- return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_count);
+ return sysfs_emit(buf, "%d\n", qdev->ue_count);
}
static ssize_t ue_nonfatal_count_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
- return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_nf_count);
+ return sysfs_emit(buf, "%d\n", qdev->ue_nf_count);
}
static DEVICE_ATTR_RO(ce_count);
diff --git a/drivers/accel/qaic/qaic_ssr.c b/drivers/accel/qaic/qaic_ssr.c
new file mode 100644
index 000000000000..9b662d690371
--- /dev/null
+++ b/drivers/accel/qaic/qaic_ssr.c
@@ -0,0 +1,815 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. */
+
+#include <asm/byteorder.h>
+#include <drm/drm_file.h>
+#include <drm/drm_managed.h>
+#include <linux/devcoredump.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mhi.h>
+#include <linux/workqueue.h>
+
+#include "qaic.h"
+#include "qaic_ssr.h"
+
+#define SSR_RESP_MSG_SZ 32
+#define SSR_MHI_BUF_SIZE SZ_64K
+#define SSR_MEM_READ_DATA_SIZE ((u64)SSR_MHI_BUF_SIZE - sizeof(struct ssr_crashdump))
+#define SSR_MEM_READ_CHUNK_SIZE ((u64)SSR_MEM_READ_DATA_SIZE - sizeof(struct ssr_memory_read_rsp))
+
+#define DEBUG_TRANSFER_INFO BIT(0)
+#define DEBUG_TRANSFER_INFO_RSP BIT(1)
+#define MEMORY_READ BIT(2)
+#define MEMORY_READ_RSP BIT(3)
+#define DEBUG_TRANSFER_DONE BIT(4)
+#define DEBUG_TRANSFER_DONE_RSP BIT(5)
+#define SSR_EVENT BIT(8)
+#define SSR_EVENT_RSP BIT(9)
+
+#define SSR_EVENT_NACK BIT(0)
+#define BEFORE_SHUTDOWN BIT(1)
+#define AFTER_SHUTDOWN BIT(2)
+#define BEFORE_POWER_UP BIT(3)
+#define AFTER_POWER_UP BIT(4)
+
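+/*
+ * Crashdump exchange as implemented below (host point of view):
+ *
+ *   device -> host: DEBUG_TRANSFER_INFO (debug table address and length)
+ *   host -> device: DEBUG_TRANSFER_INFO_RSP (ACK/NACK)
+ *   host -> device: MEMORY_READ             \ repeated, first for the debug
+ *   device -> host: MEMORY_READ_RSP (chunk) / table, then for each segment
+ *   host -> device: DEBUG_TRANSFER_DONE
+ *   device -> host: DEBUG_TRANSFER_DONE_RSP (on success the dump is handed
+ *                   to devcoredump)
+ *
+ * SSR_EVENT/SSR_EVENT_RSP carry the shutdown and power-up transitions of the
+ * affected DBC.
+ */
+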
+struct debug_info_table {
+ /* Save preferences. Default is mandatory */
+ u64 save_perf;
+ /* Base address of the debug region */
+ u64 mem_base;
+ /* Size of debug region in bytes */
+ u64 len;
+ /* Description */
+ char desc[20];
+ /* Filename of debug region */
+ char filename[20];
+};
+
+struct _ssr_hdr {
+ __le32 cmd;
+ __le32 len;
+ __le32 dbc_id;
+};
+
+struct ssr_hdr {
+ u32 cmd;
+ u32 len;
+ u32 dbc_id;
+};
+
+struct ssr_debug_transfer_info {
+ struct ssr_hdr hdr;
+ u32 resv;
+ u64 tbl_addr;
+ u64 tbl_len;
+} __packed;
+
+struct ssr_debug_transfer_info_rsp {
+ struct _ssr_hdr hdr;
+ __le32 ret;
+} __packed;
+
+struct ssr_memory_read {
+ struct _ssr_hdr hdr;
+ __le32 resv;
+ __le64 addr;
+ __le64 len;
+} __packed;
+
+struct ssr_memory_read_rsp {
+ struct _ssr_hdr hdr;
+ __le32 resv;
+ u8 data[];
+} __packed;
+
+struct ssr_debug_transfer_done {
+ struct _ssr_hdr hdr;
+ __le32 resv;
+} __packed;
+
+struct ssr_debug_transfer_done_rsp {
+ struct _ssr_hdr hdr;
+ __le32 ret;
+} __packed;
+
+struct ssr_event {
+ struct ssr_hdr hdr;
+ u32 event;
+} __packed;
+
+struct ssr_event_rsp {
+ struct _ssr_hdr hdr;
+ __le32 event;
+} __packed;
+
+struct ssr_resp {
+	/* Work struct to schedule handling of data arriving on the QAIC_SSR channel */
+ struct work_struct work;
+ /* Root struct of device, used to access device resources */
+ struct qaic_device *qdev;
+ /* Buffer used by MHI for transfer requests */
+ u8 data[] __aligned(8);
+};
+
+/* SSR crashdump bookkeeping structure */
+struct ssr_dump_info {
+ /* DBC associated with this SSR crashdump */
+ struct dma_bridge_chan *dbc;
+ /*
+ * It will be used when we complete the crashdump download and switch
+ * to waiting on SSR events
+ */
+ struct ssr_resp *resp;
+	/* MEMORY READ request MHI buffer. */
+ struct ssr_memory_read *read_buf_req;
+ /* TRUE: ->read_buf_req is queued for MHI transaction. FALSE: Otherwise */
+ bool read_buf_req_queued;
+ /* Address of table in host */
+ void *tbl_addr;
+ /* Total size of table */
+ u64 tbl_len;
+	/* Offset of the table (->tbl_addr) where the new chunk will be dumped */
+ u64 tbl_off;
+ /* Address of table in device/target */
+ u64 tbl_addr_dev;
+ /* Ptr to the entire dump */
+ void *dump_addr;
+ /* Entire crashdump size */
+ u64 dump_sz;
+	/* Offset of the crashdump (->dump_addr) where the new chunk will be dumped */
+ u64 dump_off;
+ /* Points to the table entry we are currently downloading */
+ struct debug_info_table *tbl_ent;
+	/* Offset in the current table entry (->tbl_ent) for the next chunk */
+ u64 tbl_ent_off;
+};
+
+struct ssr_crashdump {
+ /*
+	 * Points to a bookkeeping struct maintained by the MHI SSR device while
+	 * downloading an SSR crashdump. It is NULL when no crashdump download
+	 * is in progress.
+ */
+ struct ssr_dump_info *dump_info;
+	/* Work struct to schedule handling of data arriving on the QAIC_SSR channel */
+ struct work_struct work;
+ /* Root struct of device, used to access device resources */
+ struct qaic_device *qdev;
+ /* Buffer used by MHI for transfer requests */
+ u8 data[];
+};
+
+#define QAIC_SSR_DUMP_V1_MAGIC 0x1234567890abcdef
+#define QAIC_SSR_DUMP_V1_VER 1
+struct dump_file_meta {
+ u64 magic;
+ u64 version;
+ u64 size; /* Total size of the entire dump */
+	u64 tbl_len; /* Length of the table in bytes */
+};
+
+/*
+ * Layout of crashdump
+ * +------------------------------------------+
+ * | Crashdump Meta structure |
+ * | type: struct dump_file_meta |
+ * +------------------------------------------+
+ * | Crashdump Table |
+ * | type: array of struct debug_info_table |
+ * | |
+ * | |
+ * | |
+ * +------------------------------------------+
+ * | Crashdump |
+ * | |
+ * | |
+ * | |
+ * | |
+ * | |
+ * +------------------------------------------+
+ */
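+
+/*
+ * A minimal sketch (illustrative only, not part of the driver) of how a
+ * consumer of the resulting devcoredump could locate the three regions from
+ * the meta header alone:
+ *
+ *   const u8 *dump = ...;   /* devcoredump payload, meta->size bytes long */
+ *   const struct dump_file_meta *meta = (const void *)dump;
+ *   const struct debug_info_table *tbl =
+ *		(const void *)(dump + sizeof(*meta));
+ *   const u8 *segments = dump + sizeof(*meta) + meta->tbl_len;
+ *
+ * Each table entry's len gives the size of its segment; the segments are
+ * laid out back to back in table order.
+ */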
+
+static void free_ssr_dump_info(struct ssr_crashdump *ssr_crash)
+{
+ struct ssr_dump_info *dump_info = ssr_crash->dump_info;
+
+ ssr_crash->dump_info = NULL;
+ if (!dump_info)
+ return;
+ if (!dump_info->read_buf_req_queued)
+ kfree(dump_info->read_buf_req);
+ vfree(dump_info->tbl_addr);
+ vfree(dump_info->dump_addr);
+ kfree(dump_info);
+}
+
+void qaic_clean_up_ssr(struct qaic_device *qdev)
+{
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+
+ if (!ssr_crash)
+ return;
+
+ qaic_dbc_exit_ssr(qdev);
+ free_ssr_dump_info(ssr_crash);
+}
+
+static int alloc_dump(struct ssr_dump_info *dump_info)
+{
+ struct debug_info_table *tbl_ent = dump_info->tbl_addr;
+ struct dump_file_meta *dump_meta;
+ u64 tbl_sz_lp = 0;
+ u64 dump_size = 0;
+
+ while (tbl_sz_lp < dump_info->tbl_len) {
+ le64_to_cpus(&tbl_ent->save_perf);
+ le64_to_cpus(&tbl_ent->mem_base);
+ le64_to_cpus(&tbl_ent->len);
+
+ if (tbl_ent->len == 0)
+ return -EINVAL;
+
+ dump_size += tbl_ent->len;
+ tbl_ent++;
+ tbl_sz_lp += sizeof(*tbl_ent);
+ }
+
+ dump_info->dump_sz = dump_size + dump_info->tbl_len + sizeof(*dump_meta);
+ dump_info->dump_addr = vzalloc(dump_info->dump_sz);
+ if (!dump_info->dump_addr)
+ return -ENOMEM;
+
+ /* Copy crashdump meta and table */
+ dump_meta = dump_info->dump_addr;
+ dump_meta->magic = QAIC_SSR_DUMP_V1_MAGIC;
+ dump_meta->version = QAIC_SSR_DUMP_V1_VER;
+ dump_meta->size = dump_info->dump_sz;
+ dump_meta->tbl_len = dump_info->tbl_len;
+ memcpy(dump_info->dump_addr + sizeof(*dump_meta), dump_info->tbl_addr, dump_info->tbl_len);
+ /* Offset by crashdump meta and table (copied above) */
+ dump_info->dump_off = dump_info->tbl_len + sizeof(*dump_meta);
+
+ return 0;
+}
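+
+/*
+ * Worked example (numbers are illustrative only): a debug table with two
+ * entries of len 1 MiB and 64 KiB gives
+ *   dump_sz = sizeof(struct dump_file_meta) + tbl_len + 1 MiB + 64 KiB
+ * where tbl_len = 2 * sizeof(struct debug_info_table), and dump_off starts
+ * just past the copied meta header and table.
+ */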
+
+static int send_xfer_done(struct qaic_device *qdev, void *resp, u32 dbc_id)
+{
+ struct ssr_debug_transfer_done *xfer_done;
+ int ret;
+
+ xfer_done = kmalloc(sizeof(*xfer_done), GFP_KERNEL);
+ if (!xfer_done) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, resp, SSR_RESP_MSG_SZ, MHI_EOT);
+ if (ret)
+ goto free_xfer_done;
+
+ xfer_done->hdr.cmd = cpu_to_le32(DEBUG_TRANSFER_DONE);
+ xfer_done->hdr.len = cpu_to_le32(sizeof(*xfer_done));
+ xfer_done->hdr.dbc_id = cpu_to_le32(dbc_id);
+
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, xfer_done, sizeof(*xfer_done), MHI_EOT);
+ if (ret)
+ goto free_xfer_done;
+
+ return 0;
+
+free_xfer_done:
+ kfree(xfer_done);
+out:
+ return ret;
+}
+
+static int mem_read_req(struct qaic_device *qdev, u64 dest_addr, u64 dest_len)
+{
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+ struct ssr_memory_read *read_buf_req;
+ struct ssr_dump_info *dump_info;
+ int ret;
+
+ dump_info = ssr_crash->dump_info;
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, ssr_crash->data, SSR_MEM_READ_DATA_SIZE,
+ MHI_EOT);
+ if (ret)
+ goto out;
+
+ read_buf_req = dump_info->read_buf_req;
+ read_buf_req->hdr.cmd = cpu_to_le32(MEMORY_READ);
+ read_buf_req->hdr.len = cpu_to_le32(sizeof(*read_buf_req));
+ read_buf_req->hdr.dbc_id = cpu_to_le32(qdev->ssr_dbc);
+ read_buf_req->addr = cpu_to_le64(dest_addr);
+ read_buf_req->len = cpu_to_le64(dest_len);
+
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, read_buf_req, sizeof(*read_buf_req),
+ MHI_EOT);
+ if (!ret)
+ dump_info->read_buf_req_queued = true;
+
+out:
+ return ret;
+}
+
+static int ssr_copy_table(struct ssr_dump_info *dump_info, void *data, u64 len)
+{
+ if (len > dump_info->tbl_len - dump_info->tbl_off)
+ return -EINVAL;
+
+ memcpy(dump_info->tbl_addr + dump_info->tbl_off, data, len);
+ dump_info->tbl_off += len;
+
+ /* Entire table has been downloaded, alloc dump memory */
+ if (dump_info->tbl_off == dump_info->tbl_len) {
+ dump_info->tbl_ent = dump_info->tbl_addr;
+ return alloc_dump(dump_info);
+ }
+
+ return 0;
+}
+
+static int ssr_copy_dump(struct ssr_dump_info *dump_info, void *data, u64 len)
+{
+ struct debug_info_table *tbl_ent;
+
+ tbl_ent = dump_info->tbl_ent;
+
+ if (len > tbl_ent->len - dump_info->tbl_ent_off)
+ return -EINVAL;
+
+ memcpy(dump_info->dump_addr + dump_info->dump_off, data, len);
+ dump_info->dump_off += len;
+ dump_info->tbl_ent_off += len;
+
+ /*
+	 * The current segment (an entry in the table) of the crashdump is
+	 * complete, move on to the next one.
+ */
+ if (tbl_ent->len == dump_info->tbl_ent_off) {
+ dump_info->tbl_ent++;
+ dump_info->tbl_ent_off = 0;
+ }
+
+ return 0;
+}
+
+static void ssr_dump_worker(struct work_struct *work)
+{
+ struct ssr_crashdump *ssr_crash = container_of(work, struct ssr_crashdump, work);
+ struct qaic_device *qdev = ssr_crash->qdev;
+ struct ssr_memory_read_rsp *mem_rd_resp;
+ struct debug_info_table *tbl_ent;
+ struct ssr_dump_info *dump_info;
+ u64 dest_addr, dest_len;
+ struct _ssr_hdr *_hdr;
+ struct ssr_hdr hdr;
+ u64 data_len;
+ int ret;
+
+ mem_rd_resp = (struct ssr_memory_read_rsp *)ssr_crash->data;
+ _hdr = &mem_rd_resp->hdr;
+ hdr.cmd = le32_to_cpu(_hdr->cmd);
+ hdr.len = le32_to_cpu(_hdr->len);
+ hdr.dbc_id = le32_to_cpu(_hdr->dbc_id);
+
+ if (hdr.dbc_id != qdev->ssr_dbc)
+ goto reset_device;
+
+ dump_info = ssr_crash->dump_info;
+ if (!dump_info)
+ goto reset_device;
+
+ if (hdr.cmd != MEMORY_READ_RSP)
+ goto free_dump_info;
+
+ if (hdr.len > SSR_MEM_READ_DATA_SIZE)
+ goto free_dump_info;
+
+ data_len = hdr.len - sizeof(*mem_rd_resp);
+
+ if (dump_info->tbl_off < dump_info->tbl_len) /* Chunk belongs to table */
+ ret = ssr_copy_table(dump_info, mem_rd_resp->data, data_len);
+ else /* Chunk belongs to crashdump */
+ ret = ssr_copy_dump(dump_info, mem_rd_resp->data, data_len);
+
+ if (ret)
+ goto free_dump_info;
+
+ if (dump_info->tbl_off < dump_info->tbl_len) {
+ /* Continue downloading table */
+ dest_addr = dump_info->tbl_addr_dev + dump_info->tbl_off;
+ dest_len = min(SSR_MEM_READ_CHUNK_SIZE, dump_info->tbl_len - dump_info->tbl_off);
+ ret = mem_read_req(qdev, dest_addr, dest_len);
+ } else if (dump_info->dump_off < dump_info->dump_sz) {
+ /* Continue downloading crashdump */
+ tbl_ent = dump_info->tbl_ent;
+ dest_addr = tbl_ent->mem_base + dump_info->tbl_ent_off;
+ dest_len = min(SSR_MEM_READ_CHUNK_SIZE, tbl_ent->len - dump_info->tbl_ent_off);
+ ret = mem_read_req(qdev, dest_addr, dest_len);
+ } else {
+ /* Crashdump download complete */
+ ret = send_xfer_done(qdev, dump_info->resp->data, hdr.dbc_id);
+ }
+
+	/* Most likely an MHI transfer has failed */
+ if (ret)
+ goto free_dump_info;
+
+ return;
+
+free_dump_info:
+ /* Free the allocated memory */
+ free_ssr_dump_info(ssr_crash);
+reset_device:
+ /*
+	 * Crashdump collection begins after a subsystem crashes on the device.
+	 * Something went wrong while collecting the crashdump, so instead of
+	 * trying to handle the error, just reset the device; a best effort
+	 * has been made.
+ */
+ mhi_soc_reset(qdev->mhi_cntrl);
+}
+
+static struct ssr_dump_info *alloc_dump_info(struct qaic_device *qdev,
+ struct ssr_debug_transfer_info *debug_info)
+{
+ struct ssr_dump_info *dump_info;
+ int ret;
+
+ le64_to_cpus(&debug_info->tbl_len);
+ le64_to_cpus(&debug_info->tbl_addr);
+
+ if (debug_info->tbl_len == 0 ||
+ debug_info->tbl_len % sizeof(struct debug_info_table) != 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+	/* Allocate the SSR crashdump bookkeeping structure */
+ dump_info = kzalloc(sizeof(*dump_info), GFP_KERNEL);
+ if (!dump_info) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* Buffer used to send MEMORY READ request to device via MHI */
+ dump_info->read_buf_req = kzalloc(sizeof(*dump_info->read_buf_req), GFP_KERNEL);
+ if (!dump_info->read_buf_req) {
+ ret = -ENOMEM;
+ goto free_dump_info;
+ }
+
+ /* Crashdump meta table buffer */
+ dump_info->tbl_addr = vzalloc(debug_info->tbl_len);
+ if (!dump_info->tbl_addr) {
+ ret = -ENOMEM;
+ goto free_read_buf_req;
+ }
+
+ dump_info->tbl_addr_dev = debug_info->tbl_addr;
+ dump_info->tbl_len = debug_info->tbl_len;
+
+ return dump_info;
+
+free_read_buf_req:
+ kfree(dump_info->read_buf_req);
+free_dump_info:
+ kfree(dump_info);
+out:
+ return ERR_PTR(ret);
+}
+
+static int dbg_xfer_info_rsp(struct qaic_device *qdev, struct dma_bridge_chan *dbc,
+ struct ssr_debug_transfer_info *debug_info)
+{
+ struct ssr_debug_transfer_info_rsp *debug_rsp;
+ struct ssr_crashdump *ssr_crash = NULL;
+ int ret = 0, ret2;
+
+ debug_rsp = kmalloc(sizeof(*debug_rsp), GFP_KERNEL);
+ if (!debug_rsp)
+ return -ENOMEM;
+
+ if (!qdev->ssr_mhi_buf) {
+ ret = -ENOMEM;
+ goto send_rsp;
+ }
+
+ if (dbc->state != DBC_STATE_BEFORE_POWER_UP) {
+ ret = -EINVAL;
+ goto send_rsp;
+ }
+
+ ssr_crash = qdev->ssr_mhi_buf;
+ ssr_crash->dump_info = alloc_dump_info(qdev, debug_info);
+ if (IS_ERR(ssr_crash->dump_info)) {
+ ret = PTR_ERR(ssr_crash->dump_info);
+ ssr_crash->dump_info = NULL;
+ }
+
+send_rsp:
+ debug_rsp->hdr.cmd = cpu_to_le32(DEBUG_TRANSFER_INFO_RSP);
+ debug_rsp->hdr.len = cpu_to_le32(sizeof(*debug_rsp));
+ debug_rsp->hdr.dbc_id = cpu_to_le32(dbc->id);
+ /*
+	 * 0 = Return an ACK confirming the host is ready to download the crashdump
+	 * 1 = Return a NACK indicating the host is not ready to download the crashdump
+ */
+ debug_rsp->ret = cpu_to_le32(ret ? 1 : 0);
+
+ ret2 = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, debug_rsp, sizeof(*debug_rsp), MHI_EOT);
+ if (ret2) {
+ free_ssr_dump_info(ssr_crash);
+ kfree(debug_rsp);
+ return ret2;
+ }
+
+ return ret;
+}
+
+static void dbg_xfer_done_rsp(struct qaic_device *qdev, struct dma_bridge_chan *dbc,
+ struct ssr_debug_transfer_done_rsp *xfer_rsp)
+{
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+ u32 status = le32_to_cpu(xfer_rsp->ret);
+ struct device *dev = &qdev->pdev->dev;
+ struct ssr_dump_info *dump_info;
+
+ dump_info = ssr_crash->dump_info;
+ if (!dump_info)
+ return;
+
+ if (status) {
+ free_ssr_dump_info(ssr_crash);
+ return;
+ }
+
+ dev_coredumpv(dev, dump_info->dump_addr, dump_info->dump_sz, GFP_KERNEL);
+ /* dev_coredumpv will free dump_info->dump_addr */
+ dump_info->dump_addr = NULL;
+ free_ssr_dump_info(ssr_crash);
+}
+
+static void ssr_worker(struct work_struct *work)
+{
+ struct ssr_resp *resp = container_of(work, struct ssr_resp, work);
+ struct ssr_hdr *hdr = (struct ssr_hdr *)resp->data;
+ struct ssr_dump_info *dump_info = NULL;
+ struct qaic_device *qdev = resp->qdev;
+ struct ssr_crashdump *ssr_crash;
+ struct ssr_event_rsp *event_rsp;
+ struct dma_bridge_chan *dbc;
+ struct ssr_event *event;
+ u32 ssr_event_ack;
+ int ret;
+
+ le32_to_cpus(&hdr->cmd);
+ le32_to_cpus(&hdr->len);
+ le32_to_cpus(&hdr->dbc_id);
+
+ if (hdr->len > SSR_RESP_MSG_SZ)
+ goto out;
+
+ if (hdr->dbc_id >= qdev->num_dbc)
+ goto out;
+
+ dbc = &qdev->dbc[hdr->dbc_id];
+
+ switch (hdr->cmd) {
+ case DEBUG_TRANSFER_INFO:
+ ret = dbg_xfer_info_rsp(qdev, dbc, (struct ssr_debug_transfer_info *)resp->data);
+ if (ret)
+ break;
+
+ ssr_crash = qdev->ssr_mhi_buf;
+ dump_info = ssr_crash->dump_info;
+ dump_info->dbc = dbc;
+ dump_info->resp = resp;
+
+ /* Start by downloading debug table */
+ ret = mem_read_req(qdev, dump_info->tbl_addr_dev,
+ min(dump_info->tbl_len, SSR_MEM_READ_CHUNK_SIZE));
+ if (ret) {
+ free_ssr_dump_info(ssr_crash);
+ break;
+ }
+
+ /*
+		 * Everything has gone fine so far, which means the crashdump
+		 * will be collected chunk by chunk. Do not queue a response
+		 * buffer for SSR commands until the crashdump is complete.
+ */
+ return;
+ case SSR_EVENT:
+ event = (struct ssr_event *)hdr;
+ le32_to_cpus(&event->event);
+ ssr_event_ack = event->event;
+ ssr_crash = qdev->ssr_mhi_buf;
+
+ switch (event->event) {
+ case BEFORE_SHUTDOWN:
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_BEFORE_SHUTDOWN);
+ qaic_dbc_enter_ssr(qdev, hdr->dbc_id);
+ break;
+ case AFTER_SHUTDOWN:
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_AFTER_SHUTDOWN);
+ break;
+ case BEFORE_POWER_UP:
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_BEFORE_POWER_UP);
+ break;
+ case AFTER_POWER_UP:
+ /*
+			 * If dump_info is non-NULL, this SSR event arrived
+			 * while a crashdump download for this DBC is still
+			 * in progress. NACK the SSR event.
+ */
+ if (ssr_crash && ssr_crash->dump_info) {
+ free_ssr_dump_info(ssr_crash);
+ ssr_event_ack = SSR_EVENT_NACK;
+ break;
+ }
+
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_AFTER_POWER_UP);
+ break;
+ default:
+ break;
+ }
+
+ event_rsp = kmalloc(sizeof(*event_rsp), GFP_KERNEL);
+ if (!event_rsp)
+ break;
+
+ event_rsp->hdr.cmd = cpu_to_le32(SSR_EVENT_RSP);
+ event_rsp->hdr.len = cpu_to_le32(sizeof(*event_rsp));
+ event_rsp->hdr.dbc_id = cpu_to_le32(hdr->dbc_id);
+ event_rsp->event = cpu_to_le32(ssr_event_ack);
+
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, event_rsp, sizeof(*event_rsp),
+ MHI_EOT);
+ if (ret)
+ kfree(event_rsp);
+
+ if (event->event == AFTER_POWER_UP && ssr_event_ack != SSR_EVENT_NACK) {
+ qaic_dbc_exit_ssr(qdev);
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_IDLE);
+ }
+
+ break;
+ case DEBUG_TRANSFER_DONE_RSP:
+ dbg_xfer_done_rsp(qdev, dbc, (struct ssr_debug_transfer_done_rsp *)hdr);
+ break;
+ default:
+ break;
+ }
+
+out:
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, resp->data, SSR_RESP_MSG_SZ, MHI_EOT);
+ if (ret)
+ kfree(resp);
+}
+
+static int qaic_ssr_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
+ struct ssr_resp *resp;
+ int ret;
+
+ ret = mhi_prepare_for_transfer(mhi_dev);
+ if (ret)
+ return ret;
+
+ resp = kzalloc(sizeof(*resp) + SSR_RESP_MSG_SZ, GFP_KERNEL);
+ if (!resp) {
+ mhi_unprepare_from_transfer(mhi_dev);
+ return -ENOMEM;
+ }
+
+ resp->qdev = qdev;
+ INIT_WORK(&resp->work, ssr_worker);
+
+ ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, resp->data, SSR_RESP_MSG_SZ, MHI_EOT);
+ if (ret) {
+ kfree(resp);
+ mhi_unprepare_from_transfer(mhi_dev);
+ return ret;
+ }
+
+ dev_set_drvdata(&mhi_dev->dev, qdev);
+ qdev->ssr_ch = mhi_dev;
+
+ return 0;
+}
+
+static void qaic_ssr_mhi_remove(struct mhi_device *mhi_dev)
+{
+ struct qaic_device *qdev;
+
+ qdev = dev_get_drvdata(&mhi_dev->dev);
+ mhi_unprepare_from_transfer(qdev->ssr_ch);
+ qdev->ssr_ch = NULL;
+}
+
+static void qaic_ssr_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+ struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev);
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+ struct _ssr_hdr *hdr = mhi_result->buf_addr;
+ struct ssr_dump_info *dump_info;
+
+ if (mhi_result->transaction_status) {
+ kfree(mhi_result->buf_addr);
+ return;
+ }
+
+ /*
+	 * MEMORY READ is used to download the crashdump, which is transferred
+	 * chunk by chunk in a series of MEMORY READ SSR commands. To avoid
+	 * repeated kmalloc() and kfree() of the same MEMORY READ request
+	 * buffer, only one such buffer is allocated and it is freed only once.
+ */
+ if (le32_to_cpu(hdr->cmd) == MEMORY_READ) {
+ dump_info = ssr_crash->dump_info;
+ if (dump_info) {
+ dump_info->read_buf_req_queued = false;
+ return;
+ }
+ }
+
+ kfree(mhi_result->buf_addr);
+}
+
+static void qaic_ssr_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+ struct ssr_resp *resp = container_of(mhi_result->buf_addr, struct ssr_resp, data);
+ struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev);
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+ bool memory_read_rsp = false;
+
+ if (ssr_crash && ssr_crash->data == mhi_result->buf_addr)
+ memory_read_rsp = true;
+
+ if (mhi_result->transaction_status) {
+		/* Do not free the SSR crashdump buffer as it is allocated via managed APIs */
+ if (!memory_read_rsp)
+ kfree(resp);
+ return;
+ }
+
+ if (memory_read_rsp)
+ queue_work(qdev->ssr_wq, &ssr_crash->work);
+ else
+ queue_work(qdev->ssr_wq, &resp->work);
+}
+
+static const struct mhi_device_id qaic_ssr_mhi_match_table[] = {
+ { .chan = "QAIC_SSR", },
+ {},
+};
+
+static struct mhi_driver qaic_ssr_mhi_driver = {
+ .id_table = qaic_ssr_mhi_match_table,
+ .remove = qaic_ssr_mhi_remove,
+ .probe = qaic_ssr_mhi_probe,
+ .ul_xfer_cb = qaic_ssr_mhi_ul_xfer_cb,
+ .dl_xfer_cb = qaic_ssr_mhi_dl_xfer_cb,
+ .driver = {
+ .name = "qaic_ssr",
+ },
+};
+
+int qaic_ssr_init(struct qaic_device *qdev, struct drm_device *drm)
+{
+ struct ssr_crashdump *ssr_crash;
+
+ qdev->ssr_dbc = QAIC_SSR_DBC_SENTINEL;
+
+ /*
+	 * The device requests only one SSR at a time, so a single buffer for
+	 * downloading the crashdump is sufficient.
+ */
+ ssr_crash = drmm_kzalloc(drm, SSR_MHI_BUF_SIZE, GFP_KERNEL);
+ if (!ssr_crash)
+ return -ENOMEM;
+
+ ssr_crash->qdev = qdev;
+ INIT_WORK(&ssr_crash->work, ssr_dump_worker);
+ qdev->ssr_mhi_buf = ssr_crash;
+
+ return 0;
+}
+
+int qaic_ssr_register(void)
+{
+ return mhi_driver_register(&qaic_ssr_mhi_driver);
+}
+
+void qaic_ssr_unregister(void)
+{
+ mhi_driver_unregister(&qaic_ssr_mhi_driver);
+}
diff --git a/drivers/accel/qaic/qaic_ssr.h b/drivers/accel/qaic/qaic_ssr.h
new file mode 100644
index 000000000000..97ccff305750
--- /dev/null
+++ b/drivers/accel/qaic/qaic_ssr.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __QAIC_SSR_H__
+#define __QAIC_SSR_H__
+
+struct drm_device;
+struct qaic_device;
+
+int qaic_ssr_register(void);
+void qaic_ssr_unregister(void);
+void qaic_clean_up_ssr(struct qaic_device *qdev);
+int qaic_ssr_init(struct qaic_device *qdev, struct drm_device *drm);
+#endif /* __QAIC_SSR_H__ */
diff --git a/drivers/accel/qaic/qaic_sysfs.c b/drivers/accel/qaic/qaic_sysfs.c
new file mode 100644
index 000000000000..e0afb0ffb589
--- /dev/null
+++ b/drivers/accel/qaic/qaic_sysfs.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Copyright (c) 2020-2025, The Linux Foundation. All rights reserved. */
+
+#include <drm/drm_file.h>
+#include <drm/drm_managed.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/mutex.h>
+#include <linux/sysfs.h>
+
+#include "qaic.h"
+
+#define NAME_LEN 14
+
+struct dbc_attribute {
+ struct device_attribute dev_attr;
+ u32 dbc_id;
+ char name[NAME_LEN];
+};
+
+static ssize_t dbc_state_show(struct device *dev, struct device_attribute *a, char *buf)
+{
+ struct dbc_attribute *dbc_attr = container_of(a, struct dbc_attribute, dev_attr);
+ struct drm_minor *minor = dev_get_drvdata(dev);
+ struct qaic_device *qdev;
+
+ qdev = to_qaic_device(minor->dev);
+ return sysfs_emit(buf, "%d\n", qdev->dbc[dbc_attr->dbc_id].state);
+}
+
+void set_dbc_state(struct qaic_device *qdev, u32 dbc_id, unsigned int state)
+{
+ struct device *kdev = to_accel_kdev(qdev->qddev);
+ char *envp[3] = {};
+ char state_str[16];
+ char id_str[12];
+
+ envp[0] = id_str;
+ envp[1] = state_str;
+
+ if (state >= DBC_STATE_MAX)
+ return;
+ if (dbc_id >= qdev->num_dbc)
+ return;
+ if (state == qdev->dbc[dbc_id].state)
+ return;
+
+ scnprintf(id_str, ARRAY_SIZE(id_str), "DBC_ID=%d", dbc_id);
+ scnprintf(state_str, ARRAY_SIZE(state_str), "DBC_STATE=%d", state);
+
+ qdev->dbc[dbc_id].state = state;
+ kobject_uevent_env(&kdev->kobj, KOBJ_CHANGE, envp);
+}
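+
+/*
+ * Illustrative example (the node name is an assumption, not guaranteed by
+ * the driver): a state change on DBC 2 emits a KOBJ_CHANGE uevent on the
+ * accel device carrying "DBC_ID=2" and "DBC_STATE=<new state>", and the same
+ * numeric state can be read back from the matching attribute, e.g.
+ *
+ *   cat /sys/class/accel/accel<N>/dbc2_state
+ */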
+
+int qaic_sysfs_init(struct qaic_drm_device *qddev)
+{
+ struct device *kdev = to_accel_kdev(qddev);
+ struct drm_device *drm = to_drm(qddev);
+ u32 num_dbc = qddev->qdev->num_dbc;
+ struct dbc_attribute *dbc_attrs;
+ int i, ret;
+
+ dbc_attrs = drmm_kcalloc(drm, num_dbc, sizeof(*dbc_attrs), GFP_KERNEL);
+ if (!dbc_attrs)
+ return -ENOMEM;
+
+ for (i = 0; i < num_dbc; ++i) {
+ struct dbc_attribute *dbc_attr = &dbc_attrs[i];
+
+ sysfs_attr_init(&dbc_attr->dev_attr.attr);
+ dbc_attr->dbc_id = i;
+ scnprintf(dbc_attr->name, NAME_LEN, "dbc%d_state", i);
+ dbc_attr->dev_attr.attr.name = dbc_attr->name;
+ dbc_attr->dev_attr.attr.mode = 0444;
+ dbc_attr->dev_attr.show = dbc_state_show;
+ ret = sysfs_create_file(&kdev->kobj, &dbc_attr->dev_attr.attr);
+ if (ret) {
+ int j;
+
+ for (j = 0; j < i; ++j) {
+ dbc_attr = &dbc_attrs[j];
+ sysfs_remove_file(&kdev->kobj, &dbc_attr->dev_attr.attr);
+ }
+ drmm_kfree(drm, dbc_attrs);
+ return ret;
+ }
+ }
+
+ qddev->sysfs_attrs = dbc_attrs;
+ return 0;
+}
+
+void qaic_sysfs_remove(struct qaic_drm_device *qddev)
+{
+ struct dbc_attribute *dbc_attrs = qddev->sysfs_attrs;
+ struct device *kdev = to_accel_kdev(qddev);
+ u32 num_dbc = qddev->qdev->num_dbc;
+ int i;
+
+ if (!dbc_attrs)
+ return;
+
+ qddev->sysfs_attrs = NULL;
+ for (i = 0; i < num_dbc; ++i)
+ sysfs_remove_file(&kdev->kobj, &dbc_attrs[i].dev_attr.attr);
+ drmm_kfree(to_drm(qddev), dbc_attrs);
+}
diff --git a/drivers/accel/qaic/qaic_timesync.c b/drivers/accel/qaic/qaic_timesync.c
index 3fac540f8e03..8af2475f4f36 100644
--- a/drivers/accel/qaic/qaic_timesync.c
+++ b/drivers/accel/qaic/qaic_timesync.c
@@ -171,6 +171,13 @@ mod_timer:
dev_err(mqtsdev->dev, "%s mod_timer error:%d\n", __func__, ret);
}
+void qaic_mqts_ch_stop_timer(struct mhi_device *mhi_dev)
+{
+ struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev);
+
+ timer_delete_sync(&mqtsdev->timer);
+}
+
static int qaic_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
{
struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
@@ -206,6 +213,7 @@ static int qaic_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_devi
timer->expires = jiffies + msecs_to_jiffies(timesync_delay_ms);
add_timer(timer);
dev_set_drvdata(&mhi_dev->dev, mqtsdev);
+ qdev->mqts_ch = mhi_dev;
return 0;
@@ -221,6 +229,7 @@ static void qaic_timesync_remove(struct mhi_device *mhi_dev)
{
struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev);
+ mqtsdev->qdev->mqts_ch = NULL;
timer_delete_sync(&mqtsdev->timer);
mhi_unprepare_from_transfer(mqtsdev->mhi_dev);
kfree(mqtsdev->sync_msg);
diff --git a/drivers/accel/qaic/qaic_timesync.h b/drivers/accel/qaic/qaic_timesync.h
index 851b7acd43bb..77b9c2b55057 100644
--- a/drivers/accel/qaic/qaic_timesync.h
+++ b/drivers/accel/qaic/qaic_timesync.h
@@ -6,6 +6,9 @@
#ifndef __QAIC_TIMESYNC_H__
#define __QAIC_TIMESYNC_H__
+#include <linux/mhi.h>
+
int qaic_timesync_init(void);
void qaic_timesync_deinit(void);
+void qaic_mqts_ch_stop_timer(struct mhi_device *mhi_dev);
#endif /* __QAIC_TIMESYNC_H__ */
diff --git a/drivers/accel/qaic/sahara.c b/drivers/accel/qaic/sahara.c
index 3ebcc1f7ff58..fd3c3b2d1fd3 100644
--- a/drivers/accel/qaic/sahara.c
+++ b/drivers/accel/qaic/sahara.c
@@ -159,6 +159,7 @@ struct sahara_context {
struct sahara_packet *rx;
struct work_struct fw_work;
struct work_struct dump_work;
+ struct work_struct read_data_work;
struct mhi_device *mhi_dev;
const char * const *image_table;
u32 table_size;
@@ -174,7 +175,10 @@ struct sahara_context {
u64 dump_image_offset;
void *mem_dump_freespace;
u64 dump_images_left;
+ u32 read_data_offset;
+ u32 read_data_length;
bool is_mem_dump_mode;
+ bool non_streaming;
};
static const char * const aic100_image_table[] = {
@@ -194,6 +198,7 @@ static const char * const aic200_image_table[] = {
[23] = "qcom/aic200/aop.mbn",
[32] = "qcom/aic200/tz.mbn",
[33] = "qcom/aic200/hypvm.mbn",
+ [38] = "qcom/aic200/xbl_config.elf",
[39] = "qcom/aic200/aic200_abl.elf",
[40] = "qcom/aic200/apdp.mbn",
[41] = "qcom/aic200/devcfg.mbn",
@@ -202,6 +207,7 @@ static const char * const aic200_image_table[] = {
[49] = "qcom/aic200/shrm.elf",
[50] = "qcom/aic200/cpucp.elf",
[51] = "qcom/aic200/aop_devcfg.mbn",
+ [54] = "qcom/aic200/qupv3fw.elf",
[57] = "qcom/aic200/cpucp_dtbs.elf",
[62] = "qcom/aic200/uefi_dtbs.elf",
[63] = "qcom/aic200/xbl_ac_config.mbn",
@@ -213,9 +219,15 @@ static const char * const aic200_image_table[] = {
[69] = "qcom/aic200/dcd.mbn",
[73] = "qcom/aic200/gearvm.mbn",
[74] = "qcom/aic200/sti.bin",
- [75] = "qcom/aic200/pvs.bin",
+ [76] = "qcom/aic200/tz_qti_config.mbn",
+ [78] = "qcom/aic200/pvs.bin",
};
+static bool is_streaming(struct sahara_context *context)
+{
+ return !context->non_streaming;
+}
+
static int sahara_find_image(struct sahara_context *context, u32 image_id)
{
int ret;
@@ -265,6 +277,8 @@ static void sahara_send_reset(struct sahara_context *context)
int ret;
context->is_mem_dump_mode = false;
+ context->read_data_offset = 0;
+ context->read_data_length = 0;
context->tx[0]->cmd = cpu_to_le32(SAHARA_RESET_CMD);
context->tx[0]->length = cpu_to_le32(SAHARA_RESET_LENGTH);
@@ -319,9 +333,39 @@ static void sahara_hello(struct sahara_context *context)
dev_err(&context->mhi_dev->dev, "Unable to send hello response %d\n", ret);
}
+static int read_data_helper(struct sahara_context *context, int buf_index)
+{
+ enum mhi_flags mhi_flag;
+ u32 pkt_data_len;
+ int ret;
+
+ pkt_data_len = min(context->read_data_length, SAHARA_PACKET_MAX_SIZE);
+
+ memcpy(context->tx[buf_index],
+ &context->firmware->data[context->read_data_offset],
+ pkt_data_len);
+
+ context->read_data_offset += pkt_data_len;
+ context->read_data_length -= pkt_data_len;
+
+ if (is_streaming(context) || !context->read_data_length)
+ mhi_flag = MHI_EOT;
+ else
+ mhi_flag = MHI_CHAIN;
+
+ ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE,
+ context->tx[buf_index], pkt_data_len, mhi_flag);
+ if (ret) {
+ dev_err(&context->mhi_dev->dev, "Unable to send read_data response %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static void sahara_read_data(struct sahara_context *context)
{
- u32 image_id, data_offset, data_len, pkt_data_len;
+ u32 image_id, data_offset, data_len;
int ret;
int i;
@@ -357,7 +401,7 @@ static void sahara_read_data(struct sahara_context *context)
* and is not needed here on error.
*/
- if (data_len > SAHARA_TRANSFER_MAX_SIZE) {
+ if (context->non_streaming && data_len > SAHARA_TRANSFER_MAX_SIZE) {
dev_err(&context->mhi_dev->dev, "Malformed read_data packet - data len %d exceeds max xfer size %d\n",
data_len, SAHARA_TRANSFER_MAX_SIZE);
sahara_send_reset(context);
@@ -378,22 +422,18 @@ static void sahara_read_data(struct sahara_context *context)
return;
}
- for (i = 0; i < SAHARA_NUM_TX_BUF && data_len; ++i) {
- pkt_data_len = min(data_len, SAHARA_PACKET_MAX_SIZE);
-
- memcpy(context->tx[i], &context->firmware->data[data_offset], pkt_data_len);
+ context->read_data_offset = data_offset;
+ context->read_data_length = data_len;
- data_offset += pkt_data_len;
- data_len -= pkt_data_len;
+ if (is_streaming(context)) {
+ schedule_work(&context->read_data_work);
+ return;
+ }
- ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE,
- context->tx[i], pkt_data_len,
- !data_len ? MHI_EOT : MHI_CHAIN);
- if (ret) {
- dev_err(&context->mhi_dev->dev, "Unable to send read_data response %d\n",
- ret);
- return;
- }
+ for (i = 0; i < SAHARA_NUM_TX_BUF && context->read_data_length; ++i) {
+ ret = read_data_helper(context, i);
+ if (ret)
+ break;
}
}
@@ -538,6 +578,7 @@ static void sahara_parse_dump_table(struct sahara_context *context)
struct sahara_memory_dump_meta_v1 *dump_meta;
u64 table_nents;
u64 dump_length;
+ u64 mul_bytes;
int ret;
u64 i;
@@ -551,8 +592,9 @@ static void sahara_parse_dump_table(struct sahara_context *context)
dev_table[i].description[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0;
dev_table[i].filename[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0;
- dump_length = size_add(dump_length, le64_to_cpu(dev_table[i].length));
- if (dump_length == SIZE_MAX) {
+ if (check_add_overflow(dump_length,
+ le64_to_cpu(dev_table[i].length),
+ &dump_length)) {
/* Discard the dump */
sahara_send_reset(context);
return;
@@ -568,14 +610,17 @@ static void sahara_parse_dump_table(struct sahara_context *context)
dev_table[i].filename);
}
- dump_length = size_add(dump_length, sizeof(*dump_meta));
- if (dump_length == SIZE_MAX) {
+ if (check_add_overflow(dump_length, (u64)sizeof(*dump_meta), &dump_length)) {
/* Discard the dump */
sahara_send_reset(context);
return;
}
- dump_length = size_add(dump_length, size_mul(sizeof(*image_out_table), table_nents));
- if (dump_length == SIZE_MAX) {
+ if (check_mul_overflow((u64)sizeof(*image_out_table), table_nents, &mul_bytes)) {
+ /* Discard the dump */
+ sahara_send_reset(context);
+ return;
+ }
+ if (check_add_overflow(dump_length, mul_bytes, &dump_length)) {
/* Discard the dump */
sahara_send_reset(context);
return;
@@ -615,7 +660,7 @@ static void sahara_parse_dump_table(struct sahara_context *context)
/* Request the first chunk of the first image */
context->dump_image = &image_out_table[0];
- dump_length = min(context->dump_image->length, SAHARA_READ_MAX_SIZE);
+ dump_length = min_t(u64, context->dump_image->length, SAHARA_READ_MAX_SIZE);
/* Avoid requesting EOI sized data so that we can identify errors */
if (dump_length == SAHARA_END_OF_IMAGE_LENGTH)
dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2;
@@ -663,7 +708,7 @@ static void sahara_parse_dump_image(struct sahara_context *context)
/* Get next image chunk */
dump_length = context->dump_image->length - context->dump_image_offset;
- dump_length = min(dump_length, SAHARA_READ_MAX_SIZE);
+ dump_length = min_t(u64, dump_length, SAHARA_READ_MAX_SIZE);
/* Avoid requesting EOI sized data so that we can identify errors */
if (dump_length == SAHARA_END_OF_IMAGE_LENGTH)
dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2;
@@ -742,6 +787,13 @@ error:
sahara_send_reset(context);
}
+static void sahara_read_data_processing(struct work_struct *work)
+{
+ struct sahara_context *context = container_of(work, struct sahara_context, read_data_work);
+
+ read_data_helper(context, 0);
+}
+
static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
{
struct sahara_context *context;
@@ -756,34 +808,56 @@ static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_
if (!context->rx)
return -ENOMEM;
+ if (!strcmp(mhi_dev->mhi_cntrl->name, "AIC200")) {
+ context->image_table = aic200_image_table;
+ context->table_size = ARRAY_SIZE(aic200_image_table);
+ } else {
+ context->image_table = aic100_image_table;
+ context->table_size = ARRAY_SIZE(aic100_image_table);
+ context->non_streaming = true;
+ }
+
/*
- * AIC100 defines SAHARA_TRANSFER_MAX_SIZE as the largest value it
- * will request for READ_DATA. This is larger than
- * SAHARA_PACKET_MAX_SIZE, and we need 9x SAHARA_PACKET_MAX_SIZE to
- * cover SAHARA_TRANSFER_MAX_SIZE. When the remote side issues a
- * READ_DATA, it requires a transfer of the exact size requested. We
- * can use MHI_CHAIN to link multiple buffers into a single transfer
- * but the remote side will not consume the buffers until it sees an
- * EOT, thus we need to allocate enough buffers to put in the tx fifo
- * to cover an entire READ_DATA request of the max size.
+ * There are two firmware implementations for READ_DATA handling.
+ * The older "SBL" implementation defines a Sahara transfer size, and
+ * expects that the response is a single transport transfer. If the
+ * FW wants to transfer a file that is larger than the transfer size,
+ * the FW will issue multiple READ_DATA commands. For this
+ * implementation, we need to allocate enough buffers to contain the
+ * entire Sahara transfer size.
+ *
+ * The newer "XBL" implementation does not define a maximum transfer
+ * size and instead expects the data to be streamed over using the
+	 * transport level MTU. The FW will issue a single READ_DATA command
+	 * of arbitrary size and consume multiple transport level transfers
+	 * until the expected amount of data has been received. For this
+ * implementation we only need a single buffer of the transport MTU
+ * but we'll need to be able to use it multiple times for a single
+ * READ_DATA request.
+ *
+ * AIC100 is the SBL implementation and defines SAHARA_TRANSFER_MAX_SIZE
+ * and we need 9x SAHARA_PACKET_MAX_SIZE to cover that. We can use
+ * MHI_CHAIN to link multiple buffers into a single transfer but the
+ * remote side will not consume the buffers until it sees an EOT, thus
+ * we need to allocate enough buffers to put in the tx fifo to cover an
+ * entire READ_DATA request of the max size.
+ *
+ * AIC200 is the XBL implementation, and so a single buffer will work.
*/
for (i = 0; i < SAHARA_NUM_TX_BUF; ++i) {
- context->tx[i] = devm_kzalloc(&mhi_dev->dev, SAHARA_PACKET_MAX_SIZE, GFP_KERNEL);
+ context->tx[i] = devm_kzalloc(&mhi_dev->dev,
+ SAHARA_PACKET_MAX_SIZE,
+ GFP_KERNEL);
if (!context->tx[i])
return -ENOMEM;
+ if (is_streaming(context))
+ break;
}
context->mhi_dev = mhi_dev;
INIT_WORK(&context->fw_work, sahara_processing);
INIT_WORK(&context->dump_work, sahara_dump_processing);
-
- if (!strcmp(mhi_dev->mhi_cntrl->name, "AIC200")) {
- context->image_table = aic200_image_table;
- context->table_size = ARRAY_SIZE(aic200_image_table);
- } else {
- context->image_table = aic100_image_table;
- context->table_size = ARRAY_SIZE(aic100_image_table);
- }
+ INIT_WORK(&context->read_data_work, sahara_read_data_processing);
context->active_image_id = SAHARA_IMAGE_ID_NONE;
dev_set_drvdata(&mhi_dev->dev, context);
@@ -814,6 +888,10 @@ static void sahara_mhi_remove(struct mhi_device *mhi_dev)
static void sahara_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
{
+ struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev);
+
+ if (!mhi_result->transaction_status && context->read_data_length && is_streaming(context))
+ schedule_work(&context->read_data_work);
}
static void sahara_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
diff --git a/drivers/accel/rocket/rocket_gem.c b/drivers/accel/rocket/rocket_gem.c
index 0551e11cc184..624c4ecf5a34 100644
--- a/drivers/accel/rocket/rocket_gem.c
+++ b/drivers/accel/rocket/rocket_gem.c
@@ -2,6 +2,7 @@
/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
#include <drm/drm_device.h>
+#include <drm/drm_print.h>
#include <drm/drm_utils.h>
#include <drm/rocket_accel.h>
#include <linux/dma-mapping.h>