summaryrefslogtreecommitdiff
path: root/drivers/accel/amdxdna
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/accel/amdxdna')
-rw-r--r--drivers/accel/amdxdna/Makefile1
-rw-r--r--drivers/accel/amdxdna/TODO1
-rw-r--r--drivers/accel/amdxdna/aie2_ctx.c195
-rw-r--r--drivers/accel/amdxdna/aie2_error.c95
-rw-r--r--drivers/accel/amdxdna/aie2_message.c647
-rw-r--r--drivers/accel/amdxdna/aie2_msg_priv.h88
-rw-r--r--drivers/accel/amdxdna/aie2_pci.c269
-rw-r--r--drivers/accel/amdxdna/aie2_pci.h54
-rw-r--r--drivers/accel/amdxdna/aie2_smu.c49
-rw-r--r--drivers/accel/amdxdna/amdxdna_ctx.c104
-rw-r--r--drivers/accel/amdxdna/amdxdna_ctx.h45
-rw-r--r--drivers/accel/amdxdna/amdxdna_error.h59
-rw-r--r--drivers/accel/amdxdna/amdxdna_gem.c51
-rw-r--r--drivers/accel/amdxdna/amdxdna_gem.h6
-rw-r--r--drivers/accel/amdxdna/amdxdna_mailbox.c14
-rw-r--r--drivers/accel/amdxdna/amdxdna_mailbox_helper.h6
-rw-r--r--drivers/accel/amdxdna/amdxdna_pci_drv.c63
-rw-r--r--drivers/accel/amdxdna/amdxdna_pci_drv.h3
-rw-r--r--drivers/accel/amdxdna/amdxdna_pm.c94
-rw-r--r--drivers/accel/amdxdna/amdxdna_pm.h18
-rw-r--r--drivers/accel/amdxdna/npu1_regs.c8
-rw-r--r--drivers/accel/amdxdna/npu2_regs.c2
-rw-r--r--drivers/accel/amdxdna/npu4_regs.c12
-rw-r--r--drivers/accel/amdxdna/npu5_regs.c2
-rw-r--r--drivers/accel/amdxdna/npu6_regs.c2
25 files changed, 1483 insertions, 405 deletions
diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
index 6797dac65efa..6344aaf523fa 100644
--- a/drivers/accel/amdxdna/Makefile
+++ b/drivers/accel/amdxdna/Makefile
@@ -14,6 +14,7 @@ amdxdna-y := \
amdxdna_mailbox.o \
amdxdna_mailbox_helper.o \
amdxdna_pci_drv.o \
+ amdxdna_pm.o \
amdxdna_sysfs.o \
amdxdna_ubuf.o \
npu1_regs.o \
diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
index ad8ac6e315b6..0e4bbebeaedf 100644
--- a/drivers/accel/amdxdna/TODO
+++ b/drivers/accel/amdxdna/TODO
@@ -1,2 +1 @@
- Add debugfs support
-- Add debug BO support
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index e9f9b1fa5dc1..42d876a427c5 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -21,6 +21,7 @@
#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
static bool force_cmdlist;
module_param(force_cmdlist, bool, 0600);
@@ -88,7 +89,7 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw
goto out;
}
- ret = aie2_config_cu(hwctx);
+ ret = aie2_config_cu(hwctx, NULL);
if (ret) {
XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
goto out;
@@ -167,14 +168,11 @@ static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg)
int aie2_hwctx_resume(struct amdxdna_client *client)
{
- struct amdxdna_dev *xdna = client->xdna;
-
/*
* The resume path cannot guarantee that mailbox channel can be
* regenerated. If this happen, when submit message to this
* mailbox channel, error will return.
*/
- drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
}
@@ -184,12 +182,13 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
struct dma_fence *fence = job->fence;
trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
+
+ amdxdna_pm_suspend_put(job->hwctx->client->xdna);
job->hwctx->priv->completed++;
dma_fence_signal(fence);
up(&job->hwctx->priv->job_sem);
job->job_done = true;
- dma_fence_put(fence);
mmput_async(job->mm);
aie2_job_put(job);
}
@@ -204,10 +203,13 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
cmd_abo = job->cmd_bo;
- if (unlikely(!data))
+ if (unlikely(job->job_timeout)) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
+ ret = -EINVAL;
goto out;
+ }
- if (unlikely(size != sizeof(u32))) {
+ if (unlikely(!data) || unlikely(size != sizeof(u32))) {
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
ret = -EINVAL;
goto out;
@@ -226,11 +228,10 @@ out:
}
static int
-aie2_sched_nocmd_resp_handler(void *handle, void __iomem *data, size_t size)
+aie2_sched_drvcmd_resp_handler(void *handle, void __iomem *data, size_t size)
{
struct amdxdna_sched_job *job = handle;
int ret = 0;
- u32 status;
if (unlikely(!data))
goto out;
@@ -240,8 +241,7 @@ aie2_sched_nocmd_resp_handler(void *handle, void __iomem *data, size_t size)
goto out;
}
- status = readl(data);
- XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+ job->drv_cmd->result = readl(data);
out:
aie2_sched_notify(job);
@@ -260,6 +260,13 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
int ret = 0;
cmd_abo = job->cmd_bo;
+
+ if (unlikely(job->job_timeout)) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
+ ret = -EINVAL;
+ goto out;
+ }
+
if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
ret = -EINVAL;
@@ -314,8 +321,18 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
kref_get(&job->refcnt);
fence = dma_fence_get(job->fence);
- if (unlikely(!cmd_abo)) {
- ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
+ if (job->drv_cmd) {
+ switch (job->drv_cmd->opcode) {
+ case SYNC_DEBUG_BO:
+ ret = aie2_sync_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
+ break;
+ case ATTACH_DEBUG_BO:
+ ret = aie2_config_debug_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
goto out;
}
@@ -362,6 +379,7 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job)
xdna = hwctx->client->xdna;
trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
+ job->job_timeout = true;
mutex_lock(&xdna->dev_lock);
aie2_hwctx_stop(xdna, hwctx, sched_job);
@@ -531,13 +549,12 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
.num_rqs = DRM_SCHED_PRIORITY_COUNT,
.credit_limit = HWCTX_MAX_CMDS,
.timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
- .name = hwctx->name,
+ .name = "amdxdna_js",
.dev = xdna->ddev.dev,
};
struct drm_gpu_scheduler *sched;
struct amdxdna_hwctx_priv *priv;
struct amdxdna_gem_obj *heap;
- struct amdxdna_dev_hdl *ndev;
int i, ret;
priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
@@ -610,10 +627,14 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
goto free_entity;
}
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto free_col_list;
+
ret = aie2_alloc_resource(hwctx);
if (ret) {
XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
- goto free_col_list;
+ goto suspend_put;
}
ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
@@ -628,10 +649,9 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
goto release_resource;
}
+ amdxdna_pm_suspend_put(xdna);
hwctx->status = HWCTX_STAT_INIT;
- ndev = xdna->dev_handle;
- ndev->hwctx_num++;
init_waitqueue_head(&priv->job_free_wq);
XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
@@ -640,6 +660,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
release_resource:
aie2_release_resource(hwctx);
+suspend_put:
+ amdxdna_pm_suspend_put(xdna);
free_col_list:
kfree(hwctx->col_list);
free_entity:
@@ -662,26 +684,25 @@ free_priv:
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
{
- struct amdxdna_dev_hdl *ndev;
struct amdxdna_dev *xdna;
int idx;
xdna = hwctx->client->xdna;
- ndev = xdna->dev_handle;
- ndev->hwctx_num--;
XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
- drm_sched_entity_destroy(&hwctx->priv->entity);
-
aie2_hwctx_wait_for_idle(hwctx);
/* Request fw to destroy hwctx and cancel the rest pending requests */
aie2_release_resource(hwctx);
+ mutex_unlock(&xdna->dev_lock);
+ drm_sched_entity_destroy(&hwctx->priv->entity);
+
/* Wait for all submitted jobs to be completed or canceled */
wait_event(hwctx->priv->job_free_wq,
atomic64_read(&hwctx->job_submit_cnt) ==
atomic64_read(&hwctx->job_free_cnt));
+ mutex_lock(&xdna->dev_lock);
drm_sched_fini(&hwctx->priv->sched);
aie2_ctx_syncobj_destroy(hwctx);
@@ -697,6 +718,14 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
kfree(hwctx->cus);
}
+static int aie2_config_cu_resp_handler(void *handle, void __iomem *data, size_t size)
+{
+ struct amdxdna_hwctx *hwctx = handle;
+
+ amdxdna_pm_suspend_put(hwctx->client->xdna);
+ return 0;
+}
+
static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
{
struct amdxdna_hwctx_param_config_cu *config = buf;
@@ -728,10 +757,14 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
if (!hwctx->cus)
return -ENOMEM;
- ret = aie2_config_cu(hwctx);
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto free_cus;
+
+ ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler);
if (ret) {
XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
- goto free_cus;
+ goto pm_suspend_put;
}
wmb(); /* To avoid locking in command submit when check status */
@@ -739,12 +772,82 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
return 0;
+pm_suspend_put:
+ amdxdna_pm_suspend_put(xdna);
free_cus:
kfree(hwctx->cus);
hwctx->cus = NULL;
return ret;
}
+static void aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq)
+{
+ struct dma_fence *out_fence = aie2_cmd_get_out_fence(hwctx, seq);
+
+ if (!out_fence) {
+ XDNA_ERR(hwctx->client->xdna, "Failed to get fence");
+ return;
+ }
+
+ dma_fence_wait_timeout(out_fence, false, MAX_SCHEDULE_TIMEOUT);
+ dma_fence_put(out_fence);
+}
+
+static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl,
+ bool attach)
+{
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_drv_cmd cmd = { 0 };
+ struct amdxdna_gem_obj *abo;
+ u64 seq;
+ int ret;
+
+ abo = amdxdna_gem_get_obj(client, bo_hdl, AMDXDNA_BO_DEV);
+ if (!abo) {
+ XDNA_ERR(xdna, "Get bo %d failed", bo_hdl);
+ return -EINVAL;
+ }
+
+ if (attach) {
+ if (abo->assigned_hwctx != AMDXDNA_INVALID_CTX_HANDLE) {
+ ret = -EBUSY;
+ goto put_obj;
+ }
+ cmd.opcode = ATTACH_DEBUG_BO;
+ } else {
+ if (abo->assigned_hwctx != hwctx->id) {
+ ret = -EINVAL;
+ goto put_obj;
+ }
+ cmd.opcode = DETACH_DEBUG_BO;
+ }
+
+ ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
+ &bo_hdl, 1, hwctx->id, &seq);
+ if (ret) {
+ XDNA_ERR(xdna, "Submit command failed");
+ goto put_obj;
+ }
+
+ aie2_cmd_wait(hwctx, seq);
+ if (cmd.result) {
+ XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
+ goto put_obj;
+ }
+
+ if (attach)
+ abo->assigned_hwctx = hwctx->id;
+ else
+ abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE;
+
+ XDNA_DBG(xdna, "Config debug BO %d to %s", bo_hdl, hwctx->name);
+
+put_obj:
+ amdxdna_gem_put_obj(abo);
+ return ret;
+}
+
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
{
struct amdxdna_dev *xdna = hwctx->client->xdna;
@@ -754,14 +857,40 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu
case DRM_AMDXDNA_HWCTX_CONFIG_CU:
return aie2_hwctx_cu_config(hwctx, buf, size);
case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
+ return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, true);
case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
- return -EOPNOTSUPP;
+ return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, false);
default:
XDNA_DBG(xdna, "Not supported type %d", type);
return -EOPNOTSUPP;
}
}
+int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl)
+{
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_drv_cmd cmd = { 0 };
+ u64 seq;
+ int ret;
+
+ cmd.opcode = SYNC_DEBUG_BO;
+ ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
+ &debug_bo_hdl, 1, hwctx->id, &seq);
+ if (ret) {
+ XDNA_ERR(xdna, "Submit command failed");
+ return ret;
+ }
+
+ aie2_cmd_wait(hwctx, seq);
+ if (cmd.result) {
+ XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int aie2_populate_range(struct amdxdna_gem_obj *abo)
{
struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
@@ -862,11 +991,15 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
goto free_chain;
}
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto cleanup_job;
+
retry:
ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
if (ret) {
XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
- goto cleanup_job;
+ goto suspend_put;
}
for (i = 0; i < job->bo_cnt; i++) {
@@ -874,7 +1007,7 @@ retry:
if (ret) {
XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
- goto cleanup_job;
+ goto suspend_put;
}
}
@@ -889,12 +1022,12 @@ retry:
msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
} else if (time_after(jiffies, timeout)) {
ret = -ETIME;
- goto cleanup_job;
+ goto suspend_put;
}
ret = aie2_populate_range(abo);
if (ret)
- goto cleanup_job;
+ goto suspend_put;
goto retry;
}
}
@@ -920,6 +1053,8 @@ retry:
return 0;
+suspend_put:
+ amdxdna_pm_suspend_put(xdna);
cleanup_job:
drm_sched_job_cleanup(&job->base);
free_chain:
diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c
index 5ee905632a39..d452008ec4f4 100644
--- a/drivers/accel/amdxdna/aie2_error.c
+++ b/drivers/accel/amdxdna/aie2_error.c
@@ -13,6 +13,7 @@
#include "aie2_msg_priv.h"
#include "aie2_pci.h"
+#include "amdxdna_error.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"
@@ -46,6 +47,7 @@ enum aie_module_type {
AIE_MEM_MOD = 0,
AIE_CORE_MOD,
AIE_PL_MOD,
+ AIE_UNKNOWN_MOD,
};
enum aie_error_category {
@@ -143,6 +145,31 @@ static const struct aie_event_category aie_ml_shim_tile_event_cat[] = {
EVENT_CATEGORY(74U, AIE_ERROR_LOCK),
};
+static const enum amdxdna_error_num aie_cat_err_num_map[] = {
+ [AIE_ERROR_SATURATION] = AMDXDNA_ERROR_NUM_AIE_SATURATION,
+ [AIE_ERROR_FP] = AMDXDNA_ERROR_NUM_AIE_FP,
+ [AIE_ERROR_STREAM] = AMDXDNA_ERROR_NUM_AIE_STREAM,
+ [AIE_ERROR_ACCESS] = AMDXDNA_ERROR_NUM_AIE_ACCESS,
+ [AIE_ERROR_BUS] = AMDXDNA_ERROR_NUM_AIE_BUS,
+ [AIE_ERROR_INSTRUCTION] = AMDXDNA_ERROR_NUM_AIE_INSTRUCTION,
+ [AIE_ERROR_ECC] = AMDXDNA_ERROR_NUM_AIE_ECC,
+ [AIE_ERROR_LOCK] = AMDXDNA_ERROR_NUM_AIE_LOCK,
+ [AIE_ERROR_DMA] = AMDXDNA_ERROR_NUM_AIE_DMA,
+ [AIE_ERROR_MEM_PARITY] = AMDXDNA_ERROR_NUM_AIE_MEM_PARITY,
+ [AIE_ERROR_UNKNOWN] = AMDXDNA_ERROR_NUM_UNKNOWN,
+};
+
+static_assert(ARRAY_SIZE(aie_cat_err_num_map) == AIE_ERROR_UNKNOWN + 1);
+
+static const enum amdxdna_error_module aie_err_mod_map[] = {
+ [AIE_MEM_MOD] = AMDXDNA_ERROR_MODULE_AIE_MEMORY,
+ [AIE_CORE_MOD] = AMDXDNA_ERROR_MODULE_AIE_CORE,
+ [AIE_PL_MOD] = AMDXDNA_ERROR_MODULE_AIE_PL,
+ [AIE_UNKNOWN_MOD] = AMDXDNA_ERROR_MODULE_UNKNOWN,
+};
+
+static_assert(ARRAY_SIZE(aie_err_mod_map) == AIE_UNKNOWN_MOD + 1);
+
static enum aie_error_category
aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type)
{
@@ -176,12 +203,40 @@ aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type)
if (event_id != lut[i].event_id)
continue;
+ if (lut[i].category > AIE_ERROR_UNKNOWN)
+ return AIE_ERROR_UNKNOWN;
+
return lut[i].category;
}
return AIE_ERROR_UNKNOWN;
}
+static void aie2_update_last_async_error(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err)
+{
+ struct aie_error *errs = err_info;
+ enum amdxdna_error_module err_mod;
+ enum aie_error_category aie_err;
+ enum amdxdna_error_num err_num;
+ struct aie_error *last_err;
+
+ last_err = &errs[num_err - 1];
+ if (last_err->mod_type >= AIE_UNKNOWN_MOD) {
+ err_num = aie_cat_err_num_map[AIE_ERROR_UNKNOWN];
+ err_mod = aie_err_mod_map[AIE_UNKNOWN_MOD];
+ } else {
+ aie_err = aie_get_error_category(last_err->row,
+ last_err->event_id,
+ last_err->mod_type);
+ err_num = aie_cat_err_num_map[aie_err];
+ err_mod = aie_err_mod_map[last_err->mod_type];
+ }
+
+ ndev->last_async_err.err_code = AMDXDNA_ERROR_ENCODE(err_num, err_mod);
+ ndev->last_async_err.ts_us = ktime_to_us(ktime_get_real());
+ ndev->last_async_err.ex_err_code = AMDXDNA_EXTRA_ERR_ENCODE(last_err->row, last_err->col);
+}
+
static u32 aie2_error_backtrack(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err)
{
struct aie_error *errs = err_info;
@@ -264,29 +319,14 @@ static void aie2_error_worker(struct work_struct *err_work)
}
mutex_lock(&xdna->dev_lock);
+ aie2_update_last_async_error(e->ndev, info->payload, info->err_cnt);
+
/* Re-sent this event to firmware */
if (aie2_error_event_send(e))
XDNA_WARN(xdna, "Unable to register async event");
mutex_unlock(&xdna->dev_lock);
}
-int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev)
-{
- struct amdxdna_dev *xdna = ndev->xdna;
- struct async_event *e;
- int i, ret;
-
- drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
- for (i = 0; i < ndev->async_events->event_cnt; i++) {
- e = &ndev->async_events->event[i];
- ret = aie2_error_event_send(e);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev)
{
struct amdxdna_dev *xdna = ndev->xdna;
@@ -341,6 +381,10 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
e->size = ASYNC_BUF_SIZE;
e->resp.status = MAX_AIE2_STATUS_CODE;
INIT_WORK(&e->work, aie2_error_worker);
+
+ ret = aie2_error_event_send(e);
+ if (ret)
+ goto free_wq;
}
ndev->async_events = events;
@@ -349,6 +393,8 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
events->event_cnt, events->size);
return 0;
+free_wq:
+ destroy_workqueue(events->wq);
free_buf:
dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf,
events->addr, DMA_FROM_DEVICE);
@@ -356,3 +402,18 @@ free_events:
kfree(events);
return ret;
}
+
+int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev, struct amdxdna_drm_get_array *args)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ args->num_element = 1;
+ args->element_size = sizeof(ndev->last_async_err);
+ if (copy_to_user(u64_to_user_ptr(args->buffer),
+ &ndev->last_async_err, args->element_size))
+ return -EFAULT;
+
+ return 0;
+}
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 9caad083543d..d493bb1c3360 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -27,6 +27,8 @@
#define DECLARE_AIE2_MSG(name, op) \
DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)
+#define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops)
+
static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
struct xdna_mailbox_msg *msg)
{
@@ -37,7 +39,7 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
if (!ndev->mgmt_chann)
return -ENODEV;
- drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ drm_WARN_ON(&xdna->ddev, xdna->rpm_on && !mutex_is_locked(&xdna->dev_lock));
ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
if (ret == -ETIME) {
xdna_mailbox_stop_channel(ndev->mgmt_chann);
@@ -45,7 +47,7 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
ndev->mgmt_chann = NULL;
}
- if (!ret && *hdl->data != AIE2_STATUS_SUCCESS) {
+ if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) {
XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
msg->opcode, *hdl->data);
ret = -EINVAL;
@@ -208,6 +210,14 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
hwctx->fw_ctx_id = resp.context_id;
WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id");
+ if (ndev->force_preempt_enabled) {
+ ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, &hwctx->fw_ctx_id);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to enable force preempt %d", ret);
+ return ret;
+ }
+ }
+
cq_pair = &resp.cq_pair[0];
x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr);
x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr);
@@ -233,6 +243,7 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
ret = -EINVAL;
goto out_destroy_context;
}
+ ndev->hwctx_num++;
XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d",
hwctx->name, ret, resp.msix_id);
@@ -267,6 +278,7 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc
hwctx->fw_ctx_id);
hwctx->priv->mbox_chann = NULL;
hwctx->fw_ctx_id = -1;
+ ndev->hwctx_num--;
return ret;
}
@@ -332,11 +344,6 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
goto fail;
}
- if (resp.status != AIE2_STATUS_SUCCESS) {
- XDNA_ERR(xdna, "Query NPU status failed, status 0x%x", resp.status);
- ret = -EINVAL;
- goto fail;
- }
XDNA_DBG(xdna, "Query NPU status completed");
if (size < resp.size) {
@@ -358,6 +365,55 @@ fail:
return ret;
}
+int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
+ char __user *buf, u32 size,
+ struct amdxdna_drm_query_telemetry_header *header)
+{
+ DECLARE_AIE2_MSG(get_telemetry, MSG_OP_GET_TELEMETRY);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ dma_addr_t dma_addr;
+ u8 *addr;
+ int ret;
+
+ if (header->type >= MAX_TELEMETRY_TYPE)
+ return -EINVAL;
+
+ addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr,
+ DMA_FROM_DEVICE, GFP_KERNEL);
+ if (!addr)
+ return -ENOMEM;
+
+ req.buf_addr = dma_addr;
+ req.buf_size = size;
+ req.type = header->type;
+
+ drm_clflush_virt_range(addr, size); /* device can access */
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret) {
+ XDNA_ERR(xdna, "Query telemetry failed, status %d", ret);
+ goto free_buf;
+ }
+
+ if (size < resp.size) {
+ ret = -EINVAL;
+ XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size);
+ goto free_buf;
+ }
+
+ if (copy_to_user(buf, addr, resp.size)) {
+ ret = -EFAULT;
+ XDNA_ERR(xdna, "Failed to copy telemetry to user space");
+ goto free_buf;
+ }
+
+ header->major = resp.major;
+ header->minor = resp.minor;
+
+free_buf:
+ dma_free_noncoherent(xdna->ddev.dev, size, addr, dma_addr, DMA_FROM_DEVICE);
+ return ret;
+}
+
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
void *handle, int (*cb)(void*, void __iomem *, size_t))
{
@@ -377,15 +433,17 @@ int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr,
return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT);
}
-int aie2_config_cu(struct amdxdna_hwctx *hwctx)
+int aie2_config_cu(struct amdxdna_hwctx *hwctx,
+ int (*notify_cb)(void *, void __iomem *, size_t))
{
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
struct amdxdna_dev *xdna = hwctx->client->xdna;
u32 shift = xdna->dev_info->dev_mem_buf_shift;
- DECLARE_AIE2_MSG(config_cu, MSG_OP_CONFIG_CU);
+ struct config_cu_req req = { 0 };
+ struct xdna_mailbox_msg msg;
struct drm_gem_object *gobj;
struct amdxdna_gem_obj *abo;
- int ret, i;
+ int i;
if (!chann)
return -ENODEV;
@@ -423,191 +481,386 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx)
}
req.num_cus = hwctx->cus->num_cus;
- ret = xdna_send_msg_wait(xdna, chann, &msg);
- if (ret == -ETIME)
- aie2_destroy_context(xdna->dev_handle, hwctx);
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ msg.handle = hwctx;
+ msg.opcode = MSG_OP_CONFIG_CU;
+ msg.notify_cb = notify_cb;
+ return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+}
- if (resp.status == AIE2_STATUS_SUCCESS) {
- XDNA_DBG(xdna, "Configure %d CUs, ret %d", req.num_cus, ret);
- return 0;
- }
+static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op)
+{
+ struct execute_buffer_req *cu_req = req;
+ u32 cmd_len;
+ void *cmd;
- XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret %d",
- msg.opcode, resp.status, ret);
- return ret;
+ cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (cmd_len > sizeof(cu_req->payload))
+ return -EINVAL;
+
+ cu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (cu_req->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ memcpy(cu_req->payload, cmd, cmd_len);
+
+ *size = sizeof(*cu_req);
+ *msg_op = MSG_OP_EXECUTE_BUFFER_CF;
+ return 0;
}
-int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
- int (*notify_cb)(void *, void __iomem *, size_t))
+static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op)
{
- struct mailbox_channel *chann = hwctx->priv->mbox_chann;
- struct amdxdna_dev *xdna = hwctx->client->xdna;
- struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
- union {
- struct execute_buffer_req ebuf;
- struct exec_dpu_req dpu;
- } req;
- struct xdna_mailbox_msg msg;
- u32 payload_len;
- void *payload;
- int cu_idx;
- int ret;
- u32 op;
+ struct exec_dpu_req *dpu_req = req;
+ struct amdxdna_cmd_start_npu *sn;
+ u32 cmd_len;
- if (!chann)
- return -ENODEV;
+ sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (cmd_len - sizeof(*sn) > sizeof(dpu_req->payload))
+ return -EINVAL;
- payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
- if (!payload) {
- XDNA_ERR(xdna, "Invalid command, cannot get payload");
+ dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (dpu_req->cu_idx == INVALID_CU_IDX)
return -EINVAL;
- }
- cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo);
- if (cu_idx < 0) {
- XDNA_DBG(xdna, "Invalid cu idx");
+ dpu_req->inst_buf_addr = sn->buffer;
+ dpu_req->inst_size = sn->buffer_size;
+ dpu_req->inst_prop_cnt = sn->prop_count;
+ memcpy(dpu_req->payload, sn->prop_args, cmd_len - sizeof(*sn));
+
+ *size = sizeof(*dpu_req);
+ *msg_op = MSG_OP_EXEC_DPU;
+ return 0;
+}
+
+static void aie2_init_exec_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
+{
+ struct cmd_chain_req *chain_req = req;
+
+ chain_req->buf_addr = slot_addr;
+ chain_req->buf_size = size;
+ chain_req->count = cmd_cnt;
+}
+
+static void aie2_init_npu_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
+{
+ struct cmd_chain_npu_req *npu_chain_req = req;
+
+ npu_chain_req->flags = 0;
+ npu_chain_req->reserved = 0;
+ npu_chain_req->buf_addr = slot_addr;
+ npu_chain_req->buf_size = size;
+ npu_chain_req->count = cmd_cnt;
+}
+
+static int
+aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_execbuf_cf *cf_slot = slot;
+ u32 cmd_len;
+ void *cmd;
+
+ cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (*size < sizeof(*cf_slot) + cmd_len)
return -EINVAL;
- }
- op = amdxdna_cmd_get_op(cmd_abo);
- switch (op) {
+ cf_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (cf_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ cf_slot->arg_cnt = cmd_len / sizeof(u32);
+ memcpy(cf_slot->args, cmd, cmd_len);
+ /* Accurate slot size to hint firmware to do necessary copy */
+ *size = sizeof(*cf_slot) + cmd_len;
+ return 0;
+}
+
+static int
+aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_dpu *dpu_slot = slot;
+ struct amdxdna_cmd_start_npu *sn;
+ u32 cmd_len;
+ u32 arg_sz;
+
+ sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*sn);
+ if (cmd_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
+ return -EINVAL;
+
+ if (*size < sizeof(*dpu_slot) + arg_sz)
+ return -EINVAL;
+
+ dpu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (dpu_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ dpu_slot->inst_buf_addr = sn->buffer;
+ dpu_slot->inst_size = sn->buffer_size;
+ dpu_slot->inst_prop_cnt = sn->prop_count;
+ dpu_slot->arg_cnt = arg_sz / sizeof(u32);
+ memcpy(dpu_slot->args, sn->prop_args, arg_sz);
+
+ /* Accurate slot size to hint firmware to do necessary copy */
+ *size = sizeof(*dpu_slot) + arg_sz;
+ return 0;
+}
+
+static int aie2_cmdlist_unsupp(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ return -EOPNOTSUPP;
+}
+
+static u32 aie2_get_chain_msg_op(u32 cmd_op)
+{
+ switch (cmd_op) {
case ERT_START_CU:
- if (unlikely(payload_len > sizeof(req.ebuf.payload)))
- XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len);
- req.ebuf.cu_idx = cu_idx;
- memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload));
- msg.send_size = sizeof(req.ebuf);
- msg.opcode = MSG_OP_EXECUTE_BUFFER_CF;
- break;
- case ERT_START_NPU: {
- struct amdxdna_cmd_start_npu *sn = payload;
-
- if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload)))
- XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len);
- req.dpu.inst_buf_addr = sn->buffer;
- req.dpu.inst_size = sn->buffer_size;
- req.dpu.inst_prop_cnt = sn->prop_count;
- req.dpu.cu_idx = cu_idx;
- memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload));
- msg.send_size = sizeof(req.dpu);
- msg.opcode = MSG_OP_EXEC_DPU;
+ return MSG_OP_CHAIN_EXEC_BUFFER_CF;
+ case ERT_START_NPU:
+ return MSG_OP_CHAIN_EXEC_DPU;
+ default:
break;
}
- default:
- XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op);
+
+ return MSG_OP_MAX_OPCODE;
+}
+
+static struct aie2_exec_msg_ops legacy_exec_message_ops = {
+ .init_cu_req = aie2_init_exec_cu_req,
+ .init_dpu_req = aie2_init_exec_dpu_req,
+ .init_chain_req = aie2_init_exec_chain_req,
+ .fill_cf_slot = aie2_cmdlist_fill_cf,
+ .fill_dpu_slot = aie2_cmdlist_fill_dpu,
+ .fill_preempt_slot = aie2_cmdlist_unsupp,
+ .fill_elf_slot = aie2_cmdlist_unsupp,
+ .get_chain_msg_op = aie2_get_chain_msg_op,
+};
+
+static int
+aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ u32 cmd_len;
+ void *cmd;
+
+ cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (*size < sizeof(*npu_slot) + cmd_len)
return -EINVAL;
- }
- msg.handle = job;
- msg.notify_cb = notify_cb;
- msg.send_data = (u8 *)&req;
- print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
- 0x40, false);
- ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
- if (ret) {
- XDNA_ERR(xdna, "Send message failed");
- return ret;
- }
+ npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (npu_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->type = EXEC_NPU_TYPE_NON_ELF;
+ npu_slot->arg_cnt = cmd_len / sizeof(u32);
+ memcpy(npu_slot->args, cmd, cmd_len);
+
+ *size = sizeof(*npu_slot) + cmd_len;
return 0;
}
static int
-aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
- struct amdxdna_gem_obj *abo, u32 *size)
+aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
- struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset;
- int cu_idx = amdxdna_cmd_get_cu_idx(abo);
- u32 payload_len;
- void *payload;
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ struct amdxdna_cmd_start_npu *sn;
+ u32 cmd_len;
+ u32 arg_sz;
- if (cu_idx < 0)
+ sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*sn);
+ if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE)
return -EINVAL;
- payload = amdxdna_cmd_get_payload(abo, &payload_len);
- if (!payload)
+ if (*size < sizeof(*npu_slot) + arg_sz)
return -EINVAL;
- if (!slot_has_space(*buf, offset, payload_len))
- return -ENOSPC;
+ npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (npu_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF;
+ npu_slot->inst_buf_addr = sn->buffer;
+ npu_slot->inst_size = sn->buffer_size;
+ npu_slot->inst_prop_cnt = sn->prop_count;
+ npu_slot->arg_cnt = arg_sz / sizeof(u32);
+ memcpy(npu_slot->args, sn->prop_args, arg_sz);
- buf->cu_idx = cu_idx;
- buf->arg_cnt = payload_len / sizeof(u32);
- memcpy(buf->args, payload, payload_len);
- /* Accurate buf size to hint firmware to do necessary copy */
- *size = sizeof(*buf) + payload_len;
+ *size = sizeof(*npu_slot) + arg_sz;
return 0;
}
static int
-aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
- struct amdxdna_gem_obj *abo, u32 *size)
+aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
- struct cmd_chain_slot_dpu *buf = cmd_buf + offset;
- int cu_idx = amdxdna_cmd_get_cu_idx(abo);
- struct amdxdna_cmd_start_npu *sn;
- u32 payload_len;
- void *payload;
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ struct amdxdna_cmd_preempt_data *pd;
+ u32 cmd_len;
u32 arg_sz;
- if (cu_idx < 0)
+ pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*pd);
+ if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
return -EINVAL;
- payload = amdxdna_cmd_get_payload(abo, &payload_len);
- if (!payload)
+ if (*size < sizeof(*npu_slot) + arg_sz)
return -EINVAL;
- sn = payload;
- arg_sz = payload_len - sizeof(*sn);
- if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
+
+ npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (npu_slot->cu_idx == INVALID_CU_IDX)
return -EINVAL;
- if (!slot_has_space(*buf, offset, arg_sz))
- return -ENOSPC;
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->type = EXEC_NPU_TYPE_PREEMPT;
+ npu_slot->inst_buf_addr = pd->inst_buf;
+ npu_slot->save_buf_addr = pd->save_buf;
+ npu_slot->restore_buf_addr = pd->restore_buf;
+ npu_slot->inst_size = pd->inst_size;
+ npu_slot->save_size = pd->save_size;
+ npu_slot->restore_size = pd->restore_size;
+ npu_slot->inst_prop_cnt = pd->inst_prop_cnt;
+ npu_slot->arg_cnt = arg_sz / sizeof(u32);
+ memcpy(npu_slot->args, pd->prop_args, arg_sz);
+
+ *size = sizeof(*npu_slot) + arg_sz;
+ return 0;
+}
+
+static int
+aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ struct amdxdna_cmd_preempt_data *pd;
+ u32 cmd_len;
+ u32 arg_sz;
+
+ pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*pd);
+ if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
+ return -EINVAL;
- buf->inst_buf_addr = sn->buffer;
- buf->inst_size = sn->buffer_size;
- buf->inst_prop_cnt = sn->prop_count;
- buf->cu_idx = cu_idx;
- buf->arg_cnt = arg_sz / sizeof(u32);
- memcpy(buf->args, sn->prop_args, arg_sz);
+ if (*size < sizeof(*npu_slot) + arg_sz)
+ return -EINVAL;
- /* Accurate buf size to hint firmware to do necessary copy */
- *size = sizeof(*buf) + arg_sz;
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->type = EXEC_NPU_TYPE_ELF;
+ npu_slot->inst_buf_addr = pd->inst_buf;
+ npu_slot->save_buf_addr = pd->save_buf;
+ npu_slot->restore_buf_addr = pd->restore_buf;
+ npu_slot->inst_size = pd->inst_size;
+ npu_slot->save_size = pd->save_size;
+ npu_slot->restore_size = pd->restore_size;
+ npu_slot->inst_prop_cnt = pd->inst_prop_cnt;
+ npu_slot->arg_cnt = 1;
+ npu_slot->args[0] = AIE2_EXEC_BUFFER_KERNEL_OP_TXN;
+
+ *size = struct_size(npu_slot, args, npu_slot->arg_cnt);
return 0;
}
-static int
-aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset,
- struct amdxdna_gem_obj *abo, u32 *size)
+static u32 aie2_get_npu_chain_msg_op(u32 cmd_op)
+{
+ return MSG_OP_CHAIN_EXEC_NPU;
+}
+
+static struct aie2_exec_msg_ops npu_exec_message_ops = {
+ .init_cu_req = aie2_init_exec_cu_req,
+ .init_dpu_req = aie2_init_exec_dpu_req,
+ .init_chain_req = aie2_init_npu_chain_req,
+ .fill_cf_slot = aie2_cmdlist_fill_npu_cf,
+ .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu,
+ .fill_preempt_slot = aie2_cmdlist_fill_npu_preempt,
+ .fill_elf_slot = aie2_cmdlist_fill_npu_elf,
+ .get_chain_msg_op = aie2_get_npu_chain_msg_op,
+};
+
+static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj *cmd_abo,
+ size_t *size, u32 *msg_op)
{
- u32 this_op = amdxdna_cmd_get_op(abo);
- void *cmd_buf = cmdbuf_abo->mem.kva;
+ struct amdxdna_dev *xdna = cmd_abo->client->xdna;
int ret;
+ u32 op;
- if (this_op != op) {
- ret = -EINVAL;
- goto done;
- }
+ op = amdxdna_cmd_get_op(cmd_abo);
switch (op) {
case ERT_START_CU:
- ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size);
+ ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size, msg_op);
+ if (ret) {
+ XDNA_DBG(xdna, "Init CU req failed ret %d", ret);
+ return ret;
+ }
break;
case ERT_START_NPU:
- ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size);
+ ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size, msg_op);
+ if (ret) {
+ XDNA_DBG(xdna, "Init DPU req failed ret %d", ret);
+ return ret;
+ }
+
break;
default:
+ XDNA_ERR(xdna, "Unsupported op %d", op);
ret = -EOPNOTSUPP;
+ break;
}
-done:
- if (ret) {
- XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d",
- op, ret);
+ return ret;
+}
+
+static int
+aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo,
+ size_t *size, u32 *cmd_op)
+{
+ struct amdxdna_dev *xdna = cmd_abo->client->xdna;
+ int ret;
+ u32 op;
+
+ op = amdxdna_cmd_get_op(cmd_abo);
+ if (*cmd_op == ERT_INVALID_CMD)
+ *cmd_op = op;
+ else if (op != *cmd_op)
+ return -EINVAL;
+
+ switch (op) {
+ case ERT_START_CU:
+ ret = EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size);
+ break;
+ case ERT_START_NPU:
+ ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size);
+ break;
+ case ERT_START_NPU_PREEMPT:
+ if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT))
+ return -EOPNOTSUPP;
+ ret = EXEC_MSG_OPS(xdna)->fill_preempt_slot(cmd_abo, slot, size);
+ break;
+ case ERT_START_NPU_PREEMPT_ELF:
+ if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT))
+ return -EOPNOTSUPP;
+ ret = EXEC_MSG_OPS(xdna)->fill_elf_slot(cmd_abo, slot, size);
+ break;
+ default:
+ XDNA_INFO(xdna, "Unsupported op %d", op);
+ ret = -EOPNOTSUPP;
+ break;
}
+
return ret;
}
+void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
+{
+ if (AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND))
+ ndev->exec_msg_ops = &npu_exec_message_ops;
+ else
+ ndev->exec_msg_ops = &legacy_exec_message_ops;
+}
+
static inline struct amdxdna_gem_obj *
aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
{
@@ -616,29 +869,36 @@ aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
return job->hwctx->priv->cmd_buf[idx];
}
-static void
-aie2_cmdlist_prepare_request(struct cmd_chain_req *req,
- struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt)
+int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t))
{
- req->buf_addr = cmdbuf_abo->mem.dev_addr;
- req->buf_size = size;
- req->count = cnt;
- drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
- XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d",
- req->buf_addr, size, cnt);
-}
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct xdna_mailbox_msg msg;
+ union exec_req req;
+ int ret;
-static inline u32
-aie2_cmd_op_to_msg_op(u32 op)
-{
- switch (op) {
- case ERT_START_CU:
- return MSG_OP_CHAIN_EXEC_BUFFER_CF;
- case ERT_START_NPU:
- return MSG_OP_CHAIN_EXEC_DPU;
- default:
- return MSG_OP_MAX_OPCODE;
+ if (!chann)
+ return -ENODEV;
+
+ ret = aie2_init_exec_req(&req, cmd_abo, &msg.send_size, &msg.opcode);
+ if (ret)
+ return ret;
+
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
+ 0x40, false);
+
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(xdna, "Send message failed");
+ return ret;
}
+
+ return 0;
}
int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
@@ -649,12 +909,13 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
struct amdxdna_client *client = hwctx->client;
struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct amdxdna_dev *xdna = client->xdna;
struct amdxdna_cmd_chain *payload;
struct xdna_mailbox_msg msg;
- struct cmd_chain_req req;
+ union exec_chain_req req;
u32 payload_len;
u32 offset = 0;
- u32 size;
+ size_t size;
int ret;
u32 op;
u32 i;
@@ -665,41 +926,42 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
payload_len < struct_size(payload, data, payload->command_count))
return -EINVAL;
+ op = ERT_INVALID_CMD;
for (i = 0; i < payload->command_count; i++) {
u32 boh = (u32)(payload->data[i]);
struct amdxdna_gem_obj *abo;
abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
if (!abo) {
- XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh);
+ XDNA_ERR(xdna, "Failed to find cmd BO %d", boh);
return -ENOENT;
}
- /* All sub-cmd should have same op, use the first one. */
- if (i == 0)
- op = amdxdna_cmd_get_op(abo);
-
- ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size);
+ size = cmdbuf_abo->mem.size - offset;
+ ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva + offset,
+ abo, &size, &op);
amdxdna_gem_put_obj(abo);
if (ret)
- return -EINVAL;
+ return ret;
offset += size;
}
+ msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
+ if (msg.opcode == MSG_OP_MAX_OPCODE)
+ return -EOPNOTSUPP;
/* The offset is the accumulated total size of the cmd buffer */
- aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count);
+ EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
+ offset, payload->command_count);
+ drm_clflush_virt_range(cmdbuf_abo->mem.kva, offset);
- msg.opcode = aie2_cmd_op_to_msg_op(op);
- if (msg.opcode == MSG_OP_MAX_OPCODE)
- return -EOPNOTSUPP;
msg.handle = job;
msg.notify_cb = notify_cb;
msg.send_data = (u8 *)&req;
msg.send_size = sizeof(req);
ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
if (ret) {
- XDNA_ERR(hwctx->client->xdna, "Send message failed");
+ XDNA_ERR(xdna, "Send message failed");
return ret;
}
@@ -712,23 +974,27 @@ int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
{
struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
struct xdna_mailbox_msg msg;
- struct cmd_chain_req req;
- u32 size;
+ union exec_chain_req req;
+ u32 op = ERT_INVALID_CMD;
+ size_t size;
int ret;
- u32 op;
- op = amdxdna_cmd_get_op(cmd_abo);
- ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size);
+ size = cmdbuf_abo->mem.size;
+ ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva, cmd_abo, &size, &op);
if (ret)
return ret;
- aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1);
-
- msg.opcode = aie2_cmd_op_to_msg_op(op);
+ msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
if (msg.opcode == MSG_OP_MAX_OPCODE)
return -EOPNOTSUPP;
+
+ EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
+ size, 1);
+ drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
+
msg.handle = job;
msg.notify_cb = notify_cb;
msg.send_data = (u8 *)&req;
@@ -753,7 +1019,7 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
int ret = 0;
req.src_addr = 0;
- req.dst_addr = abo->mem.dev_addr - hwctx->client->dev_heap->mem.dev_addr;
+ req.dst_addr = amdxdna_dev_bo_offset(abo);
req.size = abo->mem.size;
/* Device to Host */
@@ -777,3 +1043,32 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
return 0;
}
+
+int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t))
+{
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct config_debug_bo_req req;
+ struct xdna_mailbox_msg msg;
+
+ if (job->drv_cmd->opcode == ATTACH_DEBUG_BO)
+ req.config = DEBUG_BO_REGISTER;
+ else
+ req.config = DEBUG_BO_UNREGISTER;
+
+ req.offset = amdxdna_dev_bo_offset(abo);
+ req.size = abo->mem.size;
+
+ XDNA_DBG(xdna, "offset 0x%llx size 0x%llx config %d",
+ req.offset, req.size, req.config);
+
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ msg.opcode = MSG_OP_CONFIG_DEBUG_BO;
+
+ return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+}
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
index 6df9065b13f6..1c957a6298d3 100644
--- a/drivers/accel/amdxdna/aie2_msg_priv.h
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -9,7 +9,8 @@
enum aie2_msg_opcode {
MSG_OP_CREATE_CONTEXT = 0x2,
MSG_OP_DESTROY_CONTEXT = 0x3,
- MSG_OP_SYNC_BO = 0x7,
+ MSG_OP_GET_TELEMETRY = 0x4,
+ MSG_OP_SYNC_BO = 0x7,
MSG_OP_EXECUTE_BUFFER_CF = 0xC,
MSG_OP_QUERY_COL_STATUS = 0xD,
MSG_OP_QUERY_AIE_TILE_INFO = 0xE,
@@ -18,6 +19,8 @@ enum aie2_msg_opcode {
MSG_OP_CONFIG_CU = 0x11,
MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12,
MSG_OP_CHAIN_EXEC_DPU = 0x13,
+ MSG_OP_CONFIG_DEBUG_BO = 0x14,
+ MSG_OP_CHAIN_EXEC_NPU = 0x18,
MSG_OP_MAX_XRT_OPCODE,
MSG_OP_SUSPEND = 0x101,
MSG_OP_RESUME = 0x102,
@@ -135,6 +138,28 @@ struct destroy_ctx_resp {
enum aie2_msg_status status;
} __packed;
+enum telemetry_type {
+ TELEMETRY_TYPE_DISABLED,
+ TELEMETRY_TYPE_HEALTH,
+ TELEMETRY_TYPE_ERROR_INFO,
+ TELEMETRY_TYPE_PROFILING,
+ TELEMETRY_TYPE_DEBUG,
+ MAX_TELEMETRY_TYPE
+};
+
+struct get_telemetry_req {
+ enum telemetry_type type;
+ __u64 buf_addr;
+ __u32 buf_size;
+} __packed;
+
+struct get_telemetry_resp {
+ __u32 major;
+ __u32 minor;
+ __u32 size;
+ enum aie2_msg_status status;
+} __packed;
+
struct execute_buffer_req {
__u32 cu_idx;
__u32 payload[19];
@@ -148,6 +173,18 @@ struct exec_dpu_req {
__u32 payload[35];
} __packed;
+enum exec_npu_type {
+ EXEC_NPU_TYPE_NON_ELF = 0x1,
+ EXEC_NPU_TYPE_PARTIAL_ELF = 0x2,
+ EXEC_NPU_TYPE_PREEMPT = 0x3,
+ EXEC_NPU_TYPE_ELF = 0x4,
+};
+
+union exec_req {
+ struct execute_buffer_req ebuf;
+ struct exec_dpu_req dpu_req;
+};
+
struct execute_buffer_resp {
enum aie2_msg_status status;
} __packed;
@@ -319,9 +356,6 @@ struct async_event_msg_resp {
} __packed;
#define MAX_CHAIN_CMDBUF_SIZE SZ_4K
-#define slot_has_space(slot, offset, payload_size) \
- (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \
- sizeof(typeof(slot)))
struct cmd_chain_slot_execbuf_cf {
__u32 cu_idx;
@@ -339,12 +373,41 @@ struct cmd_chain_slot_dpu {
__u32 args[] __counted_by(arg_cnt);
};
+#define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32))
+#define AIE2_EXEC_BUFFER_KERNEL_OP_TXN 3
+struct cmd_chain_slot_npu {
+ enum exec_npu_type type;
+ u64 inst_buf_addr;
+ u64 save_buf_addr;
+ u64 restore_buf_addr;
+ u32 inst_size;
+ u32 save_size;
+ u32 restore_size;
+ u32 inst_prop_cnt;
+ u32 cu_idx;
+ u32 arg_cnt;
+ u32 args[] __counted_by(arg_cnt);
+} __packed;
+
struct cmd_chain_req {
__u64 buf_addr;
__u32 buf_size;
__u32 count;
} __packed;
+struct cmd_chain_npu_req {
+ u32 flags;
+ u32 reserved;
+ u64 buf_addr;
+ u32 buf_size;
+ u32 count;
+} __packed;
+
+union exec_chain_req {
+ struct cmd_chain_npu_req npu_req;
+ struct cmd_chain_req req;
+};
+
struct cmd_chain_resp {
enum aie2_msg_status status;
__u32 fail_cmd_idx;
@@ -365,4 +428,21 @@ struct sync_bo_req {
struct sync_bo_resp {
enum aie2_msg_status status;
} __packed;
+
+#define DEBUG_BO_UNREGISTER 0
+#define DEBUG_BO_REGISTER 1
+struct config_debug_bo_req {
+ __u64 offset;
+ __u64 size;
+ /*
+ * config operations.
+ * DEBUG_BO_REGISTER: Register debug buffer
+ * DEBUG_BO_UNREGISTER: Unregister debug buffer
+ */
+ __u32 config;
+} __packed;
+
+struct config_debug_bo_resp {
+ enum aie2_msg_status status;
+} __packed;
#endif /* _AIE2_MSG_PRIV_H_ */
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 87c425e3d2b9..ceef1c502e9e 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -25,6 +25,7 @@
#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
static int aie2_max_col = XRS_MAX_COL;
module_param(aie2_max_col, uint, 0600);
@@ -54,6 +55,7 @@ struct mgmt_mbox_chann_info {
static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor)
{
+ const struct aie2_fw_feature_tbl *feature;
struct amdxdna_dev *xdna = ndev->xdna;
/*
@@ -77,6 +79,17 @@ static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 f
XDNA_ERR(xdna, "Firmware minor version smaller than supported");
return -EINVAL;
}
+
+ for (feature = ndev->priv->fw_feature_tbl; feature && feature->min_minor;
+ feature++) {
+ if (fw_minor < feature->min_minor)
+ continue;
+ if (feature->max_minor > 0 && fw_minor > feature->max_minor)
+ continue;
+
+ set_bit(feature->feature, &ndev->feature_mask);
+ }
+
return 0;
}
@@ -170,6 +183,10 @@ int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
if (cfg->category != category)
continue;
+ if (cfg->feature_mask &&
+ bitmap_subset(&cfg->feature_mask, &ndev->feature_mask, AIE2_FEATURE_MAX))
+ continue;
+
value = val ? *val : cfg->value;
ret = aie2_set_runtime_cfg(ndev, cfg->type, value);
if (ret) {
@@ -223,15 +240,6 @@ static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
return ret;
}
- if (!ndev->async_events)
- return 0;
-
- ret = aie2_error_async_events_send(ndev);
- if (ret) {
- XDNA_ERR(ndev->xdna, "Send async events failed");
- return ret;
- }
-
return 0;
}
@@ -257,6 +265,8 @@ static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev)
return ret;
}
+ ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
+
return 0;
}
@@ -338,6 +348,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
ndev->mbox = NULL;
aie2_psp_stop(ndev->psp_hdl);
aie2_smu_fini(ndev);
+ aie2_error_async_events_free(ndev);
pci_disable_device(pdev);
ndev->dev_status = AIE2_DEV_INIT;
@@ -424,6 +435,18 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
goto destroy_mgmt_chann;
}
+ ret = aie2_mgmt_fw_query(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to query fw, ret %d", ret);
+ goto destroy_mgmt_chann;
+ }
+
+ ret = aie2_error_async_events_alloc(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
+ goto destroy_mgmt_chann;
+ }
+
ndev->dev_status = AIE2_DEV_START;
return 0;
@@ -459,7 +482,6 @@ static int aie2_hw_resume(struct amdxdna_dev *xdna)
struct amdxdna_client *client;
int ret;
- guard(mutex)(&xdna->dev_lock);
ret = aie2_hw_start(xdna);
if (ret) {
XDNA_ERR(xdna, "Start hardware failed, %d", ret);
@@ -565,13 +587,6 @@ static int aie2_init(struct amdxdna_dev *xdna)
goto release_fw;
}
- ret = aie2_mgmt_fw_query(ndev);
- if (ret) {
- XDNA_ERR(xdna, "Query firmware failed, ret %d", ret);
- goto stop_hw;
- }
- ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
-
xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
@@ -587,30 +602,11 @@ static int aie2_init(struct amdxdna_dev *xdna)
goto stop_hw;
}
- ret = aie2_error_async_events_alloc(ndev);
- if (ret) {
- XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
- goto stop_hw;
- }
-
- ret = aie2_error_async_events_send(ndev);
- if (ret) {
- XDNA_ERR(xdna, "Send async events failed, ret %d", ret);
- goto async_event_free;
- }
-
- /* Issue a command to make sure firmware handled async events */
- ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
- if (ret) {
- XDNA_ERR(xdna, "Re-query firmware version failed");
- goto async_event_free;
- }
-
release_firmware(fw);
+ aie2_msg_init(ndev);
+ amdxdna_pm_init(xdna);
return 0;
-async_event_free:
- aie2_error_async_events_free(ndev);
stop_hw:
aie2_hw_stop(xdna);
release_fw:
@@ -621,10 +617,8 @@ release_fw:
static void aie2_fini(struct amdxdna_dev *xdna)
{
- struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
-
+ amdxdna_pm_fini(xdna);
aie2_hw_stop(xdna);
- aie2_error_async_events_free(ndev);
}
static int aie2_get_aie_status(struct amdxdna_client *client,
@@ -845,7 +839,120 @@ static int aie2_get_hwctx_status(struct amdxdna_client *client,
}
args->buffer_size -= (u32)(array_args.buffer - args->buffer);
- return ret;
+ return 0;
+}
+
+static int aie2_query_resource_info(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_get_resource_info res_info;
+ const struct amdxdna_dev_priv *priv;
+ struct amdxdna_dev_hdl *ndev;
+ struct amdxdna_dev *xdna;
+
+ xdna = client->xdna;
+ ndev = xdna->dev_handle;
+ priv = ndev->priv;
+
+ res_info.npu_clk_max = priv->dpm_clk_tbl[ndev->max_dpm_level].hclk;
+ res_info.npu_tops_max = ndev->max_tops;
+ res_info.npu_task_max = priv->hwctx_limit;
+ res_info.npu_tops_curr = ndev->curr_tops;
+ res_info.npu_task_curr = ndev->hwctx_num;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &res_info, sizeof(res_info)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int aie2_fill_hwctx_map(struct amdxdna_hwctx *hwctx, void *arg)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ u32 *map = arg;
+
+ if (hwctx->fw_ctx_id >= xdna->dev_handle->priv->hwctx_limit) {
+ XDNA_ERR(xdna, "Invalid fw ctx id %d/%d ", hwctx->fw_ctx_id,
+ xdna->dev_handle->priv->hwctx_limit);
+ return -EINVAL;
+ }
+
+ map[hwctx->fw_ctx_id] = hwctx->id;
+ return 0;
+}
+
+static int aie2_get_telemetry(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_telemetry_header *header __free(kfree) = NULL;
+ u32 telemetry_data_sz, header_sz, elem_num;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_client *tmp_client;
+ int ret;
+
+ elem_num = xdna->dev_handle->priv->hwctx_limit;
+ header_sz = struct_size(header, map, elem_num);
+ if (args->buffer_size <= header_sz) {
+ XDNA_ERR(xdna, "Invalid buffer size");
+ return -EINVAL;
+ }
+
+ telemetry_data_sz = args->buffer_size - header_sz;
+ if (telemetry_data_sz > SZ_4M) {
+ XDNA_ERR(xdna, "Buffer size is too big, %d", telemetry_data_sz);
+ return -EINVAL;
+ }
+
+ header = kzalloc(header_sz, GFP_KERNEL);
+ if (!header)
+ return -ENOMEM;
+
+ if (copy_from_user(header, u64_to_user_ptr(args->buffer), sizeof(*header))) {
+ XDNA_ERR(xdna, "Failed to copy telemetry header from user");
+ return -EFAULT;
+ }
+
+ header->map_num_elements = elem_num;
+ list_for_each_entry(tmp_client, &xdna->client_list, node) {
+ ret = amdxdna_hwctx_walk(tmp_client, &header->map,
+ aie2_fill_hwctx_map);
+ if (ret)
+ return ret;
+ }
+
+ ret = aie2_query_telemetry(xdna->dev_handle,
+ u64_to_user_ptr(args->buffer + header_sz),
+ telemetry_data_sz, header);
+ if (ret) {
+ XDNA_ERR(xdna, "Query telemetry failed ret %d", ret);
+ return ret;
+ }
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), header, header_sz)) {
+ XDNA_ERR(xdna, "Copy header failed");
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int aie2_get_preempt_state(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_attribute_state state = {};
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+
+ ndev = xdna->dev_handle;
+ if (args->param == DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE)
+ state.state = ndev->force_preempt_enabled;
+ else if (args->param == DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE)
+ state.state = ndev->frame_boundary_preempt;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &state, sizeof(state)))
+ return -EFAULT;
+
+ return 0;
}
static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
@@ -856,6 +963,10 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
if (!drm_dev_enter(&xdna->ddev, &idx))
return -ENODEV;
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto dev_exit;
+
switch (args->param) {
case DRM_AMDXDNA_QUERY_AIE_STATUS:
ret = aie2_get_aie_status(client, args);
@@ -878,12 +989,25 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
case DRM_AMDXDNA_GET_POWER_MODE:
ret = aie2_get_power_mode(client, args);
break;
+ case DRM_AMDXDNA_QUERY_TELEMETRY:
+ ret = aie2_get_telemetry(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_RESOURCE_INFO:
+ ret = aie2_query_resource_info(client, args);
+ break;
+ case DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE:
+ case DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE:
+ ret = aie2_get_preempt_state(client, args);
+ break;
default:
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
ret = -EOPNOTSUPP;
}
+
+ amdxdna_pm_suspend_put(xdna);
XDNA_DBG(xdna, "Got param %d", args->param);
+dev_exit:
drm_dev_exit(idx);
return ret;
}
@@ -898,6 +1022,12 @@ static int aie2_query_ctx_status_array(struct amdxdna_client *client,
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ if (args->element_size > SZ_4K || args->num_element > SZ_1K) {
+ XDNA_DBG(xdna, "Invalid element size %d or number of element %d",
+ args->element_size, args->num_element);
+ return -EINVAL;
+ }
+
array_args.element_size = min(args->element_size,
sizeof(struct amdxdna_drm_hwctx_entry));
array_args.buffer = args->buffer;
@@ -914,7 +1044,7 @@ static int aie2_query_ctx_status_array(struct amdxdna_client *client,
args->num_element = (u32)((array_args.buffer - args->buffer) /
args->element_size);
- return ret;
+ return 0;
}
static int aie2_get_array(struct amdxdna_client *client,
@@ -926,16 +1056,26 @@ static int aie2_get_array(struct amdxdna_client *client,
if (!drm_dev_enter(&xdna->ddev, &idx))
return -ENODEV;
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto dev_exit;
+
switch (args->param) {
case DRM_AMDXDNA_HW_CONTEXT_ALL:
ret = aie2_query_ctx_status_array(client, args);
break;
+ case DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
+ ret = aie2_get_array_async_error(xdna->dev_handle, args);
+ break;
default:
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
ret = -EOPNOTSUPP;
}
+
+ amdxdna_pm_suspend_put(xdna);
XDNA_DBG(xdna, "Got param %d", args->param);
+dev_exit:
drm_dev_exit(idx);
return ret;
}
@@ -965,6 +1105,38 @@ static int aie2_set_power_mode(struct amdxdna_client *client,
return aie2_pm_set_mode(xdna->dev_handle, power_mode);
}
+static int aie2_set_preempt_state(struct amdxdna_client *client,
+ struct amdxdna_drm_set_state *args)
+{
+ struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle;
+ struct amdxdna_drm_attribute_state state;
+ u32 val;
+ int ret;
+
+ if (copy_from_user(&state, u64_to_user_ptr(args->buffer), sizeof(state)))
+ return -EFAULT;
+
+ if (state.state > 1)
+ return -EINVAL;
+
+ if (XDNA_MBZ_DBG(client->xdna, state.pad, sizeof(state.pad)))
+ return -EINVAL;
+
+ if (args->param == DRM_AMDXDNA_SET_FORCE_PREEMPT) {
+ ndev->force_preempt_enabled = state.state;
+ } else if (args->param == DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT) {
+ val = state.state;
+ ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT,
+ &val);
+ if (ret)
+ return ret;
+
+ ndev->frame_boundary_preempt = state.state;
+ }
+
+ return 0;
+}
+
static int aie2_set_state(struct amdxdna_client *client,
struct amdxdna_drm_set_state *args)
{
@@ -974,16 +1146,26 @@ static int aie2_set_state(struct amdxdna_client *client,
if (!drm_dev_enter(&xdna->ddev, &idx))
return -ENODEV;
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto dev_exit;
+
switch (args->param) {
case DRM_AMDXDNA_SET_POWER_MODE:
ret = aie2_set_power_mode(client, args);
break;
+ case DRM_AMDXDNA_SET_FORCE_PREEMPT:
+ case DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT:
+ ret = aie2_set_preempt_state(client, args);
+ break;
default:
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
ret = -EOPNOTSUPP;
break;
}
+ amdxdna_pm_suspend_put(xdna);
+dev_exit:
drm_dev_exit(idx);
return ret;
}
@@ -998,6 +1180,7 @@ const struct amdxdna_dev_ops aie2_ops = {
.hwctx_init = aie2_hwctx_init,
.hwctx_fini = aie2_hwctx_fini,
.hwctx_config = aie2_hwctx_config,
+ .hwctx_sync_debug_bo = aie2_hwctx_sync_debug_bo,
.cmd_submit = aie2_cmd_submit,
.hmm_invalidate = aie2_hmm_invalidate,
.get_array = aie2_get_array,
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 91a8e948f82a..a5f9c42155d1 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -110,12 +110,15 @@ struct aie_metadata {
enum rt_config_category {
AIE2_RT_CFG_INIT,
AIE2_RT_CFG_CLK_GATING,
+ AIE2_RT_CFG_FORCE_PREEMPT,
+ AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT,
};
struct rt_config {
u32 type;
u32 value;
u32 category;
+ unsigned long feature_mask;
};
struct dpm_clk_freq {
@@ -156,6 +159,19 @@ enum aie2_dev_status {
AIE2_DEV_START,
};
+struct aie2_exec_msg_ops {
+ int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op);
+ int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op);
+ void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt);
+ int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ u32 (*get_chain_msg_op)(u32 cmd_op);
+};
+
struct amdxdna_dev_hdl {
struct amdxdna_dev *xdna;
const struct amdxdna_dev_priv *priv;
@@ -173,6 +189,8 @@ struct amdxdna_dev_hdl {
u32 total_col;
struct aie_version version;
struct aie_metadata metadata;
+ unsigned long feature_mask;
+ struct aie2_exec_msg_ops *exec_msg_ops;
/* power management and clock*/
enum amdxdna_power_mode_type pw_mode;
@@ -182,6 +200,10 @@ struct amdxdna_dev_hdl {
u32 clk_gating;
u32 npuclk_freq;
u32 hclk_freq;
+ u32 max_tops;
+ u32 curr_tops;
+ u32 force_preempt_enabled;
+ u32 frame_boundary_preempt;
/* Mailbox and the management channel */
struct mailbox *mbox;
@@ -190,6 +212,8 @@ struct amdxdna_dev_hdl {
enum aie2_dev_status dev_status;
u32 hwctx_num;
+
+ struct amdxdna_async_error last_async_err;
};
#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
@@ -204,12 +228,27 @@ struct aie2_hw_ops {
int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
};
+enum aie2_fw_feature {
+ AIE2_NPU_COMMAND,
+ AIE2_PREEMPT,
+ AIE2_FEATURE_MAX
+};
+
+struct aie2_fw_feature_tbl {
+ enum aie2_fw_feature feature;
+ u32 max_minor;
+ u32 min_minor;
+};
+
+#define AIE2_FEATURE_ON(ndev, feature) test_bit(feature, &(ndev)->feature_mask)
+
struct amdxdna_dev_priv {
const char *fw_path;
u64 protocol_major;
u64 protocol_minor;
const struct rt_config *rt_config;
const struct dpm_clk_freq *dpm_clk_tbl;
+ const struct aie2_fw_feature_tbl *fw_feature_tbl;
#define COL_ALIGN_NONE 0
#define COL_ALIGN_NATURE 1
@@ -217,6 +256,7 @@ struct amdxdna_dev_priv {
u32 mbox_dev_addr;
/* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */
u32 mbox_size;
+ u32 hwctx_limit;
u32 sram_dev_addr;
struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX];
struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS];
@@ -234,6 +274,7 @@ extern const struct dpm_clk_freq npu1_dpm_clk_table[];
extern const struct dpm_clk_freq npu4_dpm_clk_table[];
extern const struct rt_config npu1_default_rt_cfg[];
extern const struct rt_config npu4_default_rt_cfg[];
+extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[];
/* aie2_smu.c */
int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
@@ -253,10 +294,12 @@ void aie2_psp_stop(struct psp_device *psp);
/* aie2_error.c */
int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev);
void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev);
-int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev);
int aie2_error_async_msg_thread(void *data);
+int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
+ struct amdxdna_drm_get_array *args);
/* aie2_message.c */
+void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
@@ -270,9 +313,13 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
+int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
+ char __user *buf, u32 size,
+ struct amdxdna_drm_query_telemetry_header *header);
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
void *handle, int (*cb)(void*, void __iomem *, size_t));
-int aie2_config_cu(struct amdxdna_hwctx *hwctx);
+int aie2_config_cu(struct amdxdna_hwctx *hwctx,
+ int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
@@ -283,11 +330,14 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t));
/* aie2_hwctx.c */
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
+int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
void aie2_hwctx_suspend(struct amdxdna_client *client);
int aie2_hwctx_resume(struct amdxdna_client *client);
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c
index d303701b0ded..bd94ee96c2bc 100644
--- a/drivers/accel/amdxdna/aie2_smu.c
+++ b/drivers/accel/amdxdna/aie2_smu.c
@@ -11,6 +11,7 @@
#include "aie2_pci.h"
#include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
#define SMU_RESULT_OK 1
@@ -22,6 +23,13 @@
#define AIE2_SMU_SET_SOFT_DPMLEVEL 0x7
#define AIE2_SMU_SET_HARD_DPMLEVEL 0x8
+#define NPU4_DPM_TOPS(ndev, dpm_level) \
+({ \
+ typeof(ndev) _ndev = ndev; \
+ (4096 * (_ndev)->total_col * \
+ (_ndev)->priv->dpm_clk_tbl[dpm_level].hclk / 1000000); \
+})
+
static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd,
u32 reg_arg, u32 *out)
{
@@ -59,12 +67,16 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
u32 freq;
int ret;
+ ret = amdxdna_pm_resume_get(ndev->xdna);
+ if (ret)
+ return ret;
+
ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
if (ret) {
XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
- return ret;
+ goto suspend_put;
}
ndev->npuclk_freq = freq;
@@ -73,49 +85,78 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
if (ret) {
XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
- return ret;
+ goto suspend_put;
}
+
+ amdxdna_pm_suspend_put(ndev->xdna);
ndev->hclk_freq = freq;
ndev->dpm_level = dpm_level;
+ ndev->max_tops = 2 * ndev->total_col;
+ ndev->curr_tops = ndev->max_tops * freq / 1028;
XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
ndev->npuclk_freq, ndev->hclk_freq);
return 0;
+
+suspend_put:
+ amdxdna_pm_suspend_put(ndev->xdna);
+ return ret;
}
int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
{
int ret;
+ ret = amdxdna_pm_resume_get(ndev->xdna);
+ if (ret)
+ return ret;
+
ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL);
if (ret) {
XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
dpm_level, ret);
- return ret;
+ goto suspend_put;
}
ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL);
if (ret) {
XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
dpm_level, ret);
- return ret;
+ goto suspend_put;
}
+ amdxdna_pm_suspend_put(ndev->xdna);
ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
ndev->dpm_level = dpm_level;
+ ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->max_dpm_level);
+ ndev->curr_tops = NPU4_DPM_TOPS(ndev, dpm_level);
XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
ndev->npuclk_freq, ndev->hclk_freq);
return 0;
+
+suspend_put:
+ amdxdna_pm_suspend_put(ndev->xdna);
+ return ret;
}
int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
{
int ret;
+ /*
+ * Failing to set power off indicates an unrecoverable hardware or
+ * firmware error.
+ */
+ ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Access power failed, ret %d", ret);
+ return ret;
+ }
+
ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL);
if (ret) {
XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index 4bfe4ef20550..d17aef89a0ad 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -113,14 +113,14 @@ void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
return &cmd->data[num_masks];
}
-int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
+u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
{
struct amdxdna_cmd *cmd = abo->mem.kva;
u32 num_masks, i;
u32 *cu_mask;
if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
- return -1;
+ return INVALID_CU_IDX;
num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
cu_mask = cmd->data;
@@ -129,7 +129,7 @@ int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
return ffs(cu_mask[i]) - 1;
}
- return -1;
+ return INVALID_CU_IDX;
}
/*
@@ -161,19 +161,14 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
if (args->ext || args->ext_flags)
return -EINVAL;
- if (!drm_dev_enter(dev, &idx))
- return -ENODEV;
-
hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
- if (!hwctx) {
- ret = -ENOMEM;
- goto exit;
- }
+ if (!hwctx)
+ return -ENOMEM;
if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) {
XDNA_ERR(xdna, "Access QoS info failed");
- ret = -EFAULT;
- goto free_hwctx;
+ kfree(hwctx);
+ return -EFAULT;
}
hwctx->client = client;
@@ -181,30 +176,36 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
hwctx->num_tiles = args->num_tiles;
hwctx->mem_size = args->mem_size;
hwctx->max_opc = args->max_opc;
- ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
- XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
- &client->next_hwctxid, GFP_KERNEL);
- if (ret < 0) {
- XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
+
+ guard(mutex)(&xdna->dev_lock);
+
+ if (!drm_dev_enter(dev, &idx)) {
+ ret = -ENODEV;
goto free_hwctx;
}
- hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id);
+ ret = xdna->dev_info->ops->hwctx_init(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
+ goto dev_exit;
+ }
+
+ hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->fw_ctx_id);
if (!hwctx->name) {
ret = -ENOMEM;
- goto rm_id;
+ goto fini_hwctx;
}
- mutex_lock(&xdna->dev_lock);
- ret = xdna->dev_info->ops->hwctx_init(hwctx);
- if (ret) {
- mutex_unlock(&xdna->dev_lock);
- XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
+ ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
+ XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
+ &client->next_hwctxid, GFP_KERNEL);
+ if (ret < 0) {
+ XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
goto free_name;
}
+
args->handle = hwctx->id;
args->syncobj_handle = hwctx->syncobj_hdl;
- mutex_unlock(&xdna->dev_lock);
atomic64_set(&hwctx->job_submit_cnt, 0);
atomic64_set(&hwctx->job_free_cnt, 0);
@@ -214,12 +215,12 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
free_name:
kfree(hwctx->name);
-rm_id:
- xa_erase(&client->hwctx_xa, hwctx->id);
+fini_hwctx:
+ xdna->dev_info->ops->hwctx_fini(hwctx);
+dev_exit:
+ drm_dev_exit(idx);
free_hwctx:
kfree(hwctx);
-exit:
- drm_dev_exit(idx);
return ret;
}
@@ -327,6 +328,38 @@ unlock_srcu:
return ret;
}
+int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_hwctx *hwctx;
+ struct amdxdna_gem_obj *abo;
+ struct drm_gem_object *gobj;
+ int ret, idx;
+
+ if (!xdna->dev_info->ops->hwctx_sync_debug_bo)
+ return -EOPNOTSUPP;
+
+ gobj = drm_gem_object_lookup(client->filp, debug_bo_hdl);
+ if (!gobj)
+ return -EINVAL;
+
+ abo = to_xdna_obj(gobj);
+ guard(mutex)(&xdna->dev_lock);
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ hwctx = xa_load(&client->hwctx_xa, abo->assigned_hwctx);
+ if (!hwctx) {
+ ret = -EINVAL;
+ goto unlock_srcu;
+ }
+
+ ret = xdna->dev_info->ops->hwctx_sync_debug_bo(hwctx, debug_bo_hdl);
+
+unlock_srcu:
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ drm_gem_object_put(gobj);
+ return ret;
+}
+
static void
amdxdna_arg_bos_put(struct amdxdna_sched_job *job)
{
@@ -389,9 +422,11 @@ void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job)
trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release");
amdxdna_arg_bos_put(job);
amdxdna_gem_put_obj(job->cmd_bo);
+ dma_fence_put(job->fence);
}
int amdxdna_cmd_submit(struct amdxdna_client *client,
+ struct amdxdna_drv_cmd *drv_cmd,
u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt,
u32 hwctx_hdl, u64 *seq)
{
@@ -405,6 +440,8 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
if (!job)
return -ENOMEM;
+ job->drv_cmd = drv_cmd;
+
if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) {
job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD);
if (!job->cmd_bo) {
@@ -412,8 +449,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
ret = -EINVAL;
goto free_job;
}
- } else {
- job->cmd_bo = NULL;
}
ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt);
@@ -431,11 +466,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
goto unlock_srcu;
}
- if (hwctx->status != HWCTX_STAT_READY) {
- XDNA_ERR(xdna, "HW Context is not ready");
- ret = -EINVAL;
- goto unlock_srcu;
- }
job->hwctx = hwctx;
job->mm = current->mm;
@@ -512,7 +542,7 @@ static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
}
}
- ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls,
+ ret = amdxdna_cmd_submit(client, NULL, cmd_bo_hdl, arg_bo_hdls,
args->arg_count, args->hwctx, &args->seq);
if (ret)
XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index 7cd7a55936f0..b6151244d64f 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -13,9 +13,12 @@
struct amdxdna_hwctx_priv;
enum ert_cmd_opcode {
- ERT_START_CU = 0,
- ERT_CMD_CHAIN = 19,
- ERT_START_NPU = 20,
+ ERT_START_CU = 0,
+ ERT_CMD_CHAIN = 19,
+ ERT_START_NPU = 20,
+ ERT_START_NPU_PREEMPT = 21,
+ ERT_START_NPU_PREEMPT_ELF = 22,
+ ERT_INVALID_CMD = ~0U,
};
enum ert_cmd_state {
@@ -54,6 +57,21 @@ struct amdxdna_cmd_chain {
u64 data[] __counted_by(command_count);
};
+/*
+ * Interpretation of the beginning of data payload for ERT_START_NPU_PREEMPT in
+ * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args.
+ */
+struct amdxdna_cmd_preempt_data {
+ u64 inst_buf; /* instruction buffer address */
+ u64 save_buf; /* save buffer address */
+ u64 restore_buf; /* restore buffer address */
+ u32 inst_size; /* size of instruction buffer in bytes */
+ u32 save_size; /* size of save buffer in bytes */
+ u32 restore_size; /* size of restore buffer in bytes */
+ u32 inst_prop_cnt; /* properties count */
+ u32 prop_args[]; /* properties and regular kernel arguments */
+};
+
/* Exec buffer command header format */
#define AMDXDNA_CMD_STATE GENMASK(3, 0)
#define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10)
@@ -64,6 +82,8 @@ struct amdxdna_cmd {
u32 data[];
};
+#define INVALID_CU_IDX (~0U)
+
struct amdxdna_hwctx {
struct amdxdna_client *client;
struct amdxdna_hwctx_priv *priv;
@@ -95,6 +115,17 @@ struct amdxdna_hwctx {
#define drm_job_to_xdna_job(j) \
container_of(j, struct amdxdna_sched_job, base)
+enum amdxdna_job_opcode {
+ SYNC_DEBUG_BO,
+ ATTACH_DEBUG_BO,
+ DETACH_DEBUG_BO,
+};
+
+struct amdxdna_drv_cmd {
+ enum amdxdna_job_opcode opcode;
+ u32 result;
+};
+
struct amdxdna_sched_job {
struct drm_sched_job base;
struct kref refcnt;
@@ -105,7 +136,9 @@ struct amdxdna_sched_job {
/* user can wait on this fence */
struct dma_fence *out_fence;
bool job_done;
+ bool job_timeout;
u64 seq;
+ struct amdxdna_drv_cmd *drv_cmd;
struct amdxdna_gem_obj *cmd_bo;
size_t bo_cnt;
struct drm_gem_object *bos[] __counted_by(bo_cnt);
@@ -137,15 +170,17 @@ amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
}
void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size);
-int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
+u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
int amdxdna_hwctx_walk(struct amdxdna_client *client, void *arg,
int (*walk)(struct amdxdna_hwctx *hwctx, void *arg));
+int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl);
int amdxdna_cmd_submit(struct amdxdna_client *client,
- u32 cmd_bo_hdls, u32 *arg_bo_hdls, u32 arg_bo_cnt,
+ struct amdxdna_drv_cmd *drv_cmd, u32 cmd_bo_hdls,
+ u32 *arg_bo_hdls, u32 arg_bo_cnt,
u32 hwctx_hdl, u64 *seq);
int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
diff --git a/drivers/accel/amdxdna/amdxdna_error.h b/drivers/accel/amdxdna/amdxdna_error.h
new file mode 100644
index 000000000000..c51de86ec12b
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_error.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_ERROR_H_
+#define _AMDXDNA_ERROR_H_
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+
+#define AMDXDNA_ERR_DRV_AIE 4
+#define AMDXDNA_ERR_SEV_CRITICAL 3
+#define AMDXDNA_ERR_CLASS_AIE 2
+
+#define AMDXDNA_ERR_NUM_MASK GENMASK_U64(15, 0)
+#define AMDXDNA_ERR_DRV_MASK GENMASK_U64(23, 16)
+#define AMDXDNA_ERR_SEV_MASK GENMASK_U64(31, 24)
+#define AMDXDNA_ERR_MOD_MASK GENMASK_U64(39, 32)
+#define AMDXDNA_ERR_CLASS_MASK GENMASK_U64(47, 40)
+
+enum amdxdna_error_num {
+ AMDXDNA_ERROR_NUM_AIE_SATURATION = 3,
+ AMDXDNA_ERROR_NUM_AIE_FP,
+ AMDXDNA_ERROR_NUM_AIE_STREAM,
+ AMDXDNA_ERROR_NUM_AIE_ACCESS,
+ AMDXDNA_ERROR_NUM_AIE_BUS,
+ AMDXDNA_ERROR_NUM_AIE_INSTRUCTION,
+ AMDXDNA_ERROR_NUM_AIE_ECC,
+ AMDXDNA_ERROR_NUM_AIE_LOCK,
+ AMDXDNA_ERROR_NUM_AIE_DMA,
+ AMDXDNA_ERROR_NUM_AIE_MEM_PARITY,
+ AMDXDNA_ERROR_NUM_UNKNOWN = 15,
+};
+
+enum amdxdna_error_module {
+ AMDXDNA_ERROR_MODULE_AIE_CORE = 3,
+ AMDXDNA_ERROR_MODULE_AIE_MEMORY,
+ AMDXDNA_ERROR_MODULE_AIE_SHIM,
+ AMDXDNA_ERROR_MODULE_AIE_NOC,
+ AMDXDNA_ERROR_MODULE_AIE_PL,
+ AMDXDNA_ERROR_MODULE_UNKNOWN = 8,
+};
+
+#define AMDXDNA_ERROR_ENCODE(err_num, err_mod) \
+ (FIELD_PREP(AMDXDNA_ERR_NUM_MASK, err_num) | \
+ FIELD_PREP_CONST(AMDXDNA_ERR_DRV_MASK, AMDXDNA_ERR_DRV_AIE) | \
+ FIELD_PREP_CONST(AMDXDNA_ERR_SEV_MASK, AMDXDNA_ERR_SEV_CRITICAL) | \
+ FIELD_PREP(AMDXDNA_ERR_MOD_MASK, err_mod) | \
+ FIELD_PREP_CONST(AMDXDNA_ERR_CLASS_MASK, AMDXDNA_ERR_CLASS_AIE))
+
+#define AMDXDNA_EXTRA_ERR_COL_MASK GENMASK_U64(7, 0)
+#define AMDXDNA_EXTRA_ERR_ROW_MASK GENMASK_U64(15, 8)
+
+#define AMDXDNA_EXTRA_ERR_ENCODE(row, col) \
+ (FIELD_PREP(AMDXDNA_EXTRA_ERR_COL_MASK, col) | \
+ FIELD_PREP(AMDXDNA_EXTRA_ERR_ROW_MASK, row))
+
+#endif /* _AMDXDNA_ERROR_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index d407a36eb412..dfa916eeb2d9 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -8,6 +8,7 @@
#include <drm/drm_device.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <linux/dma-buf.h>
#include <linux/dma-direct.h>
@@ -392,35 +393,33 @@ static const struct dma_buf_ops amdxdna_dmabuf_ops = {
.vunmap = drm_gem_dmabuf_vunmap,
};
-static int amdxdna_gem_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
+static int amdxdna_gem_obj_vmap(struct amdxdna_gem_obj *abo, void **vaddr)
{
- struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
-
- iosys_map_clear(map);
-
- dma_resv_assert_held(obj->resv);
+ struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL);
+ int ret;
if (is_import_bo(abo))
- dma_buf_vmap(abo->dma_buf, map);
+ ret = dma_buf_vmap_unlocked(abo->dma_buf, &map);
else
- drm_gem_shmem_object_vmap(obj, map);
+ ret = drm_gem_vmap(to_gobj(abo), &map);
- if (!map->vaddr)
- return -ENOMEM;
-
- return 0;
+ *vaddr = map.vaddr;
+ return ret;
}
-static void amdxdna_gem_obj_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
+static void amdxdna_gem_obj_vunmap(struct amdxdna_gem_obj *abo)
{
- struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
+ struct iosys_map map;
+
+ if (!abo->mem.kva)
+ return;
- dma_resv_assert_held(obj->resv);
+ iosys_map_set_vaddr(&map, abo->mem.kva);
if (is_import_bo(abo))
- dma_buf_vunmap(abo->dma_buf, map);
+ dma_buf_vunmap_unlocked(abo->dma_buf, &map);
else
- drm_gem_shmem_object_vunmap(obj, map);
+ drm_gem_vunmap(to_gobj(abo), &map);
}
static struct dma_buf *amdxdna_gem_prime_export(struct drm_gem_object *gobj, int flags)
@@ -455,7 +454,6 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
{
struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
- struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva);
XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr);
@@ -468,7 +466,7 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
if (abo->type == AMDXDNA_BO_DEV_HEAP)
drm_mm_takedown(&abo->mm);
- drm_gem_vunmap(gobj, &map);
+ amdxdna_gem_obj_vunmap(abo);
mutex_destroy(&abo->lock);
if (is_import_bo(abo)) {
@@ -489,8 +487,8 @@ static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
.pin = drm_gem_shmem_object_pin,
.unpin = drm_gem_shmem_object_unpin,
.get_sg_table = drm_gem_shmem_object_get_sg_table,
- .vmap = amdxdna_gem_obj_vmap,
- .vunmap = amdxdna_gem_obj_vunmap,
+ .vmap = drm_gem_shmem_object_vmap,
+ .vunmap = drm_gem_shmem_object_vunmap,
.mmap = amdxdna_gem_obj_mmap,
.vm_ops = &drm_gem_shmem_vm_ops,
.export = amdxdna_gem_prime_export,
@@ -663,7 +661,6 @@ amdxdna_drm_create_dev_heap(struct drm_device *dev,
struct drm_file *filp)
{
struct amdxdna_client *client = filp->driver_priv;
- struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL);
struct amdxdna_dev *xdna = to_xdna_dev(dev);
struct amdxdna_gem_obj *abo;
int ret;
@@ -692,12 +689,11 @@ amdxdna_drm_create_dev_heap(struct drm_device *dev,
abo->mem.dev_addr = client->xdna->dev_info->dev_mem_base;
drm_mm_init(&abo->mm, abo->mem.dev_addr, abo->mem.size);
- ret = drm_gem_vmap(to_gobj(abo), &map);
+ ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva);
if (ret) {
XDNA_ERR(xdna, "Vmap heap bo failed, ret %d", ret);
goto release_obj;
}
- abo->mem.kva = map.vaddr;
client->dev_heap = abo;
drm_gem_object_get(to_gobj(abo));
@@ -748,7 +744,6 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev,
struct amdxdna_drm_create_bo *args,
struct drm_file *filp)
{
- struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL);
struct amdxdna_dev *xdna = to_xdna_dev(dev);
struct amdxdna_gem_obj *abo;
int ret;
@@ -770,12 +765,11 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev,
abo->type = AMDXDNA_BO_CMD;
abo->client = filp->driver_priv;
- ret = drm_gem_vmap(to_gobj(abo), &map);
+ ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva);
if (ret) {
XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret);
goto release_obj;
}
- abo->mem.kva = map.vaddr;
return abo;
@@ -969,6 +963,9 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
XDNA_DBG(xdna, "Sync bo %d offset 0x%llx, size 0x%llx\n",
args->handle, args->offset, args->size);
+ if (args->direction == SYNC_DIRECT_FROM_DEVICE)
+ ret = amdxdna_hwctx_sync_debug_bo(abo->client, args->handle);
+
put_obj:
drm_gem_object_put(gobj);
return ret;
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
index ae29db94a9d3..f79fc7f3c93b 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.h
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -7,6 +7,7 @@
#define _AMDXDNA_GEM_H_
#include <linux/hmm.h>
+#include "amdxdna_pci_drv.h"
struct amdxdna_umap {
struct vm_area_struct *vma;
@@ -62,6 +63,11 @@ static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo)
drm_gem_object_put(to_gobj(abo));
}
+static inline u64 amdxdna_dev_bo_offset(struct amdxdna_gem_obj *abo)
+{
+ return abo->mem.dev_addr - abo->client->dev_heap->mem.dev_addr;
+}
+
void amdxdna_umap_put(struct amdxdna_umap *mapp);
struct drm_gem_object *
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
index da1ac89bb78f..858df97cd3fb 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -194,7 +194,8 @@ static void mailbox_release_msg(struct mailbox_channel *mb_chann,
{
MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x",
mb_msg->pkg.header.id, mb_msg->pkg.header.opcode);
- mb_msg->notify_cb(mb_msg->handle, NULL, 0);
+ if (mb_msg->notify_cb)
+ mb_msg->notify_cb(mb_msg->handle, NULL, 0);
kfree(mb_msg);
}
@@ -248,7 +249,7 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade
{
struct mailbox_msg *mb_msg;
int msg_id;
- int ret;
+ int ret = 0;
msg_id = header->id;
if (!mailbox_validate_msgid(msg_id)) {
@@ -265,9 +266,11 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade
MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
header->opcode, header->total_size, header->id);
- ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
- if (unlikely(ret))
- MB_ERR(mb_chann, "Message callback ret %d", ret);
+ if (mb_msg->notify_cb) {
+ ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
+ if (unlikely(ret))
+ MB_ERR(mb_chann, "Message callback ret %d", ret);
+ }
kfree(mb_msg);
return ret;
@@ -513,6 +516,7 @@ xdna_mailbox_create_channel(struct mailbox *mb,
}
mb_chann->bad_state = false;
+ mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0);
MB_DBG(mb_chann, "Mailbox channel created (irq: %d)", mb_chann->msix_irq);
return mb_chann;
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.h b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
index 710ff8873d61..556c712cad0a 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
+++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
@@ -16,16 +16,18 @@ struct xdna_notify {
u32 *data;
size_t size;
int error;
+ u32 *status;
};
-#define DECLARE_XDNA_MSG_COMMON(name, op, status) \
+#define DECLARE_XDNA_MSG_COMMON(name, op, s) \
struct name##_req req = { 0 }; \
- struct name##_resp resp = { status }; \
+ struct name##_resp resp = { .status = s }; \
struct xdna_notify hdl = { \
.error = 0, \
.data = (u32 *)&resp, \
.size = sizeof(resp), \
.comp = COMPLETION_INITIALIZER_ONSTACK(hdl.comp), \
+ .status = (u32 *)&resp.status, \
}; \
struct xdna_mailbox_msg msg = { \
.send_data = (u8 *)&req, \
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 569cd703729d..1973ab67721b 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -13,13 +13,11 @@
#include <drm/gpu_scheduler.h>
#include <linux/iommu.h>
#include <linux/pci.h>
-#include <linux/pm_runtime.h>
#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_pci_drv.h"
-
-#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
+#include "amdxdna_pm.h"
MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
@@ -29,9 +27,14 @@ MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin");
/*
* 0.0: Initial version
* 0.1: Support getting all hardware contexts by DRM_IOCTL_AMDXDNA_GET_ARRAY
+ * 0.2: Support getting last error hardware error
+ * 0.3: Support firmware debug buffer
+ * 0.4: Support getting resource information
+ * 0.5: Support getting telemetry data
+ * 0.6: Support preemption
*/
#define AMDXDNA_DRIVER_MAJOR 0
-#define AMDXDNA_DRIVER_MINOR 1
+#define AMDXDNA_DRIVER_MINOR 6
/*
* Bind the driver base on (vendor_id, device_id) pair and later use the
@@ -61,17 +64,9 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
struct amdxdna_client *client;
int ret;
- ret = pm_runtime_resume_and_get(ddev->dev);
- if (ret) {
- XDNA_ERR(xdna, "Failed to get rpm, ret %d", ret);
- return ret;
- }
-
client = kzalloc(sizeof(*client), GFP_KERNEL);
- if (!client) {
- ret = -ENOMEM;
- goto put_rpm;
- }
+ if (!client)
+ return -ENOMEM;
client->pid = pid_nr(rcu_access_pointer(filp->pid));
client->xdna = xdna;
@@ -106,9 +101,6 @@ unbind_sva:
iommu_sva_unbind_device(client->sva);
failed:
kfree(client);
-put_rpm:
- pm_runtime_mark_last_busy(ddev->dev);
- pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
@@ -130,8 +122,6 @@ static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp)
XDNA_DBG(xdna, "pid %d closed", client->pid);
kfree(client);
- pm_runtime_mark_last_busy(ddev->dev);
- pm_runtime_put_autosuspend(ddev->dev);
}
static int amdxdna_flush(struct file *f, fl_owner_t id)
@@ -310,19 +300,12 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto failed_dev_fini;
}
- pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
- pm_runtime_use_autosuspend(dev);
- pm_runtime_allow(dev);
-
ret = drm_dev_register(&xdna->ddev, 0);
if (ret) {
XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
- pm_runtime_forbid(dev);
goto failed_sysfs_fini;
}
- pm_runtime_mark_last_busy(dev);
- pm_runtime_put_autosuspend(dev);
return 0;
failed_sysfs_fini:
@@ -339,14 +322,10 @@ destroy_notifier_wq:
static void amdxdna_remove(struct pci_dev *pdev)
{
struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
- struct device *dev = &pdev->dev;
struct amdxdna_client *client;
destroy_workqueue(xdna->notifier_wq);
- pm_runtime_get_noresume(dev);
- pm_runtime_forbid(dev);
-
drm_dev_unplug(&xdna->ddev);
amdxdna_sysfs_fini(xdna);
@@ -365,29 +344,9 @@ static void amdxdna_remove(struct pci_dev *pdev)
mutex_unlock(&xdna->dev_lock);
}
-static int amdxdna_pmops_suspend(struct device *dev)
-{
- struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
-
- if (!xdna->dev_info->ops->suspend)
- return -EOPNOTSUPP;
-
- return xdna->dev_info->ops->suspend(xdna);
-}
-
-static int amdxdna_pmops_resume(struct device *dev)
-{
- struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
-
- if (!xdna->dev_info->ops->resume)
- return -EOPNOTSUPP;
-
- return xdna->dev_info->ops->resume(xdna);
-}
-
static const struct dev_pm_ops amdxdna_pm_ops = {
- SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume)
- RUNTIME_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume, NULL)
+ SYSTEM_SLEEP_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume)
+ RUNTIME_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume, NULL)
};
static struct pci_driver amdxdna_pci_driver = {
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index 72d6696d49da..c99477f5e454 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -6,6 +6,7 @@
#ifndef _AMDXDNA_PCI_DRV_H_
#define _AMDXDNA_PCI_DRV_H_
+#include <drm/drm_print.h>
#include <linux/workqueue.h>
#include <linux/xarray.h>
@@ -54,6 +55,7 @@ struct amdxdna_dev_ops {
int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
+ int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
@@ -99,6 +101,7 @@ struct amdxdna_dev {
struct amdxdna_fw_ver fw_ver;
struct rw_semaphore notifier_lock; /* for mmu notifier*/
struct workqueue_struct *notifier_wq;
+ bool rpm_on;
};
/*
diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/amdxdna/amdxdna_pm.c
new file mode 100644
index 000000000000..fa38e65d617c
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pm.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_drv.h>
+#include <linux/pm_runtime.h>
+
+#include "amdxdna_pm.h"
+
+#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
+
+int amdxdna_pm_suspend(struct device *dev)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
+ int ret = -EOPNOTSUPP;
+ bool rpm;
+
+ if (xdna->dev_info->ops->suspend) {
+ rpm = xdna->rpm_on;
+ xdna->rpm_on = false;
+ ret = xdna->dev_info->ops->suspend(xdna);
+ xdna->rpm_on = rpm;
+ }
+
+ XDNA_DBG(xdna, "Suspend done ret %d", ret);
+ return ret;
+}
+
+int amdxdna_pm_resume(struct device *dev)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
+ int ret = -EOPNOTSUPP;
+ bool rpm;
+
+ if (xdna->dev_info->ops->resume) {
+ rpm = xdna->rpm_on;
+ xdna->rpm_on = false;
+ ret = xdna->dev_info->ops->resume(xdna);
+ xdna->rpm_on = rpm;
+ }
+
+ XDNA_DBG(xdna, "Resume done ret %d", ret);
+ return ret;
+}
+
+int amdxdna_pm_resume_get(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+ int ret;
+
+ if (!xdna->rpm_on)
+ return 0;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret) {
+ XDNA_ERR(xdna, "Resume failed: %d", ret);
+ pm_runtime_set_suspended(dev);
+ }
+
+ return ret;
+}
+
+void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+
+ if (!xdna->rpm_on)
+ return;
+
+ pm_runtime_put_autosuspend(dev);
+}
+
+void amdxdna_pm_init(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+
+ pm_runtime_set_active(dev);
+ pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_allow(dev);
+ pm_runtime_put_autosuspend(dev);
+ xdna->rpm_on = true;
+}
+
+void amdxdna_pm_fini(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+
+ xdna->rpm_on = false;
+ pm_runtime_get_noresume(dev);
+ pm_runtime_forbid(dev);
+}
diff --git a/drivers/accel/amdxdna/amdxdna_pm.h b/drivers/accel/amdxdna/amdxdna_pm.h
new file mode 100644
index 000000000000..77b2d6e45570
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pm.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_PM_H_
+#define _AMDXDNA_PM_H_
+
+#include "amdxdna_pci_drv.h"
+
+int amdxdna_pm_suspend(struct device *dev);
+int amdxdna_pm_resume(struct device *dev);
+int amdxdna_pm_resume_get(struct amdxdna_dev *xdna);
+void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna);
+void amdxdna_pm_init(struct amdxdna_dev *xdna);
+void amdxdna_pm_fini(struct amdxdna_dev *xdna);
+
+#endif /* _AMDXDNA_PM_H_ */
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index e4f6dac7d00f..ec407f3b48fc 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -46,6 +46,7 @@
const struct rt_config npu1_default_rt_cfg[] = {
{ 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 4, 1, AIE2_RT_CFG_INIT }, /* Debug BO */
{ 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
{ 0 },
};
@@ -62,16 +63,23 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = {
{ 0 }
};
+static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = {
+ { .feature = AIE2_NPU_COMMAND, .min_minor = 8 },
+ { 0 }
+};
+
static const struct amdxdna_dev_priv npu1_dev_priv = {
.fw_path = "amdnpu/1502_00/npu.sbin",
.protocol_major = 0x5,
.protocol_minor = 0x7,
.rt_config = npu1_default_rt_cfg,
.dpm_clk_tbl = npu1_dpm_clk_table,
+ .fw_feature_tbl = npu1_fw_feature_table,
.col_align = COL_ALIGN_NONE,
.mbox_dev_addr = NPU1_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
.sram_dev_addr = NPU1_SRAM_BAR_BASE,
+ .hwctx_limit = 6,
.sram_offs = {
DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU1_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU1_SRAM, MPNPU_SRAM_I2X_MAILBOX_15),
diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
index a081cac75ee0..86f87d0d1354 100644
--- a/drivers/accel/amdxdna/npu2_regs.c
+++ b/drivers/accel/amdxdna/npu2_regs.c
@@ -67,10 +67,12 @@ static const struct amdxdna_dev_priv npu2_dev_priv = {
.protocol_minor = 0x6,
.rt_config = npu4_default_rt_cfg,
.dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU2_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
.sram_dev_addr = NPU2_SRAM_BAR_BASE,
+ .hwctx_limit = 16,
.sram_offs = {
DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index 9f2e33182ec6..986a5f28ba24 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -63,10 +63,14 @@
const struct rt_config npu4_default_rt_cfg[] = {
{ 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 10, 1, AIE2_RT_CFG_INIT }, /* DEBUG BUF */
+ { 14, 0, AIE2_RT_CFG_INIT, BIT_U64(AIE2_PREEMPT) }, /* Frame boundary preemption */
{ 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
{ 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
{ 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
{ 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 13, 0, AIE2_RT_CFG_FORCE_PREEMPT },
+ { 14, 0, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT },
{ 0 },
};
@@ -82,16 +86,24 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
{ 0 }
};
+const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
+ { .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
+ { .feature = AIE2_PREEMPT, .min_minor = 12 },
+ { 0 }
+};
+
static const struct amdxdna_dev_priv npu4_dev_priv = {
.fw_path = "amdnpu/17f0_10/npu.sbin",
.protocol_major = 0x6,
.protocol_minor = 12,
.rt_config = npu4_default_rt_cfg,
.dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU4_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
.sram_dev_addr = NPU4_SRAM_BAR_BASE,
+ .hwctx_limit = 16,
.sram_offs = {
DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
index 5f1cf83461c4..75ad97f0b937 100644
--- a/drivers/accel/amdxdna/npu5_regs.c
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -67,10 +67,12 @@ static const struct amdxdna_dev_priv npu5_dev_priv = {
.protocol_minor = 12,
.rt_config = npu4_default_rt_cfg,
.dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU5_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
.sram_dev_addr = NPU5_SRAM_BAR_BASE,
+ .hwctx_limit = 16,
.sram_offs = {
DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
index 94a7005685a7..758dc013fe13 100644
--- a/drivers/accel/amdxdna/npu6_regs.c
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -67,10 +67,12 @@ static const struct amdxdna_dev_priv npu6_dev_priv = {
.protocol_minor = 12,
.rt_config = npu4_default_rt_cfg,
.dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU6_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
.sram_dev_addr = NPU6_SRAM_BAR_BASE,
+ .hwctx_limit = 16,
.sram_offs = {
DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),