summaryrefslogtreecommitdiff
path: root/drivers/accel/qaic
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/accel/qaic')
-rw-r--r--drivers/accel/qaic/Kconfig1
-rw-r--r--drivers/accel/qaic/Makefile2
-rw-r--r--drivers/accel/qaic/qaic.h40
-rw-r--r--drivers/accel/qaic/qaic_control.c25
-rw-r--r--drivers/accel/qaic/qaic_data.c164
-rw-r--r--drivers/accel/qaic/qaic_drv.c116
-rw-r--r--drivers/accel/qaic/qaic_ras.c6
-rw-r--r--drivers/accel/qaic/qaic_ssr.c815
-rw-r--r--drivers/accel/qaic/qaic_ssr.h17
-rw-r--r--drivers/accel/qaic/qaic_sysfs.c109
-rw-r--r--drivers/accel/qaic/qaic_timesync.c9
-rw-r--r--drivers/accel/qaic/qaic_timesync.h3
-rw-r--r--drivers/accel/qaic/sahara.c164
13 files changed, 1364 insertions, 107 deletions
diff --git a/drivers/accel/qaic/Kconfig b/drivers/accel/qaic/Kconfig
index 5e405a19c157..116e42d152ca 100644
--- a/drivers/accel/qaic/Kconfig
+++ b/drivers/accel/qaic/Kconfig
@@ -9,6 +9,7 @@ config DRM_ACCEL_QAIC
depends on PCI && HAS_IOMEM
depends on MHI_BUS
select CRC32
+ select WANT_DEV_COREDUMP
help
Enables driver for Qualcomm's Cloud AI accelerator PCIe cards that are
designed to accelerate Deep Learning inference workloads.
diff --git a/drivers/accel/qaic/Makefile b/drivers/accel/qaic/Makefile
index 1106b876f737..71f727b74da3 100644
--- a/drivers/accel/qaic/Makefile
+++ b/drivers/accel/qaic/Makefile
@@ -11,6 +11,8 @@ qaic-y := \
qaic_data.o \
qaic_drv.o \
qaic_ras.o \
+ qaic_ssr.o \
+ qaic_sysfs.o \
qaic_timesync.o \
sahara.o
diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
index 820d133236dd..fa7a8155658c 100644
--- a/drivers/accel/qaic/qaic.h
+++ b/drivers/accel/qaic/qaic.h
@@ -21,6 +21,7 @@
#define QAIC_DBC_BASE SZ_128K
#define QAIC_DBC_SIZE SZ_4K
+#define QAIC_SSR_DBC_SENTINEL U32_MAX /* No ongoing SSR sentinel */
#define QAIC_NO_PARTITION -1
@@ -47,6 +48,22 @@ enum __packed dev_states {
QAIC_ONLINE,
};
+enum dbc_states {
+ /* DBC is free and can be activated */
+ DBC_STATE_IDLE,
+ /* DBC is activated and a workload is running on device */
+ DBC_STATE_ASSIGNED,
+ /* Sub-system associated with this workload has crashed and it will shutdown soon */
+ DBC_STATE_BEFORE_SHUTDOWN,
+ /* Sub-system associated with this workload has crashed and it has shutdown */
+ DBC_STATE_AFTER_SHUTDOWN,
+ /* Sub-system associated with this workload is shutdown and it will be powered up soon */
+ DBC_STATE_BEFORE_POWER_UP,
+ /* Sub-system associated with this workload is now powered up */
+ DBC_STATE_AFTER_POWER_UP,
+ DBC_STATE_MAX,
+};
+
extern bool datapath_polling;
struct qaic_user {
@@ -114,6 +131,8 @@ struct dma_bridge_chan {
unsigned int irq;
/* Polling work item to simulate interrupts */
struct work_struct poll_work;
+ /* Represents various states of this DBC from enum dbc_states */
+ unsigned int state;
};
struct qaic_device {
@@ -161,6 +180,8 @@ struct qaic_device {
struct mhi_device *qts_ch;
/* Work queue for tasks related to MHI "QAIC_TIMESYNC" channel */
struct workqueue_struct *qts_wq;
+ /* MHI "QAIC_TIMESYNC_PERIODIC" channel device */
+ struct mhi_device *mqts_ch;
/* Head of list of page allocated by MHI bootlog device */
struct list_head bootlog;
/* MHI bootlog channel device */
@@ -177,6 +198,14 @@ struct qaic_device {
unsigned int ue_count;
/* Un-correctable non-fatal error count */
unsigned int ue_nf_count;
+ /* MHI SSR channel device */
+ struct mhi_device *ssr_ch;
+ /* Work queue for tasks related to MHI SSR device */
+ struct workqueue_struct *ssr_wq;
+ /* Buffer to collect SSR crashdump via SSR MHI channel */
+ void *ssr_mhi_buf;
+ /* DBC which is under SSR. Sentinel U32_MAX would mean that no SSR in progress */
+ u32 ssr_dbc;
};
struct qaic_drm_device {
@@ -195,6 +224,8 @@ struct qaic_drm_device {
struct list_head users;
/* Synchronizes access to users list */
struct mutex users_mutex;
+ /* Pointer to array of DBC sysfs attributes */
+ void *sysfs_attrs;
};
struct qaic_bo {
@@ -317,6 +348,13 @@ int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm
int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
-void irq_polling_work(struct work_struct *work);
+void qaic_irq_polling_work(struct work_struct *work);
+void qaic_dbc_enter_ssr(struct qaic_device *qdev, u32 dbc_id);
+void qaic_dbc_exit_ssr(struct qaic_device *qdev);
+
+/* qaic_sysfs.c */
+int qaic_sysfs_init(struct qaic_drm_device *qddev);
+void qaic_sysfs_remove(struct qaic_drm_device *qddev);
+void set_dbc_state(struct qaic_device *qdev, u32 dbc_id, unsigned int state);
#endif /* _QAIC_H_ */
diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c
index b86a8e48e731..428d8f65bff3 100644
--- a/drivers/accel/qaic/qaic_control.c
+++ b/drivers/accel/qaic/qaic_control.c
@@ -17,6 +17,7 @@
#include <linux/overflow.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>
+#include <linux/sched/signal.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/workqueue.h>
@@ -30,7 +31,7 @@
#define MANAGE_MAGIC_NUMBER ((__force __le32)0x43494151) /* "QAIC" in little endian */
#define QAIC_DBC_Q_GAP SZ_256
#define QAIC_DBC_Q_BUF_ALIGN SZ_4K
-#define QAIC_MANAGE_EXT_MSG_LENGTH SZ_64K /* Max DMA message length */
+#define QAIC_MANAGE_WIRE_MSG_LENGTH SZ_64K /* Max DMA message length */
#define QAIC_WRAPPER_MAX_SIZE SZ_4K
#define QAIC_MHI_RETRY_WAIT_MS 100
#define QAIC_MHI_RETRY_MAX 20
@@ -309,6 +310,7 @@ static void save_dbc_buf(struct qaic_device *qdev, struct ioctl_resources *resou
enable_dbc(qdev, dbc_id, usr);
qdev->dbc[dbc_id].in_use = true;
resources->buf = NULL;
+ set_dbc_state(qdev, dbc_id, DBC_STATE_ASSIGNED);
}
}
@@ -367,7 +369,7 @@ static int encode_passthrough(struct qaic_device *qdev, void *trans, struct wrap
if (in_trans->hdr.len % 8 != 0)
return -EINVAL;
- if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_EXT_MSG_LENGTH)
+ if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_WIRE_MSG_LENGTH)
return -ENOSPC;
trans_wrapper = add_wrapper(wrappers,
@@ -495,7 +497,7 @@ static int encode_addr_size_pairs(struct dma_xfer *xfer, struct wrapper_list *wr
nents = sgt->nents;
nents_dma = nents;
- *size = QAIC_MANAGE_EXT_MSG_LENGTH - msg_hdr_len - sizeof(**out_trans);
+ *size = QAIC_MANAGE_WIRE_MSG_LENGTH - msg_hdr_len - sizeof(**out_trans);
for_each_sgtable_dma_sg(sgt, sg, i) {
*size -= sizeof(*asp);
/* Save 1K for possible follow-up transactions. */
@@ -576,7 +578,7 @@ static int encode_dma(struct qaic_device *qdev, void *trans, struct wrapper_list
/* There should be enough space to hold at least one ASP entry. */
if (size_add(msg_hdr_len, sizeof(*out_trans) + sizeof(struct wire_addr_size_pair)) >
- QAIC_MANAGE_EXT_MSG_LENGTH)
+ QAIC_MANAGE_WIRE_MSG_LENGTH)
return -ENOMEM;
xfer = kmalloc(sizeof(*xfer), GFP_KERNEL);
@@ -645,7 +647,7 @@ static int encode_activate(struct qaic_device *qdev, void *trans, struct wrapper
msg = &wrapper->msg;
msg_hdr_len = le32_to_cpu(msg->hdr.len);
- if (size_add(msg_hdr_len, sizeof(*out_trans)) > QAIC_MANAGE_MAX_MSG_LENGTH)
+ if (size_add(msg_hdr_len, sizeof(*out_trans)) > QAIC_MANAGE_WIRE_MSG_LENGTH)
return -ENOSPC;
if (!in_trans->queue_size)
@@ -655,8 +657,9 @@ static int encode_activate(struct qaic_device *qdev, void *trans, struct wrapper
return -EINVAL;
nelem = in_trans->queue_size;
- size = (get_dbc_req_elem_size() + get_dbc_rsp_elem_size()) * nelem;
- if (size / nelem != get_dbc_req_elem_size() + get_dbc_rsp_elem_size())
+ if (check_mul_overflow((u32)(get_dbc_req_elem_size() + get_dbc_rsp_elem_size()),
+ nelem,
+ &size))
return -EINVAL;
if (size + QAIC_DBC_Q_GAP + QAIC_DBC_Q_BUF_ALIGN < size)
@@ -729,7 +732,7 @@ static int encode_status(struct qaic_device *qdev, void *trans, struct wrapper_l
msg = &wrapper->msg;
msg_hdr_len = le32_to_cpu(msg->hdr.len);
- if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_MAX_MSG_LENGTH)
+ if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_WIRE_MSG_LENGTH)
return -ENOSPC;
trans_wrapper = add_wrapper(wrappers, sizeof(*trans_wrapper));
@@ -810,7 +813,7 @@ static int encode_message(struct qaic_device *qdev, struct manage_msg *user_msg,
}
if (ret)
- break;
+ goto out;
}
if (user_len != user_msg->len)
@@ -921,6 +924,7 @@ static int decode_deactivate(struct qaic_device *qdev, void *trans, u32 *msg_len
}
release_dbc(qdev, dbc_id);
+ set_dbc_state(qdev, dbc_id, DBC_STATE_IDLE);
*msg_len += sizeof(*in_trans);
return 0;
@@ -1052,7 +1056,7 @@ static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u
init_completion(&elem.xfer_done);
if (likely(!qdev->cntl_lost_buf)) {
/*
- * The max size of request to device is QAIC_MANAGE_EXT_MSG_LENGTH.
+ * The max size of request to device is QAIC_MANAGE_WIRE_MSG_LENGTH.
* The max size of response from device is QAIC_MANAGE_MAX_MSG_LENGTH.
*/
out_buf = kmalloc(QAIC_MANAGE_MAX_MSG_LENGTH, GFP_KERNEL);
@@ -1079,7 +1083,6 @@ static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u
list_for_each_entry(w, &wrappers->list, list) {
kref_get(&w->ref_count);
- retry_count = 0;
ret = mhi_queue_buf(qdev->cntl_ch, DMA_TO_DEVICE, &w->msg, w->len,
list_is_last(&w->list, &wrappers->list) ? MHI_EOT : MHI_CHAIN);
if (ret) {
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index c4f117edb266..60cb4d65d48e 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -18,6 +18,7 @@
#include <linux/scatterlist.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>
+#include <linux/string.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/wait.h>
@@ -165,7 +166,7 @@ static void free_slice(struct kref *kref)
drm_gem_object_put(&slice->bo->base);
sg_free_table(slice->sgt);
kfree(slice->sgt);
- kfree(slice->reqs);
+ kvfree(slice->reqs);
kfree(slice);
}
@@ -404,7 +405,7 @@ static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo,
goto free_sgt;
}
- slice->reqs = kcalloc(sgt->nents, sizeof(*slice->reqs), GFP_KERNEL);
+ slice->reqs = kvcalloc(sgt->nents, sizeof(*slice->reqs), GFP_KERNEL);
if (!slice->reqs) {
ret = -ENOMEM;
goto free_slice;
@@ -430,7 +431,7 @@ static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo,
return 0;
free_req:
- kfree(slice->reqs);
+ kvfree(slice->reqs);
free_slice:
kfree(slice);
free_sgt:
@@ -643,8 +644,36 @@ static void qaic_free_object(struct drm_gem_object *obj)
kfree(bo);
}
+static struct sg_table *qaic_get_sg_table(struct drm_gem_object *obj)
+{
+ struct qaic_bo *bo = to_qaic_bo(obj);
+ struct scatterlist *sg, *sg_in;
+ struct sg_table *sgt, *sgt_in;
+ int i;
+
+ sgt_in = bo->sgt;
+
+ sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt)
+ return ERR_PTR(-ENOMEM);
+
+ if (sg_alloc_table(sgt, sgt_in->orig_nents, GFP_KERNEL)) {
+ kfree(sgt);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ sg = sgt->sgl;
+ for_each_sgtable_sg(sgt_in, sg_in, i) {
+ memcpy(sg, sg_in, sizeof(*sg));
+ sg = sg_next(sg);
+ }
+
+ return sgt;
+}
+
static const struct drm_gem_object_funcs qaic_gem_funcs = {
.free = qaic_free_object,
+ .get_sg_table = qaic_get_sg_table,
.print_info = qaic_gem_print_info,
.mmap = qaic_gem_object_mmap,
.vm_ops = &drm_vm_ops,
@@ -953,8 +982,9 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
if (args->hdr.count == 0)
return -EINVAL;
- arg_size = args->hdr.count * sizeof(*slice_ent);
- if (arg_size / args->hdr.count != sizeof(*slice_ent))
+ if (check_mul_overflow((unsigned long)args->hdr.count,
+ (unsigned long)sizeof(*slice_ent),
+ &arg_size))
return -EINVAL;
if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE))
@@ -984,18 +1014,12 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
user_data = u64_to_user_ptr(args->data);
- slice_ent = kzalloc(arg_size, GFP_KERNEL);
- if (!slice_ent) {
- ret = -EINVAL;
+ slice_ent = memdup_user(user_data, arg_size);
+ if (IS_ERR(slice_ent)) {
+ ret = PTR_ERR(slice_ent);
goto unlock_dev_srcu;
}
- ret = copy_from_user(slice_ent, user_data, arg_size);
- if (ret) {
- ret = -EFAULT;
- goto free_slice_ent;
- }
-
obj = drm_gem_object_lookup(file_priv, args->hdr.handle);
if (!obj) {
ret = -ENOENT;
@@ -1023,6 +1047,11 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
goto unlock_ch_srcu;
}
+ if (dbc->id == qdev->ssr_dbc) {
+ ret = -EPIPE;
+ goto unlock_ch_srcu;
+ }
+
ret = qaic_prepare_bo(qdev, bo, &args->hdr);
if (ret)
goto unlock_ch_srcu;
@@ -1300,8 +1329,6 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
int usr_rcu_id, qdev_rcu_id;
struct qaic_device *qdev;
struct qaic_user *usr;
- u8 __user *user_data;
- unsigned long n;
u64 received_ts;
u32 queue_level;
u64 submit_ts;
@@ -1314,20 +1341,12 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
received_ts = ktime_get_ns();
size = is_partial ? sizeof(struct qaic_partial_execute_entry) : sizeof(*exec);
- n = (unsigned long)size * args->hdr.count;
- if (args->hdr.count == 0 || n / args->hdr.count != size)
+ if (args->hdr.count == 0)
return -EINVAL;
- user_data = u64_to_user_ptr(args->data);
-
- exec = kcalloc(args->hdr.count, size, GFP_KERNEL);
- if (!exec)
- return -ENOMEM;
-
- if (copy_from_user(exec, user_data, n)) {
- ret = -EFAULT;
- goto free_exec;
- }
+ exec = memdup_array_user(u64_to_user_ptr(args->data), args->hdr.count, size);
+ if (IS_ERR(exec))
+ return PTR_ERR(exec);
usr = file_priv->driver_priv;
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
@@ -1356,6 +1375,11 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
goto release_ch_rcu;
}
+ if (dbc->id == qdev->ssr_dbc) {
+ ret = -EPIPE;
+ goto release_ch_rcu;
+ }
+
ret = mutex_lock_interruptible(&dbc->req_lock);
if (ret)
goto release_ch_rcu;
@@ -1396,7 +1420,6 @@ unlock_dev_srcu:
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
unlock_usr_srcu:
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
-free_exec:
kfree(exec);
return ret;
}
@@ -1491,7 +1514,7 @@ irqreturn_t dbc_irq_handler(int irq, void *data)
return IRQ_WAKE_THREAD;
}
-void irq_polling_work(struct work_struct *work)
+void qaic_irq_polling_work(struct work_struct *work)
{
struct dma_bridge_chan *dbc = container_of(work, struct dma_bridge_chan, poll_work);
unsigned long flags;
@@ -1709,6 +1732,11 @@ int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
goto unlock_ch_srcu;
}
+ if (dbc->id == qdev->ssr_dbc) {
+ ret = -EPIPE;
+ goto unlock_ch_srcu;
+ }
+
obj = drm_gem_object_lookup(file_priv, args->handle);
if (!obj) {
ret = -ENOENT;
@@ -1729,6 +1757,9 @@ int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
if (!dbc->usr)
ret = -EPERM;
+ if (dbc->id == qdev->ssr_dbc)
+ ret = -EPIPE;
+
put_obj:
drm_gem_object_put(obj);
unlock_ch_srcu:
@@ -1749,7 +1780,8 @@ int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file
struct qaic_device *qdev;
struct qaic_user *usr;
struct qaic_bo *bo;
- int ret, i;
+ int ret = 0;
+ int i;
usr = file_priv->driver_priv;
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
@@ -1770,18 +1802,12 @@ int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file
goto unlock_dev_srcu;
}
- ent = kcalloc(args->hdr.count, sizeof(*ent), GFP_KERNEL);
- if (!ent) {
- ret = -EINVAL;
+ ent = memdup_array_user(u64_to_user_ptr(args->data), args->hdr.count, sizeof(*ent));
+ if (IS_ERR(ent)) {
+ ret = PTR_ERR(ent);
goto unlock_dev_srcu;
}
- ret = copy_from_user(ent, u64_to_user_ptr(args->data), args->hdr.count * sizeof(*ent));
- if (ret) {
- ret = -EFAULT;
- goto free_ent;
- }
-
for (i = 0; i < args->hdr.count; i++) {
obj = drm_gem_object_lookup(file_priv, ent[i].handle);
if (!obj) {
@@ -1789,6 +1815,16 @@ int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file
goto free_ent;
}
bo = to_qaic_bo(obj);
+ if (!bo->sliced) {
+ drm_gem_object_put(obj);
+ ret = -EINVAL;
+ goto free_ent;
+ }
+ if (bo->dbc->id != args->hdr.dbc_id) {
+ drm_gem_object_put(obj);
+ ret = -EINVAL;
+ goto free_ent;
+ }
/*
* perf stats ioctl is called before wait ioctl is complete then
* the latency information is invalid.
@@ -1927,6 +1963,17 @@ static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *db
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
}
+static void sync_empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc)
+{
+ empty_xfer_list(qdev, dbc);
+ synchronize_srcu(&dbc->ch_lock);
+ /*
+ * Threads holding channel lock, may add more elements in the xfer_list.
+ * Flush out these elements from xfer_list.
+ */
+ empty_xfer_list(qdev, dbc);
+}
+
int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
{
if (!qdev->dbc[dbc_id].usr || qdev->dbc[dbc_id].usr->handle != usr->handle)
@@ -1941,7 +1988,7 @@ int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
* enable_dbc - Enable the DBC. DBCs are disabled by removing the context of
* user. Add user context back to DBC to enable it. This function trusts the
* DBC ID passed and expects the DBC to be disabled.
- * @qdev: Qranium device handle
+ * @qdev: qaic device handle
* @dbc_id: ID of the DBC
* @usr: User context
*/
@@ -1955,13 +2002,7 @@ void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id)
struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];
dbc->usr = NULL;
- empty_xfer_list(qdev, dbc);
- synchronize_srcu(&dbc->ch_lock);
- /*
- * Threads holding channel lock, may add more elements in the xfer_list.
- * Flush out these elements from xfer_list.
- */
- empty_xfer_list(qdev, dbc);
+ sync_empty_xfer_list(qdev, dbc);
}
void release_dbc(struct qaic_device *qdev, u32 dbc_id)
@@ -2002,3 +2043,30 @@ void qaic_data_get_fifo_info(struct dma_bridge_chan *dbc, u32 *head, u32 *tail)
*head = readl(dbc->dbc_base + REQHP_OFF);
*tail = readl(dbc->dbc_base + REQTP_OFF);
}
+
+/*
+ * qaic_dbc_enter_ssr - Prepare to enter in sub system reset(SSR) for given DBC ID.
+ * @qdev: qaic device handle
+ * @dbc_id: ID of the DBC which will enter SSR
+ *
+ * The device will automatically deactivate the workload as not
+ * all errors can be silently recovered. The user will be
+ * notified and will need to decide the required recovery
+ * action to take.
+ */
+void qaic_dbc_enter_ssr(struct qaic_device *qdev, u32 dbc_id)
+{
+ qdev->ssr_dbc = dbc_id;
+ release_dbc(qdev, dbc_id);
+}
+
+/*
+ * qaic_dbc_exit_ssr - Prepare to exit from sub system reset(SSR) for given DBC ID.
+ * @qdev: qaic device handle
+ *
+ * The DBC returns to an operational state and begins accepting work after exiting SSR.
+ */
+void qaic_dbc_exit_ssr(struct qaic_device *qdev)
+{
+ qdev->ssr_dbc = QAIC_SSR_DBC_SENTINEL;
+}
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index e162f4b8a262..4c70bd949d53 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -30,6 +30,7 @@
#include "qaic.h"
#include "qaic_debugfs.h"
#include "qaic_ras.h"
+#include "qaic_ssr.h"
#include "qaic_timesync.h"
#include "sahara.h"
@@ -270,6 +271,13 @@ static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id)
return ret;
}
+ ret = qaic_sysfs_init(qddev);
+ if (ret) {
+ drm_dev_unregister(drm);
+ pci_dbg(qdev->pdev, "qaic_sysfs_init failed %d\n", ret);
+ return ret;
+ }
+
qaic_debugfs_init(qddev);
return ret;
@@ -281,6 +289,7 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
struct drm_device *drm = to_drm(qddev);
struct qaic_user *usr;
+ qaic_sysfs_remove(qddev);
drm_dev_unregister(drm);
qddev->partition_id = 0;
/*
@@ -382,6 +391,7 @@ void qaic_dev_reset_clean_local_state(struct qaic_device *qdev)
qaic_notify_reset(qdev);
/* start tearing things down */
+ qaic_clean_up_ssr(qdev);
for (i = 0; i < qdev->num_dbc; ++i)
release_dbc(qdev, i);
}
@@ -431,11 +441,18 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev,
qdev->qts_wq = qaicm_wq_init(drm, "qaic_ts");
if (IS_ERR(qdev->qts_wq))
return NULL;
+ qdev->ssr_wq = qaicm_wq_init(drm, "qaic_ssr");
+ if (IS_ERR(qdev->ssr_wq))
+ return NULL;
ret = qaicm_srcu_init(drm, &qdev->dev_lock);
if (ret)
return NULL;
+ ret = qaic_ssr_init(qdev, drm);
+ if (ret)
+ pci_info(pdev, "QAIC SSR crashdump collection not supported.\n");
+
qdev->qddev = qddev;
qdev->pdev = pdev;
qddev->qdev = qdev;
@@ -545,7 +562,7 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
qdev->dbc[i].irq = pci_irq_vector(pdev, qdev->single_msi ? 0 : i + 1);
if (!qdev->single_msi)
disable_irq_nosync(qdev->dbc[i].irq);
- INIT_WORK(&qdev->dbc[i].poll_work, irq_polling_work);
+ INIT_WORK(&qdev->dbc[i].poll_work, qaic_irq_polling_work);
}
}
@@ -660,6 +677,92 @@ static const struct pci_error_handlers qaic_pci_err_handler = {
.reset_done = qaic_pci_reset_done,
};
+static bool qaic_is_under_reset(struct qaic_device *qdev)
+{
+ int rcu_id;
+ bool ret;
+
+ rcu_id = srcu_read_lock(&qdev->dev_lock);
+ ret = qdev->dev_state != QAIC_ONLINE;
+ srcu_read_unlock(&qdev->dev_lock, rcu_id);
+ return ret;
+}
+
+static bool qaic_data_path_busy(struct qaic_device *qdev)
+{
+ bool ret = false;
+ int dev_rcu_id;
+ int i;
+
+ dev_rcu_id = srcu_read_lock(&qdev->dev_lock);
+ if (qdev->dev_state != QAIC_ONLINE) {
+ srcu_read_unlock(&qdev->dev_lock, dev_rcu_id);
+ return false;
+ }
+ for (i = 0; i < qdev->num_dbc; i++) {
+ struct dma_bridge_chan *dbc = &qdev->dbc[i];
+ unsigned long flags;
+ int ch_rcu_id;
+
+ ch_rcu_id = srcu_read_lock(&dbc->ch_lock);
+ if (!dbc->usr || !dbc->in_use) {
+ srcu_read_unlock(&dbc->ch_lock, ch_rcu_id);
+ continue;
+ }
+ spin_lock_irqsave(&dbc->xfer_lock, flags);
+ ret = !list_empty(&dbc->xfer_list);
+ spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+ srcu_read_unlock(&dbc->ch_lock, ch_rcu_id);
+ if (ret)
+ break;
+ }
+ srcu_read_unlock(&qdev->dev_lock, dev_rcu_id);
+ return ret;
+}
+
+static int qaic_pm_suspend(struct device *dev)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
+
+ dev_dbg(dev, "Suspending..\n");
+ if (qaic_data_path_busy(qdev)) {
+ dev_dbg(dev, "Device's datapath is busy. Aborting suspend..\n");
+ return -EBUSY;
+ }
+ if (qaic_is_under_reset(qdev)) {
+ dev_dbg(dev, "Device is under reset. Aborting suspend..\n");
+ return -EBUSY;
+ }
+ qaic_mqts_ch_stop_timer(qdev->mqts_ch);
+ qaic_pci_reset_prepare(qdev->pdev);
+ pci_save_state(qdev->pdev);
+ pci_disable_device(qdev->pdev);
+ pci_set_power_state(qdev->pdev, PCI_D3hot);
+ return 0;
+}
+
+static int qaic_pm_resume(struct device *dev)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
+ int ret;
+
+ dev_dbg(dev, "Resuming..\n");
+ pci_set_power_state(qdev->pdev, PCI_D0);
+ pci_restore_state(qdev->pdev);
+ ret = pci_enable_device(qdev->pdev);
+ if (ret) {
+ dev_err(dev, "pci_enable_device failed on resume %d\n", ret);
+ return ret;
+ }
+ pci_set_master(qdev->pdev);
+ qaic_pci_reset_done(qdev->pdev);
+ return 0;
+}
+
+static const struct dev_pm_ops qaic_pm_ops = {
+ SYSTEM_SLEEP_PM_OPS(qaic_pm_suspend, qaic_pm_resume)
+};
+
static struct pci_driver qaic_pci_driver = {
.name = QAIC_NAME,
.id_table = qaic_ids,
@@ -667,6 +770,9 @@ static struct pci_driver qaic_pci_driver = {
.remove = qaic_pci_remove,
.shutdown = qaic_pci_shutdown,
.err_handler = &qaic_pci_err_handler,
+ .driver = {
+ .pm = pm_sleep_ptr(&qaic_pm_ops),
+ },
};
static int __init qaic_init(void)
@@ -702,9 +808,16 @@ static int __init qaic_init(void)
ret = qaic_ras_register();
if (ret)
pr_debug("qaic: qaic_ras_register failed %d\n", ret);
+ ret = qaic_ssr_register();
+ if (ret) {
+ pr_debug("qaic: qaic_ssr_register failed %d\n", ret);
+ goto free_bootlog;
+ }
return 0;
+free_bootlog:
+ qaic_bootlog_unregister();
free_mhi:
mhi_driver_unregister(&qaic_mhi_driver);
free_pci:
@@ -730,6 +843,7 @@ static void __exit qaic_exit(void)
* reinitializing the link_up state after the cleanup is done.
*/
link_up = true;
+ qaic_ssr_unregister();
qaic_ras_unregister();
qaic_bootlog_unregister();
qaic_timesync_deinit();
diff --git a/drivers/accel/qaic/qaic_ras.c b/drivers/accel/qaic/qaic_ras.c
index 914ffc4a9970..f1d52a710136 100644
--- a/drivers/accel/qaic/qaic_ras.c
+++ b/drivers/accel/qaic/qaic_ras.c
@@ -514,21 +514,21 @@ static ssize_t ce_count_show(struct device *dev, struct device_attribute *attr,
{
struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
- return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ce_count);
+ return sysfs_emit(buf, "%d\n", qdev->ce_count);
}
static ssize_t ue_count_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
- return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_count);
+ return sysfs_emit(buf, "%d\n", qdev->ue_count);
}
static ssize_t ue_nonfatal_count_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
- return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_nf_count);
+ return sysfs_emit(buf, "%d\n", qdev->ue_nf_count);
}
static DEVICE_ATTR_RO(ce_count);
diff --git a/drivers/accel/qaic/qaic_ssr.c b/drivers/accel/qaic/qaic_ssr.c
new file mode 100644
index 000000000000..9b662d690371
--- /dev/null
+++ b/drivers/accel/qaic/qaic_ssr.c
@@ -0,0 +1,815 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. */
+
+#include <asm/byteorder.h>
+#include <drm/drm_file.h>
+#include <drm/drm_managed.h>
+#include <linux/devcoredump.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mhi.h>
+#include <linux/workqueue.h>
+
+#include "qaic.h"
+#include "qaic_ssr.h"
+
+#define SSR_RESP_MSG_SZ 32
+#define SSR_MHI_BUF_SIZE SZ_64K
+#define SSR_MEM_READ_DATA_SIZE ((u64)SSR_MHI_BUF_SIZE - sizeof(struct ssr_crashdump))
+#define SSR_MEM_READ_CHUNK_SIZE ((u64)SSR_MEM_READ_DATA_SIZE - sizeof(struct ssr_memory_read_rsp))
+
+#define DEBUG_TRANSFER_INFO BIT(0)
+#define DEBUG_TRANSFER_INFO_RSP BIT(1)
+#define MEMORY_READ BIT(2)
+#define MEMORY_READ_RSP BIT(3)
+#define DEBUG_TRANSFER_DONE BIT(4)
+#define DEBUG_TRANSFER_DONE_RSP BIT(5)
+#define SSR_EVENT BIT(8)
+#define SSR_EVENT_RSP BIT(9)
+
+#define SSR_EVENT_NACK BIT(0)
+#define BEFORE_SHUTDOWN BIT(1)
+#define AFTER_SHUTDOWN BIT(2)
+#define BEFORE_POWER_UP BIT(3)
+#define AFTER_POWER_UP BIT(4)
+
+struct debug_info_table {
+ /* Save preferences. Default is mandatory */
+ u64 save_perf;
+ /* Base address of the debug region */
+ u64 mem_base;
+ /* Size of debug region in bytes */
+ u64 len;
+ /* Description */
+ char desc[20];
+ /* Filename of debug region */
+ char filename[20];
+};
+
+struct _ssr_hdr {
+ __le32 cmd;
+ __le32 len;
+ __le32 dbc_id;
+};
+
+struct ssr_hdr {
+ u32 cmd;
+ u32 len;
+ u32 dbc_id;
+};
+
+struct ssr_debug_transfer_info {
+ struct ssr_hdr hdr;
+ u32 resv;
+ u64 tbl_addr;
+ u64 tbl_len;
+} __packed;
+
+struct ssr_debug_transfer_info_rsp {
+ struct _ssr_hdr hdr;
+ __le32 ret;
+} __packed;
+
+struct ssr_memory_read {
+ struct _ssr_hdr hdr;
+ __le32 resv;
+ __le64 addr;
+ __le64 len;
+} __packed;
+
+struct ssr_memory_read_rsp {
+ struct _ssr_hdr hdr;
+ __le32 resv;
+ u8 data[];
+} __packed;
+
+struct ssr_debug_transfer_done {
+ struct _ssr_hdr hdr;
+ __le32 resv;
+} __packed;
+
+struct ssr_debug_transfer_done_rsp {
+ struct _ssr_hdr hdr;
+ __le32 ret;
+} __packed;
+
+struct ssr_event {
+ struct ssr_hdr hdr;
+ u32 event;
+} __packed;
+
+struct ssr_event_rsp {
+ struct _ssr_hdr hdr;
+ __le32 event;
+} __packed;
+
+struct ssr_resp {
+ /* Work struct to schedule work coming on QAIC_SSR channel */
+ struct work_struct work;
+ /* Root struct of device, used to access device resources */
+ struct qaic_device *qdev;
+ /* Buffer used by MHI for transfer requests */
+ u8 data[] __aligned(8);
+};
+
+/* SSR crashdump book keeping structure */
+struct ssr_dump_info {
+ /* DBC associated with this SSR crashdump */
+ struct dma_bridge_chan *dbc;
+ /*
+ * It will be used when we complete the crashdump download and switch
+ * to waiting on SSR events
+ */
+ struct ssr_resp *resp;
+ /* MEMORY READ request MHI buffer.*/
+ struct ssr_memory_read *read_buf_req;
+ /* TRUE: ->read_buf_req is queued for MHI transaction. FALSE: Otherwise */
+ bool read_buf_req_queued;
+ /* Address of table in host */
+ void *tbl_addr;
+ /* Total size of table */
+ u64 tbl_len;
+ /* Offset of table(->tbl_addr) where the new chunk will be dumped */
+ u64 tbl_off;
+ /* Address of table in device/target */
+ u64 tbl_addr_dev;
+ /* Ptr to the entire dump */
+ void *dump_addr;
+ /* Entire crashdump size */
+ u64 dump_sz;
+ /* Offset of crashdump(->dump_addr) where the new chunk will be dumped */
+ u64 dump_off;
+ /* Points to the table entry we are currently downloading */
+ struct debug_info_table *tbl_ent;
+ /* Offset in the current table entry(->tbl_ent) for next chuck */
+ u64 tbl_ent_off;
+};
+
+struct ssr_crashdump {
+ /*
+ * Points to a book keeping struct maintained by MHI SSR device while
+ * downloading a SSR crashdump. It is NULL when crashdump downloading
+ * not in progress.
+ */
+ struct ssr_dump_info *dump_info;
+ /* Work struct to schedule work coming on QAIC_SSR channel */
+ struct work_struct work;
+ /* Root struct of device, used to access device resources */
+ struct qaic_device *qdev;
+ /* Buffer used by MHI for transfer requests */
+ u8 data[];
+};
+
+#define QAIC_SSR_DUMP_V1_MAGIC 0x1234567890abcdef
+#define QAIC_SSR_DUMP_V1_VER 1
+struct dump_file_meta {
+ u64 magic;
+ u64 version;
+ u64 size; /* Total size of the entire dump */
+ u64 tbl_len; /* Length of the table in byte */
+};
+
+/*
+ * Layout of crashdump
+ * +------------------------------------------+
+ * | Crashdump Meta structure |
+ * | type: struct dump_file_meta |
+ * +------------------------------------------+
+ * | Crashdump Table |
+ * | type: array of struct debug_info_table |
+ * | |
+ * | |
+ * | |
+ * +------------------------------------------+
+ * | Crashdump |
+ * | |
+ * | |
+ * | |
+ * | |
+ * | |
+ * +------------------------------------------+
+ */
+
+static void free_ssr_dump_info(struct ssr_crashdump *ssr_crash)
+{
+ struct ssr_dump_info *dump_info = ssr_crash->dump_info;
+
+ ssr_crash->dump_info = NULL;
+ if (!dump_info)
+ return;
+ if (!dump_info->read_buf_req_queued)
+ kfree(dump_info->read_buf_req);
+ vfree(dump_info->tbl_addr);
+ vfree(dump_info->dump_addr);
+ kfree(dump_info);
+}
+
+void qaic_clean_up_ssr(struct qaic_device *qdev)
+{
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+
+ if (!ssr_crash)
+ return;
+
+ qaic_dbc_exit_ssr(qdev);
+ free_ssr_dump_info(ssr_crash);
+}
+
+static int alloc_dump(struct ssr_dump_info *dump_info)
+{
+ struct debug_info_table *tbl_ent = dump_info->tbl_addr;
+ struct dump_file_meta *dump_meta;
+ u64 tbl_sz_lp = 0;
+ u64 dump_size = 0;
+
+ while (tbl_sz_lp < dump_info->tbl_len) {
+ le64_to_cpus(&tbl_ent->save_perf);
+ le64_to_cpus(&tbl_ent->mem_base);
+ le64_to_cpus(&tbl_ent->len);
+
+ if (tbl_ent->len == 0)
+ return -EINVAL;
+
+ dump_size += tbl_ent->len;
+ tbl_ent++;
+ tbl_sz_lp += sizeof(*tbl_ent);
+ }
+
+ dump_info->dump_sz = dump_size + dump_info->tbl_len + sizeof(*dump_meta);
+ dump_info->dump_addr = vzalloc(dump_info->dump_sz);
+ if (!dump_info->dump_addr)
+ return -ENOMEM;
+
+ /* Copy crashdump meta and table */
+ dump_meta = dump_info->dump_addr;
+ dump_meta->magic = QAIC_SSR_DUMP_V1_MAGIC;
+ dump_meta->version = QAIC_SSR_DUMP_V1_VER;
+ dump_meta->size = dump_info->dump_sz;
+ dump_meta->tbl_len = dump_info->tbl_len;
+ memcpy(dump_info->dump_addr + sizeof(*dump_meta), dump_info->tbl_addr, dump_info->tbl_len);
+ /* Offset by crashdump meta and table (copied above) */
+ dump_info->dump_off = dump_info->tbl_len + sizeof(*dump_meta);
+
+ return 0;
+}
+
+static int send_xfer_done(struct qaic_device *qdev, void *resp, u32 dbc_id)
+{
+ struct ssr_debug_transfer_done *xfer_done;
+ int ret;
+
+ xfer_done = kmalloc(sizeof(*xfer_done), GFP_KERNEL);
+ if (!xfer_done) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, resp, SSR_RESP_MSG_SZ, MHI_EOT);
+ if (ret)
+ goto free_xfer_done;
+
+ xfer_done->hdr.cmd = cpu_to_le32(DEBUG_TRANSFER_DONE);
+ xfer_done->hdr.len = cpu_to_le32(sizeof(*xfer_done));
+ xfer_done->hdr.dbc_id = cpu_to_le32(dbc_id);
+
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, xfer_done, sizeof(*xfer_done), MHI_EOT);
+ if (ret)
+ goto free_xfer_done;
+
+ return 0;
+
+free_xfer_done:
+ kfree(xfer_done);
+out:
+ return ret;
+}
+
+static int mem_read_req(struct qaic_device *qdev, u64 dest_addr, u64 dest_len)
+{
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+ struct ssr_memory_read *read_buf_req;
+ struct ssr_dump_info *dump_info;
+ int ret;
+
+ dump_info = ssr_crash->dump_info;
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, ssr_crash->data, SSR_MEM_READ_DATA_SIZE,
+ MHI_EOT);
+ if (ret)
+ goto out;
+
+ read_buf_req = dump_info->read_buf_req;
+ read_buf_req->hdr.cmd = cpu_to_le32(MEMORY_READ);
+ read_buf_req->hdr.len = cpu_to_le32(sizeof(*read_buf_req));
+ read_buf_req->hdr.dbc_id = cpu_to_le32(qdev->ssr_dbc);
+ read_buf_req->addr = cpu_to_le64(dest_addr);
+ read_buf_req->len = cpu_to_le64(dest_len);
+
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, read_buf_req, sizeof(*read_buf_req),
+ MHI_EOT);
+ if (!ret)
+ dump_info->read_buf_req_queued = true;
+
+out:
+ return ret;
+}
+
+static int ssr_copy_table(struct ssr_dump_info *dump_info, void *data, u64 len)
+{
+ if (len > dump_info->tbl_len - dump_info->tbl_off)
+ return -EINVAL;
+
+ memcpy(dump_info->tbl_addr + dump_info->tbl_off, data, len);
+ dump_info->tbl_off += len;
+
+ /* Entire table has been downloaded, alloc dump memory */
+ if (dump_info->tbl_off == dump_info->tbl_len) {
+ dump_info->tbl_ent = dump_info->tbl_addr;
+ return alloc_dump(dump_info);
+ }
+
+ return 0;
+}
+
+static int ssr_copy_dump(struct ssr_dump_info *dump_info, void *data, u64 len)
+{
+ struct debug_info_table *tbl_ent;
+
+ tbl_ent = dump_info->tbl_ent;
+
+ if (len > tbl_ent->len - dump_info->tbl_ent_off)
+ return -EINVAL;
+
+ memcpy(dump_info->dump_addr + dump_info->dump_off, data, len);
+ dump_info->dump_off += len;
+ dump_info->tbl_ent_off += len;
+
+ /*
+ * Current segment (a entry in table) of the crashdump is complete,
+ * move to next one
+ */
+ if (tbl_ent->len == dump_info->tbl_ent_off) {
+ dump_info->tbl_ent++;
+ dump_info->tbl_ent_off = 0;
+ }
+
+ return 0;
+}
+
+static void ssr_dump_worker(struct work_struct *work)
+{
+ struct ssr_crashdump *ssr_crash = container_of(work, struct ssr_crashdump, work);
+ struct qaic_device *qdev = ssr_crash->qdev;
+ struct ssr_memory_read_rsp *mem_rd_resp;
+ struct debug_info_table *tbl_ent;
+ struct ssr_dump_info *dump_info;
+ u64 dest_addr, dest_len;
+ struct _ssr_hdr *_hdr;
+ struct ssr_hdr hdr;
+ u64 data_len;
+ int ret;
+
+ mem_rd_resp = (struct ssr_memory_read_rsp *)ssr_crash->data;
+ _hdr = &mem_rd_resp->hdr;
+ hdr.cmd = le32_to_cpu(_hdr->cmd);
+ hdr.len = le32_to_cpu(_hdr->len);
+ hdr.dbc_id = le32_to_cpu(_hdr->dbc_id);
+
+ if (hdr.dbc_id != qdev->ssr_dbc)
+ goto reset_device;
+
+ dump_info = ssr_crash->dump_info;
+ if (!dump_info)
+ goto reset_device;
+
+ if (hdr.cmd != MEMORY_READ_RSP)
+ goto free_dump_info;
+
+ if (hdr.len > SSR_MEM_READ_DATA_SIZE)
+ goto free_dump_info;
+
+ data_len = hdr.len - sizeof(*mem_rd_resp);
+
+ if (dump_info->tbl_off < dump_info->tbl_len) /* Chunk belongs to table */
+ ret = ssr_copy_table(dump_info, mem_rd_resp->data, data_len);
+ else /* Chunk belongs to crashdump */
+ ret = ssr_copy_dump(dump_info, mem_rd_resp->data, data_len);
+
+ if (ret)
+ goto free_dump_info;
+
+ if (dump_info->tbl_off < dump_info->tbl_len) {
+ /* Continue downloading table */
+ dest_addr = dump_info->tbl_addr_dev + dump_info->tbl_off;
+ dest_len = min(SSR_MEM_READ_CHUNK_SIZE, dump_info->tbl_len - dump_info->tbl_off);
+ ret = mem_read_req(qdev, dest_addr, dest_len);
+ } else if (dump_info->dump_off < dump_info->dump_sz) {
+ /* Continue downloading crashdump */
+ tbl_ent = dump_info->tbl_ent;
+ dest_addr = tbl_ent->mem_base + dump_info->tbl_ent_off;
+ dest_len = min(SSR_MEM_READ_CHUNK_SIZE, tbl_ent->len - dump_info->tbl_ent_off);
+ ret = mem_read_req(qdev, dest_addr, dest_len);
+ } else {
+ /* Crashdump download complete */
+ ret = send_xfer_done(qdev, dump_info->resp->data, hdr.dbc_id);
+ }
+
+ /* Most likely a MHI xfer has failed */
+ if (ret)
+ goto free_dump_info;
+
+ return;
+
+free_dump_info:
+ /* Free the allocated memory */
+ free_ssr_dump_info(ssr_crash);
+reset_device:
+ /*
+ * After subsystem crashes in device crashdump collection begins but
+ * something went wrong while collecting crashdump, now instead of
+ * handling this error we just reset the device as the best effort has
+ * been made
+ */
+ mhi_soc_reset(qdev->mhi_cntrl);
+}
+
+static struct ssr_dump_info *alloc_dump_info(struct qaic_device *qdev,
+ struct ssr_debug_transfer_info *debug_info)
+{
+ struct ssr_dump_info *dump_info;
+ int ret;
+
+ le64_to_cpus(&debug_info->tbl_len);
+ le64_to_cpus(&debug_info->tbl_addr);
+
+ if (debug_info->tbl_len == 0 ||
+ debug_info->tbl_len % sizeof(struct debug_info_table) != 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Allocate SSR crashdump book keeping structure */
+ dump_info = kzalloc(sizeof(*dump_info), GFP_KERNEL);
+ if (!dump_info) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* Buffer used to send MEMORY READ request to device via MHI */
+ dump_info->read_buf_req = kzalloc(sizeof(*dump_info->read_buf_req), GFP_KERNEL);
+ if (!dump_info->read_buf_req) {
+ ret = -ENOMEM;
+ goto free_dump_info;
+ }
+
+ /* Crashdump meta table buffer */
+ dump_info->tbl_addr = vzalloc(debug_info->tbl_len);
+ if (!dump_info->tbl_addr) {
+ ret = -ENOMEM;
+ goto free_read_buf_req;
+ }
+
+ dump_info->tbl_addr_dev = debug_info->tbl_addr;
+ dump_info->tbl_len = debug_info->tbl_len;
+
+ return dump_info;
+
+free_read_buf_req:
+ kfree(dump_info->read_buf_req);
+free_dump_info:
+ kfree(dump_info);
+out:
+ return ERR_PTR(ret);
+}
+
+static int dbg_xfer_info_rsp(struct qaic_device *qdev, struct dma_bridge_chan *dbc,
+ struct ssr_debug_transfer_info *debug_info)
+{
+ struct ssr_debug_transfer_info_rsp *debug_rsp;
+ struct ssr_crashdump *ssr_crash = NULL;
+ int ret = 0, ret2;
+
+ debug_rsp = kmalloc(sizeof(*debug_rsp), GFP_KERNEL);
+ if (!debug_rsp)
+ return -ENOMEM;
+
+ if (!qdev->ssr_mhi_buf) {
+ ret = -ENOMEM;
+ goto send_rsp;
+ }
+
+ if (dbc->state != DBC_STATE_BEFORE_POWER_UP) {
+ ret = -EINVAL;
+ goto send_rsp;
+ }
+
+ ssr_crash = qdev->ssr_mhi_buf;
+ ssr_crash->dump_info = alloc_dump_info(qdev, debug_info);
+ if (IS_ERR(ssr_crash->dump_info)) {
+ ret = PTR_ERR(ssr_crash->dump_info);
+ ssr_crash->dump_info = NULL;
+ }
+
+send_rsp:
+ debug_rsp->hdr.cmd = cpu_to_le32(DEBUG_TRANSFER_INFO_RSP);
+ debug_rsp->hdr.len = cpu_to_le32(sizeof(*debug_rsp));
+ debug_rsp->hdr.dbc_id = cpu_to_le32(dbc->id);
+ /*
+ * 0 = Return an ACK confirming the host is ready to download crashdump
+ * 1 = Return an NACK confirming the host is not ready to download crashdump
+ */
+ debug_rsp->ret = cpu_to_le32(ret ? 1 : 0);
+
+ ret2 = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, debug_rsp, sizeof(*debug_rsp), MHI_EOT);
+ if (ret2) {
+ free_ssr_dump_info(ssr_crash);
+ kfree(debug_rsp);
+ return ret2;
+ }
+
+ return ret;
+}
+
+static void dbg_xfer_done_rsp(struct qaic_device *qdev, struct dma_bridge_chan *dbc,
+ struct ssr_debug_transfer_done_rsp *xfer_rsp)
+{
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+ u32 status = le32_to_cpu(xfer_rsp->ret);
+ struct device *dev = &qdev->pdev->dev;
+ struct ssr_dump_info *dump_info;
+
+ dump_info = ssr_crash->dump_info;
+ if (!dump_info)
+ return;
+
+ if (status) {
+ free_ssr_dump_info(ssr_crash);
+ return;
+ }
+
+ dev_coredumpv(dev, dump_info->dump_addr, dump_info->dump_sz, GFP_KERNEL);
+ /* dev_coredumpv will free dump_info->dump_addr */
+ dump_info->dump_addr = NULL;
+ free_ssr_dump_info(ssr_crash);
+}
+
+static void ssr_worker(struct work_struct *work)
+{
+ struct ssr_resp *resp = container_of(work, struct ssr_resp, work);
+ struct ssr_hdr *hdr = (struct ssr_hdr *)resp->data;
+ struct ssr_dump_info *dump_info = NULL;
+ struct qaic_device *qdev = resp->qdev;
+ struct ssr_crashdump *ssr_crash;
+ struct ssr_event_rsp *event_rsp;
+ struct dma_bridge_chan *dbc;
+ struct ssr_event *event;
+ u32 ssr_event_ack;
+ int ret;
+
+ le32_to_cpus(&hdr->cmd);
+ le32_to_cpus(&hdr->len);
+ le32_to_cpus(&hdr->dbc_id);
+
+ if (hdr->len > SSR_RESP_MSG_SZ)
+ goto out;
+
+ if (hdr->dbc_id >= qdev->num_dbc)
+ goto out;
+
+ dbc = &qdev->dbc[hdr->dbc_id];
+
+ switch (hdr->cmd) {
+ case DEBUG_TRANSFER_INFO:
+ ret = dbg_xfer_info_rsp(qdev, dbc, (struct ssr_debug_transfer_info *)resp->data);
+ if (ret)
+ break;
+
+ ssr_crash = qdev->ssr_mhi_buf;
+ dump_info = ssr_crash->dump_info;
+ dump_info->dbc = dbc;
+ dump_info->resp = resp;
+
+ /* Start by downloading debug table */
+ ret = mem_read_req(qdev, dump_info->tbl_addr_dev,
+ min(dump_info->tbl_len, SSR_MEM_READ_CHUNK_SIZE));
+ if (ret) {
+ free_ssr_dump_info(ssr_crash);
+ break;
+ }
+
+ /*
+ * Till now everything went fine, which means that we will be
+ * collecting crashdump chunk by chunk. Do not queue a response
+ * buffer for SSR cmds till the crashdump is complete.
+ */
+ return;
+ case SSR_EVENT:
+ event = (struct ssr_event *)hdr;
+ le32_to_cpus(&event->event);
+ ssr_event_ack = event->event;
+ ssr_crash = qdev->ssr_mhi_buf;
+
+ switch (event->event) {
+ case BEFORE_SHUTDOWN:
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_BEFORE_SHUTDOWN);
+ qaic_dbc_enter_ssr(qdev, hdr->dbc_id);
+ break;
+ case AFTER_SHUTDOWN:
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_AFTER_SHUTDOWN);
+ break;
+ case BEFORE_POWER_UP:
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_BEFORE_POWER_UP);
+ break;
+ case AFTER_POWER_UP:
+ /*
+ * If dump info is a non NULL value it means that we
+ * have received this SSR event while downloading a
+ * crashdump for this DBC is still in progress. NACK
+ * the SSR event
+ */
+ if (ssr_crash && ssr_crash->dump_info) {
+ free_ssr_dump_info(ssr_crash);
+ ssr_event_ack = SSR_EVENT_NACK;
+ break;
+ }
+
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_AFTER_POWER_UP);
+ break;
+ default:
+ break;
+ }
+
+ event_rsp = kmalloc(sizeof(*event_rsp), GFP_KERNEL);
+ if (!event_rsp)
+ break;
+
+ event_rsp->hdr.cmd = cpu_to_le32(SSR_EVENT_RSP);
+ event_rsp->hdr.len = cpu_to_le32(sizeof(*event_rsp));
+ event_rsp->hdr.dbc_id = cpu_to_le32(hdr->dbc_id);
+ event_rsp->event = cpu_to_le32(ssr_event_ack);
+
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, event_rsp, sizeof(*event_rsp),
+ MHI_EOT);
+ if (ret)
+ kfree(event_rsp);
+
+ if (event->event == AFTER_POWER_UP && ssr_event_ack != SSR_EVENT_NACK) {
+ qaic_dbc_exit_ssr(qdev);
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_IDLE);
+ }
+
+ break;
+ case DEBUG_TRANSFER_DONE_RSP:
+ dbg_xfer_done_rsp(qdev, dbc, (struct ssr_debug_transfer_done_rsp *)hdr);
+ break;
+ default:
+ break;
+ }
+
+out:
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, resp->data, SSR_RESP_MSG_SZ, MHI_EOT);
+ if (ret)
+ kfree(resp);
+}
+
+static int qaic_ssr_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
+ struct ssr_resp *resp;
+ int ret;
+
+ ret = mhi_prepare_for_transfer(mhi_dev);
+ if (ret)
+ return ret;
+
+ resp = kzalloc(sizeof(*resp) + SSR_RESP_MSG_SZ, GFP_KERNEL);
+ if (!resp) {
+ mhi_unprepare_from_transfer(mhi_dev);
+ return -ENOMEM;
+ }
+
+ resp->qdev = qdev;
+ INIT_WORK(&resp->work, ssr_worker);
+
+ ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, resp->data, SSR_RESP_MSG_SZ, MHI_EOT);
+ if (ret) {
+ kfree(resp);
+ mhi_unprepare_from_transfer(mhi_dev);
+ return ret;
+ }
+
+ dev_set_drvdata(&mhi_dev->dev, qdev);
+ qdev->ssr_ch = mhi_dev;
+
+ return 0;
+}
+
+static void qaic_ssr_mhi_remove(struct mhi_device *mhi_dev)
+{
+ struct qaic_device *qdev;
+
+ qdev = dev_get_drvdata(&mhi_dev->dev);
+ mhi_unprepare_from_transfer(qdev->ssr_ch);
+ qdev->ssr_ch = NULL;
+}
+
+static void qaic_ssr_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+ struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev);
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+ struct _ssr_hdr *hdr = mhi_result->buf_addr;
+ struct ssr_dump_info *dump_info;
+
+ if (mhi_result->transaction_status) {
+ kfree(mhi_result->buf_addr);
+ return;
+ }
+
+ /*
+ * MEMORY READ is used to download crashdump. And crashdump is
+ * downloaded chunk by chunk in a series of MEMORY READ SSR commands.
+ * Hence to avoid too many kmalloc() and kfree() of the same MEMORY READ
+ * request buffer, we allocate only one such buffer and free it only
+ * once.
+ */
+ if (le32_to_cpu(hdr->cmd) == MEMORY_READ) {
+ dump_info = ssr_crash->dump_info;
+ if (dump_info) {
+ dump_info->read_buf_req_queued = false;
+ return;
+ }
+ }
+
+ kfree(mhi_result->buf_addr);
+}
+
+static void qaic_ssr_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+ struct ssr_resp *resp = container_of(mhi_result->buf_addr, struct ssr_resp, data);
+ struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev);
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+ bool memory_read_rsp = false;
+
+ if (ssr_crash && ssr_crash->data == mhi_result->buf_addr)
+ memory_read_rsp = true;
+
+ if (mhi_result->transaction_status) {
+ /* Do not free SSR crashdump buffer as it allocated via managed APIs */
+ if (!memory_read_rsp)
+ kfree(resp);
+ return;
+ }
+
+ if (memory_read_rsp)
+ queue_work(qdev->ssr_wq, &ssr_crash->work);
+ else
+ queue_work(qdev->ssr_wq, &resp->work);
+}
+
+static const struct mhi_device_id qaic_ssr_mhi_match_table[] = {
+ { .chan = "QAIC_SSR", },
+ {},
+};
+
+static struct mhi_driver qaic_ssr_mhi_driver = {
+ .id_table = qaic_ssr_mhi_match_table,
+ .remove = qaic_ssr_mhi_remove,
+ .probe = qaic_ssr_mhi_probe,
+ .ul_xfer_cb = qaic_ssr_mhi_ul_xfer_cb,
+ .dl_xfer_cb = qaic_ssr_mhi_dl_xfer_cb,
+ .driver = {
+ .name = "qaic_ssr",
+ },
+};
+
+int qaic_ssr_init(struct qaic_device *qdev, struct drm_device *drm)
+{
+ struct ssr_crashdump *ssr_crash;
+
+ qdev->ssr_dbc = QAIC_SSR_DBC_SENTINEL;
+
+ /*
+ * Device requests only one SSR at a time. So allocating only one
+ * buffer to download crashdump is good enough.
+ */
+ ssr_crash = drmm_kzalloc(drm, SSR_MHI_BUF_SIZE, GFP_KERNEL);
+ if (!ssr_crash)
+ return -ENOMEM;
+
+ ssr_crash->qdev = qdev;
+ INIT_WORK(&ssr_crash->work, ssr_dump_worker);
+ qdev->ssr_mhi_buf = ssr_crash;
+
+ return 0;
+}
+
+int qaic_ssr_register(void)
+{
+ return mhi_driver_register(&qaic_ssr_mhi_driver);
+}
+
+void qaic_ssr_unregister(void)
+{
+ mhi_driver_unregister(&qaic_ssr_mhi_driver);
+}
diff --git a/drivers/accel/qaic/qaic_ssr.h b/drivers/accel/qaic/qaic_ssr.h
new file mode 100644
index 000000000000..97ccff305750
--- /dev/null
+++ b/drivers/accel/qaic/qaic_ssr.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __QAIC_SSR_H__
+#define __QAIC_SSR_H__
+
+struct drm_device;
+struct qaic_device;
+
+int qaic_ssr_register(void);
+void qaic_ssr_unregister(void);
+void qaic_clean_up_ssr(struct qaic_device *qdev);
+int qaic_ssr_init(struct qaic_device *qdev, struct drm_device *drm);
+#endif /* __QAIC_SSR_H__ */
diff --git a/drivers/accel/qaic/qaic_sysfs.c b/drivers/accel/qaic/qaic_sysfs.c
new file mode 100644
index 000000000000..e0afb0ffb589
--- /dev/null
+++ b/drivers/accel/qaic/qaic_sysfs.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Copyright (c) 2020-2025, The Linux Foundation. All rights reserved. */
+
+#include <drm/drm_file.h>
+#include <drm/drm_managed.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/mutex.h>
+#include <linux/sysfs.h>
+
+#include "qaic.h"
+
+#define NAME_LEN 14
+
+struct dbc_attribute {
+ struct device_attribute dev_attr;
+ u32 dbc_id;
+ char name[NAME_LEN];
+};
+
+static ssize_t dbc_state_show(struct device *dev, struct device_attribute *a, char *buf)
+{
+ struct dbc_attribute *dbc_attr = container_of(a, struct dbc_attribute, dev_attr);
+ struct drm_minor *minor = dev_get_drvdata(dev);
+ struct qaic_device *qdev;
+
+ qdev = to_qaic_device(minor->dev);
+ return sysfs_emit(buf, "%d\n", qdev->dbc[dbc_attr->dbc_id].state);
+}
+
+void set_dbc_state(struct qaic_device *qdev, u32 dbc_id, unsigned int state)
+{
+ struct device *kdev = to_accel_kdev(qdev->qddev);
+ char *envp[3] = {};
+ char state_str[16];
+ char id_str[12];
+
+ envp[0] = id_str;
+ envp[1] = state_str;
+
+ if (state >= DBC_STATE_MAX)
+ return;
+ if (dbc_id >= qdev->num_dbc)
+ return;
+ if (state == qdev->dbc[dbc_id].state)
+ return;
+
+ scnprintf(id_str, ARRAY_SIZE(id_str), "DBC_ID=%d", dbc_id);
+ scnprintf(state_str, ARRAY_SIZE(state_str), "DBC_STATE=%d", state);
+
+ qdev->dbc[dbc_id].state = state;
+ kobject_uevent_env(&kdev->kobj, KOBJ_CHANGE, envp);
+}
+
+int qaic_sysfs_init(struct qaic_drm_device *qddev)
+{
+ struct device *kdev = to_accel_kdev(qddev);
+ struct drm_device *drm = to_drm(qddev);
+ u32 num_dbc = qddev->qdev->num_dbc;
+ struct dbc_attribute *dbc_attrs;
+ int i, ret;
+
+ dbc_attrs = drmm_kcalloc(drm, num_dbc, sizeof(*dbc_attrs), GFP_KERNEL);
+ if (!dbc_attrs)
+ return -ENOMEM;
+
+ for (i = 0; i < num_dbc; ++i) {
+ struct dbc_attribute *dbc_attr = &dbc_attrs[i];
+
+ sysfs_attr_init(&dbc_attr->dev_attr.attr);
+ dbc_attr->dbc_id = i;
+ scnprintf(dbc_attr->name, NAME_LEN, "dbc%d_state", i);
+ dbc_attr->dev_attr.attr.name = dbc_attr->name;
+ dbc_attr->dev_attr.attr.mode = 0444;
+ dbc_attr->dev_attr.show = dbc_state_show;
+ ret = sysfs_create_file(&kdev->kobj, &dbc_attr->dev_attr.attr);
+ if (ret) {
+ int j;
+
+ for (j = 0; j < i; ++j) {
+ dbc_attr = &dbc_attrs[j];
+ sysfs_remove_file(&kdev->kobj, &dbc_attr->dev_attr.attr);
+ }
+ drmm_kfree(drm, dbc_attrs);
+ return ret;
+ }
+ }
+
+ qddev->sysfs_attrs = dbc_attrs;
+ return 0;
+}
+
+void qaic_sysfs_remove(struct qaic_drm_device *qddev)
+{
+ struct dbc_attribute *dbc_attrs = qddev->sysfs_attrs;
+ struct device *kdev = to_accel_kdev(qddev);
+ u32 num_dbc = qddev->qdev->num_dbc;
+ int i;
+
+ if (!dbc_attrs)
+ return;
+
+ qddev->sysfs_attrs = NULL;
+ for (i = 0; i < num_dbc; ++i)
+ sysfs_remove_file(&kdev->kobj, &dbc_attrs[i].dev_attr.attr);
+ drmm_kfree(to_drm(qddev), dbc_attrs);
+}
diff --git a/drivers/accel/qaic/qaic_timesync.c b/drivers/accel/qaic/qaic_timesync.c
index 3fac540f8e03..8af2475f4f36 100644
--- a/drivers/accel/qaic/qaic_timesync.c
+++ b/drivers/accel/qaic/qaic_timesync.c
@@ -171,6 +171,13 @@ mod_timer:
dev_err(mqtsdev->dev, "%s mod_timer error:%d\n", __func__, ret);
}
+void qaic_mqts_ch_stop_timer(struct mhi_device *mhi_dev)
+{
+ struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev);
+
+ timer_delete_sync(&mqtsdev->timer);
+}
+
static int qaic_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
{
struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
@@ -206,6 +213,7 @@ static int qaic_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_devi
timer->expires = jiffies + msecs_to_jiffies(timesync_delay_ms);
add_timer(timer);
dev_set_drvdata(&mhi_dev->dev, mqtsdev);
+ qdev->mqts_ch = mhi_dev;
return 0;
@@ -221,6 +229,7 @@ static void qaic_timesync_remove(struct mhi_device *mhi_dev)
{
struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev);
+ mqtsdev->qdev->mqts_ch = NULL;
timer_delete_sync(&mqtsdev->timer);
mhi_unprepare_from_transfer(mqtsdev->mhi_dev);
kfree(mqtsdev->sync_msg);
diff --git a/drivers/accel/qaic/qaic_timesync.h b/drivers/accel/qaic/qaic_timesync.h
index 851b7acd43bb..77b9c2b55057 100644
--- a/drivers/accel/qaic/qaic_timesync.h
+++ b/drivers/accel/qaic/qaic_timesync.h
@@ -6,6 +6,9 @@
#ifndef __QAIC_TIMESYNC_H__
#define __QAIC_TIMESYNC_H__
+#include <linux/mhi.h>
+
int qaic_timesync_init(void);
void qaic_timesync_deinit(void);
+void qaic_mqts_ch_stop_timer(struct mhi_device *mhi_dev);
#endif /* __QAIC_TIMESYNC_H__ */
diff --git a/drivers/accel/qaic/sahara.c b/drivers/accel/qaic/sahara.c
index 3ebcc1f7ff58..fd3c3b2d1fd3 100644
--- a/drivers/accel/qaic/sahara.c
+++ b/drivers/accel/qaic/sahara.c
@@ -159,6 +159,7 @@ struct sahara_context {
struct sahara_packet *rx;
struct work_struct fw_work;
struct work_struct dump_work;
+ struct work_struct read_data_work;
struct mhi_device *mhi_dev;
const char * const *image_table;
u32 table_size;
@@ -174,7 +175,10 @@ struct sahara_context {
u64 dump_image_offset;
void *mem_dump_freespace;
u64 dump_images_left;
+ u32 read_data_offset;
+ u32 read_data_length;
bool is_mem_dump_mode;
+ bool non_streaming;
};
static const char * const aic100_image_table[] = {
@@ -194,6 +198,7 @@ static const char * const aic200_image_table[] = {
[23] = "qcom/aic200/aop.mbn",
[32] = "qcom/aic200/tz.mbn",
[33] = "qcom/aic200/hypvm.mbn",
+ [38] = "qcom/aic200/xbl_config.elf",
[39] = "qcom/aic200/aic200_abl.elf",
[40] = "qcom/aic200/apdp.mbn",
[41] = "qcom/aic200/devcfg.mbn",
@@ -202,6 +207,7 @@ static const char * const aic200_image_table[] = {
[49] = "qcom/aic200/shrm.elf",
[50] = "qcom/aic200/cpucp.elf",
[51] = "qcom/aic200/aop_devcfg.mbn",
+ [54] = "qcom/aic200/qupv3fw.elf",
[57] = "qcom/aic200/cpucp_dtbs.elf",
[62] = "qcom/aic200/uefi_dtbs.elf",
[63] = "qcom/aic200/xbl_ac_config.mbn",
@@ -213,9 +219,15 @@ static const char * const aic200_image_table[] = {
[69] = "qcom/aic200/dcd.mbn",
[73] = "qcom/aic200/gearvm.mbn",
[74] = "qcom/aic200/sti.bin",
- [75] = "qcom/aic200/pvs.bin",
+ [76] = "qcom/aic200/tz_qti_config.mbn",
+ [78] = "qcom/aic200/pvs.bin",
};
+static bool is_streaming(struct sahara_context *context)
+{
+ return !context->non_streaming;
+}
+
static int sahara_find_image(struct sahara_context *context, u32 image_id)
{
int ret;
@@ -265,6 +277,8 @@ static void sahara_send_reset(struct sahara_context *context)
int ret;
context->is_mem_dump_mode = false;
+ context->read_data_offset = 0;
+ context->read_data_length = 0;
context->tx[0]->cmd = cpu_to_le32(SAHARA_RESET_CMD);
context->tx[0]->length = cpu_to_le32(SAHARA_RESET_LENGTH);
@@ -319,9 +333,39 @@ static void sahara_hello(struct sahara_context *context)
dev_err(&context->mhi_dev->dev, "Unable to send hello response %d\n", ret);
}
+static int read_data_helper(struct sahara_context *context, int buf_index)
+{
+ enum mhi_flags mhi_flag;
+ u32 pkt_data_len;
+ int ret;
+
+ pkt_data_len = min(context->read_data_length, SAHARA_PACKET_MAX_SIZE);
+
+ memcpy(context->tx[buf_index],
+ &context->firmware->data[context->read_data_offset],
+ pkt_data_len);
+
+ context->read_data_offset += pkt_data_len;
+ context->read_data_length -= pkt_data_len;
+
+ if (is_streaming(context) || !context->read_data_length)
+ mhi_flag = MHI_EOT;
+ else
+ mhi_flag = MHI_CHAIN;
+
+ ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE,
+ context->tx[buf_index], pkt_data_len, mhi_flag);
+ if (ret) {
+ dev_err(&context->mhi_dev->dev, "Unable to send read_data response %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static void sahara_read_data(struct sahara_context *context)
{
- u32 image_id, data_offset, data_len, pkt_data_len;
+ u32 image_id, data_offset, data_len;
int ret;
int i;
@@ -357,7 +401,7 @@ static void sahara_read_data(struct sahara_context *context)
* and is not needed here on error.
*/
- if (data_len > SAHARA_TRANSFER_MAX_SIZE) {
+ if (context->non_streaming && data_len > SAHARA_TRANSFER_MAX_SIZE) {
dev_err(&context->mhi_dev->dev, "Malformed read_data packet - data len %d exceeds max xfer size %d\n",
data_len, SAHARA_TRANSFER_MAX_SIZE);
sahara_send_reset(context);
@@ -378,22 +422,18 @@ static void sahara_read_data(struct sahara_context *context)
return;
}
- for (i = 0; i < SAHARA_NUM_TX_BUF && data_len; ++i) {
- pkt_data_len = min(data_len, SAHARA_PACKET_MAX_SIZE);
-
- memcpy(context->tx[i], &context->firmware->data[data_offset], pkt_data_len);
+ context->read_data_offset = data_offset;
+ context->read_data_length = data_len;
- data_offset += pkt_data_len;
- data_len -= pkt_data_len;
+ if (is_streaming(context)) {
+ schedule_work(&context->read_data_work);
+ return;
+ }
- ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE,
- context->tx[i], pkt_data_len,
- !data_len ? MHI_EOT : MHI_CHAIN);
- if (ret) {
- dev_err(&context->mhi_dev->dev, "Unable to send read_data response %d\n",
- ret);
- return;
- }
+ for (i = 0; i < SAHARA_NUM_TX_BUF && context->read_data_length; ++i) {
+ ret = read_data_helper(context, i);
+ if (ret)
+ break;
}
}
@@ -538,6 +578,7 @@ static void sahara_parse_dump_table(struct sahara_context *context)
struct sahara_memory_dump_meta_v1 *dump_meta;
u64 table_nents;
u64 dump_length;
+ u64 mul_bytes;
int ret;
u64 i;
@@ -551,8 +592,9 @@ static void sahara_parse_dump_table(struct sahara_context *context)
dev_table[i].description[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0;
dev_table[i].filename[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0;
- dump_length = size_add(dump_length, le64_to_cpu(dev_table[i].length));
- if (dump_length == SIZE_MAX) {
+ if (check_add_overflow(dump_length,
+ le64_to_cpu(dev_table[i].length),
+ &dump_length)) {
/* Discard the dump */
sahara_send_reset(context);
return;
@@ -568,14 +610,17 @@ static void sahara_parse_dump_table(struct sahara_context *context)
dev_table[i].filename);
}
- dump_length = size_add(dump_length, sizeof(*dump_meta));
- if (dump_length == SIZE_MAX) {
+ if (check_add_overflow(dump_length, (u64)sizeof(*dump_meta), &dump_length)) {
/* Discard the dump */
sahara_send_reset(context);
return;
}
- dump_length = size_add(dump_length, size_mul(sizeof(*image_out_table), table_nents));
- if (dump_length == SIZE_MAX) {
+ if (check_mul_overflow((u64)sizeof(*image_out_table), table_nents, &mul_bytes)) {
+ /* Discard the dump */
+ sahara_send_reset(context);
+ return;
+ }
+ if (check_add_overflow(dump_length, mul_bytes, &dump_length)) {
/* Discard the dump */
sahara_send_reset(context);
return;
@@ -615,7 +660,7 @@ static void sahara_parse_dump_table(struct sahara_context *context)
/* Request the first chunk of the first image */
context->dump_image = &image_out_table[0];
- dump_length = min(context->dump_image->length, SAHARA_READ_MAX_SIZE);
+ dump_length = min_t(u64, context->dump_image->length, SAHARA_READ_MAX_SIZE);
/* Avoid requesting EOI sized data so that we can identify errors */
if (dump_length == SAHARA_END_OF_IMAGE_LENGTH)
dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2;
@@ -663,7 +708,7 @@ static void sahara_parse_dump_image(struct sahara_context *context)
/* Get next image chunk */
dump_length = context->dump_image->length - context->dump_image_offset;
- dump_length = min(dump_length, SAHARA_READ_MAX_SIZE);
+ dump_length = min_t(u64, dump_length, SAHARA_READ_MAX_SIZE);
/* Avoid requesting EOI sized data so that we can identify errors */
if (dump_length == SAHARA_END_OF_IMAGE_LENGTH)
dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2;
@@ -742,6 +787,13 @@ error:
sahara_send_reset(context);
}
+static void sahara_read_data_processing(struct work_struct *work)
+{
+ struct sahara_context *context = container_of(work, struct sahara_context, read_data_work);
+
+ read_data_helper(context, 0);
+}
+
static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
{
struct sahara_context *context;
@@ -756,34 +808,56 @@ static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_
if (!context->rx)
return -ENOMEM;
+ if (!strcmp(mhi_dev->mhi_cntrl->name, "AIC200")) {
+ context->image_table = aic200_image_table;
+ context->table_size = ARRAY_SIZE(aic200_image_table);
+ } else {
+ context->image_table = aic100_image_table;
+ context->table_size = ARRAY_SIZE(aic100_image_table);
+ context->non_streaming = true;
+ }
+
/*
- * AIC100 defines SAHARA_TRANSFER_MAX_SIZE as the largest value it
- * will request for READ_DATA. This is larger than
- * SAHARA_PACKET_MAX_SIZE, and we need 9x SAHARA_PACKET_MAX_SIZE to
- * cover SAHARA_TRANSFER_MAX_SIZE. When the remote side issues a
- * READ_DATA, it requires a transfer of the exact size requested. We
- * can use MHI_CHAIN to link multiple buffers into a single transfer
- * but the remote side will not consume the buffers until it sees an
- * EOT, thus we need to allocate enough buffers to put in the tx fifo
- * to cover an entire READ_DATA request of the max size.
+ * There are two firmware implementations for READ_DATA handling.
+ * The older "SBL" implementation defines a Sahara transfer size, and
+ * expects that the response is a single transport transfer. If the
+ * FW wants to transfer a file that is larger than the transfer size,
+ * the FW will issue multiple READ_DATA commands. For this
+ * implementation, we need to allocate enough buffers to contain the
+ * entire Sahara transfer size.
+ *
+ * The newer "XBL" implementation does not define a maximum transfer
+ * size and instead expects the data to be streamed over using the
+ * transport level MTU. The FW will issue a single READ_DATA command
+ * of whatever size, and consume multiple transport level transfers
+ * until the expected amount of data is consumed. For this
+ * implementation we only need a single buffer of the transport MTU
+ * but we'll need to be able to use it multiple times for a single
+ * READ_DATA request.
+ *
+ * AIC100 is the SBL implementation and defines SAHARA_TRANSFER_MAX_SIZE
+ * and we need 9x SAHARA_PACKET_MAX_SIZE to cover that. We can use
+ * MHI_CHAIN to link multiple buffers into a single transfer but the
+ * remote side will not consume the buffers until it sees an EOT, thus
+ * we need to allocate enough buffers to put in the tx fifo to cover an
+ * entire READ_DATA request of the max size.
+ *
+ * AIC200 is the XBL implementation, and so a single buffer will work.
*/
for (i = 0; i < SAHARA_NUM_TX_BUF; ++i) {
- context->tx[i] = devm_kzalloc(&mhi_dev->dev, SAHARA_PACKET_MAX_SIZE, GFP_KERNEL);
+ context->tx[i] = devm_kzalloc(&mhi_dev->dev,
+ SAHARA_PACKET_MAX_SIZE,
+ GFP_KERNEL);
if (!context->tx[i])
return -ENOMEM;
+ if (is_streaming(context))
+ break;
}
context->mhi_dev = mhi_dev;
INIT_WORK(&context->fw_work, sahara_processing);
INIT_WORK(&context->dump_work, sahara_dump_processing);
-
- if (!strcmp(mhi_dev->mhi_cntrl->name, "AIC200")) {
- context->image_table = aic200_image_table;
- context->table_size = ARRAY_SIZE(aic200_image_table);
- } else {
- context->image_table = aic100_image_table;
- context->table_size = ARRAY_SIZE(aic100_image_table);
- }
+ INIT_WORK(&context->read_data_work, sahara_read_data_processing);
context->active_image_id = SAHARA_IMAGE_ID_NONE;
dev_set_drvdata(&mhi_dev->dev, context);
@@ -814,6 +888,10 @@ static void sahara_mhi_remove(struct mhi_device *mhi_dev)
static void sahara_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
{
+ struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev);
+
+ if (!mhi_result->transaction_status && context->read_data_length && is_streaming(context))
+ schedule_work(&context->read_data_work);
}
static void sahara_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)