author		Arnaldo Carvalho de Melo <acme@redhat.com>	2021-03-08 10:11:33 -0300
committer	Arnaldo Carvalho de Melo <acme@redhat.com>	2021-03-08 10:11:33 -0300
commit		009ef05f98129aa91c62c3baab859ba593a15bb2 (patch)
tree		f3414f08d636a597545b1e4f443b373b9d6d8f4b /drivers/misc/habanalabs/common/hw_queue.c
parent		2777b81b379df772defd654bc4d3fa82dca17a4b (diff)
parent		144c79ef33536b4ecb4951e07dbc1f2b7fa99d32 (diff)
Merge remote-tracking branch 'torvalds/master' into perf/core
To pick up the fixes sent for v5.12 and continue development based on
v5.12-rc2, i.e. without the swap on file bug.
This also gets a slightly newer and better version of the
tools/perf/arch/arm/util/cs-etm.c patch, using the BIT() macro, that had
already been slated for v5.13 but ended up going into v5.12-rc1 in an
older version.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
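
For context, BIT() is the standard kernel bit helper (include/linux/bits.h,
mirrored for userspace tools under tools/include/); in essence:

	#define BIT(nr)		(1UL << (nr))

so the newer cs-etm.c patch version expresses single-bit masks as BIT(n)
rather than open-coded shifts.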
Diffstat (limited to 'drivers/misc/habanalabs/common/hw_queue.c')
-rw-r--r--	drivers/misc/habanalabs/common/hw_queue.c	51
1 file changed, 46 insertions(+), 5 deletions(-)
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 76217258780a..0f335182267f 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -38,7 +38,7 @@ static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
 		return (abs(delta) - queue_len);
 }
 
-void hl_int_hw_queue_update_ci(struct hl_cs *cs)
+void hl_hw_queue_update_ci(struct hl_cs *cs)
 {
 	struct hl_device *hdev = cs->ctx->hdev;
 	struct hl_hw_queue *q;
@@ -53,8 +53,13 @@ void hl_int_hw_queue_update_ci(struct hl_cs *cs)
 	if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW)
 		return;
 
+	/* We must increment CI for every queue that will never get a
+	 * completion, there are 2 scenarios this can happen:
+	 * 1. All queues of a non completion CS will never get a completion.
+	 * 2. Internal queues never gets completion.
+	 */
 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
-		if (q->queue_type == QUEUE_TYPE_INT)
+		if (!cs_needs_completion(cs) || q->queue_type == QUEUE_TYPE_INT)
 			atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
 	}
 }
@@ -292,6 +297,10 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
 	len = job->job_cb_size;
 	ptr = cb->bus_address;
 
+	/* Skip completion flow in case this is a non completion CS */
+	if (!cs_needs_completion(job->cs))
+		goto submit_bd;
+
 	cq_pkt.data = cpu_to_le32(
 			((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
 				& CQ_ENTRY_SHADOW_INDEX_MASK) |
@@ -318,6 +327,7 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
 
 	cq->pi = hl_cq_inc_ptr(cq->pi);
 
+submit_bd:
 	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
 }
 
@@ -525,6 +535,7 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 	struct hl_cs_job *job, *tmp;
 	struct hl_hw_queue *q;
 	int rc = 0, i, cq_cnt;
+	bool first_entry;
 	u32 max_queues;
 
 	cntr = &hdev->aggregated_cs_counters;
@@ -548,7 +559,9 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 		switch (q->queue_type) {
 		case QUEUE_TYPE_EXT:
 			rc = ext_queue_sanity_checks(hdev, q,
-					cs->jobs_in_queue_cnt[i], true);
+					cs->jobs_in_queue_cnt[i],
+					cs_needs_completion(cs) ?
+							true : false);
 			break;
 		case QUEUE_TYPE_INT:
 			rc = int_queue_sanity_checks(hdev, q,
@@ -583,12 +596,38 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 		hdev->asic_funcs->collective_wait_init_cs(cs);
 
 	spin_lock(&hdev->cs_mirror_lock);
+
+	/* Verify staged CS exists and add to the staged list */
+	if (cs->staged_cs && !cs->staged_first) {
+		struct hl_cs *staged_cs;
+
+		staged_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
+		if (!staged_cs) {
+			dev_err(hdev->dev,
+				"Cannot find staged submission sequence %llu",
+				cs->staged_sequence);
+			rc = -EINVAL;
+			goto unlock_cs_mirror;
+		}
+
+		if (is_staged_cs_last_exists(hdev, staged_cs)) {
+			dev_err(hdev->dev,
+				"Staged submission sequence %llu already submitted",
+				cs->staged_sequence);
+			rc = -EINVAL;
+			goto unlock_cs_mirror;
+		}
+
+		list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);
+	}
+
 	list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);
 
 	/* Queue TDR if the CS is the first entry and if timeout is wanted */
+	first_entry = list_first_entry(&hdev->cs_mirror_list,
+					struct hl_cs, mirror_node) == cs;
 	if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
-			(list_first_entry(&hdev->cs_mirror_list,
-					struct hl_cs, mirror_node) == cs)) {
+				first_entry && cs_needs_timeout(cs)) {
 		cs->tdr_active = true;
 		schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
 
@@ -623,6 +662,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 
 	goto out;
 
+unlock_cs_mirror:
+	spin_unlock(&hdev->cs_mirror_lock);
 unroll_cq_resv:
 	q = &hdev->kernel_queues[0];
 	for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
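
The new behavior hinges on the cs_needs_completion() and cs_needs_timeout()
predicates, which are defined elsewhere in the driver and only called here.
As a rough sketch of what they plausibly look like, inferred from the call
sites in this diff (the staged_last field is an assumption, by analogy with
the staged_first field used above):

	/* Sketch only: inferred from this diff, not copied from the
	 * driver. A staged submission is a chain of CSs that share one
	 * completion and one timeout.
	 */
	static inline bool cs_needs_completion(struct hl_cs *cs)
	{
		/* Only the last CS of a staged chain signals completion;
		 * a non-staged CS always does. (staged_last is assumed.)
		 */
		if (cs->staged_cs && !cs->staged_last)
			return false;

		return true;
	}

	static inline bool cs_needs_timeout(struct hl_cs *cs)
	{
		/* Only the first CS of a staged chain arms the TDR timer,
		 * matching the first_entry check in hl_hw_queue_schedule_cs().
		 */
		if (cs->staged_cs && !cs->staged_first)
			return false;

		return true;
	}

Read this way, the hunks fit together: a CS that needs no completion gets its
CI advanced immediately in hl_hw_queue_update_ci(), skips building the CQ
packet in ext_queue_schedule_job() by jumping to the new submit_bd label,
passes its completion requirement down to ext_queue_sanity_checks(), and arms
the TDR only when it is both the first mirror-list entry and a CS that wants
a timeout.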