summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/i915/Kconfig.debug1
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c6
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c38
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c26
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c8
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_execlists.c12
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_tlb.c11
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h30
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c81
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h11
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c17
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c32
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.h2
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c2
16 files changed, 213 insertions, 89 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 47e845353ffa..2d21930d5501 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -157,6 +157,7 @@ config DRM_I915_SW_FENCE_CHECK_DAG
config DRM_I915_DEBUG_GUC
bool "Enable additional driver debugging for GuC"
depends on DRM_I915
+ select STACKDEPOT
default n
help
Choose this option to turn on extra driver debugging that may affect
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 5402a7bbcb1d..9a9ff84c90d7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -964,7 +964,11 @@ static int intel_context_set_gem(struct intel_context *ce,
RCU_INIT_POINTER(ce->gem_context, ctx);
GEM_BUG_ON(intel_context_is_pinned(ce));
- ce->ring_size = SZ_16K;
+
+ if (ce->engine->class == COMPUTE_CLASS)
+ ce->ring_size = SZ_512K;
+ else
+ ce->ring_size = SZ_16K;
i915_vm_put(ce->vm);
ce->vm = i915_gem_context_get_eb_vm(ctx);
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index e1c76e5bfa82..23857cc08eca 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -177,14 +177,40 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv
return cs;
}
+static int mtl_dummy_pipe_control(struct i915_request *rq)
+{
+ /* Wa_14016712196 */
+ if (IS_MTL_GRAPHICS_STEP(rq->engine->i915, M, STEP_A0, STEP_B0) ||
+ IS_MTL_GRAPHICS_STEP(rq->engine->i915, P, STEP_A0, STEP_B0)) {
+ u32 *cs;
+
+ /* dummy PIPE_CONTROL + depth flush */
+ cs = intel_ring_begin(rq, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+ cs = gen12_emit_pipe_control(cs,
+ 0,
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH,
+ LRC_PPHWSP_SCRATCH_ADDR);
+ intel_ring_advance(rq, cs);
+ }
+
+ return 0;
+}
+
int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
struct intel_engine_cs *engine = rq->engine;
if (mode & EMIT_FLUSH) {
u32 flags = 0;
+ int err;
u32 *cs;
+ err = mtl_dummy_pipe_control(rq);
+ if (err)
+ return err;
+
flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
flags |= PIPE_CONTROL_FLUSH_L3;
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
@@ -217,6 +243,11 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
if (mode & EMIT_INVALIDATE) {
u32 flags = 0;
u32 *cs, count;
+ int err;
+
+ err = mtl_dummy_pipe_control(rq);
+ if (err)
+ return err;
flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TLB_INVALIDATE;
@@ -733,6 +764,13 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
+ /* Wa_14016712196 */
+ if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
+ IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ /* dummy PIPE_CONTROL + depth flush */
+ cs = gen12_emit_pipe_control(cs, 0,
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);
+
if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
/* Wa_1409600907 */
flags |= PIPE_CONTROL_DEPTH_STALL;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 2a7942fac798..122197737ef2 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -1015,16 +1015,16 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
/*
* For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
- * so these PTE encode functions are left with using cache_level.
+ * so the switch-case statements in these PTE encode functions are still valid.
* See translation table LEGACY_CACHELEVEL.
*/
static u64 snb_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
- switch (level) {
+ switch (pat_index) {
case I915_CACHE_L3_LLC:
case I915_CACHE_LLC:
pte |= GEN6_PTE_CACHE_LLC;
@@ -1033,19 +1033,19 @@ static u64 snb_pte_encode(dma_addr_t addr,
pte |= GEN6_PTE_UNCACHED;
break;
default:
- MISSING_CASE(level);
+ MISSING_CASE(pat_index);
}
return pte;
}
static u64 ivb_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
- switch (level) {
+ switch (pat_index) {
case I915_CACHE_L3_LLC:
pte |= GEN7_PTE_CACHE_L3_LLC;
break;
@@ -1056,14 +1056,14 @@ static u64 ivb_pte_encode(dma_addr_t addr,
pte |= GEN6_PTE_UNCACHED;
break;
default:
- MISSING_CASE(level);
+ MISSING_CASE(pat_index);
}
return pte;
}
static u64 byt_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
@@ -1071,31 +1071,31 @@ static u64 byt_pte_encode(dma_addr_t addr,
if (!(flags & PTE_READ_ONLY))
pte |= BYT_PTE_WRITEABLE;
- if (level != I915_CACHE_NONE)
+ if (pat_index != I915_CACHE_NONE)
pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
return pte;
}
static u64 hsw_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
- if (level != I915_CACHE_NONE)
+ if (pat_index != I915_CACHE_NONE)
pte |= HSW_WB_LLC_AGE3;
return pte;
}
static u64 iris_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
- switch (level) {
+ switch (pat_index) {
case I915_CACHE_NONE:
break;
case I915_CACHE_WT:
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
index d6a74ae2527b..866c416afb73 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
@@ -18,10 +18,10 @@
static void gmch_ggtt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 unused)
{
- unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+ unsigned int flags = (pat_index == I915_CACHE_NONE) ?
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
@@ -29,10 +29,10 @@ static void gmch_ggtt_insert_page(struct i915_address_space *vm,
static void gmch_ggtt_insert_entries(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 unused)
{
- unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+ unsigned int flags = (pat_index == I915_CACHE_NONE) ?
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
intel_gmch_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 736b89a8ecf5..4202df5b8c12 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -1530,8 +1530,8 @@ static int live_busywait_preempt(void *arg)
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
enum intel_engine_id id;
- int err = -ENOMEM;
u32 *map;
+ int err;
/*
* Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
@@ -1539,13 +1539,17 @@ static int live_busywait_preempt(void *arg)
*/
ctx_hi = kernel_context(gt->i915, NULL);
- if (!ctx_hi)
- return -ENOMEM;
+ if (IS_ERR(ctx_hi))
+ return PTR_ERR(ctx_hi);
+
ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
ctx_lo = kernel_context(gt->i915, NULL);
- if (!ctx_lo)
+ if (IS_ERR(ctx_lo)) {
+ err = PTR_ERR(ctx_lo);
goto err_ctx_hi;
+ }
+
ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c
index 4493c8518e91..3bd6b540257b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_tlb.c
+++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c
@@ -190,11 +190,18 @@ out:
static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
{
+ struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
+ resource_size_t size = SZ_1G;
+
/*
* Allocation of largest possible page size allows to test all types
- * of pages.
+ * of pages. To succeed with both allocations, especially in case of Small
+ * BAR, try to allocate no more than quarter of mappable memory.
*/
- return i915_gem_object_create_lmem(gt->i915, SZ_1G, I915_BO_ALLOC_CONTIGUOUS);
+ if (mr && size > mr->io_size / 4)
+ size = mr->io_size / 4;
+
+ return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
}
static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
index 28b8387f97b7..f7d70db16d76 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
@@ -167,25 +167,4 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
* - **flags**, holds various bits to control message handling
*/
-/*
- * Definition of the command transport message header (DW0)
- *
- * bit[4..0] message len (in dwords)
- * bit[7..5] reserved
- * bit[8] response (G2H only)
- * bit[8] write fence to desc (H2G only)
- * bit[9] write status to H2G buff (H2G only)
- * bit[10] send status back via G2H (H2G only)
- * bit[15..11] reserved
- * bit[31..16] action code
- */
-#define GUC_CT_MSG_LEN_SHIFT 0
-#define GUC_CT_MSG_LEN_MASK 0x1F
-#define GUC_CT_MSG_IS_RESPONSE (1 << 8)
-#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8)
-#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9)
-#define GUC_CT_MSG_SEND_STATUS (1 << 10)
-#define GUC_CT_MSG_ACTION_SHIFT 16
-#define GUC_CT_MSG_ACTION_MASK 0xFFFF
-
#endif /* _ABI_GUC_COMMUNICATION_CTB_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h
index 7d5ba4d97d70..98eb4f46572b 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h
@@ -24,6 +24,7 @@
* | | 30:28 | **TYPE** - message type |
* | | | - _`GUC_HXG_TYPE_REQUEST` = 0 |
* | | | - _`GUC_HXG_TYPE_EVENT` = 1 |
+ * | | | - _`GUC_HXG_TYPE_FAST_REQUEST` = 2 |
* | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 |
* | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 |
* | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 |
@@ -46,6 +47,7 @@
#define GUC_HXG_MSG_0_TYPE (0x7 << 28)
#define GUC_HXG_TYPE_REQUEST 0u
#define GUC_HXG_TYPE_EVENT 1u
+#define GUC_HXG_TYPE_FAST_REQUEST 2u
#define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u
#define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u
#define GUC_HXG_TYPE_RESPONSE_FAILURE 6u
@@ -90,6 +92,34 @@
#define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD
/**
+ * DOC: HXG Fast Request
+ *
+ * The `HXG Request`_ message should be used to initiate asynchronous activity
+ * for which confirmation or return data is not expected.
+ *
+ * If confirmation is required then `HXG Request`_ shall be used instead.
+ *
+ * The recipient of this message may only use `HXG Failure`_ message if it was
+ * unable to accept this request (like invalid data).
+ *
+ * Format of `HXG Fast Request`_ message is same as `HXG Request`_ except @TYPE.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN - see `HXG Message`_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = `GUC_HXG_TYPE_FAST_REQUEST`_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 - see `HXG Request`_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION - see `HXG Request`_ |
+ * +---+-------+--------------------------------------------------------------+
+ * |...| | DATAn - see `HXG Request`_ |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+/**
* DOC: HXG Event
*
* The `HXG Event`_ message should be used to initiate asynchronous activity
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
index ebee0b5a2c1d..5f138de3c14f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
@@ -5,8 +5,8 @@
#include <linux/component.h>
-#include "drm/i915_component.h"
-#include "drm/i915_gsc_proxy_mei_interface.h"
+#include <drm/i915_component.h>
+#include <drm/i915_gsc_proxy_mei_interface.h>
#include "gt/intel_gt.h"
#include "gt/intel_gt_print.h"
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index a22e33f37cae..f28a3a83742d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -376,6 +376,24 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct)
}
}
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static void ct_track_lost_and_found(struct intel_guc_ct *ct, u32 fence, u32 action)
+{
+ unsigned int lost = fence % ARRAY_SIZE(ct->requests.lost_and_found);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+ unsigned long entries[SZ_32];
+ unsigned int n;
+
+ n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+
+ /* May be called under spinlock, so avoid sleeping */
+ ct->requests.lost_and_found[lost].stack = stack_depot_save(entries, n, GFP_NOWAIT);
+#endif
+ ct->requests.lost_and_found[lost].fence = fence;
+ ct->requests.lost_and_found[lost].action = action;
+}
+#endif
+
static u32 ct_get_next_fence(struct intel_guc_ct *ct)
{
/* For now it's trivial */
@@ -426,11 +444,11 @@ static int ct_write(struct intel_guc_ct *ct,
FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence);
- type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_EVENT :
+ type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_FAST_REQUEST :
GUC_HXG_TYPE_REQUEST;
hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, type) |
- FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
- GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION |
+ GUC_HXG_REQUEST_MSG_0_DATA0, action[0]);
CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n",
tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
@@ -447,6 +465,11 @@ static int ct_write(struct intel_guc_ct *ct,
}
GEM_BUG_ON(tail > size);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+ ct_track_lost_and_found(ct, fence,
+ FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, action[0]));
+#endif
+
/*
* make sure H2G buffer update and LRC tail update (if this triggering a
* submission) are visible before updating the descriptor tail
@@ -675,7 +698,7 @@ static int ct_send(struct intel_guc_ct *ct,
GEM_BUG_ON(!ct->enabled);
GEM_BUG_ON(!len);
- GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
+ GEM_BUG_ON(len > GUC_CTB_HXG_MSG_MAX_LEN - GUC_CTB_HDR_LEN);
GEM_BUG_ON(!response_buf && response_buf_size);
might_sleep();
@@ -953,6 +976,43 @@ corrupted:
return -EPIPE;
}
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
+{
+ unsigned int n;
+ char *buf = NULL;
+ bool found = false;
+
+ lockdep_assert_held(&ct->requests.lock);
+
+ for (n = 0; n < ARRAY_SIZE(ct->requests.lost_and_found); n++) {
+ if (ct->requests.lost_and_found[n].fence != fence)
+ continue;
+ found = true;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+ buf = kmalloc(SZ_4K, GFP_NOWAIT);
+ if (buf && stack_depot_snprint(ct->requests.lost_and_found[n].stack,
+ buf, SZ_4K, 0)) {
+ CT_ERROR(ct, "Fence %u was used by action %#04x sent at\n%s",
+ fence, ct->requests.lost_and_found[n].action, buf);
+ break;
+ }
+#endif
+ CT_ERROR(ct, "Fence %u was used by action %#04x\n",
+ fence, ct->requests.lost_and_found[n].action);
+ break;
+ }
+ kfree(buf);
+ return found;
+}
+#else
+static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
+{
+ return false;
+}
+#endif
+
static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *response)
{
u32 len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, response->msg[0]);
@@ -994,12 +1054,13 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
break;
}
if (!found) {
- CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence);
- CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n", fence,
- ct->requests.last_fence);
- list_for_each_entry(req, &ct->requests.pending, link)
- CT_ERROR(ct, "request %u awaits response\n",
- req->fence);
+ CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n",
+ len, hxg[0], fence, ct->requests.last_fence);
+ if (!ct_check_lost_and_found(ct, fence)) {
+ list_for_each_entry(req, &ct->requests.pending, link)
+ CT_ERROR(ct, "request %u awaits response\n",
+ req->fence);
+ }
err = -ENOKEY;
}
spin_unlock_irqrestore(&ct->requests.lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index 818415b64f4d..58e42901ff49 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -8,6 +8,7 @@
#include <linux/interrupt.h>
#include <linux/spinlock.h>
+#include <linux/stackdepot.h>
#include <linux/workqueue.h>
#include <linux/ktime.h>
#include <linux/wait.h>
@@ -81,6 +82,16 @@ struct intel_guc_ct {
struct list_head incoming; /* incoming requests */
struct work_struct worker; /* handler for incoming requests */
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+ struct {
+ u16 fence;
+ u16 action;
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+ depot_stack_handle_t stack;
+#endif
+ } lost_and_found[SZ_16];
+#endif
} requests;
/** @stall_time: time of first time a CTB submission is stalled */
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 19d5652300ee..58284156428d 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -877,12 +877,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
stream->oa_buffer.last_ctx_id = ctx_id;
}
- /*
- * Clear out the report id and timestamp as a means to detect unlanded
- * reports.
- */
- oa_report_id_clear(stream, report32);
- oa_timestamp_clear(stream, report32);
+ if (is_power_of_2(report_size)) {
+ /*
+ * Clear out the report id and timestamp as a means
+ * to detect unlanded reports.
+ */
+ oa_report_id_clear(stream, report32);
+ oa_timestamp_clear(stream, report32);
+ } else {
+ /* Zero out the entire report */
+ memset(report32, 0, report_size);
+ }
}
if (start_offset != *offset) {
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index a814583e19fd..f96fe92dca4e 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -139,7 +139,7 @@ static u32 frequency_enabled_mask(void)
return mask;
}
-static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
+static bool pmu_needs_timer(struct i915_pmu *pmu)
{
struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
u32 enable;
@@ -158,16 +158,10 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
/*
- * When the GPU is idle per-engine counters do not need to be
- * running so clear those bits out.
- */
- if (!gpu_active)
- enable &= ~ENGINE_SAMPLE_MASK;
- /*
* Also there is software busyness tracking available we do not
* need the timer for I915_SAMPLE_BUSY counter.
*/
- else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
+ if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
enable &= ~BIT(I915_SAMPLE_BUSY);
/*
@@ -197,31 +191,21 @@ static inline s64 ktime_since_raw(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
}
-static unsigned int
-__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
- unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
-
- GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
-
- return idx;
-}
-
static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
{
- return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
+ return pmu->sample[gt_id][sample].cur;
}
static void
store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
{
- pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
+ pmu->sample[gt_id][sample].cur = val;
}
static void
add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
{
- pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
+ pmu->sample[gt_id][sample].cur += mul_u32_u32(val, mul);
}
static u64 get_rc6(struct intel_gt *gt)
@@ -295,7 +279,7 @@ static void park_rc6(struct intel_gt *gt)
static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
{
- if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
+ if (!pmu->timer_enabled && pmu_needs_timer(pmu)) {
pmu->timer_enabled = true;
pmu->timer_last = ktime_get();
hrtimer_start_range_ns(&pmu->timer,
@@ -321,7 +305,7 @@ void i915_pmu_gt_parked(struct intel_gt *gt)
*/
pmu->unparked &= ~BIT(gt->info.id);
if (pmu->unparked == 0)
- pmu->timer_enabled = pmu_needs_timer(pmu, false);
+ pmu->timer_enabled = false;
spin_unlock_irq(&pmu->lock);
}
@@ -827,7 +811,7 @@ static void i915_pmu_disable(struct perf_event *event)
*/
if (--pmu->enable_count[bit] == 0) {
pmu->enable &= ~BIT(bit);
- pmu->timer_enabled &= pmu_needs_timer(pmu, true);
+ pmu->timer_enabled &= pmu_needs_timer(pmu);
}
spin_unlock_irqrestore(&pmu->lock, flags);
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 33d80fbaab8b..d20592e7db99 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -127,7 +127,7 @@ struct i915_pmu {
* Only global counters are held here, while the per-engine ones are in
* struct intel_engine_cs.
*/
- struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
+ struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
/**
* @sleep_last: Last time GT parked for RC6 estimation.
*/
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
index 8dc41de3f6f7..a217821eb0fb 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
@@ -143,7 +143,7 @@ gsccs_send_message(struct intel_pxp *pxp,
reply_size = header->message_size - sizeof(*header);
if (reply_size > msg_out_size_max) {
- drm_warn(&i915->drm, "caller with insufficient PXP reply size %u (%ld)\n",
+ drm_warn(&i915->drm, "caller with insufficient PXP reply size %u (%zu)\n",
reply_size, msg_out_size_max);
reply_size = msg_out_size_max;
}