-rw-r--r--  drivers/gpu/drm/i915/Kconfig.debug                          |  1
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context.c                 |  6
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_engine_cs.c                    | 38
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ggtt.c                        | 26
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c                   |  8
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_execlists.c                | 12
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_tlb.c                      | 11
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h  | 21
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h           | 30
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c                |  4
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c                   | 81
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h                   | 11
-rw-r--r--  drivers/gpu/drm/i915/i915_perf.c                            | 17
-rw-r--r--  drivers/gpu/drm/i915/i915_pmu.c                             | 32
-rw-r--r--  drivers/gpu/drm/i915/i915_pmu.h                             |  2
-rw-r--r--  drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c                  |  2
16 files changed, 213 insertions(+), 89 deletions(-)
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 47e845353ffa..2d21930d5501 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -157,6 +157,7 @@ config DRM_I915_SW_FENCE_CHECK_DAG
 config DRM_I915_DEBUG_GUC
 	bool "Enable additional driver debugging for GuC"
 	depends on DRM_I915
+	select STACKDEPOT
 	default n
 	help
 	  Choose this option to turn on extra driver debugging that may affect
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 5402a7bbcb1d..9a9ff84c90d7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -964,7 +964,11 @@ static int intel_context_set_gem(struct intel_context *ce,
 	RCU_INIT_POINTER(ce->gem_context, ctx);
 
 	GEM_BUG_ON(intel_context_is_pinned(ce));
-	ce->ring_size = SZ_16K;
+
+	if (ce->engine->class == COMPUTE_CLASS)
+		ce->ring_size = SZ_512K;
+	else
+		ce->ring_size = SZ_16K;
 
 	i915_vm_put(ce->vm);
 	ce->vm = i915_gem_context_get_eb_vm(ctx);
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index e1c76e5bfa82..23857cc08eca 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -177,14 +177,40 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv
 	return cs;
 }
 
+static int mtl_dummy_pipe_control(struct i915_request *rq)
+{
+	/* Wa_14016712196 */
+	if (IS_MTL_GRAPHICS_STEP(rq->engine->i915, M, STEP_A0, STEP_B0) ||
+	    IS_MTL_GRAPHICS_STEP(rq->engine->i915, P, STEP_A0, STEP_B0)) {
+		u32 *cs;
+
+		/* dummy PIPE_CONTROL + depth flush */
+		cs = intel_ring_begin(rq, 6);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+		cs = gen12_emit_pipe_control(cs,
+					     0,
+					     PIPE_CONTROL_DEPTH_CACHE_FLUSH,
+					     LRC_PPHWSP_SCRATCH_ADDR);
+		intel_ring_advance(rq, cs);
+	}
+
+	return 0;
+}
+
 int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 {
 	struct intel_engine_cs *engine = rq->engine;
 
 	if (mode & EMIT_FLUSH) {
 		u32 flags = 0;
+		int err;
 		u32 *cs;
 
+		err = mtl_dummy_pipe_control(rq);
+		if (err)
+			return err;
+
 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_FLUSH_L3;
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
@@ -217,6 +243,11 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 	if (mode & EMIT_INVALIDATE) {
 		u32 flags = 0;
 		u32 *cs, count;
+		int err;
+
+		err = mtl_dummy_pipe_control(rq);
+		if (err)
+			return err;
 
 		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
@@ -733,6 +764,13 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 		     PIPE_CONTROL_DC_FLUSH_ENABLE |
 		     PIPE_CONTROL_FLUSH_ENABLE);
 
+	/* Wa_14016712196 */
+	if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
+	    IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+		/* dummy PIPE_CONTROL + depth flush */
+		cs = gen12_emit_pipe_control(cs, 0,
+					     PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);
+
 	if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
 		/* Wa_1409600907 */
 		flags |= PIPE_CONTROL_DEPTH_STALL;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 2a7942fac798..122197737ef2 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -1015,16 +1015,16 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 
 /*
  * For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
- * so these PTE encode functions are left with using cache_level.
+ * so the switch-case statements in these PTE encode functions are still valid.
  * See translation table LEGACY_CACHELEVEL.
  */
 static u64 snb_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
+			  unsigned int pat_index,
 			  u32 flags)
 {
 	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
 
-	switch (level) {
+	switch (pat_index) {
 	case I915_CACHE_L3_LLC:
 	case I915_CACHE_LLC:
 		pte |= GEN6_PTE_CACHE_LLC;
@@ -1033,19 +1033,19 @@ static u64 snb_pte_encode(dma_addr_t addr,
 		pte |= GEN6_PTE_UNCACHED;
 		break;
 	default:
-		MISSING_CASE(level);
+		MISSING_CASE(pat_index);
 	}
 
 	return pte;
 }
 
 static u64 ivb_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
+			  unsigned int pat_index,
 			  u32 flags)
 {
 	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
 
-	switch (level) {
+	switch (pat_index) {
 	case I915_CACHE_L3_LLC:
 		pte |= GEN7_PTE_CACHE_L3_LLC;
 		break;
@@ -1056,14 +1056,14 @@ static u64 ivb_pte_encode(dma_addr_t addr,
 		pte |= GEN6_PTE_UNCACHED;
 		break;
 	default:
-		MISSING_CASE(level);
+		MISSING_CASE(pat_index);
 	}
 
 	return pte;
 }
 
 static u64 byt_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
+			  unsigned int pat_index,
 			  u32 flags)
 {
 	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
@@ -1071,31 +1071,31 @@ static u64 byt_pte_encode(dma_addr_t addr,
 	if (!(flags & PTE_READ_ONLY))
 		pte |= BYT_PTE_WRITEABLE;
 
-	if (level != I915_CACHE_NONE)
+	if (pat_index != I915_CACHE_NONE)
 		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
 
 	return pte;
 }
 
 static u64 hsw_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
+			  unsigned int pat_index,
 			  u32 flags)
 {
 	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
 
-	if (level != I915_CACHE_NONE)
+	if (pat_index != I915_CACHE_NONE)
 		pte |= HSW_WB_LLC_AGE3;
 
 	return pte;
 }
 
 static u64 iris_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
+			  unsigned int pat_index,
 			  u32 flags)
 {
 	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
 
-	switch (level) {
+	switch (pat_index) {
 	case I915_CACHE_NONE:
 		break;
 	case I915_CACHE_WT:
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
index d6a74ae2527b..866c416afb73 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
@@ -18,10 +18,10 @@
 static void gmch_ggtt_insert_page(struct i915_address_space *vm,
 				  dma_addr_t addr,
 				  u64 offset,
-				  enum i915_cache_level cache_level,
+				  unsigned int pat_index,
 				  u32 unused)
 {
-	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+	unsigned int flags = (pat_index == I915_CACHE_NONE) ?
 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
 
 	intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
@@ -29,10 +29,10 @@ static void gmch_ggtt_insert_page(struct i915_address_space *vm,
 
 static void gmch_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct i915_vma_resource *vma_res,
-				     enum i915_cache_level cache_level,
+				     unsigned int pat_index,
 				     u32 unused)
 {
-	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+	unsigned int flags = (pat_index == I915_CACHE_NONE) ?
 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
 
 	intel_gmch_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 736b89a8ecf5..4202df5b8c12 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -1530,8 +1530,8 @@ static int live_busywait_preempt(void *arg)
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	enum intel_engine_id id;
-	int err = -ENOMEM;
 	u32 *map;
+	int err;
 
 	/*
 	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
@@ -1539,13 +1539,17 @@ static int live_busywait_preempt(void *arg)
 	 */
 
 	ctx_hi = kernel_context(gt->i915, NULL);
-	if (!ctx_hi)
-		return -ENOMEM;
+	if (IS_ERR(ctx_hi))
+		return PTR_ERR(ctx_hi);
+
 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
 
 	ctx_lo = kernel_context(gt->i915, NULL);
-	if (!ctx_lo)
+	if (IS_ERR(ctx_lo)) {
+		err = PTR_ERR(ctx_lo);
 		goto err_ctx_hi;
+	}
+
 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
 
 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c
index 4493c8518e91..3bd6b540257b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_tlb.c
+++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c
@@ -190,11 +190,18 @@ out:
 
 static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
 {
+	struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
+	resource_size_t size = SZ_1G;
+
 	/*
 	 * Allocation of largest possible page size allows to test all types
-	 * of pages.
+	 * of pages. To succeed with both allocations, especially in case of Small
+	 * BAR, try to allocate no more than quarter of mappable memory.
 	 */
-	return i915_gem_object_create_lmem(gt->i915, SZ_1G, I915_BO_ALLOC_CONTIGUOUS);
+	if (mr && size > mr->io_size / 4)
+		size = mr->io_size / 4;
+
+	return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
 }
 
 static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
index 28b8387f97b7..f7d70db16d76 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
@@ -167,25 +167,4 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
  *  - **flags**, holds various bits to control message handling
  */
 
-/*
- * Definition of the command transport message header (DW0)
- *
- * bit[4..0]	message len (in dwords)
- * bit[7..5]	reserved
- * bit[8]	response (G2H only)
- * bit[8]	write fence to desc (H2G only)
- * bit[9]	write status to H2G buff (H2G only)
- * bit[10]	send status back via G2H (H2G only)
- * bit[15..11]	reserved
- * bit[31..16]	action code
- */
-#define GUC_CT_MSG_LEN_SHIFT			0
-#define GUC_CT_MSG_LEN_MASK			0x1F
-#define GUC_CT_MSG_IS_RESPONSE			(1 << 8)
-#define GUC_CT_MSG_WRITE_FENCE_TO_DESC		(1 << 8)
-#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF		(1 << 9)
-#define GUC_CT_MSG_SEND_STATUS			(1 << 10)
-#define GUC_CT_MSG_ACTION_SHIFT			16
-#define GUC_CT_MSG_ACTION_MASK			0xFFFF
-
 #endif /* _ABI_GUC_COMMUNICATION_CTB_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h
index 7d5ba4d97d70..98eb4f46572b 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h
@@ -24,6 +24,7 @@
  *  |   | 30:28 | **TYPE** - message type                                      |
  *  |   |       |   - _`GUC_HXG_TYPE_REQUEST` = 0                              |
  *  |   |       |   - _`GUC_HXG_TYPE_EVENT` = 1                                |
+ *  |   |       |   - _`GUC_HXG_TYPE_FAST_REQUEST` = 2                         |
  *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3                     |
  *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5                    |
  *  |   |       |   - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6                     |
@@ -46,6 +47,7 @@
 #define GUC_HXG_MSG_0_TYPE			(0x7 << 28)
 #define   GUC_HXG_TYPE_REQUEST			0u
 #define   GUC_HXG_TYPE_EVENT			1u
+#define   GUC_HXG_TYPE_FAST_REQUEST		2u
 #define   GUC_HXG_TYPE_NO_RESPONSE_BUSY		3u
 #define   GUC_HXG_TYPE_NO_RESPONSE_RETRY	5u
 #define   GUC_HXG_TYPE_RESPONSE_FAILURE		6u
@@ -90,6 +92,34 @@
 #define GUC_HXG_REQUEST_MSG_n_DATAn		GUC_HXG_MSG_n_PAYLOAD
 
 /**
+ * DOC: HXG Fast Request
+ *
+ * The `HXG Fast Request`_ message should be used to initiate asynchronous
+ * activity for which confirmation or return data is not expected.
+ *
+ * If confirmation is required then `HXG Request`_ shall be used instead.
+ *
+ * The recipient of this message may only use `HXG Failure`_ message if it was
+ * unable to accept this request (like invalid data).
+ *
+ * Format of `HXG Fast Request`_ message is same as `HXG Request`_ except @TYPE.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN - see `HXG Message`_                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = `GUC_HXG_TYPE_FAST_REQUEST`_                          |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 - see `HXG Request`_                                   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION - see `HXG Request`_                                  |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |...|       | DATAn - see `HXG Request`_                                   |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+/**
  * DOC: HXG Event
  *
  * The `HXG Event`_ message should be used to initiate asynchronous activity
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
index ebee0b5a2c1d..5f138de3c14f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
@@ -5,8 +5,8 @@
 
 #include <linux/component.h>
 
-#include "drm/i915_component.h"
-#include "drm/i915_gsc_proxy_mei_interface.h"
+#include <drm/i915_component.h>
+#include <drm/i915_gsc_proxy_mei_interface.h>
 
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_print.h"
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index a22e33f37cae..f28a3a83742d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -376,6 +376,24 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct)
 	}
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static void ct_track_lost_and_found(struct intel_guc_ct *ct, u32 fence, u32 action)
+{
+	unsigned int lost = fence % ARRAY_SIZE(ct->requests.lost_and_found);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+	unsigned long entries[SZ_32];
+	unsigned int n;
+
+	n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+
+	/* May be called under spinlock, so avoid sleeping */
+	ct->requests.lost_and_found[lost].stack = stack_depot_save(entries, n, GFP_NOWAIT);
+#endif
+	ct->requests.lost_and_found[lost].fence = fence;
+	ct->requests.lost_and_found[lost].action = action;
+}
+#endif
+
 static u32 ct_get_next_fence(struct intel_guc_ct *ct)
 {
 	/* For now it's trivial */
@@ -426,11 +444,11 @@ static int ct_write(struct intel_guc_ct *ct,
 		 FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
 		 FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence);
 
-	type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_EVENT :
+	type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_FAST_REQUEST :
 		GUC_HXG_TYPE_REQUEST;
 	hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, type) |
-	      FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
-			 GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+	      FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION |
+			 GUC_HXG_REQUEST_MSG_0_DATA0, action[0]);
 
 	CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n",
 		 tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
@@ -447,6 +465,11 @@ static int ct_write(struct intel_guc_ct *ct,
 	}
 	GEM_BUG_ON(tail > size);
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+	ct_track_lost_and_found(ct, fence,
+				FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, action[0]));
+#endif
+
 	/*
 	 * make sure H2G buffer update and LRC tail update (if this triggering a
 	 * submission) are visible before updating the descriptor tail
@@ -675,7 +698,7 @@ static int ct_send(struct intel_guc_ct *ct,
 
 	GEM_BUG_ON(!ct->enabled);
 	GEM_BUG_ON(!len);
-	GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
+	GEM_BUG_ON(len > GUC_CTB_HXG_MSG_MAX_LEN - GUC_CTB_HDR_LEN);
 	GEM_BUG_ON(!response_buf && response_buf_size);
 	might_sleep();
 
@@ -953,6 +976,43 @@ corrupted:
 	return -EPIPE;
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
+{
+	unsigned int n;
+	char *buf = NULL;
+	bool found = false;
+
+	lockdep_assert_held(&ct->requests.lock);
+
+	for (n = 0; n < ARRAY_SIZE(ct->requests.lost_and_found); n++) {
+		if (ct->requests.lost_and_found[n].fence != fence)
+			continue;
+		found = true;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+		buf = kmalloc(SZ_4K, GFP_NOWAIT);
+		if (buf && stack_depot_snprint(ct->requests.lost_and_found[n].stack,
+					       buf, SZ_4K, 0)) {
+			CT_ERROR(ct, "Fence %u was used by action %#04x sent at\n%s",
+				 fence, ct->requests.lost_and_found[n].action, buf);
+			break;
+		}
+#endif
+		CT_ERROR(ct, "Fence %u was used by action %#04x\n",
+			 fence, ct->requests.lost_and_found[n].action);
+		break;
+	}
+	kfree(buf);
+	return found;
+}
+#else
+static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
+{
+	return false;
+}
+#endif
+
 static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *response)
 {
 	u32 len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, response->msg[0]);
@@ -994,12 +1054,13 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 		break;
 	}
 	if (!found) {
-		CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence);
-		CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n", fence,
-			 ct->requests.last_fence);
-		list_for_each_entry(req, &ct->requests.pending, link)
-			CT_ERROR(ct, "request %u awaits response\n",
-				 req->fence);
+		CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n",
+			 len, hxg[0], fence, ct->requests.last_fence);
+		if (!ct_check_lost_and_found(ct, fence)) {
+			list_for_each_entry(req, &ct->requests.pending, link)
+				CT_ERROR(ct, "request %u awaits response\n",
+					 req->fence);
+		}
 		err = -ENOKEY;
 	}
 	spin_unlock_irqrestore(&ct->requests.lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index 818415b64f4d..58e42901ff49 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -8,6 +8,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
+#include <linux/stackdepot.h>
 #include <linux/workqueue.h>
 #include <linux/ktime.h>
 #include <linux/wait.h>
@@ -81,6 +82,16 @@ struct intel_guc_ct {
 
 		struct list_head incoming; /* incoming requests */
 		struct work_struct worker; /* handler for incoming requests */
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+		struct {
+			u16 fence;
+			u16 action;
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+			depot_stack_handle_t stack;
+#endif
+		} lost_and_found[SZ_16];
+#endif
 	} requests;
 
 	/** @stall_time: time of first time a CTB submission is stalled */
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 19d5652300ee..58284156428d 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -877,12 +877,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 			stream->oa_buffer.last_ctx_id = ctx_id;
 		}
 
-		/*
-		 * Clear out the report id and timestamp as a means to detect unlanded
-		 * reports.
-		 */
-		oa_report_id_clear(stream, report32);
-		oa_timestamp_clear(stream, report32);
+		if (is_power_of_2(report_size)) {
+			/*
+			 * Clear out the report id and timestamp as a means
+			 * to detect unlanded reports.
+			 */
+			oa_report_id_clear(stream, report32);
+			oa_timestamp_clear(stream, report32);
+		} else {
+			/* Zero out the entire report */
+			memset(report32, 0, report_size);
+		}
 	}
 
 	if (start_offset != *offset) {
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index a814583e19fd..f96fe92dca4e 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -139,7 +139,7 @@ static u32 frequency_enabled_mask(void)
 	return mask;
 }
 
-static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
+static bool pmu_needs_timer(struct i915_pmu *pmu)
 {
 	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
 	u32 enable;
@@ -158,16 +158,10 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
 	enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
 
 	/*
-	 * When the GPU is idle per-engine counters do not need to be
-	 * running so clear those bits out.
-	 */
-	if (!gpu_active)
-		enable &= ~ENGINE_SAMPLE_MASK;
-	/*
 	 * Also there is software busyness tracking available we do not
 	 * need the timer for I915_SAMPLE_BUSY counter.
 	 */
-	else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
+	if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
 		enable &= ~BIT(I915_SAMPLE_BUSY);
 
 	/*
@@ -197,31 +191,21 @@ static inline s64 ktime_since_raw(const ktime_t kt)
 	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
 }
 
-static unsigned int
-__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-	unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
-
-	GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
-
-	return idx;
-}
-
 static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
 {
-	return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
+	return pmu->sample[gt_id][sample].cur;
 }
 
 static void
 store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
 {
-	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
+	pmu->sample[gt_id][sample].cur = val;
 }
 
 static void
 add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
 {
-	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
+	pmu->sample[gt_id][sample].cur += mul_u32_u32(val, mul);
 }
 
 static u64 get_rc6(struct intel_gt *gt)
@@ -295,7 +279,7 @@ static void park_rc6(struct intel_gt *gt)
 
 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
 {
-	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
+	if (!pmu->timer_enabled && pmu_needs_timer(pmu)) {
 		pmu->timer_enabled = true;
 		pmu->timer_last = ktime_get();
 		hrtimer_start_range_ns(&pmu->timer,
@@ -321,7 +305,7 @@ void i915_pmu_gt_parked(struct intel_gt *gt)
 	 */
 	pmu->unparked &= ~BIT(gt->info.id);
 	if (pmu->unparked == 0)
-		pmu->timer_enabled = pmu_needs_timer(pmu, false);
+		pmu->timer_enabled = false;
 
 	spin_unlock_irq(&pmu->lock);
 }
@@ -827,7 +811,7 @@ static void i915_pmu_disable(struct perf_event *event)
 		 */
 		if (--pmu->enable_count[bit] == 0) {
 			pmu->enable &= ~BIT(bit);
-			pmu->timer_enabled &= pmu_needs_timer(pmu, true);
+			pmu->timer_enabled &= pmu_needs_timer(pmu);
 		}
 
 		spin_unlock_irqrestore(&pmu->lock, flags);
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 33d80fbaab8b..d20592e7db99 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -127,7 +127,7 @@ struct i915_pmu {
 	 * Only global counters are held here, while the per-engine ones are in
 	 * struct intel_engine_cs.
 	 */
-	struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
+	struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
 	/**
 	 * @sleep_last: Last time GT parked for RC6 estimation.
 	 */
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
index 8dc41de3f6f7..a217821eb0fb 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
@@ -143,7 +143,7 @@ gsccs_send_message(struct intel_pxp *pxp,
 
 	reply_size = header->message_size - sizeof(*header);
 	if (reply_size > msg_out_size_max) {
-		drm_warn(&i915->drm, "caller with insufficient PXP reply size %u (%ld)\n",
+		drm_warn(&i915->drm, "caller with insufficient PXP reply size %u (%zu)\n",
 			 reply_size, msg_out_size_max);
 		reply_size = msg_out_size_max;
 	}