author    Matthew Auld <matthew.auld@intel.com>    2025-08-08 12:04:53 +0100
committer Matthew Auld <matthew.auld@intel.com>    2025-08-28 09:58:19 +0100
commit    81a45cb7ea31a59777af294de590eea66ab80691 (patch)
tree      0c4462680ad5fc6e1e2d590cbfa376ce6931cee6
parent    db16f9d90c1d97a2b5483b4b856625875bffe860 (diff)
drm/xe/migrate: make MI_INVALIDATE_TLB conditional
When clearing VRAM we should be able to skip invalidating the TLBs if we
are only using the identity map to access VRAM (which is the common
case), since no modifications are made to PTEs on the fly. Also, since
the identity map uses huge 1G entries, there is a decent chance that the
next packet(s) (if also clears) can avoid a tree walk if we don't shoot
down the TLBs, for example when processing a long stream of clears.

For normal moves/copies we usually end up with the src or dst being
system memory, meaning we can't rely on the identity map alone and must
also emit PTEs, so a TLB flush is always required there.

v2:
 - Update commit to explain the situation for normal copies (Matt B)
 - Rebase on latest changes

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20250808110452.467513-2-matthew.auld@intel.com
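In code terms, the decision the clear path now makes is roughly the
following minimal sketch. The helper clear_flush_flags and its parameters
identity_ok and needs_ccs_emit are illustrative names, and the MI_* bit
values are placeholders, not the driver's real definitions:

#include <stdbool.h>
#include <stdint.h>

#define MI_INVALIDATE_TLB	(1u << 18)	/* placeholder bit value */
#define MI_FLUSH_DW_CCS		(1u << 16)	/* placeholder bit value */

/*
 * Illustrative stand-in for the xe_migrate_clear() logic in the diff
 * below: a VRAM clear serviced entirely through the static 1G identity
 * map writes no PTEs, so the TLBs stay valid and the shootdown can be
 * skipped.
 */
static uint32_t clear_flush_flags(bool clear_vram, bool identity_ok,
				  bool needs_ccs_emit)
{
	uint32_t flush_flags = 0;

	if (!(clear_vram && identity_ok))
		flush_flags |= MI_INVALIDATE_TLB;	/* fresh PTEs were emitted */

	if (needs_ccs_emit)
		flush_flags |= MI_FLUSH_DW_CCS;	/* |= so the TLB bit survives */

	return flush_flags;
}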
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.c   18
-rw-r--r--  drivers/gpu/drm/xe/xe_ring_ops.c  10
2 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 57e6d5a8ac39..9643442ef101 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -904,7 +904,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
goto err;
}
- xe_sched_job_add_migrate_flush(job, flush_flags);
+ xe_sched_job_add_migrate_flush(job, flush_flags | MI_INVALIDATE_TLB);
if (!fence) {
err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv,
DMA_RESV_USAGE_BOOKKEEP);
@@ -1288,11 +1288,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
size -= clear_L0;
/* Preemption is enabled again by the ring ops. */
- if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it))
+ if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) {
xe_res_next(&src_it, clear_L0);
- else
- emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs,
- &src_it, clear_L0, dst);
+ } else {
+ emit_pte(m, bb, clear_L0_pt, clear_vram,
+ clear_only_system_ccs, &src_it, clear_L0, dst);
+ flush_flags |= MI_INVALIDATE_TLB;
+ }
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
update_idx = bb->len;
@@ -1303,7 +1305,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
if (xe_migrate_needs_ccs_emit(xe)) {
emit_copy_ccs(gt, bb, clear_L0_ofs, true,
m->cleared_mem_ofs, false, clear_L0);
- flush_flags = MI_FLUSH_DW_CCS;
+ flush_flags |= MI_FLUSH_DW_CCS;
}
job = xe_bb_create_migration_job(m->q, bb,
@@ -1638,6 +1640,8 @@ next_cmd:
goto err_sa;
}
+ xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB);
+
if (ops->pre_commit) {
pt_update->job = job;
err = ops->pre_commit(pt_update);
@@ -1863,7 +1867,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
goto err;
}
- xe_sched_job_add_migrate_flush(job, 0);
+ xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB);
mutex_lock(&m->job_mutex);
xe_sched_job_arm(job);
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 5f15360d14bf..d71837773d6c 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -110,10 +110,10 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
return i;
}
-static int emit_flush_invalidate(u32 addr, u32 val, u32 *dw, int i)
+static int emit_flush_invalidate(u32 addr, u32 val, u32 flush_flags, u32 *dw, int i)
{
- dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
- MI_FLUSH_IMM_DW;
+ dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW |
+ MI_FLUSH_IMM_DW | (flush_flags & MI_INVALIDATE_TLB);
dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
dw[i++] = 0;
dw[i++] = val;

return i;
}
@@ -410,13 +410,13 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i);
dw[i++] = preparser_disable(true);
- i = emit_flush_invalidate(saddr, seqno, dw, i);
+ i = emit_flush_invalidate(saddr, seqno, job->migrate_flush_flags, dw, i);
dw[i++] = preparser_disable(false);
i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i);
i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno,
- MI_INVALIDATE_TLB | job->migrate_flush_flags,
+ job->migrate_flush_flags,
dw, i);
i = emit_user_interrupt(dw, i);
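Putting both files together, the invalidation decision now travels from
the job to the ring emission instead of being hardcoded at the ring
level. A small self-contained sketch of that flow, where struct
sketch_job and both helpers are invented for illustration and only the
MI_INVALIDATE_TLB mnemonic comes from the patch:

#include <stdint.h>
#include <stdio.h>

#define MI_INVALIDATE_TLB	(1u << 18)	/* placeholder bit value */

/* invented stand-in for the migrate_flush_flags plumbing on the job */
struct sketch_job {
	uint32_t migrate_flush_flags;
};

/* ring side: emit the TLB shootdown only if the job asked for one */
static void emit_ring_flush(const struct sketch_job *job)
{
	if (job->migrate_flush_flags & MI_INVALIDATE_TLB)
		printf("MI_FLUSH_DW + TLB invalidation\n");
	else
		printf("MI_FLUSH_DW only, identity-map TLB entries kept warm\n");
}

int main(void)
{
	/* VRAM clear via the 1G identity map: no PTEs written, no bit set */
	struct sketch_job vram_clear = { .migrate_flush_flags = 0 };

	/* copy with a system-memory endpoint: PTEs emitted, TLBs go stale */
	struct sketch_job sysmem_copy = { .migrate_flush_flags = MI_INVALIDATE_TLB };

	emit_ring_flush(&vram_clear);	/* skips the shootdown */
	emit_ring_flush(&sysmem_copy);	/* forces the shootdown */
	return 0;
}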