[PATCH] drm/xe/migrate: make MI_TLB_INVALIDATE conditional

Matthew Brost matthew.brost at intel.com
Fri Jun 20 16:11:55 UTC 2025


On Fri, Jun 20, 2025 at 04:24:47PM +0100, Matthew Auld wrote:
> When clearing VRAM we should be able to skip invalidating the TLBs if we

For copies, we always program SRAM PTEs, and so need an invalidate?
Maybe mention this in the commit message.

> are only using the identity map to access VRAM (which is the common
> case), since no modifications are made to PTEs on the fly. Also since we
> use huge 1G entries within the identity map, there should be a pretty
> decent chance that the next packet(s) (if also clears) can avoid a tree
> walk if we don't shoot down the TLBs, like if we have to process a long
> stream of clears.
> 
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_migrate.c  | 18 +++++++++++-------
>  drivers/gpu/drm/xe/xe_ring_ops.c | 10 +++++-----
>  2 files changed, 16 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index 8f8e9fdfb2a8..a76363740a12 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -896,7 +896,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>  			goto err;
>  		}
>  
> -		xe_sched_job_add_migrate_flush(job, flush_flags);
> +		xe_sched_job_add_migrate_flush(job, flush_flags | MI_INVALIDATE_TLB);
>  		if (!fence) {
>  			err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv,
>  						    DMA_RESV_USAGE_BOOKKEEP);
> @@ -1119,11 +1119,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
>  
>  		size -= clear_L0;
>  		/* Preemption is enabled again by the ring ops. */
> -		if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it))
> +		if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) {
>  			xe_res_next(&src_it, clear_L0);
> -		else
> -			emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs,
> -				 &src_it, clear_L0, dst);
> +		} else {
> +			emit_pte(m, bb, clear_L0_pt, clear_vram,
> +				 clear_only_system_ccs, &src_it, clear_L0, dst);
> +			flush_flags |= MI_INVALIDATE_TLB;
> +		}

What about the corresponding if statements for dst_it / copy_system_ccs?
Don't we need to set MI_INVALIDATE_TLB there as well?

Matt

>  
>  		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
>  		update_idx = bb->len;
> @@ -1134,7 +1136,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
>  		if (xe_migrate_needs_ccs_emit(xe)) {
>  			emit_copy_ccs(gt, bb, clear_L0_ofs, true,
>  				      m->cleared_mem_ofs, false, clear_L0);
> -			flush_flags = MI_FLUSH_DW_CCS;
> +			flush_flags |= MI_FLUSH_DW_CCS;
>  		}
>  
>  		job = xe_bb_create_migration_job(m->q, bb,
> @@ -1469,6 +1471,8 @@ __xe_migrate_update_pgtables(struct xe_migrate *m,
>  		goto err_sa;
>  	}
>  
> +	xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB);
> +
>  	if (ops->pre_commit) {
>  		pt_update->job = job;
>  		err = ops->pre_commit(pt_update);
> @@ -1667,7 +1671,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
>  		goto err;
>  	}
>  
> -	xe_sched_job_add_migrate_flush(job, 0);
> +	xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB);
>  
>  	mutex_lock(&m->job_mutex);
>  	xe_sched_job_arm(job);
> diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
> index bc1689db4cd7..b5548e0769f4 100644
> --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> @@ -110,10 +110,10 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
>  	return i;
>  }
>  
> -static int emit_flush_invalidate(u32 *dw, int i)
> +static int emit_flush_invalidate(u32 *dw, int i, u32 flush_flags)
>  {
> -	dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
> -		  MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX;
> +	dw[i++] = MI_FLUSH_DW |  MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW |
> +		MI_FLUSH_DW_STORE_INDEX | (flush_flags & MI_INVALIDATE_TLB) ?: 0;
>  	dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR;
>  	dw[i++] = 0;
>  	dw[i++] = 0;
> @@ -411,13 +411,13 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
>  	if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) {
>  		/* XXX: Do we need this? Leaving for now. */
>  		dw[i++] = preparser_disable(true);
> -		i = emit_flush_invalidate(dw, i);
> +		i = emit_flush_invalidate(dw, i, job->migrate_flush_flags);
>  		dw[i++] = preparser_disable(false);
>  	}
>  
>  	i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i);
>  
> -	dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags |
> +	dw[i++] = MI_FLUSH_DW | job->migrate_flush_flags |
>  		MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW;
>  	dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT;
>  	dw[i++] = 0;
> -- 
> 2.49.0
> 


More information about the Intel-xe mailing list