[PATCH] drm/xe: Force write completion of MI_STORE_DATA_IMM

Souza, Jose jose.souza at intel.com
Thu Dec 19 13:58:59 UTC 2024


On Wed, 2024-12-18 at 18:05 -0800, Dixit, Ashutosh wrote:
> On Wed, 18 Dec 2024 09:27:36 -0800, Matthew Brost wrote:
> > 
> 
> Folks,
> 
> > On Wed, Dec 18, 2024 at 08:38:49AM -0700, Souza, Jose wrote:
> > > On Tue, 2024-12-17 at 15:39 -0800, Matthew Brost wrote:
> > > > On Tue, Dec 17, 2024 at 08:07:32AM -0800, José Roberto de Souza wrote:
> > > > > With Force write completion unset there is no guarantees of when the
> > > > > write will be globally visible what is not the behavior wanted.
> > > > > 
> > > > 
> > > > Do we want this backported? If so, maybe add a fixes?
> > > 
> > > Not sure, I don't have an actual issue that is fixed by this but I think would be good to have it backported.
> > > But what do you suggest? Add a fixes tag to the patch removing force probe from LNL?
> > > 
> > 
> > Yea fixing LNL force probe removal sounds reasonable to me.
> 
> Hmm the plan was add Fixes to this patch and also Cc: stable. Yet it was
> merged without these:
> 
> 1460bb1fef9c ("drm/xe: Force write completion of MI_STORE_DATA_IMM")
> 
> Lucas/Thomas,
> 
> Would it be possible to send it to -fixes with a Cc:stable. We have patches
> depending on this one which we want to send to stable.

Please correct me if I'm wrong but that is enough for it to be picked to stable, it will happen in the next Xe stable pull request.

> 
> Thanks.
> --
> Ashutosh
> 
> 
> 
> 
> > 
> > Matt
> > 
> > > > 
> > > > Matt
> > > > 
> > > > > Signed-off-by: José Roberto de Souza <jose.souza at intel.com>
> > > > > ---
> > > > >  drivers/gpu/drm/xe/instructions/xe_mi_commands.h | 13 +++++++------
> > > > >  drivers/gpu/drm/xe/xe_migrate.c                  | 11 ++++++++---
> > > > >  drivers/gpu/drm/xe/xe_oa.c                       |  4 +++-
> > > > >  drivers/gpu/drm/xe/xe_ring_ops.c                 |  6 ++++--
> > > > >  4 files changed, 22 insertions(+), 12 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> > > > > index 10ec2920d31b3..f4ee910f09432 100644
> > > > > --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> > > > > +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> > > > > @@ -33,12 +33,13 @@
> > > > >  #define MI_TOPOLOGY_FILTER		__MI_INSTR(0xD)
> > > > >  #define MI_FORCE_WAKEUP			__MI_INSTR(0x1D)
> > > > > 
> > > > > -#define MI_STORE_DATA_IMM		__MI_INSTR(0x20)
> > > > > -#define   MI_SDI_GGTT			REG_BIT(22)
> > > > > -#define   MI_SDI_LEN_DW			GENMASK(9, 0)
> > > > > -#define   MI_SDI_NUM_DW(x)		REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2)
> > > > > -#define   MI_SDI_NUM_QW(x)		(REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \
> > > > > -					 REG_BIT(21))
> > > > > +#define MI_STORE_DATA_IMM			__MI_INSTR(0x20)
> > > > > +#define   MI_SDI_GGTT				REG_BIT(22)
> > > > > +#define   MI_FORCE_WRITE_COMPLETION_CHECK	REG_BIT(10)
> > > > > +#define   MI_SDI_LEN_DW				GENMASK(9, 0)
> > > > > +#define   MI_SDI_NUM_DW(x)			REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2)
> > > > > +#define   MI_SDI_NUM_QW(x)			(REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \
> > > > > +						 REG_BIT(21))
> > > > > 
> > > > >  #define MI_LOAD_REGISTER_IMM		__MI_INSTR(0x22)
> > > > >  #define   MI_LRI_LRM_CS_MMIO		REG_BIT(19)
> > > > > diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> > > > > index 1b97d90aaddaf..8b32fad678782 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_migrate.c
> > > > > +++ b/drivers/gpu/drm/xe/xe_migrate.c
> > > > > @@ -581,7 +581,9 @@ static void emit_pte(struct xe_migrate *m,
> > > > > 		while (ptes) {
> > > > > 			u32 chunk = min(MAX_PTE_PER_SDI, ptes);
> > > > > 
> > > > > -		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
> > > > > +		bb->cs[bb->len++] = MI_STORE_DATA_IMM |
> > > > > +				    MI_FORCE_WRITE_COMPLETION_CHECK |
> > > > > +				    MI_SDI_NUM_QW(chunk);
> > > > > 			bb->cs[bb->len++] = ofs;
> > > > > 			bb->cs[bb->len++] = 0;
> > > > > 
> > > > > @@ -1223,7 +1225,9 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
> > > > > 			if (!(bb->len & 1))
> > > > > 				bb->cs[bb->len++] = MI_NOOP;
> > > > > 
> > > > > -		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
> > > > > +		bb->cs[bb->len++] = MI_STORE_DATA_IMM |
> > > > > +				    MI_FORCE_WRITE_COMPLETION_CHECK |
> > > > > +				    MI_SDI_NUM_QW(chunk);
> > > > > 			bb->cs[bb->len++] = lower_32_bits(addr);
> > > > > 			bb->cs[bb->len++] = upper_32_bits(addr);
> > > > > 			if (pt_op->bind)
> > > > > @@ -1388,7 +1392,8 @@ __xe_migrate_update_pgtables(struct xe_migrate *m,
> > > > > 				u32 idx = 0;
> > > > > 
> > > > > 				bb->cs[bb->len++] = MI_STORE_DATA_IMM |
> > > > > -				MI_SDI_NUM_QW(chunk);
> > > > > +					    MI_FORCE_WRITE_COMPLETION_CHECK |
> > > > > +					    MI_SDI_NUM_QW(chunk);
> > > > > 				bb->cs[bb->len++] = ofs;
> > > > > 				bb->cs[bb->len++] = 0; /* upper_32_bits */
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> > > > > index 56bf375a9d4bc..ae94490b0eac8 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_oa.c
> > > > > +++ b/drivers/gpu/drm/xe/xe_oa.c
> > > > > @@ -690,7 +690,9 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
> > > > > 		u32 offset = xe_bo_ggtt_addr(lrc->bo);
> > > > > 
> > > > > 		do {
> > > > > -		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
> > > > > +		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT |
> > > > > +				    MI_FORCE_WRITE_COMPLETION_CHECK |
> > > > > +				    MI_SDI_NUM_DW(1);
> > > > > 			bb->cs[bb->len++] = offset + flex->offset * sizeof(u32);
> > > > > 			bb->cs[bb->len++] = 0;
> > > > > 			bb->cs[bb->len++] = flex->value;
> > > > > diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
> > > > > index 0be4f489d3e12..3a75a08b6be92 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> > > > > +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> > > > > @@ -72,7 +72,8 @@ static int emit_user_interrupt(u32 *dw, int i)
> > > > > 
> > > > >  static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
> > > > >  {
> > > > > -	dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
> > > > > +	dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT |
> > > > > +		  MI_FORCE_WRITE_COMPLETION_CHECK | MI_SDI_NUM_DW(1);
> > > > > 		dw[i++] = addr;
> > > > > 		dw[i++] = 0;
> > > > > 		dw[i++] = value;
> > > > > @@ -162,7 +163,8 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
> > > > >  static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
> > > > > 					       u32 *dw, int i)
> > > > >  {
> > > > > -	dw[i++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(1);
> > > > > +	dw[i++] = MI_STORE_DATA_IMM | MI_FORCE_WRITE_COMPLETION_CHECK |
> > > > > +		  MI_SDI_NUM_QW(1);
> > > > > 		dw[i++] = lower_32_bits(addr);
> > > > > 		dw[i++] = upper_32_bits(addr);
> > > > > 		dw[i++] = lower_32_bits(value);
> > > > > --
> > > > > 2.47.1
> > > > > 
> > > 



More information about the Intel-xe mailing list