[PATCH] Revert "drm/xe: Force write completion of MI_STORE_DATA_IMM"

Souza, Jose jose.souza at intel.com
Fri Dec 27 17:47:09 UTC 2024


On Fri, 2024-12-27 at 09:26 -0800, Dixit, Ashutosh wrote:
> On Fri, 27 Dec 2024 06:24:22 -0800, Souza, Jose wrote:
> > 
> > On Thu, 2024-12-26 at 19:31 -0800, Dixit, Ashutosh wrote:
> > > On Thu, 26 Dec 2024 08:23:10 -0800, José Roberto de Souza wrote:
> > > > 
> > > > This reverts commit 1460bb1fef9ccf7390af0d74a15252442fd6effd.
> > > > 
> > > > In all places the MI_STORE_DATA_IMM are not followed by a read of
> > > > the same memory address in the same batch buffer and the posted writes
> > > > are flushed with PIPE_CONTROL or MI_FLUSH_DW in xe_ring_ops.c functions
> > > > so there is no need to set this register.
> > > 
> > > As I was saying this commit message should just be:
> > > 
> > > In all places where MI_STORE_DATA_IMM is used, posted writes are flushed
> > > with PIPE_CONTROL or MI_FLUSH_DW in xe_ring_ops.c functions, so there is no
> > > need to set this register.
> > > 
> > > So according to me the "are not followed by a read of the same memory
> > > address in the same batch buffer" is incorrect, it will work even without
> > > setting the flag.
> > 
> > Not on Xe2+, with CS_PRIORITY_MEM_READ set there is a priority path to
> > memory for reads and other for writes.
> 
> OK, in that case please go ahead and merge.

thank you, will send this again as this version was not picked by patchwork or CI because freedesktop server ran out of disk space.

> 
> > 
> > > 
> > > Anyway apart from that nit, this is:
> > > 
> > > Reviewed-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
> > > 
> > > > 
> > > > Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > > > Cc: Ashutosh Dixit <ashutosh.dixit at intel.com>
> > > > Fixes: 1460bb1fef9c ("drm/xe: Force write completion of MI_STORE_DATA_IMM")
> > > > Signed-off-by: José Roberto de Souza <jose.souza at intel.com>
> > > > ---
> > > >  drivers/gpu/drm/xe/instructions/xe_mi_commands.h | 13 ++++++-------
> > > >  drivers/gpu/drm/xe/xe_migrate.c                  | 11 +++--------
> > > >  drivers/gpu/drm/xe/xe_ring_ops.c                 |  6 ++----
> > > >  3 files changed, 11 insertions(+), 19 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> > > > index f4ee910f09432..10ec2920d31b3 100644
> > > > --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> > > > +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> > > > @@ -33,13 +33,12 @@
> > > >  #define MI_TOPOLOGY_FILTER		__MI_INSTR(0xD)
> > > >  #define MI_FORCE_WAKEUP			__MI_INSTR(0x1D)
> > > > 
> > > > -#define MI_STORE_DATA_IMM			__MI_INSTR(0x20)
> > > > -#define   MI_SDI_GGTT				REG_BIT(22)
> > > > -#define   MI_FORCE_WRITE_COMPLETION_CHECK	REG_BIT(10)
> > > > -#define   MI_SDI_LEN_DW				GENMASK(9, 0)
> > > > -#define   MI_SDI_NUM_DW(x)			REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2)
> > > > -#define   MI_SDI_NUM_QW(x)			(REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \
> > > > -						 REG_BIT(21))
> > > > +#define MI_STORE_DATA_IMM		__MI_INSTR(0x20)
> > > > +#define   MI_SDI_GGTT			REG_BIT(22)
> > > > +#define   MI_SDI_LEN_DW			GENMASK(9, 0)
> > > > +#define   MI_SDI_NUM_DW(x)		REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2)
> > > > +#define   MI_SDI_NUM_QW(x)		(REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \
> > > > +					 REG_BIT(21))
> > > > 
> > > >  #define MI_LOAD_REGISTER_IMM		__MI_INSTR(0x22)
> > > >  #define   MI_LRI_LRM_CS_MMIO		REG_BIT(19)
> > > > diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> > > > index 8b32fad678782..1b97d90aaddaf 100644
> > > > --- a/drivers/gpu/drm/xe/xe_migrate.c
> > > > +++ b/drivers/gpu/drm/xe/xe_migrate.c
> > > > @@ -581,9 +581,7 @@ static void emit_pte(struct xe_migrate *m,
> > > > 	while (ptes) {
> > > > 		u32 chunk = min(MAX_PTE_PER_SDI, ptes);
> > > > 
> > > > -		bb->cs[bb->len++] = MI_STORE_DATA_IMM |
> > > > -				    MI_FORCE_WRITE_COMPLETION_CHECK |
> > > > -				    MI_SDI_NUM_QW(chunk);
> > > > +		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
> > > > 		bb->cs[bb->len++] = ofs;
> > > > 		bb->cs[bb->len++] = 0;
> > > > 
> > > > @@ -1225,9 +1223,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
> > > > 		if (!(bb->len & 1))
> > > > 			bb->cs[bb->len++] = MI_NOOP;
> > > > 
> > > > -		bb->cs[bb->len++] = MI_STORE_DATA_IMM |
> > > > -				    MI_FORCE_WRITE_COMPLETION_CHECK |
> > > > -				    MI_SDI_NUM_QW(chunk);
> > > > +		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
> > > > 		bb->cs[bb->len++] = lower_32_bits(addr);
> > > > 		bb->cs[bb->len++] = upper_32_bits(addr);
> > > > 		if (pt_op->bind)
> > > > @@ -1392,8 +1388,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m,
> > > > 			u32 idx = 0;
> > > > 
> > > > 			bb->cs[bb->len++] = MI_STORE_DATA_IMM |
> > > > -					    MI_FORCE_WRITE_COMPLETION_CHECK |
> > > > -					    MI_SDI_NUM_QW(chunk);
> > > > +				MI_SDI_NUM_QW(chunk);
> > > > 			bb->cs[bb->len++] = ofs;
> > > > 			bb->cs[bb->len++] = 0; /* upper_32_bits */
> > > > 
> > > > diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
> > > > index c8ab37fa0d198..9f327f27c0726 100644
> > > > --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> > > > +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> > > > @@ -72,8 +72,7 @@ static int emit_user_interrupt(u32 *dw, int i)
> > > > 
> > > >  static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
> > > >  {
> > > > -	dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT |
> > > > -		  MI_FORCE_WRITE_COMPLETION_CHECK | MI_SDI_NUM_DW(1);
> > > > +	dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
> > > > 	dw[i++] = addr;
> > > > 	dw[i++] = 0;
> > > > 	dw[i++] = value;
> > > > @@ -163,8 +162,7 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
> > > >  static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
> > > > 				       u32 *dw, int i)
> > > >  {
> > > > -	dw[i++] = MI_STORE_DATA_IMM | MI_FORCE_WRITE_COMPLETION_CHECK |
> > > > -		  MI_SDI_NUM_QW(1);
> > > > +	dw[i++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(1);
> > > > 	dw[i++] = lower_32_bits(addr);
> > > > 	dw[i++] = upper_32_bits(addr);
> > > > 	dw[i++] = lower_32_bits(value);
> > > > --
> > > > 2.47.1
> > > > 
> > 



More information about the Intel-xe mailing list