[Intel-xe] [PATCH] drm/xe/xe2: Add workaround 16020292621

Thu Nov 23 13:44:54 UTC 2023

> -----Original Message-----
> From: Roper, Matthew D <matthew.d.roper at intel.com>
> Sent: Wednesday, November 22, 2023 11:39 PM
> To: Upadhyay, Tejas <tejas.upadhyay at intel.com>
> Cc: intel-xe at lists.freedesktop.org
> Subject: Re: [Intel-xe] [PATCH] drm/xe/xe2: Add workaround 16020292621
> 
> On Mon, Nov 20, 2023 at 04:16:45PM +0530, Tejas Upadhyay wrote:
> > Workaround applies to Graphics 20.00 as part of ring submission
> 
> The Xe2_LPG graphics IP used by LNL is version 20.04, not 20.00.  And
> according to the WA database, there should be a stepping check as well since
> this only applies to A-step hardware.
> 
> >
> > Signed-off-by: Tejas Upadhyay <tejas.upadhyay at intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_ring_ops.c   | 22 ++++++++++++++++++++++
> >  drivers/gpu/drm/xe/xe_wa_oob.rules |  1 +
> >  2 files changed, 23 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c
> > b/drivers/gpu/drm/xe/xe_ring_ops.c
> > index 59e0aa2d6a4c..42eff69dff9e 100644
> > --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> > +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> > @@ -184,6 +184,22 @@ static int emit_render_cache_flush(struct
> xe_sched_job *job, u32 *dw, int i)
> >  	return i;
> >  }
> >
> > +static int emit_stalling_pipe_control(struct xe_sched_job *job, u32
> > +*dw, int i) {
> > +	struct xe_gt *gt = job->q->gt;
> > +
> > +	if (XE_WA(gt, 16020292621)) {
> > +		dw[i++] = GFX_OP_PIPE_CONTROL(6);
> > +		dw[i++] = PIPE_CONTROL_CS_STALL;
> 
> The workaround asks for a post sync LRI to NOPID which seems to be missing
> from the PIPE_CONTROL generated here.

Looks like for covering both Graphics 20.00 as well 20.04 WA, need to do something like this below :

+static int emit_pipe_control(struct xe_sched_job *job, u32 *dw, int i)
+{
+       struct xe_gt *gt = job->q->gt;
+
+       if (XE_WA(gt, 16020292621)) {
+               struct xe_device *xe = gt_to_xe(gt);
+
+               dw[i++] = GFX_OP_PIPE_CONTROL(6);
+               dw[i++] = GRAPHICS_VER(xe) == 20 ? PIPE_CONTROL_CS_STALL :
+                         PIPE_CONTROL_LRI_POST_SYNC;
+               dw[i++] = GRAPHICS_VER(xe) == 20 ? 0 : RCS_NOPID.addr;
+               dw[i++] = 0;
+               dw[i++] = 0;
+               dw[i++] = 0;
+       }
+
+       return i;
+}
+
 static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
                              int i)
 {
@@ -342,6 +361,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,

        i = emit_user_interrupt(dw, i);

+       i = emit_pipe_control(job, dw, i);
+
        xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW);

        xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));

diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 752842d734be..6ce3a8aa900b 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -20,3 +20,4 @@
 16017236439    PLATFORM(PVC)
 22010954014    PLATFORM(DG2)
 14019821291    MEDIA_VERSION_RANGE(1300, 2000)
+16020292621    GRAPHICS_VERSION_RANGE(2000, 2004), ENGINE_CLASS(RENDER)

Agree? Or I am missing something.

Tejas
> 
> > +		dw[i++] = 0;
> > +		dw[i++] = 0;
> > +		dw[i++] = 0;
> > +		dw[i++] = 0;
> > +	}
> > +
> > +	return i;
> > +}
> > +
> >  static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32
> *dw,
> >  			      int i)
> >  {
> > @@ -294,6 +310,8 @@ static void __emit_job_gen12_video(struct
> > xe_sched_job *job, struct xe_lrc *lrc,
> >
> >  	i = emit_user_interrupt(dw, i);
> >
> > +	i = emit_stalling_pipe_control(job, dw, i);
> 
> Why are we doing this for video?  I don't see anything asking us to on this
> platform.
> 
> > +
> >  	xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW);
> >
> >  	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); @@ -342,6 +360,8 @@
> > static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
> >
> >  	i = emit_user_interrupt(dw, i);
> >
> > +	i = emit_stalling_pipe_control(job, dw, i);
> 
> If we're not checking the engine type here, we should do so inside the
> function so that we don't do this on CCS engines.
> 
> > +
> >  	xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW);
> >
> >  	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); @@ -374,6 +394,8 @@
> > static void emit_migration_job_gen12(struct xe_sched_job *job,
> >
> >  	i = emit_user_interrupt(dw, i);
> >
> > +	i = emit_stalling_pipe_control(job, dw, i);
> 
> Same here; I don't see anything asking us to do this for BCS engines.
> 
> 
> Matt
> 
> > +
> >  	xe_gt_assert(job->q->gt, i <= MAX_JOB_SIZE_DW);
> >
> >  	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); diff --git
> > a/drivers/gpu/drm/xe/xe_wa_oob.rules
> > b/drivers/gpu/drm/xe/xe_wa_oob.rules
> > index 752842d734be..0b9d29f8897c 100644
> > --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
> > +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
> > @@ -20,3 +20,4 @@
> >  16017236439	PLATFORM(PVC)
> >  22010954014	PLATFORM(DG2)
> >  14019821291	MEDIA_VERSION_RANGE(1300, 2000)
> > +16020292621	GRAPHICS_VERSION(2000)
> > --
> > 2.25.1
> >
> 
> --
> Matt Roper
> Graphics Software Engineer
> Linux GPU Platform Enablement
> Intel Corporation