[Mesa-stable] [Mesa-dev] [PATCH 1/4] i965/gen10: Implement WaSampleOffsetIZ workaround

Rafael Antognolli rafael.antognolli at intel.com
Wed Oct 4 16:29:19 UTC 2017


On Mon, Oct 02, 2017 at 07:39:04PM -0700, Jason Ekstrand wrote:
> On Mon, Oct 2, 2017 at 4:07 PM, Anuj Phogat <anuj.phogat at gmail.com> wrote:
> 
>     WaFlushHangWhenNonPipelineStateAndMarkerStalled goes along
>     with WaSampleOffsetIZ. Both recommends the same.
> 
>     Cc: mesa-stable at lists.freedesktop.org
>     Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
>     ---
>      src/mesa/drivers/dri/i965/brw_context.h            |  2 +
>      src/mesa/drivers/dri/i965/brw_defines.h            |  1 +
>      src/mesa/drivers/dri/i965/brw_pipe_control.c       | 54
>     ++++++++++++++++++++++
>      src/mesa/drivers/dri/i965/gen8_multisample_state.c |  8 ++++
>      4 files changed, 65 insertions(+)
> 
>     diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri
>     /i965/brw_context.h
>     index 92fc16de13..f0e8d562e9 100644
>     --- a/src/mesa/drivers/dri/i965/brw_context.h
>     +++ b/src/mesa/drivers/dri/i965/brw_context.h
>     @@ -1647,6 +1647,8 @@ void brw_emit_post_sync_nonzero_flush(struct
>     brw_context *brw);
>      void brw_emit_depth_stall_flushes(struct brw_context *brw);
>      void gen7_emit_vs_workaround_flush(struct brw_context *brw);
>      void gen7_emit_cs_stall_flush(struct brw_context *brw);
>     +void gen10_emit_wa_cs_stall_flush(struct brw_context *brw);
>     +void gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw);
> 
>      /* brw_queryformat.c */
>      void brw_query_internal_format(struct gl_context *ctx, GLenum target,
>     diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri
>     /i965/brw_defines.h
>     index 4abb790612..270cdf29db 100644
>     --- a/src/mesa/drivers/dri/i965/brw_defines.h
>     +++ b/src/mesa/drivers/dri/i965/brw_defines.h
>     @@ -1609,6 +1609,7 @@ enum brw_pixel_shader_coverage_mask_mode {
>      #define GEN7_GPGPU_DISPATCHDIMY         0x2504
>      #define GEN7_GPGPU_DISPATCHDIMZ         0x2508
> 
>     +#define GEN7_CACHE_MODE_0               0x7000
>      #define GEN7_CACHE_MODE_1               0x7004
>      # define GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
>      # define GEN8_HIZ_NP_PMA_FIX_ENABLE        (1 << 11)
>     diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/
>     drivers/dri/i965/brw_pipe_control.c
>     index 460b8f73b6..6326957a7a 100644
>     --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
>     +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
>     @@ -278,6 +278,60 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
>                                     brw->workaround_bo, 0, 0);
>      }
> 
>     +static void
>     +brw_flush_write_caches(struct brw_context *brw) {
>     +   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CACHE_FLUSH_BITS);
>     +}
>     +
>     +static void
>     +brw_flush_read_caches(struct brw_context *brw) {
>     +   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CACHE_INVALIDATE_BITS);
>     +}
>     +
>     +/**
>     + * From Gen10 Workarounds page in h/w specs:
>     + * WaSampleOffsetIZ:
>     + * Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no
>     + * markers in the pipeline by programming a PIPE_CONTROL with stall.
>     + */
>     +void
>     +gen10_emit_wa_cs_stall_flush(struct brw_context *brw)
>     +{
>     +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
>     +   assert(devinfo->gen == 10);
>     +   brw_emit_pipe_control_flush(brw,
>     +                               PIPE_CONTROL_CS_STALL |
>     +                               PIPE_CONTROL_STALL_AT_SCOREBOARD);
>     +}
>     +
>     +/**
>     + * From Gen10 Workarounds page in h/w specs:
>     + * WaSampleOffsetIZ:
>     + * When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an
>     + * MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF
>     (SVL)
>     + * after the command to ensure the state has been delivered prior to any
>     + * command causing a marker in the pipeline.
>     + */
>     +void
>     +gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw)
>     +{
>     +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
>     +   assert(devinfo->gen == 10);
>     +
>     +   /* Before changing the value of CACHE_MODE_0 register, GFX pipeline
>     must
>     +    * be idle; i.e., full flush is required.
>     +    */
>     +   brw_flush_write_caches(brw);
>     +   brw_flush_read_caches(brw);
> 
> 
> If you do
> 
> brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CACHE_FLUSH_BITS |
>                                      PIPE_CONTROL_CACHE_INVALIDATE_BITS)
> 
> It will automatically do both and insert a stall between them.  What you have
> above, I don't think will actually every CS stall which appears to be required
> when changing CACHE_MODE_0
>  
> 
>     +
>     +   /* Write to CACHE_MODE_0 (0x7000) */
>     +   BEGIN_BATCH(3);
>     +   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
>     +   OUT_BATCH(GEN7_CACHE_MODE_0);
>     +   OUT_BATCH(0);
> 
> 
> I believe this won't actually change the register which is good.  But is that
> sufficient for the workaround?

The BSpec is not clear about why we need this, but there's a bug that
describes it as:

"When 3DSTATE_SAMPLE_PATTERN is programed, driver must then issue an
MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF(SVL) after
the command to ensure the state has been delivered prior to any command
causing a marker in the pipeline."

So, maybe the syncing property of this command is more important than what
we are loading into the register?

>     +   ADVANCE_BATCH();
>     +}
>     +
>      /**
>       * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
>       * implementing two workarounds on gen6.  From section 1.4.7.1
>     diff --git a/src/mesa/drivers/dri/i965/gen8_multisample_state.c b/src/mesa/
>     drivers/dri/i965/gen8_multisample_state.c
>     index 7a31a5df4a..14043025b6 100644
>     --- a/src/mesa/drivers/dri/i965/gen8_multisample_state.c
>     +++ b/src/mesa/drivers/dri/i965/gen8_multisample_state.c
>     @@ -49,6 +49,11 @@ gen8_emit_3dstate_multisample(struct brw_context *brw,
>     unsigned num_samples)
>      void
>      gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
>      {
>     +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
>     +
>     +   if (devinfo->gen == 10)
>     +      gen10_emit_wa_cs_stall_flush(brw);
>     +
>         BEGIN_BATCH(9);
>         OUT_BATCH(_3DSTATE_SAMPLE_PATTERN << 16 | (9 - 2));
> 
>     @@ -68,4 +73,7 @@ gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
>         /* 1x and 2x MSAA */
>         OUT_BATCH(brw_multisample_positions_1x_2x);
>         ADVANCE_BATCH();
>     +
>     +   if (devinfo->gen == 10)
>     +      gen10_emit_wa_lri_to_cache_mode_zero(brw);
>      }
>     --
>     2.13.5
> 
>     _______________________________________________
>     mesa-dev mailing list
>     mesa-dev at lists.freedesktop.org
>     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
> 

> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-stable mailing list