[Mesa-stable] [Mesa-dev] [PATCH 1/4] i965/gen10: Implement WaSampleOffsetIZ workaround
Rafael Antognolli
rafael.antognolli at intel.com
Wed Oct 4 16:25:57 UTC 2017
Hi Anuj,
On Mon, Oct 02, 2017 at 04:07:57PM -0700, Anuj Phogat wrote:
> WaFlushHangWhenNonPipelineStateAndMarkerStalled goes along
> with WaSampleOffsetIZ. Both recommend the same.
>
> Cc: mesa-stable at lists.freedesktop.org
> Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
> ---
> src/mesa/drivers/dri/i965/brw_context.h | 2 +
> src/mesa/drivers/dri/i965/brw_defines.h | 1 +
> src/mesa/drivers/dri/i965/brw_pipe_control.c | 54 ++++++++++++++++++++++
> src/mesa/drivers/dri/i965/gen8_multisample_state.c | 8 ++++
> 4 files changed, 65 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 92fc16de13..f0e8d562e9 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1647,6 +1647,8 @@ void brw_emit_post_sync_nonzero_flush(struct brw_context *brw);
> void brw_emit_depth_stall_flushes(struct brw_context *brw);
> void gen7_emit_vs_workaround_flush(struct brw_context *brw);
> void gen7_emit_cs_stall_flush(struct brw_context *brw);
> +void gen10_emit_wa_cs_stall_flush(struct brw_context *brw);
> +void gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw);
>
> /* brw_queryformat.c */
> void brw_query_internal_format(struct gl_context *ctx, GLenum target,
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 4abb790612..270cdf29db 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1609,6 +1609,7 @@ enum brw_pixel_shader_coverage_mask_mode {
> #define GEN7_GPGPU_DISPATCHDIMY 0x2504
> #define GEN7_GPGPU_DISPATCHDIMZ 0x2508
>
> +#define GEN7_CACHE_MODE_0 0x7000
> #define GEN7_CACHE_MODE_1 0x7004
> # define GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
> # define GEN8_HIZ_NP_PMA_FIX_ENABLE (1 << 11)
> diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> index 460b8f73b6..6326957a7a 100644
> --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
> +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> @@ -278,6 +278,60 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
> brw->workaround_bo, 0, 0);
> }
>
> +static void
> +brw_flush_write_caches(struct brw_context *brw) {
> + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CACHE_FLUSH_BITS);
> +}
> +
> +static void
> +brw_flush_read_caches(struct brw_context *brw) {
> + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CACHE_INVALIDATE_BITS);
> +}
> +
> +/**
> + * From Gen10 Workarounds page in h/w specs:
> + * WaSampleOffsetIZ:
> + * Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no
> + * markers in the pipeline by programming a PIPE_CONTROL with stall.
> + */
> +void
> +gen10_emit_wa_cs_stall_flush(struct brw_context *brw)
> +{
> + const struct gen_device_info *devinfo = &brw->screen->devinfo;
> + assert(devinfo->gen == 10);
> + brw_emit_pipe_control_flush(brw,
> + PIPE_CONTROL_CS_STALL |
> + PIPE_CONTROL_STALL_AT_SCOREBOARD);
> +}
> +
> +/**
> + * From Gen10 Workarounds page in h/w specs:
> + * WaSampleOffsetIZ:
> + * When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an
> + * MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF(SVL)
> + * after the command to ensure the state has been delivered prior to any
> + * command causing a marker in the pipeline.
> + */
> +void
> +gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw)
> +{
> + const struct gen_device_info *devinfo = &brw->screen->devinfo;
> + assert(devinfo->gen == 10);
> +
> + /* Before changing the value of CACHE_MODE_0 register, GFX pipeline must
> + * be idle; i.e., full flush is required.
> + */
> + brw_flush_write_caches(brw);
> + brw_flush_read_caches(brw);
> +
> + /* Write to CACHE_MODE_0 (0x7000) */
> + BEGIN_BATCH(3);
> + OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
> + OUT_BATCH(GEN7_CACHE_MODE_0);
> + OUT_BATCH(0);
> + ADVANCE_BATCH();
> +}
> +
> /**
> * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
> * implementing two workarounds on gen6. From section 1.4.7.1
> diff --git a/src/mesa/drivers/dri/i965/gen8_multisample_state.c b/src/mesa/drivers/dri/i965/gen8_multisample_state.c
> index 7a31a5df4a..14043025b6 100644
> --- a/src/mesa/drivers/dri/i965/gen8_multisample_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_multisample_state.c
> @@ -49,6 +49,11 @@ gen8_emit_3dstate_multisample(struct brw_context *brw, unsigned num_samples)
> void
> gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
> {
> + const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +
> + if (devinfo->gen == 10)
> + gen10_emit_wa_cs_stall_flush(brw);
Note that another document mentions an additional workaround:
"New workaround in addition to one already mentioned in Bspec:
Insert a pipe control followed by 14 DW’s of zero before
3DSTATE_SAMPLE_PATTERN during mid batch context restore."
I know we are not doing a mid-batch context restore yet, but I have a
sporadic hang at the beginning of a piglit test, and with this change
the hang seems to take a little longer to happen (it hangs a little
further down the batches). I don't know if that's really useful, though.
> BEGIN_BATCH(9);
> OUT_BATCH(_3DSTATE_SAMPLE_PATTERN << 16 | (9 - 2));
>
> @@ -68,4 +73,7 @@ gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
> /* 1x and 2x MSAA */
> OUT_BATCH(brw_multisample_positions_1x_2x);
> ADVANCE_BATCH();
> +
> + if (devinfo->gen == 10)
> + gen10_emit_wa_lri_to_cache_mode_zero(brw);
> }
> --
> 2.13.5
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-stable
mailing list