[Mesa-dev] [PATCH 4/4] i965/gen6: Apply documented workaround for nonpipelined state packets.

Jose Fonseca jfonseca at vmware.com
Mon Jun 20 10:59:43 PDT 2011


Given how the corruption looked, I would never have imagined a GPU hang.  It must have been a nasty issue to debug.

I saw this issue on a netbook I have at home, but I haven't had the opportunity to verify it with your fix yet.  I often use this trace for sanity checking changes to glretrace, so thanks for fixing it.

Jose

----- Original Message -----
> Fixes a 100% reproducible GPU hang in topogun-1.06-orc-84k.trace.
> ---
>  src/mesa/drivers/dri/i965/brw_misc_state.c     |   23
>  +++++++++++++++++++++++
>  src/mesa/drivers/dri/intel/intel_batchbuffer.c |   21
>  ++++++++++++++++++++-
>  src/mesa/drivers/dri/intel/intel_batchbuffer.h |    1 +
>  3 files changed, 44 insertions(+), 1 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c
> b/src/mesa/drivers/dri/i965/brw_misc_state.c
> index 1f3b64f..85dae28 100644
> --- a/src/mesa/drivers/dri/i965/brw_misc_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
> @@ -219,6 +219,12 @@ static void emit_depthbuffer(struct brw_context
> *brw)
>     struct intel_region *hiz_region = depth_irb ?
>     depth_irb->hiz_region : NULL;
>     unsigned int len;
>  
> +   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
> +    * non-pipelined state that will need the PIPE_CONTROL
> workaround.
> +    */
> +   if (intel->gen == 6)
> +      intel_emit_post_sync_nonzero_flush(intel);
> +
>     /*
>      * If either depth or stencil buffer has packed depth/stencil
>      format,
>      * then don't use separate stencil. Emit only a depth buffer.
> @@ -408,6 +414,9 @@ static void emit_depthbuffer(struct brw_context
> *brw)
>      *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
>      */
>     if (intel->gen >= 6 || hiz_region) {
> +      if (intel->gen == 6)
> +	 intel_emit_post_sync_nonzero_flush(intel);
> +
>        BEGIN_BATCH(2);
>        OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
>        OUT_BATCH(0);
> @@ -523,6 +532,9 @@ static void upload_aa_line_parameters(struct
> brw_context *brw)
>     if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
>        return;
>  
> +   if (intel->gen == 6)
> +      intel_emit_post_sync_nonzero_flush(intel);
> +
>     OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
>     /* use legacy aa line coverage computation */
>     OUT_BATCH(0);
> @@ -553,6 +565,9 @@ static void upload_line_stipple(struct
> brw_context *brw)
>     if (!ctx->Line.StippleFlag)
>        return;
>  
> +   if (intel->gen == 6)
> +      intel_emit_post_sync_nonzero_flush(intel);
> +
>     BEGIN_BATCH(3);
>     OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
>     OUT_BATCH(ctx->Line.StipplePattern);
> @@ -580,6 +595,10 @@ static void upload_invarient_state( struct
> brw_context *brw )
>  {
>     struct intel_context *intel = &brw->intel;
>  
> +   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
> +   if (intel->gen == 6)
> +      intel_emit_post_sync_nonzero_flush(intel);
> +
>     {
>        /* 0x61040000  Pipeline Select */
>        /*     PipelineSelect            : 0 */
> @@ -643,6 +662,7 @@ static void upload_invarient_state( struct
> brw_context *brw )
>        sip.header.length = 0;
>        sip.bits0.pad = 0;
>        sip.bits0.system_instruction_pointer = 0;
> +
>        BRW_BATCH_STRUCT(brw, &sip);
>     }
>  
> @@ -683,6 +703,9 @@ static void upload_state_base_address( struct
> brw_context *brw )
>     struct intel_context *intel = &brw->intel;
>  
>     if (intel->gen >= 6) {
> +      if (intel->gen == 6)
> +	 intel_emit_post_sync_nonzero_flush(intel);
> +
>         BEGIN_BATCH(10);
>         OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
>         /* General state base address: stateless DP read/write
>         requests */
> diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> index 77563ae..13dd855 100644
> --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> @@ -293,7 +293,26 @@ emit:
>     item->header = intel->batch.emit;
>  }
>  
> -static void
> +/**
> + * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
> + * implementing two workarounds on gen6:
> + *
> + * [DevSNB-C+{W/A}] Before any depth stall flush (including those
> + * produced by non-pipelined state commands), software needs to
> first
> + * send a PIPE_CONTROL with no bits set except Post-Sync Operation
> !=
> + * 0.
> + *
> + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
> Enable
> + * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
> + *
> + * XXX: There is also a workaround that would appear to apply to
> this
> + * workaround, but it doesn't appear to be necessary so far:
> + *
> + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
> + * BEFORE the pipe-control with a post-sync op and no write-cache
> + * flushes.
> + */
> +void
>  intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
>  {
>     if (!intel->batch.need_workaround_flush)
> diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
> b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
> index 3ed88d0..fb4134d 100644
> --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
> +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
> @@ -39,6 +39,7 @@ GLboolean
> intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,
>  					      uint32_t write_domain,
>  					      uint32_t offset);
>  void intel_batchbuffer_emit_mi_flush(struct intel_context *intel);
> +void intel_emit_post_sync_nonzero_flush(struct intel_context
> *intel);
>  
>  static INLINE uint32_t float_as_int(float f)
>  {
> --
> 1.7.5.3
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 


More information about the mesa-dev mailing list