[Mesa-dev] [PATCH 4/4] i965/gen6: Apply documented workaround for nonpipelined state packets.

Eric Anholt eric at anholt.net
Fri Jun 17 18:46:23 PDT 2011


Fixes a 100% reproducible GPU hang in topogun-1.06-orc-84k.trace.
---
 src/mesa/drivers/dri/i965/brw_misc_state.c     |   23 +++++++++++++++++++++++
 src/mesa/drivers/dri/intel/intel_batchbuffer.c |   21 ++++++++++++++++++++-
 src/mesa/drivers/dri/intel/intel_batchbuffer.h |    1 +
 3 files changed, 44 insertions(+), 1 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 1f3b64f..85dae28 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -219,6 +219,12 @@ static void emit_depthbuffer(struct brw_context *brw)
    struct intel_region *hiz_region = depth_irb ? depth_irb->hiz_region : NULL;
    unsigned int len;
 
+   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
+    * non-pipelined state that will need the PIPE_CONTROL workaround.
+    */
+   if (intel->gen == 6)
+      intel_emit_post_sync_nonzero_flush(intel);
+
    /*
     * If either depth or stencil buffer has packed depth/stencil format,
     * then don't use separate stencil. Emit only a depth buffer.
@@ -408,6 +414,9 @@ static void emit_depthbuffer(struct brw_context *brw)
     *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
     */
    if (intel->gen >= 6 || hiz_region) {
+      if (intel->gen == 6)
+	 intel_emit_post_sync_nonzero_flush(intel);
+
       BEGIN_BATCH(2);
       OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
       OUT_BATCH(0);
@@ -523,6 +532,9 @@ static void upload_aa_line_parameters(struct brw_context *brw)
    if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
       return;
 
+   if (intel->gen == 6)
+      intel_emit_post_sync_nonzero_flush(intel);
+
    OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
    /* use legacy aa line coverage computation */
    OUT_BATCH(0);
@@ -553,6 +565,9 @@ static void upload_line_stipple(struct brw_context *brw)
    if (!ctx->Line.StippleFlag)
       return;
 
+   if (intel->gen == 6)
+      intel_emit_post_sync_nonzero_flush(intel);
+
    BEGIN_BATCH(3);
    OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
    OUT_BATCH(ctx->Line.StipplePattern);
@@ -580,6 +595,10 @@ static void upload_invarient_state( struct brw_context *brw )
 {
    struct intel_context *intel = &brw->intel;
 
+   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
+   if (intel->gen == 6)
+      intel_emit_post_sync_nonzero_flush(intel);
+
    {
       /* 0x61040000  Pipeline Select */
       /*     PipelineSelect            : 0 */
@@ -643,6 +662,7 @@ static void upload_invarient_state( struct brw_context *brw )
       sip.header.length = 0;
       sip.bits0.pad = 0;
       sip.bits0.system_instruction_pointer = 0;
+
       BRW_BATCH_STRUCT(brw, &sip);
    }
 
@@ -683,6 +703,9 @@ static void upload_state_base_address( struct brw_context *brw )
    struct intel_context *intel = &brw->intel;
 
    if (intel->gen >= 6) {
+      if (intel->gen == 6)
+	 intel_emit_post_sync_nonzero_flush(intel);
+
        BEGIN_BATCH(10);
        OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
        /* General state base address: stateless DP read/write requests */
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 77563ae..13dd855 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -293,7 +293,26 @@ emit:
    item->header = intel->batch.emit;
 }
 
-static void
+/**
+ * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
+ * implementing two workarounds on gen6:
+ *
+ * [DevSNB-C+{W/A}] Before any depth stall flush (including those
+ * produced by non-pipelined state commands), software needs to first
+ * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
+ * 0.
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
+ * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
+ *
+ * XXX: There is also a workaround that would appear to apply to this
+ * workaround, but it doesn't appear to be necessary so far:
+ *
+ * Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * BEFORE the pipe-control with a post-sync op and no write-cache
+ * flushes.
+ */
+void
 intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
 {
    if (!intel->batch.need_workaround_flush)
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
index 3ed88d0..fb4134d 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@@ -39,6 +39,7 @@ GLboolean intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,
 					      uint32_t write_domain,
 					      uint32_t offset);
 void intel_batchbuffer_emit_mi_flush(struct intel_context *intel);
+void intel_emit_post_sync_nonzero_flush(struct intel_context *intel);
 
 static INLINE uint32_t float_as_int(float f)
 {
-- 
1.7.5.3



More information about the mesa-dev mailing list