[Mesa-dev] [PATCH] i965/gen7: Emit workaround flush when changing GS enable state.

Paul Berry stereotype441 at gmail.com
Mon Nov 4 20:10:15 PST 2013


---
 src/mesa/drivers/dri/i965/brw_context.h       | 44 +++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/gen7_blorp.cpp      |  8 +++++
 src/mesa/drivers/dri/i965/gen7_gs_state.c     | 18 +++++++++++
 src/mesa/drivers/dri/i965/gen7_urb.c          | 24 ++-------------
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 38 +++++++++++++++++++++++
 src/mesa/drivers/dri/i965/intel_batchbuffer.h |  1 +
 6 files changed, 111 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index bec4d6b..9e96e1d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -862,6 +862,44 @@ struct intel_sync_object {
    drm_intel_bo *bo;
 };
 
+
+/**
+ * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > Geometry
+ * > Geometry Shader > State:
+ *
+ *     "Note: Because of corruption in IVB:GT2, software needs to flush the
+ *     whole fixed function pipeline when the GS enable changes value in the
+ *     3DSTATE_GS."
+ *
+ * The hardware architects have clarified that in this context "flush the
+ * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
+ * Stall" bit set.
+ *
+ * This enum keeps track of the information we need to determine when a flush
+ * needs to happen.
+ */
+enum ivbgt2_gs_flush_workaround_state {
+
+   /**
+    * The pipeline has been flushed since the last 3DSTATE_GS, so no further
+    * flushing is needed right now.
+    */
+   IVBGT2_GS_FLUSH_UNNECESSARY,
+
+   /**
+    * Since the last flush, primitives have been rendered with GS enabled.  So
+    * a flush will be needed next time we want to disable the GS.
+    */
+   IVBGT2_GS_FLUSH_NEEDED_BEFORE_DISABLE,
+
+   /**
+    * Since the last flush, primitives have been rendered with GS disabled.
+    * So a flush will be needed next time we want to enable the GS.
+    */
+   IVBGT2_GS_FLUSH_NEEDED_BEFORE_ENABLE,
+};
+
+
 struct intel_batchbuffer {
    /** Current batchbuffer being queued up. */
    drm_intel_bo *bo;
@@ -871,6 +909,12 @@ struct intel_batchbuffer {
    drm_intel_bo *workaround_bo;
    bool need_workaround_flush;
 
+   /**
+    * For Ivy Bridge GT2, state machine to determine whether a flush is needed
+    * before changing GS enable.  Undefined for all other platforms.
+    */
+   enum ivbgt2_gs_flush_workaround_state ivbgt2_gs_flush_needed;
+
    struct cached_batch_item *cached_items;
 
    uint16_t emit, total;
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index 71f31b7..accb93e 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -402,6 +402,14 @@ gen7_blorp_emit_gs_disable(struct brw_context *brw,
    OUT_BATCH(0);
    ADVANCE_BATCH();
 
+   if (!brw->is_haswell && brw->gt == 2) {
+      if (brw->batch.ivbgt2_gs_flush_needed ==
+          IVBGT2_GS_FLUSH_NEEDED_BEFORE_DISABLE) {
+         gen7_emit_cs_stall_flush(brw);
+      }
+      brw->batch.ivbgt2_gs_flush_needed = IVBGT2_GS_FLUSH_NEEDED_BEFORE_ENABLE;
+   }
+
    BEGIN_BATCH(7);
    OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
    OUT_BATCH(0);
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index 2602200..c2f036e 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -81,6 +81,15 @@ upload_gs_state(struct brw_context *brw)
    gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);
 
    if (active) {
+      if (!brw->is_haswell && brw->gt == 2) {
+         if (brw->batch.ivbgt2_gs_flush_needed ==
+             IVBGT2_GS_FLUSH_NEEDED_BEFORE_ENABLE) {
+            gen7_emit_cs_stall_flush(brw);
+         }
+         brw->batch.ivbgt2_gs_flush_needed =
+            IVBGT2_GS_FLUSH_NEEDED_BEFORE_DISABLE;
+      }
+
       BEGIN_BATCH(7);
       OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
       OUT_BATCH(stage_state->prog_offset);
@@ -159,6 +168,15 @@ upload_gs_state(struct brw_context *brw)
       OUT_BATCH(dw6);
       ADVANCE_BATCH();
    } else {
+      if (!brw->is_haswell && brw->gt == 2) {
+         if (brw->batch.ivbgt2_gs_flush_needed ==
+             IVBGT2_GS_FLUSH_NEEDED_BEFORE_DISABLE) {
+            gen7_emit_cs_stall_flush(brw);
+         }
+         brw->batch.ivbgt2_gs_flush_needed =
+            IVBGT2_GS_FLUSH_NEEDED_BEFORE_ENABLE;
+      }
+
       BEGIN_BATCH(7);
       OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
       OUT_BATCH(0); /* prog_bo */
diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c
index 6dcdfe4..c638586 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -122,28 +122,8 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
     *
     * No such restriction exists for Haswell.
     */
-   if (!brw->is_haswell) {
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
-      /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20]
-       * CS Stall):
-       *
-       *     One of the following must also be set:
-       *     - Render Target Cache Flush Enable ([12] of DW1)
-       *     - Depth Cache Flush Enable ([0] of DW1)
-       *     - Stall at Pixel Scoreboard ([1] of DW1)
-       *     - Depth Stall ([13] of DW1)
-       *     - Post-Sync Operation ([13] of DW1)
-       *
-       * We choose to do a Post-Sync Operation (Write Immediate Data), since
-       * it seems like it will incur the least additional performance penalty.
-       */
-      OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
-      OUT_RELOC(brw->batch.workaround_bo,
-                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
+   if (!brw->is_haswell)
+      gen7_emit_cs_stall_flush(brw);
 }
 
 const struct brw_tracked_state gen7_push_constant_space = {
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 6d1ae79..66d0c96 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -194,6 +194,11 @@ brw_new_batch(struct brw_context *brw)
     */
    brw->batch.need_workaround_flush = true;
 
+   /* The kernel flushes the pipeline between batches, so no GS workaround
+    * flush is necessary.
+    */
+   brw->batch.ivbgt2_gs_flush_needed = IVBGT2_GS_FLUSH_UNNECESSARY;
+
    brw->state_batch_count = 0;
 
    brw->ib.type = -1;
@@ -510,6 +515,37 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
    ADVANCE_BATCH();
 }
 
+
+/**
+ * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set.
+ */
+void
+gen7_emit_cs_stall_flush(struct brw_context *brw)
+{
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
+   /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20]
+    * CS Stall):
+    *
+    *     One of the following must also be set:
+    *     - Render Target Cache Flush Enable ([12] of DW1)
+    *     - Depth Cache Flush Enable ([0] of DW1)
+    *     - Stall at Pixel Scoreboard ([1] of DW1)
+    *     - Depth Stall ([13] of DW1)
+    *     - Post-Sync Operation ([13] of DW1)
+    *
+    * We choose to do a Post-Sync Operation (Write Immediate Data), since
+    * it seems like it will incur the least additional performance penalty.
+    */
+   OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
+   OUT_RELOC(brw->batch.workaround_bo,
+             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+   brw->batch.ivbgt2_gs_flush_needed = IVBGT2_GS_FLUSH_UNNECESSARY;
+}
+
+
 /**
  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
  * implementing two workarounds on gen6.  From section 1.4.7.1
@@ -560,6 +596,7 @@ intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
    OUT_BATCH(0); /* address */
    OUT_BATCH(0); /* write data */
    ADVANCE_BATCH();
+   brw->batch.ivbgt2_gs_flush_needed = IVBGT2_GS_FLUSH_UNNECESSARY;
 
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
@@ -612,6 +649,7 @@ intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
 	 OUT_BATCH(0); /* write address */
 	 OUT_BATCH(0); /* write data */
 	 ADVANCE_BATCH();
+         brw->batch.ivbgt2_gs_flush_needed = IVBGT2_GS_FLUSH_UNNECESSARY;
       }
    } else {
       BEGIN_BATCH(4);
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index d46f48e..cabbb69 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -59,6 +59,7 @@ void intel_batchbuffer_emit_mi_flush(struct brw_context *brw);
 void intel_emit_post_sync_nonzero_flush(struct brw_context *brw);
 void intel_emit_depth_stall_flushes(struct brw_context *brw);
 void gen7_emit_vs_workaround_flush(struct brw_context *brw);
+void gen7_emit_cs_stall_flush(struct brw_context *brw);
 
 static INLINE uint32_t float_as_int(float f)
 {
-- 
1.8.4.2



More information about the mesa-dev mailing list