[Mesa-dev] [PATCH] i965: Do Sandybridge workaround flushes before each primitive.

Kenneth Graunke kenneth at whitecape.org
Fri Jan 9 23:07:56 PST 2015


Sandybridge requires the post-sync non-zero workaround in a ton of
places, and if you ever miss one, the GPU usually hangs.

Currently, we try to track exactly when a workaround flush is
necessary (via the brw->batch.need_workaround_flush flag).  This is
tricky to get right, and we've botched it several times in the past.

This patch unconditionally performs the post-sync non-zero flush at the
start of each primitive's state upload (including BLORP).  We drop the
needs_workaround_flush flag, and drop all the other callers, as the
flush has already been performed.

We have no data to indicate that simply flushing all the time will
hurt performance, and it has the potential to help stability.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_context.h            |  1 -
 src/mesa/drivers/dri/i965/brw_draw.c               |  3 ---
 src/mesa/drivers/dri/i965/brw_misc_state.c         | 27 -------------------
 src/mesa/drivers/dri/i965/brw_queryobj.c           |  4 ---
 src/mesa/drivers/dri/i965/brw_state_upload.c       |  4 +++
 src/mesa/drivers/dri/i965/gen6_blorp.cpp           | 31 ++++++++--------------
 src/mesa/drivers/dri/i965/gen6_depth_state.c       |  7 -----
 src/mesa/drivers/dri/i965/gen6_multisample_state.c |  3 ---
 src/mesa/drivers/dri/i965/gen6_sol.c               |  3 ---
 src/mesa/drivers/dri/i965/gen6_vs_state.c          |  6 ++---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c      | 12 ---------
 11 files changed, 17 insertions(+), 84 deletions(-)

I thought I sent this out a while back, but I couldn't find it on the list.

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index c1787a8..d0b5828 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -856,7 +856,6 @@ struct intel_batchbuffer {
    drm_intel_bo *last_bo;
    /** BO for post-sync nonzero writes for gen6 workaround. */
    drm_intel_bo *workaround_bo;
-   bool need_workaround_flush;
 
    uint16_t emit, total;
    uint16_t used, reserved_space;
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index b7e42cb..2d3de45 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -276,9 +276,6 @@ static void brw_emit_prim(struct brw_context *brw,
    OUT_BATCH(base_vertex_location);
    ADVANCE_BATCH();
 
-   /* Only used on Sandybridge; harmless to set elsewhere. */
-   brw->batch.need_workaround_flush = true;
-
    if (brw->always_flush_cache) {
       intel_batchbuffer_emit_mi_flush(brw);
    }
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index a405eb2..190cdf7 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -47,10 +47,6 @@ static void upload_drawing_rect(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
 
-   /* 3DSTATE_DRAWING_RECTANGLE is non-pipelined. */
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
    OUT_BATCH(0); /* xmin, ymin */
@@ -581,7 +577,6 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
     * non-pipelined state that will need the PIPE_CONTROL workaround.
     */
    if (brw->gen == 6) {
-      intel_emit_post_sync_nonzero_flush(brw);
       intel_emit_depth_stall_flushes(brw);
    }
 
@@ -685,9 +680,6 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
     *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
     */
    if (brw->gen >= 6 || hiz) {
-      if (brw->gen == 6)
-	 intel_emit_post_sync_nonzero_flush(brw);
-
       BEGIN_BATCH(2);
       OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
 		GEN5_DEPTH_CLEAR_VALID |
@@ -720,9 +712,6 @@ static void upload_polygon_stipple(struct brw_context *brw)
    if (!ctx->Polygon.StippleFlag)
       return;
 
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
    BEGIN_BATCH(33);
    OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));
 
@@ -766,9 +755,6 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
    if (!ctx->Polygon.StippleFlag)
       return;
 
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));
 
@@ -810,9 +796,6 @@ static void upload_aa_line_parameters(struct brw_context *brw)
    if (brw->gen == 4 && !brw->is_g4x)
       return;
 
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
    BEGIN_BATCH(3);
    OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
    /* use legacy aa line coverage computation */
@@ -842,9 +825,6 @@ static void upload_line_stipple(struct brw_context *brw)
    if (!ctx->Line.StippleFlag)
       return;
 
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
    BEGIN_BATCH(3);
    OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
    OUT_BATCH(ctx->Line.StipplePattern);
@@ -883,10 +863,6 @@ brw_upload_invariant_state(struct brw_context *brw)
 {
    const bool is_965 = brw->gen == 4 && !brw->is_g4x;
 
-   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
    /* Select the 3D pipeline (as opposed to media) */
    const uint32_t _3DSTATE_PIPELINE_SELECT =
       is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45;
@@ -954,9 +930,6 @@ static void upload_state_base_address( struct brw_context *brw )
    if (brw->gen >= 6) {
       uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;
 
-      if (brw->gen == 6)
-	 intel_emit_post_sync_nonzero_flush(brw);
-
        BEGIN_BATCH(10);
        OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
        OUT_BATCH(mocs << 8 | /* General State Memory Object Control State */
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
index c053c34..2a90822 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -66,10 +66,6 @@ brw_write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
 void
 brw_write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
 {
-   /* Emit Sandybridge workaround flush: */
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
    brw_emit_pipe_control_write(brw,
                                PIPE_CONTROL_WRITE_DEPTH_COUNT
                                | PIPE_CONTROL_DEPTH_STALL,
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 7a25ef5..9979374 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -599,6 +599,10 @@ void brw_upload_state(struct brw_context *brw)
    if ((state->mesa | state->brw) == 0)
       return;
 
+   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
+   if (brw->gen == 6)
+      intel_emit_post_sync_nonzero_flush(brw);
+
    if (unlikely(INTEL_DEBUG)) {
       /* Debug version which enforces various sanity checks on the
        * state flags which are generated and checked to help ensure
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index d4aa955..372846b 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -517,17 +517,16 @@ void
 gen6_blorp_emit_vs_disable(struct brw_context *brw,
                            const brw_blorp_params *params)
 {
-   if (brw->gen == 6) {
-      /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
-       * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
-       *
-       *   [DevSNB] A pipeline flush must be programmed prior to a
-       *   3DSTATE_VS command that causes the VS Function Enable to
-       *   toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
-       *   command with CS stall bit set and a post sync operation.
-       */
-      intel_emit_post_sync_nonzero_flush(brw);
-   }
+   /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
+    * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
+    *
+    *   [DevSNB] A pipeline flush must be programmed prior to a
+    *   3DSTATE_VS command that causes the VS Function Enable to
+    *   toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
+    *   command with CS stall bit set and a post sync operation.
+    *
+    * We've already done one at the start of the BLORP operation.
+    */
 
    /* Disable the push constant buffers. */
    BEGIN_BATCH(5);
@@ -817,7 +816,6 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context *brw,
 
    /* 3DSTATE_DEPTH_BUFFER */
    {
-      intel_emit_post_sync_nonzero_flush(brw);
       intel_emit_depth_stall_flushes(brw);
 
       BEGIN_BATCH(7);
@@ -893,7 +891,6 @@ static void
 gen6_blorp_emit_depth_disable(struct brw_context *brw,
                               const brw_blorp_params *params)
 {
-   intel_emit_post_sync_nonzero_flush(brw);
    intel_emit_depth_stall_flushes(brw);
 
    BEGIN_BATCH(7);
@@ -945,9 +942,6 @@ void
 gen6_blorp_emit_drawing_rectangle(struct brw_context *brw,
                                   const brw_blorp_params *params)
 {
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
    OUT_BATCH(0);
@@ -997,9 +991,6 @@ gen6_blorp_emit_primitive(struct brw_context *brw,
    OUT_BATCH(0);
    OUT_BATCH(0);
    ADVANCE_BATCH();
-
-   /* Only used on Sandybridge; harmless to set elsewhere. */
-   brw->batch.need_workaround_flush = true;
 }
 
 /**
@@ -1025,7 +1016,7 @@ gen6_blorp_exec(struct brw_context *brw,
    uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
 
    /* Emit workaround flushes when we switch from drawing to blorping. */
-   brw->batch.need_workaround_flush = true;
+   intel_emit_post_sync_nonzero_flush(brw);
 
    gen6_emit_3dstate_multisample(brw, params->dst.num_samples);
    gen6_emit_3dstate_sample_mask(brw,
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c b/src/mesa/drivers/dri/i965/gen6_depth_state.c
index f92c1af..effb9c6 100644
--- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -65,11 +65,6 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
     */
    bool enable_hiz_ss = hiz || separate_stencil;
 
-
-   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
-    * non-pipelined state that will need the PIPE_CONTROL workaround.
-    */
-   intel_emit_post_sync_nonzero_flush(brw);
    intel_emit_depth_stall_flushes(brw);
 
    irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
@@ -238,8 +233,6 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
     *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
     *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
     */
-   intel_emit_post_sync_nonzero_flush(brw);
-
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
              GEN5_DEPTH_CLEAR_VALID |
diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
index 7c9cbfa..ec46479 100644
--- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
@@ -132,9 +132,6 @@ gen6_emit_3dstate_multisample(struct brw_context *brw,
       unreachable("Unrecognized num_samples in gen6_emit_3dstate_multisample");
    }
 
-   /* 3DSTATE_MULTISAMPLE is nonpipelined. */
-   intel_emit_post_sync_nonzero_flush(brw);
-
    int len = brw->gen >= 7 ? 4 : 3;
    BEGIN_BATCH(len);
    OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
index 0dafd0f..d867c5d 100644
--- a/src/mesa/drivers/dri/i965/gen6_sol.c
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -259,9 +259,6 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
       = _mesa_compute_max_transform_feedback_vertices(xfb_obj,
                                                       linked_xfb_info);
 
-   /* 3DSTATE_GS_SVB_INDEX is non-pipelined. */
-   intel_emit_post_sync_nonzero_flush(brw);
-
    /* Initialize the SVBI 0 register to zero and set the maximum index. */
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index e365cc6..ee68ba5 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -171,10 +171,9 @@ upload_vs_state(struct brw_context *brw)
     *   flush can be executed by sending a PIPE_CONTROL command with CS
     *   stall bit set and a post sync operation.
     *
-    * Although we don't disable the VS during normal drawing, BLORP sometimes
-    * disables it.  To be safe, do the flush here just in case.
+    * We've already done such a flush at the start of state upload, so we
+    * don't need to do another one here.
     */
-   intel_emit_post_sync_nonzero_flush(brw);
 
    if (stage_state->push_const_size == 0) {
       /* Disable the push constant buffers. */
@@ -247,7 +246,6 @@ upload_vs_state(struct brw_context *brw)
     * bug reports that led to this workaround, and may be more than
     * what is strictly required to avoid the issue.
     */
-   intel_emit_post_sync_nonzero_flush(brw);
    brw_emit_pipe_control_flush(brw,
                                PIPE_CONTROL_DEPTH_STALL |
                                PIPE_CONTROL_INSTRUCTION_FLUSH |
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 193550b..4e610a9 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -51,8 +51,6 @@ intel_batchbuffer_init(struct brw_context *brw)
 						      4096, 4096);
    }
 
-   brw->batch.need_workaround_flush = true;
-
    if (!brw->has_llc) {
       brw->batch.cpu_map = malloc(BATCH_SZ);
       brw->batch.map = brw->batch.cpu_map;
@@ -183,11 +181,6 @@ brw_new_batch(struct brw_context *brw)
 
    brw->state.dirty.brw |= BRW_NEW_BATCH;
 
-   /* Assume that the last command before the start of our batch was a
-    * primitive, for safety.
-    */
-   brw->batch.need_workaround_flush = true;
-
    brw->state_batch_count = 0;
 
    brw->ib.type = -1;
@@ -648,17 +641,12 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
 void
 intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
 {
-   if (!brw->batch.need_workaround_flush)
-      return;
-
    brw_emit_pipe_control_flush(brw,
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
 
    brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
                                brw->batch.workaround_bo, 0, 0, 0);
-
-   brw->batch.need_workaround_flush = false;
 }
 
 /* Emit a pipelined flush to either flush render and texture cache for
-- 
2.1.3



More information about the mesa-dev mailing list