[Mesa-stable] [PATCH 1/4] i965/gen10: Implement WaSampleOffsetIZ workaround

Anuj Phogat anuj.phogat at gmail.com
Mon Oct 2 23:07:57 UTC 2017


WaFlushHangWhenNonPipelineStateAndMarkerStalled goes along
with WaSampleOffsetIZ. Both recommends the same.

Cc: mesa-stable at lists.freedesktop.org
Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
---
 src/mesa/drivers/dri/i965/brw_context.h            |  2 +
 src/mesa/drivers/dri/i965/brw_defines.h            |  1 +
 src/mesa/drivers/dri/i965/brw_pipe_control.c       | 54 ++++++++++++++++++++++
 src/mesa/drivers/dri/i965/gen8_multisample_state.c |  8 ++++
 4 files changed, 65 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 92fc16de13..f0e8d562e9 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1647,6 +1647,8 @@ void brw_emit_post_sync_nonzero_flush(struct brw_context *brw);
 void brw_emit_depth_stall_flushes(struct brw_context *brw);
 void gen7_emit_vs_workaround_flush(struct brw_context *brw);
 void gen7_emit_cs_stall_flush(struct brw_context *brw);
+void gen10_emit_wa_cs_stall_flush(struct brw_context *brw);
+void gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw);
 
 /* brw_queryformat.c */
 void brw_query_internal_format(struct gl_context *ctx, GLenum target,
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 4abb790612..270cdf29db 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1609,6 +1609,7 @@ enum brw_pixel_shader_coverage_mask_mode {
 #define GEN7_GPGPU_DISPATCHDIMY         0x2504
 #define GEN7_GPGPU_DISPATCHDIMZ         0x2508
 
+#define GEN7_CACHE_MODE_0               0x7000
 #define GEN7_CACHE_MODE_1               0x7004
 # define GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
 # define GEN8_HIZ_NP_PMA_FIX_ENABLE        (1 << 11)
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index 460b8f73b6..6326957a7a 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -278,6 +278,60 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
                                brw->workaround_bo, 0, 0);
 }
 
+static void
+brw_flush_write_caches(struct brw_context *brw) {
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CACHE_FLUSH_BITS);
+}
+
+static void
+brw_flush_read_caches(struct brw_context *brw) {
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CACHE_INVALIDATE_BITS);
+}
+
+/**
+ * From Gen10 Workarounds page in h/w specs:
+ * WaSampleOffsetIZ:
+ * Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no
+ * markers in the pipeline by programming a PIPE_CONTROL with stall.
+ */
+void
+gen10_emit_wa_cs_stall_flush(struct brw_context *brw)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   assert(devinfo->gen == 10);
+   brw_emit_pipe_control_flush(brw,
+                               PIPE_CONTROL_CS_STALL |
+                               PIPE_CONTROL_STALL_AT_SCOREBOARD);
+}
+
+/**
+ * From Gen10 Workarounds page in h/w specs:
+ * WaSampleOffsetIZ:
+ * When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an
+ * MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF(SVL)
+ * after the command to ensure the state has been delivered prior to any
+ * command causing a marker in the pipeline.
+ */
+void
+gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   assert(devinfo->gen == 10);
+
+   /* Before changing the value of CACHE_MODE_0 register, GFX pipeline must
+    * be idle; i.e., full flush is required.
+    */
+   brw_flush_write_caches(brw);
+   brw_flush_read_caches(brw);
+
+   /* Write to CACHE_MODE_0 (0x7000) */
+   BEGIN_BATCH(3);
+   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+   OUT_BATCH(GEN7_CACHE_MODE_0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
 /**
  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
  * implementing two workarounds on gen6.  From section 1.4.7.1
diff --git a/src/mesa/drivers/dri/i965/gen8_multisample_state.c b/src/mesa/drivers/dri/i965/gen8_multisample_state.c
index 7a31a5df4a..14043025b6 100644
--- a/src/mesa/drivers/dri/i965/gen8_multisample_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_multisample_state.c
@@ -49,6 +49,11 @@ gen8_emit_3dstate_multisample(struct brw_context *brw, unsigned num_samples)
 void
 gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+   if (devinfo->gen == 10)
+      gen10_emit_wa_cs_stall_flush(brw);
+
    BEGIN_BATCH(9);
    OUT_BATCH(_3DSTATE_SAMPLE_PATTERN << 16 | (9 - 2));
 
@@ -68,4 +73,7 @@ gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
    /* 1x and 2x MSAA */
    OUT_BATCH(brw_multisample_positions_1x_2x);
    ADVANCE_BATCH();
+
+   if (devinfo->gen == 10)
+      gen10_emit_wa_lri_to_cache_mode_zero(brw);
 }
-- 
2.13.5



More information about the mesa-stable mailing list