[Mesa-dev] [PATCH v4] i965: Update workaround names and platforms

Wed Feb 10 20:21:49 UTC 2016

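Update the format in which workarounds are documented
in the source code. Each workaround is now tagged on its
own comment line as WaName:platform-list (for example,
WaDisable1DDepthStencil:skl,bxt,kbl). This allows the Mesa
source to be parsed by the list-workarounds utility in
intel-gpu-tools.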

Signed-off-by: Sameer Kibey <sameer.kibey at intel.com>
---
 src/mesa/drivers/dri/i965/brw_binding_tables.c | 3 ++-
 src/mesa/drivers/dri/i965/brw_blorp.cpp        | 2 ++
 src/mesa/drivers/dri/i965/brw_eu_emit.c        | 2 ++
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 6 ++++++
 src/mesa/drivers/dri/i965/brw_pipe_control.c   | 2 ++
 src/mesa/drivers/dri/i965/gen6_queryobj.c      | 5 +++--
 src/mesa/drivers/dri/i965/gen8_depth_state.c   | 7 ++++---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c  | 2 +-
 8 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c
index f3a0310..6dd35dd 100644
--- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
+++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
@@ -54,13 +54,14 @@ static uint32_t
 reserve_hw_bt_space(struct brw_context *brw, unsigned bytes)
 {
    /* From the Broadwell PRM, Volume 16, "Workarounds",
-    * WaStateBindingTableOverfetch:
     * "HW over-fetches two cache lines of binding table indices.  When
     *  using the resource streamer, SW needs to pad binding table pointer
     *  updates with an additional two cache lines."
     *
     * Cache lines are 64 bytes, so we subtract 128 bytes from the size of
     * the binding table pool buffer.
+    *
+    * WaStateBindingTableOverfetch:hsw,bdw,chv,bxt
     */
    if (brw->hw_bt_pool.next_offset + bytes >= brw->hw_bt_pool.bo->size - 128) {
       gen7_reset_hw_bt_pool_offsets(brw);
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index 1bc6d15..f798e29 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -318,6 +318,8 @@ brw_hiz_op_params::brw_hiz_op_params(struct intel_mipmap_tree *mt,
     * SURFACE_STATE.Surface_Horizontal_Alignment should be 4 for Z24 surfaces,
     * not 8. But commit 1f112cc increased the alignment from 4 to 8, which
     * prevents the clobbering.
+    *
+    * WaHizAmbiguate8x4Aligned:hsw
     */
    depth.width = ALIGN(depth.width, 8);
    depth.height = ALIGN(depth.height, 4);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 35d8039..7a6179a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1891,6 +1891,8 @@ void brw_CMP(struct brw_codegen *p,
     *
     * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
     * mentioned on their work-arounds pages.
+    *
+    * WaCMPInstNullDstForcesThreadSwitch:ivb,hsw,vlv
     */
    if (devinfo->gen == 7) {
       if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 1916a99..b91a405 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1846,6 +1846,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
           * We choose to split into CMP(8) instructions since disabling
           * coissuing would affect CMP instructions not otherwise affected by
           * the errata.
+          *
+          * WaCMPInstFlagDepClearedEarly:ivb,vlv
           */
          if (dispatch_width == 16 && devinfo->gen == 7 && !devinfo->is_haswell) {
             if (dst.file == BRW_GENERAL_REGISTER_FILE) {
@@ -1932,6 +1934,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
           * should
           *
           *    "Force BFI instructions to be executed always in SIMD8."
+          *
+          * WaForceSIMD8ForBFIInstruction:hsw
           */
          if (dispatch_width == 16 && devinfo->is_haswell) {
             brw_set_default_exec_size(p, BRW_EXECUTE_8);
@@ -1954,6 +1958,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
           *
           * Otherwise we would be able to emit compressed instructions like we
           * do for the other three-source instructions.
+          *
+          * WaForceSIMD8ForBFIInstruction:hsw
           */
          if (dispatch_width == 16 &&
              (devinfo->is_haswell || !devinfo->supports_simd16_3src)) {
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index b41e28e..51b20b2 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -67,6 +67,8 @@ gen8_add_cs_stall_workaround_bits(uint32_t *flags)
  *
  * Note that the kernel does CS stalls between batches, so we only need
  * to count them within a batch.
+ *
+ * WaCsStallAtEveryFourthPipecontrol:ivb,vlv
  */
 static uint32_t
 gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index d508c4c..4e8bab1 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -238,8 +238,7 @@ gen6_queryobj_get_results(struct gl_context *ctx,
 
    case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
       query->Base.Result = (results[1] - results[0]);
-      /* Implement the "WaDividePSInvocationCountBy4:HSW,BDW" workaround:
-       * "Invocation counter is 4 times actual.  WA: SW to divide HW reported
+      /* "Invocation counter is 4 times actual.  WA: SW to divide HW reported
        *  PS Invocations value by 4."
        *
        * Prior to Haswell, invocation count was counted by the WM, and it
@@ -247,6 +246,8 @@ gen6_queryobj_get_results(struct gl_context *ctx,
        * correct value, the CS multiplied this by 4. With HSW the logic moved,
        * and correctly emitted the number of pixel shader invocations, but,
        * whomever forgot to undo the multiply by 4.
+       *
+       * WaDividePSInvocationCountBy4:hsw,bdw,chv
        */
       if (brw->gen == 8 || brw->is_haswell)
          query->Base.Result /= 4;
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index 93100a0..fd3d5a7 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -193,9 +193,10 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw,
    case GL_TEXTURE_1D_ARRAY:
    case GL_TEXTURE_1D:
       if (brw->gen >= 9) {
-         /* WaDisable1DDepthStencil. Skylake+ doesn't support 1D depth
-          * textures but it does allow pretending it's a 2D texture
-          * instead.
+         /* Skylake+ doesn't support 1D depth textures but it does allow
+          * pretending it's a 2D texture instead.
+          *
+          * WaDisable1DDepthStencil:skl,bxt,kbl
           */
          surftype = BRW_SURFACE_2D;
          break;
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index f778074..c6ec379 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -228,7 +228,7 @@ brw_finish_batch(struct brw_context *brw)
           * From the example in the docs, it seems to expect a regular pipe control
           * flush here as well. We may have done it already, but meh.
           *
-          * See also WaAvoidRCZCounterRollover.
+          * WaAvoidRCZCounterRollover:hsw
           */
          brw_emit_mi_flush(brw);
          BEGIN_BATCH(2);
-- 
1.9.1