[Mesa-dev] [PATCH] intel/compiler/gen9: Pixel shader header only workaround

Topi Pohjolainen topi.pohjolainen at gmail.com
Mon Sep 25 13:39:16 UTC 2017


Fixes intermittent GPU hangs on Broxton with an Intel internal
test case.

There are plenty of similar fragment shaders in piglit that do
not use any varyings and any uniforms. According to the
documentation special timing is needed between pipeline stages.
Apparently we just don't hit that with piglit. Even with the
failing test case one doesn't always get the hang.

Moreover, according to the error states the hang happens
significantly later than the execution of the problematic shader.
There are multiple render cycles (primitive submissions) in between.
I've also seen error states where the ACTHD points outside the
batch. Almost as if the hardware writes somewhere that gets used
later on. That would also explain why piglit doesn't suffer from
this - most tests kick off one render cycle and any corruption
is left unseen.

For clarity I chose to make the decision in the compiler only and
mark it with a boolean. In principle, constant loaders could make
the same decision by examing num_varying_inputs along with push
constant details.

Alternatively tweaking nr_params in compiler would allow GL driver
to be kept as is if one did, for example:

   static const gl_constant_value zero = { 0 };
   wm_prog_data->base.param[0] = &zero;
   wm_prog_data->base.nr_params = 1;

This, however, doesn't work for Vulkan which would still need
some logic to be added in anv_cmd_buffer_push_constants().

In the end I thought future debugging is probably easier when
the explicit boolean tells about this corner case.

CC: Jason Ekstrand <jason at jlekstrand.net>
CC: Eero Tamminen <eero.t.tamminen at intel.com>
Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
 src/intel/compiler/brw_compiler.h               |  7 ++++
 src/intel/compiler/brw_fs.cpp                   | 46 +++++++++++++++++++++++++
 src/intel/vulkan/anv_cmd_buffer.c               | 22 ++++++++++--
 src/intel/vulkan/genX_pipeline.c                |  6 +++-
 src/mesa/drivers/dri/i965/gen6_constant_state.c | 17 +++++++--
 src/mesa/drivers/dri/i965/genX_state_upload.c   |  3 +-
 6 files changed, 93 insertions(+), 8 deletions(-)

diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 6753a8daf0..8a1c8c85ac 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -622,6 +622,13 @@ struct brw_wm_prog_data {
    bool contains_noperspective_varying;
 
    /**
+    * Tell constant uplaoders, gen6_upload_push_constants() and
+    * anv_cmd_buffer_push_constants(), that workaround is needed.
+    * See gen9_ps_header_only_workaround().
+    */
+   bool needs_gen9_ps_header_only_workaround;
+
+   /**
     * Mask of which interpolation modes are required by the fragment shader.
     * Used in hardware setup on gen6+.
     */
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index eb9b4c3890..5f4271fb59 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6159,6 +6159,48 @@ fs_visitor::run_gs()
    return !failed;
 }
 
+/* From the SKL PRM, Volume 16, Workarounds:
+ *
+ *   0877  3D   Pixel Shader Hang possible when pixel shader dispatched with
+ *              only header phases (R0-R2)
+ *
+ *   WA: Enable a non-header phase (e.g. push constant) when dispatch would
+ *       have been header only.
+ *
+ * Additionally from the SKL PRM, Volume 2a, Command Reference,
+ * 3DSTATE_PS and Push Constant Enable:
+ *
+ *   This field must be enabled if the sum of the PS Constant Buffer [3:0]
+ *   Read Length fields in 3DSTATE_CONSTANT_PS is nonzero, and must be
+ *   disabled if the sum is zero. 
+ *
+ * Therefore one needs to prepare register space for minimum amount of
+ * constants to be uploaded.
+ *
+ * Here it is assumed that assign_curb_setup() has determined the total amount
+ * of constants (uniforms + ubos) and therefore it is safe to examine if the
+ * workaround is needed.
+ */
+static void
+gen9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data,
+                               int *first_non_payload_grf)
+{
+   if (wm_prog_data->num_varying_inputs)
+      return;
+
+   if (wm_prog_data->base.curb_read_length)
+      return;
+
+   assert(wm_prog_data->base.nr_params == 0);
+
+   wm_prog_data->needs_gen9_ps_header_only_workaround = true;
+
+   const unsigned wa_upload_size = DIV_ROUND_UP(1, 8);
+
+   wm_prog_data->base.curb_read_length = wa_upload_size;
+   *first_non_payload_grf += wa_upload_size;
+}
+
 bool
 fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
 {
@@ -6222,6 +6264,10 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
       optimize();
 
       assign_curb_setup();
+
+      if (devinfo->gen >= 9)
+         gen9_ps_header_only_workaround(wm_prog_data, &first_non_payload_grf);
+
       assign_urb_setup();
 
       fixup_3src_null_dest();
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 3b59af8f6f..07d45bd5d4 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -641,9 +641,25 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
    const struct brw_stage_prog_data *prog_data =
       cmd_buffer->state.pipeline->shaders[stage]->prog_data;
 
-   /* If we don't actually have any push constants, bail. */
-   if (data == NULL || prog_data == NULL || prog_data->nr_params == 0)
-      return (struct anv_state) { .offset = 0 };
+   if (data == NULL || prog_data == NULL || prog_data->nr_params == 0) {
+      /* If we don't actually have any push constants, bail. */
+      if (stage != MESA_SHADER_FRAGMENT)
+         return (struct anv_state) { .offset = 0 };
+
+      const struct brw_wm_prog_data *wm_prog_data =
+         (const struct brw_wm_prog_data *)prog_data;
+      if (!wm_prog_data->needs_gen9_ps_header_only_workaround)
+         return (struct anv_state) { .offset = 0 };
+
+      assert(cmd_buffer->device->info.gen >= 9);
+      assert(wm_prog_data->num_varying_inputs == 0);
+
+      struct anv_state workaround_state =
+         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 1 * sizeof(float),
+                                            32 /* bottom 5 bits MBZ */);
+      anv_state_flush(cmd_buffer->device, workaround_state);
+      return workaround_state;
+   }
 
    struct anv_state state =
       anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index c2fa9c0ff7..42854b06b3 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1506,9 +1506,13 @@ emit_3dstate_ps(struct anv_pipeline *pipeline,
       ps.VectorMaskEnable           = true;
       ps.SamplerCount               = get_sampler_count(fs_bin);
       ps.BindingTableEntryCount     = get_binding_table_entry_count(fs_bin);
-      ps.PushConstantEnable         = wm_prog_data->base.nr_params > 0;
       ps.PositionXYOffsetSelect     = wm_prog_data->uses_pos_offset ?
                                       POSOFFSET_SAMPLE: POSOFFSET_NONE;
+
+      ps.PushConstantEnable =
+         wm_prog_data->base.nr_params > 0 ||
+         wm_prog_data->needs_gen9_ps_header_only_workaround;
+
 #if GEN_GEN < 8
       ps.AttributeEnable            = wm_prog_data->num_varying_inputs > 0;
       ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c
index 72f00d5640..4723cda778 100644
--- a/src/mesa/drivers/dri/i965/gen6_constant_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c
@@ -52,9 +52,9 @@ gen6_upload_push_constants(struct brw_context *brw,
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    struct gl_context *ctx = &brw->ctx;
 
-   if (prog_data->nr_params == 0) {
-      stage_state->push_const_size = 0;
-   } else {
+   stage_state->push_const_size = 0;
+
+   if (prog_data->nr_params) {
       /* Updates the ParamaterValues[i] pointers for all parameters of the
        * basic type of PROGRAM_STATE_VAR.
        */
@@ -119,6 +119,17 @@ gen6_upload_push_constants(struct brw_context *brw,
        * The other shader stages all match the VS's limits.
        */
       assert(stage_state->push_const_size <= 32);
+   } else if (stage_state->stage == MESA_SHADER_FRAGMENT &&
+              devinfo->gen >= 9) {
+      const struct brw_wm_prog_data *wm_prog_data =
+         (const struct brw_wm_prog_data *)prog_data;
+
+      if (wm_prog_data->needs_gen9_ps_header_only_workaround) {
+         intel_upload_space(brw, 1 * sizeof(gl_constant_value), 32,
+                            &stage_state->push_const_bo,
+                            &stage_state->push_const_offset);
+         stage_state->push_const_size = ALIGN(1, 8) / 8;
+      }
    }
 
    stage_state->push_constants_dirty = true;
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 612761601a..fdb5decf86 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -3831,7 +3831,8 @@ genX(upload_ps)(struct brw_context *brw)
 #endif
 
       if (prog_data->base.nr_params > 0 ||
-          prog_data->base.ubo_ranges[0].length > 0)
+          prog_data->base.ubo_ranges[0].length > 0 ||
+          prog_data->needs_gen9_ps_header_only_workaround)
          ps.PushConstantEnable = true;
 
 #if GEN_GEN < 8
-- 
2.11.0



More information about the mesa-dev mailing list