[Mesa-dev] [PATCH] intel/compiler/gen9: Pixel shader header only workaround

Eero Tamminen eero.t.tamminen at intel.com
Mon Sep 25 14:41:04 UTC 2017


Hi,

Ran several tens of rounds on 2 different 18 EU BXT devices with the 
patch -> no hangs. Earlier that (internal) test-case always hung within 
1-3 rounds.

Tested-by: Eero Tamminen <eero.t.tamminen at intel.com>

On 25.09.2017 16:39, Topi Pohjolainen wrote:
> Fixes intermittent GPU hangs on Broxton with an Intel internal
> test case.
> 
> There are plenty of similar fragment shaders in piglit that do
> not use any varyings and any uniforms. According to the
> documentation special timing is needed between pipeline stages.
> Apparently we just don't hit that with piglit. Even with the
> failing test case one doesn't always get the hang.
> 
> Moreover, according to the error states the hang happens
> significantly later than the execution of the problematic shader.
> There are multiple render cycles (primitive submissions) in between.
> I've also seen error states where the ACTHD points outside the
> batch. Almost as if the hardware writes somewhere that gets used
> later on. That would also explain why piglit doesn't suffer from
> this - most tests kick off one render cycle and any corruption
> is left unseen.
> 
> For clarity I chose to make the decision in the compiler only and
> mark it with a boolean. In principle, constant loaders could make
> the same decision by examing num_varying_inputs along with push
> constant details.
> 
> Alternatively tweaking nr_params in compiler would allow GL driver
> to be kept as is if one did, for example:
> 
>     static const gl_constant_value zero = { 0 };
>     wm_prog_data->base.param[0] = &zero;
>     wm_prog_data->base.nr_params = 1;
> 
> This, however, doesn't work for Vulkan which would still need
> some logic to be added in anv_cmd_buffer_push_constants().
> 
> In the end I thought future debugging is probably easier when
> the explicit boolean tells about this corner case.
> 
> CC: Jason Ekstrand <jason at jlekstrand.net>
> CC: Eero Tamminen <eero.t.tamminen at intel.com>
> Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
> ---
>   src/intel/compiler/brw_compiler.h               |  7 ++++
>   src/intel/compiler/brw_fs.cpp                   | 46 +++++++++++++++++++++++++
>   src/intel/vulkan/anv_cmd_buffer.c               | 22 ++++++++++--
>   src/intel/vulkan/genX_pipeline.c                |  6 +++-
>   src/mesa/drivers/dri/i965/gen6_constant_state.c | 17 +++++++--
>   src/mesa/drivers/dri/i965/genX_state_upload.c   |  3 +-
>   6 files changed, 93 insertions(+), 8 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
> index 6753a8daf0..8a1c8c85ac 100644
> --- a/src/intel/compiler/brw_compiler.h
> +++ b/src/intel/compiler/brw_compiler.h
> @@ -622,6 +622,13 @@ struct brw_wm_prog_data {
>      bool contains_noperspective_varying;
>   
>      /**
> +    * Tell constant uplaoders, gen6_upload_push_constants() and
> +    * anv_cmd_buffer_push_constants(), that workaround is needed.
> +    * See gen9_ps_header_only_workaround().
> +    */
> +   bool needs_gen9_ps_header_only_workaround;
> +
> +   /**
>       * Mask of which interpolation modes are required by the fragment shader.
>       * Used in hardware setup on gen6+.
>       */
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index eb9b4c3890..5f4271fb59 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -6159,6 +6159,48 @@ fs_visitor::run_gs()
>      return !failed;
>   }
>   
> +/* From the SKL PRM, Volume 16, Workarounds:
> + *
> + *   0877  3D   Pixel Shader Hang possible when pixel shader dispatched with
> + *              only header phases (R0-R2)
> + *
> + *   WA: Enable a non-header phase (e.g. push constant) when dispatch would
> + *       have been header only.
> + *
> + * Additionally from the SKL PRM, Volume 2a, Command Reference,
> + * 3DSTATE_PS and Push Constant Enable:
> + *
> + *   This field must be enabled if the sum of the PS Constant Buffer [3:0]
> + *   Read Length fields in 3DSTATE_CONSTANT_PS is nonzero, and must be
> + *   disabled if the sum is zero.
> + *
> + * Therefore one needs to prepare register space for minimum amount of
> + * constants to be uploaded.
> + *
> + * Here it is assumed that assign_curb_setup() has determined the total amount
> + * of constants (uniforms + ubos) and therefore it is safe to examine if the
> + * workaround is needed.
> + */
> +static void
> +gen9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data,
> +                               int *first_non_payload_grf)
> +{
> +   if (wm_prog_data->num_varying_inputs)
> +      return;
> +
> +   if (wm_prog_data->base.curb_read_length)
> +      return;
> +
> +   assert(wm_prog_data->base.nr_params == 0);
> +
> +   wm_prog_data->needs_gen9_ps_header_only_workaround = true;
> +
> +   const unsigned wa_upload_size = DIV_ROUND_UP(1, 8);
> +
> +   wm_prog_data->base.curb_read_length = wa_upload_size;
> +   *first_non_payload_grf += wa_upload_size;
> +}
> +
>   bool
>   fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
>   {
> @@ -6222,6 +6264,10 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
>         optimize();
>   
>         assign_curb_setup();
> +
> +      if (devinfo->gen >= 9)
> +         gen9_ps_header_only_workaround(wm_prog_data, &first_non_payload_grf);
> +
>         assign_urb_setup();
>   
>         fixup_3src_null_dest();
> diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
> index 3b59af8f6f..07d45bd5d4 100644
> --- a/src/intel/vulkan/anv_cmd_buffer.c
> +++ b/src/intel/vulkan/anv_cmd_buffer.c
> @@ -641,9 +641,25 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
>      const struct brw_stage_prog_data *prog_data =
>         cmd_buffer->state.pipeline->shaders[stage]->prog_data;
>   
> -   /* If we don't actually have any push constants, bail. */
> -   if (data == NULL || prog_data == NULL || prog_data->nr_params == 0)
> -      return (struct anv_state) { .offset = 0 };
> +   if (data == NULL || prog_data == NULL || prog_data->nr_params == 0) {
> +      /* If we don't actually have any push constants, bail. */
> +      if (stage != MESA_SHADER_FRAGMENT)
> +         return (struct anv_state) { .offset = 0 };
> +
> +      const struct brw_wm_prog_data *wm_prog_data =
> +         (const struct brw_wm_prog_data *)prog_data;
> +      if (!wm_prog_data->needs_gen9_ps_header_only_workaround)
> +         return (struct anv_state) { .offset = 0 };
> +
> +      assert(cmd_buffer->device->info.gen >= 9);
> +      assert(wm_prog_data->num_varying_inputs == 0);
> +
> +      struct anv_state workaround_state =
> +         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 1 * sizeof(float),
> +                                            32 /* bottom 5 bits MBZ */);
> +      anv_state_flush(cmd_buffer->device, workaround_state);
> +      return workaround_state;
> +   }
>   
>      struct anv_state state =
>         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
> diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
> index c2fa9c0ff7..42854b06b3 100644
> --- a/src/intel/vulkan/genX_pipeline.c
> +++ b/src/intel/vulkan/genX_pipeline.c
> @@ -1506,9 +1506,13 @@ emit_3dstate_ps(struct anv_pipeline *pipeline,
>         ps.VectorMaskEnable           = true;
>         ps.SamplerCount               = get_sampler_count(fs_bin);
>         ps.BindingTableEntryCount     = get_binding_table_entry_count(fs_bin);
> -      ps.PushConstantEnable         = wm_prog_data->base.nr_params > 0;
>         ps.PositionXYOffsetSelect     = wm_prog_data->uses_pos_offset ?
>                                         POSOFFSET_SAMPLE: POSOFFSET_NONE;
> +
> +      ps.PushConstantEnable =
> +         wm_prog_data->base.nr_params > 0 ||
> +         wm_prog_data->needs_gen9_ps_header_only_workaround;
> +
>   #if GEN_GEN < 8
>         ps.AttributeEnable            = wm_prog_data->num_varying_inputs > 0;
>         ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
> diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c
> index 72f00d5640..4723cda778 100644
> --- a/src/mesa/drivers/dri/i965/gen6_constant_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c
> @@ -52,9 +52,9 @@ gen6_upload_push_constants(struct brw_context *brw,
>      const struct gen_device_info *devinfo = &brw->screen->devinfo;
>      struct gl_context *ctx = &brw->ctx;
>   
> -   if (prog_data->nr_params == 0) {
> -      stage_state->push_const_size = 0;
> -   } else {
> +   stage_state->push_const_size = 0;
> +
> +   if (prog_data->nr_params) {
>         /* Updates the ParamaterValues[i] pointers for all parameters of the
>          * basic type of PROGRAM_STATE_VAR.
>          */
> @@ -119,6 +119,17 @@ gen6_upload_push_constants(struct brw_context *brw,
>          * The other shader stages all match the VS's limits.
>          */
>         assert(stage_state->push_const_size <= 32);
> +   } else if (stage_state->stage == MESA_SHADER_FRAGMENT &&
> +              devinfo->gen >= 9) {
> +      const struct brw_wm_prog_data *wm_prog_data =
> +         (const struct brw_wm_prog_data *)prog_data;
> +
> +      if (wm_prog_data->needs_gen9_ps_header_only_workaround) {
> +         intel_upload_space(brw, 1 * sizeof(gl_constant_value), 32,
> +                            &stage_state->push_const_bo,
> +                            &stage_state->push_const_offset);
> +         stage_state->push_const_size = ALIGN(1, 8) / 8;
> +      }
>      }
>   
>      stage_state->push_constants_dirty = true;
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index 612761601a..fdb5decf86 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -3831,7 +3831,8 @@ genX(upload_ps)(struct brw_context *brw)
>   #endif
>   
>         if (prog_data->base.nr_params > 0 ||
> -          prog_data->base.ubo_ranges[0].length > 0)
> +          prog_data->base.ubo_ranges[0].length > 0 ||
> +          prog_data->needs_gen9_ps_header_only_workaround)
>            ps.PushConstantEnable = true;
>   
>   #if GEN_GEN < 8
> 



More information about the mesa-dev mailing list