[Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu v2

Tue Oct 30 17:14:52 PDT 2012

This looks good to me. Thank you.

Marek

On Tue, Oct 30, 2012 at 11:04 PM,  <j.glisse at gmail.com> wrote:
> From: Jerome Glisse <jglisse at redhat.com>
>
> On r6xx/r7xx shader resource management need to make sure that the
> shader does not goes over the gpr register limit. Each specific
> asic has a maxmimum register that can be split btw shader stage.
> For each stage the shader must not use more register than the
> limit programmed.
>
> v2: Print an error message when discarding draw. Don't add another
>     boolean to context structure, but rather propagate the discard
>     boolean through the call chain.
>
> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
> ---
>  src/gallium/drivers/r600/r600_pipe.h         |  2 +-
>  src/gallium/drivers/r600/r600_state.c        | 67 +++++++++++++++++++---------
>  src/gallium/drivers/r600/r600_state_common.c | 27 ++++++-----
>  3 files changed, 62 insertions(+), 34 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
> index ff2a5fd..3edef40 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -595,7 +595,7 @@ void *r600_create_db_flush_dsa(struct r600_context *rctx);
>  void *r600_create_resolve_blend(struct r600_context *rctx);
>  void *r700_create_resolve_blend(struct r600_context *rctx);
>  void *r600_create_decompress_blend(struct r600_context *rctx);
> -void r600_adjust_gprs(struct r600_context *rctx);
> +bool r600_adjust_gprs(struct r600_context *rctx);
>  boolean r600_is_format_supported(struct pipe_screen *screen,
>                                  enum pipe_format format,
>                                  enum pipe_texture_target target,
> diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
> index 7d07008..76fe44d 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -2187,36 +2187,61 @@ void r600_init_state_functions(struct r600_context *rctx)
>  }
>
>  /* Adjust GPR allocation on R6xx/R7xx */
> -void r600_adjust_gprs(struct r600_context *rctx)
> +bool r600_adjust_gprs(struct r600_context *rctx)
>  {
> -       unsigned num_ps_gprs = rctx->default_ps_gprs;
> -       unsigned num_vs_gprs = rctx->default_vs_gprs;
> +       unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr;
> +       unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr;
> +       unsigned new_num_ps_gprs = num_ps_gprs;
> +       unsigned new_num_vs_gprs = num_vs_gprs;
> +       unsigned cur_num_ps_gprs = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> +       unsigned cur_num_vs_gprs = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> +       unsigned def_num_ps_gprs = rctx->default_ps_gprs;
> +       unsigned def_num_vs_gprs = rctx->default_vs_gprs;
> +       unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
> +       /* hardware will reserve twice num_clause_temp_gprs */
> +       unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + def_num_clause_temp_gprs * 2;
>         unsigned tmp;
> -       int diff;
>
> -       if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) {
> -               diff = rctx->ps_shader->current->shader.bc.ngpr - rctx->default_ps_gprs;
> -               num_vs_gprs -= diff;
> -               num_ps_gprs += diff;
> -       }
> -
> -       if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs)
> -       {
> -               diff = rctx->vs_shader->current->shader.bc.ngpr - rctx->default_vs_gprs;
> -               num_ps_gprs -= diff;
> -               num_vs_gprs += diff;
> +       /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to max_gprs */
> +       if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > cur_num_vs_gprs) {
> +               /* try to use switch back to default */
> +               if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > def_num_vs_gprs) {
> +                       /* always privilege vs stage so that at worst we have the
> +                        * pixel stage producing wrong output (not the vertex
> +                        * stage) */
> +                       new_num_ps_gprs = max_gprs - (new_num_vs_gprs + def_num_clause_temp_gprs * 2);
> +                       new_num_vs_gprs = num_vs_gprs;
> +               } else {
> +                       new_num_ps_gprs = def_num_ps_gprs;
> +                       new_num_vs_gprs = def_num_vs_gprs;
> +               }
> +       } else {
> +               return true;
>         }
>
> -       tmp = 0;
> -       tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
> -       tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
> -       tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs);
> -
> -       if (tmp != rctx->config_state.sq_gpr_resource_mgmt_1) {
> +       /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <=
> +        * SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup
> +        * Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS
> +        * it will lockup. So in this case just discard the draw command
> +        * and don't change the current gprs repartitions.
> +        */
> +       if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs) {
> +               R600_ERR("ps & vs shader require too many register (%d + %d) "
> +                        "for a combined maximum of %d\n",
> +                        num_ps_gprs, num_vs_gprs, max_gprs);
> +               return false;
> +       }
> +
> +       /* in some case we endup recomputing the current value */
> +       tmp = S_008C04_NUM_PS_GPRS(new_num_ps_gprs) |
> +               S_008C04_NUM_VS_GPRS(new_num_vs_gprs) |
> +               S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs);
> +       if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp) {
>                 rctx->config_state.sq_gpr_resource_mgmt_1 = tmp;
>                 rctx->config_state.atom.dirty = true;
>                 rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
>         }
> +       return true;
>  }
>
>  void r600_init_atom_start_cs(struct r600_context *rctx)
> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
> index 65985c7..94e5e47 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -755,10 +755,6 @@ static int r600_shader_select(struct pipe_context *ctx,
>         shader->next_variant = sel->current;
>         sel->current = shader;
>
> -       if (rctx->chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
> -               r600_adjust_gprs(rctx);
> -       }
> -
>         if (rctx->ps_shader &&
>             rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) {
>                 rctx->cb_misc_state.nr_ps_color_outputs = rctx->ps_shader->current->nr_ps_color_outputs;
> @@ -814,9 +810,6 @@ static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
>                         rctx->cb_misc_state.multiwrite = multiwrite;
>                         rctx->cb_misc_state.atom.dirty = true;
>                 }
> -
> -               if (rctx->vs_shader)
> -                       r600_adjust_gprs(rctx);
>         }
>
>         if (rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) {
> @@ -839,9 +832,6 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
>         if (state) {
>                 r600_context_pipe_state_set(rctx, &rctx->vs_shader->current->rstate);
>
> -               if (rctx->chip_class < EVERGREEN && rctx->ps_shader)
> -                       r600_adjust_gprs(rctx);
> -
>                 /* Update clip misc state. */
>                 if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
>                     rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) {
> @@ -1032,7 +1022,7 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask
>         rctx->sample_mask.atom.dirty = true;
>  }
>
> -static void r600_update_derived_state(struct r600_context *rctx)
> +static bool r600_update_derived_state(struct r600_context *rctx)
>  {
>         struct pipe_context * ctx = (struct pipe_context*)rctx;
>         unsigned ps_dirty = 0;
> @@ -1070,6 +1060,13 @@ static void r600_update_derived_state(struct r600_context *rctx)
>         if (ps_dirty)
>                 r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);
>
> +       if (rctx->chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
> +               if (!r600_adjust_gprs(rctx)) {
> +                       /* discard rendering */
> +                       return false;
> +               }
> +       }
> +
>         blend_disable = (rctx->dual_src_blend &&
>                         rctx->ps_shader->current->nr_ps_color_outputs < 2);
>
> @@ -1079,6 +1076,7 @@ static void r600_update_derived_state(struct r600_context *rctx)
>                                                rctx->blend_state.cso,
>                                                blend_disable);
>         }
> +       return true;
>  }
>
>  static unsigned r600_conv_prim_to_gs_out(unsigned mode)
> @@ -1137,7 +1135,12 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
>                 return;
>         }
>
> -       r600_update_derived_state(rctx);
> +       if (!r600_update_derived_state(rctx)) {
> +               /* useless to render because current rendering command
> +                * can't be achieved
> +                */
> +               return;
> +       }
>
>         if (info.indexed) {
>                 /* Initialize the index buffer struct. */
> --
> 1.7.11.7
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev