[Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu

Sat Oct 27 07:12:43 PDT 2012

FWIW, instead of putting the discard_draw flag in r600_context, it
would be cleaner to have r600_adjust_gprs return false if drawing
should be skipped, then r600_update_derived_state would return false
and draw_vbo would skip rendering. That way you wouldn't have to add
any comments in draw_vbo, because it would be clear where the error is
coming from.

Using R600_ERR to report the error should be sufficient.
r600_adjust_gprs seems to be the best place for that.

Marek

On Sat, Oct 27, 2012 at 3:47 AM, Jerome Glisse <j.glisse at gmail.com> wrote:
> On Fri, Oct 26, 2012 at 10:01 PM,  <j.glisse at gmail.com> wrote:
>> From: Jerome Glisse <jglisse at redhat.com>
>>
>> On r6xx/r7xx shader resource management need to make sure that the
>> shader does not goes over the gpr register limit. Each specific
>> asic has a maxmimum register that can be split btw shader stage.
>> For each stage the shader must not use more register than the
>> limit programmed.
>>
>> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
>
> I haven't yet fully tested it on wide range of GPU but it fixes piglit
> case that were locking up o one can directly use quick-drivers. I
> mostly would like feedback on if we should print a warning when we
> discard a draw command because shader exceed limit.
>
> Note that with this patch the test that were locking up fails but with
> a simple patch on top of that (decreasing clause temp gpr to 2) they
> pass.
>
> Regards,
> Jerome
>
>> ---
>>  src/gallium/drivers/r600/r600_pipe.h         |  1 +
>>  src/gallium/drivers/r600/r600_state.c        | 60 +++++++++++++++++++---------
>>  src/gallium/drivers/r600/r600_state_common.c | 22 +++++-----
>>  3 files changed, 55 insertions(+), 28 deletions(-)
>>
>> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
>> index ff2a5fd..2045af3 100644
>> --- a/src/gallium/drivers/r600/r600_pipe.h
>> +++ b/src/gallium/drivers/r600/r600_pipe.h
>> @@ -363,6 +363,7 @@ struct r600_context {
>>         enum chip_class                 chip_class;
>>         boolean                         has_vertex_cache;
>>         boolean                         keep_tiling_flags;
>> +       bool                            discard_draw;
>>         unsigned                        default_ps_gprs, default_vs_gprs;
>>         unsigned                        r6xx_num_clause_temp_gprs;
>>         unsigned                        backend_mask;
>> diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
>> index 7d07008..43af934 100644
>> --- a/src/gallium/drivers/r600/r600_state.c
>> +++ b/src/gallium/drivers/r600/r600_state.c
>> @@ -2189,30 +2189,54 @@ void r600_init_state_functions(struct r600_context *rctx)
>>  /* Adjust GPR allocation on R6xx/R7xx */
>>  void r600_adjust_gprs(struct r600_context *rctx)
>>  {
>> -       unsigned num_ps_gprs = rctx->default_ps_gprs;
>> -       unsigned num_vs_gprs = rctx->default_vs_gprs;
>> +       unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr;
>> +       unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr;
>> +       unsigned new_num_ps_gprs = num_ps_gprs;
>> +       unsigned new_num_vs_gprs = num_vs_gprs;
>> +       unsigned cur_num_ps_gprs = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
>> +       unsigned cur_num_vs_gprs = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
>> +       unsigned def_num_ps_gprs = rctx->default_ps_gprs;
>> +       unsigned def_num_vs_gprs = rctx->default_vs_gprs;
>> +       unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
>> +       /* hardware will reserve twice num_clause_temp_gprs */
>> +       unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + def_num_clause_temp_gprs * 2;
>>         unsigned tmp;
>> -       int diff;
>>
>> -       if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) {
>> -               diff = rctx->ps_shader->current->shader.bc.ngpr - rctx->default_ps_gprs;
>> -               num_vs_gprs -= diff;
>> -               num_ps_gprs += diff;
>> +       /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to max_gprs */
>> +       if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > cur_num_vs_gprs) {
>> +               /* try to use switch back to default */
>> +               if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > def_num_vs_gprs) {
>> +                       /* always privilege vs stage so that at worst we have the
>> +                        * pixel stage producing wrong output (not the vertex
>> +                        * stage) */
>> +                       new_num_ps_gprs = max_gprs - (new_num_vs_gprs + def_num_clause_temp_gprs * 2);
>> +                       new_num_vs_gprs = num_vs_gprs;
>> +               } else {
>> +                       new_num_ps_gprs = def_num_ps_gprs;
>> +                       new_num_vs_gprs = def_num_vs_gprs;
>> +               }
>> +       } else {
>> +               rctx->discard_draw = false;
>> +               return;
>>         }
>>
>> -       if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs)
>> -       {
>> -               diff = rctx->vs_shader->current->shader.bc.ngpr - rctx->default_vs_gprs;
>> -               num_ps_gprs -= diff;
>> -               num_vs_gprs += diff;
>> +       /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <=
>> +        * SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup
>> +        * Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS
>> +        * it will lockup. So in this case just discard the draw command
>> +        * and don't change the current gprs repartitions.
>> +        */
>> +       rctx->discard_draw = false;
>> +       if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs) {
>> +               rctx->discard_draw = true;
>> +               return;
>>         }
>>
>> -       tmp = 0;
>> -       tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
>> -       tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
>> -       tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs);
>> -
>> -       if (tmp != rctx->config_state.sq_gpr_resource_mgmt_1) {
>> +       /* in some case we endup recomputing the current value */
>> +       tmp = S_008C04_NUM_PS_GPRS(new_num_ps_gprs) |
>> +               S_008C04_NUM_VS_GPRS(new_num_vs_gprs) |
>> +               S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs);
>> +       if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp) {
>>                 rctx->config_state.sq_gpr_resource_mgmt_1 = tmp;
>>                 rctx->config_state.atom.dirty = true;
>>                 rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
>> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
>> index 65985c7..5827efb 100644
>> --- a/src/gallium/drivers/r600/r600_state_common.c
>> +++ b/src/gallium/drivers/r600/r600_state_common.c
>> @@ -755,10 +755,6 @@ static int r600_shader_select(struct pipe_context *ctx,
>>         shader->next_variant = sel->current;
>>         sel->current = shader;
>>
>> -       if (rctx->chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
>> -               r600_adjust_gprs(rctx);
>> -       }
>> -
>>         if (rctx->ps_shader &&
>>             rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) {
>>                 rctx->cb_misc_state.nr_ps_color_outputs = rctx->ps_shader->current->nr_ps_color_outputs;
>> @@ -814,9 +810,6 @@ static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
>>                         rctx->cb_misc_state.multiwrite = multiwrite;
>>                         rctx->cb_misc_state.atom.dirty = true;
>>                 }
>> -
>> -               if (rctx->vs_shader)
>> -                       r600_adjust_gprs(rctx);
>>         }
>>
>>         if (rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) {
>> @@ -839,9 +832,6 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
>>         if (state) {
>>                 r600_context_pipe_state_set(rctx, &rctx->vs_shader->current->rstate);
>>
>> -               if (rctx->chip_class < EVERGREEN && rctx->ps_shader)
>> -                       r600_adjust_gprs(rctx);
>> -
>>                 /* Update clip misc state. */
>>                 if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
>>                     rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) {
>> @@ -1055,6 +1045,10 @@ static void r600_update_derived_state(struct r600_context *rctx)
>>
>>         r600_shader_select(ctx, rctx->ps_shader, &ps_dirty);
>>
>> +       if (rctx->chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
>> +               r600_adjust_gprs(rctx);
>> +       }
>> +
>>         if (rctx->ps_shader && rctx->rasterizer &&
>>             ((rctx->rasterizer->sprite_coord_enable != rctx->ps_shader->current->sprite_coord_enable) ||
>>              (rctx->rasterizer->flatshade != rctx->ps_shader->current->flatshade))) {
>> @@ -1139,6 +1133,14 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
>>
>>         r600_update_derived_state(rctx);
>>
>> +       if (rctx->discard_draw) {
>> +               /* no need to clutter command stream with useless state
>> +                * state change. Draw call are discarded if shader is
>> +                * over the limit. See r600_adjust_gprs
>> +                */
>> +               return;
>> +       }
>> +
>>         if (info.indexed) {
>>                 /* Initialize the index buffer struct. */
>>                 pipe_resource_reference(&ib.buffer, rctx->index_buffer.buffer);
>> --
>> 1.7.11.7
>>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev