[Mesa-dev] [PATCH 03/53] r600: make adjust_gprs use hw stages.

Oded Gabbay oded.gabbay at gmail.com
Mon Nov 30 01:28:07 PST 2015


On Mon, Nov 30, 2015 at 8:20 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This changes the r600 specific GPR adjustment code
> to use the stage defines, and arrays.
>
> This is prep work for the tess changes later.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/gallium/drivers/r600/r600_pipe.h  |   2 +-
>  src/gallium/drivers/r600/r600_state.c | 117 ++++++++++++++++++----------------
>  2 files changed, 64 insertions(+), 55 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
> index 36fa1c9..0e57efe 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -432,7 +432,7 @@ struct r600_context {
>         /* Hardware info. */
>         boolean                         has_vertex_cache;
>         boolean                         keep_tiling_flags;
> -       unsigned                        default_ps_gprs, default_vs_gprs;
> +       unsigned                        default_gprs[EG_NUM_HW_STAGES];
>         unsigned                        r6xx_num_clause_temp_gprs;
>
>         /* Miscellaneous state objects. */
> diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
> index c2d4abc..a16d4bd 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -2044,57 +2044,62 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
>  /* Adjust GPR allocation on R6xx/R7xx */
>  bool r600_adjust_gprs(struct r600_context *rctx)
>  {
> -       unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr;
> -       unsigned num_vs_gprs, num_es_gprs, num_gs_gprs;
> -       unsigned new_num_ps_gprs = num_ps_gprs;
> -       unsigned new_num_vs_gprs, new_num_es_gprs, new_num_gs_gprs;
> -       unsigned cur_num_ps_gprs = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> -       unsigned cur_num_vs_gprs = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> -       unsigned cur_num_gs_gprs = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
> -       unsigned cur_num_es_gprs = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
> -       unsigned def_num_ps_gprs = rctx->default_ps_gprs;
> -       unsigned def_num_vs_gprs = rctx->default_vs_gprs;
> -       unsigned def_num_gs_gprs = 0;
> -       unsigned def_num_es_gprs = 0;
> +       unsigned num_gprs[R600_NUM_HW_STAGES];
> +       unsigned new_gprs[R600_NUM_HW_STAGES];
> +       unsigned cur_gprs[R600_NUM_HW_STAGES];
> +       unsigned def_gprs[R600_NUM_HW_STAGES];
>         unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
> -       /* hardware will reserve twice num_clause_temp_gprs */
> -       unsigned max_gprs = def_num_gs_gprs + def_num_es_gprs + def_num_ps_gprs + def_num_vs_gprs + def_num_clause_temp_gprs * 2;
> +       unsigned max_gprs;
>         unsigned tmp, tmp2;
> +       unsigned i;
> +       bool need_recalc = false, use_default = true;
> +
> +       /* hardware will reserve twice num_clause_temp_gprs */
> +       max_gprs = def_num_clause_temp_gprs * 2;
> +       for (i = 0; i < R600_NUM_HW_STAGES; i++) {
> +               def_gprs[i] = rctx->default_gprs[i];
> +               max_gprs += def_gprs[i];
> +       }
>
> +       cur_gprs[R600_HW_STAGE_PS] = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> +       cur_gprs[R600_HW_STAGE_VS] = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> +       cur_gprs[R600_HW_STAGE_GS] = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
> +       cur_gprs[R600_HW_STAGE_ES] = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
> +
> +       num_gprs[R600_HW_STAGE_PS] = rctx->ps_shader->current->shader.bc.ngpr;
>         if (rctx->gs_shader) {
> -               num_es_gprs = rctx->vs_shader->current->shader.bc.ngpr;
> -               num_gs_gprs = rctx->gs_shader->current->shader.bc.ngpr;
> -               num_vs_gprs = rctx->gs_shader->current->gs_copy_shader->shader.bc.ngpr;
> +               num_gprs[R600_HW_STAGE_ES] = rctx->vs_shader->current->shader.bc.ngpr;
> +               num_gprs[R600_HW_STAGE_GS] = rctx->gs_shader->current->shader.bc.ngpr;
> +               num_gprs[R600_HW_STAGE_VS] = rctx->gs_shader->current->gs_copy_shader->shader.bc.ngpr;
>         } else {
> -               num_es_gprs = 0;
> -               num_gs_gprs = 0;
> -               num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr;
> +               num_gprs[R600_HW_STAGE_ES] = 0;
> +               num_gprs[R600_HW_STAGE_GS] = 0;
> +               num_gprs[R600_HW_STAGE_VS] = rctx->vs_shader->current->shader.bc.ngpr;
> +       }
> +
> +       for (i = 0; i < R600_NUM_HW_STAGES; i++) {
> +               new_gprs[i] = num_gprs[i];
> +               if (new_gprs[i] > cur_gprs[i])
> +                       need_recalc = true;
> +               if (new_gprs[i] > def_gprs[i])
> +                       use_default = false;
>         }
> -       new_num_vs_gprs = num_vs_gprs;
> -       new_num_es_gprs = num_es_gprs;
> -       new_num_gs_gprs = num_gs_gprs;
>
>         /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to max_gprs */
> -       if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > cur_num_vs_gprs ||
> -           new_num_es_gprs > cur_num_es_gprs || new_num_gs_gprs > cur_num_gs_gprs) {
> -               /* try to use switch back to default */
> -               if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > def_num_vs_gprs ||
> -                   new_num_gs_gprs > def_num_gs_gprs || new_num_es_gprs > def_num_es_gprs) {
> -                       /* always privilege vs stage so that at worst we have the
> -                        * pixel stage producing wrong output (not the vertex
> -                        * stage) */
> -                       new_num_ps_gprs = max_gprs - ((new_num_vs_gprs + new_num_es_gprs + new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
> -                       new_num_vs_gprs = num_vs_gprs;
> -                       new_num_gs_gprs = num_gs_gprs;
> -                       new_num_es_gprs = num_es_gprs;
> -               } else {
> -                       new_num_ps_gprs = def_num_ps_gprs;
> -                       new_num_vs_gprs = def_num_vs_gprs;
> -                       new_num_es_gprs = def_num_es_gprs;
> -                       new_num_gs_gprs = def_num_gs_gprs;
> -               }
> -       } else {
> +       if (!need_recalc)
>                 return true;
> +
> +       /* try to use switch back to default */
> +       if (!use_default) {
> +               /* always privilege vs stage so that at worst we have the
> +                * pixel stage producing wrong output (not the vertex
> +                * stage) */
> +               new_gprs[R600_HW_STAGE_PS] = max_gprs - def_num_clause_temp_gprs * 2;
> +               for (i = R600_HW_STAGE_VS; i < R600_NUM_HW_STAGES; i++)
> +                       new_gprs[R600_HW_STAGE_PS] -= new_gprs[i];
> +       } else {
> +               for (i = 0; i < 4; i++)

Maybe it would be better to use a define instead of 4, i.e "i <
R600_NUM_HW_STAGES" (like in the loop above) ?

> +                       new_gprs[i] = def_gprs[i];
>         }
>
>         /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <=
> @@ -2103,21 +2108,22 @@ bool r600_adjust_gprs(struct r600_context *rctx)
>          * it will lockup. So in this case just discard the draw command
>          * and don't change the current gprs repartitions.
>          */
> -       if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs ||
> -           num_gs_gprs > new_num_gs_gprs || num_es_gprs > new_num_es_gprs) {
> -               R600_ERR("shaders require too many register (%d + %d + %d + %d) "
> -                        "for a combined maximum of %d\n",
> -                        num_ps_gprs, num_vs_gprs, num_es_gprs, num_gs_gprs, max_gprs);
> -               return false;
> +       for (i = 0; i < R600_NUM_HW_STAGES; i++) {
> +               if (num_gprs[i] > new_gprs[i]) {
> +                       R600_ERR("shaders require too many register (%d + %d + %d + %d) "
> +                                "for a combined maximum of %d\n",
> +                                num_gprs[R600_HW_STAGE_PS], num_gprs[R600_HW_STAGE_VS], num_gprs[R600_HW_STAGE_ES], num_gprs[R600_HW_STAGE_GS], max_gprs);
> +                       return false;
> +               }
>         }
>
>         /* in some case we endup recomputing the current value */
> -       tmp = S_008C04_NUM_PS_GPRS(new_num_ps_gprs) |
> -               S_008C04_NUM_VS_GPRS(new_num_vs_gprs) |
> +       tmp = S_008C04_NUM_PS_GPRS(new_gprs[R600_HW_STAGE_PS]) |
> +               S_008C04_NUM_VS_GPRS(new_gprs[R600_HW_STAGE_VS]) |
>                 S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs);
>
> -       tmp2 = S_008C08_NUM_ES_GPRS(new_num_es_gprs) |
> -               S_008C08_NUM_GS_GPRS(new_num_gs_gprs);
> +       tmp2 = S_008C08_NUM_ES_GPRS(new_gprs[R600_HW_STAGE_ES]) |
> +               S_008C08_NUM_GS_GPRS(new_gprs[R600_HW_STAGE_GS]);
>         if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp || rctx->config_state.sq_gpr_resource_mgmt_2 != tmp2) {
>                 rctx->config_state.sq_gpr_resource_mgmt_1 = tmp;
>                 rctx->config_state.sq_gpr_resource_mgmt_2 = tmp2;
> @@ -2285,8 +2291,11 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
>                 break;
>         }
>
> -       rctx->default_ps_gprs = num_ps_gprs;
> -       rctx->default_vs_gprs = num_vs_gprs;
> +       rctx->default_gprs[R600_HW_STAGE_PS] = num_ps_gprs;
> +       rctx->default_gprs[R600_HW_STAGE_VS] = num_vs_gprs;
> +       rctx->default_gprs[R600_HW_STAGE_GS] = 0;
> +       rctx->default_gprs[R600_HW_STAGE_ES] = 0;
> +
>         rctx->r6xx_num_clause_temp_gprs = num_temp_gprs;
>
>         /* SQ_CONFIG */
> --
> 2.5.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

With the above fixed, this patch is:

Reviewed-by: Oded Gabbay <oded.gabbay at gmail.com>


More information about the mesa-dev mailing list