[Mesa-dev] [PATCH 1/3] r600g: avoid unnecessary shader exports v2

Andreas Boll andreas.boll.dev at gmail.com
Tue Jun 26 11:03:45 PDT 2012


2012/6/26  <j.glisse at gmail.com>:
> From: Vadim Girlin <vadimgirlin at gmail.com>
>
> In some cases TGSI shader has more color outputs than the number of CBs,
> so it seems we need to limit the number of color exports. This requires
> different shader variants depending on the nr_cbufs, but on the other hand
> we are doing less exports, which are very costly.
>
> v2: fix various piglit regressions
>
> Signed-off-by: Vadim Girlin <vadimgirlin at gmail.com>
> Signed-off-by: Jerome Glisse <jglisse at redhat.com>

Thanks Vadim and Jerome!

No piglit regressions on my rv770

Improves results in fill demo by 50%

before:

Simple fill: 5.0 billion pixels/second
Blended fill: 4.9 billion pixels/second
Textured fill: 5.0 billion pixels/second
Shader1 fill: 4.9 billion pixels/second
Shader2 fill: 5.0 billion pixels/second

after:

Simple fill: 7.5 billion pixels/second
Blended fill: 7.4 billion pixels/second
Textured fill: 7.5 billion pixels/second
Shader1 fill: 7.5 billion pixels/second
Shader2 fill: 5.0 billion pixels/second

For the series:

Tested-by: Andreas Boll <andreas.boll.dev at gmail.com>

> ---
>  src/gallium/drivers/r600/evergreen_state.c   |   10 +++-------
>  src/gallium/drivers/r600/r600_shader.c       |   25 ++++++++++++++++++++++---
>  src/gallium/drivers/r600/r600_shader.h       |    7 ++++++-
>  src/gallium/drivers/r600/r600_state.c        |    2 ++
>  src/gallium/drivers/r600/r600_state_common.c |    4 ++--
>  5 files changed, 35 insertions(+), 13 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
> index b618ca8..3fe95e1 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -2641,18 +2641,14 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
>                db_shader_control |= S_02880C_KILL_ENABLE(1);
>
>        exports_ps = 0;
> -       num_cout = 0;
>        for (i = 0; i < rshader->noutput; i++) {
>                if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
>                    rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
>                        exports_ps |= 1;
> -               else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
> -                       if (rshader->fs_write_all)
> -                               num_cout = rshader->nr_cbufs;
> -                       else
> -                               num_cout++;
> -               }
>        }
> +
> +       num_cout = rshader->nr_ps_color_exports;
> +
>        exports_ps |= S_02884C_EXPORT_COLORS(num_cout);
>        if (!exports_ps) {
>                /* always at least export 1 component per pixel */
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index d294084..37914eb 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -801,6 +801,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
>                                ctx->cv_output = i;
>                                break;
>                        }
> +               } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
> +                       switch (d->Semantic.Name) {
> +                       case TGSI_SEMANTIC_COLOR:
> +                               ctx->shader->nr_ps_max_color_exports++;
> +                               break;
> +                       }
>                }
>                break;
>        case TGSI_FILE_CONSTANT:
> @@ -1153,8 +1159,10 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
>        ctx.colors_used = 0;
>        ctx.clip_vertex_write = 0;
>
> +       shader->nr_ps_color_exports = 0;
> +       shader->nr_ps_max_color_exports = 0;
> +
>        shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side;
> -       shader->nr_cbufs = rctx->nr_cbufs;
>
>        /* register allocations */
>        /* Values [0,127] correspond to GPR[0..127].
> @@ -1289,6 +1297,9 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
>                }
>        }
>
> +       if (shader->fs_write_all && rctx->chip_class >= EVERGREEN)
> +               shader->nr_ps_max_color_exports = 8;
> +
>        if (ctx.fragcoord_input >= 0) {
>                if (ctx.bc->chip_class == CAYMAN) {
>                        for (j = 0 ; j < 4; j++) {
> @@ -1528,10 +1539,17 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
>                        break;
>                case TGSI_PROCESSOR_FRAGMENT:
>                        if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
> +                               /* never export more colors than the number of CBs */
> +                               if (next_pixel_base && next_pixel_base >= (rctx->nr_cbufs + rctx->dual_src_blend * 1)) {
> +                                       /* skip export */
> +                                       j--;
> +                                       continue;
> +                               }
>                                output[j].array_base = next_pixel_base++;
>                                output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
> +                               shader->nr_ps_color_exports++;
>                                if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
> -                                       for (k = 1; k < shader->nr_cbufs; k++) {
> +                                       for (k = 1; k < rctx->nr_cbufs; k++) {
>                                                j++;
>                                                memset(&output[j], 0, sizeof(struct r600_bytecode_output));
>                                                output[j].gpr = shader->output[i].gpr;
> @@ -1545,6 +1563,7 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
>                                                output[j].array_base = next_pixel_base++;
>                                                output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
>                                                output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
> +                                               shader->nr_ps_color_exports++;
>                                        }
>                                }
>                        } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
> @@ -1595,7 +1614,7 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
>        }
>
>        /* add fake pixel export */
> -       if (ctx.type == TGSI_PROCESSOR_FRAGMENT && j == 0) {
> +       if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
>                memset(&output[j], 0, sizeof(struct r600_bytecode_output));
>                output[j].gpr = 0;
>                output[j].elem_size = 3;
> diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
> index 2d35e77..eb0bbf6 100644
> --- a/src/gallium/drivers/r600/r600_shader.h
> +++ b/src/gallium/drivers/r600/r600_shader.h
> @@ -49,7 +49,12 @@ struct r600_shader {
>        boolean                 fs_write_all;
>        boolean                 vs_prohibit_ucps;
>        boolean                 two_side;
> -       unsigned                nr_cbufs;
> +       /* Number of color outputs in the TGSI shader,
> +        * sometimes it could be higher than nr_cbufs (bug?).
> +        * Also with writes_all property on eg+ it will be set to max CB number */
> +       unsigned                nr_ps_max_color_exports;
> +       /* Real number of ps color exports compiled in the bytecode */
> +       unsigned                nr_ps_color_exports;
>        /* bit n is set if the shader writes gl_ClipDistance[n] */
>        unsigned                clip_dist_write;
>        /* flag is set if the shader writes VS_OUT_MISC_VEC (e.g. for PSIZE) */
> diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
> index fc75781..b314edc 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -1640,6 +1640,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
>
>        /* build states */
>        rctx->have_depth_fb = 0;
> +       rctx->nr_cbufs = state->nr_cbufs;
> +
>        for (int i = 0; i < state->nr_cbufs; i++) {
>                r600_cb(rctx, rstate, state, i);
>        }
> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
> index 00e1bd0..7755c9e 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -690,8 +690,8 @@ static void r600_update_derived_state(struct r600_context *rctx)
>        }
>
>        if ((rctx->ps_shader->shader.two_side != rctx->two_side) ||
> -           ((rctx->chip_class >= EVERGREEN) && rctx->ps_shader->shader.fs_write_all &&
> -            (rctx->ps_shader->shader.nr_cbufs != rctx->nr_cbufs))) {
> +            (MIN2(rctx->ps_shader->shader.nr_ps_max_color_exports, rctx->nr_cbufs + rctx->dual_src_blend)
> +             != rctx->ps_shader->shader.nr_ps_color_exports)) {
>                r600_shader_rebuild(&rctx->context, rctx->ps_shader);
>        }
>
> --
> 1.7.10.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list