[Mesa-dev] [PATCH 1/3] r600g: avoid unnecessary shader exports v2
Andreas Boll
andreas.boll.dev at gmail.com
Tue Jun 26 11:03:45 PDT 2012
2012/6/26 <j.glisse at gmail.com>:
> From: Vadim Girlin <vadimgirlin at gmail.com>
>
> In some cases a TGSI shader has more color outputs than the number of CBs,
> so it seems we need to limit the number of color exports. This requires
> different shader variants depending on nr_cbufs, but on the other hand
> we are doing fewer exports, which are very costly.
>
> v2: fix various piglit regressions
>
> Signed-off-by: Vadim Girlin <vadimgirlin at gmail.com>
> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
Thanks Vadim and Jerome!
No piglit regressions on my rv770.
Improves results in the fill demo by about 50% (Shader2 fill is unchanged):
before:
Simple fill: 5.0 billion pixels/second
Blended fill: 4.9 billion pixels/second
Textured fill: 5.0 billion pixels/second
Shader1 fill: 4.9 billion pixels/second
Shader2 fill: 5.0 billion pixels/second
after:
Simple fill: 7.5 billion pixels/second
Blended fill: 7.4 billion pixels/second
Textured fill: 7.5 billion pixels/second
Shader1 fill: 7.5 billion pixels/second
Shader2 fill: 5.0 billion pixels/second
For the series:
Tested-by: Andreas Boll <andreas.boll.dev at gmail.com>
> ---
> src/gallium/drivers/r600/evergreen_state.c | 10 +++-------
> src/gallium/drivers/r600/r600_shader.c | 25 ++++++++++++++++++++++---
> src/gallium/drivers/r600/r600_shader.h | 7 ++++++-
> src/gallium/drivers/r600/r600_state.c | 2 ++
> src/gallium/drivers/r600/r600_state_common.c | 4 ++--
> 5 files changed, 35 insertions(+), 13 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
> index b618ca8..3fe95e1 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -2641,18 +2641,14 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
> db_shader_control |= S_02880C_KILL_ENABLE(1);
>
> exports_ps = 0;
> - num_cout = 0;
> for (i = 0; i < rshader->noutput; i++) {
> if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
> rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
> exports_ps |= 1;
> - else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
> - if (rshader->fs_write_all)
> - num_cout = rshader->nr_cbufs;
> - else
> - num_cout++;
> - }
> }
> +
> + num_cout = rshader->nr_ps_color_exports;
> +
> exports_ps |= S_02884C_EXPORT_COLORS(num_cout);
> if (!exports_ps) {
> /* always at least export 1 component per pixel */
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index d294084..37914eb 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -801,6 +801,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
> ctx->cv_output = i;
> break;
> }
> + } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
> + switch (d->Semantic.Name) {
> + case TGSI_SEMANTIC_COLOR:
> + ctx->shader->nr_ps_max_color_exports++;
> + break;
> + }
> }
> break;
> case TGSI_FILE_CONSTANT:
> @@ -1153,8 +1159,10 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
> ctx.colors_used = 0;
> ctx.clip_vertex_write = 0;
>
> + shader->nr_ps_color_exports = 0;
> + shader->nr_ps_max_color_exports = 0;
> +
> shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side;
> - shader->nr_cbufs = rctx->nr_cbufs;
>
> /* register allocations */
> /* Values [0,127] correspond to GPR[0..127].
> @@ -1289,6 +1297,9 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
> }
> }
>
> + if (shader->fs_write_all && rctx->chip_class >= EVERGREEN)
> + shader->nr_ps_max_color_exports = 8;
> +
> if (ctx.fragcoord_input >= 0) {
> if (ctx.bc->chip_class == CAYMAN) {
> for (j = 0 ; j < 4; j++) {
> @@ -1528,10 +1539,17 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
> break;
> case TGSI_PROCESSOR_FRAGMENT:
> if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
> + /* never export more colors than the number of CBs */
> + if (next_pixel_base && next_pixel_base >= (rctx->nr_cbufs + rctx->dual_src_blend * 1)) {
> + /* skip export */
> + j--;
> + continue;
> + }
> output[j].array_base = next_pixel_base++;
> output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
> + shader->nr_ps_color_exports++;
> if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
> - for (k = 1; k < shader->nr_cbufs; k++) {
> + for (k = 1; k < rctx->nr_cbufs; k++) {
> j++;
> memset(&output[j], 0, sizeof(struct r600_bytecode_output));
> output[j].gpr = shader->output[i].gpr;
> @@ -1545,6 +1563,7 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
> output[j].array_base = next_pixel_base++;
> output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
> output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
> + shader->nr_ps_color_exports++;
> }
> }
> } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
> @@ -1595,7 +1614,7 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
> }
>
> /* add fake pixel export */
> - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && j == 0) {
> + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
> memset(&output[j], 0, sizeof(struct r600_bytecode_output));
> output[j].gpr = 0;
> output[j].elem_size = 3;
> diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
> index 2d35e77..eb0bbf6 100644
> --- a/src/gallium/drivers/r600/r600_shader.h
> +++ b/src/gallium/drivers/r600/r600_shader.h
> @@ -49,7 +49,12 @@ struct r600_shader {
> boolean fs_write_all;
> boolean vs_prohibit_ucps;
> boolean two_side;
> - unsigned nr_cbufs;
> + /* Number of color outputs in the TGSI shader,
> + * sometimes it could be higher than nr_cbufs (bug?).
> + * Also with writes_all property on eg+ it will be set to max CB number */
> + unsigned nr_ps_max_color_exports;
> + /* Real number of ps color exports compiled in the bytecode */
> + unsigned nr_ps_color_exports;
> /* bit n is set if the shader writes gl_ClipDistance[n] */
> unsigned clip_dist_write;
> /* flag is set if the shader writes VS_OUT_MISC_VEC (e.g. for PSIZE) */
> diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
> index fc75781..b314edc 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -1640,6 +1640,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
>
> /* build states */
> rctx->have_depth_fb = 0;
> + rctx->nr_cbufs = state->nr_cbufs;
> +
> for (int i = 0; i < state->nr_cbufs; i++) {
> r600_cb(rctx, rstate, state, i);
> }
> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
> index 00e1bd0..7755c9e 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -690,8 +690,8 @@ static void r600_update_derived_state(struct r600_context *rctx)
> }
>
> if ((rctx->ps_shader->shader.two_side != rctx->two_side) ||
> - ((rctx->chip_class >= EVERGREEN) && rctx->ps_shader->shader.fs_write_all &&
> - (rctx->ps_shader->shader.nr_cbufs != rctx->nr_cbufs))) {
> + (MIN2(rctx->ps_shader->shader.nr_ps_max_color_exports, rctx->nr_cbufs + rctx->dual_src_blend)
> + != rctx->ps_shader->shader.nr_ps_color_exports)) {
> r600_shader_rebuild(&rctx->context, rctx->ps_shader);
> }
>
> --
> 1.7.10.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list