[Mesa-dev] [PATCH] radeonsi: Optimize out exports to unbound color buffers

Marek Olšák maraeo at gmail.com
Sun Oct 20 05:12:45 CEST 2013


As per the discussion on IRC, this patch is trying to fix an issue with
the GLSL compiler pass lower_output_reads, which always generates 8 output
writes for any shader that writes gl_FragData. I'll fix this in the
GLSL compiler instead. NAK.

Marek

On Sun, Oct 20, 2013 at 2:33 AM, Jay Cornwall <jay at jcornwall.me> wrote:
> This patch identifies shader exports to unbound CBs and removes them during
> TGSI to LLVM IR lowering. The method is identical to the one used in the
> gallium/r600 driver.
>
> The GLSL lower_output_reads pass generates temporary copies for writes to
> shader outputs. In the case of gl_FragData, this results in writes to every
> MRT when one or more elements are written in the shader. When these MRTs
> are unbound and masked out there is still a performance loss equivalent to
> exports to bound, unmasked MRTs on SI.
>
> Signed-off-by: Jay Cornwall <jay at jcornwall.me>
> ---
>  src/gallium/drivers/radeonsi/radeonsi_pipe.h   |  1 +
>  src/gallium/drivers/radeonsi/radeonsi_shader.c |  6 ++++++
>  src/gallium/drivers/radeonsi/si_state.c        | 15 +++++++++++++--
>  src/gallium/drivers/radeonsi/si_state.h        |  1 +
>  4 files changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> index 26f7e09..bede043 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> @@ -148,6 +148,7 @@ struct r600_context {
>         unsigned                        fb_compressed_cb_mask;
>         unsigned                        pa_sc_line_stipple;
>         unsigned                        pa_su_sc_mode_cntl;
> +       boolean                         dual_src_blend;
>         /* for saving when using blitter */
>         struct pipe_stencil_ref         stencil_ref;
>         struct si_pipe_shader_selector  *ps_shader;
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
> index 80ee325..e4a9f56 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
> +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
> @@ -995,6 +995,12 @@ handle_semantic:
>                                         semantic_name);
>                         }
>
> +                       /* Shader is keyed on nr_cbufs, optimize out exports to unbound CBs */
> +                       if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT &&
> +                           semantic_name == TGSI_SEMANTIC_COLOR &&
> +                           color_count > si_shader_ctx->shader->key.ps.nr_cbufs)
> +                               continue;
> +
>                         si_llvm_init_export_args(bld_base, d, index, target, args);
>
>                         if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index da7c3d0..8109bde 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -27,6 +27,7 @@
>  #include "util/u_memory.h"
>  #include "util/u_framebuffer.h"
>  #include "util/u_blitter.h"
> +#include "util/u_dual_blend.h"
>  #include "util/u_helpers.h"
>  #include "util/u_math.h"
>  #include "util/u_pack_color.h"
> @@ -168,6 +169,8 @@ static void si_update_fb_blend_state(struct r600_context *rctx)
>         si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
>
>         si_pm4_set_state(rctx, fb_blend, pm4);
> +
> +       rctx->dual_src_blend = blend->dual_src_blend;
>  }
>
>  /*
> @@ -309,6 +312,9 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
>                 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
>         }
>
> +       /* only MRT0 has dual src blend */
> +       blend->dual_src_blend = util_blend_state_is_dual(state, 0);
> +
>         return blend;
>  }
>
> @@ -2097,8 +2103,13 @@ static INLINE void si_shader_selector_key(struct pipe_context *ctx,
>                 if (rctx->queued.named.rasterizer->clip_plane_enable & 0xf)
>                         key->vs.ucps_enabled |= 0x1;
>         } else if (sel->type == PIPE_SHADER_FRAGMENT) {
> -               if (sel->fs_write_all)
> -                       key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs;
> +               /* Key on nr_cbufs to optimize unused EXPORTs. */
> +               key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs;
> +
> +               /* Dual-source blending only makes sense with nr_cbufs == 1. */
> +               if (key->ps.nr_cbufs == 1 && rctx->dual_src_blend)
> +                       key->ps.nr_cbufs = 2;
> +
>                 key->ps.export_16bpc = rctx->export_16bpc;
>
>                 if (rctx->queued.named.rasterizer) {
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 6dbf880..ca51496 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -34,6 +34,7 @@ struct si_state_blend {
>         struct si_pm4_state     pm4;
>         uint32_t                cb_target_mask;
>         bool                    alpha_to_one;
> +       bool                    dual_src_blend;
>  };
>
>  struct si_state_viewport {
> --
> 1.8.4.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list