[Mesa-dev] [PATCH 2/2] i965: Implement color clears using a simple shader in blorp.

Kenneth Graunke kenneth at whitecape.org
Sun Apr 28 00:55:51 PDT 2013


On 04/18/2013 02:25 PM, Eric Anholt wrote:
> The upside is less CPU overhead in fiddling with GL error handling, the
> ability to use the constant color write message in most cases, and no GLSL
> clear shaders appearing in MESA_GLSL=dump output.  The downside is more
> batch flushing and a total recompute of GL state at the end of blorp.
> However, if we're ever going to use the fast color clear feature of CMS
> surfaces, we'll need this anyway since it requires very special state
> setup.
>
> This increases the fail rate of some the GLES3conform ARB_sync tests,
> because of the initial flush at the start of blorp.  The tests already
> intermittently failed (because it's just a bad testing procedure), and we
> can return it to its previous fail rate by fixing the initial flush.
>
> Improves GLB2.7 performance 0.37% +/- 0.11% (n=71/70, outlier removed).
>
> v2: Rename the key member, use the core helper for sRGB, and use
>      BRW_MASK_* enums, fix comment and indentation.
> ---
>   src/mesa/drivers/dri/i965/Makefile.sources    |    1 +
>   src/mesa/drivers/dri/i965/brw_blorp.cpp       |    4 +
>   src/mesa/drivers/dri/i965/brw_blorp.h         |    4 +
>   src/mesa/drivers/dri/i965/brw_blorp_clear.cpp |  305 +++++++++++++++++++++++++
>   src/mesa/drivers/dri/i965/brw_clear.c         |   11 +
>   src/mesa/drivers/dri/i965/brw_context.h       |    1 +
>   src/mesa/drivers/dri/i965/gen6_blorp.cpp      |   21 +-
>   src/mesa/drivers/dri/i965/gen7_blorp.cpp      |   12 +-
>   8 files changed, 345 insertions(+), 14 deletions(-)
>   create mode 100644 src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
>
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
> index be8d630..049ae91 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -31,6 +31,7 @@ i965_FILES = \
>   	intel_tex_validate.c \
>   	brw_blorp.cpp \
>   	brw_blorp_blit.cpp \
> +	brw_blorp_clear.cpp \
>   	brw_cc.c \
>   	brw_cfg.cpp \
>   	brw_clear.c \
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
> index 8a044c1..a2d02bf 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
> @@ -148,6 +148,10 @@ brw_blorp_params::brw_blorp_params()
>        num_samples(0),
>        use_wm_prog(false)
>   {
> +   color_write_disable[0] = false;
> +   color_write_disable[1] = false;
> +   color_write_disable[2] = false;
> +   color_write_disable[3] = false;
>   }
>
>   extern "C" {
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
> index 1b95818..8915080 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
> @@ -45,6 +45,9 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
>                           int dst_x1, int dst_y1,
>                           bool mirror_x, bool mirror_y);
>
> +bool
> +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb);
> +
>   #ifdef __cplusplus
>   } /* end extern "C" */
>
> @@ -211,6 +214,7 @@ public:
>      unsigned num_samples;
>      bool use_wm_prog;
>      brw_blorp_wm_push_constants wm_push_consts;
> +   bool color_write_disable[4];
>   };
>
>
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
> new file mode 100644
> index 0000000..f778d57
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
> @@ -0,0 +1,305 @@
> +/*
> + * Copyright © 2013 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +extern "C" {
> +#include "main/teximage.h"
> +#include "main/blend.h"
> +#include "main/fbobject.h"
> +#include "main/renderbuffer.h"
> +}
> +
> +#include "glsl/ralloc.h"
> +
> +#include "intel_fbo.h"
> +
> +#include "brw_blorp.h"
> +#include "brw_context.h"
> +#include "brw_eu.h"
> +#include "brw_state.h"
> +
> +struct brw_blorp_clear_prog_key
> +{
> +   bool use_simd16_replicated_data;
> +   bool pad[3];
> +};
> +
> +class brw_blorp_clear_params : public brw_blorp_params
> +{
> +public:
> +   brw_blorp_clear_params(struct brw_context *brw,
> +                          struct gl_framebuffer *fb,
> +                          struct gl_renderbuffer *rb,
> +                          GLubyte *color_mask);
> +
> +   virtual uint32_t get_wm_prog(struct brw_context *brw,
> +                                brw_blorp_prog_data **prog_data) const;
> +
> +private:
> +   brw_blorp_clear_prog_key wm_prog_key;
> +};
> +
> +class brw_blorp_clear_program
> +{
> +public:
> +   brw_blorp_clear_program(struct brw_context *brw,
> +                          const brw_blorp_clear_prog_key *key);
> +   ~brw_blorp_clear_program();
> +
> +   const GLuint *compile(struct brw_context *brw, GLuint *program_size);
> +
> +   brw_blorp_prog_data prog_data;
> +
> +private:
> +   void alloc_regs();
> +
> +   void *mem_ctx;
> +   struct brw_context *brw;
> +   const brw_blorp_clear_prog_key *key;
> +   struct brw_compile func;
> +
> +   /* Thread dispatch header */
> +   struct brw_reg R0;
> +
> +   /* Pixel X/Y coordinates (always in R1). */
> +   struct brw_reg R1;
> +
> +   /* Register with push constants (a single vec4) */
> +   struct brw_reg clear_rgba;
> +
> +   /* MRF used for render target writes */
> +   GLuint base_mrf;
> +};
> +
> +brw_blorp_clear_program::brw_blorp_clear_program(
> +      struct brw_context *brw,
> +      const brw_blorp_clear_prog_key *key)
> +   : mem_ctx(ralloc_context(NULL)),
> +     brw(brw),
> +     key(key)
> +{
> +   brw_init_compile(brw, &func, mem_ctx);
> +}
> +
> +brw_blorp_clear_program::~brw_blorp_clear_program()
> +{
> +   ralloc_free(mem_ctx);
> +}
> +
> +brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
> +                                               struct gl_framebuffer *fb,
> +                                               struct gl_renderbuffer *rb,
> +                                               GLubyte *color_mask)
> +{
> +   struct intel_context *intel = &brw->intel;
> +   struct gl_context *ctx = &intel->ctx;
> +   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
> +
> +   dst.set(brw, irb->mt, irb->mt_level, irb->mt_layer);
> +
> +   /* Override the surface format according to the context's sRGB rules. */
> +   gl_format format = _mesa_get_render_format(ctx, irb->mt->format);
> +   dst.brw_surfaceformat = brw->render_target_format[format];
> +
> +   x0 = fb->_Xmin;
> +   x1 = fb->_Xmax;
> +   if (rb->Name != 0) {
> +      y0 = fb->_Ymin;
> +      y1 = fb->_Ymax;
> +   } else {
> +      y0 = rb->Height - fb->_Ymax;
> +      y1 = rb->Height - fb->_Ymin;
> +   }

I've seen this code in several places; we should probably refactor it at 
some point.

> +
> +   float *push_consts = (float *)&wm_push_consts;
> +
> +   push_consts[0] = ctx->Color.ClearColor.f[0];
> +   push_consts[1] = ctx->Color.ClearColor.f[1];
> +   push_consts[2] = ctx->Color.ClearColor.f[2];
> +   push_consts[3] = ctx->Color.ClearColor.f[3];
> +
> +   use_wm_prog = true;
> +
> +   memset(&wm_prog_key, 0, sizeof(wm_prog_key));
> +
> +   wm_prog_key.use_simd16_replicated_data = true;
> +
> +   /* From the SNB PRM (Vol4_Part1):
> +    *
> +    *     "Replicated data (Message Type = 111) is only supported when
> +    *      accessing tiled memory.  Using this Message Type to access linear
> +    *      (untiled) memory is UNDEFINED."
> +    */
> +   if (irb->mt->region->tiling == I915_TILING_NONE)
> +      wm_prog_key.use_simd16_replicated_data = false;
> +
> +   /* Constant color writes ignore everyting in blend and color calculator
> +    * state.  This is not documented.
> +    */
> +   for (int i = 0; i < 4; i++) {
> +      if (!color_mask[i]) {
> +         color_write_disable[i] = true;
> +         wm_prog_key.use_simd16_replicated_data = false;
> +      }
> +   }
> +}
> +
> +uint32_t
> +brw_blorp_clear_params::get_wm_prog(struct brw_context *brw,
> +                                   brw_blorp_prog_data **prog_data) const
> +{
> +   uint32_t prog_offset;
> +   if (!brw_search_cache(&brw->cache, BRW_BLORP_CLEAR_PROG,
> +                         &this->wm_prog_key, sizeof(this->wm_prog_key),
> +                         &prog_offset, prog_data)) {
> +      brw_blorp_clear_program prog(brw, &this->wm_prog_key);
> +      GLuint program_size;
> +      const GLuint *program = prog.compile(brw, &program_size);
> +      brw_upload_cache(&brw->cache, BRW_BLORP_CLEAR_PROG,
> +                       &this->wm_prog_key, sizeof(this->wm_prog_key),
> +                       program, program_size,
> +                       &prog.prog_data, sizeof(prog.prog_data),
> +                       &prog_offset, prog_data);
> +   }
> +   return prog_offset;
> +}
> +
> +void
> +brw_blorp_clear_program::alloc_regs()
> +{
> +   int reg = 0;
> +   this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
> +   this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
> +
> +   prog_data.first_curbe_grf = reg;
> +   clear_rgba = retype(brw_vec4_grf(reg++, 0), BRW_REGISTER_TYPE_F);
> +   reg += BRW_BLORP_NUM_PUSH_CONST_REGS;
> +
> +   /* Make sure we didn't run out of registers */
> +   assert(reg <= GEN7_MRF_HACK_START);
> +
> +   int mrf = 2;
> +   this->base_mrf = mrf;

Temporary seems kind of superfluous here, but not a big deal.

> +}
> +
> +const GLuint *
> +brw_blorp_clear_program::compile(struct brw_context *brw,
> +                                 GLuint *program_size)
> +{
> +   /* Set up prog_data */
> +   memset(&prog_data, 0, sizeof(prog_data));
> +   prog_data.persample_msaa_dispatch = false;
> +
> +   alloc_regs();
> +
> +   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
> +
> +   struct brw_reg mrf_rt_write =
> +      retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F);
> +
> +   uint32_t mlen, msg_type;
> +   if (key->use_simd16_replicated_data) {
> +      /* The message payload is a single register with the low 4 floats/ints
> +       * filled with the constant clear color.
> +       */
> +      brw_set_mask_control(&func, BRW_MASK_DISABLE);
> +      brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba);
> +      brw_set_mask_control(&func, BRW_MASK_ENABLE);
> +
> +      msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
> +      mlen = 1;
> +   } else {
> +      for (int i = 0; i < 4; i++) {
> +         /* The message payload is pairs of registers for 16 pixels each of r,
> +          * g, b, and a.
> +          */
> +         brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
> +         brw_MOV(&func,
> +                 brw_message_reg(base_mrf + i * 2),
> +                 brw_vec1_grf(clear_rgba.nr, i));
> +         brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
> +      }
> +
> +      msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
> +      mlen = 8;
> +   }
> +
> +   /* Now write to the render target and terminate the thread */
> +   brw_fb_WRITE(&func,
> +                16 /* dispatch_width */,
> +                base_mrf /* msg_reg_nr */,
> +                mrf_rt_write /* src0 */,
> +                msg_type,
> +                BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
> +                mlen,
> +                0 /* response_length */,
> +                true /* eot */,
> +                false /* header present */);
> +
> +   if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
> +      printf("Native code for BLORP clear:\n");
> +      brw_dump_compile(&func, stdout, 0, func.next_insn_offset);
> +      printf("\n");
> +   }
> +   return brw_get_program(&func, program_size);
> +}
> +
> +extern "C" {
> +bool
> +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
> +{
> +   struct gl_context *ctx = &intel->ctx;
> +   struct brw_context *brw = brw_context(ctx);
> +
> +   /* The constant color clear code doesn't work for multisampled surfaces, so
> +    * we need to support falling back to other clear mechanisms.
> +    * Unfortunately, our clear code is based on a bitmask that doesn't
> +    * distinguish individual color attachments, so we walk the attachments to
> +    * see if any require fallback, and fall back for all if any of them need
> +    * to.
> +    */

Ugh, yeah, we should go back and fix that.  In the meantime, your 
approach is reasonable.

> +   for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
> +      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
> +      struct intel_renderbuffer *irb = intel_renderbuffer(rb);
> +
> +      if (irb && irb->mt->msaa_layout != INTEL_MSAA_LAYOUT_NONE)
> +         return false;
> +   }
> +
> +   for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
> +      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
> +
> +      /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
> +       * the framebuffer can be complete with some attachments missing.  In
> +       * this case the _ColorDrawBuffers pointer will be NULL.
> +       */
> +      if (rb == NULL)
> +         continue;
> +
> +      brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf]);
> +      brw_blorp_exec(intel, &params);
> +   }
> +
> +   return true;
> +}
> +
> +} /* extern "C" */
> diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
> index c0ac69d..ad6a554 100644
> --- a/src/mesa/drivers/dri/i965/brw_clear.c
> +++ b/src/mesa/drivers/dri/i965/brw_clear.c
> @@ -41,6 +41,7 @@
>   #include "intel_regions.h"
>
>   #include "brw_context.h"
> +#include "brw_blorp.h"
>
>   #define FILE_DEBUG_FLAG DEBUG_BLIT
>
> @@ -243,6 +244,16 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
>         }
>      }
>
> +   /* We only have support for blorp as of gen6 so far. */

I think this would be easier to read as:

    /* BLORP is currently only supported on Gen6+. */

but it's obviously up to you.

> +   if (intel->gen >= 6) {
> +      if (mask & BUFFER_BITS_COLOR) {
> +         if (brw_blorp_clear_color(intel, fb)) {
> +            debug_mask("blorp color", mask & BUFFER_BITS_COLOR);
> +            mask &= ~BUFFER_BITS_COLOR;
> +         }
> +      }
> +   }
> +
>      GLbitfield tri_mask = mask & (BUFFER_BITS_COLOR |
>   				 BUFFER_BIT_STENCIL |
>   				 BUFFER_BIT_DEPTH);
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 114c369..7ec448c 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -605,6 +605,7 @@ enum brw_cache_id {
>      BRW_CC_UNIT,
>      BRW_WM_PROG,
>      BRW_BLORP_BLIT_PROG,
> +   BRW_BLORP_CLEAR_PROG,
>      BRW_SAMPLER,
>      BRW_WM_UNIT,
>      BRW_SF_PROG,
> diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
> index 872c408..3e7467e 100644
> --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
> +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
> @@ -278,17 +278,18 @@ gen6_blorp_emit_blend_state(struct brw_context *brw,
>      blend->blend1.post_blend_clamp_enable = 1;
>      blend->blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT;
>
> -   blend->blend1.write_disable_r = false;
> -   blend->blend1.write_disable_g = false;
> -   blend->blend1.write_disable_b = false;
> -   blend->blend1.write_disable_a = false;
> +   blend->blend1.write_disable_r = params->color_write_disable[0];
> +   blend->blend1.write_disable_g = params->color_write_disable[1];
> +   blend->blend1.write_disable_b = params->color_write_disable[2];
> +   blend->blend1.write_disable_a = params->color_write_disable[3];
>
>      /* When blitting from an XRGB source to a ARGB destination, we need to
>       * interpret the missing channel as 1.0.  Blending can do that for us:
>       * we simply use the RGB values from the fragment shader ("source RGB"),
>       * but smash the alpha channel to 1.
>       */
> -   if (_mesa_get_format_bits(params->dst.mt->format, GL_ALPHA_BITS) > 0 &&
> +   if (params->src.mt &&
> +       _mesa_get_format_bits(params->dst.mt->format, GL_ALPHA_BITS) > 0 &&
>          _mesa_get_format_bits(params->src.mt->format, GL_ALPHA_BITS) == 0) {
>         blend->blend0.blend_enable = 1;
>         blend->blend0.ia_blend_enable = 1;
> @@ -1058,16 +1059,18 @@ gen6_blorp_exec(struct intel_context *intel,
>                                        depthstencil_offset, cc_state_offset);
>      if (params->use_wm_prog) {
>         uint32_t wm_surf_offset_renderbuffer;
> -      uint32_t wm_surf_offset_texture;
> +      uint32_t wm_surf_offset_texture = 0;
>         uint32_t sampler_offset;
>         wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params);
>         wm_surf_offset_renderbuffer =
>            gen6_blorp_emit_surface_state(brw, params, &params->dst,
>                                          I915_GEM_DOMAIN_RENDER,
>                                          I915_GEM_DOMAIN_RENDER);
> -      wm_surf_offset_texture =
> -         gen6_blorp_emit_surface_state(brw, params, &params->src,
> -                                       I915_GEM_DOMAIN_SAMPLER, 0);
> +      if (params->src.mt) {
> +         wm_surf_offset_texture =
> +            gen6_blorp_emit_surface_state(brw, params, &params->src,
> +                                          I915_GEM_DOMAIN_SAMPLER, 0);
> +      }
>         wm_bind_bo_offset =
>            gen6_blorp_emit_binding_table(brw, params,
>                                          wm_surf_offset_renderbuffer,
> diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> index 99e7e58..1c23866 100644
> --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> @@ -765,17 +765,19 @@ gen7_blorp_exec(struct intel_context *intel,
>                                                   depthstencil_offset);
>      if (params->use_wm_prog) {
>         uint32_t wm_surf_offset_renderbuffer;
> -      uint32_t wm_surf_offset_texture;
> +      uint32_t wm_surf_offset_texture = 0;
>         wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params);
>         wm_surf_offset_renderbuffer =
>            gen7_blorp_emit_surface_state(brw, params, &params->dst,
>                                          I915_GEM_DOMAIN_RENDER,
>                                          I915_GEM_DOMAIN_RENDER,
>                                          true /* is_render_target */);
> -      wm_surf_offset_texture =
> -         gen7_blorp_emit_surface_state(brw, params, &params->src,
> -                                       I915_GEM_DOMAIN_SAMPLER, 0,
> -                                       false /* is_render_target */);
> +      if (params->src.mt) {
> +         wm_surf_offset_texture =
> +            gen7_blorp_emit_surface_state(brw, params, &params->src,
> +                                          I915_GEM_DOMAIN_SAMPLER, 0,
> +                                          false /* is_render_target */);
> +      }
>         wm_bind_bo_offset =
>            gen6_blorp_emit_binding_table(brw, params,
>                                          wm_surf_offset_renderbuffer,
>

Nitpicking aside, this looks good.
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>


More information about the mesa-dev mailing list