[Mesa-dev] [PATCH 2/2] i965: Implement color clears using a simple shader in blorp.
Kenneth Graunke
kenneth at whitecape.org
Sun Apr 28 00:55:51 PDT 2013
On 04/18/2013 02:25 PM, Eric Anholt wrote:
> The upside is less CPU overhead in fiddling with GL error handling, the
> ability to use the constant color write message in most cases, and no GLSL
> clear shaders appearing in MESA_GLSL=dump output. The downside is more
> batch flushing and a total recompute of GL state at the end of blorp.
> However, if we're ever going to use the fast color clear feature of CMS
> surfaces, we'll need this anyway since it requires very special state
> setup.
>
> This increases the fail rate of some the GLES3conform ARB_sync tests,
> because of the initial flush at the start of blorp. The tests already
> intermittently failed (because it's just a bad testing procedure), and we
> can return it to its previous fail rate by fixing the initial flush.
>
> Improves GLB2.7 performance 0.37% +/- 0.11% (n=71/70, outlier removed).
>
> v2: Rename the key member, use the core helper for sRGB, and use
> BRW_MASK_* enums, fix comment and indentation.
> ---
> src/mesa/drivers/dri/i965/Makefile.sources | 1 +
> src/mesa/drivers/dri/i965/brw_blorp.cpp | 4 +
> src/mesa/drivers/dri/i965/brw_blorp.h | 4 +
> src/mesa/drivers/dri/i965/brw_blorp_clear.cpp | 305 +++++++++++++++++++++++++
> src/mesa/drivers/dri/i965/brw_clear.c | 11 +
> src/mesa/drivers/dri/i965/brw_context.h | 1 +
> src/mesa/drivers/dri/i965/gen6_blorp.cpp | 21 +-
> src/mesa/drivers/dri/i965/gen7_blorp.cpp | 12 +-
> 8 files changed, 345 insertions(+), 14 deletions(-)
> create mode 100644 src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
>
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
> index be8d630..049ae91 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -31,6 +31,7 @@ i965_FILES = \
> intel_tex_validate.c \
> brw_blorp.cpp \
> brw_blorp_blit.cpp \
> + brw_blorp_clear.cpp \
> brw_cc.c \
> brw_cfg.cpp \
> brw_clear.c \
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
> index 8a044c1..a2d02bf 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
> @@ -148,6 +148,10 @@ brw_blorp_params::brw_blorp_params()
> num_samples(0),
> use_wm_prog(false)
> {
> + color_write_disable[0] = false;
> + color_write_disable[1] = false;
> + color_write_disable[2] = false;
> + color_write_disable[3] = false;
> }
>
> extern "C" {
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
> index 1b95818..8915080 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
> @@ -45,6 +45,9 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
> int dst_x1, int dst_y1,
> bool mirror_x, bool mirror_y);
>
> +bool
> +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb);
> +
> #ifdef __cplusplus
> } /* end extern "C" */
>
> @@ -211,6 +214,7 @@ public:
> unsigned num_samples;
> bool use_wm_prog;
> brw_blorp_wm_push_constants wm_push_consts;
> + bool color_write_disable[4];
> };
>
>
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
> new file mode 100644
> index 0000000..f778d57
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
> @@ -0,0 +1,305 @@
> +/*
> + * Copyright © 2013 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +extern "C" {
> +#include "main/teximage.h"
> +#include "main/blend.h"
> +#include "main/fbobject.h"
> +#include "main/renderbuffer.h"
> +}
> +
> +#include "glsl/ralloc.h"
> +
> +#include "intel_fbo.h"
> +
> +#include "brw_blorp.h"
> +#include "brw_context.h"
> +#include "brw_eu.h"
> +#include "brw_state.h"
> +
> +struct brw_blorp_clear_prog_key
> +{
> + bool use_simd16_replicated_data;
> + bool pad[3];
> +};
> +
> +class brw_blorp_clear_params : public brw_blorp_params
> +{
> +public:
> + brw_blorp_clear_params(struct brw_context *brw,
> + struct gl_framebuffer *fb,
> + struct gl_renderbuffer *rb,
> + GLubyte *color_mask);
> +
> + virtual uint32_t get_wm_prog(struct brw_context *brw,
> + brw_blorp_prog_data **prog_data) const;
> +
> +private:
> + brw_blorp_clear_prog_key wm_prog_key;
> +};
> +
> +class brw_blorp_clear_program
> +{
> +public:
> + brw_blorp_clear_program(struct brw_context *brw,
> + const brw_blorp_clear_prog_key *key);
> + ~brw_blorp_clear_program();
> +
> + const GLuint *compile(struct brw_context *brw, GLuint *program_size);
> +
> + brw_blorp_prog_data prog_data;
> +
> +private:
> + void alloc_regs();
> +
> + void *mem_ctx;
> + struct brw_context *brw;
> + const brw_blorp_clear_prog_key *key;
> + struct brw_compile func;
> +
> + /* Thread dispatch header */
> + struct brw_reg R0;
> +
> + /* Pixel X/Y coordinates (always in R1). */
> + struct brw_reg R1;
> +
> + /* Register with push constants (a single vec4) */
> + struct brw_reg clear_rgba;
> +
> + /* MRF used for render target writes */
> + GLuint base_mrf;
> +};
> +
> +brw_blorp_clear_program::brw_blorp_clear_program(
> + struct brw_context *brw,
> + const brw_blorp_clear_prog_key *key)
> + : mem_ctx(ralloc_context(NULL)),
> + brw(brw),
> + key(key)
> +{
> + brw_init_compile(brw, &func, mem_ctx);
> +}
> +
> +brw_blorp_clear_program::~brw_blorp_clear_program()
> +{
> + ralloc_free(mem_ctx);
> +}
> +
> +brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
> + struct gl_framebuffer *fb,
> + struct gl_renderbuffer *rb,
> + GLubyte *color_mask)
> +{
> + struct intel_context *intel = &brw->intel;
> + struct gl_context *ctx = &intel->ctx;
> + struct intel_renderbuffer *irb = intel_renderbuffer(rb);
> +
> + dst.set(brw, irb->mt, irb->mt_level, irb->mt_layer);
> +
> + /* Override the surface format according to the context's sRGB rules. */
> + gl_format format = _mesa_get_render_format(ctx, irb->mt->format);
> + dst.brw_surfaceformat = brw->render_target_format[format];
> +
> + x0 = fb->_Xmin;
> + x1 = fb->_Xmax;
> + if (rb->Name != 0) {
> + y0 = fb->_Ymin;
> + y1 = fb->_Ymax;
> + } else {
> + y0 = rb->Height - fb->_Ymax;
> + y1 = rb->Height - fb->_Ymin;
> + }
I've seen this code in several places; we should probably refactor it at
some point.
> +
> + float *push_consts = (float *)&wm_push_consts;
> +
> + push_consts[0] = ctx->Color.ClearColor.f[0];
> + push_consts[1] = ctx->Color.ClearColor.f[1];
> + push_consts[2] = ctx->Color.ClearColor.f[2];
> + push_consts[3] = ctx->Color.ClearColor.f[3];
> +
> + use_wm_prog = true;
> +
> + memset(&wm_prog_key, 0, sizeof(wm_prog_key));
> +
> + wm_prog_key.use_simd16_replicated_data = true;
> +
> + /* From the SNB PRM (Vol4_Part1):
> + *
> + * "Replicated data (Message Type = 111) is only supported when
> + * accessing tiled memory. Using this Message Type to access linear
> + * (untiled) memory is UNDEFINED."
> + */
> + if (irb->mt->region->tiling == I915_TILING_NONE)
> + wm_prog_key.use_simd16_replicated_data = false;
> +
> + /* Constant color writes ignore everyting in blend and color calculator
> + * state. This is not documented.
> + */
> + for (int i = 0; i < 4; i++) {
> + if (!color_mask[i]) {
> + color_write_disable[i] = true;
> + wm_prog_key.use_simd16_replicated_data = false;
> + }
> + }
> +}
> +
> +uint32_t
> +brw_blorp_clear_params::get_wm_prog(struct brw_context *brw,
> + brw_blorp_prog_data **prog_data) const
> +{
> + uint32_t prog_offset;
> + if (!brw_search_cache(&brw->cache, BRW_BLORP_CLEAR_PROG,
> + &this->wm_prog_key, sizeof(this->wm_prog_key),
> + &prog_offset, prog_data)) {
> + brw_blorp_clear_program prog(brw, &this->wm_prog_key);
> + GLuint program_size;
> + const GLuint *program = prog.compile(brw, &program_size);
> + brw_upload_cache(&brw->cache, BRW_BLORP_CLEAR_PROG,
> + &this->wm_prog_key, sizeof(this->wm_prog_key),
> + program, program_size,
> + &prog.prog_data, sizeof(prog.prog_data),
> + &prog_offset, prog_data);
> + }
> + return prog_offset;
> +}
> +
> +void
> +brw_blorp_clear_program::alloc_regs()
> +{
> + int reg = 0;
> + this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
> + this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
> +
> + prog_data.first_curbe_grf = reg;
> + clear_rgba = retype(brw_vec4_grf(reg++, 0), BRW_REGISTER_TYPE_F);
> + reg += BRW_BLORP_NUM_PUSH_CONST_REGS;
> +
> + /* Make sure we didn't run out of registers */
> + assert(reg <= GEN7_MRF_HACK_START);
> +
> + int mrf = 2;
> + this->base_mrf = mrf;
Temporary seems kind of superfluous here, but not a big deal.
> +}
> +
> +const GLuint *
> +brw_blorp_clear_program::compile(struct brw_context *brw,
> + GLuint *program_size)
> +{
> + /* Set up prog_data */
> + memset(&prog_data, 0, sizeof(prog_data));
> + prog_data.persample_msaa_dispatch = false;
> +
> + alloc_regs();
> +
> + brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
> +
> + struct brw_reg mrf_rt_write =
> + retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F);
> +
> + uint32_t mlen, msg_type;
> + if (key->use_simd16_replicated_data) {
> + /* The message payload is a single register with the low 4 floats/ints
> + * filled with the constant clear color.
> + */
> + brw_set_mask_control(&func, BRW_MASK_DISABLE);
> + brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba);
> + brw_set_mask_control(&func, BRW_MASK_ENABLE);
> +
> + msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
> + mlen = 1;
> + } else {
> + for (int i = 0; i < 4; i++) {
> + /* The message payload is pairs of registers for 16 pixels each of r,
> + * g, b, and a.
> + */
> + brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
> + brw_MOV(&func,
> + brw_message_reg(base_mrf + i * 2),
> + brw_vec1_grf(clear_rgba.nr, i));
> + brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
> + }
> +
> + msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
> + mlen = 8;
> + }
> +
> + /* Now write to the render target and terminate the thread */
> + brw_fb_WRITE(&func,
> + 16 /* dispatch_width */,
> + base_mrf /* msg_reg_nr */,
> + mrf_rt_write /* src0 */,
> + msg_type,
> + BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
> + mlen,
> + 0 /* response_length */,
> + true /* eot */,
> + false /* header present */);
> +
> + if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
> + printf("Native code for BLORP clear:\n");
> + brw_dump_compile(&func, stdout, 0, func.next_insn_offset);
> + printf("\n");
> + }
> + return brw_get_program(&func, program_size);
> +}
> +
> +extern "C" {
> +bool
> +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
> +{
> + struct gl_context *ctx = &intel->ctx;
> + struct brw_context *brw = brw_context(ctx);
> +
> + /* The constant color clear code doesn't work for multisampled surfaces, so
> + * we need to support falling back to other clear mechanisms.
> + * Unfortunately, our clear code is based on a bitmask that doesn't
> + * distinguish individual color attachments, so we walk the attachments to
> + * see if any require fallback, and fall back for all if any of them need
> + * to.
> + */
Ugh, yeah, we should go back and fix that. In the meantime, your
approach is reasonable.
> + for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
> + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
> + struct intel_renderbuffer *irb = intel_renderbuffer(rb);
> +
> + if (irb && irb->mt->msaa_layout != INTEL_MSAA_LAYOUT_NONE)
> + return false;
> + }
> +
> + for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
> + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
> +
> + /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
> + * the framebuffer can be complete with some attachments missing. In
> + * this case the _ColorDrawBuffers pointer will be NULL.
> + */
> + if (rb == NULL)
> + continue;
> +
> + brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf]);
> + brw_blorp_exec(intel, ¶ms);
> + }
> +
> + return true;
> +}
> +
> +} /* extern "C" */
> diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
> index c0ac69d..ad6a554 100644
> --- a/src/mesa/drivers/dri/i965/brw_clear.c
> +++ b/src/mesa/drivers/dri/i965/brw_clear.c
> @@ -41,6 +41,7 @@
> #include "intel_regions.h"
>
> #include "brw_context.h"
> +#include "brw_blorp.h"
>
> #define FILE_DEBUG_FLAG DEBUG_BLIT
>
> @@ -243,6 +244,16 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
> }
> }
>
> + /* We only have support for blorp as of gen6 so far. */
I think this would be easier to read as:
/* BLORP is currently only supported on Gen6+. */
but it's obviously up to you.
> + if (intel->gen >= 6) {
> + if (mask & BUFFER_BITS_COLOR) {
> + if (brw_blorp_clear_color(intel, fb)) {
> + debug_mask("blorp color", mask & BUFFER_BITS_COLOR);
> + mask &= ~BUFFER_BITS_COLOR;
> + }
> + }
> + }
> +
> GLbitfield tri_mask = mask & (BUFFER_BITS_COLOR |
> BUFFER_BIT_STENCIL |
> BUFFER_BIT_DEPTH);
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 114c369..7ec448c 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -605,6 +605,7 @@ enum brw_cache_id {
> BRW_CC_UNIT,
> BRW_WM_PROG,
> BRW_BLORP_BLIT_PROG,
> + BRW_BLORP_CLEAR_PROG,
> BRW_SAMPLER,
> BRW_WM_UNIT,
> BRW_SF_PROG,
> diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
> index 872c408..3e7467e 100644
> --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
> +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
> @@ -278,17 +278,18 @@ gen6_blorp_emit_blend_state(struct brw_context *brw,
> blend->blend1.post_blend_clamp_enable = 1;
> blend->blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT;
>
> - blend->blend1.write_disable_r = false;
> - blend->blend1.write_disable_g = false;
> - blend->blend1.write_disable_b = false;
> - blend->blend1.write_disable_a = false;
> + blend->blend1.write_disable_r = params->color_write_disable[0];
> + blend->blend1.write_disable_g = params->color_write_disable[1];
> + blend->blend1.write_disable_b = params->color_write_disable[2];
> + blend->blend1.write_disable_a = params->color_write_disable[3];
>
> /* When blitting from an XRGB source to a ARGB destination, we need to
> * interpret the missing channel as 1.0. Blending can do that for us:
> * we simply use the RGB values from the fragment shader ("source RGB"),
> * but smash the alpha channel to 1.
> */
> - if (_mesa_get_format_bits(params->dst.mt->format, GL_ALPHA_BITS) > 0 &&
> + if (params->src.mt &&
> + _mesa_get_format_bits(params->dst.mt->format, GL_ALPHA_BITS) > 0 &&
> _mesa_get_format_bits(params->src.mt->format, GL_ALPHA_BITS) == 0) {
> blend->blend0.blend_enable = 1;
> blend->blend0.ia_blend_enable = 1;
> @@ -1058,16 +1059,18 @@ gen6_blorp_exec(struct intel_context *intel,
> depthstencil_offset, cc_state_offset);
> if (params->use_wm_prog) {
> uint32_t wm_surf_offset_renderbuffer;
> - uint32_t wm_surf_offset_texture;
> + uint32_t wm_surf_offset_texture = 0;
> uint32_t sampler_offset;
> wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params);
> wm_surf_offset_renderbuffer =
> gen6_blorp_emit_surface_state(brw, params, ¶ms->dst,
> I915_GEM_DOMAIN_RENDER,
> I915_GEM_DOMAIN_RENDER);
> - wm_surf_offset_texture =
> - gen6_blorp_emit_surface_state(brw, params, ¶ms->src,
> - I915_GEM_DOMAIN_SAMPLER, 0);
> + if (params->src.mt) {
> + wm_surf_offset_texture =
> + gen6_blorp_emit_surface_state(brw, params, ¶ms->src,
> + I915_GEM_DOMAIN_SAMPLER, 0);
> + }
> wm_bind_bo_offset =
> gen6_blorp_emit_binding_table(brw, params,
> wm_surf_offset_renderbuffer,
> diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> index 99e7e58..1c23866 100644
> --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> @@ -765,17 +765,19 @@ gen7_blorp_exec(struct intel_context *intel,
> depthstencil_offset);
> if (params->use_wm_prog) {
> uint32_t wm_surf_offset_renderbuffer;
> - uint32_t wm_surf_offset_texture;
> + uint32_t wm_surf_offset_texture = 0;
> wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params);
> wm_surf_offset_renderbuffer =
> gen7_blorp_emit_surface_state(brw, params, ¶ms->dst,
> I915_GEM_DOMAIN_RENDER,
> I915_GEM_DOMAIN_RENDER,
> true /* is_render_target */);
> - wm_surf_offset_texture =
> - gen7_blorp_emit_surface_state(brw, params, ¶ms->src,
> - I915_GEM_DOMAIN_SAMPLER, 0,
> - false /* is_render_target */);
> + if (params->src.mt) {
> + wm_surf_offset_texture =
> + gen7_blorp_emit_surface_state(brw, params, ¶ms->src,
> + I915_GEM_DOMAIN_SAMPLER, 0,
> + false /* is_render_target */);
> + }
> wm_bind_bo_offset =
> gen6_blorp_emit_binding_table(brw, params,
> wm_surf_offset_renderbuffer,
>
Nitpicking aside, this looks good.
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
More information about the mesa-dev
mailing list