[Mesa-dev] [PATCH] i965/blorp: Special-case the clear color in MSAA resolves
Jason Ekstrand
jason at jlekstrand.net
Thu May 12 00:20:32 UTC 2016
I need to rescind this patch. I thought it worked but it's far more
half-baked than I realized. There's some issue with swizzles interacting
with the clear color. :-(
--Jason
On Tue, May 10, 2016 at 9:45 PM, Jason Ekstrand <jason at jlekstrand.net>
wrote:
> The current MSAA resolve code has a special-case for if the MCS value is 0.
> In this case we can only sample once because we know that all values are in
> slice 0. This commit adds a second optimization that detects the magic MCS
> value that indicates the clear color and grabs the color from a push
> constant and avoids sampling altogether. On a microbenchmark written by
> Neil Roberts that tests resolving surfaces with just clear color, this
> improves performance by 60% for 8x, 40% for 4x, and 28% for 2x MSAA on my
> SKL gte3 laptop. The benchmark can be found on the ML archive:
>
> https://lists.freedesktop.org/archives/mesa-dev/2016-February/108077.html
> ---
> src/mesa/drivers/dri/i965/brw_blorp.h | 4 +-
> src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 72
> ++++++++++++++++++++++++++--
> 2 files changed, 71 insertions(+), 5 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h
> b/src/mesa/drivers/dri/i965/brw_blorp.h
> index 5f7569c..550c6c5 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
> @@ -197,7 +197,9 @@ struct brw_blorp_wm_push_constants
> uint32_t src_z;
>
> /* Pad out to an integral number of registers */
> - uint32_t pad[5];
> + uint32_t pad;
> +
> + union gl_color_union clear_color;
> };
>
> #define BRW_BLORP_NUM_PUSH_CONSTANT_DWORDS \
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> index 97e3908..314034e 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> @@ -346,6 +346,7 @@ struct brw_blorp_blit_vars {
> nir_variable *offset;
> } u_x_transform, u_y_transform;
> nir_variable *u_src_z;
> + nir_variable *u_clear_color;
>
> /* gl_FragCoord */
> nir_variable *frag_coord;
> @@ -374,6 +375,7 @@ brw_blorp_blit_vars_init(nir_builder *b, struct
> brw_blorp_blit_vars *v,
> LOAD_UNIFORM(y_transform.multiplier, glsl_float_type())
> LOAD_UNIFORM(y_transform.offset, glsl_float_type())
> LOAD_UNIFORM(src_z, glsl_uint_type())
> + LOAD_UNIFORM(clear_color, glsl_vec4_type())
>
> #undef DECL_UNIFORM
>
> @@ -858,7 +860,8 @@ static nir_ssa_def *
> blorp_nir_manual_blend_average(nir_builder *b, nir_ssa_def *pos,
> unsigned tex_samples,
> enum intel_msaa_layout tex_layout,
> - enum brw_reg_type dst_type)
> + enum brw_reg_type dst_type,
> + struct brw_blorp_blit_vars *v)
> {
> /* If non-null, this is the outer-most if statement */
> nir_if *outer_if = NULL;
> @@ -867,9 +870,53 @@ blorp_nir_manual_blend_average(nir_builder *b,
> nir_ssa_def *pos,
> nir_local_variable_create(b->impl, glsl_vec4_type(), "color");
>
> nir_ssa_def *mcs = NULL;
> - if (tex_layout == INTEL_MSAA_LAYOUT_CMS)
> + if (tex_layout == INTEL_MSAA_LAYOUT_CMS) {
> mcs = blorp_nir_txf_ms_mcs(b, pos);
>
> + /* The MCS buffer stores a packed value that provides a mapping from
> + * samples to array slices. The magic value of all ones means that
> all
> + * samples have the clear color. In this case, we can
> short-circuit the
> + * sampling process and just use the clear color that we pushed
> into the
> + * shader.
> + */
> + nir_ssa_def *is_clear_color;
> + switch (tex_samples) {
> + case 2:
> + /* Empirical evidence suggests that the value returned from the
> + * sampler is not always 0x3 for clear color so we need to mask
> it.
> + */
> + is_clear_color =
> + nir_ieq(b, nir_iand(b, nir_channel(b, mcs, 0), nir_imm_int(b,
> 0x3)),
> + nir_imm_int(b, 0x3));
> + break;
> + case 4:
> + is_clear_color =
> + nir_ieq(b, nir_channel(b, mcs, 0), nir_imm_int(b, 0xff));
> + break;
> + case 8:
> + is_clear_color =
> + nir_ieq(b, nir_channel(b, mcs, 0), nir_imm_int(b, ~0));
> + break;
> + case 16:
> + is_clear_color =
> + nir_ior(b, nir_ieq(b, nir_channel(b, mcs, 0), nir_imm_int(b,
> ~0)),
> + nir_ieq(b, nir_channel(b, mcs, 1), nir_imm_int(b,
> ~0)));
> + break;
> + default:
> + unreachable("Invalid sample count");
> + }
> +
> + nir_if *if_stmt = nir_if_create(b->shader);
> + if_stmt->condition = nir_src_for_ssa(is_clear_color);
> + nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
> +
> + b->cursor = nir_after_cf_list(&if_stmt->then_list);
> + nir_store_var(b, color, nir_load_var(b, v->u_clear_color), 0xf);
> +
> + b->cursor = nir_after_cf_list(&if_stmt->else_list);
> + outer_if = if_stmt;
> + }
> +
> /* We add together samples using a binary tree structure, e.g. for 4x
> MSAA:
> *
> * result = ((sample[0] + sample[1]) + (sample[2] + sample[3])) / 4
> @@ -937,7 +984,8 @@ blorp_nir_manual_blend_average(nir_builder *b,
> nir_ssa_def *pos,
> nir_store_var(b, color, texture_data[0], 0xf);
>
> b->cursor = nir_after_cf_list(&if_stmt->else_list);
> - outer_if = if_stmt;
> + if (!outer_if)
> + outer_if = if_stmt;
> }
>
> for (int j = 0; j < count_trailing_one_bits(i); j++) {
> @@ -1345,7 +1393,7 @@ brw_blorp_build_nir_shader(struct brw_context *brw,
> /* Gen7+ hardware doesn't automaticaly blend. */
> color = blorp_nir_manual_blend_average(&b, src_pos,
> key->src_samples,
> key->src_layout,
> - key->texture_data_type);
> + key->texture_data_type,
> &v);
> }
> } else if (key->blend && key->blit_scaled) {
> color = blorp_nir_manual_blend_bilinear(&b, src_pos,
> key->src_samples, key, &v);
> @@ -1669,6 +1717,22 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
> params.src.num_samples <= 1 && params.dst.num_samples <= 1)
> wm_prog_key.bilinear_filter = true;
>
> + union gl_color_union clear_color;
> + if (brw->gen >= 9) {
> + clear_color = src_mt->gen9_fast_clear_color;
> + } else if (_mesa_is_format_integer(src_mt->format)) {
> + clear_color.i[0] = (src_mt->fast_clear_color_value & (1 << 0)) != 0;
> + clear_color.i[1] = (src_mt->fast_clear_color_value & (1 << 1)) != 0;
> + clear_color.i[2] = (src_mt->fast_clear_color_value & (1 << 2)) != 0;
> + clear_color.i[3] = (src_mt->fast_clear_color_value & (1 << 3)) != 0;
> + } else {
> + clear_color.f[0] = (src_mt->fast_clear_color_value & (1 << 0)) != 0;
> + clear_color.f[1] = (src_mt->fast_clear_color_value & (1 << 1)) != 0;
> + clear_color.f[2] = (src_mt->fast_clear_color_value & (1 << 2)) != 0;
> + clear_color.f[3] = (src_mt->fast_clear_color_value & (1 << 3)) != 0;
> + }
> + params.wm_push_consts.clear_color = clear_color;
> +
> GLenum base_format = _mesa_get_format_base_format(src_mt->format);
> if (base_format != GL_DEPTH_COMPONENT && /* TODO: what about
> depth/stencil? */
> base_format != GL_STENCIL_INDEX &&
> --
> 2.5.0.400.gff86faf
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160511/9635a7cb/attachment-0001.html>
More information about the mesa-dev
mailing list