[Mesa-dev] [PATCH 03/10] i965/skl: Enable fast color clears on SKL
Chad Versace
chad.versace at intel.com
Fri Oct 16 16:10:02 PDT 2015
But this patch doesn't enable fast clears! The reverts in pathches 6 and
7 need to be folded into this patch, otherwise the patch does not do
what it claims.
Also, you can't enable fast clears before patches 4 and 5 without introducing
regressions. Patches 4 and 5 must precede this patch.
On Tue 13 Oct 2015, Ben Widawsky wrote:
> Based on a patch originally from Kristian. Skylake has extended capabilities
> with regard to fast clears, but that is saved for another patch.
>
> The same effect could be acheived with the following, however I think the way
> I've done it is more in line with how the docs explain it.
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -150,9 +150,13 @@ intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
> /* In release builds, fall through */
> case I915_TILING_Y:
> *width_px = 32 / mt->cpp;
> - *height = 4;
> + if (brw->gen >= 9)
> + *height = 2;
> + else
> + *height = 4;
>
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> ---
> src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 54 +++++++++++++++++--------
> src/mesa/drivers/dri/i965/gen8_surface_state.c | 34 ++++++++++++----
> src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 9 +++++
> src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 7 +++-
> 4 files changed, 78 insertions(+), 26 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
> index fbde3f0..7bf52f0 100644
> --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
> +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
> @@ -204,7 +204,7 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances)
> }
>
> static void
> -get_fast_clear_rect(struct gl_framebuffer *fb,
> +get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
> struct intel_renderbuffer *irb, struct rect *rect)
> {
> unsigned int x_align, y_align;
> @@ -228,7 +228,14 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
> */
> intel_get_non_msrt_mcs_alignment(irb->mt, &x_align, &y_align);
> x_align *= 16;
> - y_align *= 32;
> +
> + /* SKL+ line alignment requirement for Y-tiled are half those of the prior
> + * generations.
> + */
> + if (brw->gen >= 9)
> + y_align *= 16;
> + else
> + y_align *= 32;
>
> /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
> * Target(s)", beneath the "Fast Color Clear" bullet (p327):
> @@ -265,8 +272,10 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
> * terms of (width,height) of the RT.
> *
> * MSAA Width of Clear Rect Height of Clear Rect
> + * 2X Ceil(1/8*width) Ceil(1/2*height)
> * 4X Ceil(1/8*width) Ceil(1/2*height)
> * 8X Ceil(1/2*width) Ceil(1/2*height)
> + * 16X width Ceil(1/2*height)
> *
> * The text "with upper left co-ordinate to coincide with actual
> * rectangle being cleared" is a little confusing--it seems to imply
> @@ -289,6 +298,9 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
> case 8:
> x_scaledown = 2;
> break;
> + case 16:
> + x_scaledown = 1;
> + break;
> default:
> unreachable("Unexpected sample count for fast clear");
> }
> @@ -358,18 +370,24 @@ is_color_fast_clear_compatible(struct brw_context *brw,
>
> /**
> * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
> - * SURFACE_STATE.
> + * SURFACE_STATE (DWORD 12-15 on SKL+).
> */
> -static uint32_t
> -compute_fast_clear_color_bits(const union gl_color_union *color)
> +static void
> +set_fast_clear_color(struct brw_context *brw,
> + struct intel_mipmap_tree *mt,
> + const union gl_color_union *color)
> {
> - uint32_t bits = 0;
> - for (int i = 0; i < 4; i++) {
> - /* Testing for non-0 works for integer and float colors */
> - if (color->f[i] != 0.0f)
> - bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
> + if (brw->gen >= 9) {
> + mt->gen9_fast_clear_color = *color;
> + } else {
> + mt->fast_clear_color_value = 0;
> + for (int i = 0; i < 4; i++) {
> + /* Testing for non-0 works for integer and float colors */
> + if (color->f[i] != 0.0f)
> + mt->fast_clear_color_value |=
> + 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
Please put braces round the multi-line if-statement.
> + }
> }
> - return bits;
> }
>
> static const uint32_t fast_clear_color[4] = { ~0, ~0, ~0, ~0 };
> @@ -504,8 +522,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
>
> switch (clear_type) {
> case FAST_CLEAR:
> - irb->mt->fast_clear_color_value =
> - compute_fast_clear_color_bits(&ctx->Color.ClearColor);
> + set_fast_clear_color(brw, irb->mt, &ctx->Color.ClearColor);
> irb->need_downsample = true;
>
> /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the
> @@ -521,7 +538,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
> irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
> irb->need_downsample = true;
> fast_clear_buffers |= 1 << index;
> - get_fast_clear_rect(fb, irb, &fast_clear_rect);
> + get_fast_clear_rect(brw, fb, irb, &fast_clear_rect);
> break;
>
> case REP_CLEAR:
> @@ -654,8 +671,9 @@ get_resolve_rect(struct brw_context *brw,
> *
> * The scaledown factors in the table that follows are related to the
> * alignment size returned by intel_get_non_msrt_mcs_alignment() by a
> - * multiplier. For IVB and HSW, we divide by two, for BDW we multiply
> - * by 8 and 16 and 8 and 8 for SKL.
> + * multiplier. For IVB and HSW, we divide by two, for BDW we multiply
> + * by 8 and 16. Similar to the fast clear, SKL eases the BDW vertical scaling
> + * by a factor of 2.
> */
>
> intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align);
> @@ -701,6 +719,10 @@ brw_meta_resolve_color(struct brw_context *brw,
>
> brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
>
> + /* SKL+ also has a resolve mode for compressed render targets and thus more
> + * bits to let us select the type of resolve. For fast clear resolves, it
> + * turns out we can use the same value as pre-SKL though.
> + */
> set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE);
>
> mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
> diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
> index e70c15b..995b4dd 100644
> --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
> @@ -183,6 +183,28 @@ gen8_emit_buffer_surface_state(struct brw_context *brw,
> }
>
> static void
> +gen8_emit_fast_clear_color(struct brw_context *brw,
> + struct intel_mipmap_tree *mt,
> + uint32_t *surf)
> +{
> + if (brw->gen >= 9) {
> +#define check_fast_clear_val(x) \
> + assert(mt->gen9_fast_clear_color.f[x] == 0.0 || \
> + mt->gen9_fast_clear_color.f[x] == 1.0)
> + check_fast_clear_val(0);
> + check_fast_clear_val(1);
> + check_fast_clear_val(2);
> + check_fast_clear_val(3);
> +#undef check_fast_clear_val
Replacing the above with a simple loop accomplishes the same. The code
is shorder, cleaner, and the compiler will unroll it anyway.
for (int i = 0; ++i; i < 4) {
>assert(mt->gen9_fast_clear_color.f[x] == 0.0 ||
mt->gen9_fast_clear_color.f[x] == 1.0)
}
Anyways, We'll need to remove this check very soon when enabling
full-color clears. So I prefer that we just omit it now. But it's not
a big deal either way.
> + surf[12] = mt->gen9_fast_clear_color.ui[0];
> + surf[13] = mt->gen9_fast_clear_color.ui[1];
> + surf[14] = mt->gen9_fast_clear_color.ui[2];
> + surf[15] = mt->gen9_fast_clear_color.ui[3];
> + } else
> + surf[7] |= mt->fast_clear_color_value;
> +}
> +
> +static void
> gen8_emit_texture_surface_state(struct brw_context *brw,
> struct intel_mipmap_tree *mt,
> GLenum target,
> @@ -286,7 +308,9 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
> aux_mode;
> }
>
> - surf[7] = mt->fast_clear_color_value |
> + gen8_emit_fast_clear_color(brw, mt, surf);
> +
> + surf[7] |=
> SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) |
> SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) |
> SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) |
> @@ -384,12 +408,6 @@ gen8_emit_null_surface_state(struct brw_context *brw,
> SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
> }
>
> -static void
> -gen8_emit_fast_clear_color(struct intel_mipmap_tree *mt, uint32_t *surf)
> -{
> - surf[7] |= mt->fast_clear_color_value;
> -}
> -
> /**
> * Sets up a surface state structure to point at the given region.
> * While it is only used for the front/back buffer currently, it should be
> @@ -516,7 +534,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
> aux_mode;
> }
>
> - gen8_emit_fast_clear_color(mt, surf);
> + gen8_emit_fast_clear_color(brw, mt, surf);
> surf[7] |= SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
> SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
> SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index b6e3520..32f0ff7 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -192,6 +192,12 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
> *
> * - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
> * 64bpp, and 128bpp.
> + *
> + * From the Skylake documentation, it is made clear that X-tiling is no longer
> + * supported:
> + *
> + * - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
> + * non-MSRTs only.
> */
> static bool
> intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
> @@ -1485,6 +1491,9 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
> intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height);
> unsigned width_divisor = block_width_px * 4;
> unsigned height_divisor = block_height * 8;
> + if (brw->gen >= 9)
> + height_divisor /= 2;
> +
This hunk really needs a comment. Please explain why Skylake's MCS is
twice as tall as previous generations'.
> unsigned mcs_width =
> ALIGN(mt->logical_width0, width_divisor) / width_divisor;
> unsigned mcs_height =
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> index 805cd71..d9cdba0 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> @@ -634,14 +634,17 @@ struct intel_mipmap_tree
> * color mipmap tree, if any.
> *
> * This value will only ever contain ones in bits 28-31, so it is safe to
> - * OR into dword 7 of SURFACE_STATE.
> + * OR into dword 7 of SURFACE_STATE. (dword 12-15 on SKL)
The comment is wrong. "This value will only ever contain ones in bits
28-31" applies only to fast_clear_color_value but not to
gen9_fast_clear_color. And it doesn't make sense to bit-or
gen9_fast_color_clear to dwords 12-15, as gen9_fast_color_clear is
exactly 4 dwords big, and all the bits are significant.
> *
> * @see RENDER_SURFACE_STATE.RedClearColor
> * @see RENDER_SURFACE_STATE.GreenClearColor
> * @see RENDER_SURFACE_STATE.BlueClearColor
> * @see RENDER_SURFACE_STATE.AlphaClearColor
> */
> - uint32_t fast_clear_color_value;
> + union {
> + uint32_t fast_clear_color_value;
> + union gl_color_union gen9_fast_clear_color;
> + };
>
> /**
> * Disable allocation of auxiliary buffers, such as the HiZ buffer and MCS
> --
> 2.6.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list