[Mesa-dev] [PATCH 03/10] i965/skl: Enable fast color clears on SKL
Ben Widawsky
benjamin.widawsky at intel.com
Tue Oct 13 20:50:20 PDT 2015
Based on a patch originally from Kristian. Skylake has extended capabilities
with regard to fast clears, but that is saved for another patch.
The same effect could be acheived with the following, however I think the way
I've done it is more in line with how the docs explain it.
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -150,9 +150,13 @@ intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
/* In release builds, fall through */
case I915_TILING_Y:
*width_px = 32 / mt->cpp;
- *height = 4;
+ if (brw->gen >= 9)
+ *height = 2;
+ else
+ *height = 4;
Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 54 +++++++++++++++++--------
src/mesa/drivers/dri/i965/gen8_surface_state.c | 34 ++++++++++++----
src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 9 +++++
src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 7 +++-
4 files changed, 78 insertions(+), 26 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index fbde3f0..7bf52f0 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -204,7 +204,7 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances)
}
static void
-get_fast_clear_rect(struct gl_framebuffer *fb,
+get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
struct intel_renderbuffer *irb, struct rect *rect)
{
unsigned int x_align, y_align;
@@ -228,7 +228,14 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
*/
intel_get_non_msrt_mcs_alignment(irb->mt, &x_align, &y_align);
x_align *= 16;
- y_align *= 32;
+
+ /* SKL+ line alignment requirement for Y-tiled are half those of the prior
+ * generations.
+ */
+ if (brw->gen >= 9)
+ y_align *= 16;
+ else
+ y_align *= 32;
/* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
* Target(s)", beneath the "Fast Color Clear" bullet (p327):
@@ -265,8 +272,10 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
* terms of (width,height) of the RT.
*
* MSAA Width of Clear Rect Height of Clear Rect
+ * 2X Ceil(1/8*width) Ceil(1/2*height)
* 4X Ceil(1/8*width) Ceil(1/2*height)
* 8X Ceil(1/2*width) Ceil(1/2*height)
+ * 16X width Ceil(1/2*height)
*
* The text "with upper left co-ordinate to coincide with actual
* rectangle being cleared" is a little confusing--it seems to imply
@@ -289,6 +298,9 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
case 8:
x_scaledown = 2;
break;
+ case 16:
+ x_scaledown = 1;
+ break;
default:
unreachable("Unexpected sample count for fast clear");
}
@@ -358,18 +370,24 @@ is_color_fast_clear_compatible(struct brw_context *brw,
/**
* Convert the given color to a bitfield suitable for ORing into DWORD 7 of
- * SURFACE_STATE.
+ * SURFACE_STATE (DWORD 12-15 on SKL+).
*/
-static uint32_t
-compute_fast_clear_color_bits(const union gl_color_union *color)
+static void
+set_fast_clear_color(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ const union gl_color_union *color)
{
- uint32_t bits = 0;
- for (int i = 0; i < 4; i++) {
- /* Testing for non-0 works for integer and float colors */
- if (color->f[i] != 0.0f)
- bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
+ if (brw->gen >= 9) {
+ mt->gen9_fast_clear_color = *color;
+ } else {
+ mt->fast_clear_color_value = 0;
+ for (int i = 0; i < 4; i++) {
+ /* Testing for non-0 works for integer and float colors */
+ if (color->f[i] != 0.0f)
+ mt->fast_clear_color_value |=
+ 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
+ }
}
- return bits;
}
static const uint32_t fast_clear_color[4] = { ~0, ~0, ~0, ~0 };
@@ -504,8 +522,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
switch (clear_type) {
case FAST_CLEAR:
- irb->mt->fast_clear_color_value =
- compute_fast_clear_color_bits(&ctx->Color.ClearColor);
+ set_fast_clear_color(brw, irb->mt, &ctx->Color.ClearColor);
irb->need_downsample = true;
/* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the
@@ -521,7 +538,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
irb->need_downsample = true;
fast_clear_buffers |= 1 << index;
- get_fast_clear_rect(fb, irb, &fast_clear_rect);
+ get_fast_clear_rect(brw, fb, irb, &fast_clear_rect);
break;
case REP_CLEAR:
@@ -654,8 +671,9 @@ get_resolve_rect(struct brw_context *brw,
*
* The scaledown factors in the table that follows are related to the
* alignment size returned by intel_get_non_msrt_mcs_alignment() by a
- * multiplier. For IVB and HSW, we divide by two, for BDW we multiply
- * by 8 and 16 and 8 and 8 for SKL.
+ * multiplier. For IVB and HSW, we divide by two, for BDW we multiply
+ * by 8 and 16. Similar to the fast clear, SKL eases the BDW vertical scaling
+ * by a factor of 2.
*/
intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align);
@@ -701,6 +719,10 @@ brw_meta_resolve_color(struct brw_context *brw,
brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
+ /* SKL+ also has a resolve mode for compressed render targets and thus more
+ * bits to let us select the type of resolve. For fast clear resolves, it
+ * turns out we can use the same value as pre-SKL though.
+ */
set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE);
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index e70c15b..995b4dd 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -183,6 +183,28 @@ gen8_emit_buffer_surface_state(struct brw_context *brw,
}
static void
+gen8_emit_fast_clear_color(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ uint32_t *surf)
+{
+ if (brw->gen >= 9) {
+#define check_fast_clear_val(x) \
+ assert(mt->gen9_fast_clear_color.f[x] == 0.0 || \
+ mt->gen9_fast_clear_color.f[x] == 1.0)
+ check_fast_clear_val(0);
+ check_fast_clear_val(1);
+ check_fast_clear_val(2);
+ check_fast_clear_val(3);
+#undef check_fast_clear_val
+ surf[12] = mt->gen9_fast_clear_color.ui[0];
+ surf[13] = mt->gen9_fast_clear_color.ui[1];
+ surf[14] = mt->gen9_fast_clear_color.ui[2];
+ surf[15] = mt->gen9_fast_clear_color.ui[3];
+ } else
+ surf[7] |= mt->fast_clear_color_value;
+}
+
+static void
gen8_emit_texture_surface_state(struct brw_context *brw,
struct intel_mipmap_tree *mt,
GLenum target,
@@ -286,7 +308,9 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
aux_mode;
}
- surf[7] = mt->fast_clear_color_value |
+ gen8_emit_fast_clear_color(brw, mt, surf);
+
+ surf[7] |=
SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) |
SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) |
SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) |
@@ -384,12 +408,6 @@ gen8_emit_null_surface_state(struct brw_context *brw,
SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
}
-static void
-gen8_emit_fast_clear_color(struct intel_mipmap_tree *mt, uint32_t *surf)
-{
- surf[7] |= mt->fast_clear_color_value;
-}
-
/**
* Sets up a surface state structure to point at the given region.
* While it is only used for the front/back buffer currently, it should be
@@ -516,7 +534,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
aux_mode;
}
- gen8_emit_fast_clear_color(mt, surf);
+ gen8_emit_fast_clear_color(brw, mt, surf);
surf[7] |= SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b6e3520..32f0ff7 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -192,6 +192,12 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
*
* - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
* 64bpp, and 128bpp.
+ *
+ * From the Skylake documentation, it is made clear that X-tiling is no longer
+ * supported:
+ *
+ * - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
+ * non-MSRTs only.
*/
static bool
intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
@@ -1485,6 +1491,9 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height);
unsigned width_divisor = block_width_px * 4;
unsigned height_divisor = block_height * 8;
+ if (brw->gen >= 9)
+ height_divisor /= 2;
+
unsigned mcs_width =
ALIGN(mt->logical_width0, width_divisor) / width_divisor;
unsigned mcs_height =
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 805cd71..d9cdba0 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -634,14 +634,17 @@ struct intel_mipmap_tree
* color mipmap tree, if any.
*
* This value will only ever contain ones in bits 28-31, so it is safe to
- * OR into dword 7 of SURFACE_STATE.
+ * OR into dword 7 of SURFACE_STATE. (dword 12-15 on SKL)
*
* @see RENDER_SURFACE_STATE.RedClearColor
* @see RENDER_SURFACE_STATE.GreenClearColor
* @see RENDER_SURFACE_STATE.BlueClearColor
* @see RENDER_SURFACE_STATE.AlphaClearColor
*/
- uint32_t fast_clear_color_value;
+ union {
+ uint32_t fast_clear_color_value;
+ union gl_color_union gen9_fast_clear_color;
+ };
/**
* Disable allocation of auxiliary buffers, such as the HiZ buffer and MCS
--
2.6.1
More information about the mesa-dev
mailing list