[Mesa-dev] [PATCH v2 06/12] i965/gen7+: Implement fast color clear operation in BLORP.
Paul Berry
stereotype441 at gmail.com
Tue Jun 4 17:01:09 PDT 2013
Since we defer allocation of the MCS miptree until the time of the
fast clear operation, this patch also implements creation of the MCS
miptree.
In addition, this patch adds the field
intel_mipmap_tree::fast_clear_color_value, which holds the most recent
fast color clear value, if any. We use it to set the SURFACE_STATE's
clear color for render targets.
v2: Flag BRW_NEW_SURFACES when allocating the MCS miptree. Generate a
perf_debug message if clearing to a color that isn't compatible with
fast color clear. Fix "control reaches end of non-void function"
build warning.
---
src/mesa/drivers/dri/i965/brw_blorp.cpp | 1 +
src/mesa/drivers/dri/i965/brw_blorp.h | 11 +-
src/mesa/drivers/dri/i965/brw_blorp_clear.cpp | 149 +++++++++++++++++++++-
src/mesa/drivers/dri/i965/brw_clear.c | 2 +-
src/mesa/drivers/dri/i965/brw_defines.h | 2 +
src/mesa/drivers/dri/i965/gen7_blorp.cpp | 18 ++-
src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 10 +-
src/mesa/drivers/dri/intel/intel_mipmap_tree.c | 48 +++++++
src/mesa/drivers/dri/intel/intel_mipmap_tree.h | 13 ++
9 files changed, 240 insertions(+), 14 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index a2d02bf..9c9a4a7 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -145,6 +145,7 @@ brw_blorp_params::brw_blorp_params()
y1(0),
depth_format(0),
hiz_op(GEN6_HIZ_OP_NONE),
+ fast_clear_op(GEN7_FAST_CLEAR_OP_NONE),
num_samples(0),
use_wm_prog(false)
{
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index 51b23db..0808206 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -47,7 +47,8 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
bool mirror_x, bool mirror_y);
bool
-brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb);
+brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb,
+ bool partial_clear);
#ifdef __cplusplus
} /* end extern "C" */
@@ -192,6 +193,13 @@ struct brw_blorp_prog_data
bool persample_msaa_dispatch;
};
+
+enum gen7_fast_clear_op {
+   GEN7_FAST_CLEAR_OP_NONE = 0,       /* default: no fast clear requested */
+   GEN7_FAST_CLEAR_OP_FAST_CLEAR = 1, /* perform a fast color clear */
+};
+
+
class brw_blorp_params
{
public:
@@ -209,6 +217,7 @@ public:
brw_blorp_surface_info src;
brw_blorp_surface_info dst;
enum gen6_hiz_op hiz_op;
+ enum gen7_fast_clear_op fast_clear_op;
unsigned num_samples;
bool use_wm_prog;
brw_blorp_wm_push_constants wm_push_consts;
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
index b626659..ffcaa5b 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
@@ -49,7 +49,8 @@ public:
brw_blorp_clear_params(struct brw_context *brw,
struct gl_framebuffer *fb,
struct gl_renderbuffer *rb,
- GLubyte *color_mask);
+ GLubyte *color_mask,
+ bool partial_clear);
virtual uint32_t get_wm_prog(struct brw_context *brw,
brw_blorp_prog_data **prog_data) const;
@@ -105,10 +106,53 @@ brw_blorp_clear_program::~brw_blorp_clear_program()
ralloc_free(mem_ctx);
}
+
+/**
+ * Determine whether a fast color clear can produce the given clear color.
+ *
+ * Integer color formats are rejected outright.  For every other format, all
+ * four channels of \c color must be exactly 0.0 or 1.0; if one is not, a
+ * perf_debug message is emitted before returning false.
+ */
+static bool
+is_color_fast_clear_compatible(struct intel_context *intel,
+                               gl_format format,
+                               const union gl_color_union *color)
+{
+   if (_mesa_is_format_integer_color(format))
+      return false;
+   for (int chan = 0; chan < 4; chan++) {
+      if (color->f[chan] == 0.0 || color->f[chan] == 1.0)
+         continue;
+      perf_debug("Clear color unsupported by fast color clear. "
+                 "Falling back to slow clear.");
+      return false;
+   }
+   return true;
+}
+
+
+/**
+ * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
+ * SURFACE_STATE: nonzero f[i] sets bit GEN7_SURFACE_CLEAR_COLOR_SHIFT + 3 - i.
+ */
+static uint32_t
+compute_fast_clear_color_bits(const union gl_color_union *color)
+{
+   uint32_t bits = 0;
+   for (int i = 0; i < 4; i++) {
+      if (color->f[i] != 0.0) /* unsigned 1: channel 0 reaches bit 31 */
+         bits |= 1u << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
+   }
+   return bits;
+}
+
+
brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
struct gl_framebuffer *fb,
struct gl_renderbuffer *rb,
- GLubyte *color_mask)
+ GLubyte *color_mask,
+ bool partial_clear)
{
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
@@ -161,6 +205,56 @@ brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
wm_prog_key.use_simd16_replicated_data = false;
}
}
+
+ /* If we can do this as a fast color clear, do so. */
+ if (irb->mt->mcs_state != INTEL_MCS_STATE_NONE && !partial_clear &&
+ wm_prog_key.use_simd16_replicated_data &&
+ is_color_fast_clear_compatible(intel, format, &ctx->Color.ClearColor)) {
+ memset(push_consts, 0xff, 4*sizeof(float));
+ fast_clear_op = GEN7_FAST_CLEAR_OP_FAST_CLEAR;
+
+ /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
+ * Target(s)", beneath the "Fast Color Clear" bullet (p327):
+ *
+ * Clear pass must have a clear rectangle that must follow alignment
+ * rules in terms of pixels and lines as shown in the table
+ * below. Further, the clear-rectangle height and width must be
+ * multiple of the following dimensions. If the height and width of
+ * the render target being cleared do not meet these requirements,
+ * an MCS buffer can be created such that it follows the requirement
+ * and covers the RT.
+ *
+ * The alignment size in the table that follows is related to the
+ * alignment size returned by intel_get_non_msrt_mcs_alignment(), but
+ * with X alignment multiplied by 16 and Y alignment multiplied by 32.
+ */
+ unsigned x_align, y_align;
+ intel_get_non_msrt_mcs_alignment(intel, irb->mt, &x_align, &y_align);
+ x_align *= 16;
+ y_align *= 32;
+ x0 = ROUND_DOWN_TO(x0, x_align);
+ y0 = ROUND_DOWN_TO(y0, y_align);
+ x1 = ALIGN(x1, x_align);
+ y1 = ALIGN(y1, y_align);
+
+ /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
+ * Target(s)", beneath the "Fast Color Clear" bullet (p327):
+ *
+ * In order to optimize the performance MCS buffer (when bound to 1X
+ * RT) clear similarly to MCS buffer clear for MSRT case, clear rect
+ * is required to be scaled by the following factors in the
+ * horizontal and vertical directions:
+ *
+ * The X and Y scale down factors in the table that follows are each
+ * equal to half the alignment value computed above.
+ */
+ unsigned x_scaledown = x_align / 2;
+ unsigned y_scaledown = y_align / 2;
+ x0 /= x_scaledown;
+ y0 /= y_scaledown;
+ x1 /= x_scaledown;
+ y1 /= y_scaledown;
+ }
}
uint32_t
@@ -264,7 +358,8 @@ brw_blorp_clear_program::compile(struct brw_context *brw,
extern "C" {
bool
-brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
+brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb,
+ bool partial_clear)
{
struct gl_context *ctx = &intel->ctx;
struct brw_context *brw = brw_context(ctx);
@@ -286,6 +381,7 @@ brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
/* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
* the framebuffer can be complete with some attachments missing. In
@@ -294,8 +390,53 @@ brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
if (rb == NULL)
continue;
- brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf]);
+ brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf],
+ partial_clear);
+
+ bool is_fast_clear =
+ (params.fast_clear_op == GEN7_FAST_CLEAR_OP_FAST_CLEAR);
+ if (is_fast_clear) {
+ /* Record the clear color in the miptree so that it will be
+ * programmed in SURFACE_STATE by later rendering and resolve
+ * operations.
+ */
+ uint32_t new_color_value =
+ compute_fast_clear_color_bits(&ctx->Color.ClearColor);
+ if (irb->mt->fast_clear_color_value != new_color_value) {
+ irb->mt->fast_clear_color_value = new_color_value;
+ brw->state.dirty.brw |= BRW_NEW_SURFACES;
+ }
+
+ /* If the buffer is already in INTEL_MCS_STATE_CLEAR, the clear is
+ * redundant and can be skipped.
+ */
+ if (irb->mt->mcs_state == INTEL_MCS_STATE_CLEAR)
+ continue;
+
+ /* If the MCS buffer hasn't been allocated yet, we need to allocate
+ * it now.
+ */
+ if (!irb->mt->mcs_mt) {
+ if (!intel_miptree_alloc_non_msrt_mcs(intel, irb->mt)) {
+ /* MCS allocation failed--probably this will only happen in
+ * out-of-memory conditions. But in any case, try to recover
+ * by falling back to a non-blorp clear technique.
+ */
+ return false;
+ }
+ brw->state.dirty.brw |= BRW_NEW_SURFACES;
+ }
+ }
+
brw_blorp_exec(intel, ¶ms);
+
+ if (is_fast_clear) {
+ /* Now that the fast clear has occurred, put the buffer in
+ * INTEL_MCS_STATE_CLEAR so that we won't waste time doing redundant
+ * clears.
+ */
+ irb->mt->mcs_state = INTEL_MCS_STATE_CLEAR;
+ }
}
return true;
diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index 2b999bf..80b7a0c 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -234,7 +234,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
/* BLORP is currently only supported on Gen6+. */
if (intel->gen >= 6) {
if (mask & BUFFER_BITS_COLOR) {
- if (brw_blorp_clear_color(intel, fb)) {
+ if (brw_blorp_clear_color(intel, fb, partial_clear)) {
debug_mask("blorp color", mask & BUFFER_BITS_COLOR);
mask &= ~BUFFER_BITS_COLOR;
}
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index d61151f..ce1f71d 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -557,6 +557,7 @@
#define GEN7_SURFACE_MCS_PITCH_MASK INTEL_MASK(11, 3)
/* Surface state DW7 */
+#define GEN7_SURFACE_CLEAR_COLOR_SHIFT 28
#define GEN7_SURFACE_SCS_R_SHIFT 25
#define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25)
#define GEN7_SURFACE_SCS_G_SHIFT 22
@@ -1615,6 +1616,7 @@ enum brw_wm_barycentric_interp_mode {
# define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11)
# define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10)
# define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9)
+# define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8)
# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
# define GEN7_PS_POSOFFSET_NONE (0 << 3)
# define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index 5b15931..caef56f 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -199,11 +199,13 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
is_render_target);
}
+ surf[7] = surface->mt->fast_clear_color_value;
+
if (intel->is_haswell) {
- surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
- SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
- SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
- SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
+ surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
+ SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
+ SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
+ SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
}
/* Emit relocation to surface contents */
@@ -584,6 +586,14 @@ gen7_blorp_emit_ps_config(struct brw_context *brw,
dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
}
+ switch (params->fast_clear_op) {
+ case GEN7_FAST_CLEAR_OP_FAST_CLEAR:
+ dw4 |= GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE;
+ break;
+ default:
+ break;
+ }
+
BEGIN_BATCH(8);
OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
OUT_BATCH(params->use_wm_prog ? prog_offset : 0);
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 3164f99..1a4e416 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -614,11 +614,13 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
irb->mt->mcs_mt, true /* is RT */);
}
+ surf[7] = irb->mt->fast_clear_color_value;
+
if (intel->is_haswell) {
- surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
- SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
- SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
- SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
+ surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
+ SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
+ SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
+ SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
}
drm_intel_bo_emit_reloc(brw->intel.batch.bo,
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index a75ac81..ba941c0 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -1201,6 +1201,54 @@ intel_miptree_alloc_mcs(struct intel_context *intel,
#endif
}
+
+/**
+ * Allocate the MCS miptree used for fast color clears of \c mt and store it
+ * in \c mt->mcs_mt, which must still be NULL on entry.
+ *
+ * \return true if an MCS miptree was created, false otherwise.
+ */
+bool
+intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel,
+                                 struct intel_mipmap_tree *mt)
+{
+#ifdef I915
+   assert(!"MCS not supported on i915");
+   return false;
+#else
+   assert(mt->mcs_mt == NULL);
+
+   /* The driver never looks inside the MCS buffer, so only its size and
+    * pitch have to be right; it is allocated as though it were R32.  An
+    * MCS tile covers 128 blocks horizontally while a Y-tiled R32 tile is
+    * 32 pixels across, so the width shrinks by the block width times 4.
+    * An MCS tile covers 256 blocks vertically while a Y-tiled R32 tile is
+    * 32 rows high, so the height shrinks by the block height times 8.
+    */
+   const gl_format mcs_format = MESA_FORMAT_R_UINT32;
+   unsigned blk_w;
+   unsigned blk_h;
+   intel_get_non_msrt_mcs_alignment(intel, mt, &blk_w, &blk_h);
+   const unsigned w_div = blk_w * 4;
+   const unsigned h_div = blk_h * 8;
+   const unsigned mcs_w = ALIGN(mt->logical_width0, w_div) / w_div;
+   const unsigned mcs_h = ALIGN(mt->logical_height0, h_div) / h_div;
+   assert(mt->logical_depth0 == 1);
+
+   struct intel_mipmap_tree *mcs =
+      intel_miptree_create(intel, mt->target, mcs_format,
+                           mt->first_level, mt->last_level,
+                           mcs_w, mcs_h, mt->logical_depth0,
+                           true,
+                           0 /* num_samples */,
+                           INTEL_MIPTREE_TILING_Y);
+   mt->mcs_mt = mcs;
+
+   return mcs != NULL;
+#endif
+}
+
+
/**
* Helper for intel_miptree_alloc_hiz() that sets
* \c mt->level[level].slice[layer].has_hiz. Return true if and only if
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
index e11d0d6..c44c8ea 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
@@ -459,6 +459,15 @@ struct intel_mipmap_tree
enum intel_mcs_state mcs_state;
#endif
+ /**
+ * The SURFACE_STATE bits associated with the last fast color clear to this
+ * color mipmap tree, if any.
+ *
+ * This value will only ever contain ones in bits 28-31, so it is safe to
+ * OR into dword 7 of SURFACE_STATE.
+ */
+ uint32_t fast_clear_color_value;
+
/* These are also refcounted:
*/
GLuint refcount;
@@ -479,6 +488,10 @@ intel_get_non_msrt_mcs_alignment(struct intel_context *intel,
struct intel_mipmap_tree *mt,
unsigned *width_px, unsigned *height);
+bool
+intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel,
+ struct intel_mipmap_tree *mt);
+
struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
GLenum target,
gl_format format,
--
1.8.3
More information about the mesa-dev
mailing list