[Mesa-dev] [PATCH 06/12] i965/gen7+: Implement fast color clear operation in BLORP.
Ian Romanick
idr at freedesktop.org
Wed May 22 12:30:43 PDT 2013
On 05/21/2013 04:52 PM, Paul Berry wrote:
> Since we defer allocation of the MCS miptree until the time of the
> fast clear operation, this patch also implements creation of the MCS
> miptree.
>
> In addition, this patch adds the field
> intel_mipmap_tree::fast_clear_color_value, which holds the most recent
> fast color clear value, if any. We use it to set the SURFACE_STATE's
> clear color for render targets.
> ---
> src/mesa/drivers/dri/i965/brw_blorp.cpp | 1 +
> src/mesa/drivers/dri/i965/brw_blorp.h | 11 +-
> src/mesa/drivers/dri/i965/brw_blorp_clear.cpp | 143 +++++++++++++++++++++-
> src/mesa/drivers/dri/i965/brw_clear.c | 2 +-
> src/mesa/drivers/dri/i965/brw_defines.h | 2 +
> src/mesa/drivers/dri/i965/gen7_blorp.cpp | 18 ++-
> src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 10 +-
> src/mesa/drivers/dri/intel/intel_mipmap_tree.c | 47 +++++++
> src/mesa/drivers/dri/intel/intel_mipmap_tree.h | 13 ++
> 9 files changed, 233 insertions(+), 14 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
> index 20f7153..c6019d1 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
> @@ -147,6 +147,7 @@ brw_blorp_params::brw_blorp_params()
> y1(0),
> depth_format(0),
> hiz_op(GEN6_HIZ_OP_NONE),
> + fast_clear_op(GEN7_FAST_CLEAR_OP_NONE),
> num_samples(0),
> use_wm_prog(false)
> {
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
> index 6360a62..687d7eb 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
> @@ -46,7 +46,8 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
> bool mirror_x, bool mirror_y);
>
> bool
> -brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb);
> +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb,
> + bool partial_clear);
>
> #ifdef __cplusplus
> } /* end extern "C" */
> @@ -195,6 +196,13 @@ struct brw_blorp_prog_data
> bool persample_msaa_dispatch;
> };
>
> +
> +enum gen7_fast_clear_op {
> + GEN7_FAST_CLEAR_OP_NONE,
> + GEN7_FAST_CLEAR_OP_FAST_CLEAR,
> +};
> +
> +
> class brw_blorp_params
> {
> public:
> @@ -212,6 +220,7 @@ public:
> brw_blorp_surface_info src;
> brw_blorp_surface_info dst;
> enum gen6_hiz_op hiz_op;
> + enum gen7_fast_clear_op fast_clear_op;
> unsigned num_samples;
> bool use_wm_prog;
> brw_blorp_wm_push_constants wm_push_consts;
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
> index 28d7ad0..675289b 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
> @@ -49,7 +49,8 @@ public:
> brw_blorp_clear_params(struct brw_context *brw,
> struct gl_framebuffer *fb,
> struct gl_renderbuffer *rb,
> - GLubyte *color_mask);
> + GLubyte *color_mask,
> + bool partial_clear);
>
> virtual uint32_t get_wm_prog(struct brw_context *brw,
> brw_blorp_prog_data **prog_data) const;
> @@ -105,10 +106,49 @@ brw_blorp_clear_program::~brw_blorp_clear_program()
> ralloc_free(mem_ctx);
> }
>
> +
> +/**
> + * Determine if fast color clear supports the given clear color.
> + *
> + * Fast color clear can only clear to color values of 1.0 or 0.0. At the
> + * moment we only support floating point buffers.
> + */
> +static bool
> +is_color_fast_clear_compatible(gl_format format,
> + const union gl_color_union *color)
> +{
> + if (_mesa_is_format_integer_color(format))
> + return false;
> +
> + for (int i = 0; i < 4; i++) {
> + if (color->f[i] != 0.0 && color->f[i] != 1.0)
> + return false;
Should this generate a perf debug message? Eric may have an opinion
about generating warnings for the non-fast path...
> + }
> + return true;
> +}
> +
> +
> +/**
> + * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
> + * SURFACE_STATE.
> + */
> +static uint32_t
> +compute_fast_clear_color_bits(const union gl_color_union *color)
> +{
> + uint32_t bits = 0;
> + for (int i = 0; i < 4; i++) {
> + if (color->f[i] != 0.0)
> + bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
> + }
> + return bits;
> +}
> +
> +
> brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
> struct gl_framebuffer *fb,
> struct gl_renderbuffer *rb,
> - GLubyte *color_mask)
> + GLubyte *color_mask,
> + bool partial_clear)
> {
> struct intel_context *intel = &brw->intel;
> struct gl_context *ctx = &intel->ctx;
> @@ -163,6 +203,56 @@ brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
> wm_prog_key.use_simd16_replicated_data = false;
> }
> }
> +
> + /* If we can do this as a fast color clear, do so. */
> + if (irb->mt->mcs_state != INTEL_MCS_STATE_NONE && !partial_clear &&
> + wm_prog_key.use_simd16_replicated_data &&
> + is_color_fast_clear_compatible(format, &ctx->Color.ClearColor)) {
> + memset(push_consts, 0xff, 4*sizeof(float));
> + fast_clear_op = GEN7_FAST_CLEAR_OP_FAST_CLEAR;
> +
> + /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
> + * Target(s)", beneath the "Fast Color Clear" bullet (p327):
> + *
> + * Clear pass must have a clear rectangle that must follow alignment
> + * rules in terms of pixels and lines as shown in the table
> + * below. Further, the clear-rectangle height and width must be
> + * multiple of the following dimensions. If the height and width of
> + * the render target being cleared do not meet these requirements,
> + * an MCS buffer can be created such that it follows the requirement
> + * and covers the RT.
> + *
> + * The alignment size in the table that follows is related to the
> + * alignment size returned by intel_get_non_msrt_mcs_alignment(), but
> + * with X alignment multiplied by 16 and Y alignment multiplied by 32.
> + */
> + unsigned x_align, y_align;
> + intel_get_non_msrt_mcs_alignment(intel, irb->mt, &x_align, &y_align);
> + x_align *= 16;
> + y_align *= 32;
> + x0 = ROUND_DOWN_TO(x0, x_align);
> + y0 = ROUND_DOWN_TO(y0, y_align);
> + x1 = ALIGN(x1, x_align);
> + y1 = ALIGN(y1, y_align);
> +
> + /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
> + * Target(s)", beneath the "Fast Color Clear" bullet (p327):
> + *
> + * In order to optimize the performance MCS buffer (when bound to 1X
> + * RT) clear similarly to MCS buffer clear for MSRT case, clear rect
> + * is required to be scaled by the following factors in the
> + * horizontal and vertical directions:
> + *
> + * The X and Y scale down factors in the table that follows are each
> + * equal to half the alignment value computed above.
> + */
> + unsigned x_scaledown = x_align / 2;
> + unsigned y_scaledown = y_align / 2;
> + x0 /= x_scaledown;
> + y0 /= y_scaledown;
> + x1 /= x_scaledown;
> + y1 /= y_scaledown;
> + }
> }
>
> uint32_t
> @@ -266,7 +356,8 @@ brw_blorp_clear_program::compile(struct brw_context *brw,
>
> extern "C" {
> bool
> -brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
> +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb,
> + bool partial_clear)
> {
> struct gl_context *ctx = &intel->ctx;
> struct brw_context *brw = brw_context(ctx);
> @@ -288,6 +379,7 @@ brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
>
> for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
> struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
> + struct intel_renderbuffer *irb = intel_renderbuffer(rb);
>
> /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
> * the framebuffer can be complete with some attachments missing. In
> @@ -296,8 +388,51 @@ brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
> if (rb == NULL)
> continue;
>
> - brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf]);
> + brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf],
> + partial_clear);
> +
> + bool is_fast_clear =
> + (params.fast_clear_op == GEN7_FAST_CLEAR_OP_FAST_CLEAR);
> + if (is_fast_clear) {
> + /* Record the clear color in the miptree so that it will be
> + * programmed in SURFACE_STATE by later rendering and resolve
> + * operations.
> + */
> + uint32_t new_color_value =
> + compute_fast_clear_color_bits(&ctx->Color.ClearColor);
> + if (irb->mt->fast_clear_color_value != new_color_value) {
> + irb->mt->fast_clear_color_value = new_color_value;
> + brw->state.dirty.brw |= BRW_NEW_SURFACES;
> + }
> +
> + /* If the buffer is already in INTEL_MCS_STATE_CLEAR, the clear is
> + * redundant and can be skipped.
> + */
> + if (irb->mt->mcs_state == INTEL_MCS_STATE_CLEAR)
> + continue;
> +
> + /* If the MCS buffer hasn't been allocated yet, we need to allocate
> + * it now.
> + */
> + if (!irb->mt->mcs_mt &&
> + !intel_miptree_alloc_non_msrt_mcs(intel, irb->mt)) {
> + /* MCS allocation failed--probably this will only happen in
> + * out-of-memory conditions. But in any case, try to recover by
> + * falling back to a non-blorp clear technique.
> + */
> + return false;
> + }
> + }
> +
> brw_blorp_exec(intel, &params);
> +
> + if (is_fast_clear) {
> + /* Now that the fast clear has occurred, put the buffer in
> + * INTEL_MCS_STATE_CLEAR so that we won't waste time doing redundant
> + * clears.
> + */
> + irb->mt->mcs_state = INTEL_MCS_STATE_CLEAR;
> + }
> }
>
> return true;
> diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
> index 2b999bf..80b7a0c 100644
> --- a/src/mesa/drivers/dri/i965/brw_clear.c
> +++ b/src/mesa/drivers/dri/i965/brw_clear.c
> @@ -234,7 +234,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
> /* BLORP is currently only supported on Gen6+. */
> if (intel->gen >= 6) {
> if (mask & BUFFER_BITS_COLOR) {
> - if (brw_blorp_clear_color(intel, fb)) {
> + if (brw_blorp_clear_color(intel, fb, partial_clear)) {
> debug_mask("blorp color", mask & BUFFER_BITS_COLOR);
> mask &= ~BUFFER_BITS_COLOR;
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index fedd78c..90b16ab 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -555,6 +555,7 @@
> #define GEN7_SURFACE_MCS_PITCH_MASK INTEL_MASK(11, 3)
>
> /* Surface state DW7 */
> +#define GEN7_SURFACE_CLEAR_COLOR_SHIFT 28
> #define GEN7_SURFACE_SCS_R_SHIFT 25
> #define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25)
> #define GEN7_SURFACE_SCS_G_SHIFT 22
> @@ -1613,6 +1614,7 @@ enum brw_wm_barycentric_interp_mode {
> # define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11)
> # define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10)
> # define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9)
> +# define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8)
> # define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
> # define GEN7_PS_POSOFFSET_NONE (0 << 3)
> # define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
> diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> index 2d09c7f..5f7e10f 100644
> --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> @@ -202,11 +202,13 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
> is_render_target);
> }
>
> + surf[7] = surface->mt->fast_clear_color_value;
> +
> if (intel->is_haswell) {
> - surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
> - SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
> - SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
> - SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
> + surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
> + SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
> + SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
> + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
> }
>
> /* Emit relocation to surface contents */
> @@ -587,6 +589,14 @@ gen7_blorp_emit_ps_config(struct brw_context *brw,
> dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
> }
>
> + switch (params->fast_clear_op) {
> + case GEN7_FAST_CLEAR_OP_FAST_CLEAR:
> + dw4 |= GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE;
> + break;
> + default:
> + break;
> + }
> +
> BEGIN_BATCH(8);
> OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
> OUT_BATCH(params->use_wm_prog ? prog_offset : 0);
> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> index f5d2e43..fda4b2c 100644
> --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> @@ -589,11 +589,13 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
> irb->mt->mcs_mt, true /* is RT */);
> }
>
> + surf[7] = irb->mt->fast_clear_color_value;
> +
> if (intel->is_haswell) {
> - surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
> - SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
> - SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
> - SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
> + surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
> + SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
> + SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
> + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
> }
>
> drm_intel_bo_emit_reloc(brw->intel.batch.bo,
> diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
> index 9d1b91a..657532f 100644
> --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
> @@ -1163,6 +1163,53 @@ intel_miptree_alloc_mcs(struct intel_context *intel,
> #endif
> }
>
> +
> +bool
> +intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel,
> + struct intel_mipmap_tree *mt)
> +{
> +#ifdef I915
> + assert(!"MCS not supported on i915");
> +#else
> + assert(mt->mcs_mt == NULL);
> +
> + /* The format of the MCS buffer is opaque to the driver; all that matters
> + * is that we get its size and pitch right. We'll pretend that the format
> + * is R32. Since an MCS tile covers 128 blocks horizontally, and a Y-tiled
> + * R32 buffer is 32 pixels across, we'll need to scale the width down by
> + * the block width and then a further factor of 4. Since an MCS tile
> + * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high,
> + * we'll need to scale the height down by the block height and then a
> + * further factor of 8.
> + */
> + const gl_format format = MESA_FORMAT_R_UINT32;
> + unsigned block_width_px;
> + unsigned block_height;
> + intel_get_non_msrt_mcs_alignment(intel, mt, &block_width_px, &block_height);
> + unsigned width_divisor = block_width_px * 4;
> + unsigned height_divisor = block_height * 8;
> + unsigned mcs_width =
> + ALIGN(mt->logical_width0, width_divisor) / width_divisor;
> + unsigned mcs_height =
> + ALIGN(mt->logical_height0, height_divisor) / height_divisor;
> + assert(mt->logical_depth0 == 1);
> + mt->mcs_mt = intel_miptree_create(intel,
> + mt->target,
> + format,
> + mt->first_level,
> + mt->last_level,
> + mcs_width,
> + mcs_height,
> + mt->logical_depth0,
> + true,
> + 0 /* num_samples */,
> + true /* force_y_tiling */);
> +
> + return mt->mcs_mt;
> +#endif
> +}
> +
> +
> /**
> * Helper for intel_miptree_alloc_hiz() that sets
> * \c mt->level[level].slice[layer].has_hiz. Return true if and only if
> diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
> index 5cd69cb..4c9ff94 100644
> --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
> +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
> @@ -463,6 +463,15 @@ struct intel_mipmap_tree
> enum intel_mcs_state mcs_state;
> #endif
>
> + /**
> + * The SURFACE_STATE bits associated with the last fast color clear to this
> + * color mipmap tree, if any.
> + *
> + * This value will only ever contain ones in bits 28-31, so it is safe to
> + * OR into dword 7 of SURFACE_STATE.
> + */
> + uint32_t fast_clear_color_value;
> +
> /* These are also refcounted:
> */
> GLuint refcount;
> @@ -477,6 +486,10 @@ intel_get_non_msrt_mcs_alignment(struct intel_context *intel,
> struct intel_mipmap_tree *mt,
> unsigned *width_px, unsigned *height);
>
> +bool
> +intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel,
> + struct intel_mipmap_tree *mt);
> +
> struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
> GLenum target,
> gl_format format,
>
More information about the mesa-dev
mailing list