[Mesa-dev] [PATCH 06/12] i965/gen7+: Implement fast color clear operation in BLORP.
Paul Berry
stereotype441 at gmail.com
Thu May 23 11:57:49 PDT 2013
On 22 May 2013 12:30, Ian Romanick <idr at freedesktop.org> wrote:
> On 05/21/2013 04:52 PM, Paul Berry wrote:
>
>> Since we defer allocation of the MCS miptree until the time of the
>> fast clear operation, this patch also implements creation of the MCS
>> miptree.
>>
>> In addition, this patch adds the field
>> intel_mipmap_tree::fast_clear_color_value, which holds the most recent
>> fast color clear value, if any. We use it to set the SURFACE_STATE's
>> clear color for render targets.
>> ---
>> src/mesa/drivers/dri/i965/brw_**blorp.cpp | 1 +
>> src/mesa/drivers/dri/i965/brw_**blorp.h | 11 +-
>> src/mesa/drivers/dri/i965/brw_**blorp_clear.cpp | 143
>> +++++++++++++++++++++-
>> src/mesa/drivers/dri/i965/brw_**clear.c | 2 +-
>> src/mesa/drivers/dri/i965/brw_**defines.h | 2 +
>> src/mesa/drivers/dri/i965/**gen7_blorp.cpp | 18 ++-
>> src/mesa/drivers/dri/i965/**gen7_wm_surface_state.c | 10 +-
>> src/mesa/drivers/dri/intel/**intel_mipmap_tree.c | 47 +++++++
>> src/mesa/drivers/dri/intel/**intel_mipmap_tree.h | 13 ++
>> 9 files changed, 233 insertions(+), 14 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/**brw_blorp.cpp
>> b/src/mesa/drivers/dri/i965/**brw_blorp.cpp
>> index 20f7153..c6019d1 100644
>> --- a/src/mesa/drivers/dri/i965/**brw_blorp.cpp
>> +++ b/src/mesa/drivers/dri/i965/**brw_blorp.cpp
>> @@ -147,6 +147,7 @@ brw_blorp_params::brw_blorp_**params()
>> y1(0),
>> depth_format(0),
>> hiz_op(GEN6_HIZ_OP_NONE),
>> + fast_clear_op(GEN7_FAST_CLEAR_**OP_NONE),
>> num_samples(0),
>> use_wm_prog(false)
>> {
>> diff --git a/src/mesa/drivers/dri/i965/**brw_blorp.h
>> b/src/mesa/drivers/dri/i965/**brw_blorp.h
>> index 6360a62..687d7eb 100644
>> --- a/src/mesa/drivers/dri/i965/**brw_blorp.h
>> +++ b/src/mesa/drivers/dri/i965/**brw_blorp.h
>> @@ -46,7 +46,8 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
>> bool mirror_x, bool mirror_y);
>>
>> bool
>> -brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer
>> *fb);
>> +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer
>> *fb,
>> + bool partial_clear);
>>
>> #ifdef __cplusplus
>> } /* end extern "C" */
>> @@ -195,6 +196,13 @@ struct brw_blorp_prog_data
>> bool persample_msaa_dispatch;
>> };
>>
>> +
>> +enum gen7_fast_clear_op {
>> + GEN7_FAST_CLEAR_OP_NONE,
>> + GEN7_FAST_CLEAR_OP_FAST_CLEAR,
>> +};
>> +
>> +
>> class brw_blorp_params
>> {
>> public:
>> @@ -212,6 +220,7 @@ public:
>> brw_blorp_surface_info src;
>> brw_blorp_surface_info dst;
>> enum gen6_hiz_op hiz_op;
>> + enum gen7_fast_clear_op fast_clear_op;
>> unsigned num_samples;
>> bool use_wm_prog;
>> brw_blorp_wm_push_constants wm_push_consts;
>> diff --git a/src/mesa/drivers/dri/i965/**brw_blorp_clear.cpp
>> b/src/mesa/drivers/dri/i965/**brw_blorp_clear.cpp
>> index 28d7ad0..675289b 100644
>> --- a/src/mesa/drivers/dri/i965/**brw_blorp_clear.cpp
>> +++ b/src/mesa/drivers/dri/i965/**brw_blorp_clear.cpp
>> @@ -49,7 +49,8 @@ public:
>> brw_blorp_clear_params(struct brw_context *brw,
>> struct gl_framebuffer *fb,
>> struct gl_renderbuffer *rb,
>> - GLubyte *color_mask);
>> + GLubyte *color_mask,
>> + bool partial_clear);
>>
>> virtual uint32_t get_wm_prog(struct brw_context *brw,
>> brw_blorp_prog_data **prog_data) const;
>> @@ -105,10 +106,49 @@ brw_blorp_clear_program::~brw_**
>> blorp_clear_program()
>> ralloc_free(mem_ctx);
>> }
>>
>> +
>> +/**
>> + * Determine if fast color clear supports the given clear color.
>> + *
>> + * Fast color clear can only clear to color values of 1.0 or 0.0. At the
>> + * moment we only support floating point buffers.
>> + */
>> +static bool
>> +is_color_fast_clear_**compatible(gl_format format,
>> + const union gl_color_union *color)
>> +{
>> + if (_mesa_is_format_integer_**color(format))
>> + return false;
>> +
>> + for (int i = 0; i < 4; i++) {
>> + if (color->f[i] != 0.0 && color->f[i] != 1.0)
>> + return false;
>>
>
> Should this generate a perf debug message? Eric may have an opinion about
> generating warnings for the non-fast path...
Sounds reasonable to me. We already have perf debug messages for other
things that can inhibit fast clears (e.g. scissor preventing fast depth
clear). I'll add it unless I hear an objection.
>
> + }
>> + return true;
>> +}
>> +
>> +
>> +/**
>> + * Convert the given color to a bitfield suitable for ORing into DWORD 7
>> of
>> + * SURFACE_STATE.
>> + */
>> +static uint32_t
>> +compute_fast_clear_color_**bits(const union gl_color_union *color)
>> +{
>> + uint32_t bits = 0;
>> + for (int i = 0; i < 4; i++) {
>> + if (color->f[i] != 0.0)
>> + bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_**SHIFT + (3 - i));
>> + }
>> + return bits;
>> +}
>> +
>> +
>> brw_blorp_clear_params::brw_**blorp_clear_params(struct brw_context
>> *brw,
>> struct gl_framebuffer
>> *fb,
>> struct gl_renderbuffer
>> *rb,
>> - GLubyte *color_mask)
>> + GLubyte *color_mask,
>> + bool partial_clear)
>> {
>> struct intel_context *intel = &brw->intel;
>> struct gl_context *ctx = &intel->ctx;
>> @@ -163,6 +203,56 @@ brw_blorp_clear_params::brw_**blorp_clear_params(struct
>> brw_context *brw,
>> wm_prog_key.use_simd16_**replicated_data = false;
>> }
>> }
>> +
>> + /* If we can do this as a fast color clear, do so. */
>> + if (irb->mt->mcs_state != INTEL_MCS_STATE_NONE && !partial_clear &&
>> + wm_prog_key.use_simd16_**replicated_data &&
>> + is_color_fast_clear_**compatible(format,
>> &ctx->Color.ClearColor)) {
>> + memset(push_consts, 0xff, 4*sizeof(float));
>> + fast_clear_op = GEN7_FAST_CLEAR_OP_FAST_CLEAR;
>> +
>> + /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
>> + * Target(s)", beneath the "Fast Color Clear" bullet (p327):
>> + *
>> + * Clear pass must have a clear rectangle that must follow
>> alignment
>> + * rules in terms of pixels and lines as shown in the table
>> + * below. Further, the clear-rectangle height and width must be
>> + * multiple of the following dimensions. If the height and
>> width of
>> + * the render target being cleared do not meet these
>> requirements,
>> + * an MCS buffer can be created such that it follows the
>> requirement
>> + * and covers the RT.
>> + *
>> + * The alignment size in the table that follows is related to the
>> + * alignment size returned by intel_get_non_msrt_mcs_alignment(),
>> but
>> + * with X alignment multiplied by 16 and Y alignment multiplied by
>> 32.
>> + */
>> + unsigned x_align, y_align;
>> + intel_get_non_msrt_mcs_**alignment(intel, irb->mt, &x_align,
>> &y_align);
>> + x_align *= 16;
>> + y_align *= 32;
>> + x0 = ROUND_DOWN_TO(x0, x_align);
>> + y0 = ROUND_DOWN_TO(y0, y_align);
>> + x1 = ALIGN(x1, x_align);
>> + y1 = ALIGN(y1, y_align);
>> +
>> + /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
>> + * Target(s)", beneath the "Fast Color Clear" bullet (p327):
>> + *
>> + * In order to optimize the performance MCS buffer (when bound
>> to 1X
>> + * RT) clear similarly to MCS buffer clear for MSRT case,
>> clear rect
>> + * is required to be scaled by the following factors in the
>> + * horizontal and vertical directions:
>> + *
>> + * The X and Y scale down factors in the table that follows are
>> each
>> + * equal to half the alignment value computed above.
>> + */
>> + unsigned x_scaledown = x_align / 2;
>> + unsigned y_scaledown = y_align / 2;
>> + x0 /= x_scaledown;
>> + y0 /= y_scaledown;
>> + x1 /= x_scaledown;
>> + y1 /= y_scaledown;
>> + }
>> }
>>
>> uint32_t
>> @@ -266,7 +356,8 @@ brw_blorp_clear_program::**compile(struct
>> brw_context *brw,
>>
>> extern "C" {
>> bool
>> -brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer
>> *fb)
>> +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer
>> *fb,
>> + bool partial_clear)
>> {
>> struct gl_context *ctx = &intel->ctx;
>> struct brw_context *brw = brw_context(ctx);
>> @@ -288,6 +379,7 @@ brw_blorp_clear_color(struct intel_context *intel,
>> struct gl_framebuffer *fb)
>>
>> for (unsigned buf = 0; buf < ctx->DrawBuffer->_**NumColorDrawBuffers;
>> buf++) {
>> struct gl_renderbuffer *rb = ctx->DrawBuffer->_**
>> ColorDrawBuffers[buf];
>> + struct intel_renderbuffer *irb = intel_renderbuffer(rb);
>>
>> /* If this is an ES2 context or GL_ARB_ES2_compatibility is
>> supported,
>> * the framebuffer can be complete with some attachments missing.
>> In
>> @@ -296,8 +388,51 @@ brw_blorp_clear_color(struct intel_context *intel,
>> struct gl_framebuffer *fb)
>> if (rb == NULL)
>> continue;
>>
>> - brw_blorp_clear_params params(brw, fb, rb,
>> ctx->Color.ColorMask[buf]);
>> + brw_blorp_clear_params params(brw, fb, rb,
>> ctx->Color.ColorMask[buf],
>> + partial_clear);
>> +
>> + bool is_fast_clear =
>> + (params.fast_clear_op == GEN7_FAST_CLEAR_OP_FAST_CLEAR)**;
>> + if (is_fast_clear) {
>> + /* Record the clear color in the miptree so that it will be
>> + * programmed in SURFACE_STATE by later rendering and resolve
>> + * operations.
>> + */
>> + uint32_t new_color_value =
>> + compute_fast_clear_color_bits(**&ctx->Color.ClearColor);
>> + if (irb->mt->fast_clear_color_**value != new_color_value) {
>> + irb->mt->fast_clear_color_**value = new_color_value;
>> + brw->state.dirty.brw |= BRW_NEW_SURFACES;
>> + }
>> +
>> + /* If the buffer is already in INTEL_MCS_STATE_CLEAR, the clear
>> is
>> + * redundant and can be skipped.
>> + */
>> + if (irb->mt->mcs_state == INTEL_MCS_STATE_CLEAR)
>> + continue;
>> +
>> + /* If the MCS buffer hasn't been allocated yet, we need to
>> allocate
>> + * it now.
>> + */
>> + if (!irb->mt->mcs_mt &&
>> + !intel_miptree_alloc_non_msrt_**mcs(intel, irb->mt)) {
>> + /* MCS allocation failed--probably this will only happen in
>> + * out-of-memory conditions. But in any case, try to
>> recover by
>> + * falling back to a non-blorp clear technique.
>> + */
>> + return false;
>> + }
>> + }
>> +
>> brw_blorp_exec(intel, &params);
>> +
>> + if (is_fast_clear) {
>> + /* Now that the fast clear has occurred, put the buffer in
>> + * INTEL_MCS_STATE_CLEAR so that we won't waste time doing
>> redundant
>> + * clears.
>> + */
>> + irb->mt->mcs_state = INTEL_MCS_STATE_CLEAR;
>> + }
>> }
>>
>> return true;
>> diff --git a/src/mesa/drivers/dri/i965/**brw_clear.c
>> b/src/mesa/drivers/dri/i965/**brw_clear.c
>> index 2b999bf..80b7a0c 100644
>> --- a/src/mesa/drivers/dri/i965/**brw_clear.c
>> +++ b/src/mesa/drivers/dri/i965/**brw_clear.c
>> @@ -234,7 +234,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
>> /* BLORP is currently only supported on Gen6+. */
>> if (intel->gen >= 6) {
>> if (mask & BUFFER_BITS_COLOR) {
>> - if (brw_blorp_clear_color(intel, fb)) {
>> + if (brw_blorp_clear_color(intel, fb, partial_clear)) {
>> debug_mask("blorp color", mask & BUFFER_BITS_COLOR);
>> mask &= ~BUFFER_BITS_COLOR;
>> }
>> diff --git a/src/mesa/drivers/dri/i965/**brw_defines.h
>> b/src/mesa/drivers/dri/i965/**brw_defines.h
>> index fedd78c..90b16ab 100644
>> --- a/src/mesa/drivers/dri/i965/**brw_defines.h
>> +++ b/src/mesa/drivers/dri/i965/**brw_defines.h
>> @@ -555,6 +555,7 @@
>> #define GEN7_SURFACE_MCS_PITCH_MASK INTEL_MASK(11, 3)
>>
>> /* Surface state DW7 */
>> +#define GEN7_SURFACE_CLEAR_COLOR_SHIFT 28
>> #define GEN7_SURFACE_SCS_R_SHIFT 25
>> #define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25)
>> #define GEN7_SURFACE_SCS_G_SHIFT 22
>> @@ -1613,6 +1614,7 @@ enum brw_wm_barycentric_interp_mode {
>> # define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11)
>> # define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10)
>> # define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9)
>> +# define GEN7_PS_RENDER_TARGET_FAST_**CLEAR_ENABLE (1 << 8)
>> # define GEN7_PS_DUAL_SOURCE_BLEND_**ENABLE (1 << 7)
>> # define GEN7_PS_POSOFFSET_NONE (0 << 3)
>> # define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
>> diff --git a/src/mesa/drivers/dri/i965/**gen7_blorp.cpp
>> b/src/mesa/drivers/dri/i965/**gen7_blorp.cpp
>> index 2d09c7f..5f7e10f 100644
>> --- a/src/mesa/drivers/dri/i965/**gen7_blorp.cpp
>> +++ b/src/mesa/drivers/dri/i965/**gen7_blorp.cpp
>> @@ -202,11 +202,13 @@ gen7_blorp_emit_surface_state(**struct brw_context
>> *brw,
>> is_render_target);
>> }
>>
>> + surf[7] = surface->mt->fast_clear_color_**value;
>> +
>> if (intel->is_haswell) {
>> - surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
>> - SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
>> - SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
>> - SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
>> + surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
>> + SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
>> + SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
>> + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
>> }
>>
>> /* Emit relocation to surface contents */
>> @@ -587,6 +589,14 @@ gen7_blorp_emit_ps_config(**struct brw_context *brw,
>> dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_*
>> *SHIFT_0;
>> }
>>
>> + switch (params->fast_clear_op) {
>> + case GEN7_FAST_CLEAR_OP_FAST_CLEAR:
>> + dw4 |= GEN7_PS_RENDER_TARGET_FAST_**CLEAR_ENABLE;
>> + break;
>> + default:
>> + break;
>> + }
>> +
>> BEGIN_BATCH(8);
>> OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
>> OUT_BATCH(params->use_wm_prog ? prog_offset : 0);
>> diff --git a/src/mesa/drivers/dri/i965/**gen7_wm_surface_state.c
>> b/src/mesa/drivers/dri/i965/**gen7_wm_surface_state.c
>> index f5d2e43..fda4b2c 100644
>> --- a/src/mesa/drivers/dri/i965/**gen7_wm_surface_state.c
>> +++ b/src/mesa/drivers/dri/i965/**gen7_wm_surface_state.c
>> @@ -589,11 +589,13 @@ gen7_update_renderbuffer_**surface(struct
>> brw_context *brw,
>> irb->mt->mcs_mt, true /* is RT */);
>> }
>>
>> + surf[7] = irb->mt->fast_clear_color_**value;
>> +
>> if (intel->is_haswell) {
>> - surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
>> - SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
>> - SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
>> - SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
>> + surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
>> + SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
>> + SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
>> + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
>> }
>>
>> drm_intel_bo_emit_reloc(brw->intel.batch.bo,
>> diff --git a/src/mesa/drivers/dri/intel/**intel_mipmap_tree.c
>> b/src/mesa/drivers/dri/intel/**intel_mipmap_tree.c
>> index 9d1b91a..657532f 100644
>> --- a/src/mesa/drivers/dri/intel/**intel_mipmap_tree.c
>> +++ b/src/mesa/drivers/dri/intel/**intel_mipmap_tree.c
>> @@ -1163,6 +1163,53 @@ intel_miptree_alloc_mcs(struct intel_context
>> *intel,
>> #endif
>> }
>>
>> +
>> +bool
>> +intel_miptree_alloc_non_msrt_**mcs(struct intel_context *intel,
>> + struct intel_mipmap_tree *mt)
>> +{
>> +#ifdef I915
>> + assert(!"MCS not supported on i915");
>> +#else
>> + assert(mt->mcs_mt == NULL);
>> +
>> + /* The format of the MCS buffer is opaque to the driver; all that
>> matters
>> + * is that we get its size and pitch right. We'll pretend that the
>> format
>> + * is R32. Since an MCS tile covers 128 blocks horizontally, and a
>> Y-tiled
>> + * R32 buffer is 32 pixels across, we'll need to scale the width down
>> by
>> + * the block width and then a further factor of 4. Since an MCS tile
>> + * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows
>> high,
>> + * we'll need to scale the height down by the block height and then a
>> + * further factor of 8.
>> + */
>> + const gl_format format = MESA_FORMAT_R_UINT32;
>> + unsigned block_width_px;
>> + unsigned block_height;
>> + intel_get_non_msrt_mcs_**alignment(intel, mt, &block_width_px,
>> &block_height);
>> + unsigned width_divisor = block_width_px * 4;
>> + unsigned height_divisor = block_height * 8;
>> + unsigned mcs_width =
>> + ALIGN(mt->logical_width0, width_divisor) / width_divisor;
>> + unsigned mcs_height =
>> + ALIGN(mt->logical_height0, height_divisor) / height_divisor;
>> + assert(mt->logical_depth0 == 1);
>> + mt->mcs_mt = intel_miptree_create(intel,
>> + mt->target,
>> + format,
>> + mt->first_level,
>> + mt->last_level,
>> + mcs_width,
>> + mcs_height,
>> + mt->logical_depth0,
>> + true,
>> + 0 /* num_samples */,
>> + true /* force_y_tiling */);
>> +
>> + return mt->mcs_mt;
>> +#endif
>> +}
>> +
>> +
>> /**
>> * Helper for intel_miptree_alloc_hiz() that sets
>> * \c mt->level[level].slice[layer].**has_hiz. Return true if and only
>> if
>> diff --git a/src/mesa/drivers/dri/intel/**intel_mipmap_tree.h
>> b/src/mesa/drivers/dri/intel/**intel_mipmap_tree.h
>> index 5cd69cb..4c9ff94 100644
>> --- a/src/mesa/drivers/dri/intel/**intel_mipmap_tree.h
>> +++ b/src/mesa/drivers/dri/intel/**intel_mipmap_tree.h
>> @@ -463,6 +463,15 @@ struct intel_mipmap_tree
>> enum intel_mcs_state mcs_state;
>> #endif
>>
>> + /**
>> + * The SURFACE_STATE bits associated with the last fast color clear
>> to this
>> + * color mipmap tree, if any.
>> + *
>> + * This value will only ever contain ones in bits 28-31, so it is
>> safe to
>> + * OR into dword 7 of SURFACE_STATE.
>> + */
>> + uint32_t fast_clear_color_value;
>> +
>> /* These are also refcounted:
>> */
>> GLuint refcount;
>> @@ -477,6 +486,10 @@ intel_get_non_msrt_mcs_**alignment(struct
>> intel_context *intel,
>> struct intel_mipmap_tree *mt,
>> unsigned *width_px, unsigned *height);
>>
>> +bool
>> +intel_miptree_alloc_non_msrt_**mcs(struct intel_context *intel,
>> + struct intel_mipmap_tree *mt);
>> +
>> struct intel_mipmap_tree *intel_miptree_create(struct intel_context
>> *intel,
>> GLenum target,
>> gl_format format,
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130523/37041b4f/attachment-0001.html>
More information about the mesa-dev
mailing list