[Mesa-dev] [PATCH 06/12] i965/gen7+: Implement fast color clear operation in BLORP.

Ian Romanick idr at freedesktop.org
Wed May 22 12:30:43 PDT 2013


On 05/21/2013 04:52 PM, Paul Berry wrote:
> Since we defer allocation of the MCS miptree until the time of the
> fast clear operation, this patch also implements creation of the MCS
> miptree.
>
> In addition, this patch adds the field
> intel_mipmap_tree::fast_clear_color_value, which holds the most recent
> fast color clear value, if any. We use it to set the SURFACE_STATE's
> clear color for render targets.
> ---
>   src/mesa/drivers/dri/i965/brw_blorp.cpp           |   1 +
>   src/mesa/drivers/dri/i965/brw_blorp.h             |  11 +-
>   src/mesa/drivers/dri/i965/brw_blorp_clear.cpp     | 143 +++++++++++++++++++++-
>   src/mesa/drivers/dri/i965/brw_clear.c             |   2 +-
>   src/mesa/drivers/dri/i965/brw_defines.h           |   2 +
>   src/mesa/drivers/dri/i965/gen7_blorp.cpp          |  18 ++-
>   src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |  10 +-
>   src/mesa/drivers/dri/intel/intel_mipmap_tree.c    |  47 +++++++
>   src/mesa/drivers/dri/intel/intel_mipmap_tree.h    |  13 ++
>   9 files changed, 233 insertions(+), 14 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
> index 20f7153..c6019d1 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
> @@ -147,6 +147,7 @@ brw_blorp_params::brw_blorp_params()
>        y1(0),
>        depth_format(0),
>        hiz_op(GEN6_HIZ_OP_NONE),
> +     fast_clear_op(GEN7_FAST_CLEAR_OP_NONE),
>        num_samples(0),
>        use_wm_prog(false)
>   {
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
> index 6360a62..687d7eb 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
> @@ -46,7 +46,8 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
>                           bool mirror_x, bool mirror_y);
>
>   bool
> -brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb);
> +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb,
> +                      bool partial_clear);
>
>   #ifdef __cplusplus
>   } /* end extern "C" */
> @@ -195,6 +196,13 @@ struct brw_blorp_prog_data
>      bool persample_msaa_dispatch;
>   };
>
> +
> +enum gen7_fast_clear_op {
> +   GEN7_FAST_CLEAR_OP_NONE,
> +   GEN7_FAST_CLEAR_OP_FAST_CLEAR,
> +};
> +
> +
>   class brw_blorp_params
>   {
>   public:
> @@ -212,6 +220,7 @@ public:
>      brw_blorp_surface_info src;
>      brw_blorp_surface_info dst;
>      enum gen6_hiz_op hiz_op;
> +   enum gen7_fast_clear_op fast_clear_op;
>      unsigned num_samples;
>      bool use_wm_prog;
>      brw_blorp_wm_push_constants wm_push_consts;
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
> index 28d7ad0..675289b 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
> @@ -49,7 +49,8 @@ public:
>      brw_blorp_clear_params(struct brw_context *brw,
>                             struct gl_framebuffer *fb,
>                             struct gl_renderbuffer *rb,
> -                          GLubyte *color_mask);
> +                          GLubyte *color_mask,
> +                          bool partial_clear);
>
>      virtual uint32_t get_wm_prog(struct brw_context *brw,
>                                   brw_blorp_prog_data **prog_data) const;
> @@ -105,10 +106,49 @@ brw_blorp_clear_program::~brw_blorp_clear_program()
>      ralloc_free(mem_ctx);
>   }
>
> +
> +/**
> + * Determine if fast color clear supports the given clear color.
> + *
> + * Fast color clear can only clear to color values of 1.0 or 0.0.  At the
> + * moment we only support non-integer (float/normalized) color buffers.
> + */
> +static bool
> +is_color_fast_clear_compatible(gl_format format,
> +                               const union gl_color_union *color)
> +{
> +   if (_mesa_is_format_integer_color(format))
> +      return false;
> +
> +   for (int i = 0; i < 4; i++) {
> +      if (color->f[i] != 0.0 && color->f[i] != 1.0)
> +         return false;

Should this generate a perf debug message when the clear color is rejected
here?  Eric may have an opinion about generating warnings for the non-fast
path...

> +   }
> +   return true;
> +}
> +
> +
> +/**
> + * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
> + * SURFACE_STATE.
> + */
> +static uint32_t
> +compute_fast_clear_color_bits(const union gl_color_union *color)
> +{
> +   uint32_t bits = 0;
> +   for (int i = 0; i < 4; i++) {
> +      if (color->f[i] != 0.0)
> +         bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
> +   }
> +   return bits;
> +}
> +
> +
>   brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
>                                                  struct gl_framebuffer *fb,
>                                                  struct gl_renderbuffer *rb,
> -                                               GLubyte *color_mask)
> +                                               GLubyte *color_mask,
> +                                               bool partial_clear)
>   {
>      struct intel_context *intel = &brw->intel;
>      struct gl_context *ctx = &intel->ctx;
> @@ -163,6 +203,56 @@ brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
>            wm_prog_key.use_simd16_replicated_data = false;
>         }
>      }
> +
> +   /* If we can do this as a fast color clear, do so. */
> +   if (irb->mt->mcs_state != INTEL_MCS_STATE_NONE && !partial_clear &&
> +       wm_prog_key.use_simd16_replicated_data &&
> +       is_color_fast_clear_compatible(format, &ctx->Color.ClearColor)) {
> +      memset(push_consts, 0xff, 4*sizeof(float));
> +      fast_clear_op = GEN7_FAST_CLEAR_OP_FAST_CLEAR;
> +
> +      /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
> +       * Target(s)", beneath the "Fast Color Clear" bullet (p327):
> +       *
> +       *     Clear pass must have a clear rectangle that must follow alignment
> +       *     rules in terms of pixels and lines as shown in the table
> +       *     below. Further, the clear-rectangle height and width must be
> +       *     multiple of the following dimensions. If the height and width of
> +       *     the render target being cleared do not meet these requirements,
> +       *     an MCS buffer can be created such that it follows the requirement
> +       *     and covers the RT.
> +       *
> +       * The alignment size in the table that follows is related to the
> +       * alignment size returned by intel_get_non_msrt_mcs_alignment(), but
> +       * with X alignment multiplied by 16 and Y alignment multiplied by 32.
> +       */
> +      unsigned x_align, y_align;
> +      intel_get_non_msrt_mcs_alignment(intel, irb->mt, &x_align, &y_align);
> +      x_align *= 16;
> +      y_align *= 32;
> +      x0 = ROUND_DOWN_TO(x0, x_align);
> +      y0 = ROUND_DOWN_TO(y0, y_align);
> +      x1 = ALIGN(x1, x_align);
> +      y1 = ALIGN(y1, y_align);
> +
> +      /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
> +       * Target(s)", beneath the "Fast Color Clear" bullet (p327):
> +       *
> +       *     In order to optimize the performance MCS buffer (when bound to 1X
> +       *     RT) clear similarly to MCS buffer clear for MSRT case, clear rect
> +       *     is required to be scaled by the following factors in the
> +       *     horizontal and vertical directions:
> +       *
> +       * The X and Y scale down factors in the table that follows are each
> +       * equal to half the alignment value computed above.
> +       */
> +      unsigned x_scaledown = x_align / 2;
> +      unsigned y_scaledown = y_align / 2;
> +      x0 /= x_scaledown;
> +      y0 /= y_scaledown;
> +      x1 /= x_scaledown;
> +      y1 /= y_scaledown;
> +   }
>   }
>
>   uint32_t
> @@ -266,7 +356,8 @@ brw_blorp_clear_program::compile(struct brw_context *brw,
>
>   extern "C" {
>   bool
> -brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
> +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb,
> +                      bool partial_clear)
>   {
>      struct gl_context *ctx = &intel->ctx;
>      struct brw_context *brw = brw_context(ctx);
> @@ -288,6 +379,7 @@ brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
>
>      for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
>         struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
> +      struct intel_renderbuffer *irb = intel_renderbuffer(rb);
>
>         /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
>          * the framebuffer can be complete with some attachments missing.  In
> @@ -296,8 +388,51 @@ brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
>         if (rb == NULL)
>            continue;
>
> -      brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf]);
> +      brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf],
> +                                    partial_clear);
> +
> +      bool is_fast_clear =
> +         (params.fast_clear_op == GEN7_FAST_CLEAR_OP_FAST_CLEAR);
> +      if (is_fast_clear) {
> +         /* Record the clear color in the miptree so that it will be
> +          * programmed in SURFACE_STATE by later rendering and resolve
> +          * operations.
> +          */
> +         uint32_t new_color_value =
> +            compute_fast_clear_color_bits(&ctx->Color.ClearColor);
> +         if (irb->mt->fast_clear_color_value != new_color_value) {
> +            irb->mt->fast_clear_color_value = new_color_value;
> +            brw->state.dirty.brw |= BRW_NEW_SURFACES;
> +         }
> +
> +         /* If the buffer is already in INTEL_MCS_STATE_CLEAR, the clear is
> +          * redundant and can be skipped.
> +          */
> +         if (irb->mt->mcs_state == INTEL_MCS_STATE_CLEAR)
> +            continue;
> +
> +         /* If the MCS buffer hasn't been allocated yet, we need to allocate
> +          * it now.
> +          */
> +         if (!irb->mt->mcs_mt &&
> +             !intel_miptree_alloc_non_msrt_mcs(intel, irb->mt)) {
> +            /* MCS allocation failed--probably this will only happen in
> +             * out-of-memory conditions.  But in any case, try to recover by
> +             * falling back to a non-blorp clear technique.
> +             */
> +            return false;
> +         }
> +      }
> +
>         brw_blorp_exec(intel, &params);
> +
> +      if (is_fast_clear) {
> +         /* Now that the fast clear has occurred, put the buffer in
> +          * INTEL_MCS_STATE_CLEAR so that we won't waste time doing redundant
> +          * clears.
> +          */
> +         irb->mt->mcs_state = INTEL_MCS_STATE_CLEAR;
> +      }
>      }
>
>      return true;
> diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
> index 2b999bf..80b7a0c 100644
> --- a/src/mesa/drivers/dri/i965/brw_clear.c
> +++ b/src/mesa/drivers/dri/i965/brw_clear.c
> @@ -234,7 +234,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
>      /* BLORP is currently only supported on Gen6+. */
>      if (intel->gen >= 6) {
>         if (mask & BUFFER_BITS_COLOR) {
> -         if (brw_blorp_clear_color(intel, fb)) {
> +         if (brw_blorp_clear_color(intel, fb, partial_clear)) {
>               debug_mask("blorp color", mask & BUFFER_BITS_COLOR);
>               mask &= ~BUFFER_BITS_COLOR;
>            }
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index fedd78c..90b16ab 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -555,6 +555,7 @@
>   #define GEN7_SURFACE_MCS_PITCH_MASK             INTEL_MASK(11, 3)
>
>   /* Surface state DW7 */
> +#define GEN7_SURFACE_CLEAR_COLOR_SHIFT		28
>   #define GEN7_SURFACE_SCS_R_SHIFT                25
>   #define GEN7_SURFACE_SCS_R_MASK                 INTEL_MASK(27, 25)
>   #define GEN7_SURFACE_SCS_G_SHIFT                22
> @@ -1613,6 +1614,7 @@ enum brw_wm_barycentric_interp_mode {
>   # define GEN7_PS_PUSH_CONSTANT_ENABLE		        (1 << 11)
>   # define GEN7_PS_ATTRIBUTE_ENABLE		        (1 << 10)
>   # define GEN7_PS_OMASK_TO_RENDER_TARGET			(1 << 9)
> +# define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE	(1 << 8)
>   # define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE		(1 << 7)
>   # define GEN7_PS_POSOFFSET_NONE				(0 << 3)
>   # define GEN7_PS_POSOFFSET_CENTROID			(2 << 3)
> diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> index 2d09c7f..5f7e10f 100644
> --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> @@ -202,11 +202,13 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
>                                   is_render_target);
>      }
>
> +   surf[7] = surface->mt->fast_clear_color_value;
> +
>      if (intel->is_haswell) {
> -      surf[7] = SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
> -                SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
> -                SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
> -                SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
> +      surf[7] |= (SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
> +                  SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
> +                  SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
> +                  SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
>      }
>
>      /* Emit relocation to surface contents */
> @@ -587,6 +589,14 @@ gen7_blorp_emit_ps_config(struct brw_context *brw,
>         dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
>      }
>
> +   switch (params->fast_clear_op) {
> +   case GEN7_FAST_CLEAR_OP_FAST_CLEAR:
> +      dw4 |= GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE;
> +      break;
> +   default:
> +      break;
> +   }
> +
>      BEGIN_BATCH(8);
>      OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
>      OUT_BATCH(params->use_wm_prog ? prog_offset : 0);
> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> index f5d2e43..fda4b2c 100644
> --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> @@ -589,11 +589,13 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
>                                   irb->mt->mcs_mt, true /* is RT */);
>      }
>
> +   surf[7] = irb->mt->fast_clear_color_value;
> +
>      if (intel->is_haswell) {
> -      surf[7] = SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
> -                SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
> -                SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
> -                SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
> +      surf[7] |= (SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
> +                  SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
> +                  SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
> +                  SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
>      }
>
>      drm_intel_bo_emit_reloc(brw->intel.batch.bo,
> diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
> index 9d1b91a..657532f 100644
> --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
> @@ -1163,6 +1163,53 @@ intel_miptree_alloc_mcs(struct intel_context *intel,
>   #endif
>   }
>
> +
> +bool
> +intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel,
> +                                 struct intel_mipmap_tree *mt)
> +{
> +#ifdef I915
> +   assert(!"MCS not supported on i915");
> +#else
> +   assert(mt->mcs_mt == NULL);
> +
> +   /* The format of the MCS buffer is opaque to the driver; all that matters
> +    * is that we get its size and pitch right.  We'll pretend that the format
> +    * is R32.  Since an MCS tile covers 128 blocks horizontally, and a Y-tiled
> +    * R32 buffer is 32 pixels across, we'll need to scale the width down by
> +    * the block width and then a further factor of 4.  Since an MCS tile
> +    * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high,
> +    * we'll need to scale the height down by the block height and then a
> +    * further factor of 8.
> +    */
> +   const gl_format format = MESA_FORMAT_R_UINT32;
> +   unsigned block_width_px;
> +   unsigned block_height;
> +   intel_get_non_msrt_mcs_alignment(intel, mt, &block_width_px, &block_height);
> +   unsigned width_divisor = block_width_px * 4;
> +   unsigned height_divisor = block_height * 8;
> +   unsigned mcs_width =
> +      ALIGN(mt->logical_width0, width_divisor) / width_divisor;
> +   unsigned mcs_height =
> +      ALIGN(mt->logical_height0, height_divisor) / height_divisor;
> +   assert(mt->logical_depth0 == 1);
> +   mt->mcs_mt = intel_miptree_create(intel,
> +                                     mt->target,
> +                                     format,
> +                                     mt->first_level,
> +                                     mt->last_level,
> +                                     mcs_width,
> +                                     mcs_height,
> +                                     mt->logical_depth0,
> +                                     true,
> +                                     0 /* num_samples */,
> +                                     true /* force_y_tiling */);
> +
> +   return mt->mcs_mt;
> +#endif
> +}
> +
> +
>   /**
>    * Helper for intel_miptree_alloc_hiz() that sets
>    * \c mt->level[level].slice[layer].has_hiz. Return true if and only if
> diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
> index 5cd69cb..4c9ff94 100644
> --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
> +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
> @@ -463,6 +463,15 @@ struct intel_mipmap_tree
>      enum intel_mcs_state mcs_state;
>   #endif
>
> +   /**
> +    * The SURFACE_STATE bits associated with the last fast color clear to this
> +    * color mipmap tree, if any.
> +    *
> +    * This value will only ever contain ones in bits 28-31, so it is safe to
> +    * OR into dword 7 of SURFACE_STATE.
> +    */
> +   uint32_t fast_clear_color_value;
> +
>      /* These are also refcounted:
>       */
>      GLuint refcount;
> @@ -477,6 +486,10 @@ intel_get_non_msrt_mcs_alignment(struct intel_context *intel,
>                                    struct intel_mipmap_tree *mt,
>                                    unsigned *width_px, unsigned *height);
>
> +bool
> +intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel,
> +                                 struct intel_mipmap_tree *mt);
> +
>   struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
>                                                  GLenum target,
>   					       gl_format format,
>



More information about the mesa-dev mailing list