[Mesa-dev] [PATCH 06/12] i965/gen7+: Implement fast color clear operation in BLORP.

Paul Berry stereotype441 at gmail.com
Tue May 21 16:52:10 PDT 2013


Since we defer allocation of the MCS miptree until the first fast
clear operation on a buffer, this patch also implements creation of
the MCS miptree.

In addition, this patch adds the field
intel_mipmap_tree::fast_clear_color_value, which holds the most recent
fast color clear value, if any. We use it to set the SURFACE_STATE's
clear color for render targets.
---
 src/mesa/drivers/dri/i965/brw_blorp.cpp           |   1 +
 src/mesa/drivers/dri/i965/brw_blorp.h             |  11 +-
 src/mesa/drivers/dri/i965/brw_blorp_clear.cpp     | 143 +++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_clear.c             |   2 +-
 src/mesa/drivers/dri/i965/brw_defines.h           |   2 +
 src/mesa/drivers/dri/i965/gen7_blorp.cpp          |  18 ++-
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |  10 +-
 src/mesa/drivers/dri/intel/intel_mipmap_tree.c    |  47 +++++++
 src/mesa/drivers/dri/intel/intel_mipmap_tree.h    |  13 ++
 9 files changed, 233 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index 20f7153..c6019d1 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -147,6 +147,7 @@ brw_blorp_params::brw_blorp_params()
      y1(0),
      depth_format(0),
      hiz_op(GEN6_HIZ_OP_NONE),
+     fast_clear_op(GEN7_FAST_CLEAR_OP_NONE),
      num_samples(0),
      use_wm_prog(false)
 {
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index 6360a62..687d7eb 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -46,7 +46,8 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
                         bool mirror_x, bool mirror_y);
 
 bool
-brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb);
+brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb,
+                      bool partial_clear);
 
 #ifdef __cplusplus
 } /* end extern "C" */
@@ -195,6 +196,13 @@ struct brw_blorp_prog_data
    bool persample_msaa_dispatch;
 };
 
+
+enum gen7_fast_clear_op {
+   GEN7_FAST_CLEAR_OP_NONE,
+   GEN7_FAST_CLEAR_OP_FAST_CLEAR,
+};
+
+
 class brw_blorp_params
 {
 public:
@@ -212,6 +220,7 @@ public:
    brw_blorp_surface_info src;
    brw_blorp_surface_info dst;
    enum gen6_hiz_op hiz_op;
+   enum gen7_fast_clear_op fast_clear_op;
    unsigned num_samples;
    bool use_wm_prog;
    brw_blorp_wm_push_constants wm_push_consts;
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
index 28d7ad0..675289b 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
@@ -49,7 +49,8 @@ public:
    brw_blorp_clear_params(struct brw_context *brw,
                           struct gl_framebuffer *fb,
                           struct gl_renderbuffer *rb,
-                          GLubyte *color_mask);
+                          GLubyte *color_mask,
+                          bool partial_clear);
 
    virtual uint32_t get_wm_prog(struct brw_context *brw,
                                 brw_blorp_prog_data **prog_data) const;
@@ -105,10 +106,49 @@ brw_blorp_clear_program::~brw_blorp_clear_program()
    ralloc_free(mem_ctx);
 }
 
+
+/**
+ * Determine if fast color clear supports the given clear color.
+ *
+ * Fast color clear can only clear to color values of 1.0 or 0.0.  At the
+ * moment integer color formats are not supported.
+ */
+static bool
+is_color_fast_clear_compatible(gl_format format,
+                               const union gl_color_union *color)
+{
+   if (_mesa_is_format_integer_color(format))
+      return false;
+
+   for (int i = 0; i < 4; i++) {
+      if (color->f[i] != 0.0 && color->f[i] != 1.0)
+         return false;
+   }
+   return true;
+}
+
+
+/**
+ * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
+ * SURFACE_STATE.
+ */
+static uint32_t
+compute_fast_clear_color_bits(const union gl_color_union *color)
+{
+   uint32_t bits = 0;
+   for (int i = 0; i < 4; i++) {
+      if (color->f[i] != 0.0)
+         bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
+   }
+   return bits;
+}
+
+
 brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
                                                struct gl_framebuffer *fb,
                                                struct gl_renderbuffer *rb,
-                                               GLubyte *color_mask)
+                                               GLubyte *color_mask,
+                                               bool partial_clear)
 {
    struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &intel->ctx;
@@ -163,6 +203,56 @@ brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
          wm_prog_key.use_simd16_replicated_data = false;
       }
    }
+
+   /* If we can do this as a fast color clear, do so. */
+   if (irb->mt->mcs_state != INTEL_MCS_STATE_NONE && !partial_clear &&
+       wm_prog_key.use_simd16_replicated_data &&
+       is_color_fast_clear_compatible(format, &ctx->Color.ClearColor)) {
+      memset(push_consts, 0xff, 4*sizeof(float));
+      fast_clear_op = GEN7_FAST_CLEAR_OP_FAST_CLEAR;
+
+      /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
+       * Target(s)", beneath the "Fast Color Clear" bullet (p327):
+       *
+       *     Clear pass must have a clear rectangle that must follow alignment
+       *     rules in terms of pixels and lines as shown in the table
+       *     below. Further, the clear-rectangle height and width must be
+       *     multiple of the following dimensions. If the height and width of
+       *     the render target being cleared do not meet these requirements,
+       *     an MCS buffer can be created such that it follows the requirement
+       *     and covers the RT.
+       *
+       * The alignment size in the table that follows is related to the
+       * alignment size returned by intel_get_non_msrt_mcs_alignment(), but
+       * with X alignment multiplied by 16 and Y alignment multiplied by 32.
+       */
+      unsigned x_align, y_align;
+      intel_get_non_msrt_mcs_alignment(intel, irb->mt, &x_align, &y_align);
+      x_align *= 16;
+      y_align *= 32;
+      x0 = ROUND_DOWN_TO(x0, x_align);
+      y0 = ROUND_DOWN_TO(y0, y_align);
+      x1 = ALIGN(x1, x_align);
+      y1 = ALIGN(y1, y_align);
+
+      /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
+       * Target(s)", beneath the "Fast Color Clear" bullet (p327):
+       *
+       *     In order to optimize the performance MCS buffer (when bound to 1X
+       *     RT) clear similarly to MCS buffer clear for MSRT case, clear rect
+       *     is required to be scaled by the following factors in the
+       *     horizontal and vertical directions:
+       *
+       * The X and Y scale down factors in the table that follows are each
+       * equal to half the alignment value computed above.
+       */
+      unsigned x_scaledown = x_align / 2;
+      unsigned y_scaledown = y_align / 2;
+      x0 /= x_scaledown;
+      y0 /= y_scaledown;
+      x1 /= x_scaledown;
+      y1 /= y_scaledown;
+   }
 }
 
 uint32_t
@@ -266,7 +356,8 @@ brw_blorp_clear_program::compile(struct brw_context *brw,
 
 extern "C" {
 bool
-brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
+brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb,
+                      bool partial_clear)
 {
    struct gl_context *ctx = &intel->ctx;
    struct brw_context *brw = brw_context(ctx);
@@ -288,6 +379,7 @@ brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
 
    for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
       struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
+      struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 
       /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
        * the framebuffer can be complete with some attachments missing.  In
@@ -296,8 +388,51 @@ brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
       if (rb == NULL)
          continue;
 
-      brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf]);
+      brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf],
+                                    partial_clear);
+
+      bool is_fast_clear =
+         (params.fast_clear_op == GEN7_FAST_CLEAR_OP_FAST_CLEAR);
+      if (is_fast_clear) {
+         /* Record the clear color in the miptree so that it will be
+          * programmed in SURFACE_STATE by later rendering and resolve
+          * operations.
+          */
+         uint32_t new_color_value =
+            compute_fast_clear_color_bits(&ctx->Color.ClearColor);
+         if (irb->mt->fast_clear_color_value != new_color_value) {
+            irb->mt->fast_clear_color_value = new_color_value;
+            brw->state.dirty.brw |= BRW_NEW_SURFACES;
+         }
+
+         /* If the buffer is already in INTEL_MCS_STATE_CLEAR, the clear is
+          * redundant and can be skipped.
+          */
+         if (irb->mt->mcs_state == INTEL_MCS_STATE_CLEAR)
+            continue;
+
+         /* If the MCS buffer hasn't been allocated yet, we need to allocate
+          * it now.
+          */
+         if (!irb->mt->mcs_mt &&
+             !intel_miptree_alloc_non_msrt_mcs(intel, irb->mt)) {
+            /* MCS allocation failed--probably this will only happen in
+             * out-of-memory conditions.  But in any case, try to recover by
+             * falling back to a non-blorp clear technique.
+             */
+            return false;
+         }
+      }
+
       brw_blorp_exec(intel, &params);
+
+      if (is_fast_clear) {
+         /* Now that the fast clear has occurred, put the buffer in
+          * INTEL_MCS_STATE_CLEAR so that we won't waste time doing redundant
+          * clears.
+          */
+         irb->mt->mcs_state = INTEL_MCS_STATE_CLEAR;
+      }
    }
 
    return true;
diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index 2b999bf..80b7a0c 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -234,7 +234,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
    /* BLORP is currently only supported on Gen6+. */
    if (intel->gen >= 6) {
       if (mask & BUFFER_BITS_COLOR) {
-         if (brw_blorp_clear_color(intel, fb)) {
+         if (brw_blorp_clear_color(intel, fb, partial_clear)) {
             debug_mask("blorp color", mask & BUFFER_BITS_COLOR);
             mask &= ~BUFFER_BITS_COLOR;
          }
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index fedd78c..90b16ab 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -555,6 +555,7 @@
 #define GEN7_SURFACE_MCS_PITCH_MASK             INTEL_MASK(11, 3)
 
 /* Surface state DW7 */
+#define GEN7_SURFACE_CLEAR_COLOR_SHIFT		28
 #define GEN7_SURFACE_SCS_R_SHIFT                25
 #define GEN7_SURFACE_SCS_R_MASK                 INTEL_MASK(27, 25)
 #define GEN7_SURFACE_SCS_G_SHIFT                22
@@ -1613,6 +1614,7 @@ enum brw_wm_barycentric_interp_mode {
 # define GEN7_PS_PUSH_CONSTANT_ENABLE		        (1 << 11)
 # define GEN7_PS_ATTRIBUTE_ENABLE		        (1 << 10)
 # define GEN7_PS_OMASK_TO_RENDER_TARGET			(1 << 9)
+# define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE	(1 << 8)
 # define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE		(1 << 7)
 # define GEN7_PS_POSOFFSET_NONE				(0 << 3)
 # define GEN7_PS_POSOFFSET_CENTROID			(2 << 3)
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index 2d09c7f..5f7e10f 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -202,11 +202,13 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
                                 is_render_target);
    }
 
+   surf[7] = surface->mt->fast_clear_color_value;
+
    if (intel->is_haswell) {
-      surf[7] = SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
-                SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
-                SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
-                SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
+      surf[7] |= (SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
+                  SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
+                  SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
+                  SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
    }
 
    /* Emit relocation to surface contents */
@@ -587,6 +589,14 @@ gen7_blorp_emit_ps_config(struct brw_context *brw,
       dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
    }
 
+   switch (params->fast_clear_op) {
+   case GEN7_FAST_CLEAR_OP_FAST_CLEAR:
+      dw4 |= GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE;
+      break;
+   default:
+      break;
+   }
+
    BEGIN_BATCH(8);
    OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
    OUT_BATCH(params->use_wm_prog ? prog_offset : 0);
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index f5d2e43..fda4b2c 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -589,11 +589,13 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
                                 irb->mt->mcs_mt, true /* is RT */);
    }
 
+   surf[7] = irb->mt->fast_clear_color_value;
+
    if (intel->is_haswell) {
-      surf[7] = SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
-                SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
-                SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
-                SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
+      surf[7] |= (SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
+                  SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
+                  SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
+                  SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
    }
 
    drm_intel_bo_emit_reloc(brw->intel.batch.bo,
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 9d1b91a..657532f 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -1163,6 +1163,53 @@ intel_miptree_alloc_mcs(struct intel_context *intel,
 #endif
 }
 
+
+bool
+intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel,
+                                 struct intel_mipmap_tree *mt)
+{
+#ifdef I915
+   assert(!"MCS not supported on i915");
+#else
+   assert(mt->mcs_mt == NULL);
+
+   /* The format of the MCS buffer is opaque to the driver; all that matters
+    * is that we get its size and pitch right.  We'll pretend that the format
+    * is R32.  Since an MCS tile covers 128 blocks horizontally, and a Y-tiled
+    * R32 buffer is 32 pixels across, we'll need to scale the width down by
+    * the block width and then a further factor of 4.  Since an MCS tile
+    * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high,
+    * we'll need to scale the height down by the block height and then a
+    * further factor of 8.
+    */
+   const gl_format format = MESA_FORMAT_R_UINT32;
+   unsigned block_width_px;
+   unsigned block_height;
+   intel_get_non_msrt_mcs_alignment(intel, mt, &block_width_px, &block_height);
+   unsigned width_divisor = block_width_px * 4;
+   unsigned height_divisor = block_height * 8;
+   unsigned mcs_width =
+      ALIGN(mt->logical_width0, width_divisor) / width_divisor;
+   unsigned mcs_height =
+      ALIGN(mt->logical_height0, height_divisor) / height_divisor;
+   assert(mt->logical_depth0 == 1);
+   mt->mcs_mt = intel_miptree_create(intel,
+                                     mt->target,
+                                     format,
+                                     mt->first_level,
+                                     mt->last_level,
+                                     mcs_width,
+                                     mcs_height,
+                                     mt->logical_depth0,
+                                     true,
+                                     0 /* num_samples */,
+                                     true /* force_y_tiling */);
+
+   return mt->mcs_mt;
+#endif
+}
+
+
 /**
  * Helper for intel_miptree_alloc_hiz() that sets
  * \c mt->level[level].slice[layer].has_hiz. Return true if and only if
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
index 5cd69cb..4c9ff94 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
@@ -463,6 +463,15 @@ struct intel_mipmap_tree
    enum intel_mcs_state mcs_state;
 #endif
 
+   /**
+    * The SURFACE_STATE bits associated with the last fast color clear to this
+    * color mipmap tree, if any.
+    *
+    * This value will only ever contain ones in bits 28-31, so it is safe to
+    * OR into dword 7 of SURFACE_STATE.
+    */
+   uint32_t fast_clear_color_value;
+
    /* These are also refcounted:
     */
    GLuint refcount;
@@ -477,6 +486,10 @@ intel_get_non_msrt_mcs_alignment(struct intel_context *intel,
                                  struct intel_mipmap_tree *mt,
                                  unsigned *width_px, unsigned *height);
 
+bool
+intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel,
+                                 struct intel_mipmap_tree *mt);
+
 struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
                                                GLenum target,
 					       gl_format format,
-- 
1.8.2.3



More information about the mesa-dev mailing list