[Mesa-dev] [PATCH 13/29] anv/cmd_buffer: Rework aux tracking

Jason Ekstrand jason at jlekstrand.net
Tue Nov 28 03:06:03 UTC 2017


This makes us start tracking two bits per level for aux to describe both
whether or not something is fast-cleared and whether or not it is
compressed as opposed to a single "needs resolve" bit.  The previous
scheme only worked because we assumed that CCS_E compressed images would
always be read with CCS_E enabled and so they didn't need any sort of
resolve if there was no fast clear color.

The assumptions of the previous scheme held because the one case where
we do need a full resolve when CCS_E is enabled is for window-system
images.  Since we only ever allowed X-tiled window-system images, CCS
was entirely disabled on gen9+ and we never got CCS_E.  With the advent
of Y-tiled window-system buffers, we now need to properly support doing
a full resolve images marked CCS_E.  This requires us to track things
more granularly because, if the client does two back-to-back transitions
where we first do a partial resolve and then a full resolve, we need
both resolves to happen.
---
 src/intel/vulkan/anv_private.h     |  10 +--
 src/intel/vulkan/genX_cmd_buffer.c | 158 ++++++++++++++++++++++++++++++-------
 2 files changed, 135 insertions(+), 33 deletions(-)

diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index f805246..e875705 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -2476,16 +2476,16 @@ anv_fast_clear_state_entry_size(const struct anv_device *device)
 {
    assert(device);
    /* Entry contents:
-    *   +--------------------------------------------+
-    *   | clear value dword(s) | needs resolve dword |
-    *   +--------------------------------------------+
+    *   +---------------------------------------------+
+    *   | clear value dword(s) | needs resolve dwords |
+    *   +---------------------------------------------+
     */
 
-   /* Ensure that the needs resolve dword is in fact dword-aligned to enable
+   /* Ensure that the needs resolve dwords are in fact dword-aligned to enable
     * GPU memcpy operations.
     */
    assert(device->isl_dev.ss.clear_value_size % 4 == 0);
-   return device->isl_dev.ss.clear_value_size + 4;
+   return device->isl_dev.ss.clear_value_size + 8;
 }
 
 static inline struct anv_address
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 6aeffa3..2d47179 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -404,6 +404,22 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
                        0, 0, 1, hiz_op);
 }
 
+/** Bitfield representing the needed resolves.
+ *
+ * This bitfield represents the kinds of compression we may or may not have in
+ * a given image.  The ANV_IMAGE_NEEDS_* can be ANDed with the ANV_IMAGE_HAS
+ * bits to determine when a given resolve actually needs to happen.
+ *
+ * For convenience of dealing with MI_PREDICATE, we blow these two bits out
+ * into two dwords in the image meatadata page.
+ */
+enum anv_image_resolve_bits {
+   ANV_IMAGE_HAS_FAST_CLEAR_BIT           = 0x1,
+   ANV_IMAGE_HAS_COMPRESSION_BIT          = 0x2,
+   ANV_IMAGE_NEEDS_PARTIAL_RESOLVE_BITS   = 0x1,
+   ANV_IMAGE_NEEDS_FULL_RESOLVE_BITS      = 0x3,
+};
+
 #define MI_PREDICATE_SRC0  0x2400
 #define MI_PREDICATE_SRC1  0x2408
 
@@ -411,23 +427,62 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
  * performed properly.
  */
 static void
-set_image_needs_resolve(struct anv_cmd_buffer *cmd_buffer,
-                        const struct anv_image *image,
-                        VkImageAspectFlagBits aspect,
-                        unsigned level, bool needs_resolve)
+set_image_needs_resolve_bits(struct anv_cmd_buffer *cmd_buffer,
+                             const struct anv_image *image,
+                             VkImageAspectFlagBits aspect,
+                             unsigned level,
+                             enum anv_image_resolve_bits set_bits)
 {
    assert(cmd_buffer && image);
    assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
    assert(level < anv_image_aux_levels(image, aspect));
 
-   /* The HW docs say that there is no way to guarantee the completion of
-    * the following command. We use it nevertheless because it shows no
-    * issues in testing is currently being used in the GL driver.
-    */
-   anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
-      sdi.Address = anv_image_get_needs_resolve_addr(cmd_buffer->device,
-                                                     image, aspect, level);
-      sdi.ImmediateData = needs_resolve;
+   const struct anv_address resolve_flag_addr =
+      anv_image_get_needs_resolve_addr(cmd_buffer->device,
+                                       image, aspect, level);
+
+   if (set_bits & ANV_IMAGE_HAS_FAST_CLEAR_BIT) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
+         sdi.Address = resolve_flag_addr;
+         sdi.ImmediateData = 1;
+      }
+   }
+   if (set_bits & ANV_IMAGE_HAS_COMPRESSION_BIT) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
+         sdi.Address = resolve_flag_addr;
+         sdi.Address.offset += 4;
+         sdi.ImmediateData = 1;
+      }
+   }
+}
+
+static void
+clear_image_needs_resolve_bits(struct anv_cmd_buffer *cmd_buffer,
+                               const struct anv_image *image,
+                               VkImageAspectFlagBits aspect,
+                               unsigned level,
+                               enum anv_image_resolve_bits clear_bits)
+{
+   assert(cmd_buffer && image);
+   assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
+   assert(level < anv_image_aux_levels(image, aspect));
+
+   const struct anv_address resolve_flag_addr =
+      anv_image_get_needs_resolve_addr(cmd_buffer->device,
+                                       image, aspect, level);
+
+   if (clear_bits & ANV_IMAGE_HAS_FAST_CLEAR_BIT) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
+         sdi.Address = resolve_flag_addr;
+         sdi.ImmediateData = 0;
+      }
+   }
+   if (clear_bits & ANV_IMAGE_HAS_COMPRESSION_BIT) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
+         sdi.Address = resolve_flag_addr;
+         sdi.Address.offset += 4;
+         sdi.ImmediateData = 0;
+      }
    }
 }
 
@@ -435,7 +490,8 @@ static void
 load_needs_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
                              const struct anv_image *image,
                              VkImageAspectFlagBits aspect,
-                             unsigned level)
+                             unsigned level,
+                             enum anv_image_resolve_bits bits)
 {
    assert(cmd_buffer && image);
    assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
@@ -450,9 +506,27 @@ load_needs_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
     */
    emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1    , 0);
    emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
-   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0    , 0);
-   emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,
-            resolve_flag_addr.bo, resolve_flag_addr.offset);
+
+   /* Conditionally load the two dwords into the high and low portions of
+    * MI_PREDICATE_SRC0.  This effectively ANDs the bits passed into this
+    * function with the logical bits stored in the metadata page.  Because
+    * they're split out with one bit per dword, we don't need to use any
+    * sort of MI math.
+    */
+   if (bits & ANV_IMAGE_HAS_FAST_CLEAR_BIT) {
+      emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0,
+               resolve_flag_addr.bo, resolve_flag_addr.offset);
+   } else {
+      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0, 0);
+   }
+
+   if (bits & ANV_IMAGE_HAS_COMPRESSION_BIT) {
+      emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,
+               resolve_flag_addr.bo, resolve_flag_addr.offset + 4);
+   } else {
+      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
+   }
+
    anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
       mip.LoadOperation    = LOAD_LOADINV;
       mip.CombineOperation = COMBINE_SET;
@@ -467,6 +541,18 @@ genX(cmd_buffer_mark_image_written)(struct anv_cmd_buffer *cmd_buffer,
                                     enum isl_aux_usage aux_usage,
                                     unsigned level)
 {
+   /* The only compression types with more than just fast-clears are MCS,
+    * CCS_E, and HiZ.  With HiZ we just trust the layout and don't actually
+    * track the current fast-clear and compression state.  This leaves us
+    * with just MCS and CCS_E.
+    */
+   if (aux_usage != ISL_AUX_USAGE_CCS_E &&
+       aux_usage != ISL_AUX_USAGE_MCS)
+      return;
+
+   set_image_needs_resolve_bits(cmd_buffer, image,
+                                VK_IMAGE_ASPECT_COLOR_BIT, level,
+                                ANV_IMAGE_HAS_COMPRESSION_BIT);
 }
 
 static void
@@ -488,8 +574,11 @@ init_fast_clear_state_entry(struct anv_cmd_buffer *cmd_buffer,
     * to return incorrect data. The fast clear data in CCS_D buffers should
     * be removed because CCS_D isn't enabled all the time.
     */
-   set_image_needs_resolve(cmd_buffer, image, aspect, level,
-                           aux_usage == ISL_AUX_USAGE_NONE);
+   if (aux_usage == ISL_AUX_USAGE_NONE) {
+      set_image_needs_resolve_bits(cmd_buffer, image, aspect, level, ~0);
+   } else {
+      clear_image_needs_resolve_bits(cmd_buffer, image, aspect, level, ~0);
+   }
 
    /* The fast clear value dword(s) will be copied into a surface state object.
     * Ensure that the restrictions of the fields in the dword(s) are followed.
@@ -812,12 +901,25 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
          layer_count = MIN2(layer_count, anv_image_aux_layers(image, aspect, level));
       }
 
-      load_needs_resolve_predicate(cmd_buffer, image, aspect, level);
+      enum anv_image_resolve_bits resolve_bits;
+      switch (resolve_op) {
+      case ISL_AUX_OP_FULL_RESOLVE:
+         resolve_bits = ANV_IMAGE_NEEDS_FULL_RESOLVE_BITS;
+         break;
+      case ISL_AUX_OP_PARTIAL_RESOLVE:
+         resolve_bits = ANV_IMAGE_NEEDS_PARTIAL_RESOLVE_BITS;
+         break;
+      default:
+         unreachable("Invalid resolve op");
+      }
+      load_needs_resolve_predicate(cmd_buffer, image, aspect, level,
+                                   resolve_bits);
 
       anv_image_ccs_op(cmd_buffer, image, aspect, level,
                        base_layer, layer_count, resolve_op, true);
 
-      set_image_needs_resolve(cmd_buffer, image, aspect, level, false);
+      clear_image_needs_resolve_bits(cmd_buffer, image, aspect,
+                                     level, resolve_bits);
    }
 
    cmd_buffer->state.pending_pipe_bits |=
@@ -2992,15 +3094,15 @@ cmd_buffer_subpass_sync_fast_clear_values(struct anv_cmd_buffer *cmd_buffer)
              * will match what's in every RENDER_SURFACE_STATE object when it's
              * being used for sampling.
              */
-            set_image_needs_resolve(cmd_buffer, iview->image,
-                                    VK_IMAGE_ASPECT_COLOR_BIT,
-                                    iview->planes[0].isl.base_level,
-                                    false);
+            clear_image_needs_resolve_bits(cmd_buffer, iview->image,
+                                           VK_IMAGE_ASPECT_COLOR_BIT,
+                                           iview->planes[0].isl.base_level,
+                                           ANV_IMAGE_HAS_FAST_CLEAR_BIT);
          } else {
-            set_image_needs_resolve(cmd_buffer, iview->image,
-                                    VK_IMAGE_ASPECT_COLOR_BIT,
-                                    iview->planes[0].isl.base_level,
-                                    true);
+            set_image_needs_resolve_bits(cmd_buffer, iview->image,
+                                         VK_IMAGE_ASPECT_COLOR_BIT,
+                                         iview->planes[0].isl.base_level,
+                                         ANV_IMAGE_HAS_FAST_CLEAR_BIT);
          }
       } else if (rp_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
          /* The attachment may have been fast-cleared in a previous render
-- 
2.5.0.400.gff86faf



More information about the mesa-dev mailing list