[Mesa-dev] [PATCH 22/22] anv: Predicate fast-clear resolves

Nanley Chery nanleychery at gmail.com
Thu Apr 27 18:32:21 UTC 2017


There's no image layout to represent a full-RT-cleared color attachment.
That's one reason we can end up with redundant resolves. Testing has
shown that such resolves can measurably hurt performance and that
predicating them can avoid the penalty.

Signed-off-by: Nanley Chery <nanley.g.chery at intel.com>
---
 src/intel/vulkan/anv_blorp.c       |   3 +-
 src/intel/vulkan/anv_image.c       |   6 +++
 src/intel/vulkan/genX_cmd_buffer.c | 106 ++++++++++++++++++++++++++++++++++---
 3 files changed, 108 insertions(+), 7 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 821d01a077..32f0edf316 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1500,7 +1500,8 @@ anv_ccs_resolve(struct anv_cmd_buffer * const cmd_buffer,
 
    struct blorp_batch batch;
    blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
-                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
+                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL |
+                    BLORP_BATCH_PREDICATE_ENABLE);
 
    struct blorp_surf surf;
    get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index 751f2d6026..92ee86dab5 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -208,6 +208,12 @@ anv_fill_ccs_resolve_ss(const struct anv_device * const device,
                        .aux_usage = image->aux_usage == ISL_AUX_USAGE_NONE ?
                                     ISL_AUX_USAGE_CCS_D : image->aux_usage,
                        .mocs = device->default_mocs);
+
+   /* The following dword is used as a flag to represent whether or not this
+    * CCS subresource needs resolving. We want this to be zero by default,
+    * which means that a resolve is not necessary.
+    */
+   assert(*(uint32_t *)(data + device->isl_dev.ss.addr_offset) == 0);
 }
 
 /**
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 95729ec8a8..041301290e 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -402,6 +402,82 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
       anv_gen8_hiz_op_resolve(cmd_buffer, image, hiz_op);
 }
 
+static uint32_t
+clear_value_buffer_offset(const struct anv_cmd_buffer * const cmd_buffer,
+                          const struct anv_image * const image,
+                          const uint8_t level)
+{
+   return image->offset +
+          image->aux_surface.offset + image->aux_surface.isl.size +
+          cmd_buffer->device->isl_dev.ss.size * level;
+}
+
+#define MI_PREDICATE_SRC0  0x2400
+#define MI_PREDICATE_SRC1  0x2408
+
+enum ccs_resolve_state {
+   CCS_RESOLVE_NOT_NEEDED,
+   CCS_RESOLVE_NEEDED,
+   CCS_RESOLVE_STARTING,
+};
+
+/* Manages the state of an color image subresource to ensure resolves are
+ * performed properly.
+ */
+static void
+genX(set_resolve_state)(struct anv_cmd_buffer * const cmd_buffer,
+                        const struct anv_image * const image,
+                        const uint8_t level,
+                        const enum ccs_resolve_state state)
+{
+   assert(cmd_buffer && image);
+
+   /* The image subresource range must have a color auxiliary buffer. */
+   assert(anv_image_has_color_aux(image));
+   assert(level < anv_color_aux_levels(image));
+
+   /* We store the resolve flag in the location of the surface base address
+    * field for simplicity and because it is initialized to zero when the
+    * clear value buffer is initialized.
+    */
+   const uint32_t resolve_flag_offset =
+      clear_value_buffer_offset(cmd_buffer, image, level) +
+      cmd_buffer->device->isl_dev.ss.addr_offset;
+
+   /* An update operation should not overwrite the fast clear value. */
+   if (cmd_buffer->device->isl_dev.ss.addr_offset <
+       cmd_buffer->device->isl_dev.ss.clear_value_offset) {
+      assert(cmd_buffer->device->isl_dev.ss.addr_offset + 4 <=
+             cmd_buffer->device->isl_dev.ss.clear_value_offset);
+   }
+
+   if (state != CCS_RESOLVE_STARTING) {
+      assert(state == CCS_RESOLVE_NEEDED || state == CCS_RESOLVE_NOT_NEEDED);
+      /* The HW docs say that there is no way to guarantee the completion of
+       * the following command. We use it nevertheless because it shows no
+       * issues in testing is currently being used in the GL driver.
+       */
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
+         sdi.Address = (struct anv_address) { image->bo, resolve_flag_offset };
+         sdi.ImmediateData = state == CCS_RESOLVE_NEEDED;
+      }
+   } else {
+      /* Make the pending predicated resolve a no-op if one is not needed.
+       * predicate = do_resolve = resolve_flag != 0;
+       */
+      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1    , 0);
+      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
+      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0    , 0);
+      emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,
+               image->bo, resolve_flag_offset);
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
+         mip.LoadOperation    = LOAD_LOADINV;
+         mip.CombineOperation = COMBINE_SET;
+         mip.CompareOperation = COMPARE_SRCS_EQUAL;
+      }
+   }
+}
+
 
 /* Copies clear value dwords between a surface state object and an image's
  * clear value buffer.
@@ -420,9 +496,7 @@ genX(transfer_clear_value)(struct anv_cmd_buffer * const cmd_buffer,
    assert(level < anv_color_aux_levels(image));
 
    const uint32_t img_clear_offset =
-      image->offset + image->aux_surface.offset +
-      image->aux_surface.isl.size +
-      cmd_buffer->device->isl_dev.ss.size * level +
+      clear_value_buffer_offset(cmd_buffer, image, level) +
       cmd_buffer->device->isl_dev.ss.clear_value_offset;
 
    struct anv_bo * const ss_bo =
@@ -523,6 +597,8 @@ transition_color_buffer(struct anv_cmd_buffer * const cmd_buffer,
 
    for (uint32_t level = base_level; level < base_level + level_count; level++) {
 
+      genX(set_resolve_state)(cmd_buffer, image, level, CCS_RESOLVE_STARTING);
+
       layer_count = MIN2(layer_count, anv_color_aux_layers(image, level));
       for (uint32_t layer = base_layer; layer < base_layer + layer_count; layer++) {
 
@@ -541,6 +617,8 @@ transition_color_buffer(struct anv_cmd_buffer * const cmd_buffer,
                                     false);
          anv_ccs_resolve(cmd_buffer, surface_state, image, level, layer, op);
       }
+
+      genX(set_resolve_state)(cmd_buffer, image, level, CCS_RESOLVE_NOT_NEEDED);
    }
 
    cmd_buffer->state.pending_pipe_bits |=
@@ -693,6 +771,25 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
                genX(transfer_clear_value)(cmd_buffer,
                   state->attachments[i].color_rt_state, iview->image,
                   iview->isl.base_level, true /* copy_to_buffer */);
+
+               /* If this image always has the auxiliary buffer enabled we can
+                * mark the subresource as not needing a resolve if the clear
+                * color will match what's in the RENDER_SURFACE_STATE object
+                * when it's being used for sampling.
+                */
+               if (iview->image->aux_usage == ISL_AUX_USAGE_CCS_E &&
+                   clear_color.u32[0] == 0 &&
+                   clear_color.u32[1] == 0 &&
+                   clear_color.u32[2] == 0 &&
+                   clear_color.u32[3] == 0) {
+                  genX(set_resolve_state)(cmd_buffer, iview->image,
+                                          iview->isl.base_level,
+                                          CCS_RESOLVE_NOT_NEEDED);
+               } else {
+                  genX(set_resolve_state)(cmd_buffer, iview->image,
+                                          iview->isl.base_level,
+                                          CCS_RESOLVE_NEEDED);
+               }
             } else if (att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD &&
                        state->attachments[i].aux_usage != ISL_AUX_USAGE_NONE) {
                /* The attachment may have been fast-cleared in a previous
@@ -2194,9 +2291,6 @@ void genX(CmdDispatch)(
 #define GPGPU_DISPATCHDIMY 0x2504
 #define GPGPU_DISPATCHDIMZ 0x2508
 
-#define MI_PREDICATE_SRC0  0x2400
-#define MI_PREDICATE_SRC1  0x2408
-
 void genX(CmdDispatchIndirect)(
     VkCommandBuffer                             commandBuffer,
     VkBuffer                                    _buffer,
-- 
2.12.2



More information about the mesa-dev mailing list