[Mesa-dev] [PATCH 22/22] anv: Predicate fast-clear resolves

Jason Ekstrand jason at jlekstrand.net
Wed May 3 22:39:51 UTC 2017


On Thu, Apr 27, 2017 at 11:32 AM, Nanley Chery <nanleychery at gmail.com>
wrote:

> There's no image layout to represent a full-RT-cleared color attachment.
> That's one reason we can end up with redundant resolves. Testing has
> shown that such resolves can measurably hurt performance and that
> predicating them can avoid the penalty.
>
> Signed-off-by: Nanley Chery <nanley.g.chery at intel.com>
> ---
>  src/intel/vulkan/anv_blorp.c       |   3 +-
>  src/intel/vulkan/anv_image.c       |   6 +++
>  src/intel/vulkan/genX_cmd_buffer.c | 106 ++++++++++++++++++++++++++++++
> ++++---
>  3 files changed, 108 insertions(+), 7 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 821d01a077..32f0edf316 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1500,7 +1500,8 @@ anv_ccs_resolve(struct anv_cmd_buffer * const
> cmd_buffer,
>
>     struct blorp_batch batch;
>     blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
> -                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
> +                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL |
> +                    BLORP_BATCH_PREDICATE_ENABLE);
>
>     struct blorp_surf surf;
>     get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index 751f2d6026..92ee86dab5 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -208,6 +208,12 @@ anv_fill_ccs_resolve_ss(const struct anv_device *
> const device,
>                         .aux_usage = image->aux_usage ==
> ISL_AUX_USAGE_NONE ?
>                                      ISL_AUX_USAGE_CCS_D :
> image->aux_usage,
>                         .mocs = device->default_mocs);
> +
> +   /* The following dword is used as a flag to represent whether or not
> this
> +    * CCS subresource needs resolving. We want this to be zero by default,
> +    * which means that a resolve is not necessary.
> +    */
> +   assert(*(uint32_t *)(data + device->isl_dev.ss.addr_offset) == 0);
>

This seems like kind-of an odd place to put it... Not a problem, just odd.


>  }
>
>  /**
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 95729ec8a8..041301290e 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -402,6 +402,82 @@ transition_depth_buffer(struct anv_cmd_buffer
> *cmd_buffer,
>        anv_gen8_hiz_op_resolve(cmd_buffer, image, hiz_op);
>  }
>
> +static uint32_t
> +clear_value_buffer_offset(const struct anv_cmd_buffer * const cmd_buffer,
> +                          const struct anv_image * const image,
> +                          const uint8_t level)
> +{
> +   return image->offset +
> +          image->aux_surface.offset + image->aux_surface.isl.size +
> +          cmd_buffer->device->isl_dev.ss.size * level;
> +}
> +
> +#define MI_PREDICATE_SRC0  0x2400
> +#define MI_PREDICATE_SRC1  0x2408
> +
> +enum ccs_resolve_state {
> +   CCS_RESOLVE_NOT_NEEDED,
> +   CCS_RESOLVE_NEEDED,
> +   CCS_RESOLVE_STARTING,
> +};
> +
> +/* Manages the state of an color image subresource to ensure resolves are
> + * performed properly.
> + */
> +static void
> +genX(set_resolve_state)(struct anv_cmd_buffer * const cmd_buffer,
> +                        const struct anv_image * const image,
> +                        const uint8_t level,
> +                        const enum ccs_resolve_state state)
> +{
> +   assert(cmd_buffer && image);
> +
> +   /* The image subresource range must have a color auxiliary buffer. */
> +   assert(anv_image_has_color_aux(image));
> +   assert(level < anv_color_aux_levels(image));
> +
> +   /* We store the resolve flag in the location of the surface base
> address
> +    * field for simplicity and because it is initialized to zero when the
> +    * clear value buffer is initialized.
> +    */
> +   const uint32_t resolve_flag_offset =
> +      clear_value_buffer_offset(cmd_buffer, image, level) +
> +      cmd_buffer->device->isl_dev.ss.addr_offset;
> +
> +   /* An update operation should not overwrite the fast clear value. */
> +   if (cmd_buffer->device->isl_dev.ss.addr_offset <
> +       cmd_buffer->device->isl_dev.ss.clear_value_offset) {
> +      assert(cmd_buffer->device->isl_dev.ss.addr_offset + 4 <=
> +             cmd_buffer->device->isl_dev.ss.clear_value_offset);
> +   }
> +
> +   if (state != CCS_RESOLVE_STARTING) {
> +      assert(state == CCS_RESOLVE_NEEDED || state ==
> CCS_RESOLVE_NOT_NEEDED);
> +      /* The HW docs say that there is no way to guarantee the completion
> of
> +       * the following command. We use it nevertheless because it shows no
> +       * issues in testing is currently being used in the GL driver.
> +       */
> +      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> +         sdi.Address = (struct anv_address) { image->bo,
> resolve_flag_offset };
> +         sdi.ImmediateData = state == CCS_RESOLVE_NEEDED;
> +      }
> +   } else {
> +      /* Make the pending predicated resolve a no-op if one is not needed.
> +       * predicate = do_resolve = resolve_flag != 0;
> +       */
> +      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1    , 0);
> +      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
> +      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0    , 0);
> +      emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,
> +               image->bo, resolve_flag_offset);
> +      anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
> +         mip.LoadOperation    = LOAD_LOADINV;
> +         mip.CombineOperation = COMBINE_SET;
> +         mip.CompareOperation = COMPARE_SRCS_EQUAL;
> +      }
>

I really wish the MI_PREDICATE were closer to the anv_ccs_resolve.  Maybe
we could just inline this function?  If we had a helper to get the
anv_address of the resolve flag, it wouldn't be bad at all.


> +   }
> +}
> +
>
>  /* Copies clear value dwords between a surface state object and an image's
>   * clear value buffer.
> @@ -420,9 +496,7 @@ genX(transfer_clear_value)(struct anv_cmd_buffer *
> const cmd_buffer,
>     assert(level < anv_color_aux_levels(image));
>
>     const uint32_t img_clear_offset =
> -      image->offset + image->aux_surface.offset +
> -      image->aux_surface.isl.size +
> -      cmd_buffer->device->isl_dev.ss.size * level +
> +      clear_value_buffer_offset(cmd_buffer, image, level) +
>        cmd_buffer->device->isl_dev.ss.clear_value_offset;
>
>     struct anv_bo * const ss_bo =
> @@ -523,6 +597,8 @@ transition_color_buffer(struct anv_cmd_buffer * const
> cmd_buffer,
>
>     for (uint32_t level = base_level; level < base_level + level_count;
> level++) {
>
> +      genX(set_resolve_state)(cmd_buffer, image, level,
> CCS_RESOLVE_STARTING);
> +
>        layer_count = MIN2(layer_count, anv_color_aux_layers(image, level));
>        for (uint32_t layer = base_layer; layer < base_layer + layer_count;
> layer++) {
>
> @@ -541,6 +617,8 @@ transition_color_buffer(struct anv_cmd_buffer * const
> cmd_buffer,
>                                      false);
>           anv_ccs_resolve(cmd_buffer, surface_state, image, level, layer,
> op);
>        }
> +
> +      genX(set_resolve_state)(cmd_buffer, image, level,
> CCS_RESOLVE_NOT_NEEDED);
>     }
>
>     cmd_buffer->state.pending_pipe_bits |=
> @@ -693,6 +771,25 @@ genX(cmd_buffer_setup_attachments)(struct
> anv_cmd_buffer *cmd_buffer,
>                 genX(transfer_clear_value)(cmd_buffer,
>                    state->attachments[i].color_rt_state, iview->image,
>                    iview->isl.base_level, true /* copy_to_buffer */);
> +
> +               /* If this image always has the auxiliary buffer enabled
> we can
> +                * mark the subresource as not needing a resolve if the
> clear
> +                * color will match what's in the RENDER_SURFACE_STATE
> object
> +                * when it's being used for sampling.
> +                */
> +               if (iview->image->aux_usage == ISL_AUX_USAGE_CCS_E &&
> +                   clear_color.u32[0] == 0 &&
> +                   clear_color.u32[1] == 0 &&
> +                   clear_color.u32[2] == 0 &&
> +                   clear_color.u32[3] == 0) {
> +                  genX(set_resolve_state)(cmd_buffer, iview->image,
> +                                          iview->isl.base_level,
> +                                          CCS_RESOLVE_NOT_NEEDED);
> +               } else {
> +                  genX(set_resolve_state)(cmd_buffer, iview->image,
> +                                          iview->isl.base_level,
> +                                          CCS_RESOLVE_NEEDED);
> +               }
>              } else if (att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD &&
>                         state->attachments[i].aux_usage !=
> ISL_AUX_USAGE_NONE) {
>                 /* The attachment may have been fast-cleared in a previous
> @@ -2194,9 +2291,6 @@ void genX(CmdDispatch)(
>  #define GPGPU_DISPATCHDIMY 0x2504
>  #define GPGPU_DISPATCHDIMZ 0x2508
>
> -#define MI_PREDICATE_SRC0  0x2400
> -#define MI_PREDICATE_SRC1  0x2408
> -
>  void genX(CmdDispatchIndirect)(
>      VkCommandBuffer                             commandBuffer,
>      VkBuffer                                    _buffer,
> --
> 2.12.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170503/02356fd5/attachment-0001.html>


More information about the mesa-dev mailing list