[Mesa-dev] [PATCH 13/29] anv/cmd_buffer: Rework aux tracking

Pohjolainen, Topi topi.pohjolainen at gmail.com
Thu Nov 30 19:59:09 UTC 2017


On Mon, Nov 27, 2017 at 07:06:03PM -0800, Jason Ekstrand wrote:
> This makes us start tracking two bits per level for aux to describe both
> whether or not something is fast-cleared and whether or not it is
> compressed as opposed to a single "needs resolve" bit.  The previous
> scheme only worked because we assumed that CCS_E compressed images would
> always be read with CCS_E enabled and so they didn't need any sort of
> resolve if there was no fast clear color.
> 
> The assumptions of the previous scheme held because the one case where
> we do need a full resolve when CCS_E is enabled is for window-system
> images.  Since we only ever allowed X-tiled window-system images, CCS
> was entirely disabled on gen9+ and we never got CCS_E.  With the advent
> of Y-tiled window-system buffers, we now need to properly support doing
> a full resolve images marked CCS_E.  This requires us to track things
> more granularly because, if the client does two back-to-back transitions
> where we first do a partial resolve and then a full resolve, we need
> both resolves to happen.
> ---
>  src/intel/vulkan/anv_private.h     |  10 +--
>  src/intel/vulkan/genX_cmd_buffer.c | 158 ++++++++++++++++++++++++++++++-------
>  2 files changed, 135 insertions(+), 33 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index f805246..e875705 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -2476,16 +2476,16 @@ anv_fast_clear_state_entry_size(const struct anv_device *device)
>  {
>     assert(device);
>     /* Entry contents:
> -    *   +--------------------------------------------+
> -    *   | clear value dword(s) | needs resolve dword |
> -    *   +--------------------------------------------+
> +    *   +---------------------------------------------+
> +    *   | clear value dword(s) | needs resolve dwords |
> +    *   +---------------------------------------------+
>      */
>  
> -   /* Ensure that the needs resolve dword is in fact dword-aligned to enable
> +   /* Ensure that the needs resolve dwords are in fact dword-aligned to enable
>      * GPU memcpy operations.
>      */
>     assert(device->isl_dev.ss.clear_value_size % 4 == 0);
> -   return device->isl_dev.ss.clear_value_size + 4;
> +   return device->isl_dev.ss.clear_value_size + 8;
>  }
>  
>  static inline struct anv_address
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
> index 6aeffa3..2d47179 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -404,6 +404,22 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
>                         0, 0, 1, hiz_op);
>  }
>  
> +/** Bitfield representing the needed resolves.
> + *
> + * This bitfield represents the kinds of compression we may or may not have in
> + * a given image.  The ANV_IMAGE_NEEDS_* can be ANDed with the ANV_IMAGE_HAS
> + * bits to determine when a given resolve actually needs to happen.
> + *
> + * For convenience of dealing with MI_PREDICATE, we blow these two bits out
> + * into two dwords in the image meatadata page.

s/meatadata/metadata/

> + */
> +enum anv_image_resolve_bits {
> +   ANV_IMAGE_HAS_FAST_CLEAR_BIT           = 0x1,
> +   ANV_IMAGE_HAS_COMPRESSION_BIT          = 0x2,
> +   ANV_IMAGE_NEEDS_PARTIAL_RESOLVE_BITS   = 0x1,
> +   ANV_IMAGE_NEEDS_FULL_RESOLVE_BITS      = 0x3,
> +};
> +
>  #define MI_PREDICATE_SRC0  0x2400
>  #define MI_PREDICATE_SRC1  0x2408
>  
> @@ -411,23 +427,62 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
>   * performed properly.
>   */
>  static void
> -set_image_needs_resolve(struct anv_cmd_buffer *cmd_buffer,
> -                        const struct anv_image *image,
> -                        VkImageAspectFlagBits aspect,
> -                        unsigned level, bool needs_resolve)
> +set_image_needs_resolve_bits(struct anv_cmd_buffer *cmd_buffer,
> +                             const struct anv_image *image,
> +                             VkImageAspectFlagBits aspect,
> +                             unsigned level,
> +                             enum anv_image_resolve_bits set_bits)
>  {
>     assert(cmd_buffer && image);
>     assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
>     assert(level < anv_image_aux_levels(image, aspect));
>  
> -   /* The HW docs say that there is no way to guarantee the completion of
> -    * the following command. We use it nevertheless because it shows no
> -    * issues in testing is currently being used in the GL driver.
> -    */

Why are we dropping this comment now?

> -   anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> -      sdi.Address = anv_image_get_needs_resolve_addr(cmd_buffer->device,
> -                                                     image, aspect, level);
> -      sdi.ImmediateData = needs_resolve;
> +   const struct anv_address resolve_flag_addr =
> +      anv_image_get_needs_resolve_addr(cmd_buffer->device,
> +                                       image, aspect, level);
> +
> +   if (set_bits & ANV_IMAGE_HAS_FAST_CLEAR_BIT) {
> +      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> +         sdi.Address = resolve_flag_addr;
> +         sdi.ImmediateData = 1;
> +      }
> +   }
> +   if (set_bits & ANV_IMAGE_HAS_COMPRESSION_BIT) {
> +      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> +         sdi.Address = resolve_flag_addr;
> +         sdi.Address.offset += 4;
> +         sdi.ImmediateData = 1;
> +      }
> +   }
> +}
> +
> +static void
> +clear_image_needs_resolve_bits(struct anv_cmd_buffer *cmd_buffer,
> +                               const struct anv_image *image,
> +                               VkImageAspectFlagBits aspect,
> +                               unsigned level,
> +                               enum anv_image_resolve_bits clear_bits)
> +{
> +   assert(cmd_buffer && image);
> +   assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> +   assert(level < anv_image_aux_levels(image, aspect));
> +
> +   const struct anv_address resolve_flag_addr =
> +      anv_image_get_needs_resolve_addr(cmd_buffer->device,
> +                                       image, aspect, level);
> +
> +   if (clear_bits & ANV_IMAGE_HAS_FAST_CLEAR_BIT) {
> +      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> +         sdi.Address = resolve_flag_addr;
> +         sdi.ImmediateData = 0;
> +      }
> +   }
> +   if (clear_bits & ANV_IMAGE_HAS_COMPRESSION_BIT) {
> +      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> +         sdi.Address = resolve_flag_addr;
> +         sdi.Address.offset += 4;
> +         sdi.ImmediateData = 0;
> +      }

Do set_image_needs_resolve_bits() and clear_image_needs_resolve_bits()
deviate in later patches? Now the only difference is the value set for
"sdi.ImmediateData". I can't help thinking of having one function and the
value as an extra argument. The argument could be an enum documenting for
the reader if SET or CLEAR is wanted. Not a big issue, just thinking aloud.

Either way the logic itself looks good:

Reviewed-by: Topi Pohjolainen <topi.pohjolainen at intel.com>

>     }
>  }
>  
> @@ -435,7 +490,8 @@ static void
>  load_needs_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
>                               const struct anv_image *image,
>                               VkImageAspectFlagBits aspect,
> -                             unsigned level)
> +                             unsigned level,
> +                             enum anv_image_resolve_bits bits)
>  {
>     assert(cmd_buffer && image);
>     assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> @@ -450,9 +506,27 @@ load_needs_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
>      */
>     emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1    , 0);
>     emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
> -   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0    , 0);
> -   emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,
> -            resolve_flag_addr.bo, resolve_flag_addr.offset);
> +
> +   /* Conditionally load the two dwords into the high and low portions of
> +    * MI_PREDICATE_SRC0.  This effectively ANDs the bits passed into this
> +    * function with the logical bits stored in the metadata page.  Because
> +    * they're split out with one bit per dword, we don't need to use any
> +    * sort of MI math.
> +    */
> +   if (bits & ANV_IMAGE_HAS_FAST_CLEAR_BIT) {
> +      emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0,
> +               resolve_flag_addr.bo, resolve_flag_addr.offset);
> +   } else {
> +      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0, 0);
> +   }
> +
> +   if (bits & ANV_IMAGE_HAS_COMPRESSION_BIT) {
> +      emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,
> +               resolve_flag_addr.bo, resolve_flag_addr.offset + 4);
> +   } else {
> +      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
> +   }
> +
>     anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
>        mip.LoadOperation    = LOAD_LOADINV;
>        mip.CombineOperation = COMBINE_SET;
> @@ -467,6 +541,18 @@ genX(cmd_buffer_mark_image_written)(struct anv_cmd_buffer *cmd_buffer,
>                                      enum isl_aux_usage aux_usage,
>                                      unsigned level)
>  {
> +   /* The only compression types with more than just fast-clears are MCS,
> +    * CCS_E, and HiZ.  With HiZ we just trust the layout and don't actually
> +    * track the current fast-clear and compression state.  This leaves us
> +    * with just MCS and CCS_E.
> +    */
> +   if (aux_usage != ISL_AUX_USAGE_CCS_E &&
> +       aux_usage != ISL_AUX_USAGE_MCS)
> +      return;
> +
> +   set_image_needs_resolve_bits(cmd_buffer, image,
> +                                VK_IMAGE_ASPECT_COLOR_BIT, level,
> +                                ANV_IMAGE_HAS_COMPRESSION_BIT);
>  }
>  
>  static void
> @@ -488,8 +574,11 @@ init_fast_clear_state_entry(struct anv_cmd_buffer *cmd_buffer,
>      * to return incorrect data. The fast clear data in CCS_D buffers should
>      * be removed because CCS_D isn't enabled all the time.
>      */
> -   set_image_needs_resolve(cmd_buffer, image, aspect, level,
> -                           aux_usage == ISL_AUX_USAGE_NONE);
> +   if (aux_usage == ISL_AUX_USAGE_NONE) {
> +      set_image_needs_resolve_bits(cmd_buffer, image, aspect, level, ~0);
> +   } else {
> +      clear_image_needs_resolve_bits(cmd_buffer, image, aspect, level, ~0);
> +   }
>  
>     /* The fast clear value dword(s) will be copied into a surface state object.
>      * Ensure that the restrictions of the fields in the dword(s) are followed.
> @@ -812,12 +901,25 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
>           layer_count = MIN2(layer_count, anv_image_aux_layers(image, aspect, level));
>        }
>  
> -      load_needs_resolve_predicate(cmd_buffer, image, aspect, level);
> +      enum anv_image_resolve_bits resolve_bits;
> +      switch (resolve_op) {
> +      case ISL_AUX_OP_FULL_RESOLVE:
> +         resolve_bits = ANV_IMAGE_NEEDS_FULL_RESOLVE_BITS;
> +         break;
> +      case ISL_AUX_OP_PARTIAL_RESOLVE:
> +         resolve_bits = ANV_IMAGE_NEEDS_PARTIAL_RESOLVE_BITS;
> +         break;
> +      default:
> +         unreachable("Invalid resolve op");
> +      }
> +      load_needs_resolve_predicate(cmd_buffer, image, aspect, level,
> +                                   resolve_bits);
>  
>        anv_image_ccs_op(cmd_buffer, image, aspect, level,
>                         base_layer, layer_count, resolve_op, true);
>  
> -      set_image_needs_resolve(cmd_buffer, image, aspect, level, false);
> +      clear_image_needs_resolve_bits(cmd_buffer, image, aspect,
> +                                     level, resolve_bits);
>     }
>  
>     cmd_buffer->state.pending_pipe_bits |=
> @@ -2992,15 +3094,15 @@ cmd_buffer_subpass_sync_fast_clear_values(struct anv_cmd_buffer *cmd_buffer)
>               * will match what's in every RENDER_SURFACE_STATE object when it's
>               * being used for sampling.
>               */
> -            set_image_needs_resolve(cmd_buffer, iview->image,
> -                                    VK_IMAGE_ASPECT_COLOR_BIT,
> -                                    iview->planes[0].isl.base_level,
> -                                    false);
> +            clear_image_needs_resolve_bits(cmd_buffer, iview->image,
> +                                           VK_IMAGE_ASPECT_COLOR_BIT,
> +                                           iview->planes[0].isl.base_level,
> +                                           ANV_IMAGE_HAS_FAST_CLEAR_BIT);
>           } else {
> -            set_image_needs_resolve(cmd_buffer, iview->image,
> -                                    VK_IMAGE_ASPECT_COLOR_BIT,
> -                                    iview->planes[0].isl.base_level,
> -                                    true);
> +            set_image_needs_resolve_bits(cmd_buffer, iview->image,
> +                                         VK_IMAGE_ASPECT_COLOR_BIT,
> +                                         iview->planes[0].isl.base_level,
> +                                         ANV_IMAGE_HAS_FAST_CLEAR_BIT);
>           }
>        } else if (rp_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
>           /* The attachment may have been fast-cleared in a previous render
> -- 
> 2.5.0.400.gff86faf
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list