[Mesa-dev] [PATCH 13/29] anv/cmd_buffer: Rework aux tracking
Pohjolainen, Topi
topi.pohjolainen at gmail.com
Thu Nov 30 19:59:09 UTC 2017
On Mon, Nov 27, 2017 at 07:06:03PM -0800, Jason Ekstrand wrote:
> This makes us start tracking two bits per level for aux to describe both
> whether or not something is fast-cleared and whether or not it is
> compressed as opposed to a single "needs resolve" bit. The previous
> scheme only worked because we assumed that CCS_E compressed images would
> always be read with CCS_E enabled and so they didn't need any sort of
> resolve if there was no fast clear color.
>
> The assumptions of the previous scheme held because the one case where
> we do need a full resolve when CCS_E is enabled is for window-system
> images. Since we only ever allowed X-tiled window-system images, CCS
> was entirely disabled on gen9+ and we never got CCS_E. With the advent
> of Y-tiled window-system buffers, we now need to properly support doing
> a full resolve images marked CCS_E. This requires us to track things
> more granularly because, if the client does two back-to-back transitions
> where we first do a partial resolve and then a full resolve, we need
> both resolves to happen.
> ---
> src/intel/vulkan/anv_private.h | 10 +--
> src/intel/vulkan/genX_cmd_buffer.c | 158 ++++++++++++++++++++++++++++++-------
> 2 files changed, 135 insertions(+), 33 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index f805246..e875705 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -2476,16 +2476,16 @@ anv_fast_clear_state_entry_size(const struct anv_device *device)
> {
> assert(device);
> /* Entry contents:
> - * +--------------------------------------------+
> - * | clear value dword(s) | needs resolve dword |
> - * +--------------------------------------------+
> + * +---------------------------------------------+
> + * | clear value dword(s) | needs resolve dwords |
> + * +---------------------------------------------+
> */
>
> - /* Ensure that the needs resolve dword is in fact dword-aligned to enable
> + /* Ensure that the needs resolve dwords are in fact dword-aligned to enable
> * GPU memcpy operations.
> */
> assert(device->isl_dev.ss.clear_value_size % 4 == 0);
> - return device->isl_dev.ss.clear_value_size + 4;
> + return device->isl_dev.ss.clear_value_size + 8;
> }
>
> static inline struct anv_address
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
> index 6aeffa3..2d47179 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -404,6 +404,22 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
> 0, 0, 1, hiz_op);
> }
>
> +/** Bitfield representing the needed resolves.
> + *
> + * This bitfield represents the kinds of compression we may or may not have in
> + * a given image. The ANV_IMAGE_NEEDS_* can be ANDed with the ANV_IMAGE_HAS
> + * bits to determine when a given resolve actually needs to happen.
> + *
> + * For convenience of dealing with MI_PREDICATE, we blow these two bits out
> + * into two dwords in the image meatadata page.
s/meatadata/metadata/
> + */
> +enum anv_image_resolve_bits {
> + ANV_IMAGE_HAS_FAST_CLEAR_BIT = 0x1,
> + ANV_IMAGE_HAS_COMPRESSION_BIT = 0x2,
> + ANV_IMAGE_NEEDS_PARTIAL_RESOLVE_BITS = 0x1,
> + ANV_IMAGE_NEEDS_FULL_RESOLVE_BITS = 0x3,
> +};
> +
> #define MI_PREDICATE_SRC0 0x2400
> #define MI_PREDICATE_SRC1 0x2408
>
> @@ -411,23 +427,62 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
> * performed properly.
> */
> static void
> -set_image_needs_resolve(struct anv_cmd_buffer *cmd_buffer,
> - const struct anv_image *image,
> - VkImageAspectFlagBits aspect,
> - unsigned level, bool needs_resolve)
> +set_image_needs_resolve_bits(struct anv_cmd_buffer *cmd_buffer,
> + const struct anv_image *image,
> + VkImageAspectFlagBits aspect,
> + unsigned level,
> + enum anv_image_resolve_bits set_bits)
> {
> assert(cmd_buffer && image);
> assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> assert(level < anv_image_aux_levels(image, aspect));
>
> - /* The HW docs say that there is no way to guarantee the completion of
> - * the following command. We use it nevertheless because it shows no
> - * issues in testing is currently being used in the GL driver.
> - */
Why are we dropping this comment now?
> - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> - sdi.Address = anv_image_get_needs_resolve_addr(cmd_buffer->device,
> - image, aspect, level);
> - sdi.ImmediateData = needs_resolve;
> + const struct anv_address resolve_flag_addr =
> + anv_image_get_needs_resolve_addr(cmd_buffer->device,
> + image, aspect, level);
> +
> + if (set_bits & ANV_IMAGE_HAS_FAST_CLEAR_BIT) {
> + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> + sdi.Address = resolve_flag_addr;
> + sdi.ImmediateData = 1;
> + }
> + }
> + if (set_bits & ANV_IMAGE_HAS_COMPRESSION_BIT) {
> + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> + sdi.Address = resolve_flag_addr;
> + sdi.Address.offset += 4;
> + sdi.ImmediateData = 1;
> + }
> + }
> +}
> +
> +static void
> +clear_image_needs_resolve_bits(struct anv_cmd_buffer *cmd_buffer,
> + const struct anv_image *image,
> + VkImageAspectFlagBits aspect,
> + unsigned level,
> + enum anv_image_resolve_bits clear_bits)
> +{
> + assert(cmd_buffer && image);
> + assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> + assert(level < anv_image_aux_levels(image, aspect));
> +
> + const struct anv_address resolve_flag_addr =
> + anv_image_get_needs_resolve_addr(cmd_buffer->device,
> + image, aspect, level);
> +
> + if (clear_bits & ANV_IMAGE_HAS_FAST_CLEAR_BIT) {
> + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> + sdi.Address = resolve_flag_addr;
> + sdi.ImmediateData = 0;
> + }
> + }
> + if (clear_bits & ANV_IMAGE_HAS_COMPRESSION_BIT) {
> + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> + sdi.Address = resolve_flag_addr;
> + sdi.Address.offset += 4;
> + sdi.ImmediateData = 0;
> + }
Do set_image_needs_resolve_bits() and clear_image_needs_resolve_bits()
deviate in later patches? Now the only difference is the value set for
"sdi.ImmediateData". I can't help thinking of having one function and the
value as an extra argument. The argument could be an enum documenting for
the reader if SET or CLEAR is wanted. Not a big issue, just thinking aloud.
Either way the logic itself looks good:
Reviewed-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
> }
> }
>
> @@ -435,7 +490,8 @@ static void
> load_needs_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
> const struct anv_image *image,
> VkImageAspectFlagBits aspect,
> - unsigned level)
> + unsigned level,
> + enum anv_image_resolve_bits bits)
> {
> assert(cmd_buffer && image);
> assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> @@ -450,9 +506,27 @@ load_needs_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
> */
> emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 , 0);
> emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
> - emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 , 0);
> - emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,
> - resolve_flag_addr.bo, resolve_flag_addr.offset);
> +
> + /* Conditionally load the two dwords into the high and low portions of
> + * MI_PREDICATE_SRC0. This effectively ANDs the bits passed into this
> + * function with the logical bits stored in the metadata page. Because
> + * they're split out with one bit per dword, we don't need to use any
> + * sort of MI math.
> + */
> + if (bits & ANV_IMAGE_HAS_FAST_CLEAR_BIT) {
> + emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0,
> + resolve_flag_addr.bo, resolve_flag_addr.offset);
> + } else {
> + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0, 0);
> + }
> +
> + if (bits & ANV_IMAGE_HAS_COMPRESSION_BIT) {
> + emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,
> + resolve_flag_addr.bo, resolve_flag_addr.offset + 4);
> + } else {
> + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
> + }
> +
> anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
> mip.LoadOperation = LOAD_LOADINV;
> mip.CombineOperation = COMBINE_SET;
> @@ -467,6 +541,18 @@ genX(cmd_buffer_mark_image_written)(struct anv_cmd_buffer *cmd_buffer,
> enum isl_aux_usage aux_usage,
> unsigned level)
> {
> + /* The only compression types with more than just fast-clears are MCS,
> + * CCS_E, and HiZ. With HiZ we just trust the layout and don't actually
> + * track the current fast-clear and compression state. This leaves us
> + * with just MCS and CCS_E.
> + */
> + if (aux_usage != ISL_AUX_USAGE_CCS_E &&
> + aux_usage != ISL_AUX_USAGE_MCS)
> + return;
> +
> + set_image_needs_resolve_bits(cmd_buffer, image,
> + VK_IMAGE_ASPECT_COLOR_BIT, level,
> + ANV_IMAGE_HAS_COMPRESSION_BIT);
> }
>
> static void
> @@ -488,8 +574,11 @@ init_fast_clear_state_entry(struct anv_cmd_buffer *cmd_buffer,
> * to return incorrect data. The fast clear data in CCS_D buffers should
> * be removed because CCS_D isn't enabled all the time.
> */
> - set_image_needs_resolve(cmd_buffer, image, aspect, level,
> - aux_usage == ISL_AUX_USAGE_NONE);
> + if (aux_usage == ISL_AUX_USAGE_NONE) {
> + set_image_needs_resolve_bits(cmd_buffer, image, aspect, level, ~0);
> + } else {
> + clear_image_needs_resolve_bits(cmd_buffer, image, aspect, level, ~0);
> + }
>
> /* The fast clear value dword(s) will be copied into a surface state object.
> * Ensure that the restrictions of the fields in the dword(s) are followed.
> @@ -812,12 +901,25 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
> layer_count = MIN2(layer_count, anv_image_aux_layers(image, aspect, level));
> }
>
> - load_needs_resolve_predicate(cmd_buffer, image, aspect, level);
> + enum anv_image_resolve_bits resolve_bits;
> + switch (resolve_op) {
> + case ISL_AUX_OP_FULL_RESOLVE:
> + resolve_bits = ANV_IMAGE_NEEDS_FULL_RESOLVE_BITS;
> + break;
> + case ISL_AUX_OP_PARTIAL_RESOLVE:
> + resolve_bits = ANV_IMAGE_NEEDS_PARTIAL_RESOLVE_BITS;
> + break;
> + default:
> + unreachable("Invalid resolve op");
> + }
> + load_needs_resolve_predicate(cmd_buffer, image, aspect, level,
> + resolve_bits);
>
> anv_image_ccs_op(cmd_buffer, image, aspect, level,
> base_layer, layer_count, resolve_op, true);
>
> - set_image_needs_resolve(cmd_buffer, image, aspect, level, false);
> + clear_image_needs_resolve_bits(cmd_buffer, image, aspect,
> + level, resolve_bits);
> }
>
> cmd_buffer->state.pending_pipe_bits |=
> @@ -2992,15 +3094,15 @@ cmd_buffer_subpass_sync_fast_clear_values(struct anv_cmd_buffer *cmd_buffer)
> * will match what's in every RENDER_SURFACE_STATE object when it's
> * being used for sampling.
> */
> - set_image_needs_resolve(cmd_buffer, iview->image,
> - VK_IMAGE_ASPECT_COLOR_BIT,
> - iview->planes[0].isl.base_level,
> - false);
> + clear_image_needs_resolve_bits(cmd_buffer, iview->image,
> + VK_IMAGE_ASPECT_COLOR_BIT,
> + iview->planes[0].isl.base_level,
> + ANV_IMAGE_HAS_FAST_CLEAR_BIT);
> } else {
> - set_image_needs_resolve(cmd_buffer, iview->image,
> - VK_IMAGE_ASPECT_COLOR_BIT,
> - iview->planes[0].isl.base_level,
> - true);
> + set_image_needs_resolve_bits(cmd_buffer, iview->image,
> + VK_IMAGE_ASPECT_COLOR_BIT,
> + iview->planes[0].isl.base_level,
> + ANV_IMAGE_HAS_FAST_CLEAR_BIT);
> }
> } else if (rp_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
> /* The attachment may have been fast-cleared in a previous render
> --
> 2.5.0.400.gff86faf
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list