[Mesa-dev] [PATCH 02/29] anv/blorp: Rework image clear/resolve helpers
Nanley Chery
nanleychery at gmail.com
Tue Dec 5 23:48:45 UTC 2017
On Mon, Nov 27, 2017 at 07:05:52PM -0800, Jason Ekstrand wrote:
> This replaces image_fast_clear and ccs_resolve with two new helpers that
> simply perform an isl_aux_op whatever that may be on CCS or MCS. This
> is a bit cleaner as it separates performing the aux operation from which
> blorp helper we have to call to do it.
> ---
> src/intel/vulkan/anv_blorp.c | 218 ++++++++++++++++++++++---------------
> src/intel/vulkan/anv_private.h | 23 ++--
> src/intel/vulkan/genX_cmd_buffer.c | 28 +++--
> 3 files changed, 165 insertions(+), 104 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index e244468..7c8a673 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1439,75 +1439,6 @@ fast_clear_aux_usage(const struct anv_image *image,
> }
>
> void
> -anv_image_fast_clear(struct anv_cmd_buffer *cmd_buffer,
> - const struct anv_image *image,
> - VkImageAspectFlagBits aspect,
> - const uint32_t base_level, const uint32_t level_count,
> - const uint32_t base_layer, uint32_t layer_count)
> -{
> - assert(image->type == VK_IMAGE_TYPE_3D || image->extent.depth == 1);
> -
> - if (image->type == VK_IMAGE_TYPE_3D) {
> - assert(base_layer == 0);
> - assert(layer_count == anv_minify(image->extent.depth, base_level));
> - }
> -
> - struct blorp_batch batch;
> - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
> -
> - struct blorp_surf surf;
> - get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
> - fast_clear_aux_usage(image, aspect),
> - &surf);
> -
> - /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
> - *
> - * "After Render target fast clear, pipe-control with color cache
> - * write-flush must be issued before sending any DRAW commands on
> - * that render target."
> - *
> - * This comment is a bit cryptic and doesn't really tell you what's going
> - * or what's really needed. It appears that fast clear ops are not
> - * properly synchronized with other drawing. This means that we cannot
> - * have a fast clear operation in the pipe at the same time as other
> - * regular drawing operations. We need to use a PIPE_CONTROL to ensure
> - * that the contents of the previous draw hit the render target before we
> - * resolve and then use a second PIPE_CONTROL after the resolve to ensure
> - * that it is completed before any additional drawing occurs.
> - */
> - cmd_buffer->state.pending_pipe_bits |=
> - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> -
> - uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
> - uint32_t width_div = image->format->planes[plane].denominator_scales[0];
> - uint32_t height_div = image->format->planes[plane].denominator_scales[1];
> -
> - for (uint32_t l = 0; l < level_count; l++) {
> - const uint32_t level = base_level + l;
> -
> - const VkExtent3D extent = {
> - .width = anv_minify(image->extent.width, level),
> - .height = anv_minify(image->extent.height, level),
> - .depth = anv_minify(image->extent.depth, level),
> - };
> -
> - if (image->type == VK_IMAGE_TYPE_3D)
> - layer_count = extent.depth;
> -
> - assert(level < anv_image_aux_levels(image, aspect));
> - assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level));
> - blorp_fast_clear(&batch, &surf, surf.surf->format,
> - level, base_layer, layer_count,
> - 0, 0,
> - extent.width / width_div,
> - extent.height / height_div);
> - }
> -
> - cmd_buffer->state.pending_pipe_bits |=
> - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> -}
> -
> -void
> anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
> {
> struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
> @@ -1681,36 +1612,153 @@ anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
> }
>
> void
> -anv_ccs_resolve(struct anv_cmd_buffer * const cmd_buffer,
> - const struct anv_image * const image,
> - VkImageAspectFlagBits aspect,
> - const uint8_t level,
> - const uint32_t start_layer, const uint32_t layer_count,
> - const enum blorp_fast_clear_op op)
> +anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
> + const struct anv_image *image,
> + VkImageAspectFlagBits aspect,
> + uint32_t base_layer, uint32_t layer_count,
> + enum isl_aux_op mcs_op, bool predicate)
> {
> - assert(cmd_buffer && image);
> + assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> + assert(image->samples > 1);
> + assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0));
>
> - uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
> + /* We don't support planar images with multisampling yet */
> + assert(image->n_planes == 1);
> +
Is this true? I can't find a similar restriction in anv_formats.c.
> + struct blorp_batch batch;
> + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
> + predicate ? BLORP_BATCH_PREDICATE_ENABLE : 0);
> +
> + struct blorp_surf surf;
> + get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
> + fast_clear_aux_usage(image, aspect),
How about ANV_AUX_USAGE_DEFAULT instead? The fast_clear_aux_usage helper
seems only beneficial for CCS_D/CCS images. Not a big deal though.
> + &surf);
> +
> + /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
> + *
> + * "After Render target fast clear, pipe-control with color cache
> + * write-flush must be issued before sending any DRAW commands on
> + * that render target."
> + *
> + * This comment is a bit cryptic and doesn't really tell you what's going
> + * or what's really needed. It appears that fast clear ops are not
> + * properly synchronized with other drawing. This means that we cannot
> + * have a fast clear operation in the pipe at the same time as other
> + * regular drawing operations. We need to use a PIPE_CONTROL to ensure
> + * that the contents of the previous draw hit the render target before we
> + * resolve and then use a second PIPE_CONTROL after the resolve to ensure
> + * that it is completed before any additional drawing occurs.
> + */
> + cmd_buffer->state.pending_pipe_bits |=
> + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> +
> + switch (mcs_op) {
Are you missing this case?
case ISL_AUX_OP_NONE:
return;
Seems like the NONE case is left out in a number of other switches. Was
this intentional?
> + case ISL_AUX_OP_FAST_CLEAR:
> + blorp_fast_clear(&batch, &surf, surf.surf->format,
> + 0, base_layer, layer_count,
> + 0, 0, image->extent.width, image->extent.height);
> + break;
> + case ISL_AUX_OP_FULL_RESOLVE:
> + case ISL_AUX_OP_PARTIAL_RESOLVE:
> + case ISL_AUX_OP_AMBIGUATE:
> + default:
> + unreachable("Unsupported CCS operation");
^
MCS
> + }
> +
> + cmd_buffer->state.pending_pipe_bits |=
> + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> +
> + blorp_batch_finish(&batch);
> +}
>
> - /* The resolved subresource range must have a CCS buffer. */
> +static enum blorp_fast_clear_op
> +isl_to_blorp_fast_clear_op(enum isl_aux_op isl_op)
> +{
> + switch (isl_op) {
Are you missing this case?
case ISL_AUX_OP_NONE: return BLORP_FAST_CLEAR_OP_NONE;
> + case ISL_AUX_OP_FAST_CLEAR: return BLORP_FAST_CLEAR_OP_CLEAR;
> + case ISL_AUX_OP_FULL_RESOLVE: return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
> + case ISL_AUX_OP_PARTIAL_RESOLVE: return BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
> + default:
> + unreachable("Unsupported HiZ aux op");
> + }
> +}
> +
> +void
> +anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
> + const struct anv_image *image,
> + VkImageAspectFlagBits aspect, uint32_t level,
> + uint32_t base_layer, uint32_t layer_count,
> + enum isl_aux_op ccs_op, bool predicate)
> +{
> + assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> + assert(image->samples == 1);
> assert(level < anv_image_aux_levels(image, aspect));
> - assert(start_layer + layer_count <=
> + assert(base_layer + layer_count <=
> anv_image_aux_layers(image, aspect, level));
> - assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV && image->samples == 1);
> +
> + uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
> + uint32_t width_div = image->format->planes[plane].denominator_scales[0];
> + uint32_t height_div = image->format->planes[plane].denominator_scales[1];
> + uint32_t level_width = anv_minify(image->extent.width, level) / width_div;
> + uint32_t level_height = anv_minify(image->extent.height, level) / height_div;
I can't find any spec text covering mipmaps and multi-planar images, but
the image level is no longer a valid YCbCr subresource if
(anv_minify(image->extent.width , level) % width_div ) != 0
(anv_minify(image->extent.height, level) % height_div) != 0
If this is an open issue, what do you think about some assertions for
this? This was a problem in the original code as well.
>
> struct blorp_batch batch;
> blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
> - BLORP_BATCH_PREDICATE_ENABLE);
> + predicate ? BLORP_BATCH_PREDICATE_ENABLE : 0);
>
> struct blorp_surf surf;
> get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
> fast_clear_aux_usage(image, aspect),
> &surf);
> - surf.clear_color_addr = anv_to_blorp_address(
> - anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect, level));
>
> - blorp_ccs_resolve(&batch, &surf, level, start_layer, layer_count,
> - image->planes[plane].surface.isl.format, op);
> + if (ccs_op == ISL_AUX_OP_FULL_RESOLVE ||
> + ccs_op == ISL_AUX_OP_PARTIAL_RESOLVE) {
> + /* If we're doing a resolve operation, then we need the indirect clear
> + * color. The clear and ambiguate operations just stomp the CCS to a
> + * particular value and don't care about format or clear value.
> + */
> + const struct anv_address clear_color_addr =
> + anv_image_get_clear_color_addr(cmd_buffer->device, image,
> + aspect, level);
> + surf.clear_color_addr = anv_to_blorp_address(clear_color_addr);
> + }
> +
> + /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
> + *
> + * "After Render target fast clear, pipe-control with color cache
> + * write-flush must be issued before sending any DRAW commands on
> + * that render target."
> + *
> + * This comment is a bit cryptic and doesn't really tell you what's going
> + * or what's really needed. It appears that fast clear ops are not
> + * properly synchronized with other drawing. This means that we cannot
> + * have a fast clear operation in the pipe at the same time as other
> + * regular drawing operations. We need to use a PIPE_CONTROL to ensure
> + * that the contents of the previous draw hit the render target before we
> + * resolve and then use a second PIPE_CONTROL after the resolve to ensure
> + * that it is completed before any additional drawing occurs.
> + */
> + cmd_buffer->state.pending_pipe_bits |=
> + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> +
We now explicitly flush:
* Between the levels of a multi-level layout transition.
* Around resolves.
Is there any performance penalty associated with this coarser-grained
flushing?
-Nanley
> + switch (ccs_op) {
> + case ISL_AUX_OP_FAST_CLEAR:
> + blorp_fast_clear(&batch, &surf, surf.surf->format,
> + level, base_layer, layer_count,
> + 0, 0, level_width, level_height);
> + break;
> + case ISL_AUX_OP_FULL_RESOLVE:
> + case ISL_AUX_OP_PARTIAL_RESOLVE:
> + blorp_ccs_resolve(&batch, &surf, level, base_layer, layer_count,
> + surf.surf->format, isl_to_blorp_fast_clear_op(ccs_op));
> + break;
> + case ISL_AUX_OP_AMBIGUATE:
> + default:
> + unreachable("Unsupported CCS operation");
> + }
> +
> + cmd_buffer->state.pending_pipe_bits |=
> + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
>
> blorp_batch_finish(&batch);
> }
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index ca3644d..dc44ab6 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -2533,20 +2533,19 @@ void
> anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
> const struct anv_image *image,
> enum blorp_hiz_op op);
> -void
> -anv_ccs_resolve(struct anv_cmd_buffer * const cmd_buffer,
> - const struct anv_image * const image,
> - VkImageAspectFlagBits aspect,
> - const uint8_t level,
> - const uint32_t start_layer, const uint32_t layer_count,
> - const enum blorp_fast_clear_op op);
>
> void
> -anv_image_fast_clear(struct anv_cmd_buffer *cmd_buffer,
> - const struct anv_image *image,
> - VkImageAspectFlagBits aspect,
> - const uint32_t base_level, const uint32_t level_count,
> - const uint32_t base_layer, uint32_t layer_count);
> +anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
> + const struct anv_image *image,
> + VkImageAspectFlagBits aspect,
> + uint32_t base_layer, uint32_t layer_count,
> + enum isl_aux_op mcs_op, bool predicate);
> +void
> +anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
> + const struct anv_image *image,
> + VkImageAspectFlagBits aspect, uint32_t level,
> + uint32_t base_layer, uint32_t layer_count,
> + enum isl_aux_op ccs_op, bool predicate);
>
> void
> anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
> index ab5590d..2e7a2cc 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -689,9 +689,22 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
> "define an MCS buffer.");
> }
>
> - anv_image_fast_clear(cmd_buffer, image, aspect,
> - base_level, level_count,
> - base_layer, layer_count);
> + if (image->samples == 1) {
> + for (uint32_t l = 0; l < level_count; l++) {
> + const uint32_t level = base_level + l;
> + const uint32_t level_layer_count =
> + MIN2(layer_count, anv_image_aux_layers(image, aspect, level));
> + anv_image_ccs_op(cmd_buffer, image, aspect, level,
> + base_layer, level_layer_count,
> + ISL_AUX_OP_FAST_CLEAR, false);
> + }
> + } else {
> + assert(image->samples > 1);
> + assert(base_level == 0 && level_count == 1);
> + anv_image_mcs_op(cmd_buffer, image, aspect,
> + base_layer, layer_count,
> + ISL_AUX_OP_FAST_CLEAR, false);
> + }
> }
> /* At this point, some elements of the CCS buffer may have the fast-clear
> * bit-arrangement. As the user writes to a subresource, we need to have
> @@ -760,10 +773,11 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
>
> genX(load_needs_resolve_predicate)(cmd_buffer, image, aspect, level);
>
> - anv_ccs_resolve(cmd_buffer, image, aspect, level, base_layer, layer_count,
> - image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E ?
> - BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL :
> - BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
> + anv_image_ccs_op(cmd_buffer, image, aspect, level,
> + base_layer, layer_count,
> + image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E ?
> + ISL_AUX_OP_PARTIAL_RESOLVE : ISL_AUX_OP_FULL_RESOLVE,
> + true);
>
> genX(set_image_needs_resolve)(cmd_buffer, image, aspect, level, false);
> }
> --
> 2.5.0.400.gff86faf
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list