[Mesa-dev] [PATCH v3 08/16] anv: Transition more color buffer layouts
Jason Ekstrand
jason at jlekstrand.net
Mon Jul 10 17:14:21 UTC 2017
On Wed, Jun 28, 2017 at 2:14 PM, Nanley Chery <nanleychery at gmail.com> wrote:
> v2: Expound on comment for the pipe controls (Jason Ekstrand).
>
> Signed-off-by: Nanley Chery <nanley.g.chery at intel.com>
> ---
> src/intel/vulkan/anv_blorp.c | 4 +-
> src/intel/vulkan/genX_cmd_buffer.c | 183 ++++++++++++++++++++++++++++++
> +++----
> 2 files changed, 167 insertions(+), 20 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 459d57ec57..84b01e8792 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1451,7 +1451,9 @@ anv_image_ccs_clear(struct anv_cmd_buffer
> *cmd_buffer,
>
> struct blorp_surf surf;
> get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
> - image->aux_usage, &surf);
> + image->aux_usage == ISL_AUX_USAGE_CCS_E ?
> + ISL_AUX_USAGE_CCS_E : ISL_AUX_USAGE_CCS_D,
> + &surf);
>
> /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
> *
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> index decf0b28d6..1a9b841c7c 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -524,6 +524,17 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer
> *cmd_buffer,
> }
> }
>
> +/**
> + * @brief Transitions a color buffer from one layout to another.
> + *
> + * See section 6.1.1. Image Layout Transitions of the Vulkan 1.0.50 spec
> for
> + * more information.
> + *
> + * @param level_count VK_REMAINING_MIP_LEVELS isn't supported.
> + * @param layer_count VK_REMAINING_ARRAY_LAYERS isn't supported. For 3D
> images,
> + * this represents the maximum layers to transition at
> each
> + * specified miplevel.
> + */
> static void
> transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
> const struct anv_image *image,
> @@ -532,13 +543,27 @@ transition_color_buffer(struct anv_cmd_buffer
> *cmd_buffer,
> VkImageLayout initial_layout,
> VkImageLayout final_layout)
> {
> - assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> -
> - if (image->aux_surface.isl.size == 0)
> - return;
> -
> - if (initial_layout != VK_IMAGE_LAYOUT_UNDEFINED &&
> - initial_layout != VK_IMAGE_LAYOUT_PREINITIALIZED)
> + /* Validate the inputs. */
> + assert(cmd_buffer);
> + assert(image && image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> + /* These values aren't supported for simplicity's sake. */
> + assert(level_count != VK_REMAINING_MIP_LEVELS &&
> + layer_count != VK_REMAINING_ARRAY_LAYERS);
> + /* Ensure the subresource range is valid. */
> + uint64_t last_level_num = base_level + level_count;
> + const uint32_t max_depth = anv_minify(image->extent.depth,
> base_level);
> + const uint32_t image_layers = MAX2(image->array_size, max_depth);
> + assert(base_layer + layer_count <= image_layers);
> + assert(last_level_num <= image->levels);
> + /* The spec disallows these final layouts. */
> + assert(final_layout != VK_IMAGE_LAYOUT_UNDEFINED &&
> + final_layout != VK_IMAGE_LAYOUT_PREINITIALIZED);
> +
> + /* No work is necessary if the layout stays the same or if this
> subresource
> + * range lacks auxiliary data.
> + */
> + if (initial_layout == final_layout ||
> + base_layer >= anv_image_aux_layers(image, base_level))
> return;
>
> /* A transition of a 3D subresource works on all slices at a time. */
> @@ -549,22 +574,142 @@ transition_color_buffer(struct anv_cmd_buffer
> *cmd_buffer,
>
> /* We're interested in the subresource range subset that has aux data.
> */
> level_count = MIN2(level_count, anv_image_aux_levels(image));
> + layer_count = MIN2(layer_count, anv_image_aux_layers(image,
> base_level));
>
Is this correct? I think we want MIN2(layer_count, anv_image_aux_layers()
- base_layer), don't we? This would also mean there's a bug in the current
level_count.
> + last_level_num = base_level + level_count;
> +
> + /* Record whether or not the layout is undefined. Pre-initialized
> images
> + * with auxiliary buffers have a non-linear layout and are thus
> undefined.
> + */
> + assert(image->tiling == VK_IMAGE_TILING_OPTIMAL);
> + const bool undef_layout = initial_layout == VK_IMAGE_LAYOUT_UNDEFINED
> ||
> + initial_layout == VK_IMAGE_LAYOUT_
> PREINITIALIZED;
>
> - /* We're transitioning from an undefined layout. We must ensure that
> the
> - * clear values buffer is filled with valid data.
> + /* Do preparatory work before the resolve operation or return early if
> no
> + * resolve is actually needed.
> */
> - for (unsigned l = 0; l < level_count; l++)
> - init_fast_clear_state_entry(cmd_buffer, image, base_level + l);
> -
> - if (image->aux_usage == ISL_AUX_USAGE_CCS_E) {
> - /* We're transitioning from an undefined layout so it doesn't really
> - * matter what data ends up in the color buffer. We do, however,
> need to
> - * ensure that the CCS has valid data in it. One easy way to do
> that is
> - * to fast-clear the specified range.
> + if (undef_layout) {
> + /* A subresource in the undefined layout may have been aliased and
> + * populated with any arrangement of bits. Therefore, we must
> initialize
> + * the related aux buffer and clear buffer entry with desirable
> values.
> + *
> + * Initialize the relevant clear buffer entries.
> */
> - anv_image_ccs_clear(cmd_buffer, image, base_level, level_count,
> - base_layer, layer_count);
> + for (unsigned level = base_level; level < last_level_num; level++)
> + init_fast_clear_state_entry(cmd_buffer, image, level);
> +
> + /* MCS buffers have been initialized for correct behaviour. */
>
I don't think this is correct. I can definitely see MCS going off the
rails in a similar way to CCS if things aren't initialized correctly.
Unfortunately, MSAA test coverage is defintiely sub-par right now. I think
I'd feel more comfortable if we fast-cleared them too. Fortunately,
though, we always leave basic MCS on for MSAA surfaces so no resolve will
ever be needed.
> + if (image->samples > 1)
> + return;
> +
> + /* Initialize the CCS buffers to enable correct rendering. This
> operation
> + * requires up to two steps: one to rid the CCS buffer of data that
> may
> + * cause GPU hangs, and another to ensure that writes done without
> CCS
> + * will be visible to reads done with CCS.
> + *
> + * On SKL+, it becomes possible to have a CCS buffer with invalid
> data.
> + * One easy way to avoid this state is to fast-clear the specified
> range.
> + */
> + if (GEN_GEN >= 9) {
> + anv_image_ccs_clear(cmd_buffer, image, base_level, level_count,
> + base_layer, layer_count);
> + }
> + /* At this point, some elements of the CCS buffer may have the
> fast-clear
> + * bit-arrangement. As the user writes to a subresource, we need to
> have
> + * the associated CCS elements enter the ambiguated state. This
> enables
> + * reads (implicit or explicit) to reflect the user-written data
> instead
> + * of the clear color. The only time such elements will not change
> their
> + * state as described above, is in a final layout that doesn't have
> CCS
> + * enabled. In this case, we must force the associated CCS buffers
> of the
> + * specified range to enter the ambiguated state in advance.
> + */
> + if (image->aux_usage != ISL_AUX_USAGE_CCS_E &&
> + final_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
> + /* The CCS_D buffer may not be enabled in the final layout.
> Continue
> + * executing this function to perform a resolve.
> + */
> + anv_perf_warn("Performing an additional resolve for CCS_D
> layout "
> + "transition. Consider always leaving it on or "
> + "performing an ambiguation pass.");
> + } else {
> + /* Writes in the final layout will be aware of the CCS buffer. In
> + * addition, the clear buffer entries and the CCS buffers have
> been
> + * populated with values that will result in correct rendering.
> + */
> + return;
>
Ugh... This all works but it makes me an even bigger fan of that ambiguate
pass... Maybe we should consider reviving it once this all lands.
> + }
> + } else if (initial_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
> {
> + /* Resolves are only necessary if the subresource may contain blocks
> + * fast-cleared to values unsupported in other layouts. This only
> occurs
> + * if the initial layout is COLOR_ATTACHMENT_OPTIMAL.
> + */
> + return;
> + } else if (image->samples > 1) {
> + /* MCS buffers don't need resolving. */
> + return;
> }
> +
> + /* Perform a resolve to synchronize data between the main and aux
> buffer.
> + * Before we begin, we must satisfy the cache flushing requirement
> specified
> + * in the Sky Lake PRM Vol. 7, "MCS Buffer for Render Target(s)":
> + *
> + * Any transition from any value in {Clear, Render, Resolve} to a
> + * different value in {Clear, Render, Resolve} requires end of pipe
> + * synchronization.
> + *
> + * We perform a flush of the write cache before and after the clear and
> + * resolve operations to meet this requirement.
> + *
> + * Unlike other drawing, it seems that fast clear operations are not
> + * properly synchronized. The first PIPE_CONTROL here likely ensures
> that
> + * the contents of the previous render or clear hit the render target
> before
> + * we resolve and the second likely ensures that the resolve is
> complete
> + * before we do any more rendering or clearing.
>
I'm really not a fan of how much this has been weasel-worded...
<rant>
One of the big problems with the PRMs is that they don't actually describe
the hardware. They describe how a theoretical driver can/should program
the hardware to get correct rendering results. What they don't tell you is
*why* programming the hardware that way is needed. They get especially bad
about this around synchronization and image layout. They're also
frequently missing important details and are sometimes flat-out wrong.
The statement "fast clear ops are not properly synchronized with other
drawing" describes, in some sense, what's going on in the hardware. You
have two rendering operations in-flight and, due to some hardware detail I
don't fully understand, they can't safely be in the pipeline at the same
time. It's not a full description I'll grant, but it at least describes,
in general terms, the problem the workaround is solving. The language in
the PRM, on the other hand, describes some engineer's mental model of that
synchronization problem and a way of working around it.
We write drivers for hardware, not for PRMs. I don't think we need to
weasel-word our comments to make it look like all we have is hair-brained
theories while whoever wrote the PRM note has the actual knowledge. If
we're particularly unsure about something then that should be expressed in
the comment. However, I'm pretty convinced that what's going on here is a
synchronization problem. Our understanding of fast-clear operations may
improve in the future and, if/when it does, we'll update the comments.
</rant>
> + */
> + cmd_buffer->state.pending_pipe_bits |=
> + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> +
> + for (uint32_t level = base_level; level < last_level_num; level++) {
> +
> + /* The number of layers changes at each 3D miplevel. */
> + if (image->type == VK_IMAGE_TYPE_3D) {
> + layer_count = MIN2(layer_count, anv_image_aux_layers(image,
> level));
> + }
> +
> + /* Create a surface state with the right clear color and perform the
> + * resolve.
> + */
> + struct anv_state surface_state =
> + anv_cmd_buffer_alloc_surface_state(cmd_buffer);
> + isl_surf_fill_state(&cmd_buffer->device->isl_dev,
> surface_state.map,
> + .surf = &image->color_surface.isl,
> + .view = &(struct isl_view) {
> + .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
> + .format = image->color_surface.isl.format,
> + .swizzle = ISL_SWIZZLE_IDENTITY,
> + .base_level = level,
> + .levels = 1,
> + .base_array_layer = base_layer,
> + .array_len = layer_count,
> + },
> + .aux_surf = &image->aux_surface.isl,
> + .aux_usage = image->aux_usage ==
> ISL_AUX_USAGE_NONE ?
> + ISL_AUX_USAGE_CCS_D :
> image->aux_usage,
> + .mocs = cmd_buffer->device->default_mocs);
> + add_image_relocs(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
> + image->aux_usage == ISL_AUX_USAGE_CCS_E ?
> + ISL_AUX_USAGE_CCS_E : ISL_AUX_USAGE_CCS_D,
> + surface_state);
> + anv_state_flush(cmd_buffer->device, surface_state);
> + genX(copy_fast_clear_dwords)(cmd_buffer, surface_state, image,
> level,
> + false /* copy to ss */);
> + anv_ccs_resolve(cmd_buffer, surface_state, image, level,
> layer_count,
> + image->aux_usage == ISL_AUX_USAGE_CCS_E ?
> + BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL :
> + BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
> + }
> +
> + cmd_buffer->state.pending_pipe_bits |=
> + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> }
>
> /**
> --
> 2.13.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170710/fcced53f/attachment-0001.html>
More information about the mesa-dev
mailing list