[Mesa-dev] [PATCH v4 14/18] anv: Transition more color buffer layouts
Nanley Chery
nanleychery at gmail.com
Wed Jul 19 21:22:06 UTC 2017
v2: Expound on comment for the pipe controls (Jason Ekstrand).
v3:
- Cast base_layer to uint64_t to avoid overflow.
- Remove "seems" from the pipe control comment.
- Fix clamp of layer_count (Jason Ekstrand).
Signed-off-by: Nanley Chery <nanley.g.chery at intel.com>
---
src/intel/vulkan/anv_blorp.c | 4 +-
src/intel/vulkan/genX_cmd_buffer.c | 187 ++++++++++++++++++++++++++++++++-----
2 files changed, 167 insertions(+), 24 deletions(-)
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 88dbba8a12..5065f1fbc4 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1455,7 +1455,9 @@ anv_image_fast_clear(struct anv_cmd_buffer *cmd_buffer,
struct blorp_surf surf;
get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
- image->aux_usage, &surf);
+ image->aux_usage == ISL_AUX_USAGE_NONE ?
+ ISL_AUX_USAGE_CCS_D : image->aux_usage,
+ &surf);
/* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
*
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index e52ed74e29..7b2a3551b6 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -519,6 +519,17 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer,
}
}
+/**
+ * @brief Transitions a color buffer from one layout to another.
+ *
+ * See section 6.1.1. Image Layout Transitions of the Vulkan 1.0.50 spec for
+ * more information.
+ *
+ * @param level_count VK_REMAINING_MIP_LEVELS isn't supported.
+ * @param layer_count VK_REMAINING_ARRAY_LAYERS isn't supported. For 3D images,
+ * this represents the maximum layers to transition at each
+ * specified miplevel.
+ */
static void
transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
@@ -527,14 +538,27 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
VkImageLayout initial_layout,
VkImageLayout final_layout)
{
- assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
-
- if (image->aux_surface.isl.size == 0 ||
- base_level >= anv_image_aux_levels(image))
- return;
-
- if (initial_layout != VK_IMAGE_LAYOUT_UNDEFINED &&
- initial_layout != VK_IMAGE_LAYOUT_PREINITIALIZED)
+ /* Validate the inputs. */
+ assert(cmd_buffer);
+ assert(image && image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+ /* These values aren't supported for simplicity's sake. */
+ assert(level_count != VK_REMAINING_MIP_LEVELS &&
+ layer_count != VK_REMAINING_ARRAY_LAYERS);
+ /* Ensure the subresource range is valid. */
+ uint64_t last_level_num = base_level + level_count;
+ const uint32_t max_depth = anv_minify(image->extent.depth, base_level);
+ const uint32_t image_layers = MAX2(image->array_size, max_depth);
+ assert((uint64_t)base_layer + layer_count <= image_layers);
+ assert(last_level_num <= image->levels);
+ /* The spec disallows these final layouts. */
+ assert(final_layout != VK_IMAGE_LAYOUT_UNDEFINED &&
+ final_layout != VK_IMAGE_LAYOUT_PREINITIALIZED);
+
+ /* No work is necessary if the layout stays the same or if this subresource
+ * range lacks auxiliary data.
+ */
+ if (initial_layout == final_layout ||
+ base_layer >= anv_image_aux_layers(image, base_level))
return;
/* A transition of a 3D subresource works on all slices at a time. */
@@ -545,25 +569,142 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
/* We're interested in the subresource range subset that has aux data. */
level_count = MIN2(level_count, anv_image_aux_levels(image) - base_level);
+ layer_count = MIN2(layer_count,
+ anv_image_aux_layers(image, base_level) - base_layer);
+ last_level_num = base_level + level_count;
- /* We're transitioning from an undefined layout. We must ensure that the
- * clear values buffer is filled with valid data.
+ /* Record whether or not the layout is undefined. Pre-initialized images
+ * with auxiliary buffers have a non-linear layout and are thus undefined.
*/
- for (unsigned l = 0; l < level_count; l++)
- init_fast_clear_state_entry(cmd_buffer, image, base_level + l);
-
- if (image->aux_usage == ISL_AUX_USAGE_CCS_E ||
- image->samples == 2 || image->samples == 8) {
- /* We're transitioning from an undefined layout so it doesn't really
- * matter what data ends up in the color buffer. We do, however, need to
- * ensure that the auxiliary surface is not in an undefined state. This
- * state is possible for CCS buffers SKL+ and MCS buffers with certain
- * sample counts. One easy way to get to a valid state is to fast-clear
- * the specified range.
+ assert(image->tiling == VK_IMAGE_TILING_OPTIMAL);
+ const bool undef_layout = initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
+ initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
+
+ /* Do preparatory work before the resolve operation or return early if no
+ * resolve is actually needed.
+ */
+ if (undef_layout) {
+ /* A subresource in the undefined layout may have been aliased and
+ * populated with any arrangement of bits. Therefore, we must initialize
+ * the related aux buffer and clear buffer entry with desirable values.
+ *
+ * Initialize the relevant clear buffer entries.
+ */
+ for (unsigned level = base_level; level < last_level_num; level++)
+ init_fast_clear_state_entry(cmd_buffer, image, level);
+
+ /* Initialize the aux buffers to enable correct rendering. This operation
+ * requires up to two steps: one to rid the aux buffer of data that may
+ * cause GPU hangs, and another to ensure that writes done without aux
+ * will be visible to reads done with aux.
+ *
+ * Having an aux buffer with invalid data is possible for CCS buffers
+ * SKL+ and for MCS buffers with certain sample counts. One easy way to
+ * get to a valid state is to fast-clear the specified range.
*/
- anv_image_fast_clear(cmd_buffer, image, base_level, level_count,
- base_layer, layer_count);
+ if ((GEN_GEN >= 9 && image->samples == 1) ||
+ image->samples == 2 || image->samples == 8) {
+ anv_image_fast_clear(cmd_buffer, image, base_level, level_count,
+ base_layer, layer_count);
+ }
+ /* At this point, some elements of the CCS buffer may have the fast-clear
+ * bit-arrangement. As the user writes to a subresource, we need to have
+ * the associated CCS elements enter the ambiguated state. This enables
+ * reads (implicit or explicit) to reflect the user-written data instead
+ * of the clear color. The only time such elements will not change their
+ * state as described above, is in a final layout that doesn't have CCS
+ * enabled. In this case, we must force the associated CCS buffers of the
+ * specified range to enter the ambiguated state in advance.
+ */
+ if (image->samples == 1 && image->aux_usage != ISL_AUX_USAGE_CCS_E &&
+ final_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
+ /* The CCS_D buffer may not be enabled in the final layout. Continue
+ * executing this function to perform a resolve.
+ */
+ anv_perf_warn("Performing an additional resolve for CCS_D layout "
+ "transition. Consider always leaving it on or "
+ "performing an ambiguation pass.");
+ } else {
+ /* Writes in the final layout will be aware of the auxiliary buffer.
+ * In addition, the clear buffer entries and the auxiliary buffers
+ * have been populated with values that will result in correct
+ * rendering.
+ */
+ return;
+ }
+ } else if (initial_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
+ /* Resolves are only necessary if the subresource may contain blocks
+ * fast-cleared to values unsupported in other layouts. This only occurs
+ * if the initial layout is COLOR_ATTACHMENT_OPTIMAL.
+ */
+ return;
+ } else if (image->samples > 1) {
+ /* MCS buffers don't need resolving. */
+ return;
+ }
+
+ /* Perform a resolve to synchronize data between the main and aux buffer.
+ * Before we begin, we must satisfy the cache flushing requirement specified
+ * in the Sky Lake PRM Vol. 7, "MCS Buffer for Render Target(s)":
+ *
+ * Any transition from any value in {Clear, Render, Resolve} to a
+ * different value in {Clear, Render, Resolve} requires end of pipe
+ * synchronization.
+ *
+ * We perform a flush of the write cache before and after the clear and
+ * resolve operations to meet this requirement.
+ *
+ * Unlike other drawing, fast clear operations are not properly
+ * synchronized. The first PIPE_CONTROL here likely ensures that the
+ * contents of the previous render or clear hit the render target before we
+ * resolve and the second likely ensures that the resolve is complete before
+ * we do any more rendering or clearing.
+ */
+ cmd_buffer->state.pending_pipe_bits |=
+ ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
+
+ for (uint32_t level = base_level; level < last_level_num; level++) {
+
+ /* The number of layers changes at each 3D miplevel. */
+ if (image->type == VK_IMAGE_TYPE_3D) {
+ layer_count = MIN2(layer_count, anv_image_aux_layers(image, level));
+ }
+
+ /* Create a surface state with the right clear color and perform the
+ * resolve.
+ */
+ struct anv_state surface_state =
+ anv_cmd_buffer_alloc_surface_state(cmd_buffer);
+ isl_surf_fill_state(&cmd_buffer->device->isl_dev, surface_state.map,
+ .surf = &image->color_surface.isl,
+ .view = &(struct isl_view) {
+ .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
+ .format = image->color_surface.isl.format,
+ .swizzle = ISL_SWIZZLE_IDENTITY,
+ .base_level = level,
+ .levels = 1,
+ .base_array_layer = base_layer,
+ .array_len = layer_count,
+ },
+ .aux_surf = &image->aux_surface.isl,
+ .aux_usage = image->aux_usage == ISL_AUX_USAGE_NONE ?
+ ISL_AUX_USAGE_CCS_D : image->aux_usage,
+ .mocs = cmd_buffer->device->default_mocs);
+ add_image_relocs(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
+ image->aux_usage == ISL_AUX_USAGE_CCS_E ?
+ ISL_AUX_USAGE_CCS_E : ISL_AUX_USAGE_CCS_D,
+ surface_state);
+ anv_state_flush(cmd_buffer->device, surface_state);
+ genX(copy_fast_clear_dwords)(cmd_buffer, surface_state, image, level,
+ false /* copy to ss */);
+ anv_ccs_resolve(cmd_buffer, surface_state, image, level, layer_count,
+ image->aux_usage == ISL_AUX_USAGE_CCS_E ?
+ BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL :
+ BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
}
+
+ cmd_buffer->state.pending_pipe_bits |=
+ ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
}
/**
--
2.13.3
More information about the mesa-dev
mailing list