[Mesa-dev] [PATCH 6/7] anv: Add support for fast clears on gen9
Jason Ekstrand
jason at jlekstrand.net
Sat Nov 19 19:40:08 UTC 2016
---
src/intel/vulkan/anv_blorp.c | 102 ++++++++++++++++++++++++++++++++-----
src/intel/vulkan/anv_private.h | 3 ++
src/intel/vulkan/genX_cmd_buffer.c | 100 ++++++++++++++++++++++++++++++------
3 files changed, 176 insertions(+), 29 deletions(-)
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 24b98ab..cab1906 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1178,16 +1178,35 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
struct blorp_surf surf;
get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
att_state->aux_usage, &surf);
+ surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
const VkRect2D render_area = cmd_buffer->state.render_area;
- blorp_clear(&batch, &surf, iview->isl.format, iview->isl.swizzle,
- iview->isl.base_level,
- iview->isl.base_array_layer, fb->layers,
- render_area.offset.x, render_area.offset.y,
- render_area.offset.x + render_area.extent.width,
- render_area.offset.y + render_area.extent.height,
- vk_to_isl_color(att_state->clear_value.color), NULL);
+ if (att_state->fast_clear) {
+ blorp_fast_clear(&batch, &surf, iview->isl.format,
+ iview->isl.base_level,
+ iview->isl.base_array_layer, fb->layers,
+ render_area.offset.x, render_area.offset.y,
+ render_area.offset.x + render_area.extent.width,
+ render_area.offset.y + render_area.extent.height);
+
+ /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
+ *
+ * "After Render target fast clear, pipe-control with color cache
+ * write-flush must be issued before sending any DRAW commands on
+ * that render target."
+ */
+ cmd_buffer->state.pending_pipe_bits |=
+ ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
+ } else {
+ blorp_clear(&batch, &surf, iview->isl.format, iview->isl.swizzle,
+ iview->isl.base_level,
+ iview->isl.base_array_layer, fb->layers,
+ render_area.offset.x, render_area.offset.y,
+ render_area.offset.x + render_area.extent.width,
+ render_area.offset.y + render_area.extent.height,
+ surf.clear_color, NULL);
+ }
att_state->pending_clear_aspects = 0;
}
@@ -1298,10 +1317,12 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
struct anv_attachment_state *att_state =
&cmd_buffer->state.attachments[att];
- assert(att_state->aux_usage != ISL_AUX_USAGE_CCS_D);
- if (att_state->aux_usage != ISL_AUX_USAGE_CCS_E)
+ if (att_state->aux_usage == ISL_AUX_USAGE_NONE)
return; /* Nothing to resolve */
+ assert(att_state->aux_usage == ISL_AUX_USAGE_CCS_E ||
+ att_state->aux_usage == ISL_AUX_USAGE_CCS_D);
+
struct anv_render_pass *pass = cmd_buffer->state.pass;
struct anv_subpass *subpass = cmd_buffer->state.subpass;
unsigned subpass_idx = subpass - pass->subpasses;
@@ -1312,14 +1333,17 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
* of a particular attachment. That way we only resolve once but it's
* still hot in the cache.
*/
+ bool found_draw = false;
+ enum anv_subpass_usage usage = 0;
for (uint32_t s = subpass_idx + 1; s < pass->subpass_count; s++) {
- enum anv_subpass_usage usage = pass->attachments[att].subpass_usage[s];
+ usage |= pass->attachments[att].subpass_usage[s];
if (usage & (ANV_SUBPASS_USAGE_DRAW | ANV_SUBPASS_USAGE_RESOLVE_DST)) {
/* We found another subpass that draws to this attachment. We'll
* wait to resolve until then.
*/
- return;
+ found_draw = true;
+ break;
}
}
@@ -1327,12 +1351,60 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image = iview->image;
assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
- if (image->aux_usage == ISL_AUX_USAGE_CCS_E)
+ enum blorp_fast_clear_op resolve_op = BLORP_FAST_CLEAR_OP_NONE;
+ if (!found_draw) {
+ /* This is the last subpass that writes to this attachment so we need to
+ * resolve here. Ideally, we would like to only resolve if the storeOp
+ * is set to VK_ATTACHMENT_STORE_OP_STORE. However, we need to ensure
+ * that the CCS bits are set to "resolved" because there may be copy or
+ * blit operations (which may ignore CCS) between now and the next time
+ * we render and we need to ensure that anything they write will be
+ * respected in the next render. Unfortunately, the hardware does not
+ * provide us with any sort of "invalidate" pass that sets the CCS to
+ * "resolved" without writing to the render target.
+ */
+ if (iview->image->aux_usage != ISL_AUX_USAGE_CCS_E) {
+ /* The image destination surface doesn't support compression outside
+ * the render pass. We need a full resolve.
+ */
+ resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
+ } else if (att_state->fast_clear) {
+ /* We don't know what to do with clear colors outside the render
+ * pass. We need a partial resolve.
+ */
+ resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
+ } else {
+ /* The image "natively" supports all the compression we care about
+ * and we don't need to resolve at all. If this is the case, we also
+ * don't need to resolve for any of the input attachment cases below.
+ */
+ }
+ } else if (usage & ANV_SUBPASS_USAGE_INPUT) {
+ /* Input attachments are clear-color aware so, at least on Sky Lake, we
+ * can frequently sample from them with no resolves at all.
+ */
+ if (att_state->aux_usage != att_state->input_aux_usage) {
+ assert(att_state->input_aux_usage == ISL_AUX_USAGE_NONE);
+ resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
+ } else if (!att_state->clear_color_is_zero_one) {
+ /* Sky Lake PRM, Vol. 2d, RENDER_SURFACE_STATE::Red Clear Color:
+ *
+ * "If Number of Multisamples is MULTISAMPLECOUNT_1 AND if this RT
+ * is fast cleared with non-0/1 clear value, this RT must be
+ * partially resolved (refer to Partial Resolve operation) before
+ * binding this surface to Sampler."
+ */
+ resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
+ }
+ }
+
+ if (resolve_op == BLORP_FAST_CLEAR_OP_NONE)
return;
struct blorp_surf surf;
get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
att_state->aux_usage, &surf);
+ surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
/* From the Sky Lake PRM Vol. 7, "Render Target Resolve":
*
@@ -1353,12 +1425,14 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
blorp_ccs_resolve(batch, &surf,
iview->isl.base_level,
iview->isl.base_array_layer + layer,
- iview->isl.format,
- BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
+ iview->isl.format, resolve_op);
}
cmd_buffer->state.pending_pipe_bits |=
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
+
+ /* Once we've done any sort of resolve, we're no longer fast-cleared */
+ att_state->fast_clear = false;
}
void
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 7b521b1..4e6049c 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1087,11 +1087,14 @@ void anv_dynamic_state_copy(struct anv_dynamic_state *dest,
*/
struct anv_attachment_state {
enum isl_aux_usage aux_usage;
+ enum isl_aux_usage input_aux_usage;
struct anv_state color_rt_state;
struct anv_state input_att_state;
VkImageAspectFlags pending_clear_aspects;
+ bool fast_clear;
VkClearValue clear_value;
+ bool clear_color_is_zero_one;
};
/** State required while building cmd buffer */
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 1ad28fd..38579ce 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -191,23 +191,87 @@ add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
}
}
-static enum isl_aux_usage
-fb_attachment_get_aux_usage(struct anv_device *device,
- struct anv_framebuffer *fb,
- uint32_t attachment)
+static bool
+color_is_zero_one(VkClearColorValue value, enum isl_format format)
{
- struct anv_image_view *iview = fb->attachments[attachment];
+ if (isl_format_has_int_channel(format)) {
+ for (unsigned i = 0; i < 4; i++) {
+ if (value.int32[i] != 0 && value.int32[i] != 1)
+ return false;
+ }
+ } else {
+ for (unsigned i = 0; i < 4; i++) {
+ if (value.float32[i] != 0.0f && value.float32[i] != 1.0f)
+ return false;
+ }
+ }
- if (iview->image->aux_surface.isl.size == 0)
- return ISL_AUX_USAGE_NONE; /* No aux surface */
+ return true;
+}
+
+static void
+color_attachment_compute_aux_usage(struct anv_device *device,
+ struct anv_attachment_state *att_state,
+ struct anv_image_view *iview,
+ VkRect2D render_area,
+ union isl_color_value *fast_clear_color)
+{
+ if (iview->image->aux_surface.isl.size == 0) {
+ att_state->aux_usage = ISL_AUX_USAGE_NONE;
+ att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
+ att_state->fast_clear = false;
+ return;
+ }
assert(iview->image->aux_surface.isl.usage & ISL_SURF_USAGE_CCS_BIT);
- if (isl_format_supports_lossless_compression(&device->info,
- iview->isl.format))
- return ISL_AUX_USAGE_CCS_E;
+ att_state->clear_color_is_zero_one =
+ color_is_zero_one(att_state->clear_value.color, iview->isl.format);
+
+ if (att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
+ /* Start off assuming fast clears are possible */
+ att_state->fast_clear = true;
- return ISL_AUX_USAGE_NONE;
+ /* Potentially, we could do partial fast-clears but doing so has crazy
+ * alignment restrictions. It's easier to just restrict to full size
+ * fast clears for now.
+ */
+ if (render_area.offset.x != 0 ||
+ render_area.offset.y != 0 ||
+ render_area.extent.width != iview->extent.width ||
+ render_area.extent.height != iview->extent.height)
+ att_state->fast_clear = false;
+
+ if (att_state->fast_clear) {
+ memcpy(fast_clear_color->u32, att_state->clear_value.color.uint32,
+ sizeof(fast_clear_color->u32));
+ }
+ } else {
+ att_state->fast_clear = false;
+ }
+
+ if (isl_format_supports_lossless_compression(&device->info,
+ iview->isl.format)) {
+ att_state->aux_usage = ISL_AUX_USAGE_CCS_E;
+ att_state->input_aux_usage = ISL_AUX_USAGE_CCS_E;
+ } else if (att_state->fast_clear) {
+ att_state->aux_usage = ISL_AUX_USAGE_CCS_D;
+ /* From the Sky Lake PRM, RENDER_SURFACE_STATE::AuxiliarySurfaceMode:
+ *
+ * "If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_D
+ * setting is only allowed if Surface Format supported for Fast
+ * Clear. In addition, if the surface is bound to the sampling
+ * engine, Surface Format must be supported for Render Target
+ * Compression for surfaces bound to the sampling engine."
+ *
+ * In other words, we can't sample from a fast-cleared image if it
+ * doesn't also support color compression.
+ */
+ att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
+ } else {
+ att_state->aux_usage = ISL_AUX_USAGE_NONE;
+ att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
+ }
}
static bool
@@ -350,9 +414,12 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
struct anv_image_view *iview = framebuffer->attachments[i];
assert(iview->vk_format == att->format);
+ union isl_color_value clear_color = { .u32 = { 0, } };
if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
- state->attachments[i].aux_usage =
- fb_attachment_get_aux_usage(cmd_buffer->device, framebuffer, i);
+ color_attachment_compute_aux_usage(cmd_buffer->device,
+ &state->attachments[i],
+ iview, begin->renderArea,
+ &clear_color);
struct isl_view view = iview->isl;
view.usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
@@ -362,6 +429,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
.view = &view,
.aux_surf = &iview->image->aux_surface.isl,
.aux_usage = state->attachments[i].aux_usage,
+ .clear_color = clear_color,
.mocs = cmd_buffer->device->default_mocs);
add_image_view_relocs(cmd_buffer, iview,
@@ -369,6 +437,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
state->attachments[i].color_rt_state);
} else {
state->attachments[i].aux_usage = ISL_AUX_USAGE_NONE;
+ state->attachments[i].input_aux_usage = ISL_AUX_USAGE_NONE;
}
if (need_input_attachment_state(&pass->attachments[i])) {
@@ -386,11 +455,12 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
.surf = surf,
.view = &view,
.aux_surf = &iview->image->aux_surface.isl,
- .aux_usage = state->attachments[i].aux_usage,
+ .aux_usage = state->attachments[i].input_aux_usage,
+ .clear_color = clear_color,
.mocs = cmd_buffer->device->default_mocs);
add_image_view_relocs(cmd_buffer, iview,
- state->attachments[i].aux_usage,
+ state->attachments[i].input_aux_usage,
state->attachments[i].input_att_state);
}
}
--
2.5.0.400.gff86faf
More information about the mesa-dev mailing list