[Mesa-dev] [PATCH v3 16/16] anv: Predicate fast-clear resolves
Nanley Chery
nanleychery at gmail.com
Wed Jun 28 21:14:49 UTC 2017
Image layouts only let us know that an image *may* be fast-cleared. For
this reason we can end up with redundant resolves. Testing has shown
that such resolves can measurably hurt performance and that predicating
them can avoid the penalty.
Signed-off-by: Nanley Chery <nanley.g.chery at intel.com>
---
src/intel/vulkan/anv_blorp.c | 3 +-
src/intel/vulkan/anv_private.h | 13 ++++--
src/intel/vulkan/genX_cmd_buffer.c | 87 ++++++++++++++++++++++++++++++++++++--
3 files changed, 95 insertions(+), 8 deletions(-)
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 35317ba6be..d06d7e2cc3 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1619,7 +1619,8 @@ anv_ccs_resolve(struct anv_cmd_buffer * const cmd_buffer,
return;
struct blorp_batch batch;
- blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
+ blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
+ BLORP_BATCH_PREDICATE_ENABLE);
struct blorp_surf surf;
get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index be1623f3c3..951cf50842 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -2118,11 +2118,16 @@ anv_fast_clear_state_entry_size(const struct anv_device *device)
{
assert(device);
/* Entry contents:
- * +----------------------+
- * | clear value dword(s) |
- * +----------------------+
+ * +--------------------------------------------+
+ * | clear value dword(s) | needs resolve dword |
+ * +--------------------------------------------+
*/
- return device->isl_dev.ss.clear_value_size;
+
+ /* Ensure that the needs resolve dword is in fact dword-aligned to enable
+ * GPU memcpy operations.
+ */
+ assert(device->isl_dev.ss.clear_value_size % 4 == 0);
+ return device->isl_dev.ss.clear_value_size + 4;
}
/* Returns true if a HiZ-enabled depth buffer can be sampled from. */
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 62a2f22782..65d9c92783 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -421,6 +421,59 @@ get_fast_clear_state_entry_offset(const struct anv_device *device,
return offset;
}
+#define MI_PREDICATE_SRC0 0x2400
+#define MI_PREDICATE_SRC1 0x2408
+
+enum ccs_resolve_state {
+ CCS_RESOLVE_NOT_NEEDED,
+ CCS_RESOLVE_NEEDED,
+ CCS_RESOLVE_STARTING,
+};
+
+/* Manages the state of an color image subresource to ensure resolves are
+ * performed properly.
+ */
+static void
+genX(set_resolve_state)(struct anv_cmd_buffer *cmd_buffer,
+ const struct anv_image *image,
+ unsigned level,
+ enum ccs_resolve_state state)
+{
+ assert(cmd_buffer && image);
+ assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(level < anv_image_aux_levels(image));
+
+ const uint32_t resolve_flag_offset =
+ get_fast_clear_state_entry_offset(cmd_buffer->device, image, level) +
+ cmd_buffer->device->isl_dev.ss.clear_value_size;
+
+ if (state != CCS_RESOLVE_STARTING) {
+ assert(state == CCS_RESOLVE_NEEDED || state == CCS_RESOLVE_NOT_NEEDED);
+ /* The HW docs say that there is no way to guarantee the completion of
+ * the following command. We use it nevertheless because it shows no
+ * issues in testing is currently being used in the GL driver.
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
+ sdi.Address = (struct anv_address) { image->bo, resolve_flag_offset };
+ sdi.ImmediateData = state == CCS_RESOLVE_NEEDED;
+ }
+ } else {
+ /* Make the pending predicated resolve a no-op if one is not needed.
+ * predicate = do_resolve = resolve_flag != 0;
+ */
+ emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 , 0);
+ emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
+ emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 , 0);
+ emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,
+ image->bo, resolve_flag_offset);
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
+ mip.LoadOperation = LOAD_LOADINV;
+ mip.CombineOperation = COMBINE_SET;
+ mip.CompareOperation = COMPARE_SRCS_EQUAL;
+ }
+ }
+}
+
static void
init_fast_clear_state_entry(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
@@ -430,6 +483,16 @@ init_fast_clear_state_entry(struct anv_cmd_buffer *cmd_buffer,
assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
assert(level < anv_image_aux_levels(image));
+ /* The resolve flag should updated to signify that fast-clear/compression
+ * data needs to be removed when leaving the undefined layout. Such data
+ * may need to be removed if it would cause accesses to the color buffer
+ * to return incorrect data. The fast clear data in CCS_D buffers should
+ * be removed because CCS_D isn't enabled all the time.
+ */
+ genX(set_resolve_state)(cmd_buffer, image, level,
+ image->aux_usage == ISL_AUX_USAGE_CCS_E ?
+ CCS_RESOLVE_NOT_NEEDED : CCS_RESOLVE_NEEDED);
+
/* The fast clear value dword(s) will be copied into a surface state object.
* Ensure that the restrictions of the fields in the dword(s) are followed.
*
@@ -666,6 +729,8 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
layer_count = MIN2(layer_count, anv_image_aux_layers(image, level));
}
+ genX(set_resolve_state)(cmd_buffer, image, level, CCS_RESOLVE_STARTING);
+
/* Create a surface state with the right clear color and perform the
* resolve.
*/
@@ -697,6 +762,8 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
image->aux_usage == ISL_AUX_USAGE_CCS_E ?
BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL :
BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
+
+ genX(set_resolve_state)(cmd_buffer, image, level, CCS_RESOLVE_NOT_NEEDED);
}
cmd_buffer->state.pending_pipe_bits |=
@@ -2447,9 +2514,6 @@ void genX(CmdDispatch)(
#define GPGPU_DISPATCHDIMY 0x2504
#define GPGPU_DISPATCHDIMZ 0x2508
-#define MI_PREDICATE_SRC0 0x2400
-#define MI_PREDICATE_SRC1 0x2408
-
void genX(CmdDispatchIndirect)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
@@ -2856,6 +2920,23 @@ cmd_buffer_subpass_sync_fast_clear_values(struct anv_cmd_buffer *cmd_buffer)
genX(copy_fast_clear_dwords)(cmd_buffer, att_state->color_rt_state,
iview->image, iview->isl.base_level,
true /* copy from ss */);
+
+ /* Fast-clears impact whether or not a resolve will be necessary. */
+ if (iview->image->aux_usage == ISL_AUX_USAGE_CCS_E &&
+ att_state->clear_color_is_zero) {
+ /* This image always has the auxiliary buffer enabled. We can mark
+ * the subresource as not needing a resolve because the clear color
+ * will match what's in every RENDER_SURFACE_STATE object when it's
+ * being used for sampling.
+ */
+ genX(set_resolve_state)(cmd_buffer, iview->image,
+ iview->isl.base_level,
+ CCS_RESOLVE_NOT_NEEDED);
+ } else {
+ genX(set_resolve_state)(cmd_buffer, iview->image,
+ iview->isl.base_level,
+ CCS_RESOLVE_NEEDED);
+ }
} else if (rp_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
/* The attachment may have been fast-cleared in a previous render
* pass and the value is needed now. Update the surface state(s).
--
2.13.1
More information about the mesa-dev
mailing list