<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Thu, Apr 27, 2017 at 11:32 AM, Nanley Chery <span dir="ltr"><<a href="mailto:nanleychery@gmail.com" target="_blank">nanleychery@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">There's no image layout to represent a full-RT-cleared color attachment.<br>
That's one reason we can end up with redundant resolves. Testing has<br>
shown that such resolves can measurably hurt performance and that<br>
predicating them can avoid the penalty.<br>
<br>
Signed-off-by: Nanley Chery <<a href="mailto:nanley.g.chery@intel.com">nanley.g.chery@intel.com</a>><br>
---<br>
src/intel/vulkan/anv_blorp.c | 3 +-<br>
src/intel/vulkan/anv_image.c | 6 +++<br>
src/intel/vulkan/genX_cmd_<wbr>buffer.c | 106 ++++++++++++++++++++++++++++++<wbr>++++---<br>
3 files changed, 108 insertions(+), 7 deletions(-)<br>
<br>
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c<br>
index 821d01a077..32f0edf316 100644<br>
--- a/src/intel/vulkan/anv_blorp.c<br>
+++ b/src/intel/vulkan/anv_blorp.c<br>
@@ -1500,7 +1500,8 @@ anv_ccs_resolve(struct anv_cmd_buffer * const cmd_buffer,<br>
<br>
struct blorp_batch batch;<br>
blorp_batch_init(&cmd_buffer-><wbr>device->blorp, &batch, cmd_buffer,<br>
- BLORP_BATCH_NO_EMIT_DEPTH_<wbr>STENCIL);<br>
+ BLORP_BATCH_NO_EMIT_DEPTH_<wbr>STENCIL |<br>
+ BLORP_BATCH_PREDICATE_ENABLE);<br>
<br>
struct blorp_surf surf;<br>
get_blorp_surf_for_anv_image(<wbr>image, VK_IMAGE_ASPECT_COLOR_BIT,<br>
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c<br>
index 751f2d6026..92ee86dab5 100644<br>
--- a/src/intel/vulkan/anv_image.c<br>
+++ b/src/intel/vulkan/anv_image.c<br>
@@ -208,6 +208,12 @@ anv_fill_ccs_resolve_ss(const struct anv_device * const device,<br>
.aux_usage = image->aux_usage == ISL_AUX_USAGE_NONE ?<br>
ISL_AUX_USAGE_CCS_D : image->aux_usage,<br>
.mocs = device->default_mocs);<br>
+<br>
+ /* The following dword is used as a flag to represent whether or not this<br>
+ * CCS subresource needs resolving. We want this to be zero by default,<br>
+ * which means that a resolve is not necessary.<br>
+ */<br>
+ assert(*(uint32_t *)(data + device->isl_dev.ss.addr_<wbr>offset) == 0);<br></blockquote><div><br></div><div>This seems like kind-of an odd place to put it... Not a problem, just odd.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
}<br>
<br>
/**<br>
diff --git a/src/intel/vulkan/genX_cmd_<wbr>buffer.c b/src/intel/vulkan/genX_cmd_<wbr>buffer.c<br>
index 95729ec8a8..041301290e 100644<br>
--- a/src/intel/vulkan/genX_cmd_<wbr>buffer.c<br>
+++ b/src/intel/vulkan/genX_cmd_<wbr>buffer.c<br>
@@ -402,6 +402,82 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,<br>
anv_gen8_hiz_op_resolve(cmd_<wbr>buffer, image, hiz_op);<br>
}<br>
<br>
+static uint32_t<br>
+clear_value_buffer_offset(<wbr>const struct anv_cmd_buffer * const cmd_buffer,<br>
+ const struct anv_image * const image,<br>
+ const uint8_t level)<br>
+{<br>
+ return image->offset +<br>
+ image->aux_surface.offset + image->aux_surface.isl.size +<br>
+ cmd_buffer->device->isl_dev.<wbr>ss.size * level;<br>
+}<br>
+<br>
+#define MI_PREDICATE_SRC0 0x2400<br>
+#define MI_PREDICATE_SRC1 0x2408<br>
+<br>
+enum ccs_resolve_state {<br>
+ CCS_RESOLVE_NOT_NEEDED,<br>
+ CCS_RESOLVE_NEEDED,<br>
+ CCS_RESOLVE_STARTING,<br>
+};<br>
+<br>
+/* Manages the state of an color image subresource to ensure resolves are<br>
+ * performed properly.<br>
+ */<br>
+static void<br>
+genX(set_resolve_state)(<wbr>struct anv_cmd_buffer * const cmd_buffer,<br>
+ const struct anv_image * const image,<br>
+ const uint8_t level,<br>
+ const enum ccs_resolve_state state)<br>
+{<br>
+ assert(cmd_buffer && image);<br>
+<br>
+ /* The image subresource range must have a color auxiliary buffer. */<br>
+ assert(anv_image_has_color_<wbr>aux(image));<br>
+ assert(level < anv_color_aux_levels(image));<br>
+<br>
+ /* We store the resolve flag in the location of the surface base address<br>
+ * field for simplicity and because it is initialized to zero when the<br>
+ * clear value buffer is initialized.<br>
+ */<br>
+ const uint32_t resolve_flag_offset =<br>
+ clear_value_buffer_offset(cmd_<wbr>buffer, image, level) +<br>
+ cmd_buffer->device->isl_dev.<wbr>ss.addr_offset;<br>
+<br>
+ /* An update operation should not overwrite the fast clear value. */<br>
+ if (cmd_buffer->device->isl_dev.<wbr>ss.addr_offset <<br>
+ cmd_buffer->device->isl_dev.<wbr>ss.clear_value_offset) {<br>
+ assert(cmd_buffer->device-><wbr>isl_dev.ss.addr_offset + 4 <=<br>
+ cmd_buffer->device->isl_dev.<wbr>ss.clear_value_offset);<br>
+ }<br>
+<br>
+ if (state != CCS_RESOLVE_STARTING) {<br>
+ assert(state == CCS_RESOLVE_NEEDED || state == CCS_RESOLVE_NOT_NEEDED);<br>
+ /* The HW docs say that there is no way to guarantee the completion of<br>
+ * the following command. We use it nevertheless because it shows no<br>
+ * issues in testing is currently being used in the GL driver.<br>
+ */<br>
+ anv_batch_emit(&cmd_buffer-><wbr>batch, GENX(MI_STORE_DATA_IMM), sdi) {<br>
+ sdi.Address = (struct anv_address) { image->bo, resolve_flag_offset };<br>
+ sdi.ImmediateData = state == CCS_RESOLVE_NEEDED;<br>
+ }<br>
+ } else {<br>
+ /* Make the pending predicated resolve a no-op if one is not needed.<br>
+ * predicate = do_resolve = resolve_flag != 0;<br>
+ */<br>
+ emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 , 0);<br>
+ emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);<br>
+ emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 , 0);<br>
+ emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,<br>
+ image->bo, resolve_flag_offset);<br>
+ anv_batch_emit(&cmd_buffer-><wbr>batch, GENX(MI_PREDICATE), mip) {<br>
+ mip.LoadOperation = LOAD_LOADINV;<br>
+ mip.CombineOperation = COMBINE_SET;<br>
+ mip.CompareOperation = COMPARE_SRCS_EQUAL;<br>
+ }<br></blockquote><div><br></div><div>I really wish the MI_PREDICATE were closer to the anv_ccs_resolve. Maybe we could just inline this function? If we had a helper to get the anv_address of the resolve flag, it wouldn't be bad at all.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ }<br>
+}<br>
+<br>
<br>
/* Copies clear value dwords between a surface state object and an image's<br>
* clear value buffer.<br>
@@ -420,9 +496,7 @@ genX(transfer_clear_value)(<wbr>struct anv_cmd_buffer * const cmd_buffer,<br>
assert(level < anv_color_aux_levels(image));<br>
<br>
const uint32_t img_clear_offset =<br>
- image->offset + image->aux_surface.offset +<br>
- image->aux_surface.isl.size +<br>
- cmd_buffer->device->isl_dev.<wbr>ss.size * level +<br>
+ clear_value_buffer_offset(cmd_<wbr>buffer, image, level) +<br>
cmd_buffer->device->isl_dev.<wbr>ss.clear_value_offset;<br>
<br>
struct anv_bo * const ss_bo =<br>
@@ -523,6 +597,8 @@ transition_color_buffer(struct anv_cmd_buffer * const cmd_buffer,<br>
<br>
for (uint32_t level = base_level; level < base_level + level_count; level++) {<br>
<br>
+ genX(set_resolve_state)(cmd_<wbr>buffer, image, level, CCS_RESOLVE_STARTING);<br>
+<br>
layer_count = MIN2(layer_count, anv_color_aux_layers(image, level));<br>
for (uint32_t layer = base_layer; layer < base_layer + layer_count; layer++) {<br>
<br>
@@ -541,6 +617,8 @@ transition_color_buffer(struct anv_cmd_buffer * const cmd_buffer,<br>
false);<br>
anv_ccs_resolve(cmd_buffer, surface_state, image, level, layer, op);<br>
}<br>
+<br>
+ genX(set_resolve_state)(cmd_<wbr>buffer, image, level, CCS_RESOLVE_NOT_NEEDED);<br>
}<br>
<br>
cmd_buffer->state.pending_<wbr>pipe_bits |=<br>
@@ -693,6 +771,25 @@ genX(cmd_buffer_setup_<wbr>attachments)(struct anv_cmd_buffer *cmd_buffer,<br>
genX(transfer_clear_value)(<wbr>cmd_buffer,<br>
state->attachments[i].color_<wbr>rt_state, iview->image,<br>
iview->isl.base_level, true /* copy_to_buffer */);<br>
+<br>
+ /* If this image always has the auxiliary buffer enabled we can<br>
+ * mark the subresource as not needing a resolve if the clear<br>
+ * color will match what's in the RENDER_SURFACE_STATE object<br>
+ * when it's being used for sampling.<br>
+ */<br>
+ if (iview->image->aux_usage == ISL_AUX_USAGE_CCS_E &&<br>
+ clear_color.u32[0] == 0 &&<br>
+ clear_color.u32[1] == 0 &&<br>
+ clear_color.u32[2] == 0 &&<br>
+ clear_color.u32[3] == 0) {<br>
+ genX(set_resolve_state)(cmd_<wbr>buffer, iview->image,<br>
+ iview->isl.base_level,<br>
+ CCS_RESOLVE_NOT_NEEDED);<br>
+ } else {<br>
+ genX(set_resolve_state)(cmd_<wbr>buffer, iview->image,<br>
+ iview->isl.base_level,<br>
+ CCS_RESOLVE_NEEDED);<br>
+ }<br>
} else if (att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD &&<br>
state->attachments[i].aux_<wbr>usage != ISL_AUX_USAGE_NONE) {<br>
/* The attachment may have been fast-cleared in a previous<br>
@@ -2194,9 +2291,6 @@ void genX(CmdDispatch)(<br>
#define GPGPU_DISPATCHDIMY 0x2504<br>
#define GPGPU_DISPATCHDIMZ 0x2508<br>
<br>
-#define MI_PREDICATE_SRC0 0x2400<br>
-#define MI_PREDICATE_SRC1 0x2408<br>
-<br>
void genX(CmdDispatchIndirect)(<br>
VkCommandBuffer commandBuffer,<br>
VkBuffer _buffer,<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.12.2<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>