<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Sep 26, 2016 at 5:10 PM, Nanley Chery <span dir="ltr"><<a href="mailto:nanleychery@gmail.com" target="_blank">nanleychery@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Create a function that performs one of three HiZ operations -<br>
depth/stencil clears, HiZ resolve, and depth resolves.<br>
<br>
Signed-off-by: Nanley Chery <<a href="mailto:nanley.g.chery@intel.com">nanley.g.chery@intel.com</a>><br>
<br>
---<br>
<br>
v2. Add documentation<br>
Fix the alignment check<br>
Don't minify clear rectangle (Jason)<br>
Use blorp enums (Jason)<br>
Enable depth stalls and flushes<br>
Use full RT rectangle for resolve ops<br>
Add stencil clear todo<br>
<br>
src/intel/vulkan/anv_genX.h | 3 +<br>
src/intel/vulkan/gen7_cmd_<wbr>buffer.c | 6 ++<br>
src/intel/vulkan/gen8_cmd_<wbr>buffer.c | 167 ++++++++++++++++++++++++++++++<wbr>+++++++<br>
3 files changed, 176 insertions(+)<br>
<br>
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h<br>
index 02e79c2..ad3bec9 100644<br>
--- a/src/intel/vulkan/anv_genX.h<br>
+++ b/src/intel/vulkan/anv_genX.h<br>
@@ -58,6 +58,9 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,<br>
unsigned vs_entry_size, unsigned gs_entry_size,<br>
const struct gen_l3_config *l3_config);<br>
<br>
+void genX(cmd_buffer_do_hz_op)(<wbr>struct anv_cmd_buffer *cmd_buffer,<br>
+ enum blorp_hiz_op op);<br>
+<br>
VkResult<br>
genX(graphics_pipeline_create)<wbr>(VkDevice _device,<br>
struct anv_pipeline_cache *cache,<br>
diff --git a/src/intel/vulkan/gen7_cmd_<wbr>buffer.c b/src/intel/vulkan/gen7_cmd_<wbr>buffer.c<br>
index b627ef0..78b5ac7 100644<br>
--- a/src/intel/vulkan/gen7_cmd_<wbr>buffer.c<br>
+++ b/src/intel/vulkan/gen7_cmd_<wbr>buffer.c<br>
@@ -323,6 +323,12 @@ genX(cmd_buffer_flush_dynamic_<wbr>state)(struct anv_cmd_buffer *cmd_buffer)<br>
cmd_buffer->state.dirty = 0;<br>
}<br>
<br>
+void<br>
+genX(cmd_buffer_do_hz_op)(<wbr>struct anv_cmd_buffer *cmd_buffer,<br>
+ enum blorp_hiz_op op)<br>
+{<br></blockquote><div><br></div><div>This should have an anv_finishme in it.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+}<br>
+<br>
void genX(CmdSetEvent)(<br>
VkCommandBuffer commandBuffer,<br>
VkEvent event,<br>
diff --git a/src/intel/vulkan/gen8_cmd_<wbr>buffer.c b/src/intel/vulkan/gen8_cmd_<wbr>buffer.c<br>
index 7058608..a13413c 100644<br>
--- a/src/intel/vulkan/gen8_cmd_<wbr>buffer.c<br>
+++ b/src/intel/vulkan/gen8_cmd_<wbr>buffer.c<br>
@@ -399,6 +399,173 @@ genX(cmd_buffer_flush_compute_<wbr>state)(struct anv_cmd_buffer *cmd_buffer)<br>
genX(cmd_buffer_apply_pipe_<wbr>flushes)(cmd_buffer);<br>
}<br>
<br>
+<br>
+/**<br>
+ * Emit the HZ_OP packet in the sequence specified by the BDW PRM section<br>
+ * entitled: "Optimized Depth Buffer Clear and/or Stencil Buffer Clear."<br>
+ *<br>
+ * \todo Enable Stencil Buffer-only clears<br>
+ */<br>
+void<br>
+genX(cmd_buffer_do_hz_op)(<wbr>struct anv_cmd_buffer *cmd_buffer,<br>
+ enum blorp_hiz_op op)<br>
+{<br>
+ struct anv_cmd_state *cmd_state = &cmd_buffer->state;<br>
+ const struct anv_image_view *iview =<br>
+ anv_cmd_buffer_get_depth_<wbr>stencil_view(cmd_buffer);<br>
+<br>
+ if (iview == NULL || !anv_image_has_hiz(iview-><wbr>image))<br>
+ return;<br>
+<br>
+ const uint32_t ds = cmd_state->subpass->depth_<wbr>stencil_attachment;<br>
+ const bool full_surface_op =<br>
+ cmd_state->render_area.extent.<wbr>width == iview->extent.width &&<br>
+ cmd_state->render_area.extent.<wbr>height == iview->extent.height;<br></blockquote><div><br></div><div>It's probably a bit redundant, but we might as well check render_area.offset == 0. I realize that, from API requirements, if the extents match then offset must be 0, but it's not incredibly obvious and the check won't hurt that much.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+ /* Validate that we can perform the HZ operation and that it's necessary. */<br>
+ switch (op) {<br>
+ case BLORP_HIZ_OP_DEPTH_CLEAR:<br>
+ if (cmd_buffer->state.pass-><wbr>attachments[ds].load_op !=<br>
+ VK_ATTACHMENT_LOAD_OP_CLEAR)<br>
+ return;<br>
+<br>
+ /* Apply alignment restrictions. Despite the BDW PRM mentioning this is<br>
+ * only needed for a depth buffer surface type of D16_UNORM, testing<br>
+ * showed it to be necessary for other depth formats as well<br>
+ * (e.g., D32_FLOAT).<br>
+ */<br>
+ if (!full_surface_op) {<br>
+<br>
+ struct isl_extent2d px_dim;<br></blockquote><div><br></div><div>Would it be better to call this hiz_block_size_px? That follows the ISL naming convention a bit better.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+#if GEN_GEN == 8<br></blockquote><div><br></div><div>Mind making this <= 8? I know it's in a gen8+ file, but <= 8 makes it clear that the else case is >= 9 and not != 8.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ /* Pre-SKL, HiZ has an 8x4 sample block. As the number of samples<br>
+ * increases, the number of pixels representable by this block<br>
+ * decreases by a factor of the sample dimensions. Sample dimensions<br>
+ * scale following the MSAA interleaved pattern.<br>
+ *<br>
+ * Sample|Sample|Pixel<br>
+ * Count |Dim |Dim<br>
+ * ===================<br>
+ * 1 | 1x1 | 8x4<br>
+ * 2 | 2x1 | 4x4<br>
+ * 4 | 2x2 | 4x2<br>
+ * 8 | 4x2 | 2x2<br>
+ * 16 | 4x4 | 2x1<br>
+ *<br>
+ * Table: Pixel Dimensions in a HiZ Sample Block Pre-SKL<br>
+ */<br>
+ const struct isl_extent2d sa_dim =<br></blockquote><div><br></div><div>Maybe call this px_size_sa?<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ isl_get_interleaved_msaa_px_<wbr>size_sa(iview->image->samples)<wbr>;<br>
+ px_dim.w = 8 / sa_dim.w;<br>
+ px_dim.h = 4 / sa_dim.h;<br>
+#else<br>
+ /* SKL+, the sample block becomes a "pixel block" so the expected<br>
+ * pixel dimension is a constant 8x4 px for all sample counts.<br>
+ */<br>
+ px_dim = (struct isl_extent2d) { .w = 8, .h = 4};<br>
+#endif<br>
+<br>
+ /* Fast depth clears clear an entire sample block at a time. As a<br>
+ * result, the rectangle must be aligned to the pixel dimensions of<br>
+ * a sample block for a successful operation.<br>
+ */<br>
+ if (cmd_state->render_area.<wbr>offset.x % px_dim.w ||<br>
+ cmd_state->render_area.offset.<wbr>y % px_dim.h ||<br>
+ cmd_state->render_area.extent.<wbr>width % px_dim.w ||<br>
+ cmd_state->render_area.extent.<wbr>height % px_dim.h)<br>
+ return;<br></blockquote><div><br></div><div>What about the case where the offset is aligned and we go to the far right or bottom of the image but the image size is not aligned? I think this case should be possible too.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ }<br>
+ break;<br>
+ case BLORP_HIZ_OP_DEPTH_RESOLVE:<br>
+ if (cmd_buffer->state.pass-><wbr>attachments[ds].store_op !=<br>
+ VK_ATTACHMENT_STORE_OP_STORE)<br>
+ return;<br>
+ break;<br>
+ case BLORP_HIZ_OP_HIZ_RESOLVE:<br>
+ if (cmd_buffer->state.pass-><wbr>attachments[ds].load_op !=<br>
+ VK_ATTACHMENT_LOAD_OP_LOAD)<br>
+ return;<br>
+ break;<br>
+ case BLORP_HIZ_OP_NONE:<br>
+ unreachable("Invalid HiZ OP");<br>
+ break;<br>
+ }<br>
+<br>
+ anv_batch_emit(&cmd_buffer-><wbr>batch, GENX(3DSTATE_WM_HZ_OP), hzp) {<br>
+ switch (op) {<br>
+ case BLORP_HIZ_OP_DEPTH_CLEAR:<br>
+ hzp.StencilBufferClearEnable = VK_IMAGE_ASPECT_STENCIL_BIT &<br>
+ cmd_state->attachments[ds].<wbr>pending_clear_aspects;<br>
+ hzp.DepthBufferClearEnable = VK_IMAGE_ASPECT_DEPTH_BIT &<br>
+ cmd_state->attachments[ds].<wbr>pending_clear_aspects;<br>
+ hzp.<wbr>FullSurfaceDepthandStencilClea<wbr>r = full_surface_op;<br>
+ hzp.StencilClearValue = 0xff &<br>
+ cmd_state->attachments[ds].<wbr>clear_value.depthStencil.<wbr>stencil;<br></blockquote><div><br></div><div>nit: I would find having "clear_value & 0xff" and all on the second line more readable.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+ /* Mark aspects as cleared */<br>
+ cmd_state->attachments[ds].<wbr>pending_clear_aspects = 0;<br>
+ break;<br>
+ case BLORP_HIZ_OP_DEPTH_RESOLVE:<br>
+ hzp.DepthBufferResolveEnable = true;<br>
+ break;<br>
+ case BLORP_HIZ_OP_HIZ_RESOLVE:<br>
+ hzp.<wbr>HierarchicalDepthBufferResolve<wbr>Enable = true;<br>
+ break;<br>
+ case BLORP_HIZ_OP_NONE:<br>
+ unreachable("Invalid HiZ OP");<br>
+ break;<br>
+ }<br>
+<br>
+ if (op != BLORP_HIZ_OP_DEPTH_CLEAR) {<br>
+ /* The Optimized HiZ resolve rectangle must be the size of the full RT<br>
+ * and aligned to 8x4. The non-optimized Depth resolve rectangle must<br>
+ * be the size of the full RT. The same alignment is assumed to be<br>
+ * required.<br>
+ *<br>
+ * TODO:<br>
+ * Consider changing halign of non-D16 depth formats to 8 as mip 2 may<br>
+ * get clobbered.<br></blockquote><div><br></div><div>This shouldn't be needed on BDW+<br><br></div><div>Most of my comments were just nits.<br><br></div><div>Reviewed-by: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>><br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ */<br>
+ hzp.ClearRectangleXMax = align_u32(iview->extent.width, 8);<br>
+ hzp.ClearRectangleYMax = align_u32(iview->extent.<wbr>height, 4); <br></blockquote><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ } else {<br>
+ /* This clear rectangle is aligned */<br>
+ hzp.ClearRectangleXMin = cmd_state->render_area.offset.<wbr>x;<br>
+ hzp.ClearRectangleYMin = cmd_state->render_area.offset.<wbr>y;<br>
+ hzp.ClearRectangleXMax = cmd_state->render_area.offset.<wbr>x +<br>
+ cmd_state->render_area.extent.<wbr>width;<br>
+ hzp.ClearRectangleYMax = cmd_state->render_area.offset.<wbr>y +<br>
+ cmd_state->render_area.extent.<wbr>height;<br>
+ }<br>
+<br>
+<br>
+ /* Due to a hardware issue, this bit MBZ */<br>
+ hzp.ScissorRectangleEnable = false;<br>
+ hzp.NumberofMultisamples = ffs(iview->image->samples) - 1;<br>
+ hzp.SampleMask = 0xFFFF;<br>
+ }<br>
+<br>
+ anv_batch_emit(&cmd_buffer-><wbr>batch, GENX(PIPE_CONTROL), pc) {<br>
+ pc.PostSyncOperation = WriteImmediateData;<br>
+ pc.Address =<br>
+ (struct anv_address){ &cmd_buffer->device-><wbr>workaround_bo, 0 };<br>
+ }<br>
+<br>
+ anv_batch_emit(&cmd_buffer-><wbr>batch, GENX(3DSTATE_WM_HZ_OP), hzp);<br>
+<br>
+ /* TODO:<br>
+ * Determine if a DepthCacheFlushEnable and DepthStallEnable is really<br>
+ * necessary for non-full_surface_op clears. Performing a HZ op without<br>
+ * this pipecontrol showed no impact on rendering results.<br>
+ */<br>
+ if (!full_surface_op && op == BLORP_HIZ_OP_DEPTH_CLEAR) {<br>
+ anv_batch_emit(&cmd_buffer-><wbr>batch, GENX(PIPE_CONTROL), pc) {<br>
+ pc.DepthStallEnable = true;<br>
+ pc.DepthCacheFlushEnable = true;<br>
+ }<br>
+ }<br>
+}<br>
+<br>
void genX(CmdSetEvent)(<br>
VkCommandBuffer commandBuffer,<br>
VkEvent _event,<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.10.0<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>