<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Sep 26, 2016 at 5:10 PM, Nanley Chery <span dir="ltr"><<a href="mailto:nanleychery@gmail.com" target="_blank">nanleychery@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Create a function that performs one of three HiZ operations -<br>
depth/stencil clears, HiZ resolve, and depth resolves.<br>
<br>
Signed-off-by: Nanley Chery <<a href="mailto:nanley.g.chery@intel.com">nanley.g.chery@intel.com</a>><br>
<br>
---<br>
<br>
v2. Add documentation<br>
    Fix the alignment check<br>
    Don't minify clear rectangle (Jason)<br>
    Use blorp enums (Jason)<br>
    Enable depth stalls and flushes<br>
    Use full RT rectangle for resolve ops<br>
    Add stencil clear todo<br>
<br>
 src/intel/vulkan/anv_genX.h        |   3 +<br>
 src/intel/vulkan/gen7_cmd_<wbr>buffer.c |   6 ++<br>
 src/intel/vulkan/gen8_cmd_<wbr>buffer.c | 167 ++++++++++++++++++++++++++++++<wbr>+++++++<br>
 3 files changed, 176 insertions(+)<br>
<br>
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h<br>
index 02e79c2..ad3bec9 100644<br>
--- a/src/intel/vulkan/anv_genX.h<br>
+++ b/src/intel/vulkan/anv_genX.h<br>
@@ -58,6 +58,9 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,<br>
                      unsigned vs_entry_size, unsigned gs_entry_size,<br>
                      const struct gen_l3_config *l3_config);<br>
<br>
+void genX(cmd_buffer_do_hz_op)(<wbr>struct anv_cmd_buffer *cmd_buffer,<br>
+                               enum blorp_hiz_op op);<br>
+<br>
 VkResult<br>
 genX(graphics_pipeline_create)<wbr>(VkDevice _device,<br>
                                struct anv_pipeline_cache *cache,<br>
diff --git a/src/intel/vulkan/gen7_cmd_<wbr>buffer.c b/src/intel/vulkan/gen7_cmd_<wbr>buffer.c<br>
index b627ef0..78b5ac7 100644<br>
--- a/src/intel/vulkan/gen7_cmd_<wbr>buffer.c<br>
+++ b/src/intel/vulkan/gen7_cmd_<wbr>buffer.c<br>
@@ -323,6 +323,12 @@ genX(cmd_buffer_flush_dynamic_<wbr>state)(struct anv_cmd_buffer *cmd_buffer)<br>
    cmd_buffer->state.dirty = 0;<br>
 }<br>
<br>
+void<br>
+genX(cmd_buffer_do_hz_op)(<wbr>struct anv_cmd_buffer *cmd_buffer,<br>
+                          enum blorp_hiz_op op)<br>
+{<br></blockquote><div><br></div><div>This should have an anv_finishme in it.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+}<br>
+<br>
 void genX(CmdSetEvent)(<br>
     VkCommandBuffer                             commandBuffer,<br>
     VkEvent                                     event,<br>
diff --git a/src/intel/vulkan/gen8_cmd_<wbr>buffer.c b/src/intel/vulkan/gen8_cmd_<wbr>buffer.c<br>
index 7058608..a13413c 100644<br>
--- a/src/intel/vulkan/gen8_cmd_<wbr>buffer.c<br>
+++ b/src/intel/vulkan/gen8_cmd_<wbr>buffer.c<br>
@@ -399,6 +399,173 @@ genX(cmd_buffer_flush_compute_<wbr>state)(struct anv_cmd_buffer *cmd_buffer)<br>
    genX(cmd_buffer_apply_pipe_<wbr>flushes)(cmd_buffer);<br>
 }<br>
<br>
+<br>
+/**<br>
+ * Emit the HZ_OP packet in the sequence specified by the BDW PRM section<br>
+ * entitled: "Optimized Depth Buffer Clear and/or Stencil Buffer Clear."<br>
+ *<br>
+ * \todo Enable Stencil Buffer-only clears<br>
+ */<br>
+void<br>
+genX(cmd_buffer_do_hz_op)(<wbr>struct anv_cmd_buffer *cmd_buffer,<br>
+                          enum blorp_hiz_op op)<br>
+{<br>
+   struct anv_cmd_state *cmd_state = &cmd_buffer->state;<br>
+   const struct anv_image_view *iview =<br>
+      anv_cmd_buffer_get_depth_<wbr>stencil_view(cmd_buffer);<br>
+<br>
+   if (iview == NULL || !anv_image_has_hiz(iview-><wbr>image))<br>
+      return;<br>
+<br>
+   const uint32_t ds = cmd_state->subpass->depth_<wbr>stencil_attachment;<br>
+   const bool full_surface_op =<br>
+             cmd_state->render_area.extent.<wbr>width == iview->extent.width &&<br>
+             cmd_state->render_area.extent.<wbr>height == iview->extent.height;<br></blockquote><div><br></div><div>It's probably a bit redundant, but we might as well check render_area.offset == 0.  I realize that, from API requirements, if the extents match then offset must be 0, but it's not incredibly obvious and the check won't hurt that much.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+   /* Validate that we can perform the HZ operation and that it's necessary. */<br>
+   switch (op) {<br>
+   case BLORP_HIZ_OP_DEPTH_CLEAR:<br>
+      if (cmd_buffer->state.pass-><wbr>attachments[ds].load_op !=<br>
+          VK_ATTACHMENT_LOAD_OP_CLEAR)<br>
+         return;<br>
+<br>
+      /* Apply alignment restrictions. Despite the BDW PRM mentioning this is<br>
+       * only needed for a depth buffer surface type of D16_UNORM, testing<br>
+       * showed it to be necessary for other depth formats as well<br>
+       * (e.g., D32_FLOAT).<br>
+       */<br>
+      if (!full_surface_op) {<br>
+<br>
+         struct isl_extent2d px_dim;<br></blockquote><div><br></div><div>Would it be better to call this hiz_block_size_px?  That follows the ISL naming convention a bit better.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+#if GEN_GEN == 8<br></blockquote><div><br></div><div>Mind making this <= 8?  I know it's in a gen8+ file, but <= 8 makes it clear that the else case is >= 9 and not != 8.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+         /* Pre-SKL, HiZ has an 8x4 sample block. As the number of samples<br>
+          * increases, the number of pixels representable by this block<br>
+          * decreases by a factor of the sample dimensions. Sample dimensions<br>
+          * scale following the MSAA interleaved pattern.<br>
+          *<br>
+          * Sample|Sample|Pixel<br>
+          * Count |Dim   |Dim<br>
+          * ===================<br>
+          *    1  | 1x1  | 8x4<br>
+          *    2  | 2x1  | 4x4<br>
+          *    4  | 2x2  | 4x2<br>
+          *    8  | 4x2  | 2x2<br>
+          *   16  | 4x4  | 2x1<br>
+          *<br>
+          * Table: Pixel Dimensions in a HiZ Sample Block Pre-SKL<br>
+          */<br>
+         const struct isl_extent2d sa_dim =<br></blockquote><div><br></div><div>Maybe call this px_size_sa?<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+            isl_get_interleaved_msaa_px_<wbr>size_sa(iview->image->samples)<wbr>;<br>
+         px_dim.w = 8 / sa_dim.w;<br>
+         px_dim.h = 4 / sa_dim.h;<br>
+#else<br>
+         /* SKL+, the sample block becomes a "pixel block" so the expected<br>
+          * pixel dimension is a constant 8x4 px for all sample counts.<br>
+          */<br>
+         px_dim = (struct isl_extent2d) { .w = 8, .h = 4};<br>
+#endif<br>
+<br>
+         /* Fast depth clears clear an entire sample block at a time. As a<br>
+          * result, the rectangle must be aligned to the pixel dimensions of<br>
+          * a sample block for a successful operation.<br>
+          */<br>
+         if (cmd_state->render_area.<wbr>offset.x % px_dim.w ||<br>
+             cmd_state->render_area.offset.<wbr>y % px_dim.h ||<br>
+             cmd_state->render_area.extent.<wbr>width % px_dim.w ||<br>
+             cmd_state->render_area.extent.<wbr>height % px_dim.h)<br>
+            return;<br></blockquote><div><br></div><div>What about the case where the offset is aligned and we go to the far right or bottom of the image but the image size is not aligned?  I think this case should be possible too.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+      }<br>
+      break;<br>
+   case BLORP_HIZ_OP_DEPTH_RESOLVE:<br>
+      if (cmd_buffer->state.pass-><wbr>attachments[ds].store_op !=<br>
+          VK_ATTACHMENT_STORE_OP_STORE)<br>
+         return;<br>
+      break;<br>
+   case BLORP_HIZ_OP_HIZ_RESOLVE:<br>
+      if (cmd_buffer->state.pass-><wbr>attachments[ds].load_op !=<br>
+          VK_ATTACHMENT_LOAD_OP_LOAD)<br>
+         return;<br>
+      break;<br>
+   case BLORP_HIZ_OP_NONE:<br>
+      unreachable("Invalid HiZ OP");<br>
+      break;<br>
+   }<br>
+<br>
+   anv_batch_emit(&cmd_buffer-><wbr>batch, GENX(3DSTATE_WM_HZ_OP), hzp) {<br>
+      switch (op) {<br>
+      case BLORP_HIZ_OP_DEPTH_CLEAR:<br>
+         hzp.StencilBufferClearEnable = VK_IMAGE_ASPECT_STENCIL_BIT &<br>
+                            cmd_state->attachments[ds].<wbr>pending_clear_aspects;<br>
+         hzp.DepthBufferClearEnable = VK_IMAGE_ASPECT_DEPTH_BIT &<br>
+                            cmd_state->attachments[ds].<wbr>pending_clear_aspects;<br>
+         hzp.<wbr>FullSurfaceDepthandStencilClea<wbr>r = full_surface_op;<br>
+         hzp.StencilClearValue = 0xff &<br>
+                   cmd_state->attachments[ds].<wbr>clear_value.depthStencil.<wbr>stencil;<br></blockquote><div><br></div><div>nit: I would find having "clear_value & 0xff" and all on the second line more readable.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+         /* Mark aspects as cleared */<br>
+         cmd_state->attachments[ds].<wbr>pending_clear_aspects = 0;<br>
+         break;<br>
+      case BLORP_HIZ_OP_DEPTH_RESOLVE:<br>
+         hzp.DepthBufferResolveEnable = true;<br>
+         break;<br>
+      case BLORP_HIZ_OP_HIZ_RESOLVE:<br>
+         hzp.<wbr>HierarchicalDepthBufferResolve<wbr>Enable = true;<br>
+         break;<br>
+      case BLORP_HIZ_OP_NONE:<br>
+         unreachable("Invalid HiZ OP");<br>
+         break;<br>
+      }<br>
+<br>
+      if (op != BLORP_HIZ_OP_DEPTH_CLEAR) {<br>
+         /* The Optimized HiZ resolve rectangle must be the size of the full RT<br>
+          * and aligned to 8x4. The non-optimized Depth resolve rectangle must<br>
+          * be the size of the full RT. The same alignment is assumed to be<br>
+          * required.<br>
+          *<br>
+          * TODO:<br>
+          * Consider changing halign of non-D16 depth formats to 8 as mip 2 may<br>
+          * get clobbered.<br></blockquote><div><br></div><div>This shouldn't be needed on BDW+<br><br></div><div>Most of my comments were just nits.<br><br></div><div>Reviewed-by: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>><br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+          */<br>
+         hzp.ClearRectangleXMax = align_u32(iview->extent.width, 8);<br>
+         hzp.ClearRectangleYMax = align_u32(iview->extent.<wbr>height, 4); <br></blockquote><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+      } else {<br>
+         /* This clear rectangle is aligned */<br>
+         hzp.ClearRectangleXMin = cmd_state->render_area.offset.<wbr>x;<br>
+         hzp.ClearRectangleYMin = cmd_state->render_area.offset.<wbr>y;<br>
+         hzp.ClearRectangleXMax = cmd_state->render_area.offset.<wbr>x +<br>
+                                  cmd_state->render_area.extent.<wbr>width;<br>
+         hzp.ClearRectangleYMax = cmd_state->render_area.offset.<wbr>y +<br>
+                                  cmd_state->render_area.extent.<wbr>height;<br>
+      }<br>
+<br>
+<br>
+      /* Due to a hardware issue, this bit MBZ */<br>
+      hzp.ScissorRectangleEnable = false;<br>
+      hzp.NumberofMultisamples = ffs(iview->image->samples) - 1;<br>
+      hzp.SampleMask = 0xFFFF;<br>
+   }<br>
+<br>
+   anv_batch_emit(&cmd_buffer-><wbr>batch, GENX(PIPE_CONTROL), pc) {<br>
+      pc.PostSyncOperation = WriteImmediateData;<br>
+      pc.Address =<br>
+         (struct anv_address){ &cmd_buffer->device-><wbr>workaround_bo, 0 };<br>
+   }<br>
+<br>
+   anv_batch_emit(&cmd_buffer-><wbr>batch, GENX(3DSTATE_WM_HZ_OP), hzp);<br>
+<br>
+   /* TODO:<br>
+    * Determine if a DepthCacheFlushEnable and DepthStallEnable is really<br>
+    * necessary for non-full_surface_op clears. Performing a HZ op without<br>
+    * this pipecontrol showed no impact on rendering results.<br>
+    */<br>
+   if (!full_surface_op && op == BLORP_HIZ_OP_DEPTH_CLEAR) {<br>
+      anv_batch_emit(&cmd_buffer-><wbr>batch, GENX(PIPE_CONTROL), pc) {<br>
+         pc.DepthStallEnable = true;<br>
+         pc.DepthCacheFlushEnable = true;<br>
+      }<br>
+   }<br>
+}<br>
+<br>
 void genX(CmdSetEvent)(<br>
     VkCommandBuffer                             commandBuffer,<br>
     VkEvent                                     _event,<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.10.0<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>