<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Wed, Oct 19, 2016 at 10:47 AM, Nanley Chery <span dir="ltr"><<a href="mailto:nanleychery@gmail.com" target="_blank">nanleychery@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">The HZ sequence modifies less state than the blorp path and requires<br>
less CPU time to generate the necessary packets.<br>
<span class=""><br>
Signed-off-by: Nanley Chery <<a href="mailto:nanley.g.chery@intel.com">nanley.g.chery@intel.com</a>><br>
---<br>
<br>
</span>v2: Don't combine the depth alignment if statements<br>
<br>
 src/intel/vulkan/gen8_cmd_<wbr>buffer.c | 46 +++++++++++++++++++++++++++---<wbr>--------<br>
 1 file changed, 33 insertions(+), 13 deletions(-)<br>
<br>
diff --git a/src/intel/vulkan/gen8_cmd_<wbr>buffer.c b/src/intel/vulkan/gen8_cmd_<wbr>buffer.c<br>
index 204542e..d4410d4 100644<br>
--- a/src/intel/vulkan/gen8_cmd_<wbr>buffer.c<br>
+++ b/src/intel/vulkan/gen8_cmd_<wbr>buffer.c<br>
@@ -350,15 +350,19 @@ genX(cmd_buffer_emit_hz_op)(<wbr>struct anv_cmd_buffer *cmd_buffer,<br>
       assert(cmd_state->render_area.<wbr>offset.x == 0 &&<br>
              cmd_state->render_area.offset.<wbr>y == 0);<br>
<span class=""><br>
+   bool depth_clear;<br>
+   bool stc_clear;<br></span></blockquote><div><br></div><div>Mind calling this stencil_clear instead of the abbreviation?  While stc is fairly obvious, it's not an abbreviation we usually use.  Yes, it's used in the PRM in the docs for WM_HZ_OP, but this is the first time I've seen it.  With that changed, all three are<br><br></div><div>Reviewed-by: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>><br><br></div><div>Sorry it took so long. :-/<br><br></div><div>Feel free to ignore the comment below for now.  I'm mostly just pointing it out.  (Sorry if I've pointed it out before.)<br><br></div><div>--Jason<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">
+<br>
    /* This variable corresponds to the Pixel Dim column in the table below */<br>
    struct isl_extent2d px_dim;<br></span></blockquote><div><br>Pedantry: I'd really rather we call this align_px or something because that's really what it is.  Yes, it comes from the size of a HiZ block, but the way we use it is as an alignment.  We could split the difference and call it block_size_px or something.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">
</span><span class="">    /* Validate that we can perform the HZ operation and that it's necessary. */<br>
    switch (op) {<br>
    case BLORP_HIZ_OP_DEPTH_CLEAR:<br>
-      if (cmd_buffer->state.pass-><wbr>attachments[ds].load_op !=<br>
-          VK_ATTACHMENT_LOAD_OP_CLEAR)<br>
-         return;<br>
</span>+      stc_clear = VK_IMAGE_ASPECT_STENCIL_BIT &<br>
+                  cmd_state->attachments[ds].<wbr>pending_clear_aspects;<br>
+      depth_clear = VK_IMAGE_ASPECT_DEPTH_BIT &<br>
+                    cmd_state->attachments[ds].<wbr>pending_clear_aspects;<br>
<br>
       /* Apply alignment restrictions. Despite the BDW PRM mentioning this is<br>
        * only needed for a depth buffer surface type of D16_UNORM, testing<br>
@@ -396,7 +400,7 @@ genX(cmd_buffer_emit_hz_op)(<wbr>struct anv_cmd_buffer *cmd_buffer,<br>
<span class="">       px_dim = (struct isl_extent2d) { .w = 8, .h = 4};<br>
 #endif<br>
<br>
</span>-      if (!full_surface_op) {<br>
+      if (depth_clear && !full_surface_op) {<br>
<span class="">          /* Fast depth clears clear an entire sample block at a time. As a<br>
           * result, the rectangle must be aligned to the pixel dimensions of<br>
           * a sample block for a successful operation.<br>
</span>@@ -409,15 +413,25 @@ genX(cmd_buffer_emit_hz_op)(<wbr>struct anv_cmd_buffer *cmd_buffer,<br>
<span class="">           */<br>
          if (cmd_state->render_area.<wbr>offset.x % px_dim.w ||<br>
</span><span class="">              cmd_state->render_area.offset.<wbr>y % px_dim.h)<br>
-            return;<br>
</span>+            depth_clear = false;<br>
          if (cmd_state->render_area.<wbr>offset.x +<br>
<span class="">              cmd_state->render_area.extent.<wbr>width != iview->extent.width &&<br>
</span><span class="">              cmd_state->render_area.extent.<wbr>width % px_dim.w)<br>
-            return;<br>
</span>+            depth_clear = false;<br>
          if (cmd_state->render_area.<wbr>offset.y +<br>
<span class="">              cmd_state->render_area.extent.<wbr>height != iview->extent.height &&<br>
</span>              cmd_state->render_area.extent.<wbr>height % px_dim.h)<br>
<span class="">+            depth_clear = false;<br>
+      }<br>
+<br>
+      if (!depth_clear) {<br>
+         if (stc_clear) {<br>
+            /* Stencil has no alignment requirements */<br>
+            px_dim = (struct isl_extent2d) { .w = 1, .h = 1};<br>
+         } else {<br>
+            /* Nothing to clear */<br>
             return;<br>
+         }</span> <br></blockquote><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">
       }<br>
</span>       break;<br>
    case BLORP_HIZ_OP_DEPTH_RESOLVE:<br>
@@ -448,10 +462,8 @@ genX(cmd_buffer_emit_hz_op)(<wbr>struct anv_cmd_buffer *cmd_buffer,<br>
<span class="">    anv_batch_emit(&cmd_buffer-><wbr>batch, GENX(3DSTATE_WM_HZ_OP), hzp) {<br>
       switch (op) {<br>
       case BLORP_HIZ_OP_DEPTH_CLEAR:<br>
-         hzp.StencilBufferClearEnable = VK_IMAGE_ASPECT_STENCIL_BIT &<br>
-                            cmd_state->attachments[ds].<wbr>pending_clear_aspects;<br>
-         hzp.DepthBufferClearEnable = VK_IMAGE_ASPECT_DEPTH_BIT &<br>
-                            cmd_state->attachments[ds].<wbr>pending_clear_aspects;<br>
+         hzp.StencilBufferClearEnable = stc_clear;<br>
+         hzp.DepthBufferClearEnable = depth_clear;<br>
</span>          hzp.<wbr>FullSurfaceDepthandStencilClea<wbr>r = full_surface_op;<br>
<span class="">          hzp.StencilClearValue =<br>
             cmd_state->attachments[ds].<wbr>clear_value.depthStencil.<wbr>stencil & 0xff;<br>
</span>@@ -503,16 +515,24 @@ genX(cmd_buffer_emit_hz_op)(<wbr>struct anv_cmd_buffer *cmd_buffer,<br>
<span class=""><br>
    anv_batch_emit(&cmd_buffer-><wbr>batch, GENX(3DSTATE_WM_HZ_OP), hzp);<br>
<br>
+   /* Perform clear specific flushing and state updates */<br>
    if (op == BLORP_HIZ_OP_DEPTH_CLEAR) {<br>
</span>-      if (!full_surface_op) {<br>
+      if (depth_clear && !full_surface_op) {<br>
<div class="HOEnZb"><div class="h5">          anv_batch_emit(&cmd_buffer-><wbr>batch, GENX(PIPE_CONTROL), pc) {<br>
             pc.DepthStallEnable = true;<br>
             pc.DepthCacheFlushEnable = true;<br>
          }<br>
       }<br>
<br>
-      /* Mark aspects as cleared */<br>
-      cmd_state->attachments[ds].<wbr>pending_clear_aspects = 0;<br>
+      /* Remove cleared aspects from the pending mask */<br>
+      if (stc_clear) {<br>
+         cmd_state->attachments[ds].<wbr>pending_clear_aspects &=<br>
+            ~VK_IMAGE_ASPECT_STENCIL_BIT;<br>
+      }<br>
+      if (depth_clear) {<br>
+         cmd_state->attachments[ds].<wbr>pending_clear_aspects &=<br>
+            ~VK_IMAGE_ASPECT_DEPTH_BIT;<br>
+      }<br>
    }<br>
 }<br>
<br>
--<br>
2.10.0<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</div></div></blockquote></div><br></div></div>