[Mesa-dev] [PATCH V2 08/11] anv/cmd_buffer: Add code for performing HZ operations

Nanley Chery nanleychery at gmail.com
Tue Sep 27 00:10:22 UTC 2016


Create a function that performs one of three HiZ operations -
depth/stencil clears, HiZ resolve, and depth resolves.

Signed-off-by: Nanley Chery <nanley.g.chery at intel.com>

---

v2. Add documentation
    Fix the alignment check
    Don't minify clear rectangle (Jason)
    Use blorp enums (Jason)
    Enable depth stalls and flushes
    Use full RT rectangle for resolve ops
    Add stencil clear todo

 src/intel/vulkan/anv_genX.h        |   3 +
 src/intel/vulkan/gen7_cmd_buffer.c |   6 ++
 src/intel/vulkan/gen8_cmd_buffer.c | 167 +++++++++++++++++++++++++++++++++++++
 3 files changed, 176 insertions(+)

diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index 02e79c2..ad3bec9 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -58,6 +58,9 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
                      unsigned vs_entry_size, unsigned gs_entry_size,
                      const struct gen_l3_config *l3_config);
 
+void genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer,
+                               enum blorp_hiz_op op);
+
 VkResult
 genX(graphics_pipeline_create)(VkDevice _device,
                                struct anv_pipeline_cache *cache,
diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c
index b627ef0..78b5ac7 100644
--- a/src/intel/vulkan/gen7_cmd_buffer.c
+++ b/src/intel/vulkan/gen7_cmd_buffer.c
@@ -323,6 +323,12 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
    cmd_buffer->state.dirty = 0;
 }
 
+void
+genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer,
+                          enum blorp_hiz_op op)
+{
+}
+
 void genX(CmdSetEvent)(
     VkCommandBuffer                             commandBuffer,
     VkEvent                                     event,
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index 7058608..a13413c 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -399,6 +399,173 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 }
 
+
+/**
+ * Emit the HZ_OP packet in the sequence specified by the BDW PRM section
+ * entitled: "Optimized Depth Buffer Clear and/or Stencil Buffer Clear."
+ *
+ * \todo Enable Stencil Buffer-only clears
+ */
+void
+genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer,
+                          enum blorp_hiz_op op)
+{
+   struct anv_cmd_state *cmd_state = &cmd_buffer->state;
+   const struct anv_image_view *iview =
+      anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
+
+   if (iview == NULL || !anv_image_has_hiz(iview->image))
+      return;
+
+   const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
+   const bool full_surface_op =
+             cmd_state->render_area.extent.width == iview->extent.width &&
+             cmd_state->render_area.extent.height == iview->extent.height;
+
+   /* Validate that we can perform the HZ operation and that it's necessary. */
+   switch (op) {
+   case BLORP_HIZ_OP_DEPTH_CLEAR:
+      if (cmd_buffer->state.pass->attachments[ds].load_op !=
+          VK_ATTACHMENT_LOAD_OP_CLEAR)
+         return;
+
+      /* Apply alignment restrictions. Despite the BDW PRM mentioning this is
+       * only needed for a depth buffer surface type of D16_UNORM, testing
+       * showed it to be necessary for other depth formats as well
+       * (e.g., D32_FLOAT).
+       */
+      if (!full_surface_op) {
+
+         struct isl_extent2d px_dim;
+#if GEN_GEN == 8
+         /* Pre-SKL, HiZ has an 8x4 sample block. As the number of samples
+          * increases, the number of pixels representable by this block
+          * decreases by a factor of the sample dimensions. Sample dimensions
+          * scale following the MSAA interleaved pattern.
+          *
+          * Sample|Sample|Pixel
+          * Count |Dim   |Dim
+          * ===================
+          *    1  | 1x1  | 8x4
+          *    2  | 2x1  | 4x4
+          *    4  | 2x2  | 4x2
+          *    8  | 4x2  | 2x2
+          *   16  | 4x4  | 2x1
+          *
+          * Table: Pixel Dimensions in a HiZ Sample Block Pre-SKL
+          */
+         const struct isl_extent2d sa_dim =
+            isl_get_interleaved_msaa_px_size_sa(iview->image->samples);
+         px_dim.w = 8 / sa_dim.w;
+         px_dim.h = 4 / sa_dim.h;
+#else
+         /* SKL+, the sample block becomes a "pixel block" so the expected
+          * pixel dimension is a constant 8x4 px for all sample counts.
+          */
+         px_dim = (struct isl_extent2d) { .w = 8, .h = 4};
+#endif
+
+         /* Fast depth clears clear an entire sample block at a time. As a
+          * result, the rectangle must be aligned to the pixel dimensions of
+          * a sample block for a successful operation.
+          */
+         if (cmd_state->render_area.offset.x % px_dim.w ||
+             cmd_state->render_area.offset.y % px_dim.h ||
+             cmd_state->render_area.extent.width % px_dim.w ||
+             cmd_state->render_area.extent.height % px_dim.h)
+            return;
+      }
+      break;
+   case BLORP_HIZ_OP_DEPTH_RESOLVE:
+      if (cmd_buffer->state.pass->attachments[ds].store_op !=
+          VK_ATTACHMENT_STORE_OP_STORE)
+         return;
+      break;
+   case BLORP_HIZ_OP_HIZ_RESOLVE:
+      if (cmd_buffer->state.pass->attachments[ds].load_op !=
+          VK_ATTACHMENT_LOAD_OP_LOAD)
+         return;
+      break;
+   case BLORP_HIZ_OP_NONE:
+      unreachable("Invalid HiZ OP");
+      break;
+   }
+
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_HZ_OP), hzp) {
+      switch (op) {
+      case BLORP_HIZ_OP_DEPTH_CLEAR:
+         hzp.StencilBufferClearEnable = VK_IMAGE_ASPECT_STENCIL_BIT &
+                            cmd_state->attachments[ds].pending_clear_aspects;
+         hzp.DepthBufferClearEnable = VK_IMAGE_ASPECT_DEPTH_BIT &
+                            cmd_state->attachments[ds].pending_clear_aspects;
+         hzp.FullSurfaceDepthandStencilClear = full_surface_op;
+         hzp.StencilClearValue = 0xff &
+                   cmd_state->attachments[ds].clear_value.depthStencil.stencil;
+
+         /* Mark aspects as cleared */
+         cmd_state->attachments[ds].pending_clear_aspects = 0;
+         break;
+      case BLORP_HIZ_OP_DEPTH_RESOLVE:
+         hzp.DepthBufferResolveEnable = true;
+         break;
+      case BLORP_HIZ_OP_HIZ_RESOLVE:
+         hzp.HierarchicalDepthBufferResolveEnable = true;
+         break;
+      case BLORP_HIZ_OP_NONE:
+         unreachable("Invalid HiZ OP");
+         break;
+      }
+
+      if (op != BLORP_HIZ_OP_DEPTH_CLEAR) {
+         /* The Optimized HiZ resolve rectangle must be the size of the full RT
+          * and aligned to 8x4. The non-optimized Depth resolve rectangle must
+          * be the size of the full RT. The same alignment is assumed to be
+          * required.
+          *
+          * TODO:
+          * Consider changing halign of non-D16 depth formats to 8 as mip 2 may
+          * get clobbered.
+          */
+         hzp.ClearRectangleXMax = align_u32(iview->extent.width, 8);
+         hzp.ClearRectangleYMax = align_u32(iview->extent.height, 4);
+      } else {
+         /* This clear rectangle is aligned */
+         hzp.ClearRectangleXMin = cmd_state->render_area.offset.x;
+         hzp.ClearRectangleYMin = cmd_state->render_area.offset.y;
+         hzp.ClearRectangleXMax = cmd_state->render_area.offset.x +
+                                  cmd_state->render_area.extent.width;
+         hzp.ClearRectangleYMax = cmd_state->render_area.offset.y +
+                                  cmd_state->render_area.extent.height;
+      }
+
+
+      /* Due to a hardware issue, this bit MBZ */
+      hzp.ScissorRectangleEnable = false;
+      hzp.NumberofMultisamples = ffs(iview->image->samples) - 1;
+      hzp.SampleMask = 0xFFFF;
+   }
+
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+      pc.PostSyncOperation = WriteImmediateData;
+      pc.Address =
+         (struct anv_address){ &cmd_buffer->device->workaround_bo, 0 };
+   }
+
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_HZ_OP), hzp);
+
+   /* TODO:
+    * Determine if a DepthCacheFlushEnable and DepthStallEnable is really
+    * necessary for non-full_surface_op clears. Performing a HZ op without
+    * this pipecontrol showed no impact on rendering results.
+    */
+   if (!full_surface_op && op == BLORP_HIZ_OP_DEPTH_CLEAR) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+         pc.DepthStallEnable = true;
+         pc.DepthCacheFlushEnable = true;
+      }
+   }
+}
+
 void genX(CmdSetEvent)(
     VkCommandBuffer                             commandBuffer,
     VkEvent                                     _event,
-- 
2.10.0



More information about the mesa-dev mailing list