[Mesa-dev] [PATCH 09/12] anv/cmd_buffer: Add code for performing HZ operations
Nanley Chery
nanleychery at gmail.com
Thu Sep 1 03:29:55 UTC 2016
From: Jason Ekstrand <jason.ekstrand at intel.com>
Nanley Chery:
(rebase)
- Resolve conflicts with the new anv_batch_emit macro
(amend)
- Update commit title
- Combine all HZ operations into one function
- Add code for performing HiZ resolve operations
- Add proper stencil and multisampling support
- Set the proper clear rectangles
- Add required cases for aborting an HZ operation
Signed-off-by: Nanley Chery <nanley.g.chery at intel.com>
---
src/intel/vulkan/anv_genX.h | 3 +
src/intel/vulkan/anv_private.h | 6 ++
src/intel/vulkan/gen7_cmd_buffer.c | 5 ++
src/intel/vulkan/gen8_cmd_buffer.c | 124 +++++++++++++++++++++++++++++++++++++
4 files changed, 138 insertions(+)
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index cf5a232..16de990 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -54,6 +54,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer);
+void genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer,
+ enum anv_hz_op op);
+
VkResult
genX(graphics_pipeline_create)(VkDevice _device,
struct anv_pipeline_cache *cache,
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 5718a19..40325fd 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1401,6 +1401,12 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
+enum anv_hz_op { /* Selects the operation performed by genX(cmd_buffer_do_hz_op)(). */
+ ANV_HZ_OP_CLEAR, /* Depth and/or stencil clear (sets the *BufferClearEnable fields). */
+ ANV_HZ_OP_HIZ_RESOLVE, /* Sets HierarchicalDepthBufferResolveEnable. */
+ ANV_HZ_OP_DEPTH_RESOLVE, /* Sets DepthBufferResolveEnable. */
+};
+
struct anv_fence {
struct anv_bo bo;
struct drm_i915_gem_execbuffer2 execbuf;
diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c
index 61778aa..a057a04 100644
--- a/src/intel/vulkan/gen7_cmd_buffer.c
+++ b/src/intel/vulkan/gen7_cmd_buffer.c
@@ -323,6 +323,11 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->state.dirty = 0;
}
+void
+genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum anv_hz_op op)
+{ /* Empty body: this gen7 variant emits nothing, whatever the value of op. */
+}
+
void genX(CmdSetEvent)(
VkCommandBuffer commandBuffer,
VkEvent event,
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index e22b4e2..4f27350 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -399,6 +399,130 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}
+
+/**
+ * Emit the HZ_OP packet in the sequence specified by the BDW PRM section
+ * entitled: "Optimized Depth Buffer Clear and/or Stencil Buffer Clear."
+ *
+ * The sequence written to cmd_buffer->batch is:
+ *   1. a 3DSTATE_WM_HZ_OP packet configured for the requested op,
+ *   2. a PIPE_CONTROL whose post-sync operation writes immediate data to
+ *      the device's workaround BO,
+ *   3. a second 3DSTATE_WM_HZ_OP packet with no fields set explicitly.
+ *
+ * Nothing is emitted, and the function returns early, when:
+ *   - anv_cmd_buffer_get_depth_stencil_view() returns NULL,
+ *   - anv_image_has_hiz() is false for the view's image,
+ *   - the depth/stencil attachment's load/store op does not match the
+ *     requested op (see the validation switch below), or
+ *   - for a partial clear, the render area fails the alignment check.
+ *
+ * Side effect: for ANV_HZ_OP_CLEAR, the attachment's pending_clear_aspects
+ * is reset to 0 once the clear packet has been filled in.
+ *
+ * cmd_buffer: command buffer whose batch receives the packets and whose
+ *             state (render area, pass, subpass, attachments) is read.
+ * op:         which HZ operation to perform.
+ */
+void
+genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum anv_hz_op op)
+{
+ struct anv_cmd_state *cmd_state = &cmd_buffer->state;
+ const struct anv_image_view *iview =
+ anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
+
+ if (iview == NULL || !anv_image_has_hiz(iview->image))
+ return; /* no depth/stencil view, or its image has no HiZ */
+
+ const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
+ const bool full_surface_op =
+ cmd_state->render_area.extent.width == iview->extent.width &&
+ cmd_state->render_area.extent.height == iview->extent.height; /* NOTE(review): only extents are compared; the render-area offset is not checked here */
+
+ /* Validate that we can perform the HZ operation and that it's necessary. */
+ switch (op) {
+ case ANV_HZ_OP_CLEAR: /* requires the attachment's load op to be CLEAR */
+ if (cmd_buffer->state.pass->attachments[ds].load_op !=
+ VK_ATTACHMENT_LOAD_OP_CLEAR)
+ return;
+
+ /* Apply alignment restrictions. For a sample count of 16, the formulas
+ * reduce to identity and indicate that no alignment is required.
+ *
+ * align_w/align_h start at 1x1. For multisampled images they are first
+ * passed through isl_msaa_interleaved_scale_px_to_sa(); 8 and 4 are then
+ * divided by the results. A partial clear whose render-area offset or
+ * extent is not a multiple of the resulting values is abandoned.
+ *
+ * NOTE(review): 8 and 4 presumably correspond to the 8x4 alignment
+ * mentioned for the HiZ resolve rectangle below -- confirm against
+ * the PRM.
+ */
+ if (!full_surface_op && iview->image->samples < 16) {
+ uint32_t align_w = 1;
+ uint32_t align_h = 1;
+
+ if (iview->image->samples > 1) {
+ isl_msaa_interleaved_scale_px_to_sa(iview->image->samples,
+ &align_w, &align_h);
+ }
+
+ align_w = 8 / align_w;
+ align_h = 4 / align_h;
+
+ if (cmd_state->render_area.offset.x % align_w ||
+ cmd_state->render_area.offset.y % align_h ||
+ cmd_state->render_area.extent.width % align_w ||
+ cmd_state->render_area.extent.height % align_h)
+ return;
+ }
+ break;
+ case ANV_HZ_OP_DEPTH_RESOLVE: /* requires the attachment's store op to be STORE */
+ if (cmd_buffer->state.pass->attachments[ds].store_op !=
+ VK_ATTACHMENT_STORE_OP_STORE)
+ return;
+ break;
+ case ANV_HZ_OP_HIZ_RESOLVE: /* requires the attachment's load op to be LOAD */
+ if (cmd_buffer->state.pass->attachments[ds].load_op !=
+ VK_ATTACHMENT_LOAD_OP_LOAD)
+ return;
+ break;
+ }
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_HZ_OP), hzp) {
+ switch (op) {
+ case ANV_HZ_OP_CLEAR:
+ hzp.StencilBufferClearEnable = VK_IMAGE_ASPECT_STENCIL_BIT &
+ cmd_state->attachments[ds].pending_clear_aspects;
+ hzp.DepthBufferClearEnable = VK_IMAGE_ASPECT_DEPTH_BIT &
+ cmd_state->attachments[ds].pending_clear_aspects;
+ hzp.FullSurfaceDepthandStencilClear = full_surface_op;
+ hzp.StencilClearValue = 0xff &
+ cmd_state->attachments[ds].clear_value.depthStencil.stencil; /* only the low 8 bits of the stencil clear value are used */
+
+ /* Mark aspects as cleared (every pending aspect bit of this attachment
+ * is reset, not only the depth and stencil bits).
+ */
+ cmd_state->attachments[ds].pending_clear_aspects = 0;
+ break;
+ case ANV_HZ_OP_DEPTH_RESOLVE:
+ hzp.DepthBufferResolveEnable = true;
+ break;
+ case ANV_HZ_OP_HIZ_RESOLVE:
+ hzp.HierarchicalDepthBufferResolveEnable = true;
+ break;
+ }
+
+ /* The depth resolve rectangle must match the size of the previous clear
+ * rectangle.
+ *
+ * The HiZ resolve rectangle is specified as needing to be the
+ * size of the full RT and aligned to 8x4; these requirements are in
+ * conflict if the RT extent is not a multiple of 8x4. Testing shows
+ * that setting the rectangle to match the render area works just fine.
+ *
+ * In a manner similar to i965, we'd like to diverge from the PRMs here
+ * to reduce the number of HiZ blocks written to.
+ *
+ * The same rectangle is therefore programmed for all three ops: the
+ * render area, with each coordinate passed through anv_minify() using
+ * the view's base mip level.
+ */
+ hzp.ClearRectangleXMin = anv_minify(cmd_state->render_area.offset.x,
+ iview->base_mip);
+ hzp.ClearRectangleYMin = anv_minify(cmd_state->render_area.offset.y,
+ iview->base_mip);
+ hzp.ClearRectangleXMax = anv_minify(cmd_state->render_area.offset.x +
+ cmd_state->render_area.extent.width,
+ iview->base_mip);
+ hzp.ClearRectangleYMax = anv_minify(cmd_state->render_area.offset.y +
+ cmd_state->render_area.extent.height,
+ iview->base_mip);
+
+ /* Due to a hardware issue, this bit must be zero (MBZ). */
+ hzp.ScissorRectangleEnable = false;
+ hzp.NumberofMultisamples = ffs(iview->image->samples) - 1; /* equals log2(samples) when samples is a power of two */
+ hzp.SampleMask = 0xFFFF; /* low 16 mask bits all set */
+ }
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ pc.PostSyncOperation = WriteImmediateData;
+ pc.Address =
+ (struct anv_address){ &cmd_buffer->device->workaround_bo, 0 }; /* offset 0 of the device's workaround BO */
+ }
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_HZ_OP), hzp); /* second HZ_OP packet, no fields set explicitly */
+
+ /* TODO: Determine if a DepthFlush and DepthStall are really necessary for
+ * non-full_surface_op clears.
+ */
+}
+
void genX(CmdSetEvent)(
VkCommandBuffer commandBuffer,
VkEvent _event,
--
2.9.3
More information about the mesa-dev
mailing list