[Mesa-dev] [PATCH 09/12] anv/cmd_buffer: Add code for performing HZ operations
Nanley Chery
nanleychery at gmail.com
Fri Sep 2 21:56:47 UTC 2016
On Wed, Aug 31, 2016 at 08:29:55PM -0700, Nanley Chery wrote:
> From: Jason Ekstrand <jason.ekstrand at intel.com>
>
> Nanley Chery:
> (rebase)
> - Resolve conflicts with the new anv_batch_emit macro
> (amend)
> - Update commit title
> - Combine all HZ operations into one function
> - Add code for performing HiZ resolve operations
> - Add proper stencil and multisampling support
> - Set the proper clear rectangles
> - Add required cases for aborting an HZ operation
>
> Signed-off-by: Nanley Chery <nanley.g.chery at intel.com>
> ---
> src/intel/vulkan/anv_genX.h | 3 +
> src/intel/vulkan/anv_private.h | 6 ++
> src/intel/vulkan/gen7_cmd_buffer.c | 5 ++
> src/intel/vulkan/gen8_cmd_buffer.c | 124 +++++++++++++++++++++++++++++++++++++
> 4 files changed, 138 insertions(+)
>
> diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
> index cf5a232..16de990 100644
> --- a/src/intel/vulkan/anv_genX.h
> +++ b/src/intel/vulkan/anv_genX.h
> @@ -54,6 +54,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer);
>
> void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer);
>
> +void genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer,
> + enum anv_hz_op op);
> +
> VkResult
> genX(graphics_pipeline_create)(VkDevice _device,
> struct anv_pipeline_cache *cache,
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 5718a19..40325fd 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -1401,6 +1401,12 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
>
> void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
>
> +enum anv_hz_op {
> + ANV_HZ_OP_CLEAR,
> + ANV_HZ_OP_HIZ_RESOLVE,
> + ANV_HZ_OP_DEPTH_RESOLVE,
> +};
> +
> struct anv_fence {
> struct anv_bo bo;
> struct drm_i915_gem_execbuffer2 execbuf;
> diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c
> index 61778aa..a057a04 100644
> --- a/src/intel/vulkan/gen7_cmd_buffer.c
> +++ b/src/intel/vulkan/gen7_cmd_buffer.c
> @@ -323,6 +323,11 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
> cmd_buffer->state.dirty = 0;
> }
>
> +void
> +genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum anv_hz_op op)
> +{
> +}
> +
> void genX(CmdSetEvent)(
> VkCommandBuffer commandBuffer,
> VkEvent event,
> diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
> index e22b4e2..4f27350 100644
> --- a/src/intel/vulkan/gen8_cmd_buffer.c
> +++ b/src/intel/vulkan/gen8_cmd_buffer.c
> @@ -399,6 +399,130 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
> genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
> }
>
> +
> +/**
> + * Emit the HZ_OP packet in the sequence specified by the BDW PRM section
> + * entitled: "Optimized Depth Buffer Clear and/or Stencil Buffer Clear."
> + */
> +void
> +genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum anv_hz_op op)
> +{
> + struct anv_cmd_state *cmd_state = &cmd_buffer->state;
> + const struct anv_image_view *iview =
> + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
> +
> + if (iview == NULL || !anv_image_has_hiz(iview->image))
> + return;
> +
> + const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
> + const bool full_surface_op =
> + cmd_state->render_area.extent.width == iview->extent.width &&
> + cmd_state->render_area.extent.height == iview->extent.height;
> +
> + /* Validate that we can perform the HZ operation and that it's necessary. */
> + switch (op) {
> + case ANV_HZ_OP_CLEAR:
> + if (cmd_buffer->state.pass->attachments[ds].load_op !=
> + VK_ATTACHMENT_LOAD_OP_CLEAR)
> + return;
> +
> + /* Apply alignment restrictions. For a sample count of 16, the formulas
> + * reduce to identity and indicate that no alignment is required.
> + */
> + if (!full_surface_op && iview->image->samples < 16) {
There may be a bug here. I plan to remove the samples < 16 condition because,
when samples == 16, I'd expect the alignment to become 2x1, not 1x1. I also
need to take a second look at isl_msaa_interleaved_scale_px_to_sa to ensure
that it returns the expected values.
- Nanley
> + uint32_t align_w = 1;
> + uint32_t align_h = 1;
> +
> + if (iview->image->samples > 1) {
> + isl_msaa_interleaved_scale_px_to_sa(iview->image->samples,
> + &align_w, &align_h);
> + }
> +
> + align_w = 8 / align_w;
> + align_h = 4 / align_h;
> +
> + if (cmd_state->render_area.offset.x % align_w ||
> + cmd_state->render_area.offset.y % align_h ||
> + cmd_state->render_area.extent.width % align_w ||
> + cmd_state->render_area.extent.height % align_h)
> + return;
> + }
> + break;
> + case ANV_HZ_OP_DEPTH_RESOLVE:
> + if (cmd_buffer->state.pass->attachments[ds].store_op !=
> + VK_ATTACHMENT_STORE_OP_STORE)
> + return;
> + break;
> + case ANV_HZ_OP_HIZ_RESOLVE:
> + if (cmd_buffer->state.pass->attachments[ds].load_op !=
> + VK_ATTACHMENT_LOAD_OP_LOAD)
> + return;
> + break;
> + }
> +
> + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_HZ_OP), hzp) {
> + switch (op) {
> + case ANV_HZ_OP_CLEAR:
> + hzp.StencilBufferClearEnable = VK_IMAGE_ASPECT_STENCIL_BIT &
> + cmd_state->attachments[ds].pending_clear_aspects;
> + hzp.DepthBufferClearEnable = VK_IMAGE_ASPECT_DEPTH_BIT &
> + cmd_state->attachments[ds].pending_clear_aspects;
> + hzp.FullSurfaceDepthandStencilClear = full_surface_op;
> + hzp.StencilClearValue = 0xff &
> + cmd_state->attachments[ds].clear_value.depthStencil.stencil;
> +
> + /* Mark aspects as cleared */
> + cmd_state->attachments[ds].pending_clear_aspects = 0;
> + break;
> + case ANV_HZ_OP_DEPTH_RESOLVE:
> + hzp.DepthBufferResolveEnable = true;
> + break;
> + case ANV_HZ_OP_HIZ_RESOLVE:
> + hzp.HierarchicalDepthBufferResolveEnable = true;
> + break;
> + }
> +
> + /* The depth resolve rectangle must match the size of the previous clear
> + * rectangle.
> + *
> + * The HiZ resolve rectangle is specified as needing to be the
> + * size of the full RT and aligned to 8x4, these requirements are in
> + * conflict if the RT extent is not a multiple of 8x4. Testing shows
> + * that setting the rectangle to match the render area works just fine.
> + *
> + * In a manner similar to i965, we'd like to diverge from the PRMs here
> + * to reduce the number of HiZ blocks written to.
> + */
> + hzp.ClearRectangleXMin = anv_minify(cmd_state->render_area.offset.x,
> + iview->base_mip);
> + hzp.ClearRectangleYMin = anv_minify(cmd_state->render_area.offset.y,
> + iview->base_mip);
> + hzp.ClearRectangleXMax = anv_minify(cmd_state->render_area.offset.x +
> + cmd_state->render_area.extent.width,
> + iview->base_mip);
> + hzp.ClearRectangleYMax = anv_minify(cmd_state->render_area.offset.y +
> + cmd_state->render_area.extent.height,
> + iview->base_mip);
> +
> + /* Due to a hardware issue, this bit MBZ */
> + hzp.ScissorRectangleEnable = false;
> + hzp.NumberofMultisamples = ffs(iview->image->samples) - 1;
> + hzp.SampleMask = 0xFFFF;
> + }
> +
> + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
> + pc.PostSyncOperation = WriteImmediateData;
> + pc.Address =
> + (struct anv_address){ &cmd_buffer->device->workaround_bo, 0 };
> + }
> +
> + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_HZ_OP), hzp);
> +
> + /* TODO: Determine if a DepthFlush and DepthStall is really necessary for
> + * non-full_surface_op clears.
> + */
> +}
> +
> void genX(CmdSetEvent)(
> VkCommandBuffer commandBuffer,
> VkEvent _event,
> --
> 2.9.3
>
More information about the mesa-dev
mailing list