[Mesa-dev] [PATCH 09/12] anv/cmd_buffer: Add code for performing HZ operations

Nanley Chery nanleychery at gmail.com
Fri Sep 2 21:56:47 UTC 2016


On Wed, Aug 31, 2016 at 08:29:55PM -0700, Nanley Chery wrote:
> From: Jason Ekstrand <jason.ekstrand at intel.com>
> 
> Nanley Chery:
> (rebase)
>  - Resolve conflicts with the new anv_batch_emit macro
> (amend)
>  - Update commit title
>  - Combine all HZ operations into one function
>  - Add code for performing HiZ resolve operations
>  - Add proper stencil and multisampling support
>  - Set the proper clear rectangles
>  - Add required cases for aborting an HZ operation
> 
> Signed-off-by: Nanley Chery <nanley.g.chery at intel.com>
> ---
>  src/intel/vulkan/anv_genX.h        |   3 +
>  src/intel/vulkan/anv_private.h     |   6 ++
>  src/intel/vulkan/gen7_cmd_buffer.c |   5 ++
>  src/intel/vulkan/gen8_cmd_buffer.c | 124 +++++++++++++++++++++++++++++++++++++
>  4 files changed, 138 insertions(+)
> 
> diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
> index cf5a232..16de990 100644
> --- a/src/intel/vulkan/anv_genX.h
> +++ b/src/intel/vulkan/anv_genX.h
> @@ -54,6 +54,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer);
>  
>  void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer);
>  
> +void genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer,
> +                               enum anv_hz_op op);
> +
>  VkResult
>  genX(graphics_pipeline_create)(VkDevice _device,
>                                 struct anv_pipeline_cache *cache,
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 5718a19..40325fd 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -1401,6 +1401,12 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
>  
>  void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
>  
> +enum anv_hz_op {
> +   ANV_HZ_OP_CLEAR,
> +   ANV_HZ_OP_HIZ_RESOLVE,
> +   ANV_HZ_OP_DEPTH_RESOLVE,
> +};
> +
>  struct anv_fence {
>     struct anv_bo bo;
>     struct drm_i915_gem_execbuffer2 execbuf;
> diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c
> index 61778aa..a057a04 100644
> --- a/src/intel/vulkan/gen7_cmd_buffer.c
> +++ b/src/intel/vulkan/gen7_cmd_buffer.c
> @@ -323,6 +323,11 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
>     cmd_buffer->state.dirty = 0;
>  }
>  
> +void
> +genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum anv_hz_op op)
> +{
> +}
> +
>  void genX(CmdSetEvent)(
>      VkCommandBuffer                             commandBuffer,
>      VkEvent                                     event,
> diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
> index e22b4e2..4f27350 100644
> --- a/src/intel/vulkan/gen8_cmd_buffer.c
> +++ b/src/intel/vulkan/gen8_cmd_buffer.c
> @@ -399,6 +399,130 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
>     genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
>  }
>  
> +
> +/**
> + * Emit the HZ_OP packet in the sequence specified by the BDW PRM section
> + * entitled: "Optimized Depth Buffer Clear and/or Stencil Buffer Clear."
> + */
> +void
> +genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum anv_hz_op op)
> +{
> +   struct anv_cmd_state *cmd_state = &cmd_buffer->state;
> +   const struct anv_image_view *iview =
> +      anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
> +
> +   if (iview == NULL || !anv_image_has_hiz(iview->image))
> +      return;
> +
> +   const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
> +   const bool full_surface_op =
> +             cmd_state->render_area.extent.width == iview->extent.width &&
> +             cmd_state->render_area.extent.height == iview->extent.height;
> +
> +   /* Validate that we can perform the HZ operation and that it's necessary. */
> +   switch (op) {
> +   case ANV_HZ_OP_CLEAR:
> +      if (cmd_buffer->state.pass->attachments[ds].load_op !=
> +          VK_ATTACHMENT_LOAD_OP_CLEAR)
> +         return;
> +
> +      /* Apply alignment restrictions. For a sample count of 16, the formulas
> +       * reduce to identity and indicate that no alignment is required.
> +       */
> +      if (!full_surface_op && iview->image->samples < 16) {

There may be a bug here. I plan to remove the samples < 16 condition because,
when samples == 16, I'd expect the alignment to become 2x1, not 1x1. I also
need to take a second look at isl_msaa_interleaved_scale_px_to_sa() to ensure
it returns the expected values.

- Nanley

> +         uint32_t align_w = 1;
> +         uint32_t align_h = 1;
> +
> +         if (iview->image->samples > 1) {
> +            isl_msaa_interleaved_scale_px_to_sa(iview->image->samples,
> +                                                &align_w, &align_h);
> +         }
> +
> +         align_w = 8 / align_w;
> +         align_h = 4 / align_h;
> +
> +         if (cmd_state->render_area.offset.x % align_w ||
> +             cmd_state->render_area.offset.y % align_h ||
> +             cmd_state->render_area.extent.width % align_w ||
> +             cmd_state->render_area.extent.height % align_h)
> +            return;
> +      }
> +      break;
> +   case ANV_HZ_OP_DEPTH_RESOLVE:
> +      if (cmd_buffer->state.pass->attachments[ds].store_op !=
> +          VK_ATTACHMENT_STORE_OP_STORE)
> +         return;
> +      break;
> +   case ANV_HZ_OP_HIZ_RESOLVE:
> +      if (cmd_buffer->state.pass->attachments[ds].load_op !=
> +          VK_ATTACHMENT_LOAD_OP_LOAD)
> +         return;
> +      break;
> +   }
> +
> +   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_HZ_OP), hzp) {
> +      switch (op) {
> +      case ANV_HZ_OP_CLEAR:
> +         hzp.StencilBufferClearEnable = VK_IMAGE_ASPECT_STENCIL_BIT &
> +                            cmd_state->attachments[ds].pending_clear_aspects;
> +         hzp.DepthBufferClearEnable = VK_IMAGE_ASPECT_DEPTH_BIT &
> +                            cmd_state->attachments[ds].pending_clear_aspects;
> +         hzp.FullSurfaceDepthandStencilClear = full_surface_op;
> +         hzp.StencilClearValue = 0xff &
> +                   cmd_state->attachments[ds].clear_value.depthStencil.stencil;
> +
> +         /* Mark aspects as cleared */
> +         cmd_state->attachments[ds].pending_clear_aspects = 0;
> +         break;
> +      case ANV_HZ_OP_DEPTH_RESOLVE:
> +         hzp.DepthBufferResolveEnable = true;
> +         break;
> +      case ANV_HZ_OP_HIZ_RESOLVE:
> +         hzp.HierarchicalDepthBufferResolveEnable = true;
> +         break;
> +      }
> +
> +      /* The depth resolve rectangle must match the size of the previous clear
> +       * rectangle.
> +       *
> +       * The HiZ resolve rectangle is specified as needing to be the
> +       * size of the full RT and aligned to 8x4, these requirements are in
> +       * conflict if the RT extent is not a multiple of 8x4. Testing shows
> +       * that setting the rectangle to match the render area works just fine.
> +       *
> +       * In a manner similar to i965, we'd like to diverge from the PRMs here
> +       * to reduce the number of HiZ blocks written to.
> +       */
> +      hzp.ClearRectangleXMin = anv_minify(cmd_state->render_area.offset.x,
> +                                          iview->base_mip);
> +      hzp.ClearRectangleYMin = anv_minify(cmd_state->render_area.offset.y,
> +                                          iview->base_mip);
> +      hzp.ClearRectangleXMax = anv_minify(cmd_state->render_area.offset.x +
> +                                          cmd_state->render_area.extent.width,
> +                                          iview->base_mip);
> +      hzp.ClearRectangleYMax = anv_minify(cmd_state->render_area.offset.y +
> +                                          cmd_state->render_area.extent.height,
> +                                          iview->base_mip);
> +
> +      /* Due to a hardware issue, this bit MBZ */
> +      hzp.ScissorRectangleEnable = false;
> +      hzp.NumberofMultisamples = ffs(iview->image->samples) - 1;
> +      hzp.SampleMask = 0xFFFF;
> +   }
> +
> +   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
> +      pc.PostSyncOperation = WriteImmediateData;
> +      pc.Address =
> +         (struct anv_address){ &cmd_buffer->device->workaround_bo, 0 };
> +   }
> +
> +   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_HZ_OP), hzp);
> +
> +   /* TODO: Determine if a DepthFlush and DepthStall is really necessary for
> +    *       non-full_surface_op clears.
> +    */
> +}
> +
>  void genX(CmdSetEvent)(
>      VkCommandBuffer                             commandBuffer,
>      VkEvent                                     _event,
> -- 
> 2.9.3
> 


More information about the mesa-dev mailing list