[Mesa-dev] [PATCH] i965: Avoid extraneous fast depth clears

Chad Versace chad.versace at linux.intel.com
Thu Dec 26 15:25:51 PST 2013


On 12/10/2013 09:54 PM, Chia-I Wu wrote:
> When the depth buffer is already cleared, skip GEN6_HIZ_OP_DEPTH_CLEAR.  This
> is made possible by tracking which slices have been cleared in
> "struct intel_mipmap_level".  The hiz_cleared flag is unset when the depth
> buffer is rendered to or when a HiZ resolve is needed.
>
> For Unigine Tropics, the FPS improvement is 1.32134% +/- 0.161878% (n=13).

The code looks correct to me, and the perf improvement is nice. I have
comments below that should improve the maintainability of the affected
codepaths.

> ---
>   src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  |  1 +
>   src/mesa/drivers/dri/i965/brw_clear.c         | 58 +++++++++++++++++++++------
>   src/mesa/drivers/dri/i965/brw_draw.c          | 16 +++++++-
>   src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 37 +++++++++++++++++
>   src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 21 ++++++++++
>   5 files changed, 119 insertions(+), 14 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> index 51a3bef..d9ec3e9 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> @@ -173,6 +173,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>      brw_blorp_exec(brw, &params);
>
>      intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level, dst_layer);
> +   intel_miptree_slice_set_hiz_cleared(dst_mt, dst_level, dst_layer, false);

If the miptree slice needs a hiz resolve, then the hiz buffer is not cleared. You captured
this invariant by appending ``intel_miptree_slice_set_hiz_cleared(false)`` to each occurrence
``intel_miptree_slice_set_needs_hiz_resolve()``.

In effect, this patch introduces the requirement that all calls to ``intel_miptree_slice_set_needs_hiz_resolve()``
be followed by ``intel_miptree_slice_set_hiz_cleared(false)``. Rather than introducing an implicit
requirement, ``intel_miptree_slice_set_needs_hiz_resolve()`` should automatically set ``hiz_cleared = false``.

>   }
>
>   static void
> diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
> index 1cac996..9dfb94a 100644
> --- a/src/mesa/drivers/dri/i965/brw_clear.c
> +++ b/src/mesa/drivers/dri/i965/brw_clear.c
> @@ -164,34 +164,66 @@ brw_fast_clear_depth(struct gl_context *ctx)
>         break;
>      }
>
> +   unsigned num_layers_cleared = 0;
> +   bool clear_all_layers = false;
> +
>      /* If we're clearing to a new clear value, then we need to resolve any clear
>       * flags out of the HiZ buffer into the real depth buffer.
>       */
>      if (mt->depth_clear_value != depth_clear_value) {
>         intel_miptree_all_slices_resolve_depth(brw, mt);
>         mt->depth_clear_value = depth_clear_value;
> -   }
>
> -   /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
> -    *
> -    *     "If other rendering operations have preceded this clear, a
> -    *      PIPE_CONTROL with write cache flush enabled and Z-inhibit disabled
> -    *      must be issued before the rectangle primitive used for the depth
> -    *      buffer clear operation.
> -    */
> -   intel_batchbuffer_emit_mi_flush(brw);
> +      clear_all_layers = true;
> +   }
>
>      if (fb->NumLayers > 0) {
>         assert(fb->NumLayers == depth_irb->mt->level[depth_irb->mt_level].depth);
>         for (unsigned layer = 0; layer < fb->NumLayers; layer++) {
> -         intel_hiz_exec(brw, mt, depth_irb->mt_level, layer,
> -                        GEN6_HIZ_OP_DEPTH_CLEAR);
> +         if (clear_all_layers ||
> +             !intel_miptree_slice_get_hiz_cleared(mt,
> +                                                  depth_irb->mt_level,
> +                                                  layer)) {
> +            /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
> +             *
> +             *     "If other rendering operations have preceded this clear, a
> +             *      PIPE_CONTROL with write cache flush enabled and Z-inhibit
> +             *      disabled must be issued before the rectangle primitive used
> +             *      for the depth buffer clear operation.
> +             */
> +            if (!num_layers_cleared)
                    ^^^^^^^^^^^^^^^^^^^^
This is an integer, not a boolean, and therefore ``num_layers_cleared == 0`` is more
readable.

> +               intel_batchbuffer_emit_mi_flush(brw);
> +
> +            intel_hiz_exec(brw, mt, depth_irb->mt_level, layer,
> +                           GEN6_HIZ_OP_DEPTH_CLEAR);
> +
> +            intel_miptree_slice_set_hiz_cleared(mt,
> +                                                depth_irb->mt_level,
> +                                                layer,
> +                                                true);
> +            num_layers_cleared++;
> +         }
>         }
>      } else {
> -      intel_hiz_exec(brw, mt, depth_irb->mt_level, depth_irb->mt_layer,
> -                     GEN6_HIZ_OP_DEPTH_CLEAR);
> +      if (clear_all_layers ||
> +          !intel_miptree_slice_get_hiz_cleared(mt,
> +                                               depth_irb->mt_level,
> +                                               depth_irb->mt_layer)) {
> +         intel_batchbuffer_emit_mi_flush(brw);
> +         intel_hiz_exec(brw, mt, depth_irb->mt_level, depth_irb->mt_layer,
> +                        GEN6_HIZ_OP_DEPTH_CLEAR);
> +
> +         intel_miptree_slice_set_hiz_cleared(mt,
> +                                             depth_irb->mt_level,
> +                                             depth_irb->mt_layer,
> +                                             true);
> +         num_layers_cleared = 1;
> +      }
>      }
>
> +   if (!num_layers_cleared)
> +      return true;
> +
>      if (brw->gen == 6) {
>         /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
>          *

Several lines down is this:

         intel_renderbuffer_att_set_needs_depth_resolve(depth_att);

         return true;
     } // end of function

The new code added in this patch would be more concise, and more closely follow the pattern of
existing code, if you removed all the calls to ``intel_miptree_slice_set_hiz_cleared()`` and
replaced them with a single ``intel_renderbuffer_att_set_hiz_cleared(..., true)``.

There is another good use of intel_renderbuffer_att_set_hiz_cleared() in the below hunk
for brw_postdraw_set_buffers_need_resolve().

> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
> index b898cd3..4ebfe44 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -363,8 +363,22 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
>         intel_renderbuffer_set_needs_downsample(front_irb);
>      if (back_irb)
>         intel_renderbuffer_set_needs_downsample(back_irb);
> -   if (depth_irb && ctx->Depth.Mask)
> +   if (depth_irb && ctx->Depth.Mask) {
>         intel_renderbuffer_att_set_needs_depth_resolve(depth_att);
> +
> +      if (depth_irb->mt) {
> +         if (depth_att->Layered) {
> +            intel_miptree_set_all_slices_hiz_cleared(depth_irb->mt,
> +                                                     depth_irb->mt_level,
> +                                                     false);
> +         } else {
> +            intel_miptree_slice_set_hiz_cleared(depth_irb->mt,
> +                                                depth_irb->mt_level,
> +                                                depth_irb->mt_layer,
> +                                                false);
> +         }
> +      }
> +   }

This code would be easier to read if the new if-tree were replaced with a
single line, ``intel_renderbuffer_att_set_hiz_cleared(...)``.

>   }
>
>   /* May fail if out of video memory for texture or vbo upload, or on


> @@ -2256,6 +2292,7 @@ intel_miptree_map_singlesample(struct brw_context *brw,
>      intel_miptree_slice_resolve_depth(brw, mt, level, slice);
>      if (map->mode & GL_MAP_WRITE_BIT) {
>         intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice);
> +      intel_miptree_slice_set_hiz_cleared(mt, level, slice, false);

Again, ``intel_miptree_set_needs_hiz_resolve()`` should automatically set ``hiz_cleared = false``.

>      }
>
>      if (mt->format == MESA_FORMAT_S8) {



More information about the mesa-dev mailing list