[Mesa-dev] [PATCH] i965: Avoid extraneous fast depth clears

Chia-I Wu olvaffe at gmail.com
Thu Jan 2 22:32:29 PST 2014


On Fri, Dec 27, 2013 at 7:25 AM, Chad Versace
<chad.versace at linux.intel.com> wrote:
> On 12/10/2013 09:54 PM, Chia-I Wu wrote:
>>
>> When the depth buffer is already cleared, skip GEN6_HIZ_OP_DEPTH_CLEAR.
>> This
>> is made possible by tracking which slices have been cleared in
>> "struct intel_mipmap_level".  The hiz_cleared flag is unset when the depth
>> buffer is rendered to or when a HiZ resolve is needed.
>>
>> For Unigine Tropics, the FPS improvement is 1.32134% +/- 0.161878% (n=13).
>
>
> The code looks correct to me, and the perf improvement is nice. I have
> comments below that should improve the maintainability of the affected
> codepaths.
>
>
>> ---
>>   src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  |  1 +
>>   src/mesa/drivers/dri/i965/brw_clear.c         | 58
>> +++++++++++++++++++++------
>>   src/mesa/drivers/dri/i965/brw_draw.c          | 16 +++++++-
>>   src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 37 +++++++++++++++++
>>   src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 21 ++++++++++
>>   5 files changed, 119 insertions(+), 14 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> index 51a3bef..d9ec3e9 100644
>> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> @@ -173,6 +173,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>>      brw_blorp_exec(brw, &params);
>>
>>      intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level,
>> dst_layer);
>> +   intel_miptree_slice_set_hiz_cleared(dst_mt, dst_level, dst_layer,
>> false);
>
>
> If the miptree slice needs a hiz resolve, then the hiz buffer is not
> cleared. You captured
> this invariant by appending ``intel_miptree_slice_set_hiz_cleared(false)``
> to each occurrence
> ``intel_miptree_slice_set_needs_hiz_resolve()``.
>
> In effect, this patch introduces the requirement that all calls to
> ``intel_miptree_slice_set_needs_hiz_resolve()``
> be followed by ``intel_miptree_slice_set_hiz_cleared(false)``. Rather than
> introducing an implicit
> requirement, ``intel_miptree_slice_set_needs_hiz_resolve()`` should
> automatically set ``hiz_cleared = false``.
>
>
>>   }
>>
>>   static void
>> diff --git a/src/mesa/drivers/dri/i965/brw_clear.c
>> b/src/mesa/drivers/dri/i965/brw_clear.c
>> index 1cac996..9dfb94a 100644
>> --- a/src/mesa/drivers/dri/i965/brw_clear.c
>> +++ b/src/mesa/drivers/dri/i965/brw_clear.c
>> @@ -164,34 +164,66 @@ brw_fast_clear_depth(struct gl_context *ctx)
>>         break;
>>      }
>>
>> +   unsigned num_layers_cleared = 0;
>> +   bool clear_all_layers = false;
>> +
>>      /* If we're clearing to a new clear value, then we need to resolve
>> any clear
>>       * flags out of the HiZ buffer into the real depth buffer.
>>       */
>>      if (mt->depth_clear_value != depth_clear_value) {
>>         intel_miptree_all_slices_resolve_depth(brw, mt);
>>         mt->depth_clear_value = depth_clear_value;
>> -   }
>>
>> -   /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
>> -    *
>> -    *     "If other rendering operations have preceded this clear, a
>> -    *      PIPE_CONTROL with write cache flush enabled and Z-inhibit
>> disabled
>> -    *      must be issued before the rectangle primitive used for the
>> depth
>> -    *      buffer clear operation.
>> -    */
>> -   intel_batchbuffer_emit_mi_flush(brw);
>> +      clear_all_layers = true;
>> +   }
>>
>>      if (fb->NumLayers > 0) {
>>         assert(fb->NumLayers ==
>> depth_irb->mt->level[depth_irb->mt_level].depth);
>>         for (unsigned layer = 0; layer < fb->NumLayers; layer++) {
>> -         intel_hiz_exec(brw, mt, depth_irb->mt_level, layer,
>> -                        GEN6_HIZ_OP_DEPTH_CLEAR);
>> +         if (clear_all_layers ||
>> +             !intel_miptree_slice_get_hiz_cleared(mt,
>> +                                                  depth_irb->mt_level,
>> +                                                  layer)) {
>> +            /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
>> +             *
>> +             *     "If other rendering operations have preceded this
>> clear, a
>> +             *      PIPE_CONTROL with write cache flush enabled and
>> Z-inhibit
>> +             *      disabled must be issued before the rectangle
>> primitive used
>> +             *      for the depth buffer clear operation.
>> +             */
>> +            if (!num_layers_cleared)
>
>                    ^^^^^^^^^^^^^^^^^^^^
> This is an integer, not a boolean, and therefore ``num_layers_cleared == 0``
> is more
> readable.
>
>
>> +               intel_batchbuffer_emit_mi_flush(brw);
>> +
>> +            intel_hiz_exec(brw, mt, depth_irb->mt_level, layer,
>> +                           GEN6_HIZ_OP_DEPTH_CLEAR);
>> +
>> +            intel_miptree_slice_set_hiz_cleared(mt,
>> +                                                depth_irb->mt_level,
>> +                                                layer,
>> +                                                true);
>> +            num_layers_cleared++;
>> +         }
>>         }
>>      } else {
>> -      intel_hiz_exec(brw, mt, depth_irb->mt_level, depth_irb->mt_layer,
>> -                     GEN6_HIZ_OP_DEPTH_CLEAR);
>> +      if (clear_all_layers ||
>> +          !intel_miptree_slice_get_hiz_cleared(mt,
>> +                                               depth_irb->mt_level,
>> +                                               depth_irb->mt_layer)) {
>> +         intel_batchbuffer_emit_mi_flush(brw);
>> +         intel_hiz_exec(brw, mt, depth_irb->mt_level,
>> depth_irb->mt_layer,
>> +                        GEN6_HIZ_OP_DEPTH_CLEAR);
>> +
>> +         intel_miptree_slice_set_hiz_cleared(mt,
>> +                                             depth_irb->mt_level,
>> +                                             depth_irb->mt_layer,
>> +                                             true);
>> +         num_layers_cleared = 1;
>> +      }
>>      }
>>
>> +   if (!num_layers_cleared)
>> +      return true;
>> +
>>      if (brw->gen == 6) {
>>         /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
>>          *
>
>
> Several lines down is this:
>
>         intel_renderbuffer_att_set_needs_depth_resolve(depth_att);
>
>         return true;
>     } // end of function
>
> The new code added in this patch would be more concise, and more closely
> follow the pattern of
> existing code, if you removed all the calls to
> ``intel_miptree_slice_set_hiz_cleared()`` and
> replaced them with a single ``intel_renderbuffer_att_set_hiz_cleared(...,
> true)``.
>
> There is another good use of intel_renderbuffer_att_set_hiz_cleared() in the
> below hunk
> for brw_postdraw_set_buffers_need_resolve().
Since intel_renderbuffer_att_set_needs_depth_resolve is called when
the depth buffer is rendered to or cleared, we can extend it to
set/unset the hiz_cleared flag.  I've sent out v2 doing that (as well
as incorporating all your other comments).

>
>
>> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c
>> b/src/mesa/drivers/dri/i965/brw_draw.c
>> index b898cd3..4ebfe44 100644
>> --- a/src/mesa/drivers/dri/i965/brw_draw.c
>> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
>> @@ -363,8 +363,22 @@ static void
>> brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
>>         intel_renderbuffer_set_needs_downsample(front_irb);
>>      if (back_irb)
>>         intel_renderbuffer_set_needs_downsample(back_irb);
>> -   if (depth_irb && ctx->Depth.Mask)
>> +   if (depth_irb && ctx->Depth.Mask) {
>>         intel_renderbuffer_att_set_needs_depth_resolve(depth_att);
>> +
>> +      if (depth_irb->mt) {
>> +         if (depth_att->Layered) {
>> +            intel_miptree_set_all_slices_hiz_cleared(depth_irb->mt,
>> +                                                     depth_irb->mt_level,
>> +                                                     false);
>> +         } else {
>> +            intel_miptree_slice_set_hiz_cleared(depth_irb->mt,
>> +                                                depth_irb->mt_level,
>> +                                                depth_irb->mt_layer,
>> +                                                false);
>> +         }
>> +      }
>> +   }
>
>
> This code would be easier to read if the new if-tree were replaced with a
> single line, ``intel_renderbuffer_att_set_hiz_cleared(...)``.
>
>
>>   }
>>
>>   /* May fail if out of video memory for texture or vbo upload, or on
>
>
>
>> @@ -2256,6 +2292,7 @@ intel_miptree_map_singlesample(struct brw_context
>> *brw,
>>      intel_miptree_slice_resolve_depth(brw, mt, level, slice);
>>      if (map->mode & GL_MAP_WRITE_BIT) {
>>         intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice);
>> +      intel_miptree_slice_set_hiz_cleared(mt, level, slice, false);
>
>
> Again, ``intel_miptree_set_needs_hiz_resolve()`` should automatically set
> ``hiz_cleared = false``.
>
>
>>      }
>>
>>      if (mt->format == MESA_FORMAT_S8) {
>
>



-- 
olv at LunarG.com


More information about the mesa-dev mailing list