[Mesa-dev] [PATCH 05/30] i965/miptree: Rework aux enabling

Chad Versace chadversary at chromium.org
Wed Jun 21 21:18:03 UTC 2017


On Wed 21 Jun 2017, Jason Ekstrand wrote:
> On Wed, Jun 21, 2017 at 12:33 PM, Chad Versace <[1]chadversary at chromium.org>
> wrote:
> 
>     On Fri 16 Jun 2017, Jason Ekstrand wrote:
>     > This commit replaces the complex and confusing set of disable flags with
>     > two fairly straightforward fields which describe the intended auxiliary
>     > surface usage and whether or not the miptree supports fast clears.
>     > Right now, supports_fast_clear can be entirely derived from aux_usage
>     > but that will not always be the case.
>     >
>     > This commit makes functional changes.  One of these changes is that it
>     > re-enables multisampled fast-clears which were accidentally disabled in
>     > cec30a666930ddb8476a9452a89364a24979ff62 around a year ago.  It should
>     > also enable CCS_E for window-system buffers which are Y-tiled.  They
>     > will still get a full resolve like CCS_D but we will at least get some
>     > of the advantage of compression.
>     > ---
>     >  src/mesa/drivers/dri/i965/brw_blorp.c         |   4 +-
>     >  src/mesa/drivers/dri/i965/intel_fbo.c         |   2 +-
>     >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 190
>     +++++++++++++-------------
>     >  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  43 +++---
>     >  4 files changed, 120 insertions(+), 119 deletions(-)
>     >
>     > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri
>     /i965/brw_blorp.c
>     > index 00092ee..9bd25f0 100644
>     > --- a/src/mesa/drivers/dri/i965/brw_blorp.c
>     > +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
>     > @@ -762,7 +762,7 @@ do_single_blorp_clear(struct brw_context *brw, struct
>     gl_framebuffer *fb,
>     >     if (set_write_disables(irb, ctx->Color.ColorMask[buf],
>     color_write_disable))
>     >        can_fast_clear = false;
>     >
>     > -   if (irb->mt->aux_disable & INTEL_AUX_DISABLE_CCS ||
>     > +   if (!irb->mt->supports_fast_clear ||
>     >         !brw_is_color_fast_clear_compatible(brw, irb->mt, &ctx->
>     Color.ClearColor))
>     >        can_fast_clear = false;
>     >
>     > @@ -785,7 +785,7 @@ do_single_blorp_clear(struct brw_context *brw, struct
>     gl_framebuffer *fb,
>     >         */
>     >        if (!irb->mt->mcs_buf) {
>     >           assert(!intel_miptree_is_lossless_compressed(brw, irb->mt));
>     > -         if (!intel_miptree_alloc_ccs(brw, irb->mt, false)) {
>     > +         if (!intel_miptree_alloc_ccs(brw, irb->mt)) {
>     >              /* MCS allocation failed--probably this will only happen in
>     >               * out-of-memory conditions.  But in any case, try to
>     recover
>     >               * by falling back to a non-blorp clear technique.
>     > diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri
>     /i965/intel_fbo.c
>     > index ee4aba9..6a64bcb 100644
>     > --- a/src/mesa/drivers/dri/i965/intel_fbo.c
>     > +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
>     > @@ -555,7 +555,7 @@ intel_renderbuffer_update_wrapper(struct brw_context
>     *brw,
>     >
>     >     intel_renderbuffer_set_draw_offset(irb);
>     >
>     > -   if (intel_miptree_wants_hiz_buffer(brw, mt)) {
>     > +   if (mt->aux_usage == ISL_AUX_USAGE_HIZ && !mt->hiz_buf) {
>     >        intel_miptree_alloc_hiz(brw, mt);
>     >        if (!mt->hiz_buf)
>     >        return false;
>     > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/
>     drivers/dri/i965/intel_mipmap_tree.c
>     > index 0f6d542..101317f 100644
>     > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>     > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>     > @@ -64,7 +64,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
>     >   */
>     >  static enum intel_msaa_layout
>     >  compute_msaa_layout(struct brw_context *brw, mesa_format format,
>     > -                    enum intel_aux_disable aux_disable)
>     > +                    uint32_t layout_flags)
>     >  {
>     >     /* Prior to Gen7, all MSAA surfaces used IMS layout. */
>     >     if (brw->gen < 7)
>     > @@ -90,7 +90,7 @@ compute_msaa_layout(struct brw_context *brw,
>     mesa_format format,
>     >         */
>     >        if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT)
>     {
>     >           return INTEL_MSAA_LAYOUT_UMS;
>     > -      } else if (aux_disable & INTEL_AUX_DISABLE_MCS) {
>     > +      } else if (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) {
>     >           /* We can't use the CMS layout because it uses an aux buffer,
>     the MCS
>     >            * buffer. So fallback to UMS, which is identical to CMS
>     without the
>     >            * MCS. */
>     > @@ -148,9 +148,6 @@ intel_miptree_supports_ccs(struct brw_context *brw,
>     >     if (brw->gen < 7)
>     >        return false;
>     >
>     > -   if (mt->aux_disable & INTEL_AUX_DISABLE_MCS)
>     > -      return false;
>     > -
>     >     /* This function applies only to non-multisampled render targets. */
>     >     if (mt->num_samples > 1)
>     >        return false;
>     > @@ -215,6 +212,26 @@ intel_miptree_supports_ccs(struct brw_context *brw,
>     >        return true;
>     >  }
>     >
>     > +static bool
>     > +intel_miptree_supports_hiz(struct brw_context *brw,
>     > +                           struct intel_mipmap_tree *mt)
>     > +{
>     > +   if (!brw->has_hiz)
>     > +      return false;
>     > +
>     > +   switch (mt->format) {
>     > +   case MESA_FORMAT_Z_FLOAT32:
>     > +   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
>     > +   case MESA_FORMAT_Z24_UNORM_X8_UINT:
>     > +   case MESA_FORMAT_Z24_UNORM_S8_UINT:
>     > +   case MESA_FORMAT_Z_UNORM16:
>     > +      return true;
>     > +   default:
>     > +      return false;
>     > +   }
>     > +}
>     > +
>     > +
>     >  /* On Gen9 support for color buffer compression was extended to single
>     >   * sampled surfaces. This is a helper considering both auxiliary buffer
>     >   * type and number of samples telling if the given miptree represents
>     > @@ -320,10 +337,9 @@ intel_miptree_create_layout(struct brw_context *brw,
>     >     mt->logical_width0 = width0;
>     >     mt->logical_height0 = height0;
>     >     mt->logical_depth0 = depth0;
>     > -   mt->aux_disable = (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) != 0 ?
>     > -      INTEL_AUX_DISABLE_ALL : INTEL_AUX_DISABLE_NONE;
>     > -   mt->aux_disable |= INTEL_AUX_DISABLE_CCS;
>     >     mt->is_scanout = (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT) != 0;
>     > +   mt->aux_usage = ISL_AUX_USAGE_NONE;
>     > +   mt->supports_fast_clear = false;
>     >     mt->aux_state = NULL;
>     >     mt->cpp = _mesa_get_format_bytes(format);
>     >     mt->num_samples = num_samples;
>     > @@ -337,7 +353,7 @@ intel_miptree_create_layout(struct brw_context *brw,
>     >     int depth_multiply = 1;
>     >     if (num_samples > 1) {
>     >        /* Adjust width/height/depth for MSAA */
>     > -      mt->msaa_layout = compute_msaa_layout(brw, format, mt->
>     aux_disable);
>     > +      mt->msaa_layout = compute_msaa_layout(brw, format, layout_flags);
>     >        if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
>     >           /* From the Ivybridge PRM, Volume 1, Part 1, page 108:
>     >            * "If the surface is multisampled and it is a depth or stencil
>     > @@ -460,8 +476,7 @@ intel_miptree_create_layout(struct brw_context *brw,
>     >     if (!(layout_flags & MIPTREE_LAYOUT_FOR_BO) &&
>     >         _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
>     >         (brw->must_use_separate_stencil ||
>     > -     (brw->has_separate_stencil &&
>     > -         intel_miptree_wants_hiz_buffer(brw, mt)))) {
>     > +     (brw->has_separate_stencil && intel_miptree_supports_hiz(brw,
>     mt)))) {
>     >        uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
>     >        if (brw->gen == 6) {
>     >           stencil_flags |= MIPTREE_LAYOUT_TILING_ANY;
>     > @@ -530,14 +545,44 @@ intel_miptree_create_layout(struct brw_context
>     *brw,
>     >        return NULL;
>     >     }
>     >
>     > -   if (mt->aux_disable & INTEL_AUX_DISABLE_MCS)
>     > -      assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS);
>     > -
>     >     return mt;
>     >  }
>     >
>     >
>     >  /**
>     > + * Choose the aux usage for this miptree.  This function must be called
>     fairly
>     > + * late in the miptree create process after we have a tiling.
>     > + */
>     > +static void
>     > +intel_miptree_choose_aux_usage(struct brw_context *brw,
>     > +                               struct intel_mipmap_tree *mt)
>     > +{
>     > +   assert(mt->aux_usage == ISL_AUX_USAGE_NONE);
>     > +
>     > +   if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
>     > +      mt->aux_usage = ISL_AUX_USAGE_MCS;
>     > +   } else if (intel_tiling_supports_ccs(brw, mt->tiling) &&
>     > +              intel_miptree_supports_ccs(brw, mt)) {
>     > +      if (!unlikely(INTEL_DEBUG & DEBUG_NO_RBC) &&
>     > +          brw->gen >= 9 && !mt->is_scanout &&
>     > +          intel_miptree_supports_ccs_e(brw, mt)) {
>     > +         mt->aux_usage = ISL_AUX_USAGE_CCS_E;
>     > +      } else {
>     > +         mt->aux_usage = ISL_AUX_USAGE_CCS_D;
>     > +      }
>     > +   } else if (intel_miptree_supports_hiz(brw, mt)) {
>     > +      mt->aux_usage = ISL_AUX_USAGE_HIZ;
>     > +   }
>     > +
>     > +   /* We can do fast-clear on all auxiliary surface types that are
>     > +    * allocated through the normal texture creation paths.
>     > +    */
>     > +   if (mt->aux_usage != ISL_AUX_USAGE_NONE)
>     > +      mt->supports_fast_clear = true;
>     > +}
>     > +
>     > +
>     > +/**
>     >   * Choose an appropriate uncompressed format for a requested
>     >   * compressed format, if unsupported.
>     >   */
>     > @@ -670,6 +715,9 @@ miptree_create(struct brw_context *brw,
>     >     if (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT)
>     >        mt->bo->cache_coherent = false;
>     >
>     > +   if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
>     > +      intel_miptree_choose_aux_usage(brw, mt);
>     > +
>     >     return mt;
>     >  }
>     >
>     > @@ -726,29 +774,14 @@ intel_miptree_create(struct brw_context *brw,
>     >        }
>     >     }
>     >
>     > -   /* If this miptree is capable of supporting fast color clears, set
>     > -    * fast_clear_state appropriately to ensure that fast clears will
>     occur.
>     > -    * Allocation of the MCS miptree will be deferred until the first
>     fast
>     > -    * clear actually occurs or when compressed single sampled buffer is
>     > -    * written by the GPU for the first time.
>     > +   /* Since CCS_E can compress more than just clear color, we create the
>     CCS
>     > +    * for it up-front.  For CCS_D which only compresses clears, we
>     create the
>     > +    * CCS on-demand when a clear occurs that wants one.
>     >      */
> 
>     The above comment...
>    
>     > -   if (intel_tiling_supports_ccs(brw, mt->tiling) &&
>     > -       intel_miptree_supports_ccs(brw, mt)) {
>     > -      mt->aux_disable &= ~INTEL_AUX_DISABLE_CCS;
>     > -      assert(brw->gen < 8 || mt->halign == 16 || num_samples <= 1);
>     > -
>     > -      /* On Gen9+ clients are not currently capable of consuming
>     compressed
>     > -       * single-sampled buffers. Disabling compression allows us to skip
>     > -       * resolves.
>     > -       */
>     > -      const bool lossless_compression_disabled = INTEL_DEBUG &
>     DEBUG_NO_RBC;
>     > -      const bool is_lossless_compressed =
>     > -         unlikely(!lossless_compression_disabled) &&
>     > -         brw->gen >= 9 && !mt->is_scanout &&
>     > -         intel_miptree_supports_ccs_e(brw, mt);
>     > -
>     > -      if (is_lossless_compressed) {
>     > -         intel_miptree_alloc_ccs(brw, mt, is_lossless_compressed);
>     > +   if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
>     > +      if (!intel_miptree_alloc_ccs(brw, mt)) {
>     > +         intel_miptree_release(&mt);
>     > +         return NULL;
>     >        }
> 
>     ...and the above hunk appear twice in the commit. I believe that both
>     locations are necessary and sufficient. However, you can replace them
>     with a single location.
> 
>     Also, the patch contains two calls to intel_miptree_choose_aux_usage().
>     Again, you can replace them with a single call.
> 
>     Intstead of having these two things duplicated, I think they should
>     each happen once along the common code path:
>     intel_miptree_create_layout(). That should lead to less fragile code,
>     I hope.
> 
>     Facts:
> 
>         * All miptree creation paths eventually lead to
>           intel_miptree_create_layout().
> 
>         * The result of intel_miptree_choose_aux_usage() depends directly on
>           mt->msaa_layout and indirectly on mt->tiling. As far as I can
>           tell, it depends on no other miptree state.
> 
>         * mt->msaa_layout is set in exactly two places: the top of
>           intel_miptree_create_layout() and as a side-effect of
>           intel_miptree_alloc_ccs(). (Why intel_miptree_alloc_ccs, formerly
>           intel_miptree_alloc_non_msrt_mcs, sets mt->msaa_layout to
>           INTEL_MSAA_LAYOUT_CMS is a mystery to me. I choose to ignore it,
>           because I suspect that assignment is nowhere used afterwards.
>           WARNING: I have left the realm of facts!)
> 
>         * mt->tiling is set in exactly 3 locations in intel_mipmap_tree.c:
> 
>             a. intel_miptree_create_layout() sometimes sets mt->tiling as one
>     of its
>                last actions, when it calls brw_miptree_layout(). "sometimes",
>                because brw_miptree_layout() sets mt->tiling if and only if
>                !MIPTREE_LAYOUT_FOR_BO.
> 
>             b. intel_miptree_create_for_bo() sets mt->tiling immediately
>                after calling intel_miptree_create_layout().
> 
>             c. miptree_create() sets mt->tiling in a weird fallback
>                immediately after it calls intel_miptree_create_layout().
> 
>     Conclusions:
> 
>         * You can collapse the two calls of
>           intel_miptree_choose_aux_usage(), and the two pre-allocations of
>           the ccs_e, into a single location (which I'll call "pot o' gold")
>           at the tail of intel_miptree_create_layout() immediately before it
>           returns, IF...
> 
>             * IF the pot o' gold happens after mt->msaa_layout is set. This
>               is trivial because mt->msaa_layout is set at the top of
>               intel_miptree_create_layout() and nowhere else, modulo my
>               willfull ignorance of intel_miptree_alloc_ccs's weirdness.
> 
>             * IF the pot o' gold happens after mt->tiling is set. What's
>               required for this is:
> 
>               * Move the mt->tiling assignment in (b)
>                 to occur immediately before the call to
>     intel_miptree_create_layout().
>                 Trivial fixup.
> 
>               * Fix the weird tiling fallback in item (c) by pushing it into
>                 intel_miptree_create_layout(). Another trivial fixup.
> 
> 
>     I expect that I missed some details. I await your rebuttal.
> 
> 
> You are correct that they could be placed on the common path, sort of.  You are
> wrong that it's a good idea.  Why?  Modifiers.  With modifiers, we have to
> override the choice of aux and create the auxiliary surface ourselves so we
> don't want it created for us.  But we also can't create it with DISABLE_AUX
> because we may want it to still do HALIGN16 if we're going to create one
> ourselves.  So, instead, what we do is to let create_for_bo go ahead and choose
> aux usage, then we stomp it to what we want, and then we set up the CCS.  We
> could choose aux usage in create_layout but I figured that would hint that aux
> usage was something entirely chose by create_layout and not stompped later.  I
> don't like to imply that.

I fast-forwarded through the patch series, and see the final result in
intel_miptree_create_for_dri_image(). Now I see why you want to make
last-minute aux decisions for intel_miptree_create_for_dri_image() but
not intel_miptree_create().

That was only issue with the patch. So,
Reviewed-by: Chad Versace <chadversary at chromium.org>


More information about the mesa-dev mailing list