[Mesa-dev] [PATCH v3 16/16] anv: Predicate fast-clear resolves

Jason Ekstrand jason at jlekstrand.net
Wed Jul 12 00:11:07 UTC 2017


On Tue, Jul 11, 2017 at 3:34 PM, Nanley Chery <nanleychery at gmail.com> wrote:

> On Mon, Jul 10, 2017 at 04:00:23PM -0700, Jason Ekstrand wrote:
> > On Wed, Jun 28, 2017 at 2:14 PM, Nanley Chery <nanleychery at gmail.com>
> wrote:
> >
> > > Image layouts only let us know that an image *may* be fast-cleared. For
> > > this reason we can end up with redundant resolves. Testing has shown
> > > that such resolves can measurably hurt performance and that predicating
> > > them can avoid the penalty.
> > >
> > > Signed-off-by: Nanley Chery <nanley.g.chery at intel.com>
> > > ---
> > >  src/intel/vulkan/anv_blorp.c       |  3 +-
> > >  src/intel/vulkan/anv_private.h     | 13 ++++--
> > >  src/intel/vulkan/genX_cmd_buffer.c | 87
> ++++++++++++++++++++++++++++++
> > > ++++++--
> > >  3 files changed, 95 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/src/intel/vulkan/anv_blorp.c
> b/src/intel/vulkan/anv_blorp.c
> > > index 35317ba6be..d06d7e2cc3 100644
> > > --- a/src/intel/vulkan/anv_blorp.c
> > > +++ b/src/intel/vulkan/anv_blorp.c
> > > @@ -1619,7 +1619,8 @@ anv_ccs_resolve(struct anv_cmd_buffer * const
> > > cmd_buffer,
> > >        return;
> > >
> > >     struct blorp_batch batch;
> > > -   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
> 0);
> > > +   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
> > > +                    BLORP_BATCH_PREDICATE_ENABLE);
> > >
> > >     struct blorp_surf surf;
> > >     get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
> > > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> > > private.h
> > > index be1623f3c3..951cf50842 100644
> > > --- a/src/intel/vulkan/anv_private.h
> > > +++ b/src/intel/vulkan/anv_private.h
> > > @@ -2118,11 +2118,16 @@ anv_fast_clear_state_entry_size(const struct
> > > anv_device *device)
> > >  {
> > >     assert(device);
> > >     /* Entry contents:
> > > -    *   +----------------------+
> > > -    *   | clear value dword(s) |
> > > -    *   +----------------------+
> > > +    *   +--------------------------------------------+
> > > +    *   | clear value dword(s) | needs resolve dword |
> > > +    *   +--------------------------------------------+
> > >      */
> > > -   return device->isl_dev.ss.clear_value_size;
> > > +
> > > +   /* Ensure that the needs resolve dword is in fact dword-aligned to
> > > enable
> > > +    * GPU memcpy operations.
> > > +    */
> > > +   assert(device->isl_dev.ss.clear_value_size % 4 == 0);
> > > +   return device->isl_dev.ss.clear_value_size + 4;
> > >  }
> > >
> > >  /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
> > > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> > > b/src/intel/vulkan/genX_cmd_buffer.c
> > > index 62a2f22782..65d9c92783 100644
> > > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > > @@ -421,6 +421,59 @@ get_fast_clear_state_entry_offset(const struct
> > > anv_device *device,
> > >     return offset;
> > >  }
> > >
> > > +#define MI_PREDICATE_SRC0  0x2400
> > > +#define MI_PREDICATE_SRC1  0x2408
> > > +
> > > +enum ccs_resolve_state {
> > > +   CCS_RESOLVE_NOT_NEEDED,
> > > +   CCS_RESOLVE_NEEDED,
> > >
> >
> > Are these two values sufficient?  Do we ever have a scenario where we do
> a
> > partial resolve and then a full resolve?  Do we need to be able to track
> > that?
> >
> >
>
> Yes, they are. We don't currently have such a scenario. This may come up
> later if we start temporarily enabling CCS_E, but I can't think of what
> sequence of events would trigger that to happen.
>
> > > +   CCS_RESOLVE_STARTING,
> > > +};
> > > +
> > > +/* Manages the state of an color image subresource to ensure resolves
> are
> > > + * performed properly.
> > > + */
> > > +static void
> > > +genX(set_resolve_state)(struct anv_cmd_buffer *cmd_buffer,
> > > +                        const struct anv_image *image,
> > > +                        unsigned level,
> > > +                        enum ccs_resolve_state state)
> > > +{
> > > +   assert(cmd_buffer && image);
> > > +   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> > > +   assert(level < anv_image_aux_levels(image));
> > > +
> > > +   const uint32_t resolve_flag_offset =
> > > +      get_fast_clear_state_entry_offset(cmd_buffer->device, image,
> > > level) +
> > > +      cmd_buffer->device->isl_dev.ss.clear_value_size;
> > > +
> > > +   if (state != CCS_RESOLVE_STARTING) {
> > > +      assert(state == CCS_RESOLVE_NEEDED || state ==
> > > CCS_RESOLVE_NOT_NEEDED);
> > > +      /* The HW docs say that there is no way to guarantee the
> completion
> > > of
> > > +       * the following command. We use it nevertheless because it
> shows no
> > > +       * issues in testing is currently being used in the GL driver.
> > > +       */
> > > +      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM),
> sdi) {
> > > +         sdi.Address = (struct anv_address) { image->bo,
> > > resolve_flag_offset };
> > > +         sdi.ImmediateData = state == CCS_RESOLVE_NEEDED;
> > > +      }
> > > +   } else {
> > > +      /* Make the pending predicated resolve a no-op if one is not
> needed.
> > > +       * predicate = do_resolve = resolve_flag != 0;
> > > +       */
> > > +      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1    , 0);
> > > +      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
> > > +      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0    , 0);
> > > +      emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,
> > > +               image->bo, resolve_flag_offset);
> > > +      anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
> > > +         mip.LoadOperation    = LOAD_LOADINV;
> > > +         mip.CombineOperation = COMBINE_SET;
> > > +         mip.CompareOperation = COMPARE_SRCS_EQUAL;
> > > +      }
> > > +   }
> > >
> >
> > This function does two very different things hidden behind an enum that,
> in
> > my view, only clouds what's going on.  If state != STARTING, it *sets*
> the
> > flag, otherwise it *checks* the flag and emits MI_PREDICATE.  Having them
> > in the same function is confusing.  Why not make this two separate
> > functions: set_image_needs_resolve and setup_resolve_predicate or
> something
> > like that.
> >
> >
>
> Sounds good. It did feel awkward to use the RESOLVE_STARTING enum with
> this function. I'll go with:
> * load_needs_resolve_predicate(),
> * set_image_needs_resolve(),
>

Those are good names.


> * and replace the enums with true/false.
>

Sounds good to me.


> Thanks,
> Nanley
>
> > > +}
> > > +
> > >  static void
> > >  init_fast_clear_state_entry(struct anv_cmd_buffer *cmd_buffer,
> > >                              const struct anv_image *image,
> > > @@ -430,6 +483,16 @@ init_fast_clear_state_entry(struct anv_cmd_buffer
> > > *cmd_buffer,
> > >     assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> > >     assert(level < anv_image_aux_levels(image));
> > >
> > > +   /* The resolve flag should updated to signify that
> > > fast-clear/compression
> > > +    * data needs to be removed when leaving the undefined layout. Such
> > > data
> > > +    * may need to be removed if it would cause accesses to the color
> > > buffer
> > > +    * to return incorrect data. The fast clear data in CCS_D buffers
> > > should
> > > +    * be removed because CCS_D isn't enabled all the time.
> > > +    */
> > > +   genX(set_resolve_state)(cmd_buffer, image, level,
> > > +                           image->aux_usage == ISL_AUX_USAGE_CCS_E ?
> > > +                           CCS_RESOLVE_NOT_NEEDED :
> CCS_RESOLVE_NEEDED);
> > > +
> > >     /* The fast clear value dword(s) will be copied into a surface
> state
> > > object.
> > >      * Ensure that the restrictions of the fields in the dword(s) are
> > > followed.
> > >      *
> > > @@ -666,6 +729,8 @@ transition_color_buffer(struct anv_cmd_buffer
> > > *cmd_buffer,
> > >           layer_count = MIN2(layer_count, anv_image_aux_layers(image,
> > > level));
> > >        }
> > >
> > > +      genX(set_resolve_state)(cmd_buffer, image, level,
> > > CCS_RESOLVE_STARTING);
> > > +
> > >        /* Create a surface state with the right clear color and
> perform the
> > >         * resolve.
> > >         */
> > > @@ -697,6 +762,8 @@ transition_color_buffer(struct anv_cmd_buffer
> > > *cmd_buffer,
> > >                        image->aux_usage == ISL_AUX_USAGE_CCS_E ?
> > >                        BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL :
> > >                        BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
> > > +
> > > +      genX(set_resolve_state)(cmd_buffer, image, level,
> > > CCS_RESOLVE_NOT_NEEDED);
> > >     }
> > >
> > >     cmd_buffer->state.pending_pipe_bits |=
> > > @@ -2447,9 +2514,6 @@ void genX(CmdDispatch)(
> > >  #define GPGPU_DISPATCHDIMY 0x2504
> > >  #define GPGPU_DISPATCHDIMZ 0x2508
> > >
> > > -#define MI_PREDICATE_SRC0  0x2400
> > > -#define MI_PREDICATE_SRC1  0x2408
> > > -
> > >  void genX(CmdDispatchIndirect)(
> > >      VkCommandBuffer                             commandBuffer,
> > >      VkBuffer                                    _buffer,
> > > @@ -2856,6 +2920,23 @@ cmd_buffer_subpass_sync_fast_
> clear_values(struct
> > > anv_cmd_buffer *cmd_buffer)
> > >           genX(copy_fast_clear_dwords)(cmd_buffer,
> > > att_state->color_rt_state,
> > >                                        iview->image,
> iview->isl.base_level,
> > >                                        true /* copy from ss */);
> > > +
> > > +         /* Fast-clears impact whether or not a resolve will be
> > > necessary. */
> > > +         if (iview->image->aux_usage == ISL_AUX_USAGE_CCS_E &&
> > > +             att_state->clear_color_is_zero) {
> > > +            /* This image always has the auxiliary buffer enabled. We
> can
> > > mark
> > > +             * the subresource as not needing a resolve because the
> clear
> > > color
> > > +             * will match what's in every RENDER_SURFACE_STATE object
> > > when it's
> > > +             * being used for sampling.
> > > +             */
> > > +            genX(set_resolve_state)(cmd_buffer, iview->image,
> > > +                                    iview->isl.base_level,
> > > +                                    CCS_RESOLVE_NOT_NEEDED);
> > > +         } else {
> > > +            genX(set_resolve_state)(cmd_buffer, iview->image,
> > > +                                    iview->isl.base_level,
> > > +                                    CCS_RESOLVE_NEEDED);
> > > +         }
> > >        } else if (rp_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
> > >           /* The attachment may have been fast-cleared in a previous
> render
> > >            * pass and the value is needed now. Update the surface
> state(s).
> > > --
> > > 2.13.1
> > >
> > > _______________________________________________
> > > mesa-dev mailing list
> > > mesa-dev at lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > >
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170711/1bb6f267/attachment-0001.html>


More information about the mesa-dev mailing list