<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Tue, Jul 11, 2017 at 3:34 PM, Nanley Chery <span dir="ltr"><<a href="mailto:nanleychery@gmail.com" target="_blank">nanleychery@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5">On Mon, Jul 10, 2017 at 04:00:23PM -0700, Jason Ekstrand wrote:<br>
> On Wed, Jun 28, 2017 at 2:14 PM, Nanley Chery <<a href="mailto:nanleychery@gmail.com">nanleychery@gmail.com</a>> wrote:<br>
><br>
> > Image layouts only let us know that an image *may* be fast-cleared. For<br>
> > this reason we can end up with redundant resolves. Testing has shown<br>
> > that such resolves can measurably hurt performance and that predicating<br>
> > them can avoid the penalty.<br>
> ><br>
> > Signed-off-by: Nanley Chery <<a href="mailto:nanley.g.chery@intel.com">nanley.g.chery@intel.com</a>><br>
> > ---<br>
> >  src/intel/vulkan/anv_blorp.c       |  3 +-<br>
> >  src/intel/vulkan/anv_private.h     | 13 ++++--<br>
> >  src/intel/vulkan/genX_cmd_<wbr>buffer.c | 87 ++++++++++++++++++++++++++++++<br>
> > ++++++--<br>
> >  3 files changed, 95 insertions(+), 8 deletions(-)<br>
> ><br>
> > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c<br>
> > index 35317ba6be..d06d7e2cc3 100644<br>
> > --- a/src/intel/vulkan/anv_blorp.c<br>
> > +++ b/src/intel/vulkan/anv_blorp.c<br>
> > @@ -1619,7 +1619,8 @@ anv_ccs_resolve(struct anv_cmd_buffer * const<br>
> > cmd_buffer,<br>
> >        return;<br>
> ><br>
> >     struct blorp_batch batch;<br>
> > -   blorp_batch_init(&cmd_buffer-><wbr>device->blorp, &batch, cmd_buffer, 0);<br>
> > +   blorp_batch_init(&cmd_buffer-><wbr>device->blorp, &batch, cmd_buffer,<br>
> > +                    BLORP_BATCH_PREDICATE_ENABLE);<br>
> ><br>
> >     struct blorp_surf surf;<br>
> >     get_blorp_surf_for_anv_image(<wbr>image, VK_IMAGE_ASPECT_COLOR_BIT,<br>
> > diff --git a/src/intel/vulkan/anv_<wbr>private.h b/src/intel/vulkan/anv_<br>
> > private.h<br>
> > index be1623f3c3..951cf50842 100644<br>
> > --- a/src/intel/vulkan/anv_<wbr>private.h<br>
> > +++ b/src/intel/vulkan/anv_<wbr>private.h<br>
> > @@ -2118,11 +2118,16 @@ anv_fast_clear_state_entry_<wbr>size(const struct<br>
> > anv_device *device)<br>
> >  {<br>
> >     assert(device);<br>
> >     /* Entry contents:<br>
> > -    *   +----------------------+<br>
> > -    *   | clear value dword(s) |<br>
> > -    *   +----------------------+<br>
> > +    *   +-----------------------------<wbr>---------------+<br>
> > +    *   | clear value dword(s) | needs resolve dword |<br>
> > +    *   +-----------------------------<wbr>---------------+<br>
> >      */<br>
> > -   return device->isl_dev.ss.clear_<wbr>value_size;<br>
> > +<br>
> > +   /* Ensure that the needs resolve dword is in fact dword-aligned to<br>
> > enable<br>
> > +    * GPU memcpy operations.<br>
> > +    */<br>
> > +   assert(device->isl_dev.ss.<wbr>clear_value_size % 4 == 0);<br>
> > +   return device->isl_dev.ss.clear_<wbr>value_size + 4;<br>
> >  }<br>
> ><br>
> >  /* Returns true if a HiZ-enabled depth buffer can be sampled from. */<br>
> > diff --git a/src/intel/vulkan/genX_cmd_<wbr>buffer.c<br>
> > b/src/intel/vulkan/genX_cmd_<wbr>buffer.c<br>
> > index 62a2f22782..65d9c92783 100644<br>
> > --- a/src/intel/vulkan/genX_cmd_<wbr>buffer.c<br>
> > +++ b/src/intel/vulkan/genX_cmd_<wbr>buffer.c<br>
> > @@ -421,6 +421,59 @@ get_fast_clear_state_entry_<wbr>offset(const struct<br>
> > anv_device *device,<br>
> >     return offset;<br>
> >  }<br>
> ><br>
> > +#define MI_PREDICATE_SRC0  0x2400<br>
> > +#define MI_PREDICATE_SRC1  0x2408<br>
> > +<br>
> > +enum ccs_resolve_state {<br>
> > +   CCS_RESOLVE_NOT_NEEDED,<br>
> > +   CCS_RESOLVE_NEEDED,<br>
> ><br>
><br>
> Are these two values sufficient?  Do we ever have a scenario where we do a<br>
> partial resolve and then a full resolve?  Do we need to be able to track<br>
> that?<br>
><br>
><br>
<br>
</div></div>Yes, they are. We don't currently have such a scenario. This may come up<br>
later if we start temporarily enabling CCS_E, but I can't think of what<br>
sequence of events would trigger that to happen.<br>
<div><div class="h5"><br>
> > +   CCS_RESOLVE_STARTING,<br>
> > +};<br>
> > +<br>
> > +/* Manages the state of an color image subresource to ensure resolves are<br>
> > + * performed properly.<br>
> > + */<br>
> > +static void<br>
> > +genX(set_resolve_state)(<wbr>struct anv_cmd_buffer *cmd_buffer,<br>
> > +                        const struct anv_image *image,<br>
> > +                        unsigned level,<br>
> > +                        enum ccs_resolve_state state)<br>
> > +{<br>
> > +   assert(cmd_buffer && image);<br>
> > +   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);<br>
> > +   assert(level < anv_image_aux_levels(image));<br>
> > +<br>
> > +   const uint32_t resolve_flag_offset =<br>
> > +      get_fast_clear_state_entry_<wbr>offset(cmd_buffer->device, image,<br>
> > level) +<br>
> > +      cmd_buffer->device->isl_dev.<wbr>ss.clear_value_size;<br>
> > +<br>
> > +   if (state != CCS_RESOLVE_STARTING) {<br>
> > +      assert(state == CCS_RESOLVE_NEEDED || state ==<br>
> > CCS_RESOLVE_NOT_NEEDED);<br>
> > +      /* The HW docs say that there is no way to guarantee the completion<br>
> > of<br>
> > +       * the following command. We use it nevertheless because it shows no<br>
> > +       * issues in testing is currently being used in the GL driver.<br>
> > +       */<br>
> > +      anv_batch_emit(&cmd_buffer-><wbr>batch, GENX(MI_STORE_DATA_IMM), sdi) {<br>
> > +         sdi.Address = (struct anv_address) { image->bo,<br>
> > resolve_flag_offset };<br>
> > +         sdi.ImmediateData = state == CCS_RESOLVE_NEEDED;<br>
> > +      }<br>
> > +   } else {<br>
> > +      /* Make the pending predicated resolve a no-op if one is not needed.<br>
> > +       * predicate = do_resolve = resolve_flag != 0;<br>
> > +       */<br>
> > +      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1    , 0);<br>
> > +      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);<br>
> > +      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0    , 0);<br>
> > +      emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,<br>
> > +               image->bo, resolve_flag_offset);<br>
> > +      anv_batch_emit(&cmd_buffer-><wbr>batch, GENX(MI_PREDICATE), mip) {<br>
> > +         mip.LoadOperation    = LOAD_LOADINV;<br>
> > +         mip.CombineOperation = COMBINE_SET;<br>
> > +         mip.CompareOperation = COMPARE_SRCS_EQUAL;<br>
> > +      }<br>
> > +   }<br>
> ><br>
><br>
> This function does two very different things hidden behind an enum that, in<br>
> my view, only clouds what's going on.  If state != STARTING, it *sets* the<br>
> flag, otherwise it *checks* the flag and emits MI_PREDICATE.  Having them<br>
> in the same function is confusing.  Why not make this two separate<br>
> functions: set_image_needs_resolve and setup_resolve_predicate or something<br>
> like that.<br>
><br>
><br>
<br>
</div></div>Sounds good. It did feel awkward to use the RESOLVE_STARTING enum with<br>
this function. I'll go with:<br>
* load_needs_resolve_predicate()<wbr>,<br>
* set_image_needs_resolve(),<br></blockquote><div><br></div><div>Those are good names.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
* and replace the enums with true/false.<br></blockquote><div><br></div><div>Sounds good to me.<br></div><div> <br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Thanks,<br>
Nanley<br>
<div class="HOEnZb"><div class="h5"><br>
> > +}<br>
> > +<br>
> >  static void<br>
> >  init_fast_clear_state_entry(<wbr>struct anv_cmd_buffer *cmd_buffer,<br>
> >                              const struct anv_image *image,<br>
> > @@ -430,6 +483,16 @@ init_fast_clear_state_entry(<wbr>struct anv_cmd_buffer<br>
> > *cmd_buffer,<br>
> >     assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);<br>
> >     assert(level < anv_image_aux_levels(image));<br>
> ><br>
> > +   /* The resolve flag should updated to signify that<br>
> > fast-clear/compression<br>
> > +    * data needs to be removed when leaving the undefined layout. Such<br>
> > data<br>
> > +    * may need to be removed if it would cause accesses to the color<br>
> > buffer<br>
> > +    * to return incorrect data. The fast clear data in CCS_D buffers<br>
> > should<br>
> > +    * be removed because CCS_D isn't enabled all the time.<br>
> > +    */<br>
> > +   genX(set_resolve_state)(cmd_<wbr>buffer, image, level,<br>
> > +                           image->aux_usage == ISL_AUX_USAGE_CCS_E ?<br>
> > +                           CCS_RESOLVE_NOT_NEEDED : CCS_RESOLVE_NEEDED);<br>
> > +<br>
> >     /* The fast clear value dword(s) will be copied into a surface state<br>
> > object.<br>
> >      * Ensure that the restrictions of the fields in the dword(s) are<br>
> > followed.<br>
> >      *<br>
> > @@ -666,6 +729,8 @@ transition_color_buffer(struct anv_cmd_buffer<br>
> > *cmd_buffer,<br>
> >           layer_count = MIN2(layer_count, anv_image_aux_layers(image,<br>
> > level));<br>
> >        }<br>
> ><br>
> > +      genX(set_resolve_state)(cmd_<wbr>buffer, image, level,<br>
> > CCS_RESOLVE_STARTING);<br>
> > +<br>
> >        /* Create a surface state with the right clear color and perform the<br>
> >         * resolve.<br>
> >         */<br>
> > @@ -697,6 +762,8 @@ transition_color_buffer(struct anv_cmd_buffer<br>
> > *cmd_buffer,<br>
> >                        image->aux_usage == ISL_AUX_USAGE_CCS_E ?<br>
> >                        BLORP_FAST_CLEAR_OP_RESOLVE_<wbr>PARTIAL :<br>
> >                        BLORP_FAST_CLEAR_OP_RESOLVE_<wbr>FULL);<br>
> > +<br>
> > +      genX(set_resolve_state)(cmd_<wbr>buffer, image, level,<br>
> > CCS_RESOLVE_NOT_NEEDED);<br>
> >     }<br>
> ><br>
> >     cmd_buffer->state.pending_<wbr>pipe_bits |=<br>
> > @@ -2447,9 +2514,6 @@ void genX(CmdDispatch)(<br>
> >  #define GPGPU_DISPATCHDIMY 0x2504<br>
> >  #define GPGPU_DISPATCHDIMZ 0x2508<br>
> ><br>
> > -#define MI_PREDICATE_SRC0  0x2400<br>
> > -#define MI_PREDICATE_SRC1  0x2408<br>
> > -<br>
> >  void genX(CmdDispatchIndirect)(<br>
> >      VkCommandBuffer                             commandBuffer,<br>
> >      VkBuffer                                    _buffer,<br>
> > @@ -2856,6 +2920,23 @@ cmd_buffer_subpass_sync_fast_<wbr>clear_values(struct<br>
> > anv_cmd_buffer *cmd_buffer)<br>
> >           genX(copy_fast_clear_dwords)(<wbr>cmd_buffer,<br>
> > att_state->color_rt_state,<br>
> >                                        iview->image, iview->isl.base_level,<br>
> >                                        true /* copy from ss */);<br>
> > +<br>
> > +         /* Fast-clears impact whether or not a resolve will be<br>
> > necessary. */<br>
> > +         if (iview->image->aux_usage == ISL_AUX_USAGE_CCS_E &&<br>
> > +             att_state->clear_color_is_<wbr>zero) {<br>
> > +            /* This image always has the auxiliary buffer enabled. We can<br>
> > mark<br>
> > +             * the subresource as not needing a resolve because the clear<br>
> > color<br>
> > +             * will match what's in every RENDER_SURFACE_STATE object<br>
> > when it's<br>
> > +             * being used for sampling.<br>
> > +             */<br>
> > +            genX(set_resolve_state)(cmd_<wbr>buffer, iview->image,<br>
> > +                                    iview->isl.base_level,<br>
> > +                                    CCS_RESOLVE_NOT_NEEDED);<br>
> > +         } else {<br>
> > +            genX(set_resolve_state)(cmd_<wbr>buffer, iview->image,<br>
> > +                                    iview->isl.base_level,<br>
> > +                                    CCS_RESOLVE_NEEDED);<br>
> > +         }<br>
> >        } else if (rp_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {<br>
> >           /* The attachment may have been fast-cleared in a previous render<br>
> >            * pass and the value is needed now. Update the surface state(s).<br>
> > --<br>
> > 2.13.1<br>
> ><br>
> > ______________________________<wbr>_________________<br>
> > mesa-dev mailing list<br>
> > <a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
> > <a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
> ><br>
</div></div></blockquote></div><br></div></div>