<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Wed, May 31, 2017 at 8:43 AM, Jason Ekstrand <span dir="ltr"><<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr"><div class="gmail_extra"><div class="gmail_quote"><div><div class="gmail-h5">On Wed, May 31, 2017 at 6:03 AM, Pohjolainen, Topi <span dir="ltr"><<a href="mailto:topi.pohjolainen@gmail.com" target="_blank">topi.pohjolainen@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div class="gmail-m_-206131039698425916HOEnZb"><div class="gmail-m_-206131039698425916h5">On Fri, May 26, 2017 at 04:30:31PM -0700, Jason Ekstrand wrote:<br>
> This commit reworks the resolve tracking for CCS and MCS to use the new<br>
> isl_aux_state enum.  This should provide much more accurate and easy to<br>
> reason about tracking.  In order to understand, for instance, the<br>
> intel_miptree_prepare_ccs_acce<wbr>ss function, one only has to go look at<br>
> the giant comment for the isl_aux_state enum and follow the arrows.<br>
> Unfortunately, there's no good way to split this up without making a<br>
> real mess so there are a bunch of changes in here:<br>
><br>
>  1) We now do partial resolves.  I really have no idea how this ever<br>
>     worked before.  So far as I can tell, the only time the old code<br>
>     ever did a partial resolve was when it was using CCS_D where a<br>
>     partial resolve and a full resolve are the same thing.<br>
><br>
>  2) We are now tracking 4 states instead of 3 for CCS_E.  In particular,<br>
>     we distinguish between compressed with clear and compressed without<br>
>     clear.  The end result is that you will never get two partial<br>
>     resolves in a row.<br>
><br>
>  3) The texture view rules are now more correct.  Previously, we would<br>
>     only bail if compression was not supported by the destination<br>
>     format.  However, this is not actually correct.  Not all format<br>
>     pairs are supported for texture views with CCS even if both support<br>
>     CCS individually.  Fortunately, ISL has a helper for this.<br>
><br>
>  4) We are no longer using intel_resolve_map for tracking aux state but<br>
>     are instead using a simple array of enum isl_aux_state indexed by<br>
>     level and layer.  This is because, now that we're tracking 4<br>
>     different states, it's no longer clear which should be the "default"<br>
>     and array lookups are faster than linked list searches.<br>
><br>
>  5) The new code is very assert-happy.  Incorrect transitions will now<br>
>     get caught by assertions rather than by rendering corruption.<br>
> ---<br>
>  src/mesa/drivers/dri/i965/inte<wbr>l_mipmap_tree.c | 479 +++++++++++++++++---------<br>
>  src/mesa/drivers/dri/i965/inte<wbr>l_mipmap_tree.h |  39 +--<br>
>  src/mesa/drivers/dri/i965/inte<wbr>l_resolve_map.h |  62 +---<br>
>  3 files changed, 320 insertions(+), 260 deletions(-)<br>
><br>
> diff --git a/src/mesa/drivers/dri/i965/in<wbr>tel_mipmap_tree.c b/src/mesa/drivers/dri/i965/in<wbr>tel_mipmap_tree.c<br>
> index cae8358..3e30b2a 100644<br>
> --- a/src/mesa/drivers/dri/i965/in<wbr>tel_mipmap_tree.c<br>
> +++ b/src/mesa/drivers/dri/i965/in<wbr>tel_mipmap_tree.c<br>
> @@ -326,7 +326,7 @@ intel_miptree_create_layout(st<wbr>ruct brw_context *brw,<br>
>     mt->aux_disable |= INTEL_AUX_DISABLE_CCS;<br>
>     mt->is_scanout = (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT) != 0;<br>
>     exec_list_make_empty(&mt-><wbr>hiz_map);<br>
> -   exec_list_make_empty(&mt->col<wbr>or_resolve_map);<br>
> +   mt->aux_state = NULL;<br>
>     mt->cpp = _mesa_get_format_bytes(format)<wbr>;<br>
>     mt->num_samples = num_samples;<br>
>     mt->compressed = _mesa_is_format_compressed(for<wbr>mat);<br>
> @@ -576,6 +576,46 @@ intel_lower_compressed_format(<wbr>struct brw_context *brw, mesa_format format)<br>
>     }<br>
>  }<br>
><br>
> +static enum isl_aux_state **<br>
> +create_aux_state_map(struct intel_mipmap_tree *mt,<br>
> +                     enum isl_aux_state initial)<br>
> +{<br>
> +   const uint32_t levels = mt->last_level + 1;<br>
> +<br>
> +   uint32_t total_slices = 0;<br>
> +   for (uint32_t level = 0; level < levels; level++)<br>
> +      total_slices += mt->level[level].depth;<br>
> +<br>
> +   const size_t per_level_array_size = levels * sizeof(enum isl_aux_state *);<br>
> +<br>
> +   /* We're going to allocate a single chunk of data for both the per-level<br>
> +    * reference array and the arrays of aux_state.  This makes cleanup<br>
> +    * significantly easier.<br>
> +    */<br>
> +   const size_t total_size = per_level_array_size +<br>
> +                             total_slices * sizeof(enum isl_aux_state);<br>
> +   void *data = malloc(total_size);<br>
> +   if (data == NULL)<br>
> +      return NULL;<br>
> +<br>
> +   enum isl_aux_state **per_level_arr = data;<br>
> +   enum isl_aux_state *s = data + per_level_array_size;<br>
> +   for (uint32_t level = 0; level < levels; level++) {<br>
> +      per_level_arr[level] = s;<br>
> +      for (uint32_t a = 0; a < mt->level[level].depth; a++)<br>
> +         *(s++) = initial;<br>
> +   }<br>
> +   assert((void *)s == data + total_size);<br>
> +<br>
> +   return per_level_arr;<br>
> +}<br>
> +<br>
> +static void<br>
> +free_aux_state_map(enum isl_aux_state **state)<br>
> +{<br>
> +   free(state);<br>
> +}<br>
> +<br>
>  static struct intel_mipmap_tree *<br>
>  miptree_create(struct brw_context *brw,<br>
>                 GLenum target,<br>
> @@ -935,7 +975,7 @@ intel_miptree_release(struct intel_mipmap_tree **mt)<br>
>           free((*mt)->mcs_buf);<br>
>        }<br>
>        intel_resolve_map_clear(&(*mt)<wbr>->hiz_map);<br>
> -      intel_resolve_map_clear(&(*mt)<wbr>->color_resolve_map);<br>
> +      free_aux_state_map((*mt)->aux_<wbr>state);<br>
><br>
>        intel_miptree_release(&(*mt)-><wbr>plane[0]);<br>
>        intel_miptree_release(&(*mt)-><wbr>plane[1]);<br>
> @@ -1482,27 +1522,31 @@ intel_miptree_alloc_mcs(struct brw_context *brw,<br>
>        unreachable("Unrecognized sample count in intel_miptree_alloc_mcs");<br>
>     };<br>
><br>
> +   /* Multisampled miptrees are only supported for single level. */<br>
> +   assert(mt->first_level == 0);<br>
> +   enum isl_aux_state **aux_state =<br>
> +      create_aux_state_map(mt, ISL_AUX_STATE_CLEAR);<br>
> +   if (!aux_state)<br>
> +      return false;<br>
> +<br>
>     mt->mcs_buf =<br>
>        intel_mcs_miptree_buf_create(b<wbr>rw, mt,<br>
>                                     format,<br>
>                                     mt->logical_width0,<br>
>                                     mt->logical_height0,<br>
>                                     MIPTREE_LAYOUT_ACCELERATED_UP<wbr>LOAD);<br>
> -   if (!mt->mcs_buf)<br>
> +   if (!mt->mcs_buf) {<br>
> +      free(aux_state);<br>
>        return false;<br>
> +   }<br>
><br>
> -   intel_miptree_init_mcs(brw, mt, 0xFF);<br>
> +   mt->aux_state = aux_state;<br>
><br>
> -   /* Multisampled miptrees are only supported for single level. */<br>
> -   assert(mt->first_level == 0);<br>
> -   intel_miptree_set_fast_clear_<wbr>state(brw, mt, mt->first_level, 0,<br>
> -                                      mt->logical_depth0,<br>
> -                                      INTEL_FAST_CLEAR_STATE_CLEAR);<br>
> +   intel_miptree_init_mcs(brw, mt, 0xFF);<br>
><br>
>     return true;<br>
>  }<br>
><br>
> -<br>
>  bool<br>
>  intel_miptree_alloc_non_msrt_m<wbr>cs(struct brw_context *brw,<br>
>                                   struct intel_mipmap_tree *mt,<br>
> @@ -1528,6 +1572,13 @@ intel_miptree_alloc_non_msrt_m<wbr>cs(struct brw_context *brw,<br>
>     if (!buf)<br>
>        return false;<br>
><br>
> +   enum isl_aux_state **aux_state =<br>
> +      create_aux_state_map(mt, ISL_AUX_STATE_PASS_THROUGH);<br>
> +   if (!aux_state) {<br>
> +      free(buf);<br>
> +      return false;<br>
> +   }<br>
> +<br>
>     buf->size = temp_ccs_surf.size;<br>
>     buf->pitch = temp_ccs_surf.row_pitch;<br>
>     buf->qpitch = isl_surf_get_array_pitch_sa_ro<wbr>ws(&temp_ccs_surf);<br>
> @@ -1549,10 +1600,12 @@ intel_miptree_alloc_non_msrt_m<wbr>cs(struct brw_context *brw,<br>
>                                  1, I915_TILING_Y, &buf->pitch, alloc_flags);<br>
>     if (!buf->bo) {<br>
>        free(buf);<br>
> +      free(aux_state);<br>
>        return false;<br>
>     }<br>
><br>
>     mt->mcs_buf = buf;<br>
> +   mt->aux_state = aux_state;<br>
><br>
>     /* From Gen9 onwards single-sampled (non-msrt) auxiliary buffers are<br>
>      * used for lossless compression which requires similar initialisation<br>
> @@ -1975,19 +2028,35 @@ intel_miptree_all_slices_resol<wbr>ve_depth(struct brw_context *brw,<br>
>                                            BLORP_HIZ_OP_DEPTH_RESOLVE);<br>
>  }<br>
><br>
> -enum intel_fast_clear_state<br>
> -intel_miptree_get_fast_clear_<wbr>state(const struct intel_mipmap_tree *mt,<br>
> -                                   unsigned level, unsigned layer)<br>
> +bool<br>
> +intel_miptree_has_color_unres<wbr>olved(const struct intel_mipmap_tree *mt,<br>
> +                                   unsigned start_level, unsigned num_levels,<br>
> +                                   unsigned start_layer, unsigned num_layers)<br>
>  {<br>
> -   intel_miptree_check_level_lay<wbr>er(mt, level, layer);<br>
> +   assert(_mesa_is_format_color_<wbr>format(mt->format));<br>
><br>
> -   const struct intel_resolve_map *item =<br>
> -      intel_resolve_map_const_get(&m<wbr>t->color_resolve_map, level, layer);<br>
> +   if (!mt->mcs_buf)<br>
> +      return false;<br>
><br>
> -   if (!item)<br>
> -      return INTEL_FAST_CLEAR_STATE_RESOLVE<wbr>D;<br>
> +   /* Clamp the level range to fit the miptree */<br>
> +   assert(start_level + num_levels >= start_level);<br>
<br>
</div></div>This assert looks odd, and did you mean to have it after adjusting?<br><div><div class="gmail-m_-206131039698425916h5"></div></div></blockquote><div><br></div></div></div><div>No, this is a "does the sum overflow" assertion that we do right before we do a sum.  There are a bunch of places where we pass [0, UINT32_MAX] as the range, and if someone decided to pass [5, UINT32_MAX], it would overflow.<br><br></div><div>That said, I think my intention was to do the more direct "if (num_levels == INTEL_REMAINING_LEVELS)" check.  I'll switch it to that.<br></div></div></div></div></blockquote><div><br></div><div>Ok, I think I have something better now.  Take a look at<br><br><a href="https://cgit.freedesktop.org/~jekstrand/mesa/commit/?h=wip/i965-resolve-rework-v3&id=7ae758d95bee414d677df845d4781254f9f334f3">https://cgit.freedesktop.org/~jekstrand/mesa/commit/?h=wip/i965-resolve-rework-v3&id=7ae758d95bee414d677df845d4781254f9f334f3</a><br><br>If you like it, I'm happy to rebase a bit and scatter the changes through the series.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr"><div class="gmail_extra"><div class="gmail_quote"><div></div><div><div class="gmail-h5"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div><div class="gmail-m_-206131039698425916h5">
> +   const uint32_t last_level =<br>
> +      MIN2(mt->last_level, start_level + num_levels - 1);<br>
> +   start_level = MAX2(mt->first_level, start_level);<br>
> +   num_levels = last_level - start_level + 1;<br>
> +<br>
> +   for (uint32_t level = start_level; level <= last_level; level++) {<br>
> +      const uint32_t level_layers = MIN2(num_layers, mt->level[level].depth);<br>
> +      for (unsigned a = 0; a < level_layers; a++) {<br>
> +         enum isl_aux_state aux_state =<br>
> +            intel_miptree_get_aux_state(mt<wbr>, level, start_layer + a);<br>
> +         assert(aux_state != ISL_AUX_STATE_AUX_INVALID);<br>
> +         if (aux_state != ISL_AUX_STATE_PASS_THROUGH)<br>
> +            return true;<br>
> +      }<br>
> +   }<br>
><br>
> -   return item->fast_clear_state;<br>
> +   return false;<br>
>  }<br>
><br>
>  static void<br>
> @@ -2014,135 +2083,188 @@ intel_miptree_check_color_reso<wbr>lve(const struct brw_context *brw,<br>
>     (void)layer;<br>
>  }<br>
><br>
> -void<br>
> -intel_miptree_set_fast_clear_<wbr>state(const struct brw_context *brw,<br>
> -                                   struct intel_mipmap_tree *mt,<br>
> -                                   unsigned level,<br>
> -                                   unsigned first_layer,<br>
> -                                   unsigned num_layers,<br>
> -                                   enum intel_fast_clear_state new_state)<br>
> -{<br>
> -   /* Setting the state to resolved means removing the item from the list<br>
> -    * altogether.<br>
> -    */<br>
> -   assert(new_state != INTEL_FAST_CLEAR_STATE_RESOLVE<wbr>D);<br>
> +static enum blorp_fast_clear_op<br>
> +get_ccs_d_resolve_op(enum isl_aux_state aux_state,<br>
> +                     bool ccs_supported, bool fast_clear_supported)<br>
> +{<br>
> +   assert(ccs_supported == fast_clear_supported);<br>
><br>
> -   intel_miptree_check_color_res<wbr>olve(brw, mt, level, first_layer);<br>
> +   switch (aux_state) {<br>
> +   case ISL_AUX_STATE_CLEAR:<br>
> +   case ISL_AUX_STATE_COMPRESSED_CLEAR<wbr>:<br>
> +      if (!ccs_supported)<br>
> +         return BLORP_FAST_CLEAR_OP_RESOLVE_FU<wbr>LL;<br>
> +      else<br>
> +         return BLORP_FAST_CLEAR_OP_NONE;<br>
><br>
> -   assert(first_layer + num_layers <= mt->physical_depth0);<br>
> +   case ISL_AUX_STATE_PASS_THROUGH:<br>
> +      return BLORP_FAST_CLEAR_OP_NONE;<br>
><br>
> -   for (unsigned i = 0; i < num_layers; i++)<br>
> -      intel_resolve_map_set(&mt->col<wbr>or_resolve_map, level,<br>
> -                            first_layer + i, new_state);<br>
> -}<br>
> +   case ISL_AUX_STATE_RESOLVED:<br>
> +   case ISL_AUX_STATE_AUX_INVALID:<br>
> +   case ISL_AUX_STATE_COMPRESSED_NO_CL<wbr>EAR:<br>
> +      break;<br>
> +   }<br>
><br>
> -bool<br>
> -intel_miptree_has_color_unres<wbr>olved(const struct intel_mipmap_tree *mt,<br>
> -                                   unsigned start_level, unsigned num_levels,<br>
> -                                   unsigned start_layer, unsigned num_layers)<br>
> -{<br>
> -   return intel_resolve_map_find_any(&mt<wbr>->color_resolve_map,<br>
> -                                     start_level, num_levels,<br>
> -                                     start_layer, num_layers) != NULL;<br>
> +   unreachable("Invalid aux state for CCS_D");<br>
>  }<br>
><br>
> -void<br>
> -intel_miptree_used_for_render<wbr>ing(const struct brw_context *brw,<br>
> -                                 struct intel_mipmap_tree *mt, unsigned level,<br>
> -                                 unsigned start_layer, unsigned num_layers)<br>
> +static enum blorp_fast_clear_op<br>
> +get_ccs_e_resolve_op(enum isl_aux_state aux_state,<br>
> +                     bool ccs_supported, bool fast_clear_supported)<br>
>  {<br>
> -   const bool is_lossless_compressed =<br>
> -      intel_miptree_is_lossless_comp<wbr>ressed(brw, mt);<br>
> +   switch (aux_state) {<br>
> +   case ISL_AUX_STATE_CLEAR:<br>
> +   case ISL_AUX_STATE_COMPRESSED_CLEAR<wbr>:<br>
> +      if (!ccs_supported)<br>
> +         return BLORP_FAST_CLEAR_OP_RESOLVE_FU<wbr>LL;<br>
> +      else if (!fast_clear_supported)<br>
> +         return BLORP_FAST_CLEAR_OP_RESOLVE_PA<wbr>RTIAL;<br>
> +      else<br>
> +         return BLORP_FAST_CLEAR_OP_NONE;<br>
><br>
> -   for (unsigned i = 0; i < num_layers; ++i) {<br>
> -      const enum intel_fast_clear_state fast_clear_state =<br>
> -         intel_miptree_get_fast_clear_<wbr>state(mt, level, start_layer + i);<br>
> +   case ISL_AUX_STATE_COMPRESSED_NO_CL<wbr>EAR:<br>
> +      if (!ccs_supported)<br>
> +         return BLORP_FAST_CLEAR_OP_RESOLVE_FU<wbr>LL;<br>
> +      else<br>
> +         return BLORP_FAST_CLEAR_OP_NONE;<br>
><br>
> -      /* If the buffer was previously in fast clear state, change it to<br>
> -       * unresolved state, since it won't be guaranteed to be clear after<br>
> -       * rendering occurs.<br>
> -       */<br>
> -      if (is_lossless_compressed ||<br>
> -          fast_clear_state == INTEL_FAST_CLEAR_STATE_CLEAR) {<br>
> -         intel_miptree_set_fast_clear_<wbr>state(<br>
> -            brw, mt, level, start_layer + i, 1,<br>
> -            INTEL_FAST_CLEAR_STATE_UNRESOL<wbr>VED);<br>
> -      }<br>
> +   case ISL_AUX_STATE_PASS_THROUGH:<br>
> +      return BLORP_FAST_CLEAR_OP_NONE;<br>
> +<br>
> +   case ISL_AUX_STATE_RESOLVED:<br>
> +   case ISL_AUX_STATE_AUX_INVALID:<br>
> +      break;<br>
>     }<br>
> +<br>
> +   unreachable("Invalid aux state for CCS_E");<br>
>  }<br>
><br>
> -static bool<br>
> -intel_miptree_needs_color_res<wbr>olve(const struct brw_context *brw,<br>
> -                                  const struct intel_mipmap_tree *mt,<br>
> -                                  int flags)<br>
> +static void<br>
> +intel_miptree_prepare_ccs_acc<wbr>ess(struct brw_context *brw,<br>
> +                                 struct intel_mipmap_tree *mt,<br>
> +                                 uint32_t level, uint32_t layer,<br>
> +                                 bool aux_supported,<br>
> +                                 bool fast_clear_supported)<br>
>  {<br>
> -   if (mt->aux_disable & INTEL_AUX_DISABLE_CCS)<br>
> -      return false;<br>
> +   enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt<wbr>, level, layer);<br>
><br>
> -   const bool is_lossless_compressed =<br>
> -      intel_miptree_is_lossless_comp<wbr>ressed(brw, mt);<br>
> +   enum blorp_fast_clear_op resolve_op;<br>
> +   if (intel_miptree_is_lossless_com<wbr>pressed(brw, mt)) {<br>
> +      resolve_op = get_ccs_e_resolve_op(aux_state<wbr>, aux_supported,<br>
> +                                        fast_clear_supported);<br>
> +   } else {<br>
> +      resolve_op = get_ccs_d_resolve_op(aux_state<wbr>, aux_supported,<br>
> +                                        fast_clear_supported);<br>
> +   }<br>
><br>
> -   /* From gen9 onwards there is new compression scheme for single sampled<br>
> -    * surfaces called "lossless compressed". These don't need to be always<br>
> -    * resolved.<br>
> -    */<br>
> -   if ((flags & INTEL_MIPTREE_IGNORE_CCS_E) && is_lossless_compressed)<br>
> -      return false;<br>
> +   if (resolve_op != BLORP_FAST_CLEAR_OP_NONE) {<br>
> +      intel_miptree_check_color_reso<wbr>lve(brw, mt, level, layer);<br>
> +      brw_blorp_resolve_color(brw, mt, level, layer, resolve_op);<br>
><br>
> -   /* Fast color clear resolves only make sense for non-MSAA buffers. */<br>
> -   if (mt->msaa_layout != INTEL_MSAA_LAYOUT_NONE && !is_lossless_compressed)<br>
> -      return false;<br>
> +      switch (resolve_op) {<br>
> +      case BLORP_FAST_CLEAR_OP_RESOLVE_FU<wbr>LL:<br>
> +         /* The CCS full resolve operation destroys the CCS and sets it to the<br>
> +          * pass-through state.  (You can also think of this as being both a<br>
> +          * resolve and an ambiguate in one operation.)<br>
> +          */<br>
> +         intel_miptree_set_aux_state(b<wbr>rw, mt, level, layer, 1,<br>
> +                                     ISL_AUX_STATE_PASS_THROUGH);<br>
> +         break;<br>
><br>
> -   return true;<br>
> +      case BLORP_FAST_CLEAR_OP_RESOLVE_PA<wbr>RTIAL:<br>
> +         intel_miptree_set_aux_state(b<wbr>rw, mt, level, layer, 1,<br>
> +                                     ISL_AUX_STATE_COMPRESSED_NO_C<wbr>LEAR);<br>
> +         break;<br>
> +<br>
> +      default:<br>
> +         unreachable("Invalid resolve op");<br>
> +      }<br>
> +   }<br>
>  }<br>
><br>
> -static bool<br>
> -intel_miptree_resolve_color(s<wbr>truct brw_context *brw,<br>
> -                            struct intel_mipmap_tree *mt,<br>
> -                            uint32_t start_level, uint32_t num_levels,<br>
> -                            uint32_t start_layer, uint32_t num_layers,<br>
> -                            int flags)<br>
> +static void<br>
> +intel_miptree_finish_ccs_writ<wbr>e(struct brw_context *brw,<br>
> +                               struct intel_mipmap_tree *mt,<br>
> +                               uint32_t level, uint32_t layer,<br>
> +                               bool written_with_ccs)<br>
>  {<br>
> -   intel_miptree_check_color_res<wbr>olve(brw, mt, start_level, start_layer);<br>
> +   enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt<wbr>, level, layer);<br>
><br>
> -   if (!intel_miptree_needs_color_re<wbr>solve(brw, mt, flags))<br>
> -      return false;<br>
> +   if (intel_miptree_is_lossless_com<wbr>pressed(brw, mt)) {<br>
> +      switch (aux_state) {<br>
> +      case ISL_AUX_STATE_CLEAR:<br>
> +         assert(written_with_ccs);<br>
> +         intel_miptree_set_aux_state(b<wbr>rw, mt, level, layer, 1,<br>
> +                                     ISL_AUX_STATE_COMPRESSED_CLEA<wbr>R);<br>
> +         break;<br>
><br>
> -   enum blorp_fast_clear_op resolve_op;<br>
> -   if (brw->gen >= 9) {<br>
> -      if (intel_miptree_is_lossless_com<wbr>pressed(brw, mt)) {<br>
> -         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FU<wbr>LL;<br>
> -      } else {<br>
> -         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PA<wbr>RTIAL;<br>
> +      case ISL_AUX_STATE_COMPRESSED_CLEAR<wbr>:<br>
> +      case ISL_AUX_STATE_COMPRESSED_NO_CL<wbr>EAR:<br>
> +         assert(written_with_ccs);<br>
> +         break; /* Nothing to do */<br>
> +<br>
> +      case ISL_AUX_STATE_PASS_THROUGH:<br>
> +         if (written_with_ccs) {<br>
> +            intel_miptree_set_aux_state(br<wbr>w, mt, level, layer, 1,<br>
> +                                        ISL_AUX_STATE_COMPRESSED_NO_CL<wbr>EAR);<br>
> +         } else {<br>
> +            /* Nothing to do */<br>
> +         }<br>
> +         break;<br>
> +<br>
> +      case ISL_AUX_STATE_RESOLVED:<br>
> +      case ISL_AUX_STATE_AUX_INVALID:<br>
> +         unreachable("Invalid aux state for CCS_E");<br>
>        }<br>
>     } else {<br>
> -      /* Broadwell and earlier do not have a partial resolve */<br>
> -      assert(!intel_miptree_is_lossl<wbr>ess_compressed(brw, mt));<br>
> -      resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FU<wbr>LL;<br>
> -   }<br>
> +      /* CCS_D is a bit simpler */<br>
> +      switch (aux_state) {<br>
> +      case ISL_AUX_STATE_CLEAR:<br>
> +         assert(written_with_ccs);<br>
> +         intel_miptree_set_aux_state(b<wbr>rw, mt, level, layer, 1,<br>
> +                                     ISL_AUX_STATE_COMPRESSED_CLEA<wbr>R);<br>
> +         break;<br>
><br>
> -   bool resolved = false;<br>
> -   foreach_list_typed_safe(<wbr>struct intel_resolve_map, map, link,<br>
> -                           &mt->color_resolve_map) {<br>
> -      if (map->level < start_level ||<br>
> -          map->level >= (start_level + num_levels) ||<br>
> -          map->layer < start_layer ||<br>
> -          map->layer >= (start_layer + num_layers))<br>
> -         continue;<br>
> +      case ISL_AUX_STATE_COMPRESSED_CLEAR<wbr>:<br>
> +         assert(written_with_ccs);<br>
> +         break; /* Nothing to do */<br>
> +<br>
> +      case ISL_AUX_STATE_PASS_THROUGH:<br>
> +         /* Nothing to do */<br>
> +         break;<br>
><br>
> -      /* Arrayed fast clear is only supported for gen8+. */<br>
> -      assert(brw->gen >= 8 || map->level == 0);<br>
> +      case ISL_AUX_STATE_COMPRESSED_NO_CL<wbr>EAR:<br>
> +      case ISL_AUX_STATE_RESOLVED:<br>
> +      case ISL_AUX_STATE_AUX_INVALID:<br>
> +         unreachable("Invalid aux state for CCS_D");<br>
> +      }<br>
> +   }<br>
> +}<br>
><br>
> -      intel_miptree_check_level_laye<wbr>r(mt, map->level, map->layer);<br>
> +static void<br>
> +intel_miptree_finish_mcs_writ<wbr>e(struct brw_context *brw,<br>
> +                               struct intel_mipmap_tree *mt,<br>
> +                               uint32_t level, uint32_t layer,<br>
> +                               bool written_with_aux)<br>
> +{<br>
> +   switch (intel_miptree_get_aux_state(m<wbr>t, level, layer)) {<br>
> +   case ISL_AUX_STATE_CLEAR:<br>
> +      assert(written_with_aux);<br>
> +      intel_miptree_set_aux_state(br<wbr>w, mt, level, layer, 1,<br>
> +                                  ISL_AUX_STATE_COMPRESSED_CLEAR<wbr>);<br>
> +      break;<br>
><br>
> -      assert(map->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVE<wbr>D);<br>
> +   case ISL_AUX_STATE_COMPRESSED_CLEAR<wbr>:<br>
> +      assert(written_with_aux);<br>
> +      break; /* Nothing to do */<br>
><br>
> -      brw_blorp_resolve_color(brw, mt, map->level, map->layer, resolve_op);<br>
> -      intel_resolve_map_remove(map);<br>
> -      resolved = true;<br>
> +   case ISL_AUX_STATE_COMPRESSED_NO_CL<wbr>EAR:<br>
> +   case ISL_AUX_STATE_RESOLVED:<br>
> +   case ISL_AUX_STATE_PASS_THROUGH:<br>
> +   case ISL_AUX_STATE_AUX_INVALID:<br>
> +      unreachable("Invalid aux state for MCS");<br>
>     }<br>
> -<br>
> -   return resolved;<br>
>  }<br>
><br>
>  void<br>
> @@ -2152,17 +2274,29 @@ intel_miptree_prepare_access(s<wbr>truct brw_context *brw,<br>
>                               uint32_t start_layer, uint32_t num_layers,<br>
>                               bool aux_supported, bool fast_clear_supported)<br>
>  {<br>
> +   /* Clamp the level range to fit the miptree */<br>
> +   assert(start_level + num_levels >= start_level);<br>
<br>
</div></div>This also looks odd.<br>
<div><div class="gmail-m_-206131039698425916h5"><br>
> +   const uint32_t last_level =<br>
> +      MIN2(mt->last_level, start_level + num_levels - 1);<br>
> +   start_level = MAX2(mt->first_level, start_level);<br>
> +   num_levels = last_level - start_level + 1;<br>
> +<br>
>     if (_mesa_is_format_color_format(<wbr>mt->format)) {<br>
>        if (!mt->mcs_buf)<br>
>           return;<br>
><br>
>        if (mt->num_samples > 1) {<br>
>           /* Nothing to do for MSAA */<br>
> +         assert(aux_supported && fast_clear_supported);<br>
>        } else {<br>
> -         /* TODO: This is fairly terrible.  We can do better. */<br>
> -         if (!aux_supported || !fast_clear_supported) {<br>
> -            intel_miptree_resolve_color(br<wbr>w, mt, start_level, num_levels,<br>
> -                                        start_layer, num_layers, 0);<br>
> +         for (uint32_t level = start_level; level <= last_level; level++) {<br>
> +            const uint32_t level_layers =<br>
> +               MIN2(num_layers, mt->level[level].depth);<br>
> +            for (uint32_t a = 0; a < level_layers; a++) {<br>
> +               intel_miptree_prepare_ccs_acc<wbr>ess(brw, mt, level,<br>
> +                                                start_layer + a, aux_supported,<br>
> +                                                fast_clear_supported);<br>
> +            }<br>
>           }<br>
>        }<br>
>     } else if (mt->format == MESA_FORMAT_S_UINT8) {<br>
> @@ -2197,12 +2331,18 @@ intel_miptree_finish_write(str<wbr>uct brw_context *brw,<br>
>     assert(num_layers <= mt->level[level].depth - start_layer);<br>
><br>
>     if (_mesa_is_format_color_format(<wbr>mt->format)) {<br>
> +      if (!mt->mcs_buf)<br>
> +         return;<br>
> +<br>
>        if (mt->num_samples > 1) {<br>
> -         /* Nothing to do for MSAA */<br>
> +         for (uint32_t a = 0; a < num_layers; a++) {<br>
> +            intel_miptree_finish_mcs_write<wbr>(brw, mt, level, start_layer + a,<br>
> +                                           written_with_aux);<br>
> +         }<br>
>        } else {<br>
> -         if (written_with_aux) {<br>
> -            intel_miptree_used_for_renderi<wbr>ng(brw, mt, level,<br>
> -                                             start_layer, num_layers);<br>
> +         for (uint32_t a = 0; a < num_layers; a++) {<br>
> +            intel_miptree_finish_ccs_write<wbr>(brw, mt, level, start_layer + a,<br>
> +                                           written_with_aux);<br>
>           }<br>
>        }<br>
>     } else if (mt->format == MESA_FORMAT_S_UINT8) {<br>
> @@ -2231,22 +2371,12 @@ enum isl_aux_state<br>
>  intel_miptree_get_aux_state(co<wbr>nst struct intel_mipmap_tree *mt,<br>
>                              uint32_t level, uint32_t layer)<br>
>  {<br>
> +   intel_miptree_check_level_lay<wbr>er(mt, level, layer);<br>
> +<br>
>     if (_mesa_is_format_color_format(<wbr>mt->format)) {<br>
>        assert(mt->mcs_buf != NULL);<br>
> -      if (mt->num_samples > 1) {<br>
> -         return ISL_AUX_STATE_COMPRESSED_CLEAR<wbr>;<br>
> -      } else {<br>
> -         switch (intel_miptree_get_fast_clear_<wbr>state(mt, level, layer)) {<br>
> -         case INTEL_FAST_CLEAR_STATE_RESOLVE<wbr>D:<br>
> -            return ISL_AUX_STATE_RESOLVED;<br>
> -         case INTEL_FAST_CLEAR_STATE_UNRESOL<wbr>VED:<br>
> -            return ISL_AUX_STATE_COMPRESSED_CLEAR<wbr>;<br>
> -         case INTEL_FAST_CLEAR_STATE_CLEAR:<br>
> -            return ISL_AUX_STATE_CLEAR;<br>
> -         default:<br>
> -            unreachable("Invalid fast clear state");<br>
> -         }<br>
> -      }<br>
> +      assert(mt->num_samples <= 1 || mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS);<br>
> +      return mt->aux_state[level][layer];<br>
>     } else if (mt->format == MESA_FORMAT_S_UINT8) {<br>
>        unreachable("Cannot get aux state for stencil");<br>
>     } else {<br>
> @@ -2277,19 +2407,20 @@ intel_miptree_set_aux_state(st<wbr>ruct brw_context *brw,<br>
>        num_layers = mt->level[level].depth - start_layer;<br>
>     assert(num_layers <= mt->level[level].depth - start_layer);<br>
><br>
> -   /* Right now, this only applies to clears. */<br>
> -   assert(aux_state == ISL_AUX_STATE_CLEAR);<br>
> -<br>
>     if (_mesa_is_format_color_format(<wbr>mt->format)) {<br>
> -      if (mt->num_samples > 1)<br>
> -         assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS);<br>
> +      assert(mt->mcs_buf != NULL);<br>
> +      assert(mt->num_samples <= 1 || mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS);<br>
><br>
> -      assert(level == 0 && start_layer == 0 && num_layers == 1);<br>
> -      intel_miptree_set_fast_clear_s<wbr>tate(brw, mt, 0, 0, 1,<br>
> -                                         INTEL_FAST_CLEAR_STATE_CLEAR)<wbr>;<br>
> +      for (unsigned a = 0; a < num_layers; a++)<br>
> +         mt->aux_state[level][start_la<wbr>yer + a] = aux_state;<br>
>     } else if (mt->format == MESA_FORMAT_S_UINT8) {<br>
> -      assert(!"Cannot set aux state for stencil");<br>
> +      unreachable("Cannot get aux state for stencil");<br>
>     } else {<br>
> +      assert(mt->hiz_buf != NULL);<br>
> +<br>
> +      /* Right now, this only applies to clears. */<br>
> +      assert(aux_state == ISL_AUX_STATE_CLEAR);<br>
> +<br>
>        for (unsigned a = 0; a < num_layers; a++) {<br>
>           intel_miptree_check_level_lay<wbr>er(mt, level, start_layer);<br>
>           intel_resolve_map_set(&mt->hi<wbr>z_map, level, start_layer + a,<br>
> @@ -2310,22 +2441,23 @@ intel_miptree_set_aux_state(st<wbr>ruct brw_context *brw,<br>
>   * set).<br>
>   */<br>
>  static bool<br>
> -intel_texture_view_requires_r<wbr>esolve(struct brw_context *brw,<br>
> -                                    struct intel_mipmap_tree *mt,<br>
> -                                    mesa_format format)<br>
> +can_texture_with_ccs(struct brw_context *brw,<br>
> +                     struct intel_mipmap_tree *mt,<br>
> +                     mesa_format view_format)<br>
>  {<br>
> -   if (brw->gen < 9 ||<br>
> -       !intel_miptree_is_lossless_co<wbr>mpressed(brw, mt))<br>
> -     return false;<br>
> +   if (!intel_miptree_is_lossless_co<wbr>mpressed(brw, mt))<br>
> +      return false;<br>
><br>
> -   const enum isl_format isl_format = brw_isl_format_for_mesa_format<wbr>(format);<br>
> +   enum isl_format isl_mt_format = brw_isl_format_for_mesa_format<wbr>(mt->format);<br>
> +   enum isl_format isl_view_format = brw_isl_format_for_mesa_format<wbr>(view_format);<br>
><br>
> -   if (isl_format_supports_ccs_e(&br<wbr>w->screen->devinfo, isl_format))<br>
> +   if (!isl_formats_are_ccs_e_compat<wbr>ible(&brw->screen->devinfo,<br>
> +                                         isl_mt_format, isl_view_format)) {<br>
> +      perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",<br>
> +                 _mesa_get_format_name(view_fo<wbr>rmat),<br>
> +                 _mesa_get_format_name(mt->for<wbr>mat));<br>
>        return false;<br>
> -<br>
> -   perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",<br>
> -              _mesa_get_format_name(format),<br>
> -              _mesa_get_format_name(mt->form<wbr>at));<br>
> +   }<br>
><br>
>     return true;<br>
>  }<br>
> @@ -2338,19 +2470,29 @@ intel_miptree_prepare_texture_<wbr>slices(struct brw_context *brw,<br>
>                                       uint32_t start_layer, uint32_t num_layers,<br>
>                                       bool *aux_supported_out)<br>
>  {<br>
> -   bool aux_supported;<br>
> +   bool aux_supported, clear_supported;<br>
>     if (_mesa_is_format_color_format(<wbr>mt->format)) {<br>
> -      aux_supported = intel_miptree_is_lossless_comp<wbr>ressed(brw, mt) &&<br>
> -                      !intel_texture_view_requires_r<wbr>esolve(brw, mt, view_format);<br>
> +      if (mt->num_samples > 1) {<br>
> +         aux_supported = clear_supported = true;<br>
> +      } else {<br>
> +         aux_supported = can_texture_with_ccs(brw, mt, view_format);<br>
> +<br>
> +         /* Clear color is specified as ints or floats and the conversion is<br>
> +          * done by the sampler.  If we have a texture view, we would have to<br>
> +          * perform the clear color conversion manually.  Just disable clear<br>
> +          * color.<br>
> +          */<br>
> +         clear_supported = aux_supported && (mt->format == view_format);<br>
> +      }<br>
>     } else if (mt->format == MESA_FORMAT_S_UINT8) {<br>
> -      aux_supported = false;<br>
> +      aux_supported = clear_supported = false;<br>
>     } else {<br>
> -      aux_supported = intel_miptree_sample_with_hiz(<wbr>brw, mt);<br>
> +      aux_supported = clear_supported = intel_miptree_sample_with_hiz(<wbr>brw, mt);<br>
>     }<br>
><br>
>     intel_miptree_prepare_access(<wbr>brw, mt, start_level, num_levels,<br>
>                                  start_layer, num_layers,<br>
> -                                aux_supported, aux_supported);<br>
> +                                aux_supported, clear_supported);<br>
>     if (aux_supported_out)<br>
>        *aux_supported_out = aux_supported;<br>
>  }<br>
> @@ -2488,7 +2630,8 @@ intel_miptree_make_shareable(s<wbr>truct brw_context *brw,<br>
>         * execute any will likely crash due to the missing aux buffer. So let's<br>
>         * delete all pending ops.<br>
>         */<br>
> -      exec_list_make_empty(&mt->colo<wbr>r_resolve_map);<br>
> +      free(mt->aux_state);<br>
> +      mt->aux_state = NULL;<br>
>     }<br>
><br>
>     if (mt->hiz_buf) {<br>
> diff --git a/src/mesa/drivers/dri/i965/in<wbr>tel_mipmap_tree.h b/src/mesa/drivers/dri/i965/in<wbr>tel_mipmap_tree.h<br>
> index 3f3a78d..528c32c 100644<br>
> --- a/src/mesa/drivers/dri/i965/in<wbr>tel_mipmap_tree.h<br>
> +++ b/src/mesa/drivers/dri/i965/in<wbr>tel_mipmap_tree.h<br>
> @@ -557,7 +557,14 @@ struct intel_mipmap_tree<br>
>      * indicates state other than RESOLVED.<br>
>      */<br>
>     struct exec_list hiz_map; /* List of intel_resolve_map. */<br>
> -   struct exec_list color_resolve_map; /* List of intel_resolve_map. */<br>
> +<br>
> +   /**<br>
> +    * \brief Maps miptree slices to their current aux state<br>
> +    *<br>
> +    * This two-dimensional array is indexed as [level][layer] and stores an<br>
> +    * aux state for each slice.<br>
> +    */<br>
> +   enum isl_aux_state **aux_state;<br>
><br>
>     /**<br>
>      * \brief Stencil miptree for depthstencil textures.<br>
> @@ -827,41 +834,11 @@ intel_miptree_all_slices_resol<wbr>ve_depth(struct brw_context *brw,<br>
><br>
>  /**\}*/<br>
><br>
> -enum intel_fast_clear_state<br>
> -intel_miptree_get_fast_clear_<wbr>state(const struct intel_mipmap_tree *mt,<br>
> -                                   unsigned level, unsigned layer);<br>
> -<br>
> -void<br>
> -intel_miptree_set_fast_clear_<wbr>state(const struct brw_context *brw,<br>
> -                                   struct intel_mipmap_tree *mt,<br>
> -                                   unsigned level,<br>
> -                                   unsigned first_layer,<br>
> -                                   unsigned num_layers,<br>
> -                                   enum intel_fast_clear_state new_state);<br>
> -<br>
>  bool<br>
>  intel_miptree_has_color_unreso<wbr>lved(const struct intel_mipmap_tree *mt,<br>
>                                     unsigned start_level, unsigned num_levels,<br>
>                                     unsigned start_layer, unsigned num_layers);<br>
><br>
> -/**<br>
> - * Update the fast clear state for a miptree to indicate that it has been used<br>
> - * for rendering.<br>
> - */<br>
> -void<br>
> -intel_miptree_used_for_render<wbr>ing(const struct brw_context *brw,<br>
> -                                 struct intel_mipmap_tree *mt, unsigned level,<br>
> -                                 unsigned start_layer, unsigned num_layers);<br>
> -<br>
> -/**<br>
> - * Flag values telling color resolve pass which special types of buffers<br>
> - * can be ignored.<br>
> - *<br>
> - * INTEL_MIPTREE_IGNORE_CCS_E:   Lossless compressed (single-sample<br>
> - *                               compression scheme since gen9)<br>
> - */<br>
> -#define INTEL_MIPTREE_IGNORE_CCS_E (1 << 0)<br>
> -<br>
><br>
>  #define INTEL_REMAINING_LAYERS UINT32_MAX<br>
>  #define INTEL_REMAINING_LEVELS UINT32_MAX<br>
> diff --git a/src/mesa/drivers/dri/i965/in<wbr>tel_resolve_map.h b/src/mesa/drivers/dri/i965/in<wbr>tel_resolve_map.h<br>
> index 17d3983..771d855 100644<br>
> --- a/src/mesa/drivers/dri/i965/in<wbr>tel_resolve_map.h<br>
> +++ b/src/mesa/drivers/dri/i965/in<wbr>tel_resolve_map.h<br>
> @@ -33,63 +33,6 @@ extern "C" {<br>
>  #endif<br>
><br>
>  /**<br>
> - * Enum for keeping track of the fast clear state of a buffer associated with<br>
> - * a miptree.<br>
> - *<br>
> - * Fast clear works by deferring the memory writes that would be used to clear<br>
> - * the buffer, so that instead of performing them at the time of the clear<br>
> - * operation, the hardware automatically performs them at the time that the<br>
> - * buffer is later accessed for rendering.  The MCS buffer keeps track of<br>
> - * which regions of the buffer still have pending clear writes.<br>
> - *<br>
> - * This enum keeps track of the driver's knowledge of pending fast clears in<br>
> - * the MCS buffer.<br>
> - *<br>
> - * MCS buffers only exist on Gen7+.<br>
> - */<br>
> -enum intel_fast_clear_state<br>
> -{<br>
> -   /**<br>
> -    * No deferred clears are pending for this miptree, and the contents of the<br>
> -    * color buffer are entirely correct.  An MCS buffer may or may not exist<br>
> -    * for this miptree.  If it does exist, it is entirely in the "no deferred<br>
> -    * clears pending" state.  If it does not exist, it will be created the<br>
> -    * first time a fast color clear is executed.<br>
> -    *<br>
> -    * In this state, the color buffer can be used for purposes other than<br>
> -    * rendering without needing a render target resolve.<br>
> -    *<br>
> -    * Since there is no such thing as a "fast color clear resolve" for MSAA<br>
> -    * buffers, an MSAA buffer will never be in this state.<br>
> -    */<br>
> -   INTEL_FAST_CLEAR_STATE_RESOLV<wbr>ED,<br>
> -<br>
> -   /**<br>
> -    * An MCS buffer exists for this miptree, and deferred clears are pending<br>
> -    * for some regions of the color buffer, as indicated by the MCS buffer.<br>
> -    * The contents of the color buffer are only correct for the regions where<br>
> -    * the MCS buffer doesn't indicate a deferred clear.<br>
> -    *<br>
> -    * If a single-sample buffer is in this state, a render target resolve must<br>
> -    * be performed before it can be used for purposes other than rendering.<br>
> -    */<br>
> -   INTEL_FAST_CLEAR_STATE_UNRESO<wbr>LVED,<br>
> -<br>
> -   /**<br>
> -    * An MCS buffer exists for this miptree, and deferred clears are pending<br>
> -    * for the entire color buffer, and the contents of the MCS buffer reflect<br>
> -    * this.  The contents of the color buffer are undefined.<br>
> -    *<br>
> -    * If a single-sample buffer is in this state, a render target resolve must<br>
> -    * be performed before it can be used for purposes other than rendering.<br>
> -    *<br>
> -    * If the client attempts to clear a buffer which is already in this state,<br>
> -    * the clear can be safely skipped, since the buffer is already clear.<br>
> -    */<br>
> -   INTEL_FAST_CLEAR_STATE_CLEAR,<br>
> -};<br>
> -<br>
> -/**<br>
>   * \brief Map of miptree slices to needed resolves.<br>
>   *<br>
>   * The map is implemented as a linear doubly-linked list.<br>
> @@ -121,10 +64,7 @@ struct intel_resolve_map {<br>
>     uint32_t level;<br>
>     uint32_t layer;<br>
><br>
> -   union {<br>
> -      enum blorp_hiz_op need;<br>
> -      enum intel_fast_clear_state fast_clear_state;<br>
> -   };<br>
> +   enum blorp_hiz_op need;<br>
>  };<br>
><br>
>  void<br>
> --<br>
> 2.5.0.400.gff86faf<br>
><br>
</div></div>> ______________________________<wbr>_________________<br>
> mesa-dev mailing list<br>
> <a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
> <a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</blockquote></div></div></div><br></div></div>
</blockquote></div><br></div></div>