[Mesa-dev] [PATCH 6/7] anv: Add support for fast clears on gen9

Jordan Justen jordan.l.justen at intel.com
Mon Nov 21 17:52:33 UTC 2016


On 2016-11-19 11:40:08, Jason Ekstrand wrote:
> ---
>  src/intel/vulkan/anv_blorp.c       | 102 ++++++++++++++++++++++++++++++++-----
>  src/intel/vulkan/anv_private.h     |   3 ++
>  src/intel/vulkan/genX_cmd_buffer.c | 100 ++++++++++++++++++++++++++++++------
>  3 files changed, 176 insertions(+), 29 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 24b98ab..cab1906 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1178,16 +1178,35 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
>        struct blorp_surf surf;
>        get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
>                                     att_state->aux_usage, &surf);
> +      surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
>  
>        const VkRect2D render_area = cmd_buffer->state.render_area;
>  
> -      blorp_clear(&batch, &surf, iview->isl.format, iview->isl.swizzle,
> -                  iview->isl.base_level,
> -                  iview->isl.base_array_layer, fb->layers,
> -                  render_area.offset.x, render_area.offset.y,
> -                  render_area.offset.x + render_area.extent.width,
> -                  render_area.offset.y + render_area.extent.height,
> -                  vk_to_isl_color(att_state->clear_value.color), NULL);
> +      if (att_state->fast_clear) {
> +         blorp_fast_clear(&batch, &surf, iview->isl.format,
> +                          iview->isl.base_level,
> +                          iview->isl.base_array_layer, fb->layers,
> +                          render_area.offset.x, render_area.offset.y,
> +                          render_area.offset.x + render_area.extent.width,
> +                          render_area.offset.y + render_area.extent.height);
> +
> +         /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
> +          *
> +          *    "After Render target fast clear, pipe-control with color cache
> +          *    write-flush must be issued before sending any DRAW commands on
> +          *    that render target."
> +          */
> +         cmd_buffer->state.pending_pipe_bits |=
> +            ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> +      } else {
> +         blorp_clear(&batch, &surf, iview->isl.format, iview->isl.swizzle,
> +                     iview->isl.base_level,
> +                     iview->isl.base_array_layer, fb->layers,
> +                     render_area.offset.x, render_area.offset.y,
> +                     render_area.offset.x + render_area.extent.width,
> +                     render_area.offset.y + render_area.extent.height,
> +                     surf.clear_color, NULL);
> +      }
>  
>        att_state->pending_clear_aspects = 0;
>     }
> @@ -1298,10 +1317,12 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
>     struct anv_attachment_state *att_state =
>        &cmd_buffer->state.attachments[att];
>  
> -   assert(att_state->aux_usage != ISL_AUX_USAGE_CCS_D);
> -   if (att_state->aux_usage != ISL_AUX_USAGE_CCS_E)
> +   if (att_state->aux_usage == ISL_AUX_USAGE_NONE)
>        return; /* Nothing to resolve */
>  
> +   assert(att_state->aux_usage == ISL_AUX_USAGE_CCS_E ||
> +          att_state->aux_usage == ISL_AUX_USAGE_CCS_D);
> +
>     struct anv_render_pass *pass = cmd_buffer->state.pass;
>     struct anv_subpass *subpass = cmd_buffer->state.subpass;
>     unsigned subpass_idx = subpass - pass->subpasses;
> @@ -1312,14 +1333,17 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
>      * of a particular attachment.  That way we only resolve once but it's
>      * still hot in the cache.
>      */
> +   bool found_draw = false;
> +   enum anv_subpass_usage usage = 0;
>     for (uint32_t s = subpass_idx + 1; s < pass->subpass_count; s++) {
> -      enum anv_subpass_usage usage = pass->attachments[att].subpass_usage[s];
> +      usage |= pass->attachments[att].subpass_usage[s];
>  
>        if (usage & (ANV_SUBPASS_USAGE_DRAW | ANV_SUBPASS_USAGE_RESOLVE_DST)) {
>           /* We found another subpass that draws to this attachment.  We'll
>            * wait to resolve until then.
>            */
> -         return;
> +         found_draw = true;
> +         break;
>        }
>     }
>  
> @@ -1327,12 +1351,60 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
>     const struct anv_image *image = iview->image;
>     assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
>  
> -   if (image->aux_usage == ISL_AUX_USAGE_CCS_E)
> +   enum blorp_fast_clear_op resolve_op = BLORP_FAST_CLEAR_OP_NONE;
> +   if (!found_draw) {
> +      /* This is the last subpass that writes to this attachment so we need to
> +       * resolve here.  Ideally, we would like to only resolve if the storeOp
> +       * is set to VK_ATTACHMENT_STORE_OP_STORE.  However, we need to ensure
> +       * that the CCS bits are set to "resolved" because there may be copy or
> +       * blit operations (which may ignore CCS) between now and the next time
> +       * we render and we need to ensure that anything they write will be
> +       * respected in the next render.  Unfortunately, the hardware does not
> +       * provide us with any sort of "invalidate" pass that sets the CCS to
> +       * "resolved" without writing to the render target.
> +       */
> +      if (iview->image->aux_usage != ISL_AUX_USAGE_CCS_E) {
> +         /* The image destination surface doesn't support compression outside
> +          * the render pass.  We need a full resolve.
> +          */
> +         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
> +      } else if (att_state->fast_clear) {
> +         /* We don't know what to do with clear colors outside the render
> +          * pass.  We need a partial resolve.
> +          */
> +         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
> +      } else {
> +         /* The image "natively" supports all the compression we care about
> +          * and we don't need to resolve at all.  If this is the case, we also
> +          * don't need to resolve for any of the input attachment cases below.
> +          */
> +      }
> +   } else if (usage & ANV_SUBPASS_USAGE_INPUT) {
> +      /* Input attachments are clear-color aware so, at least on Sky Lake, we
> +       * can frequently sample from them with no resolves at all.
> +       */
> +      if (att_state->aux_usage != att_state->input_aux_usage) {
> +         assert(att_state->input_aux_usage == ISL_AUX_USAGE_NONE);
> +         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
> +      } else if (!att_state->clear_color_is_zero_one) {
> +         /* Sky Lake PRM, Vol. 2d, RENDER_SURFACE_STATE::Red Clear Color:
> +          *
> +          *    "If Number of Multisamples is MULTISAMPLECOUNT_1 AND if this RT
> +          *    is fast cleared with non-0/1 clear value, this RT must be
> +          *    partially resolved (refer to Partial Resolve operation) before
> +          *    binding this surface to Sampler."
> +          */
> +         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
> +      }
> +   }
> +
> +   if (resolve_op == BLORP_FAST_CLEAR_OP_NONE)
>        return;
>  
>     struct blorp_surf surf;
>     get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
>                                  att_state->aux_usage, &surf);
> +   surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
>  
>     /* From the Sky Lake PRM Vol. 7, "Render Target Resolve":
>      *
> @@ -1353,12 +1425,14 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
>        blorp_ccs_resolve(batch, &surf,
>                          iview->isl.base_level,
>                          iview->isl.base_array_layer + layer,
> -                        iview->isl.format,
> -                        BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
> +                        iview->isl.format, resolve_op);
>     }
>  
>     cmd_buffer->state.pending_pipe_bits |=
>        ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> +
> +   /* Once we've done any sort of resolve, we're no longer fast-cleared */
> +   att_state->fast_clear = false;
>  }
>  
>  void
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 7b521b1..4e6049c 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -1087,11 +1087,14 @@ void anv_dynamic_state_copy(struct anv_dynamic_state *dest,
>   */
>  struct anv_attachment_state {
>     enum isl_aux_usage                           aux_usage;
> +   enum isl_aux_usage                           input_aux_usage;
>     struct anv_state                             color_rt_state;
>     struct anv_state                             input_att_state;
>  
>     VkImageAspectFlags                           pending_clear_aspects;
> +   bool                                         fast_clear;
>     VkClearValue                                 clear_value;
> +   bool                                         clear_color_is_zero_one;
>  };
>  
>  /** State required while building cmd buffer */
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
> index 1ad28fd..38579ce 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -191,23 +191,87 @@ add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
>     }
>  }
>  
> -static enum isl_aux_usage
> -fb_attachment_get_aux_usage(struct anv_device *device,
> -                            struct anv_framebuffer *fb,
> -                            uint32_t attachment)
> +static bool
> +color_is_zero_one(VkClearColorValue value, enum isl_format format)
>  {
> -   struct anv_image_view *iview = fb->attachments[attachment];
> +   if (isl_format_has_int_channel(format)) {
> +      for (unsigned i = 0; i < 4; i++) {
> +         if (value.int32[i] != 0 && value.int32[i] != 1)
> +            return false;
> +      }
> +   } else {
> +      for (unsigned i = 0; i < 4; i++) {
> +         if (value.float32[i] != 0.0f && value.float32[i] != 1.0f)
> +            return false;
> +      }
> +   }
>  
> -   if (iview->image->aux_surface.isl.size == 0)
> -      return ISL_AUX_USAGE_NONE; /* No aux surface */
> +   return true;
> +}
> +
> +static void
> +color_attachment_compute_aux_usage(struct anv_device *device,
> +                                   struct anv_attachment_state *att_state,
> +                                   struct anv_image_view *iview,
> +                                   VkRect2D render_area,
> +                                   union isl_color_value *fast_clear_color)
> +{
> +   if (iview->image->aux_surface.isl.size == 0) {
> +      att_state->aux_usage = ISL_AUX_USAGE_NONE;
> +      att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
> +      att_state->fast_clear = false;
> +      return;
> +   }
>  
>     assert(iview->image->aux_surface.isl.usage & ISL_SURF_USAGE_CCS_BIT);
>  
> -   if (isl_format_supports_lossless_compression(&device->info,
> -                                                iview->isl.format))
> -      return ISL_AUX_USAGE_CCS_E;
> +   att_state->clear_color_is_zero_one =
> +      color_is_zero_one(att_state->clear_value.color, iview->isl.format);
> +
> +   if (att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
> +      /* Start off assuming fast clears are possible */
> +      att_state->fast_clear = true;
>  
> -   return ISL_AUX_USAGE_NONE;
> +      /* Potentially, we could do partial fast-clears but doing so has crazy
> +       * alignment restrictions.  It's easier to just restrict to full size
> +       * fast clears for now.
> +       */
> +      if (render_area.offset.x != 0 ||
> +          render_area.offset.y != 0 ||
> +          render_area.extent.width != iview->extent.width ||
> +          render_area.extent.height != iview->extent.height)
> +         att_state->fast_clear = false;
> +
> +      if (att_state->fast_clear) {
> +         memcpy(fast_clear_color->u32, att_state->clear_value.color.uint32,
> +                sizeof(fast_clear_color->u32));

Maybe vk_to_isl_color from patch 2 should be made available to all of anv, so that it could be used here instead of the open-coded memcpy?

-Jordan

> +      }
> +   } else {
> +      att_state->fast_clear = false;
> +   }
> +
> +   if (isl_format_supports_lossless_compression(&device->info,
> +                                                iview->isl.format)) {
> +      att_state->aux_usage = ISL_AUX_USAGE_CCS_E;
> +      att_state->input_aux_usage = ISL_AUX_USAGE_CCS_E;
> +   } else if (att_state->fast_clear) {
> +      att_state->aux_usage = ISL_AUX_USAGE_CCS_D;
> +      /* From the Sky Lake PRM, RENDER_SURFACE_STATE::AuxiliarySurfaceMode:
> +       *
> +       *    "If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_D
> +       *    setting is only allowed if Surface Format supported for Fast
> +       *    Clear. In addition, if the surface is bound to the sampling
> +       *    engine, Surface Format must be supported for Render Target
> +       *    Compression for surfaces bound to the sampling engine."
> +       *
> +       * In other words, we can't sample from a fast-cleared image if it
> +       * doesn't also support color compression.
> +       */
> +      att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
> +   } else {
> +      att_state->aux_usage = ISL_AUX_USAGE_NONE;
> +      att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
> +   }
>  }
>  
>  static bool
> @@ -350,9 +414,12 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
>           struct anv_image_view *iview = framebuffer->attachments[i];
>           assert(iview->vk_format == att->format);
>  
> +         union isl_color_value clear_color = { .u32 = { 0, } };
>           if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
> -            state->attachments[i].aux_usage =
> -               fb_attachment_get_aux_usage(cmd_buffer->device, framebuffer, i);
> +            color_attachment_compute_aux_usage(cmd_buffer->device,
> +                                               &state->attachments[i],
> +                                               iview, begin->renderArea,
> +                                               &clear_color);
>  
>              struct isl_view view = iview->isl;
>              view.usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
> @@ -362,6 +429,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
>                                  .view = &view,
>                                  .aux_surf = &iview->image->aux_surface.isl,
>                                  .aux_usage = state->attachments[i].aux_usage,
> +                                .clear_color = clear_color,
>                                  .mocs = cmd_buffer->device->default_mocs);
>  
>              add_image_view_relocs(cmd_buffer, iview,
> @@ -369,6 +437,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
>                                    state->attachments[i].color_rt_state);
>           } else {
>              state->attachments[i].aux_usage = ISL_AUX_USAGE_NONE;
> +            state->attachments[i].input_aux_usage = ISL_AUX_USAGE_NONE;
>           }
>  
>           if (need_input_attachment_state(&pass->attachments[i])) {
> @@ -386,11 +455,12 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
>                                  .surf = surf,
>                                  .view = &view,
>                                  .aux_surf = &iview->image->aux_surface.isl,
> -                                .aux_usage = state->attachments[i].aux_usage,
> +                                .aux_usage = state->attachments[i].input_aux_usage,
> +                                .clear_color = clear_color,
>                                  .mocs = cmd_buffer->device->default_mocs);
>  
>              add_image_view_relocs(cmd_buffer, iview,
> -                                  state->attachments[i].aux_usage,
> +                                  state->attachments[i].input_aux_usage,
>                                    state->attachments[i].input_att_state);
>           }
>        }
> -- 
> 2.5.0.400.gff86faf
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list