<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Tue, Sep 19, 2017 at 6:52 AM, Lionel Landwerlin <span dir="ltr"><<a href="mailto:lionel.g.landwerlin@intel.com" target="_blank">lionel.g.landwerlin@intel.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5">On 15/09/17 17:01, Jason Ekstrand wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
In order to get support everywhere, this gets a bit complicated.  On Sky<br>
Lake and later, everything is fine because HALIGN/VALIGN are specified<br>
in surface elements and are required to be at least 4 so any offsetting<br>
we may need to do falls neatly within the heavy restrictions placed on<br>
the X/Y Offset parameter of RENDER_SURFACE_STATE.  On Broadwell and<br>
earlier, HALIGN/VALIGN are specified in pixels are are hard-coded to<br>
align to exactly the block size of the compressed texture.  This meas<br>
that, when reinterpreted as a non-compressed texture, the tile offsets<br>
may be anything and we can't rely on X/Y Offset.<br>
<br>
In order to work around this issue, we fall back to linear where we can<br>
trivially offset to whatever element we so choose.  However, since<br>
linear texturing performance is terrible, we create a tiled shadow copy<br>
of the image to use for texturing.  Whenever the user does a layout<br>
transition from anything to SHADER_READ_ONLY_OPTIMAL, we use blorp to<br>
copy the contents of the texture from the linear copy to the tiled<br>
shadow copy.  This assumes that the client will use the image far more<br>
for texturing than as a storage image or render target.<br>
<br>
Even though we don't need the shadow copy on Sky Lake, we implement it<br>
this way first to make testing easier.  Due to the hardware restriction<br>
that ASTC must not be linear, ASTC does not work yet.<br>
---<br>
  src/intel/vulkan/anv_blorp.c       |  46 +++++++++++++++<br>
  src/intel/vulkan/anv_image.c       | 111 ++++++++++++++++++++++++++++++<wbr>++++++-<br>
  src/intel/vulkan/anv_private.<wbr>h     |  14 +++++<br>
  src/intel/vulkan/genX_cmd_buff<wbr>er.c |  21 ++++++-<br>
  4 files changed, 187 insertions(+), 5 deletions(-)<br>
<br>
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c<br>
index 7f51bed..95f8696 100644<br>
--- a/src/intel/vulkan/anv_blorp.c<br>
+++ b/src/intel/vulkan/anv_blorp.c<br>
@@ -1489,6 +1489,52 @@ anv_cmd_buffer_resolve_subpass<wbr>(struct anv_cmd_buffer *cmd_buffer)<br>
  }<br>
    void<br>
+anv_image_copy_to_shadow(stru<wbr>ct anv_cmd_buffer *cmd_buffer,<br>
+                         const struct anv_image *image,<br>
+                         VkImageAspectFlagBits aspect,<br>
+                         uint32_t base_level, uint32_t level_count,<br>
+                         uint32_t base_layer, uint32_t layer_count)<br>
+{<br>
+   struct blorp_batch batch;<br>
+   blorp_batch_init(&cmd_buffer-<wbr>>device->blorp, &batch, cmd_buffer, 0);<br>
+<br>
+   struct blorp_surf surf;<br>
+   get_blorp_surf_for_anv_image(<wbr>image, VK_IMAGE_ASPECT_DEPTH_BIT,<br>
+                                ISL_AUX_USAGE_NONE, &surf);<br>
+<br>
+   struct blorp_surf shadow_surf = {<br>
+      .surf = &image->shadow_surface.isl,<br>
+      .addr = {<br>
+         .buffer = image->bo,<br>
+         .offset = image->offset + image->shadow_surface.offset,<br>
+      },<br>
+   };<br>
+<br>
+   for (uint32_t l = 0; l < level_count; l++) {<br>
+      const uint32_t level = base_level + l;<br>
+<br>
+      const VkExtent3D extent = {<br>
+         .width = anv_minify(image->extent.width<wbr>, level),<br>
+         .height = anv_minify(image->extent.heigh<wbr>t, level),<br>
+         .depth = anv_minify(image->extent.depth<wbr>, level),<br>
+      };<br>
+<br>
+      if (image->type == VK_IMAGE_TYPE_3D)<br>
+         layer_count = extent.depth;<br>
+<br>
+      for (uint32_t a = 0; a < layer_count; a++) {<br>
+         const uint32_t layer = base_layer + a;<br>
+<br>
+         blorp_copy(&batch, &surf, level, layer,<br>
+                    &shadow_surf, level, layer,<br>
+                    0, 0, 0, 0, extent.width, extent.height);<br>
+      }<br>
+   }<br>
+<br>
+   blorp_batch_finish(&batch);<br>
+}<br>
+<br>
+void<br>
  anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,<br>
                          const struct anv_image *image,<br>
                          enum blorp_hiz_op op)<br>
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c<br>
index 77ffa7d..e6e3250 100644<br>
--- a/src/intel/vulkan/anv_image.c<br>
+++ b/src/intel/vulkan/anv_image.c<br>
@@ -235,6 +235,18 @@ make_surface(const struct anv_device *dev,<br>
                                                 aspect, vk_info->tiling);<br>
     assert(format != ISL_FORMAT_UNSUPPORTED);<br>
  +   /* If an image is created as BLOCK_TEXEL_VIEW_COMPATIBLE, then we need to<br>
+    * fall back to linear because we aren't guaranteed that we can handle<br>
+    * offsets correctly.<br>
+    */<br>
+   bool needs_shadow = false;<br>
+   if ((vk_info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VI<wbr>EW_COMPATIBLE_BIT_KHR) &&<br>
+       vk_info->tiling == VK_IMAGE_TILING_OPTIMAL) {<br>
+      assert(isl_format_is_compresse<wbr>d(format));<br>
+      tiling_flags = ISL_TILING_LINEAR_BIT;<br>
+      needs_shadow = true;<br>
+   }<br>
+<br>
     ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl,<br>
        .dim = vk_to_isl_surf_dim[vk_info->im<wbr>ageType],<br>
        .format = format,<br>
@@ -256,6 +268,36 @@ make_surface(const struct anv_device *dev,<br>
       add_surface(image, anv_surf);<br>
  +   /* If an image is created as BLOCK_TEXEL_VIEW_COMPATIBLE, then we need to<br>
+    * create an identical tiled shadow surface for use while texturing so we<br>
+    * don't get garbage performance.<br>
+    */<br>
+   if (needs_shadow) {<br>
+      assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);<br>
+      assert(tiling_flags == ISL_TILING_LINEAR_BIT);<br>
+<br>
+      ok = isl_surf_init(&dev->isl_dev, &image->shadow_surface.isl,<br>
+         .dim = vk_to_isl_surf_dim[vk_info->im<wbr>ageType],<br>
+         .format = format,<br>
+         .width = image->extent.width,<br>
+         .height = image->extent.height,<br>
+         .depth = image->extent.depth,<br>
+         .levels = vk_info->mipLevels,<br>
+         .array_len = vk_info->arrayLayers,<br>
+         .samples = vk_info->samples,<br>
+         .min_alignment = 0,<br>
+         .row_pitch = anv_info->stride,<br>
+         .usage = choose_isl_surf_usage(image->u<wbr>sage, image->usage, aspect),<br>
+         .tiling_flags = ISL_TILING_ANY_MASK);<br>
+<br>
+      /* isl_surf_init() will fail only if provided invalid input. Invalid input<br>
+       * is illegal in Vulkan.<br>
+       */<br>
+      assert(ok);<br>
+<br>
+      add_surface(image, &image->shadow_surface);<br>
+   }<br>
+<br>
     /* Add a HiZ surface to a depth buffer that will be used for rendering.<br>
      */<br>
     if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {<br>
@@ -673,6 +715,20 @@ anv_image_fill_surface_state(s<wbr>truct anv_device *device,<br>
     struct isl_view view = *view_in;<br>
     view.usage |= view_usage;<br>
  +   /* For texturing with VK_IMAGE_LAYOUT_SHADER_READ_ON<wbr>LY_OPTIMAL from a<br>
+    * compressed surface with a shadow surface, we use the shadow instead of<br>
+    * the primary surface.  The shadow surface will be tiled, unlike the main<br>
+    * surface, so it should get significantly better performance.<br>
+    */<br>
+   if (image->shadow_surface.isl.siz<wbr>e > 0 &&<br>
+       isl_format_is_compressed(<wbr>view.format) &&<br>
+       (flags & ANV_IMAGE_VIEW_STATE_TEXTURE_O<wbr>PTIMAL)) {<br>
+      assert(isl_format_is_compresse<wbr>d(surface->isl.format));<br>
+      assert(surface->isl.tiling == ISL_TILING_LINEAR);<br>
+      assert(image->shadow_surface.i<wbr>sl.tiling != ISL_TILING_LINEAR);<br>
</blockquote>
<br></div></div>
There is something odd here (I must misunderstand...).<br>
In make_surface() you always make the shadow_surface with linear tiling, yet here you're expecting it *not* to be linear?<br>
</blockquote><div><br></div><div>No.  We set tiling to LINEAR and set needs_shadow.  When we actually create the shadow, we explicitly use TILING_ANY.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+      surface = &image->shadow_surface;<br>
+   }<br>
+<br>
     if (view_usage == ISL_SURF_USAGE_RENDER_TARGET_B<wbr>IT)<br>
        view.swizzle = anv_swizzle_for_render(view.sw<wbr>izzle);<br>
  @@ -718,16 +774,65 @@ anv_image_fill_surface_state(s<wbr>truct anv_device *device,<br>
                                                        view.format);<br>
        }<br>
  +      const struct isl_surf *isl_surf = &surface->isl;<br>
+<br>
+      struct isl_surf tmp_surf;<br>
+      uint32_t offset_B = 0, tile_x_sa = 0, tile_y_sa = 0;<br>
+      if (isl_format_is_compressed(surf<wbr>ace->isl.format) &&<br>
+          !isl_format_is_compressed(view<wbr>.format)) {<br>
+         /* We're creating an uncompressed view of a compressed surface.  This<br>
+          * is allowed but only for a single level/layer.<br>
+          */<br>
+         assert(surface->isl.samples == 1);<br>
+         assert(view.levels == 1);<br>
+         assert(view.array_len == 1);<br>
+<br>
+         isl_surf_get_image_surf(&devi<wbr>ce->isl_dev, isl_surf,<br>
+                                 view.base_level,<br>
+                                 surface->isl.dim == ISL_SURF_DIM_3D ?<br>
+                                    0 : view.base_array_layer,<br>
+                                 surface->isl.dim == ISL_SURF_DIM_3D ?<br>
+                                    view.base_array_layer : 0,<br>
+                                 &tmp_surf,<br>
+                                 &offset_B, &tile_x_sa, &tile_y_sa);<br>
+<br>
+         /* The newly created image represents the one subimage we're<br>
+          * referencing with this view so it only has one array slice and<br>
+          * miplevel.<br>
+          */<br>
+         view.base_array_layer = 0;<br>
+         view.base_level = 0;<br>
+<br>
+         /* We're making an uncompressed view here.  The image dimensions need<br>
+          * to be scaled down by the block size.<br>
+          */<br>
+         const struct isl_format_layout *fmtl =<br>
+            isl_format_get_layout(surface-<wbr>>isl.format);<br>
+         tmp_surf.format = view.format;<br>
+         tmp_surf.logical_level0_px.wi<wbr>dth =<br>
+            DIV_ROUND_UP(tmp_surf.logical_<wbr>level0_px.width, fmtl->bw);<br>
+         tmp_surf.logical_level0_px.he<wbr>ight =<br>
+            DIV_ROUND_UP(tmp_surf.logical_<wbr>level0_px.height, fmtl->bh);<br>
+         tmp_surf.phys_level0_sa.width /= fmtl->bw;<br>
+         tmp_surf.phys_level0_sa.<wbr>height /= fmtl->bh;<br>
+<br>
+         isl_surf = &tmp_surf;<br>
+<br>
+         assert(surface->isl.tiling == ISL_TILING_LINEAR);<br>
+         assert(tile_x_sa == 0);<br>
+         assert(tile_y_sa == 0);<br>
+      }<br>
+<br>
        isl_surf_fill_state(&device->i<wbr>sl_dev, state_inout->state.map,<br>
-                          .surf = &surface->isl,<br>
+                          .surf = isl_surf,<br>
                            .view = &view,<br>
-                          .address = address,<br>
+                          .address = address + offset_B,<br>
                            .clear_color = *clear_color,<br>
                            .aux_surf = &image->aux_surface.isl,<br>
                            .aux_usage = aux_usage,<br>
                            .aux_address = aux_address,<br>
                            .mocs = device->default_mocs);<br>
-      state_inout->address = address;<br>
+      state_inout->address = address + offset_B;<br>
        if (device->info.gen >= 8) {<br>
           state_inout->aux_address = aux_address;<br>
        } else {<br>
diff --git a/src/intel/vulkan/anv_private<wbr>.h b/src/intel/vulkan/anv_private<wbr>.h<br>
index 85843b2..355adba 100644<br>
--- a/src/intel/vulkan/anv_private<wbr>.h<br>
+++ b/src/intel/vulkan/anv_private<wbr>.h<br>
@@ -2266,6 +2266,13 @@ struct anv_image {<br>
     };<br>
       /**<br>
+    * A surface which shadows the main surface and may have different tiling.<br>
+    * This is used for sampling using a tiling that isn't supported for other<br>
+    * operations.<br>
+    */<br>
+   struct anv_surface shadow_surface;<br>
+<br>
+   /**<br>
      * For color images, this is the aux usage for this image when not used as a<br>
      * color attachment.<br>
      *<br>
@@ -2352,6 +2359,13 @@ anv_image_fast_clear(struct anv_cmd_buffer *cmd_buffer,<br>
                       const uint32_t base_level, const uint32_t level_count,<br>
                       const uint32_t base_layer, uint32_t layer_count);<br>
  +void<br>
+anv_image_copy_to_shadow(stru<wbr>ct anv_cmd_buffer *cmd_buffer,<br>
+                         const struct anv_image *image,<br>
+                         VkImageAspectFlagBits aspect,<br>
+                         uint32_t base_level, uint32_t level_count,<br>
+                         uint32_t base_layer, uint32_t layer_count);<br>
+<br>
  enum isl_aux_usage<br>
  anv_layout_to_aux_usage(const struct gen_device_info * const devinfo,<br>
                          const struct anv_image *image,<br>
diff --git a/src/intel/vulkan/genX_cmd_bu<wbr>ffer.c b/src/intel/vulkan/genX_cmd_bu<wbr>ffer.c<br>
index 7fb607f..fbc1995 100644<br>
--- a/src/intel/vulkan/genX_cmd_bu<wbr>ffer.c<br>
+++ b/src/intel/vulkan/genX_cmd_bu<wbr>ffer.c<br>
@@ -627,8 +627,25 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,<br>
     /* No work is necessary if the layout stays the same or if this subresource<br>
      * range lacks auxiliary data.<br>
      */<br>
-   if (initial_layout == final_layout ||<br>
-       base_layer >= anv_image_aux_layers(image, base_level))<br>
+   if (initial_layout == final_layout)<br>
+      return;<br>
+<br>
+   if (image->shadow_surface.isl.siz<wbr>e > 0 &&<br>
+       final_layout == VK_IMAGE_LAYOUT_SHADER_READ_ON<wbr>LY_OPTIMAL) {<br>
+      /* This surface is a linear compressed image with a tiled shadow surface<br>
+       * for texturing.  The client is about to use it in READ_ONLY_OPTIMAL so<br>
+       * we need to ensure the shadow copy is up-to-date.<br>
+       */<br>
+      assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);<br>
+      assert(image-><a href="http://color_surface.is">color_surface.is</a><wbr>l.tiling == ISL_TILING_LINEAR);<br>
+      assert(image->shadow_surface.i<wbr>sl.tiling != ISL_TILING_LINEAR);<br>
+      assert(isl_format_is_compresse<wbr>d(image->color_surface.isl.<wbr>format));<br>
+      anv_image_copy_to_shadow(cmd_b<wbr>uffer, image, VK_IMAGE_ASPECT_COLOR_BIT,<br>
+                               base_level, level_count,<br>
+                               base_layer, layer_count);<br>
+   }<br>
+<br>
+   if (base_layer >= anv_image_aux_layers(image, base_level))<br>
        return;<br>
       /* A transition of a 3D subresource works on all slices at a time. */<br>
</blockquote>
<br>
<br>
</div></div></blockquote></div><br></div></div>