<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Tue, Sep 19, 2017 at 6:52 AM, Lionel Landwerlin <<a href="mailto:lionel.g.landwerlin@intel.com" target="_blank">lionel.g.landwerlin@intel.com</a>> wrote: <blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5">On 15/09/17 17:01, Jason Ekstrand wrote: <blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> In order to get support everywhere, this gets a bit complicated. On Sky Lake and later, everything is fine because HALIGN/VALIGN are specified in surface elements and are required to be at least 4 so any offsetting we may need to do falls neatly within the heavy restrictions placed on the X/Y Offset parameter of RENDER_SURFACE_STATE. On Broadwell and earlier, HALIGN/VALIGN are specified in pixels are are hard-coded to align to exactly the block size of the compressed texture. This meas that, when reinterpreted as a non-compressed texture, the tile offsets may be anything and we can't rely on X/Y Offset. In order to work around this issue, we fall back to linear where we can trivially offset to whatever element we so choose. However, since linear texturing performance is terrible, we create a tiled shadow copy of the image to use for texturing. Whenever the user does a layout transition from anything to SHADER_READ_ONLY_OPTIMAL, we use blorp to copy the contents of the texture from the linear copy to the tiled shadow copy. This assumes that the client will use the image far more for texturing than as a storage image or render target. Even though we don't need the shadow copy on Sky Lake, we implement it this way first to make testing easier. Due to the hardware restriction that ASTC must not be linear, ASTC does not work yet. --- src/intel/vulkan/anv_blorp.c | 46 +++++++++++++++ src/intel/vulkan/anv_image.c | 111 ++++++++++++++++++++++++++++++++++++- src/intel/vulkan/anv_private.h | 14 +++++ src/intel/vulkan/genX_cmd_buffer.c | 21 ++++++- 4 files changed, 187 insertions(+), 5 deletions(-) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 7f51bed..95f8696 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -1489,6 +1489,52 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) } void +anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *image, + VkImageAspectFlagBits aspect, + uint32_t base_level, uint32_t level_count, + uint32_t base_layer, uint32_t layer_count) +{ + struct blorp_batch batch; + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); + + struct blorp_surf surf; + get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT, + ISL_AUX_USAGE_NONE, &surf); + + struct blorp_surf shadow_surf = { + .surf = &image->shadow_surface.isl, + .addr = { + .buffer = image->bo, + .offset = image->offset + image->shadow_surface.offset, + }, + }; + + for (uint32_t l = 0; l < level_count; l++) { + const uint32_t level = base_level + l; + + const VkExtent3D extent = { + .width = anv_minify(image->extent.width, level), + .height = anv_minify(image->extent.height, level), + .depth = anv_minify(image->extent.depth, level), + }; + + if (image->type == VK_IMAGE_TYPE_3D) + layer_count = extent.depth; + + for (uint32_t a = 0; a < layer_count; a++) { + const uint32_t layer = base_layer + a; + + blorp_copy(&batch, &surf, level, layer, + &shadow_surf, level, layer, + 0, 0, 0, 0, extent.width, extent.height); + } + } + + blorp_batch_finish(&batch); +} + +void anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, enum blorp_hiz_op op) diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 77ffa7d..e6e3250 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -235,6 +235,18 @@ make_surface(const struct anv_device *dev, aspect, vk_info->tiling); assert(format != ISL_FORMAT_UNSUPPORTED); + /* If an image is created as BLOCK_TEXEL_VIEW_COMPATIBLE, then we need to + * fall back to linear because we aren't guaranteed that we can handle + * offsets correctly. + */ + bool needs_shadow = false; + if ((vk_info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT_KHR) && + vk_info->tiling == VK_IMAGE_TILING_OPTIMAL) { + assert(isl_format_is_compressed(format)); + tiling_flags = ISL_TILING_LINEAR_BIT; + needs_shadow = true; + } + ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, .dim = vk_to_isl_surf_dim[vk_info->imageType], .format = format, @@ -256,6 +268,36 @@ make_surface(const struct anv_device *dev, add_surface(image, anv_surf); + /* If an image is created as BLOCK_TEXEL_VIEW_COMPATIBLE, then we need to + * create an identical tiled shadow surface for use while texturing so we + * don't get garbage performance. + */ + if (needs_shadow) { + assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT); + assert(tiling_flags == ISL_TILING_LINEAR_BIT); + + ok = isl_surf_init(&dev->isl_dev, &image->shadow_surface.isl, + .dim = vk_to_isl_surf_dim[vk_info->imageType], + .format = format, + .width = image->extent.width, + .height = image->extent.height, + .depth = image->extent.depth, + .levels = vk_info->mipLevels, + .array_len = vk_info->arrayLayers, + .samples = vk_info->samples, + .min_alignment = 0, + .row_pitch = anv_info->stride, + .usage = choose_isl_surf_usage(image->usage, image->usage, aspect), + .tiling_flags = ISL_TILING_ANY_MASK); + + /* isl_surf_init() will fail only if provided invalid input. Invalid input + * is illegal in Vulkan. + */ + assert(ok); + + add_surface(image, &image->shadow_surface); + } + /* Add a HiZ surface to a depth buffer that will be used for rendering. */ if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) { @@ -673,6 +715,20 @@ anv_image_fill_surface_state(struct anv_device *device, struct isl_view view = *view_in; view.usage |= view_usage; + /* For texturing with VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL from a + * compressed surface with a shadow surface, we use the shadow instead of + * the primary surface. The shadow surface will be tiled, unlike the main + * surface, so it should get significantly better performance. + */ + if (image->shadow_surface.isl.size > 0 && + isl_format_is_compressed(view.format) && + (flags & ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL)) { + assert(isl_format_is_compressed(surface->isl.format)); + assert(surface->isl.tiling == ISL_TILING_LINEAR); + assert(image->shadow_surface.isl.tiling != ISL_TILING_LINEAR); </blockquote> </div></div> There is something odd here (I must misunderstand...). In make_surface() you always make the shadow_surface with linear tiling, yet here you're expecting it *not* to be linear? </blockquote><div> </div><div>No. We set tiling to LINEAR and set needs_shadow. When we actually create the shadow, we explicitly use TILING_ANY. </div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> + surface = &image->shadow_surface; + } + if (view_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT) view.swizzle = anv_swizzle_for_render(view.swizzle); @@ -718,16 +774,65 @@ anv_image_fill_surface_state(struct anv_device *device, view.format); } + const struct isl_surf *isl_surf = &surface->isl; + + struct isl_surf tmp_surf; + uint32_t offset_B = 0, tile_x_sa = 0, tile_y_sa = 0; + if (isl_format_is_compressed(surface->isl.format) && + !isl_format_is_compressed(view.format)) { + /* We're creating an uncompressed view of a compressed surface. This + * is allowed but only for a single level/layer. + */ + assert(surface->isl.samples == 1); + assert(view.levels == 1); + assert(view.array_len == 1); + + isl_surf_get_image_surf(&device->isl_dev, isl_surf, + view.base_level, + surface->isl.dim == ISL_SURF_DIM_3D ? + 0 : view.base_array_layer, + surface->isl.dim == ISL_SURF_DIM_3D ? + view.base_array_layer : 0, + &tmp_surf, + &offset_B, &tile_x_sa, &tile_y_sa); + + /* The newly created image represents the one subimage we're + * referencing with this view so it only has one array slice and + * miplevel. + */ + view.base_array_layer = 0; + view.base_level = 0; + + /* We're making an uncompressed view here. The image dimensions need + * to be scaled down by the block size. + */ + const struct isl_format_layout *fmtl = + isl_format_get_layout(surface->isl.format); + tmp_surf.format = view.format; + tmp_surf.logical_level0_px.width = + DIV_ROUND_UP(tmp_surf.logical_level0_px.width, fmtl->bw); + tmp_surf.logical_level0_px.height = + DIV_ROUND_UP(tmp_surf.logical_level0_px.height, fmtl->bh); + tmp_surf.phys_level0_sa.width /= fmtl->bw; + tmp_surf.phys_level0_sa.height /= fmtl->bh; + + isl_surf = &tmp_surf; + + assert(surface->isl.tiling == ISL_TILING_LINEAR); + assert(tile_x_sa == 0); + assert(tile_y_sa == 0); + } + isl_surf_fill_state(&device->isl_dev, state_inout->state.map, - .surf = &surface->isl, + .surf = isl_surf, .view = &view, - .address = address, + .address = address + offset_B, .clear_color = *clear_color, .aux_surf = &image->aux_surface.isl, .aux_usage = aux_usage, .aux_address = aux_address, .mocs = device->default_mocs); - state_inout->address = address; + state_inout->address = address + offset_B; if (device->info.gen >= 8) { state_inout->aux_address = aux_address; } else { diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 85843b2..355adba 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2266,6 +2266,13 @@ struct anv_image { }; /** + * A surface which shadows the main surface and may have different tiling. + * This is used for sampling using a tiling that isn't supported for other + * operations. + */ + struct anv_surface shadow_surface; + + /** * For color images, this is the aux usage for this image when not used as a * color attachment. * @@ -2352,6 +2359,13 @@ anv_image_fast_clear(struct anv_cmd_buffer *cmd_buffer, const uint32_t base_level, const uint32_t level_count, const uint32_t base_layer, uint32_t layer_count); +void +anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *image, + VkImageAspectFlagBits aspect, + uint32_t base_level, uint32_t level_count, + uint32_t base_layer, uint32_t layer_count); + enum isl_aux_usage anv_layout_to_aux_usage(const struct gen_device_info * const devinfo, const struct anv_image *image, diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 7fb607f..fbc1995 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -627,8 +627,25 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, /* No work is necessary if the layout stays the same or if this subresource * range lacks auxiliary data. */ - if (initial_layout == final_layout || - base_layer >= anv_image_aux_layers(image, base_level)) + if (initial_layout == final_layout) + return; + + if (image->shadow_surface.isl.size > 0 && + final_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { + /* This surface is a linear compressed image with a tiled shadow surface + * for texturing. The client is about to use it in READ_ONLY_OPTIMAL so + * we need to ensure the shadow copy is up-to-date. + */ + assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); + assert(image-><a href="http://color_surface.is">color_surface.is</a>l.tiling == ISL_TILING_LINEAR); + assert(image->shadow_surface.isl.tiling != ISL_TILING_LINEAR); + assert(isl_format_is_compressed(image->color_surface.isl.format)); + anv_image_copy_to_shadow(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT, + base_level, level_count, + base_layer, layer_count); + } + + if (base_layer >= anv_image_aux_layers(image, base_level)) return; /* A transition of a 3D subresource works on all slices at a time. */ </blockquote> </div></div></blockquote></div> </div></div>