[Mesa-dev] [PATCH] radv: Implement TC compatible HTILE.

Dave Airlie airlied at gmail.com
Wed Oct 4 01:54:13 UTC 2017


On 4 October 2017 at 10:51, Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl> wrote:
> The situations where we enable it are quite limited, but it works,
> even for madmax, so let's just enable it.

Let's land it, then we can work out where it needs tuning.

Reviewed-by: Dave Airlie <airlied at redhat.com>

Dave.

> ---
>  src/amd/vulkan/radv_device.c     | 28 ++++++++++++++++++++++++++--
>  src/amd/vulkan/radv_image.c      | 21 +++++++++++++++++++++
>  src/amd/vulkan/radv_meta_clear.c | 18 ++++++++++++++----
>  src/amd/vulkan/radv_private.h    |  1 +
>  4 files changed, 62 insertions(+), 6 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 402c948e523..aa7fe35d87e 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -3249,6 +3249,18 @@ radv_initialise_ds_surface(struct radv_device *device,
>                 if (iview->image->surface.htile_size && !level) {
>                         ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
>
> +                       if (iview->image->tc_compatible_htile) {
> +                               unsigned max_zplanes = 4;
> +
> +                               if (iview->vk_format == VK_FORMAT_D16_UNORM  &&
> +                                   iview->image->info.samples > 1)
> +                                       max_zplanes = 2;
> +
> +                               ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
> +                                         S_028038_ITERATE_FLUSH(1);
> +                               ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
> +                       }
> +
>                         if (!iview->image->surface.has_stencil)
>                                 /* Use all of the htile_buffer for depth if there's no stencil. */
>                                 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
> @@ -3268,7 +3280,7 @@ radv_initialise_ds_surface(struct radv_device *device,
>                 z_offs += iview->image->surface.u.legacy.level[level].offset;
>                 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
>
> -               ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
> +               ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile);
>                 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
>                 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
>
> @@ -3312,7 +3324,8 @@ radv_initialise_ds_surface(struct radv_device *device,
>                 if (iview->image->surface.htile_size && !level) {
>                         ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
>
> -                       if (!iview->image->surface.has_stencil)
> +                       if (!iview->image->surface.has_stencil &&
> +                           !iview->image->tc_compatible_htile)
>                                 /* Use all of the htile_buffer for depth if there's no stencil. */
>                                 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
>
> @@ -3320,6 +3333,17 @@ radv_initialise_ds_surface(struct radv_device *device,
>                                 iview->image->htile_offset;
>                         ds->db_htile_data_base = va >> 8;
>                         ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
> +
> +                       if (iview->image->tc_compatible_htile) {
> +                               ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
> +
> +                               if (iview->image->info.samples <= 1)
> +                                       ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
> +                               else if (iview->image->info.samples <= 4)
> +                                       ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
> +                               else
> +                                       ds->db_z_info|= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
> +                       }
>                 }
>         }
>
> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
> index 35c58f45ab5..bf30281abaa 100644
> --- a/src/amd/vulkan/radv_image.c
> +++ b/src/amd/vulkan/radv_image.c
> @@ -109,6 +109,15 @@ radv_init_surface(struct radv_device *device,
>
>         if (is_depth) {
>                 surface->flags |= RADEON_SURF_ZBUFFER;
> +               if (!(pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
> +                   !(pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) &&
> +                   pCreateInfo->tiling != VK_IMAGE_TILING_LINEAR &&
> +                   pCreateInfo->mipLevels <= 1 &&
> +                   device->physical_device->rad_info.chip_class >= VI &&
> +                   (pCreateInfo->format == VK_FORMAT_D32_SFLOAT ||
> +                    (device->physical_device->rad_info.chip_class >= GFX9 &&
> +                     pCreateInfo->format == VK_FORMAT_D16_UNORM)))
> +                       surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
>         }
>
>         if (is_stencil)
> @@ -255,6 +264,11 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
>                         meta_va = gpu_address + image->dcc_offset;
>                         if (chip_class <= VI)
>                                 meta_va += base_level_info->dcc_offset;
> +               } else if(image->tc_compatible_htile && image->surface.htile_size) {
> +                       meta_va = gpu_address + image->htile_offset;
> +               }
> +
> +               if (meta_va) {
>                         state[6] |= S_008F28_COMPRESSION_EN(1);
>                         state[7] = meta_va >> 8;
>                         state[7] |= image->surface.tile_swizzle;
> @@ -898,6 +912,7 @@ radv_image_create(VkDevice _device,
>                 if (radv_image_can_enable_htile(image) &&
>                     !(device->debug_flags & RADV_DEBUG_NO_HIZ)) {
>                         radv_image_alloc_htile(image);
> +                       image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
>                 } else {
>                         image->surface.htile_size = 0;
>                 }
> @@ -1040,6 +1055,9 @@ bool radv_layout_has_htile(const struct radv_image *image,
>                             VkImageLayout layout,
>                             unsigned queue_mask)
>  {
> +       if (image->surface.htile_size && image->tc_compatible_htile)
> +               return layout != VK_IMAGE_LAYOUT_GENERAL;
> +
>         return image->surface.htile_size &&
>                (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
>                 layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
> @@ -1050,6 +1068,9 @@ bool radv_layout_is_htile_compressed(const struct radv_image *image,
>                                       VkImageLayout layout,
>                                       unsigned queue_mask)
>  {
> +       if (image->surface.htile_size && image->tc_compatible_htile)
> +               return layout != VK_IMAGE_LAYOUT_GENERAL;
> +
>         return image->surface.htile_size &&
>                (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
>                 layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
> diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
> index 1133024d588..fd2caf3d0ce 100644
> --- a/src/amd/vulkan/radv_meta_clear.c
> +++ b/src/amd/vulkan/radv_meta_clear.c
> @@ -543,8 +543,10 @@ create_depthstencil_pipeline(struct radv_device *device,
>
>  static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
>                                       const struct radv_image_view *iview,
> +                                     VkImageAspectFlags aspects,
>                                       VkImageLayout layout,
> -                                     const VkClearRect *clear_rect)
> +                                     const VkClearRect *clear_rect,
> +                                     VkClearDepthStencilValue clear_value)
>  {
>         uint32_t queue_mask = radv_image_queue_family_mask(iview->image,
>                                                            cmd_buffer->queue_family_index,
> @@ -553,7 +555,13 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
>             clear_rect->rect.extent.width != iview->extent.width ||
>             clear_rect->rect.extent.height != iview->extent.height)
>                 return false;
> -       if (iview->base_mip == 0 &&
> +       if (iview->image->tc_compatible_htile &&
> +           (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && clear_value.depth != 0.0 &&
> +             clear_value.depth != 1.0) ||
> +            ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && clear_value.stencil != 0)))
> +               return false;
> +       if (iview->image->surface.htile_size &&
> +           iview->base_mip == 0 &&
>             iview->base_layer == 0 &&
>             radv_layout_is_htile_compressed(iview->image, layout, queue_mask) &&
>             !radv_image_extent_compare(iview->image, &iview->extent))
> @@ -571,7 +579,7 @@ pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
>                            const VkClearRect *clear_rect,
>                            VkClearDepthStencilValue clear_value)
>  {
> -       bool fast = depth_view_can_fast_clear(cmd_buffer, iview, layout, clear_rect);
> +       bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, clear_rect, clear_value);
>         int index = DEPTH_CLEAR_SLOW;
>
>         if (fast) {
> @@ -641,7 +649,9 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
>                                      pipeline);
>         }
>
> -       if (depth_view_can_fast_clear(cmd_buffer, iview, subpass->depth_stencil_attachment.layout, clear_rect))
> +       if (depth_view_can_fast_clear(cmd_buffer, iview, aspects,
> +                                     subpass->depth_stencil_attachment.layout,
> +                                     clear_rect, clear_value))
>                 radv_set_depth_clear_regs(cmd_buffer, iview->image, clear_value, aspects);
>
>         radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 5cab4072116..c2d78a7e2c7 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1218,6 +1218,7 @@ struct radv_image {
>         VkDeviceSize offset;
>         uint32_t dcc_offset;
>         uint32_t htile_offset;
> +       bool tc_compatible_htile;
>         struct radeon_surf surface;
>
>         struct radv_fmask_info fmask;
> --
> 2.14.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list