Mesa (master): radv: Enable DCC for image stores on GFX10.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Mar 26 16:25:28 UTC 2021


Module: Mesa
Branch: master
Commit: a7c0cf500b335069bfe480c947b26052335f897e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a7c0cf500b335069bfe480c947b26052335f897e

Author: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Date:   Fri Sep 18 14:22:59 2020 +0200

radv: Enable DCC for image stores on GFX10.

Since image stores can now compress and we can't track image stores,
this also stops using predication for DCC decompression.

On GFX10 this was benchmarked to be faster. For GFX10.3 the microbenchmarks
are not as positive, though I haven't tested any games, so this is not enabled
there yet.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796>

---

 src/amd/vulkan/radv_cmd_buffer.c      |  3 ++
 src/amd/vulkan/radv_formats.c         | 15 ++++--
 src/amd/vulkan/radv_image.c           | 87 ++++++++++++++++++++++++++++++++---
 src/amd/vulkan/radv_meta_fast_clear.c |  5 ++
 src/amd/vulkan/radv_private.h         |  5 ++
 5 files changed, 104 insertions(+), 11 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 53afb7db47c..8b5010df708 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2165,6 +2165,9 @@ radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer,
 
 	assert(radv_dcc_enabled(image, range->baseMipLevel));
 
+	if (image->dcc_pred_offset == 0)
+		return;
+
 	radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
 	radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) |
 				    S_370_WR_CONFIRM(1) |
diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c
index cb2e4c34da8..5d3dfd94a07 100644
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -507,6 +507,15 @@ static bool radv_is_sampler_format_supported(VkFormat format, bool *linear_sampl
 					     vk_format_get_first_non_void_channel(format)) != ~0U;
 }
 
+bool
+radv_is_atomic_format_supported(VkFormat format)
+{
+	return format == VK_FORMAT_R32_UINT ||
+		format == VK_FORMAT_R32_SINT ||
+		format == VK_FORMAT_R32_SFLOAT ||
+		format == VK_FORMAT_R64_UINT ||
+		format == VK_FORMAT_R64_SINT;
+}
 
 static bool radv_is_storage_image_format_supported(struct radv_physical_device *physical_device,
 						   VkFormat format)
@@ -775,11 +784,7 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
 		          VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
 	}
 
-	if (format == VK_FORMAT_R32_UINT ||
-	    format == VK_FORMAT_R32_SINT ||
-	    format == VK_FORMAT_R32_SFLOAT ||
-	    format == VK_FORMAT_R64_UINT ||
-	    format == VK_FORMAT_R64_SINT) {
+	if (radv_is_atomic_format_supported(format)) {
 		buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
 		linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
 		tiled |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index a7ada09b529..e6c1913495d 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -189,6 +189,31 @@ radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev,
 	return true;
 }
 
+static bool
+radv_formats_is_atomic_allowed(const void *pNext, VkFormat format,
+                               VkImageCreateFlags flags)
+{
+	if (radv_is_atomic_format_supported(format))
+		return true;
+
+	if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
+		const struct VkImageFormatListCreateInfo *format_list =
+			(const struct  VkImageFormatListCreateInfo *)
+				vk_find_struct_const(pNext,
+						     IMAGE_FORMAT_LIST_CREATE_INFO);
+
+		/* We have to ignore the existence of the list if viewFormatCount = 0 */
+		if (format_list && format_list->viewFormatCount) {
+			for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
+				if (radv_is_atomic_format_supported(format_list->pViewFormats[i]))
+					return true;
+			}
+		}
+	}
+
+	return false;
+}
+
 static bool
 radv_use_dcc_for_image(struct radv_device *device,
 		       const struct radv_image *image,
@@ -205,8 +230,16 @@ radv_use_dcc_for_image(struct radv_device *device,
 	if (image->shareable && image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
 		return false;
 
-	/* TODO: Enable DCC for storage images. */
-	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
+	/*
+	 * TODO: Enable DCC for storage images on GFX9 and earlier.
+	 *
+	 * Also disable DCC with atomics because even when DCC stores are
+	 * supported atomics will always decompress. So if we are
+	 * decompressing a lot anyway we might as well not have DCC.
+	 */
+	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
+	    (!radv_image_use_dcc_image_stores(device, image) ||
+	     radv_formats_is_atomic_allowed(pCreateInfo->pNext, format, pCreateInfo->flags)))
 		return false;
 
 	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
@@ -241,6 +274,41 @@ radv_use_dcc_for_image(struct radv_device *device,
 	                                       pCreateInfo->flags);
 }
 
+/*
+ * Whether to enable image stores with DCC compression for this image. If
+ * this function returns false the image subresource should be decompressed
+ * before using it with image stores.
+ *
+ * Note that this can have mixed performance implications, see
+ * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643299
+ *
+ * This function assumes the image uses DCC compression.
+ */
+bool radv_image_use_dcc_image_stores(const struct radv_device *device,
+				     const struct radv_image *image)
+{
+	/*
+	 * TODO: Enable on more HW. DIMGREY and VANGOGH need a workaround and
+	 * we need more perf analysis.
+	 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643853
+	 *
+	 * DCC with MSAA > 2 samples results in CTS failures (some of dEQP-VK.pipeline.multisample.storage_image.*).
+	 */
+	return device->physical_device->rad_info.chip_class == GFX10 && image->info.samples <= 2;
+}
+
+/*
+ * Whether to use a predicate to determine whether DCC is in a compressed
+ * state. This can be used to avoid decompressing an image multiple times.
+ *
+ * This function assumes the image uses DCC compression.
+ */
+bool radv_image_use_dcc_predication(const struct radv_device *device,
+				    const struct radv_image *image)
+{
+	return !radv_image_use_dcc_image_stores(device, image);
+}
+
 static inline bool
 radv_use_fmask_for_image(const struct radv_device *device,
                          const struct radv_image *image)
@@ -713,6 +781,9 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
 			if (plane->surface.dcc_offset)
 				meta = plane->surface.u.gfx9.dcc;
 
+			if (radv_dcc_enabled(image, first_level))
+				state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1);
+
 			state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
 				    S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
 		}
@@ -1287,7 +1358,7 @@ radv_image_alloc_values(const struct radv_device *device, struct radv_image *ima
 		image->size += 8 * image->info.levels;
 	}
 
-	if (radv_image_has_dcc(image)) {
+	if (radv_image_use_dcc_predication(device, image)) {
 		image->dcc_pred_offset = image->size;
 		image->size += 8 * image->info.levels;
 	}
@@ -1755,13 +1826,16 @@ radv_image_view_make_descriptor(struct radv_image_view *iview,
 		else
 			base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
 	}
+
+	if (is_storage_image && !radv_image_use_dcc_image_stores(device, image))
+		disable_compression = true;
 	si_set_mutable_tex_desc_fields(device, image,
 				       base_level_info,
 				       plane_id,
 				       iview->base_mip,
 				       iview->base_mip,
 				       blk_w, is_stencil, is_storage_image,
-				       is_storage_image || disable_compression,
+				       disable_compression,
 				       descriptor->plane_descriptors[descriptor_plane_id]);
 }
 
@@ -2043,10 +2117,11 @@ bool radv_layout_dcc_compressed(const struct radv_device *device,
 	    radv_image_has_dcc(image))
 		return false;
 
-	/* Don't compress compute transfer dst, as image stores are not supported. */
+	/* Don't compress compute transfer dst when image stores are not supported. */
 	if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL ||
 	     layout == VK_IMAGE_LAYOUT_GENERAL) &&
-	    (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
+	    (queue_mask & (1u << RADV_QUEUE_COMPUTE)) &&
+	    !radv_image_use_dcc_image_stores(device, image))
 		return false;
 
 	return radv_image_has_dcc(image) &&
diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
index 467b5b0d8c3..e10da6bfde9 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -752,6 +752,11 @@ radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer,
 		use_predication = true;
 	}
 
+	/* If we are asked for DCC decompression without DCC predicates we cannot
+	 * use the FCE predicate. */
+	if (decompress_dcc && image->dcc_pred_offset == 0)
+		use_predication = false;
+
 	if (radv_dcc_enabled(image, subresourceRange->baseMipLevel) &&
 	    (image->info.array_size != radv_get_layerCount(image, subresourceRange) ||
 	    subresourceRange->baseArrayLayer != 0)) {
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 8adb5fa8684..79aad3215b2 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1538,6 +1538,10 @@ void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 				      int cb_idx,
 				      uint32_t color_values[2]);
 
+bool radv_image_use_dcc_image_stores(const struct radv_device *device,
+				     const struct radv_image *image);
+bool radv_image_use_dcc_predication(const struct radv_device *device,
+				    const struct radv_image *image);
 void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer,
 			      struct radv_image *image,
 			      const VkImageSubresourceRange *range, bool value);
@@ -1835,6 +1839,7 @@ bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pde
                                           VkFormat format, bool *blendable);
 bool radv_dcc_formats_compatible(VkFormat format1,
                                  VkFormat format2);
+bool radv_is_atomic_format_supported(VkFormat format);
 bool radv_device_supports_etc(struct radv_physical_device *physical_device);
 
 struct radv_image_plane {



More information about the mesa-commit mailing list