Mesa (main): radv: move pipe_misaligned and l2_coherent image checks to flags set on init
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Jun 18 16:18:16 UTC 2021
Module: Mesa
Branch: main
Commit: 651c6b16ff09745cb033566bc7910375868027df
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=651c6b16ff09745cb033566bc7910375868027df
Author: Mike Blumenkrantz <michael.blumenkrantz at gmail.com>
Date: Fri Jun 18 09:08:40 2021 -0400
radv: move pipe_misaligned and l2_coherent image checks to flags set on init
This should save 4-5% CPU in some cases.
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11462>
---
src/amd/vulkan/radv_cmd_buffer.c | 77 ++--------------------------------------
src/amd/vulkan/radv_image.c | 74 ++++++++++++++++++++++++++++++++++++++
src/amd/vulkan/radv_private.h | 1 +
3 files changed, 77 insertions(+), 75 deletions(-)
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index b95347a2166..94d48f6bf5f 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -3225,85 +3225,12 @@ radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags src_st
}
}
-/* Determine if the image is affected by the pipe misaligned metadata issue
- * which requires to invalidate L2.
- */
-static bool
-radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
-{
- struct radeon_info *rad_info = &device->physical_device->rad_info;
- unsigned log2_samples = util_logbase2(image->info.samples);
-
- assert(rad_info->chip_class >= GFX10);
-
- for (unsigned i = 0; i < image->plane_count; ++i) {
- VkFormat fmt = vk_format_get_plane_format(image->vk_format, i);
- unsigned log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
- unsigned log2_bpp_and_samples;
-
- if (rad_info->chip_class >= GFX10_3) {
- log2_bpp_and_samples = log2_bpp + log2_samples;
- } else {
- if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) {
- log2_bpp = 2;
- }
-
- log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
- }
-
- unsigned num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
- int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);
-
- if (vk_format_has_depth(image->vk_format)) {
- if (radv_image_is_tc_compat_htile(image) && overlap) {
- return true;
- }
- } else {
- unsigned max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
- int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
- int samples_overlap = MIN2(log2_samples, overlap);
-
- /* TODO: It shouldn't be necessary if the image has DCC but
- * not readable by shader.
- */
- if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
- (samples_overlap > log2_samples_frag_diff)) {
- return true;
- }
- }
- }
-
- return false;
-}
-
-static bool
-radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
-{
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- return !device->physical_device->rad_info.tcc_rb_non_coherent &&
- (image && !radv_image_is_pipe_misaligned(device, image));
- } else if (device->physical_device->rad_info.chip_class == GFX9 && image) {
- if (image->info.samples == 1 &&
- (image->usage &
- (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
- !vk_format_has_stencil(image->vk_format)) {
- /* Single-sample color and single-sample depth
- * (not stencil) are coherent with shaders on
- * GFX9.
- */
- return true;
- }
- }
-
- return false;
-}
-
enum radv_cmd_flush_bits
radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags src_flags,
const struct radv_image *image)
{
bool has_CB_meta = true, has_DB_meta = true;
- bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image);
+ bool image_is_coherent = image ? image->l2_coherent : false;
enum radv_cmd_flush_bits flush_bits = 0;
if (image) {
@@ -3379,7 +3306,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags dst_flag
bool has_CB_meta = true, has_DB_meta = true;
enum radv_cmd_flush_bits flush_bits = 0;
bool flush_CB = true, flush_DB = true;
- bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image);
+ bool image_is_coherent = image ? image->l2_coherent : false;
if (image) {
if (!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index f18f96ca491..9a11e013542 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -1527,6 +1527,79 @@ radv_select_modifier(const struct radv_device *dev, VkFormat format,
unreachable("App specified an invalid modifier");
}
+/* Determine if the image is affected by the pipe misaligned metadata issue
+ * which requires to invalidate L2.
+ */
+static bool
+radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
+{
+ struct radeon_info *rad_info = &device->physical_device->rad_info;
+ unsigned log2_samples = util_logbase2(image->info.samples);
+
+ assert(rad_info->chip_class >= GFX10);
+
+ for (unsigned i = 0; i < image->plane_count; ++i) {
+ VkFormat fmt = vk_format_get_plane_format(image->vk_format, i);
+ unsigned log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
+ unsigned log2_bpp_and_samples;
+
+ if (rad_info->chip_class >= GFX10_3) {
+ log2_bpp_and_samples = log2_bpp + log2_samples;
+ } else {
+ if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) {
+ log2_bpp = 2;
+ }
+
+ log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
+ }
+
+ unsigned num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
+ int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);
+
+ if (vk_format_has_depth(image->vk_format)) {
+ if (radv_image_is_tc_compat_htile(image) && overlap) {
+ return true;
+ }
+ } else {
+ unsigned max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
+ int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
+ int samples_overlap = MIN2(log2_samples, overlap);
+
+ /* TODO: It shouldn't be necessary if the image has DCC but
+ * not readable by shader.
+ */
+ if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
+ (samples_overlap > log2_samples_frag_diff)) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static bool
+radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
+{
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ return !device->physical_device->rad_info.tcc_rb_non_coherent &&
+ !radv_image_is_pipe_misaligned(device, image);
+ } else if (device->physical_device->rad_info.chip_class == GFX9) {
+ if (image->info.samples == 1 &&
+ (image->usage &
+ (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
+ !vk_format_has_stencil(image->vk_format)) {
+ /* Single-sample color and single-sample depth
+ * (not stencil) are coherent with shaders on
+ * GFX9.
+ */
+ return true;
+ }
+ }
+
+ return false;
+}
+
VkResult
radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
const VkAllocationCallbacks *alloc, VkImage *pImage)
@@ -1634,6 +1707,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_
return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
}
+ image->l2_coherent = radv_image_is_l2_coherent(device, image);
if (device->instance->debug_flags & RADV_DEBUG_IMG) {
radv_image_print_info(device, image);
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 6bcb9993854..610c8bb7653 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1856,6 +1856,7 @@ struct radv_image {
unsigned queue_family_mask;
bool exclusive;
bool shareable;
+ bool l2_coherent;
/* Set when bound */
struct radeon_winsys_bo *bo;
More information about the mesa-commit mailing list