[Mesa-dev] [PATCH] [rfc] radv: offset images by a differing amount. (v2)

Dave Airlie airlied at gmail.com
Fri Jul 7 19:59:59 UTC 2017


From: Dave Airlie <airlied at redhat.com>

(This patch doesn't seem to work fully; hopefully AMD can give us
more info on the rules and on how to calculate the magic value.)

It appears that, to get full access to memory bandwidth with MRT
rendering, the pro Vulkan driver offsets each image by 0x3800.
I'm not sure how that value is calculated.

Glenn came up with the idea (probably what -pro does also) of just
offsetting every image in round-robin order, in the hope that apps
would create MRT images in sequence anyway.

This attempts to do that using an atomic counter in the device.

This gets the deferred demo from 800fps->1150fps on my rx480.

(I've tested that dota2 and talos at least still run after this.)

v2: acknowledge it isn't an offset but a tile rotation pattern;
add a quote from the evergreen docs.
---
 src/amd/vulkan/radv_device.c  |  8 ++++----
 src/amd/vulkan/radv_image.c   | 22 ++++++++++++++++++----
 src/amd/vulkan/radv_private.h |  3 +++
 3 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 59efccf..fb15ed6 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2756,16 +2756,16 @@ radv_initialise_color_surface(struct radv_device *device,
 		}
 	}
 
-	cb->cb_color_base = va >> 8;
+	cb->cb_color_base = (va >> 8) | iview->image->tile_rotate_bits;
 
 	/* CMASK variables */
 	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
 	va += iview->image->cmask.offset;
-	cb->cb_color_cmask = va >> 8;
+	cb->cb_color_cmask = (va >> 8) | iview->image->tile_rotate_bits;
 
 	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
 	va += iview->image->dcc_offset;
-	cb->cb_dcc_base = va >> 8;
+	cb->cb_dcc_base = (va >> 8) | iview->image->tile_rotate_bits;
 
 	uint32_t max_slice = radv_surface_layer_count(iview);
 	cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
@@ -2780,7 +2780,7 @@ radv_initialise_color_surface(struct radv_device *device,
 
 	if (iview->image->fmask.size) {
 		va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
-		cb->cb_color_fmask = va >> 8;
+		cb->cb_color_fmask = (va >> 8) | iview->image->tile_rotate_bits;
 	} else {
 		cb->cb_color_fmask = cb->cb_color_base;
 	}
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index b3a223b..b57a7d1 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -31,6 +31,7 @@
 #include "sid.h"
 #include "gfx9d.h"
 #include "util/debug.h"
+#include "util/u_atomic.h"
 static unsigned
 radv_choose_tiling(struct radv_device *Device,
 		   const struct radv_image_create_info *create_info)
@@ -208,7 +209,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
 	} else
 		va += base_level_info->offset;
 
-	state[0] = va >> 8;
+	state[0] = (va >> 8) | image->tile_rotate_bits;
 	state[1] &= C_008F14_BASE_ADDRESS_HI;
 	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
 	state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image, base_level,
@@ -223,8 +224,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
 			if (chip_class <= VI)
 				meta_va += base_level_info->dcc_offset;
 			state[6] |= S_008F28_COMPRESSION_EN(1);
-			state[7] = meta_va >> 8;
-
+			state[7] = (meta_va >> 8) | image->tile_rotate_bits;
 		}
 	}
 
@@ -471,7 +471,7 @@ si_make_texture_descriptor(struct radv_device *device,
 			num_format = V_008F14_IMG_NUM_FORMAT_UINT;
 		}
 
-		fmask_state[0] = va >> 8;
+		fmask_state[0] = (va >> 8) | image->tile_rotate_bits;
 		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
 			S_008F14_DATA_FORMAT_GFX6(fmask_format) |
 			S_008F14_NUM_FORMAT_GFX6(num_format);
@@ -801,6 +801,20 @@ radv_image_create(VkDevice _device,
 	image->size = image->surface.surf_size;
 	image->alignment = image->surface.surf_alignment;
 
+	if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && !create_info->scanout) {
+		/*
+		 * from the evergreen docs -
+		 * Bits [p-1:0] of this field, where p =
+		 * log2(numPipes), specify the pipe swizzle. Bits [p+b-
+		 * 1:p], where b = log2(numBanks) specify the bank
+		 * swizzle.
+		 * this may not be correct for GCN gpus.
+		*/
+		uint32_t mrt_idx = p_atomic_inc_return(&device->image_mrt_offset_counter) - 1;
+		mrt_idx %= 4;
+		image->tile_rotate_bits = 0x38 * mrt_idx;
+	}
+
 	if (image->exclusive || image->queue_family_mask == 1)
 		can_cmask_dcc = true;
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 5c30d18..03f3133 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -547,6 +547,8 @@ struct radv_device {
 
 	/* Backup in-memory cache to be used if the app doesn't provide one */
 	struct radv_pipeline_cache *                mem_cache;
+
+	uint32_t image_mrt_offset_counter;
 };
 
 struct radv_device_memory {
@@ -1211,6 +1213,7 @@ struct radv_image {
 	/* Set when bound */
 	struct radeon_winsys_bo *bo;
 	VkDeviceSize offset;
+	uint32_t tile_rotate_bits;
 	uint32_t dcc_offset;
 	uint32_t htile_offset;
 	struct radeon_surf surface;
-- 
2.9.4



More information about the mesa-dev mailing list