[Mesa-dev] [PATCH] radv: set cb base tile swizzles for MRT speedups (v3)

Dave Airlie airlied at gmail.com
Mon Jul 10 04:59:40 UTC 2017


From: Dave Airlie <airlied at redhat.com>

This patch uses addrlib to work out the tile swizzles according
to the surface index. It seems to produce the same values as
amdgpu-pro for the deferred test.

v2: don't apply the swizzle to CMASK. The eg docs don't mention
it, and we clearly don't align CMASK for that.
v3: disable the surf index for dedicated images, as these will
most likely be shared, and I don't think the metadata has
space for this info in it yet.

This gets the deferred demo from 730->950fps on my RX 480.
(The DCC/CMASK elimination predication patches get it further.)
I'm also seeing some improvements in Mad Max at 4K.

Signed-off-by: Dave Airlie <airlied at redhat.com>

fixup for dedicate
---
 src/amd/common/ac_surface.c   | 14 ++++++++++++++
 src/amd/common/ac_surface.h   |  2 ++
 src/amd/vulkan/radv_device.c  |  7 ++++++-
 src/amd/vulkan/radv_image.c   | 19 ++++++++++++++++++-
 src/amd/vulkan/radv_private.h |  2 ++
 5 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 23fb66b..0aebacc 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -692,6 +692,20 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
 		surf->htile_size *= 2;
 
 	surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+	/* workout base swizzle */
+	if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
+		ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
+		ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
+
+		AddrBaseSwizzleIn.surfIndex = config->info.surf_index;
+		AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex;
+		AddrBaseSwizzleIn.macroModeIndex = AddrSurfInfoOut.macroModeIndex;
+		AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo;
+		AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode;
+		AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut);
+		surf->u.legacy.combined_swizzle = AddrBaseSwizzleOut.tileSwizzle;
+	}
 	return 0;
 }
 
diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
index 4d893ff..7901b86 100644
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@@ -97,6 +97,7 @@ struct legacy_surf_layout {
     unsigned                    depth_adjusted:1;
     unsigned                    stencil_adjusted:1;
 
+    uint8_t                     combined_swizzle;
     struct legacy_surf_level    level[RADEON_SURF_MAX_LEVELS];
     struct legacy_surf_level    stencil_level[RADEON_SURF_MAX_LEVELS];
     uint8_t                     tiling_index[RADEON_SURF_MAX_LEVELS];
@@ -194,6 +195,7 @@ struct ac_surf_info {
 	uint32_t width;
 	uint32_t height;
 	uint32_t depth;
+	uint32_t surf_index;
 	uint8_t samples;
 	uint8_t levels;
 	uint16_t array_size;
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 789c90d..eb77914 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2757,7 +2757,8 @@ radv_initialise_color_surface(struct radv_device *device,
 	}
 
 	cb->cb_color_base = va >> 8;
-
+	if (device->physical_device->rad_info.chip_class < GFX9)
+		cb->cb_color_base |= iview->image->surface.u.legacy.combined_swizzle;
 	/* CMASK variables */
 	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
 	va += iview->image->cmask.offset;
@@ -2766,6 +2767,8 @@ radv_initialise_color_surface(struct radv_device *device,
 	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
 	va += iview->image->dcc_offset;
 	cb->cb_dcc_base = va >> 8;
+	if (device->physical_device->rad_info.chip_class < GFX9)
+		cb->cb_dcc_base |= iview->image->surface.u.legacy.combined_swizzle;
 
 	uint32_t max_slice = radv_surface_layer_count(iview);
 	cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
@@ -2781,6 +2784,8 @@ radv_initialise_color_surface(struct radv_device *device,
 	if (iview->image->fmask.size) {
 		va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
 		cb->cb_color_fmask = va >> 8;
+		if (device->physical_device->rad_info.chip_class < GFX9)
+			cb->cb_color_fmask |= iview->image->surface.u.legacy.combined_swizzle;
 	} else {
 		cb->cb_color_fmask = cb->cb_color_base;
 	}
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 115e5a5..28f3874 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -27,10 +27,12 @@
 
 #include "radv_private.h"
 #include "vk_format.h"
+#include "vk_util.h"
 #include "radv_radeon_winsys.h"
 #include "sid.h"
 #include "gfx9d.h"
 #include "util/debug.h"
+#include "util/u_atomic.h"
 static unsigned
 radv_choose_tiling(struct radv_device *Device,
 		   const struct radv_image_create_info *create_info)
@@ -209,6 +211,8 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
 		va += base_level_info->offset;
 
 	state[0] = va >> 8;
+	if (chip_class < GFX9)
+		state[0] |= image->surface.u.legacy.combined_swizzle;
 	state[1] &= C_008F14_BASE_ADDRESS_HI;
 	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
 	state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image, base_level,
@@ -224,7 +228,8 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
 				meta_va += base_level_info->dcc_offset;
 			state[6] |= S_008F28_COMPRESSION_EN(1);
 			state[7] = meta_va >> 8;
-
+			if (chip_class < GFX9)
+				state[7] |= image->surface.u.legacy.combined_swizzle;
 		}
 	}
 
@@ -472,6 +477,8 @@ si_make_texture_descriptor(struct radv_device *device,
 		}
 
 		fmask_state[0] = va >> 8;
+		if (device->physical_device->rad_info.chip_class < GFX9)
+			fmask_state[0] |= image->surface.u.legacy.combined_swizzle;
 		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
 			S_008F14_DATA_FORMAT_GFX6(fmask_format) |
 			S_008F14_NUM_FORMAT_GFX6(num_format);
@@ -752,6 +759,7 @@ radv_image_create(VkDevice _device,
 	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
 	struct radv_image *image = NULL;
 	bool can_cmask_dcc = false;
+	bool dedicated = false;
 	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
 
 	radv_assert(pCreateInfo->mipLevels > 0);
@@ -761,6 +769,11 @@ radv_image_create(VkDevice _device,
 	radv_assert(pCreateInfo->extent.height > 0);
 	radv_assert(pCreateInfo->extent.depth > 0);
 
+	const VkDedicatedAllocationImageCreateInfoNV *dedicate_info =
+		vk_find_struct_const(pCreateInfo->pNext, DEDICATED_ALLOCATION_IMAGE_CREATE_INFO_NV);
+
+	if (dedicate_info && dedicate_info->dedicatedAllocation)
+		dedicated = true;
 	image = vk_alloc2(&device->alloc, alloc, sizeof(*image), 8,
 			    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
 	if (!image)
@@ -789,6 +802,10 @@ radv_image_create(VkDevice _device,
 				image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
 	}
 
+	if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout && !dedicated) {
+		image->info.surf_index = p_atomic_inc_return(&device->image_mrt_offset_counter) - 1;
+	}
+
 	radv_init_surface(device, &image->surface, create_info);
 
 	device->ws->surface_init(device->ws, &image->info, &image->surface);
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index a167409..b0533bd 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -547,6 +547,8 @@ struct radv_device {
 
 	/* Backup in-memory cache to be used if the app doesn't provide one */
 	struct radv_pipeline_cache *                mem_cache;
+
+	uint32_t image_mrt_offset_counter;
 };
 
 struct radv_device_memory {
-- 
2.9.4



More information about the mesa-dev mailing list