[Mesa-dev] [PATCH] [rfc] radv: set cb base tile swizzles for MRT speedups

Dave Airlie airlied at gmail.com
Fri Jul 7 22:15:33 UTC 2017


From: Dave Airlie <airlied at redhat.com>

This patch uses addrlib to work out the tile swizzles according
to the surface index. It seems to produce the same values as
AMDGPU-PRO for the deferred test.

This takes the deferred demo from 730 to 950 fps on my RX 480.
(The DCC/CMASK eliminate predication patches get it further.)

Open question: should we do this for all textures, or just for
color surfaces?
---
 src/amd/common/ac_surface.c   | 14 ++++++++++++++
 src/amd/common/ac_surface.h   |  2 ++
 src/amd/vulkan/radv_device.c  |  9 ++++++++-
 src/amd/vulkan/radv_image.c   | 12 +++++++++++-
 src/amd/vulkan/radv_private.h |  2 ++
 5 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 877d592..c936426 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -684,6 +684,20 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
 		surf->htile_size *= 2;
 
 	surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+	/* work out base swizzle */
+	if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
+		ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
+		ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
+
+		AddrBaseSwizzleIn.surfIndex = config->info.surf_index;
+		AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex;
+		AddrBaseSwizzleIn.macroModeIndex = AddrSurfInfoOut.macroModeIndex;
+		AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo;
+		AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode;
+		AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut);
+		surf->u.legacy.combined_swizzle = AddrBaseSwizzleOut.tileSwizzle;
+	}
 	return 0;
 }
 
diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
index 582a671..ecba4c8 100644
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@@ -97,6 +97,7 @@ struct legacy_surf_layout {
     unsigned                    depth_adjusted:1;
     unsigned                    stencil_adjusted:1;
 
+    uint8_t                     combined_swizzle;
     struct legacy_surf_level    level[RADEON_SURF_MAX_LEVELS];
     struct legacy_surf_level    stencil_level[RADEON_SURF_MAX_LEVELS];
     uint8_t                     tiling_index[RADEON_SURF_MAX_LEVELS];
@@ -194,6 +195,7 @@ struct ac_surf_info {
 	uint32_t width;
 	uint32_t height;
 	uint32_t depth;
+	uint32_t surf_index;
 	uint8_t samples;
 	uint8_t levels;
 	uint16_t array_size;
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 59efccf..0cf0d57 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2757,15 +2757,20 @@ radv_initialise_color_surface(struct radv_device *device,
 	}
 
 	cb->cb_color_base = va >> 8;
-
+	if (device->physical_device->rad_info.chip_class < GFX9)
+		cb->cb_color_base |= iview->image->surface.u.legacy.combined_swizzle;
 	/* CMASK variables */
 	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
 	va += iview->image->cmask.offset;
 	cb->cb_color_cmask = va >> 8;
+	if (device->physical_device->rad_info.chip_class < GFX9)
+		cb->cb_color_cmask |= iview->image->surface.u.legacy.combined_swizzle;
 
 	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
 	va += iview->image->dcc_offset;
 	cb->cb_dcc_base = va >> 8;
+	if (device->physical_device->rad_info.chip_class < GFX9)
+		cb->cb_dcc_base |= iview->image->surface.u.legacy.combined_swizzle;
 
 	uint32_t max_slice = radv_surface_layer_count(iview);
 	cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
@@ -2781,6 +2786,8 @@ radv_initialise_color_surface(struct radv_device *device,
 	if (iview->image->fmask.size) {
 		va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
 		cb->cb_color_fmask = va >> 8;
+		if (device->physical_device->rad_info.chip_class < GFX9)
+			cb->cb_color_fmask |= iview->image->surface.u.legacy.combined_swizzle;
 	} else {
 		cb->cb_color_fmask = cb->cb_color_base;
 	}
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 115e5a5..cd0af5e 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -31,6 +31,7 @@
 #include "sid.h"
 #include "gfx9d.h"
 #include "util/debug.h"
+#include "util/u_atomic.h"
 static unsigned
 radv_choose_tiling(struct radv_device *Device,
 		   const struct radv_image_create_info *create_info)
@@ -209,6 +210,8 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
 		va += base_level_info->offset;
 
 	state[0] = va >> 8;
+	if (chip_class < GFX9)
+		state[0] |= image->surface.u.legacy.combined_swizzle;
 	state[1] &= C_008F14_BASE_ADDRESS_HI;
 	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
 	state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image, base_level,
@@ -224,7 +227,8 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
 				meta_va += base_level_info->dcc_offset;
 			state[6] |= S_008F28_COMPRESSION_EN(1);
 			state[7] = meta_va >> 8;
-
+			if (chip_class < GFX9)
+				state[7] |= image->surface.u.legacy.combined_swizzle;
 		}
 	}
 
@@ -472,6 +476,8 @@ si_make_texture_descriptor(struct radv_device *device,
 		}
 
 		fmask_state[0] = va >> 8;
+		if (device->physical_device->rad_info.chip_class < GFX9)
+			fmask_state[0] |= image->surface.u.legacy.combined_swizzle;
 		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
 			S_008F14_DATA_FORMAT_GFX6(fmask_format) |
 			S_008F14_NUM_FORMAT_GFX6(num_format);
@@ -789,6 +795,10 @@ radv_image_create(VkDevice _device,
 				image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
 	}
 
+	if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && !create_info->scanout) {
+		image->info.surf_index = p_atomic_inc_return(&device->image_mrt_offset_counter) - 1;
+	}
+
 	radv_init_surface(device, &image->surface, create_info);
 
 	device->ws->surface_init(device->ws, &image->info, &image->surface);
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index a167409..b0533bd 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -547,6 +547,8 @@ struct radv_device {
 
 	/* Backup in-memory cache to be used if the app doesn't provide one */
 	struct radv_pipeline_cache *                mem_cache;
+
+	uint32_t image_mrt_offset_counter;
 };
 
 struct radv_device_memory {
-- 
2.9.4



More information about the mesa-dev mailing list