[Mesa-dev] [PATCH] radv: set cb base tile swizzles for MRT speedups (v3)
Dave Airlie
airlied at gmail.com
Mon Jul 10 04:59:40 UTC 2017
From: Dave Airlie <airlied at redhat.com>
This patch uses addrlib to work out the tile swizzles according
to the surface index. It seems to produce the same values as
amdgpu-pro for the deferred test.
v2: don't apply the swizzle to CMASK. The eg docs don't mention
it, and we clearly don't align CMASK for that.
v3: disable surf index for dedicated images, as these will
most likely be shared, and I don't think the metadata has
space for this info in it yet.
This gets the deferred demo from 730 -> 950 fps on my rx480.
(The dcc cmask elim predication patches get it further.)
I'm also seeing some improvements in Mad Max at 4K.
Signed-off-by: Dave Airlie <airlied at redhat.com>
fixup for dedicate
---
src/amd/common/ac_surface.c | 14 ++++++++++++++
src/amd/common/ac_surface.h | 2 ++
src/amd/vulkan/radv_device.c | 7 ++++++-
src/amd/vulkan/radv_image.c | 19 ++++++++++++++++++-
src/amd/vulkan/radv_private.h | 2 ++
5 files changed, 42 insertions(+), 2 deletions(-)
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 23fb66b..0aebacc 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -692,6 +692,20 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
surf->htile_size *= 2;
surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+ /* workout base swizzle */
+ if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
+ ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
+ ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
+
+ AddrBaseSwizzleIn.surfIndex = config->info.surf_index;
+ AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex;
+ AddrBaseSwizzleIn.macroModeIndex = AddrSurfInfoOut.macroModeIndex;
+ AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo;
+ AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode;
+ AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut);
+ surf->u.legacy.combined_swizzle = AddrBaseSwizzleOut.tileSwizzle;
+ }
return 0;
}
diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
index 4d893ff..7901b86 100644
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@@ -97,6 +97,7 @@ struct legacy_surf_layout {
unsigned depth_adjusted:1;
unsigned stencil_adjusted:1;
+ uint8_t combined_swizzle;
struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS];
struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS];
uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
@@ -194,6 +195,7 @@ struct ac_surf_info {
uint32_t width;
uint32_t height;
uint32_t depth;
+ uint32_t surf_index;
uint8_t samples;
uint8_t levels;
uint16_t array_size;
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 789c90d..eb77914 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2757,7 +2757,8 @@ radv_initialise_color_surface(struct radv_device *device,
}
cb->cb_color_base = va >> 8;
-
+ if (device->physical_device->rad_info.chip_class < GFX9)
+ cb->cb_color_base |= iview->image->surface.u.legacy.combined_swizzle;
/* CMASK variables */
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
va += iview->image->cmask.offset;
@@ -2766,6 +2767,8 @@ radv_initialise_color_surface(struct radv_device *device,
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
va += iview->image->dcc_offset;
cb->cb_dcc_base = va >> 8;
+ if (device->physical_device->rad_info.chip_class < GFX9)
+ cb->cb_dcc_base |= iview->image->surface.u.legacy.combined_swizzle;
uint32_t max_slice = radv_surface_layer_count(iview);
cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
@@ -2781,6 +2784,8 @@ radv_initialise_color_surface(struct radv_device *device,
if (iview->image->fmask.size) {
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
cb->cb_color_fmask = va >> 8;
+ if (device->physical_device->rad_info.chip_class < GFX9)
+ cb->cb_color_fmask |= iview->image->surface.u.legacy.combined_swizzle;
} else {
cb->cb_color_fmask = cb->cb_color_base;
}
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 115e5a5..28f3874 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -27,10 +27,12 @@
#include "radv_private.h"
#include "vk_format.h"
+#include "vk_util.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
#include "gfx9d.h"
#include "util/debug.h"
+#include "util/u_atomic.h"
static unsigned
radv_choose_tiling(struct radv_device *Device,
const struct radv_image_create_info *create_info)
@@ -209,6 +211,8 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
va += base_level_info->offset;
state[0] = va >> 8;
+ if (chip_class < GFX9)
+ state[0] |= image->surface.u.legacy.combined_swizzle;
state[1] &= C_008F14_BASE_ADDRESS_HI;
state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image, base_level,
@@ -224,7 +228,8 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
meta_va += base_level_info->dcc_offset;
state[6] |= S_008F28_COMPRESSION_EN(1);
state[7] = meta_va >> 8;
-
+ if (chip_class < GFX9)
+ state[7] |= image->surface.u.legacy.combined_swizzle;
}
}
@@ -472,6 +477,8 @@ si_make_texture_descriptor(struct radv_device *device,
}
fmask_state[0] = va >> 8;
+ if (device->physical_device->rad_info.chip_class < GFX9)
+ fmask_state[0] |= image->surface.u.legacy.combined_swizzle;
fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
S_008F14_DATA_FORMAT_GFX6(fmask_format) |
S_008F14_NUM_FORMAT_GFX6(num_format);
@@ -752,6 +759,7 @@ radv_image_create(VkDevice _device,
const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
struct radv_image *image = NULL;
bool can_cmask_dcc = false;
+ bool dedicated = false;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
radv_assert(pCreateInfo->mipLevels > 0);
@@ -761,6 +769,11 @@ radv_image_create(VkDevice _device,
radv_assert(pCreateInfo->extent.height > 0);
radv_assert(pCreateInfo->extent.depth > 0);
+ const VkDedicatedAllocationImageCreateInfoNV *dedicate_info =
+ vk_find_struct_const(pCreateInfo->pNext, DEDICATED_ALLOCATION_IMAGE_CREATE_INFO_NV);
+
+ if (dedicate_info && dedicate_info->dedicatedAllocation)
+ dedicated = true;
image = vk_alloc2(&device->alloc, alloc, sizeof(*image), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!image)
@@ -789,6 +802,10 @@ radv_image_create(VkDevice _device,
image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
}
+ if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout && !dedicated) {
+ image->info.surf_index = p_atomic_inc_return(&device->image_mrt_offset_counter) - 1;
+ }
+
radv_init_surface(device, &image->surface, create_info);
device->ws->surface_init(device->ws, &image->info, &image->surface);
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index a167409..b0533bd 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -547,6 +547,8 @@ struct radv_device {
/* Backup in-memory cache to be used if the app doesn't provide one */
struct radv_pipeline_cache * mem_cache;
+
+ uint32_t image_mrt_offset_counter;
};
struct radv_device_memory {
--
2.9.4
More information about the mesa-dev
mailing list