[Mesa-dev] [PATCH] radv: set cb base tile swizzles for MRT speedups (v3)
Alex Smith
asmith at feralinteractive.com
Tue Jul 11 13:27:06 UTC 2017
On 10 July 2017 at 05:59, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This patch uses addrlib to work out the tile swizzles according
> to the surface index. It seems to produce the same values as
> amdgpu-pro for the deferred test.
>
> v2: don't apply swizzle to CMASK. the eg docs don't mention
> it, and we clearly don't align cmask for that.
> v3: disable surf index for dedicated images, as these will
> most likely be shared, and I don't think the metadata has
> space for this info in it yet.
>
FWIW, disabling this for images marked as dedicated means our games won't
see any improvement for render targets. We create all render targets as
dedicated when NV_dedicated_allocation is available, since this gets us a
significant perf improvement on NVIDIA.
If it's not currently possible to have this enabled for dedicated images, we
could avoid using dedicated allocation on AMD, though I'm curious whether
there are likely to be any other perf benefits to marking RTs as dedicated
that we'd then be missing out on. I've not done any testing to see whether
there's any benefit from using dedicated allocation on AMD.
Thanks,
Alex
> This gets the deferred demo from 730->950fps on my rx480.
> (dcc cmask elim predication patches get it further)
> I'm also seeing some improvements in Mad Max at 4K
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
>
> fixup for dedicate
> ---
> src/amd/common/ac_surface.c | 14 ++++++++++++++
> src/amd/common/ac_surface.h | 2 ++
> src/amd/vulkan/radv_device.c | 7 ++++++-
> src/amd/vulkan/radv_image.c | 19 ++++++++++++++++++-
> src/amd/vulkan/radv_private.h | 2 ++
> 5 files changed, 42 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
> index 23fb66b..0aebacc 100644
> --- a/src/amd/common/ac_surface.c
> +++ b/src/amd/common/ac_surface.c
> @@ -692,6 +692,20 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
> surf->htile_size *= 2;
>
> surf->is_linear = surf->u.legacy.level[0].mode ==
> RADEON_SURF_MODE_LINEAR_ALIGNED;
> +
> + /* workout base swizzle */
> + if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
> + ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
> + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
> +
> + AddrBaseSwizzleIn.surfIndex = config->info.surf_index;
> + AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex;
> + AddrBaseSwizzleIn.macroModeIndex = AddrSurfInfoOut.
> macroModeIndex;
> + AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo;
> + AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode;
> + AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn,
> &AddrBaseSwizzleOut);
> + surf->u.legacy.combined_swizzle = AddrBaseSwizzleOut.
> tileSwizzle;
> + }
> return 0;
> }
>
> diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
> index 4d893ff..7901b86 100644
> --- a/src/amd/common/ac_surface.h
> +++ b/src/amd/common/ac_surface.h
> @@ -97,6 +97,7 @@ struct legacy_surf_layout {
> unsigned depth_adjusted:1;
> unsigned stencil_adjusted:1;
>
> + uint8_t combined_swizzle;
> struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS];
> struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS];
> uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
> @@ -194,6 +195,7 @@ struct ac_surf_info {
> uint32_t width;
> uint32_t height;
> uint32_t depth;
> + uint32_t surf_index;
> uint8_t samples;
> uint8_t levels;
> uint16_t array_size;
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 789c90d..eb77914 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -2757,7 +2757,8 @@ radv_initialise_color_surface(struct radv_device
> *device,
> }
>
> cb->cb_color_base = va >> 8;
> -
> + if (device->physical_device->rad_info.chip_class < GFX9)
> + cb->cb_color_base |= iview->image->surface.u.
> legacy.combined_swizzle;
> /* CMASK variables */
> va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
> va += iview->image->cmask.offset;
> @@ -2766,6 +2767,8 @@ radv_initialise_color_surface(struct radv_device
> *device,
> va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
> va += iview->image->dcc_offset;
> cb->cb_dcc_base = va >> 8;
> + if (device->physical_device->rad_info.chip_class < GFX9)
> + cb->cb_dcc_base |= iview->image->surface.u.
> legacy.combined_swizzle;
>
> uint32_t max_slice = radv_surface_layer_count(iview);
> cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
> @@ -2781,6 +2784,8 @@ radv_initialise_color_surface(struct radv_device
> *device,
> if (iview->image->fmask.size) {
> va = device->ws->buffer_get_va(iview->bo) +
> iview->image->offset + iview->image->fmask.offset;
> cb->cb_color_fmask = va >> 8;
> + if (device->physical_device->rad_info.chip_class < GFX9)
> + cb->cb_color_fmask |= iview->image->surface.u.
> legacy.combined_swizzle;
> } else {
> cb->cb_color_fmask = cb->cb_color_base;
> }
> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
> index 115e5a5..28f3874 100644
> --- a/src/amd/vulkan/radv_image.c
> +++ b/src/amd/vulkan/radv_image.c
> @@ -27,10 +27,12 @@
>
> #include "radv_private.h"
> #include "vk_format.h"
> +#include "vk_util.h"
> #include "radv_radeon_winsys.h"
> #include "sid.h"
> #include "gfx9d.h"
> #include "util/debug.h"
> +#include "util/u_atomic.h"
> static unsigned
> radv_choose_tiling(struct radv_device *Device,
> const struct radv_image_create_info *create_info)
> @@ -209,6 +211,8 @@ si_set_mutable_tex_desc_fields(struct radv_device
> *device,
> va += base_level_info->offset;
>
> state[0] = va >> 8;
> + if (chip_class < GFX9)
> + state[0] |= image->surface.u.legacy.combined_swizzle;
> state[1] &= C_008F14_BASE_ADDRESS_HI;
> state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
> state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image,
> base_level,
> @@ -224,7 +228,8 @@ si_set_mutable_tex_desc_fields(struct radv_device
> *device,
> meta_va += base_level_info->dcc_offset;
> state[6] |= S_008F28_COMPRESSION_EN(1);
> state[7] = meta_va >> 8;
> -
> + if (chip_class < GFX9)
> + state[7] |= image->surface.u.legacy.
> combined_swizzle;
> }
> }
>
> @@ -472,6 +477,8 @@ si_make_texture_descriptor(struct radv_device *device,
> }
>
> fmask_state[0] = va >> 8;
> + if (device->physical_device->rad_info.chip_class < GFX9)
> + fmask_state[0] |= image->surface.u.legacy.
> combined_swizzle;
> fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
> S_008F14_DATA_FORMAT_GFX6(fmask_format) |
> S_008F14_NUM_FORMAT_GFX6(num_format);
> @@ -752,6 +759,7 @@ radv_image_create(VkDevice _device,
> const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
> struct radv_image *image = NULL;
> bool can_cmask_dcc = false;
> + bool dedicated = false;
> assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
>
> radv_assert(pCreateInfo->mipLevels > 0);
> @@ -761,6 +769,11 @@ radv_image_create(VkDevice _device,
> radv_assert(pCreateInfo->extent.height > 0);
> radv_assert(pCreateInfo->extent.depth > 0);
>
> + const VkDedicatedAllocationImageCreateInfoNV *dedicate_info =
> + vk_find_struct_const(pCreateInfo->pNext,
> DEDICATED_ALLOCATION_IMAGE_CREATE_INFO_NV);
> +
> + if (dedicate_info && dedicate_info->dedicatedAllocation)
> + dedicated = true;
> image = vk_alloc2(&device->alloc, alloc, sizeof(*image), 8,
> VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
> if (!image)
> @@ -789,6 +802,10 @@ radv_image_create(VkDevice _device,
> image->queue_family_mask |= 1u <<
> pCreateInfo->pQueueFamilyIndices[i];
> }
>
> + if (!vk_format_is_depth(pCreateInfo->format) &&
> !create_info->scanout && !dedicated) {
> + image->info.surf_index = p_atomic_inc_return(&device->image_mrt_offset_counter)
> - 1;
> + }
> +
> radv_init_surface(device, &image->surface, create_info);
>
> device->ws->surface_init(device->ws, &image->info,
> &image->surface);
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index a167409..b0533bd 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -547,6 +547,8 @@ struct radv_device {
>
> /* Backup in-memory cache to be used if the app doesn't provide
> one */
> struct radv_pipeline_cache * mem_cache;
> +
> + uint32_t image_mrt_offset_counter;
> };
>
> struct radv_device_memory {
> --
> 2.9.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170711/8518973c/attachment-0001.html>
More information about the mesa-dev mailing list