[Mesa-dev] [PATCH] radv: set cb base tile swizzles for MRT speedups (v3)
Alex Smith
asmith at feralinteractive.com
Tue Jul 11 13:49:31 UTC 2017
On 11 July 2017 at 14:27, Alex Smith <asmith at feralinteractive.com> wrote:
> On 10 July 2017 at 05:59, Dave Airlie <airlied at gmail.com> wrote:
>
>> From: Dave Airlie <airlied at redhat.com>
>>
>> This patch uses addrlib to work out the tile swizzles according
>> to the surface index. It seems to produce the same values as
>> amdgpu-pro for the deferred test.
>>
>> v2: don't apply swizzle to CMASK. the eg docs don't mention
>> it, and we clearly don't align cmask for that.
>> v3: disable surf index for dedicated images, as these will
>> most likely be shared, and I don't think the metadata has
>> space for this info in it yet.
>>
>
> FWIW, disabling this for images marked as dedicated means our games won't
> see any improvement for render targets. We create all render targets as
> dedicated when NV_dedicated_allocation is available, since this gets us a
> significant perf improvement on NVIDIA.
>
> If it's not currently possible to have this enabled for dedicated images,
> we could avoid using dedicated allocation on AMD, though I'm curious
> whether there are likely to be any other perf benefits to marking RTs as
> dedicated that we'd then be missing out on? I've not done any testing to
> see if there's any benefit from using it.
>
Realised this possibly didn't sound clear - what I'm asking is: does using
NV_dedicated_allocation give any perf benefit on RADV at all, like it does
on NVIDIA? If not, we could avoid it to get the benefits from this patch.
Alex
>
> Thanks,
> Alex
>
>
>> This gets the deferred demo from 730->950fps on my rx480.
>> (dcc cmask elim predication patches get it further)
>> I'm also seeing some improvements in Mad Max at 4K
>>
>> Signed-off-by: Dave Airlie <airlied at redhat.com>
>>
>> fixup for dedicate
>> ---
>> src/amd/common/ac_surface.c | 14 ++++++++++++++
>> src/amd/common/ac_surface.h | 2 ++
>> src/amd/vulkan/radv_device.c | 7 ++++++-
>> src/amd/vulkan/radv_image.c | 19 ++++++++++++++++++-
>> src/amd/vulkan/radv_private.h | 2 ++
>> 5 files changed, 42 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
>> index 23fb66b..0aebacc 100644
>> --- a/src/amd/common/ac_surface.c
>> +++ b/src/amd/common/ac_surface.c
>> @@ -692,6 +692,20 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
>> surf->htile_size *= 2;
>>
>> surf->is_linear = surf->u.legacy.level[0].mode ==
>> RADEON_SURF_MODE_LINEAR_ALIGNED;
>> +
>> + /* workout base swizzle */
>> + if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
>> + ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
>> + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut =
>> {0};
>> +
>> + AddrBaseSwizzleIn.surfIndex = config->info.surf_index;
>> + AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex;
>> + AddrBaseSwizzleIn.macroModeIndex =
>> AddrSurfInfoOut.macroModeIndex;
>> + AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo;
>> + AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode;
>> + AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn,
>> &AddrBaseSwizzleOut);
>> + surf->u.legacy.combined_swizzle =
>> AddrBaseSwizzleOut.tileSwizzle;
>> + }
>> return 0;
>> }
>>
>> diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
>> index 4d893ff..7901b86 100644
>> --- a/src/amd/common/ac_surface.h
>> +++ b/src/amd/common/ac_surface.h
>> @@ -97,6 +97,7 @@ struct legacy_surf_layout {
>> unsigned depth_adjusted:1;
>> unsigned stencil_adjusted:1;
>>
>> + uint8_t combined_swizzle;
>> struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS];
>> struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS];
>> uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
>> @@ -194,6 +195,7 @@ struct ac_surf_info {
>> uint32_t width;
>> uint32_t height;
>> uint32_t depth;
>> + uint32_t surf_index;
>> uint8_t samples;
>> uint8_t levels;
>> uint16_t array_size;
>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>> index 789c90d..eb77914 100644
>> --- a/src/amd/vulkan/radv_device.c
>> +++ b/src/amd/vulkan/radv_device.c
>> @@ -2757,7 +2757,8 @@ radv_initialise_color_surface(struct radv_device
>> *device,
>> }
>>
>> cb->cb_color_base = va >> 8;
>> -
>> + if (device->physical_device->rad_info.chip_class < GFX9)
>> + cb->cb_color_base |= iview->image->surface.u.legacy
>> .combined_swizzle;
>> /* CMASK variables */
>> va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
>> va += iview->image->cmask.offset;
>> @@ -2766,6 +2767,8 @@ radv_initialise_color_surface(struct radv_device
>> *device,
>> va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
>> va += iview->image->dcc_offset;
>> cb->cb_dcc_base = va >> 8;
>> + if (device->physical_device->rad_info.chip_class < GFX9)
>> + cb->cb_dcc_base |= iview->image->surface.u.legacy
>> .combined_swizzle;
>>
>> uint32_t max_slice = radv_surface_layer_count(iview);
>> cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
>> @@ -2781,6 +2784,8 @@ radv_initialise_color_surface(struct radv_device
>> *device,
>> if (iview->image->fmask.size) {
>> va = device->ws->buffer_get_va(iview->bo) +
>> iview->image->offset + iview->image->fmask.offset;
>> cb->cb_color_fmask = va >> 8;
>> + if (device->physical_device->rad_info.chip_class < GFX9)
>> + cb->cb_color_fmask |=
>> iview->image->surface.u.legacy.combined_swizzle;
>> } else {
>> cb->cb_color_fmask = cb->cb_color_base;
>> }
>> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
>> index 115e5a5..28f3874 100644
>> --- a/src/amd/vulkan/radv_image.c
>> +++ b/src/amd/vulkan/radv_image.c
>> @@ -27,10 +27,12 @@
>>
>> #include "radv_private.h"
>> #include "vk_format.h"
>> +#include "vk_util.h"
>> #include "radv_radeon_winsys.h"
>> #include "sid.h"
>> #include "gfx9d.h"
>> #include "util/debug.h"
>> +#include "util/u_atomic.h"
>> static unsigned
>> radv_choose_tiling(struct radv_device *Device,
>> const struct radv_image_create_info *create_info)
>> @@ -209,6 +211,8 @@ si_set_mutable_tex_desc_fields(struct radv_device
>> *device,
>> va += base_level_info->offset;
>>
>> state[0] = va >> 8;
>> + if (chip_class < GFX9)
>> + state[0] |= image->surface.u.legacy.combined_swizzle;
>> state[1] &= C_008F14_BASE_ADDRESS_HI;
>> state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
>> state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image,
>> base_level,
>> @@ -224,7 +228,8 @@ si_set_mutable_tex_desc_fields(struct radv_device
>> *device,
>> meta_va += base_level_info->dcc_offset;
>> state[6] |= S_008F28_COMPRESSION_EN(1);
>> state[7] = meta_va >> 8;
>> -
>> + if (chip_class < GFX9)
>> + state[7] |= image->surface.u.legacy.combin
>> ed_swizzle;
>> }
>> }
>>
>> @@ -472,6 +477,8 @@ si_make_texture_descriptor(struct radv_device
>> *device,
>> }
>>
>> fmask_state[0] = va >> 8;
>> + if (device->physical_device->rad_info.chip_class < GFX9)
>> + fmask_state[0] |= image->surface.u.legacy.combin
>> ed_swizzle;
>> fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
>> S_008F14_DATA_FORMAT_GFX6(fmask_format) |
>> S_008F14_NUM_FORMAT_GFX6(num_format);
>> @@ -752,6 +759,7 @@ radv_image_create(VkDevice _device,
>> const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
>> struct radv_image *image = NULL;
>> bool can_cmask_dcc = false;
>> + bool dedicated = false;
>> assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE
>> _INFO);
>>
>> radv_assert(pCreateInfo->mipLevels > 0);
>> @@ -761,6 +769,11 @@ radv_image_create(VkDevice _device,
>> radv_assert(pCreateInfo->extent.height > 0);
>> radv_assert(pCreateInfo->extent.depth > 0);
>>
>> + const VkDedicatedAllocationImageCreateInfoNV *dedicate_info =
>> + vk_find_struct_const(pCreateInfo->pNext,
>> DEDICATED_ALLOCATION_IMAGE_CREATE_INFO_NV);
>> +
>> + if (dedicate_info && dedicate_info->dedicatedAllocation)
>> + dedicated = true;
>> image = vk_alloc2(&device->alloc, alloc, sizeof(*image), 8,
>> VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
>> if (!image)
>> @@ -789,6 +802,10 @@ radv_image_create(VkDevice _device,
>> image->queue_family_mask |= 1u <<
>> pCreateInfo->pQueueFamilyIndices[i];
>> }
>>
>> + if (!vk_format_is_depth(pCreateInfo->format) &&
>> !create_info->scanout && !dedicated) {
>> + image->info.surf_index = p_atomic_inc_return(&device->image_mrt_offset_counter)
>> - 1;
>> + }
>> +
>> radv_init_surface(device, &image->surface, create_info);
>>
>> device->ws->surface_init(device->ws, &image->info,
>> &image->surface);
>> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.
>> h
>> index a167409..b0533bd 100644
>> --- a/src/amd/vulkan/radv_private.h
>> +++ b/src/amd/vulkan/radv_private.h
>> @@ -547,6 +547,8 @@ struct radv_device {
>>
>> /* Backup in-memory cache to be used if the app doesn't provide
>> one */
>> struct radv_pipeline_cache * mem_cache;
>> +
>> + uint32_t image_mrt_offset_counter;
>> };
>>
>> struct radv_device_memory {
>> --
>> 2.9.4
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170711/a02790f7/attachment-0001.html>
More information about the mesa-dev
mailing list