[Mesa-dev] [PATCH] radv: set cb base tile swizzles for MRT speedups (v3)

Alex Smith asmith at feralinteractive.com
Tue Jul 11 13:49:31 UTC 2017


On 11 July 2017 at 14:27, Alex Smith <asmith at feralinteractive.com> wrote:

> On 10 July 2017 at 05:59, Dave Airlie <airlied at gmail.com> wrote:
>
>> From: Dave Airlie <airlied at redhat.com>
>>
>> This patch uses addrlib to work out the tile swizzles according
>> to the surface index. It seems to produce the same values as
>> amdgpu-pro for the deferred test.
>>
>> v2: don't apply swizzle to CMASK. The eg docs don't mention
>> it, and we clearly don't align CMASK for that.
>> v3: disable surf index for dedicated images, as these will
>> most likely be shared, and I don't think the metadata has
>> space for this info in it yet.
>>
>
> FWIW, disabling this for images marked as dedicated means this won't get
> any improvements for render targets on our games. We create all render
> targets as dedicated when NV_dedicated_allocation is available since this
> gets us significant perf improvement on NVIDIA.
>
> If it's not currently possible to have this enabled for dedicated images
> we could avoid using it on AMD, though I'm curious if there's likely to be
> any other perf benefits to marking RTs as dedicated we'd then be missing
> out on? I've not done any testing to see if there's any benefit from using
> it.
>

Realised this possibly wasn't clear - what I'm asking is: does using
NV_dedicated_allocation give any perf benefit on RADV at all, like it does
on NVIDIA? If not, we could avoid it to get the benefits from this patch.

Alex


>
> Thanks,
> Alex
>
>
>> This gets the deferred demo from 730->950fps on my RX 480.
>> (The dcc cmask elim predication patches get it further.)
>> I'm also seeing some improvements in Mad Max at 4K.
>>
>> Signed-off-by: Dave Airlie <airlied at redhat.com>
>>
>> fixup for dedicate
>> ---
>>  src/amd/common/ac_surface.c   | 14 ++++++++++++++
>>  src/amd/common/ac_surface.h   |  2 ++
>>  src/amd/vulkan/radv_device.c  |  7 ++++++-
>>  src/amd/vulkan/radv_image.c   | 19 ++++++++++++++++++-
>>  src/amd/vulkan/radv_private.h |  2 ++
>>  5 files changed, 42 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
>> index 23fb66b..0aebacc 100644
>> --- a/src/amd/common/ac_surface.c
>> +++ b/src/amd/common/ac_surface.c
>> @@ -692,6 +692,20 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
>>                 surf->htile_size *= 2;
>>
>>         surf->is_linear = surf->u.legacy.level[0].mode ==
>> RADEON_SURF_MODE_LINEAR_ALIGNED;
>> +
>> +       /* workout base swizzle */
>> +       if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
>> +               ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
>> +               ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut =
>> {0};
>> +
>> +               AddrBaseSwizzleIn.surfIndex = config->info.surf_index;
>> +               AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex;
>> +               AddrBaseSwizzleIn.macroModeIndex =
>> AddrSurfInfoOut.macroModeIndex;
>> +               AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo;
>> +               AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode;
>> +               AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn,
>> &AddrBaseSwizzleOut);
>> +               surf->u.legacy.combined_swizzle =
>> AddrBaseSwizzleOut.tileSwizzle;
>> +       }
>>         return 0;
>>  }
>>
>> diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
>> index 4d893ff..7901b86 100644
>> --- a/src/amd/common/ac_surface.h
>> +++ b/src/amd/common/ac_surface.h
>> @@ -97,6 +97,7 @@ struct legacy_surf_layout {
>>      unsigned                    depth_adjusted:1;
>>      unsigned                    stencil_adjusted:1;
>>
>> +    uint8_t                     combined_swizzle;
>>      struct legacy_surf_level    level[RADEON_SURF_MAX_LEVELS];
>>      struct legacy_surf_level    stencil_level[RADEON_SURF_MAX_LEVELS];
>>      uint8_t                     tiling_index[RADEON_SURF_MAX_LEVELS];
>> @@ -194,6 +195,7 @@ struct ac_surf_info {
>>         uint32_t width;
>>         uint32_t height;
>>         uint32_t depth;
>> +       uint32_t surf_index;
>>         uint8_t samples;
>>         uint8_t levels;
>>         uint16_t array_size;
>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>> index 789c90d..eb77914 100644
>> --- a/src/amd/vulkan/radv_device.c
>> +++ b/src/amd/vulkan/radv_device.c
>> @@ -2757,7 +2757,8 @@ radv_initialise_color_surface(struct radv_device
>> *device,
>>         }
>>
>>         cb->cb_color_base = va >> 8;
>> -
>> +       if (device->physical_device->rad_info.chip_class < GFX9)
>> +               cb->cb_color_base |= iview->image->surface.u.legacy
>> .combined_swizzle;
>>         /* CMASK variables */
>>         va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
>>         va += iview->image->cmask.offset;
>> @@ -2766,6 +2767,8 @@ radv_initialise_color_surface(struct radv_device
>> *device,
>>         va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
>>         va += iview->image->dcc_offset;
>>         cb->cb_dcc_base = va >> 8;
>> +       if (device->physical_device->rad_info.chip_class < GFX9)
>> +               cb->cb_dcc_base |= iview->image->surface.u.legacy
>> .combined_swizzle;
>>
>>         uint32_t max_slice = radv_surface_layer_count(iview);
>>         cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
>> @@ -2781,6 +2784,8 @@ radv_initialise_color_surface(struct radv_device
>> *device,
>>         if (iview->image->fmask.size) {
>>                 va = device->ws->buffer_get_va(iview->bo) +
>> iview->image->offset + iview->image->fmask.offset;
>>                 cb->cb_color_fmask = va >> 8;
>> +               if (device->physical_device->rad_info.chip_class < GFX9)
>> +                       cb->cb_color_fmask |=
>> iview->image->surface.u.legacy.combined_swizzle;
>>         } else {
>>                 cb->cb_color_fmask = cb->cb_color_base;
>>         }
>> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
>> index 115e5a5..28f3874 100644
>> --- a/src/amd/vulkan/radv_image.c
>> +++ b/src/amd/vulkan/radv_image.c
>> @@ -27,10 +27,12 @@
>>
>>  #include "radv_private.h"
>>  #include "vk_format.h"
>> +#include "vk_util.h"
>>  #include "radv_radeon_winsys.h"
>>  #include "sid.h"
>>  #include "gfx9d.h"
>>  #include "util/debug.h"
>> +#include "util/u_atomic.h"
>>  static unsigned
>>  radv_choose_tiling(struct radv_device *Device,
>>                    const struct radv_image_create_info *create_info)
>> @@ -209,6 +211,8 @@ si_set_mutable_tex_desc_fields(struct radv_device
>> *device,
>>                 va += base_level_info->offset;
>>
>>         state[0] = va >> 8;
>> +       if (chip_class < GFX9)
>> +               state[0] |= image->surface.u.legacy.combined_swizzle;
>>         state[1] &= C_008F14_BASE_ADDRESS_HI;
>>         state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
>>         state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image,
>> base_level,
>> @@ -224,7 +228,8 @@ si_set_mutable_tex_desc_fields(struct radv_device
>> *device,
>>                                 meta_va += base_level_info->dcc_offset;
>>                         state[6] |= S_008F28_COMPRESSION_EN(1);
>>                         state[7] = meta_va >> 8;
>> -
>> +                       if (chip_class < GFX9)
>> +                               state[7] |= image->surface.u.legacy.combined_swizzle;
>>                 }
>>         }
>>
>> @@ -472,6 +477,8 @@ si_make_texture_descriptor(struct radv_device
>> *device,
>>                 }
>>
>>                 fmask_state[0] = va >> 8;
>> +               if (device->physical_device->rad_info.chip_class < GFX9)
>> +                       fmask_state[0] |= image->surface.u.legacy.combined_swizzle;
>>                 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
>>                         S_008F14_DATA_FORMAT_GFX6(fmask_format) |
>>                         S_008F14_NUM_FORMAT_GFX6(num_format);
>> @@ -752,6 +759,7 @@ radv_image_create(VkDevice _device,
>>         const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
>>         struct radv_image *image = NULL;
>>         bool can_cmask_dcc = false;
>> +       bool dedicated = false;
>>         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
>>
>>         radv_assert(pCreateInfo->mipLevels > 0);
>> @@ -761,6 +769,11 @@ radv_image_create(VkDevice _device,
>>         radv_assert(pCreateInfo->extent.height > 0);
>>         radv_assert(pCreateInfo->extent.depth > 0);
>>
>> +       const VkDedicatedAllocationImageCreateInfoNV *dedicate_info =
>> +               vk_find_struct_const(pCreateInfo->pNext,
>> DEDICATED_ALLOCATION_IMAGE_CREATE_INFO_NV);
>> +
>> +       if (dedicate_info && dedicate_info->dedicatedAllocation)
>> +               dedicated = true;
>>         image = vk_alloc2(&device->alloc, alloc, sizeof(*image), 8,
>>                             VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
>>         if (!image)
>> @@ -789,6 +802,10 @@ radv_image_create(VkDevice _device,
>>                                 image->queue_family_mask |= 1u <<
>> pCreateInfo->pQueueFamilyIndices[i];
>>         }
>>
>> +       if (!vk_format_is_depth(pCreateInfo->format) &&
>> !create_info->scanout && !dedicated) {
>> +               image->info.surf_index = p_atomic_inc_return(&device->image_mrt_offset_counter)
>> - 1;
>> +       }
>> +
>>         radv_init_surface(device, &image->surface, create_info);
>>
>>         device->ws->surface_init(device->ws, &image->info,
>> &image->surface);
>> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
>> index a167409..b0533bd 100644
>> --- a/src/amd/vulkan/radv_private.h
>> +++ b/src/amd/vulkan/radv_private.h
>> @@ -547,6 +547,8 @@ struct radv_device {
>>
>>         /* Backup in-memory cache to be used if the app doesn't provide
>> one */
>>         struct radv_pipeline_cache *                mem_cache;
>> +
>> +       uint32_t image_mrt_offset_counter;
>>  };
>>
>>  struct radv_device_memory {
>> --
>> 2.9.4
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170711/a02790f7/attachment-0001.html>


More information about the mesa-dev mailing list