<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On 10 July 2017 at 05:59, Dave Airlie <span dir="ltr"><<a href="mailto:airlied@gmail.com" target="_blank">airlied@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">From: Dave Airlie <<a href="mailto:airlied@redhat.com">airlied@redhat.com</a>><br>
<br>
This patch uses addrlib to work out the tile swizzles according<br>
to the surface index. It seems to produce the same values as<br>
amdgpu-pro for the deferred test.<br>
<br>
v2: don't apply swizzle to CMASK. The eg docs don't mention<br>
it, and we clearly don't align cmask for that.<br>
v3: disable surf index for dedicated images, as these will<br>
most likely be shared, and I don't think the metadata has<br>
space for this info in it yet.<br></blockquote><div><br></div><div>FWIW, disabling this for images marked as dedicated means this won't get any improvements for render targets on our games. We create all render targets as dedicated when NV_dedicated_allocation is available since this gets us a significant perf improvement on NVIDIA.<div><br></div><div>If it's not currently possible to have this enabled for dedicated images we could avoid using it on AMD, though I'm curious whether there are likely to be any other perf benefits to marking RTs as dedicated that we'd then be missing out on. I've not done any testing to see if there's any benefit from using it.</div></div><div><br></div><div>Thanks,</div><div>Alex</div><div><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<br>
This gets the deferred demo from 730->950fps on my rx480.<br>
(dcc cmask elim predication patches get it further)<br>
I'm also seeing some improvements in Mad Max at 4K<br>
<br>
Signed-off-by: Dave Airlie <<a href="mailto:airlied@redhat.com">airlied@redhat.com</a>><br>
<br>
fixup for dedicate<br>
---<br>
 src/amd/common/ac_surface.c   | 14 ++++++++++++++<br>
 src/amd/common/ac_surface.h   |  2 ++<br>
 src/amd/vulkan/radv_device.c  |  7 ++++++-<br>
 src/amd/vulkan/radv_image.c   | 19 ++++++++++++++++++-<br>
 src/amd/vulkan/radv_private.h |  2 ++<br>
 5 files changed, 42 insertions(+), 2 deletions(-)<br>
<br>
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c<br>
index 23fb66b..0aebacc 100644<br>
--- a/src/amd/common/ac_surface.c<br>
+++ b/src/amd/common/ac_surface.c<br>
@@ -692,6 +692,20 @@ static int gfx6_compute_surface(ADDR_<wbr>HANDLE addrlib,<br>
                surf->htile_size *= 2;<br>
<br>
        surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_<wbr>ALIGNED;<br>
+<br>
+       /* workout base swizzle */<br>
+       if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {<br>
+               ADDR_COMPUTE_BASE_SWIZZLE_<wbr>INPUT AddrBaseSwizzleIn = {0};<br>
+               ADDR_COMPUTE_BASE_SWIZZLE_<wbr>OUTPUT AddrBaseSwizzleOut = {0};<br>
+<br>
+               AddrBaseSwizzleIn.surfIndex = config->info.surf_index;<br>
+               AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex;<br>
+               AddrBaseSwizzleIn.<wbr>macroModeIndex = AddrSurfInfoOut.<wbr>macroModeIndex;<br>
+               AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo;<br>
+               AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode;<br>
+               AddrComputeBaseSwizzle(<wbr>addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut);<br>
+               surf->u.legacy.combined_<wbr>swizzle = AddrBaseSwizzleOut.<wbr>tileSwizzle;<br>
+       }<br>
        return 0;<br>
 }<br>
<br>
diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h<br>
index 4d893ff..7901b86 100644<br>
--- a/src/amd/common/ac_surface.h<br>
+++ b/src/amd/common/ac_surface.h<br>
@@ -97,6 +97,7 @@ struct legacy_surf_layout {<br>
     unsigned                    depth_adjusted:1;<br>
     unsigned                    stencil_adjusted:1;<br>
<br>
+    uint8_t                     combined_swizzle;<br>
     struct legacy_surf_level    level[RADEON_SURF_MAX_LEVELS];<br>
     struct legacy_surf_level    stencil_level[RADEON_SURF_MAX_<wbr>LEVELS];<br>
     uint8_t                     tiling_index[RADEON_SURF_MAX_<wbr>LEVELS];<br>
@@ -194,6 +195,7 @@ struct ac_surf_info {<br>
        uint32_t width;<br>
        uint32_t height;<br>
        uint32_t depth;<br>
+       uint32_t surf_index;<br>
        uint8_t samples;<br>
        uint8_t levels;<br>
        uint16_t array_size;<br>
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c<br>
index 789c90d..eb77914 100644<br>
--- a/src/amd/vulkan/radv_device.c<br>
+++ b/src/amd/vulkan/radv_device.c<br>
@@ -2757,7 +2757,8 @@ radv_initialise_color_surface(<wbr>struct radv_device *device,<br>
        }<br>
<br>
        cb->cb_color_base = va >> 8;<br>
-<br>
+       if (device->physical_device->rad_<wbr>info.chip_class < GFX9)<br>
+               cb->cb_color_base |= iview->image->surface.u.<wbr>legacy.combined_swizzle;<br>
        /* CMASK variables */<br>
        va = device->ws->buffer_get_va(<wbr>iview->bo) + iview->image->offset;<br>
        va += iview->image->cmask.offset;<br>
@@ -2766,6 +2767,8 @@ radv_initialise_color_surface(<wbr>struct radv_device *device,<br>
        va = device->ws->buffer_get_va(<wbr>iview->bo) + iview->image->offset;<br>
        va += iview->image->dcc_offset;<br>
        cb->cb_dcc_base = va >> 8;<br>
+       if (device->physical_device->rad_<wbr>info.chip_class < GFX9)<br>
+               cb->cb_dcc_base |= iview->image->surface.u.<wbr>legacy.combined_swizzle;<br>
<br>
        uint32_t max_slice = radv_surface_layer_count(<wbr>iview);<br>
        cb->cb_color_view = S_028C6C_SLICE_START(iview-><wbr>base_layer) |<br>
@@ -2781,6 +2784,8 @@ radv_initialise_color_surface(<wbr>struct radv_device *device,<br>
        if (iview->image->fmask.size) {<br>
                va = device->ws->buffer_get_va(<wbr>iview->bo) + iview->image->offset + iview->image->fmask.offset;<br>
                cb->cb_color_fmask = va >> 8;<br>
+               if (device->physical_device->rad_<wbr>info.chip_class < GFX9)<br>
+                       cb->cb_color_fmask |= iview->image->surface.u.<wbr>legacy.combined_swizzle;<br>
        } else {<br>
                cb->cb_color_fmask = cb->cb_color_base;<br>
        }<br>
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c<br>
index 115e5a5..28f3874 100644<br>
--- a/src/amd/vulkan/radv_image.c<br>
+++ b/src/amd/vulkan/radv_image.c<br>
@@ -27,10 +27,12 @@<br>
<br>
 #include "radv_private.h"<br>
 #include "vk_format.h"<br>
+#include "vk_util.h"<br>
 #include "radv_radeon_winsys.h"<br>
 #include "sid.h"<br>
 #include "gfx9d.h"<br>
 #include "util/debug.h"<br>
+#include "util/u_atomic.h"<br>
 static unsigned<br>
 radv_choose_tiling(struct radv_device *Device,<br>
                   const struct radv_image_create_info *create_info)<br>
@@ -209,6 +211,8 @@ si_set_mutable_tex_desc_<wbr>fields(struct radv_device *device,<br>
                va += base_level_info->offset;<br>
<br>
        state[0] = va >> 8;<br>
+       if (chip_class < GFX9)<br>
+               state[0] |= image->surface.u.legacy.<wbr>combined_swizzle;<br>
        state[1] &= C_008F14_BASE_ADDRESS_HI;<br>
        state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);<br>
        state[3] |= S_008F1C_TILING_INDEX(si_tile_<wbr>mode_index(image, base_level,<br>
@@ -224,7 +228,8 @@ si_set_mutable_tex_desc_<wbr>fields(struct radv_device *device,<br>
                                meta_va += base_level_info->dcc_offset;<br>
                        state[6] |= S_008F28_COMPRESSION_EN(1);<br>
                        state[7] = meta_va >> 8;<br>
-<br>
+                       if (chip_class < GFX9)<br>
+                               state[7] |= image->surface.u.legacy.<wbr>combined_swizzle;<br>
                }<br>
        }<br>
<br>
@@ -472,6 +477,8 @@ si_make_texture_descriptor(<wbr>struct radv_device *device,<br>
                }<br>
<br>
                fmask_state[0] = va >> 8;<br>
+               if (device->physical_device->rad_<wbr>info.chip_class < GFX9)<br>
+                       fmask_state[0] |= image->surface.u.legacy.<wbr>combined_swizzle;<br>
                fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |<br>
                        S_008F14_DATA_FORMAT_GFX6(<wbr>fmask_format) |<br>
                        S_008F14_NUM_FORMAT_GFX6(num_<wbr>format);<br>
@@ -752,6 +759,7 @@ radv_image_create(VkDevice _device,<br>
        const VkImageCreateInfo *pCreateInfo = create_info->vk_info;<br>
        struct radv_image *image = NULL;<br>
        bool can_cmask_dcc = false;<br>
+       bool dedicated = false;<br>
        assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_<wbr>CREATE_INFO);<br>
<br>
        radv_assert(pCreateInfo-><wbr>mipLevels > 0);<br>
@@ -761,6 +769,11 @@ radv_image_create(VkDevice _device,<br>
        radv_assert(pCreateInfo-><wbr>extent.height > 0);<br>
        radv_assert(pCreateInfo-><wbr>extent.depth > 0);<br>
<br>
+       const VkDedicatedAllocationImageCrea<wbr>teInfoNV *dedicate_info =<br>
+               vk_find_struct_const(<wbr>pCreateInfo->pNext, DEDICATED_ALLOCATION_IMAGE_<wbr>CREATE_INFO_NV);<br>
+<br>
+       if (dedicate_info && dedicate_info-><wbr>dedicatedAllocation)<br>
+               dedicated = true;<br>
        image = vk_alloc2(&device->alloc, alloc, sizeof(*image), 8,<br>
                            VK_SYSTEM_ALLOCATION_SCOPE_<wbr>OBJECT);<br>
        if (!image)<br>
@@ -789,6 +802,10 @@ radv_image_create(VkDevice _device,<br>
                                image->queue_family_mask |= 1u << pCreateInfo-><wbr>pQueueFamilyIndices[i];<br>
        }<br>
<br>
+       if (!vk_format_is_depth(<wbr>pCreateInfo->format) && !create_info->scanout && !dedicated) {<br>
+               image->info.surf_index = p_atomic_inc_return(&device-><wbr>image_mrt_offset_counter) - 1;<br>
+       }<br>
+<br>
        radv_init_surface(device, &image->surface, create_info);<br>
<br>
        device->ws->surface_init(<wbr>device->ws, &image->info, &image->surface);<br>
diff --git a/src/amd/vulkan/radv_private.<wbr>h b/src/amd/vulkan/radv_private.<wbr>h<br>
index a167409..b0533bd 100644<br>
--- a/src/amd/vulkan/radv_private.<wbr>h<br>
+++ b/src/amd/vulkan/radv_private.<wbr>h<br>
@@ -547,6 +547,8 @@ struct radv_device {<br>
<br>
        /* Backup in-memory cache to be used if the app doesn't provide one */<br>
        struct radv_pipeline_cache *                mem_cache;<br>
+<br>
+       uint32_t image_mrt_offset_counter;<br>
 };<br>
<br>
 struct radv_device_memory {<br>
<span class="gmail-HOEnZb"><font color="#888888">--<br>
2.9.4<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>