<div dir="auto"><div><br><div class="gmail_extra"><br><div class="gmail_quote">On 7 Jul. 2017 19:29, "Christian König" &lt;<a href="mailto:deathsimple@vodafone.de">deathsimple@vodafone.de</a>&gt; wrote:<br type="attribution"><blockquote class="quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">What tiling format do the destination textures have?<br>
<br>
Sounds like the offset is just added so that we distribute memory accesses more evenly across the memory channels.<br></blockquote></div></div></div><div dir="auto"><br></div><div dir="auto">From the traces I think the tile index mode was 10.</div><div dir="auto"><br></div><div dir="auto">Dave.</div><div dir="auto"><div class="gmail_extra"><div class="gmail_quote"><blockquote class="quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
Regards,<br>
Christian.<div class="elided-text"><br>
<br>
Am 07.07.2017 um 09:18 schrieb Dave Airlie:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
From: Dave Airlie &lt;<a href="mailto:airlied@redhat.com" target="_blank">airlied@redhat.com</a>&gt;<br>
<br>
(This patch doesn't seem to work fully; hopefully AMD can tell us<br>
more about the rules and how to calculate the magic offset.)<br>
<br>
It appears that, to get full access to memory bandwidth with MRT<br>
rendering, the -pro Vulkan driver offsets each image by 0x3800.<br>
I'm not sure how that value is calculated.<br>
<br>
Glenn came up with the idea (probably what -pro does also) of just<br>
offsetting every image in round-robin order, in the hope that apps<br>
would create MRT images in sequence anyway.<br>
<br>
This attempts to do that using an atomic counter in the device.<br>
<br>
This takes the deferred demo from 800 fps to 1150 fps on my RX 480.<br>
<br>
(I've tested that Dota 2 and Talos at least still run after this.)<br>
---<br>
  src/amd/vulkan/radv_device.c  |  7 ++++---<br>
  src/amd/vulkan/radv_image.c   | 16 +++++++++++++++-<br>
  src/amd/vulkan/radv_private.h |  3 +++<br>
  3 files changed, 22 insertions(+), 4 deletions(-)<br>
<br>
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c<br>
index d1c519a..f39526d 100644<br>
--- a/src/amd/vulkan/radv_device.c<br>
+++ b/src/amd/vulkan/radv_device.c<br>
@@ -2706,7 +2706,7 @@ radv_initialise_color_surface(<wbr>struct radv_device *device,<br>
        /* Intensity is implemented as Red, so treat it that way. */<br>
        cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(des<wbr>c->swizzle[3] == VK_SWIZZLE_1);<br>
  -     va = device->ws->buffer_get_va(ivie<wbr>w->bo) + iview->image->offset;<br>
+       va = device->ws->buffer_get_va(ivie<wbr>w->bo) + iview->image->offset + iview->image->mrt_offset;<br>
        if (device->physical_device->rad_<wbr>info.chip_class >= GFX9) {<br>
                struct gfx9_surf_meta_flags meta;<br>
@@ -2756,11 +2756,11 @@ radv_initialise_color_surface(<wbr>struct radv_device *device,<br>
        /* CMASK variables */<br>
        va = device->ws->buffer_get_va(ivie<wbr>w->bo) + iview->image->offset;<br>
-       va += iview->image->cmask.offset;<br>
+       va += iview->image->cmask.offset + iview->image->mrt_offset;<br>
        cb->cb_color_cmask = va >> 8;<br>
        va = device->ws->buffer_get_va(ivie<wbr>w->bo) + iview->image->offset;<br>
-       va += iview->image->dcc_offset;<br>
+       va += iview->image->dcc_offset + iview->image->mrt_offset;<br>
        cb->cb_dcc_base = va >> 8;<br>
        uint32_t max_slice = radv_surface_layer_count(iview<wbr>);<br>
@@ -2776,6 +2776,7 @@ radv_initialise_color_surface(<wbr>struct radv_device *device,<br>
        if (iview->image->fmask.size) {<br>
                va = device->ws->buffer_get_va(ivie<wbr>w->bo) + iview->image->offset + iview->image->fmask.offset;<br>
+               va += iview->image->mrt_offset;<br>
                cb->cb_color_fmask = va >> 8;<br>
        } else {<br>
                cb->cb_color_fmask = cb->cb_color_base;<br>
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c<br>
index b3a223b..bc20a53 100644<br>
--- a/src/amd/vulkan/radv_image.c<br>
+++ b/src/amd/vulkan/radv_image.c<br>
@@ -31,6 +31,7 @@<br>
  #include "sid.h"<br>
  #include "gfx9d.h"<br>
  #include "util/debug.h"<br>
+#include "util/u_atomic.h"<br>
  static unsigned<br>
  radv_choose_tiling(struct radv_device *Device,<br>
                   const struct radv_image_create_info *create_info)<br>
@@ -208,6 +209,7 @@ si_set_mutable_tex_desc_fields<wbr>(struct radv_device *device,<br>
        } else<br>
                va += base_level_info->offset;<br>
  +     va += image->mrt_offset;<br>
        state[0] = va >> 8;<br>
        state[1] &= C_008F14_BASE_ADDRESS_HI;<br>
        state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);<br>
@@ -220,6 +222,7 @@ si_set_mutable_tex_desc_fields<wbr>(struct radv_device *device,<br>
                state[7] = 0;<br>
                if (image->surface.dcc_size && first_level < image->surface.num_dcc_levels) {<br>
                        uint64_t meta_va = gpu_address + image->dcc_offset;<br>
+                       meta_va += image->mrt_offset;<br>
                        if (chip_class <= VI)<br>
                                meta_va += base_level_info->dcc_offset;<br>
                        state[6] |= S_008F28_COMPRESSION_EN(1);<br>
@@ -436,7 +439,7 @@ si_make_texture_descriptor(str<wbr>uct radv_device *device,<br>
                uint64_t gpu_address = device->ws->buffer_get_va(imag<wbr>e->bo);<br>
                uint64_t va;<br>
  -             va = gpu_address + image->offset + image->fmask.offset;<br>
+               va = gpu_address + image->offset + image->mrt_offset + image->fmask.offset;<br>
                if (device->physical_device->rad_<wbr>info.chip_class >= GFX9) {<br>
                        fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK<wbr>;<br>
@@ -642,6 +645,7 @@ radv_image_alloc_fmask(struct radv_device *device,<br>
        radv_image_get_fmask_info(devi<wbr>ce, image, image->info.samples, &image->fmask);<br>
        image->fmask.offset = align64(image->size, image->fmask.alignment);<br>
+       image->fmask.size += image->mrt_offset;<br>
        image->size = image->fmask.offset + image->fmask.size;<br>
        image->alignment = MAX2(image->alignment, image->fmask.alignment);<br>
  }<br>
@@ -709,6 +713,7 @@ radv_image_alloc_cmask(struct radv_device *device,<br>
        radv_image_get_cmask_info(devi<wbr>ce, image, &image->cmask);<br>
        image->cmask.offset = align64(image->size, image->cmask.alignment);<br>
+       image->cmask.size += image->mrt_offset;<br>
        /* + 8 for storing the clear values */<br>
        if (!image->clear_value_offset) {<br>
                image->clear_value_offset = image->cmask.offset + image->cmask.size;<br>
@@ -724,6 +729,7 @@ radv_image_alloc_dcc(struct radv_device *device,<br>
  {<br>
        image->dcc_offset = align64(image->size, image->surface.dcc_alignment);<br>
        /* + 16 for storing the clear values + dcc pred */<br>
+       image->surface.dcc_size += image->mrt_offset;<br>
        image->clear_value_offset = image->dcc_offset + image->surface.dcc_size;<br>
        image->dcc_pred_offset = image->clear_value_offset + 8;<br>
        image->size = image->dcc_offset + image->surface.dcc_size + 16;<br>
@@ -801,6 +807,14 @@ radv_image_create(VkDevice _device,<br>
        image->size = image->surface.surf_size;<br>
        image->alignment = image->surface.surf_alignment;<br>
  +     if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMEN<wbr>T_BIT) && !create_info->scanout) {<br>
+               uint32_t mrt_idx = p_atomic_inc_return(&device->i<wbr>mage_mrt_offset_counter) - 1;<br>
+               mrt_idx %= 8;<br>
+               mrt_idx *= 0x3800;<br>
+               image->mrt_offset = mrt_idx;<br>
+               image->size += image->mrt_offset;<br>
+       }<br>
+<br>
        if (image->exclusive || image->queue_family_mask == 1)<br>
                can_cmask_dcc = true;<br>
  diff --git a/src/amd/vulkan/radv_private.<wbr>h b/src/amd/vulkan/radv_private.<wbr>h<br>
index 5c30d18..f09095a 100644<br>
--- a/src/amd/vulkan/radv_private.<wbr>h<br>
+++ b/src/amd/vulkan/radv_private.<wbr>h<br>
@@ -547,6 +547,8 @@ struct radv_device {<br>
        /* Backup in-memory cache to be used if the app doesn't provide one */<br>
        struct radv_pipeline_cache *                mem_cache;<br>
+<br>
+       uint32_t image_mrt_offset_counter;<br>
  };<br>
    struct radv_device_memory {<br>
@@ -1211,6 +1213,7 @@ struct radv_image {<br>
        /* Set when bound */<br>
        struct radeon_winsys_bo *bo;<br>
        VkDeviceSize offset;<br>
+       VkDeviceSize mrt_offset;<br>
        uint32_t dcc_offset;<br>
        uint32_t htile_offset;<br>
        struct radeon_surf surface;<br>
</blockquote>
<br>
<br>
</div></blockquote></div><br></div></div></div>