[Mesa-dev] [PATCH] [rfc] radv: offset images by a differing amount.

Christian König deathsimple at vodafone.de
Fri Jul 7 09:18:15 UTC 2017


What tiling format do the destination textures have?

Sounds like the offset is just added so that we distribute memory 
accesses more equally over memory channels.

Regards,
Christian.

Am 07.07.2017 um 09:18 schrieb Dave Airlie:
> From: Dave Airlie <airlied at redhat.com>
>
> (this patch doesn't seem to work fully, hopefully AMD can tell us
> more info on the rules, and how to calculate the magic).
>
> It appears that, to get full access to memory bandwidth with MRT
> rendering, the pro vulkan driver offsets each image by 0x3800.
> I'm not sure how that value is calculated.
>
> Glenn came up with the idea (probably what -pro does also) of just
> offsetting every image in round robin order, in the hope that apps
> would create mrt images in sequence anyways.
>
> This attempts to do that using an atomic counter in the device.
>
> This gets the deferred demo from 800fps->1150fps on my rx480.
>
> (I've tested dota2 and talos still run at least after this)
> ---
>   src/amd/vulkan/radv_device.c  |  7 ++++---
>   src/amd/vulkan/radv_image.c   | 16 +++++++++++++++-
>   src/amd/vulkan/radv_private.h |  3 +++
>   3 files changed, 22 insertions(+), 4 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index d1c519a..f39526d 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -2706,7 +2706,7 @@ radv_initialise_color_surface(struct radv_device *device,
>   	/* Intensity is implemented as Red, so treat it that way. */
>   	cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
>   
> -	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
> +	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->mrt_offset;
>   
>   	if (device->physical_device->rad_info.chip_class >= GFX9) {
>   		struct gfx9_surf_meta_flags meta;
> @@ -2756,11 +2756,11 @@ radv_initialise_color_surface(struct radv_device *device,
>   
>   	/* CMASK variables */
>   	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
> -	va += iview->image->cmask.offset;
> +	va += iview->image->cmask.offset + iview->image->mrt_offset;
>   	cb->cb_color_cmask = va >> 8;
>   
>   	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
> -	va += iview->image->dcc_offset;
> +	va += iview->image->dcc_offset + iview->image->mrt_offset;
>   	cb->cb_dcc_base = va >> 8;
>   
>   	uint32_t max_slice = radv_surface_layer_count(iview);
> @@ -2776,6 +2776,7 @@ radv_initialise_color_surface(struct radv_device *device,
>   
>   	if (iview->image->fmask.size) {
>   		va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
> +		va += iview->image->mrt_offset;
>   		cb->cb_color_fmask = va >> 8;
>   	} else {
>   		cb->cb_color_fmask = cb->cb_color_base;
> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
> index b3a223b..bc20a53 100644
> --- a/src/amd/vulkan/radv_image.c
> +++ b/src/amd/vulkan/radv_image.c
> @@ -31,6 +31,7 @@
>   #include "sid.h"
>   #include "gfx9d.h"
>   #include "util/debug.h"
> +#include "util/u_atomic.h"
>   static unsigned
>   radv_choose_tiling(struct radv_device *Device,
>   		   const struct radv_image_create_info *create_info)
> @@ -208,6 +209,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
>   	} else
>   		va += base_level_info->offset;
>   
> +	va += image->mrt_offset;
>   	state[0] = va >> 8;
>   	state[1] &= C_008F14_BASE_ADDRESS_HI;
>   	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
> @@ -220,6 +222,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
>   		state[7] = 0;
>   		if (image->surface.dcc_size && first_level < image->surface.num_dcc_levels) {
>   			uint64_t meta_va = gpu_address + image->dcc_offset;
> +			meta_va += image->mrt_offset;
>   			if (chip_class <= VI)
>   				meta_va += base_level_info->dcc_offset;
>   			state[6] |= S_008F28_COMPRESSION_EN(1);
> @@ -436,7 +439,7 @@ si_make_texture_descriptor(struct radv_device *device,
>   		uint64_t gpu_address = device->ws->buffer_get_va(image->bo);
>   		uint64_t va;
>   
> -		va = gpu_address + image->offset + image->fmask.offset;
> +		va = gpu_address + image->offset + image->mrt_offset + image->fmask.offset;
>   
>   		if (device->physical_device->rad_info.chip_class >= GFX9) {
>   			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
> @@ -642,6 +645,7 @@ radv_image_alloc_fmask(struct radv_device *device,
>   	radv_image_get_fmask_info(device, image, image->info.samples, &image->fmask);
>   
>   	image->fmask.offset = align64(image->size, image->fmask.alignment);
> +	image->fmask.size += image->mrt_offset;
>   	image->size = image->fmask.offset + image->fmask.size;
>   	image->alignment = MAX2(image->alignment, image->fmask.alignment);
>   }
> @@ -709,6 +713,7 @@ radv_image_alloc_cmask(struct radv_device *device,
>   	radv_image_get_cmask_info(device, image, &image->cmask);
>   
>   	image->cmask.offset = align64(image->size, image->cmask.alignment);
> +	image->cmask.size += image->mrt_offset;
>   	/* + 8 for storing the clear values */
>   	if (!image->clear_value_offset) {
>   		image->clear_value_offset = image->cmask.offset + image->cmask.size;
> @@ -724,6 +729,7 @@ radv_image_alloc_dcc(struct radv_device *device,
>   {
>   	image->dcc_offset = align64(image->size, image->surface.dcc_alignment);
>   	/* + 16 for storing the clear values + dcc pred */
> +	image->surface.dcc_size += image->mrt_offset;
>   	image->clear_value_offset = image->dcc_offset + image->surface.dcc_size;
>   	image->dcc_pred_offset = image->clear_value_offset + 8;
>   	image->size = image->dcc_offset + image->surface.dcc_size + 16;
> @@ -801,6 +807,14 @@ radv_image_create(VkDevice _device,
>   	image->size = image->surface.surf_size;
>   	image->alignment = image->surface.surf_alignment;
>   
> +	if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && !create_info->scanout) {
> +		uint32_t mrt_idx = p_atomic_inc_return(&device->image_mrt_offset_counter) - 1;
> +		mrt_idx %= 8;
> +		mrt_idx *= 0x3800;
> +		image->mrt_offset = mrt_idx;
> +		image->size += image->mrt_offset;
> +	}
> +
>   	if (image->exclusive || image->queue_family_mask == 1)
>   		can_cmask_dcc = true;
>   
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 5c30d18..f09095a 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -547,6 +547,8 @@ struct radv_device {
>   
>   	/* Backup in-memory cache to be used if the app doesn't provide one */
>   	struct radv_pipeline_cache *                mem_cache;
> +
> +	uint32_t image_mrt_offset_counter;
>   };
>   
>   struct radv_device_memory {
> @@ -1211,6 +1213,7 @@ struct radv_image {
>   	/* Set when bound */
>   	struct radeon_winsys_bo *bo;
>   	VkDeviceSize offset;
> +	VkDeviceSize mrt_offset;
>   	uint32_t dcc_offset;
>   	uint32_t htile_offset;
>   	struct radeon_surf surface;




More information about the mesa-dev mailing list