Mesa (master): radv: do not expose GTT as device local memory mostly for APUs

Mon Apr 27 22:51:14 UTC 2020

Module: Mesa
Branch: master
Commit: 7a0a6a718035e1a754972fbbad8b91d19f39fa42
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7a0a6a718035e1a754972fbbad8b91d19f39fa42

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Wed Apr 22 16:54:28 2020 +0200

radv: do not expose GTT as device local memory mostly for APUs

On APUs, the memory is unified (all heaps are equally fast) and
apps should count all memory heaps together. But some games, such
as Id Tech games (Youngblood and such), don't manage memory
correctly on APUs and spill everything when one VRAM heap is full.

Instead of spilling buffers, they should just allocate new buffers
in the second heap, but it seems these games get confused when
two memory heaps have the DEVICE_LOCAL_BIT set.

This is probably a first step towards better memory management on
APUs, but there is still some work to do if we want to run most apps
with a small amount of dedicated VRAM (256MB or so).

This gives a huge boost for Id Tech games on APUs, and doesn't
seem to reduce the performance of Feral games.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4771>

---

 src/amd/vulkan/radv_device.c | 59 ++++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index c34674d0904..b590a92d4d5 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -180,6 +180,15 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
 			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
 		};
 	}
+
+	if (device->rad_info.gart_size > 0) {
+		gart_index = device->memory_properties.memoryHeapCount++;
+		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
+			.size = device->rad_info.gart_size,
+			.flags = 0,
+		};
+	}
+
 	if (visible_vram_size) {
 		visible_vram_index = device->memory_properties.memoryHeapCount++;
 		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
@@ -187,24 +196,29 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
 			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
 		};
 	}
-	if (device->rad_info.gart_size > 0) {
-		gart_index = device->memory_properties.memoryHeapCount++;
-		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
-			.size = device->rad_info.gart_size,
-			.flags = device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
-		};
-	}
 
 	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
 	unsigned type_count = 0;
-	if (vram_index >= 0) {
-		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
-		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
-			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-			.heapIndex = vram_index,
-		};
+
+	if (device->rad_info.has_dedicated_vram) {
+		if (vram_index >= 0) {
+			device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
+			device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+				.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+				.heapIndex = vram_index,
+			};
+		}
+	} else {
+		if (visible_vram_index >= 0) {
+			device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
+			device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+				.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+				.heapIndex = visible_vram_index,
+			};
+		}
 	}
-	if (gart_index >= 0 && device->rad_info.has_dedicated_vram) {
+
+	if (gart_index >= 0) {
 		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
 		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
 			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
@@ -221,26 +235,13 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
 			.heapIndex = visible_vram_index,
 		};
 	}
-	if (gart_index >= 0 && !device->rad_info.has_dedicated_vram) {
-		/* Put GTT after visible VRAM for GPUs without dedicated VRAM
-		 * as they have identical property flags, and according to the
-		 * spec, for types with identical flags, the one with greater
-		 * performance must be given a lower index. */
-		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
-		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
-			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-			.heapIndex = gart_index,
-		};
-	}
+
 	if (gart_index >= 0) {
 		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
 		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
 			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
 			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
-			VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
-			(device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
+			VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
 			.heapIndex = gart_index,
 		};
 	}