Mesa (master): radv/winsys: do not count visible VRAM buffers twice in the budget

Wed May 6 07:29:53 UTC 2020

Module: Mesa
Branch: master
Commit: f457e1b6d5814e51cb9e0ae47e8fd5936139f42f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f457e1b6d5814e51cb9e0ae47e8fd5936139f42f

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Thu Apr 30 18:42:27 2020 +0200

radv/winsys: do not count visible VRAM buffers twice in the budget

The VRAM size returned to apps is computed as follows:
vram_size = real_hw_vram_size - visible_vram_size.

Visible VRAM buffers should be counted only in the visible VRAM
counter and not twice. Buffers with the NO_CPU_ACCESS flag are
known to not be mappable, so they are counted in the VRAM counter.

Other buffers, i.e. those with the CPU_ACCESS flag or with neither
flag set (imported buffers), are counted in the visible VRAM counter
because they are mappable.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4834>

---

 src/amd/vulkan/radv_radeon_winsys.h           |  2 +-
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 48 ++++++++++++++++++---------
 2 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
index ef2b4849692..e5a4ae57d99 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -162,7 +162,7 @@ struct radeon_winsys_fence;
 struct radeon_winsys_bo {
 	uint64_t va;
 	bool is_local;
-	bool vram_cpu_access;
+	bool vram_no_cpu_access;
 };
 struct radv_winsys_sem_counts {
 	uint32_t syncobj_count;
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
index 740d7d23fa6..99bd44226ae 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
@@ -276,12 +276,16 @@ static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
 		amdgpu_bo_free(bo->bo);
 	}
 
-	if (bo->initial_domain & RADEON_DOMAIN_VRAM)
-		p_atomic_add(&ws->allocated_vram,
-			     -align64(bo->size, ws->info.gart_page_size));
-	if (bo->base.vram_cpu_access)
-		p_atomic_add(&ws->allocated_vram_vis,
-			     -align64(bo->size, ws->info.gart_page_size));
+	if (bo->initial_domain & RADEON_DOMAIN_VRAM) {
+		if (bo->base.vram_no_cpu_access) {
+			p_atomic_add(&ws->allocated_vram,
+				     -align64(bo->size, ws->info.gart_page_size));
+		} else {
+			p_atomic_add(&ws->allocated_vram_vis,
+				     -align64(bo->size, ws->info.gart_page_size));
+		}
+	}
+
 	if (bo->initial_domain & RADEON_DOMAIN_GTT)
 		p_atomic_add(&ws->allocated_gtt,
 			     -align64(bo->size, ws->info.gart_page_size));
@@ -366,12 +370,12 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
 	if (initial_domain & RADEON_DOMAIN_OA)
 		request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;
 
-	if (flags & RADEON_FLAG_CPU_ACCESS) {
-		bo->base.vram_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
+	if (flags & RADEON_FLAG_CPU_ACCESS)
 		request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-	}
-	if (flags & RADEON_FLAG_NO_CPU_ACCESS)
+	if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
+		bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
 		request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+	}
 	if (flags & RADEON_FLAG_GTT_WC)
 		request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 	if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
@@ -411,12 +415,24 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
 	r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
 	assert(!r);
 
-	if (initial_domain & RADEON_DOMAIN_VRAM)
-		p_atomic_add(&ws->allocated_vram,
-			     align64(bo->size, ws->info.gart_page_size));
-	if (bo->base.vram_cpu_access)
-		p_atomic_add(&ws->allocated_vram_vis,
-			     align64(bo->size, ws->info.gart_page_size));
+	if (initial_domain & RADEON_DOMAIN_VRAM) {
+		/* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
+		 * aren't mappable and they are counted as part of the VRAM
+		 * counter.
+		 *
+		 * Otherwise, buffers with the CPU_ACCESS flag or with neither
+		 * flag (imported buffers) are counted as part of the visible
+		 * VRAM counter because they can be mapped.
+		 */
+		if (bo->base.vram_no_cpu_access) {
+			p_atomic_add(&ws->allocated_vram,
+				     align64(bo->size, ws->info.gart_page_size));
+		} else {
+			p_atomic_add(&ws->allocated_vram_vis,
+				     align64(bo->size, ws->info.gart_page_size));
+		}
+	}
+
 	if (initial_domain & RADEON_DOMAIN_GTT)
 		p_atomic_add(&ws->allocated_gtt,
 			     align64(bo->size, ws->info.gart_page_size));