<div dir="auto"><div><br><br><div class="gmail_quote"><div dir="ltr">On Wed, Jan 9, 2019, 2:37 PM Samuel Pitoiset &lt;<a href="mailto:samuel.pitoiset@gmail.com">samuel.pitoiset@gmail.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">A simple Vulkan extension that allows apps to query size and<br>
usage of all exposed memory heaps.<br>
<br>
The different usage values are not really accurate because<br>
they are per drm-fd, but they should be close enough.<br>
<br>
v3: - use atomic operations in the winsys<br>
v2: - add software counters for the different heaps in the winsys<br>
  - improve budget/usage computations based on these counters<br>
<br>
Signed-off-by: Samuel Pitoiset &lt;<a href="mailto:samuel.pitoiset@gmail.com" target="_blank" rel="noreferrer">samuel.pitoiset@gmail.com</a>&gt;<br>
---<br>
 src/amd/vulkan/radv_device.c         | 72 +++++++++++++++++++<br>
 src/amd/vulkan/radv_extensions.py       | 1 +<br>
 src/amd/vulkan/radv_radeon_winsys.h      | 4 ++<br>
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 38 +++++++++-<br>
 .../vulkan/winsys/amdgpu/radv_amdgpu_winsys.c | 6 ++<br>
 .../vulkan/winsys/amdgpu/radv_amdgpu_winsys.h | 4 ++<br>
 6 files changed, 124 insertions(+), 1 deletion(-)<br>
<br>
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c<br>
index 279917f3e0c..4bf36f9f384 100644<br>
--- a/src/amd/vulkan/radv_device.c<br>
+++ b/src/amd/vulkan/radv_device.c<br>
@@ -1350,12 +1350,84 @@ void radv_GetPhysicalDeviceMemoryProperties(<br>
    *pMemoryProperties = physical_device->memory_properties;<br>
 }<br>
<br>
+static void<br>
+radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)<br>
+{<br>
+Â Â Â Â RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);<br>
+Â Â Â Â VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;<br>
+Â Â Â Â uint64_t visible_vram_size = radv_get_visible_vram_size(device);<br>
+Â Â Â Â uint64_t vram_size = radv_get_vram_size(device);<br>
+Â Â Â Â uint64_t gtt_size = device->rad_info.gart_size;<br>
+Â Â Â Â uint64_t heap_budget, heap_usage;<br>
+<br>
+&nbsp;&nbsp;&nbsp;&nbsp;/* For all memory heaps, the budget is computed as follows:<br>
+&nbsp;&nbsp;&nbsp;&nbsp;*&nbsp;&nbsp;&nbsp;heap_budget = heap_size - global_heap_usage + app_heap_usage<br>
+&nbsp;&nbsp;&nbsp;&nbsp;*<br>
+&nbsp;&nbsp;&nbsp;&nbsp;* The Vulkan spec 1.1.97 says that the budget should include any<br>
+&nbsp;&nbsp;&nbsp;&nbsp;* currently allocated device memory.<br>
+&nbsp;&nbsp;&nbsp;&nbsp;*<br>
+&nbsp;&nbsp;&nbsp;&nbsp;* Note that the application heap usages are not really accurate (e.g.<br>
+&nbsp;&nbsp;&nbsp;&nbsp;* in the presence of shared buffers).<br>
+&nbsp;&nbsp;&nbsp;&nbsp;*/<br>
+Â Â Â Â if (vram_size) {<br>
+Â Â Â Â Â Â Â Â heap_usage = device->ws->query_value(device->ws,<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â RADEON_ALLOCATED_VRAM);<br>
+<br>
+Â Â Â Â Â Â Â Â heap_budget = vram_size -<br>
+Â Â Â Â Â Â Â Â Â Â Â Â device->ws->query_value(device->ws, RADEON_VRAM_USAGE) +<br>
+Â Â Â Â Â Â Â Â Â Â Â Â heap_usage;<br>
+<br>
+Â Â Â Â Â Â Â Â memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM] = heap_budget;<br>
+Â Â Â Â Â Â Â Â memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM] = heap_usage;<br>
+Â Â Â Â }<br>
+<br>
+Â Â Â Â if (visible_vram_size) {<br>
+Â Â Â Â Â Â Â Â heap_usage = device->ws->query_value(device->ws,<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â RADEON_ALLOCATED_VRAM_VIS);<br>
+<br>
+Â Â Â Â Â Â Â Â heap_budget = visible_vram_size -<br>
+Â Â Â Â Â Â Â Â Â Â Â Â device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +<br>
+Â Â Â Â Â Â Â Â Â Â Â Â heap_usage;<br>
+<br>
+Â Â Â Â Â Â Â Â memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_budget;<br>
+Â Â Â Â Â Â Â Â memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_usage;<br>
+Â Â Â Â }<br>
+<br>
+Â Â Â Â if (gtt_size) {<br>
+Â Â Â Â Â Â Â Â heap_usage = device->ws->query_value(device->ws,<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â RADEON_ALLOCATED_GTT);<br>
+<br>
+Â Â Â Â Â Â Â Â heap_budget = gtt_size -<br>
+Â Â Â Â Â Â Â Â Â Â Â Â device->ws->query_value(device->ws, RADEON_GTT_USAGE) +<br>
+Â Â Â Â Â Â Â Â Â Â Â Â heap_usage;<br>
+<br>
+Â Â Â Â Â Â Â Â memoryBudget->heapBudget[RADV_MEM_HEAP_GTT] = heap_budget;<br>
+Â Â Â Â Â Â Â Â memoryBudget->heapUsage[RADV_MEM_HEAP_GTT] = heap_usage;<br>
+Â Â Â Â }<br>
+<br>
+&nbsp;&nbsp;&nbsp;&nbsp;/* The heapBudget and heapUsage values must be zero for array elements<br>
+&nbsp;&nbsp;&nbsp;&nbsp;* at indices greater than or equal to<br>
+&nbsp;&nbsp;&nbsp;&nbsp;* VkPhysicalDeviceMemoryProperties::memoryHeapCount.<br>
+&nbsp;&nbsp;&nbsp;&nbsp;*/<br>
+Â Â Â Â for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {<br>
+Â Â Â Â Â Â Â Â memoryBudget->heapBudget[i] = 0;<br>
+Â Â Â Â Â Â Â Â memoryBudget->heapUsage[i] = 0;<br>
+Â Â Â Â }<br>
+}<br>
+<br>
 void radv_GetPhysicalDeviceMemoryProperties2(<br>
    VkPhysicalDevice              physicalDevice,<br>
    VkPhysicalDeviceMemoryProperties2     *pMemoryProperties)<br>
 {<br>
    radv_GetPhysicalDeviceMemoryProperties(physicalDevice,<br>
                        &pMemoryProperties->memoryProperties);<br>
+<br>
+Â Â Â Â VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =<br>
+Â Â Â Â Â Â Â Â vk_find_struct(pMemoryProperties->pNext,<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);<br>
+Â Â Â Â if (memory_budget)<br>
+Â Â Â Â Â Â Â Â radv_get_memory_budget_properties(physicalDevice, memory_budget);<br>
 }<br>
<br>
 VkResult radv_GetMemoryHostPointerPropertiesEXT(<br>
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py<br>
index 9952bb9c1c6..491ed9d94c3 100644<br>
--- a/src/amd/vulkan/radv_extensions.py<br>
+++ b/src/amd/vulkan/radv_extensions.py<br>
@@ -105,6 +105,7 @@ EXTENSIONS = [<br>
   Extension('VK_EXT_external_memory_dma_buf',      1, True),<br>
   Extension('VK_EXT_external_memory_host',       1, 'device->rad_info.has_userptr'),<br>
   Extension('VK_EXT_global_priority',          1, 'device->rad_info.has_ctx_priority'),<br>
+  Extension('VK_EXT_memory_budget',           1, True),<br>
   Extension('VK_EXT_pci_bus_info',           2, True),<br>
   Extension('VK_EXT_sampler_filter_minmax',       1, 'device->rad_info.chip_class >= CIK'),<br>
   Extension('VK_EXT_scalar_block_layout',        1, 'device->rad_info.chip_class >= CIK'),<br>
diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h<br>
index e9d541ab150..d9b46d89cf3 100644<br>
--- a/src/amd/vulkan/radv_radeon_winsys.h<br>
+++ b/src/amd/vulkan/radv_radeon_winsys.h<br>
@@ -84,6 +84,9 @@ enum radeon_ctx_priority {<br>
 };<br>
<br>
 enum radeon_value_id {<br>
+Â Â Â Â RADEON_ALLOCATED_VRAM,<br>
+Â Â Â Â RADEON_ALLOCATED_VRAM_VIS,<br>
+Â Â Â Â RADEON_ALLOCATED_GTT,<br>
    RADEON_TIMESTAMP,<br>
    RADEON_NUM_BYTES_MOVED,<br>
    RADEON_NUM_EVICTIONS,<br>
@@ -164,6 +167,7 @@ struct radeon_winsys_fence;<br>
 struct radeon_winsys_bo {<br>
    uint64_t va;<br>
    bool is_local;<br>
+Â Â Â Â bool vram_cpu_access;<br>
 };<br>
 struct radv_winsys_sem_counts {<br>
    uint32_t syncobj_count;<br>
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c<br>
index a9bd55eac8f..7194d5a3236 100644<br>
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c<br>
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c<br>
@@ -249,6 +249,7 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,<br>
 static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)<br>
 {<br>
    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);<br>
+Â Â Â Â struct radv_amdgpu_winsys *ws = bo->ws;<br>
<br>
    if (p_atomic_dec_return(&bo->ref_count))<br>
        return;<br>
@@ -269,6 +270,17 @@ static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)<br>
                   0, AMDGPU_VA_OP_UNMAP);<br>
        amdgpu_bo_free(bo->bo);<br>
    }<br>
+<br>
+Â Â Â Â if (bo->initial_domain & RADEON_DOMAIN_VRAM)<br>
+Â Â Â Â Â Â Â Â p_atomic_add(&ws->allocated_vram,<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â -align64(bo->size, ws->info.gart_page_size));<br>
+Â Â Â Â if (bo->base.vram_cpu_access)<br>
+Â Â Â Â Â Â Â Â p_atomic_add(&ws->allocated_vram_vis,<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â -align64(bo->size, ws->info.gart_page_size));<br>
+Â Â Â Â if (bo->initial_domain & RADEON_DOMAIN_GTT)<br>
+Â Â Â Â Â Â Â Â p_atomic_add(&ws->allocated_gtt,<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;-align64(bo->size, ws->info.gart_page_size));<br></blockquote></div></div><div dir="auto"><br></div><div dir="auto">Isn't bo-&gt;size already page-aligned? If so, the align64() calls could be dropped, which saves some align operations. Otherwise, r-b (Reviewed-by) for the series.</div><div dir="auto"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
    amdgpu_va_range_free(bo->va_handle);<br>
    FREE(bo);<br>
 }<br>
@@ -344,8 +356,10 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,<br>
    if (initial_domain & RADEON_DOMAIN_GTT)<br>
        request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;<br>
<br>
-Â Â Â Â if (flags & RADEON_FLAG_CPU_ACCESS)<br>
+Â Â Â Â if (flags & RADEON_FLAG_CPU_ACCESS) {<br>
+Â Â Â Â Â Â Â Â bo->base.vram_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;<br>
        request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;<br>
+Â Â Â Â }<br>
    if (flags & RADEON_FLAG_NO_CPU_ACCESS)<br>
        request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;<br>
    if (flags & RADEON_FLAG_GTT_WC)<br>
@@ -378,6 +392,17 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,<br>
    bo->bo = buf_handle;<br>
    bo->initial_domain = initial_domain;<br>
    bo->is_shared = false;<br>
+<br>
+Â Â Â Â if (initial_domain & RADEON_DOMAIN_VRAM)<br>
+Â Â Â Â Â Â Â Â p_atomic_add(&ws->allocated_vram,<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â align64(bo->size, ws->info.gart_page_size));<br>
+Â Â Â Â if (bo->base.vram_cpu_access)<br>
+Â Â Â Â Â Â Â Â p_atomic_add(&ws->allocated_vram_vis,<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â align64(bo->size, ws->info.gart_page_size));<br>
+Â Â Â Â if (initial_domain & RADEON_DOMAIN_GTT)<br>
+Â Â Â Â Â Â Â Â p_atomic_add(&ws->allocated_gtt,<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â align64(bo->size, ws->info.gart_page_size));<br>
+<br>
    radv_amdgpu_add_buffer_to_global_list(bo);<br>
    return (struct radeon_winsys_bo *)bo;<br>
 error_va_map:<br>
@@ -474,6 +499,9 @@ radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,<br>
    bo->bo = buf_handle;<br>
    bo->initial_domain = RADEON_DOMAIN_GTT;<br>
<br>
+Â Â Â Â p_atomic_add(&ws->allocated_gtt,<br>
+Â Â Â Â Â Â Â Â Â Â align64(bo->size, ws->info.gart_page_size));<br>
+<br>
    radv_amdgpu_add_buffer_to_global_list(bo);<br>
    return (struct radeon_winsys_bo *)bo;<br>
<br>
@@ -538,6 +566,14 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,<br>
    bo->is_shared = true;<br>
    bo->ws = ws;<br>
    bo->ref_count = 1;<br>
+<br>
+Â Â Â Â if (bo->initial_domain & RADEON_DOMAIN_VRAM)<br>
+Â Â Â Â Â Â Â Â p_atomic_add(&ws->allocated_vram,<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â align64(bo->size, ws->info.gart_page_size));<br>
+Â Â Â Â if (bo->initial_domain & RADEON_DOMAIN_GTT)<br>
+Â Â Â Â Â Â Â Â p_atomic_add(&ws->allocated_gtt,<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â align64(bo->size, ws->info.gart_page_size));<br>
+<br>
    radv_amdgpu_add_buffer_to_global_list(bo);<br>
    return (struct radeon_winsys_bo *)bo;<br>
 error_va_map:<br>
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c<br>
index 9706c04e8cd..d3a57f6b4f3 100644<br>
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c<br>
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c<br>
@@ -72,6 +72,12 @@ static uint64_t radv_amdgpu_winsys_query_value(struct radeon_winsys *rws,<br>
    uint64_t retval = 0;<br>
<br>
    switch (value) {<br>
+Â Â Â Â case RADEON_ALLOCATED_VRAM:<br>
+Â Â Â Â Â Â Â Â return ws->allocated_vram;<br>
+Â Â Â Â case RADEON_ALLOCATED_VRAM_VIS:<br>
+Â Â Â Â Â Â Â Â return ws->allocated_vram_vis;<br>
+Â Â Â Â case RADEON_ALLOCATED_GTT:<br>
+Â Â Â Â Â Â Â Â return ws->allocated_gtt;<br>
    case RADEON_TIMESTAMP:<br>
        amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);<br>
        return retval;<br>
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h<br>
index 80a1c6f2926..edec0a1ed78 100644<br>
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h<br>
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h<br>
@@ -52,6 +52,10 @@ struct radv_amdgpu_winsys {<br>
<br>
    pthread_mutex_t global_bo_list_lock;<br>
    struct list_head global_bo_list;<br>
+<br>
+Â Â Â Â uint64_t allocated_vram;<br>
+Â Â Â Â uint64_t allocated_vram_vis;<br>
+Â Â Â Â uint64_t allocated_gtt;<br>
 };<br>
<br>
 static inline struct radv_amdgpu_winsys *<br>
-- <br>
2.20.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank" rel="noreferrer">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div></div></div>