[Mesa-dev] [PATCH 7/7] winsys/amdgpu: overallocate buffers for faster address translation on Gfx9

Marek Olšák maraeo at gmail.com
Fri Nov 23 23:40:52 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

Sadly, the 3 games I tested (DeusEx:MD, DiRT Rally, DOTA 2) are unaffected
by the overallocation, presumably because their buffer sizes don't fall
into the narrow range just below a power of two.
---
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 24 +++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 3fc1da8b0b8..5139e765b72 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -403,20 +403,44 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
    amdgpu_bo_handle buf_handle;
    uint64_t va = 0;
    struct amdgpu_winsys_bo *bo;
    amdgpu_va_handle va_handle;
    unsigned va_gap_size;
    int r;
 
    /* VRAM or GTT must be specified, but not both at the same time. */
    assert(util_bitcount(initial_domain & RADEON_DOMAIN_VRAM_GTT) == 1);
 
+   /* Gfx9: Overallocate the size to the next power of two for faster address
+    * translation if we don't waste too much memory.
+    */
+   if (ws->info.chip_class >= GFX9) {
+      uint64_t next_pot_size = util_next_power_of_two64(size);
+
+      /* For allocations slightly smaller than 4 GB, at most 32 MB is wasted.
+       * For allocations slightly smaller than 256 MB, at most 2 MB is wasted.
+       * For allocations slightly smaller than 64 MB, at most 512 KB is wasted.
+       *
+       * Waste at most 0.79% (1/127) of the size if we decide to overallocate.
+       */
+      uint64_t max_overalloc = next_pot_size >> 7;
+
+      /* If the next power-of-two size is <= the PTE fragment size, waste up
+       * to 6.67% (1/15) of the size if we decide to overallocate.
+       */
+      if (next_pot_size <= ws->info.pte_fragment_size)
+         max_overalloc = next_pot_size >> 4;
+
+      if (size + max_overalloc >= next_pot_size)
+         size = next_pot_size;
+   }
+
    bo = CALLOC_STRUCT(amdgpu_winsys_bo);
    if (!bo) {
       return NULL;
    }
 
    if (heap >= 0) {
       pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                           heap);
    }
    request.alloc_size = size;
-- 
2.17.1



More information about the mesa-dev mailing list