[Mesa-dev] [PATCH 12/12] anv: Allocate buffer offsets from userspace

Chris Wilson chris at chris-wilson.co.uk
Sun May 14 23:38:56 UTC 2017


A very simple allocator that increments from the last globally allocated
offset and assigns that for the lifetime of this bo for every context.
Once we run out of space, we delegate finding holes to the kernel. In
the future, we can opt to handle this ourselves as well, but a 48b
address space means we can defer this optimisation for some time - we
already assume that we will never exceed that virtual usage in a single
command submission (execbuf).

The only caveat is to remember to exclude certain objects from certain
ranges, for which we fall back to the kernel allocator for now.

The advantage of assigning our own offsets is that we can completely
skip relocation tracking: no more lists and potential allocation
failures, and no more relocations prior to submission, whether done by
ourselves or as fixups by the kernel (which happen under global mutexes).
However, not everything immediately stores its target address in the
batch and so we must keep the relocation list around to perform deferred
relocations. As these are fixed, the number of relocations can be
reduced.

The downside of using a monotonic allocator is that we do not reuse
address space. If the application doesn't settle to using a static
working set, but keeps dynamically allocating a few objects, then we
continue to grow the address space, consuming more and more pagetables.

v2: Allocate sub-4G for state buffers as well (previously skipped)

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Jason Ekstrand <jason.ekstrand at intel.com>
---
 src/intel/vulkan/anv_allocator.c   |  9 +++++
 src/intel/vulkan/anv_batch_chain.c |  5 ++-
 src/intel/vulkan/anv_device.c      | 78 ++++++++++++++++++++++++++++++++++++++
 src/intel/vulkan/anv_gem.c         | 10 +++++
 src/intel/vulkan/anv_gem_stubs.c   |  6 +++
 src/intel/vulkan/anv_intel.c       |  3 +-
 src/intel/vulkan/anv_private.h     | 20 ++++++++++
 7 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index e316b421f4..88372c629e 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -416,8 +416,17 @@ anv_block_pool_expand_range(struct anv_block_pool *pool,
     * more work than just not setting a flag.  So, we explicitly DO NOT set
     * the EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and the kernel does all of the
     * hard work for us.
+    *
+    * Note also that due to the reassignment of the anv_bo and the offsetting
+    * of the relocation by the center requiring fixup on execbuf, we can not
+    * trivially discard the relocation entries (EXEC_OBJECT_PINNED) as we
+    * will need to update all the previous relocations before we discard
+    * the old handle and address. To reduce the likelihood of kernel
+    * relocation, we still assign it an offset from our allocator.
     */
    anv_bo_init(&pool->bo, gem_handle, size);
+   anv_physical_device_allocate_offset(&pool->device->instance->physicalDevice,
+                                       size, 0, &pool->bo.offset);
    pool->bo.map = map;
 
    return VK_SUCCESS;
diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
index baee8290cd..22230f911a 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -60,7 +60,7 @@ anv_reloc_list_init_clone(struct anv_reloc_list *list,
       list->num_bos = 0;
       list->size_bos = 32;
       list->num_relocs = 0;
-      list->size_relocs = 256;
+      list->size_relocs = 8;
    }
 
    list->bos =
@@ -235,6 +235,9 @@ anv_reloc_list_add(struct anv_reloc_list *list,
       list->bos[index] = target_bo;
    }
 
+   if (target_bo->flags & EXEC_OBJECT_PINNED)
+      return VK_SUCCESS;
+
    const uint32_t domain =
       (target_bo->flags & EXEC_OBJECT_WRITE) ? I915_GEM_DOMAIN_RENDER : 0;
 
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 6ecd2960e1..2e9b64d3df 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -31,6 +31,7 @@
 #include <xf86drm.h>
 
 #include "anv_private.h"
+#include "util/u_atomic.h"
 #include "util/strtod.h"
 #include "util/debug.h"
 #include "util/build_id.h"
@@ -225,8 +226,21 @@ anv_physical_device_init(struct anv_physical_device *device,
       goto fail;
    }
 
+   device->supports_full_ppgtt = anv_gem_supports_full_ppgtt(fd);
    device->supports_48bit_addresses = anv_gem_supports_48b_addresses(fd);
 
+   device->last_allocated_gtt_offset = 0;
+   device->last_allocated_rsvd_offset = 0;
+   device->gtt_size = 2u << 30; /* conservative for ilk+ */
+   anv_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
+                             &device->gtt_size);
+
+   /* Exclude the first 4GiB for reserved allocations */
+   if (device->supports_48bit_addresses)
+      device->last_allocated_gtt_offset = 1ull << 32;
+   else
+      device->gtt_size = MIN2(device->gtt_size, 1ull << 32); /* paranoia */
+
    result = anv_compute_heap_size(fd, &device->heap_size);
    if (result != VK_SUCCESS)
       goto fail;
@@ -1406,6 +1420,70 @@ VkResult anv_DeviceWaitIdle(
    return anv_device_submit_simple_batch(device, &batch);
 }
 
+bool
+anv_physical_device_allocate_offset(struct anv_physical_device *instance,
+                                    uint64_t size, unsigned int flags,
+                                    uint64_t *out)
+{
+#define MiB(x) ((x) << 20)
+
+   if (!instance->supports_full_ppgtt)
+      return false;
+
+   /* Minimum allocation in the GTT is a single page */
+   size = align(size, 4096);
+
+   /* Align to 2MiB to allow for 64KiB pages (for large objects) */
+   if (size >= MiB(2)/2)
+      size = align(size, MiB(2));
+
+   /*
+    * Most basic range manager for user ppgtt allocation - allocate the
+    * next offset until we run out of space, where we fall back to using
+    * relocations and letting the kernel find holes.
+    *
+    * We allocate a global offset so that if we do share buffers between
+    * contexts on the same device, we do not need to relocate on each
+    * invocation of a different context. Given that we have a large
+    * address space to play with using a shared address space for all of
+    * our allocations forever is a reasonable simplification in the short
+    * term (and avoids too much duplication with the kernel).
+    *
+    * The kernel will handle finding holes in our allocations to fit in
+    * other buffers, and once found are unlikely to have to move (as per
+    * normal).
+    */
+
+   uint64_t *last = &instance->last_allocated_gtt_offset;
+   uint64_t max = instance->gtt_size;
+   if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
+       instance->supports_48bit_addresses) {
+      last = &instance->last_allocated_rsvd_offset;
+      max = 1ull << 32;
+   }
+   uint64_t offset = *last;
+
+   do {
+      uint64_t old = offset;
+
+      if (size >= MiB(2))
+         offset = align(offset, MiB(2));
+
+      if (offset + size > max)
+         return false;
+
+      uint64_t new = p_atomic_cmpxchg(last, old, offset + size);
+      if (new == old) {
+         *out = offset;
+         return true;
+      }
+
+      offset = new;
+   } while (1);
+
+#undef MiB
+}
+
 VkResult
 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device,
                 uint64_t size, unsigned flags)
diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c
index ac47da4117..2a21839bfe 100644
--- a/src/intel/vulkan/anv_gem.c
+++ b/src/intel/vulkan/anv_gem.c
@@ -335,6 +335,16 @@ anv_gem_get_aperture(int fd, uint64_t *size)
 }
 
 bool
+anv_gem_supports_full_ppgtt(int fd)
+{
+   int val = 0;
+   struct drm_i915_getparam gp = { 18, &val };
+
+   ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp, sizeof(gp));
+   return val >= 2;
+}
+
+bool
 anv_gem_supports_48b_addresses(int fd)
 {
    struct drm_i915_gem_exec_object2 obj = {
diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c
index 8d81eb5b28..3f7c56bcff 100644
--- a/src/intel/vulkan/anv_gem_stubs.c
+++ b/src/intel/vulkan/anv_gem_stubs.c
@@ -157,6 +157,12 @@ anv_gem_get_aperture(int fd, uint64_t *size)
 }
 
 bool
+anv_gem_supports_full_ppgtt(int fd)
+{
+   unreachable("Unused");
+}
+
+bool
 anv_gem_supports_48b_addresses(int fd)
 {
    unreachable("Unused");
diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c
index f232411231..a3ce930634 100644
--- a/src/intel/vulkan/anv_intel.c
+++ b/src/intel/vulkan/anv_intel.c
@@ -56,7 +56,8 @@ VkResult anv_CreateDmaBufImageINTEL(
    if (result != VK_SUCCESS)
       goto fail;
 
-   anv_bo_set_flags(device, mem->bo, EXEC_OBJECT_SUPPORTS_48B_ADDRESS);
+   anv_bo_set_flags(device, mem->bo,
+                    EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED);
 
    anv_image_create(_device,
       &(struct anv_image_create_info) {
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 7c9b9f3b2a..2f6b4217b2 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -657,6 +657,7 @@ struct anv_physical_device {
      * the total system ram to try and avoid running out of RAM.
      */
     uint64_t                                    heap_size;
+    bool                                        supports_full_ppgtt;
     bool                                        supports_48bit_addresses;
     struct brw_compiler *                       compiler;
     struct isl_device                           isl_dev;
@@ -672,6 +673,10 @@ struct anv_physical_device {
 
     struct wsi_device                       wsi_device;
     int                                         local_fd;
+
+    uint64_t					gtt_size;
+    uint64_t					last_allocated_gtt_offset;
+    uint64_t					last_allocated_rsvd_offset;
 };
 
 struct anv_instance {
@@ -799,6 +804,7 @@ int anv_gem_get_context_param(int fd, int context, uint32_t param,
 int anv_gem_get_param(int fd, uint32_t param);
 bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling);
 int anv_gem_get_aperture(int fd, uint64_t *size);
+bool anv_gem_supports_full_ppgtt(int fd);
 bool anv_gem_supports_48b_addresses(int fd);
 int anv_gem_gpu_get_reset_stats(struct anv_device *device,
                                 uint32_t *active, uint32_t *pending);
@@ -808,6 +814,11 @@ int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t
 int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
                        uint32_t read_domains, uint32_t write_domain);
 
+bool
+anv_physical_device_allocate_offset(struct anv_physical_device *instance,
+                                    uint64_t size, unsigned flags,
+                                    uint64_t *out);
+
 static inline void
 anv_bo_set_flags(const struct anv_device *device,
                  struct anv_bo *bo,
@@ -819,6 +830,14 @@ anv_bo_set_flags(const struct anv_device *device,
        phys->supports_48bit_addresses)
       bo->flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
 
+   if (flags & EXEC_OBJECT_PINNED) {
+      bo->flags &= ~EXEC_OBJECT_PINNED;
+      if (anv_physical_device_allocate_offset(phys,
+                                              bo->size, flags,
+                                              &bo->offset))
+         bo->flags |= EXEC_OBJECT_PINNED;
+   }
+
    if (flags & EXEC_OBJECT_ASYNC &&
        phys->has_exec_async)
       bo->flags |= EXEC_OBJECT_ASYNC;
@@ -826,6 +845,7 @@ anv_bo_set_flags(const struct anv_device *device,
 
 #define ANV_BO_DEFAULT_FLAGS \
    (EXEC_OBJECT_SUPPORTS_48B_ADDRESS | \
+    EXEC_OBJECT_PINNED | \
     EXEC_OBJECT_ASYNC)
 
 VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device,
-- 
2.11.0



More information about the mesa-dev mailing list