[Mesa-dev] [PATCH 4/4] anv: Allocate buffer offsets from userspace

Chris Wilson chris at chris-wilson.co.uk
Thu May 11 13:13:13 UTC 2017


A very simple allocator that increments from the last globally allocated
offset and assigns that for the lifetime of this bo for every context.
Once we run out of space, we delegate finding holes to the kernel. In
the future, we can opt to handle this ourselves as well, but a 48b
address space means we can defer this optimisation for some time - we
already assume that we will never exceed that virtual usage in a single
command submission (execbuf).

The only caveat is to remember to exclude certain objects from certain
ranges; for those we fall back to the kernel allocator at the moment.

The advantage of assigning our own offsets is that we can completely
skip relocation tracking; no more lists and potential allocation
failures, and no more relocations prior to submission, neither by
ourselves nor as potential fixups from the kernel (which are done under
global mutexes).
However, not everything immediately stores its target address in the
batch and so we must keep the relocation list around to perform deferred
relocations. As these are fixed, the number of relocations can be
reduced.

The downside of using a monotonic allocator is that we do not reuse
address space. If the application doesn't settle to using a static
working set, but keeps dynamically allocating a few objects, then we
continue to grow the address space, consuming more and more pagetables.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Jason Ekstrand <jason.ekstrand at intel.com>
---
 src/intel/vulkan/anv_allocator.c   |  6 +++-
 src/intel/vulkan/anv_batch_chain.c | 14 +++++---
 src/intel/vulkan/anv_device.c      | 72 ++++++++++++++++++++++++++++++++++++++
 src/intel/vulkan/anv_gem.c         | 10 ++++++
 src/intel/vulkan/anv_gem_stubs.c   |  6 ++++
 src/intel/vulkan/anv_private.h     | 11 +++++-
 src/intel/vulkan/genX_blorp_exec.c |  3 +-
 src/intel/vulkan/genX_cmd_buffer.c |  6 ++--
 8 files changed, 119 insertions(+), 9 deletions(-)

diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index b767542aa5..ff433bbc03 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -1154,7 +1154,7 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
     *
     * so nothing will ever touch the top page.
     */
-   bo->bo.flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+   bo->bo.flags &= ~(EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED);
 
    /* Set the exists last because it may be read by other threads */
    __sync_synchronize();
@@ -1315,6 +1315,10 @@ anv_bo_cache_import(struct anv_device *device,
       if (device->instance->physicalDevice.has_exec_async)
          bo->bo.flags |= EXEC_OBJECT_ASYNC;
 
+      if (anv_physical_device_allocate_offset(&device->instance->physicalDevice,
+					      size, &bo->bo.offset))
+	 bo->bo.flags |= EXEC_OBJECT_PINNED;
+
       _mesa_hash_table_insert(cache->bo_map, (void *)(uintptr_t)gem_handle, bo);
    }
 
diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
index 166935f8b8..66fc3ab9b7 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -60,7 +60,7 @@ anv_reloc_list_init_clone(struct anv_reloc_list *list,
       list->num_bos = 0;
       list->size_bos = 32;
       list->num_relocs = 0;
-      list->size_relocs = 256;
+      list->size_relocs = 8;
    }
 
    list->bos =
@@ -188,7 +188,8 @@ anv_reloc_list_grow_relocs(struct anv_reloc_list *list,
 VkResult
 anv_reloc_list_add(struct anv_reloc_list *list,
                    const VkAllocationCallbacks *alloc,
-                   uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
+                   uint32_t offset, struct anv_bo *target_bo, uint32_t delta,
+                   bool force)
 {
    VkResult result;
    int index;
@@ -207,6 +208,9 @@ anv_reloc_list_add(struct anv_reloc_list *list,
       list->bos[index] = target_bo;
    }
 
+   if (target_bo->flags & EXEC_OBJECT_PINNED && !force)
+      return VK_SUCCESS;
+
    const uint32_t domain =
       (target_bo->flags & EXEC_OBJECT_WRITE) ? I915_GEM_DOMAIN_RENDER : 0;
 
@@ -311,7 +315,8 @@ anv_batch_emit_reloc(struct anv_batch *batch,
                      void *location, struct anv_bo *bo, uint32_t delta)
 {
    VkResult result = anv_reloc_list_add(batch->relocs, batch->alloc,
-                                        location - batch->start, bo, delta);
+                                        location - batch->start, bo, delta,
+                                        false);
    if (result != VK_SUCCESS) {
       anv_batch_set_error(batch, result);
       return 0;
@@ -524,7 +529,8 @@ anv_batch_bo_list_clone(const struct list_head *list,
 
          int reloc_size = cmd_buffer->device->info.gen < 8 ? 4 : 8;
          anv_reloc_list_add(&prev_bbo->relocs, &cmd_buffer->pool->alloc,
-                            prev_bbo->length - reloc_size, &new_bbo->bo, 0);
+                            prev_bbo->length - reloc_size, &new_bbo->bo, 0,
+                            true);
       }
 
       prev_bbo = new_bbo;
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 35e40301a3..ce07702f55 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -31,6 +31,7 @@
 #include <xf86drm.h>
 
 #include "anv_private.h"
+#include "util/u_atomic.h"
 #include "util/strtod.h"
 #include "util/debug.h"
 #include "util/build_id.h"
@@ -225,8 +226,19 @@ anv_physical_device_init(struct anv_physical_device *device,
       goto fail;
    }
 
+   device->supports_full_ppgtt = anv_gem_supports_full_ppgtt(fd);
    device->supports_48bit_addresses = anv_gem_supports_48b_addresses(fd);
 
+   device->gtt_size = 2u << 30; /* conservative for ilk+ */
+   anv_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
+                             &device->gtt_size);
+
+   /* Exclude the first 4GiB for reserved allocations */
+   if (device->supports_48bit_addresses)
+      device->last_allocated_gtt_offset = 1ull << 32;
+   else
+      device->gtt_size = MIN2(device->gtt_size, 1ull << 32); /* paranoia */
+
    result = anv_compute_heap_size(fd, &device->heap_size);
    if (result != VK_SUCCESS)
       goto fail;
@@ -1405,6 +1417,62 @@ VkResult anv_DeviceWaitIdle(
    return anv_device_submit_simple_batch(device, &batch);
 }
 
+bool
+anv_physical_device_allocate_offset(struct anv_physical_device *instance,
+                                    uint64_t size, uint64_t *out)
+{
+   if (!instance->supports_full_ppgtt)
+      return false;
+   /* NOTE(review): confirm align()/p_atomic_cmpxchg() are 64-bit safe here */
+#define MiB(x) ((x) << 20)
+
+   /* Minimum allocation in the GTT is a single page */
+   size = align(size, 4096);
+
+   /* Round objects >= 1MiB up to 2MiB multiples to allow for 64KiB pages */
+   if (size >= MiB(2)/2)
+      size = align(size, MiB(2));
+
+   /*
+    * Most basic range manager for user ppgtt allocation - allocate the
+    * next offset (stored in *out, returning true) until we run out of
+    * space, where we return false so that the caller can fall back to
+    * using relocations and letting the kernel find holes.
+    *
+    * We allocate a global offset so that if we do share buffers between
+    * contexts on the same device, we do not need to relocate on each
+    * invocation of a different context. Given that we have a large
+    * address space to play with, using a shared address space for all of
+    * our allocations forever is a reasonable simplification in the short
+    * term (and avoids too much duplication with the kernel).
+    *
+    * The kernel will handle finding holes in our allocations to fit in
+    * other buffers, and once found are unlikely to have to move (as per
+    * normal).
+    */
+   uint64_t offset = instance->last_allocated_gtt_offset;
+   do {
+      uint64_t old = offset;
+      /* Large objects also get a 2MiB-aligned start */
+      if (size >= MiB(2))
+         offset = align(offset, MiB(2));
+      /* Would run past the end of the address space: give up */
+      if (offset + size > instance->gtt_size)
+         return false;
+      /* Try to advance the shared mark from 'old'; getting 'old' back = won */
+      uint64_t new = p_atomic_cmpxchg(&instance->last_allocated_gtt_offset,
+                                      old, offset + size);
+      if (new == old) {
+         *out = offset;
+         return true;
+      }
+      /* Lost the race: retry from the value the cmpxchg returned */
+      offset = new;
+   } while (1);
+
+#undef MiB
+}
+
 VkResult
 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
 {
@@ -1417,6 +1485,10 @@ anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
    if (device->instance->physicalDevice.supports_48bit_addresses)
       bo->flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
 
+   if (anv_physical_device_allocate_offset(&device->instance->physicalDevice,
+                                           size, &bo->offset))
+      bo->flags |= EXEC_OBJECT_PINNED;
+
    if (device->instance->physicalDevice.has_exec_async)
       bo->flags |= EXEC_OBJECT_ASYNC;
 
diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c
index 4b6ee58070..0fe77be738 100644
--- a/src/intel/vulkan/anv_gem.c
+++ b/src/intel/vulkan/anv_gem.c
@@ -335,6 +335,16 @@ anv_gem_get_aperture(int fd, uint64_t *size)
 }
 
 bool
+anv_gem_supports_full_ppgtt(int fd)
+{
+   /* PPGTT mode as reported by the kernel: 2 and above means full ppgtt. */
+   int val = 0; /* stays 0, i.e. "unsupported", if the query fails */
+   struct drm_i915_getparam gp = { I915_PARAM_HAS_ALIASING_PPGTT, &val };
+   ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+   return val >= 2;
+}
+
+bool
 anv_gem_supports_48b_addresses(int fd)
 {
    struct drm_i915_gem_exec_object2 obj = {
diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c
index 8d81eb5b28..3f7c56bcff 100644
--- a/src/intel/vulkan/anv_gem_stubs.c
+++ b/src/intel/vulkan/anv_gem_stubs.c
@@ -157,6 +157,12 @@ anv_gem_get_aperture(int fd, uint64_t *size)
 }
 
 bool
+anv_gem_supports_full_ppgtt(int fd)
+{
+   unreachable("Unused"); /* stub, like anv_gem_supports_48b_addresses below */
+}
+
+bool
 anv_gem_supports_48b_addresses(int fd)
 {
    unreachable("Unused");
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 16995d5cf5..00608e2422 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -657,6 +657,7 @@ struct anv_physical_device {
      * the total system ram to try and avoid running out of RAM.
      */
     uint64_t                                    heap_size;
+    bool                                        supports_full_ppgtt;
     bool                                        supports_48bit_addresses;
     struct brw_compiler *                       compiler;
     struct isl_device                           isl_dev;
@@ -672,6 +673,9 @@ struct anv_physical_device {
 
     struct wsi_device                       wsi_device;
     int                                         local_fd;
+
+    uint64_t					gtt_size;
+    uint64_t					last_allocated_gtt_offset;
 };
 
 struct anv_instance {
@@ -799,6 +803,7 @@ int anv_gem_get_context_param(int fd, int context, uint32_t param,
 int anv_gem_get_param(int fd, uint32_t param);
 bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling);
 int anv_gem_get_aperture(int fd, uint64_t *size);
+bool anv_gem_supports_full_ppgtt(int fd);
 bool anv_gem_supports_48b_addresses(int fd);
 int anv_gem_gpu_get_reset_stats(struct anv_device *device,
                                 uint32_t *active, uint32_t *pending);
@@ -808,6 +813,10 @@ int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t
 int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
                        uint32_t read_domains, uint32_t write_domain);
 
+bool
+anv_physical_device_allocate_offset(struct anv_physical_device *instance,
+                                    uint64_t size, uint64_t *out);
+
 VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size);
 
 struct anv_reloc_list {
@@ -829,7 +838,7 @@ void anv_reloc_list_finish(struct anv_reloc_list *list,
 VkResult anv_reloc_list_add(struct anv_reloc_list *list,
                             const VkAllocationCallbacks *alloc,
                             uint32_t offset, struct anv_bo *target_bo,
-                            uint32_t delta);
+                            uint32_t delta, bool force);
 
 struct anv_batch_bo {
    /* Link in the anv_cmd_buffer.owned_batch_bos list */
diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c
index 71ed70741e..52614e372f 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -59,7 +59,8 @@ blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
    struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
    VkResult result =
       anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
-                         ss_offset, address.buffer, address.offset + delta);
+                         ss_offset, address.buffer, address.offset + delta,
+                         true);
    if (result != VK_SUCCESS)
       anv_batch_set_error(&cmd_buffer->batch, result);
 }
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index ef9b7d0554..fde294d174 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -173,7 +173,8 @@ add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer,
 
    VkResult result =
       anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
-                         state.offset + isl_dev->ss.addr_offset, bo, offset);
+                         state.offset + isl_dev->ss.addr_offset, bo, offset,
+                         true);
    if (result != VK_SUCCESS)
       anv_batch_set_error(&cmd_buffer->batch, result);
 }
@@ -203,7 +204,8 @@ add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
          anv_reloc_list_add(&cmd_buffer->surface_relocs,
                             &cmd_buffer->pool->alloc,
                             state.offset + isl_dev->ss.aux_addr_offset,
-                            iview->bo, aux_offset);
+                            iview->bo, aux_offset,
+                            true);
       if (result != VK_SUCCESS)
          anv_batch_set_error(&cmd_buffer->batch, result);
    }
-- 
2.11.0



More information about the mesa-dev mailing list