[Mesa-dev] [PATCH 15/21] winsys/amdgpu: sparse buffer creation / destruction / commitment

Nicolai Hähnle nhaehnle at gmail.com
Wed Feb 8 12:42:57 UTC 2017


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

This is the bulk of the buffer allocation logic. It is fairly simple and
stupid. We'll probably want to use e.g. interval trees at some point to
keep track of commitments, but Mesa doesn't have an implementation of those
yet.
---
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 581 +++++++++++++++++++++++++++++-
 1 file changed, 580 insertions(+), 1 deletion(-)
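
For context: commitments over the sparse buffer's virtual range are kept in a sorted
linked list (walked linearly in amdgpu_bo_sparse_commit), while each backing buffer
tracks its free pages as a sorted array of disjoint [begin, end) chunks. The snippet
below is not part of the patch; it is a minimal, standalone model of the chunk
coalescing done by sparse_backing_free, simplified to a fixed-capacity array instead
of the REALLOC-grown one used in the winsys, and all identifiers in it (range_set,
range_set_free, chunk) are made up for illustration:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct chunk { uint32_t begin, end; };      /* half-open range of pages */

struct range_set {
   struct chunk chunks[64];                 /* fixed capacity, sketch only */
   unsigned num_chunks;                     /* chunks are sorted and disjoint */
};

/* Return the pages [start, start + num) to the free set, merging with
 * neighbouring chunks where possible. */
static void range_set_free(struct range_set *s, uint32_t start, uint32_t num)
{
   uint32_t end = start + num;
   unsigned low = 0, high = s->num_chunks;

   /* Binary search for the first chunk with begin >= start. */
   while (low < high) {
      unsigned mid = low + (high - low) / 2;
      if (s->chunks[mid].begin >= start)
         high = mid;
      else
         low = mid + 1;
   }

   if (low > 0 && s->chunks[low - 1].end == start) {
      /* Grow the preceding chunk, possibly bridging into the next one. */
      s->chunks[low - 1].end = end;
      if (low < s->num_chunks && end == s->chunks[low].begin) {
         s->chunks[low - 1].end = s->chunks[low].end;
         for (unsigned i = low; i + 1 < s->num_chunks; ++i)
            s->chunks[i] = s->chunks[i + 1];
         s->num_chunks--;
      }
   } else if (low < s->num_chunks && end == s->chunks[low].begin) {
      /* Prepend to the following chunk. */
      s->chunks[low].begin = start;
   } else {
      /* Insert a new chunk at position 'low'. */
      assert(s->num_chunks < 64);
      for (unsigned i = s->num_chunks; i > low; --i)
         s->chunks[i] = s->chunks[i - 1];
      s->chunks[low].begin = start;
      s->chunks[low].end = end;
      s->num_chunks++;
   }
}

int main(void)
{
   struct range_set s = { .chunks = {{0, 4}, {8, 12}}, .num_chunks = 2 };

   /* Freeing [4, 8) bridges the two chunks into a single [0, 12). */
   range_set_free(&s, 4, 4);

   for (unsigned i = 0; i < s.num_chunks; ++i)
      printf("[%u, %u)\n", (unsigned)s.chunks[i].begin, (unsigned)s.chunks[i].end);
   return 0;
}

The interval trees mentioned in the commit message would mainly replace the linear
walk over the commitment list; the per-backing free-chunk arrays are already kept
sorted and binary-searched on free.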

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index f998eae..a5a7f04 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -575,6 +575,575 @@ void amdgpu_bo_slab_free(void *priv, struct pb_slab *pslab)
    FREE(slab);
 }
 
+/*
+ * Attempt to allocate the given number of backing pages. Fewer pages may be
+ * allocated (depending on the fragmentation of existing backing buffers),
+ * which will be reflected by a change to *pnum_pages.
+ */
+static struct amdgpu_sparse_backing *
+sparse_backing_alloc(struct amdgpu_winsys_bo *bo, uint32_t *pstart_page, uint32_t *pnum_pages)
+{
+   struct amdgpu_sparse_backing *best_backing;
+   unsigned best_idx;
+   uint32_t best_num_pages;
+
+   best_backing = NULL;
+   best_idx = 0;
+   best_num_pages = 0;
+
+   /* This is a very simple and inefficient best-fit algorithm. */
+   list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) {
+      for (unsigned idx = 0; idx < backing->num_chunks; ++idx) {
+         uint32_t cur_num_pages = backing->chunks[idx].end - backing->chunks[idx].begin;
+         if ((best_num_pages < *pnum_pages && cur_num_pages > best_num_pages) ||
+            (best_num_pages > *pnum_pages && cur_num_pages < best_num_pages)) {
+            best_backing = backing;
+            best_idx = idx;
+            best_num_pages = cur_num_pages;
+         }
+      }
+   }
+
+   /* Allocate a new backing buffer if necessary. */
+   if (!best_backing) {
+      struct pb_buffer *buf;
+      uint64_t size;
+      uint32_t pages;
+
+      best_backing = CALLOC_STRUCT(amdgpu_sparse_backing);
+      if (!best_backing)
+         return NULL;
+
+      best_backing->max_chunks = 4;
+      best_backing->chunks = CALLOC(best_backing->max_chunks,
+                                    sizeof(*best_backing->chunks));
+      if (!best_backing->chunks) {
+         FREE(best_backing);
+         return NULL;
+      }
+
+      assert(bo->u.sparse.total_backing_pages < DIV_ROUND_UP(bo->base.size, RADEON_SPARSE_PAGE_SIZE));
+
+      size = MIN3(bo->base.size / 16,
+                  8 * 1024 * 1024,
+                  bo->base.size - (uint64_t)bo->u.sparse.total_backing_pages * RADEON_SPARSE_PAGE_SIZE);
+      size = MAX2(size, RADEON_SPARSE_PAGE_SIZE);
+
+      buf = amdgpu_bo_create(&bo->ws->base, size, RADEON_SPARSE_PAGE_SIZE,
+                             bo->initial_domain,
+                             bo->u.sparse.flags | RADEON_FLAG_HANDLE);
+      if (!buf) {
+         FREE(best_backing->chunks);
+         FREE(best_backing);
+         return NULL;
+      }
+
+      /* We might have gotten a bigger buffer than requested via caching. */
+      pages = buf->size / RADEON_SPARSE_PAGE_SIZE;
+
+      best_backing->bo = amdgpu_winsys_bo(buf);
+      best_backing->num_chunks = 1;
+      best_backing->chunks[0].begin = 0;
+      best_backing->chunks[0].end = pages;
+
+      list_add(&best_backing->list, &bo->u.sparse.backing);
+      bo->u.sparse.total_backing_pages += pages;
+
+      best_idx = 0;
+      best_num_pages = pages;
+   }
+
+   *pnum_pages = MIN2(*pnum_pages, best_num_pages);
+   *pstart_page = best_backing->chunks[best_idx].begin;
+   best_backing->chunks[best_idx].begin += *pnum_pages;
+
+   if (best_backing->chunks[best_idx].begin >= best_backing->chunks[best_idx].end) {
+      memmove(&best_backing->chunks[best_idx], &best_backing->chunks[best_idx + 1],
+              sizeof(*best_backing->chunks) * (best_backing->num_chunks - best_idx - 1));
+      best_backing->num_chunks--;
+   }
+
+   return best_backing;
+}
+
+static void
+sparse_free_backing_buffer(struct amdgpu_sparse_backing *backing)
+{
+   list_del(&backing->list);
+   amdgpu_winsys_bo_reference(&backing->bo, NULL);
+   FREE(backing->chunks);
+   FREE(backing);
+}
+
+/*
+ * Return a range of pages from the given backing buffer back into the
+ * free structure.
+ */
+static bool
+sparse_backing_free(struct amdgpu_winsys_bo *bo,
+                    struct amdgpu_sparse_backing *backing,
+                    uint32_t start_page, uint32_t num_pages)
+{
+   uint32_t end_page = start_page + num_pages;
+   unsigned low = 0;
+   unsigned high = backing->num_chunks;
+
+   /* Find the first chunk with begin >= start_page. */
+   while (low < high) {
+      unsigned mid = low + (high - low) / 2;
+
+      if (backing->chunks[mid].begin >= start_page)
+         high = mid;
+      else
+         low = mid + 1;
+   }
+
+   assert(low >= backing->num_chunks || end_page <= backing->chunks[low].begin);
+   assert(low == 0 || backing->chunks[low - 1].end <= start_page);
+
+   if (low > 0 && backing->chunks[low - 1].end == start_page) {
+      backing->chunks[low - 1].end = end_page;
+
+      if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
+         backing->chunks[low - 1].end = backing->chunks[low].end;
+         memmove(&backing->chunks[low], &backing->chunks[low + 1],
+                 sizeof(*backing->chunks) * (backing->num_chunks - low - 1));
+         backing->num_chunks--;
+      }
+   } else if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
+      backing->chunks[low].begin = start_page;
+   } else {
+      if (backing->num_chunks >= backing->max_chunks) {
+         unsigned new_max_chunks = 2 * backing->max_chunks;
+         struct amdgpu_sparse_backing_chunk *new_chunks =
+            REALLOC(backing->chunks,
+                    sizeof(*backing->chunks) * backing->max_chunks,
+                    sizeof(*backing->chunks) * new_max_chunks);
+         if (!new_chunks)
+            return false;
+
+         backing->max_chunks = new_max_chunks;
+         backing->chunks = new_chunks;
+      }
+
+      memmove(&backing->chunks[low + 1], &backing->chunks[low],
+              sizeof(*backing->chunks) * (backing->num_chunks - low));
+      backing->chunks[low].begin = start_page;
+      backing->chunks[low].end = end_page;
+      backing->num_chunks++;
+   }
+
+   if (backing->num_chunks == 1 && backing->chunks[0].begin == 0 &&
+       backing->chunks[0].end == backing->bo->base.size / RADEON_SPARSE_PAGE_SIZE)
+      sparse_free_backing_buffer(backing);
+
+   return true;
+}
+
+/*
+ * Add a commitment structure and map its pages.
+ */
+static bool
+sparse_commitment_add(struct amdgpu_winsys_bo *bo,
+                      struct amdgpu_sparse_commitment *before,
+                      uint32_t page,
+                      struct amdgpu_sparse_backing *backing,
+                      uint32_t backing_start, uint32_t num_pages)
+{
+   struct amdgpu_sparse_commitment *comm;
+   int r;
+
+   assert(page + num_pages <= before->va_page_offset);
+   assert((uint64_t)(backing_start + num_pages) * RADEON_SPARSE_PAGE_SIZE <= backing->bo->base.size);
+
+   comm = CALLOC_STRUCT(amdgpu_sparse_commitment);
+   if (!comm)
+      return false;
+
+   comm->backing = backing;
+   comm->va_page_offset = page;
+   comm->backing_page_offset = backing_start;
+   comm->num_pages = num_pages;
+
+   r = amdgpu_bo_va_op(backing->bo->bo,
+                       (uint64_t)backing_start * RADEON_SPARSE_PAGE_SIZE,
+                       (uint64_t)num_pages * RADEON_SPARSE_PAGE_SIZE,
+                       bo->va + (uint64_t)page * RADEON_SPARSE_PAGE_SIZE,
+                       0, AMDGPU_VA_OP_MAP);
+   if (r) {
+      FREE(comm);
+      return false;
+   }
+
+   list_addtail(&comm->list, &before->list);
+   return true;
+}
+
+/*
+ * Remove (and free) a commitment structure and unmap it.
+ */
+static void
+sparse_commitment_remove(struct amdgpu_winsys_bo *bo,
+                         struct amdgpu_sparse_commitment *comm)
+{
+   int r;
+
+   r = amdgpu_bo_va_op(comm->backing->bo->bo,
+                       (uint64_t)comm->backing_page_offset * RADEON_SPARSE_PAGE_SIZE,
+                       (uint64_t)comm->num_pages * RADEON_SPARSE_PAGE_SIZE,
+                       bo->va + (uint64_t)comm->va_page_offset * RADEON_SPARSE_PAGE_SIZE,
+                       0, AMDGPU_VA_OP_UNMAP);
+   if (r) {
+      /* Ignore the failure. If it's really a problem, subsequent
+       * operations will fail.
+       */
+      fprintf(stderr, "amdgpu: PRT commitment unmap failed (%d)\n", r);
+   }
+
+   list_del(&comm->list);
+   FREE(comm);
+}
+
+static void amdgpu_bo_sparse_destroy(struct pb_buffer *_buf)
+{
+   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
+   uint32_t page;
+   int r;
+
+   assert(!bo->bo && bo->sparse);
+
+   page = 0;
+
+   for (;;) {
+      struct amdgpu_sparse_commitment *comm =
+         list_first_entry(&bo->u.sparse.commitments.list, struct amdgpu_sparse_commitment, list);
+      uint32_t num_pages = comm->va_page_offset - page;
+
+      if (num_pages) {
+         r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0,
+                                 (uint64_t)num_pages * RADEON_SPARSE_PAGE_SIZE,
+                                 bo->va + (uint64_t)page * RADEON_SPARSE_PAGE_SIZE,
+                                 AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_UNMAP);
+         if (r) {
+            fprintf(stderr, "amdgpu: PRT hole unmap on destroy failed (%d)\n", r);
+         }
+      }
+
+      if (LIST_IS_EMPTY(&bo->u.sparse.commitments.list))
+         break;
+
+      page = comm->va_page_offset + comm->num_pages;
+      sparse_commitment_remove(bo, comm);
+   }
+
+   while (!list_empty(&bo->u.sparse.backing)) {
+      struct amdgpu_sparse_backing *dummy = NULL;
+      sparse_free_backing_buffer(container_of(bo->u.sparse.backing.next,
+                                              dummy, list));
+   }
+
+   amdgpu_va_range_free(bo->u.sparse.va_handle);
+   FREE(bo);
+}
+
+static const struct pb_vtbl amdgpu_winsys_bo_sparse_vtbl = {
+   amdgpu_bo_sparse_destroy
+   /* other functions are never called */
+};
+
+static struct pb_buffer *
+amdgpu_bo_sparse_create(struct amdgpu_winsys *ws, uint64_t size,
+                        enum radeon_bo_domain domain,
+                        enum radeon_bo_flag flags)
+{
+   struct amdgpu_winsys_bo *bo;
+   uint64_t map_size;
+   uint64_t va_gap_size;
+   int r;
+
+   /* We use 32-bit page numbers; refuse to attempt allocating sparse buffers
+    * that exceed this limit. This is not really a restriction: we don't have
+    * that much virtual address space anyway.
+    */
+   if (size > (uint64_t)INT32_MAX * RADEON_SPARSE_PAGE_SIZE)
+      return NULL;
+
+   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
+   if (!bo)
+      return NULL;
+
+   pipe_reference_init(&bo->base.reference, 1);
+   bo->base.alignment = RADEON_SPARSE_PAGE_SIZE;
+   bo->base.size = size;
+   bo->base.vtbl = &amdgpu_winsys_bo_sparse_vtbl;
+   bo->ws = ws;
+   bo->initial_domain = domain;
+   bo->unique_id =  __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
+   bo->sparse = true;
+   bo->u.sparse.flags = flags & ~RADEON_FLAG_SPARSE;
+
+   pipe_mutex_init(bo->u.sparse.commit_lock);
+   LIST_INITHEAD(&bo->u.sparse.backing);
+   LIST_INITHEAD(&bo->u.sparse.commitments.list);
+   bo->u.sparse.commitments.va_page_offset = DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE);
+
+   /* For simplicity, we always map a multiple of the page size. */
+   map_size = align64(size, RADEON_SPARSE_PAGE_SIZE);
+   va_gap_size = ws->check_vm ? 4 * RADEON_SPARSE_PAGE_SIZE : 0;
+   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
+                             map_size + va_gap_size, RADEON_SPARSE_PAGE_SIZE,
+                             0, &bo->va, &bo->u.sparse.va_handle, 0);
+   if (r)
+      goto error_va_alloc;
+
+   r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0, size, bo->va,
+                           AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
+   if (r)
+      goto error_va_map;
+
+   return &bo->base;
+
+error_va_map:
+   amdgpu_va_range_free(bo->u.sparse.va_handle);
+error_va_alloc:
+   pipe_mutex_destroy(bo->u.sparse.commit_lock);
+   FREE(bo);
+   return NULL;
+}
+
+static bool
+amdgpu_bo_sparse_commit(struct pb_buffer *buf, uint64_t offset, uint64_t size,
+                        bool commit)
+{
+   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buf);
+   struct amdgpu_sparse_commitment *comm;
+   uint32_t page, end_page;
+   bool ok = true;
+   int r;
+
+   assert(bo->sparse);
+   assert(offset % RADEON_SPARSE_PAGE_SIZE == 0);
+   assert(offset <= bo->base.size);
+   assert(size <= bo->base.size - offset);
+   assert(size % RADEON_SPARSE_PAGE_SIZE == 0 || offset + size == bo->base.size);
+
+   page = offset / RADEON_SPARSE_PAGE_SIZE;
+   end_page = page + DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE);
+
+   pipe_mutex_lock(bo->u.sparse.commit_lock);
+
+   /* Find the first existing commitment that we might overlap. */
+   LIST_FOR_EACH_ENTRY(comm, &bo->u.sparse.commitments.list, list) {
+      if (page < comm->va_page_offset + comm->num_pages)
+         break;
+   }
+
+   if (commit) {
+      /* Handle the case where we start in the middle of an existing commitment. */
+      if (comm->va_page_offset <= page) {
+         page = MIN2(end_page, comm->va_page_offset + comm->num_pages);
+         comm = container_of(comm->list.next, comm, list);
+      }
+
+      /* Loop invariant: page <= comm->va_page_offset (comm may be the list sentinel). */
+      while (page < end_page) {
+         /* Remove or split the PRT hole mapping. */
+         if (page < comm->va_page_offset) {
+            struct amdgpu_sparse_commitment *prev =
+               LIST_ENTRY(struct amdgpu_sparse_commitment, comm->list.prev, list);
+            uint32_t hole_start;
+
+            if (prev != &bo->u.sparse.commitments)
+               hole_start = prev->va_page_offset + prev->num_pages;
+            else
+               hole_start = 0;
+
+            r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0,
+                                    (uint64_t)(comm->va_page_offset - hole_start) * RADEON_SPARSE_PAGE_SIZE,
+                                    bo->va + (uint64_t)hole_start * RADEON_SPARSE_PAGE_SIZE,
+                                    AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_UNMAP);
+            if (r) {
+               /* Ignore the failure. If it's really a problem, subsequent
+                * operations will fail.
+                */
+               fprintf(stderr, "amdgpu: PRT hole unmap failed (%d)\n", r);
+            }
+
+            if (hole_start < page) {
+               r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0,
+                                       (uint64_t)(page - hole_start) * RADEON_SPARSE_PAGE_SIZE,
+                                       bo->va + (uint64_t)hole_start * RADEON_SPARSE_PAGE_SIZE,
+                                       AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
+               if (r) {
+                  ok = false;
+                  goto out;
+               }
+            }
+         }
+
+         /* Fill the hole in. */
+         while (page < MIN2(end_page, comm->va_page_offset)) {
+            struct amdgpu_sparse_backing *backing;
+            uint32_t backing_start, backing_size;
+
+            backing_size = MIN2(end_page, comm->va_page_offset) - page;
+
+            backing = sparse_backing_alloc(bo, &backing_start, &backing_size);
+            if (!backing) {
+               ok = false;
+               goto out;
+            }
+
+            if (!sparse_commitment_add(bo, comm, page, backing, backing_start, backing_size)) {
+               sparse_backing_free(bo, backing, backing_start, backing_size);
+               ok = false;
+               goto out;
+            }
+
+            page += backing_size;
+         }
+
+         if (page >= end_page) {
+            /* We're done, except we may have to re-establish a PRT hole
+             * mapping.
+             */
+            if (page < comm->va_page_offset) {
+               r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0,
+                                       (uint64_t)(comm->va_page_offset - page) * RADEON_SPARSE_PAGE_SIZE,
+                                       bo->va + (uint64_t)page * RADEON_SPARSE_PAGE_SIZE,
+                                       AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
+               if (r) {
+                  fprintf(stderr, "amdgpu: failed to re-establish PRT hole (%d)\n", r);
+                  ok = false;
+                  goto out;
+               }
+            }
+
+            break;
+         }
+
+         /* Skip the existing commitment. */
+         assert(page == comm->va_page_offset);
+         page = comm->va_page_offset + comm->num_pages;
+         comm = container_of(comm->list.next, comm, list);
+      }
+   } else {
+      /* Remove or split existing mappings and ensure contiguous PRT hole
+       * mappings.
+       */
+      uint32_t start_page = page;
+
+      if (end_page <= comm->va_page_offset)
+         goto out;
+
+      if (page <= comm->va_page_offset) {
+         /* We may have to merge with a preceding PRT hole. */
+         struct amdgpu_sparse_commitment *prev =
+            LIST_ENTRY(struct amdgpu_sparse_commitment, comm->list.prev, list);
+
+         if (prev == &bo->u.sparse.commitments)
+            start_page = 0;
+         else
+            start_page = prev->va_page_offset + prev->num_pages;
+
+         if (start_page < page) {
+            r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0,
+                                    (uint64_t)(comm->va_page_offset - start_page) * RADEON_SPARSE_PAGE_SIZE,
+                                    bo->va + (uint64_t)start_page * RADEON_SPARSE_PAGE_SIZE,
+                                    AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_UNMAP);
+            if (r) {
+               /* Ignore the failure. If it's really a problem, subsequent
+                * operations will fail.
+                */
+               fprintf(stderr, "amdgpu: PRT first hole unmap for merge failed (%d)\n", r);
+            }
+         }
+
+         page = comm->va_page_offset;
+      }
+
+      while (page < end_page) {
+         struct amdgpu_sparse_commitment *cur = comm;
+         struct amdgpu_sparse_backing *backing;
+         uint32_t va_page_offset, va_page_end;
+         uint32_t backing_page_offset, backing_size;
+         uint32_t free_start_offset, free_end_offset;
+
+         assert(comm->va_page_offset <= page);
+         assert(page < comm->va_page_offset + comm->num_pages);
+
+         backing = comm->backing;
+         va_page_offset = comm->va_page_offset;
+         backing_page_offset = comm->backing_page_offset;
+         backing_size = comm->num_pages;
+         va_page_end = va_page_offset + backing_size;
+
+         comm = container_of(comm->list.next, comm, list);
+         sparse_commitment_remove(bo, cur);
+
+         free_start_offset = page - va_page_offset;
+         free_end_offset = MIN2(end_page - va_page_offset, backing_size);
+
+         if (free_start_offset > 0) {
+            if (!sparse_commitment_add(bo, comm, va_page_offset, backing,
+                                       backing_page_offset, free_start_offset)) {
+               ok = false;
+               goto out;
+            }
+         }
+
+         if (free_end_offset < backing_size) {
+            if (!sparse_commitment_add(bo, comm, va_page_offset + free_end_offset,
+                                       backing, backing_page_offset + free_end_offset,
+                                       backing_size - free_end_offset)) {
+               ok = false;
+               goto out;
+            }
+         }
+
+         if (!sparse_backing_free(bo, backing,
+                                  backing_page_offset + free_start_offset,
+                                  free_end_offset - free_start_offset)) {
+            ok = false;
+            goto out;
+         }
+
+         if (end_page < va_page_end) {
+            page = end_page;
+         } else {
+            page = comm->va_page_offset;
+
+            r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0,
+                                    (uint64_t)(comm->va_page_offset - va_page_end) * RADEON_SPARSE_PAGE_SIZE,
+                                    bo->va + (uint64_t)va_page_end * RADEON_SPARSE_PAGE_SIZE,
+                                    AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_UNMAP);
+            if (r) {
+               /* Ignore the failure. If it's really a problem, subsequent
+                * operations will fail.
+                */
+               fprintf(stderr, "amdgpu: PRT hole unmap for merge failed (%d)\n", r);
+            }
+         }
+      }
+
+      /* Re-create a hole mapping. */
+      r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0,
+                              (uint64_t)(page - start_page) * RADEON_SPARSE_PAGE_SIZE,
+                              bo->va + (uint64_t)start_page * RADEON_SPARSE_PAGE_SIZE,
+                              AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
+      if (r) {
+         fprintf(stderr, "amdgpu: failed to create PRT hole (%d)\n", r);
+         ok = false;
+         goto out;
+      }
+   }
+out:
+
+   pipe_mutex_unlock(bo->u.sparse.commit_lock);
+
+   return ok;
+}
+
 static unsigned eg_tile_split(unsigned tile_split)
 {
    switch (tile_split) {
@@ -688,7 +1257,7 @@ amdgpu_bo_create(struct radeon_winsys *rws,
    unsigned usage = 0, pb_cache_bucket;
 
    /* Sub-allocate small buffers from slabs. */
-   if (!(flags & RADEON_FLAG_HANDLE) &&
+   if (!(flags & (RADEON_FLAG_HANDLE | RADEON_FLAG_SPARSE)) &&
        size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) &&
        alignment <= MAX2(1 << AMDGPU_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
       struct pb_slab_entry *entry;
@@ -734,6 +1303,15 @@ amdgpu_bo_create(struct radeon_winsys *rws,
    }
 no_slab:
 
+   if (flags & RADEON_FLAG_SPARSE) {
+      assert(RADEON_SPARSE_PAGE_SIZE % alignment == 0);
+      assert(!(flags & RADEON_FLAG_CPU_ACCESS));
+
+      flags |= RADEON_FLAG_NO_CPU_ACCESS;
+
+      return amdgpu_bo_sparse_create(ws, size, domain, flags);
+   }
+
    /* This flag is irrelevant for the cache. */
    flags &= ~RADEON_FLAG_HANDLE;
 
@@ -995,6 +1573,7 @@ void amdgpu_bo_init_functions(struct amdgpu_winsys *ws)
    ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
    ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr;
    ws->base.buffer_get_handle = amdgpu_bo_get_handle;
+   ws->base.buffer_commit = amdgpu_bo_sparse_commit;
    ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
    ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
 }
-- 
2.9.3


