[Mesa-dev] [PATCH] anv: add support for allocating more than 1 block of memory

Juan A. Suarez Romero jasuarez at igalia.com
Mon Feb 27 12:48:28 UTC 2017


The current Anv allocator assigns memory in terms of a fixed block size.

But there are cases where one block is not enough for a memory request,
and thus several contiguous blocks must be assigned in a row.

This commit adds support for specifying how many blocks of memory must
be assigned.

This fixes a number of dEQP-VK.pipeline.render_to_image.* tests that crash.
---
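(The notes below are editorial illustrations, not part of the commit.)

A rough standalone sketch of the sizing idea, using Mesa's
DIV_ROUND_UP(); the real code in anv_state_stream_alloc() also has to
account for the stream block header and alignment padding, and the
numbers here are made up:

   #include <stdint.h>
   #include <stdio.h>

   #define DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))

   int main(void)
   {
      const uint32_t block_size = 8192;   /* hypothetical pool->block_size */
      const uint32_t size = 20000;        /* hypothetical request, in bytes */

      /* One block is not enough, so several contiguous blocks are taken. */
      uint32_t n_blocks = DIV_ROUND_UP(size, block_size);

      printf("%u bytes -> %u blocks\n", size, n_blocks);   /* 3 blocks */
      return 0;
   }
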
 src/intel/vulkan/anv_allocator.c   | 82 ++++++++++++++++++++++++--------------
 src/intel/vulkan/anv_batch_chain.c |  4 +-
 src/intel/vulkan/anv_private.h     |  8 +++-
 3 files changed, 61 insertions(+), 33 deletions(-)
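
Illustrative sketch of the new free list head: offset, n_blocks and the
ABA counter are packed into one 64-bit word so the head can still be
updated with a single compare-and-swap. The push side, stripped of the
map handling (the real code also stores the old head in the chunk's
first word):

   #include <stdint.h>

   union free_list {
      struct {
         int32_t offset;        /* byte offset of the head chunk */
         uint32_t n_blocks:8;   /* contiguous blocks in that chunk */
         uint32_t count:24;     /* bumped on every change; defeats ABA */
      };
      uint64_t u64;             /* exactly 64 bits, fits one CAS */
   };

   static void
   push(union free_list *list, int32_t offset, uint32_t n_blocks)
   {
      union free_list current, old, new;

      old = *list;
      do {
         current = old;
         new.offset = offset;
         new.n_blocks = n_blocks;
         new.count = current.count + 1;
         old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64,
                                               new.u64);
      } while (old.u64 != current.u64);
   }

   int main(void)
   {
      union free_list list = { .u64 = 0 };
      push(&list, 8192, 3);   /* head: offset 8192, 3 blocks, count 1 */
      return 0;
   }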

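Illustrative check of the in-pool test in anv_block_pool_alloc_new():
next and end are always multiples of block_size, so testing
next + (n_blocks - 1) * block_size < end is the same as asking whether
all n_blocks blocks fit, i.e. next + n_blocks * block_size <= end:

   #include <assert.h>
   #include <stdint.h>

   int main(void)
   {
      const uint32_t bs = 4096;   /* hypothetical block size */

      for (uint32_t next = 0; next <= 8 * bs; next += bs)
         for (uint32_t end = 0; end <= 8 * bs; end += bs)
            for (uint32_t n = 1; n <= 4; n++)
               assert((next + (n - 1) * bs < end) ==
                      (next + n * bs <= end));
      return 0;
   }
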
diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index 45c663b..7e0f3d7 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -149,12 +149,14 @@ round_to_power_of_two(uint32_t value)
 }
 
 static bool
-anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset)
+anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset, uint32_t n_blocks)
 {
    union anv_free_list current, new, old;
 
+   assert(n_blocks > 0 && n_blocks < 256);
+
    current.u64 = list->u64;
-   while (current.offset != EMPTY) {
+   while (current.offset != EMPTY && n_blocks <= current.n_blocks) {
       /* We have to add a memory barrier here so that the list head (and
        * offset) gets read before we read the map pointer.  This way we
        * know that the map pointer is valid for the given offset at the
@@ -177,16 +179,19 @@ anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset)
 }
 
 static void
-anv_free_list_push(union anv_free_list *list, void *map, int32_t offset)
+anv_free_list_push(union anv_free_list *list, void *map, int32_t offset, uint32_t n_blocks)
 {
    union anv_free_list current, old, new;
    int32_t *next_ptr = map + offset;
 
+   assert(n_blocks > 0 && n_blocks < 256);
+
    old = *list;
    do {
       current = old;
       VG_NOACCESS_WRITE(next_ptr, current.offset);
       new.offset = offset;
+      new.n_blocks = n_blocks;
       new.count = current.count + 1;
       old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
    } while (old.u64 != current.u64);
@@ -500,30 +505,37 @@ fail:
 
 static uint32_t
 anv_block_pool_alloc_new(struct anv_block_pool *pool,
-                         struct anv_block_state *pool_state)
+                         struct anv_block_state *pool_state,
+                         uint32_t n_blocks)
 {
    struct anv_block_state state, old, new;
 
+   assert(n_blocks > 0 && n_blocks < 256);
+
    while (1) {
-      state.u64 = __sync_fetch_and_add(&pool_state->u64, pool->block_size);
-      if (state.next < state.end) {
+      state.u64 = __sync_fetch_and_add(&pool_state->u64, n_blocks * pool->block_size);
+      if (state.next > state.end) {
+         futex_wait(&pool_state->end, state.end);
+         continue;
+      } else if ((state.next + (n_blocks - 1) * pool->block_size) < state.end) {
          assert(pool->map);
          return state.next;
-      } else if (state.next == state.end) {
-         /* We allocated the first block outside the pool, we have to grow it.
-          * pool_state->next acts a mutex: threads who try to allocate now will
-          * get block indexes above the current limit and hit futex_wait
-          * below. */
-         new.next = state.next + pool->block_size;
+      } else {
+         /* We allocated the first blocks outside the pool; we have to grow
+          * it. pool_state->next acts as a mutex: threads that try to
+          * allocate now will get block indexes above the current limit and
+          * hit futex_wait below.
+          */
+         new.next = state.next + n_blocks * pool->block_size;
          new.end = anv_block_pool_grow(pool, pool_state);
+         /* We assume that growing the pool once is enough to fulfil the
+          * memory requirements.
+          */
          assert(new.end >= new.next && new.end % pool->block_size == 0);
          old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64);
          if (old.next != state.next)
             futex_wake(&pool_state->end, INT_MAX);
          return state.next;
-      } else {
-         futex_wait(&pool_state->end, state.end);
-         continue;
       }
    }
 }
@@ -531,16 +543,21 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool,
 int32_t
 anv_block_pool_alloc(struct anv_block_pool *pool)
 {
+   return anv_block_pool_alloc_n(pool, 1);
+}
+
+int32_t
+anv_block_pool_alloc_n(struct anv_block_pool *pool, uint32_t n_blocks)
+{
    int32_t offset;
 
-   /* Try free list first. */
-   if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) {
+   if (anv_free_list_pop(&pool->free_list, &pool->map, &offset, n_blocks)) {
       assert(offset >= 0);
       assert(pool->map);
       return offset;
    }
 
-   return anv_block_pool_alloc_new(pool, &pool->state);
+   return anv_block_pool_alloc_new(pool, &pool->state, n_blocks);
 }
 
 /* Allocates a block out of the back of the block pool.
@@ -558,13 +575,13 @@ anv_block_pool_alloc_back(struct anv_block_pool *pool)
    int32_t offset;
 
    /* Try free list first. */
-   if (anv_free_list_pop(&pool->back_free_list, &pool->map, &offset)) {
+   if (anv_free_list_pop(&pool->back_free_list, &pool->map, &offset, 1)) {
       assert(offset < 0);
       assert(pool->map);
       return offset;
    }
 
-   offset = anv_block_pool_alloc_new(pool, &pool->back_state);
+   offset = anv_block_pool_alloc_new(pool, &pool->back_state, 1);
 
    /* The offset we get out of anv_block_pool_alloc_new() is actually the
     * number of bytes downwards from the middle to the end of the block.
@@ -576,12 +593,12 @@ anv_block_pool_alloc_back(struct anv_block_pool *pool)
 }
 
 void
-anv_block_pool_free(struct anv_block_pool *pool, int32_t offset)
+anv_block_pool_free(struct anv_block_pool *pool, int32_t offset, uint32_t n_blocks)
 {
    if (offset < 0) {
-      anv_free_list_push(&pool->back_free_list, pool->map, offset);
+      anv_free_list_push(&pool->back_free_list, pool->map, offset, n_blocks);
    } else {
-      anv_free_list_push(&pool->free_list, pool->map, offset);
+      anv_free_list_push(&pool->free_list, pool->map, offset, n_blocks);
    }
 }
 
@@ -606,7 +623,7 @@ anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool,
    struct anv_block_state block, old, new;
 
    /* Try free list first. */
-   if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) {
+   if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset, 1)) {
       assert(offset >= 0);
       return offset;
    }
@@ -637,7 +654,7 @@ anv_fixed_size_state_pool_free(struct anv_fixed_size_state_pool *pool,
                                struct anv_block_pool *block_pool,
                                uint32_t offset)
 {
-   anv_free_list_push(&pool->free_list, block_pool->map, offset);
+   anv_free_list_push(&pool->free_list, block_pool->map, offset, 1);
 }
 
 void
@@ -698,6 +715,9 @@ struct anv_state_stream_block {
    /* The offset into the block pool at which this block starts */
    uint32_t offset;
 
+   /* The number of blocks allocated for this stream block */
+   uint32_t n_blocks;
+
 #ifdef HAVE_VALGRIND
    /* A pointer to the first user-allocated thing in this block.  This is
     * what valgrind sees as the start of the block.
@@ -736,7 +756,7 @@ anv_state_stream_finish(struct anv_state_stream *stream)
       struct anv_state_stream_block sb = VG_NOACCESS_READ(next);
       VG(VALGRIND_MEMPOOL_FREE(stream, sb._vg_ptr));
       VG(VALGRIND_MAKE_MEM_UNDEFINED(next, block_size));
-      anv_block_pool_free(stream->block_pool, sb.offset);
+      anv_block_pool_free(stream->block_pool, sb.offset, sb.n_blocks);
       next = sb.next;
    }
 
@@ -753,19 +773,23 @@ anv_state_stream_alloc(struct anv_state_stream *stream,
 
    state.offset = align_u32(stream->next, alignment);
    if (state.offset + size > stream->end) {
-      uint32_t block = anv_block_pool_alloc(stream->block_pool);
+      uint32_t n_blocks =
+         DIV_ROUND_UP(align_u32(sizeof(*sb), alignment) + size, stream->block_pool->block_size);
+      uint32_t block = anv_block_pool_alloc_n(stream->block_pool, n_blocks);
+
       sb = stream->block_pool->map + block;
 
       VG(VALGRIND_MAKE_MEM_UNDEFINED(sb, sizeof(*sb)));
       sb->next = stream->block;
       sb->offset = block;
+      sb->n_blocks = n_blocks;
       VG(sb->_vg_ptr = NULL);
-      VG(VALGRIND_MAKE_MEM_NOACCESS(sb, stream->block_pool->block_size));
+      VG(VALGRIND_MAKE_MEM_NOACCESS(sb, n_blocks * stream->block_pool->block_size));
 
       stream->block = sb;
       stream->start = block;
       stream->next = block + sizeof(*sb);
-      stream->end = block + stream->block_pool->block_size;
+      stream->end = block + n_blocks * stream->block_pool->block_size;
 
       state.offset = align_u32(stream->next, alignment);
       assert(state.offset + size <= stream->end);
diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
index 3f6039e..cc9d9d7 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -716,7 +716,7 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
    int32_t *bt_block;
    u_vector_foreach(bt_block, &cmd_buffer->bt_blocks) {
       anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool,
-                          *bt_block);
+                          *bt_block, 1);
    }
    u_vector_finish(&cmd_buffer->bt_blocks);
 
@@ -750,7 +750,7 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
    while (u_vector_length(&cmd_buffer->bt_blocks) > 1) {
       int32_t *bt_block = u_vector_remove(&cmd_buffer->bt_blocks);
       anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool,
-                          *bt_block);
+                          *bt_block, 1);
    }
    assert(u_vector_length(&cmd_buffer->bt_blocks) == 1);
    cmd_buffer->bt_next = 0;
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 2527c2c..50a8815 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -324,8 +324,11 @@ union anv_free_list {
    struct {
       int32_t offset;
 
+      /* Number of contiguous blocks in the chunk at the head of the list */
+      uint32_t n_blocks:8;
+
       /* A simple count that is incremented every time the head changes. */
-      uint32_t count;
+      uint32_t count:24;
    };
    uint64_t u64;
 };
@@ -465,8 +468,9 @@ VkResult anv_block_pool_init(struct anv_block_pool *pool,
                              struct anv_device *device, uint32_t block_size);
 void anv_block_pool_finish(struct anv_block_pool *pool);
 int32_t anv_block_pool_alloc(struct anv_block_pool *pool);
+int32_t anv_block_pool_alloc_n(struct anv_block_pool *pool, uint32_t n_blocks);
 int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool);
-void anv_block_pool_free(struct anv_block_pool *pool, int32_t offset);
+void anv_block_pool_free(struct anv_block_pool *pool, int32_t offset, uint32_t n_blocks);
 void anv_state_pool_init(struct anv_state_pool *pool,
                          struct anv_block_pool *block_pool);
 void anv_state_pool_finish(struct anv_state_pool *pool);
-- 
2.9.3
