[Mesa-dev] [PATCH 08/18] anv/allocator: Drop the block_size field from block_pool

Wed May 3 14:44:23 UTC 2017

Reviewed-by: Juan A. Suarez Romero <jasuarez at igalia.com>

On Wed, 2017-04-26 at 07:35 -0700, Jason Ekstrand wrote:
> Since the state_stream is now pulling from a state_pool, the only thing
> pulling directly off the block pool is the state pool, so we can just
> move the block_size there.  The one exception is when we allocate
> binding tables, but we can just reference the state pool there as well.
> 
> The only functional change here is that we no longer grow the block pool
> immediately upon creation, so no BO gets allocated until our first state
> allocation.
> ---
>  src/intel/vulkan/anv_allocator.c                   | 66 +++++++++++++---------
>  src/intel/vulkan/anv_batch_chain.c                 |  6 +-
>  src/intel/vulkan/anv_blorp.c                       |  2 +-
>  src/intel/vulkan/anv_device.c                      | 15 +++--
>  src/intel/vulkan/anv_private.h                     | 18 ++++--
>  src/intel/vulkan/tests/block_pool_no_free.c        |  7 ++-
>  src/intel/vulkan/tests/state_pool.c                |  4 +-
>  src/intel/vulkan/tests/state_pool_free_list_only.c |  2 +-
>  src/intel/vulkan/tests/state_pool_no_free.c        |  4 +-
>  9 files changed, 74 insertions(+), 50 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
> index 183d2cb..b576101 100644
> --- a/src/intel/vulkan/anv_allocator.c
> +++ b/src/intel/vulkan/anv_allocator.c
> @@ -243,20 +243,19 @@ anv_ptr_free_list_push(void **list, void *elem)
>     } while (old != current);
>  }
>  
> -static uint32_t
> -anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state);
> +static VkResult
> +anv_block_pool_expand_range(struct anv_block_pool *pool,
> +                            uint32_t center_bo_offset, uint32_t size);
>  
>  VkResult
>  anv_block_pool_init(struct anv_block_pool *pool,
> -                    struct anv_device *device, uint32_t block_size)
> +                    struct anv_device *device,
> +                    uint32_t initial_size)
>  {
>     VkResult result;
>  
> -   assert(util_is_power_of_two(block_size));
> -
>     pool->device = device;
>     anv_bo_init(&pool->bo, 0, 0);
> -   pool->block_size = block_size;
>     pool->free_list = ANV_FREE_LIST_EMPTY;
>     pool->back_free_list = ANV_FREE_LIST_EMPTY;
>  
> @@ -285,11 +284,14 @@ anv_block_pool_init(struct anv_block_pool *pool,
>     pool->back_state.next = 0;
>     pool->back_state.end = 0;
>  
> -   /* Immediately grow the pool so we'll have a backing bo. */
> -   pool->state.end = anv_block_pool_grow(pool, &pool->state);
> +   result = anv_block_pool_expand_range(pool, 0, initial_size);
> +   if (result != VK_SUCCESS)
> +      goto fail_mmap_cleanups;
>  
>     return VK_SUCCESS;
>  
> + fail_mmap_cleanups:
> +   u_vector_finish(&pool->mmap_cleanups);
>   fail_fd:
>     close(pool->fd);
>  
> @@ -430,7 +432,8 @@ anv_block_pool_expand_range(struct anv_block_pool *pool,
>   *     the pool and a 4K CPU page.
>   */
>  static uint32_t
> -anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state)
> +anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state,
> +                    uint32_t block_size)
>  {
>     uint32_t size;
>     VkResult result = VK_SUCCESS;
> @@ -469,7 +472,7 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state)
>  
>     if (old_size == 0) {
>        /* This is the first allocation */
> -      size = MAX2(32 * pool->block_size, PAGE_SIZE);
> +      size = MAX2(32 * block_size, PAGE_SIZE);
>     } else {
>        size = old_size * 2;
>     }
> @@ -498,7 +501,7 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state)
>        center_bo_offset = ((uint64_t)size * back_used) / total_used;
>  
>        /* Align down to a multiple of both the block size and page size */
> -      uint32_t granularity = MAX2(pool->block_size, PAGE_SIZE);
> +      uint32_t granularity = MAX2(block_size, PAGE_SIZE);
>        assert(util_is_power_of_two(granularity));
>        center_bo_offset &= ~(granularity - 1);
>  
> @@ -513,7 +516,7 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state)
>           center_bo_offset = size - pool->state.end;
>     }
>  
> -   assert(center_bo_offset % pool->block_size == 0);
> +   assert(center_bo_offset % block_size == 0);
>     assert(center_bo_offset % PAGE_SIZE == 0);
>  
>     result = anv_block_pool_expand_range(pool, center_bo_offset, size);
> @@ -539,12 +542,15 @@ done:
>  
>  static uint32_t
>  anv_block_pool_alloc_new(struct anv_block_pool *pool,
> -                         struct anv_block_state *pool_state)
> +                         struct anv_block_state *pool_state,
> +                         uint32_t block_size)
>  {
>     struct anv_block_state state, old, new;
>  
> +   assert(util_is_power_of_two(block_size));
> +
>     while (1) {
> -      state.u64 = __sync_fetch_and_add(&pool_state->u64, pool->block_size);
> +      state.u64 = __sync_fetch_and_add(&pool_state->u64, block_size);
>        if (state.next < state.end) {
>           assert(pool->map);
>           return state.next;
> @@ -553,9 +559,8 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool,
>            * pool_state->next acts a mutex: threads who try to allocate now will
>            * get block indexes above the current limit and hit futex_wait
>            * below. */
> -         new.next = state.next + pool->block_size;
> -         new.end = anv_block_pool_grow(pool, pool_state);
> -         assert(new.end >= new.next && new.end % pool->block_size == 0);
> +         new.next = state.next + block_size;
> +         new.end = anv_block_pool_grow(pool, pool_state, block_size);
>           old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64);
>           if (old.next != state.next)
>              futex_wake(&pool_state->end, INT_MAX);
> @@ -568,7 +573,8 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool,
>  }
>  
>  int32_t
> -anv_block_pool_alloc(struct anv_block_pool *pool)
> +anv_block_pool_alloc(struct anv_block_pool *pool,
> +                     uint32_t block_size)
>  {
>     int32_t offset;
>  
> @@ -579,7 +585,7 @@ anv_block_pool_alloc(struct anv_block_pool *pool)
>        return offset;
>     }
>  
> -   return anv_block_pool_alloc_new(pool, &pool->state);
> +   return anv_block_pool_alloc_new(pool, &pool->state, block_size);
>  }
>  
>  /* Allocates a block out of the back of the block pool.
> @@ -592,7 +598,8 @@ anv_block_pool_alloc(struct anv_block_pool *pool)
>   * gymnastics with the block pool's BO when doing relocations.
>   */
>  int32_t
> -anv_block_pool_alloc_back(struct anv_block_pool *pool)
> +anv_block_pool_alloc_back(struct anv_block_pool *pool,
> +                          uint32_t block_size)
>  {
>     int32_t offset;
>  
> @@ -603,7 +610,7 @@ anv_block_pool_alloc_back(struct anv_block_pool *pool)
>        return offset;
>     }
>  
> -   offset = anv_block_pool_alloc_new(pool, &pool->back_state);
> +   offset = anv_block_pool_alloc_new(pool, &pool->back_state, block_size);
>  
>     /* The offset we get out of anv_block_pool_alloc_new() is actually the
>      * number of bytes downwards from the middle to the end of the block.
> @@ -611,7 +618,7 @@ anv_block_pool_alloc_back(struct anv_block_pool *pool)
>      * start of the block.
>      */
>     assert(offset >= 0);
> -   return -(offset + pool->block_size);
> +   return -(offset + block_size);
>  }
>  
>  void
> @@ -626,9 +633,12 @@ anv_block_pool_free(struct anv_block_pool *pool, int32_t offset)
>  
>  void
>  anv_state_pool_init(struct anv_state_pool *pool,
> -                    struct anv_block_pool *block_pool)
> +                    struct anv_block_pool *block_pool,
> +                    uint32_t block_size)
>  {
>     pool->block_pool = block_pool;
> +   assert(util_is_power_of_two(block_size));
> +   pool->block_size = block_size;
>     for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) {
>        pool->buckets[i].free_list = ANV_FREE_LIST_EMPTY;
>        pool->buckets[i].block.next = 0;
> @@ -646,7 +656,8 @@ anv_state_pool_finish(struct anv_state_pool *pool)
>  static uint32_t
>  anv_fixed_size_state_pool_alloc_new(struct anv_fixed_size_state_pool *pool,
>                                      struct anv_block_pool *block_pool,
> -                                    uint32_t state_size)
> +                                    uint32_t state_size,
> +                                    uint32_t block_size)
>  {
>     struct anv_block_state block, old, new;
>     uint32_t offset;
> @@ -657,9 +668,9 @@ anv_fixed_size_state_pool_alloc_new(struct anv_fixed_size_state_pool *pool,
>     if (block.next < block.end) {
>        return block.next;
>     } else if (block.next == block.end) {
> -      offset = anv_block_pool_alloc(block_pool);
> +      offset = anv_block_pool_alloc(block_pool, block_size);
>        new.next = offset + state_size;
> -      new.end = offset + block_pool->block_size;
> +      new.end = offset + block_size;
>        old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64);
>        if (old.next != block.next)
>           futex_wake(&pool->block.end, INT_MAX);
> @@ -692,7 +703,8 @@ anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
>  
>     state.offset = anv_fixed_size_state_pool_alloc_new(&pool->buckets[bucket],
>                                                        pool->block_pool,
> -                                                      state.alloc_size);
> +                                                      state.alloc_size,
> +                                                      pool->block_size);
>  
>  done:
>     state.map = pool->block_pool->map + state.offset;
> diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
> index 5f0528f..28193c5 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -623,12 +623,13 @@ anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
>  {
>     struct anv_block_pool *block_pool =
>         &cmd_buffer->device->surface_state_block_pool;
> +   struct anv_state_pool *state_pool = &cmd_buffer->device->surface_state_pool;
>     int32_t *bt_block = u_vector_head(&cmd_buffer->bt_blocks);
>     struct anv_state state;
>  
>     state.alloc_size = align_u32(entries * 4, 32);
>  
> -   if (cmd_buffer->bt_next + state.alloc_size > block_pool->block_size)
> +   if (cmd_buffer->bt_next + state.alloc_size > state_pool->block_size)
>        return (struct anv_state) { 0 };
>  
>     state.offset = cmd_buffer->bt_next;
> @@ -663,6 +664,7 @@ anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
>  {
>     struct anv_block_pool *block_pool =
>         &cmd_buffer->device->surface_state_block_pool;
> +   struct anv_state_pool *state_pool = &cmd_buffer->device->surface_state_pool;
>  
>     int32_t *offset = u_vector_add(&cmd_buffer->bt_blocks);
>     if (offset == NULL) {
> @@ -670,7 +672,7 @@ anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
>        return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
>     }
>  
> -   *offset = anv_block_pool_alloc_back(block_pool);
> +   *offset = anv_block_pool_alloc_back(block_pool, state_pool->block_size);
>     cmd_buffer->bt_next = 0;
>  
>     return VK_SUCCESS;
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index d17b73d..e3e9520 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -686,7 +686,7 @@ void anv_CmdUpdateBuffer(
>      * little data at the top to build its linked list.
>      */
>     const uint32_t max_update_size =
> -      cmd_buffer->device->dynamic_state_block_pool.block_size - 64;
> +      cmd_buffer->device->dynamic_state_pool.block_size - 64;
>  
>     assert(max_update_size < MAX_SURFACE_DIM * 4);
>  
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 8f4625b..a37ac3e 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -1074,28 +1074,31 @@ VkResult anv_CreateDevice(
>     anv_bo_pool_init(&device->batch_bo_pool, device);
>  
>     result = anv_block_pool_init(&device->dynamic_state_block_pool, device,
> -                                16384);
> +                                16384 * 16);
>     if (result != VK_SUCCESS)
>        goto fail_batch_bo_pool;
>  
>     anv_state_pool_init(&device->dynamic_state_pool,
> -                       &device->dynamic_state_block_pool);
> +                       &device->dynamic_state_block_pool,
> +                       16384);
>  
>     result = anv_block_pool_init(&device->instruction_block_pool, device,
> -                                1024 * 1024);
> +                                1024 * 1024 * 16);
>     if (result != VK_SUCCESS)
>        goto fail_dynamic_state_pool;
>  
>     anv_state_pool_init(&device->instruction_state_pool,
> -                       &device->instruction_block_pool);
> +                       &device->instruction_block_pool,
> +                       1024 * 1024);
>  
>     result = anv_block_pool_init(&device->surface_state_block_pool, device,
> -                                4096);
> +                                4096 * 16);
>     if (result != VK_SUCCESS)
>        goto fail_instruction_state_pool;
>  
>     anv_state_pool_init(&device->surface_state_pool,
> -                       &device->surface_state_block_pool);
> +                       &device->surface_state_block_pool,
> +                       4096);
>  
>     result = anv_bo_init_new(&device->workaround_bo, device, 1024);
>     if (result != VK_SUCCESS)
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 9f30301..10657c1 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -461,8 +461,6 @@ struct anv_block_pool {
>      */
>     struct u_vector mmap_cleanups;
>  
> -   uint32_t block_size;
> -
>     union anv_free_list free_list;
>     struct anv_block_state state;
>  
> @@ -504,6 +502,10 @@ struct anv_fixed_size_state_pool {
>  
>  struct anv_state_pool {
>     struct anv_block_pool *block_pool;
> +
> +   /* The size of blocks which will be allocated from the block pool */
> +   uint32_t block_size;
> +
>     struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
>  };
>  
> @@ -555,13 +557,17 @@ anv_invalidate_range(void *start, size_t size)
>  }
>  
>  VkResult anv_block_pool_init(struct anv_block_pool *pool,
> -                             struct anv_device *device, uint32_t block_size);
> +                             struct anv_device *device,
> +                             uint32_t initial_size);
>  void anv_block_pool_finish(struct anv_block_pool *pool);
> -int32_t anv_block_pool_alloc(struct anv_block_pool *pool);
> -int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool);
> +int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
> +                             uint32_t block_size);
> +int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
> +                                  uint32_t block_size);
>  void anv_block_pool_free(struct anv_block_pool *pool, int32_t offset);
>  void anv_state_pool_init(struct anv_state_pool *pool,
> -                         struct anv_block_pool *block_pool);
> +                         struct anv_block_pool *block_pool,
> +                         uint32_t block_size);
>  void anv_state_pool_finish(struct anv_state_pool *pool);
>  struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
>                                        uint32_t state_size, uint32_t alignment);
> diff --git a/src/intel/vulkan/tests/block_pool_no_free.c b/src/intel/vulkan/tests/block_pool_no_free.c
> index 86d1a76..6e6fc68 100644
> --- a/src/intel/vulkan/tests/block_pool_no_free.c
> +++ b/src/intel/vulkan/tests/block_pool_no_free.c
> @@ -25,6 +25,7 @@
>  
>  #include "anv_private.h"
>  
> +#define BLOCK_SIZE 16
>  #define NUM_THREADS 16
>  #define BLOCKS_PER_THREAD 1024
>  #define NUM_RUNS 64
> @@ -44,13 +45,13 @@ static void *alloc_blocks(void *_job)
>     int32_t block, *data;
>  
>     for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) {
> -      block = anv_block_pool_alloc(job->pool);
> +      block = anv_block_pool_alloc(job->pool, BLOCK_SIZE);
>        data = job->pool->map + block;
>        *data = block;
>        assert(block >= 0);
>        job->blocks[i] = block;
>  
> -      block = anv_block_pool_alloc_back(job->pool);
> +      block = anv_block_pool_alloc_back(job->pool, BLOCK_SIZE);
>        data = job->pool->map + block;
>        *data = block;
>        assert(block < 0);
> @@ -111,7 +112,7 @@ static void run_test()
>     struct anv_block_pool pool;
>  
>     pthread_mutex_init(&device.mutex, NULL);
> -   anv_block_pool_init(&pool, &device, 16);
> +   anv_block_pool_init(&pool, &device, 4096);
>  
>     for (unsigned i = 0; i < NUM_THREADS; i++) {
>        jobs[i].pool = &pool;
> diff --git a/src/intel/vulkan/tests/state_pool.c b/src/intel/vulkan/tests/state_pool.c
> index 878ec19..d9d3fd3 100644
> --- a/src/intel/vulkan/tests/state_pool.c
> +++ b/src/intel/vulkan/tests/state_pool.c
> @@ -41,8 +41,8 @@ int main(int argc, char **argv)
>     pthread_mutex_init(&device.mutex, NULL);
>  
>     for (unsigned i = 0; i < NUM_RUNS; i++) {
> -      anv_block_pool_init(&block_pool, &device, 256);
> -      anv_state_pool_init(&state_pool, &block_pool);
> +      anv_block_pool_init(&block_pool, &device, 4096);
> +      anv_state_pool_init(&state_pool, &block_pool, 256);
>  
>        /* Grab one so a zero offset is impossible */
>        anv_state_pool_alloc(&state_pool, 16, 16);
> diff --git a/src/intel/vulkan/tests/state_pool_free_list_only.c b/src/intel/vulkan/tests/state_pool_free_list_only.c
> index 2f4eb47..2cba8e1 100644
> --- a/src/intel/vulkan/tests/state_pool_free_list_only.c
> +++ b/src/intel/vulkan/tests/state_pool_free_list_only.c
> @@ -39,7 +39,7 @@ int main(int argc, char **argv)
>  
>     pthread_mutex_init(&device.mutex, NULL);
>     anv_block_pool_init(&block_pool, &device, 4096);
> -   anv_state_pool_init(&state_pool, &block_pool);
> +   anv_state_pool_init(&state_pool, &block_pool, 4096);
>  
>     /* Grab one so a zero offset is impossible */
>     anv_state_pool_alloc(&state_pool, 16, 16);
> diff --git a/src/intel/vulkan/tests/state_pool_no_free.c b/src/intel/vulkan/tests/state_pool_no_free.c
> index 4b248c2..5afce05 100644
> --- a/src/intel/vulkan/tests/state_pool_no_free.c
> +++ b/src/intel/vulkan/tests/state_pool_no_free.c
> @@ -59,8 +59,8 @@ static void run_test()
>     struct anv_state_pool state_pool;
>  
>     pthread_mutex_init(&device.mutex, NULL);
> -   anv_block_pool_init(&block_pool, &device, 64);
> -   anv_state_pool_init(&state_pool, &block_pool);
> +   anv_block_pool_init(&block_pool, &device, 4096);
> +   anv_state_pool_init(&state_pool, &block_pool, 64);
>  
>     pthread_barrier_init(&barrier, NULL, NUM_THREADS);
>