[Mesa-dev] [RFC PATCH 12/14] anv/allocator: Rework chunk return to the state pool.

Tue Dec 11 23:31:13 UTC 2018

On Mon, Dec 10, 2018 at 11:10:02PM -0600, Jason Ekstrand wrote:
> 
> 
> On Mon, Dec 10, 2018 at 5:48 PM Rafael Antognolli <rafael.antognolli at intel.com>
> wrote:
> 
>     On Mon, Dec 10, 2018 at 04:56:40PM -0600, Jason Ekstrand wrote:
>     > On Fri, Dec 7, 2018 at 6:06 PM Rafael Antognolli <
>     rafael.antognolli at intel.com>
>     > wrote:
>     >
>     >     This commit tries to rework the code that split and returns chunks
>     back
>     >     to the state pool, while still keeping the same logic.
>     >
>     >     The original code would get a chunk larger than we need and split it
>     >     into pool->block_size. Then it would return all but the first one,
>     and
>     >     would split that first one into alloc_size chunks. Then it would keep
>     >     the first one (for the allocation), and return the others back to the
>     >     pool.
>     >
>     >     The new anv_state_pool_return_chunk() function will take a chunk
>     (with
>     >     the alloc_size part removed), and a small_size hint. It then splits
>     that
>     >     chunk into pool->block_size'd chunks, and if there's some space still
>     >     left, split that into small_size chunks. small_size in this case is
>     the
>     >     same size as alloc_size.
>     >
>     >     The idea is to keep the same logic, but make it in a way we can reuse
>     it
>     >     to return other chunks to the pool when we are growing the buffer.
>     >     ---
>     >      src/intel/vulkan/anv_allocator.c | 147
>     +++++++++++++++++++++----------
>     >      1 file changed, 102 insertions(+), 45 deletions(-)
>     >
>     >     diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/
>     >     anv_allocator.c
>     >     index 31258e38635..bddeb4a0fbd 100644
>     >     --- a/src/intel/vulkan/anv_allocator.c
>     >     +++ b/src/intel/vulkan/anv_allocator.c
>     >     @@ -994,6 +994,97 @@ anv_state_pool_get_bucket_size(uint32_t bucket)
>     >         return 1 << size_log2;
>     >      }
>     >
>     >     +/** Helper to create a chunk into the state table.
>     >     + *
>     >     + * It just creates 'count' entries into the state table and update
>     their
>     >     sizes,
>     >     + * offsets and maps, also pushing them as "free" states.
>     >     + */
>     >     +static void
>     >     +anv_state_pool_return_blocks(struct anv_state_pool *pool,
>     >     +                             uint32_t chunk_offset, uint32_t count,
>     >     +                             uint32_t block_size)
>     >     +{
>     >     +   if (count == 0)
>     >     +      return;
>     >     +
>     >     +   uint32_t st_idx = anv_state_table_add(&pool->table, count);
>     >     +   for (int i = 0; i < count; i++) {
>     >     +      /* update states that were added back to the state table */
>     >     +      struct anv_state *state_i = anv_state_table_get(&pool->table,
>     >     +                                                      st_idx + i);
>     >     +      state_i->alloc_size = block_size;
>     >     +      state_i->offset = chunk_offset + block_size * i;
>     >     +      struct anv_pool_map pool_map = anv_block_pool_map(&pool->
>     block_pool,
>     >     +                                                        state_i->
>     offset);
>     >     +      state_i->map = pool_map.map + pool_map.offset;
>     >     +   }
>     >     +
>     >     +   uint32_t block_bucket = anv_state_pool_get_bucket(block_size);
>     >     +   anv_state_table_push(&pool->buckets[block_bucket].free_list,
>     >     +                        &pool->table, st_idx, count);
>     >     +}
>     >     +
>     >     +static uint32_t
>     >     +calculate_divisor(uint32_t size)
>     >     +{
>     >     +   uint32_t bucket = anv_state_pool_get_bucket(size);
>     >     +
>     >     +   while (bucket >= 0) {
>     >     +      uint32_t bucket_size = anv_state_pool_get_bucket_size(bucket);
>     >     +      if (size % bucket_size == 0)
>     >     +         return bucket_size;
>     >     +   }
>     >     +
>     >     +   return 0;
>     >     +}
>     >     +
>     >     +/** Returns a chunk of memory back to the state pool.
>     >     + *
>     >     + * If small_size is zero, we split chunk_size into pool->
>     block_size'd
>     >     pieces,
>     >     + * and return those. If there's some remaining 'rest' space
>     (chunk_size is
>     >     not
>     >     + * divisble by pool->block_size), then we find a bucket size that is
>     a
>     >     divisor
>     >     + * of that rest, and split the 'rest' into that size, returning it
>     to the
>     >     pool.
>     >     + *
>     >     + * If small_size is non-zero, we use it in two different ways:
>     >     + *    * if it is larger than pool->block_size, we split the chunk
>     into
>     >     + *    small_size'd pieces, instead of pool->block_size'd ones.
>     >     + *    * we also use it as the desired size to split the 'rest' after
>     we
>     >     split
>     >     + *    the bigger size of the chunk into pool->block_size;
>     >
>     >
>     > This seems both overly complicated and not really what we want.  If I
>     have a
>     > block size of 8k and allocate a single 64-byte state and then a 8k state,
>     it
>     > will break my almost 8k of padding into 511 64-byte states and return
>     those
>     > which may be very wasteful if the next thing I do is allocate a 1K state.
> 
>     Good, this would definitely be a waste.
> 
>     > It also doesn't provide the current alignment guarantees that all states
>     are
>     > aligned to their size.  While the alignment guarantee doesn't matter for
>     most
>     > large states, it does matter for some of the smaller sizes.  Now that I
>     look at
>     > it in detail, it appears that the highest alignment we ever require is
>     64B and
>     > the smallest size we allow is 64B so maybe it just doesn't matter?
>     >
>     > Assuming we don't care about alignments, we could do something like this?
>     >
>     > if (small_size > 0) {
>     >    assert(chunk_size % small_size == 0);
>     >    anv_state_pool_return_blocks(pool, chunk_offset, chunk_size /
>     small_size,
>     > small_size);
>     > } else {
>     >    uint32_t divisor = MAX_STATE_SIZE;
>     >    while (divisor >= MIN_STATE_SIZE) {
>     >       uint32_t nblocks = chunk_size / divisor;
>     >       if (nblocks > 0) {
>     >          anv_state_pool_return_blocs(pool, chunk_offset, nblocks,
>     divisor);
>     >          chunk_offset += nblocks * divisor;
>     >          chunk_size -= nblocks * divisor;
>     >       }
>     >    }
>     > }
> 
>     The code above is indeed way simpler and it does seem to achieve
>     what we want for the "return padding" case.
> 
>     However, we also use this helper for returning blocks that we got from
>     the freelist, but were only partially used. For instance if we need to
>     allocate a state of size 64 bytes, and we have a block of 8KB in the
>     freelist, due to the snippet above (small_size == 64) we will end up
>     splitting it into 511 64-byte states too.
> 
>     Maybe (if we want to keep the old semantics), in the case where
>     small_size > 0, we have to do something like:
> 
>     if (small_size > 0) {
>        assert(chunk_size % small_size == 0);
>        if (chunk_size > pool->block_size) {
>           assert(chunk_size % pool->block_size == 0);
>           uint32_t nblocks = chunk_size / pool->block_size - 1;
>           anv_state_pool_return_blocks(pool, offset, nblocks, pool->
>     block_size);
>           chunk_size -= nblocks * pool->block_size;
>           offset += nblocks * pool->block_size;
>        }
>        anv_state_pool_return_blocks(pool, chunk_offset, chunk_size /
>     small_size, small_size);
>     } else {
>        ... your else clause here
>     }
> 
>     Maybe it's over complicating things again... what do you think?
> 
> 
> Maybe?  Or maybe we want to just keep the block_size semantics for everything
> and split padding into some small chunks as needed and then a bunch of
> block_size chunks.  If we did that, we'd have the same semantics for
> everything.

Just to be sure, how does something like this look:

 uint32_t divisor = MAX2(pool->block_size, small_size);
 uint32_t nblocks = chunk_size / divisor;
 uint32_t rest = chunk_size - nblocks * divisor;

 /* First return pool->block_size'd chunks.*/
 uint32_t offset = chunk_offset + rest;
 anv_state_pool_return_blocks(pool, offset, nblocks, divisor);

 chunk_size = rest;

 uint32_t b = anv_state_pool_get_bucket(divisor);
 while (chunk_size > 0 && b >= ANV_MIN_STATE_SIZE_LOG2) {
    divisor = anv_state_pool_get_bucket_size(b);
    nblocks = chunk_size / divisor;
    rest = chunk_size - nblocks * divisor;
    if (nblocks > 0) {
       anv_state_pool_return_blocks(pool, chunk_offset + rest,
                                    nblocks, divisor);
       chunk_size = rest;
    }
    b--;
 }

I'm also wondering if it's worth leaving the "small_size" portion of it,
and if so I probably need a better name for it.

> 

<snip>

> 
> 
> I think what I'd recommend is that you pull the first helper in this patch into
> it's own patch right before patch 2 and use it replace the two in-line copies
> here.  Then the API wouldn't look nearly as horrible and this could focus on
> the heuristic for dealing with padding (and maybe even be rolled into the
> padding patch).

Cool, done locally already, thanks!

> 
>     >     -            chunk_size = pool->block_size;
>     >     -         }
>     >     -
>     >     -         assert(chunk_size % alloc_size == 0);
>     >     -         uint32_t push_back = (chunk_size / alloc_size) - 1;
>     >     -         uint32_t st_idx = anv_state_table_add(&pool->table,
>     push_back);
>     >     -         for (int i = 0; i < push_back; i++) {
>     >     -            /* update states that were added back to the state table
>     */
>     >     -            struct anv_state *state_i = anv_state_table_get(&pool->
>     table,
>     >     -                                                            st_idx +
>     i);
>     >     -            state_i->alloc_size = alloc_size;
>     >     -            state_i->offset = chunk_offset + alloc_size * (i + 1);
>     >     -            struct anv_pool_map pool_map = anv_block_pool_map(&
>     pool->
>     >     block_pool,
>     >     -                                                             
>     state_i->
>     >     offset);
>     >     -            state_i->map = pool_map.map + pool_map.offset;
>     >     -         }
>     >     -         anv_state_table_push(&pool->buckets[bucket].free_list,
>     >     -                              &pool->table, st_idx, push_back);
>     >     -
>     >     -         offset = chunk_offset;
>     >     +         uint32_t return_offset = chunk_offset + alloc_size;
>     >     +         uint32_t return_size = chunk_size - alloc_size;
>     >     +         anv_state_pool_return_chunk(pool, return_offset,
>     >     +                                     return_size, alloc_size);
>     >               goto done;
>     >            }
>     >         }
>     >     --
>     >     2.17.1
>     >
>     >     _______________________________________________
>     >     mesa-dev mailing list
>     >     mesa-dev at lists.freedesktop.org
>     >     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>     >
>