[Mesa-dev] [RFC PATCH 12/14] anv/allocator: Rework chunk return to the state pool.

Jason Ekstrand jason at jlekstrand.net
Wed Dec 12 00:20:47 UTC 2018


On Tue, Dec 11, 2018 at 5:31 PM Rafael Antognolli <
rafael.antognolli at intel.com> wrote:

> On Mon, Dec 10, 2018 at 11:10:02PM -0600, Jason Ekstrand wrote:
> >
> >
> > On Mon, Dec 10, 2018 at 5:48 PM Rafael Antognolli <
> rafael.antognolli at intel.com>
> > wrote:
> >
> >     On Mon, Dec 10, 2018 at 04:56:40PM -0600, Jason Ekstrand wrote:
> >     > On Fri, Dec 7, 2018 at 6:06 PM Rafael Antognolli <
> >     rafael.antognolli at intel.com>
> >     > wrote:
> >     >
> >     >     This commit tries to rework the code that splits and returns
> chunks
> >     back
> >     >     to the state pool, while still keeping the same logic.
> >     >
> >     >     The original code would get a chunk larger than we need and
> split it
> >     >     into pool->block_size. Then it would return all but the first
> one,
> >     and
> >     >     would split that first one into alloc_size chunks. Then it
> would keep
> >     >     the first one (for the allocation), and return the others back
> to the
> >     >     pool.
> >     >
> >     >     The new anv_state_pool_return_chunk() function will take a
> chunk
> >     (with
> >     >     the alloc_size part removed), and a small_size hint. It then
> splits
> >     that
> >     >     chunk into pool->block_size'd chunks, and if there's some
> space still
> >     >     left, split that into small_size chunks. small_size in this
> case is
> >     the
> >     >     same size as alloc_size.
> >     >
> >     >     The idea is to keep the same logic, but make it in a way we
> can reuse
> >     it
> >     >     to return other chunks to the pool when we are growing the
> buffer.
> >     >     ---
> >     >      src/intel/vulkan/anv_allocator.c | 147
> >     +++++++++++++++++++++----------
> >     >      1 file changed, 102 insertions(+), 45 deletions(-)
> >     >
> >     >     diff --git a/src/intel/vulkan/anv_allocator.c
> b/src/intel/vulkan/
> >     >     anv_allocator.c
> >     >     index 31258e38635..bddeb4a0fbd 100644
> >     >     --- a/src/intel/vulkan/anv_allocator.c
> >     >     +++ b/src/intel/vulkan/anv_allocator.c
> >     >     @@ -994,6 +994,97 @@ anv_state_pool_get_bucket_size(uint32_t
> bucket)
> >     >         return 1 << size_log2;
> >     >      }
> >     >
> >     >     +/** Helper to create a chunk into the state table.
> >     >     + *
> >     >     + * It just creates 'count' entries into the state table and
> updates
> >     their
> >     >     sizes,
> >     >     + * offsets and maps, also pushing them as "free" states.
> >     >     + */
> >     >     +static void
> >     >     +anv_state_pool_return_blocks(struct anv_state_pool *pool,
> >     >     +                             uint32_t chunk_offset, uint32_t
> count,
> >     >     +                             uint32_t block_size)
> >     >     +{
> >     >     +   if (count == 0)
> >     >     +      return;
> >     >     +
> >     >     +   uint32_t st_idx = anv_state_table_add(&pool->table, count);
> >     >     +   for (int i = 0; i < count; i++) {
> >     >     +      /* update states that were added back to the state
> table */
> >     >     +      struct anv_state *state_i =
> anv_state_table_get(&pool->table,
> >     >     +                                                      st_idx
> + i);
> >     >     +      state_i->alloc_size = block_size;
> >     >     +      state_i->offset = chunk_offset + block_size * i;
> >     >     +      struct anv_pool_map pool_map =
> anv_block_pool_map(&pool->
> >     block_pool,
> >     >     +
> state_i->
> >     offset);
> >     >     +      state_i->map = pool_map.map + pool_map.offset;
> >     >     +   }
> >     >     +
> >     >     +   uint32_t block_bucket =
> anv_state_pool_get_bucket(block_size);
> >     >     +
>  anv_state_table_push(&pool->buckets[block_bucket].free_list,
> >     >     +                        &pool->table, st_idx, count);
> >     >     +}
> >     >     +
> >     >     +static uint32_t
> >     >     +calculate_divisor(uint32_t size)
> >     >     +{
> >     >     +   uint32_t bucket = anv_state_pool_get_bucket(size);
> >     >     +
> >     >     +   while (bucket >= 0) {
> >     >     +      uint32_t bucket_size =
> anv_state_pool_get_bucket_size(bucket);
> >     >     +      if (size % bucket_size == 0)
> >     >     +         return bucket_size;
> >     >     +   }
> >     >     +
> >     >     +   return 0;
> >     >     +}
> >     >     +
> >     >     +/** Returns a chunk of memory back to the state pool.
> >     >     + *
> >     >     + * If small_size is zero, we split chunk_size into pool->
> >     block_size'd
> >     >     pieces,
> >     >     + * and return those. If there's some remaining 'rest' space
> >     (chunk_size is
> >     >     not
> >     >     + * divisible by pool->block_size), then we find a bucket size
> that is
> >     a
> >     >     divisor
> >     >     + * of that rest, and split the 'rest' into that size,
> returning it
> >     to the
> >     >     pool.
> >     >     + *
> >     >     + * If small_size is non-zero, we use it in two different ways:
> >     >     + *    * if it is larger than pool->block_size, we split the
> chunk
> >     into
> >     >     + *    small_size'd pieces, instead of pool->block_size'd ones.
> >     >     + *    * we also use it as the desired size to split the
> 'rest' after
> >     we
> >     >     split
> >     >     + *    the bigger size of the chunk into pool->block_size;
> >     >
> >     >
> >     > This seems both overly complicated and not really what we want.
> If I
> >     have a
> >     > block size of 8k and allocate a single 64-byte state and then a 8k
> state,
> >     it
> >     > will break my almost 8k of padding into 511 64-byte states and
> return
> >     those
> >     > which may be very wasteful if the next thing I do is allocate a 1K
> state.
> >
> >     Good, this would definitely be a waste.
> >
> >     > It also doesn't provide the current alignment guarantees that all
> states
> >     are
> >     > aligned to their size.  While the alignment guarantee doesn't
> matter for
> >     most
> >     > large states, it does matter for some of the smaller sizes.  Now
> that I
> >     look at
> >     > it in detail, it appears that the highest alignment we ever
> require is
> >     64B and
> >     > the smallest size we allow is 64B so maybe it just doesn't matter?
> >     >
> >     > Assuming we don't care about alignments, we could do something
> like this?
> >     >
> >     > if (small_size > 0) {
> >     >    assert(chunk_size % small_size == 0);
> >     >    anv_state_pool_return_blocks(pool, chunk_offset, chunk_size /
> >     small_size,
> >     > small_size);
> >     > } else {
> >     >    uint32_t divisor = MAX_STATE_SIZE;
> >     >    while (divisor >= MIN_STATE_SIZE) {
> >     >       uint32_t nblocks = chunk_size / divisor;
> >     >       if (nblocks > 0) {
> >     >          anv_state_pool_return_blocks(pool, chunk_offset, nblocks,
> >     divisor);
> >     >          chunk_offset += nblocks * divisor;
> >     >          chunk_size -= nblocks * divisor;
> >     >       }
> >     >    }
> >     > }
> >
> >     The code above is indeed way simpler and it does seem to achieve
> >     what we want for the "return padding" case.
> >
> >     However, we also use this helper for returning blocks that we got
> from
> >     the freelist, but were only partially used. For instance if we need
> to
> >     allocate a state of size 64 bytes, and we have a block of 8KB in the
> >     freelist, due to the snippet above (small_size == 64) we will end up
> >     splitting it into 511 64-byte states too.
> >
> >     Maybe (if we want to keep the old semantics), in the case where
> >     small_size > 0, we have to do something like:
> >
> >     if (small_size > 0) {
> >        assert(chunk_size % small_size == 0);
> >        if (chunk_size > pool->block_size) {
> >           assert(chunk_size % pool->block_size == 0);
> >           uint32_t nblocks = chunk_size / pool->block_size - 1;
> >           anv_state_pool_return_blocks(pool, offset, nblocks, pool->
> >     block_size);
> >           chunk_size -= nblocks * pool->block_size;
> >           offset += nblocks * pool->block_size;
> >        }
> >        anv_state_pool_return_blocks(pool, chunk_offset, chunk_size /
> >     small_size, small_size);
> >     } else {
> >        ... your else clause here
> >     }
> >
> >     Maybe it's over complicating things again... what do you think?
> >
> >
> > Maybe?  Or maybe we want to just keep the block_size semantics for
> everything
> > and split padding into some small chunks as needed and then a bunch of
> > block_size chunks.  If we did that, we'd have the same semantics for
> > everything.
>
> Just to be sure, how does something like this look:
>
>  uint32_t divisor = MAX2(pool->block_size, small_size);
>  uint32_t nblocks = chunk_size / divisor;
>  uint32_t rest = chunk_size - nblocks * divisor;
>
>  /* First return pool->block_size'd chunks.*/
>  uint32_t offset = chunk_offset + rest;
>  anv_state_pool_return_blocks(pool, offset, nblocks, divisor);
>

oh.... That's clever.  We can't assume that the start is aligned but we can
always assume that the end is aligned.  I like it!  Probably deserves a
comment and an assert though. :-)


>
>  chunk_size = rest;
>
>  uint32_t b = anv_state_pool_get_bucket(divisor);
>

I think we want divisor to start off at small_size here if small_size > 0
&& small_size < bucket_size.


>  while (chunk_size > 0 && b >= ANV_MIN_STATE_SIZE_LOG2) {
>     divisor = anv_state_pool_get_bucket_size(b);
>     nblocks = chunk_size / divisor;
>     rest = chunk_size - nblocks * divisor;
>     if (nblocks > 0) {
>        anv_state_pool_return_blocks(pool, chunk_offset + rest,
>                                     nblocks, divisor);
>        chunk_size = rest;
>     }
>     b--;
>  }
>
> I'm also wondering if it's worth leaving the "small_size" portion of it,
> and if so I probably need a better name for it.
>

Yeah, I think this is more-or-less what we want.  Not sure if I like
decrementing b or just doing "divisor /= 2" better; probably doesn't matter.

--Jason


> >
>
> <snip>
>
> >
> >
> > I think what I'd recommend is that you pull the first helper in this
> patch into
> > its own patch right before patch 2 and use it to replace the two in-line
> copies
> > here.  Then the API wouldn't look nearly as horrible and this could
> focus on
> > the heuristic for dealing with padding (and maybe even be rolled into the
> > padding patch).
>
> Cool, done locally already, thanks!
>
> >
> >     >     -            chunk_size = pool->block_size;
> >     >     -         }
> >     >     -
> >     >     -         assert(chunk_size % alloc_size == 0);
> >     >     -         uint32_t push_back = (chunk_size / alloc_size) - 1;
> >     >     -         uint32_t st_idx = anv_state_table_add(&pool->table,
> >     push_back);
> >     >     -         for (int i = 0; i < push_back; i++) {
> >     >     -            /* update states that were added back to the
> state table
> >     */
> >     >     -            struct anv_state *state_i =
> anv_state_table_get(&pool->
> >     table,
> >     >     -
> st_idx +
> >     i);
> >     >     -            state_i->alloc_size = alloc_size;
> >     >     -            state_i->offset = chunk_offset + alloc_size * (i
> + 1);
> >     >     -            struct anv_pool_map pool_map =
> anv_block_pool_map(&
> >     pool->
> >     >     block_pool,
> >     >     -
> >     state_i->
> >     >     offset);
> >     >     -            state_i->map = pool_map.map + pool_map.offset;
> >     >     -         }
> >     >     -
>  anv_state_table_push(&pool->buckets[bucket].free_list,
> >     >     -                              &pool->table, st_idx,
> push_back);
> >     >     -
> >     >     -         offset = chunk_offset;
> >     >     +         uint32_t return_offset = chunk_offset + alloc_size;
> >     >     +         uint32_t return_size = chunk_size - alloc_size;
> >     >     +         anv_state_pool_return_chunk(pool, return_offset,
> >     >     +                                     return_size, alloc_size);
> >     >               goto done;
> >     >            }
> >     >         }
> >     >     --
> >     >     2.17.1
> >     >
> >     >     _______________________________________________
> >     >     mesa-dev mailing list
> >     >     mesa-dev at lists.freedesktop.org
> >     >     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >     >
> >
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20181211/f50ed066/attachment-0001.html>


More information about the mesa-dev mailing list