[Mesa-dev] [PATCH v3] anv: add support for allocating more than 1 block of memory
Juan A. Suarez Romero
jasuarez at igalia.com
Fri Apr 21 08:17:53 UTC 2017
The current anv allocator assigns memory in units of a fixed block size.
But there can be cases where this block is not enough for a memory
request, and thus several blocks must be assigned in a row.
This commit adds support for specifying how many blocks of memory must
be assigned.
This fixes a number of dEQP-VK.pipeline.render_to_image.* tests that crash.
v2: lock-free free-list is not handled correctly (Jason)
v3: use power-of-2 allocation instead of linear (Jason)
---
src/intel/vulkan/anv_allocator.c | 99 ++++++++++++++++++++++++++++----------
src/intel/vulkan/anv_batch_chain.c | 10 ++--
src/intel/vulkan/anv_private.h | 6 ++-
3 files changed, 83 insertions(+), 32 deletions(-)
diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index 784191ed97..245ca605b4 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -257,7 +257,8 @@ anv_block_pool_init(struct anv_block_pool *pool,
pool->device = device;
anv_bo_init(&pool->bo, 0, 0);
pool->block_size = block_size;
- pool->free_list = ANV_FREE_LIST_EMPTY;
+ for (uint32_t i = 0; i < ANV_FREE_LIST_SIZE; i++)
+ pool->free_list[i] = ANV_FREE_LIST_EMPTY;
pool->back_free_list = ANV_FREE_LIST_EMPTY;
pool->fd = memfd_create("block pool", MFD_CLOEXEC);
@@ -526,30 +527,37 @@ fail:
static uint32_t
anv_block_pool_alloc_new(struct anv_block_pool *pool,
- struct anv_block_state *pool_state)
+ struct anv_block_state *pool_state,
+ uint32_t block_size)
{
struct anv_block_state state, old, new;
+ assert(util_is_power_of_two(block_size));
+
while (1) {
- state.u64 = __sync_fetch_and_add(&pool_state->u64, pool->block_size);
- if (state.next < state.end) {
+ state.u64 = __sync_fetch_and_add(&pool_state->u64, block_size);
+ if (state.next > state.end) {
+ futex_wait(&pool_state->end, state.end);
+ continue;
+ } else if ((state.next + block_size) < state.end) {
assert(pool->map);
return state.next;
- } else if (state.next == state.end) {
- /* We allocated the first block outside the pool, we have to grow it.
- * pool_state->next acts a mutex: threads who try to allocate now will
- * get block indexes above the current limit and hit futex_wait
- * below. */
- new.next = state.next + pool->block_size;
+ } else {
+ /* We allocated the firsts blocks outside the pool, we have to grow
+ * it. pool_state->next acts a mutex: threads who try to allocate
+ * now will get block indexes above the current limit and hit
+ * futex_wait below.
+ */
+ new.next = state.next + block_size;
new.end = anv_block_pool_grow(pool, pool_state);
+ /* We assume that just growing once the pool is enough to fulfil the
+ * memory requirements
+ */
assert(new.end >= new.next && new.end % pool->block_size == 0);
old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64);
if (old.next != state.next)
futex_wake(&pool_state->end, INT_MAX);
return state.next;
- } else {
- futex_wait(&pool_state->end, state.end);
- continue;
}
}
}
@@ -557,16 +565,41 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool,
int32_t
anv_block_pool_alloc(struct anv_block_pool *pool)
{
+ return anv_block_pool_alloc_block(pool, round_to_power_of_two(pool->block_size));
+}
+
+int32_t
+anv_block_pool_alloc_block(struct anv_block_pool *pool, uint32_t size)
+{
int32_t offset;
+ assert(util_is_power_of_two(size));
+
+ uint32_t size_log2 = ilog2_round_up(size);
+
+ assert(size_log2 < ANV_FREE_LIST_SIZE);
+
/* Try free list first. */
- if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) {
+ if (anv_free_list_pop(&(pool->free_list[size_log2]), &pool->map, &offset)) {
assert(offset >= 0);
assert(pool->map);
return offset;
}
- return anv_block_pool_alloc_new(pool, &pool->state);
+ /* Try to steal them. */
+ for (unsigned int i = size_log2 + 1; i < ANV_FREE_LIST_SIZE; i++) {
+ if (anv_free_list_pop (&(pool->free_list[i]), &pool->map, &offset)) {
+ assert(offset >= 0);
+ assert(pool->map);
+ /* Just return the blocks we do not require */
+ int32_t needless_offset = offset + size;
+ anv_free_list_push(&(pool->free_list[i - size_log2]), pool->map, needless_offset);
+
+ return offset;
+ }
+ }
+
+ return anv_block_pool_alloc_new(pool, &pool->state, size);
}
/* Allocates a block out of the back of the block pool.
@@ -590,7 +623,7 @@ anv_block_pool_alloc_back(struct anv_block_pool *pool)
return offset;
}
- offset = anv_block_pool_alloc_new(pool, &pool->back_state);
+ offset = anv_block_pool_alloc_new(pool, &pool->back_state, round_to_power_of_two(pool->block_size));
/* The offset we get out of anv_block_pool_alloc_new() is actually the
* number of bytes downwards from the middle to the end of the block.
@@ -602,12 +635,16 @@ anv_block_pool_alloc_back(struct anv_block_pool *pool)
}
void
-anv_block_pool_free(struct anv_block_pool *pool, int32_t offset)
+anv_block_pool_free(struct anv_block_pool *pool, int32_t offset, uint32_t size)
{
+ assert(util_is_power_of_two(size));
+
if (offset < 0) {
anv_free_list_push(&pool->back_free_list, pool->map, offset);
} else {
- anv_free_list_push(&pool->free_list, pool->map, offset);
+ uint32_t size_log2 = ilog2_round_up(size);
+ assert(size_log2 < ANV_FREE_LIST_SIZE);
+ anv_free_list_push(&(pool->free_list[size_log2]), pool->map, offset);
}
}
@@ -724,6 +761,9 @@ struct anv_state_stream_block {
/* The offset into the block pool at which this block starts */
uint32_t offset;
+ /* Power-of-2 block size allocated */
+ uint32_t size;
+
#ifdef HAVE_VALGRIND
/* A pointer to the first user-allocated thing in this block. This is
* what valgrind sees as the start of the block.
@@ -754,14 +794,12 @@ anv_state_stream_init(struct anv_state_stream *stream,
void
anv_state_stream_finish(struct anv_state_stream *stream)
{
- VG(const uint32_t block_size = stream->block_pool->block_size);
-
struct anv_state_stream_block *next = stream->block;
while (next != NULL) {
struct anv_state_stream_block sb = VG_NOACCESS_READ(next);
VG(VALGRIND_MEMPOOL_FREE(stream, sb._vg_ptr));
- VG(VALGRIND_MAKE_MEM_UNDEFINED(next, block_size));
- anv_block_pool_free(stream->block_pool, sb.offset);
+ VG(VALGRIND_MAKE_MEM_UNDEFINED(next, sb.size));
+ anv_block_pool_free(stream->block_pool, sb.offset, sb.size);
next = sb.next;
}
@@ -773,24 +811,33 @@ anv_state_stream_alloc(struct anv_state_stream *stream,
uint32_t size, uint32_t alignment)
{
struct anv_state_stream_block *sb = stream->block;
-
+ static int counter=0;
struct anv_state state;
+ counter++;
state.offset = align_u32(stream->next, alignment);
if (state.offset + size > stream->end) {
- uint32_t block = anv_block_pool_alloc(stream->block_pool);
+ /* As the offset must be aligned, ensure we add enough space to do the
+ * alignment, if required.
+ * In case the block pool needs to grow, ensure we have also space to
+ * store the stream block state.
+ */
+ uint32_t size_pow2 = round_to_power_of_two(size + alignment + sizeof(*sb));
+ uint32_t block = anv_block_pool_alloc_block(stream->block_pool, size_pow2);
+
sb = stream->block_pool->map + block;
VG(VALGRIND_MAKE_MEM_UNDEFINED(sb, sizeof(*sb)));
sb->next = stream->block;
sb->offset = block;
+ sb->size = size_pow2;
VG(sb->_vg_ptr = NULL);
- VG(VALGRIND_MAKE_MEM_NOACCESS(sb, stream->block_pool->block_size));
+ VG(VALGRIND_MAKE_MEM_NOACCESS(sb, size_pow2));
stream->block = sb;
stream->start = block;
stream->next = block + sizeof(*sb);
- stream->end = block + stream->block_pool->block_size;
+ stream->end = block + size_pow2;
state.offset = align_u32(stream->next, alignment);
assert(state.offset + size <= stream->end);
diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
index 5f0528fc8f..708aaf0488 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -741,9 +741,10 @@ void
anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
int32_t *bt_block;
+ struct anv_block_pool *block_pool =
+ &cmd_buffer->device->surface_state_block_pool;
u_vector_foreach(bt_block, &cmd_buffer->bt_blocks) {
- anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool,
- *bt_block);
+ anv_block_pool_free(block_pool, *bt_block, block_pool->block_size);
}
u_vector_finish(&cmd_buffer->bt_blocks);
@@ -774,10 +775,11 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
&cmd_buffer->batch,
GEN8_MI_BATCH_BUFFER_START_length * 4);
+ struct anv_block_pool *block_pool =
+ &cmd_buffer->device->surface_state_block_pool;
while (u_vector_length(&cmd_buffer->bt_blocks) > 1) {
int32_t *bt_block = u_vector_remove(&cmd_buffer->bt_blocks);
- anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool,
- *bt_block);
+ anv_block_pool_free(block_pool, *bt_block, block_pool->block_size);
}
assert(u_vector_length(&cmd_buffer->bt_blocks) == 1);
cmd_buffer->bt_next = 0;
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 7d079000b0..b1e1183023 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -421,6 +421,7 @@ union anv_free_list {
};
#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } })
+#define ANV_FREE_LIST_SIZE 32
struct anv_block_state {
union {
@@ -463,7 +464,7 @@ struct anv_block_pool {
uint32_t block_size;
- union anv_free_list free_list;
+ union anv_free_list free_list[ANV_FREE_LIST_SIZE];
struct anv_block_state state;
union anv_free_list back_free_list;
@@ -555,8 +556,9 @@ VkResult anv_block_pool_init(struct anv_block_pool *pool,
struct anv_device *device, uint32_t block_size);
void anv_block_pool_finish(struct anv_block_pool *pool);
int32_t anv_block_pool_alloc(struct anv_block_pool *pool);
+int32_t anv_block_pool_alloc_block(struct anv_block_pool *pool, uint32_t size);
int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool);
-void anv_block_pool_free(struct anv_block_pool *pool, int32_t offset);
+void anv_block_pool_free(struct anv_block_pool *pool, int32_t offset, uint32_t size);
void anv_state_pool_init(struct anv_state_pool *pool,
struct anv_block_pool *block_pool);
void anv_state_pool_finish(struct anv_state_pool *pool);
--
2.11.0
More information about the mesa-dev
mailing list