[Mesa-dev] [RFC PATCH 02/14] anv/allocator: Add anv_state_table.
Rafael Antognolli
rafael.antognolli at intel.com
Sat Dec 8 00:05:41 UTC 2018
Add a structure to hold anv_states. This table will initially be used to
recycle anv_states, instead of relying on a linked list implemented in
GPU memory. Later it could be used so that all anv_states just point to
the content of this struct, instead of making copies of anv_states
everywhere.
TODO:
1) I need to refine the API, especially anv_state_table_add(). So far
we have to add an item, get the pointer to the anv_state, and then
fill in the content. I have tried a few different approaches but need to
come back to this one.
2) There's a lot of common code between this table's backing store
memory and the anv_block_pool buffer, due to how we grow it. I think
it's possible to refactor this and reuse code in both places.
3) Add unit tests.
---
src/intel/vulkan/anv_allocator.c | 246 ++++++++++++++++++++++++++++++-
src/intel/vulkan/anv_private.h | 44 ++++++
2 files changed, 288 insertions(+), 2 deletions(-)
diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index 67f2f73aa11..3590ede6050 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -100,6 +100,9 @@
/* Allocations are always at least 64 byte aligned, so 1 is an invalid value.
* We use it to indicate the free list is empty. */
#define EMPTY 1
+/* Sentinel offset marking the end of an anv_free_list2 chain. Those lists
+ * store table indices, so the largest uint32_t is used as the terminator. */
+#define EMPTY2 UINT32_MAX
+
+/* Alignment, in bytes, applied to the used size when growing. */
+#define PAGE_SIZE 4096
struct anv_mmap_cleanup {
void *map;
@@ -130,6 +133,246 @@ round_to_power_of_two(uint32_t value)
return 1 << ilog2_round_up(value);
}
+/* Record of one mmap() of the state table's memfd. One record is kept per
+ * mapping so that anv_state_table_finish() can munmap() all of them.
+ */
+struct anv_state_table_cleanup {
+ /* Base address of the mapping; NULL when nothing has been mapped. */
+ void *map;
+ /* Length of the mapping in bytes, as passed to mmap()/munmap(). */
+ size_t size;
+};
+
+/* Zero-initialized cleanup record (map == NULL, size == 0). */
+#define ANV_STATE_TABLE_CLEANUP_INIT ((struct anv_state_table_cleanup){0})
+/* Size in bytes of one table slot. */
+#define ANV_STATE_ENTRY_SIZE (sizeof(struct anv_free_entry))
+
+static VkResult
+anv_state_table_expand_range(struct anv_state_table *table, uint32_t size);
+
+/**
+ * Sets up an empty state table backed by an anonymous memfd and maps enough
+ * of it to hold initial_entries entries.
+ *
+ * Returns VK_SUCCESS on success, VK_ERROR_INITIALIZATION_FAILED when the
+ * memfd or the cleanup vector cannot be set up, or the error reported by the
+ * first mapping attempt. Whatever was acquired before a failure is released
+ * again before returning.
+ */
+VkResult
+anv_state_table_init(struct anv_state_table *table,
+                     struct anv_device *device,
+                     uint32_t initial_entries)
+{
+   VkResult status;
+
+   table->device = device;
+
+   table->fd = memfd_create("free table", MFD_CLOEXEC);
+   if (table->fd == -1)
+      return vk_error(VK_ERROR_INITIALIZATION_FAILED);
+
+   /* Size the file to BLOCK_POOL_MEMFD_SIZE once, up front. Growing the
+    * table later only maps a larger window of this same file.
+    */
+   if (ftruncate(table->fd, BLOCK_POOL_MEMFD_SIZE) == -1) {
+      status = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+      close(table->fd);
+      return status;
+   }
+
+   /* Every mapping we create gets remembered in this vector so that
+    * anv_state_table_finish() can unmap it.
+    */
+   const uint32_t cleanup_stride =
+      round_to_power_of_two(sizeof(struct anv_state_table_cleanup));
+   if (!u_vector_init(&table->mmap_cleanups, cleanup_stride, 128)) {
+      status = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+      close(table->fd);
+      return status;
+   }
+
+   /* No entries handed out yet and nothing mapped yet. */
+   table->state.next = 0;
+   table->state.end = 0;
+   table->size = 0;
+
+   uint32_t map_bytes = initial_entries * ANV_STATE_ENTRY_SIZE;
+   status = anv_state_table_expand_range(table, map_bytes);
+   if (status != VK_SUCCESS) {
+      u_vector_finish(&table->mmap_cleanups);
+      close(table->fd);
+   }
+
+   return status;
+}
+
+/**
+ * Maps the first `size` bytes of the table's memfd and makes that mapping
+ * the table's current one (table->map / table->size).
+ *
+ * Earlier mappings are left in place; each mapping is recorded in
+ * table->mmap_cleanups and only released by anv_state_table_finish().
+ *
+ * Returns VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY if the cleanup record
+ * cannot be allocated, or VK_ERROR_MEMORY_MAP_FAILED if mmap() fails. On
+ * failure table->map and table->size are left untouched.
+ */
+static VkResult
+anv_state_table_expand_range(struct anv_state_table *table, uint32_t size)
+{
+   void *map;
+   struct anv_state_table_cleanup *cleanup;
+
+   /* Assert that we only ever grow the table */
+   assert(size >= table->state.end);
+
+   /* Assert that we don't go outside the bounds of the memfd */
+   assert(size <= BLOCK_POOL_MEMFD_SIZE);
+
+   /* The vector was created with anv_state_table_cleanup-sized slots, so
+    * the slot must be treated as that type and nothing else.
+    */
+   cleanup = u_vector_add(&table->mmap_cleanups);
+   if (!cleanup)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   *cleanup = ANV_STATE_TABLE_CLEANUP_INIT;
+
+   /* Just leak the old map until we destroy the table. We can't munmap it
+    * without races or imposing locking on the allocate fast path. On the
+    * whole the leaked maps add up to less than the size of the current
+    * map. MAP_POPULATE seems like the right thing to do, but we should try
+    * to get some numbers.
+    */
+   map = mmap(NULL, size, PROT_READ | PROT_WRITE,
+              MAP_SHARED | MAP_POPULATE, table->fd, 0);
+   if (map == MAP_FAILED) {
+      /* Report the failure to the caller instead of terminating the
+       * process. The cleanup record stays zeroed (map == NULL), which
+       * anv_state_table_finish() skips.
+       */
+      return vk_errorf(table->device->instance, table->device,
+                       VK_ERROR_MEMORY_MAP_FAILED, "mmap failed: %m");
+   }
+
+   cleanup->map = map;
+   cleanup->size = size;
+
+   table->map = map;
+   table->size = size;
+
+   return VK_SUCCESS;
+}
+
+/**
+ * Enlarges the table's mapping when the entries handed out so far take up
+ * more than half of it. The whole decision is made while holding the device
+ * mutex.
+ *
+ * Returns the table capacity in entries (mapped bytes divided by the entry
+ * size), or 0 when remapping failed. table->state is never written here;
+ * publishing the new limits is left to the caller.
+ */
+static uint32_t
+anv_state_table_grow(struct anv_state_table *table)
+{
+   VkResult status = VK_SUCCESS;
+
+   pthread_mutex_lock(&table->device->mutex);
+
+   /* Bytes covered by the entries handed out so far, rounded up to pages. */
+   uint32_t used_bytes = align_u32(table->state.next * ANV_STATE_ENTRY_SIZE,
+                                   PAGE_SIZE);
+   uint32_t mapped_bytes = table->size;
+
+   /* Growing is only meaningful once an initial mapping exists. */
+   assert(mapped_bytes > 0);
+
+   uint32_t needed_bytes = MAX2(used_bytes, mapped_bytes);
+
+   /* If twice the used space already fits there is nothing to do;
+    * otherwise keep doubling the mapping until it covers what is needed.
+    */
+   if (!(used_bytes * 2 <= needed_bytes)) {
+      uint32_t grown_bytes;
+
+      for (grown_bytes = mapped_bytes * 2; grown_bytes < needed_bytes;
+           grown_bytes *= 2)
+         ;
+
+      assert(grown_bytes > table->size);
+
+      status = anv_state_table_expand_range(table, grown_bytes);
+   }
+
+   pthread_mutex_unlock(&table->device->mutex);
+
+   if (status != VK_SUCCESS)
+      return 0;
+
+   return table->size / ANV_STATE_ENTRY_SIZE;
+}
+
+/**
+ * Tears down a state table: unmaps every recorded mapping, frees the cleanup
+ * vector and closes the backing memfd.
+ */
+void
+anv_state_table_finish(struct anv_state_table *table)
+{
+   struct anv_state_table_cleanup *record;
+
+   /* Records whose map is still NULL never received a mapping. */
+   u_vector_foreach(record, &table->mmap_cleanups) {
+      if (record->map != NULL)
+         munmap(record->map, record->size);
+   }
+
+   u_vector_finish(&table->mmap_cleanups);
+
+   close(table->fd);
+}
+
+/**
+ * Reserves `count` consecutive entries in the table and returns the index of
+ * the first one. Each reserved entry gets its state.idx set to its own
+ * index; the remaining fields are left for the caller to fill in.
+ *
+ * Reservation is done with an atomic add on table->state; when the
+ * reservation runs past state.end, one thread grows the table while the
+ * others sleep on a futex keyed on state.end.
+ *
+ * NOTE(review): anv_state_table_grow() returns 0 on failure, in which case
+ * the do/while below keeps retrying for as long as new.next is nonzero.
+ * Confirm whether a failure should be reported to the caller instead.
+ */
+uint32_t
+anv_state_table_add(struct anv_state_table *table, uint32_t count)
+{
+ struct anv_block_state state, old, new;
+
+ while(1) {
+ /* Atomically claim [state.next, state.next + count). */
+ state.u64 = __sync_fetch_and_add(&table->state.u64, count);
+ if (state.next + count <= state.end) {
+ /* The claimed range fits in the table as it was when we claimed it. */
+ assert(table->map);
+ struct anv_free_entry *entry = &table->map[state.next];
+ for (int i = 0; i < count; i++) {
+ entry[i].state.idx = state.next + i;
+ }
+ return state.next;
+ } else if (state.next <= state.end) {
+ /* We made the first allocation that runs past the end of the table,
+ * so we have to grow it. table->state.next acts as a mutex: threads
+ * that try to allocate now will get indices above the current limit
+ * and hit futex_wait below.
+ */
+ new.next = state.next + count;
+ do {
+ new.end = anv_state_table_grow(table);
+ } while (new.end < new.next);
+
+ /* Publish the new next/end pair; if other threads bumped next in the
+ * meantime they may be waiting, so wake them. The loop then retries
+ * the reservation from the top.
+ */
+ old.u64 = __sync_lock_test_and_set(&table->state.u64, new.u64);
+ if (old.next != state.next)
+ futex_wake(&table->state.end, INT_MAX);
+ } else {
+ /* Another thread is growing the table; wait until end changes. */
+ futex_wait(&table->state.end, state.end, NULL);
+ continue;
+ }
+ }
+}
+
+/**
+ * Pushes the `count` consecutive table entries starting at index `idx` onto
+ * the front of the free list `list`.
+ *
+ * The entries are first chained to each other through their list.offset
+ * fields, then the chain is spliced in with a compare-and-swap loop, so no
+ * lock is taken.
+ */
+void
+anv_state_table_push(union anv_free_list2 *list,
+ struct anv_state_table *table,
+ uint32_t idx, uint32_t count)
+{
+ union anv_free_list2 current, old, new;
+ uint32_t next = idx;
+
+ /* Link entry i to entry i + 1; afterwards `next` is the last entry. */
+ for (uint32_t i = 1; i < count; i++, next++)
+ table->map[next].list.offset = next + 1;
+
+ old = *list;
+ do {
+ current = old;
+ /* Point the tail of our chain at the head we observed. */
+ table->map[next].list.offset = current.offset;
+ new.offset = idx;
+ /* Bump the counter so the head value differs from the one observed. */
+ new.count = current.count + 1;
+ /* Retry with the freshly observed head if someone else got in first. */
+ old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
+ } while (old.u64 != current.u64);
+}
+
+/**
+ * Removes the entry at the head of the free list `list` and returns a
+ * pointer to its anv_state inside the table's current mapping, or NULL when
+ * the list is empty (head offset equals EMPTY2).
+ *
+ * The head is replaced with a compare-and-swap loop; the list counter is
+ * carried over unchanged here.
+ */
+struct anv_state *
+anv_state_table_pop(union anv_free_list2 *list,
+ struct anv_state_table *table)
+{
+ union anv_free_list2 current, new, old;
+
+ current.u64 = list->u64;
+ while (current.offset != EMPTY2) {
+ /* Full barrier between loading the list head and reading table->map.
+ * NOTE(review): presumably this keeps the map pointer read from being
+ * ordered before the head read - confirm.
+ */
+ __sync_synchronize();
+ new.offset = table->map[current.offset].list.offset;
+ new.count = current.count;
+ old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
+ if (old.u64 == current.u64) {
+ /* We won the race: the old head entry is ours. */
+ struct anv_free_entry *entry = &table->map[current.offset];
+ return &entry->state;
+ }
+ /* Lost the race; retry against the head the CAS reported. */
+ current = old;
+ }
+
+ return NULL;
+}
+
static bool
anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset)
{
@@ -311,8 +554,6 @@ anv_block_pool_finish(struct anv_block_pool *pool)
close(pool->fd);
}
-#define PAGE_SIZE 4096
-
static VkResult
anv_block_pool_expand_range(struct anv_block_pool *pool,
uint32_t center_bo_offset, uint32_t size)
@@ -782,6 +1023,7 @@ anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
&pool->block_pool,
state.alloc_size,
pool->block_size);
+ /* state.idx = anv_state_table_add(pool->table, state); */
done:
state.map = pool->block_pool.map + state.offset;
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index aff076a55d9..3fe299d55f9 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -615,7 +615,18 @@ union anv_free_list {
uint64_t u64;
};
+/* Head of a free list of anv_state_table entries. Both fields are packed
+ * into a single 64-bit word (u64) so the head can be replaced with one
+ * 64-bit compare-and-swap.
+ */
+union anv_free_list2 {
+ struct {
+ /* Table index of the first free entry, or UINT32_MAX when empty. */
+ uint32_t offset;
+
+ /* A simple count that anv_state_table_push() increments each time it
+ * installs a new head; anv_state_table_pop() carries it over as is. */
+ uint32_t count;
+ };
+ uint64_t u64;
+};
+
+
#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } })
+#define ANV_FREE_LIST2_EMPTY ((union anv_free_list2) { { UINT32_MAX, 0 } })
struct anv_block_state {
union {
@@ -687,6 +698,7 @@ struct anv_state {
int32_t offset;
uint32_t alloc_size;
void *map;
+ uint32_t idx;
};
#define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
@@ -701,6 +713,21 @@ struct anv_fixed_size_state_pool {
#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
+/* One slot of an anv_state_table: the stored anv_state plus the link used
+ * while the slot sits on a free list (list.offset holds the index of the
+ * next slot in the chain).
+ */
+struct anv_free_entry {
+ union anv_free_list2 list;
+ struct anv_state state;
+};
+
+/* Growable array of anv_free_entry slots backed by a memfd mapping. */
+struct anv_state_table {
+ /* Owning device; its mutex serializes growing the table. */
+ struct anv_device *device;
+ /* memfd that backs every mapping of the table. */
+ int fd;
+ /* Most recent mapping of fd, viewed as an array of slots. */
+ struct anv_free_entry *map;
+ /* Size in bytes of the mapping that `map` points to. */
+ uint32_t size;
+ /* next: index of the next slot to hand out; end: slot capacity. */
+ struct anv_block_state state;
+ /* One anv_state_table_cleanup record per mapping, for teardown. */
+ struct u_vector mmap_cleanups;
+};
+
+
struct anv_state_pool {
struct anv_block_pool block_pool;
@@ -762,6 +789,23 @@ void anv_state_stream_finish(struct anv_state_stream *stream);
struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
uint32_t size, uint32_t alignment);
+/* Creates the table's backing memfd and maps room for initial_entries. */
+VkResult anv_state_table_init(struct anv_state_table *table,
+ struct anv_device *device,
+ uint32_t initial_entries);
+/* Unmaps all mappings of the table and closes its memfd. */
+void anv_state_table_finish(struct anv_state_table *table);
+/* Reserves count consecutive entries; returns the index of the first. */
+uint32_t anv_state_table_add(struct anv_state_table *table, uint32_t count);
+/* Pushes count consecutive entries starting at idx onto the free list. */
+void anv_state_table_push(union anv_free_list2 *list,
+ struct anv_state_table *table,
+ uint32_t idx, uint32_t count);
+/* Pops the head entry of the free list; returns NULL when it is empty. */
+struct anv_state* anv_state_table_pop(union anv_free_list2 *list,
+ struct anv_state_table *table);
+
+
+/* Looks up the anv_state stored in slot idx of the table's current mapping.
+ * No bounds checking is performed.
+ */
+static inline struct anv_state *
+anv_state_table_get(struct anv_state_table *table, uint32_t idx)
+{
+   struct anv_free_entry *slot = table->map + idx;
+
+   return &slot->state;
+}
/**
* Implements a pool of re-usable BOs. The interface is identical to that
* of block_pool except that each block is its own BO.
--
2.17.1
More information about the mesa-dev
mailing list