[igt-dev] [PATCH i-g-t 1/6] lib/i915/gem_create: Introduce gem-pool bo cache
Kamil Konieczny
kamil.konieczny at linux.intel.com
Thu Mar 10 19:18:46 UTC 2022
On 2022-03-10 at 08:15:24 +0100, Zbigniew Kempczyński wrote:
> Handling batchbuffers with softpin requires tracking their state, otherwise
> we can write to an in-flight batchbuffer and encounter a gpu hang. The gem
> pool adds such tracking (similar to the libdrm bo cache) and provides free,
> ready-to-use bos. If the pool has no free bo a new one is created, which
> means the pool can grow during test execution. When a test completes the
> buffers and memory are freed from igt_core, so no additional cleanup is
> necessary.
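
A quick illustration for archive readers of the pattern this enables (a
sketch only; the execbuf/object setup and the emitted commands are elided,
num_batches is a placeholder):

    /* Submit several serialized batches without stalling: each iteration
     * gets a batch that is guaranteed not to be in flight (or a freshly
     * created one if all pooled bos are busy).
     */
    for (int i = 0; i < num_batches; i++) {
            uint64_t bbsize = 4096;
            uint32_t bb = gem_create_from_pool(fd, &bbsize, REGION_SMEM);
            uint32_t *map = gem_mmap__device_coherent(fd, bb, 0, bbsize,
                                                      PROT_WRITE);
            uint32_t *cmd = map;

            /* ... emit gpu commands via *cmd++ ... */
            *cmd++ = MI_BATCH_BUFFER_END;
            gem_munmap(map, bbsize);

            /* bb is referenced by execbuf; no gem_sync() needed here */
            gem_execbuf(fd, &execbuf);
    }
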
>
> Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> Cc: Petri Latvala <petri.latvala at intel.com>
> Cc: Kamil Konieczny <kamil.konieczny at linux.intel.com>
> ---
> .../igt-gpu-tools/igt-gpu-tools-docs.xml | 1 +
> lib/i915/gem_create.c | 271 ++++++++++++++++++
> lib/i915/gem_create.h | 4 +
> lib/igt_core.c | 2 +
> 4 files changed, 278 insertions(+)
>
> diff --git a/docs/reference/igt-gpu-tools/igt-gpu-tools-docs.xml b/docs/reference/igt-gpu-tools/igt-gpu-tools-docs.xml
> index 0dc5a0b7e7..c22e70b712 100644
> --- a/docs/reference/igt-gpu-tools/igt-gpu-tools-docs.xml
> +++ b/docs/reference/igt-gpu-tools/igt-gpu-tools-docs.xml
> @@ -56,6 +56,7 @@
> </chapter>
> <chapter>
> <title>igt/i915 API Reference</title>
> + <xi:include href="xml/gem_create.xml"/>
> <xi:include href="xml/gem_context.xml"/>
> <xi:include href="xml/gem_engine_topology.xml"/>
> <xi:include href="xml/gem_scheduler.xml"/>
> diff --git a/lib/i915/gem_create.c b/lib/i915/gem_create.c
> index b2e8d5595f..605c45139d 100644
> --- a/lib/i915/gem_create.c
> +++ b/lib/i915/gem_create.c
> @@ -4,12 +4,25 @@
> */
>
> #include <errno.h>
> +#include <pthread.h>
>
> +#include "drmtest.h"
> #include "gem_create.h"
> #include "i915_drm.h"
> #include "igt_core.h"
> +#include "igt_list.h"
> +#include "igt_map.h"
> #include "ioctl_wrappers.h"
>
> +/**
> + * SECTION:gem_create
> + * @short_description: Helpers for dealing with object creation
> + * @title: GEM Create
> + *
> + * This helper library contains functions used for creating gem objects.
> + */
> +
> int __gem_create(int fd, uint64_t *size, uint32_t *handle)
> {
> struct drm_i915_gem_create create = {
> @@ -88,3 +101,261 @@ uint32_t gem_create_ext(int fd, uint64_t size, struct i915_user_extension *ext)
>
> return handle;
> }
> +
> +static struct igt_map *pool;
> +static pthread_mutex_t pool_mutex = PTHREAD_MUTEX_INITIALIZER;
> +
> +struct pool_entry {
> + int fd;
> + uint32_t handle;
> + uint64_t size; /* requested bo size */
> + uint64_t bo_size; /* created bo size */
> + uint32_t region;
> + struct igt_list_head link;
> +};
> +
> +struct pool_list {
> + uint64_t size;
> + struct igt_list_head list;
> +};
> +
> +static struct pool_entry *find_or_create(int fd, struct pool_list *pl,
> + uint64_t size, uint32_t region)
> +{
> + struct pool_entry *pe;
> + bool found = false;
> +
> + igt_list_for_each_entry(pe, &pl->list, link) {
> + if (pe->fd == fd && pe->size == size && pe->region == region &&
> + !gem_bo_busy(fd, pe->handle)) {
> + found = true;
> + break;
> + }
> + }
> +
> + if (!found) {
> + pe = calloc(1, sizeof(*pe));
> + if (!pe)
> + goto out;
> +
> + pe->fd = fd;
> + pe->bo_size = size;
> + if (__gem_create_in_memory_regions(fd, &pe->handle, &pe->bo_size, region)) {
> + free(pe);
> + pe = NULL;
> + goto out;
> + }
> + pe->size = size;
> + pe->region = region;
> +
> + igt_list_add_tail(&pe->link, &pl->list);
> + }
> +
> +out:
> + return pe;
> +}
> +
> +/**
> + * gem_create_from_pool:
> + * @fd: open i915 drm file descriptor
> + * @size: pointer to size; on input it holds the requested bo size,
> + * on output the created bo size is stored there
> + * @region: region in which bo should be created
> + *
> + * The function returns a bo handle which is free to use (not busy).
> + * Internally it iterates over previously allocated bos and returns the
> + * first free one. If there is no free bo a new one is created.
> + *
> + * Returns: a bo handle; the created bo size is stored via the @size pointer
> + */
> +uint32_t gem_create_from_pool(int fd, uint64_t *size, uint32_t region)
> +{
> + struct pool_list *pl;
> + struct pool_entry *pe;
> +
> + pthread_mutex_lock(&pool_mutex);
> +
> + pl = igt_map_search(pool, size);
> + if (!pl) {
> + pl = calloc(1, sizeof(*pl));
> + if (!pl)
> + goto out;
> +
> + IGT_INIT_LIST_HEAD(&pl->list);
> + pl->size = *size;
> + igt_map_insert(pool, &pl->size, pl);
> + }
> + pe = find_or_create(fd, pl, *size, region);
> +
> +out:
> + pthread_mutex_unlock(&pool_mutex);
> +
> + igt_assert(pl && pe);
> +
> + return pe->handle;
> +}
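
One property worth spelling out (my reading of find_or_create(); a sketch
with the actual submission elided):

    uint64_t size = 4096;
    uint32_t h1, h2, h3;

    h1 = gem_create_from_pool(fd, &size, REGION_SMEM);
    /* ... submit work referencing h1 ... */

    /* While h1 is still busy, the pool hands out a different handle. */
    h2 = gem_create_from_pool(fd, &size, REGION_SMEM);

    gem_sync(fd, h1);
    /* h1 is idle again, so it may be handed back out here. */
    h3 = gem_create_from_pool(fd, &size, REGION_SMEM);
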
> +
> +static void __pool_list_free_func(struct igt_map_entry *entry)
> +{
> + free(entry->data);
> +}
> +
> +static void __destroy_pool(struct igt_map *map, pthread_mutex_t *mutex)
> +{
> + struct igt_map_entry *pos;
> + const struct pool_list *pl;
> + struct pool_entry *pe, *tmp;
> +
> + if (!map)
> + return;
> +
> + pthread_mutex_lock(mutex);
> +
> + igt_map_foreach(map, pos) {
> + pl = pos->key;
> + igt_list_for_each_entry_safe(pe, tmp, &pl->list, link) {
> + gem_close(pe->fd, pe->handle);
> + igt_list_del(&pe->link);
> + free(pe);
> + }
> + }
> +
> + pthread_mutex_unlock(mutex);
> +
> + igt_map_destroy(map, __pool_list_free_func);
> +}
> +
> +void gem_pool_dump(void)
> +{
> + struct igt_map_entry *pos;
> + const struct pool_list *pl;
> + struct pool_entry *pe;
> +
> + if (!pool)
> + return;
> +
> + pthread_mutex_lock(&pool_mutex);
> +
> + igt_debug("[pool]\n");
> + igt_map_foreach(pool, pos) {
> + pl = pos->key;
> + igt_debug("bucket [%llx]\n", (long long) pl->size);
> + igt_list_for_each_entry(pe, &pl->list, link)
> + igt_debug(" - handle: %u, size: %llx, bo_size: %llx, region: %x\n",
> + pe->handle, (long long) pe->size,
> + (long long) pe->bo_size, pe->region);
> + }
> +
> + pthread_mutex_unlock(&pool_mutex);
> +}
> +
> +#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001ULL
> +static inline uint32_t hash_pool(const void *val)
> +{
> + uint64_t hash = *(uint64_t *) val;
> +
> + hash = hash * GOLDEN_RATIO_PRIME_64;
> + return hash >> 32;
> +}
> +
> +static int equal_pool(const void *a, const void *b)
> +{
> + struct pool_list *p1 = (struct pool_list *) a;
> + struct pool_list *p2 = (struct pool_list *) b;
> +
> + return p1->size == p2->size;
> +}
> +
> +/**
> + * gem_pool_init:
> + *
> + * This function initializes the bo pool (a kind of bo cache). Its main
> + * purpose is to support working with softpin to achieve pipelined execution
> + * on the gpu (without stalls).
> + *
> + * For example, imagine code like the following:
> + *
> + * |[<!-- language="C" -->
> + * uint32_t bb = gem_create(fd, 4096);
> + * uint32_t *bbptr = gem_mmap__device_coherent(fd, bb, ...)
> + * uint32_t *cmd = bbptr;
> + * ...
> + * *cmd++ = ...gpu commands...
> + * ...
> + * *cmd++ = MI_BATCH_BUFFER_END;
> + * ...
> + * gem_execbuf(fd, execbuf); // bb is part of execbuf <--- first execbuf
> + *
> + * cmd = bbptr;
> + * ...
> + * *cmd++ = ... next gpu commands...
> + * ...
> + * *cmd++ = MI_BATCH_BUFFER_END;
> + * ...
> + * gem_execbuf(fd, execbuf); // bb is part of execbuf <--- second execbuf
> + * ]|
> + *
> + * The above code is prone to gpu hangs because as soon as bb is submitted
> + * to the gpu we start writing to it again. If the gpu is still executing
> + * commands from the first execbuf we overwrite them, leading to
> + * unpredictable behavior (a mix of commands from the first and second
> + * submissions, or a gpu hang). To avoid this we could sync after the first
> + * execbuf, but that stalls execution. For some tests that may be acceptable,
> + * but such "isolated" execution hides bugs (synchronization, cache flushes,
> + * etc.).
> + *
> + * So, to achieve pipelined execution we need to use another bb. If we want
> + * to enqueue more serialized work we need even more bbs (how many depends
> + * on execution speed). Handling this manually is cumbersome, as we have to
> + * track all bbs and their status (busy or free).
> + *
> + * The solution to the above is the gem pool. It returns the first handle of
> + * the requested size which is not busy (or creates a new one if none exists
> + * or all bos are in use). Here's an example of how to use it:
> + *
> + * |[<!-- language="C" -->
> + * uint64_t bbsize = 4096;
> + * uint32_t bb = gem_create_from_pool(fd, &bbsize, REGION_SMEM);
> + * uint32_t *bbptr = gem_mmap__device_coherent(fd, bb, ...)
> + * uint32_t *cmd = bbptr;
> + * ...
> + * *cmd++ = ...gpu commands...
> + * ...
> + * *cmd++ = MI_BATCH_BUFFER_END;
> + * gem_munmap(bbptr, bbsize);
> + * ...
> + * gem_execbuf(fd, execbuf); // bb is part of execbuf <--- first execbuf
> + *
> + * bbsize = 4096;
> + * bb = gem_create_from_pool(fd, &bbsize, REGION_SMEM);
> + * bbptr = gem_mmap__device_coherent(fd, bb, ...)
> + * cmd = bbptr;
> + * ...
> + * *cmd++ = ... next gpu commands...
> + * ...
> + * *cmd++ = MI_BATCH_BUFFER_END;
> + * gem_munmap(bbptr, bbsize);
> + * ...
> + * gem_execbuf(fd, execbuf); // bb is part of execbuf <--- second execbuf
> + * ]|
> + *
> + * Assuming the first execbuf is still executing we will get a new bb handle
> + * when we call gem_create_from_pool(). When the test completes the pool is
> + * freed automatically in the igt core (all handles are closed, memory is
> + * freed and the gem pool is reinitialized for the next test).
> + *
> + * Some explanation is needed of why we pass a pointer to the size instead
> + * of an absolute value. On discrete, depending on the memory placement
> + * (region), the created object can be bigger than requested. In particular,
> + * when we use the allocator to handle vm space and allocate a vma with the
> + * requested size (which is smaller than the created bo), we can overlap
> + * with the next allocation and get -ENOSPC.
> + */
> +void gem_pool_init(void)
> +{
> + pthread_mutex_init(&pool_mutex, NULL);
> + __destroy_pool(pool, &pool_mutex);
> + pool = igt_map_create(hash_pool, equal_pool);
> +}
> +
> +igt_constructor {
> + gem_pool_init();
> +}
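
Regarding the size-pointer rationale in the gem_pool_init() documentation,
a sketch of the intended interplay with the allocator (assuming
intel_allocator_alloc() from lib/intel_allocator.h; the ahnd setup is
elided):

    uint64_t bbsize = 4096;
    uint32_t bb = gem_create_from_pool(fd, &bbsize, REGION_LMEM);

    /* Reserve vm space for the bo size actually created (bbsize was
     * updated by the call), not for the requested 4096; otherwise the
     * next vma may overlap this bo and submission can fail with -ENOSPC.
     */
    uint64_t offset = intel_allocator_alloc(ahnd, bb, bbsize, 0);
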
> diff --git a/lib/i915/gem_create.h b/lib/i915/gem_create.h
> index c2b531b4d7..c32a815d60 100644
> --- a/lib/i915/gem_create.h
> +++ b/lib/i915/gem_create.h
> @@ -16,4 +16,8 @@ int __gem_create_ext(int fd, uint64_t *size, uint32_t *handle,
> struct i915_user_extension *ext);
> uint32_t gem_create_ext(int fd, uint64_t size, struct i915_user_extension *ext);
>
> +void gem_pool_init(void);
> +void gem_pool_dump(void);
> +uint32_t gem_create_from_pool(int fd, uint64_t *size, uint32_t region);
> +
> #endif /* GEM_CREATE_H */
> diff --git a/lib/igt_core.c b/lib/igt_core.c
> index f2c701deab..6dad3c8485 100644
> --- a/lib/igt_core.c
> +++ b/lib/igt_core.c
> @@ -58,6 +58,7 @@
> #include <glib.h>
>
> #include "drmtest.h"
> +#include "i915/gem_create.h"
> #include "intel_allocator.h"
> #include "intel_batchbuffer.h"
> #include "intel_chipset.h"
> @@ -1428,6 +1429,7 @@ __noreturn static void exit_subtest(const char *result)
> */
> intel_allocator_init();
> intel_bb_reinit_allocator();
> + gem_pool_init();
>
> if (!in_dynamic_subtest)
> _igt_dynamic_tests_executed = -1;
> --
> 2.32.0
>
Reviewed-by: Kamil Konieczny <kamil.konieczny at linux.intel.com>
--
Kamil