[Mesa-dev] [PATCH 04/15] pb_cache: let drivers choose the number of buckets
Samuel Pitoiset
samuel.pitoiset at gmail.com
Mon Jan 8 20:51:05 UTC 2018
On 01/06/2018 12:12 PM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
> src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 2 +-
> src/gallium/auxiliary/pipebuffer/pb_cache.c | 20 ++++++++++++++++----
> src/gallium/auxiliary/pipebuffer/pb_cache.h | 6 ++++--
> src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 1 -
> src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 3 ++-
> src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 1 -
> src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 3 ++-
> 7 files changed, 25 insertions(+), 11 deletions(-)
>
> diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
> index 24831f6..4e70048 100644
> --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
> +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
> @@ -297,16 +297,16 @@ pb_cache_manager_create(struct pb_manager *provider,
> return NULL;
>
> mgr = CALLOC_STRUCT(pb_cache_manager);
> if (!mgr)
> return NULL;
>
> mgr->base.destroy = pb_cache_manager_destroy;
> mgr->base.create_buffer = pb_cache_manager_create_buffer;
> mgr->base.flush = pb_cache_manager_flush;
> mgr->provider = provider;
> - pb_cache_init(&mgr->cache, usecs, size_factor, bypass_usage,
> + pb_cache_init(&mgr->cache, 1, usecs, size_factor, bypass_usage,
> maximum_cache_size,
> _pb_cache_buffer_destroy,
> pb_cache_can_reclaim_buffer);
> return &mgr->base;
> }
> diff --git a/src/gallium/auxiliary/pipebuffer/pb_cache.c b/src/gallium/auxiliary/pipebuffer/pb_cache.c
> index dd479ae..af899a2 100644
> --- a/src/gallium/auxiliary/pipebuffer/pb_cache.c
> +++ b/src/gallium/auxiliary/pipebuffer/pb_cache.c
> @@ -85,21 +85,21 @@ pb_cache_add_buffer(struct pb_cache_entry *entry)
> struct pb_cache *mgr = entry->mgr;
> struct list_head *cache = &mgr->buckets[entry->bucket_index];
> struct pb_buffer *buf = entry->buffer;
> unsigned i;
>
> mtx_lock(&mgr->mutex);
> assert(!pipe_is_referenced(&buf->reference));
>
> int64_t current_time = os_time_get();
>
> - for (i = 0; i < ARRAY_SIZE(mgr->buckets); i++)
> + for (i = 0; i < mgr->num_heaps; i++)
> release_expired_buffers_locked(&mgr->buckets[i], current_time);
>
> /* Directly release any buffer that exceeds the limit. */
> if (mgr->cache_size + buf->size > mgr->max_cache_size) {
> mgr->destroy_buffer(buf);
> mtx_unlock(&mgr->mutex);
> return;
> }
>
> entry->start = os_time_get();
> @@ -146,20 +146,22 @@ pb_cache_is_buffer_compat(struct pb_cache_entry *entry,
> struct pb_buffer *
> pb_cache_reclaim_buffer(struct pb_cache *mgr, pb_size size,
> unsigned alignment, unsigned usage,
> unsigned bucket_index)
> {
> struct pb_cache_entry *entry;
> struct pb_cache_entry *cur_entry;
> struct list_head *cur, *next;
> int64_t now;
> int ret = 0;
> +
> + assert(bucket_index < mgr->num_heaps);
> struct list_head *cache = &mgr->buckets[bucket_index];
>
> mtx_lock(&mgr->mutex);
>
> entry = NULL;
> cur = cache->next;
> next = cur->next;
>
> /* search in the expired buffers, freeing them in the process */
> now = os_time_get();
> @@ -222,39 +224,41 @@ pb_cache_reclaim_buffer(struct pb_cache *mgr, pb_size size,
> * Empty the cache. Useful when there is not enough memory.
> */
> void
> pb_cache_release_all_buffers(struct pb_cache *mgr)
> {
> struct list_head *curr, *next;
> struct pb_cache_entry *buf;
> unsigned i;
>
> mtx_lock(&mgr->mutex);
> - for (i = 0; i < ARRAY_SIZE(mgr->buckets); i++) {
> + for (i = 0; i < mgr->num_heaps; i++) {
> struct list_head *cache = &mgr->buckets[i];
>
> curr = cache->next;
> next = curr->next;
> while (curr != cache) {
> buf = LIST_ENTRY(struct pb_cache_entry, curr, head);
> destroy_buffer_locked(buf);
> curr = next;
> next = curr->next;
> }
> }
> mtx_unlock(&mgr->mutex);
> }
>
> void
> pb_cache_init_entry(struct pb_cache *mgr, struct pb_cache_entry *entry,
> struct pb_buffer *buf, unsigned bucket_index)
> {
> + assert(bucket_index < mgr->num_heaps);
> +
> memset(entry, 0, sizeof(*entry));
> entry->buffer = buf;
> entry->mgr = mgr;
> entry->bucket_index = bucket_index;
> }
>
> /**
> * Initialize a caching buffer manager.
> *
> * @param mgr The cache buffer manager
> @@ -263,40 +267,48 @@ pb_cache_init_entry(struct pb_cache *mgr, struct pb_cache_entry *entry,
> * @param size_factor Declare buffers that are size_factor times bigger than
> * the requested size as cache hits.
> * @param bypass_usage Bitmask. If (requested usage & bypass_usage) != 0,
> * buffer allocation requests are rejected.
> * @param maximum_cache_size Maximum size of all unused buffers the cache can
> * hold.
> * @param destroy_buffer Function that destroys a buffer for good.
> * @param can_reclaim Whether a buffer can be reclaimed (e.g. is not busy)
> */
> void
> -pb_cache_init(struct pb_cache *mgr, uint usecs, float size_factor,
> +pb_cache_init(struct pb_cache *mgr, uint num_heaps,
> + uint usecs, float size_factor,
> unsigned bypass_usage, uint64_t maximum_cache_size,
> void (*destroy_buffer)(struct pb_buffer *buf),
> bool (*can_reclaim)(struct pb_buffer *buf))
You might want to update the parameter descriptions above: the new
num_heaps parameter isn't documented like the others are. :)
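Something like this maybe, following the existing @param style (just a
rough sketch based on the patch, exact wording up to you):

   * @param num_heaps Number of buckets to allocate. The bucket_index
   *                  passed to pb_cache_init_entry() and
   *                  pb_cache_reclaim_buffer() must be less than this.
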
Patches 1-4 are:
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> {
> unsigned i;
>
> - for (i = 0; i < ARRAY_SIZE(mgr->buckets); i++)
> + mgr->buckets = CALLOC(num_heaps, sizeof(struct list_head));
> + if (!mgr->buckets)
> + return;
> +
> + for (i = 0; i < num_heaps; i++)
> LIST_INITHEAD(&mgr->buckets[i]);
>
> (void) mtx_init(&mgr->mutex, mtx_plain);
> mgr->cache_size = 0;
> mgr->max_cache_size = maximum_cache_size;
> + mgr->num_heaps = num_heaps;
> mgr->usecs = usecs;
> mgr->num_buffers = 0;
> mgr->bypass_usage = bypass_usage;
> mgr->size_factor = size_factor;
> mgr->destroy_buffer = destroy_buffer;
> mgr->can_reclaim = can_reclaim;
> }
>
> /**
> * Deinitialize the manager completely.
> */
> void
> pb_cache_deinit(struct pb_cache *mgr)
> {
> pb_cache_release_all_buffers(mgr);
> mtx_destroy(&mgr->mutex);
> + FREE(mgr->buckets);
> + mgr->buckets = NULL;
> }
> diff --git a/src/gallium/auxiliary/pipebuffer/pb_cache.h b/src/gallium/auxiliary/pipebuffer/pb_cache.h
> index d082eca..904095c 100644
> --- a/src/gallium/auxiliary/pipebuffer/pb_cache.h
> +++ b/src/gallium/auxiliary/pipebuffer/pb_cache.h
> @@ -43,38 +43,40 @@ struct pb_cache_entry
> struct pb_cache *mgr;
> int64_t start, end; /**< Caching time interval */
> unsigned bucket_index;
> };
>
> struct pb_cache
> {
> /* The cache is divided into buckets for minimizing cache misses.
> * The driver controls which buffer goes into which bucket.
> */
> - struct list_head buckets[4];
> + struct list_head *buckets;
>
> mtx_t mutex;
> uint64_t cache_size;
> uint64_t max_cache_size;
> + unsigned num_heaps;
> unsigned usecs;
> unsigned num_buffers;
> unsigned bypass_usage;
> float size_factor;
>
> void (*destroy_buffer)(struct pb_buffer *buf);
> bool (*can_reclaim)(struct pb_buffer *buf);
> };
>
> void pb_cache_add_buffer(struct pb_cache_entry *entry);
> struct pb_buffer *pb_cache_reclaim_buffer(struct pb_cache *mgr, pb_size size,
> unsigned alignment, unsigned usage,
> unsigned bucket_index);
> void pb_cache_release_all_buffers(struct pb_cache *mgr);
> void pb_cache_init_entry(struct pb_cache *mgr, struct pb_cache_entry *entry,
> struct pb_buffer *buf, unsigned bucket_index);
> -void pb_cache_init(struct pb_cache *mgr, uint usecs, float size_factor,
> +void pb_cache_init(struct pb_cache *mgr, uint num_heaps,
> + uint usecs, float size_factor,
> unsigned bypass_usage, uint64_t maximum_cache_size,
> void (*destroy_buffer)(struct pb_buffer *buf),
> bool (*can_reclaim)(struct pb_buffer *buf));
> void pb_cache_deinit(struct pb_cache *mgr);
>
> #endif
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> index 4b12735..92c314e 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> @@ -1219,21 +1219,20 @@ no_slab:
> alignment = align(alignment, ws->info.gart_page_size);
>
> bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
>
> if (use_reusable_pool) {
> int heap = radeon_get_heap_index(domain, flags);
> assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
> usage = 1 << heap; /* Only set one usage bit for each heap. */
>
> pb_cache_bucket = radeon_get_pb_cache_bucket_index(heap);
> - assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
>
> /* Get a buffer from the cache. */
> bo = (struct amdgpu_winsys_bo*)
> pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage,
> pb_cache_bucket);
> if (bo)
> return &bo->base;
> }
>
> /* Create a new one. */
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> index ab91faf..707ebf9 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> @@ -278,21 +278,22 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
> goto fail;
>
> ws->dev = dev;
> ws->info.drm_major = drm_major;
> ws->info.drm_minor = drm_minor;
>
> if (!do_winsys_init(ws, fd))
> goto fail_alloc;
>
> /* Create managers. */
> - pb_cache_init(&ws->bo_cache, 500000, ws->check_vm ? 1.0f : 2.0f, 0,
> + pb_cache_init(&ws->bo_cache, 4,
> + 500000, ws->check_vm ? 1.0f : 2.0f, 0,
> (ws->info.vram_size + ws->info.gart_size) / 8,
> amdgpu_bo_destroy, amdgpu_bo_can_reclaim);
>
> if (!pb_slabs_init(&ws->bo_slabs,
> AMDGPU_SLAB_MIN_SIZE_LOG2, AMDGPU_SLAB_MAX_SIZE_LOG2,
> RADEON_MAX_SLAB_HEAPS,
> ws,
> amdgpu_bo_can_reclaim_slab,
> amdgpu_bo_slab_alloc,
> amdgpu_bo_slab_free))
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> index fc95a98..4be6f40 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> @@ -978,21 +978,20 @@ no_slab:
>
> bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
>
> /* Shared resources don't use cached heaps. */
> if (use_reusable_pool) {
> int heap = radeon_get_heap_index(domain, flags);
> assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
> usage = 1 << heap; /* Only set one usage bit for each heap. */
>
> pb_cache_bucket = radeon_get_pb_cache_bucket_index(heap);
> - assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
>
> bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
> usage, pb_cache_bucket));
> if (bo)
> return &bo->base;
> }
>
> bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
> pb_cache_bucket);
> if (!bo) {
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> index e600199..f7d7998 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> @@ -752,21 +752,22 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
> if (!ws) {
> mtx_unlock(&fd_tab_mutex);
> return NULL;
> }
>
> ws->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
>
> if (!do_winsys_init(ws))
> goto fail1;
>
> - pb_cache_init(&ws->bo_cache, 500000, ws->check_vm ? 1.0f : 2.0f, 0,
> + pb_cache_init(&ws->bo_cache, 4,
> + 500000, ws->check_vm ? 1.0f : 2.0f, 0,
> MIN2(ws->info.vram_size, ws->info.gart_size),
> radeon_bo_destroy,
> radeon_bo_can_reclaim);
>
> if (ws->info.has_virtual_memory) {
> /* There is no fundamental obstacle to using slab buffer allocation
> * without GPUVM, but enabling it requires making sure that the drivers
> * honor the address offset.
> */
> if (!pb_slabs_init(&ws->bo_slabs,
>