[Mesa-dev] [PATCH 05/15] gallium/radeon: set number of pb_cache buckets = number of heaps
Marek Olšák
maraeo at gmail.com
Sat Jan 6 11:12:42 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
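
Use the heap index directly as the pb_cache bucket index and create one
bucket per cached heap, instead of folding the heaps into 4 buckets and
telling them apart with a per-heap usage bit. This removes
radeon_get_pb_cache_bucket_index() and the usage bitmask; buffers that
don't go through the reusable pool pass heap = -1 and skip
pb_cache_init_entry() entirely. Both winsyses now initialize the cache
with RADEON_MAX_CACHED_HEAPS buckets.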
---
src/gallium/drivers/radeon/radeon_winsys.h | 24 --------------------
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 27 +++++++++--------------
src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 2 +-
src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 27 ++++++++++-------------
src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 2 +-
5 files changed, 25 insertions(+), 57 deletions(-)
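
(Not part of the patch -- a condensed sketch of the amdgpu allocation
path after this change, for reviewers; the radeon path has the same
shape. Condensed from amdgpu_bo_create() below, not a literal excerpt.)

    int heap = -1;

    if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING) {
        heap = radeon_get_heap_index(domain, flags);
        assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);

        /* Each heap has a private bucket now, so reclaim only from
         * this heap's bucket; the usage mask is always 0. */
        bo = (struct amdgpu_winsys_bo*)
            pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
                                    0, heap);
        if (bo)
            return &bo->base;
    }

    /* heap == -1 tells amdgpu_create_bo to skip pb_cache_init_entry(). */
    bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap);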
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 9f274b4..7914170 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -716,44 +716,20 @@ static inline unsigned radeon_flags_from_heap(enum radeon_heap heap)
RADEON_FLAG_32BIT;
case RADEON_HEAP_VRAM:
case RADEON_HEAP_GTT_WC:
case RADEON_HEAP_GTT:
default:
return flags;
}
}
-/* The pb cache bucket is chosen to minimize pb_cache misses.
- * It must be between 0 and 3 inclusive.
- */
-static inline unsigned radeon_get_pb_cache_bucket_index(enum radeon_heap heap)
-{
- switch (heap) {
- case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
- return 0;
- case RADEON_HEAP_VRAM_READ_ONLY:
- case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
- case RADEON_HEAP_VRAM_32BIT:
- case RADEON_HEAP_VRAM:
- return 1;
- case RADEON_HEAP_GTT_WC:
- case RADEON_HEAP_GTT_WC_READ_ONLY:
- case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
- case RADEON_HEAP_GTT_WC_32BIT:
- return 2;
- case RADEON_HEAP_GTT:
- default:
- return 3;
- }
-}
-
/* Return the heap index for winsys allocators, or -1 on failure. */
static inline int radeon_get_heap_index(enum radeon_bo_domain domain,
enum radeon_bo_flag flags)
{
/* VRAM implies WC (write combining) */
assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
/* NO_CPU_ACCESS implies VRAM only. */
assert(!(flags & RADEON_FLAG_NO_CPU_ACCESS) || domain == RADEON_DOMAIN_VRAM);
/* Resources with interprocess sharing don't use any winsys allocators. */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 92c314e..5d565ff 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -366,43 +366,44 @@ static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
simple_mtx_lock(&ws->global_bo_list_lock);
LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list);
ws->num_buffers++;
simple_mtx_unlock(&ws->global_bo_list_lock);
}
}
static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
uint64_t size,
unsigned alignment,
- unsigned usage,
enum radeon_bo_domain initial_domain,
unsigned flags,
- unsigned pb_cache_bucket)
+ int heap)
{
struct amdgpu_bo_alloc_request request = {0};
amdgpu_bo_handle buf_handle;
uint64_t va = 0;
struct amdgpu_winsys_bo *bo;
amdgpu_va_handle va_handle;
unsigned va_gap_size;
int r;
/* VRAM or GTT must be specified, but not both at the same time. */
assert(util_bitcount(initial_domain & RADEON_DOMAIN_VRAM_GTT) == 1);
bo = CALLOC_STRUCT(amdgpu_winsys_bo);
if (!bo) {
return NULL;
}
- pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
- pb_cache_bucket);
+ if (heap >= 0) {
+ pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
+ heap);
+ }
request.alloc_size = size;
request.phys_alignment = alignment;
if (initial_domain & RADEON_DOMAIN_VRAM)
request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
if (initial_domain & RADEON_DOMAIN_GTT)
request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
/* If VRAM is just stolen system memory, allow both VRAM and
* GTT, whichever has free space. If a buffer is evicted from
@@ -446,21 +447,21 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
if (!(flags & RADEON_FLAG_READ_ONLY))
vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;
r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,
AMDGPU_VA_OP_MAP);
if (r)
goto error_va_map;
pipe_reference_init(&bo->base.reference, 1);
bo->base.alignment = alignment;
- bo->base.usage = usage;
+ bo->base.usage = 0;
bo->base.size = size;
bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
bo->ws = ws;
bo->bo = buf_handle;
bo->va = va;
bo->u.real.va_handle = va_handle;
bo->initial_domain = initial_domain;
bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
bo->is_local = !!(request.flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID);
@@ -1155,21 +1156,21 @@ static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
uint64_t size,
unsigned alignment,
enum radeon_bo_domain domain,
enum radeon_bo_flag flags)
{
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
struct amdgpu_winsys_bo *bo;
- unsigned usage = 0, pb_cache_bucket = 0;
+ int heap = -1;
/* VRAM implies WC. This is not optional. */
assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
/* NO_CPU_ACCESS is valid with VRAM only. */
assert(domain == RADEON_DOMAIN_VRAM || !(flags & RADEON_FLAG_NO_CPU_ACCESS));
/* Sparse buffers must have NO_CPU_ACCESS set. */
assert(!(flags & RADEON_FLAG_SPARSE) || flags & RADEON_FLAG_NO_CPU_ACCESS);
@@ -1214,43 +1215,37 @@ no_slab:
/* Align size to page size. This is the minimum alignment for normal
* BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
* like constant/uniform buffers, can benefit from better and more reuse.
*/
size = align64(size, ws->info.gart_page_size);
alignment = align(alignment, ws->info.gart_page_size);
bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
if (use_reusable_pool) {
- int heap = radeon_get_heap_index(domain, flags);
+ heap = radeon_get_heap_index(domain, flags);
assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
- usage = 1 << heap; /* Only set one usage bit for each heap. */
-
- pb_cache_bucket = radeon_get_pb_cache_bucket_index(heap);
/* Get a buffer from the cache. */
bo = (struct amdgpu_winsys_bo*)
- pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage,
- pb_cache_bucket);
+ pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, 0, heap);
if (bo)
return &bo->base;
}
/* Create a new one. */
- bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
- pb_cache_bucket);
+ bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap);
if (!bo) {
/* Clear the cache and try again. */
pb_slabs_reclaim(&ws->bo_slabs);
pb_cache_release_all_buffers(&ws->bo_cache);
- bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
- pb_cache_bucket);
+ bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap);
if (!bo)
return NULL;
}
bo->u.real.use_reusable_pool = use_reusable_pool;
return &bo->base;
}
static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
struct winsys_handle *whandle,
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 707ebf9..f4bbd3e 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -278,21 +278,21 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
goto fail;
ws->dev = dev;
ws->info.drm_major = drm_major;
ws->info.drm_minor = drm_minor;
if (!do_winsys_init(ws, fd))
goto fail_alloc;
/* Create managers. */
- pb_cache_init(&ws->bo_cache, 4,
+ pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS,
500000, ws->check_vm ? 1.0f : 2.0f, 0,
(ws->info.vram_size + ws->info.gart_size) / 8,
amdgpu_bo_destroy, amdgpu_bo_can_reclaim);
if (!pb_slabs_init(&ws->bo_slabs,
AMDGPU_SLAB_MIN_SIZE_LOG2, AMDGPU_SLAB_MAX_SIZE_LOG2,
RADEON_MAX_SLAB_HEAPS,
ws,
amdgpu_bo_can_reclaim_slab,
amdgpu_bo_slab_alloc,
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 4be6f40..7aef238 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -582,24 +582,23 @@ static void radeon_bo_unmap(struct pb_buffer *_buf)
mtx_unlock(&bo->u.real.map_mutex);
}
static const struct pb_vtbl radeon_bo_vtbl = {
radeon_bo_destroy_or_cache
/* other functions are never called */
};
static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
unsigned size, unsigned alignment,
- unsigned usage,
unsigned initial_domains,
unsigned flags,
- unsigned pb_cache_bucket)
+ int heap)
{
struct radeon_bo *bo;
struct drm_radeon_gem_create args;
int r;
memset(&args, 0, sizeof(args));
assert(initial_domains);
assert((initial_domains &
~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
@@ -632,31 +631,34 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
}
assert(args.handle != 0);
bo = CALLOC_STRUCT(radeon_bo);
if (!bo)
return NULL;
pipe_reference_init(&bo->base.reference, 1);
bo->base.alignment = alignment;
- bo->base.usage = usage;
+ bo->base.usage = 0;
bo->base.size = size;
bo->base.vtbl = &radeon_bo_vtbl;
bo->rws = rws;
bo->handle = args.handle;
bo->va = 0;
bo->initial_domain = initial_domains;
bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
(void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
- pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
- pb_cache_bucket);
+
+ if (heap >= 0) {
+ pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
+ heap);
+ }
if (rws->info.has_virtual_memory) {
struct drm_radeon_gem_va va;
unsigned va_gap_size;
va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
bo->va = radeon_bomgr_find_va(rws, size + va_gap_size, alignment);
va.handle = bo->handle;
va.vm_id = 0;
@@ -914,21 +916,21 @@ static void radeon_bo_set_metadata(struct pb_buffer *_buf,
static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys *rws,
uint64_t size,
unsigned alignment,
enum radeon_bo_domain domain,
enum radeon_bo_flag flags)
{
struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
struct radeon_bo *bo;
- unsigned usage = 0, pb_cache_bucket = 0;
+ int heap = -1;
assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */
/* Only 32-bit sizes are supported. */
if (size > UINT_MAX)
return NULL;
/* VRAM implies WC. This is not optional. */
if (domain & RADEON_DOMAIN_VRAM)
flags |= RADEON_FLAG_GTT_WC;
@@ -973,41 +975,36 @@ no_slab:
* BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
* like constant/uniform buffers, can benefit from better and more reuse.
*/
size = align(size, ws->info.gart_page_size);
alignment = align(alignment, ws->info.gart_page_size);
bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
/* Shared resources don't use cached heaps. */
if (use_reusable_pool) {
- int heap = radeon_get_heap_index(domain, flags);
+ heap = radeon_get_heap_index(domain, flags);
assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
- usage = 1 << heap; /* Only set one usage bit for each heap. */
-
- pb_cache_bucket = radeon_get_pb_cache_bucket_index(heap);
bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
- usage, pb_cache_bucket));
+ 0, heap));
if (bo)
return &bo->base;
}
- bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
- pb_cache_bucket);
+ bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
if (!bo) {
/* Clear the cache and try again. */
if (ws->info.has_virtual_memory)
pb_slabs_reclaim(&ws->bo_slabs);
pb_cache_release_all_buffers(&ws->bo_cache);
- bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
- pb_cache_bucket);
+ bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
if (!bo)
return NULL;
}
bo->u.real.use_reusable_pool = use_reusable_pool;
mtx_lock(&ws->bo_handles_mutex);
util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
mtx_unlock(&ws->bo_handles_mutex);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index f7d7998..25faa40 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -752,21 +752,21 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
if (!ws) {
mtx_unlock(&fd_tab_mutex);
return NULL;
}
ws->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
if (!do_winsys_init(ws))
goto fail1;
- pb_cache_init(&ws->bo_cache, 4,
+ pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS,
500000, ws->check_vm ? 1.0f : 2.0f, 0,
MIN2(ws->info.vram_size, ws->info.gart_size),
radeon_bo_destroy,
radeon_bo_can_reclaim);
if (ws->info.has_virtual_memory) {
/* There is no fundamental obstacle to using slab buffer allocation
* without GPUVM, but enabling it requires making sure that the drivers
* honor the address offset.
*/
--
2.7.4