[Mesa-dev] [PATCH 7/7] winsys/radeon: implement and enable 32-bit VM allocations
Marek Olšák
maraeo at gmail.com
Fri Feb 2 20:48:53 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 42 +++++++++++++++++++----
src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 28 ++++++++++++++-
src/gallium/winsys/radeon/drm/radeon_drm_winsys.h | 2 ++
3 files changed, 64 insertions(+), 8 deletions(-)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index bbfe5cc..06842a4 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -242,32 +242,54 @@ static uint64_t radeon_bomgr_find_va(const struct radeon_info *info,
if ((hole->size - waste) == size) {
hole->size = waste;
mtx_unlock(&heap->mutex);
return offset;
}
}
offset = heap->start;
waste = offset % alignment;
waste = waste ? alignment - waste : 0;
+
+ if (offset + waste + size > heap->end) {
+ mtx_unlock(&heap->mutex);
+ return 0;
+ }
+
if (waste) {
n = CALLOC_STRUCT(radeon_bo_va_hole);
n->size = waste;
n->offset = offset;
list_add(&n->list, &heap->holes);
}
offset += waste;
heap->start += size + waste;
mtx_unlock(&heap->mutex);
return offset;
}
+static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws,
+ uint64_t size, uint64_t alignment)
+{
+ uint64_t va = 0;
+
+ /* Pick a virtual address for a buffer that has no 32-bit requirement.
+ *
+ * Try to allocate from the 64-bit address space first.
+ * If it doesn't exist (vm64.start == 0) or if it doesn't have enough
+ * space (radeon_bomgr_find_va returned 0), fall back to the 32-bit
+ * address space. The result is 0 if the fallback attempt also returns 0.
+ */
+ if (ws->vm64.start)
+ va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment);
+ if (!va)
+ va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment);
+ return va;
+}
+
static void radeon_bomgr_free_va(const struct radeon_info *info,
struct radeon_vm_heap *heap,
uint64_t va, uint64_t size)
{
struct radeon_bo_va_hole *hole = NULL;
size = align(size, info->gart_page_size);
mtx_lock(&heap->mutex);
if ((va + size) == heap->start) {
@@ -363,21 +385,23 @@ void radeon_bo_destroy(struct pb_buffer *_buf)
if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
sizeof(va)) != 0 &&
va.operation == RADEON_VA_RESULT_ERROR) {
fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
fprintf(stderr, "radeon: size : %"PRIu64" bytes\n", bo->base.size);
fprintf(stderr, "radeon: va : 0x%"PRIx64"\n", bo->va);
}
}
- radeon_bomgr_free_va(&rws->info, &rws->vm64, bo->va, bo->base.size);
+ radeon_bomgr_free_va(&rws->info,
+ bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64,
+ bo->va, bo->base.size);
}
/* Close object. */
args.handle = bo->handle;
drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
mtx_destroy(&bo->u.real.map_mutex);
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
@@ -653,22 +677,28 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
if (heap >= 0) {
pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
heap);
}
if (rws->info.has_virtual_memory) {
struct drm_radeon_gem_va va;
unsigned va_gap_size;
va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
- bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm64,
- size + va_gap_size, alignment);
+
+ if (flags & RADEON_FLAG_32BIT) {
+ bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32,
+ size + va_gap_size, alignment);
+ assert(bo->va + size < rws->vm32.end);
+ } else {
+ bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment);
+ }
va.handle = bo->handle;
va.vm_id = 0;
va.operation = RADEON_VA_MAP;
va.flags = RADEON_VM_PAGE_READABLE |
RADEON_VM_PAGE_WRITEABLE |
RADEON_VM_PAGE_SNOOPED;
va.offset = bo->va;
r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
if (r && va.operation == RADEON_VA_RESULT_ERROR) {
@@ -1055,22 +1085,21 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
(void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
mtx_unlock(&ws->bo_handles_mutex);
if (ws->info.has_virtual_memory) {
struct drm_radeon_gem_va va;
- bo->va = radeon_bomgr_find_va(&ws->info, &ws->vm64,
- bo->base.size, 1 << 20);
+ bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);
va.handle = bo->handle;
va.operation = RADEON_VA_MAP;
va.vm_id = 0;
va.offset = bo->va;
va.flags = RADEON_VM_PAGE_READABLE |
RADEON_VM_PAGE_WRITEABLE |
RADEON_VM_PAGE_SNOOPED;
va.offset = bo->va;
r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
@@ -1199,22 +1228,21 @@ done:
mtx_unlock(&ws->bo_handles_mutex);
if (stride)
*stride = whandle->stride;
if (offset)
*offset = whandle->offset;
if (ws->info.has_virtual_memory && !bo->va) {
struct drm_radeon_gem_va va;
- bo->va = radeon_bomgr_find_va(&ws->info, &ws->vm64,
- bo->base.size, 1 << 20);
+ bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);
va.handle = bo->handle;
va.operation = RADEON_VA_MAP;
va.vm_id = 0;
va.offset = bo->va;
va.flags = RADEON_VM_PAGE_READABLE |
RADEON_VM_PAGE_WRITEABLE |
RADEON_VM_PAGE_SNOOPED;
va.offset = bo->va;
r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 4fe36dc..85a186a 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -356,20 +356,22 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
if (ws->info.drm_minor < 49)
ws->info.vram_vis_size = MIN2(ws->info.vram_vis_size, 256*1024*1024);
/* Radeon allocates all buffers as contigous, which makes large allocations
* unlikely to succeed. */
ws->info.max_alloc_size = MAX2(ws->info.vram_size, ws->info.gart_size) * 0.7;
if (ws->info.has_dedicated_vram)
ws->info.max_alloc_size = MIN2(ws->info.vram_size * 0.7, ws->info.max_alloc_size);
if (ws->info.drm_minor < 40)
ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 256*1024*1024);
+ /* Both 32-bit and 64-bit address spaces only have 4GB. */
+ ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 3ull*1024*1024*1024);
/* Get max clock frequency info and convert it to MHz */
radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL,
&ws->info.max_shader_clock);
ws->info.max_shader_clock /= 1000;
ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
/* Generation-specific queries. */
if (ws->gen == DRV_R300) {
@@ -546,20 +548,21 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
pb_cache_deinit(&ws->bo_cache);
if (ws->gen >= DRV_R600) {
radeon_surface_manager_free(ws->surf_man);
}
util_hash_table_destroy(ws->bo_names);
util_hash_table_destroy(ws->bo_handles);
util_hash_table_destroy(ws->bo_vas);
mtx_destroy(&ws->bo_handles_mutex);
+ mtx_destroy(&ws->vm32.mutex);
mtx_destroy(&ws->vm64.mutex);
mtx_destroy(&ws->bo_fence_lock);
if (ws->fd >= 0)
close(ws->fd);
FREE(rws);
}
static void radeon_query_info(struct radeon_winsys *rws,
@@ -809,25 +812,48 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
radeon_drm_cs_init_functions(ws);
radeon_surface_init_functions(ws);
(void) mtx_init(&ws->hyperz_owner_mutex, mtx_plain);
(void) mtx_init(&ws->cmask_owner_mutex, mtx_plain);
ws->bo_names = util_hash_table_create(handle_hash, handle_compare);
ws->bo_handles = util_hash_table_create(handle_hash, handle_compare);
ws->bo_vas = util_hash_table_create(handle_hash, handle_compare);
(void) mtx_init(&ws->bo_handles_mutex, mtx_plain);
+ (void) mtx_init(&ws->vm32.mutex, mtx_plain);
(void) mtx_init(&ws->vm64.mutex, mtx_plain);
(void) mtx_init(&ws->bo_fence_lock, mtx_plain);
+ list_inithead(&ws->vm32.holes);
list_inithead(&ws->vm64.holes);
- ws->vm64.start = ws->va_start;
+ /* The kernel currently returns 8MB. Make sure this doesn't change. */
+ if (ws->va_start > 8 * 1024 * 1024) {
+ /* Not enough 32-bit address space. */
+ radeon_winsys_destroy(&ws->base);
+ mtx_unlock(&fd_tab_mutex);
+ return NULL;
+ }
+
+ ws->vm32.start = ws->va_start;
+ ws->vm32.end = 1ull << 32;
+
+ /* The maximum is 8GB of virtual address space limited by the kernel.
+ * It's obviously not enough for bigger cards, like Hawaiis with 4GB
+ * and 8GB of physical memory and 4GB of GART.
+ *
+ * Older kernels set the limit to 4GB, which is even worse, so they only
+ * have 32-bit address space.
+ */
+ if (ws->info.drm_minor >= 41) {
+ ws->vm64.start = 1ull << 32;
+ ws->vm64.end = 1ull << 33;
+ }
/* TTM aligns the BO size to the CPU page size */
ws->info.gart_page_size = sysconf(_SC_PAGESIZE);
if (ws->num_cpus > 1 && debug_get_option_thread())
util_queue_init(&ws->cs_queue, "radeon_cs", 8, 1, 0);
/* Create the screen at the end. The winsys must be initialized
* completely.
*
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
index c65f5cb..03d96ea 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -41,20 +41,21 @@ enum radeon_generation {
DRV_R600,
DRV_SI
};
#define RADEON_SLAB_MIN_SIZE_LOG2 9
#define RADEON_SLAB_MAX_SIZE_LOG2 14
struct radeon_vm_heap {
mtx_t mutex;
uint64_t start;
+ uint64_t end; /* limit of the range: radeon_bomgr_find_va returns 0 if a new allocation would extend past it */
struct list_head holes;
};
struct radeon_drm_winsys {
struct radeon_winsys base;
struct pipe_reference reference;
struct pb_cache bo_cache;
struct pb_slabs bo_slabs;
int fd; /* DRM file descriptor */
@@ -77,20 +78,21 @@ struct radeon_drm_winsys {
/* List of buffer GEM names. Protected by bo_handles_mutex. */
struct util_hash_table *bo_names;
/* List of buffer handles. Protectded by bo_handles_mutex. */
struct util_hash_table *bo_handles;
/* List of buffer virtual memory ranges. Protectded by bo_handles_mutex. */
struct util_hash_table *bo_vas;
mtx_t bo_handles_mutex;
mtx_t bo_fence_lock;
+ struct radeon_vm_heap vm32;
struct radeon_vm_heap vm64;
bool check_vm;
struct radeon_surface_manager *surf_man;
uint32_t num_cpus; /* Number of CPUs. */
struct radeon_drm_cs *hyperz_owner;
mtx_t hyperz_owner_mutex;
--
2.7.4
More information about the mesa-dev
mailing list