[Mesa-dev] [PATCH 08/15] winsys/radeon: implement and enable 32-bit VM allocations
Samuel Pitoiset
samuel.pitoiset at gmail.com
Mon Jan 8 21:12:46 UTC 2018
Patches 5-8 are:
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
On 01/06/2018 12:12 PM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
> src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 42 +++++++++++++++++++----
> src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 28 ++++++++++++++-
> src/gallium/winsys/radeon/drm/radeon_drm_winsys.h | 2 ++
> 3 files changed, 64 insertions(+), 8 deletions(-)
>
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> index bbfe5cc..06842a4 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> @@ -242,32 +242,54 @@ static uint64_t radeon_bomgr_find_va(const struct radeon_info *info,
> if ((hole->size - waste) == size) {
> hole->size = waste;
> mtx_unlock(&heap->mutex);
> return offset;
> }
> }
>
> offset = heap->start;
> waste = offset % alignment;
> waste = waste ? alignment - waste : 0;
> +
> + if (offset + waste + size > heap->end) {
> + mtx_unlock(&heap->mutex);
> + return 0;
> + }
> +
> if (waste) {
> n = CALLOC_STRUCT(radeon_bo_va_hole);
> n->size = waste;
> n->offset = offset;
> list_add(&n->list, &heap->holes);
> }
> offset += waste;
> heap->start += size + waste;
> mtx_unlock(&heap->mutex);
> return offset;
> }
>
> +static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws,
> + uint64_t size, uint64_t alignment)
> +{
> + uint64_t va = 0;
> +
> + /* Try to allocate from the 64-bit address space first.
> + * If it doesn't exist (start = 0) or if it doesn't have enough space,
> + * fall back to the 32-bit address space.
> + */
> + if (ws->vm64.start)
> + va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment);
> + if (!va)
> + va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment);
> + return va;
> +}
> +
> static void radeon_bomgr_free_va(const struct radeon_info *info,
> struct radeon_vm_heap *heap,
> uint64_t va, uint64_t size)
> {
> struct radeon_bo_va_hole *hole = NULL;
>
> size = align(size, info->gart_page_size);
>
> mtx_lock(&heap->mutex);
> if ((va + size) == heap->start) {
> @@ -363,21 +385,23 @@ void radeon_bo_destroy(struct pb_buffer *_buf)
>
> if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
> sizeof(va)) != 0 &&
> va.operation == RADEON_VA_RESULT_ERROR) {
> fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
> fprintf(stderr, "radeon: size : %"PRIu64" bytes\n", bo->base.size);
> fprintf(stderr, "radeon: va : 0x%"PRIx64"\n", bo->va);
> }
> }
>
> - radeon_bomgr_free_va(&rws->info, &rws->vm64, bo->va, bo->base.size);
> + radeon_bomgr_free_va(&rws->info,
> + bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64,
> + bo->va, bo->base.size);
> }
>
> /* Close object. */
> args.handle = bo->handle;
> drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
>
> mtx_destroy(&bo->u.real.map_mutex);
>
> if (bo->initial_domain & RADEON_DOMAIN_VRAM)
> rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
> @@ -653,22 +677,28 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
> if (heap >= 0) {
> pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
> heap);
> }
>
> if (rws->info.has_virtual_memory) {
> struct drm_radeon_gem_va va;
> unsigned va_gap_size;
>
> va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
> - bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm64,
> - size + va_gap_size, alignment);
> +
> + if (flags & RADEON_FLAG_32BIT) {
> + bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32,
> + size + va_gap_size, alignment);
> + assert(bo->va + size < rws->vm32.end);
> + } else {
> + bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment);
> + }
>
> va.handle = bo->handle;
> va.vm_id = 0;
> va.operation = RADEON_VA_MAP;
> va.flags = RADEON_VM_PAGE_READABLE |
> RADEON_VM_PAGE_WRITEABLE |
> RADEON_VM_PAGE_SNOOPED;
> va.offset = bo->va;
> r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
> if (r && va.operation == RADEON_VA_RESULT_ERROR) {
> @@ -1055,22 +1085,21 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
> bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
> (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
>
> util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
>
> mtx_unlock(&ws->bo_handles_mutex);
>
> if (ws->info.has_virtual_memory) {
> struct drm_radeon_gem_va va;
>
> - bo->va = radeon_bomgr_find_va(&ws->info, &ws->vm64,
> - bo->base.size, 1 << 20);
> + bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);
>
> va.handle = bo->handle;
> va.operation = RADEON_VA_MAP;
> va.vm_id = 0;
> va.offset = bo->va;
> va.flags = RADEON_VM_PAGE_READABLE |
> RADEON_VM_PAGE_WRITEABLE |
> RADEON_VM_PAGE_SNOOPED;
> va.offset = bo->va;
> r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
> @@ -1199,22 +1228,21 @@ done:
> mtx_unlock(&ws->bo_handles_mutex);
>
> if (stride)
> *stride = whandle->stride;
> if (offset)
> *offset = whandle->offset;
>
> if (ws->info.has_virtual_memory && !bo->va) {
> struct drm_radeon_gem_va va;
>
> - bo->va = radeon_bomgr_find_va(&ws->info, &ws->vm64,
> - bo->base.size, 1 << 20);
> + bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);
>
> va.handle = bo->handle;
> va.operation = RADEON_VA_MAP;
> va.vm_id = 0;
> va.offset = bo->va;
> va.flags = RADEON_VM_PAGE_READABLE |
> RADEON_VM_PAGE_WRITEABLE |
> RADEON_VM_PAGE_SNOOPED;
> va.offset = bo->va;
> r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> index 0c1085b..1a8500b 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> @@ -356,20 +356,22 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
> if (ws->info.drm_minor < 49)
> ws->info.vram_vis_size = MIN2(ws->info.vram_vis_size, 256*1024*1024);
>
> /* Radeon allocates all buffers as contigous, which makes large allocations
> * unlikely to succeed. */
> ws->info.max_alloc_size = MAX2(ws->info.vram_size, ws->info.gart_size) * 0.7;
> if (ws->info.has_dedicated_vram)
> ws->info.max_alloc_size = MIN2(ws->info.vram_size * 0.7, ws->info.max_alloc_size);
> if (ws->info.drm_minor < 40)
> ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 256*1024*1024);
> + /* Both 32-bit and 64-bit address spaces only have 4GB. */
> + ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 3ull*1024*1024*1024);
>
> /* Get max clock frequency info and convert it to MHz */
> radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL,
> &ws->info.max_shader_clock);
> ws->info.max_shader_clock /= 1000;
>
> /* Default value. */
> ws->info.enabled_rb_mask = u_bit_consecutive(0, ws->info.num_render_backends);
> /* This fails on non-GCN or older kernels: */
> radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL,
> @@ -542,20 +544,21 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
> pb_cache_deinit(&ws->bo_cache);
>
> if (ws->gen >= DRV_R600) {
> radeon_surface_manager_free(ws->surf_man);
> }
>
> util_hash_table_destroy(ws->bo_names);
> util_hash_table_destroy(ws->bo_handles);
> util_hash_table_destroy(ws->bo_vas);
> mtx_destroy(&ws->bo_handles_mutex);
> + mtx_destroy(&ws->vm32.mutex);
> mtx_destroy(&ws->vm64.mutex);
> mtx_destroy(&ws->bo_fence_lock);
>
> if (ws->fd >= 0)
> close(ws->fd);
>
> FREE(rws);
> }
>
> static void radeon_query_info(struct radeon_winsys *rws,
> @@ -805,25 +808,48 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
> radeon_drm_cs_init_functions(ws);
> radeon_surface_init_functions(ws);
>
> (void) mtx_init(&ws->hyperz_owner_mutex, mtx_plain);
> (void) mtx_init(&ws->cmask_owner_mutex, mtx_plain);
>
> ws->bo_names = util_hash_table_create(handle_hash, handle_compare);
> ws->bo_handles = util_hash_table_create(handle_hash, handle_compare);
> ws->bo_vas = util_hash_table_create(handle_hash, handle_compare);
> (void) mtx_init(&ws->bo_handles_mutex, mtx_plain);
> + (void) mtx_init(&ws->vm32.mutex, mtx_plain);
> (void) mtx_init(&ws->vm64.mutex, mtx_plain);
> (void) mtx_init(&ws->bo_fence_lock, mtx_plain);
> + list_inithead(&ws->vm32.holes);
> list_inithead(&ws->vm64.holes);
>
> - ws->vm64.start = ws->va_start;
> + /* The kernel currently returns 8MB. Make sure this doesn't change. */
> + if (ws->va_start > 8 * 1024 * 1024) {
> + /* Not enough 32-bit address space. */
> + radeon_winsys_destroy(&ws->base);
> + mtx_unlock(&fd_tab_mutex);
> + return NULL;
> + }
> +
> + ws->vm32.start = ws->va_start;
> + ws->vm32.end = 1ull << 32;
> +
> + /* The maximum is 8GB of virtual address space limited by the kernel.
> + * It's obviously not enough for bigger cards, like Hawaiis with 4GB
> + * and 8GB of physical memory and 4GB of GART.
> + *
> + * Older kernels set the limit to 4GB, which is even worse, so they only
> + * have 32-bit address space.
> + */
> + if (ws->info.drm_minor >= 41) {
> + ws->vm64.start = 1ull << 32;
> + ws->vm64.end = 1ull << 33;
> + }
>
> /* TTM aligns the BO size to the CPU page size */
> ws->info.gart_page_size = sysconf(_SC_PAGESIZE);
>
> if (ws->num_cpus > 1 && debug_get_option_thread())
> util_queue_init(&ws->cs_queue, "radeon_cs", 8, 1, 0);
>
> /* Create the screen at the end. The winsys must be initialized
> * completely.
> *
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
> index c65f5cb..03d96ea 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
> @@ -41,20 +41,21 @@ enum radeon_generation {
> DRV_R600,
> DRV_SI
> };
>
> #define RADEON_SLAB_MIN_SIZE_LOG2 9
> #define RADEON_SLAB_MAX_SIZE_LOG2 14
>
> struct radeon_vm_heap {
> mtx_t mutex;
> uint64_t start;
> + uint64_t end;
> struct list_head holes;
> };
>
> struct radeon_drm_winsys {
> struct radeon_winsys base;
> struct pipe_reference reference;
> struct pb_cache bo_cache;
> struct pb_slabs bo_slabs;
>
> int fd; /* DRM file descriptor */
> @@ -77,20 +78,21 @@ struct radeon_drm_winsys {
>
> /* List of buffer GEM names. Protected by bo_handles_mutex. */
> struct util_hash_table *bo_names;
> /* List of buffer handles. Protectded by bo_handles_mutex. */
> struct util_hash_table *bo_handles;
> /* List of buffer virtual memory ranges. Protectded by bo_handles_mutex. */
> struct util_hash_table *bo_vas;
> mtx_t bo_handles_mutex;
> mtx_t bo_fence_lock;
>
> + struct radeon_vm_heap vm32;
> struct radeon_vm_heap vm64;
>
> bool check_vm;
>
> struct radeon_surface_manager *surf_man;
>
> uint32_t num_cpus; /* Number of CPUs. */
>
> struct radeon_drm_cs *hyperz_owner;
> mtx_t hyperz_owner_mutex;
>
More information about the mesa-dev mailing list