[Mesa-dev] [PATCH] r600g: add support for virtual address space on cayman v8
Marek Olšák
maraeo at gmail.com
Sat Jan 7 17:08:54 PST 2012
On Fri, Jan 6, 2012 at 4:42 PM, <j.glisse at gmail.com> wrote:
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> index ccf9c4f..8ef0c18 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> @@ -30,6 +30,7 @@
> #include "util/u_hash_table.h"
> #include "util/u_memory.h"
> #include "util/u_simple_list.h"
> +#include "util/u_double_list.h"
> #include "os/os_thread.h"
> #include "os/os_mman.h"
>
> @@ -67,6 +68,12 @@ static INLINE struct radeon_bo *radeon_bo(struct pb_buffer *bo)
> return (struct radeon_bo *)bo;
> }
>
> +struct radeon_bo_va_hole {
> + struct list_head list;
> + uint64_t offset;
> + uint64_t size;
> +};
> +
> struct radeon_bomgr {
> /* Base class. */
> struct pb_manager base;
> @@ -77,6 +84,11 @@ struct radeon_bomgr {
> /* List of buffer handles and its mutex. */
> struct util_hash_table *bo_handles;
> pipe_mutex bo_handles_mutex;
> +
> + /* is virtual address supported */
> + bool va;
> + unsigned va_offset;
> + struct list_head va_holes;
> };
>
> static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
> @@ -151,9 +163,85 @@ static boolean radeon_bo_is_busy(struct pb_buffer *_buf,
> }
> }
>
> +static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size)
> +{
> + struct radeon_bo_va_hole *hole, *n;
> + uint64_t offset = 0;
> +
> + pipe_mutex_lock(mgr->bo_handles_mutex);
radeon_bomgr::bo_handles_mutex should only guard accesses to
radeon_bomgr::bo_handles. I don't see a reason to reuse it. Could you
please add another mutex for the va_* stuff?
> + /* first look for a hole */
> + LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
> + if (hole->size == size) {
> + offset = hole->offset;
> + list_del(&hole->list);
> + FREE(hole);
> + pipe_mutex_unlock(mgr->bo_handles_mutex);
> + return offset;
> + }
> + if (hole->size > size) {
> + offset = hole->offset;
> + hole->size -= size;
> + hole->offset += size;
> + pipe_mutex_unlock(mgr->bo_handles_mutex);
> + return offset;
> + }
> + }
> +
> + offset = mgr->va_offset;
> + mgr->va_offset += size;
> + pipe_mutex_unlock(mgr->bo_handles_mutex);
> + return offset;
> +}
> +
> +static void radeon_bomgr_force_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
> +{
> + pipe_mutex_lock(mgr->bo_handles_mutex);
> + if (va >= mgr->va_offset) {
> + mgr->va_offset = va + size;
> + } else {
> + struct radeon_bo_va_hole *hole, *n;
> + uint64_t stmp, etmp;
> +
> + /* free all hole that fall into the range
> + * NOTE that we might loose virtual address space
> + */
> + LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
> + stmp = hole->offset;
> + etmp = stmp + hole->size;
> + if (va >= stmp && va < etmp) {
> + list_del(&hole->list);
> + FREE(hole);
> + }
> + }
> + }
> + pipe_mutex_unlock(mgr->bo_handles_mutex);
> +}
> +
> +static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
> +{
> + pipe_mutex_lock(mgr->bo_handles_mutex);
> + if ((va + size) == mgr->va_offset) {
> + mgr->va_offset = va;
> + } else {
> + struct radeon_bo_va_hole *hole;
> +
> + /* FIXME on allocation failure we just loose virtual address space
> + * maybe print a warning
> + */
> + hole = CALLOC_STRUCT(radeon_bo_va_hole);
> + if (hole) {
> + hole->size = size;
> + hole->offset = va;
> + list_add(&hole->list, &mgr->va_holes);
> + }
> + }
> + pipe_mutex_unlock(mgr->bo_handles_mutex);
> +}
> +
> static void radeon_bo_destroy(struct pb_buffer *_buf)
> {
> struct radeon_bo *bo = radeon_bo(_buf);
> + struct radeon_bomgr *mgr = bo->mgr;
> struct drm_gem_close args;
>
> memset(&args, 0, sizeof(args));
> @@ -168,6 +256,10 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
> if (bo->ptr)
> os_munmap(bo->ptr, bo->base.size);
>
> + if (mgr->va) {
> + radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
> + }
> +
> /* Close object. */
> args.handle = bo->handle;
> drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
> @@ -343,6 +435,7 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
> struct radeon_bo *bo;
> struct drm_radeon_gem_create args;
> struct radeon_bo_desc *rdesc = (struct radeon_bo_desc*)desc;
> + int r;
>
> memset(&args, 0, sizeof(args));
>
> @@ -378,8 +471,38 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
> bo->rws = mgr->rws;
> bo->handle = args.handle;
> bo->reloc_domains = rdesc->reloc_domains;
> + bo->va = 0;
> pipe_mutex_init(bo->map_mutex);
>
> + if (mgr->va) {
> + struct drm_radeon_gem_va va;
> +
> + bo->va_size = ((size + 4095) & ~4095);
Please use the "align" function from u_math.h.
> + bo->va = radeon_bomgr_find_va(mgr, bo->va_size);
> +
> + va.handle = bo->handle;
> + va.vm_id = 0;
> + va.operation = RADEON_VA_MAP;
> + va.flags = RADEON_VM_PAGE_READABLE |
> + RADEON_VM_PAGE_WRITEABLE |
> + RADEON_VM_PAGE_SNOOPED;
> + va.offset = bo->va;
> + r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
> + if (r && va.operation == RADEON_VA_RESULT_ERROR) {
> + fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
> + fprintf(stderr, "radeon: size : %d bytes\n", size);
> + fprintf(stderr, "radeon: alignment : %d bytes\n", desc->alignment);
> + fprintf(stderr, "radeon: domains : %d\n", args.initial_domain);
> + radeon_bo_destroy(&bo->base);
> + return NULL;
> + }
> + if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
> + radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
> + bo->va = va.offset;
> + radeon_bomgr_force_va(mgr, bo->va, bo->va_size);
> + }
> + }
> +
> return &bo->base;
> }
>
> @@ -441,6 +564,11 @@ struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws)
> mgr->rws = rws;
> mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare);
> pipe_mutex_init(mgr->bo_handles_mutex);
> +
> + mgr->va = rws->info.r600_va;
> + mgr->va_offset = rws->info.r600_va_start;
> + list_inithead(&mgr->va_holes);
> +
> return &mgr->base;
> }
>
> @@ -584,6 +712,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
> struct radeon_bo *bo;
> struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
> struct drm_gem_open open_arg = {};
> + int r;
>
> memset(&open_arg, 0, sizeof(open_arg));
>
> @@ -628,6 +757,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
> bo->base.vtbl = &radeon_bo_vtbl;
> bo->mgr = mgr;
> bo->rws = mgr->rws;
> + bo->va = 0;
> pipe_mutex_init(bo->map_mutex);
>
> util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo);
> @@ -638,6 +768,33 @@ done:
> if (stride)
> *stride = whandle->stride;
>
> + if (mgr->va) {
> + struct drm_radeon_gem_va va;
> +
> + bo->va_size = ((bo->base.size + 4095) & ~4095);
> + bo->va = radeon_bomgr_find_va(mgr, bo->va_size);
> +
> + va.handle = bo->handle;
> + va.operation = RADEON_VA_MAP;
> + va.vm_id = 0;
> + va.offset = bo->va;
> + va.flags = RADEON_VM_PAGE_READABLE |
> + RADEON_VM_PAGE_WRITEABLE |
> + RADEON_VM_PAGE_SNOOPED;
> + va.offset = bo->va;
> + r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
> + if (r && va.operation == RADEON_VA_RESULT_ERROR) {
> + fprintf(stderr, "radeon: Failed to open a buffer:\n");
> + radeon_bo_destroy(&bo->base);
> + return NULL;
> + }
> + if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
> + radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
> + bo->va = va.offset;
> + radeon_bomgr_force_va(mgr, bo->va, bo->va_size);
> + }
> + }
> +
> return (struct pb_buffer*)bo;
>
> fail:
> @@ -674,6 +831,13 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
> return TRUE;
> }
>
> +static uint64_t radeon_winsys_bo_va(struct pb_buffer *buffer)
> +{
> + struct radeon_bo *bo = get_radeon_bo(buffer);
> +
> + return bo->va;
> +}
> +
> void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
> {
> ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle;
> @@ -686,4 +850,5 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
> ws->base.buffer_create = radeon_winsys_bo_create;
> ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
> ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
> + ws->base.buffer_va = radeon_winsys_bo_va;
> }
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
> index ba71cfb..0fc00ae 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
> @@ -61,6 +61,8 @@ struct radeon_bo {
> uint32_t reloc_domains;
> uint32_t handle;
> uint32_t name;
> + uint64_t va;
> + uint64_t va_size;
>
> /* how many command streams is this bo referenced in? */
> int num_cs_references;
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> index 8d5a6b3..a745937 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> @@ -73,9 +73,10 @@
>
> #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))
>
> -static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd)
> +static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
> + struct radeon_drm_winsys *ws)
> {
> - csc->fd = fd;
> + csc->fd = ws->fd;
> csc->nrelocs = 512;
> csc->relocs_bo = (struct radeon_bo**)
> CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
> @@ -90,17 +91,34 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd)
> return FALSE;
> }
>
> + csc->cs_flags = (uint32_t*)
> + CALLOC(1, 3 * sizeof(uint32_t));
I don't see a reason to allocate cs_flags. Wouldn't it be simpler to
just declare it as an array?
Also please rebase. winsys/radeon already uses RADEON_CHUNK_ID_FLAGS
and it's incompatible with the way you use it. When I added that
chunk, I made it such that it had only one uint32_t and new flags had
to be or'd. It's already in kernel 3.2.
> + if (!csc->cs_flags) {
> + FREE(csc->relocs_bo);
> + FREE(csc->relocs);
> + return FALSE;
> + }
> +
> csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
> csc->chunks[0].length_dw = 0;
> csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
> csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
> csc->chunks[1].length_dw = 0;
> csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
> + csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
> + csc->chunks[2].length_dw = 3;
> + csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)csc->cs_flags;
> + csc->cs_flags[0] = 0;
> + csc->cs_flags[1] = RADEON_CS_RING_GFX;
> + csc->cs_flags[2] = 0;
> + if (ws->info.r600_va)
> + csc->cs_flags[0] |= RADEON_CS_USE_VM;
>
> csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
> csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
> + csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];
>
> - csc->cs.num_chunks = 2;
> + csc->cs.num_chunks = 3;
This could be 2 for DRM 2.11.0 and older. Please see also Mesa master.
We change num_chunks in radeon_drm_cs_flush.
> csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
> return TRUE;
> }
> @@ -128,6 +146,7 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
> radeon_cs_context_cleanup(csc);
> FREE(csc->relocs_bo);
> FREE(csc->relocs);
> + FREE(csc->cs_flags);
> }
>
> DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE)
> @@ -147,11 +166,11 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
>
> cs->ws = ws;
>
> - if (!radeon_init_cs_context(&cs->csc1, cs->ws->fd)) {
> + if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
> FREE(cs);
> return NULL;
> }
> - if (!radeon_init_cs_context(&cs->csc2, cs->ws->fd)) {
> + if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
> radeon_destroy_cs_context(&cs->csc1);
> FREE(cs);
> return NULL;
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
> index f316b5e..26e0c76 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
> @@ -35,8 +35,8 @@ struct radeon_cs_context {
>
> int fd;
> struct drm_radeon_cs cs;
> - struct drm_radeon_cs_chunk chunks[2];
> - uint64_t chunk_array[2];
> + struct drm_radeon_cs_chunk chunks[3];
> + uint64_t chunk_array[3];
>
> /* Relocs. */
> unsigned nrelocs;
> @@ -44,6 +44,7 @@ struct radeon_cs_context {
> unsigned validated_crelocs;
> struct radeon_bo **relocs_bo;
> struct drm_radeon_cs_reloc *relocs;
> + uint32_t *cs_flags;
>
> /* 0 = BO not added, 1 = BO added */
> char is_handle_added[256];
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> index 442bd2a..8124016 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> @@ -138,9 +138,11 @@ static boolean radeon_get_drm_value(int fd, unsigned request,
> info.request = request;
>
> retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
> - if (retval && errname) {
> - fprintf(stderr, "radeon: Failed to get %s, error number %d\n",
> - errname, retval);
> + if (retval) {
> + if (errname) {
> + fprintf(stderr, "radeon: Failed to get %s, error number %d\n",
> + errname, retval);
> + }
> return FALSE;
> }
> return TRUE;
> @@ -263,6 +265,16 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
> &ws->info.r600_backend_map))
> ws->info.r600_backend_map_valid = TRUE;
> }
> + ws->info.r600_va = FALSE;
> + if (ws->info.drm_minor >= 12) {
Kernel 3.2 already reports 2.12.0. Did you mean 13?
> + ws->info.r600_va = TRUE;
> + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
> + &ws->info.r600_va_start))
> + ws->info.r600_va = FALSE;
> + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
> + &ws->info.r600_ib_vm_max_size))
> + ws->info.r600_va = FALSE;
> + }
> }
>
> return TRUE;
> diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
> index c4ea655..69c42c2 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
> @@ -96,6 +96,9 @@ struct radeon_info {
> uint32_t r600_num_tile_pipes;
> uint32_t r600_backend_map;
> boolean r600_backend_map_valid;
> + boolean r600_va;
Could you please rename r600_va to r600_virtual_address_space. Let's
make the code more approachable to potential new contributors by using
whole words. Plus, you don't have to document it if you give it the
right name.
> + uint32_t r600_va_start;
> + uint32_t r600_ib_vm_max_size;
> };
>
> enum radeon_feature_id {
> @@ -242,6 +245,14 @@ struct radeon_winsys {
> unsigned stride,
> struct winsys_handle *whandle);
>
> + /**
> + * Return the virtual address of a buffer.
> + *
> + * \param buf A winsys buffer object
> + * \return virtual address
> + */
> + uint64_t (*buffer_va)(struct pb_buffer *buf);
Please rename this function to "buffer_get_virtual_address".
Marek
More information about the mesa-dev
mailing list