[Mesa-dev] [PATCH] r600g: add support for virtual address space on cayman v8
Jerome Glisse
j.glisse at gmail.com
Sun Jan 8 15:00:53 PST 2012
On Sat, Jan 7, 2012 at 8:08 PM, Marek Olšák <maraeo at gmail.com> wrote:
> On Fri, Jan 6, 2012 at 4:42 PM, <j.glisse at gmail.com> wrote:
>> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
>> index ccf9c4f..8ef0c18 100644
>> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
>> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
>> @@ -30,6 +30,7 @@
>> #include "util/u_hash_table.h"
>> #include "util/u_memory.h"
>> #include "util/u_simple_list.h"
>> +#include "util/u_double_list.h"
>> #include "os/os_thread.h"
>> #include "os/os_mman.h"
>>
>> @@ -67,6 +68,12 @@ static INLINE struct radeon_bo *radeon_bo(struct pb_buffer *bo)
>> return (struct radeon_bo *)bo;
>> }
>>
>> +struct radeon_bo_va_hole {
>> + struct list_head list;
>> + uint64_t offset;
>> + uint64_t size;
>> +};
>> +
>> struct radeon_bomgr {
>> /* Base class. */
>> struct pb_manager base;
>> @@ -77,6 +84,11 @@ struct radeon_bomgr {
>> /* List of buffer handles and its mutex. */
>> struct util_hash_table *bo_handles;
>> pipe_mutex bo_handles_mutex;
>> +
>> + /* is virtual address supported */
>> + bool va;
>> + unsigned va_offset;
>> + struct list_head va_holes;
>> };
>>
>> static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
>> @@ -151,9 +163,85 @@ static boolean radeon_bo_is_busy(struct pb_buffer *_buf,
>> }
>> }
>>
>> +static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size)
>> +{
>> + struct radeon_bo_va_hole *hole, *n;
>> + uint64_t offset = 0;
>> +
>> + pipe_mutex_lock(mgr->bo_handles_mutex);
>
> radeon_bomgr::bo_handles_mutex should only guard accesses to
> radeon_bomgr::bo_handles. I don't see a reason to reuse it. Could you
> please add another mutex for the va_* stuff?
>
>> + /* first look for a hole */
>> + LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
>> + if (hole->size == size) {
>> + offset = hole->offset;
>> + list_del(&hole->list);
>> + FREE(hole);
>> + pipe_mutex_unlock(mgr->bo_handles_mutex);
>> + return offset;
>> + }
>> + if (hole->size > size) {
>> + offset = hole->offset;
>> + hole->size -= size;
>> + hole->offset += size;
>> + pipe_mutex_unlock(mgr->bo_handles_mutex);
>> + return offset;
>> + }
>> + }
>> +
>> + offset = mgr->va_offset;
>> + mgr->va_offset += size;
>> + pipe_mutex_unlock(mgr->bo_handles_mutex);
>> + return offset;
>> +}
>> +
>> +static void radeon_bomgr_force_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
>> +{
>> + pipe_mutex_lock(mgr->bo_handles_mutex);
>> + if (va >= mgr->va_offset) {
>> + mgr->va_offset = va + size;
>> + } else {
>> + struct radeon_bo_va_hole *hole, *n;
>> + uint64_t stmp, etmp;
>> +
>> + /* free all hole that fall into the range
>> + * NOTE that we might loose virtual address space
>> + */
>> + LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
>> + stmp = hole->offset;
>> + etmp = stmp + hole->size;
>> + if (va >= stmp && va < etmp) {
>> + list_del(&hole->list);
>> + FREE(hole);
>> + }
>> + }
>> + }
>> + pipe_mutex_unlock(mgr->bo_handles_mutex);
>> +}
>> +
>> +static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
>> +{
>> + pipe_mutex_lock(mgr->bo_handles_mutex);
>> + if ((va + size) == mgr->va_offset) {
>> + mgr->va_offset = va;
>> + } else {
>> + struct radeon_bo_va_hole *hole;
>> +
>> + /* FIXME on allocation failure we just loose virtual address space
>> + * maybe print a warning
>> + */
>> + hole = CALLOC_STRUCT(radeon_bo_va_hole);
>> + if (hole) {
>> + hole->size = size;
>> + hole->offset = va;
>> + list_add(&hole->list, &mgr->va_holes);
>> + }
>> + }
>> + pipe_mutex_unlock(mgr->bo_handles_mutex);
>> +}
>> +
>> static void radeon_bo_destroy(struct pb_buffer *_buf)
>> {
>> struct radeon_bo *bo = radeon_bo(_buf);
>> + struct radeon_bomgr *mgr = bo->mgr;
>> struct drm_gem_close args;
>>
>> memset(&args, 0, sizeof(args));
>> @@ -168,6 +256,10 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
>> if (bo->ptr)
>> os_munmap(bo->ptr, bo->base.size);
>>
>> + if (mgr->va) {
>> + radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
>> + }
>> +
>> /* Close object. */
>> args.handle = bo->handle;
>> drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
>> @@ -343,6 +435,7 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
>> struct radeon_bo *bo;
>> struct drm_radeon_gem_create args;
>> struct radeon_bo_desc *rdesc = (struct radeon_bo_desc*)desc;
>> + int r;
>>
>> memset(&args, 0, sizeof(args));
>>
>> @@ -378,8 +471,38 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
>> bo->rws = mgr->rws;
>> bo->handle = args.handle;
>> bo->reloc_domains = rdesc->reloc_domains;
>> + bo->va = 0;
>> pipe_mutex_init(bo->map_mutex);
>>
>> + if (mgr->va) {
>> + struct drm_radeon_gem_va va;
>> +
>> + bo->va_size = ((size + 4095) & ~4095);
>
> Please use the "align" function from u_math.h.
>
>> + bo->va = radeon_bomgr_find_va(mgr, bo->va_size);
>> +
>> + va.handle = bo->handle;
>> + va.vm_id = 0;
>> + va.operation = RADEON_VA_MAP;
>> + va.flags = RADEON_VM_PAGE_READABLE |
>> + RADEON_VM_PAGE_WRITEABLE |
>> + RADEON_VM_PAGE_SNOOPED;
>> + va.offset = bo->va;
>> + r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
>> + if (r && va.operation == RADEON_VA_RESULT_ERROR) {
>> + fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
>> + fprintf(stderr, "radeon: size : %d bytes\n", size);
>> + fprintf(stderr, "radeon: alignment : %d bytes\n", desc->alignment);
>> + fprintf(stderr, "radeon: domains : %d\n", args.initial_domain);
>> + radeon_bo_destroy(&bo->base);
>> + return NULL;
>> + }
>> + if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
>> + radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
>> + bo->va = va.offset;
>> + radeon_bomgr_force_va(mgr, bo->va, bo->va_size);
>> + }
>> + }
>> +
>> return &bo->base;
>> }
>>
>> @@ -441,6 +564,11 @@ struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws)
>> mgr->rws = rws;
>> mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare);
>> pipe_mutex_init(mgr->bo_handles_mutex);
>> +
>> + mgr->va = rws->info.r600_va;
>> + mgr->va_offset = rws->info.r600_va_start;
>> + list_inithead(&mgr->va_holes);
>> +
>> return &mgr->base;
>> }
>>
>> @@ -584,6 +712,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
>> struct radeon_bo *bo;
>> struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
>> struct drm_gem_open open_arg = {};
>> + int r;
>>
>> memset(&open_arg, 0, sizeof(open_arg));
>>
>> @@ -628,6 +757,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
>> bo->base.vtbl = &radeon_bo_vtbl;
>> bo->mgr = mgr;
>> bo->rws = mgr->rws;
>> + bo->va = 0;
>> pipe_mutex_init(bo->map_mutex);
>>
>> util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo);
>> @@ -638,6 +768,33 @@ done:
>> if (stride)
>> *stride = whandle->stride;
>>
>> + if (mgr->va) {
>> + struct drm_radeon_gem_va va;
>> +
>> + bo->va_size = ((bo->base.size + 4095) & ~4095);
>> + bo->va = radeon_bomgr_find_va(mgr, bo->va_size);
>> +
>> + va.handle = bo->handle;
>> + va.operation = RADEON_VA_MAP;
>> + va.vm_id = 0;
>> + va.offset = bo->va;
>> + va.flags = RADEON_VM_PAGE_READABLE |
>> + RADEON_VM_PAGE_WRITEABLE |
>> + RADEON_VM_PAGE_SNOOPED;
>> + va.offset = bo->va;
>> + r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
>> + if (r && va.operation == RADEON_VA_RESULT_ERROR) {
>> + fprintf(stderr, "radeon: Failed to open a buffer:\n");
>> + radeon_bo_destroy(&bo->base);
>> + return NULL;
>> + }
>> + if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
>> + radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
>> + bo->va = va.offset;
>> + radeon_bomgr_force_va(mgr, bo->va, bo->va_size);
>> + }
>> + }
>> +
>> return (struct pb_buffer*)bo;
>>
>> fail:
>> @@ -674,6 +831,13 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
>> return TRUE;
>> }
>>
>> +static uint64_t radeon_winsys_bo_va(struct pb_buffer *buffer)
>> +{
>> + struct radeon_bo *bo = get_radeon_bo(buffer);
>> +
>> + return bo->va;
>> +}
>> +
>> void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
>> {
>> ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle;
>> @@ -686,4 +850,5 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
>> ws->base.buffer_create = radeon_winsys_bo_create;
>> ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
>> ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
>> + ws->base.buffer_va = radeon_winsys_bo_va;
>> }
>> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
>> index ba71cfb..0fc00ae 100644
>> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
>> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
>> @@ -61,6 +61,8 @@ struct radeon_bo {
>> uint32_t reloc_domains;
>> uint32_t handle;
>> uint32_t name;
>> + uint64_t va;
>> + uint64_t va_size;
>>
>> /* how many command streams is this bo referenced in? */
>> int num_cs_references;
>> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
>> index 8d5a6b3..a745937 100644
>> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
>> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
>> @@ -73,9 +73,10 @@
>>
>> #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))
>>
>> -static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd)
>> +static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
>> + struct radeon_drm_winsys *ws)
>> {
>> - csc->fd = fd;
>> + csc->fd = ws->fd;
>> csc->nrelocs = 512;
>> csc->relocs_bo = (struct radeon_bo**)
>> CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
>> @@ -90,17 +91,34 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd)
>> return FALSE;
>> }
>>
>> + csc->cs_flags = (uint32_t*)
>> + CALLOC(1, 3 * sizeof(uint32_t));
>
> I don't see a reason to allocate cs_flags. Wouldn't it be simpler to
> just declare it as an array?
>
> Also please rebase. winsys/radeon already uses RADEON_CHUNK_ID_FLAGS
> and it's incompatible with the way you use it. When I added that
> chunk, I made it such that it had only one uint32_t and new flags had
> to be or'd. It's already in kernel 3.2.
>
>> + if (!csc->cs_flags) {
>> + FREE(csc->relocs_bo);
>> + FREE(csc->relocs);
>> + return FALSE;
>> + }
>> +
>> csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
>> csc->chunks[0].length_dw = 0;
>> csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
>> csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
>> csc->chunks[1].length_dw = 0;
>> csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
>> + csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
>> + csc->chunks[2].length_dw = 3;
>> + csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)csc->cs_flags;
>> + csc->cs_flags[0] = 0;
>> + csc->cs_flags[1] = RADEON_CS_RING_GFX;
>> + csc->cs_flags[2] = 0;
>> + if (ws->info.r600_va)
>> + csc->cs_flags[0] |= RADEON_CS_USE_VM;
>>
>> csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
>> csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
>> + csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];
>>
>> - csc->cs.num_chunks = 2;
>> + csc->cs.num_chunks = 3;
>
> This could be 2 for DRM 2.11.0 and older. Please see also Mesa master.
> We change num_chunks in radeon_drm_cs_flush.
>
>> csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
>> return TRUE;
>> }
>> @@ -128,6 +146,7 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
>> radeon_cs_context_cleanup(csc);
>> FREE(csc->relocs_bo);
>> FREE(csc->relocs);
>> + FREE(csc->cs_flags);
>> }
>>
>> DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE)
>> @@ -147,11 +166,11 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
>>
>> cs->ws = ws;
>>
>> - if (!radeon_init_cs_context(&cs->csc1, cs->ws->fd)) {
>> + if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
>> FREE(cs);
>> return NULL;
>> }
>> - if (!radeon_init_cs_context(&cs->csc2, cs->ws->fd)) {
>> + if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
>> radeon_destroy_cs_context(&cs->csc1);
>> FREE(cs);
>> return NULL;
>> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
>> index f316b5e..26e0c76 100644
>> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
>> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
>> @@ -35,8 +35,8 @@ struct radeon_cs_context {
>>
>> int fd;
>> struct drm_radeon_cs cs;
>> - struct drm_radeon_cs_chunk chunks[2];
>> - uint64_t chunk_array[2];
>> + struct drm_radeon_cs_chunk chunks[3];
>> + uint64_t chunk_array[3];
>>
>> /* Relocs. */
>> unsigned nrelocs;
>> @@ -44,6 +44,7 @@ struct radeon_cs_context {
>> unsigned validated_crelocs;
>> struct radeon_bo **relocs_bo;
>> struct drm_radeon_cs_reloc *relocs;
>> + uint32_t *cs_flags;
>>
>> /* 0 = BO not added, 1 = BO added */
>> char is_handle_added[256];
>> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
>> index 442bd2a..8124016 100644
>> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
>> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
>> @@ -138,9 +138,11 @@ static boolean radeon_get_drm_value(int fd, unsigned request,
>> info.request = request;
>>
>> retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
>> - if (retval && errname) {
>> - fprintf(stderr, "radeon: Failed to get %s, error number %d\n",
>> - errname, retval);
>> + if (retval) {
>> + if (errname) {
>> + fprintf(stderr, "radeon: Failed to get %s, error number %d\n",
>> + errname, retval);
>> + }
>> return FALSE;
>> }
>> return TRUE;
>> @@ -263,6 +265,16 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
>> &ws->info.r600_backend_map))
>> ws->info.r600_backend_map_valid = TRUE;
>> }
>> + ws->info.r600_va = FALSE;
>> + if (ws->info.drm_minor >= 12) {
>
> Kernel 3.2 already reports 2.12.0. Did you mean 13?
>
>> + ws->info.r600_va = TRUE;
>> + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
>> + &ws->info.r600_va_start))
>> + ws->info.r600_va = FALSE;
>> + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
>> + &ws->info.r600_ib_vm_max_size))
>> + ws->info.r600_va = FALSE;
>> + }
>> }
>>
>> return TRUE;
>> diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
>> index c4ea655..69c42c2 100644
>> --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
>> +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
>> @@ -96,6 +96,9 @@ struct radeon_info {
>> uint32_t r600_num_tile_pipes;
>> uint32_t r600_backend_map;
>> boolean r600_backend_map_valid;
>> + boolean r600_va;
>
> Could you please rename r600_va to r600_virtual_address_space. Let's
> make the code more approachable to potential new contributors by using
> whole words. Plus, you don't have to document it if you give it the
> right name.
>
>> + uint32_t r600_va_start;
>> + uint32_t r600_ib_vm_max_size;
>> };
>>
>> enum radeon_feature_id {
>> @@ -242,6 +245,14 @@ struct radeon_winsys {
>> unsigned stride,
>> struct winsys_handle *whandle);
>>
>> + /**
>> + * Return the virtual address of a buffer.
>> + *
>> + * \param buf A winsys buffer object
>> + * \return virtual address
>> + */
>> + uint64_t (*buffer_va)(struct pb_buffer *buf);
>
> Please rename this function to "buffer_get_virtual_address".
>
> Marek
I should have mention that this patch is several month old and at that
time the kernel interface was much into flux. I haven't had time to
respin against lastest mesa and lastest kernel, will do next week once
i sorted out more pressing issues.
Thanks for the review.
Cheers,
Jerome
More information about the mesa-dev
mailing list