[Mesa-dev] [PATCH] r600g: add support for virtual address space on cayman v8

Marek Olšák maraeo at gmail.com
Sat Jan 7 17:08:54 PST 2012


On Fri, Jan 6, 2012 at 4:42 PM,  <j.glisse at gmail.com> wrote:
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> index ccf9c4f..8ef0c18 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> @@ -30,6 +30,7 @@
>  #include "util/u_hash_table.h"
>  #include "util/u_memory.h"
>  #include "util/u_simple_list.h"
> +#include "util/u_double_list.h"
>  #include "os/os_thread.h"
>  #include "os/os_mman.h"
>
> @@ -67,6 +68,12 @@ static INLINE struct radeon_bo *radeon_bo(struct pb_buffer *bo)
>     return (struct radeon_bo *)bo;
>  }
>
> +struct radeon_bo_va_hole {
> +    struct list_head list;
> +    uint64_t         offset;
> +    uint64_t         size;
> +};
> +
>  struct radeon_bomgr {
>     /* Base class. */
>     struct pb_manager base;
> @@ -77,6 +84,11 @@ struct radeon_bomgr {
>     /* List of buffer handles and its mutex. */
>     struct util_hash_table *bo_handles;
>     pipe_mutex bo_handles_mutex;
> +
> +    /* is virtual address supported */
> +    bool va;
> +    unsigned va_offset;
> +    struct list_head va_holes;
>  };
>
>  static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
> @@ -151,9 +163,85 @@ static boolean radeon_bo_is_busy(struct pb_buffer *_buf,
>     }
>  }
>
> +static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size)
> +{
> +    struct radeon_bo_va_hole *hole, *n;
> +    uint64_t offset = 0;
> +
> +    pipe_mutex_lock(mgr->bo_handles_mutex);

radeon_bomgr::bo_handles_mutex should only guard accesses to
radeon_bomgr::bo_handles. I don't see a reason to reuse it. Could you
please add another mutex for the va_* stuff?

> +    /* first look for a hole */
> +    LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
> +        if (hole->size == size) {
> +            offset = hole->offset;
> +            list_del(&hole->list);
> +            FREE(hole);
> +            pipe_mutex_unlock(mgr->bo_handles_mutex);
> +            return offset;
> +        }
> +        if (hole->size > size) {
> +            offset = hole->offset;
> +            hole->size -= size;
> +            hole->offset += size;
> +            pipe_mutex_unlock(mgr->bo_handles_mutex);
> +            return offset;
> +        }
> +    }
> +
> +    offset = mgr->va_offset;
> +    mgr->va_offset += size;
> +    pipe_mutex_unlock(mgr->bo_handles_mutex);
> +    return offset;
> +}
> +
> +static void radeon_bomgr_force_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
> +{
> +    pipe_mutex_lock(mgr->bo_handles_mutex);
> +    if (va >= mgr->va_offset) {
> +        mgr->va_offset = va + size;
> +    } else {
> +        struct radeon_bo_va_hole *hole, *n;
> +        uint64_t stmp, etmp;
> +
> +        /* free all hole that fall into the range
> +         * NOTE that we might loose virtual address space
> +         */
> +        LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
> +            stmp = hole->offset;
> +            etmp = stmp + hole->size;
> +            if (va >= stmp && va < etmp) {
> +                list_del(&hole->list);
> +                FREE(hole);
> +            }
> +        }
> +    }
> +    pipe_mutex_unlock(mgr->bo_handles_mutex);
> +}
> +
> +static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
> +{
> +    pipe_mutex_lock(mgr->bo_handles_mutex);
> +    if ((va + size) == mgr->va_offset) {
> +        mgr->va_offset = va;
> +    } else {
> +        struct radeon_bo_va_hole *hole;
> +
> +        /* FIXME on allocation failure we just loose virtual address space
> +         * maybe print a warning
> +         */
> +        hole = CALLOC_STRUCT(radeon_bo_va_hole);
> +        if (hole) {
> +            hole->size = size;
> +            hole->offset = va;
> +            list_add(&hole->list, &mgr->va_holes);
> +        }
> +    }
> +    pipe_mutex_unlock(mgr->bo_handles_mutex);
> +}
> +
>  static void radeon_bo_destroy(struct pb_buffer *_buf)
>  {
>     struct radeon_bo *bo = radeon_bo(_buf);
> +    struct radeon_bomgr *mgr = bo->mgr;
>     struct drm_gem_close args;
>
>     memset(&args, 0, sizeof(args));
> @@ -168,6 +256,10 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
>     if (bo->ptr)
>         os_munmap(bo->ptr, bo->base.size);
>
> +    if (mgr->va) {
> +        radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
> +    }
> +
>     /* Close object. */
>     args.handle = bo->handle;
>     drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
> @@ -343,6 +435,7 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
>     struct radeon_bo *bo;
>     struct drm_radeon_gem_create args;
>     struct radeon_bo_desc *rdesc = (struct radeon_bo_desc*)desc;
> +    int r;
>
>     memset(&args, 0, sizeof(args));
>
> @@ -378,8 +471,38 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
>     bo->rws = mgr->rws;
>     bo->handle = args.handle;
>     bo->reloc_domains = rdesc->reloc_domains;
> +    bo->va = 0;
>     pipe_mutex_init(bo->map_mutex);
>
> +    if (mgr->va) {
> +        struct drm_radeon_gem_va va;
> +
> +        bo->va_size = ((size + 4095) & ~4095);

Please use the "align" function from u_math.h.

> +        bo->va = radeon_bomgr_find_va(mgr, bo->va_size);
> +
> +        va.handle = bo->handle;
> +        va.vm_id = 0;
> +        va.operation = RADEON_VA_MAP;
> +        va.flags = RADEON_VM_PAGE_READABLE |
> +                   RADEON_VM_PAGE_WRITEABLE |
> +                   RADEON_VM_PAGE_SNOOPED;
> +        va.offset = bo->va;
> +        r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
> +        if (r && va.operation == RADEON_VA_RESULT_ERROR) {
> +            fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
> +            fprintf(stderr, "radeon:    size      : %d bytes\n", size);
> +            fprintf(stderr, "radeon:    alignment : %d bytes\n", desc->alignment);
> +            fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
> +            radeon_bo_destroy(&bo->base);
> +            return NULL;
> +        }
> +        if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
> +            radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
> +            bo->va = va.offset;
> +            radeon_bomgr_force_va(mgr, bo->va, bo->va_size);
> +        }
> +    }
> +
>     return &bo->base;
>  }
>
> @@ -441,6 +564,11 @@ struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws)
>     mgr->rws = rws;
>     mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare);
>     pipe_mutex_init(mgr->bo_handles_mutex);
> +
> +    mgr->va = rws->info.r600_va;
> +    mgr->va_offset = rws->info.r600_va_start;
> +    list_inithead(&mgr->va_holes);
> +
>     return &mgr->base;
>  }
>
> @@ -584,6 +712,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
>     struct radeon_bo *bo;
>     struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
>     struct drm_gem_open open_arg = {};
> +    int r;
>
>     memset(&open_arg, 0, sizeof(open_arg));
>
> @@ -628,6 +757,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
>     bo->base.vtbl = &radeon_bo_vtbl;
>     bo->mgr = mgr;
>     bo->rws = mgr->rws;
> +    bo->va = 0;
>     pipe_mutex_init(bo->map_mutex);
>
>     util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo);
> @@ -638,6 +768,33 @@ done:
>     if (stride)
>         *stride = whandle->stride;
>
> +    if (mgr->va) {
> +        struct drm_radeon_gem_va va;
> +
> +        bo->va_size = ((bo->base.size + 4095) & ~4095);
> +        bo->va = radeon_bomgr_find_va(mgr, bo->va_size);
> +
> +        va.handle = bo->handle;
> +        va.operation = RADEON_VA_MAP;
> +        va.vm_id = 0;
> +        va.offset = bo->va;
> +        va.flags = RADEON_VM_PAGE_READABLE |
> +                   RADEON_VM_PAGE_WRITEABLE |
> +                   RADEON_VM_PAGE_SNOOPED;
> +        va.offset = bo->va;
> +        r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
> +        if (r && va.operation == RADEON_VA_RESULT_ERROR) {
> +            fprintf(stderr, "radeon: Failed to open a buffer:\n");
> +            radeon_bo_destroy(&bo->base);
> +            return NULL;
> +        }
> +        if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
> +            radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
> +            bo->va = va.offset;
> +            radeon_bomgr_force_va(mgr, bo->va, bo->va_size);
> +        }
> +    }
> +
>     return (struct pb_buffer*)bo;
>
>  fail:
> @@ -674,6 +831,13 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
>     return TRUE;
>  }
>
> +static uint64_t radeon_winsys_bo_va(struct pb_buffer *buffer)
> +{
> +    struct radeon_bo *bo = get_radeon_bo(buffer);
> +
> +    return bo->va;
> +}
> +
>  void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
>  {
>     ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle;
> @@ -686,4 +850,5 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
>     ws->base.buffer_create = radeon_winsys_bo_create;
>     ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
>     ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
> +    ws->base.buffer_va = radeon_winsys_bo_va;
>  }
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
> index ba71cfb..0fc00ae 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
> @@ -61,6 +61,8 @@ struct radeon_bo {
>     uint32_t reloc_domains;
>     uint32_t handle;
>     uint32_t name;
> +    uint64_t va;
> +    uint64_t va_size;
>
>     /* how many command streams is this bo referenced in? */
>     int num_cs_references;
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> index 8d5a6b3..a745937 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> @@ -73,9 +73,10 @@
>
>  #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))
>
> -static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd)
> +static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
> +                                      struct radeon_drm_winsys *ws)
>  {
> -    csc->fd = fd;
> +    csc->fd = ws->fd;
>     csc->nrelocs = 512;
>     csc->relocs_bo = (struct radeon_bo**)
>                      CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
> @@ -90,17 +91,34 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd)
>         return FALSE;
>     }
>
> +    csc->cs_flags = (uint32_t*)
> +                  CALLOC(1, 3 * sizeof(uint32_t));

I don't see a reason to allocate cs_flags. Wouldn't it be simpler to
just declare it as an array?

Also please rebase. winsys/radeon already uses RADEON_CHUNK_ID_FLAGS
and it's incompatible with the way you use it. When I added that
chunk, I made it such that it had only one uint32_t and new flags had
to be or'd. It's already in kernel 3.2.

> +    if (!csc->cs_flags) {
> +        FREE(csc->relocs_bo);
> +        FREE(csc->relocs);
> +        return FALSE;
> +    }
> +
>     csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
>     csc->chunks[0].length_dw = 0;
>     csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
>     csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
>     csc->chunks[1].length_dw = 0;
>     csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
> +    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
> +    csc->chunks[2].length_dw = 3;
> +    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)csc->cs_flags;
> +    csc->cs_flags[0] = 0;
> +    csc->cs_flags[1] = RADEON_CS_RING_GFX;
> +    csc->cs_flags[2] = 0;
> +    if (ws->info.r600_va)
> +        csc->cs_flags[0] |= RADEON_CS_USE_VM;
>
>     csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
>     csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
> +    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];
>
> -    csc->cs.num_chunks = 2;
> +    csc->cs.num_chunks = 3;

This could be 2 for DRM 2.11.0 and older. Please see also Mesa master.
We change num_chunks in radeon_drm_cs_flush.

>     csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
>     return TRUE;
>  }
> @@ -128,6 +146,7 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
>     radeon_cs_context_cleanup(csc);
>     FREE(csc->relocs_bo);
>     FREE(csc->relocs);
> +    FREE(csc->cs_flags);
>  }
>
>  DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE)
> @@ -147,11 +166,11 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
>
>     cs->ws = ws;
>
> -    if (!radeon_init_cs_context(&cs->csc1, cs->ws->fd)) {
> +    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
>         FREE(cs);
>         return NULL;
>     }
> -    if (!radeon_init_cs_context(&cs->csc2, cs->ws->fd)) {
> +    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
>         radeon_destroy_cs_context(&cs->csc1);
>         FREE(cs);
>         return NULL;
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
> index f316b5e..26e0c76 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
> @@ -35,8 +35,8 @@ struct radeon_cs_context {
>
>     int fd;
>     struct drm_radeon_cs        cs;
> -    struct drm_radeon_cs_chunk  chunks[2];
> -    uint64_t                    chunk_array[2];
> +    struct drm_radeon_cs_chunk  chunks[3];
> +    uint64_t                    chunk_array[3];
>
>     /* Relocs. */
>     unsigned                    nrelocs;
> @@ -44,6 +44,7 @@ struct radeon_cs_context {
>     unsigned                    validated_crelocs;
>     struct radeon_bo            **relocs_bo;
>     struct drm_radeon_cs_reloc  *relocs;
> +    uint32_t                    *cs_flags;
>
>     /* 0 = BO not added, 1 = BO added */
>     char                        is_handle_added[256];
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> index 442bd2a..8124016 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> @@ -138,9 +138,11 @@ static boolean radeon_get_drm_value(int fd, unsigned request,
>     info.request = request;
>
>     retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
> -    if (retval && errname) {
> -        fprintf(stderr, "radeon: Failed to get %s, error number %d\n",
> -                errname, retval);
> +    if (retval) {
> +        if (errname) {
> +            fprintf(stderr, "radeon: Failed to get %s, error number %d\n",
> +                    errname, retval);
> +        }
>         return FALSE;
>     }
>     return TRUE;
> @@ -263,6 +265,16 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
>                                       &ws->info.r600_backend_map))
>                 ws->info.r600_backend_map_valid = TRUE;
>         }
> +        ws->info.r600_va = FALSE;
> +        if (ws->info.drm_minor >= 12) {

Kernel 3.2 already reports 2.12.0. Did you mean 13?

> +            ws->info.r600_va = TRUE;
> +            if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
> +                                      &ws->info.r600_va_start))
> +                ws->info.r600_va = FALSE;
> +            if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
> +                                      &ws->info.r600_ib_vm_max_size))
> +                ws->info.r600_va = FALSE;
> +        }
>     }
>
>     return TRUE;
> diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
> index c4ea655..69c42c2 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
> @@ -96,6 +96,9 @@ struct radeon_info {
>     uint32_t r600_num_tile_pipes;
>     uint32_t r600_backend_map;
>     boolean r600_backend_map_valid;
> +    boolean r600_va;

Could you please rename r600_va to r600_virtual_address_space. Let's
make the code more approachable to potential new contributors by using
whole words. Plus, you don't have to document it if you give it the
right name.

> +    uint32_t r600_va_start;
> +    uint32_t r600_ib_vm_max_size;
>  };
>
>  enum radeon_feature_id {
> @@ -242,6 +245,14 @@ struct radeon_winsys {
>                                  unsigned stride,
>                                  struct winsys_handle *whandle);
>
> +    /**
> +     * Return the virtual address of a buffer.
> +     *
> +     * \param buf       A winsys buffer object
> +     * \return          virtual address
> +     */
> +    uint64_t (*buffer_va)(struct pb_buffer *buf);

Please rename this function to "buffer_get_virtual_address".

Marek


More information about the mesa-dev mailing list