[Mesa-dev] [PATCH 3/8] radv/amdgpu: Add winsys implementation of virtual buffers.
Dave Airlie
airlied at gmail.com
Mon Feb 6 22:36:51 UTC 2017
On 5 February 2017 at 21:43, Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl> wrote:
> Signed-off-by: Bas Nieuwenhuizen <basni at google.com>
> ---
> src/amd/vulkan/radv_radeon_winsys.h | 5 +
> src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 218 +++++++++++++++++++++++---
> src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h | 35 ++++-
> src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 98 +++++++++++-
> 4 files changed, 330 insertions(+), 26 deletions(-)
This patch seems to be the main work involved,
I'm happy to give Reviewed-by: Dave Airlie <airlied at redhat.com> for the series,
However for this patch it might be nice to add some more
comments to radv_amdgpu_winsys_bo_virtual_bind with what exactly it is doing.
Like if we are merging ranges or why you remove ranges etc, just what rules
it is following to make it easier when debugging it in the future.
Dave.
>
> diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
> index 79c182007a6..20d6b1d60d2 100644
> --- a/src/amd/vulkan/radv_radeon_winsys.h
> +++ b/src/amd/vulkan/radv_radeon_winsys.h
> @@ -47,6 +47,7 @@ enum radeon_bo_flag { /* bitfield */
> RADEON_FLAG_GTT_WC = (1 << 0),
> RADEON_FLAG_CPU_ACCESS = (1 << 1),
> RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
> + RADEON_FLAG_VIRTUAL = (1 << 3)
> };
>
> enum radeon_bo_usage { /* bitfield */
> @@ -284,6 +285,10 @@ struct radeon_winsys {
>
> void (*buffer_set_metadata)(struct radeon_winsys_bo *bo,
> struct radeon_bo_metadata *md);
> +
> + void (*buffer_virtual_bind)(struct radeon_winsys_bo *parent,
> + uint64_t offset, uint64_t size,
> + struct radeon_winsys_bo *bo, uint64_t bo_offset);
> struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws);
> void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
>
> diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
> index 7319a988872..d5bce304510 100644
> --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
> +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
> @@ -34,19 +34,182 @@
> #include <amdgpu_drm.h>
> #include <inttypes.h>
>
> +static void
> +radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
> + const struct radv_amdgpu_map_range *range)
> +{
> + assert(range->size);
> +
> + if (!range->bo)
> + return; /* TODO: PRT mapping */
> +
> + int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
> + range->offset + bo->va, 0, AMDGPU_VA_OP_MAP);
> + if (r)
> + abort();
> +}
> +
> +static void
> +radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
> + const struct radv_amdgpu_map_range *range)
> +{
> + assert(range->size);
> +
> + if (!range->bo)
> + return; /* TODO: PRT mapping */
> +
> + int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
> + range->offset + bo->va, 0, AMDGPU_VA_OP_UNMAP);
> + if (r)
> + abort();
> +}
> +
> +static void
> +radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
> +{
> + bo->bo_count = 0;
> + for (uint32_t i = 0; i < bo->range_count; ++i) {
> + bool found = false;
> + if (!bo->ranges[i].bo)
> + continue;
> +
> + for(uint32_t j = 0; j < bo->bo_count; ++j) {
> + if (bo->bos[j] == bo->ranges[i].bo) {
> + found = true;
> + break;
> + }
> + }
> +
> + if (!found) {
> + if (bo->bo_capacity == bo->bo_count) {
> + bo->bos = realloc(bo->bos,
> + (bo->bo_capacity + 1) * sizeof(struct radv_amdgpu_bo *));
> + ++bo->bo_capacity;
> + }
> + bo->bos[bo->bo_count++] = bo->ranges[i].bo;
> + }
> + }
> +}
> +
> +static void
> +radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
> + uint64_t offset, uint64_t size,
> + struct radeon_winsys_bo *_bo, uint64_t bo_offset)
> +{
> + struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
> + struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo*)_bo;
> + int range_count_delta, new_idx;
> + int first = 0, last;
> + struct radv_amdgpu_map_range new_first, new_last;
> +
> + assert(parent->is_virtual);
> + assert(!bo || !bo->is_virtual);
> +
> + if (!size)
> + return;
> +
> + if (parent->range_capacity - parent->range_count < 2) {
> + parent->range_capacity += 2;
> + parent->ranges = realloc(parent->ranges, parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
> + }
> +
> + /*
> + * [first, last] is exactly the range of ranges that either overlap the
> + * new parent, or are adjacent to it. This corresponds to the bind parents
> + * that may change.
> + */
> + while(first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
> + ++first;
> +
> + last = first;
> + while(last + 1 < parent->range_count && parent->ranges[last].offset <= offset + size)
> + ++last;
> +
> + bool remove_first = parent->ranges[first].offset == offset;
> + bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
> +
> + assert(parent->ranges[first].offset <= offset);
> + assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
> +
> + if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
> + size += offset - parent->ranges[first].offset;
> + offset = parent->ranges[first].offset;
> + remove_first = true;
> + }
> +
> + if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
> + size = parent->ranges[last].offset + parent->ranges[last].size - offset;
> + remove_last = true;
> + }
> +
> + range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
> + new_idx = first + !remove_first;
> +
> + new_first = parent->ranges[first];
> + new_last = parent->ranges[last];
> +
> + if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
> + radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
> +
> + if (!remove_first) {
> + new_first.size = offset - new_first.offset;
> + radv_amdgpu_winsys_virtual_map(parent, &new_first);
> + }
> + }
> +
> + if (parent->ranges[last].offset < offset + size || remove_last) {
> + if (first != last)
> + radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);
> +
> + if (!remove_last) {
> + new_last.size -= offset + size - new_last.offset;
> + new_last.offset = offset + size;
> + radv_amdgpu_winsys_virtual_map(parent, &new_last);
> + }
> + }
> +
> + for (int i = parent->range_count - 1; i > last; --i)
> + parent->ranges[i + range_count_delta] = parent->ranges[i];
> +
> + if (!remove_first)
> + parent->ranges[first] = new_first;
> +
> + if (!remove_last)
> + parent->ranges[new_idx + 1] = new_last;
> +
> + parent->ranges[new_idx].offset = offset;
> + parent->ranges[new_idx].size = size;
> + parent->ranges[new_idx].bo = bo;
> + parent->ranges[new_idx].bo_offset = bo_offset;
> +
> + radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);
> +
> + parent->range_count += range_count_delta;
> +
> + radv_amdgpu_winsys_rebuild_bo_list(parent);
> +}
> +
> static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
> {
> struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
>
> - if (bo->ws->debug_all_bos) {
> - pthread_mutex_lock(&bo->ws->global_bo_list_lock);
> - LIST_DEL(&bo->global_list_item);
> - bo->ws->num_buffers--;
> - pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
> + if (bo->is_virtual) {
> + for (uint32_t i = 0; i < bo->range_count; ++i) {
> + radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
> + }
> + free(bo->bos);
> + free(bo->ranges);
> + } else {
> + if (bo->ws->debug_all_bos) {
> + pthread_mutex_lock(&bo->ws->global_bo_list_lock);
> + LIST_DEL(&bo->global_list_item);
> + bo->ws->num_buffers--;
> + pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
> + }
> + amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
> + amdgpu_bo_free(bo->bo);
> }
> - amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
> amdgpu_va_range_free(bo->va_handle);
> - amdgpu_bo_free(bo->bo);
> FREE(bo);
> }
>
> @@ -81,6 +244,31 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
> return NULL;
> }
>
> + r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
> + size, alignment, 0, &va, &va_handle, 0);
> + if (r)
> + goto error_va_alloc;
> +
> + bo->va = va;
> + bo->va_handle = va_handle;
> + bo->size = size;
> + bo->ws = ws;
> + bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
> +
> + if (flags & RADEON_FLAG_VIRTUAL) {
> + bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
> + bo->range_count = 1;
> + bo->range_capacity = 1;
> +
> + bo->ranges[0].offset = 0;
> + bo->ranges[0].size = size;
> + bo->ranges[0].bo = NULL;
> + bo->ranges[0].bo_offset = 0;
> +
> + radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
> + return (struct radeon_winsys_bo *)bo;
> + }
> +
> request.alloc_size = size;
> request.phys_alignment = alignment;
>
> @@ -105,31 +293,22 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
> goto error_bo_alloc;
> }
>
> - r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
> - size, alignment, 0, &va, &va_handle, 0);
> - if (r)
> - goto error_va_alloc;
> -
> r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
> if (r)
> goto error_va_map;
>
> bo->bo = buf_handle;
> - bo->va = va;
> - bo->va_handle = va_handle;
> bo->initial_domain = initial_domain;
> - bo->size = size;
> bo->is_shared = false;
> - bo->ws = ws;
> radv_amdgpu_add_buffer_to_global_list(bo);
> return (struct radeon_winsys_bo *)bo;
> error_va_map:
> - amdgpu_va_range_free(va_handle);
> -
> -error_va_alloc:
> amdgpu_bo_free(buf_handle);
>
> error_bo_alloc:
> + amdgpu_va_range_free(va_handle);
> +
> +error_va_alloc:
> FREE(bo);
> return NULL;
> }
> @@ -294,4 +473,5 @@ void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
> ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
> ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
> ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
> + ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
> }
> diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
> index 499b063d56f..c7d484bc8dd 100644
> --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
> +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
> @@ -31,17 +31,40 @@
>
> #include "radv_amdgpu_winsys.h"
>
> +
> +struct radv_amdgpu_map_range {
> + uint64_t offset;
> + uint64_t size;
> + struct radv_amdgpu_winsys_bo *bo;
> + uint64_t bo_offset;
> +};
> +
> struct radv_amdgpu_winsys_bo {
> - amdgpu_bo_handle bo;
> amdgpu_va_handle va_handle;
> -
> uint64_t va;
> - enum radeon_bo_domain initial_domain;
> uint64_t size;
> - bool is_shared;
> -
> struct radv_amdgpu_winsys *ws;
> - struct list_head global_list_item;
> + bool is_virtual;
> +
> + union {
> + /* physical bo */
> + struct {
> + amdgpu_bo_handle bo;
> + enum radeon_bo_domain initial_domain;
> + bool is_shared;
> + struct list_head global_list_item;
> + };
> + /* virtual bo */
> + struct {
> + struct radv_amdgpu_map_range *ranges;
> + uint32_t range_count;
> + uint32_t range_capacity;
> +
> + struct radv_amdgpu_winsys_bo **bos;
> + uint32_t bo_count;
> + uint32_t bo_capacity;
> + };
> + };
> };
>
> static inline
> diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> index 2f2c3e1338f..c094abc672c 100644
> --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> @@ -34,6 +34,11 @@
> #include "radv_amdgpu_bo.h"
> #include "sid.h"
>
> +
> +enum {
> + VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024
> +};
> +
> struct radv_amdgpu_cs {
> struct radeon_winsys_cs base;
> struct radv_amdgpu_winsys *ws;
> @@ -56,6 +61,12 @@ struct radv_amdgpu_cs {
>
> int buffer_hash_table[1024];
> unsigned hw_ip;
> +
> + unsigned num_virtual_buffers;
> + unsigned max_num_virtual_buffers;
> + struct radeon_winsys_bo **virtual_buffers;
> + uint8_t *virtual_buffer_priorities;
> + int *virtual_buffer_hash_table;
> };
>
> static inline struct radv_amdgpu_cs *
> @@ -141,6 +152,9 @@ static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
> cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
>
> free(cs->old_ib_buffers);
> + free(cs->virtual_buffers);
> + free(cs->virtual_buffer_priorities);
> + free(cs->virtual_buffer_hash_table);
> free(cs->handles);
> free(cs->priorities);
> free(cs);
> @@ -315,7 +329,13 @@ static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
> cs->buffer_hash_table[hash] = -1;
> }
>
> + for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
> + unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
> + cs->virtual_buffer_hash_table[hash] = -1;
> + }
> +
> cs->num_buffers = 0;
> + cs->num_virtual_buffers = 0;
>
> if (cs->ws->use_ib_bos) {
> cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);
> @@ -380,6 +400,49 @@ static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
> ++cs->num_buffers;
> }
>
> +static void radv_amdgpu_cs_add_virtual_buffer(struct radeon_winsys_cs *_cs,
> + struct radeon_winsys_bo *bo,
> + uint8_t priority)
> +{
> + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
> + unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
> +
> +
> + if (!cs->virtual_buffer_hash_table) {
> + cs->virtual_buffer_hash_table = malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
> + for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
> + cs->virtual_buffer_hash_table[i] = -1;
> + }
> +
> + if (cs->virtual_buffer_hash_table[hash] >= 0) {
> + int idx = cs->virtual_buffer_hash_table[hash];
> + if (cs->virtual_buffers[idx] == bo) {
> + cs->virtual_buffer_priorities[idx] = MAX2(cs->virtual_buffer_priorities[idx], priority);
> + return;
> + }
> + for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
> + if (cs->virtual_buffers[i] == bo) {
> + cs->virtual_buffer_priorities[i] = MAX2(cs->virtual_buffer_priorities[i], priority);
> + cs->virtual_buffer_hash_table[hash] = i;
> + return;
> + }
> + }
> + }
> +
> + if(cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
> + cs->max_num_virtual_buffers = MAX2(2, cs->max_num_virtual_buffers * 2);
> + cs->virtual_buffers = realloc(cs->virtual_buffers, sizeof(struct radv_amdgpu_virtual_virtual_buffer*) * cs->max_num_virtual_buffers);
> + cs->virtual_buffer_priorities = realloc(cs->virtual_buffer_priorities, sizeof(uint8_t) * cs->max_num_virtual_buffers);
> + }
> +
> + cs->virtual_buffers[cs->num_virtual_buffers] = bo;
> + cs->virtual_buffer_priorities[cs->num_virtual_buffers] = priority;
> +
> + cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
> + ++cs->num_virtual_buffers;
> +
> +}
> +
> static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
> struct radeon_winsys_bo *_bo,
> uint8_t priority)
> @@ -387,6 +450,11 @@ static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
> struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
> struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
>
> + if (bo->is_virtual) {
> + radv_amdgpu_cs_add_virtual_buffer(_cs, _bo, priority);
> + return;
> + }
> +
> radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
> }
>
> @@ -401,6 +469,11 @@ static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
> child->priorities[i]);
> }
>
> + for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
> + radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i],
> + child->virtual_buffer_priorities[i]);
> + }
> +
> if (parent->ws->use_ib_bos) {
> if (parent->base.cdw + 4 > parent->base.max_dw)
> radv_amdgpu_cs_grow(&parent->base, 4);
> @@ -449,7 +522,8 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
> bo_list);
> free(handles);
> pthread_mutex_unlock(&ws->global_bo_list_lock);
> - } else if (count == 1 && !extra_bo && !extra_cs) {
> + } else if (count == 1 && !extra_bo && !extra_cs &&
> + !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
> struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
> r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
> cs->priorities, bo_list);
> @@ -459,6 +533,8 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
> for (unsigned i = 0; i < count; ++i) {
> struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
> total_buffer_count += cs->num_buffers;
> + for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
> + total_buffer_count += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
> }
>
> if (extra_cs) {
> @@ -502,6 +578,26 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
> ++unique_bo_count;
> }
> }
> + for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
> + struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
> + for(unsigned k = 0; k < virtual_bo->bo_count; ++k) {
> + struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
> + bool found = false;
> + for (unsigned m = 0; m < unique_bo_count; ++m) {
> + if (handles[m] == bo->bo) {
> + found = true;
> + priorities[m] = MAX2(priorities[m],
> + cs->virtual_buffer_priorities[j]);
> + break;
> + }
> + }
> + if (!found) {
> + handles[unique_bo_count] = bo->bo;
> + priorities[unique_bo_count] = cs->virtual_buffer_priorities[j];
> + ++unique_bo_count;
> + }
> + }
> + }
> }
> r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
> priorities, bo_list);
> --
> 2.11.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list