[Mesa-dev] [PATCH 2/3] winsys/amdgpu: add support for allocating GDS and OA resources
Mike Lothian
mike at fireburn.co.uk
Tue Nov 27 02:09:20 UTC 2018
Same for OA
Cheers
Mike
On Tue, 27 Nov 2018, 01:57 Marek Olšák, <maraeo at gmail.com> wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
> src/gallium/drivers/radeon/radeon_winsys.h | 4 +-
> src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 55 +++++++++++++---------
> 2 files changed, 36 insertions(+), 23 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/radeon_winsys.h
> b/src/gallium/drivers/radeon/radeon_winsys.h
> index 3d0bb75ef6e..a5dd3e6f9b1 100644
> --- a/src/gallium/drivers/radeon/radeon_winsys.h
> +++ b/src/gallium/drivers/radeon/radeon_winsys.h
> @@ -45,21 +45,23 @@ enum radeon_bo_layout {
> RADEON_LAYOUT_LINEAR = 0,
> RADEON_LAYOUT_TILED,
> RADEON_LAYOUT_SQUARETILED,
>
> RADEON_LAYOUT_UNKNOWN
> };
>
> enum radeon_bo_domain { /* bitfield */
> RADEON_DOMAIN_GTT = 2,
> RADEON_DOMAIN_VRAM = 4,
> - RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
> + RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT,
> + RADEON_DOMAIN_GDS = 8,
> + RADEON_DOMAIN_OA = 16,
> };
>
> enum radeon_bo_flag { /* bitfield */
> RADEON_FLAG_GTT_WC = (1 << 0),
> RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),
> RADEON_FLAG_NO_SUBALLOC = (1 << 2),
> RADEON_FLAG_SPARSE = (1 << 3),
> RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),
> RADEON_FLAG_READ_ONLY = (1 << 5),
> RADEON_FLAG_32BIT = (1 << 6),
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> index a9170a2bc69..1470c873a6a 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> @@ -177,22 +177,24 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
> simple_mtx_lock(&ws->global_bo_list_lock);
> LIST_DEL(&bo->u.real.global_list_item);
> ws->num_buffers--;
> simple_mtx_unlock(&ws->global_bo_list_lock);
> }
>
> simple_mtx_lock(&ws->bo_export_table_lock);
> util_hash_table_remove(ws->bo_export_table, bo->bo);
> simple_mtx_unlock(&ws->bo_export_table_lock);
>
> - amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0,
> AMDGPU_VA_OP_UNMAP);
> - amdgpu_va_range_free(bo->u.real.va_handle);
> + if (bo->initial_domain & RADEON_DOMAIN_VRAM_GTT) {
> + amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0,
> AMDGPU_VA_OP_UNMAP);
> + amdgpu_va_range_free(bo->u.real.va_handle);
> + }
> amdgpu_bo_free(bo->bo);
>
> amdgpu_bo_remove_fences(bo);
>
> if (bo->initial_domain & RADEON_DOMAIN_VRAM)
> ws->allocated_vram -= align64(bo->base.size,
> ws->info.gart_page_size);
> else if (bo->initial_domain & RADEON_DOMAIN_GTT)
> ws->allocated_gtt -= align64(bo->base.size,
> ws->info.gart_page_size);
>
> if (bo->u.real.map_count >= 1) {
> @@ -418,25 +420,26 @@ static struct amdgpu_winsys_bo
> *amdgpu_create_bo(struct amdgpu_winsys *ws,
> unsigned alignment,
> enum radeon_bo_domain
> initial_domain,
> unsigned flags,
> int heap)
> {
> struct amdgpu_bo_alloc_request request = {0};
> amdgpu_bo_handle buf_handle;
> uint64_t va = 0;
> struct amdgpu_winsys_bo *bo;
> amdgpu_va_handle va_handle;
> - unsigned va_gap_size;
> int r;
>
> /* VRAM or GTT must be specified, but not both at the same time. */
> - assert(util_bitcount(initial_domain & RADEON_DOMAIN_VRAM_GTT) == 1);
> + assert(util_bitcount(initial_domain & (RADEON_DOMAIN_VRAM_GTT |
> + RADEON_DOMAIN_GDS |
> + RADEON_DOMAIN_OA)) == 1);
>
> /* Gfx9: Overallocate the size to the next power of two for faster
> address
> * translation if we don't waste too much memory.
> */
> if (ws->info.chip_class >= GFX9) {
> uint64_t next_pot_size = util_next_power_of_two64(size);
>
> /* For slightly lower than 4 GB allocations, at most 32 MB are
> wasted.
> * For slightly lower than 256 MB allocations, at most 2 MB are
> wasted.
> * For slightly lower than 64 MB allocations, at most 512 KB are
> wasted.
> @@ -464,20 +467,24 @@ static struct amdgpu_winsys_bo
> *amdgpu_create_bo(struct amdgpu_winsys *ws,
> pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry,
> &bo->base,
> heap);
> }
> request.alloc_size = size;
> request.phys_alignment = alignment;
>
> if (initial_domain & RADEON_DOMAIN_VRAM)
> request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
> if (initial_domain & RADEON_DOMAIN_GTT)
> request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
> + if (initial_domain & RADEON_DOMAIN_GDS)
> + request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
> + if (initial_domain & RADEON_DOMAIN_OA)
> + request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;
>
> /* Since VRAM and GTT have almost the same performance on APUs, we
> could
> * just set GTT. However, in order to decrease GTT(RAM) usage, which is
> * shared with the OS, allow VRAM placements too. The idea is not to
> use
> * VRAM usefully, but to use it so that it's not unused and wasted.
> */
> if (!ws->info.has_dedicated_vram)
> request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
>
> if (flags & RADEON_FLAG_NO_CPU_ACCESS)
> @@ -493,41 +500,43 @@ static struct amdgpu_winsys_bo
> *amdgpu_create_bo(struct amdgpu_winsys *ws,
>
> r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
> if (r) {
> fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
> fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
> fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
> fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
> goto error_bo_alloc;
> }
>
> - va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
> + if (initial_domain & RADEON_DOMAIN_VRAM_GTT) {
> + unsigned va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 *
> 1024) : 0;
>
> - r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
> - size + va_gap_size,
> - amdgpu_get_optimal_vm_alignment(ws, size,
> alignment),
> - 0, &va, &va_handle,
> - (flags & RADEON_FLAG_32BIT ?
> AMDGPU_VA_RANGE_32_BIT : 0) |
> - AMDGPU_VA_RANGE_HIGH);
> - if (r)
> - goto error_va_alloc;
> + r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
> + size + va_gap_size,
> + amdgpu_get_optimal_vm_alignment(ws, size,
> alignment),
> + 0, &va, &va_handle,
> + (flags & RADEON_FLAG_32BIT ?
> AMDGPU_VA_RANGE_32_BIT : 0) |
> + AMDGPU_VA_RANGE_HIGH);
> + if (r)
> + goto error_va_alloc;
>
> - unsigned vm_flags = AMDGPU_VM_PAGE_READABLE |
> - AMDGPU_VM_PAGE_EXECUTABLE;
> + unsigned vm_flags = AMDGPU_VM_PAGE_READABLE |
> + AMDGPU_VM_PAGE_EXECUTABLE;
>
> - if (!(flags & RADEON_FLAG_READ_ONLY))
> - vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;
> + if (!(flags & RADEON_FLAG_READ_ONLY))
> + vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;
>
> - r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,
> + r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,
> AMDGPU_VA_OP_MAP);
> - if (r)
> - goto error_va_map;
> + if (r)
> + goto error_va_map;
> + }
>
> pipe_reference_init(&bo->base.reference, 1);
> bo->base.alignment = alignment;
> bo->base.usage = 0;
> bo->base.size = size;
> bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
> bo->ws = ws;
> bo->bo = buf_handle;
> bo->va = va;
> bo->u.real.va_handle = va_handle;
> @@ -1328,22 +1337,24 @@ no_slab:
> return amdgpu_bo_sparse_create(ws, size, domain, flags);
> }
>
> /* This flag is irrelevant for the cache. */
> flags &= ~RADEON_FLAG_NO_SUBALLOC;
>
> /* Align size to page size. This is the minimum alignment for normal
> * BOs. Aligning this here helps the cached bufmgr. Especially small
> BOs,
> * like constant/uniform buffers, can benefit from better and more
> reuse.
> */
> - size = align64(size, ws->info.gart_page_size);
> - alignment = align(alignment, ws->info.gart_page_size);
> + if (domain & RADEON_DOMAIN_VRAM_GTT) {
> + size = align64(size, ws->info.gart_page_size);
> + alignment = align(alignment, ws->info.gart_page_size);
> + }
>
> bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
>
> if (use_reusable_pool) {
> heap = radeon_get_heap_index(domain, flags);
> assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
>
> /* Get a buffer from the cache. */
> bo = (struct amdgpu_winsys_bo*)
> pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, 0,
> heap);
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20181127/eeb40b58/attachment.html>
More information about the mesa-dev mailing list