Same for OA<div><br></div><div>Cheers</div><div><br></div><div>Mike<br><br><div class="gmail_quote"><div dir="ltr">On Tue, 27 Nov 2018, 01:57 Marek Olšák, &lt;<a href="mailto:maraeo@gmail.com">maraeo@gmail.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Marek Olšák &lt;<a href="mailto:marek.olsak@amd.com" target="_blank">marek.olsak@amd.com</a>&gt;<br>
<br>
---<br>
 src/gallium/drivers/radeon/radeon_winsys.h |  4 +-<br>
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c  | 55 +++++++++++++---------<br>
 2 files changed, 36 insertions(+), 23 deletions(-)<br>
<br>
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h<br>
index 3d0bb75ef6e..a5dd3e6f9b1 100644<br>
--- a/src/gallium/drivers/radeon/radeon_winsys.h<br>
+++ b/src/gallium/drivers/radeon/radeon_winsys.h<br>
@@ -45,21 +45,23 @@ enum radeon_bo_layout {<br>
     RADEON_LAYOUT_LINEAR = 0,<br>
     RADEON_LAYOUT_TILED,<br>
     RADEON_LAYOUT_SQUARETILED,<br>
<br>
     RADEON_LAYOUT_UNKNOWN<br>
 };<br>
<br>
 enum radeon_bo_domain { /* bitfield */<br>
     RADEON_DOMAIN_GTT  = 2,<br>
     RADEON_DOMAIN_VRAM = 4,<br>
-    RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT<br>
+    RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT,<br>
+    RADEON_DOMAIN_GDS = 8,<br>
+    RADEON_DOMAIN_OA = 16,<br>
 };<br>
<br>
 enum radeon_bo_flag { /* bitfield */<br>
     RADEON_FLAG_GTT_WC =        (1 << 0),<br>
     RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),<br>
     RADEON_FLAG_NO_SUBALLOC =   (1 << 2),<br>
     RADEON_FLAG_SPARSE =        (1 << 3),<br>
     RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),<br>
     RADEON_FLAG_READ_ONLY =     (1 << 5),<br>
     RADEON_FLAG_32BIT =    (1 << 6),<br>
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c<br>
index a9170a2bc69..1470c873a6a 100644<br>
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c<br>
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c<br>
@@ -177,22 +177,24 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)<br>
       simple_mtx_lock(&ws->global_bo_list_lock);<br>
       LIST_DEL(&bo->u.real.global_list_item);<br>
       ws->num_buffers--;<br>
       simple_mtx_unlock(&ws->global_bo_list_lock);<br>
    }<br>
<br>
    simple_mtx_lock(&ws->bo_export_table_lock);<br>
    util_hash_table_remove(ws->bo_export_table, bo->bo);<br>
    simple_mtx_unlock(&ws->bo_export_table_lock);<br>
<br>
-   amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);<br>
-   amdgpu_va_range_free(bo->u.real.va_handle);<br>
+   if (bo->initial_domain & RADEON_DOMAIN_VRAM_GTT) {<br>
+      amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);<br>
+      amdgpu_va_range_free(bo->u.real.va_handle);<br>
+   }<br>
    amdgpu_bo_free(bo->bo);<br>
<br>
    amdgpu_bo_remove_fences(bo);<br>
<br>
    if (bo->initial_domain & RADEON_DOMAIN_VRAM)<br>
       ws->allocated_vram -= align64(bo->base.size, ws->info.gart_page_size);<br>
    else if (bo->initial_domain & RADEON_DOMAIN_GTT)<br>
       ws->allocated_gtt -= align64(bo->base.size, ws->info.gart_page_size);<br>
<br>
    if (bo->u.real.map_count >= 1) {<br>
@@ -418,25 +420,26 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,<br>
                                                  unsigned alignment,<br>
                                                  enum radeon_bo_domain initial_domain,<br>
                                                  unsigned flags,<br>
                                                  int heap)<br>
 {<br>
    struct amdgpu_bo_alloc_request request = {0};<br>
    amdgpu_bo_handle buf_handle;<br>
    uint64_t va = 0;<br>
    struct amdgpu_winsys_bo *bo;<br>
    amdgpu_va_handle va_handle;<br>
-   unsigned va_gap_size;<br>
    int r;<br>
<br>
    /* VRAM or GTT must be specified, but not both at the same time. */<br>
-   assert(util_bitcount(initial_domain & RADEON_DOMAIN_VRAM_GTT) == 1);<br>
+   assert(util_bitcount(initial_domain & (RADEON_DOMAIN_VRAM_GTT |<br>
+                                          RADEON_DOMAIN_GDS |<br>
+                                          RADEON_DOMAIN_OA)) == 1);<br>
<br>
    /* Gfx9: Overallocate the size to the next power of two for faster address<br>
     * translation if we don't waste too much memory.<br>
     */<br>
    if (ws->info.chip_class >= GFX9) {<br>
       uint64_t next_pot_size = util_next_power_of_two64(size);<br>
<br>
       /* For slightly lower than 4 GB allocations, at most 32 MB are wasted.<br>
        * For slightly lower than 256 MB allocations, at most 2 MB are wasted.<br>
        * For slightly lower than 64 MB allocations, at most 512 KB are wasted.<br>
@@ -464,20 +467,24 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,<br>
       pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,<br>
                           heap);<br>
    }<br>
    request.alloc_size = size;<br>
    request.phys_alignment = alignment;<br>
<br>
    if (initial_domain & RADEON_DOMAIN_VRAM)<br>
       request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;<br>
    if (initial_domain & RADEON_DOMAIN_GTT)<br>
       request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;<br>
+   if (initial_domain & RADEON_DOMAIN_GDS)<br>
+      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;<br>
+   if (initial_domain & RADEON_DOMAIN_OA)<br>
+      request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;<br>
<br>
    /* Since VRAM and GTT have almost the same performance on APUs, we could<br>
     * just set GTT. However, in order to decrease GTT(RAM) usage, which is<br>
     * shared with the OS, allow VRAM placements too. The idea is not to use<br>
     * VRAM usefully, but to use it so that it's not unused and wasted.<br>
     */<br>
    if (!ws->info.has_dedicated_vram)<br>
       request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;<br>
<br>
    if (flags & RADEON_FLAG_NO_CPU_ACCESS)<br>
@@ -493,41 +500,43 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,<br>
<br>
    r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);<br>
    if (r) {<br>
       fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");<br>
       fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);<br>
       fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);<br>
       fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);<br>
       goto error_bo_alloc;<br>
    }<br>
<br>
-   va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;<br>
+   if (initial_domain & RADEON_DOMAIN_VRAM_GTT) {<br>
+      unsigned va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;<br>
<br>
-   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,<br>
-                             size + va_gap_size,<br>
-                             amdgpu_get_optimal_vm_alignment(ws, size, alignment),<br>
-                             0, &va, &va_handle,<br>
-                             (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |<br>
-                             AMDGPU_VA_RANGE_HIGH);<br>
-   if (r)<br>
-      goto error_va_alloc;<br>
+      r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,<br>
+                                size + va_gap_size,<br>
+                                amdgpu_get_optimal_vm_alignment(ws, size, alignment),<br>
+                                0, &va, &va_handle,<br>
+                                (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |<br>
+                                AMDGPU_VA_RANGE_HIGH);<br>
+      if (r)<br>
+         goto error_va_alloc;<br>
<br>
-   unsigned vm_flags = AMDGPU_VM_PAGE_READABLE |<br>
-                       AMDGPU_VM_PAGE_EXECUTABLE;<br>
+      unsigned vm_flags = AMDGPU_VM_PAGE_READABLE |<br>
+                          AMDGPU_VM_PAGE_EXECUTABLE;<br>
<br>
-   if (!(flags & RADEON_FLAG_READ_ONLY))<br>
-       vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;<br>
+      if (!(flags & RADEON_FLAG_READ_ONLY))<br>
+         vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;<br>
<br>
-   r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,<br>
+      r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,<br>
                           AMDGPU_VA_OP_MAP);<br>
-   if (r)<br>
-      goto error_va_map;<br>
+      if (r)<br>
+         goto error_va_map;<br>
+   }<br>
<br>
    pipe_reference_init(&bo->base.reference, 1);<br>
    bo->base.alignment = alignment;<br>
    bo->base.usage = 0;<br>
    bo->base.size = size;<br>
    bo->base.vtbl = &amdgpu_winsys_bo_vtbl;<br>
    bo->ws = ws;<br>
    bo->bo = buf_handle;<br>
    bo->va = va;<br>
    bo->u.real.va_handle = va_handle;<br>
@@ -1328,22 +1337,24 @@ no_slab:<br>
       return amdgpu_bo_sparse_create(ws, size, domain, flags);<br>
    }<br>
<br>
    /* This flag is irrelevant for the cache. */<br>
    flags &= ~RADEON_FLAG_NO_SUBALLOC;<br>
<br>
    /* Align size to page size. This is the minimum alignment for normal<br>
     * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,<br>
     * like constant/uniform buffers, can benefit from better and more reuse.<br>
     */<br>
-   size = align64(size, ws->info.gart_page_size);<br>
-   alignment = align(alignment, ws->info.gart_page_size);<br>
+   if (domain & RADEON_DOMAIN_VRAM_GTT) {<br>
+      size = align64(size, ws->info.gart_page_size);<br>
+      alignment = align(alignment, ws->info.gart_page_size);<br>
+   }<br>
<br>
    bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;<br>
<br>
    if (use_reusable_pool) {<br>
        heap = radeon_get_heap_index(domain, flags);<br>
        assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);<br>
<br>
        /* Get a buffer from the cache. */<br>
        bo = (struct amdgpu_winsys_bo*)<br>
             pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, 0, heap);<br>
-- <br>
2.17.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div></div>