Mesa (master): winsys/amdgpu: add RADEON_FLAG_UNCACHED for faster blits over PCIe

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri May 15 22:24:22 UTC 2020


Module: Mesa
Branch: master
Commit: 36c01248049abba6d79b5ff1ddfe38306e0ddbff
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=36c01248049abba6d79b5ff1ddfe38306e0ddbff

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Wed May  6 14:46:13 2020 -0400

winsys/amdgpu: add RADEON_FLAG_UNCACHED for faster blits over PCIe

Small blits benefit more. Good access pattern is required.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4935>

---

 src/gallium/drivers/radeon/radeon_winsys.h | 89 +++++++++++++++++++++++-------
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c  |  3 +
 2 files changed, 72 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index f183fe34cbb..9aba2a9d95f 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -67,7 +67,8 @@ enum radeon_bo_flag
   RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),
   RADEON_FLAG_READ_ONLY = (1 << 5),
   RADEON_FLAG_32BIT = (1 << 6),
-  RADEON_FLAG_ENCRYPTED = (1 << 7)
+  RADEON_FLAG_ENCRYPTED = (1 << 7),
+  RADEON_FLAG_UNCACHED = (1 << 8), /* only gfx9 and newer */
 };
 
 enum radeon_dependency_flag
@@ -712,6 +713,11 @@ enum radeon_heap
    RADEON_HEAP_GTT_WC_READ_ONLY_32BIT,
    RADEON_HEAP_GTT_WC_32BIT,
    RADEON_HEAP_GTT,
+   RADEON_HEAP_GTT_UNCACHED_WC,
+   RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY,
+   RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT,
+   RADEON_HEAP_GTT_UNCACHED_WC_32BIT,
+   RADEON_HEAP_GTT_UNCACHED,
    RADEON_MAX_SLAB_HEAPS,
    RADEON_MAX_CACHED_HEAPS = RADEON_MAX_SLAB_HEAPS,
 };
@@ -730,6 +736,11 @@ static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap hea
    case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
    case RADEON_HEAP_GTT_WC_32BIT:
    case RADEON_HEAP_GTT:
+   case RADEON_HEAP_GTT_UNCACHED_WC:
+   case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY:
+   case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT:
+   case RADEON_HEAP_GTT_UNCACHED_WC_32BIT:
+   case RADEON_HEAP_GTT_UNCACHED:
       return RADEON_DOMAIN_GTT;
    default:
       assert(0);
@@ -739,36 +750,69 @@ static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap hea
 
 static inline unsigned radeon_flags_from_heap(enum radeon_heap heap)
 {
-   unsigned flags =
-      RADEON_FLAG_NO_INTERPROCESS_SHARING | (heap != RADEON_HEAP_GTT ? RADEON_FLAG_GTT_WC : 0);
+   unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
    switch (heap) {
-   case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
-      return flags | RADEON_FLAG_NO_CPU_ACCESS;
+   case RADEON_HEAP_GTT:
+   case RADEON_HEAP_GTT_UNCACHED:
+      break;
+   default:
+      flags |= RADEON_FLAG_GTT_WC;
+   }
 
-   case RADEON_HEAP_VRAM_READ_ONLY:
-   case RADEON_HEAP_GTT_WC_READ_ONLY:
-      return flags | RADEON_FLAG_READ_ONLY;
+   switch (heap) {
+   case RADEON_HEAP_GTT_UNCACHED_WC:
+   case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY:
+   case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT:
+   case RADEON_HEAP_GTT_UNCACHED_WC_32BIT:
+   case RADEON_HEAP_GTT_UNCACHED:
+      flags |= RADEON_FLAG_UNCACHED;
+      break;
+   default:
+      break;
+   }
 
+   switch (heap) {
+   case RADEON_HEAP_VRAM_READ_ONLY:
    case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
+   case RADEON_HEAP_GTT_WC_READ_ONLY:
    case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
-      return flags | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT;
+   case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY:
+   case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT:
+      flags |= RADEON_FLAG_READ_ONLY;
+      break;
+   default:
+      break;
+   }
 
+   switch (heap) {
+   case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
    case RADEON_HEAP_VRAM_32BIT:
+   case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
    case RADEON_HEAP_GTT_WC_32BIT:
-      return flags | RADEON_FLAG_32BIT;
+   case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT:
+   case RADEON_HEAP_GTT_UNCACHED_WC_32BIT:
+      flags |= RADEON_FLAG_32BIT;
+   default:
+      break;
+   }
 
-   case RADEON_HEAP_VRAM:
-   case RADEON_HEAP_GTT_WC:
-   case RADEON_HEAP_GTT:
+   switch (heap) {
+   case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
+      flags |= RADEON_FLAG_NO_CPU_ACCESS;
+      break;
    default:
-      return flags;
+      break;
    }
+
+   return flags;
 }
 
 /* Return the heap index for winsys allocators, or -1 on failure. */
 static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeon_bo_flag flags)
 {
+   bool uncached;
+
    /* VRAM implies WC (write combining) */
    assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
    /* NO_CPU_ACCESS implies VRAM only. */
@@ -779,7 +823,7 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo
       return -1;
 
    /* Unsupported flags: NO_SUBALLOC, SPARSE. */
-   if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS |
+   if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_UNCACHED |
                  RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT))
       return -1;
 
@@ -806,15 +850,20 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo
       }
       break;
    case RADEON_DOMAIN_GTT:
+      uncached = flags & RADEON_FLAG_UNCACHED;
+
       switch (flags & (RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) {
       case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
-         return RADEON_HEAP_GTT_WC_READ_ONLY_32BIT;
+         return uncached ? RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT
+                         : RADEON_HEAP_GTT_WC_READ_ONLY_32BIT;
       case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY:
-         return RADEON_HEAP_GTT_WC_READ_ONLY;
+         return uncached ? RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY
+                         : RADEON_HEAP_GTT_WC_READ_ONLY;
       case RADEON_FLAG_GTT_WC | RADEON_FLAG_32BIT:
-         return RADEON_HEAP_GTT_WC_32BIT;
+         return uncached ? RADEON_HEAP_GTT_UNCACHED_WC_32BIT
+                         : RADEON_HEAP_GTT_WC_32BIT;
       case RADEON_FLAG_GTT_WC:
-         return RADEON_HEAP_GTT_WC;
+         return uncached ? RADEON_HEAP_GTT_UNCACHED_WC : RADEON_HEAP_GTT_WC;
       case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
       case RADEON_FLAG_READ_ONLY:
          assert(!"READ_ONLY without WC is disallowed");
@@ -823,7 +872,7 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo
          assert(!"32BIT without WC is disallowed");
          return -1;
       case 0:
-         return RADEON_HEAP_GTT;
+         return uncached ? RADEON_HEAP_GTT_UNCACHED : RADEON_HEAP_GTT;
       }
       break;
    default:
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 1a30c9a3cf3..0aef34acd2d 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -554,6 +554,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
       if (!(flags & RADEON_FLAG_READ_ONLY))
          vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;
 
+      if (flags & RADEON_FLAG_UNCACHED)
+         vm_flags |= AMDGPU_VM_MTYPE_UC;
+
       r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,
 			   AMDGPU_VA_OP_MAP);
       if (r)



More information about the mesa-commit mailing list