[Mesa-dev] [PATCH 07/14] gallium/radeon: remove RADEON_FLAG_CPU_ACCESS

Marek Olšák maraeo at gmail.com
Thu Jun 29 19:47:42 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

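Remove the RADEON_FLAG_CPU_ACCESS buffer flag and every use of it in the
common radeon driver code and in the amdgpu and radeon winsyses.

For the rationale, see:
https://lists.freedesktop.org/archives/amd-gfx/2017-June/010591.html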
---
 src/gallium/drivers/radeon/r600_buffer_common.c | 4 ----
 src/gallium/drivers/radeon/radeon_winsys.h      | 7 +++----
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c       | 9 +--------
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c       | 3 +--
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c   | 8 +-------
 5 files changed, 6 insertions(+), 25 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 342695c..262fe1d 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -125,21 +125,20 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
 	case PIPE_USAGE_DYNAMIC:
 		/* Older kernels didn't always flush the HDP cache before
 		 * CS execution
 		 */
 		if (rscreen->info.drm_major == 2 &&
 		    rscreen->info.drm_minor < 40) {
 			res->domains = RADEON_DOMAIN_GTT;
 			res->flags |= RADEON_FLAG_GTT_WC;
 			break;
 		}
-		res->flags |= RADEON_FLAG_CPU_ACCESS;
 		/* fall through */
 	case PIPE_USAGE_DEFAULT:
 	case PIPE_USAGE_IMMUTABLE:
 	default:
 		/* Not listing GTT here improves performance in some
 		 * apps. */
 		res->domains = RADEON_DOMAIN_VRAM;
 		res->flags |= RADEON_FLAG_GTT_WC;
 		break;
 	}
@@ -151,29 +150,26 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
 		 * kernels, because they didn't always flush the HDP
 		 * cache before CS execution.
 		 *
 		 * Write-combined CPU mappings are fine, the kernel
 		 * ensures all CPU writes finish before the GPU
 		 * executes a command stream.
 		 */
 		if (rscreen->info.drm_major == 2 &&
 		    rscreen->info.drm_minor < 40)
 			res->domains = RADEON_DOMAIN_GTT;
-		else if (res->domains & RADEON_DOMAIN_VRAM)
-			res->flags |= RADEON_FLAG_CPU_ACCESS;
 	}
 
 	/* Tiled textures are unmappable. Always put them in VRAM. */
 	if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) ||
 	    res->flags & R600_RESOURCE_FLAG_UNMAPPABLE) {
 		res->domains = RADEON_DOMAIN_VRAM;
-		res->flags &= ~RADEON_FLAG_CPU_ACCESS;
 		res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
 			 RADEON_FLAG_GTT_WC;
 	}
 
 	/* If VRAM is just stolen system memory, allow both VRAM and
 	 * GTT, whichever has free space. If a buffer is evicted from
 	 * VRAM to GTT, it will stay there.
 	 *
 	 * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
 	 * placements even with a low amount of stolen VRAM.
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 706188f..1be94f7 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -44,24 +44,23 @@ enum radeon_bo_layout {
 };
 
 enum radeon_bo_domain { /* bitfield */
     RADEON_DOMAIN_GTT  = 2,
     RADEON_DOMAIN_VRAM = 4,
     RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
 };
 
 enum radeon_bo_flag { /* bitfield */
     RADEON_FLAG_GTT_WC =        (1 << 0),
-    RADEON_FLAG_CPU_ACCESS =    (1 << 1),
-    RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
-    RADEON_FLAG_NO_SUBALLOC =   (1 << 3),
-    RADEON_FLAG_SPARSE =        (1 << 4),
+    RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),
+    RADEON_FLAG_NO_SUBALLOC =   (1 << 2),
+    RADEON_FLAG_SPARSE =        (1 << 3),
 };
 
 enum radeon_bo_usage { /* bitfield */
     RADEON_USAGE_READ = 2,
     RADEON_USAGE_WRITE = 4,
     RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE,
 
     /* The winsys ensures that the CS submission will be scheduled after
      * previously flushed CSs referencing this BO in a conflicting way.
      */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 5119d3f..9736f44a 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -391,22 +391,20 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
    pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                        pb_cache_bucket);
    request.alloc_size = size;
    request.phys_alignment = alignment;
 
    if (initial_domain & RADEON_DOMAIN_VRAM)
       request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
    if (initial_domain & RADEON_DOMAIN_GTT)
       request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
 
-   if (flags & RADEON_FLAG_CPU_ACCESS)
-      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
    if (flags & RADEON_FLAG_NO_CPU_ACCESS)
       request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
    if (flags & RADEON_FLAG_GTT_WC)
       request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
    r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
    if (r) {
       fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
       fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
       fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
@@ -499,22 +497,20 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
    struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab);
    enum radeon_bo_domain domains;
    enum radeon_bo_flag flags = 0;
    uint32_t base_id;
 
    if (!slab)
       return NULL;
 
    if (heap & 1)
       flags |= RADEON_FLAG_GTT_WC;
-   if (heap & 2)
-      flags |= RADEON_FLAG_CPU_ACCESS;
 
    switch (heap >> 2) {
    case 0:
       domains = RADEON_DOMAIN_VRAM;
       break;
    default:
    case 1:
       domains = RADEON_DOMAIN_VRAM_GTT;
       break;
    case 2:
@@ -1157,23 +1153,21 @@ amdgpu_bo_create(struct radeon_winsys *rws,
 
    /* Sub-allocate small buffers from slabs. */
    if (!(flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE)) &&
        size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) &&
        alignment <= MAX2(1 << AMDGPU_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
       struct pb_slab_entry *entry;
       unsigned heap = 0;
 
       if (flags & RADEON_FLAG_GTT_WC)
          heap |= 1;
-      if (flags & RADEON_FLAG_CPU_ACCESS)
-         heap |= 2;
-      if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
+      if (flags & ~RADEON_FLAG_GTT_WC)
          goto no_slab;
 
       switch (domain) {
       case RADEON_DOMAIN_VRAM:
          heap |= 0 * 4;
          break;
       case RADEON_DOMAIN_VRAM_GTT:
          heap |= 1 * 4;
          break;
       case RADEON_DOMAIN_GTT:
@@ -1197,21 +1191,20 @@ amdgpu_bo_create(struct radeon_winsys *rws,
       bo = container_of(entry, bo, u.slab.entry);
 
       pipe_reference_init(&bo->base.reference, 1);
 
       return &bo->base;
    }
 no_slab:
 
    if (flags & RADEON_FLAG_SPARSE) {
       assert(RADEON_SPARSE_PAGE_SIZE % alignment == 0);
-      assert(!(flags & RADEON_FLAG_CPU_ACCESS));
 
       flags |= RADEON_FLAG_NO_CPU_ACCESS;
 
       return amdgpu_bo_sparse_create(ws, size, domain, flags);
    }
 
    /* This flag is irrelevant for the cache. */
    flags &= ~RADEON_FLAG_NO_SUBALLOC;
 
    /* Align size to page size. This is the minimum alignment for normal
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 1b3ca65..a1fb045 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -574,22 +574,21 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib)
       break;
    case IB_MAIN:
       buffer_size = MAX2(buffer_size, 8 * 1024 * 4);
       break;
    default:
       unreachable("unhandled IB type");
    }
 
    pb = ws->base.buffer_create(&ws->base, buffer_size,
                                ws->info.gart_page_size,
-                               RADEON_DOMAIN_GTT,
-                               RADEON_FLAG_CPU_ACCESS);
+                               RADEON_DOMAIN_GTT, 0);
    if (!pb)
       return false;
 
    mapped = ws->base.buffer_map(pb, NULL, PIPE_TRANSFER_WRITE);
    if (!mapped) {
       pb_reference(&pb, NULL);
       return false;
    }
 
    pb_reference(&ib->big_ib_buffer, pb);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 274d576..81a59e5 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -604,22 +604,20 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
     assert((initial_domains &
             ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
 
     args.size = size;
     args.alignment = alignment;
     args.initial_domain = initial_domains;
     args.flags = 0;
 
     if (flags & RADEON_FLAG_GTT_WC)
         args.flags |= RADEON_GEM_GTT_WC;
-    if (flags & RADEON_FLAG_CPU_ACCESS)
-        args.flags |= RADEON_GEM_CPU_ACCESS;
     if (flags & RADEON_FLAG_NO_CPU_ACCESS)
         args.flags |= RADEON_GEM_NO_CPU_ACCESS;
 
     if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
                             &args, sizeof(args))) {
         fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
         fprintf(stderr, "radeon:    size      : %u bytes\n", size);
         fprintf(stderr, "radeon:    alignment : %u bytes\n", alignment);
         fprintf(stderr, "radeon:    domains   : %u\n", args.initial_domain);
         fprintf(stderr, "radeon:    flags     : %u\n", args.flags);
@@ -733,22 +731,20 @@ struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
     struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
     enum radeon_bo_domain domains;
     enum radeon_bo_flag flags = 0;
     unsigned base_hash;
 
     if (!slab)
         return NULL;
 
     if (heap & 1)
         flags |= RADEON_FLAG_GTT_WC;
-    if (heap & 2)
-        flags |= RADEON_FLAG_CPU_ACCESS;
 
     switch (heap >> 2) {
     case 0:
         domains = RADEON_DOMAIN_VRAM;
         break;
     default:
     case 1:
         domains = RADEON_DOMAIN_VRAM_GTT;
         break;
     case 2:
@@ -945,23 +941,21 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
     /* Sub-allocate small buffers from slabs. */
     if (!(flags & RADEON_FLAG_NO_SUBALLOC) &&
         size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
         ws->info.has_virtual_memory &&
         alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
         struct pb_slab_entry *entry;
         unsigned heap = 0;
 
         if (flags & RADEON_FLAG_GTT_WC)
             heap |= 1;
-        if (flags & RADEON_FLAG_CPU_ACCESS)
-            heap |= 2;
-        if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
+        if (flags & ~RADEON_FLAG_GTT_WC)
             goto no_slab;
 
         switch (domain) {
         case RADEON_DOMAIN_VRAM:
             heap |= 0 * 4;
             break;
         case RADEON_DOMAIN_VRAM_GTT:
             heap |= 1 * 4;
             break;
         case RADEON_DOMAIN_GTT:
-- 
2.7.4



More information about the mesa-dev mailing list