[Mesa-dev] [PATCH 07/14] gallium/radeon: remove RADEON_FLAG_CPU_ACCESS
Marek Olšák
maraeo at gmail.com
Thu Jun 29 19:47:42 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
https://lists.freedesktop.org/archives/amd-gfx/2017-June/010591.html
---
src/gallium/drivers/radeon/r600_buffer_common.c | 4 ----
src/gallium/drivers/radeon/radeon_winsys.h | 7 +++----
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 9 +--------
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 3 +--
src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 8 +-------
5 files changed, 6 insertions(+), 25 deletions(-)
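
Note for reviewers (not part of the commit message): with
RADEON_FLAG_CPU_ACCESS gone, buffer creation no longer asks the kernel
for CPU-accessible VRAM explicitly (AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED
and RADEON_GEM_CPU_ACCESS are simply never requested), and the remaining
winsys flags are renumbered. In the slab allocators the old "heap & 2"
bit therefore becomes unused, while the placement domain still lives in
bits 2 and up. A minimal sketch of the resulting heap encoding, using the
enums from radeon_winsys.h above; the helper name slab_heap_index is made
up for illustration, and the RADEON_DOMAIN_GTT case is assumed to follow
the same N * 4 pattern as the cases visible in the hunks:

   static unsigned slab_heap_index(enum radeon_bo_domain domain,
                                   enum radeon_bo_flag flags)
   {
      unsigned heap = 0;

      /* bit 0: write-combined CPU mapping */
      if (flags & RADEON_FLAG_GTT_WC)
         heap |= 1;
      /* bit 1: previously RADEON_FLAG_CPU_ACCESS, now left unused */

      /* bits 2+: placement domain */
      switch (domain) {
      case RADEON_DOMAIN_VRAM:
         heap |= 0 * 4;
         break;
      default:
      case RADEON_DOMAIN_VRAM_GTT:
         heap |= 1 * 4;
         break;
      case RADEON_DOMAIN_GTT:
         heap |= 2 * 4;   /* assumed; this case is cut off in the hunks */
         break;
      }
      return heap;
   }
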
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 342695c..262fe1d 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -125,21 +125,20 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
case PIPE_USAGE_DYNAMIC:
/* Older kernels didn't always flush the HDP cache before
* CS execution
*/
if (rscreen->info.drm_major == 2 &&
rscreen->info.drm_minor < 40) {
res->domains = RADEON_DOMAIN_GTT;
res->flags |= RADEON_FLAG_GTT_WC;
break;
}
- res->flags |= RADEON_FLAG_CPU_ACCESS;
/* fall through */
case PIPE_USAGE_DEFAULT:
case PIPE_USAGE_IMMUTABLE:
default:
/* Not listing GTT here improves performance in some
* apps. */
res->domains = RADEON_DOMAIN_VRAM;
res->flags |= RADEON_FLAG_GTT_WC;
break;
}
@@ -151,29 +150,26 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
* kernels, because they didn't always flush the HDP
* cache before CS execution.
*
* Write-combined CPU mappings are fine, the kernel
* ensures all CPU writes finish before the GPU
* executes a command stream.
*/
if (rscreen->info.drm_major == 2 &&
rscreen->info.drm_minor < 40)
res->domains = RADEON_DOMAIN_GTT;
- else if (res->domains & RADEON_DOMAIN_VRAM)
- res->flags |= RADEON_FLAG_CPU_ACCESS;
}
/* Tiled textures are unmappable. Always put them in VRAM. */
if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) ||
res->flags & R600_RESOURCE_FLAG_UNMAPPABLE) {
res->domains = RADEON_DOMAIN_VRAM;
- res->flags &= ~RADEON_FLAG_CPU_ACCESS;
res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
RADEON_FLAG_GTT_WC;
}
/* If VRAM is just stolen system memory, allow both VRAM and
* GTT, whichever has free space. If a buffer is evicted from
* VRAM to GTT, it will stay there.
*
* DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
* placements even with a low amount of stolen VRAM.
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 706188f..1be94f7 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -44,24 +44,23 @@ enum radeon_bo_layout {
};
enum radeon_bo_domain { /* bitfield */
RADEON_DOMAIN_GTT = 2,
RADEON_DOMAIN_VRAM = 4,
RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
};
enum radeon_bo_flag { /* bitfield */
RADEON_FLAG_GTT_WC = (1 << 0),
- RADEON_FLAG_CPU_ACCESS = (1 << 1),
- RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
- RADEON_FLAG_NO_SUBALLOC = (1 << 3),
- RADEON_FLAG_SPARSE = (1 << 4),
+ RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),
+ RADEON_FLAG_NO_SUBALLOC = (1 << 2),
+ RADEON_FLAG_SPARSE = (1 << 3),
};
enum radeon_bo_usage { /* bitfield */
RADEON_USAGE_READ = 2,
RADEON_USAGE_WRITE = 4,
RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE,
/* The winsys ensures that the CS submission will be scheduled after
* previously flushed CSs referencing this BO in a conflicting way.
*/
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 5119d3f..9736f44a 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -391,22 +391,20 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
pb_cache_bucket);
request.alloc_size = size;
request.phys_alignment = alignment;
if (initial_domain & RADEON_DOMAIN_VRAM)
request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
if (initial_domain & RADEON_DOMAIN_GTT)
request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
- if (flags & RADEON_FLAG_CPU_ACCESS)
- request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
if (flags & RADEON_FLAG_NO_CPU_ACCESS)
request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
if (flags & RADEON_FLAG_GTT_WC)
request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
if (r) {
fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
@@ -499,22 +497,20 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab);
enum radeon_bo_domain domains;
enum radeon_bo_flag flags = 0;
uint32_t base_id;
if (!slab)
return NULL;
if (heap & 1)
flags |= RADEON_FLAG_GTT_WC;
- if (heap & 2)
- flags |= RADEON_FLAG_CPU_ACCESS;
switch (heap >> 2) {
case 0:
domains = RADEON_DOMAIN_VRAM;
break;
default:
case 1:
domains = RADEON_DOMAIN_VRAM_GTT;
break;
case 2:
@@ -1157,23 +1153,21 @@ amdgpu_bo_create(struct radeon_winsys *rws,
/* Sub-allocate small buffers from slabs. */
if (!(flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE)) &&
size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) &&
alignment <= MAX2(1 << AMDGPU_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
struct pb_slab_entry *entry;
unsigned heap = 0;
if (flags & RADEON_FLAG_GTT_WC)
heap |= 1;
- if (flags & RADEON_FLAG_CPU_ACCESS)
- heap |= 2;
- if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
+ if (flags & ~RADEON_FLAG_GTT_WC)
goto no_slab;
switch (domain) {
case RADEON_DOMAIN_VRAM:
heap |= 0 * 4;
break;
case RADEON_DOMAIN_VRAM_GTT:
heap |= 1 * 4;
break;
case RADEON_DOMAIN_GTT:
@@ -1197,21 +1191,20 @@ amdgpu_bo_create(struct radeon_winsys *rws,
bo = container_of(entry, bo, u.slab.entry);
pipe_reference_init(&bo->base.reference, 1);
return &bo->base;
}
no_slab:
if (flags & RADEON_FLAG_SPARSE) {
assert(RADEON_SPARSE_PAGE_SIZE % alignment == 0);
- assert(!(flags & RADEON_FLAG_CPU_ACCESS));
flags |= RADEON_FLAG_NO_CPU_ACCESS;
return amdgpu_bo_sparse_create(ws, size, domain, flags);
}
/* This flag is irrelevant for the cache. */
flags &= ~RADEON_FLAG_NO_SUBALLOC;
/* Align size to page size. This is the minimum alignment for normal
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 1b3ca65..a1fb045 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -574,22 +574,21 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib)
break;
case IB_MAIN:
buffer_size = MAX2(buffer_size, 8 * 1024 * 4);
break;
default:
unreachable("unhandled IB type");
}
pb = ws->base.buffer_create(&ws->base, buffer_size,
ws->info.gart_page_size,
- RADEON_DOMAIN_GTT,
- RADEON_FLAG_CPU_ACCESS);
+ RADEON_DOMAIN_GTT, 0);
if (!pb)
return false;
mapped = ws->base.buffer_map(pb, NULL, PIPE_TRANSFER_WRITE);
if (!mapped) {
pb_reference(&pb, NULL);
return false;
}
pb_reference(&ib->big_ib_buffer, pb);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 274d576..81a59e5 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -604,22 +604,20 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
assert((initial_domains &
~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
args.size = size;
args.alignment = alignment;
args.initial_domain = initial_domains;
args.flags = 0;
if (flags & RADEON_FLAG_GTT_WC)
args.flags |= RADEON_GEM_GTT_WC;
- if (flags & RADEON_FLAG_CPU_ACCESS)
- args.flags |= RADEON_GEM_CPU_ACCESS;
if (flags & RADEON_FLAG_NO_CPU_ACCESS)
args.flags |= RADEON_GEM_NO_CPU_ACCESS;
if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
&args, sizeof(args))) {
fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
fprintf(stderr, "radeon: size : %u bytes\n", size);
fprintf(stderr, "radeon: alignment : %u bytes\n", alignment);
fprintf(stderr, "radeon: domains : %u\n", args.initial_domain);
fprintf(stderr, "radeon: flags : %u\n", args.flags);
@@ -733,22 +731,20 @@ struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
enum radeon_bo_domain domains;
enum radeon_bo_flag flags = 0;
unsigned base_hash;
if (!slab)
return NULL;
if (heap & 1)
flags |= RADEON_FLAG_GTT_WC;
- if (heap & 2)
- flags |= RADEON_FLAG_CPU_ACCESS;
switch (heap >> 2) {
case 0:
domains = RADEON_DOMAIN_VRAM;
break;
default:
case 1:
domains = RADEON_DOMAIN_VRAM_GTT;
break;
case 2:
@@ -945,23 +941,21 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
/* Sub-allocate small buffers from slabs. */
if (!(flags & RADEON_FLAG_NO_SUBALLOC) &&
size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
ws->info.has_virtual_memory &&
alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
struct pb_slab_entry *entry;
unsigned heap = 0;
if (flags & RADEON_FLAG_GTT_WC)
heap |= 1;
- if (flags & RADEON_FLAG_CPU_ACCESS)
- heap |= 2;
- if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
+ if (flags & ~RADEON_FLAG_GTT_WC)
goto no_slab;
switch (domain) {
case RADEON_DOMAIN_VRAM:
heap |= 0 * 4;
break;
case RADEON_DOMAIN_VRAM_GTT:
heap |= 1 * 4;
break;
case RADEON_DOMAIN_GTT:
--
2.7.4