[Mesa-dev] [PATCH 8/8] gallium/radeon: derive buffer placement and flags only once per buffer
Marek Olšák
maraeo at gmail.com
Thu Aug 18 19:46:44 UTC 2016
From: Marek Olšák <marek.olsak at amd.com>
Invalidated buffers don't have to do this.
---
src/gallium/drivers/radeon/r600_buffer_common.c | 147 +++++++++++++-----------
src/gallium/drivers/radeon/r600_pipe_common.h | 2 +
2 files changed, 80 insertions(+), 69 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 4480293..113a7dc 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -98,91 +98,108 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
/* Setting the CS to NULL will prevent doing checks we have done already. */
return ctx->ws->buffer_map(resource->buf, NULL, usage);
}
bool r600_init_resource(struct r600_common_screen *rscreen,
struct r600_resource *res,
uint64_t size, unsigned alignment)
{
struct r600_texture *rtex = (struct r600_texture*)res;
struct pb_buffer *old_buf, *new_buf;
- enum radeon_bo_flag flags = 0;
-
- switch (res->b.b.usage) {
- case PIPE_USAGE_STREAM:
- flags = RADEON_FLAG_GTT_WC;
- /* fall through */
- case PIPE_USAGE_STAGING:
- /* Transfers are likely to occur more often with these resources. */
- res->domains = RADEON_DOMAIN_GTT;
- break;
- case PIPE_USAGE_DYNAMIC:
- /* Older kernels didn't always flush the HDP cache before
- * CS execution
- */
- if (rscreen->info.drm_major == 2 &&
- rscreen->info.drm_minor < 40) {
+
+ if (!res->initialized) {
+ res->flags = 0;
+
+ switch (res->b.b.usage) {
+ case PIPE_USAGE_STREAM:
+ res->flags = RADEON_FLAG_GTT_WC;
+ /* fall through */
+ case PIPE_USAGE_STAGING:
+ /* Transfers are likely to occur more often with these
+ * resources. */
res->domains = RADEON_DOMAIN_GTT;
- flags |= RADEON_FLAG_GTT_WC;
+ break;
+ case PIPE_USAGE_DYNAMIC:
+ /* Older kernels didn't always flush the HDP cache before
+ * CS execution
+ */
+ if (rscreen->info.drm_major == 2 &&
+ rscreen->info.drm_minor < 40) {
+ res->domains = RADEON_DOMAIN_GTT;
+ res->flags |= RADEON_FLAG_GTT_WC;
+ break;
+ }
+ res->flags |= RADEON_FLAG_CPU_ACCESS;
+ /* fall through */
+ case PIPE_USAGE_DEFAULT:
+ case PIPE_USAGE_IMMUTABLE:
+ default:
+ /* Not listing GTT here improves performance in some
+ * apps. */
+ res->domains = RADEON_DOMAIN_VRAM;
+ res->flags |= RADEON_FLAG_GTT_WC;
break;
}
- flags |= RADEON_FLAG_CPU_ACCESS;
- /* fall through */
- case PIPE_USAGE_DEFAULT:
- case PIPE_USAGE_IMMUTABLE:
- default:
- /* Not listing GTT here improves performance in some apps. */
- res->domains = RADEON_DOMAIN_VRAM;
- flags |= RADEON_FLAG_GTT_WC;
- break;
- }
- if (res->b.b.target == PIPE_BUFFER &&
- res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
- PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
- /* Use GTT for all persistent mappings with older kernels,
- * because they didn't always flush the HDP cache before CS
- * execution.
- *
- * Write-combined CPU mappings are fine, the kernel ensures all CPU
- * writes finish before the GPU executes a command stream.
+ if (res->b.b.target == PIPE_BUFFER &&
+ res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
+ PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
+ /* Use GTT for all persistent mappings with older
+ * kernels, because they didn't always flush the HDP
+ * cache before CS execution.
+ *
+ * Write-combined CPU mappings are fine, the kernel
+ * ensures all CPU writes finish before the GPU
+ * executes a command stream.
+ */
+ if (rscreen->info.drm_major == 2 &&
+ rscreen->info.drm_minor < 40)
+ res->domains = RADEON_DOMAIN_GTT;
+ else if (res->domains & RADEON_DOMAIN_VRAM)
+ res->flags |= RADEON_FLAG_CPU_ACCESS;
+ }
+
+ /* Tiled textures are unmappable. Always put them in VRAM. */
+ if (res->b.b.target != PIPE_BUFFER &&
+ rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
+ res->domains = RADEON_DOMAIN_VRAM;
+ res->flags &= ~RADEON_FLAG_CPU_ACCESS;
+ res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
+ RADEON_FLAG_GTT_WC;
+ }
+
+ /* If VRAM is just stolen system memory, allow both VRAM and
+ * GTT, whichever has free space. If a buffer is evicted from
+ * VRAM to GTT, it will stay there.
*/
- if (rscreen->info.drm_major == 2 &&
- rscreen->info.drm_minor < 40)
- res->domains = RADEON_DOMAIN_GTT;
- else if (res->domains & RADEON_DOMAIN_VRAM)
- flags |= RADEON_FLAG_CPU_ACCESS;
- }
+ if (!rscreen->info.has_dedicated_vram &&
+ res->domains == RADEON_DOMAIN_VRAM)
+ res->domains = RADEON_DOMAIN_VRAM_GTT;
- /* Tiled textures are unmappable. Always put them in VRAM. */
- if (res->b.b.target != PIPE_BUFFER &&
- rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
- res->domains = RADEON_DOMAIN_VRAM;
- flags &= ~RADEON_FLAG_CPU_ACCESS;
- flags |= RADEON_FLAG_NO_CPU_ACCESS |
- RADEON_FLAG_GTT_WC;
- }
+ if (rscreen->debug_flags & DBG_NO_WC)
+ res->flags &= ~RADEON_FLAG_GTT_WC;
- /* If VRAM is just stolen system memory, allow both VRAM and GTT,
- * whichever has free space. If a buffer is evicted from VRAM to GTT,
- * it will stay there.
- */
- if (!rscreen->info.has_dedicated_vram &&
- res->domains == RADEON_DOMAIN_VRAM)
- res->domains = RADEON_DOMAIN_VRAM_GTT;
+ /* Set expected VRAM and GART usage for the buffer. */
+ res->vram_usage = 0;
+ res->gart_usage = 0;
- if (rscreen->debug_flags & DBG_NO_WC)
- flags &= ~RADEON_FLAG_GTT_WC;
+ if (res->domains & RADEON_DOMAIN_VRAM)
+ res->vram_usage = size;
+ else if (res->domains & RADEON_DOMAIN_GTT)
+ res->gart_usage = size;
+
+ res->initialized = true;
+ }
/* Allocate a new resource. */
new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
- res->domains, flags);
+ res->domains, res->flags);
if (!new_buf) {
return false;
}
/* Replace the pointer such that if res->buf wasn't NULL, it won't be
* NULL. This should prevent crashes with multiple contexts using
* the same buffer where one of the contexts invalidates it while
* the others are using it. */
old_buf = res->buf;
res->buf = new_buf; /* should be atomic */
@@ -190,29 +207,20 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
if (rscreen->info.has_virtual_memory)
res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf);
else
res->gpu_address = 0;
pb_reference(&old_buf, NULL);
util_range_set_empty(&res->valid_buffer_range);
res->TC_L2_dirty = false;
- /* Set expected VRAM and GART usage for the buffer. */
- res->vram_usage = 0;
- res->gart_usage = 0;
-
- if (res->domains & RADEON_DOMAIN_VRAM)
- res->vram_usage = size;
- else if (res->domains & RADEON_DOMAIN_GTT)
- res->gart_usage = size;
-
/* Print debug information. */
if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n",
res->gpu_address, res->gpu_address + res->buf->size,
res->buf->size);
}
return true;
}
static void r600_buffer_destroy(struct pipe_screen *screen,
@@ -496,20 +504,21 @@ r600_alloc_buffer_struct(struct pipe_screen *screen,
{
struct r600_resource *rbuffer;
rbuffer = MALLOC_STRUCT(r600_resource);
rbuffer->b.b = *templ;
pipe_reference_init(&rbuffer->b.b.reference, 1);
rbuffer->b.b.screen = screen;
rbuffer->b.vtbl = &r600_buffer_vtbl;
rbuffer->buf = NULL;
+ rbuffer->initialized = false;
rbuffer->TC_L2_dirty = false;
rbuffer->is_shared = false;
util_range_init(&rbuffer->valid_buffer_range);
return rbuffer;
}
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ,
unsigned alignment)
{
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 5375044..358d5f4 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -169,20 +169,22 @@ struct r600_resource {
/* Winsys objects. */
struct pb_buffer *buf;
uint64_t gpu_address;
/* Memory usage if the buffer placement is optimal. */
uint64_t vram_usage;
uint64_t gart_usage;
/* Resource state. */
enum radeon_bo_domain domains;
+ enum radeon_bo_flag flags;
+ bool initialized;
/* The buffer range which is initialized (with a write transfer,
* streamout, DMA, or as a random access target). The rest of
* the buffer is considered invalid and can be mapped unsynchronized.
*
* This allows unsychronized mapping of a buffer range which hasn't
* been used yet. It's for applications which forget to use
* the unsynchronized map flag and expect the driver to figure it out.
*/
struct util_range valid_buffer_range;
--
2.7.4
More information about the mesa-dev
mailing list