[Mesa-dev] [PATCH 01/20] gallium/radeon: derive buffer placement and flags only at initialization

Marek Olšák maraeo at gmail.com
Mon Aug 29 15:28:16 UTC 2016


From: Marek Olšák <marek.olsak at amd.com>

Invalidated buffers don't have to go through the placement and flag derivation again; they only need a new allocation.

Split r600_init_resource into r600_init_resource_fields and
r600_alloc_resource.
---
 src/gallium/drivers/r600/r600_state_common.c    |  5 +-
 src/gallium/drivers/radeon/r600_buffer_common.c | 84 ++++++++++++++-----------
 src/gallium/drivers/radeon/r600_pipe_common.h   | 13 ++--
 src/gallium/drivers/radeon/r600_texture.c       |  9 +--
 src/gallium/drivers/radeonsi/si_descriptors.c   |  5 +-
 5 files changed, 67 insertions(+), 49 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index a5341c3..0349432 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -2774,26 +2774,25 @@ uint32_t r600_colorformat_endian_swap(uint32_t colorformat, bool do_endian_swap)
 		}
 	} else {
 		return ENDIAN_NONE;
 	}
 }
 
 static void r600_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
 {
 	struct r600_context *rctx = (struct r600_context*)ctx;
 	struct r600_resource *rbuffer = r600_resource(buf);
-	unsigned i, shader, mask, alignment = rbuffer->buf->alignment;
+	unsigned i, shader, mask;
 	struct r600_pipe_sampler_view *view;
 
 	/* Reallocate the buffer in the same pipe_resource. */
-	r600_init_resource(&rctx->screen->b, rbuffer, rbuffer->b.b.width0,
-			   alignment);
+	r600_alloc_resource(&rctx->screen->b, rbuffer);
 
 	/* We changed the buffer, now we need to bind it where the old one was bound. */
 	/* Vertex buffers. */
 	mask = rctx->vertex_buffer_state.enabled_mask;
 	while (mask) {
 		i = u_bit_scan(&mask);
 		if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
 			rctx->vertex_buffer_state.dirty_mask |= 1 << i;
 			r600_vertex_buffers_dirty(rctx);
 		}
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 4480293..6a55de1 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -92,97 +92,118 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
 			ctx->ws->cs_sync_flush(ctx->gfx.cs);
 			if (ctx->dma.cs)
 				ctx->ws->cs_sync_flush(ctx->dma.cs);
 		}
 	}
 
 	/* Setting the CS to NULL will prevent doing checks we have done already. */
 	return ctx->ws->buffer_map(resource->buf, NULL, usage);
 }
 
-bool r600_init_resource(struct r600_common_screen *rscreen,
-			struct r600_resource *res,
-			uint64_t size, unsigned alignment)
+void r600_init_resource_fields(struct r600_common_screen *rscreen,
+			       struct r600_resource *res,
+			       uint64_t size, unsigned alignment)
 {
 	struct r600_texture *rtex = (struct r600_texture*)res;
-	struct pb_buffer *old_buf, *new_buf;
-	enum radeon_bo_flag flags = 0;
+
+	res->bo_size = size;
+	res->bo_alignment = alignment;
+	res->flags = 0;
 
 	switch (res->b.b.usage) {
 	case PIPE_USAGE_STREAM:
-		flags = RADEON_FLAG_GTT_WC;
+		res->flags = RADEON_FLAG_GTT_WC;
 		/* fall through */
 	case PIPE_USAGE_STAGING:
-		/* Transfers are likely to occur more often with these resources. */
+		/* Transfers are likely to occur more often with these
+		 * resources. */
 		res->domains = RADEON_DOMAIN_GTT;
 		break;
 	case PIPE_USAGE_DYNAMIC:
 		/* Older kernels didn't always flush the HDP cache before
 		 * CS execution
 		 */
 		if (rscreen->info.drm_major == 2 &&
 		    rscreen->info.drm_minor < 40) {
 			res->domains = RADEON_DOMAIN_GTT;
-			flags |= RADEON_FLAG_GTT_WC;
+			res->flags |= RADEON_FLAG_GTT_WC;
 			break;
 		}
-		flags |= RADEON_FLAG_CPU_ACCESS;
+		res->flags |= RADEON_FLAG_CPU_ACCESS;
 		/* fall through */
 	case PIPE_USAGE_DEFAULT:
 	case PIPE_USAGE_IMMUTABLE:
 	default:
-		/* Not listing GTT here improves performance in some apps. */
+		/* Not listing GTT here improves performance in some
+		 * apps. */
 		res->domains = RADEON_DOMAIN_VRAM;
-		flags |= RADEON_FLAG_GTT_WC;
+		res->flags |= RADEON_FLAG_GTT_WC;
 		break;
 	}
 
 	if (res->b.b.target == PIPE_BUFFER &&
 	    res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
 			      PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
-		/* Use GTT for all persistent mappings with older kernels,
-		 * because they didn't always flush the HDP cache before CS
-		 * execution.
+		/* Use GTT for all persistent mappings with older
+		 * kernels, because they didn't always flush the HDP
+		 * cache before CS execution.
 		 *
-		 * Write-combined CPU mappings are fine, the kernel ensures all CPU
-		 * writes finish before the GPU executes a command stream.
+		 * Write-combined CPU mappings are fine, the kernel
+		 * ensures all CPU writes finish before the GPU
+		 * executes a command stream.
 		 */
 		if (rscreen->info.drm_major == 2 &&
 		    rscreen->info.drm_minor < 40)
 			res->domains = RADEON_DOMAIN_GTT;
 		else if (res->domains & RADEON_DOMAIN_VRAM)
-			flags |= RADEON_FLAG_CPU_ACCESS;
+			res->flags |= RADEON_FLAG_CPU_ACCESS;
 	}
 
 	/* Tiled textures are unmappable. Always put them in VRAM. */
 	if (res->b.b.target != PIPE_BUFFER &&
 	    rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
 		res->domains = RADEON_DOMAIN_VRAM;
-		flags &= ~RADEON_FLAG_CPU_ACCESS;
-		flags |= RADEON_FLAG_NO_CPU_ACCESS |
+		res->flags &= ~RADEON_FLAG_CPU_ACCESS;
+		res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
 			 RADEON_FLAG_GTT_WC;
 	}
 
-	/* If VRAM is just stolen system memory, allow both VRAM and GTT,
-	 * whichever has free space. If a buffer is evicted from VRAM to GTT,
-	 * it will stay there.
+	/* If VRAM is just stolen system memory, allow both VRAM and
+	 * GTT, whichever has free space. If a buffer is evicted from
+	 * VRAM to GTT, it will stay there.
 	 */
 	if (!rscreen->info.has_dedicated_vram &&
 	    res->domains == RADEON_DOMAIN_VRAM)
 		res->domains = RADEON_DOMAIN_VRAM_GTT;
 
 	if (rscreen->debug_flags & DBG_NO_WC)
-		flags &= ~RADEON_FLAG_GTT_WC;
+		res->flags &= ~RADEON_FLAG_GTT_WC;
+
+	/* Set expected VRAM and GART usage for the buffer. */
+	res->vram_usage = 0;
+	res->gart_usage = 0;
+
+	if (res->domains & RADEON_DOMAIN_VRAM)
+		res->vram_usage = size;
+	else if (res->domains & RADEON_DOMAIN_GTT)
+		res->gart_usage = size;
+}
+
+bool r600_alloc_resource(struct r600_common_screen *rscreen,
+			 struct r600_resource *res)
+{
+	struct pb_buffer *old_buf, *new_buf;
 
 	/* Allocate a new resource. */
-	new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
-					     res->domains, flags);
+	new_buf = rscreen->ws->buffer_create(rscreen->ws, res->bo_size,
+					     res->bo_alignment,
+					     res->domains, res->flags);
 	if (!new_buf) {
 		return false;
 	}
 
 	/* Replace the pointer such that if res->buf wasn't NULL, it won't be
 	 * NULL. This should prevent crashes with multiple contexts using
 	 * the same buffer where one of the contexts invalidates it while
 	 * the others are using it. */
 	old_buf = res->buf;
 	res->buf = new_buf; /* should be atomic */
@@ -190,29 +211,20 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
 	if (rscreen->info.has_virtual_memory)
 		res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf);
 	else
 		res->gpu_address = 0;
 
 	pb_reference(&old_buf, NULL);
 
 	util_range_set_empty(&res->valid_buffer_range);
 	res->TC_L2_dirty = false;
 
-	/* Set expected VRAM and GART usage for the buffer. */
-	res->vram_usage = 0;
-	res->gart_usage = 0;
-
-	if (res->domains & RADEON_DOMAIN_VRAM)
-		res->vram_usage = size;
-	else if (res->domains & RADEON_DOMAIN_GTT)
-		res->gart_usage = size;
-
 	/* Print debug information. */
 	if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
 		fprintf(stderr, "VM start=0x%"PRIX64"  end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n",
 			res->gpu_address, res->gpu_address + res->buf->size,
 			res->buf->size);
 	}
 	return true;
 }
 
 static void r600_buffer_destroy(struct pipe_screen *screen,
@@ -509,21 +521,23 @@ r600_alloc_buffer_struct(struct pipe_screen *screen,
 	return rbuffer;
 }
 
 struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
 					 const struct pipe_resource *templ,
 					 unsigned alignment)
 {
 	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
 	struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
 
-	if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment)) {
+	r600_init_resource_fields(rscreen, rbuffer, templ->width0, alignment);
+
+	if (!r600_alloc_resource(rscreen, rbuffer)) {
 		FREE(rbuffer);
 		return NULL;
 	}
 	return &rbuffer->b.b;
 }
 
 struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
 						 unsigned bind,
 						 unsigned usage,
 						 unsigned size,
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 5375044..1924535 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -167,22 +167,25 @@ void radeon_shader_binary_clean(struct radeon_shader_binary *b);
 struct r600_resource {
 	struct u_resource		b;
 
 	/* Winsys objects. */
 	struct pb_buffer		*buf;
 	uint64_t			gpu_address;
 	/* Memory usage if the buffer placement is optimal. */
 	uint64_t			vram_usage;
 	uint64_t			gart_usage;
 
-	/* Resource state. */
+	/* Resource properties. */
+	uint64_t			bo_size;
+	unsigned			bo_alignment;
 	enum radeon_bo_domain		domains;
+	enum radeon_bo_flag		flags;
 
 	/* The buffer range which is initialized (with a write transfer,
 	 * streamout, DMA, or as a random access target). The rest of
 	 * the buffer is considered invalid and can be mapped unsynchronized.
 	 *
 	 * This allows unsychronized mapping of a buffer range which hasn't
 	 * been used yet. It's for applications which forget to use
 	 * the unsynchronized map flag and expect the driver to figure it out.
          */
 	struct util_range		valid_buffer_range;
@@ -646,23 +649,25 @@ struct r600_common_context {
 bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
 				     struct pb_buffer *buf,
 				     enum radeon_bo_usage usage);
 void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
                                       struct r600_resource *resource,
                                       unsigned usage);
 void r600_buffer_subdata(struct pipe_context *ctx,
 			 struct pipe_resource *buffer,
 			 unsigned usage, unsigned offset,
 			 unsigned size, const void *data);
-bool r600_init_resource(struct r600_common_screen *rscreen,
-			struct r600_resource *res,
-			uint64_t size, unsigned alignment);
+void r600_init_resource_fields(struct r600_common_screen *rscreen,
+			       struct r600_resource *res,
+			       uint64_t size, unsigned alignment);
+bool r600_alloc_resource(struct r600_common_screen *rscreen,
+			 struct r600_resource *res);
 struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
 					 const struct pipe_resource *templ,
 					 unsigned alignment);
 struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
 						  unsigned bind,
 						  unsigned usage,
 						  unsigned size,
 						  unsigned alignment);
 struct pipe_resource *
 r600_buffer_from_user_memory(struct pipe_screen *screen,
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index e19150b..fb3068a 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1096,22 +1096,24 @@ r600_texture_create_object(struct pipe_screen *screen,
 		    (buf || !(rscreen->debug_flags & DBG_NO_DCC)) &&
 		    !(rtex->surface.flags & RADEON_SURF_SCANOUT)) {
 			/* Reserve space for the DCC buffer. */
 			rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment);
 			rtex->size = rtex->dcc_offset + rtex->surface.dcc_size;
 		}
 	}
 
 	/* Now create the backing buffer. */
 	if (!buf) {
-		if (!r600_init_resource(rscreen, resource, rtex->size,
-					rtex->surface.bo_alignment)) {
+		r600_init_resource_fields(rscreen, resource, rtex->size,
+					  rtex->surface.bo_alignment);
+
+		if (!r600_alloc_resource(rscreen, resource)) {
 			FREE(rtex);
 			return NULL;
 		}
 	} else {
 		resource->buf = buf;
 		resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf);
 		resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf);
 	}
 
 	if (rtex->cmask.size) {
@@ -1411,22 +1413,21 @@ static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen,
 static void r600_texture_invalidate_storage(struct r600_common_context *rctx,
 					    struct r600_texture *rtex)
 {
 	struct r600_common_screen *rscreen = rctx->screen;
 
 	/* There is no point in discarding depth and tiled buffers. */
 	assert(!rtex->is_depth);
 	assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED);
 
 	/* Reallocate the buffer in the same pipe_resource. */
-	r600_init_resource(rscreen, &rtex->resource, rtex->size,
-			   rtex->surface.bo_alignment);
+	r600_alloc_resource(rscreen, &rtex->resource);
 
 	/* Initialize the CMASK base address (needed even without CMASK). */
 	rtex->cmask.base_address_reg =
 		(rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
 
 	r600_dirty_all_framebuffer_states(rscreen);
 	p_atomic_inc(&rscreen->dirty_tex_descriptor_counter);
 
 	rctx->num_alloc_tex_transfer_bytes += rtex->size;
 }
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 0e026e9..b3174c6 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1416,29 +1416,28 @@ static void si_reset_buffer_resources(struct si_context *sctx,
  * bound.
  *
  * This is used to avoid CPU-GPU synchronizations, because it makes the buffer
  * idle by discarding its contents. Apps usually tell us when to do this using
  * map_buffer flags, for example.
  */
 static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
 {
 	struct si_context *sctx = (struct si_context*)ctx;
 	struct r600_resource *rbuffer = r600_resource(buf);
-	unsigned i, shader, alignment = rbuffer->buf->alignment;
+	unsigned i, shader;
 	uint64_t old_va = rbuffer->gpu_address;
 	unsigned num_elems = sctx->vertex_elements ?
 				       sctx->vertex_elements->count : 0;
 	struct si_sampler_view *view;
 
 	/* Reallocate the buffer in the same pipe_resource. */
-	r600_init_resource(&sctx->screen->b, rbuffer, rbuffer->b.b.width0,
-			   alignment);
+	r600_alloc_resource(&sctx->screen->b, rbuffer);
 
 	/* We changed the buffer, now we need to bind it where the old one
 	 * was bound. This consists of 2 things:
 	 *   1) Updating the resource descriptor and dirtying it.
 	 *   2) Adding a relocation to the CS, so that it's usable.
 	 */
 
 	/* Vertex buffers. */
 	for (i = 0; i < num_elems; i++) {
 		int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;
-- 
2.7.4



More information about the mesa-dev mailing list