Mesa (master): r600g: only flush the caches that need to be flushed during CP DMA operations

Marek Olšák mareko at kemper.freedesktop.org
Mon Jul 8 18:30:02 UTC 2013


Module: Mesa
Branch: master
Commit: 7948ed1250cae78ae1b22dbce4ab23aceacc6159
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7948ed1250cae78ae1b22dbce4ab23aceacc6159

Author: Marek Olšák <maraeo at gmail.com>
Date:   Sun Jun 30 19:57:59 2013 +0200

r600g: only flush the caches that need to be flushed during CP DMA operations

This should increase performance if constant uploads are done with the CP DMA,
because only the cache that needs to be flushed is flushed.

Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

---

 src/gallium/drivers/r600/evergreen_hw_context.c |   21 +---
 src/gallium/drivers/r600/r600_hw_context.c      |  126 ++++++++++++++++++++---
 src/gallium/drivers/r600/r600_pipe.h            |    2 +
 3 files changed, 117 insertions(+), 32 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index 075ab17..c428bc1 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -121,18 +121,8 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
 
 	offset += r600_resource_va(&rctx->screen->screen, dst);
 
-	/* We flush the caches, because we might read from or write
-	 * to resources which are bound right now. */
-	rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-		       R600_CONTEXT_INV_VERTEX_CACHE |
-		       R600_CONTEXT_INV_TEX_CACHE |
-		       R600_CONTEXT_FLUSH_AND_INV |
-		       R600_CONTEXT_FLUSH_AND_INV_CB |
-		       R600_CONTEXT_FLUSH_AND_INV_DB |
-		       R600_CONTEXT_FLUSH_AND_INV_CB_META |
-		       R600_CONTEXT_FLUSH_AND_INV_DB_META |
-		       R600_CONTEXT_STREAMOUT_FLUSH |
-		       R600_CONTEXT_WAIT_3D_IDLE;
+	/* Flush the cache where the resource is bound. */
+	r600_flag_resource_cache_flush(rctx, dst);
 
 	while (size) {
 		unsigned sync = 0;
@@ -169,10 +159,9 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
 		offset += byte_count;
 	}
 
-	/* Invalidate the read caches. */
-	rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-		       R600_CONTEXT_INV_VERTEX_CACHE |
-		       R600_CONTEXT_INV_TEX_CACHE;
+	/* Flush the cache again in case the 3D engine has been prefetching
+	 * the resource. */
+	r600_flag_resource_cache_flush(rctx, dst);
 
 	util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
 		       offset + size);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index ff36573..e2444cc 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -626,18 +626,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 	dst_offset += r600_resource_va(&rctx->screen->screen, dst);
 	src_offset += r600_resource_va(&rctx->screen->screen, src);
 
-	/* We flush the caches, because we might read from or write
-	 * to resources which are bound right now. */
-	rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-		       R600_CONTEXT_INV_VERTEX_CACHE |
-		       R600_CONTEXT_INV_TEX_CACHE |
-		       R600_CONTEXT_FLUSH_AND_INV |
-		       R600_CONTEXT_FLUSH_AND_INV_CB |
-		       R600_CONTEXT_FLUSH_AND_INV_DB |
-		       R600_CONTEXT_FLUSH_AND_INV_CB_META |
-		       R600_CONTEXT_FLUSH_AND_INV_DB_META |
-		       R600_CONTEXT_STREAMOUT_FLUSH |
-		       R600_CONTEXT_WAIT_3D_IDLE;
+	/* Flush the caches where the resources are bound. */
+	r600_flag_resource_cache_flush(rctx, src);
+	r600_flag_resource_cache_flush(rctx, dst);
 
 	/* There are differences between R700 and EG in CP DMA,
 	 * but we only use the common bits here. */
@@ -679,10 +670,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 		dst_offset += byte_count;
 	}
 
-	/* Invalidate the read caches. */
-	rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-		       R600_CONTEXT_INV_VERTEX_CACHE |
-		       R600_CONTEXT_INV_TEX_CACHE;
+	/* Flush the cache of the dst resource again in case the 3D engine
+	 * has been prefetching it. */
+	r600_flag_resource_cache_flush(rctx, dst);
 
 	util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
 		       dst_offset + size);
@@ -736,3 +726,107 @@ void r600_dma_copy(struct r600_context *rctx,
 	util_range_add(&rdst->valid_buffer_range, dst_offset,
 		       dst_offset + size);
 }
+
+/* OR into rctx->flags the flush/invalidate bits for every binding point where
+ * "res" is currently bound; set nothing if it is bound nowhere. Only flags are
+ * set here, nothing is emitted. Used for synchronization between engines. */
+void r600_flag_resource_cache_flush(struct r600_context *rctx,
+				    struct pipe_resource *res)
+{
+	/* Check vertex buffers: an enabled slot holding res needs a vertex cache invalidate. */
+	uint32_t mask = rctx->vertex_buffer_state.enabled_mask;
+	while (mask) {
+		uint32_t i = u_bit_scan(&mask);
+		if (rctx->vertex_buffer_state.vb[i].buffer == res) {
+			rctx->flags |= R600_CONTEXT_INV_VERTEX_CACHE;
+		}
+	}
+
+	/* Check vertex buffers for compute (cs_vertex_buffer_state); same flag as above. */
+	mask = rctx->cs_vertex_buffer_state.enabled_mask;
+	while (mask) {
+		uint32_t i = u_bit_scan(&mask);
+		if (rctx->cs_vertex_buffer_state.vb[i].buffer == res) {
+			rctx->flags |= R600_CONTEXT_INV_VERTEX_CACHE;
+		}
+	}
+
+	/* Check constant buffers of every shader type; the first match ends the search. */
+	unsigned shader;
+	for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
+		struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
+		uint32_t mask = state->enabled_mask;
+
+		while (mask) {
+			unsigned i = u_bit_scan(&mask);
+			if (state->cb[i].buffer == res) {
+				rctx->flags |= R600_CONTEXT_INV_CONST_CACHE;
+
+				shader = PIPE_SHADER_TYPES; /* break the outer loop */
+				break;
+			}
+		}
+	}
+
+	/* Check sampler views of every shader type; the first match ends the search. */
+	for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
+		struct r600_samplerview_state *state = &rctx->samplers[shader].views;
+		uint32_t mask = state->enabled_mask;
+
+		while (mask) {
+			uint32_t i = u_bit_scan(&mask);
+			if (&state->views[i]->tex_resource->b.b == res) {
+				rctx->flags |= R600_CONTEXT_INV_TEX_CACHE;
+
+				shader = PIPE_SHADER_TYPES; /* break the outer loop */
+				break;
+			}
+		}
+	}
+
+	/* Check streamout targets; a match also requests FLUSH_AND_INV and WAIT_3D_IDLE. */
+	int i;
+	for (i = 0; i < rctx->streamout.num_targets; i++) {
+		if (rctx->streamout.targets[i]->b.buffer == res) {
+			rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH |
+				       R600_CONTEXT_FLUSH_AND_INV |
+				       R600_CONTEXT_WAIT_3D_IDLE;
+			break;
+		}
+	}
+
+	/* Check colorbuffers (slots may be NULL); CB_META is added only with cmask/fmask. */
+	for (i = 0; i < rctx->framebuffer.state.nr_cbufs; i++) {
+		if (rctx->framebuffer.state.cbufs[i] &&
+		    rctx->framebuffer.state.cbufs[i]->texture == res) {
+			struct r600_texture *tex =
+				(struct r600_texture*)rctx->framebuffer.state.cbufs[i]->texture;
+
+			rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
+				       R600_CONTEXT_FLUSH_AND_INV |
+				       R600_CONTEXT_WAIT_3D_IDLE;
+
+			if (tex->cmask_size || tex->fmask_size) {
+				rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META;
+			}
+			break;
+		}
+	}
+
+	/* Check the depth buffer: res may be the zsbuf texture itself or its htile buffer. */
+	if (rctx->framebuffer.state.zsbuf) {
+		if (rctx->framebuffer.state.zsbuf->texture == res) {
+			rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB |
+				       R600_CONTEXT_FLUSH_AND_INV |
+				       R600_CONTEXT_WAIT_3D_IDLE;
+		}
+
+		struct r600_texture *tex =
+			(struct r600_texture*)rctx->framebuffer.state.zsbuf->texture;
+		if (tex && tex->htile && &tex->htile->b.b == res) {
+			rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB_META |
+				       R600_CONTEXT_FLUSH_AND_INV |
+				       R600_CONTEXT_WAIT_3D_IDLE;
+		}
+	}
+}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 66ea258..d5f54c4 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -834,6 +834,8 @@ boolean r600_dma_blit(struct pipe_context *ctx,
 			const struct pipe_box *src_box);
 void r600_emit_streamout_begin(struct r600_context *ctx, struct r600_atom *atom);
 void r600_emit_streamout_end(struct r600_context *ctx);
+void r600_flag_resource_cache_flush(struct r600_context *rctx,
+				    struct pipe_resource *res);
 
 /*
  * evergreen_hw_context.c




More information about the mesa-commit mailing list