[Mesa-dev] [PATCH 10/12] r600g: only flush the caches that need to be flushed during CP DMA operations

Marek Olšák maraeo at gmail.com
Sun Jun 30 18:53:51 PDT 2013


This should increase performance if constant uploads are done with the CP DMA.
---
 src/gallium/drivers/r600/evergreen_hw_context.c |  21 +---
 src/gallium/drivers/r600/r600_hw_context.c      | 126 +++++++++++++++++++++---
 src/gallium/drivers/r600/r600_pipe.h            |   2 +
 3 files changed, 117 insertions(+), 32 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index 075ab17..c428bc1 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -121,18 +121,8 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
 
 	offset += r600_resource_va(&rctx->screen->screen, dst);
 
-	/* We flush the caches, because we might read from or write
-	 * to resources which are bound right now. */
-	rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-		       R600_CONTEXT_INV_VERTEX_CACHE |
-		       R600_CONTEXT_INV_TEX_CACHE |
-		       R600_CONTEXT_FLUSH_AND_INV |
-		       R600_CONTEXT_FLUSH_AND_INV_CB |
-		       R600_CONTEXT_FLUSH_AND_INV_DB |
-		       R600_CONTEXT_FLUSH_AND_INV_CB_META |
-		       R600_CONTEXT_FLUSH_AND_INV_DB_META |
-		       R600_CONTEXT_STREAMOUT_FLUSH |
-		       R600_CONTEXT_WAIT_3D_IDLE;
+	/* Flush the cache where the resource is bound. */
+	r600_flag_resource_cache_flush(rctx, dst);
 
 	while (size) {
 		unsigned sync = 0;
@@ -169,10 +159,9 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
 		offset += byte_count;
 	}
 
-	/* Invalidate the read caches. */
-	rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-		       R600_CONTEXT_INV_VERTEX_CACHE |
-		       R600_CONTEXT_INV_TEX_CACHE;
+	/* Flush the cache again in case the 3D engine has been prefetching
+	 * the resource. */
+	r600_flag_resource_cache_flush(rctx, dst);
 
 	util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
 		       offset + size);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index ff36573..e2444cc 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -626,18 +626,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 	dst_offset += r600_resource_va(&rctx->screen->screen, dst);
 	src_offset += r600_resource_va(&rctx->screen->screen, src);
 
-	/* We flush the caches, because we might read from or write
-	 * to resources which are bound right now. */
-	rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-		       R600_CONTEXT_INV_VERTEX_CACHE |
-		       R600_CONTEXT_INV_TEX_CACHE |
-		       R600_CONTEXT_FLUSH_AND_INV |
-		       R600_CONTEXT_FLUSH_AND_INV_CB |
-		       R600_CONTEXT_FLUSH_AND_INV_DB |
-		       R600_CONTEXT_FLUSH_AND_INV_CB_META |
-		       R600_CONTEXT_FLUSH_AND_INV_DB_META |
-		       R600_CONTEXT_STREAMOUT_FLUSH |
-		       R600_CONTEXT_WAIT_3D_IDLE;
+	/* Flush the caches where the resources are bound. */
+	r600_flag_resource_cache_flush(rctx, src);
+	r600_flag_resource_cache_flush(rctx, dst);
 
 	/* There are differences between R700 and EG in CP DMA,
 	 * but we only use the common bits here. */
@@ -679,10 +670,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 		dst_offset += byte_count;
 	}
 
-	/* Invalidate the read caches. */
-	rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-		       R600_CONTEXT_INV_VERTEX_CACHE |
-		       R600_CONTEXT_INV_TEX_CACHE;
+	/* Flush the cache of the dst resource again in case the 3D engine
+	 * has been prefetching it. */
+	r600_flag_resource_cache_flush(rctx, dst);
 
 	util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
 		       dst_offset + size);
@@ -736,3 +726,107 @@ void r600_dma_copy(struct r600_context *rctx,
 	util_range_add(&rdst->valid_buffer_range, dst_offset,
 		       dst_offset + size);
 }
+
+/* Flag (in rctx->flags) the caches of every bind point that references res:
+ * vertex, constant, texture, streamout, color and depth/htile. Does nothing if
+ * res is not bound. The flush happens later; used to synchronize engines. */
+void r600_flag_resource_cache_flush(struct r600_context *rctx,
+				    struct pipe_resource *res)
+{
+	/* Graphics vertex buffers: flag INV_VERTEX_CACHE if an enabled slot is res. */
+	uint32_t mask = rctx->vertex_buffer_state.enabled_mask;
+	while (mask) {
+		uint32_t i = u_bit_scan(&mask);
+		if (rctx->vertex_buffer_state.vb[i].buffer == res) {
+			rctx->flags |= R600_CONTEXT_INV_VERTEX_CACHE;
+		}
+	}
+
+	/* Compute vertex buffers: same check, same INV_VERTEX_CACHE flag. */
+	mask = rctx->cs_vertex_buffer_state.enabled_mask;
+	while (mask) {
+		uint32_t i = u_bit_scan(&mask);
+		if (rctx->cs_vertex_buffer_state.vb[i].buffer == res) {
+			rctx->flags |= R600_CONTEXT_INV_VERTEX_CACHE;
+		}
+	}
+
+	/* Constant buffers of every shader type; the search stops at the first match. */
+	unsigned shader;
+	for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
+		struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
+		uint32_t mask = state->enabled_mask;
+
+		while (mask) {
+			unsigned i = u_bit_scan(&mask);
+			if (state->cb[i].buffer == res) {
+				rctx->flags |= R600_CONTEXT_INV_CONST_CACHE;
+
+				shader = PIPE_SHADER_TYPES; /* makes the outer loop condition fail */
+				break;
+			}
+		}
+	}
+
+	/* Sampler views of every shader type; the search stops at the first match. */
+	for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
+		struct r600_samplerview_state *state = &rctx->samplers[shader].views;
+		uint32_t mask = state->enabled_mask;
+
+		while (mask) {
+			uint32_t i = u_bit_scan(&mask);
+			if (&state->views[i]->tex_resource->b.b == res) {
+				rctx->flags |= R600_CONTEXT_INV_TEX_CACHE;
+
+				shader = PIPE_SHADER_TYPES; /* makes the outer loop condition fail */
+				break;
+			}
+		}
+	}
+
+	/* Streamout targets: flag STREAMOUT_FLUSH, FLUSH_AND_INV and WAIT_3D_IDLE. */
+	int i;
+	for (i = 0; i < rctx->streamout.num_targets; i++) {
+		if (rctx->streamout.targets[i]->b.buffer == res) {
+			rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH |
+				       R600_CONTEXT_FLUSH_AND_INV |
+				       R600_CONTEXT_WAIT_3D_IDLE;
+			break;
+		}
+	}
+
+	/* Colorbuffers (NULL slots skipped): flag CB, plus CB_META if cmask/fmask exist. */
+	for (i = 0; i < rctx->framebuffer.state.nr_cbufs; i++) {
+		if (rctx->framebuffer.state.cbufs[i] &&
+		    rctx->framebuffer.state.cbufs[i]->texture == res) {
+			struct r600_texture *tex =
+				(struct r600_texture*)rctx->framebuffer.state.cbufs[i]->texture;
+
+			rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
+				       R600_CONTEXT_FLUSH_AND_INV |
+				       R600_CONTEXT_WAIT_3D_IDLE;
+
+			if (tex->cmask_size || tex->fmask_size) {
+				rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META;
+			}
+			break;
+		}
+	}
+
+	/* Depth buffer: res may be the zsbuf texture (DB) or its htile buffer (DB_META). */
+	if (rctx->framebuffer.state.zsbuf) {
+		if (rctx->framebuffer.state.zsbuf->texture == res) {
+			rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB |
+				       R600_CONTEXT_FLUSH_AND_INV |
+				       R600_CONTEXT_WAIT_3D_IDLE;
+		}
+
+		struct r600_texture *tex =
+			(struct r600_texture*)rctx->framebuffer.state.zsbuf->texture;
+		if (tex && tex->htile && &tex->htile->b.b == res) {
+			rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB_META |
+				       R600_CONTEXT_FLUSH_AND_INV |
+				       R600_CONTEXT_WAIT_3D_IDLE;
+		}
+	}
+}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 66ea258..d5f54c4 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -834,6 +834,8 @@ boolean r600_dma_blit(struct pipe_context *ctx,
 			const struct pipe_box *src_box);
 void r600_emit_streamout_begin(struct r600_context *ctx, struct r600_atom *atom);
 void r600_emit_streamout_end(struct r600_context *ctx);
+void r600_flag_resource_cache_flush(struct r600_context *rctx,
+				    struct pipe_resource *res);
 
 /*
  * evergreen_hw_context.c
-- 
1.8.1.2



More information about the mesa-dev mailing list