[Mesa-dev] [PATCH 10/13] radeonsi: improve and fix streamout flushing

Marek Olšák maraeo at gmail.com
Mon Jan 5 12:21:00 PST 2015


From: Marek Olšák <marek.olsak at amd.com>

- We don't usually need to flush TC L2.
- We should flush KCACHE.
  (This is not really an issue right now, since we always flush KCACHE when
   updating descriptors, but it could become a problem if we used CE, which
   doesn't require flushing KCACHE.)
- Add an explicit VS_PARTIAL_FLUSH flag.
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 30 +++++++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_pipe.h        | 13 ++++++------
 src/gallium/drivers/radeonsi/si_state_draw.c  |  7 +++----
 3 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 454e12c..1644ec7 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -856,6 +856,36 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 	unsigned old_num_targets = sctx->b.streamout.num_targets;
 	unsigned i, bufidx;
 
+	/* We are going to unbind the buffers. Mark which caches need to be flushed. */
+	if (sctx->b.streamout.num_targets && sctx->b.streamout.begin_emitted) {
+		/* Since streamout uses vector writes which go through TC L2
+		 * and most other clients can use TC L2 as well, we don't need
+		 * to flush it.
+		 *
+		 * The only case which requires flushing it is VGT DMA index
+		 * fetching, which is a rare case. Thus, flag the TC L2
+		 * dirtiness in the resource and handle it when index fetching
+		 * is used.
+		 */
+		for (i = 0; i < sctx->b.streamout.num_targets; i++)
+			if (sctx->b.streamout.targets[i])
+				r600_resource(sctx->b.streamout.targets[i]->b.buffer)->TC_L2_dirty = true;
+
+		/* Invalidate the scalar cache in case a streamout buffer is
+		 * going to be used as a constant buffer.
+		 *
+		 * Invalidate TC L1, because streamout bypasses it (done by
+		 * setting GLC=1 in the store instruction), but it can contain
+		 * outdated data of streamout buffers.
+		 *
+		 * VS_PARTIAL_FLUSH is required if the buffers are going to be
+		 * used as an input immediately.
+		 */
+		sctx->b.flags |= SI_CONTEXT_INV_KCACHE |
+				 SI_CONTEXT_INV_TC_L1 |
+				 SI_CONTEXT_VS_PARTIAL_FLUSH;
+	}
+
 	/* Streamout buffers must be bound in 2 places:
 	 * 1) in VGT by setting the VGT_STRMOUT registers
 	 * 2) as shader resources
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 4acc9f9..b360880 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -63,13 +63,14 @@
 #define SI_CONTEXT_FLUSH_AND_INV_DB	(R600_CONTEXT_PRIVATE_FLAG << 6)
 #define SI_CONTEXT_FLUSH_AND_INV_CB	(R600_CONTEXT_PRIVATE_FLAG << 7)
 /* Engine synchronization. */
-#define SI_CONTEXT_PS_PARTIAL_FLUSH	(R600_CONTEXT_PRIVATE_FLAG << 8)
-#define SI_CONTEXT_CS_PARTIAL_FLUSH	(R600_CONTEXT_PRIVATE_FLAG << 9)
-#define SI_CONTEXT_VGT_FLUSH		(R600_CONTEXT_PRIVATE_FLAG << 10)
-#define SI_CONTEXT_VGT_STREAMOUT_SYNC	(R600_CONTEXT_PRIVATE_FLAG << 11)
+#define SI_CONTEXT_VS_PARTIAL_FLUSH	(R600_CONTEXT_PRIVATE_FLAG << 8)
+#define SI_CONTEXT_PS_PARTIAL_FLUSH	(R600_CONTEXT_PRIVATE_FLAG << 9)
+#define SI_CONTEXT_CS_PARTIAL_FLUSH	(R600_CONTEXT_PRIVATE_FLAG << 10)
+#define SI_CONTEXT_VGT_FLUSH		(R600_CONTEXT_PRIVATE_FLAG << 11)
+#define SI_CONTEXT_VGT_STREAMOUT_SYNC	(R600_CONTEXT_PRIVATE_FLAG << 12)
 /* Compute only. */
-#define SI_CONTEXT_FLUSH_WITH_INV_L2	(R600_CONTEXT_PRIVATE_FLAG << 12) /* TODO: merge with TC? */
-#define SI_CONTEXT_FLAG_COMPUTE		(R600_CONTEXT_PRIVATE_FLAG << 13)
+#define SI_CONTEXT_FLUSH_WITH_INV_L2	(R600_CONTEXT_PRIVATE_FLAG << 13) /* TODO: merge with TC? */
+#define SI_CONTEXT_FLAG_COMPUTE		(R600_CONTEXT_PRIVATE_FLAG << 14)
 
 #define SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER (SI_CONTEXT_FLUSH_AND_INV_CB | \
 					      SI_CONTEXT_FLUSH_AND_INV_CB_META | \
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index e6916c1..3703e5f 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -388,9 +388,9 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
 			cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
 	}
 
-	if (sctx->flags & (SI_CONTEXT_INV_TC_L1 | R600_CONTEXT_STREAMOUT_FLUSH))
+	if (sctx->flags & SI_CONTEXT_INV_TC_L1)
 		cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
-	if (sctx->flags & (SI_CONTEXT_INV_TC_L2 | R600_CONTEXT_STREAMOUT_FLUSH))
+	if (sctx->flags & SI_CONTEXT_INV_TC_L2)
 		cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
 
 	if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
@@ -444,8 +444,7 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
 	if (sctx->flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
 		radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-	} else if (sctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
-		/* Needed if streamout buffers are going to be used as a source. */
+	} else if (sctx->flags & SI_CONTEXT_VS_PARTIAL_FLUSH) {
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
 		radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
 	}
-- 
2.1.0



More information about the mesa-dev mailing list