[Mesa-dev] [PATCH 6/8] radeonsi: separate and disable streamout for now

Christian König deathsimple at vodafone.de
Wed Aug 8 04:05:06 PDT 2012


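I have my doubts that this code still works on SI, so move the streamout
code into its own file (si_state_streamout.c) and compile it out with
"#if 0" for now. The stream output caps now report 0, si_create_so_target()
returns NULL, and si_set_so_targets() asserts that no targets are bound.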

Signed-off-by: Christian König <deathsimple at vodafone.de>
---
 src/gallium/drivers/radeonsi/Makefile.sources      |    1 +
 .../drivers/radeonsi/evergreen_hw_context.c        |   39 ---
 src/gallium/drivers/radeonsi/r600.h                |    2 -
 src/gallium/drivers/radeonsi/r600_hw_context.c     |  132 +---------
 .../drivers/radeonsi/r600_hw_context_priv.h        |    7 -
 src/gallium/drivers/radeonsi/radeonsi_pipe.c       |    7 +
 src/gallium/drivers/radeonsi/si_state.c            |   68 -----
 src/gallium/drivers/radeonsi/si_state.h            |   13 +
 src/gallium/drivers/radeonsi/si_state_draw.c       |    2 +
 src/gallium/drivers/radeonsi/si_state_streamout.c  |  271 ++++++++++++++++++++
 10 files changed, 301 insertions(+), 241 deletions(-)
 create mode 100644 src/gallium/drivers/radeonsi/si_state_streamout.c

diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
index 8e27b6c..630afb8 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -12,4 +12,5 @@ C_SOURCES := \
 	r600_state_common.c \
 	radeonsi_pm4.c \
 	si_state.c \
+	si_state_streamout.c \
 	si_state_draw.c
diff --git a/src/gallium/drivers/radeonsi/evergreen_hw_context.c b/src/gallium/drivers/radeonsi/evergreen_hw_context.c
index d071617..56b068f 100644
--- a/src/gallium/drivers/radeonsi/evergreen_hw_context.c
+++ b/src/gallium/drivers/radeonsi/evergreen_hw_context.c
@@ -97,42 +97,3 @@ void si_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 	}
 	cs->cdw += ndwords;
 }
-
-void evergreen_flush_vgt_streamout(struct r600_context *ctx)
-{
-	struct radeon_winsys_cs *cs = ctx->cs;
-
-	cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
-	cs->buf[cs->cdw++] = (R_0084FC_CP_STRMOUT_CNTL - SI_CONFIG_REG_OFFSET) >> 2;
-	cs->buf[cs->cdw++] = 0;
-
-	cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
-	cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0);
-
-	cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);
-	cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */
-	cs->buf[cs->cdw++] = R_0084FC_CP_STRMOUT_CNTL >> 2;  /* register */
-	cs->buf[cs->cdw++] = 0;
-	cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value */
-	cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */
-	cs->buf[cs->cdw++] = 4; /* poll interval */
-}
-
-void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit)
-{
-	struct radeon_winsys_cs *cs = ctx->cs;
-
-	if (buffer_enable_bit) {
-		cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
-		cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
-		cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(1);
-
-		cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
-		cs->buf[cs->cdw++] = (R_028B98_VGT_STRMOUT_BUFFER_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
-		cs->buf[cs->cdw++] = S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit);
-	} else {
-		cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
-		cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
-		cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(0);
-	}
-}
diff --git a/src/gallium/drivers/radeonsi/r600.h b/src/gallium/drivers/radeonsi/r600.h
index 610b9da..f34d1ff 100644
--- a/src/gallium/drivers/radeonsi/r600.h
+++ b/src/gallium/drivers/radeonsi/r600.h
@@ -126,8 +126,6 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 void r600_context_emit_fence(struct r600_context *ctx, struct si_resource *fence,
                              unsigned offset, unsigned value);
 
-void r600_context_streamout_begin(struct r600_context *ctx);
-void r600_context_streamout_end(struct r600_context *ctx);
 void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t);
 void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
 
diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c b/src/gallium/drivers/radeonsi/r600_hw_context.c
index 509a8bf..6765ef8 100644
--- a/src/gallium/drivers/radeonsi/r600_hw_context.c
+++ b/src/gallium/drivers/radeonsi/r600_hw_context.c
@@ -182,7 +182,10 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 	struct radeon_winsys_cs *cs = ctx->cs;
 	struct r600_block *enable_block = NULL;
 	bool queries_suspended = false;
+
+#if 0
 	bool streamout_suspended = false;
+#endif
 
 	if (!cs->cdw)
 		return;
@@ -193,10 +196,12 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 		queries_suspended = true;
 	}
 
+#if 0
 	if (ctx->num_cs_dw_streamout_end) {
 		r600_context_streamout_end(ctx);
 		streamout_suspended = true;
 	}
+#endif
 
 	r600_flush_framebuffer(ctx, true);
 
@@ -213,10 +218,12 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 	ctx->pm4_dirty_cdwords = 0;
 	ctx->flags = 0;
 
+#if 0
 	if (streamout_suspended) {
 		ctx->streamout_start = TRUE;
 		ctx->streamout_append_bitmask = ~0;
 	}
+#endif
 
 	/* resume queries */
 	if (queries_suspended) {
@@ -638,131 +645,6 @@ void r600_context_queries_resume(struct r600_context *ctx)
 	}
 }
 
-void r600_context_streamout_begin(struct r600_context *ctx)
-{
-	struct radeon_winsys_cs *cs = ctx->cs;
-	struct r600_so_target **t = ctx->so_targets;
-	unsigned *strides = ctx->vs_shader_so_strides;
-	unsigned buffer_en, i;
-
-	buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
-		    (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) |
-		    (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) |
-		    (ctx->num_so_targets >= 4 && t[3] ? 8 : 0);
-
-	ctx->num_cs_dw_streamout_end =
-		12 + /* flush_vgt_streamout */
-		util_bitcount(buffer_en) * 8 +
-		3;
-
-	r600_need_cs_space(ctx,
-			   12 + /* flush_vgt_streamout */
-			   6 + /* enables */
-			   util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 +
-			   util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 +
-			   ctx->num_cs_dw_streamout_end, TRUE);
-
-	if (ctx->chip_class >= CAYMAN) {
-		evergreen_flush_vgt_streamout(ctx);
-		evergreen_set_streamout_enable(ctx, buffer_en);
-	}
-
-	for (i = 0; i < ctx->num_so_targets; i++) {
-#if 0
-		if (t[i]) {
-			t[i]->stride = strides[i];
-			t[i]->so_index = i;
-
-			cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0);
-			cs->buf[cs->cdw++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 +
-							16*i - SI_CONTEXT_REG_OFFSET) >> 2;
-			cs->buf[cs->cdw++] = (t[i]->b.buffer_offset +
-							t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */
-			cs->buf[cs->cdw++] = strides[i] >> 2;		   /* VTX_STRIDE (in DW) */
-			cs->buf[cs->cdw++] = 0;			   /* BUFFER_BASE */
-
-			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-			cs->buf[cs->cdw++] =
-				r600_context_bo_reloc(ctx, si_resource(t[i]->b.buffer),
-						      RADEON_USAGE_WRITE);
-
-			if (ctx->streamout_append_bitmask & (1 << i)) {
-				/* Append. */
-				cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
-				cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
-							       STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */
-				cs->buf[cs->cdw++] = 0; /* unused */
-				cs->buf[cs->cdw++] = 0; /* unused */
-				cs->buf[cs->cdw++] = 0; /* src address lo */
-				cs->buf[cs->cdw++] = 0; /* src address hi */
-
-				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-				cs->buf[cs->cdw++] =
-					r600_context_bo_reloc(ctx,  t[i]->filled_size,
-							      RADEON_USAGE_READ);
-			} else {
-				/* Start from the beginning. */
-				cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
-				cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
-							       STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */
-				cs->buf[cs->cdw++] = 0; /* unused */
-				cs->buf[cs->cdw++] = 0; /* unused */
-				cs->buf[cs->cdw++] = t[i]->b.buffer_offset >> 2; /* buffer offset in DW */
-				cs->buf[cs->cdw++] = 0; /* unused */
-			}
-		}
-#endif
-	}
-}
-
-void r600_context_streamout_end(struct r600_context *ctx)
-{
-	struct radeon_winsys_cs *cs = ctx->cs;
-	struct r600_so_target **t = ctx->so_targets;
-	unsigned i, flush_flags = 0;
-
-	evergreen_flush_vgt_streamout(ctx);
-
-	for (i = 0; i < ctx->num_so_targets; i++) {
-#if 0
-		if (t[i]) {
-			cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
-			cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
-						       STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
-						       STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */
-			cs->buf[cs->cdw++] = 0; /* dst address lo */
-			cs->buf[cs->cdw++] = 0; /* dst address hi */
-			cs->buf[cs->cdw++] = 0; /* unused */
-			cs->buf[cs->cdw++] = 0; /* unused */
-
-			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-			cs->buf[cs->cdw++] =
-				r600_context_bo_reloc(ctx,  t[i]->filled_size,
-						      RADEON_USAGE_WRITE);
-
-			flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i;
-		}
-#endif
-	}
-
-	evergreen_set_streamout_enable(ctx, 0);
-
-	ctx->atom_surface_sync.flush_flags |= flush_flags;
-	r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
-
-	ctx->num_cs_dw_streamout_end = 0;
-
-	/* XXX print some debug info */
-	for (i = 0; i < ctx->num_so_targets; i++) {
-		if (!t[i])
-			continue;
-
-		uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->cs_buf, ctx->cs, RADEON_USAGE_READ);
-		printf("FILLED_SIZE%i: %u\n", i, *ptr);
-		ctx->ws->buffer_unmap(t[i]->filled_size->cs_buf);
-	}
-}
-
 void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t)
 {
 	struct radeon_winsys_cs *cs = ctx->cs;
diff --git a/src/gallium/drivers/radeonsi/r600_hw_context_priv.h b/src/gallium/drivers/radeonsi/r600_hw_context_priv.h
index 6d458d4..c2a15eb 100644
--- a/src/gallium/drivers/radeonsi/r600_hw_context_priv.h
+++ b/src/gallium/drivers/radeonsi/r600_hw_context_priv.h
@@ -35,13 +35,6 @@
 #define PKT_COUNT_C                     0xC000FFFF
 #define PKT_COUNT_S(x)                  (((x) & 0x3FFF) << 16)
 
-/*
- * evergreen_hw_context.c
- */
-void evergreen_flush_vgt_streamout(struct r600_context *ctx);
-void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit);
-
-
 static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct si_resource *rbo,
 					     enum radeon_bo_usage usage)
 {
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
index 3c5eaf7..ad7e595 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
@@ -340,6 +340,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 		return 0;
 
 	/* Stream output. */
+#if 0
 	case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
 		return debug_get_bool_option("R600_STREAMOUT", FALSE) ? 4 : 0;
 	case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
@@ -347,6 +348,12 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
 	case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
 		return 16*4;
+#endif
+	case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+	case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+	case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+	case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+		return 0;
 
 	/* Texturing. */
 	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index e8a7b77..1d6d214 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2205,74 +2205,6 @@ static void si_set_index_buffer(struct pipe_context *ctx,
 }
 
 /*
- * Stream out
- */
-
-static struct pipe_stream_output_target *
-si_create_so_target(struct pipe_context *ctx,
-		    struct pipe_resource *buffer,
-		    unsigned buffer_offset,
-		    unsigned buffer_size)
-{
-	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct r600_so_target *t;
-	void *ptr;
-
-	t = CALLOC_STRUCT(r600_so_target);
-	if (!t) {
-		return NULL;
-	}
-
-	t->b.reference.count = 1;
-	t->b.context = ctx;
-	pipe_resource_reference(&t->b.buffer, buffer);
-	t->b.buffer_offset = buffer_offset;
-	t->b.buffer_size = buffer_size;
-
-	t->filled_size = si_resource_create_custom(ctx->screen, PIPE_USAGE_STATIC, 4);
-	ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
-	memset(ptr, 0, t->filled_size->buf->size);
-	rctx->ws->buffer_unmap(t->filled_size->cs_buf);
-
-	return &t->b;
-}
-
-static void si_so_target_destroy(struct pipe_context *ctx,
-				 struct pipe_stream_output_target *target)
-{
-	struct r600_so_target *t = (struct r600_so_target*)target;
-	pipe_resource_reference(&t->b.buffer, NULL);
-	si_resource_reference(&t->filled_size, NULL);
-	FREE(t);
-}
-
-static void si_set_so_targets(struct pipe_context *ctx,
-			      unsigned num_targets,
-			      struct pipe_stream_output_target **targets,
-			      unsigned append_bitmask)
-{
-	struct r600_context *rctx = (struct r600_context *)ctx;
-	unsigned i;
-
-	/* Stop streamout. */
-	if (rctx->num_so_targets) {
-		r600_context_streamout_end(rctx);
-	}
-
-	/* Set the new targets. */
-	for (i = 0; i < num_targets; i++) {
-		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
-	}
-	for (; i < rctx->num_so_targets; i++) {
-		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL);
-	}
-
-	rctx->num_so_targets = num_targets;
-	rctx->streamout_start = num_targets != 0;
-	rctx->streamout_append_bitmask = append_bitmask;
-}
-
-/*
  * Misc
  */
 static void si_set_polygon_stipple(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 6729fd4..a69722c 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -132,6 +132,19 @@ bool si_is_format_supported(struct pipe_screen *screen,
 void si_init_state_functions(struct r600_context *rctx);
 void si_init_config(struct r600_context *rctx);
 
+/* si_state_streamout.c */
+struct pipe_stream_output_target *
+si_create_so_target(struct pipe_context *ctx,
+		    struct pipe_resource *buffer,
+		    unsigned buffer_offset,
+		    unsigned buffer_size);
+void si_so_target_destroy(struct pipe_context *ctx,
+			  struct pipe_stream_output_target *target);
+void si_set_so_targets(struct pipe_context *ctx,
+		       unsigned num_targets,
+		       struct pipe_stream_output_target **targets,
+		       unsigned append_bitmask);
+
 /* si_state_draw.c */
 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo);
 
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 48a5f30..6670483 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -528,11 +528,13 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
 	si_pm4_emit_dirty(rctx);
 	rctx->pm4_dirty_cdwords = 0;
 
+#if 0
 	/* Enable stream out if needed. */
 	if (rctx->streamout_start) {
 		r600_context_streamout_begin(rctx);
 		rctx->streamout_start = FALSE;
 	}
+#endif
 
 	si_context_draw(rctx, &rdraw);
 
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
new file mode 100644
index 0000000..3410eb6
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *      Christian König <christian.koenig at amd.com>
+ */
+
+#include "radeonsi_pipe.h"
+#include "si_state.h"
+
+/*
+ * Stream out
+ */
+
+#if 0
+void si_context_streamout_begin(struct r600_context *ctx)
+{
+	struct radeon_winsys_cs *cs = ctx->cs;
+	struct si_so_target **t = ctx->so_targets;
+	unsigned *strides = ctx->vs_shader_so_strides;
+	unsigned buffer_en, i;
+
+	buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
+		    (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) |
+		    (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) |
+		    (ctx->num_so_targets >= 4 && t[3] ? 8 : 0);
+
+	ctx->num_cs_dw_streamout_end =
+		12 + /* flush_vgt_streamout */
+		util_bitcount(buffer_en) * 8 +
+		3;
+
+	si_need_cs_space(ctx,
+			   12 + /* flush_vgt_streamout */
+			   6 + /* enables */
+			   util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 +
+			   util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 +
+			   ctx->num_cs_dw_streamout_end, TRUE);
+
+	if (ctx->chip_class >= CAYMAN) {
+		evergreen_flush_vgt_streamout(ctx);
+		evergreen_set_streamout_enable(ctx, buffer_en);
+	}
+
+	for (i = 0; i < ctx->num_so_targets; i++) {
+#if 0
+		if (t[i]) {
+			t[i]->stride = strides[i];
+			t[i]->so_index = i;
+
+			cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0);
+			cs->buf[cs->cdw++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 +
+							16*i - SI_CONTEXT_REG_OFFSET) >> 2;
+			cs->buf[cs->cdw++] = (t[i]->b.buffer_offset +
+							t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */
+			cs->buf[cs->cdw++] = strides[i] >> 2;		   /* VTX_STRIDE (in DW) */
+			cs->buf[cs->cdw++] = 0;			   /* BUFFER_BASE */
+
+			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+			cs->buf[cs->cdw++] =
+				si_context_bo_reloc(ctx, si_resource(t[i]->b.buffer),
+						      RADEON_USAGE_WRITE);
+
+			if (ctx->streamout_append_bitmask & (1 << i)) {
+				/* Append. */
+				cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
+				cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
+							       STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */
+				cs->buf[cs->cdw++] = 0; /* unused */
+				cs->buf[cs->cdw++] = 0; /* unused */
+				cs->buf[cs->cdw++] = 0; /* src address lo */
+				cs->buf[cs->cdw++] = 0; /* src address hi */
+
+				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+				cs->buf[cs->cdw++] =
+					si_context_bo_reloc(ctx,  t[i]->filled_size,
+							      RADEON_USAGE_READ);
+			} else {
+				/* Start from the beginning. */
+				cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
+				cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
+							       STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */
+				cs->buf[cs->cdw++] = 0; /* unused */
+				cs->buf[cs->cdw++] = 0; /* unused */
+				cs->buf[cs->cdw++] = t[i]->b.buffer_offset >> 2; /* buffer offset in DW */
+				cs->buf[cs->cdw++] = 0; /* unused */
+			}
+		}
+#endif
+	}
+}
+
+void si_context_streamout_end(struct r600_context *ctx)
+{
+	struct radeon_winsys_cs *cs = ctx->cs;
+	struct si_so_target **t = ctx->so_targets;
+	unsigned i, flush_flags = 0;
+
+	evergreen_flush_vgt_streamout(ctx);
+
+	for (i = 0; i < ctx->num_so_targets; i++) {
+#if 0
+		if (t[i]) {
+			cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
+			cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
+						       STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
+						       STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */
+			cs->buf[cs->cdw++] = 0; /* dst address lo */
+			cs->buf[cs->cdw++] = 0; /* dst address hi */
+			cs->buf[cs->cdw++] = 0; /* unused */
+			cs->buf[cs->cdw++] = 0; /* unused */
+
+			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+			cs->buf[cs->cdw++] =
+				si_context_bo_reloc(ctx,  t[i]->filled_size,
+						      RADEON_USAGE_WRITE);
+
+			flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i;
+		}
+#endif
+	}
+
+	evergreen_set_streamout_enable(ctx, 0);
+
+	ctx->atom_surface_sync.flush_flags |= flush_flags;
+	si_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
+
+	ctx->num_cs_dw_streamout_end = 0;
+
+	/* XXX print some debug info */
+	for (i = 0; i < ctx->num_so_targets; i++) {
+		if (!t[i])
+			continue;
+
+		uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->cs_buf, ctx->cs, RADEON_USAGE_READ);
+		printf("FILLED_SIZE%i: %u\n", i, *ptr);
+		ctx->ws->buffer_unmap(t[i]->filled_size->cs_buf);
+	}
+}
+
+void evergreen_flush_vgt_streamout(struct si_context *ctx)
+{
+	struct radeon_winsys_cs *cs = ctx->cs;
+
+	cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
+	cs->buf[cs->cdw++] = (R_0084FC_CP_STRMOUT_CNTL - SI_CONFIG_REG_OFFSET) >> 2;
+	cs->buf[cs->cdw++] = 0;
+
+	cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+	cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0);
+
+	cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);
+	cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */
+	cs->buf[cs->cdw++] = R_0084FC_CP_STRMOUT_CNTL >> 2;  /* register */
+	cs->buf[cs->cdw++] = 0;
+	cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value */
+	cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */
+	cs->buf[cs->cdw++] = 4; /* poll interval */
+}
+
+void evergreen_set_streamout_enable(struct si_context *ctx, unsigned buffer_enable_bit)
+{
+	struct radeon_winsys_cs *cs = ctx->cs;
+
+	if (buffer_enable_bit) {
+		cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
+		cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
+		cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(1);
+
+		cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
+		cs->buf[cs->cdw++] = (R_028B98_VGT_STRMOUT_BUFFER_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
+		cs->buf[cs->cdw++] = S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit);
+	} else {
+		cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
+		cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
+		cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(0);
+	}
+}
+
+#endif
+
+struct pipe_stream_output_target *
+si_create_so_target(struct pipe_context *ctx,
+		    struct pipe_resource *buffer,
+		    unsigned buffer_offset,
+		    unsigned buffer_size)
+{
+#if 0
+	struct si_context *rctx = (struct r600_context *)ctx;
+	struct si_so_target *t;
+	void *ptr;
+
+	t = CALLOC_STRUCT(si_so_target);
+	if (!t) {
+		return NULL;
+	}
+
+	t->b.reference.count = 1;
+	t->b.context = ctx;
+	pipe_resource_reference(&t->b.buffer, buffer);
+	t->b.buffer_offset = buffer_offset;
+	t->b.buffer_size = buffer_size;
+
+	t->filled_size = si_resource_create_custom(ctx->screen, PIPE_USAGE_STATIC, 4);
+	ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
+	memset(ptr, 0, t->filled_size->buf->size);
+	rctx->ws->buffer_unmap(t->filled_size->cs_buf);
+
+	return &t->b;
+#endif
+	return NULL;
+}
+
+void si_so_target_destroy(struct pipe_context *ctx,
+			  struct pipe_stream_output_target *target)
+{
+#if 0
+	struct si_so_target *t = (struct r600_so_target*)target;
+	pipe_resource_reference(&t->b.buffer, NULL);
+	si_resource_reference(&t->filled_size, NULL);
+	FREE(t);
+#endif
+}
+
+void si_set_so_targets(struct pipe_context *ctx,
+		       unsigned num_targets,
+		       struct pipe_stream_output_target **targets,
+		       unsigned append_bitmask)
+{
+	assert(num_targets == 0);
+#if 0
+	struct si_context *rctx = (struct r600_context *)ctx;
+	unsigned i;
+
+	/* Stop streamout. */
+	if (rctx->num_so_targets) {
+		si_context_streamout_end(rctx);
+	}
+
+	/* Set the new targets. */
+	for (i = 0; i < num_targets; i++) {
+		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
+	}
+	for (; i < rctx->num_so_targets; i++) {
+		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL);
+	}
+
+	rctx->num_so_targets = num_targets;
+	rctx->streamout_start = num_targets != 0;
+	rctx->streamout_append_bitmask = append_bitmask;
+#endif
+}
-- 
1.7.9.5



More information about the mesa-dev mailing list