[Mesa-dev] [PATCH 4/5] r600g: atomize streamout enabling

Marek Olšák maraeo at gmail.com
Tue Feb 26 13:41:08 PST 2013


This doesn't fix any issue we know of, but there indeed is a week spot
in draw_vbo where streamout can fail. After streamout is enabled,
the need_cs_space call can flush the context, which causes the streamout
to be disabled right after it was enabled and bad things happen.

One way to fix it is to atomize the beginning part, so that no context flush
can happen between streamout enabling and the first drawing.
---
 src/gallium/drivers/r600/evergreen_compute.c |    4 --
 src/gallium/drivers/r600/evergreen_state.c   |    1 +
 src/gallium/drivers/r600/r600.h              |    2 -
 src/gallium/drivers/r600/r600_blit.c         |    4 +-
 src/gallium/drivers/r600/r600_buffer.c       |   10 ++--
 src/gallium/drivers/r600/r600_hw_context.c   |   69 +++++++++-----------------
 src/gallium/drivers/r600/r600_pipe.h         |   29 +++++++----
 src/gallium/drivers/r600/r600_state.c        |    1 +
 src/gallium/drivers/r600/r600_state_common.c |   61 ++++++++++++++++-------
 9 files changed, 94 insertions(+), 87 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 128464e..8b3104e 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -438,10 +438,6 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
 	ctx->ws->buffer_wait(onebo->buf, 0);
 
 	COMPUTE_DBG("...\n");
-
-	ctx->streamout_start = TRUE;
-	ctx->streamout_append_bitmask = ~0;
-
 }
 
 
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 4a91942..b804477 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2635,6 +2635,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 	r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
 	r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 8);
 	r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
+	r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0);
 
 	rctx->context.create_blend_state = evergreen_create_blend_state;
 	rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state;
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 11dbb3b..dfd18b8 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -168,8 +168,6 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen
                              unsigned offset, unsigned value);
 void r600_flush_emit(struct r600_context *ctx);
 
-void r600_context_streamout_begin(struct r600_context *ctx);
-void r600_context_streamout_end(struct r600_context *ctx);
 void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
 void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw);
 void r600_dma_copy(struct r600_context *rctx,
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 0d879c9..8fc83aa 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -58,8 +58,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
 	util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer_state.vb);
 	util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_fetch_shader.cso);
 	util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
-	util_blitter_save_so_targets(rctx->blitter, rctx->num_so_targets,
-				     (struct pipe_stream_output_target**)rctx->so_targets);
+	util_blitter_save_so_targets(rctx->blitter, rctx->streamout.num_targets,
+				     (struct pipe_stream_output_target**)rctx->streamout.targets);
 	util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer_state.cso);
 
 	if (op & R600_SAVE_FRAGMENT_STATE) {
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 8b2f505..1f55b82 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -126,11 +126,11 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 				}
 			}
 			/* Streamout buffers. */
-			for (i = 0; i < rctx->num_so_targets; i++) {
-				if (rctx->so_targets[i]->b.buffer == &rbuffer->b.b) {
-					r600_context_streamout_end(rctx);
-					rctx->streamout_start = TRUE;
-					rctx->streamout_append_bitmask = ~0;
+			for (i = 0; i < rctx->streamout.num_targets; i++) {
+				if (rctx->streamout.targets[i]->b.buffer == &rbuffer->b.b) {
+					r600_emit_streamout_end(rctx);
+					rctx->streamout.append_bitmask = rctx->streamout.enabled_mask;
+					r600_streamout_buffers_dirty(rctx);
 				}
 			}
 			/* Constant buffers. */
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 6a87abc..39cd999 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -404,7 +404,9 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 	num_dw += ctx->num_cs_dw_nontimer_queries_suspend;
 
 	/* Count in streamout_end at the end of CS. */
-	num_dw += ctx->num_cs_dw_streamout_end;
+	if (ctx->streamout.begin_emitted) {
+		num_dw += ctx->streamout.num_dw_for_end;
+	}
 
 	/* Count in render_condition(NULL) at the end of CS. */
 	if (ctx->predicate_drawing) {
@@ -727,7 +729,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 		return;
 
 	ctx->nontimer_queries_suspended = false;
-	ctx->streamout_suspended = false;
+	ctx->streamout.suspended = false;
 
 	/* suspend queries */
 	if (ctx->num_cs_dw_nontimer_queries_suspend) {
@@ -735,9 +737,9 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 		ctx->nontimer_queries_suspended = true;
 	}
 
-	if (ctx->num_cs_dw_streamout_end) {
-		r600_context_streamout_end(ctx);
-		ctx->streamout_suspended = true;
+	if (ctx->streamout.begin_emitted) {
+		r600_emit_streamout_end(ctx);
+		ctx->streamout.suspended = true;
 	}
 
 	/* flush is needed to avoid lockups on some chips with user fences
@@ -854,9 +856,9 @@ void r600_begin_new_cs(struct r600_context *ctx)
 		r600_sampler_states_dirty(ctx, &samplers->states);
 	}
 
-	if (ctx->streamout_suspended) {
-		ctx->streamout_start = TRUE;
-		ctx->streamout_append_bitmask = ~0;
+	if (ctx->streamout.suspended) {
+		ctx->streamout.append_bitmask = ctx->streamout.enabled_mask;
+		r600_streamout_buffers_dirty(ctx);
 	}
 
 	/* resume queries */
@@ -940,48 +942,23 @@ static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_
 	}
 }
 
-void r600_context_streamout_begin(struct r600_context *ctx)
+void r600_emit_streamout_begin(struct r600_context *ctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
-	struct r600_so_target **t = ctx->so_targets;
+	struct r600_so_target **t = ctx->streamout.targets;
 	unsigned *stride_in_dw = ctx->vs_shader->so.stride;
-	unsigned buffer_en, i, update_flags = 0;
+	unsigned i, update_flags = 0;
 	uint64_t va;
-	unsigned num_cs_dw_streamout_end;
-
-	buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
-		    (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) |
-		    (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) |
-		    (ctx->num_so_targets >= 4 && t[3] ? 8 : 0);
-
-	num_cs_dw_streamout_end =
-		12 + /* flush_vgt_streamout */
-		util_bitcount(buffer_en) * 8 + /* STRMOUT_BUFFER_UPDATE */
-		3 /* set_streamout_enable(0) */;
-
-	r600_need_cs_space(ctx,
-			   12 + /* flush_vgt_streamout */
-			   6 + /* set_streamout_enable */
-			   util_bitcount(buffer_en) * 7 + /* SET_CONTEXT_REG */
-			   (ctx->family >= CHIP_RS780 &&
-			    ctx->family <= CHIP_RV740 ? util_bitcount(buffer_en) * 5 : 0) + /* STRMOUT_BASE_UPDATE */
-			   util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 + /* STRMOUT_BUFFER_UPDATE */
-			   util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 + /* STRMOUT_BUFFER_UPDATE */
-			   (ctx->family > CHIP_R600 && ctx->family < CHIP_RS780 ? 2 : 0) + /* SURFACE_BASE_UPDATE */
-			   num_cs_dw_streamout_end, TRUE);
-
-	/* This must be set after r600_need_cs_space. */
-	ctx->num_cs_dw_streamout_end = num_cs_dw_streamout_end;
 
 	if (ctx->chip_class >= EVERGREEN) {
 		evergreen_flush_vgt_streamout(ctx);
-		evergreen_set_streamout_enable(ctx, buffer_en);
+		evergreen_set_streamout_enable(ctx, ctx->streamout.enabled_mask);
 	} else {
 		r600_flush_vgt_streamout(ctx);
-		r600_set_streamout_enable(ctx, buffer_en);
+		r600_set_streamout_enable(ctx, ctx->streamout.enabled_mask);
 	}
 
-	for (i = 0; i < ctx->num_so_targets; i++) {
+	for (i = 0; i < ctx->streamout.num_targets; i++) {
 		if (t[i]) {
 			t[i]->stride_in_dw = stride_in_dw[i];
 			t[i]->so_index = i;
@@ -1014,7 +991,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
 							      RADEON_USAGE_WRITE);
 			}
 
-			if (ctx->streamout_append_bitmask & (1 << i)) {
+			if (ctx->streamout.append_bitmask & (1 << i)) {
 				va = r600_resource_va(&ctx->screen->screen,
 						      (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset;
 				/* Append. */
@@ -1043,16 +1020,17 @@ void r600_context_streamout_begin(struct r600_context *ctx)
 		}
 	}
 
-	if (ctx->family > CHIP_R600 && ctx->family < CHIP_RS780) {
+	if (ctx->family > CHIP_R600 && ctx->family < CHIP_RV770) {
 		cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0);
 		cs->buf[cs->cdw++] = update_flags;
 	}
+	ctx->streamout.begin_emitted = true;
 }
 
-void r600_context_streamout_end(struct r600_context *ctx)
+void r600_emit_streamout_end(struct r600_context *ctx)
 {
 	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
-	struct r600_so_target **t = ctx->so_targets;
+	struct r600_so_target **t = ctx->streamout.targets;
 	unsigned i;
 	uint64_t va;
 
@@ -1062,7 +1040,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
 		r600_flush_vgt_streamout(ctx);
 	}
 
-	for (i = 0; i < ctx->num_so_targets; i++) {
+	for (i = 0; i < ctx->streamout.num_targets; i++) {
 		if (t[i]) {
 			va = r600_resource_va(&ctx->screen->screen,
 					      (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset;
@@ -1079,7 +1057,6 @@ void r600_context_streamout_end(struct r600_context *ctx)
 			cs->buf[cs->cdw++] =
 				r600_context_bo_reloc(ctx,  &ctx->rings.gfx, t[i]->buf_filled_size,
 						      RADEON_USAGE_WRITE);
-
 		}
 	}
 
@@ -1093,7 +1070,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
 		r600_set_streamout_enable(ctx, 0);
 	}
 	ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
-	ctx->num_cs_dw_streamout_end = 0;
+	ctx->streamout.begin_emitted = false;
 }
 
 /* The max number of bytes to copy per packet. */
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 2a5457e..570a284 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -35,7 +35,7 @@
 #include "r600_resource.h"
 #include "evergreen_compute.h"
 
-#define R600_NUM_ATOMS 37
+#define R600_NUM_ATOMS 38
 
 #define R600_TRACE_CS 0
 
@@ -416,6 +416,19 @@ struct r600_fetch_shader {
 	unsigned			offset;
 };
 
+struct r600_streamout {
+	struct r600_atom		begin_atom;
+	bool				begin_emitted;
+	unsigned			num_dw_for_end;
+
+	unsigned			enabled_mask;
+	unsigned			num_targets;
+	struct r600_so_target		*targets[PIPE_MAX_SO_BUFFERS];
+
+	unsigned			append_bitmask;
+	bool				suspended;
+};
+
 struct r600_ring {
 	struct radeon_winsys_cs		*cs;
 	bool				flushing;
@@ -504,6 +517,7 @@ struct r600_context {
 	struct r600_vertexbuf_state	vertex_buffer_state;
 	/** Vertex buffers for compute shaders */
 	struct r600_vertexbuf_state	cs_vertex_buffer_state;
+	struct r600_streamout		streamout;
 
 	/* Additional context states. */
 	unsigned			flags;
@@ -538,14 +552,6 @@ struct r600_context {
 	unsigned			current_render_cond_mode;
 	boolean				predicate_drawing;
 
-	/* Streamout state. */
-	unsigned			num_cs_dw_streamout_end;
-	unsigned			num_so_targets;
-	struct r600_so_target		*so_targets[PIPE_MAX_SO_BUFFERS];
-	boolean				streamout_start;
-	unsigned			streamout_append_bitmask;
-	bool				streamout_suspended;
-
 	/* Deprecated state management. */
 	struct r600_range		*range;
 	unsigned			nblocks;
@@ -717,6 +723,10 @@ unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
 				   const unsigned char *swizzle_view,
 				   boolean vtx);
 
+/* r600_hw_context.c */
+void r600_emit_streamout_begin(struct r600_context *ctx, struct r600_atom *atom);
+void r600_emit_streamout_end(struct r600_context *ctx);
+
 /* r600_state_common.c */
 void r600_init_common_state_functions(struct r600_context *rctx);
 void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom);
@@ -736,6 +746,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx,
 void r600_sampler_states_dirty(struct r600_context *rctx,
 			       struct r600_sampler_states *state);
 void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state);
+void r600_streamout_buffers_dirty(struct r600_context *rctx);
 void r600_draw_rectangle(struct blitter_context *blitter,
 			 int x1, int y1, int x2, int y2, float depth,
 			 enum blitter_attrib_type type, const union pipe_color_union *attrib);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index c6559bb..d935cd6 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2330,6 +2330,7 @@ void r600_init_state_functions(struct r600_context *rctx)
 	r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
 	r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 8);
 	r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5);
+	r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0);
 
 	rctx->context.create_blend_state = r600_create_blend_state;
 	rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 1654233..4c68506 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1004,31 +1004,60 @@ static void r600_so_target_destroy(struct pipe_context *ctx,
 	FREE(t);
 }
 
-static void r600_set_so_targets(struct pipe_context *ctx,
-				unsigned num_targets,
-				struct pipe_stream_output_target **targets,
-				unsigned append_bitmask)
+void r600_streamout_buffers_dirty(struct r600_context *rctx)
+{
+	rctx->streamout.num_dw_for_end =
+		12 + /* flush_vgt_streamout */
+		util_bitcount(rctx->streamout.enabled_mask) * 8 + /* STRMOUT_BUFFER_UPDATE */
+		3 /* set_streamout_enable(0) */;
+
+	rctx->streamout.begin_atom.num_dw =
+		12 + /* flush_vgt_streamout */
+		6 + /* set_streamout_enable */
+		util_bitcount(rctx->streamout.enabled_mask) * 7 + /* SET_CONTEXT_REG */
+		(rctx->family >= CHIP_RS780 &&
+		 rctx->family <= CHIP_RV740 ? util_bitcount(rctx->streamout.enabled_mask) * 5 : 0) + /* STRMOUT_BASE_UPDATE */
+		util_bitcount(rctx->streamout.enabled_mask & rctx->streamout.append_bitmask) * 8 + /* STRMOUT_BUFFER_UPDATE */
+		util_bitcount(rctx->streamout.enabled_mask & ~rctx->streamout.append_bitmask) * 6 + /* STRMOUT_BUFFER_UPDATE */
+		(rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0) + /* SURFACE_BASE_UPDATE */
+		rctx->streamout.num_dw_for_end;
+
+	rctx->streamout.begin_atom.dirty = true;
+}
+
+static void r600_set_streamout_targets(struct pipe_context *ctx,
+				       unsigned num_targets,
+				       struct pipe_stream_output_target **targets,
+				       unsigned append_bitmask)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	unsigned i;
 
 	/* Stop streamout. */
-	if (rctx->num_so_targets && !rctx->streamout_start) {
-		r600_context_streamout_end(rctx);
+	if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) {
+		r600_emit_streamout_end(rctx);
 	}
 
 	/* Set the new targets. */
 	for (i = 0; i < num_targets; i++) {
-		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
+		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], targets[i]);
 		r600_context_add_resource_size(ctx, targets[i]->buffer);
 	}
-	for (; i < rctx->num_so_targets; i++) {
-		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL);
+	for (; i < rctx->streamout.num_targets; i++) {
+		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], NULL);
 	}
 
-	rctx->num_so_targets = num_targets;
-	rctx->streamout_start = num_targets != 0;
-	rctx->streamout_append_bitmask = append_bitmask;
+	rctx->streamout.enabled_mask = (num_targets >= 1 && targets[0] ? 1 : 0) |
+				       (num_targets >= 2 && targets[1] ? 2 : 0) |
+				       (num_targets >= 3 && targets[2] ? 4 : 0) |
+				       (num_targets >= 4 && targets[3] ? 8 : 0);
+
+	rctx->streamout.num_targets = num_targets;
+	rctx->streamout.append_bitmask = append_bitmask;
+
+	if (num_targets) {
+		r600_streamout_buffers_dirty(rctx);
+	}
 }
 
 static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
@@ -1342,12 +1371,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		info.index_bias = info.start;
 	}
 
-	/* Enable stream out if needed. */
-	if (rctx->streamout_start) {
-		r600_context_streamout_begin(rctx);
-		rctx->streamout_start = FALSE;
-	}
-
 	/* Set the index offset and multi primitive */
 	if (rctx->vgt2_state.vgt_indx_offset != info.index_bias) {
 		rctx->vgt2_state.vgt_indx_offset = info.index_bias;
@@ -1754,7 +1777,7 @@ void r600_init_common_state_functions(struct r600_context *rctx)
 	rctx->context.texture_barrier = r600_texture_barrier;
 	rctx->context.create_stream_output_target = r600_create_so_target;
 	rctx->context.stream_output_target_destroy = r600_so_target_destroy;
-	rctx->context.set_stream_output_targets = r600_set_so_targets;
+	rctx->context.set_stream_output_targets = r600_set_streamout_targets;
 	rctx->context.draw_vbo = r600_draw_vbo;
 }
 
-- 
1.7.10.4



More information about the mesa-dev mailing list