[Mesa-dev] [PATCH 04/13] r600g: do fine-grained vertex buffer updates

Marek Olšák maraeo at gmail.com
Sat Jul 14 19:35:35 PDT 2012


If only some buffers are changed, the other ones don't have to be re-emitted.
This uses bitmasks of enabled and dirty buffers just like
emit_constant_buffers does.
---
 src/gallium/drivers/r600/evergreen_compute.c |   12 +++---
 src/gallium/drivers/r600/evergreen_state.c   |   23 +++--------
 src/gallium/drivers/r600/r600_blit.c         |    4 +-
 src/gallium/drivers/r600/r600_buffer.c       |   15 ++++---
 src/gallium/drivers/r600/r600_hw_context.c   |    5 ++-
 src/gallium/drivers/r600/r600_pipe.h         |   12 +++---
 src/gallium/drivers/r600/r600_state.c        |   23 ++++++-----
 src/gallium/drivers/r600/r600_state_common.c |   56 +++++++++++++++++++++-----
 8 files changed, 87 insertions(+), 63 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 947a328..caaa752 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -89,14 +89,15 @@ static void evergreen_cs_set_vertex_buffer(
 	unsigned offset,
 	struct pipe_resource * buffer)
 {
-	struct pipe_vertex_buffer *vb = &rctx->cs_vertex_buffer[vb_index];
-	struct r600_vertexbuf_state * state = &rctx->cs_vertex_buffer_state;
+	struct r600_vertexbuf_state *state = &rctx->cs_vertex_buffer_state;
+	struct pipe_vertex_buffer *vb = &state->vb[vb_index];
 	vb->stride = 1;
 	vb->buffer_offset = offset;
 	vb->buffer = buffer;
 	vb->user_buffer = NULL;
 
 	r600_inval_vertex_cache(rctx);
+	state->enabled_mask |= 1 << vb_index;
 	state->dirty_mask |= 1 << vb_index;
 	r600_atom_dirty(rctx, &state->atom);
 }
@@ -369,7 +370,7 @@ static void compute_emit_cs(struct r600_context *ctx)
 	r600_context_pipe_state_emit(ctx, cb_state, RADEON_CP_PACKET3_COMPUTE_MODE);
 
 	/* Emit vertex buffer state */
-	ctx->cs_vertex_buffer_state.atom.num_dw = 12 * ctx->nr_cs_vertex_buffers;
+	ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
 	r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);
 
 	for (i = 0; i < get_compute_resource_num(); i++) {
@@ -493,10 +494,8 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_,
 			evergreen_cs_set_vertex_buffer(ctx, vtx_id,
 					buffer->chunk->start_in_dw * 4,
 					resources[i]->base.texture);
-			ctx->nr_cs_vertex_buffers = vtx_id + 1;
 		}
 	}
-
 }
 
 static void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
@@ -740,7 +739,8 @@ void evergreen_init_compute_state_functions(struct r600_context *ctx)
 
 	/* We always use at least two vertex buffers for compute, one for
          * parameters and one for global memory */
-	ctx->nr_cs_vertex_buffers = 2;
+	ctx->cs_vertex_buffer_state.enabled_mask =
+	ctx->cs_vertex_buffer_state.dirty_mask = 1 | 2;
 }
 
 
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 0d2fa30..72ddc0b 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1772,8 +1772,6 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
 
 static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 					  struct r600_vertexbuf_state *state,
-					  struct pipe_vertex_buffer *vertex_buffers,
-					  unsigned vb_count,
 					  unsigned resource_offset,
 					  unsigned pkt_flags)
 {
@@ -1784,13 +1782,11 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 		struct pipe_vertex_buffer *vb;
 		struct r600_resource *rbuffer;
 		uint64_t va;
-		unsigned buffer_index = ffs(dirty_mask) - 1;
+		unsigned buffer_index = u_bit_scan(&dirty_mask);
 
-		vb = &vertex_buffers[buffer_index];
+		vb = &state->vb[buffer_index];
 		rbuffer = (struct r600_resource*)vb->buffer;
-		if (!rbuffer) {
-			goto next;
-		}
+		assert(rbuffer);
 
 		va = r600_resource_va(&rctx->screen->screen, &rbuffer->b.b);
 		va += vb->buffer_offset;
@@ -1816,26 +1812,19 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 
 		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
 		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
-
-next:
-		dirty_mask &= ~(1 << buffer_index);
 	}
 	state->dirty_mask = 0;
 }
 
 static void evergreen_fs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
 {
-	evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state,
-					rctx->vertex_buffer,
-					rctx->nr_vertex_buffers, 992, 0);
+	evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state, 992, 0);
 }
 
 static void evergreen_cs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
 {
-	evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state,
-					rctx->cs_vertex_buffer,
-					rctx->nr_cs_vertex_buffers, 816,
-					RADEON_CP_PACKET3_COMPUTE_MODE);
+	evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state, 816,
+				      RADEON_CP_PACKET3_COMPUTE_MODE);
 }
 
 static void evergreen_emit_constant_buffers(struct r600_context *rctx,
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 98f8b84..ca5aaf8 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -60,8 +60,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
 		util_blitter_save_viewport(rctx->blitter, &rctx->viewport);
 	}
 	util_blitter_save_vertex_buffers(rctx->blitter,
-					 rctx->nr_vertex_buffers,
-					 rctx->vertex_buffer);
+					 util_last_bit(rctx->vertex_buffer_state.enabled_mask),
+					 rctx->vertex_buffer_state.vb);
 	util_blitter_save_so_targets(rctx->blitter, rctx->num_so_targets,
 				     (struct pipe_stream_output_target**)rctx->so_targets);
 
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 8e2deb1..165427e 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -93,7 +93,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
 		/* Check if mapping this buffer would cause waiting for the GPU. */
 		if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
 		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
-			unsigned i;
+			unsigned i, mask;
 
 			/* Discard the buffer. */
 			pb_reference(&rbuffer->buf, NULL);
@@ -105,13 +105,12 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
 
 			/* We changed the buffer, now we need to bind it where the old one was bound. */
 			/* Vertex buffers. */
-			for (i = 0; i < rctx->nr_vertex_buffers; i++) {
-				if (rctx->vertex_buffer[i].buffer == &rbuffer->b.b) {
-					struct r600_vertexbuf_state * state =
-						&rctx->vertex_buffer_state;
-					state->dirty_mask |= 1 << i;
-					r600_inval_vertex_cache(rctx);
-					r600_atom_dirty(rctx, &state->atom);
+			mask = rctx->vertex_buffer_state.enabled_mask;
+			while (mask) {
+				i = u_bit_scan(&mask);
+				if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
+					rctx->vertex_buffer_state.dirty_mask |= 1 << i;
+					r600_vertex_buffers_dirty(rctx);
 				}
 			}
 			/* Streamout buffers. */
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 2951b86..d0a5918 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -1274,14 +1274,15 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 	r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
 
 	/* Invalidate caches. */
-	r600_inval_vertex_cache(ctx);
 	r600_inval_texture_cache(ctx);
 	r600_flush_framebuffer(ctx, false);
 
 	/* Re-emit states. */
 	r600_atom_dirty(ctx, &ctx->cb_misc_state.atom);
 	r600_atom_dirty(ctx, &ctx->db_misc_state.atom);
-	r600_atom_dirty(ctx, &ctx->vertex_buffer_state.atom);
+
+	ctx->vertex_buffer_state.dirty_mask = ctx->vertex_buffer_state.enabled_mask;
+	r600_vertex_buffers_dirty(ctx);
 
 	ctx->vs_constbuf_state.dirty_mask = ctx->vs_constbuf_state.enabled_mask;
 	ctx->ps_constbuf_state.dirty_mask = ctx->ps_constbuf_state.enabled_mask;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 6449a4d..200f0a2 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -278,6 +278,8 @@ struct r600_constbuf_state
 struct r600_vertexbuf_state
 {
 	struct r600_atom		atom;
+	struct pipe_vertex_buffer	vb[PIPE_MAX_ATTRIBS];
+	uint32_t			enabled_mask; /* non-NULL buffers */
 	uint32_t			dirty_mask;
 };
 
@@ -399,13 +401,8 @@ struct r600_context {
 
 	boolean			dual_src_blend;
 
-	/* Vertex and index buffers. */
-	bool			vertex_buffers_dirty;
+	/* Index buffer. */
 	struct pipe_index_buffer index_buffer;
-	struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-	unsigned		nr_vertex_buffers;
-	struct pipe_vertex_buffer cs_vertex_buffer[PIPE_MAX_ATTRIBS];
-	unsigned		nr_cs_vertex_buffers;
 };
 
 static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
@@ -528,8 +525,9 @@ unsigned r600_get_cb_flush_flags(struct r600_context *rctx);
 void r600_texture_barrier(struct pipe_context *ctx);
 void r600_set_index_buffer(struct pipe_context *ctx,
 			   const struct pipe_index_buffer *ib);
+void r600_vertex_buffers_dirty(struct r600_context *rctx);
 void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
-			     const struct pipe_vertex_buffer *buffers);
+			     const struct pipe_vertex_buffer *input);
 void *r600_create_vertex_elements(struct pipe_context *ctx,
 				  unsigned count,
 				  const struct pipe_vertex_element *elements);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 3d5835c..4f475b3 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1748,27 +1748,28 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
 static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = rctx->cs;
-	struct pipe_vertex_buffer *vb = rctx->vertex_buffer;
-	unsigned count = rctx->nr_vertex_buffers;
-	unsigned i, offset;
+	uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask;
 
-	for (i = 0; i < count; i++) {
-		struct r600_resource *rbuffer = (struct r600_resource*)vb[i].buffer;
+	while (dirty_mask) {
+		struct pipe_vertex_buffer *vb;
+		struct r600_resource *rbuffer;
+		unsigned offset;
+		unsigned buffer_index = u_bit_scan(&dirty_mask);
 
-		if (!rbuffer) {
-			continue;
-		}
+		vb = &rctx->vertex_buffer_state.vb[buffer_index];
+		rbuffer = (struct r600_resource*)vb->buffer;
+		assert(rbuffer);
 
-		offset = vb[i].buffer_offset;
+		offset = vb->buffer_offset;
 
 		/* fetch resources start at index 320 */
 		r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
-		r600_write_value(cs, (320 + i) * 7);
+		r600_write_value(cs, (320 + buffer_index) * 7);
 		r600_write_value(cs, offset); /* RESOURCEi_WORD0 */
 		r600_write_value(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
 		r600_write_value(cs, /* RESOURCEi_WORD2 */
 				 S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
-				 S_038008_STRIDE(vb[i].stride));
+				 S_038008_STRIDE(vb->stride));
 		r600_write_value(cs, 0); /* RESOURCEi_WORD3 */
 		r600_write_value(cs, 0); /* RESOURCEi_WORD4 */
 		r600_write_value(cs, 0); /* RESOURCEi_WORD5 */
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index be3d101..4fa2699 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -403,22 +403,58 @@ void r600_set_index_buffer(struct pipe_context *ctx,
 	}
 }
 
+void r600_vertex_buffers_dirty(struct r600_context *rctx)
+{
+	if (rctx->vertex_buffer_state.dirty_mask) {
+		r600_inval_vertex_cache(rctx);
+		rctx->vertex_buffer_state.atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) *
+					       util_bitcount(rctx->vertex_buffer_state.dirty_mask);
+		r600_atom_dirty(rctx, &rctx->vertex_buffer_state.atom);
+	}
+}
+
 void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
-			     const struct pipe_vertex_buffer *buffers)
+			     const struct pipe_vertex_buffer *input)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct r600_vertexbuf_state * state = &rctx->vertex_buffer_state;
+	struct r600_vertexbuf_state *state = &rctx->vertex_buffer_state;
+	struct pipe_vertex_buffer *vb = state->vb;
 	unsigned i;
+	/* This sets 1-bit for buffers with index >= count. */
+	uint32_t disable_mask = ~((1ull << count) - 1);
+	/* These are the new buffers set by this function. */
+	uint32_t new_buffer_mask = 0;
+
+	/* Set buffers with index >= count to NULL. */
+	uint32_t remaining_buffers_mask =
+		rctx->vertex_buffer_state.enabled_mask & disable_mask;
+
+	while (remaining_buffers_mask) {
+		i = u_bit_scan(&remaining_buffers_mask);
+		pipe_resource_reference(&vb[i].buffer, NULL);
+	}
 
-	util_copy_vertex_buffers(rctx->vertex_buffer, &rctx->nr_vertex_buffers, buffers, count);
+	/* Set vertex buffers. */
+	for (i = 0; i < count; i++) {
+		if (memcmp(&input[i], &vb[i], sizeof(struct pipe_vertex_buffer))) {
+			if (input[i].buffer) {
+				vb[i].stride = input[i].stride;
+				vb[i].buffer_offset = input[i].buffer_offset;
+				pipe_resource_reference(&vb[i].buffer, input[i].buffer);
+				new_buffer_mask |= 1 << i;
+			} else {
+				pipe_resource_reference(&vb[i].buffer, NULL);
+				disable_mask |= 1 << i;
+			}
+		}
+        }
 
-	r600_inval_vertex_cache(rctx);
-	state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 10) *
-					   rctx->nr_vertex_buffers;
-	for (i = 0 ; i < rctx->nr_vertex_buffers; i++) {
-		state->dirty_mask |= 1 << i;
-	}
-	r600_atom_dirty(rctx, &state->atom);
+	rctx->vertex_buffer_state.enabled_mask &= ~disable_mask;
+	rctx->vertex_buffer_state.dirty_mask &= rctx->vertex_buffer_state.enabled_mask;
+	rctx->vertex_buffer_state.enabled_mask |= new_buffer_mask;
+	rctx->vertex_buffer_state.dirty_mask |= new_buffer_mask;
+
+	r600_vertex_buffers_dirty(rctx);
 }
 
 void *r600_create_vertex_elements(struct pipe_context *ctx,
-- 
1.7.9.5



More information about the mesa-dev mailing list