[Mesa-dev] [PATCH 3/4] r600g: suballocate memory for the STRMOUT_BUFFER_FILLED_SIZE register

Marek Olšák maraeo at gmail.com
Mon Dec 10 12:47:33 PST 2012


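Instead of allocating a separate 4-byte buffer for each streamout target,
we suballocate each dword from a shared 4K buffer. The suballocator is
created with buffer zeroing enabled, so the explicit map/memset that used
to clear BUFFER_FILLED_SIZE in r600_create_so_target is no longer needed.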

This further reduces the overall number of relocations.
---
 src/gallium/drivers/r600/r600.h              |    4 +++-
 src/gallium/drivers/r600/r600_hw_context.c   |    8 ++++----
 src/gallium/drivers/r600/r600_pipe.c         |    8 ++++++++
 src/gallium/drivers/r600/r600_pipe.h         |    2 ++
 src/gallium/drivers/r600/r600_state_common.c |   22 +++++++++++-----------
 5 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 7d43416..d15cd52 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -175,7 +175,9 @@ struct r600_so_target {
 	struct pipe_stream_output_target b;
 
 	/* The buffer where BUFFER_FILLED_SIZE is stored. */
-	struct r600_resource	*filled_size;
+	struct r600_resource	*buf_filled_size;
+	unsigned		buf_filled_size_offset;
+
 	unsigned		stride_in_dw;
 	unsigned		so_index;
 };
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index c825301..c7a357e 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -1005,7 +1005,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
 
 			if (ctx->streamout_append_bitmask & (1 << i)) {
 				va = r600_resource_va(&ctx->screen->screen,
-						      (void*)t[i]->filled_size);
+						      (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset;
 				/* Append. */
 				cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
 				cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
@@ -1017,7 +1017,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
 
 				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 				cs->buf[cs->cdw++] =
-					r600_context_bo_reloc(ctx,  t[i]->filled_size,
+					r600_context_bo_reloc(ctx,  t[i]->buf_filled_size,
 							      RADEON_USAGE_READ);
 			} else {
 				/* Start from the beginning. */
@@ -1054,7 +1054,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
 	for (i = 0; i < ctx->num_so_targets; i++) {
 		if (t[i]) {
 			va = r600_resource_va(&ctx->screen->screen,
-					      (void*)t[i]->filled_size);
+					      (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset;
 			cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
 			cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
 						       STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
@@ -1066,7 +1066,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
 
 			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 			cs->buf[cs->cdw++] =
-				r600_context_bo_reloc(ctx,  t[i]->filled_size,
+				r600_context_bo_reloc(ctx,  t[i]->buf_filled_size,
 						      RADEON_USAGE_WRITE);
 
 		}
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 4f0e232..e1252f0 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -185,6 +185,9 @@ static void r600_destroy_context(struct pipe_context *context)
 	if (rctx->uploader) {
 		u_upload_destroy(rctx->uploader);
 	}
+	if (rctx->allocator_so_filled_size) {
+		u_suballocator_destroy(rctx->allocator_so_filled_size);
+	}
 	util_slab_destroy(&rctx->pool_transfers);
 
 	r600_release_command_buffer(&rctx->start_cs_cmd);
@@ -291,6 +294,11 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
         if (!rctx->uploader)
                 goto fail;
 
+	rctx->allocator_so_filled_size = u_suballocator_create(&rctx->context, 4096, 4,
+                                                               0, PIPE_USAGE_STATIC, TRUE);
+        if (!rctx->allocator_so_filled_size)
+                goto fail;
+
 	rctx->blitter = util_blitter_create(&rctx->context);
 	if (rctx->blitter == NULL)
 		goto fail;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index a61a6e8..e707a4a 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -28,6 +28,7 @@
 
 #include "util/u_blitter.h"
 #include "util/u_slab.h"
+#include "util/u_suballoc.h"
 #include "r600.h"
 #include "r600_llvm.h"
 #include "r600_public.h"
@@ -389,6 +390,7 @@ struct r600_context {
 	struct radeon_winsys_cs		*cs;
 	struct blitter_context		*blitter;
 	struct u_upload_mgr	        *uploader;
+	struct u_suballocator		*allocator_so_filled_size;
 	struct util_slab_mempool	pool_transfers;
 
 	/* Hardware info. */
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index b132850..66120ca 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -956,25 +956,25 @@ r600_create_so_target(struct pipe_context *ctx,
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct r600_so_target *t;
-	void *ptr;
 
 	t = CALLOC_STRUCT(r600_so_target);
 	if (!t) {
 		return NULL;
 	}
 
+	u_suballocator_alloc(rctx->allocator_so_filled_size, 4,
+			     &t->buf_filled_size_offset,
+			     (struct pipe_resource**)&t->buf_filled_size);
+	if (!t->buf_filled_size) {
+		FREE(t);
+		return NULL;
+	}
+
 	t->b.reference.count = 1;
 	t->b.context = ctx;
 	pipe_resource_reference(&t->b.buffer, buffer);
 	t->b.buffer_offset = buffer_offset;
 	t->b.buffer_size = buffer_size;
-
-	t->filled_size = (struct r600_resource*)
-		pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STATIC, 4);
-	ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
-	memset(ptr, 0, t->filled_size->buf->size);
-	rctx->ws->buffer_unmap(t->filled_size->cs_buf);
-
 	return &t->b;
 }
 
@@ -983,7 +983,7 @@ static void r600_so_target_destroy(struct pipe_context *ctx,
 {
 	struct r600_so_target *t = (struct r600_so_target*)target;
 	pipe_resource_reference(&t->b.buffer, NULL);
-	pipe_resource_reference((struct pipe_resource**)&t->filled_size, NULL);
+	pipe_resource_reference((struct pipe_resource**)&t->buf_filled_size, NULL);
 	FREE(t);
 }
 
@@ -1308,7 +1308,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 	} else {
 		if (info.count_from_stream_output) {
 			struct r600_so_target *t = (struct r600_so_target*)info.count_from_stream_output;
-			uint64_t va = r600_resource_va(&rctx->screen->screen, (void*)t->filled_size);
+			uint64_t va = r600_resource_va(&rctx->screen->screen, (void*)t->buf_filled_size) + t->buf_filled_size_offset;
 
 			r600_write_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
 
@@ -1320,7 +1320,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 			cs->buf[cs->cdw++] = 0; /* unused */
 
 			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-			cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, t->filled_size, RADEON_USAGE_READ);
+			cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, t->buf_filled_size, RADEON_USAGE_READ);
 		}
 
 		cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->predicate_drawing);
-- 
1.7.10.4



More information about the mesa-dev mailing list