[Mesa-dev] [PATCH 4/4] r600g: suballocate memory for fetch shaders from a large buffer

Marek Olšák maraeo at gmail.com
Mon Dec 10 12:47:34 PST 2012


Fetch shaders are usually destroyed by the state tracker only when the context
is destroyed, so we can put them all in a large buffer without wasting memory.

This slightly reduces the number of relocations sent to the kernel.
---
 src/gallium/drivers/r600/evergreen_state.c   |    6 +++---
 src/gallium/drivers/r600/r600_asm.c          |   26 ++++++++++++++------------
 src/gallium/drivers/r600/r600_pipe.c         |    8 ++++++++
 src/gallium/drivers/r600/r600_pipe.h         |    6 ++++++
 src/gallium/drivers/r600/r600_state.c        |    6 +++---
 src/gallium/drivers/r600/r600_state_common.c |    4 +++-
 6 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 9b898cb..996c1b4 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2367,12 +2367,12 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct
 {
 	struct radeon_winsys_cs *cs = rctx->cs;
 	struct r600_cso_state *state = (struct r600_cso_state*)a;
-	struct r600_resource *shader = (struct r600_resource*)state->cso;
+	struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
 
 	r600_write_context_reg(cs, R_0288A4_SQ_PGM_START_FS,
-			       r600_resource_va(rctx->context.screen, &shader->b.b) >> 8);
+			       (r600_resource_va(rctx->context.screen, &shader->buffer->b.b) + shader->offset) >> 8);
 	r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
-	r600_write_value(cs, r600_context_bo_reloc(rctx, shader, RADEON_USAGE_READ));
+	r600_write_value(cs, r600_context_bo_reloc(rctx, shader->buffer, RADEON_USAGE_READ));
 }
 
 void evergreen_init_state_functions(struct r600_context *rctx)
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index f06af44..268137f 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2766,7 +2766,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 	unsigned format, num_format, format_comp, endian;
 	uint32_t *bytecode;
 	int i, j, r, fs_size;
-	struct r600_resource *fetch_shader;
+	struct r600_fetch_shader *shader;
 
 	assert(count < 32);
 
@@ -2873,22 +2873,25 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 
 	fs_size = bc.ndw*4;
 
-	fetch_shader = (struct r600_resource*)
-			pipe_buffer_create(rctx->context.screen,
-					   PIPE_BIND_CUSTOM,
-					   PIPE_USAGE_IMMUTABLE, fs_size);
-	if (fetch_shader == NULL) {
+	/* Allocate the CSO. */
+	shader = CALLOC_STRUCT(r600_fetch_shader);
+	if (!shader) {
 		r600_bytecode_clear(&bc);
 		return NULL;
 	}
 
-	bytecode = rctx->ws->buffer_map(fetch_shader->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
-	if (bytecode == NULL) {
+	u_suballocator_alloc(rctx->allocator_fetch_shader, fs_size, &shader->offset,
+			     (struct pipe_resource**)&shader->buffer);
+	if (!shader->buffer) {
 		r600_bytecode_clear(&bc);
-		pipe_resource_reference((struct pipe_resource**)&fetch_shader, NULL);
+		FREE(shader);
 		return NULL;
 	}
 
+	bytecode = rctx->ws->buffer_map(shader->buffer->cs_buf, rctx->cs,
+					PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
+	bytecode += shader->offset / 4;
+
 	if (R600_BIG_ENDIAN) {
 		for (i = 0; i < fs_size / 4; ++i) {
 			bytecode[i] = bswap_32(bc.bytecode[i]);
@@ -2896,11 +2899,10 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 	} else {
 		memcpy(bytecode, bc.bytecode, fs_size);
 	}
+	rctx->ws->buffer_unmap(shader->buffer->cs_buf);
 
-	rctx->ws->buffer_unmap(fetch_shader->cs_buf);
 	r600_bytecode_clear(&bc);
-
-	return fetch_shader;
+	return shader;
 }
 
 void r600_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1)
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index e1252f0..b25a6d0 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -188,6 +188,9 @@ static void r600_destroy_context(struct pipe_context *context)
 	if (rctx->allocator_so_filled_size) {
 		u_suballocator_destroy(rctx->allocator_so_filled_size);
 	}
+	if (rctx->allocator_fetch_shader) {
+		u_suballocator_destroy(rctx->allocator_fetch_shader);
+	}
 	util_slab_destroy(&rctx->pool_transfers);
 
 	r600_release_command_buffer(&rctx->start_cs_cmd);
@@ -294,6 +297,11 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
         if (!rctx->uploader)
                 goto fail;
 
+	rctx->allocator_fetch_shader = u_suballocator_create(&rctx->context, 64 * 1024, 256,
+							     0, PIPE_USAGE_STATIC, FALSE);
+        if (!rctx->allocator_fetch_shader)
+                goto fail;
+
 	rctx->allocator_so_filled_size = u_suballocator_create(&rctx->context, 4096, 4,
                                                                0, PIPE_USAGE_STATIC, TRUE);
         if (!rctx->allocator_so_filled_size)
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index e707a4a..c348c76 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -383,6 +383,11 @@ struct r600_scissor_state
 	bool				enable; /* r6xx only */
 };
 
+struct r600_fetch_shader {
+	struct r600_resource		*buffer;
+	unsigned			offset;
+};
+
 struct r600_context {
 	struct pipe_context		context;
 	struct r600_screen		*screen;
@@ -391,6 +396,7 @@ struct r600_context {
 	struct blitter_context		*blitter;
 	struct u_upload_mgr	        *uploader;
 	struct u_suballocator		*allocator_so_filled_size;
+	struct u_suballocator		*allocator_fetch_shader;
 	struct util_slab_mempool	pool_transfers;
 
 	/* Hardware info. */
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index ab658da..9bfae4f 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2117,11 +2117,11 @@ static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600
 {
 	struct radeon_winsys_cs *cs = rctx->cs;
 	struct r600_cso_state *state = (struct r600_cso_state*)a;
-	struct r600_resource *shader = (struct r600_resource*)state->cso;
+	struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
 
-	r600_write_context_reg(cs, R_028894_SQ_PGM_START_FS, 0);
+	r600_write_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
 	r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
-	r600_write_value(cs, r600_context_bo_reloc(rctx, shader, RADEON_USAGE_READ));
+	r600_write_value(cs, r600_context_bo_reloc(rctx, shader->buffer, RADEON_USAGE_READ));
 }
 
 void r600_init_state_functions(struct r600_context *rctx)
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 66120ca..b20f655 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -470,7 +470,9 @@ static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
 
 static void r600_delete_vertex_elements(struct pipe_context *ctx, void *state)
 {
-	pipe_resource_reference((struct pipe_resource**)&state, NULL);
+	struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state;
+	pipe_resource_reference((struct pipe_resource**)&shader->buffer, NULL);
+	FREE(shader);
 }
 
 static void r600_set_index_buffer(struct pipe_context *ctx,
-- 
1.7.10.4



More information about the mesa-dev mailing list