[Mesa-dev] [PATCH 2/3] r600,compute: Upload rodata

Mon May 25 17:05:10 PDT 2015

reserve buffer id 2 for rodata

Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
---

This needs corresponding LLVM changes (see [0], also posted) to work,
but it does not break anything (except wasting a buffer) without them.

Both series fix table based builtins (acosh, atanh, cosh, log1p),
as well as gegl's format conversion kernels.

There's one more problem that might need to be addressed on mesa's side:
The generated code can have more than one rodata section
(like rodata.cst16, rodata.cst4).
Right now radeon_elf_read only keeps the last section (leaking the previous
ones). We can either concatenate the sections in mesa, or instruct llvm to put
everything in one section. The cbrt builtin and the updated (posted)
program-scope-arrays piglit hit this.
Suggestions welcome.

jan

[0] https://github.com/jvesely/llvm

 src/gallium/drivers/r600/evergreen_compute.c       | 32 +++++++++++++++++++---
 .../drivers/r600/evergreen_compute_internal.h      |  1 +
 src/gallium/drivers/r600/r600_asm.h                |  2 ++
 src/gallium/drivers/r600/r600_llvm.c               |  3 ++
 4 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 4b2d780..25f5f7d 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -229,11 +229,27 @@ void *evergreen_create_compute_state(
 	radeon_elf_read(code, header->num_bytes, &shader->binary, true);
 	r600_create_shader(&shader->bc, &shader->binary, &use_kill);
 
+	/* Upload code */
 	shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
-							shader->bc.ndw * 4);
+	                                                 shader->bc.ndw * 4);
 	p = r600_buffer_map_sync_with_rings(&ctx->b, shader->code_bo, PIPE_TRANSFER_WRITE);
+	//TODO: use util_memcpy_cpu_to_le32 ?
 	memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);
 	ctx->b.ws->buffer_unmap(shader->code_bo->cs_buf);
+	p = NULL;
+
+	/* Upload const data */
+	if (shader->bc.nrodb) {
+		shader->const_bo = r600_compute_buffer_alloc_vram(ctx->screen,
+	                                                 shader->bc.nrodb);
+		p = r600_buffer_map_sync_with_rings(&ctx->b, shader->const_bo, PIPE_TRANSFER_WRITE);
+		//TODO: use util_memcpy_cpu_to_le32 ?
+		memcpy(p, shader->bc.rodata, shader->bc.nrodb);
+		ctx->b.ws->buffer_unmap(shader->const_bo->cs_buf);
+		p = NULL;
+	} else {
+		shader->const_bo = NULL;
+	}
 #endif
 #endif
 
@@ -628,9 +644,9 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_,
 			start, count);
 
 	for (unsigned i = 0; i < count; i++) {
-		/* The First two vertex buffers are reserved for parameters and
+		/* The First three vertex buffers are reserved for parameters and
 		 * global buffers. */
-		unsigned vtx_id = 2 + i;
+		unsigned vtx_id = 3 + i;
 		if (resources[i]) {
 			struct r600_resource_global *buffer =
 				(struct r600_resource_global*)
@@ -716,9 +732,17 @@ static void evergreen_set_global_binding(
 		*(handles[i]) = util_cpu_to_le32(handle);
 	}
 
-	evergreen_set_rat(ctx->cs_shader_state.shader, 0, pool->bo, 0, pool->size_in_dw * 4);
+	/* globals for writing */
+	evergreen_set_rat(ctx->cs_shader_state.shader, 0, pool->bo, 0,
+	                  pool->size_in_dw * 4);
+	/* globals for reading */
 	evergreen_cs_set_vertex_buffer(ctx, 1, 0,
 				(struct pipe_resource*)pool->bo);
+
+	/* constants for reading */
+	if (ctx->cs_shader_state.shader->const_bo)
+		evergreen_cs_set_vertex_buffer(ctx, 2, 0,
+					(struct pipe_resource*)ctx->cs_shader_state.shader->const_bo);
 }
 
 /**
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h
index 95593dd..880a993 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.h
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.h
@@ -51,6 +51,7 @@ struct r600_pipe_compute {
 
 	struct radeon_shader_binary binary;
 	struct r600_resource *code_bo;
+	struct r600_resource *const_bo;
 	struct r600_bytecode bc;
 
 	unsigned local_size;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 4f723c1..df3f7dd 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -208,6 +208,7 @@ struct r600_bytecode {
 	struct list_head		cf;
 	struct r600_bytecode_cf		*cf_last;
 	unsigned			ndw;
+	unsigned			nrodb;
 	unsigned			ncf;
 	unsigned			ngpr;
 	unsigned			nstack;
@@ -215,6 +216,7 @@ struct r600_bytecode {
 	unsigned			nresource;
 	unsigned			force_add_cf;
 	uint32_t			*bytecode;
+	uint32_t			*rodata;
 	uint32_t			fc_sp;
 	struct r600_cf_stack_entry	fc_stack[32];
 	struct r600_stack_info		stack;
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index 72e2dc4..94085fc 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -861,8 +861,11 @@ unsigned r600_create_shader(struct r600_bytecode *bc,
 {
 	assert(binary->code_size % 4 == 0);
 	bc->bytecode = CALLOC(1, binary->code_size);
+	bc->rodata   = CALLOC(1, binary->rodata_size);
 	memcpy(bc->bytecode, binary->code, binary->code_size);
 	bc->ndw = binary->code_size / 4;
+	bc->nrodb = binary->rodata_size;
+	memcpy(bc->rodata, binary->rodata, binary->rodata_size);
 
 	r600_shader_binary_read_config(binary, bc, 0, use_kill);
 
-- 
2.1.0