[Mesa-dev] [PATCH] r600g: make tgsi-to-llvm generate store.pixel* intrinsics for fs

Vincent Lejeune vljn at ovi.com
Sun Oct 28 11:20:25 PDT 2012


---
 src/gallium/drivers/r600/eg_asm.c        | 17 ++++++++
 src/gallium/drivers/r600/r600_asm.c      | 17 ++++++++
 src/gallium/drivers/r600/r600_asm.h      |  2 +
 src/gallium/drivers/r600/r600_llvm.c     | 71 ++++++++++++++++++++++++++++----
 src/gallium/drivers/r600/r600_shader.c   | 32 ++++++++++++--
 src/gallium/drivers/radeon/radeon_llvm.h |  3 ++
 6 files changed, 130 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 310d424..70dc94a 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -145,3 +145,20 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 	}
 	return 0;
 }
+
+void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
+{	/* Fill *output from the SQ_CF_ALLOC_EXPORT fields of word0 (first four) and word1 (the rest). */
+	output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0);
+	output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
+	output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
+	output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);
+	/* The remaining fields are all read from word1. */
+	output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
+	output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
+	output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
+	output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
+	output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
+	output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
+	output->inst = EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1)); /* G_ result is fed back through the EG_S_ macro */
+	output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
+}
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 51a2e4e..4cc1716 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2941,3 +2941,20 @@ void r600_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint3
 			G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1);
 	}
 }
+
+void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
+{	/* Fill *output from the SQ_CF_ALLOC_EXPORT fields of word0 (first four) and word1 (the rest). */
+	output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0);
+	output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
+	output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
+	output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);
+	/* The remaining fields are all read from word1. */
+	output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
+	output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
+	output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
+	output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
+	output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
+	output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
+	output->inst = EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1)); /* NOTE(review): EG_-prefixed macro in the r600_ variant; body matches eg_bytecode_export_read line for line - confirm this is intended */
+	output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
+}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 8a9f318..b6bd9e1 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -242,5 +242,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf);
 int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id);
 void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
+void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
+void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
 
 #endif
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index 321966e..c00bbab 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -193,6 +193,9 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
 	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
 	struct lp_build_context * base = &bld_base->base;
 	unsigned i;
+	
+	unsigned color_count = 0;
+	boolean has_color = false;
 
 	/* Add the necessary export instructions */
 	for (i = 0; i < ctx->output_reg_count; i++) {
@@ -201,20 +204,72 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
 			LLVMValueRef output;
 			unsigned adjusted_reg_idx = i +
 					ctx->reserved_reg_count;
-			LLVMValueRef reg_index = lp_build_const_int32(
-				base->gallivm,
-				radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
 
 			output = LLVMBuildLoad(base->gallivm->builder,
 				ctx->soa.outputs[i][chan], "");
 
-			lp_build_intrinsic_binary(
-				base->gallivm->builder,
-				"llvm.AMDGPU.store.output",
-				LLVMVoidTypeInContext(base->gallivm->context),
-				output, reg_index);
+			if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+				LLVMValueRef reg_index = lp_build_const_int32(
+					base->gallivm,
+					radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
+				lp_build_intrinsic_binary(
+					base->gallivm->builder,
+					"llvm.AMDGPU.store.output",
+					LLVMVoidTypeInContext(base->gallivm->context),
+					output, reg_index);
+			} else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+				switch (ctx->r600_outputs[i].name) {
+				case TGSI_SEMANTIC_COLOR:
+					has_color = true;
+					if ( color_count/4 < ctx->color_buffer_count) {
+						if (ctx->fs_color_all) {
+							for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
+								LLVMValueRef reg_index = lp_build_const_int32(
+									base->gallivm,
+									(j * 4) + chan);
+								lp_build_intrinsic_binary(
+									base->gallivm->builder,
+									"llvm.R600.store.pixel.color",
+									LLVMVoidTypeInContext(base->gallivm->context),
+									output, reg_index);
+							}
+						} else {
+							LLVMValueRef reg_index = lp_build_const_int32(
+								base->gallivm,
+								(color_count++/4) * 4 + chan);
+							lp_build_intrinsic_binary(
+								base->gallivm->builder,
+								"llvm.R600.store.pixel.color",
+								LLVMVoidTypeInContext(base->gallivm->context),
+								output, reg_index);
+						}
+					}
+					break;
+				case TGSI_SEMANTIC_POSITION:
+					if (chan != 2)
+						continue;
+					lp_build_intrinsic_unary(
+						base->gallivm->builder,
+						"llvm.R600.store.pixel.depth",
+						LLVMVoidTypeInContext(base->gallivm->context),
+						output);
+					break;
+				case TGSI_SEMANTIC_STENCIL:
+					if (chan != 1)
+						continue;
+					lp_build_intrinsic_unary(
+						base->gallivm->builder,
+						"llvm.R600.store.pixel.stencil",
+						LLVMVoidTypeInContext(base->gallivm->context),
+						output);
+					break;
+				}
+			}
 		}
 	}
+
+	if (!has_color && ctx->type == TGSI_PROCESSOR_FRAGMENT)
+		lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.pixel.dummy", LLVMVoidTypeInContext(base->gallivm->context), 0, 0);
 }
 
 static void llvm_emit_tex(
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index c56efda..24469f3 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -525,6 +525,21 @@ static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
 	return bytes_read;
 }
 
+static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx,
+	unsigned char * bytes, unsigned bytes_read)
+{	/* Read two 32-bit words from bytes, decode them as an export and add it to ctx->bc; returns bytes_read. */
+	struct r600_bytecode_output output;
+	memset(&output, 0, sizeof(struct r600_bytecode_output)); /* start from an all-zero output */
+	uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read); /* presumably advances bytes_read via the pointer - confirm */
+	uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
+	if (ctx->bc->chip_class >= EVERGREEN) /* pick the decoder by chip class */
+		eg_bytecode_export_read(&output, word0,word1);
+	else
+		r600_bytecode_export_read(&output, word0,word1);
+	r600_bytecode_add_output(ctx->bc, &output); /* NOTE(review): return value is not checked here */
+	return bytes_read;
+}
+
 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
 				unsigned char * bytes,	unsigned num_bytes)
 {
@@ -559,6 +574,10 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
 			bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
 								bytes_read);
 			break;
+		case 5:
+            bytes_read = r600_export_from_byte_stream(ctx, bytes,
+                                bytes_read);
+            break;
 		default:
 			/* XXX: Error here */
 			break;
@@ -1351,7 +1370,10 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 		radeon_llvm_ctx.two_side = shader->two_side;
 		radeon_llvm_ctx.face_input = ctx.face_gpr;
 		radeon_llvm_ctx.r600_inputs = ctx.shader->input;
+		radeon_llvm_ctx.r600_outputs = ctx.shader->output;
+		radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1);
 		radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
+		radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN);
 		mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
 		if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
 			dump = 1;
@@ -1721,10 +1743,12 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 		}
 	}
 	/* add output to bytecode */
-	for (i = 0; i < noutput; i++) {
-		r = r600_bytecode_add_output(ctx.bc, &output[i]);
-		if (r)
-			goto out_err;
+	if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT) {
+		for (i = 0; i < noutput; i++) {
+			r = r600_bytecode_add_output(ctx.bc, &output[i]);
+			if (r)
+				goto out_err;
+		}
 	}
 	/* add program end */
 	if (ctx.bc->chip_class == CAYMAN)
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index 6118b11..61975c4 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -59,6 +59,9 @@ struct radeon_llvm_context {
 	unsigned face_input;
 	unsigned two_side;
 	struct r600_shader_io * r600_inputs;
+	struct r600_shader_io * r600_outputs;
+	unsigned color_buffer_count;
+	unsigned fs_color_all;
 
 	/*=== Front end configuration ===*/
 
-- 
1.7.11.7



More information about the mesa-dev mailing list