[Mesa-dev] [PATCH 2/2] r600g: tgsi to llvm emits store.swizzle intrinsic for vs/fs output

Vincent Lejeune vljn at ovi.com
Wed Dec 26 08:38:28 PST 2012


---
 src/gallium/drivers/r600/r600_llvm.c     | 194 ++++++++++++++++++++++---------
 src/gallium/drivers/r600/r600_shader.c   |   6 +-
 src/gallium/drivers/radeon/radeon_llvm.h |   1 +
 3 files changed, 144 insertions(+), 57 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index cae2131..203a205 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -9,6 +9,7 @@
 
 #include "r600.h"
 #include "r600_asm.h"
+#include "r600_sq.h"
 #include "r600_opcodes.h"
 #include "r600_shader.h"
 #include "r600_pipe.h"
@@ -238,7 +239,9 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
 	struct lp_build_context * base = &bld_base->base;
 	struct pipe_stream_output_info * so = ctx->stream_outputs;
 	unsigned i;
-	
+	unsigned next_pos = 60;
+	unsigned next_param = 0;
+
 	unsigned color_count = 0;
 	boolean has_color = false;
 
@@ -264,70 +267,151 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
 	/* Add the necessary export instructions */
 	for (i = 0; i < ctx->output_reg_count; i++) {
 		unsigned chan;
+		LLVMValueRef elements[4];
 		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-			LLVMValueRef output;
-			unsigned adjusted_reg_idx = i +
-					ctx->reserved_reg_count;
-
-			output = LLVMBuildLoad(base->gallivm->builder,
+			elements[chan] = LLVMBuildLoad(base->gallivm->builder,
 				ctx->soa.outputs[i][chan], "");
+		}
+		LLVMValueRef output = lp_build_gather_values(base->gallivm, elements, 4);
 
-			if (ctx->type == TGSI_PROCESSOR_VERTEX) {
-				LLVMValueRef reg_index = lp_build_const_int32(
-					base->gallivm,
-					radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
-				lp_build_intrinsic_binary(
+		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+			switch (ctx->r600_outputs[i].name) {
+			case TGSI_SEMANTIC_POSITION:
+			case TGSI_SEMANTIC_PSIZE: {
+				LLVMValueRef args[3];
+				args[0] = output;
+				args[1] = lp_build_const_int32(base->gallivm, next_pos++);
+				args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
+				build_intrinsic(
+					base->gallivm->builder,
+					"llvm.R600.store.swizzle",
+					LLVMVoidTypeInContext(base->gallivm->context),
+					args, 3, 0);
+				break;
+			}
+			case TGSI_SEMANTIC_CLIPVERTEX: {
+				LLVMValueRef args[3];
+				unsigned reg_index;
+				unsigned base_vector_chan;
+				LLVMValueRef adjusted_elements[4];
+				for (reg_index = 0; reg_index < 2; reg_index ++) {
+					for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+						LLVMValueRef offset[2] = {
+							LLVMConstInt(LLVMInt64TypeInContext(bld_base->base.gallivm->context), 0, false),
+							lp_build_const_int32(bld_base->base.gallivm, reg_index * 4 + chan)
+						};
+						LLVMValueRef const_ptr = LLVMGetNamedGlobal(bld_base->base.gallivm->module, "const1");
+						LLVMValueRef ptr = LLVMBuildGEP(bld_base->base.gallivm->builder, const_ptr, offset, 2, "");
+						LLVMValueRef base_vector = LLVMBuildLoad(bld_base->base.gallivm->builder, ptr, "");
+						args[0] = output;
+						args[1] = base_vector;
+						adjusted_elements[chan] = build_intrinsic(base->gallivm->builder,
+							"llvm.AMDGPU.dp4", bld_base->base.elem_type,
+							args, 2, LLVMReadNoneAttribute);
+					}
+					args[0] = lp_build_gather_values(base->gallivm,
+						adjusted_elements, 4);
+					args[1] = lp_build_const_int32(base->gallivm, next_pos++);
+					args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
+					build_intrinsic(
+						base->gallivm->builder,
+						"llvm.R600.store.swizzle",
+						LLVMVoidTypeInContext(base->gallivm->context),
+						args, 3, 0);
+				}
+				break;
+			}
+			case TGSI_SEMANTIC_CLIPDIST : {
+				LLVMValueRef args[3];
+				args[0] = output;
+				args[1] = lp_build_const_int32(base->gallivm, next_pos++);
+				args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
+				build_intrinsic(
+					base->gallivm->builder,
+					"llvm.R600.store.swizzle",
+					LLVMVoidTypeInContext(base->gallivm->context),
+					args, 3, 0);
+				args[1] = lp_build_const_int32(base->gallivm, next_param++);
+				args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
+				build_intrinsic(
+					base->gallivm->builder,
+					"llvm.R600.store.swizzle",
+					LLVMVoidTypeInContext(base->gallivm->context),
+					args, 3, 0);
+				break;
+			}
+			case TGSI_SEMANTIC_FOG: {
+				elements[0] = LLVMBuildLoad(base->gallivm->builder,
+					ctx->soa.outputs[i][0], "");
+				elements[1] = elements[2] = lp_build_const_float(base->gallivm, 0.0f);
+				elements[3] = lp_build_const_float(base->gallivm, 1.0f);
+
+				LLVMValueRef args[3];
+				args[0] = lp_build_gather_values(base->gallivm, elements, 4);
+				args[1] = lp_build_const_int32(base->gallivm, next_param++);
+				args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
+				build_intrinsic(
 					base->gallivm->builder,
-					"llvm.AMDGPU.store.output",
+					"llvm.R600.store.swizzle",
 					LLVMVoidTypeInContext(base->gallivm->context),
-					output, reg_index);
-			} else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
-				switch (ctx->r600_outputs[i].name) {
-				case TGSI_SEMANTIC_COLOR:
-					has_color = true;
-					if ( color_count/4 < ctx->color_buffer_count) {
-						if (ctx->fs_color_all) {
-							for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
-								LLVMValueRef reg_index = lp_build_const_int32(
-									base->gallivm,
-									(j * 4) + chan);
-								lp_build_intrinsic_binary(
-									base->gallivm->builder,
-									"llvm.R600.store.pixel.color",
-									LLVMVoidTypeInContext(base->gallivm->context),
-									output, reg_index);
-							}
-						} else {
-							LLVMValueRef reg_index = lp_build_const_int32(
-								base->gallivm,
-								(color_count++/4) * 4 + chan);
-							lp_build_intrinsic_binary(
+					args, 3, 0);
+				break;
+			}
+			default: {
+				LLVMValueRef args[3];
+				args[0] = output;
+				args[1] = lp_build_const_int32(base->gallivm, next_param++);
+				args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
+				build_intrinsic(
+					base->gallivm->builder,
+					"llvm.R600.store.swizzle",
+					LLVMVoidTypeInContext(base->gallivm->context),
+					args, 3, 0);
+				break;
+			}
+			}
+		} else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+			switch (ctx->r600_outputs[i].name) {
+			case TGSI_SEMANTIC_COLOR:
+				has_color = true;
+				if ( color_count < ctx->color_buffer_count) {
+					LLVMValueRef args[3];
+					args[0] = output;
+					if (ctx->fs_color_all) {
+						for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
+							args[1] = lp_build_const_int32(base->gallivm, j);
+							args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
+							build_intrinsic(
 								base->gallivm->builder,
-								"llvm.R600.store.pixel.color",
+								"llvm.R600.store.swizzle",
 								LLVMVoidTypeInContext(base->gallivm->context),
-								output, reg_index);
+								args, 3, 0);
 						}
+					} else {
+						args[1] = lp_build_const_int32(base->gallivm, color_count++);
+						args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
+						build_intrinsic(
+							base->gallivm->builder,
+							"llvm.R600.store.swizzle",
+							LLVMVoidTypeInContext(base->gallivm->context),
+							args, 3, 0);
 					}
-					break;
-				case TGSI_SEMANTIC_POSITION:
-					if (chan != 2)
-						continue;
-					lp_build_intrinsic_unary(
-						base->gallivm->builder,
-						"llvm.R600.store.pixel.depth",
-						LLVMVoidTypeInContext(base->gallivm->context),
-						output);
-					break;
-				case TGSI_SEMANTIC_STENCIL:
-					if (chan != 1)
-						continue;
-					lp_build_intrinsic_unary(
-						base->gallivm->builder,
-						"llvm.R600.store.pixel.stencil",
-						LLVMVoidTypeInContext(base->gallivm->context),
-						output);
-					break;
 				}
+				break;
+			case TGSI_SEMANTIC_POSITION:
+				lp_build_intrinsic_unary(
+					base->gallivm->builder,
+					"llvm.R600.store.pixel.depth",
+					LLVMVoidTypeInContext(base->gallivm->context),
+					LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][2], ""));
+				break;
+			case TGSI_SEMANTIC_STENCIL:
+				lp_build_intrinsic_unary(
+					base->gallivm->builder,
+					"llvm.R600.store.pixel.stencil",
+					LLVMVoidTypeInContext(base->gallivm->context),
+					LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][1], ""));
+				break;
 			}
 		}
 	}
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index f8eadd3..17b144a 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1384,6 +1384,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 		radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
 		radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN);
 		radeon_llvm_ctx.stream_outputs = &so;
+		radeon_llvm_ctx.clip_vertex = ctx.cv_output;
 		mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
 		if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
 			dump = 1;
@@ -1524,7 +1525,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 				alu.dst.write = (j == ochan);
 				if (j == 3)
 					alu.last = 1;
-				r = r600_bytecode_add_alu(ctx.bc, &alu);
+				if (!use_llvm)
+					r = r600_bytecode_add_alu(ctx.bc, &alu);
 				if (r)
 					return r;
 			}
@@ -1753,7 +1755,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 		}
 	}
 	/* add output to bytecode */
-	if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT) {
+	if (!use_llvm) {
 		for (i = 0; i < noutput; i++) {
 			r = r600_bytecode_add_output(ctx.bc, &output[i]);
 			if (r)
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index c3d691a..5bce368 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -58,6 +58,7 @@ struct radeon_llvm_context {
 	unsigned type;
 	unsigned face_input;
 	unsigned two_side;
+	unsigned clip_vertex;
 	struct r600_shader_io * r600_inputs;
 	struct r600_shader_io * r600_outputs;
 	struct pipe_stream_output_info *stream_outputs;
-- 
1.8.0.1



More information about the mesa-dev mailing list