Mesa (master): radeon/llvm: rework input fetch and output store

Christian König deathsimple at kemper.freedesktop.org
Tue Mar 19 14:16:34 UTC 2013


Module: Mesa
Branch: master
Commit: c05483fc00a0630aba86ad9de68d949bbb68f8c9
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c05483fc00a0630aba86ad9de68d949bbb68f8c9

Author: Christian König <christian.koenig at amd.com>
Date:   Thu Mar  7 11:01:07 2013 +0100

radeon/llvm: rework input fetch and output store

Cleanup the code and implement indirect addressing.

Signed-off-by: Christian König <christian.koenig at amd.com>
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

---

 src/gallium/drivers/radeon/radeon_llvm.h           |   32 ++-
 .../drivers/radeon/radeon_setup_tgsi_llvm.c        |  241 +++++++++++---------
 2 files changed, 151 insertions(+), 122 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index 4c01d68..1d4bd45 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -35,6 +35,7 @@
 #define RADEON_LLVM_MAX_OUTPUTS 32 * 4
 #define RADEON_LLVM_MAX_BRANCH_DEPTH 16
 #define RADEON_LLVM_MAX_LOOP_DEPTH 16
+#define RADEON_LLVM_MAX_ARRAYS 16
 
 #define RADEON_LLVM_MAX_SYSTEM_VALUES 4
 
@@ -117,35 +118,40 @@ struct radeon_llvm_context {
 	unsigned branch_depth;
 	unsigned loop_depth;
 
+	struct tgsi_declaration_range arrays[RADEON_LLVM_MAX_ARRAYS];
+	unsigned num_arrays;
 
 	LLVMValueRef main_fn;
 
 	struct gallivm_state gallivm;
 };
 
-static inline LLVMValueRef bitcast(
+static inline LLVMTypeRef tgsi2llvmtype(
 		struct lp_build_tgsi_context * bld_base,
-		enum tgsi_opcode_type type,
-		LLVMValueRef value
-)
+		enum tgsi_opcode_type type)
 {
-	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 	LLVMContextRef ctx = bld_base->base.gallivm->context;
-	LLVMTypeRef dst_type;
 
 	switch (type) {
 	case TGSI_TYPE_UNSIGNED:
 	case TGSI_TYPE_SIGNED:
-		dst_type = LLVMInt32TypeInContext(ctx);
-		break;
+		return LLVMInt32TypeInContext(ctx);
 	case TGSI_TYPE_UNTYPED:
 	case TGSI_TYPE_FLOAT:
-		dst_type = LLVMFloatTypeInContext(ctx);
-		break;
-	default:
-		dst_type = 0;
-		break;
+		return LLVMFloatTypeInContext(ctx);
+	default: break;
 	}
+	return 0;
+}
+
+static inline LLVMValueRef bitcast(
+		struct lp_build_tgsi_context * bld_base,
+		enum tgsi_opcode_type type,
+		LLVMValueRef value
+)
+{
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+	LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
 
 	if (dst_type)
 		return LLVMBuildBitCast(builder, value, dst_type, "");
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index f017b87..e2a6bee 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -80,136 +80,124 @@ static LLVMValueRef emit_swizzle(
 		LLVMConstVector(swizzles, 4), "");
 }
 
+static struct tgsi_declaration_range
+get_array_range(struct lp_build_tgsi_context *bld_base,
+		unsigned File, const struct tgsi_ind_register *reg)
+{
+	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+	if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
+            reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) {
+		struct tgsi_declaration_range range;
+		range.First = 0;
+		range.Last = bld_base->info->file_max[File];
+		return range;
+	}
+
+	return ctx->arrays[reg->ArrayID - 1];
+}
+
 static LLVMValueRef
 emit_array_index(
 	struct lp_build_tgsi_soa_context *bld,
-	const struct tgsi_full_src_register *reg,
-	unsigned swizzle)
+	const struct tgsi_ind_register *reg,
+	unsigned offset)
 {
 	struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
 
-	LLVMValueRef addr = LLVMBuildLoad(gallivm->builder,
-	bld->addr[reg->Indirect.Index][swizzle], "");
-	LLVMValueRef offset = lp_build_const_int32(gallivm, reg->Register.Index);
-	LLVMValueRef hw_index = LLVMBuildAdd(gallivm->builder, addr, offset, "");
-	LLVMValueRef soa_index = LLVMBuildMul(gallivm->builder, hw_index,
-	lp_build_const_int32(gallivm, 4), "");
-	LLVMValueRef array_index = LLVMBuildAdd(gallivm->builder, soa_index,
-	lp_build_const_int32(gallivm, swizzle), "");
-
-	return array_index;
+	LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], "");
+	return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
 }
 
 static LLVMValueRef
-emit_fetch_immediate(
+emit_fetch(
 	struct lp_build_tgsi_context *bld_base,
 	const struct tgsi_full_src_register *reg,
 	enum tgsi_opcode_type type,
-	unsigned swizzle)
-{
-	LLVMTypeRef ctype;
-	LLVMContextRef ctx = bld_base->base.gallivm->context;
-
-	switch (type) {
-	case TGSI_TYPE_UNSIGNED:
-	case TGSI_TYPE_SIGNED:
-		ctype = LLVMInt32TypeInContext(ctx);
-		break;
-	case TGSI_TYPE_UNTYPED:
-	case TGSI_TYPE_FLOAT:
-		ctype = LLVMFloatTypeInContext(ctx);
-		break;
-	default:
-		ctype = 0;
-		break;
-	}
-
-	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
-	if (swizzle == ~0) {
-		LLVMValueRef values[TGSI_NUM_CHANNELS] = {};
-		unsigned chan;
-		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-                   values[chan] = LLVMConstBitCast(bld->immediates[reg->Register.Index][chan], ctype);
-		}
-		return lp_build_gather_values(bld_base->base.gallivm, values,
-						TGSI_NUM_CHANNELS);
-	} else {
-		return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
-	}
-}
+	unsigned swizzle);
 
 static LLVMValueRef
-emit_fetch_input(
+emit_array_fetch(
 	struct lp_build_tgsi_context *bld_base,
-	const struct tgsi_full_src_register *reg,
-	enum tgsi_opcode_type type,
+	unsigned File, enum tgsi_opcode_type type,
+	struct tgsi_declaration_range range,
 	unsigned swizzle)
 {
-	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
-	if (swizzle == ~0) {
-		LLVMValueRef values[TGSI_NUM_CHANNELS] = {};
-		unsigned chan;
-		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-			values[chan] = ctx->inputs[radeon_llvm_reg_index_soa(
-						reg->Register.Index, chan)];
-		}
-		return lp_build_gather_values(bld_base->base.gallivm, values,
-						TGSI_NUM_CHANNELS);
-	} else {
-		return bitcast(bld_base, type, ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)]);
+	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+	struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+
+	unsigned i, size = range.Last - range.First + 1;
+	LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
+	LLVMValueRef result = LLVMGetUndef(vec);
+
+	struct tgsi_full_src_register tmp_reg = {};
+	tmp_reg.Register.File = File;
+
+	for (i = 0; i < size; ++i) {
+		tmp_reg.Register.Index = i + range.First;
+		LLVMValueRef temp = emit_fetch(bld_base, &tmp_reg, type, swizzle);
+		result = LLVMBuildInsertElement(builder, result, temp,
+			lp_build_const_int32(gallivm, i), "");
 	}
+	return result;
 }
 
 static LLVMValueRef
-emit_fetch_temporary(
+emit_fetch(
 	struct lp_build_tgsi_context *bld_base,
 	const struct tgsi_full_src_register *reg,
 	enum tgsi_opcode_type type,
 	unsigned swizzle)
 {
+	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
 	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
 	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+	LLVMValueRef result, ptr;
+
 	if (swizzle == ~0) {
-		LLVMValueRef values[TGSI_NUM_CHANNELS] = {};
+		LLVMValueRef values[TGSI_NUM_CHANNELS];
 		unsigned chan;
 		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-			values[chan] = emit_fetch_temporary(bld_base, reg, type, chan);
+			values[chan] = emit_fetch(bld_base, reg, type, chan);
 		}
 		return lp_build_gather_values(bld_base->base.gallivm, values,
-						TGSI_NUM_CHANNELS);
+					      TGSI_NUM_CHANNELS);
 	}
 
 	if (reg->Register.Indirect) {
-		LLVMValueRef array_index = emit_array_index(bld, reg, swizzle);
-		LLVMValueRef ptr = LLVMBuildGEP(builder, bld->temps_array, &array_index,
-						1, "");
-		return LLVMBuildLoad(builder, ptr, "");
-	} else {
-		LLVMValueRef temp_ptr;
-		temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
-		return bitcast(bld_base,type,LLVMBuildLoad(builder, temp_ptr, ""));
+		struct tgsi_declaration_range range = get_array_range(bld_base,
+			reg->Register.File, &reg->Indirect);
+		return LLVMBuildExtractElement(builder,
+			emit_array_fetch(bld_base, reg->Register.File, type, range, swizzle),
+			emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First),
+			"");
 	}
-}
 
-static LLVMValueRef
-emit_fetch_output(
-	struct lp_build_tgsi_context *bld_base,
-	const struct tgsi_full_src_register *reg,
-	enum tgsi_opcode_type type,
-	unsigned swizzle)
-{
-	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
-	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-	 if (reg->Register.Indirect) {
-		LLVMValueRef array_index = emit_array_index(bld, reg, swizzle);
-		LLVMValueRef ptr = LLVMBuildGEP(builder, bld->outputs_array, &array_index,
-						1, "");
-		return LLVMBuildLoad(builder, ptr, "");
-	} else {
-		LLVMValueRef temp_ptr;
-		temp_ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
-		return LLVMBuildLoad(builder, temp_ptr, "");
-	 }
+	switch(reg->Register.File) {
+	case TGSI_FILE_IMMEDIATE: {
+		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
+		return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
+	}
+
+	case TGSI_FILE_INPUT:
+		result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
+		break;
+
+	case TGSI_FILE_TEMPORARY:
+		ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
+		result = LLVMBuildLoad(builder, ptr, "");
+		break;
+
+	case TGSI_FILE_OUTPUT:
+		ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
+		result = LLVMBuildLoad(builder, ptr, "");
+		break;
+
+	default:
+		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
+	}
+
+	return bitcast(bld_base, type, result);
 }
 
 static void emit_declaration(
@@ -233,6 +221,8 @@ static void emit_declaration(
 	}
 
 	case TGSI_FILE_TEMPORARY:
+		if (decl->Declaration.Array && decl->Array.ArrayID <= RADEON_LLVM_MAX_ARRAYS)
+			ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
 		lp_emit_declaration_soa(bld_base, decl);
 		break;
 
@@ -291,6 +281,7 @@ emit_store(
 	LLVMValueRef temp_ptr;
 	unsigned chan, chan_index;
 	boolean is_vec_store = FALSE;
+
 	if (dst[0]) {
 		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
 		is_vec_store = (k == LLVMVectorTypeKind);
@@ -333,26 +324,56 @@ emit_store(
 						&clamp_emit_data);
 		}
 
-		switch(reg->Register.File) {
-		case TGSI_FILE_ADDRESS:
+		if (reg->Register.File == TGSI_FILE_ADDRESS) {
 			temp_ptr = bld->addr[reg->Register.Index][chan_index];
 			LLVMBuildStore(builder, value, temp_ptr);
 			continue;
-		case TGSI_FILE_OUTPUT:
-			temp_ptr = bld->outputs[reg->Register.Index][chan_index];
-			break;
+		}
+	
+		value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
 
-		case TGSI_FILE_TEMPORARY:
-			temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
-			break;
+		if (reg->Register.Indirect) {
+			struct tgsi_declaration_range range = get_array_range(bld_base,
+				reg->Register.File, &reg->Indirect);
+
+        		unsigned i, size = range.Last - range.First + 1;
+			LLVMValueRef array = LLVMBuildInsertElement(builder,
+				emit_array_fetch(bld_base, reg->Register.File, TGSI_TYPE_FLOAT, range, chan_index),
+				value,  emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First), "");
+
+        		for (i = 0; i < size; ++i) {
+				switch(reg->Register.File) {
+				case TGSI_FILE_OUTPUT:
+					temp_ptr = bld->outputs[i + range.First][chan_index];
+					break;
+
+				case TGSI_FILE_TEMPORARY:
+					temp_ptr = lp_get_temp_ptr_soa(bld, i + range.First, chan_index);
+					break;
+
+				default:
+					return;
+				}
+				value = LLVMBuildExtractElement(builder, array, 
+					lp_build_const_int32(gallivm, i), "");
+				LLVMBuildStore(builder, value, temp_ptr);
+			}
 
-		default:
-			return;
-		}
+		} else {
+			switch(reg->Register.File) {
+			case TGSI_FILE_OUTPUT:
+				temp_ptr = bld->outputs[reg->Register.Index][chan_index];
+				break;
 
-		value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
+			case TGSI_FILE_TEMPORARY:
+				temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
+				break;
 
-		LLVMBuildStore(builder, value, temp_ptr);
+			default:
+				return;
+			}
+			LLVMBuildStore(builder, value, temp_ptr);
+		}
 	}
 }
 
@@ -1128,14 +1149,16 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
 	bld_base->emit_declaration = emit_declaration;
 	bld_base->emit_immediate = emit_immediate;
 
-	bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
-	bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
-	bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
-	bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_output;
+	bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch;
+	bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch;
+	bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch;
+	bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch;
 
 	/* Allocate outputs */
 	ctx->soa.outputs = ctx->outputs;
 
+	ctx->num_arrays = 0;
+
 	/* XXX: Is there a better way to initialize all this ? */
 
 	lp_set_default_actions(bld_base);




More information about the mesa-commit mailing list