[Mesa-dev] [PATCH 4/7] gallium/radeon: merge branch and loop flow control stacks

Thu Sep 29 13:15:54 UTC 2016

From: Nicolai Hähnle <nicolai.haehnle at amd.com>

---
 src/gallium/drivers/radeon/radeon_llvm.h           |  20 +--
 .../drivers/radeon/radeon_setup_tgsi_llvm.c        | 140 +++++++++++----------
 2 files changed, 78 insertions(+), 82 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index 58193db..2f9572a 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -33,29 +33,21 @@
 #include "tgsi/tgsi_parse.h"
 
 #define RADEON_LLVM_MAX_INPUT_SLOTS 32
 #define RADEON_LLVM_MAX_INPUTS 32 * 4
 #define RADEON_LLVM_MAX_OUTPUTS 32 * 4
 
 #define RADEON_LLVM_INITIAL_CF_DEPTH 4
 
 #define RADEON_LLVM_MAX_SYSTEM_VALUES 4
 
-struct radeon_llvm_branch {
-	LLVMBasicBlockRef next_block;
-	unsigned has_else;
-};
-
-struct radeon_llvm_loop {
-	LLVMBasicBlockRef loop_block;
-	LLVMBasicBlockRef endloop_block;
-};
+struct radeon_llvm_flow;
 
 struct radeon_llvm_context {
 	struct lp_build_tgsi_soa_context soa;
 
 	/*=== Front end configuration ===*/
 
 	/* Instructions that are not described by any of the TGSI opcodes. */
 
 	/** This function is responsible for initilizing the inputs array and will be
 	  * called once for each input declared in the TGSI shader.
@@ -83,27 +75,23 @@ struct radeon_llvm_context {
 	/** This pointer is used to contain the temporary values.
 	  * The amount of temporary used in tgsi can't be bound to a max value and
 	  * thus we must allocate this array at runtime.
 	  */
 	LLVMValueRef *temps;
 	unsigned temps_count;
 	LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];
 
 	/*=== Private Members ===*/
 
-	struct radeon_llvm_branch *branch;
-	struct radeon_llvm_loop *loop;
-
-	unsigned branch_depth;
-	unsigned branch_depth_max;
-	unsigned loop_depth;
-	unsigned loop_depth_max;
+	struct radeon_llvm_flow *flow;
+	unsigned flow_depth;
+	unsigned flow_depth_max;
 
 	struct tgsi_array_info *temp_arrays;
 	LLVMValueRef *temp_array_allocas;
 
 	LLVMValueRef undef_alloca;
 
 	LLVMValueRef main_fn;
 	LLVMTypeRef return_type;
 
 	unsigned fpmath_md_kind;
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 6cae858..2f100bd 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -35,20 +35,28 @@
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_parse.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_debug.h"
 
 #include <stdio.h>
 #include <llvm-c/Core.h>
 #include <llvm-c/Transforms/Scalar.h>
 
+/* One entry of the merged control-flow stack, shared by if/else/endif and
+ * bgnloop/endloop constructs; push_flow() starts both fields out as NULL. */
+struct radeon_llvm_flow {
+	/* Loop exit or next part of if/else/endif. */
+	LLVMBasicBlockRef next_block;
+	LLVMBasicBlockRef loop_entry_block; /* set by bgnloop_emit(); left NULL for if/else/endif */
+};
+
 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
 			  enum tgsi_opcode_type type)
 {
 	LLVMContextRef ctx = bld_base->base.gallivm->context;
 
 	switch (type) {
 	case TGSI_TYPE_UNSIGNED:
 	case TGSI_TYPE_SIGNED:
 		return LLVMInt32TypeInContext(ctx);
 	case TGSI_TYPE_UNSIGNED64:
@@ -98,29 +106,57 @@ LLVMValueRef radeon_llvm_bound_index(struct radeon_llvm_context *ctx,
 		 * In practice, LLVM generates worse code (at the time of
 		 * writing), because its value tracking is not strong enough.
 		 */
 		cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
 		index = LLVMBuildSelect(builder, cc, index, c_max, "");
 	}
 
 	return index;
 }
 
-static struct radeon_llvm_loop *get_current_loop(struct radeon_llvm_context *ctx)
+static struct radeon_llvm_flow *
+get_current_flow(struct radeon_llvm_context *ctx)
+{
+	if (ctx->flow_depth > 0)
+		return &ctx->flow[ctx->flow_depth - 1];
+	return NULL;
+}
+
+static struct radeon_llvm_flow *
+get_innermost_loop(struct radeon_llvm_context *ctx)
 {
-	return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL;
+	for (unsigned i = ctx->flow_depth; i > 0; --i) {
+		if (ctx->flow[i - 1].loop_entry_block)
+			return &ctx->flow[i - 1];
+	}
+	return NULL;
 }
 
-static struct radeon_llvm_branch *get_current_branch(struct radeon_llvm_context *ctx)
+static struct radeon_llvm_flow *
+push_flow(struct radeon_llvm_context *ctx)
 {
-	return ctx->branch_depth > 0 ?
-			ctx->branch + (ctx->branch_depth - 1) : NULL;
+	struct radeon_llvm_flow *flow;
+
+	if (ctx->flow_depth >= ctx->flow_depth_max) {
+		unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
+		ctx->flow = REALLOC(ctx->flow,
+				    ctx->flow_depth_max * sizeof(*ctx->flow),
+				    new_max * sizeof(*ctx->flow));
+		ctx->flow_depth_max = new_max;
+	}
+
+	flow = &ctx->flow[ctx->flow_depth];
+	ctx->flow_depth++;
+
+	flow->next_block = NULL;
+	flow->loop_entry_block = NULL;
+	return flow;
 }
 
 unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
 {
 	return (index * 4) + chan;
 }
 
 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
 				 LLVMValueRef value,
 				 unsigned swizzle_x,
@@ -816,149 +852,124 @@ static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target
 	if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
 		 LLVMBuildBr(builder, target);
 }
 
 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
 			 struct lp_build_tgsi_context *bld_base,
 			 struct lp_build_emit_data *emit_data)
 {
 	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
-	LLVMBasicBlockRef loop_block;
-	LLVMBasicBlockRef endloop_block;
-	endloop_block = LLVMAppendBasicBlockInContext(gallivm->context,
+	struct radeon_llvm_flow *flow = push_flow(ctx);
+	flow->next_block = LLVMAppendBasicBlockInContext(gallivm->context,
 						ctx->main_fn, "ENDLOOP");
-	loop_block = LLVMInsertBasicBlockInContext(gallivm->context,
-						endloop_block, "LOOP");
-	set_basicblock_name(loop_block, "loop", bld_base->pc);
-	LLVMBuildBr(gallivm->builder, loop_block);
-	LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);
-
-	if (++ctx->loop_depth > ctx->loop_depth_max) {
-		unsigned new_max = ctx->loop_depth_max << 1;
-
-		if (!new_max)
-			new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
-
-		ctx->loop = REALLOC(ctx->loop, ctx->loop_depth_max *
-				    sizeof(ctx->loop[0]),
-				    new_max * sizeof(ctx->loop[0]));
-		ctx->loop_depth_max = new_max;
-	}
-
-	ctx->loop[ctx->loop_depth - 1].loop_block = loop_block;
-	ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block;
+	flow->loop_entry_block = LLVMInsertBasicBlockInContext(gallivm->context,
+						flow->next_block, "LOOP");
+	set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
+	LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
+	LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
 }
 
 static void brk_emit(const struct lp_build_tgsi_action *action,
 		     struct lp_build_tgsi_context *bld_base,
 		     struct lp_build_emit_data *emit_data)
 {
 	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
-	struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
+	struct radeon_llvm_flow *flow = get_innermost_loop(ctx);
 
-	LLVMBuildBr(gallivm->builder, current_loop->endloop_block);
+	LLVMBuildBr(gallivm->builder, flow->next_block);
 }
 
 static void cont_emit(const struct lp_build_tgsi_action *action,
 		      struct lp_build_tgsi_context *bld_base,
 		      struct lp_build_emit_data *emit_data)
 {
 	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
-	struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
+	struct radeon_llvm_flow *flow = get_innermost_loop(ctx);
 
-	LLVMBuildBr(gallivm->builder, current_loop->loop_block);
+	LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
 }
 
 static void else_emit(const struct lp_build_tgsi_action *action,
 		      struct lp_build_tgsi_context *bld_base,
 		      struct lp_build_emit_data *emit_data)
 {
 	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
-	struct radeon_llvm_branch *current_branch = get_current_branch(ctx);
+	struct radeon_llvm_flow *current_branch = get_current_flow(ctx);
 	LLVMBasicBlockRef endif_block;
 
+	assert(!current_branch->loop_entry_block);
+
 	endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
 						    ctx->main_fn, "ENDIF");
 	emit_default_branch(gallivm->builder, endif_block);
 
-	current_branch->has_else = 1;
 	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
 	set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
 
 	current_branch->next_block = endif_block;
 }
 
 static void endif_emit(const struct lp_build_tgsi_action *action,
 		       struct lp_build_tgsi_context *bld_base,
 		       struct lp_build_emit_data *emit_data)
 {
 	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
-	struct radeon_llvm_branch *current_branch = get_current_branch(ctx);
+	struct radeon_llvm_flow *current_branch = get_current_flow(ctx);
+
+	assert(!current_branch->loop_entry_block);
 
 	emit_default_branch(gallivm->builder, current_branch->next_block);
 	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
 	set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
 
-	ctx->branch_depth--;
+	ctx->flow_depth--;
 }
 
 static void endloop_emit(const struct lp_build_tgsi_action *action,
 			 struct lp_build_tgsi_context *bld_base,
 			 struct lp_build_emit_data *emit_data)
 {
 	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
-	struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
+	struct radeon_llvm_flow *current_loop = get_current_flow(ctx);
+
+	assert(current_loop->loop_entry_block);
 
-	emit_default_branch(gallivm->builder, current_loop->loop_block);
+	emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
 
-	LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
-	set_basicblock_name(current_loop->endloop_block, "endloop", bld_base->pc);
-	ctx->loop_depth--;
+	LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
+	set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
+	ctx->flow_depth--;
 }
 
 static void if_cond_emit(const struct lp_build_tgsi_action *action,
 			 struct lp_build_tgsi_context *bld_base,
 			 struct lp_build_emit_data *emit_data,
 			 LLVMValueRef cond)
 {
 	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
-	LLVMBasicBlockRef if_block, else_block;
+	struct radeon_llvm_flow *flow = push_flow(ctx);
+	LLVMBasicBlockRef if_block;
 
-	else_block = LLVMAppendBasicBlockInContext(gallivm->context,
+	flow->next_block = LLVMAppendBasicBlockInContext(gallivm->context,
 						   ctx->main_fn, "ELSE");
 	if_block = LLVMInsertBasicBlockInContext(gallivm->context,
-						else_block, "IF");
+						flow->next_block, "IF");
 	set_basicblock_name(if_block, "if", bld_base->pc);
-	LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
+	LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
 	LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
-
-	if (++ctx->branch_depth > ctx->branch_depth_max) {
-		unsigned new_max = ctx->branch_depth_max << 1;
-
-		if (!new_max)
-			new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
-
-		ctx->branch = REALLOC(ctx->branch, ctx->branch_depth_max *
-				      sizeof(ctx->branch[0]),
-				      new_max * sizeof(ctx->branch[0]));
-		ctx->branch_depth_max = new_max;
-	}
-
-	ctx->branch[ctx->branch_depth - 1].next_block = else_block;
-	ctx->branch[ctx->branch_depth - 1].has_else = 0;
 }
 
 static void if_emit(const struct lp_build_tgsi_action *action,
 		    struct lp_build_tgsi_context *bld_base,
 		    struct lp_build_emit_data *emit_data)
 {
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
 	LLVMValueRef cond;
 
 	cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
@@ -2126,17 +2137,14 @@ void radeon_llvm_dispose(struct radeon_llvm_context *ctx)
 {
 	LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
 	LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
 	FREE(ctx->temp_arrays);
 	ctx->temp_arrays = NULL;
 	FREE(ctx->temp_array_allocas);
 	ctx->temp_array_allocas = NULL;
 	FREE(ctx->temps);
 	ctx->temps = NULL;
 	ctx->temps_count = 0;
-	FREE(ctx->loop);
-	ctx->loop = NULL;
-	ctx->loop_depth_max = 0;
-	FREE(ctx->branch);
-	ctx->branch = NULL;
-	ctx->branch_depth_max = 0;
+	FREE(ctx->flow);
+	ctx->flow = NULL;
+	ctx->flow_depth_max = 0;
 }
-- 
2.7.4