[Mesa-dev] [PATCH 4/7] gallium/radeon: merge branch and loop flow control stacks
Nicolai Hähnle
nhaehnle at gmail.com
Thu Sep 29 13:15:54 UTC 2016
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
src/gallium/drivers/radeon/radeon_llvm.h | 20 +--
.../drivers/radeon/radeon_setup_tgsi_llvm.c | 140 +++++++++++----------
2 files changed, 78 insertions(+), 82 deletions(-)
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index 58193db..2f9572a 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -33,29 +33,21 @@
#include "tgsi/tgsi_parse.h"
#define RADEON_LLVM_MAX_INPUT_SLOTS 32
#define RADEON_LLVM_MAX_INPUTS 32 * 4
#define RADEON_LLVM_MAX_OUTPUTS 32 * 4
#define RADEON_LLVM_INITIAL_CF_DEPTH 4
#define RADEON_LLVM_MAX_SYSTEM_VALUES 4
-struct radeon_llvm_branch {
- LLVMBasicBlockRef next_block;
- unsigned has_else;
-};
-
-struct radeon_llvm_loop {
- LLVMBasicBlockRef loop_block;
- LLVMBasicBlockRef endloop_block;
-};
+struct radeon_llvm_flow;
struct radeon_llvm_context {
struct lp_build_tgsi_soa_context soa;
/*=== Front end configuration ===*/
/* Instructions that are not described by any of the TGSI opcodes. */
/** This function is responsible for initializing the inputs array and will be
* called once for each input declared in the TGSI shader.
@@ -83,27 +75,23 @@ struct radeon_llvm_context {
/** This pointer is used to contain the temporary values.
* The number of temporaries used in TGSI can't be bounded by a max value and
* thus we must allocate this array at runtime.
*/
LLVMValueRef *temps;
unsigned temps_count;
LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];
/*=== Private Members ===*/
- struct radeon_llvm_branch *branch;
- struct radeon_llvm_loop *loop;
-
- unsigned branch_depth;
- unsigned branch_depth_max;
- unsigned loop_depth;
- unsigned loop_depth_max;
+ struct radeon_llvm_flow *flow;
+ unsigned flow_depth;
+ unsigned flow_depth_max;
struct tgsi_array_info *temp_arrays;
LLVMValueRef *temp_array_allocas;
LLVMValueRef undef_alloca;
LLVMValueRef main_fn;
LLVMTypeRef return_type;
unsigned fpmath_md_kind;
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 6cae858..2f100bd 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -35,20 +35,28 @@
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_debug.h"
#include <stdio.h>
#include <llvm-c/Core.h>
#include <llvm-c/Transforms/Scalar.h>
+/* Data for if/else/endif and bgnloop/endloop control flow structures.
+ */
+struct radeon_llvm_flow {
+ /* Loop exit or next part of if/else/endif. */
+ LLVMBasicBlockRef next_block;
+ LLVMBasicBlockRef loop_entry_block;
+};
+
LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
enum tgsi_opcode_type type)
{
LLVMContextRef ctx = bld_base->base.gallivm->context;
switch (type) {
case TGSI_TYPE_UNSIGNED:
case TGSI_TYPE_SIGNED:
return LLVMInt32TypeInContext(ctx);
case TGSI_TYPE_UNSIGNED64:
@@ -98,29 +106,57 @@ LLVMValueRef radeon_llvm_bound_index(struct radeon_llvm_context *ctx,
* In practice, LLVM generates worse code (at the time of
* writing), because its value tracking is not strong enough.
*/
cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
index = LLVMBuildSelect(builder, cc, index, c_max, "");
}
return index;
}
-static struct radeon_llvm_loop *get_current_loop(struct radeon_llvm_context *ctx)
+static struct radeon_llvm_flow *
+get_current_flow(struct radeon_llvm_context *ctx)
+{
+ if (ctx->flow_depth > 0)
+ return &ctx->flow[ctx->flow_depth - 1];
+ return NULL;
+}
+
+static struct radeon_llvm_flow *
+get_innermost_loop(struct radeon_llvm_context *ctx)
{
- return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL;
+ for (unsigned i = ctx->flow_depth; i > 0; --i) {
+ if (ctx->flow[i - 1].loop_entry_block)
+ return &ctx->flow[i - 1];
+ }
+ return NULL;
}
-static struct radeon_llvm_branch *get_current_branch(struct radeon_llvm_context *ctx)
+static struct radeon_llvm_flow *
+push_flow(struct radeon_llvm_context *ctx)
{
- return ctx->branch_depth > 0 ?
- ctx->branch + (ctx->branch_depth - 1) : NULL;
+ struct radeon_llvm_flow *flow;
+
+ if (ctx->flow_depth >= ctx->flow_depth_max) {
+ unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
+ ctx->flow = REALLOC(ctx->flow,
+ ctx->flow_depth_max * sizeof(*ctx->flow),
+ new_max * sizeof(*ctx->flow));
+ ctx->flow_depth_max = new_max;
+ }
+
+ flow = &ctx->flow[ctx->flow_depth];
+ ctx->flow_depth++;
+
+ flow->next_block = NULL;
+ flow->loop_entry_block = NULL;
+ return flow;
}
unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
return (index * 4) + chan;
}
static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
LLVMValueRef value,
unsigned swizzle_x,
@@ -816,149 +852,124 @@ static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target
if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
LLVMBuildBr(builder, target);
}
static void bgnloop_emit(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- LLVMBasicBlockRef loop_block;
- LLVMBasicBlockRef endloop_block;
- endloop_block = LLVMAppendBasicBlockInContext(gallivm->context,
+ struct radeon_llvm_flow *flow = push_flow(ctx);
+ flow->next_block = LLVMAppendBasicBlockInContext(gallivm->context,
ctx->main_fn, "ENDLOOP");
- loop_block = LLVMInsertBasicBlockInContext(gallivm->context,
- endloop_block, "LOOP");
- set_basicblock_name(loop_block, "loop", bld_base->pc);
- LLVMBuildBr(gallivm->builder, loop_block);
- LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);
-
- if (++ctx->loop_depth > ctx->loop_depth_max) {
- unsigned new_max = ctx->loop_depth_max << 1;
-
- if (!new_max)
- new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
-
- ctx->loop = REALLOC(ctx->loop, ctx->loop_depth_max *
- sizeof(ctx->loop[0]),
- new_max * sizeof(ctx->loop[0]));
- ctx->loop_depth_max = new_max;
- }
-
- ctx->loop[ctx->loop_depth - 1].loop_block = loop_block;
- ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block;
+ flow->loop_entry_block = LLVMInsertBasicBlockInContext(gallivm->context,
+ flow->next_block, "LOOP");
+ set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
+ LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
+ LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
}
static void brk_emit(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
+ struct radeon_llvm_flow *flow = get_innermost_loop(ctx);
- LLVMBuildBr(gallivm->builder, current_loop->endloop_block);
+ LLVMBuildBr(gallivm->builder, flow->next_block);
}
static void cont_emit(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
+ struct radeon_llvm_flow *flow = get_innermost_loop(ctx);
- LLVMBuildBr(gallivm->builder, current_loop->loop_block);
+ LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
}
static void else_emit(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- struct radeon_llvm_branch *current_branch = get_current_branch(ctx);
+ struct radeon_llvm_flow *current_branch = get_current_flow(ctx);
LLVMBasicBlockRef endif_block;
+ assert(!current_branch->loop_entry_block);
+
endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
ctx->main_fn, "ENDIF");
emit_default_branch(gallivm->builder, endif_block);
- current_branch->has_else = 1;
LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
current_branch->next_block = endif_block;
}
static void endif_emit(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- struct radeon_llvm_branch *current_branch = get_current_branch(ctx);
+ struct radeon_llvm_flow *current_branch = get_current_flow(ctx);
+
+ assert(!current_branch->loop_entry_block);
emit_default_branch(gallivm->builder, current_branch->next_block);
LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
- ctx->branch_depth--;
+ ctx->flow_depth--;
}
static void endloop_emit(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
+ struct radeon_llvm_flow *current_loop = get_current_flow(ctx);
+
+ assert(current_loop->loop_entry_block);
- emit_default_branch(gallivm->builder, current_loop->loop_block);
+ emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
- LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
- set_basicblock_name(current_loop->endloop_block, "endloop", bld_base->pc);
- ctx->loop_depth--;
+ LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
+ set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
+ ctx->flow_depth--;
}
static void if_cond_emit(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data,
LLVMValueRef cond)
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- LLVMBasicBlockRef if_block, else_block;
+ struct radeon_llvm_flow *flow = push_flow(ctx);
+ LLVMBasicBlockRef if_block;
- else_block = LLVMAppendBasicBlockInContext(gallivm->context,
+ flow->next_block = LLVMAppendBasicBlockInContext(gallivm->context,
ctx->main_fn, "ELSE");
if_block = LLVMInsertBasicBlockInContext(gallivm->context,
- else_block, "IF");
+ flow->next_block, "IF");
set_basicblock_name(if_block, "if", bld_base->pc);
- LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
+ LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
-
- if (++ctx->branch_depth > ctx->branch_depth_max) {
- unsigned new_max = ctx->branch_depth_max << 1;
-
- if (!new_max)
- new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
-
- ctx->branch = REALLOC(ctx->branch, ctx->branch_depth_max *
- sizeof(ctx->branch[0]),
- new_max * sizeof(ctx->branch[0]));
- ctx->branch_depth_max = new_max;
- }
-
- ctx->branch[ctx->branch_depth - 1].next_block = else_block;
- ctx->branch[ctx->branch_depth - 1].has_else = 0;
}
static void if_emit(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMValueRef cond;
cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
@@ -2126,17 +2137,14 @@ void radeon_llvm_dispose(struct radeon_llvm_context *ctx)
{
LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
FREE(ctx->temp_arrays);
ctx->temp_arrays = NULL;
FREE(ctx->temp_array_allocas);
ctx->temp_array_allocas = NULL;
FREE(ctx->temps);
ctx->temps = NULL;
ctx->temps_count = 0;
- FREE(ctx->loop);
- ctx->loop = NULL;
- ctx->loop_depth_max = 0;
- FREE(ctx->branch);
- ctx->branch = NULL;
- ctx->branch_depth_max = 0;
+ FREE(ctx->flow);
+ ctx->flow = NULL;
+ ctx->flow_depth_max = 0;
}
--
2.7.4
More information about the mesa-dev mailing list