[Mesa-dev] [PATCH 12/16] gallium/radeon: allocate temps array info in radeon_llvm_context_init
Nicolai Hähnle
nhaehnle at gmail.com
Wed Aug 10 19:23:37 UTC 2016
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
Also, prepare for using tgsi_array_info.
This also opens the door for properly handling allocation failures, but I'm
leaving that for a separate change.
---
src/gallium/drivers/radeon/radeon_llvm.h | 11 ++--
.../drivers/radeon/radeon_setup_tgsi_llvm.c | 66 +++++++++++++---------
src/gallium/drivers/radeonsi/si_shader.c | 6 +-
3 files changed, 47 insertions(+), 36 deletions(-)
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index 13f3336..6086dd6 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -43,25 +43,20 @@ struct radeon_llvm_branch {
LLVMBasicBlockRef if_block;
LLVMBasicBlockRef else_block;
unsigned has_else;
};
struct radeon_llvm_loop {
LLVMBasicBlockRef loop_block;
LLVMBasicBlockRef endloop_block;
};
-struct radeon_llvm_array {
- struct tgsi_declaration_range range;
- LLVMValueRef alloca;
-};
-
struct radeon_llvm_context {
struct lp_build_tgsi_soa_context soa;
/*=== Front end configuration ===*/
/* Instructions that are not described by any of the TGSI opcodes. */
/** This function is responsible for initilizing the inputs array and will be
* called once for each input declared in the TGSI shader.
*/
@@ -94,21 +89,22 @@ struct radeon_llvm_context {
/*=== Private Members ===*/
struct radeon_llvm_branch *branch;
struct radeon_llvm_loop *loop;
unsigned branch_depth;
unsigned branch_depth_max;
unsigned loop_depth;
unsigned loop_depth_max;
- struct radeon_llvm_array *arrays;
+ struct tgsi_array_info *temp_arrays;
+ LLVMValueRef *temp_array_allocas;
LLVMValueRef main_fn;
LLVMTypeRef return_type;
unsigned fpmath_md_kind;
LLVMValueRef fpmath_md_2p5_ulp;
struct gallivm_state gallivm;
};
@@ -117,21 +113,22 @@ LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
enum tgsi_opcode_type type, LLVMValueRef value);
void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data,
LLVMValueRef *coords_arg,
LLVMValueRef *derivs_arg);
void radeon_llvm_context_init(struct radeon_llvm_context *ctx,
- const char *triple);
+ const char *triple,
+ const struct tgsi_shader_info *info);
void radeon_llvm_create_func(struct radeon_llvm_context *ctx,
LLVMTypeRef *return_types, unsigned num_return_elems,
LLVMTypeRef *ParamTypes, unsigned ParamCount);
void radeon_llvm_dispose(struct radeon_llvm_context *ctx);
unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan);
void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx);
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index d8ab5b0..2521023 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -108,54 +108,54 @@ static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
value,
LLVMGetUndef(LLVMTypeOf(value)),
LLVMConstVector(swizzles, 4), "");
}
/**
* Return the description of the array covering the given temporary register
* index.
*/
-static const struct radeon_llvm_array *
-get_temp_array(struct lp_build_tgsi_context *bld_base,
- unsigned reg_index,
- const struct tgsi_ind_register *reg)
+static unsigned
+get_temp_array_id(struct lp_build_tgsi_context *bld_base,
+ unsigned reg_index,
+ const struct tgsi_ind_register *reg)
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
unsigned num_arrays = ctx->soa.bld_base.info->array_max[TGSI_FILE_TEMPORARY];
unsigned i;
if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
- return &ctx->arrays[reg->ArrayID - 1];
+ return reg->ArrayID;
for (i = 0; i < num_arrays; i++) {
- const struct radeon_llvm_array *array = &ctx->arrays[i];
+ const struct tgsi_array_info *array = &ctx->temp_arrays[i];
if (reg_index >= array->range.First && reg_index <= array->range.Last)
- return array;
+ return i + 1;
}
- return NULL;
+ return 0;
}
static struct tgsi_declaration_range
get_array_range(struct lp_build_tgsi_context *bld_base,
unsigned File, unsigned reg_index,
const struct tgsi_ind_register *reg)
{
+ struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct tgsi_declaration_range range;
if (File == TGSI_FILE_TEMPORARY) {
- const struct radeon_llvm_array *array =
- get_temp_array(bld_base, reg_index, reg);
- if (array)
- return array->range;
+ unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
+ if (array_id)
+ return ctx->temp_arrays[array_id - 1].range;
}
range.First = 0;
range.Last = bld_base->info->file_max[File];
return range;
}
static LLVMValueRef
emit_array_index(struct lp_build_tgsi_soa_context *bld,
const struct tgsi_ind_register *reg,
@@ -177,39 +177,45 @@ emit_array_index(struct lp_build_tgsi_soa_context *bld,
* Returns NULL if the insertelement/extractelement fallback for array access
* must be used.
*/
static LLVMValueRef
get_pointer_into_array(struct radeon_llvm_context *ctx,
unsigned file,
unsigned swizzle,
unsigned reg_index,
const struct tgsi_ind_register *reg_indirect)
{
- const struct radeon_llvm_array *array;
+ unsigned array_id;
struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef idxs[2];
LLVMValueRef index;
+ LLVMValueRef alloca;
if (file != TGSI_FILE_TEMPORARY)
return NULL;
- array = get_temp_array(&ctx->soa.bld_base, reg_index, reg_indirect);
- if (!array || !array->alloca)
+ array_id = get_temp_array_id(&ctx->soa.bld_base, reg_index, reg_indirect);
+ if (!array_id)
return NULL;
- index = emit_array_index(&ctx->soa, reg_indirect, reg_index - array->range.First);
+ alloca = ctx->temp_array_allocas[array_id - 1];
+ if (!alloca)
+ return NULL;
+
+ index = emit_array_index(&ctx->soa, reg_indirect,
+ reg_index - ctx->temp_arrays[array_id - 1].range.First);
index = LLVMBuildMul(builder, index, lp_build_const_int32(gallivm, TGSI_NUM_CHANNELS), "");
index = LLVMBuildAdd(builder, index, lp_build_const_int32(gallivm, swizzle), "");
idxs[0] = ctx->soa.bld_base.uint_bld.zero;
idxs[1] = index;
- return LLVMBuildGEP(builder, array->alloca, idxs, 2, "");
+ return LLVMBuildGEP(builder, alloca, idxs, 2, "");
}
LLVMValueRef
radeon_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
enum tgsi_opcode_type type,
LLVMValueRef ptr,
LLVMValueRef ptr2)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMValueRef result;
@@ -471,26 +477,22 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
case TGSI_FILE_TEMPORARY:
{
char name[16] = "";
LLVMValueRef array_alloca = NULL;
unsigned decl_size;
first = decl->Range.First;
last = decl->Range.Last;
decl_size = 4 * ((last - first) + 1);
if (decl->Declaration.Array) {
unsigned id = decl->Array.ArrayID - 1;
- if (!ctx->arrays) {
- int size = bld_base->info->array_max[TGSI_FILE_TEMPORARY];
- ctx->arrays = CALLOC(size, sizeof(ctx->arrays[0]));
- }
- ctx->arrays[id].range = decl->Range;
+ ctx->temp_arrays[id].range = decl->Range;
/* If the array has more than 16 elements, store it
* in memory using an alloca that spans the entire
* array.
*
* Otherwise, store each array element individually.
* We will then generate vectors (per-channel, up to
* <4 x float>) for indirect addressing.
*
* Note that 16 is the number of vector elements that
@@ -500,21 +502,21 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
* depends on VGPR register pressure elsewhere.
*
* FIXME: We shouldn't need to have the non-alloca
* code path for arrays. LLVM should be smart enough to
* promote allocas into registers when profitable.
*/
if (decl_size > 16) {
array_alloca = LLVMBuildAlloca(builder,
LLVMArrayType(bld_base->base.vec_type,
decl_size), "array");
- ctx->arrays[id].alloca = array_alloca;
+ ctx->temp_array_allocas[id] = array_alloca;
}
}
if (!ctx->temps_count) {
ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
}
if (!array_alloca) {
for (i = 0; i < decl_size; ++i) {
#ifdef DEBUG
@@ -1720,39 +1722,49 @@ static void emit_rsq(const struct lp_build_tgsi_action *action,
{
LLVMValueRef sqrt =
lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
emit_data->args[0]);
emit_data->output[emit_data->chan] =
lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
bld_base->base.one, sqrt);
}
-void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *triple)
+void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *triple,
+ const struct tgsi_shader_info *info)
{
struct lp_type type;
/* Initialize the gallivm object:
* We are only using the module, context, and builder fields of this struct.
* This should be enough for us to be able to pass our gallivm struct to the
* helper functions in the gallivm module.
*/
memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
memset(&ctx->soa, 0, sizeof(ctx->soa));
ctx->gallivm.context = LLVMContextCreate();
ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
ctx->gallivm.context);
LLVMSetTarget(ctx->gallivm.module, triple);
ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
+ bld_base->info = info;
+
+ if (info && info->array_max[TGSI_FILE_TEMPORARY] > 0) {
+ int size = info->array_max[TGSI_FILE_TEMPORARY];
+
+ ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
+ ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
+ }
+
type.floating = true;
type.fixed = false;
type.sign = true;
type.norm = false;
type.width = 32;
type.length = 1;
lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
@@ -1959,22 +1971,24 @@ void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx)
LLVMDisposeBuilder(gallivm->builder);
LLVMDisposePassManager(gallivm->passmgr);
gallivm_dispose_target_library_info(target_library_info);
}
void radeon_llvm_dispose(struct radeon_llvm_context *ctx)
{
LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
- FREE(ctx->arrays);
- ctx->arrays = NULL;
+ FREE(ctx->temp_arrays);
+ ctx->temp_arrays = NULL;
+ FREE(ctx->temp_array_allocas);
+ ctx->temp_array_allocas = NULL;
FREE(ctx->temps);
ctx->temps = NULL;
ctx->temps_count = 0;
FREE(ctx->loop);
ctx->loop = NULL;
ctx->loop_depth_max = 0;
FREE(ctx->branch);
ctx->branch = NULL;
ctx->branch_depth_max = 0;
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 2de20cb..c595ee0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6515,21 +6515,23 @@ static void si_dump_shader_key(unsigned shader, union si_shader_key *key,
static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
struct si_shader *shader,
LLVMTargetMachineRef tm)
{
struct lp_build_tgsi_context *bld_base;
struct lp_build_tgsi_action tmpl = {};
memset(ctx, 0, sizeof(*ctx));
- radeon_llvm_context_init(&ctx->radeon_bld, "amdgcn--");
+ radeon_llvm_context_init(
+ &ctx->radeon_bld, "amdgcn--",
+ (shader && shader->selector) ? &shader->selector->info : NULL);
ctx->tm = tm;
ctx->screen = sscreen;
if (shader && shader->selector)
ctx->type = shader->selector->info.processor;
else
ctx->type = -1;
ctx->shader = shader;
ctx->voidt = LLVMVoidTypeInContext(ctx->radeon_bld.gallivm.context);
ctx->i1 = LLVMInt1TypeInContext(ctx->radeon_bld.gallivm.context);
@@ -6538,22 +6540,20 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
ctx->i64 = LLVMInt64TypeInContext(ctx->radeon_bld.gallivm.context);
ctx->i128 = LLVMIntTypeInContext(ctx->radeon_bld.gallivm.context, 128);
ctx->f32 = LLVMFloatTypeInContext(ctx->radeon_bld.gallivm.context);
ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
bld_base = &ctx->radeon_bld.soa.bld_base;
- if (shader && shader->selector)
- bld_base->info = &shader->selector->info;
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE] = interp_action;
bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET] = interp_action;
bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TEX2] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXB] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXB2] = tex_action;
--
2.7.4
More information about the mesa-dev mailing list