[Mesa-dev] [PATCH 81/92] ac/nir: pass ac_nir_context to emit_ddxy
Nicolai Hähnle
nhaehnle at gmail.com
Mon Jun 26 14:19:10 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
Allocating the ddxy_lds is considered to be part of the API shader
translation and not part of the ABI.
---
src/amd/common/ac_nir_to_llvm.c | 29 ++++++++++++++---------------
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 7fc0770..6e72bbc 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -60,20 +60,22 @@ struct ac_nir_context {
LLVMValueRef main_function;
LLVMBasicBlockRef continue_block;
LLVMBasicBlockRef break_block;
LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
int num_locals;
LLVMValueRef *locals;
+ LLVMValueRef ddxy_lds;
+
struct nir_to_llvm_context *nctx; /* TODO get rid of this */
};
struct nir_to_llvm_context {
struct ac_llvm_context ac;
const struct ac_nir_compiler_options *options;
struct ac_shader_variant_info *shader_info;
struct ac_shader_abi abi;
struct ac_nir_context *nir;
@@ -162,22 +164,20 @@ struct nir_to_llvm_context {
LLVMValueRef lds;
LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
LLVMValueRef shared_memory;
uint64_t input_mask;
uint64_t output_mask;
uint8_t num_output_clips;
uint8_t num_output_culls;
- bool has_ds_bpermute;
-
bool is_gs_copy_shader;
LLVMValueRef gs_next_vertex;
unsigned gs_max_out_vertices;
unsigned tes_primitive_mode;
uint64_t tess_outputs_written;
uint64_t tess_patch_outputs_written;
};
static inline struct nir_to_llvm_context *
@@ -1461,69 +1461,70 @@ static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
}
LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2);
result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(v2f32), temps[0],
ctx->i32_0, "");
result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
ctx->i32_1, "");
return result;
}
-static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
+static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
nir_op op,
LLVMValueRef src0)
{
unsigned mask;
int idx;
LLVMValueRef result;
+ bool has_ds_bpermute = ctx->abi->chip_class >= VI;
- if (!ctx->lds && !ctx->has_ds_bpermute)
- ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
- LLVMArrayType(ctx->i32, 64),
+ if (!ctx->ddxy_lds && !has_ds_bpermute)
+ ctx->ddxy_lds = LLVMAddGlobalInAddressSpace(ctx->ac.module,
+ LLVMArrayType(ctx->ac.i32, 64),
"ddxy_lds", LOCAL_ADDR_SPACE);
if (op == nir_op_fddx_fine || op == nir_op_fddx)
mask = AC_TID_MASK_LEFT;
else if (op == nir_op_fddy_fine || op == nir_op_fddy)
mask = AC_TID_MASK_TOP;
else
mask = AC_TID_MASK_TOP_LEFT;
/* for DDX we want to next X pixel, DDY next Y pixel. */
if (op == nir_op_fddx_fine ||
op == nir_op_fddx_coarse ||
op == nir_op_fddx)
idx = 1;
else
idx = 2;
- result = ac_build_ddxy(&ctx->ac, ctx->has_ds_bpermute,
- mask, idx, ctx->lds,
+ result = ac_build_ddxy(&ctx->ac, has_ds_bpermute,
+ mask, idx, ctx->ddxy_lds,
src0);
return result;
}
/*
* this takes an I,J coordinate pair,
* and works out the X and Y derivatives.
* it returns DDX(I), DDX(J), DDY(I), DDY(J).
*/
static LLVMValueRef emit_ddxy_interp(
- struct nir_to_llvm_context *ctx,
+ struct ac_nir_context *ctx,
LLVMValueRef interp_ij)
{
LLVMValueRef result[4], a;
unsigned i;
for (i = 0; i < 2; i++) {
- a = LLVMBuildExtractElement(ctx->builder, interp_ij,
- LLVMConstInt(ctx->i32, i, false), "");
+ a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij,
+ LLVMConstInt(ctx->ac.i32, i, false), "");
result[i] = emit_ddxy(ctx, nir_op_fddx, a);
result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
}
return ac_build_gather_values(&ctx->ac, result, 4);
}
static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
{
LLVMValueRef src[4], result = NULL;
unsigned num_components = instr->dest.dest.ssa.num_components;
@@ -1873,21 +1874,21 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
break;
case nir_op_unpack_half_2x16:
result = emit_unpack_half_2x16(&ctx->ac, src[0]);
break;
case nir_op_fddx:
case nir_op_fddy:
case nir_op_fddx_fine:
case nir_op_fddy_fine:
case nir_op_fddx_coarse:
case nir_op_fddy_coarse:
- result = emit_ddxy(ctx->nctx, instr->op, src[0]);
+ result = emit_ddxy(ctx, instr->op, src[0]);
break;
default:
fprintf(stderr, "Unknown NIR alu instr: ");
nir_print_instr(&instr->instr, stderr);
fprintf(stderr, "\n");
abort();
}
if (result) {
assert(instr->dest.dest.is_ssa);
@@ -3731,21 +3732,21 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32zero, "");
src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32one, "");
src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
}
interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
attr_number = LLVMConstInt(ctx->i32, input_index, false);
if (location == INTERP_SAMPLE || location == INTERP_CENTER) {
LLVMValueRef ij_out[2];
- LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);
+ LLVMValueRef ddxy_out = emit_ddxy_interp(ctx->nir, interp_param);
/*
* take the I then J parameters, and the DDX/Y for it, and
* calculate the IJ inputs for the interpolator.
* temp1 = ddx * offset/sample.x + I;
* interp_param.I = ddy * offset/sample.y + temp1;
* temp1 = ddx * offset/sample.x + J;
* interp_param.J = ddy * offset/sample.y + temp1;
*/
for (unsigned i = 0; i < 2; i++) {
@@ -6041,22 +6042,20 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
struct nir_to_llvm_context ctx = {0};
unsigned i;
ctx.options = options;
ctx.shader_info = shader_info;
ctx.context = LLVMContextCreate();
ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
ac_llvm_context_init(&ctx.ac, ctx.context);
ctx.ac.module = ctx.module;
- ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
-
memset(shader_info, 0, sizeof(*shader_info));
ac_nir_shader_info_pass(nir, options, &shader_info->info);
LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--");
LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
LLVMSetDataLayout(ctx.module, data_layout_str);
LLVMDisposeTargetData(data_layout);
--
2.9.3
More information about the mesa-dev
mailing list