[Mesa-dev] [PATCH v2 59/73] ac/nir: pass ac_nir_context to emit_ddxy

Wed Jul 5 10:48:43 UTC 2017

From: Nicolai Hähnle <nicolai.haehnle at amd.com>

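Allocating the ddxy_lds is considered to be part of the API shader
translation and not part of the ABI, so the LDS global is now stored in
ac_nir_context. has_ds_bpermute is derived locally in emit_ddxy from the
ABI's chip class instead of being cached in nir_to_llvm_context.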
---
 src/amd/common/ac_nir_to_llvm.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index ef1eeac..8953a3d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -60,20 +60,22 @@ struct ac_nir_context {
 
 	LLVMValueRef main_function;
 	LLVMBasicBlockRef continue_block;
 	LLVMBasicBlockRef break_block;
 
 	LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
 
 	int num_locals;
 	LLVMValueRef *locals;
 
+	LLVMValueRef ddxy_lds;
+
 	struct nir_to_llvm_context *nctx; /* TODO get rid of this */
 };
 
 struct nir_to_llvm_context {
 	struct ac_llvm_context ac;
 	const struct ac_nir_compiler_options *options;
 	struct ac_shader_variant_info *shader_info;
 	struct ac_shader_abi abi;
 	struct ac_nir_context *nir;
 
@@ -161,22 +163,20 @@ struct nir_to_llvm_context {
 
 	LLVMValueRef lds;
 	LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
 
 	LLVMValueRef shared_memory;
 	uint64_t input_mask;
 	uint64_t output_mask;
 	uint8_t num_output_clips;
 	uint8_t num_output_culls;
 
-	bool has_ds_bpermute;
-
 	bool is_gs_copy_shader;
 	LLVMValueRef gs_next_vertex;
 	unsigned gs_max_out_vertices;
 
 	unsigned tes_primitive_mode;
 	uint64_t tess_outputs_written;
 	uint64_t tess_patch_outputs_written;
 };
 
 static inline struct nir_to_llvm_context *
@@ -1460,69 +1460,70 @@ static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
 	}
 
 	LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2);
 	result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(v2f32), temps[0],
 					ctx->i32_0, "");
 	result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
 					ctx->i32_1, "");
 	return result;
 }
 
-static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
+static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
 			      nir_op op,
 			      LLVMValueRef src0)
 {
 	unsigned mask;
 	int idx;
 	LLVMValueRef result;
+	bool has_ds_bpermute = ctx->abi->chip_class >= VI;
 
-	if (!ctx->lds && !ctx->has_ds_bpermute)
-		ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
-						       LLVMArrayType(ctx->i32, 64),
+	if (!ctx->ddxy_lds && !has_ds_bpermute)
+		ctx->ddxy_lds = LLVMAddGlobalInAddressSpace(ctx->ac.module,
+						       LLVMArrayType(ctx->ac.i32, 64),
 						       "ddxy_lds", LOCAL_ADDR_SPACE);
 
 	if (op == nir_op_fddx_fine || op == nir_op_fddx)
 		mask = AC_TID_MASK_LEFT;
 	else if (op == nir_op_fddy_fine || op == nir_op_fddy)
 		mask = AC_TID_MASK_TOP;
 	else
 		mask = AC_TID_MASK_TOP_LEFT;
 
 	/* for DDX we want to next X pixel, DDY next Y pixel. */
 	if (op == nir_op_fddx_fine ||
 	    op == nir_op_fddx_coarse ||
 	    op == nir_op_fddx)
 		idx = 1;
 	else
 		idx = 2;
 
-	result = ac_build_ddxy(&ctx->ac, ctx->has_ds_bpermute,
-			      mask, idx, ctx->lds,
+	result = ac_build_ddxy(&ctx->ac, has_ds_bpermute,
+			      mask, idx, ctx->ddxy_lds,
 			      src0);
 	return result;
 }
 
 /*
  * this takes an I,J coordinate pair,
  * and works out the X and Y derivatives.
  * it returns DDX(I), DDX(J), DDY(I), DDY(J).
  */
 static LLVMValueRef emit_ddxy_interp(
-	struct nir_to_llvm_context *ctx,
+	struct ac_nir_context *ctx,
 	LLVMValueRef interp_ij)
 {
 	LLVMValueRef result[4], a;
 	unsigned i;
 
 	for (i = 0; i < 2; i++) {
-		a = LLVMBuildExtractElement(ctx->builder, interp_ij,
-					    LLVMConstInt(ctx->i32, i, false), "");
+		a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij,
+					    LLVMConstInt(ctx->ac.i32, i, false), "");
 		result[i] = emit_ddxy(ctx, nir_op_fddx, a);
 		result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
 	}
 	return ac_build_gather_values(&ctx->ac, result, 4);
 }
 
 static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 {
 	LLVMValueRef src[4], result = NULL;
 	unsigned num_components = instr->dest.dest.ssa.num_components;
@@ -1881,21 +1882,21 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		break;
 	case nir_op_unpack_half_2x16:
 		result = emit_unpack_half_2x16(&ctx->ac, src[0]);
 		break;
 	case nir_op_fddx:
 	case nir_op_fddy:
 	case nir_op_fddx_fine:
 	case nir_op_fddy_fine:
 	case nir_op_fddx_coarse:
 	case nir_op_fddy_coarse:
-		result = emit_ddxy(ctx->nctx, instr->op, src[0]);
+		result = emit_ddxy(ctx, instr->op, src[0]);
 		break;
 
 	case nir_op_unpack_64_2x32_split_x: {
 		assert(instr->src[0].src.ssa->num_components == 1);
 		LLVMValueRef tmp = LLVMBuildBitCast(ctx->builder, src[0],
 						    LLVMVectorType(ctx->i32, 2),
 						    "");
 		result = LLVMBuildExtractElement(ctx->builder, tmp,
 						 ctx->i32zero, "");
 		break;
@@ -3774,21 +3775,21 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
 		src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32zero, "");
 		src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
 		src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32one, "");
 		src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
 	}
 	interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
 	attr_number = LLVMConstInt(ctx->i32, input_index, false);
 
 	if (location == INTERP_SAMPLE || location == INTERP_CENTER) {
 		LLVMValueRef ij_out[2];
-		LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);
+		LLVMValueRef ddxy_out = emit_ddxy_interp(ctx->nir, interp_param);
 
 		/*
 		 * take the I then J parameters, and the DDX/Y for it, and
 		 * calculate the IJ inputs for the interpolator.
 		 * temp1 = ddx * offset/sample.x + I;
 		 * interp_param.I = ddy * offset/sample.y + temp1;
 		 * temp1 = ddx * offset/sample.x + J;
 		 * interp_param.J = ddy * offset/sample.y + temp1;
 		 */
 		for (unsigned i = 0; i < 2; i++) {
@@ -6102,22 +6103,20 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
 	struct nir_to_llvm_context ctx = {0};
 	unsigned i;
 	ctx.options = options;
 	ctx.shader_info = shader_info;
 	ctx.context = LLVMContextCreate();
 	ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
 
 	ac_llvm_context_init(&ctx.ac, ctx.context);
 	ctx.ac.module = ctx.module;
 
-	ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
-
 	memset(shader_info, 0, sizeof(*shader_info));
 
 	ac_nir_shader_info_pass(nir, options, &shader_info->info);
 
 	LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--");
 
 	LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
 	char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
 	LLVMSetDataLayout(ctx.module, data_layout_str);
 	LLVMDisposeTargetData(data_layout);
-- 
2.9.3