[Mesa-dev] [PATCH 08/10] ac: add v2f32 to the common code and make use of it

Timothy Arceri tarceri at itsqueeze.com
Thu Nov 2 02:41:15 UTC 2017


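Add a v2f32 type to struct ac_llvm_context, initialised in
ac_llvm_context_init(), and use it in ac_nir_to_llvm.c in place of the
nir_to_llvm_context copy and the ad-hoc LLVMVectorType(f32, 2) calls.
---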
 src/amd/common/ac_llvm_build.c  |  1 +
 src/amd/common/ac_llvm_build.h  |  1 +
 src/amd/common/ac_nir_to_llvm.c | 15 +++++----------
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1519262b3d..5640a23b8a 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -62,20 +62,21 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
 	ctx->i8 = LLVMInt8TypeInContext(ctx->context);
 	ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
 	ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
 	ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
 	ctx->f16 = LLVMHalfTypeInContext(ctx->context);
 	ctx->f32 = LLVMFloatTypeInContext(ctx->context);
 	ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
 	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
 	ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
 	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
+	ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
 	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
 	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
 
 	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
 	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
 	ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
 	ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
 
 	ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
 	ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index f662cc4dd8..1f51937c9e 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -48,20 +48,21 @@ struct ac_llvm_context {
 	LLVMTypeRef i8;
 	LLVMTypeRef i16;
 	LLVMTypeRef i32;
 	LLVMTypeRef i64;
 	LLVMTypeRef f16;
 	LLVMTypeRef f32;
 	LLVMTypeRef f64;
 	LLVMTypeRef v2i32;
 	LLVMTypeRef v3i32;
 	LLVMTypeRef v4i32;
+	LLVMTypeRef v2f32;
 	LLVMTypeRef v4f32;
 	LLVMTypeRef v8i32;
 
 	LLVMValueRef i32_0;
 	LLVMValueRef i32_1;
 	LLVMValueRef f32_0;
 	LLVMValueRef f32_1;
 	LLVMValueRef i1true;
 	LLVMValueRef i1false;
 
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 1e1a1c0276..ac50debdde 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -127,21 +127,20 @@ struct nir_to_llvm_context {
 	LLVMValueRef esgs_ring;
 	LLVMValueRef gsvs_ring;
 	LLVMValueRef hs_ring_tess_offchip;
 	LLVMValueRef hs_ring_tess_factor;
 
 	LLVMValueRef prim_mask;
 	LLVMValueRef sample_pos_offset;
 	LLVMValueRef persp_sample, persp_center, persp_centroid;
 	LLVMValueRef linear_sample, linear_center, linear_centroid;
 
-	LLVMTypeRef v2f32;
 	LLVMTypeRef v4f32;
 
 	unsigned uniform_md_kind;
 	LLVMValueRef empty_md;
 	gl_shader_stage stage;
 
 	LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
 
 	uint64_t input_mask;
 	uint64_t output_mask;
@@ -978,21 +977,20 @@ static void create_function(struct nir_to_llvm_context *ctx,
 		break;
 	default:
 		unreachable("Shader stage not implemented");
 	}
 
 	ctx->shader_info->num_user_sgprs = user_sgpr_idx;
 }
 
 static void setup_types(struct nir_to_llvm_context *ctx)
 {
-	ctx->v2f32 = LLVMVectorType(ctx->ac.f32, 2);
 	ctx->v4f32 = LLVMVectorType(ctx->ac.f32, 4);
 
 	ctx->uniform_md_kind =
 	    LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
 	ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
 }
 
 static int get_llvm_num_components(LLVMValueRef value)
 {
 	LLVMTypeRef type = LLVMTypeOf(value);
@@ -1443,22 +1441,21 @@ static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
 	LLVMValueRef temps[2], result, val;
 	int i;
 
 	for (i = 0; i < 2; i++) {
 		val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
 		val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
 		val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
 		temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
 	}
 
-	LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2);
-	result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(v2f32), temps[0],
+	result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
 					ctx->i32_0, "");
 	result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
 					ctx->i32_1, "");
 	return result;
 }
 
 static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
 			      nir_op op,
 			      LLVMValueRef src0)
 {
@@ -2298,25 +2295,23 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 
 		if (count > 4) {
 			writemask |= ((1u << (count - 4)) - 1u) << (start + 4);
 			count = 4;
 		}
 
 		if (count == 4) {
 			store_name = "llvm.amdgcn.buffer.store.v4f32";
 			data = base_data;
 		} else if (count == 2) {
-			LLVMTypeRef v2f32 = LLVMVectorType(ctx->ac.f32, 2);
-
 			tmp = LLVMBuildExtractElement(ctx->ac.builder,
 						      base_data, LLVMConstInt(ctx->ac.i32, start, false), "");
-			data = LLVMBuildInsertElement(ctx->ac.builder, LLVMGetUndef(v2f32), tmp,
+			data = LLVMBuildInsertElement(ctx->ac.builder, LLVMGetUndef(ctx->ac.v2f32), tmp,
 						      ctx->ac.i32_0, "");
 
 			tmp = LLVMBuildExtractElement(ctx->ac.builder,
 						      base_data, LLVMConstInt(ctx->ac.i32, start + 1, false), "");
 			data = LLVMBuildInsertElement(ctx->ac.builder, data, tmp,
 						      ctx->ac.i32_1, "");
 			store_name = "llvm.amdgcn.buffer.store.v2f32";
 
 		} else {
 			assert(count == 1);
@@ -3796,21 +3791,21 @@ static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
 	return NULL;
 }
 
 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
 					 LLVMValueRef sample_id)
 {
 	LLVMValueRef result;
 	LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false));
 
 	ptr = LLVMBuildBitCast(ctx->builder, ptr,
-			       const_array(ctx->v2f32, 64), "");
+			       const_array(ctx->ac.v2f32, 64), "");
 
 	sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, "");
 	result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
 
 	return result;
 }
 
 static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
 {
 	LLVMValueRef values[2];
@@ -3899,21 +3894,21 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
 		}
 		interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
 
 	}
 
 	for (chan = 0; chan < 4; chan++) {
 		LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
 
 		if (interp_param) {
 			interp_param = LLVMBuildBitCast(ctx->builder,
-							interp_param, LLVMVectorType(ctx->ac.f32, 2), "");
+							interp_param, ctx->ac.v2f32, "");
 			LLVMValueRef i = LLVMBuildExtractElement(
 				ctx->builder, interp_param, ctx->ac.i32_0, "");
 			LLVMValueRef j = LLVMBuildExtractElement(
 				ctx->builder, interp_param, ctx->ac.i32_1, "");
 
 			result[chan] = ac_build_fs_interp(&ctx->ac,
 							  llvm_chan, attr_number,
 							  ctx->prim_mask, i, j);
 		} else {
 			result[chan] = ac_build_fs_interp_mov(&ctx->ac,
@@ -5038,21 +5033,21 @@ static void interp_fs_input(struct nir_to_llvm_context *ctx,
 	 * interpolation (but the intrinsic can't fetch from the other two
 	 * vertices).
 	 *
 	 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
 	 * to do the right thing. The only reason we use fs.constant is that
 	 * fs.interp cannot be used on integers, because they can be equal
 	 * to NaN.
 	 */
 	if (interp) {
 		interp_param = LLVMBuildBitCast(ctx->builder, interp_param,
-						LLVMVectorType(ctx->ac.f32, 2), "");
+						ctx->ac.v2f32, "");
 
 		i = LLVMBuildExtractElement(ctx->builder, interp_param,
 						ctx->ac.i32_0, "");
 		j = LLVMBuildExtractElement(ctx->builder, interp_param,
 						ctx->ac.i32_1, "");
 	}
 
 	for (chan = 0; chan < 4; chan++) {
 		LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
 
-- 
2.14.3



More information about the mesa-dev mailing list