[Mesa-dev] [PATCH 08/10] ac: add v2f32 to the common code and make use of it
Timothy Arceri
tarceri at itsqueeze.com
Thu Nov 2 02:41:15 UTC 2017
---
src/amd/common/ac_llvm_build.c | 1 +
src/amd/common/ac_llvm_build.h | 1 +
src/amd/common/ac_nir_to_llvm.c | 15 +++++----------
3 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1519262b3d..5640a23b8a 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -62,20 +62,21 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
ctx->i8 = LLVMInt8TypeInContext(ctx->context);
ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
ctx->f16 = LLVMHalfTypeInContext(ctx->context);
ctx->f32 = LLVMFloatTypeInContext(ctx->context);
ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
+ ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index f662cc4dd8..1f51937c9e 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -48,20 +48,21 @@ struct ac_llvm_context {
LLVMTypeRef i8;
LLVMTypeRef i16;
LLVMTypeRef i32;
LLVMTypeRef i64;
LLVMTypeRef f16;
LLVMTypeRef f32;
LLVMTypeRef f64;
LLVMTypeRef v2i32;
LLVMTypeRef v3i32;
LLVMTypeRef v4i32;
+ LLVMTypeRef v2f32;
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
LLVMValueRef i32_0;
LLVMValueRef i32_1;
LLVMValueRef f32_0;
LLVMValueRef f32_1;
LLVMValueRef i1true;
LLVMValueRef i1false;
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 1e1a1c0276..ac50debdde 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -127,21 +127,20 @@ struct nir_to_llvm_context {
LLVMValueRef esgs_ring;
LLVMValueRef gsvs_ring;
LLVMValueRef hs_ring_tess_offchip;
LLVMValueRef hs_ring_tess_factor;
LLVMValueRef prim_mask;
LLVMValueRef sample_pos_offset;
LLVMValueRef persp_sample, persp_center, persp_centroid;
LLVMValueRef linear_sample, linear_center, linear_centroid;
- LLVMTypeRef v2f32;
LLVMTypeRef v4f32;
unsigned uniform_md_kind;
LLVMValueRef empty_md;
gl_shader_stage stage;
LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
uint64_t input_mask;
uint64_t output_mask;
@@ -978,21 +977,20 @@ static void create_function(struct nir_to_llvm_context *ctx,
break;
default:
unreachable("Shader stage not implemented");
}
ctx->shader_info->num_user_sgprs = user_sgpr_idx;
}
static void setup_types(struct nir_to_llvm_context *ctx)
{
- ctx->v2f32 = LLVMVectorType(ctx->ac.f32, 2);
ctx->v4f32 = LLVMVectorType(ctx->ac.f32, 4);
ctx->uniform_md_kind =
LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
}
static int get_llvm_num_components(LLVMValueRef value)
{
LLVMTypeRef type = LLVMTypeOf(value);
@@ -1443,22 +1441,21 @@ static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
LLVMValueRef temps[2], result, val;
int i;
for (i = 0; i < 2; i++) {
val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
}
- LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2);
- result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(v2f32), temps[0],
+ result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
ctx->i32_0, "");
result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
ctx->i32_1, "");
return result;
}
static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
nir_op op,
LLVMValueRef src0)
{
@@ -2298,25 +2295,23 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
if (count > 4) {
writemask |= ((1u << (count - 4)) - 1u) << (start + 4);
count = 4;
}
if (count == 4) {
store_name = "llvm.amdgcn.buffer.store.v4f32";
data = base_data;
} else if (count == 2) {
- LLVMTypeRef v2f32 = LLVMVectorType(ctx->ac.f32, 2);
-
tmp = LLVMBuildExtractElement(ctx->ac.builder,
base_data, LLVMConstInt(ctx->ac.i32, start, false), "");
- data = LLVMBuildInsertElement(ctx->ac.builder, LLVMGetUndef(v2f32), tmp,
+ data = LLVMBuildInsertElement(ctx->ac.builder, LLVMGetUndef(ctx->ac.v2f32), tmp,
ctx->ac.i32_0, "");
tmp = LLVMBuildExtractElement(ctx->ac.builder,
base_data, LLVMConstInt(ctx->ac.i32, start + 1, false), "");
data = LLVMBuildInsertElement(ctx->ac.builder, data, tmp,
ctx->ac.i32_1, "");
store_name = "llvm.amdgcn.buffer.store.v2f32";
} else {
assert(count == 1);
@@ -3796,21 +3791,21 @@ static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
return NULL;
}
static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
LLVMValueRef sample_id)
{
LLVMValueRef result;
LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false));
ptr = LLVMBuildBitCast(ctx->builder, ptr,
- const_array(ctx->v2f32, 64), "");
+ const_array(ctx->ac.v2f32, 64), "");
sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, "");
result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
return result;
}
static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
{
LLVMValueRef values[2];
@@ -3899,21 +3894,21 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
}
interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
}
for (chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
if (interp_param) {
interp_param = LLVMBuildBitCast(ctx->builder,
- interp_param, LLVMVectorType(ctx->ac.f32, 2), "");
+ interp_param, ctx->ac.v2f32, "");
LLVMValueRef i = LLVMBuildExtractElement(
ctx->builder, interp_param, ctx->ac.i32_0, "");
LLVMValueRef j = LLVMBuildExtractElement(
ctx->builder, interp_param, ctx->ac.i32_1, "");
result[chan] = ac_build_fs_interp(&ctx->ac,
llvm_chan, attr_number,
ctx->prim_mask, i, j);
} else {
result[chan] = ac_build_fs_interp_mov(&ctx->ac,
@@ -5038,21 +5033,21 @@ static void interp_fs_input(struct nir_to_llvm_context *ctx,
* interpolation (but the intrinsic can't fetch from the other two
* vertices).
*
* Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
* to do the right thing. The only reason we use fs.constant is that
* fs.interp cannot be used on integers, because they can be equal
* to NaN.
*/
if (interp) {
interp_param = LLVMBuildBitCast(ctx->builder, interp_param,
- LLVMVectorType(ctx->ac.f32, 2), "");
+ ctx->ac.v2f32, "");
i = LLVMBuildExtractElement(ctx->builder, interp_param,
ctx->ac.i32_0, "");
j = LLVMBuildExtractElement(ctx->builder, interp_param,
ctx->ac.i32_1, "");
}
for (chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
--
2.14.3
More information about the mesa-dev
mailing list