[Mesa-dev] [PATCH 9/9] ac, radeonsi: use ac_build_gather_values more
Marek Olšák
maraeo at gmail.com
Tue Aug 21 03:23:39 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
---
src/amd/common/ac_nir_to_llvm.c | 14 +++--------
src/gallium/drivers/radeonsi/si_shader.c | 8 +++---
.../drivers/radeonsi/si_shader_tgsi_mem.c | 25 +++++++------------
.../drivers/radeonsi/si_shader_tgsi_setup.c | 17 ++++---------
4 files changed, 20 insertions(+), 44 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 537ac33c044..700e48e14b7 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -479,35 +479,30 @@ static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
return LLVMBuildBitCast(ctx->builder, ac_build_cvt_pkrtz_f16(ctx, comp),
ctx->i32, "");
}
static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
LLVMValueRef src0)
{
LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
- LLVMValueRef temps[2], result, val;
+ LLVMValueRef temps[2], val;
int i;
for (i = 0; i < 2; i++) {
val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
}
-
- result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
- ctx->i32_0, "");
- result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
- ctx->i32_1, "");
- return result;
+ return ac_build_gather_values(ctx, temps, 2);
}
static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
nir_op op,
LLVMValueRef src0)
{
unsigned mask;
int idx;
LLVMValueRef result;
@@ -997,24 +992,21 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
ctx->ac.v2i32,
"");
result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
ctx->ac.i32_1, "");
break;
}
case nir_op_pack_64_2x32_split: {
LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32);
- tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
- src[0], ctx->ac.i32_0, "");
- tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
- src[1], ctx->ac.i32_1, "");
+ tmp = ac_build_gather_values(&ctx->ac, src, 2);
result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
break;
}
case nir_op_cube_face_coord: {
src[0] = ac_to_float(&ctx->ac, src[0]);
LLVMValueRef results[2];
LLVMValueRef in[3];
for (unsigned chan = 0; chan < 3; chan++)
in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 66fe5fad218..cfd99b61601 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2717,26 +2717,24 @@ static void emit_streamout_output(struct si_shader_context *ctx,
/* Pack the output. */
LLVMValueRef vdata = NULL;
switch (num_comps) {
case 1: /* as i32 */
vdata = out[0];
break;
case 2: /* as v2i32 */
case 3: /* as v4i32 (aligned to 4) */
+ out[3] = LLVMGetUndef(ctx->i32);
+ /* fall through */
case 4: /* as v4i32 */
- vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, util_next_power_of_two(num_comps)));
- for (int j = 0; j < num_comps; j++) {
- vdata = LLVMBuildInsertElement(ctx->ac.builder, vdata, out[j],
- LLVMConstInt(ctx->i32, j, 0), "");
- }
+ vdata = ac_build_gather_values(&ctx->ac, out, util_next_power_of_two(num_comps));
break;
}
ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx],
vdata, num_comps,
so_write_offsets[buf_idx],
ctx->i32_0,
stream_out->dst_offset * 4, 1, 1, true, false);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 54a0413e464..8e0578b4d5e 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -546,50 +546,43 @@ static void store_emit_buffer(struct si_shader_context *ctx,
unsigned cache_policy,
bool writeonly_memory)
{
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef base_data = value;
LLVMValueRef base_offset = voffset;
while (writemask) {
int start, count;
const char *intrinsic_name;
- LLVMValueRef data, voff, tmp;
+ LLVMValueRef data, voff;
u_bit_scan_consecutive_range(&writemask, &start, &count);
/* Due to an LLVM limitation, split 3-element writes
* into a 2-element and a 1-element write. */
if (count == 3) {
writemask |= 1 << (start + 2);
count = 2;
}
if (count == 4) {
data = base_data;
intrinsic_name = "llvm.amdgcn.buffer.store.v4f32";
} else if (count == 2) {
- LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2);
-
- tmp = LLVMBuildExtractElement(
- builder, base_data,
- LLVMConstInt(ctx->i32, start, 0), "");
- data = LLVMBuildInsertElement(
- builder, LLVMGetUndef(v2f32), tmp,
- ctx->i32_0, "");
-
- tmp = LLVMBuildExtractElement(
- builder, base_data,
- LLVMConstInt(ctx->i32, start + 1, 0), "");
- data = LLVMBuildInsertElement(
- builder, data, tmp, ctx->i32_1, "");
-
+ LLVMValueRef values[2] = {
+ LLVMBuildExtractElement(builder, base_data,
+ LLVMConstInt(ctx->i32, start, 0), ""),
+ LLVMBuildExtractElement(builder, base_data,
+ LLVMConstInt(ctx->i32, start + 1, 0), ""),
+ };
+
+ data = ac_build_gather_values(&ctx->ac, values, 2);
intrinsic_name = "llvm.amdgcn.buffer.store.v2f32";
} else {
assert(count == 1);
data = LLVMBuildExtractElement(
builder, base_data,
LLVMConstInt(ctx->i32, start, 0), "");
intrinsic_name = "llvm.amdgcn.buffer.store.f32";
}
voff = base_offset;
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 1f37b0ba37d..20164939cb7 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -298,32 +298,25 @@ get_pointer_into_array(struct si_shader_context *ctx,
return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
}
LLVMValueRef
si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
LLVMTypeRef type,
LLVMValueRef ptr,
LLVMValueRef ptr2)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef result;
-
- result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
-
- result = LLVMBuildInsertElement(ctx->ac.builder,
- result,
- ac_to_integer(&ctx->ac, ptr),
- ctx->i32_0, "");
- result = LLVMBuildInsertElement(ctx->ac.builder,
- result,
- ac_to_integer(&ctx->ac, ptr2),
- ctx->i32_1, "");
+ LLVMValueRef values[2] = {
+ ac_to_integer(&ctx->ac, ptr),
+ ac_to_integer(&ctx->ac, ptr2),
+ };
+ LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
}
static LLVMValueRef
emit_array_fetch(struct lp_build_tgsi_context *bld_base,
unsigned File, enum tgsi_opcode_type type,
struct tgsi_declaration_range range,
unsigned swizzle)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
--
2.17.1
More information about the mesa-dev mailing list