[Mesa-dev] [PATCH 17/19] radeonsi: simplify si_llvm_emit_ddxy
Marek Olšák
maraeo at gmail.com
Sun Oct 2 21:09:32 UTC 2016
From: Marek Olšák <marek.olsak at amd.com>
si_llvm_emit_ddxy is called once per element, so we don't have to generate
code for 4 elements at once.
---
src/gallium/drivers/radeonsi/si_shader.c | 80 ++++++++++++--------------------
1 file changed, 29 insertions(+), 51 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 414810e..c150ae4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4998,98 +4998,76 @@ static void si_llvm_emit_txqs(
#define TID_MASK_TOP 0xfffffffd
#define TID_MASK_LEFT 0xfffffffe
static void si_llvm_emit_ddxy(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- const struct tgsi_full_instruction *inst = emit_data->inst;
- unsigned opcode = inst->Instruction.Opcode;
- LLVMValueRef store_ptr, load_ptr0, load_ptr1, thread_id;
- LLVMValueRef tl, trbl, result[4];
- LLVMValueRef tl_tid, trbl_tid;
- unsigned swizzle[4];
- unsigned c;
+ unsigned opcode = emit_data->info->opcode;
+ LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, val, args[2];
int idx;
unsigned mask;
bool has_ds_bpermute = HAVE_LLVM >= 0x0309 &&
ctx->screen->b.chip_class >= VI;
- thread_id = get_thread_id(ctx);;
+ thread_id = get_thread_id(ctx);
if (opcode == TGSI_OPCODE_DDX_FINE)
mask = TID_MASK_LEFT;
else if (opcode == TGSI_OPCODE_DDY_FINE)
mask = TID_MASK_TOP;
else
mask = TID_MASK_TOP_LEFT;
tl_tid = LLVMBuildAnd(gallivm->builder, thread_id,
lp_build_const_int32(gallivm, mask), "");
/* for DDX we want to next X pixel, DDY next Y pixel. */
idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
trbl_tid = LLVMBuildAdd(gallivm->builder, tl_tid,
lp_build_const_int32(gallivm, idx), "");
- if (!has_ds_bpermute) {
- store_ptr = build_gep0(ctx, ctx->lds, thread_id);
- load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
- load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);
- }
-
- for (c = 0; c < 4; ++c) {
- unsigned i;
- LLVMValueRef val;
- LLVMValueRef args[2];
-
- swizzle[c] = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], c);
- for (i = 0; i < c; ++i) {
- if (swizzle[i] == swizzle[c]) {
- result[c] = result[i];
- break;
- }
- }
- if (i != c)
- continue;
-
- val = LLVMBuildBitCast(gallivm->builder,
- lp_build_emit_fetch(bld_base, inst, 0, c),
- ctx->i32, "");
+ val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
- if (has_ds_bpermute) {
- args[0] = LLVMBuildMul(gallivm->builder, tl_tid,
- lp_build_const_int32(gallivm, 4), "");
- args[1] = val;
- tl = lp_build_intrinsic(gallivm->builder,
+ if (has_ds_bpermute) {
+ args[0] = LLVMBuildMul(gallivm->builder, tl_tid,
+ lp_build_const_int32(gallivm, 4), "");
+ args[1] = val;
+ tl = lp_build_intrinsic(gallivm->builder,
"llvm.amdgcn.ds.bpermute", ctx->i32,
args, 2, LLVMReadNoneAttribute);
- args[0] = LLVMBuildMul(gallivm->builder, trbl_tid,
- lp_build_const_int32(gallivm, 4), "");
- trbl = lp_build_intrinsic(gallivm->builder,
- "llvm.amdgcn.ds.bpermute", ctx->i32,
- args, 2, LLVMReadNoneAttribute);
- } else {
- LLVMBuildStore(gallivm->builder, val, store_ptr);
- tl = LLVMBuildLoad(gallivm->builder, load_ptr0, "");
- trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, "");
- }
- tl = LLVMBuildBitCast(gallivm->builder, tl, ctx->f32, "");
- trbl = LLVMBuildBitCast(gallivm->builder, trbl, ctx->f32, "");
- result[c] = LLVMBuildFSub(gallivm->builder, trbl, tl, "");
+ args[0] = LLVMBuildMul(gallivm->builder, trbl_tid,
+ lp_build_const_int32(gallivm, 4), "");
+ trbl = lp_build_intrinsic(gallivm->builder,
+ "llvm.amdgcn.ds.bpermute", ctx->i32,
+ args, 2, LLVMReadNoneAttribute);
+ } else {
+ LLVMValueRef store_ptr, load_ptr0, load_ptr1;
+
+ store_ptr = build_gep0(ctx, ctx->lds, thread_id);
+ load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
+ load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);
+
+ LLVMBuildStore(gallivm->builder, val, store_ptr);
+ tl = LLVMBuildLoad(gallivm->builder, load_ptr0, "");
+ trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, "");
}
- emit_data->output[0] = lp_build_gather_values(gallivm, result, 4);
+ tl = LLVMBuildBitCast(gallivm->builder, tl, ctx->f32, "");
+ trbl = LLVMBuildBitCast(gallivm->builder, trbl, ctx->f32, "");
+
+ emit_data->output[emit_data->chan] =
+ LLVMBuildFSub(gallivm->builder, trbl, tl, "");
}
/*
* this takes an I,J coordinate pair,
* and works out the X and Y derivatives.
* it returns DDX(I), DDX(J), DDY(I), DDY(J).
*/
static LLVMValueRef si_llvm_emit_ddxy_interp(
struct lp_build_tgsi_context *bld_base,
LLVMValueRef interp_ij)
--
2.7.4
More information about the mesa-dev
mailing list