Mesa (master): radeonsi: implement TGSI_OPCODE_BFI (v2)

Mon Mar 16 13:58:27 UTC 2015

Module: Mesa
Branch: master
Commit: b5f19db9766ac54d78b8087b0433011f908ebd2c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b5f19db9766ac54d78b8087b0433011f908ebd2c

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Sat Feb 28 14:31:45 2015 +0100

radeonsi: implement TGSI_OPCODE_BFI (v2)

v2: Don't use the intrinsics, the shader backend can recognize these
    patterns and generate optimal code automatically.

Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

---

 docs/GL3.txt                                       |    2 +-
 .../drivers/radeon/radeon_setup_tgsi_llvm.c        |   34 ++++++++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 267740a..b295149 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -102,7 +102,7 @@ GL 4.0, GLSL 4.00:
   - Dynamically uniform UBO array indices              DONE (r600)
   - Implicit signed -> unsigned conversions            DONE
   - Fused multiply-add                                 DONE ()
-  - Packing/bitfield/conversion functions              DONE (r600)
+  - Packing/bitfield/conversion functions              DONE (r600, radeonsi)
   - Enhanced textureGather                             DONE (r600, radeonsi)
   - Geometry shader instancing                         DONE (r600)
   - Geometry shader multiple streams                   DONE ()
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 0034b56..d89e2b4 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1234,6 +1234,39 @@ build_tgsi_intrinsic_nomem(
 	build_tgsi_intrinsic(action, bld_base, emit_data, LLVMReadNoneAttribute);
 }
 
+static void emit_bfi(const struct lp_build_tgsi_action * action,
+		     struct lp_build_tgsi_context * bld_base,
+		     struct lp_build_emit_data * emit_data)
+{
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	LLVMValueRef bfi_args[3];
+
+	// Calculate the bitmask: ((1 << src3) - 1) << src2
+	bfi_args[0] = LLVMBuildShl(builder,
+				   LLVMBuildSub(builder,
+						LLVMBuildShl(builder,
+							     bld_base->int_bld.one,
+							     emit_data->args[3], ""),
+						bld_base->int_bld.one, ""),
+				   emit_data->args[2], "");
+
+	bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
+				   emit_data->args[2], "");
+
+	bfi_args[2] = emit_data->args[0];
+
+	/* Calculate, with argN = bfi_args[N]:
+	 *   (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
+	 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
+	 */
+	emit_data->output[emit_data->chan] =
+		LLVMBuildXor(builder, bfi_args[2],
+			LLVMBuildAnd(builder, bfi_args[0],
+				LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
+					     ""), ""), "");
+}
+
 /* this is ffs in C */
 static void emit_lsb(const struct lp_build_tgsi_action * action,
 		     struct lp_build_tgsi_context * bld_base,
@@ -1381,6 +1414,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
 	bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "fabs";
 	bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
 	bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
+	bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
 	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
 	bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev";