[Mesa-dev] [PATCH 1/2] radeonsi: emit 1/sqrt for RSQ

Marek Olšák maraeo at gmail.com
Wed Jun 22 13:13:27 UTC 2016


From: Marek Olšák <marek.olsak at amd.com>

We don't need the clamped version and we don't have to use any intrinsic.

Stats on Tonga:

15382 shaders in 9128 tests
Totals:
SGPRS: 1230560 -> 1230560 (0.00 %)
VGPRS: 469577 -> 462504 (-1.51 %)
Code Size: 22089908 -> 21730052 (-1.63 %) bytes
LDS: 598 -> 598 (0.00 %) blocks
Scratch: 283648 -> 281600 (-0.72 %) bytes per wave
Max Waves: 125664 -> 126969 (1.04 %)
Wait states: 0 -> 0 (0.00 %)

Totals from affected shaders:
SGPRS: 547280 -> 547280 (0.00 %)
VGPRS: 269132 -> 262059 (-2.63 %)
Code Size: 15709604 -> 15349748 (-2.29 %) bytes
LDS: 198 -> 198 (0.00 %) blocks
Scratch: 74752 -> 72704 (-2.74 %) bytes per wave
Max Waves: 47840 -> 49145 (2.73 %)
Wait states: 0 -> 0 (0.00 %)
---
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 8084a20..d395208 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1523,6 +1523,21 @@ static void emit_up2h(const struct lp_build_tgsi_action *action,
 	}
 }
 
+/* RSQ is emitted as a plain 1 / sqrt(x). That is translated to rsq for f32
+ * if fp32 denormals are not enabled in the target machine; f64 needs global
+ * unsafe math flags to get rsq. */
+static void emit_rsq(const struct lp_build_tgsi_action *action,
+		     struct lp_build_tgsi_context *bld_base,
+		     struct lp_build_emit_data *emit_data)
+{
+	LLVMValueRef one = bld_base->base.one;
+	LLVMValueRef root = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
+						     emit_data->args[0]);
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildFDiv(bld_base->base.gallivm->builder, one, root, "");
+}
+
 void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *triple)
 {
 	struct lp_type type;
@@ -1661,8 +1676,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *trip
 	bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
 	bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
-	bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name = "llvm.AMDGPU.rsq.clamped.f32";
-	bld_base->op_actions[TGSI_OPCODE_RSQ].emit = build_tgsi_intrinsic_nomem;
+	bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq;
 	bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
 	bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
 	bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
-- 
2.7.4



More information about the mesa-dev mailing list