[Mesa-dev] [PATCH 4/5] ac/llvm: use min+max instead of AMDGPU.clamp on LLVM 5.0

Marek Olšák maraeo at gmail.com
Thu Feb 16 22:00:43 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

The min+max sequence is selected as v_med3_f32, which has the same rate and size.
---
 src/amd/common/ac_llvm_build.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 7e8552b..cbc048c 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -759,20 +759,37 @@ ac_emit_sendmsg(struct ac_llvm_context *ctx,
 	LLVMValueRef args[2];
 	const char *intr_name = (HAVE_LLVM < 0x0400) ? "llvm.SI.sendmsg" : "llvm.amdgcn.s.sendmsg";
 	args[0] = LLVMConstInt(ctx->i32, msg, false);
 	args[1] = wave_id;
 	ac_emit_llvm_intrinsic(ctx, intr_name, ctx->voidt,
 			       args, 2, 0);
 }
 
 LLVMValueRef ac_emit_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
 {
+	if (HAVE_LLVM >= 0x0500) {
+		LLVMValueRef max[2] = {
+			value,
+			LLVMConstReal(ctx->f32, 0),
+		};
+		LLVMValueRef min[2] = {
+			LLVMConstReal(ctx->f32, 1),
+		};
+
+		min[1] = ac_emit_llvm_intrinsic(ctx, "llvm.maxnum.f32",
+						ctx->f32, max, 2,
+						AC_FUNC_ATTR_READNONE);
+		return ac_emit_llvm_intrinsic(ctx, "llvm.minnum.f32",
+					      ctx->f32, min, 2,
+					      AC_FUNC_ATTR_READNONE);
+	}
+
 	const char *intr = HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." :
 						 "llvm.AMDIL.clamp.";
 	LLVMValueRef args[3] = {
 		value,
 		LLVMConstReal(ctx->f32, 0),
 		LLVMConstReal(ctx->f32, 1),
 	};
 
 	return ac_emit_llvm_intrinsic(ctx, intr, ctx->f32, args, 3,
 				      AC_FUNC_ATTR_READNONE);
-- 
2.7.4



More information about the mesa-dev mailing list