[Beignet] [PATCH 2/2] GBE: use ISA mad for mad() builtin function.
Ruiling Song
ruiling.song at intel.com
Tue Nov 5 00:37:13 PST 2013
Directly map the mad() builtin to the ISA mad instruction, so that mad() has
better performance and less precision loss.
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 5 +++++
backend/src/ir/context.hpp | 1 +
backend/src/ir/instruction.cpp | 3 +++
backend/src/ir/instruction.hpp | 2 ++
backend/src/ir/instruction.hxx | 1 +
backend/src/llvm/llvm_gen_backend.cpp | 9 +++++++++
backend/src/llvm/llvm_gen_ocl_function.hxx | 1 +
backend/src/ocl_stdlib.tmpl.h | 3 ++-
8 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 1f25f0e..6430ee8 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2707,6 +2707,11 @@ namespace gbe
sel.I64MADSAT(dst, src0, src1, src2, tmp);
break;
}
+ case OP_MAD:
+ {
+ sel.MAD(dst, src2, src0, src1);
+ break;
+ }
default:
NOT_IMPLEMENTED;
}
diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp
index 7bb6e5a..242beaa 100644
--- a/backend/src/ir/context.hpp
+++ b/backend/src/ir/context.hpp
@@ -149,6 +149,7 @@ namespace ir {
}
DECL_THREE_SRC_INSN(SEL);
DECL_THREE_SRC_INSN(I64MADSAT);
+ DECL_THREE_SRC_INSN(MAD);
#undef DECL_THREE_SRC_INSN
/*! For all unary functions */
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 9b3e699..d86c3c0 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1449,6 +1449,9 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
return internal::TernaryInstruction(OP_I64MADSAT, type, dst, src).convert();
}
+ Instruction MAD(Type type, Register dst, Tuple src) {
+ return internal::TernaryInstruction(OP_MAD, type, dst, src).convert();
+ }
// All compare functions
#define DECL_EMIT_FUNCTION(NAME) \
Instruction NAME(Type type, Register dst, Register src0, Register src1) { \
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 90c819b..ae45a63 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -539,6 +539,8 @@ namespace ir {
Instruction I64_MUL_HI(Type type, Register dst, Register src0, Register src1);
/*! i64madsat.type dst src */
Instruction I64MADSAT(Type type, Register dst, Tuple src);
+ /*! mad.type dst src */
+ Instruction MAD(Type type, Register dst, Tuple src);
/*! upsample_short.type dst src */
Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1);
/*! upsample_int.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index cd60349..67dc682 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -86,3 +86,4 @@ DECL_INSN(UPSAMPLE_SHORT, BinaryInstruction)
DECL_INSN(UPSAMPLE_INT, BinaryInstruction)
DECL_INSN(UPSAMPLE_LONG, BinaryInstruction)
DECL_INSN(I64MADSAT, TernaryInstruction)
+DECL_INSN(MAD, TernaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index b824bf9..aae52d8 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2016,6 +2016,7 @@ namespace gbe
case GEN_OCL_UPSAMPLE_SHORT:
case GEN_OCL_UPSAMPLE_INT:
case GEN_OCL_UPSAMPLE_LONG:
+ case GEN_OCL_MAD:
case GEN_OCL_SADD_SAT_CHAR:
case GEN_OCL_SADD_SAT_SHORT:
case GEN_OCL_SADD_SAT_INT:
@@ -2533,6 +2534,14 @@ namespace gbe
ctx.I64MADSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1, src2);
break;
}
+ case GEN_OCL_MAD: {
+ GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+ GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+ GBE_ASSERT(AI != AE); const ir::Register src2 = this->getRegister(*AI); ++AI;
+ const ir::Register dst = this->getRegister(&I);
+ ctx.MAD(getType(ctx, I.getType()), dst, src0, src1, src2);
+ break;
+ }
case GEN_OCL_HADD: {
GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 3f44be8..71034ab 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -31,6 +31,7 @@ DECL_LLVM_GEN_FUNCTION(RNDZ, __gen_ocl_rndz)
DECL_LLVM_GEN_FUNCTION(RNDE, __gen_ocl_rnde)
DECL_LLVM_GEN_FUNCTION(RNDU, __gen_ocl_rndu)
DECL_LLVM_GEN_FUNCTION(RNDD, __gen_ocl_rndd)
+DECL_LLVM_GEN_FUNCTION(MAD, __gen_ocl_mad)
// Barrier function
DECL_LLVM_GEN_FUNCTION(LBARRIER, __gen_ocl_barrier_local)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index a1f365c..09c36d5 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -1558,8 +1558,9 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_rint(float x) {
#define erf __gen_ocl_internal_erf
#define erfc __gen_ocl_internal_erfc
+PURE CONST float __gen_ocl_mad(float a, float b, float c);
INLINE_OVERLOADABLE float mad(float a, float b, float c) {
- return a*b+c;
+ return __gen_ocl_mad(a, b, c);
}
#define DEF(TYPE1, TYPE2) \
--
1.7.9.5
More information about the Beignet
mailing list