[Beignet] [PATCH v2 1/4] replace sin/cos with llvm intrinsic.
xionghu.luo at intel.com
xionghu.luo at intel.com
Thu Jan 29 22:24:31 PST 2015
From: Luo Xionghu <xionghu.luo at intel.com>
Replace the non-standard sin/cos math intrinsic calls with LLVM intrinsics.
Translate them to llvm.xxx for the fast path, and refine the calls to use
emitUnaryCallInst.
v2: add some file changes that were missed, such as ocl_math.tmpl.cl and the hxx file.
Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
backend/src/libocl/tmpl/ocl_math.tmpl.cl | 4 +-
backend/src/llvm/llvm_gen_backend.cpp | 59 ++++++++----------------------
backend/src/llvm/llvm_gen_ocl_function.hxx | 2 -
3 files changed, 18 insertions(+), 47 deletions(-)
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index 49c4efa..8f726ff 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -24,8 +24,8 @@
constant int __ocl_math_fastpath_flag = 1;
PURE CONST float __gen_ocl_fabs(float x);
-PURE CONST float __gen_ocl_sin(float x);
-PURE CONST float __gen_ocl_cos(float x);
+CONST float __gen_ocl_sin(float x) __asm("llvm.sin" ".f32");
+CONST float __gen_ocl_cos(float x) __asm("llvm.cos" ".f32");
PURE CONST float __gen_ocl_sqrt(float x);
PURE CONST float __gen_ocl_rsqrt(float x);
PURE CONST float __gen_ocl_log(float x);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index d10d5c0..34c571e 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2794,13 +2794,13 @@ error:
case Intrinsic::bswap:
this->newRegister(&I);
break;
+ case Intrinsic::fabs:
case Intrinsic::sqrt:
case Intrinsic::ceil:
case Intrinsic::fma:
case Intrinsic::trunc:
- this->newRegister(&I);
- break;
- case Intrinsic::fabs:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
this->newRegister(&I);
break;
default:
@@ -2854,8 +2854,6 @@ error:
case GEN_OCL_FBH:
case GEN_OCL_FBL:
case GEN_OCL_CBIT:
- case GEN_OCL_COS:
- case GEN_OCL_SIN:
case GEN_OCL_SQR:
case GEN_OCL_RSQ:
case GEN_OCL_LOG:
@@ -3056,6 +3054,13 @@ error:
if (Function *F = I.getCalledFunction()) {
if (F->getIntrinsicID() != 0) {
const ir::Function &fn = ctx.getFunction();
+
+ // Get the function arguments
+ CallSite CS(&I);
+ CallSite::arg_iterator AI = CS.arg_begin();
+#if GBE_DEBUG
+ CallSite::arg_iterator AE = CS.arg_end();
+#endif /* GBE_DEBUG */
switch (F->getIntrinsicID()) {
case Intrinsic::stacksave:
{
@@ -3212,29 +3217,6 @@ error:
}
}
break;
- case Intrinsic::sqrt:
- {
- const ir::Register dst = this->getRegister(&I);
- const ir::Register src = this->getRegister(I.getOperand(0));
- ctx.ALU1(ir::OP_SQR, ir::TYPE_FLOAT, dst, src);
- }
- break;
- case Intrinsic::fabs:
- {
- ir::Type srcType = getType(ctx, I.getType());
- const ir::Register dst = this->getRegister(&I);
- const ir::Register src = this->getRegister(I.getOperand(0));
- ctx.ALU1(ir::OP_ABS, srcType, dst, src);
- }
- break;
- case Intrinsic::ceil:
- {
- ir::Type srcType = getType(ctx, I.getType());
- const ir::Register dst = this->getRegister(&I);
- const ir::Register src = this->getRegister(I.getOperand(0));
- ctx.ALU1(ir::OP_RNDU, srcType, dst, src);
- }
- break;
case Intrinsic::ctlz:
{
Type *llvmDstType = I.getType();
@@ -3286,19 +3268,12 @@ error:
ctx.MAD(srcType, dst, src0, src1, src2);
}
break;
- case Intrinsic::trunc:
- {
- Type *llvmDstType = I.getType();
- Type *llvmSrcType = I.getOperand(0)->getType();
- ir::Type dstType = getType(ctx, llvmDstType);
- ir::Type srcType = getType(ctx, llvmSrcType);
- GBE_ASSERT(srcType == dstType);
-
- const ir::Register dst = this->getRegister(&I);
- const ir::Register src = this->getRegister(I.getOperand(0));
- ctx.RNDZ(dstType, dst, src);
- }
- break;
+ case Intrinsic::sqrt: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break;
+ case Intrinsic::ceil: this->emitUnaryCallInst(I,CS,ir::OP_RNDU); break;
+ case Intrinsic::fabs: this->emitUnaryCallInst(I,CS,ir::OP_ABS); break;
+ case Intrinsic::trunc: this->emitUnaryCallInst(I,CS,ir::OP_RNDZ); break;
+ case Intrinsic::sin: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
+ case Intrinsic::cos: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
default: NOT_IMPLEMENTED;
}
} else {
@@ -3367,8 +3342,6 @@ error:
ctx.REGION(dst, src, x.getIntegerValue());
break;
}
- case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
- case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
case GEN_OCL_EXP: this->emitUnaryCallInst(I,CS,ir::OP_EXP); break;
case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 8ec8336..0ae7ec2 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -20,8 +20,6 @@ DECL_LLVM_GEN_FUNCTION(GET_WORK_DIM, __gen_ocl_get_work_dim)
// Math function
DECL_LLVM_GEN_FUNCTION(FABS, __gen_ocl_fabs)
-DECL_LLVM_GEN_FUNCTION(COS, __gen_ocl_cos)
-DECL_LLVM_GEN_FUNCTION(SIN, __gen_ocl_sin)
DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt)
DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt)
DECL_LLVM_GEN_FUNCTION(LOG, __gen_ocl_log)
--
1.9.1
More information about the Beignet
mailing list