[Beignet] [PATCH v2 1/4] replace sin/cos with llvm intrinsic.

xionghu.luo at intel.com xionghu.luo at intel.com
Thu Jan 29 22:24:31 PST 2015


From: Luo Xionghu <xionghu.luo at intel.com>

Replace the non-standard sin/cos math intrinsic calls with LLVM intrinsics.
Translate them to llvm.xxx for the fast path, and refine the intrinsic
handlers to call emitUnaryCallInst.
v2: include file changes that were missed, such as ocl_math.tmpl.cl and llvm_gen_ocl_function.hxx.

Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
 backend/src/libocl/tmpl/ocl_math.tmpl.cl   |  4 +-
 backend/src/llvm/llvm_gen_backend.cpp      | 59 ++++++++----------------------
 backend/src/llvm/llvm_gen_ocl_function.hxx |  2 -
 3 files changed, 18 insertions(+), 47 deletions(-)

diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index 49c4efa..8f726ff 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -24,8 +24,8 @@
 constant int __ocl_math_fastpath_flag = 1;
 
 PURE CONST float __gen_ocl_fabs(float x);
-PURE CONST float __gen_ocl_sin(float x);
-PURE CONST float __gen_ocl_cos(float x);
+CONST float __gen_ocl_sin(float x) __asm("llvm.sin" ".f32");
+CONST float __gen_ocl_cos(float x) __asm("llvm.cos" ".f32");
 PURE CONST float __gen_ocl_sqrt(float x);
 PURE CONST float __gen_ocl_rsqrt(float x);
 PURE CONST float __gen_ocl_log(float x);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index d10d5c0..34c571e 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2794,13 +2794,13 @@ error:
           case Intrinsic::bswap:
             this->newRegister(&I);
           break;
+          case Intrinsic::fabs:
           case Intrinsic::sqrt:
           case Intrinsic::ceil:
           case Intrinsic::fma:
           case Intrinsic::trunc:
-            this->newRegister(&I);
-          break;
-          case Intrinsic::fabs:
+          case Intrinsic::sin:
+          case Intrinsic::cos:
             this->newRegister(&I);
           break;
           default:
@@ -2854,8 +2854,6 @@ error:
       case GEN_OCL_FBH:
       case GEN_OCL_FBL:
       case GEN_OCL_CBIT:
-      case GEN_OCL_COS:
-      case GEN_OCL_SIN:
       case GEN_OCL_SQR:
       case GEN_OCL_RSQ:
       case GEN_OCL_LOG:
@@ -3056,6 +3054,13 @@ error:
     if (Function *F = I.getCalledFunction()) {
       if (F->getIntrinsicID() != 0) {
         const ir::Function &fn = ctx.getFunction();
+
+        // Get the function arguments
+        CallSite CS(&I);
+        CallSite::arg_iterator AI = CS.arg_begin();
+#if GBE_DEBUG
+        CallSite::arg_iterator AE = CS.arg_end();
+#endif /* GBE_DEBUG */
         switch (F->getIntrinsicID()) {
           case Intrinsic::stacksave:
           {
@@ -3212,29 +3217,6 @@ error:
             }
           }
           break;
-          case Intrinsic::sqrt:
-          {
-            const ir::Register dst = this->getRegister(&I);
-            const ir::Register src = this->getRegister(I.getOperand(0));
-            ctx.ALU1(ir::OP_SQR, ir::TYPE_FLOAT, dst, src);
-          }
-          break;
-          case Intrinsic::fabs:
-          {
-            ir::Type srcType = getType(ctx, I.getType());
-            const ir::Register dst = this->getRegister(&I);
-            const ir::Register src = this->getRegister(I.getOperand(0));
-            ctx.ALU1(ir::OP_ABS, srcType, dst, src);
-          }
-          break;
-          case Intrinsic::ceil:
-          {
-            ir::Type srcType = getType(ctx, I.getType());
-            const ir::Register dst = this->getRegister(&I);
-            const ir::Register src = this->getRegister(I.getOperand(0));
-            ctx.ALU1(ir::OP_RNDU, srcType, dst, src);
-          }
-          break;
           case Intrinsic::ctlz:
           {
             Type *llvmDstType = I.getType();
@@ -3286,19 +3268,12 @@ error:
             ctx.MAD(srcType, dst, src0, src1, src2);
           }
           break;
-          case Intrinsic::trunc:
-          {
-            Type *llvmDstType = I.getType();
-            Type *llvmSrcType = I.getOperand(0)->getType();
-            ir::Type dstType = getType(ctx, llvmDstType);
-            ir::Type srcType = getType(ctx, llvmSrcType);
-            GBE_ASSERT(srcType == dstType);
-
-            const ir::Register dst = this->getRegister(&I);
-            const ir::Register src = this->getRegister(I.getOperand(0));
-            ctx.RNDZ(dstType, dst, src);
-          }
-          break;
+          case Intrinsic::sqrt: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break;
+          case Intrinsic::ceil: this->emitUnaryCallInst(I,CS,ir::OP_RNDU); break;
+          case Intrinsic::fabs: this->emitUnaryCallInst(I,CS,ir::OP_ABS); break;
+          case Intrinsic::trunc: this->emitUnaryCallInst(I,CS,ir::OP_RNDZ); break;
+          case Intrinsic::sin: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
+          case Intrinsic::cos: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
           default: NOT_IMPLEMENTED;
         }
       } else {
@@ -3367,8 +3342,6 @@ error:
             ctx.REGION(dst, src, x.getIntegerValue());
             break;
           }
-          case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
-          case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
           case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
           case GEN_OCL_EXP: this->emitUnaryCallInst(I,CS,ir::OP_EXP); break;
           case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 8ec8336..0ae7ec2 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -20,8 +20,6 @@ DECL_LLVM_GEN_FUNCTION(GET_WORK_DIM, __gen_ocl_get_work_dim)
 
 // Math function
 DECL_LLVM_GEN_FUNCTION(FABS, __gen_ocl_fabs)
-DECL_LLVM_GEN_FUNCTION(COS, __gen_ocl_cos)
-DECL_LLVM_GEN_FUNCTION(SIN, __gen_ocl_sin)
 DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt)
 DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt)
 DECL_LLVM_GEN_FUNCTION(LOG, __gen_ocl_log)
-- 
1.9.1



More information about the Beignet mailing list