[Beignet] [PATCH] GBE: use native exp instruction when its precision is sufficient

Thu Jan 16 15:00:33 PST 2014

For inputs where the native exp instruction is precise enough (x > 128 or
x < -128), use the native instruction; otherwise, fall back to the software
path that emulates the exp function.

Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp |    1 +
 backend/src/ir/instruction.hpp             |    2 ++
 backend/src/ir/instruction.hxx             |    1 +
 backend/src/llvm/llvm_gen_backend.cpp      |    2 ++
 backend/src/llvm/llvm_gen_ocl_function.hxx |    1 +
 backend/src/ocl_stdlib.tmpl.h              |   14 ++++++++++----
 6 files changed, 17 insertions(+), 4 deletions(-)
 mode change 100644 => 100755 backend/src/backend/gen_insn_selection.cpp
 mode change 100644 => 100755 backend/src/ir/instruction.hpp
 mode change 100644 => 100755 backend/src/ir/instruction.hxx
 mode change 100644 => 100755 backend/src/llvm/llvm_gen_backend.cpp
 mode change 100644 => 100755 backend/src/llvm/llvm_gen_ocl_function.hxx

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
old mode 100644
new mode 100755
index 445fd6d..fc9f305
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1572,6 +1572,7 @@ namespace gbe
         case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break;
         case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break;
         case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break;
+        case ir::OP_EXP: sel.MATH(dst, GEN_MATH_FUNCTION_EXP, src); break;
         case ir::OP_SQR: sel.MATH(dst, GEN_MATH_FUNCTION_SQRT, src); break;
         case ir::OP_RSQ: sel.MATH(dst, GEN_MATH_FUNCTION_RSQ, src); break;
         case ir::OP_RCP: sel.MATH(dst, GEN_MATH_FUNCTION_INV, src); break;
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
old mode 100644
new mode 100755
index 46577c7..e80badb
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -576,6 +576,8 @@ namespace ir {
   Instruction ABS(Type type, Register dst, Register src);
   /*! log.type dst src */
   Instruction LOG(Type type, Register dst, Register src);
+  /*! exp.type dst src */
+  Instruction EXP(Type type, Register dst, Register src);
   /*! sqr.type dst src */
   Instruction SQR(Type type, Register dst, Register src);
   /*! rsq.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
old mode 100644
new mode 100755
index baaaca2..bb5229a
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -29,6 +29,7 @@ DECL_INSN(MOV, UnaryInstruction)
 DECL_INSN(COS, UnaryInstruction)
 DECL_INSN(SIN, UnaryInstruction)
 DECL_INSN(LOG, UnaryInstruction)
+DECL_INSN(EXP, UnaryInstruction)
 DECL_INSN(SQR, UnaryInstruction)
 DECL_INSN(RSQ, UnaryInstruction)
 DECL_INSN(RCP, UnaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
old mode 100644
new mode 100755
index f178585..5780239
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2035,6 +2035,7 @@ namespace gbe
       case GEN_OCL_SQR:
       case GEN_OCL_RSQ:
       case GEN_OCL_LOG:
+      case GEN_OCL_EXP:
       case GEN_OCL_POW:
       case GEN_OCL_RCP:
       case GEN_OCL_ABS:
@@ -2313,6 +2314,7 @@ namespace gbe
           case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
           case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
           case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
+          case GEN_OCL_EXP: this->emitUnaryCallInst(I,CS,ir::OP_EXP); break;
           case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break;
           case GEN_OCL_RSQ: this->emitUnaryCallInst(I,CS,ir::OP_RSQ); break;
           case GEN_OCL_RCP: this->emitUnaryCallInst(I,CS,ir::OP_RCP); break;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
old mode 100644
new mode 100755
index c9e634c..de2890c
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -25,6 +25,7 @@ DECL_LLVM_GEN_FUNCTION(SIN, __gen_ocl_sin)
 DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt)
 DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt)
 DECL_LLVM_GEN_FUNCTION(LOG, __gen_ocl_log)
+DECL_LLVM_GEN_FUNCTION(EXP, __gen_ocl_exp)
 DECL_LLVM_GEN_FUNCTION(POW, __gen_ocl_pow)
 DECL_LLVM_GEN_FUNCTION(RCP, __gen_ocl_rcp)
 DECL_LLVM_GEN_FUNCTION(RNDZ, __gen_ocl_rndz)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index bbd056f..5c6e2be 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -688,6 +688,7 @@ PURE CONST float __gen_ocl_cos(float x);
 PURE CONST float __gen_ocl_sqrt(float x);
 PURE CONST float __gen_ocl_rsqrt(float x);
 PURE CONST float __gen_ocl_log(float x);
+PURE CONST float __gen_ocl_exp(float x);
 PURE CONST float __gen_ocl_pow(float x, float y);
 PURE CONST float __gen_ocl_rcp(float x);
 PURE CONST float __gen_ocl_rndz(float x);
@@ -2247,7 +2248,7 @@ INLINE_OVERLOADABLE float native_tan(float x) {
 INLINE_OVERLOADABLE float __gen_ocl_internal_tanpi(float x) {
   return native_tan(x * M_PI_F);
 }
-INLINE_OVERLOADABLE float native_exp(float x) { return __gen_ocl_pow(M_E_F, x); }
+INLINE_OVERLOADABLE float native_exp(float x) { return __gen_ocl_exp(x); }
 INLINE_OVERLOADABLE float native_exp2(float x) { return __gen_ocl_pow(2, x); }
 INLINE_OVERLOADABLE float native_exp10(float x) { return __gen_ocl_pow(10, x); }
 INLINE_OVERLOADABLE float __gen_ocl_internal_cbrt(float x) {
@@ -2619,7 +2620,12 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_rint(float x) {
 }
 
 INLINE_OVERLOADABLE float __gen_ocl_internal_exp(float x) {
-  //return native_exp(x);
+  //use native instruction when it has enough precision
+  if (x > 128 || x < -128)
+  {
+    return native_exp(x);
+  }
+
   float o_threshold = 8.8721679688e+01,  /* 0x42b17180 */
   u_threshold = -1.0397208405e+02,  /* 0xc2cff1b5 */
   twom100 = 7.8886090522e-31, 	 /* 2**-100=0x0d800000 */
@@ -2890,7 +2896,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_acosh(float x) {
     return 0.0;			/* acosh(1) = 0 */
   } else if (hx > 0x40000000) {	/* 2**28 > x > 2 */
     t=x*x;
-    return __gen_ocl_internal_log((float)2.0*x-one/(x+__gen_ocl_sqrt(t-one)));			
+    return __gen_ocl_internal_log((float)2.0*x-one/(x+__gen_ocl_sqrt(t-one)));
   } else {			/* 1<x<2 */
     t = x-one;
     return log1p(t+__gen_ocl_sqrt((float)2.0*t+t*t));
@@ -3363,7 +3369,7 @@ INLINE_OVERLOADABLE float hypot(float x, float y) {
   cn = __gen_ocl_sqrt (an * an + bn * bn);
   return ldexp (cn, e);
   }else{
-    if (isinf (x) || isinf (y))  /* x or y is infinite.  Return +Infinity.  */    
+    if (isinf (x) || isinf (y))  /* x or y is infinite.  Return +Infinity.  */
       return INFINITY;
     else        /* x or y is NaN.  Return NaN.  */
       return x + y;
-- 
1.7.9.5