[Beignet] [PATCH] GBE: use native exp instruction when enough precision

Sun Jan 19 23:48:27 PST 2014

Some comments:
1. Don't change the file mode from 644 to 755.
2. Don't touch lines of code that you are not actually modifying.
The rest of the patch looks good to me, but you need to rebase it onto the latest master.

Thanks!
Ruiling
-----Original Message-----
From: beignet-bounces at lists.freedesktop.org [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Guo Yejun
Sent: Friday, January 17, 2014 7:01 AM
To: beignet at lists.freedesktop.org; Guo, Yejun
Subject: [Beignet] [PATCH] GBE: use native exp instruction when enough precision

For input values where the native exp instruction provides enough precision, use it; otherwise, use the software path to emulate the exp function.

Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp |    1 +
 backend/src/ir/instruction.hpp             |    2 ++
 backend/src/ir/instruction.hxx             |    1 +
 backend/src/llvm/llvm_gen_backend.cpp      |    2 ++
 backend/src/llvm/llvm_gen_ocl_function.hxx |    1 +
 backend/src/ocl_stdlib.tmpl.h              |   14 ++++++++++----
 6 files changed, 17 insertions(+), 4 deletions(-)
 mode change 100644 => 100755 backend/src/backend/gen_insn_selection.cpp
 mode change 100644 => 100755 backend/src/ir/instruction.hpp
 mode change 100644 => 100755 backend/src/ir/instruction.hxx
 mode change 100644 => 100755 backend/src/llvm/llvm_gen_backend.cpp
 mode change 100644 => 100755 backend/src/llvm/llvm_gen_ocl_function.hxx

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
old mode 100644
new mode 100755
index 445fd6d..fc9f305
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1572,6 +1572,7 @@ namespace gbe
         case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break;
         case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break;
         case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break;
+        case ir::OP_EXP: sel.MATH(dst, GEN_MATH_FUNCTION_EXP, src); break;
         case ir::OP_SQR: sel.MATH(dst, GEN_MATH_FUNCTION_SQRT, src); break;
         case ir::OP_RSQ: sel.MATH(dst, GEN_MATH_FUNCTION_RSQ, src); break;
         case ir::OP_RCP: sel.MATH(dst, GEN_MATH_FUNCTION_INV, src); break;
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
old mode 100644
new mode 100755
index 46577c7..e80badb
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -576,6 +576,8 @@ namespace ir {
   Instruction ABS(Type type, Register dst, Register src);
   /*! log.type dst src */
   Instruction LOG(Type type, Register dst, Register src);
+  /*! exp.type dst src */
+  Instruction EXP(Type type, Register dst, Register src);
   /*! sqr.type dst src */
   Instruction SQR(Type type, Register dst, Register src);
   /*! rsq.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
old mode 100644
new mode 100755
index baaaca2..bb5229a
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -29,6 +29,7 @@ DECL_INSN(MOV, UnaryInstruction)
 DECL_INSN(COS, UnaryInstruction)
 DECL_INSN(SIN, UnaryInstruction)
 DECL_INSN(LOG, UnaryInstruction)
+DECL_INSN(EXP, UnaryInstruction)
 DECL_INSN(SQR, UnaryInstruction)
 DECL_INSN(RSQ, UnaryInstruction)
 DECL_INSN(RCP, UnaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
old mode 100644
new mode 100755
index f178585..5780239
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2035,6 +2035,7 @@ namespace gbe
       case GEN_OCL_SQR:
       case GEN_OCL_RSQ:
       case GEN_OCL_LOG:
+      case GEN_OCL_EXP:
       case GEN_OCL_POW:
       case GEN_OCL_RCP:
       case GEN_OCL_ABS:
@@ -2313,6 +2314,7 @@ namespace gbe
           case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
           case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
           case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
+          case GEN_OCL_EXP: this->emitUnaryCallInst(I,CS,ir::OP_EXP); break;
           case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break;
           case GEN_OCL_RSQ: this->emitUnaryCallInst(I,CS,ir::OP_RSQ); break;
           case GEN_OCL_RCP: this->emitUnaryCallInst(I,CS,ir::OP_RCP); break;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
old mode 100644
new mode 100755
index c9e634c..de2890c
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -25,6 +25,7 @@ DECL_LLVM_GEN_FUNCTION(SIN, __gen_ocl_sin)
 DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt)
 DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt)
 DECL_LLVM_GEN_FUNCTION(LOG, __gen_ocl_log)
+DECL_LLVM_GEN_FUNCTION(EXP, __gen_ocl_exp)
 DECL_LLVM_GEN_FUNCTION(POW, __gen_ocl_pow)
 DECL_LLVM_GEN_FUNCTION(RCP, __gen_ocl_rcp)
 DECL_LLVM_GEN_FUNCTION(RNDZ, __gen_ocl_rndz)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index bbd056f..5c6e2be 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -688,6 +688,7 @@ PURE CONST float __gen_ocl_cos(float x);
 PURE CONST float __gen_ocl_sqrt(float x);
 PURE CONST float __gen_ocl_rsqrt(float x);
 PURE CONST float __gen_ocl_log(float x);
+PURE CONST float __gen_ocl_exp(float x);
 PURE CONST float __gen_ocl_pow(float x, float y);
 PURE CONST float __gen_ocl_rcp(float x);
 PURE CONST float __gen_ocl_rndz(float x);
@@ -2247,7 +2248,7 @@ INLINE_OVERLOADABLE float native_tan(float x) {
 INLINE_OVERLOADABLE float __gen_ocl_internal_tanpi(float x) {
   return native_tan(x * M_PI_F);
 }
-INLINE_OVERLOADABLE float native_exp(float x) { return __gen_ocl_pow(M_E_F, x); }
+INLINE_OVERLOADABLE float native_exp(float x) { return __gen_ocl_exp(x); }
 INLINE_OVERLOADABLE float native_exp2(float x) { return __gen_ocl_pow(2, x); }
 INLINE_OVERLOADABLE float native_exp10(float x) { return __gen_ocl_pow(10, x); }
 INLINE_OVERLOADABLE float __gen_ocl_internal_cbrt(float x) {
@@ -2619,7 +2620,12 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_rint(float x) {
 }
 
 INLINE_OVERLOADABLE float __gen_ocl_internal_exp(float x) {
-  //return native_exp(x);
+  //use native instruction when it has enough precision
+  if (x > 128 || x < -128)
+  {
+    return native_exp(x);
+  }
+
   float o_threshold = 8.8721679688e+01,  /* 0x42b17180 */
   u_threshold = -1.0397208405e+02,  /* 0xc2cff1b5 */
   twom100 = 7.8886090522e-31, 	 /* 2**-100=0x0d800000 */
@@ -2890,7 +2896,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_acosh(float x) {
     return 0.0;			/* acosh(1) = 0 */
   } else if (hx > 0x40000000) {	/* 2**28 > x > 2 */
     t=x*x;
-    return __gen_ocl_internal_log((float)2.0*x-one/(x+__gen_ocl_sqrt(t-one)));			
+    return __gen_ocl_internal_log((float)2.0*x-one/(x+__gen_ocl_sqrt(t-one)));
   } else {			/* 1<x<2 */
     t = x-one;
     return log1p(t+__gen_ocl_sqrt((float)2.0*t+t*t));
@@ -3363,7 +3369,7 @@ INLINE_OVERLOADABLE float hypot(float x, float y) {
   cn = __gen_ocl_sqrt (an * an + bn * bn);
   return ldexp (cn, e);
   }else{
-    if (isinf (x) || isinf (y))  /* x or y is infinite.  Return +Infinity.  */    
+    if (isinf (x) || isinf (y))  /* x or y is infinite.  Return +Infinity.  */
       return INFINITY;
     else        /* x or y is NaN.  Return NaN.  */
       return x + y;
--
1.7.9.5

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet