[Beignet] [PATCH] [PATCH]GBE: improve precision of exp10
Lv Meng
meng.lv at intel.com
Sun Jan 12 16:54:02 PST 2014
Signed-off-by: Lv Meng <meng.lv at intel.com>
---
backend/src/ocl_stdlib.tmpl.h | 53 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 52 insertions(+), 1 deletion(-)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 07a4fd9..907e326 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -2381,6 +2381,57 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_atanh(float x) {
return __gen_ocl_internal_copysign(t, x);
}
+/* Returns 10**x in single precision. The argument is reduced with
+ * 10**x = 10**g * 2**n, n = floor(x * log2(10) + 0.5), 10**g is evaluated
+ * as 1 + g * P(g) with a 6-coefficient polynomial P, and the result is
+ * scaled by 2**n. NaN yields NaN, x > MAXARG (including +inf) yields
+ * INFINITY, x < -MAXL10 (including -inf) yields 0 and x == 0 yields 1. */
+INLINE_OVERLOADABLE float __gen_ocl_internal_exp10(float x){
+  float px, qx, ans;
+  short n;
+  /* Results for arguments below -MAXL10 are flushed to zero. */
+  const float MAXL10 = 38.230809449325611792f;
+  /* 10**39 already exceeds FLT_MAX; the bound also keeps n inside short. */
+  const float MAXARG = 39.0f;
+  const float LOG210 = 3.32192809488736234787e0f; /* log2(10) */
+  /* LG102A + LG102B == log10(2), split into a high and a low part. */
+  const float LG102A = 3.00781250000000000000E-1f;
+  const float LG102B = 2.48745663981195213739E-4f;
+
+  /* Propagate NaN before qx is converted to short below. */
+  if( x != x ) return x;
+  /* Out-of-range arguments; these two tests also cover both infinities. */
+  if( x > MAXARG ) return INFINITY;
+  if( x < -MAXL10 ) return 0.0f;
+  /* 10**0 is exactly 1, so skip the reduction. */
+  if( x == 0 ) return 1.0f;
+
+  /* Express 10**x = 10**g 2**n
+   *   = 10**g 10**( n log10(2) )
+   *   = 10**( g + n log10(2) )
+   */
+  px = x * LOG210;
+  qx = __gen_ocl_internal_floor( px + 0.5f );
+  n = qx;
+  x -= qx * LG102A;
+  x -= qx * LG102B;
+
+  /* polynomial approximation for the exponential
+   * of the fractional part, evaluated in Horner form:
+   * 10**x = 1 + x P(x)
+   */
+  ans = 2.063216740311022E-001f;
+  ans = ans * x + 5.420251702225484E-001f;
+  ans = ans * x + 1.171292686296281E+000f;
+  ans = ans * x + 2.034649854009453E+000f;
+  ans = ans * x + 2.650948748208892E+000f;
+  ans = ans * x + 2.302585167056758E+000f;
+  px = 1.0f + x * ans;
+
+  /* multiply by power of 2 */
+  return __gen_ocl_internal_ldexp( px, n );
+}
+
// TODO use llvm intrinsics definitions
#define cos native_cos
#define cospi __gen_ocl_internal_cospi
@@ -2946,7 +2997,7 @@ DECL_HALF_ST_SPACE(__private)
#define log10 __gen_ocl_internal_log10
#define exp __gen_ocl_internal_exp
#define exp2 native_exp2
-#define exp10 native_exp10
+#define exp10 __gen_ocl_internal_exp10
#define expm1 __gen_ocl_internal_expm1
#define fmin __gen_ocl_internal_fmin
#define fmax __gen_ocl_internal_fmax
--
1.7.10.4
More information about the Beignet mailing list