[Beignet] [PATCH] backend: refine exp function with float input
rander.wang
rander.wang at intel.com
Tue Jun 6 08:57:25 UTC 2017
remove some corner cases check for these path can not be
reached.And refine branch code to select. These improvements
get 20% performance. and the performance of OCL_ExpFixture_Exp
in opencv can match up to other Gen driver
Signed-off-by: rander.wang <rander.wang at intel.com>
---
backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 61 ++++++++++++++++++++++++-
1 file changed, 59 insertions(+), 2 deletions(-)
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index 6b942db..c131499 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -1418,6 +1418,63 @@ OVERLOADABLE float __gen_ocl_internal_exp(float x) {
}
}
+OVERLOADABLE float __gen_ocl_internal_simple_exp(float x) {
+ float o_threshold = 8.8721679688e+01, /* 0x42b17180 */
+ u_threshold = -1.0397208405e+02, /* 0xc2cff1b5 */
+ twom100 = 7.8886090522e-31, /* 2**-100=0x0d800000 */
+ ivln2 = 1.4426950216e+00, /* 0x3fb8aa3b =1/ln2 */
+ one = 1.0,
+ huge = 1.0e+30,
+ P1 = 1.6666667163e-01, /* 0x3e2aaaab */
+ P2 = -2.7777778450e-03; /* 0xbb360b61 */
+ float y,hi=0.0,lo=0.0,c,t;
+ int k=0,xsb;
+ unsigned hx;
+ float ln2HI_0 = 6.9313812256e-01; /* 0x3f317180 */
+ float ln2HI_1 = -6.9313812256e-01; /* 0xbf317180 */
+ float ln2LO_0 = 9.0580006145e-06; /* 0x3717f7d1 */
+ float ln2LO_1 = -9.0580006145e-06; /* 0xb717f7d1 */
+ float half_0 = 0.5;
+ float half_1 = -0.5;
+
+ GEN_OCL_GET_FLOAT_WORD(hx,x);
+ xsb = (hx>>31)&1; /* sign bit of x */
+ hx &= 0x7fffffff; /* high word of |x| */
+
+ /* filter out non-finite argument */
+ if(hx >= 0x42b17218)
+ { /* if |x|>=88.721... */
+ float retVal = -1.0f;
+ float tmp = (xsb==0)? x:0.0;
+ retVal = (hx>0x7f800000)? NAN:retVal;
+ retVal = (hx==0x7f800000)? tmp:retVal;
+ retVal = (x > o_threshold)? INFINITY:retVal; /* overflow */
+ retVal = (x < u_threshold)? 0.0:retVal; /* underflow */
+ if(retVal != -1.0f) return retVal;
+ }
+
+ /* argument reduction */
+ float tmp = xsb == 1 ? half_1 : half_0;
+ k = mad(ivln2, x, tmp);
+ t = k;
+ hi = mad(-t, ln2HI_0, x); /* t*ln2HI is exact here */
+ lo = t*ln2LO_0;
+ x = hi - lo;
+
+ /* x is now in primary range */
+ t = x*x;
+ c = mad(-t, mad(t, P2, P1), x);
+ y = one-((lo + (x*c)/(c-2.0f))-hi);
+
+ unsigned hy;
+ GEN_OCL_GET_FLOAT_WORD(hy,y);
+
+ float factor = (k >= -125)? 1.0f:twom100;
+ k = (k >= -125)? k:k+100;
+ GEN_OCL_SET_FLOAT_WORD(y,hy+(k<<23)); /* add k to y's exponent */
+ return y*factor;
+}
+
/* erf,erfc from glibc s_erff.c -- float version of s_erf.c.
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian at cygnus.com.
*/
@@ -3105,10 +3162,10 @@ OVERLOADABLE float exp(float x) {
return __gen_ocl_internal_fastpath_exp(x);
/* Use native instruction when it has enough precision */
- if (x > -0x1.6p1 && x < 0x1.6p1)
+ if (fabs(x) < 0x1.6p1)
return __gen_ocl_internal_fastpath_exp(x);
- return __gen_ocl_internal_exp(x);
+ return __gen_ocl_internal_simple_exp(x);
}
OVERLOADABLE float exp2(float x) {
--
2.7.4
More information about the Beignet
mailing list