[Beignet] [PATCH_V2] [PATCH]GBE: improve precision of exp
Song, Ruiling
ruiling.song at intel.com
Tue Dec 24 18:09:36 PST 2013
Please rebase your change to latest origin/master. add some commit message if needed.
-----Original Message-----
From: beignet-bounces at lists.freedesktop.org [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Meng at freedesktop.org
Sent: Thursday, December 19, 2013 4:14 AM
To: beignet at lists.freedesktop.org
Cc: Lv, Meng
Subject: [Beignet] [PATCH_V2] [PATCH]GBE: improve precision of exp
From: Lv Meng <meng.lv at intel.com>
Signed-off-by: Lv Meng <meng.lv at intel.com>
---
backend/src/ocl_stdlib.tmpl.h | 101 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 100 insertions(+), 1 deletion(-) mode change 100644 => 100755 backend/src/ocl_stdlib.tmpl.h
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h old mode 100644 new mode 100755 index e5f356e..904904b
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -148,6 +148,31 @@ INLINE_OVERLOADABLE float __ocl_nanf(void) {
u.u = 0x7F800001;
return u.f;
}
+typedef union
+{
+ float value;
+ uint word;
+} float_shape_type;
+
+/* Get a 32 bit int from a float. */
+#ifndef GEN_OCL_GET_FLOAT_WORD
+# define GEN_OCL_GET_FLOAT_WORD(i,d) \
+do { \
+ float_shape_type gf_u; \
+ gf_u.value = (d); \
+ (i) = gf_u.word; \
+} while (0)
+#endif
+/* Set a float from a 32 bit int. */
+#ifndef GEN_OCL_SET_FLOAT_WORD
+# define GEN_OCL_SET_FLOAT_WORD(d,i) \
+do { \
+ float_shape_type sf_u; \
+ sf_u.word = (i); \
+ (d) = sf_u.value; \
+} while (0)
+#endif
+
#define HUGE_VALF (__ocl_inff())
#define INFINITY (__ocl_inff())
#define NAN (__ocl_nanf())
@@ -1851,13 +1876,87 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_round(float x) { } INLINE_OVERLOADABLE float __gen_ocl_internal_floor(float x) { return __gen_ocl_rndd(x); } INLINE_OVERLOADABLE float __gen_ocl_internal_ceil(float x) { return __gen_ocl_rndu(x); }
-INLINE_OVERLOADABLE float __gen_ocl_internal_exp(float x) { return native_exp(x); }
INLINE_OVERLOADABLE float powr(float x, float y) { return __gen_ocl_pow(x,y); } INLINE_OVERLOADABLE float fmod(float x, float y) { return x-y*__gen_ocl_rndz(x/y); } INLINE_OVERLOADABLE float remainder(float x, float y) { return x-y*__gen_ocl_rnde(x/y); } INLINE_OVERLOADABLE float __gen_ocl_internal_rint(float x) {
return __gen_ocl_rnde(x);
}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_exp(float x) {
+ //return native_exp(x);
+ float o_threshold = 8.8721679688e+01, /* 0x42b17180 */
+ u_threshold = -1.0397208405e+02, /* 0xc2cff1b5 */
+ twom100 = 7.8886090522e-31, /* 2**-100=0x0d800000 */
+ ivln2 = 1.4426950216e+00, /* 0x3fb8aa3b =1/ln2 */
+ one = 1.0,
+ huge = 1.0e+30,
+ P1 = 1.6666667163e-01, /* 0x3e2aaaab */
+ P2 = -2.7777778450e-03, /* 0xbb360b61 */
+ P3 = 6.6137559770e-05, /* 0x388ab355 */
+ P4 = -1.6533901999e-06, /* 0xb5ddea0e */
+ P5 = 4.1381369442e-08; /* 0x3331bb4c */
+ float ln2HI[2],ln2LO[2],halF[2];
+ float y,hi=0.0,lo=0.0,c,t;
+ int k=0,xsb;
+ unsigned hx;
+ ln2HI[0] = 6.9313812256e-01; /* 0x3f317180 */
+ ln2HI[1] = -6.9313812256e-01; /* 0xbf317180 */
+ ln2LO[0] = 9.0580006145e-06; /* 0x3717f7d1 */
+ ln2LO[1] = -9.0580006145e-06; /* 0xb717f7d1 */
+ halF[0] = 0.5;
+ halF[1] = -0.5;
+
+ GEN_OCL_GET_FLOAT_WORD(hx,x);
+ xsb = (hx>>31)&1; /* sign bit of x */
+ hx &= 0x7fffffff; /* high word of |x| */
+
+ /* filter out non-finite argument */
+ if(hx >= 0x42b17218) { /* if |x|>=88.721... */
+ if(hx>0x7f800000)
+ return x+x; /* NaN */
+ if(hx==0x7f800000)
+ return (xsb==0)? x:0.0; /* exp(+-inf)={inf,0} */
+ if(x > o_threshold) return huge*huge; /* overflow */
+ if(x < u_threshold) return twom100*twom100; /* underflow */ }
+ /* argument reduction */
+ if(hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
+ if(hx < 0x3F851592) { /* and |x| < 1.5 ln2 */
+ hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb;
+ } else {
+ k = ivln2*x+halF[xsb];
+ t = k;
+ hi = x - t*ln2HI[0]; /* t*ln2HI is exact here */
+ lo = t*ln2LO[0];
+ }
+ x = hi - lo;
+ }
+ else if(hx < 0x31800000) { /* when |x|<2**-28 */
+ if(huge+x>one) return one+x;/* trigger inexact */ } else k = 0;
+
+ /* x is now in primary range */
+ t = x*x;
+ c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
+ if(k==0)
+ return one-((x*c)/(c-(float)2.0)-x);
+ else
+ y = one-((lo-(x*c)/((float)2.0-c))-hi);
+ if(k >= -125) {
+ unsigned hy;
+ GEN_OCL_GET_FLOAT_WORD(hy,y);
+ GEN_OCL_SET_FLOAT_WORD(y,hy+(k<<23)); /* add k to y's exponent */
+ return y;
+ } else {
+ unsigned hy;
+ GEN_OCL_GET_FLOAT_WORD(hy,y);
+ GEN_OCL_SET_FLOAT_WORD(y,hy+((k+100)<<23)); /* add k to y's exponent */
+ return y*twom100;
+ }
+}
+
// TODO use llvm intrinsics definitions #define cos native_cos #define cospi __gen_ocl_internal_cospi
--
1.7.10.4
_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list