[Beignet] [PATCH] backend: add double version of acos

Thu Mar 30 05:49:02 UTC 2017

copy it from fdlibm and pass the conformance tests after refined
	the copyright has been declared at the beginning of the file

Signed-off-by: rander <rander.wang at intel.com>
---
 backend/src/libocl/include/ocl_float.h          |   3 +-
 backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 157 ++++++++++++++++++++++++
 backend/src/libocl/tmpl/ocl_math_common.tmpl.h  |   2 +
 3 files changed, 161 insertions(+), 1 deletion(-)

diff --git a/backend/src/libocl/include/ocl_float.h b/backend/src/libocl/include/ocl_float.h
index e78d38b..77b78bb 100644
--- a/backend/src/libocl/include/ocl_float.h
+++ b/backend/src/libocl/include/ocl_float.h
@@ -107,10 +107,11 @@ INLINE_OVERLOADABLE int __ocl_finitef (float x){
 #define DF_EXP_OFFSET 52
 #define DF_SIGN_OFFSET 63
 #define DF_EXP_BIAS 1023
+#define DF_NEGTIVE_INF 0xFFF0000000000000
 
 #define SF_POSITIVE_INF 0x7F800000
 #define SF_NAN 0x7FFFFFFF
 
-#define M_PI 3.1415926535897384626
+#define M_PI 3.141592653589793238462643383279
 
 #endif /* __OCL_FLOAT_H__ */
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index aee5769..c9c7637 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -58,6 +58,63 @@ INLINE void  __setLow(double *x, unsigned int val){
     *x = as_double(low);
 };
 
+OVERLOADABLE double acos(double x)
+{
+    double one=  1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
+    pi =  3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
+    pio2_hi =  1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
+    pio2_lo =  6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
+    pS0 =  1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
+    pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
+    pS2 =  2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
+    pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
+    pS4 =  7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
+    pS5 =  3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
+    qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
+    qS2 =  2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
+    qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
+    qS4 =  7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
+
+    double z,p,q,r,w,s,c,df;
+    int hx,ix;
+    hx = __HI(x);
+    ix = hx&0x7fffffff;
+    if(ix>=0x3ff00000) {    /* |x| >= 1 */
+        if(((ix-0x3ff00000)|__LO(x))==0) {  /* |x|==1 */
+        if(hx>0) return 0.0;        /* acos(1) = 0  */
+        else return pi+2.0*pio2_lo; /* acos(-1)= pi */
+        }
+        return (x-x)/(x-x);     /* acos(|x|>1) is NaN */
+    }
+    if(ix<0x3fe00000) { /* |x| < 0.5 */
+        if(ix<=0x3c600000) return pio2_hi+pio2_lo;/*if|x|<2**-57*/
+        z = x*x;
+        p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
+        q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+        r = p/q;
+        return pio2_hi - (x - (pio2_lo-x*r));
+    } else  if (hx<0) {     /* x < -0.5 */
+        z = (one+x)*0.5;
+        p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
+        q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+        s = sqrt(z);
+        r = p/q;
+        w = r*s-pio2_lo;
+        return pi - 2.0*(s+w);
+    } else {            /* x > 0.5 */
+        z = (one-x)*0.5;
+        s = sqrt(z);
+        df = s;
+        __setLow(&df, 0);
+        c  = (z-df*df)/(s+df);
+        p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
+        q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+        r = p/q;
+        w = r*s+c;
+        return 2.0*(df+w);
+    }
+}
+
 OVERLOADABLE double ceil(double x)
 {
     double ret;
@@ -704,6 +761,106 @@ OVERLOADABLE double round(double x)
 	return dp;
 }
 
+OVERLOADABLE double sqrt(double x)
+{
+    double z;
+    int     sign = (int)0x80000000;
+    unsigned r,t1,s1,ix1,q1;
+    int ix0,s0,q,m,t,i;
+    const double    one = 1.0, tiny=1.0e-300;
+
+    ix0 = __HI(x);          /* high word of x */
+    ix1 = __LO(x);      /* low word of x */
+
+    /* take care of Inf and NaN */
+    if((ix0&0x7ff00000)==0x7ff00000) {
+        return x*x+x;       /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
+                       sqrt(-inf)=sNaN */
+    }
+    /* take care of zero */
+    if(ix0<=0) {
+        if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
+        else if(ix0<0)
+        return (x-x)/(x-x);     /* sqrt(-ve) = sNaN */
+    }
+    /* normalize x */
+    m = (ix0>>20);
+    if(m==0) {              /* subnormal x */
+        while(ix0==0) {
+        m -= 21;
+        ix0 |= (ix1>>11); ix1 <<= 21;
+        }
+        for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
+        m -= i-1;
+        ix0 |= (ix1>>(32-i));
+        ix1 <<= i;
+    }
+    m -= 1023;  /* unbias exponent */
+    ix0 = (ix0&0x000fffff)|0x00100000;
+    if(m&1){    /* odd m, double x to make it even */
+        ix0 += ix0 + ((ix1&sign)>>31);
+        ix1 += ix1;
+    }
+    m >>= 1;    /* m = [m/2] */
+
+    /* generate sqrt(x) bit by bit */
+    ix0 += ix0 + ((ix1&sign)>>31);
+    ix1 += ix1;
+    q = q1 = s0 = s1 = 0;   /* [q,q1] = sqrt(x) */
+    r = 0x00200000;     /* r = moving bit from right to left */
+
+    while(r!=0) {
+        t = s0+r;
+        if(t<=ix0) {
+        s0   = t+r;
+        ix0 -= t;
+        q   += r;
+        }
+        ix0 += ix0 + ((ix1&sign)>>31);
+        ix1 += ix1;
+        r>>=1;
+    }
+
+    r = sign;
+    while(r!=0) {
+        t1 = s1+r;
+        t  = s0;
+        if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
+        s1  = t1+r;
+        if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
+        ix0 -= t;
+        if (ix1 < t1) ix0 -= 1;
+        ix1 -= t1;
+        q1  += r;
+        }
+        ix0 += ix0 + ((ix1&sign)>>31);
+        ix1 += ix1;
+        r>>=1;
+    }
+
+    /* use floating add to find out rounding direction */
+    if((ix0|ix1)!=0) {
+        z = one-tiny; /* trigger inexact flag */
+        if (z>=one) {
+            z = one+tiny;
+            if (q1==(unsigned)0xffffffff) { q1=0; q += 1;}
+        else if (z>one) {
+            if (q1==(unsigned)0xfffffffe) q+=1;
+            q1+=2;
+        } else
+                q1 += (q1&1);
+        }
+    }
+    ix0 = (q>>1)+0x3fe00000;
+    ix1 =  q1>>1;
+    if ((q&1)==1) ix1 |= sign;
+    ix0 += (m <<20);
+    __setHigh(&z, ix0);
+    __setLow(&z, ix1);
+    return z;
+}
+
+
 OVERLOADABLE double trunc(double x)
 {
 	double ret = floor(fabs(x));
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
index 2f8eb29..2ffaec1 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
@@ -20,6 +20,7 @@
 
 #include "ocl_types.h"
 
+OVERLOADABLE double acos(double x);
 OVERLOADABLE double ceil(double x);
 OVERLOADABLE double copysign(double x, double y);
 OVERLOADABLE double fabs(double x);
@@ -39,6 +40,7 @@ OVERLOADABLE double nan(ulong code);
 OVERLOADABLE double nextafter(double x, double y);
 OVERLOADABLE double rint(double x);
 OVERLOADABLE double round(double x);
+OVERLOADABLE double sqrt(double x);
 OVERLOADABLE double trunc(double x);
 
 
-- 
2.7.4