[Beignet] [PATCH] backend: add double version of acos
rander
rander.wang at intel.com
Thu Mar 30 05:49:02 UTC 2017
copy it from fdlibm and pass the conformance tests after refined
the copyright has been declared at the beginning of the file
Signed-off-by: rander <rander.wang at intel.com>
---
backend/src/libocl/include/ocl_float.h | 3 +-
backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 157 ++++++++++++++++++++++++
backend/src/libocl/tmpl/ocl_math_common.tmpl.h | 2 +
3 files changed, 161 insertions(+), 1 deletion(-)
diff --git a/backend/src/libocl/include/ocl_float.h b/backend/src/libocl/include/ocl_float.h
index e78d38b..77b78bb 100644
--- a/backend/src/libocl/include/ocl_float.h
+++ b/backend/src/libocl/include/ocl_float.h
@@ -107,10 +107,11 @@ INLINE_OVERLOADABLE int __ocl_finitef (float x){
#define DF_EXP_OFFSET 52
#define DF_SIGN_OFFSET 63
#define DF_EXP_BIAS 1023
+#define DF_NEGTIVE_INF 0xFFF0000000000000
#define SF_POSITIVE_INF 0x7F800000
#define SF_NAN 0x7FFFFFFF
-#define M_PI 3.1415926535897384626
+#define M_PI 3.141592653589793238462643383279
#endif /* __OCL_FLOAT_H__ */
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index aee5769..c9c7637 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -58,6 +58,63 @@ INLINE void __setLow(double *x, unsigned int val){
*x = as_double(low);
};
+OVERLOADABLE double acos(double x)
+{
+ double one= 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
+ pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
+ pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
+ pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
+ pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
+ pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
+ pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
+ pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
+ pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
+ pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
+ qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
+ qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
+ qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
+ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
+
+ double z,p,q,r,w,s,c,df;
+ int hx,ix;
+ hx = __HI(x);
+ ix = hx&0x7fffffff;
+ if(ix>=0x3ff00000) { /* |x| >= 1 */
+ if(((ix-0x3ff00000)|__LO(x))==0) { /* |x|==1 */
+ if(hx>0) return 0.0; /* acos(1) = 0 */
+ else return pi+2.0*pio2_lo; /* acos(-1)= pi */
+ }
+ return (x-x)/(x-x); /* acos(|x|>1) is NaN */
+ }
+ if(ix<0x3fe00000) { /* |x| < 0.5 */
+ if(ix<=0x3c600000) return pio2_hi+pio2_lo;/*if|x|<2**-57*/
+ z = x*x;
+ p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
+ q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+ r = p/q;
+ return pio2_hi - (x - (pio2_lo-x*r));
+ } else if (hx<0) { /* x < -0.5 */
+ z = (one+x)*0.5;
+ p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
+ q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+ s = sqrt(z);
+ r = p/q;
+ w = r*s-pio2_lo;
+ return pi - 2.0*(s+w);
+ } else { /* x > 0.5 */
+ z = (one-x)*0.5;
+ s = sqrt(z);
+ df = s;
+ __setLow(&df, 0);
+ c = (z-df*df)/(s+df);
+ p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
+ q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+ r = p/q;
+ w = r*s+c;
+ return 2.0*(df+w);
+ }
+}
+
OVERLOADABLE double ceil(double x)
{
double ret;
@@ -704,6 +761,106 @@ OVERLOADABLE double round(double x)
return dp;
}
+OVERLOADABLE double sqrt(double x)
+{
+ double z;
+ int sign = (int)0x80000000;
+ unsigned r,t1,s1,ix1,q1;
+ int ix0,s0,q,m,t,i;
+ const double one = 1.0, tiny=1.0e-300;
+
+ ix0 = __HI(x); /* high word of x */
+ ix1 = __LO(x); /* low word of x */
+
+ /* take care of Inf and NaN */
+ if((ix0&0x7ff00000)==0x7ff00000) {
+ return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
+ sqrt(-inf)=sNaN */
+ }
+ /* take care of zero */
+ if(ix0<=0) {
+ if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
+ else if(ix0<0)
+ return (x-x)/(x-x); /* sqrt(-ve) = sNaN */
+ }
+ /* normalize x */
+ m = (ix0>>20);
+ if(m==0) { /* subnormal x */
+ while(ix0==0) {
+ m -= 21;
+ ix0 |= (ix1>>11); ix1 <<= 21;
+ }
+ for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
+ m -= i-1;
+ ix0 |= (ix1>>(32-i));
+ ix1 <<= i;
+ }
+ m -= 1023; /* unbias exponent */
+ ix0 = (ix0&0x000fffff)|0x00100000;
+ if(m&1){ /* odd m, double x to make it even */
+ ix0 += ix0 + ((ix1&sign)>>31);
+ ix1 += ix1;
+ }
+ m >>= 1; /* m = [m/2] */
+
+ /* generate sqrt(x) bit by bit */
+ ix0 += ix0 + ((ix1&sign)>>31);
+ ix1 += ix1;
+ q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */
+ r = 0x00200000; /* r = moving bit from right to left */
+
+ while(r!=0) {
+ t = s0+r;
+ if(t<=ix0) {
+ s0 = t+r;
+ ix0 -= t;
+ q += r;
+ }
+ ix0 += ix0 + ((ix1&sign)>>31);
+ ix1 += ix1;
+ r>>=1;
+ }
+
+ r = sign;
+ while(r!=0) {
+ t1 = s1+r;
+ t = s0;
+ if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
+ s1 = t1+r;
+ if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
+ ix0 -= t;
+ if (ix1 < t1) ix0 -= 1;
+ ix1 -= t1;
+ q1 += r;
+ }
+ ix0 += ix0 + ((ix1&sign)>>31);
+ ix1 += ix1;
+ r>>=1;
+ }
+
+ /* use floating add to find out rounding direction */
+ if((ix0|ix1)!=0) {
+ z = one-tiny; /* trigger inexact flag */
+ if (z>=one) {
+ z = one+tiny;
+ if (q1==(unsigned)0xffffffff) { q1=0; q += 1;}
+ else if (z>one) {
+ if (q1==(unsigned)0xfffffffe) q+=1;
+ q1+=2;
+ } else
+ q1 += (q1&1);
+ }
+ }
+ ix0 = (q>>1)+0x3fe00000;
+ ix1 = q1>>1;
+ if ((q&1)==1) ix1 |= sign;
+ ix0 += (m <<20);
+ __setHigh(&z, ix0);
+ __setLow(&z, ix1);
+ return z;
+}
+
+
OVERLOADABLE double trunc(double x)
{
double ret = floor(fabs(x));
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
index 2f8eb29..2ffaec1 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
@@ -20,6 +20,7 @@
#include "ocl_types.h"
+OVERLOADABLE double acos(double x);
OVERLOADABLE double ceil(double x);
OVERLOADABLE double copysign(double x, double y);
OVERLOADABLE double fabs(double x);
@@ -39,6 +40,7 @@ OVERLOADABLE double nan(ulong code);
OVERLOADABLE double nextafter(double x, double y);
OVERLOADABLE double rint(double x);
OVERLOADABLE double round(double x);
+OVERLOADABLE double sqrt(double x);
OVERLOADABLE double trunc(double x);
--
2.7.4
More information about the Beignet
mailing list