[Beignet] [PATCH] backend: refine code structure of ocl math function

rander rander.wang at intel.com
Wed Mar 29 08:56:54 UTC 2017


	there are two math function files, ocl1.2 and ocl2.0.
	the difference is in parameters in some functions.
	Now mov the common functions to a file to maintain
	code easily

Signed-off-by: rander <rander.wang at intel.com>
---
 backend/src/libocl/CMakeLists.txt               |   4 +-
 backend/src/libocl/script/ocl_math_common.def   |  80 +++
 backend/src/libocl/tmpl/ocl_math.tmpl.cl        | 668 +--------------------
 backend/src/libocl/tmpl/ocl_math.tmpl.h         |  23 +-
 backend/src/libocl/tmpl/ocl_math_20.tmpl.cl     | 737 ++----------------------
 backend/src/libocl/tmpl/ocl_math_20.tmpl.h      |  30 +-
 backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 714 +++++++++++++++++++++++
 backend/src/libocl/tmpl/ocl_math_common.tmpl.h  |  45 ++
 8 files changed, 888 insertions(+), 1413 deletions(-)
 create mode 100644 backend/src/libocl/script/ocl_math_common.def
 create mode 100644 backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
 create mode 100644 backend/src/libocl/tmpl/ocl_math_common.tmpl.h

diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt
index c68ecb0..1ebf26b 100644
--- a/backend/src/libocl/CMakeLists.txt
+++ b/backend/src/libocl/CMakeLists.txt
@@ -112,13 +112,13 @@ FOREACH(M ${OCL_PY_GENERATED_MODULES})
     GENERATE_SOURCE_PY(OCL_SOURCE_FILES ${M})
 ENDFOREACH(M) 
 
-SET (OCL_PY_GENERATED_MODULES_12 ocl_math)
+SET (OCL_PY_GENERATED_MODULES_12 ocl_math ocl_math_common)
 FOREACH(M ${OCL_PY_GENERATED_MODULES_12})
     GENERATE_HEADER_PY(${M})
     GENERATE_SOURCE_PY(OCL_SOURCE_FILES_12 ${M})
 ENDFOREACH(M)
 
-SET (OCL_PY_GENERATED_MODULES_20 ocl_math_20)
+SET (OCL_PY_GENERATED_MODULES_20 ocl_math_20 ocl_math_common)
 FOREACH(M ${OCL_PY_GENERATED_MODULES_20})
     GENERATE_HEADER_PY(${M})
     GENERATE_SOURCE_PY(OCL_SOURCE_FILES_20 ${M})
diff --git a/backend/src/libocl/script/ocl_math_common.def b/backend/src/libocl/script/ocl_math_common.def
new file mode 100644
index 0000000..16dd1a7
--- /dev/null
+++ b/backend/src/libocl/script/ocl_math_common.def
@@ -0,0 +1,80 @@
+##math
+double acos (double)
+double acosh (double)
+double acospi (double x)
+double asin (double)
+double asinh (double)
+double asinpi (double x)
+double atan (double y_over_x)
+double atan2 (double y, double x)
+double atanh (double)
+double atanpi (double x)
+double atan2pi (double y, double x)
+double cbrt (double)
+double ceil (double)
+double copysign (double x, double y)
+double cos (double)
+double cosh (double)
+double cospi (double x)
+double erfc (double)
+double erf (double)
+double exp (double x)
+double exp2 (double)
+double exp10 (double)
+double expm1 (double x)
+double fabs (double)
+double fdim (double x, double y)
+double floor (double)
+double fma (double a, double b, double c)
+double fmax (double x, double y)
+double fmax (double x, double y)
+double fmin (double x, double y)
+double fmin (double x, double y)
+double fmod (double x, double y)
+double fract (double x,  double *iptr)
+doublen frexp (doublen x,  intn *exp)
+double frexp (double x,  int *exp)
+double hypot (double x, double y)
+intn ilogb (doublen x)
+int ilogb (double x)
+doublen ldexp (doublen x, intn k)
+doublen ldexp (doublen x, int k)
+double ldexp (double x, int k)
+double lgamma (double x)
+doublen lgamma_r (doublen x,  intn *signp)
+double lgamma_r (double x,  int *signp)
+double log (double)
+double log2 (double)
+double log10 (double)
+double log1p (double x)
+double logb (double x)
+double mad (double a, double b, double c)
+double maxmag (double x, double y)
+double minmag (double x, double y)
+double modf (double x,  double *iptr)
+doublen nan (ulongn nancode)
+double nan (ulong nancode)
+double nextafter (double x, double y)
+double pow (double x, double y)
+doublen pown (doublen x, intn y)
+double pown (double x, int y)
+double powr (double x, double y)
+double remainder (double x, double y)
+doublen remquo (doublen x, doublen y,  intn *quo)
+double remquo (double x, double y,  int *quo)
+double rint (double)
+doublen rootn (doublen x, intn y)
+doublen rootn (double x, int y)
+double round (double x)
+double rsqrt (double)
+double sin (double)
+double sincos (double x,  double *cosval)
+double sinh (double)
+double sinpi (double x)
+double sqrt (double)
+double tan (double)
+double tanh (double)
+double tanpi (double x)
+double tgamma (double)
+double trunc (double)
+
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index a50d4db..a15273c 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -3920,92 +3920,13 @@ INLINE void  __setHigh(double *x, int val){
     *x = as_double(high);
 };
 
-INLINE void  __setLow(double *x, int val){
+INLINE void  __setLow(double *x, unsigned int val){
     long x64 = as_long(*x);
     long low = x64  & 0xFFFFFFFF00000000;
-    low |= val;
+    low |= (long)val;
     *x = as_double(low);
 };
 
-OVERLOADABLE double ceil(double x)
-{
-    double ret;
-    ulong lval = as_ulong(x);
-    int exp = ((lval >> 52) & 0x7FF) - 1023;
-    int sign = (lval >> 63) & 0x1;
-
-    long i = (1L << (52 - exp));
-    long mask = 0x10000000000000 - i;
-    unsigned long uv = 0xFFF0000000000000 + mask;
-    ulong vv = lval & uv;
-    double	dp = as_double(vv);
-    ret = ((vv != lval) && !sign) ? dp +1.0:dp;
-
-    ret = ((exp < 0) && sign) ? 0:ret;
-    double tmp = (lval & DF_ABS_MASK) ? 1.0:0.0;
-    ret = ((exp < 0) && !sign) ? tmp:ret;
-    ret = (exp >= 52) ? x:ret;
-
-     return ret;
-}
-
-OVERLOADABLE double copysign(double x, double y)
-{
-	ulong uy = as_ulong(y);
-	ulong sign = uy & DF_SIGN_MASK;
-	ulong ux = as_ulong(x);
-	ux = (ux & DF_ABS_MASK) | sign;
-	return as_double(ux);
-}
-
-OVERLOADABLE double fabs(double x)
-{
-    long  qw = as_ulong(x);
-    qw &= 0x7FFFFFFFFFFFFFFF;
-    return as_double(qw);
-}
-
-OVERLOADABLE double fdim(double x, double y)
-{
-	if(isnan(x))
-		return x;
-	if(isnan(y))
-		return y;
-	return x > y ? (x - y) : +0.f;
-}
-
-OVERLOADABLE double floor(double x)
-{
-    ulong lval = as_ulong(x);
-    int exp = ((lval >> 52) & 0x7FF) - 1023;
-    int sign = (lval >> 63) & 0x1;
-    if(exp < 0)
-    {
-        if(sign)
-	{
-		if(lval & DF_ABS_MASK)
-			return -1L;
-		else
-			return 0.0;
-	}
-        else return 0.0;
-    }
-    else
-    {
-        if(exp >= 52)
-            return x;
-         long i = (1L << (52 - exp));
-         i = 0x10000000000000 - i;
-         unsigned long uv = 0xFFF0000000000000 + i;
-         ulong vv = lval & uv;
-         double  dp = as_double(vv);
-         if(vv != lval)
-            dp -= sign;
-
-         return dp;
-    }
-}
-
 OVERLOADABLE double fract(double x, global double *p)
 {
     double ret = floor(x);
@@ -4051,588 +3972,3 @@ OVERLOADABLE double frexp(double x, local int *exp){BODY;}
 OVERLOADABLE double frexp(double x, private int *exp){BODY;}
 #undef BODY
 
-/* @(#)e_log.c 1.3 95/01/18 */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-OVERLOADABLE double log(double x)
-{
-	double ln2_hi	=  6.93147180369123816490e-01,	/* 3fe62e42 fee00000 */
-	ln2_lo	=  1.90821492927058770002e-10,	/* 3dea39ef 35793c76 */
-	two54	=  1.80143985094819840000e+16,	/* 43500000 00000000 */
-	Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
-	Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
-	Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
-	Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
-	Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
-	Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
-	Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
-
-	double zero = 0;
-	double hfsq,f,s,z,R,w,t1,t2,dk;
-	int k,hx,i,j;
-	unsigned lx;
-
-	hx = __HI(x);		/* high word of x */
-	lx = __LO(x);		/* low  word of x */
-
-	k=0;
-	if (hx < 0x00100000)
-	{			/* x < 2**-1022  */
-		if (((hx&0x7fffffff)|lx)==0)
-		return -two54/zero;		/* log(+-0)=-inf */
-		if (hx<0)
-			return (x-x)/zero;	/* log(-#) = NaN */
-		k -= 54; x *= two54; /* subnormal number, scale up x */
-		hx = __HI(x);		/* high word of x */
-	}
-	if (hx >= 0x7ff00000) return x+x;
-	k += (hx>>20)-1023;
-	hx &= 0x000fffff;
-	i = (hx+0x95f64)&0x100000;
-	__setHigh(&x, (hx|(i^0x3ff00000)));	/* normalize x or x/2 */
-	k += (i>>20);
-	f = x-1.0;
-	if((0x000fffff&(2+hx))<3) {	/* |f| < 2**-20 */
-		if(f==zero)
-		{
-			if(k==0) return zero;
-			else
-			{
-				dk=(double)k;
-				return dk*ln2_hi+dk*ln2_lo;
-			}
-		}
-
-		R = f*f*(0.5-0.33333333333333333*f);
-		if(k==0)
-			return f-R;
-		else {dk=(double)k;
-			return dk*ln2_hi-((R-dk*ln2_lo)-f);}
-	}
-	s = f/(2.0+f);
-	dk = (double)k;
-	z = s*s;
-	i = hx-0x6147a;
-	w = z*z;
-	j = 0x6b851-hx;
-	t1= w*(Lg2+w*(Lg4+w*Lg6));
-	t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
-	i |= j;
-	R = t2+t1;
-	if(i>0) {
-	hfsq=0.5*f*f;
-	if(k==0)
-		return f-(hfsq-s*(hfsq+R));
-	else
-		return dk*ln2_hi-((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f);
-	}
-	else
-	{
-		if(k==0)
-			return f-s*(f-R);
-		else
-			return dk*ln2_hi-((s*(f-R)-dk*ln2_lo)-f);
-	}
-
-}
-
-OVERLOADABLE double log2(double x)
-{
-	double ln2 = 0.69314718055994530942,
-	zero = 0,
-	two54   =  1.80143985094819840000e+16,  /* 43500000 00000000 */
-	Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
-	Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
-	Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
-	Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
-	Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
-	Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
-	Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
-
-	double hfsq,f,s,z,R,w,t1,t2,dk;
-	int k,hx,i,j;
-	uint lx;
-
-	hx = __HI(x);
-	lx = __LO(x);
-
-	k=0;
-	if (hx < 0x00100000)
-	{			/* x < 2**-1022  */
-		if (((hx&0x7fffffff)|lx)==0)
-			return -two54/(x-x);		/* log(+-0)=-inf */
-
-		if (hx<0) return (x-x)/(x-x);	/* log(-#) = NaN */
-
-		k -= 54; x *= two54; /* subnormal number, scale up x */
-		hx = __HI(x);
-	}
-
-	if (hx >= 0x7ff00000) return x+x;
-	k += (hx>>20)-1023;
-	hx &= 0x000fffff;
-	i = (hx+0x95f64)&0x100000;
-	__setHigh(&x,hx|(i^0x3ff00000));	/* normalize x or x/2 */
-	k += (i>>20);
-	dk = (double) k;
-	f = x-1.0;
-
-	if((0x000fffff&(2+hx))<3)
-	{	/* |f| < 2**-20 */
-		if(f==zero) return dk;
-		R = f*f*(0.5-0.33333333333333333*f);
-		return dk-(R-f)/ln2;
-	}
-
-	s = f/(2.0+f);
-	z = s*s;
-	i = hx-0x6147a;
-	w = z*z;
-	j = 0x6b851-hx;
-	t1= w*(Lg2+w*(Lg4+w*Lg6));
-	t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
-	i |= j;
-	R = t2+t1;
-	if(i>0)
-	{
-		hfsq=0.5*f*f;
-		return dk-((hfsq-(s*(hfsq+R)))-f)/ln2;
-	}
-	else
-	{
-		return dk-((s*(f-R))-f)/ln2;
-	}
-}
-
-OVERLOADABLE double log10(double x)
-{
-	double zero = 0.0,
-	two54	   =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
-	ivln10	   =  4.34294481903251816668e-01, /* 0x3FDBCB7B, 0x1526E50E */
-	log10_2hi  =  3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
-	log10_2lo  =  3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
-
-	double y,z;
-	int i,k,hx;
-	unsigned lx;
-
-	hx = __HI(x);	/* high word of x */
-	lx = __LO(x);	/* low word of x */
-
-	k=0;
-	if (hx < 0x00100000)
-	{  /* x < 2**-1022  */
-		if (((hx&0x7fffffff)|lx)==0)
-			return -two54/zero; /* log(+-0)=-inf */
-
-		if (hx<0)
-			return (x-x)/zero;/* log(-#) = NaN */
-
-		k -= 54; x *= two54; /* subnormal number, scale up x */
-		hx = __HI(x);/* high word of x */
-	}
-
-	if (hx >= 0x7ff00000) return x+x;
-	k += (hx>>20)-1023;
-	i  = ((unsigned)k&0x80000000)>>31;
-	hx = (hx&0x000fffff)|((0x3ff-i)<<20);
-	y  = (double)(k+i);
-	__setHigh(&x, hx);
-	z  = y*log10_2lo + ivln10*log(x);
-	return  z+y*log10_2hi;
-}
-
-OVERLOADABLE double log1p(double x)
-{
-	double ln2_hi  =  6.93147180369123816490e-01,	/* 3fe62e42 fee00000 */
-	ln2_lo	=  1.90821492927058770002e-10,	/* 3dea39ef 35793c76 */
-	two54	=  1.80143985094819840000e+16,	/* 43500000 00000000 */
-	Lp1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
-	Lp2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
-	Lp3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
-	Lp4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
-	Lp5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
-	Lp6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
-	Lp7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
-
-	double hfsq,f,c,s,z,R,u,zero = 0;
-	int k,hx,hu,ax;
-
-	hx = __HI(x);		/* high word of x */
-	ax = hx&0x7fffffff;
-
-	k = 1;
-	if (hx < 0x3FDA827A) {			/* x < 0.41422	*/
-		if(ax>=0x3ff00000) {		/* x <= -1.0 */
-		if(x==-1.0) return -two54/zero; /* log1p(-1)=+inf */
-		else return (x-x)/(x-x);	/* log1p(x<-1)=NaN */
-		}
-		if(ax<0x3e200000) { 		/* |x| < 2**-29 */
-		if(two54+x>zero 		/* raise inexact */
-				&&ax<0x3c900000)		/* |x| < 2**-54 */
-			return x;
-		else
-			return x - x*x*0.5;
-		}
-		if(hx>0||hx<=((int)0xbfd2bec3)) {
-		k=0;f=x;hu=1;}	/* -0.2929<x<0.41422 */
-	}
-	if (hx >= 0x7ff00000) return x+x;
-	if(k!=0) {
-		if(hx<0x43400000) {
-		u  = 1.0+x;
-			hu = __HI(u);		/* high word of u */
-			k  = (hu>>20)-1023;
-			c  = (k>0)? 1.0-(u-x):x-(u-1.0);/* correction term */
-		c /= u;
-		} else {
-		u  = x;
-			hu = __HI(u);		/* high word of u */
-			k  = (hu>>20)-1023;
-		c  = 0;
-		}
-		hu &= 0x000fffff;
-		if(hu<0x6a09e) {
-			__setHigh(&u, hu|0x3ff00000);	/* normalize u */
-		} else {
-			k += 1;
-			__setHigh(&u, hu|0x3fe00000);	/* normalize u/2 */
-			hu = (0x00100000-hu)>>2;
-		}
-		f = u-1.0;
-	}
-	hfsq=0.5*f*f;
-	if(hu==0) { /* |f| < 2**-20 */
-		if(f==zero)
-		{
-			if(k==0) return zero;
-			else {c += k*ln2_lo; return k*ln2_hi+c;}
-		}
-		R = hfsq*(1.0-0.66666666666666666*f);
-		if(k==0) return f-R; else
-				 return k*ln2_hi-((R-(k*ln2_lo+c))-f);
-	}
-	s = f/(2.0+f);
-	z = s*s;
-	R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7))))));
-	if(k==0) return f-(hfsq-s*(hfsq+R)); else
-		return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
-
-}
-
-OVERLOADABLE double logb(double x)
-{
-	int lx,ix;
-	ix = (__HI(x))&0x7fffffff;	/* high |x| */
-	lx = __LO(x);			/* low x */
-	if((ix|lx)==0) return -1.0/fabs(x);
-	if(ix>=0x7ff00000) return x*x;
-	if((ix>>=20)==0) 			/* IEEE 754 logb */
-	{
-		long qx = as_long(x);
-		qx = qx & DF_ABS_MASK;
-		int msbOne = clz(qx);
-		return (double)(-1022 - (53 -(64 -msbOne)));
-	}
-	else
-		return (double) (ix-1023);
-}
-
-OVERLOADABLE int ilogb(double x)
-{
-	int hx,lx,ix;
-
-	hx  = (__HI(x))&0x7fffffff;	/* high word of x */
-	if(hx == 0x7ff00000 && __LO(x) == 0) return 0x7fffffff;
-
-	if(hx<0x00100000) {
-		lx = __LO(x);
-		if((hx|lx)==0)
-		return 0x80000000;	/* ilogb(0) = 0x80000000 */
-		else			/* subnormal x */
-		if(hx==0) {
-			for (ix = -1043; lx>0; lx<<=1) ix -=1;
-		} else {
-			for (ix = -1022,hx<<=11; hx>0; hx<<=1) ix -=1;
-		}
-		return ix;
-	}
-	else if (hx<0x7ff00000) return (hx>>20)-1023;
-	else return 0x80000000;
-}
-
-CONST OVERLOADABLE double __gen_ocl_mad(double a, double b, double c) __asm("llvm.fma" ".f64");
-OVERLOADABLE double mad(double a, double b, double c)
-{
-	return __gen_ocl_mad(a, b, c);
-}
-
-OVERLOADABLE double nan(ulong code)
-{
-	return as_double(DF_POSITIVE_INF + (code&DF_MAN_MASK));
-}
-
-OVERLOADABLE double nextafter(double x, double y)
-{
-	long hx, hy, ix, iy;
-	hx = as_long(x);
-	hy = as_long(y);
-	ix = hx & DF_ABS_MASK;
-	iy = hy & DF_ABS_MASK;
-
-	if(ix>DF_POSITIVE_INF|| iy>DF_POSITIVE_INF)
-	  return x+y;
-	if(hx == hy)
-	  return y;
-	if(ix == 0) {
-	  if(iy == 0)
-		return y;
-	  else
-		return as_double((hy&DF_SIGN_MASK) | 1);
-	}
-	if(hx >= 0) {
-	  if(hx > hy) {
-		hx -= 1;
-	  } else {
-		hx += 1;
-	  }
-	} else {
-	  if(hy >= 0 || hx > hy){
-		hx -= 1;
-	  } else {
-		hx += 1;
-	  }
-	}
-	return as_double(hx);
-}
-
-OVERLOADABLE double fmax(double a, double b)
-{
-	ulong ua = as_ulong(a);
-	ulong ub =as_ulong(b);
-
-	if((ua & DF_ABS_MASK) > DF_MAX_NORMAL) return b;
-	if((ub & DF_ABS_MASK) > DF_MAX_NORMAL) return a;
-	if(ua == DF_POSITIVE_INF) return a;
-	if(ub == DF_POSITIVE_INF) return b;
-
-	double c = a - b;
-	return (c >= 0) ? a:b;
-}
-
-OVERLOADABLE double fmin(double a, double b)
-{
-	ulong ua = as_ulong(a);
-	ulong ub =as_ulong(b);
-
-	if((ua & DF_ABS_MASK) > DF_MAX_NORMAL) return b;
-	if((ub & DF_ABS_MASK) > DF_MAX_NORMAL) return a;
-	if(ua == DF_NEGTIVE_INF) return a;
-	if(ub == DF_NEGTIVE_INF) return b;
-
-	double c = a - b;
-	return (c <= 0) ? a:b;
-}
-
-OVERLOADABLE double fmod (double x, double y)
-{
-	const double one = 1.0, Zero[] = {0.0, -0.0,};
-	int n,hx,hy,hz,ix,iy,sx,i;
-	uint lx,ly,lz;
-
-	hx = __HI(x);
-	lx = __LO(x);
-	hy = __HI(y);
-	ly = __LO(y);
-	sx = hx&0x80000000;		/* sign of x */
-	hx ^=sx;		/* |x| */
-	hy &= 0x7fffffff;	/* |y| */
-
-    /* purge off exception values */
-	if((hy|ly)==0||(hx>=0x7ff00000)||	/* y=0,or x not finite */
-	  ((hy|((ly|-ly)>>31))>0x7ff00000))	/* or y is NaN */
-	    return (x*y)/(x*y);
-	if(hx<=hy) {
-	    if((hx<hy)||(lx<ly)) return x;	/* |x|<|y| return x */
-	    if(lx==ly) 
-		return Zero[(uint)sx>>31];	/* |x|=|y| return x*0*/
-	}
-
-    /* determine ix = ilogb(x) */
-	if(hx<0x00100000) {	/* subnormal x */
-	    if(hx==0) {
-		for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
-	    } else {
-		for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
-	    }
-	} else ix = (hx>>20)-1023;
-
-    /* determine iy = ilogb(y) */
-	if(hy<0x00100000) {	/* subnormal y */
-	    if(hy==0) {
-		for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
-	    } else {
-		for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
-	    }
-	} else iy = (hy>>20)-1023;
-
-    /* set up {hx,lx}, {hy,ly} and align y to x */
-	if(ix >= -1022) 
-	    hx = 0x00100000|(0x000fffff&hx);
-	else {		/* subnormal x, shift x to normal */
-	    n = -1022-ix;
-	    if(n<=31) {
-	        hx = (hx<<n)|(lx>>(32-n));
-	        lx <<= n;
-	    } else {
-		hx = lx<<(n-32);
-		lx = 0;
-	    }
-	}
-	if(iy >= -1022) 
-	    hy = 0x00100000|(0x000fffff&hy);
-	else {		/* subnormal y, shift y to normal */
-	    n = -1022-iy;
-	    if(n<=31) {
-	        hy = (hy<<n)|(ly>>(32-n));
-	        ly <<= n;
-	    } else {
-		hy = ly<<(n-32);
-		ly = 0;
-	    }
-	}
-
-    /* fix point fmod */
-	n = ix - iy;
-	while(n--) {
-	    hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
-	    if(hz<0){hx = hx+hx+(lx>>31); lx = lx+lx;}
-	    else {
-	    	if((hz|lz)==0) 		/* return sign(x)*0 */
-		    return Zero[(uint)sx>>31];
-	    	hx = hz+hz+(lz>>31); lx = lz+lz;
-	    }
-	}
-	hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
-	if(hz>=0) {hx=hz;lx=lz;}
-
-    /* convert back to floating value and restore the sign */
-	if((hx|lx)==0) 			/* return sign(x)*0 */
-	    return Zero[(uint)sx>>31];	
-	while(hx<0x00100000) {		/* normalize x */
-	    hx = hx+hx+(lx>>31); lx = lx+lx;
-	    iy -= 1;
-	}
-	if(iy>= -1022) {	/* normalize output */
-		hx = ((hx-0x00100000)|((iy+1023)<<20));
-		__setHigh(&x,hx|sx);
-		__setLow(&x, lx);
-	} else {		/* subnormal output */
-	    n = -1022 - iy;
-	    if(n<=20) {
-		lx = (lx>>n)|((uint)hx<<(32-n));
-		hx >>= n;
-	    } else if (n<=31) {
-		lx = (hx<<(32-n))|(lx>>n); hx = sx;
-	    } else {
-		lx = hx>>(n-32); hx = sx;
-	    }
-	__setHigh(&x,hx|sx);
-	__setLow(&x, lx);
-
-	    x *= one;		/* create necessary signal */
-	}
-	return x;		/* exact output */
-
-}
-
-OVERLOADABLE double rint(double x)
-{
-	long ret;
-	long lval = as_long(x);
-	int exp = ((lval & DF_EXP_MASK) >> DF_EXP_OFFSET) - DF_EXP_BIAS;
-	long sign = (lval & DF_SIGN_MASK)?1:0;
-	long ma = (lval &DF_MAN_MASK);
-
-	if((lval & DF_ABS_MASK) == 0)
-		return as_double(sign << 63);
-
-	if(exp < -1)
-	{
-		ret = ((sign << 63)) ;
-		return as_double(ret);
-	}
-
-	if(exp > 51) return x;
-
-	long i = (1L << (52 - exp));
-	i = 0x10000000000000 - i;
-	unsigned long uv = 0xFFF0000000000000 + i;
-	ulong vv = lval & uv;
-	double	dp = as_double(vv);
-	if(exp == -1) dp = 0;
-
-	long fval = ma | DF_IMPLICITE_ONE;
-	long lastBit = (fval & (1L << (52 -exp)));
-	long roundBits = (fval & ( (1L << (52 -exp)) -1));
-
-	if(roundBits > (1L << (51 -exp)))
-		dp = (sign) ? dp-1.0:dp+1.0;
-	else if((roundBits == (1L << (51 -exp))) && lastBit)
-		dp = (sign) ? dp-1.0:dp+1.0;
-
-	return dp;
-}
-
-OVERLOADABLE double round(double x)
-{
-	long ret;
-	long lval = as_long(x);
-	int exp = ((lval & DF_EXP_MASK) >> DF_EXP_OFFSET) - DF_EXP_BIAS;
-	long sign = (lval & DF_SIGN_MASK)?1:0;
-	long ma = (lval &DF_MAN_MASK);
-
-	if((lval & DF_ABS_MASK) == 0)
-		return as_double(sign << 63);
-
-	if(exp < -1)
-	{
-		ret = ((sign << 63)) ;
-		return as_double(ret);
-	}
-
-	if(exp > 51) return x;
-
-	long i = (1L << (52 - exp));
-	i = 0x10000000000000 - i;
-	unsigned long uv = 0xFFF0000000000000 + i;
-	ulong vv = lval & uv;
-	double	dp = as_double(vv);
-	if(exp == -1) dp = 0;
-
-	long fval = ma | DF_IMPLICITE_ONE;
-	long roundBits = (fval & ( (1L << (52 -exp)) -1));
-
-	if(roundBits > (1L << (51 -exp)))
-		dp = (sign) ? dp-1.0:dp+1.0;
-	else if(roundBits == (1L << (51 -exp)))
-		dp = (sign) ? dp-1.0:dp+1.0;
-
-	return dp;
-}
-
-OVERLOADABLE double trunc(double x)
-{
-	double ret = floor(fabs(x));
-	return copysign(ret, x);
-}
-
-
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.h b/backend/src/libocl/tmpl/ocl_math.tmpl.h
index aee332c..b30074e 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.h
@@ -19,6 +19,7 @@
 #define __OCL_MATH_H__
 
 #include "ocl_types.h"
+#include "ocl_math_common.h"
 
 OVERLOADABLE float cospi(float x);
 OVERLOADABLE float cosh(float x);
@@ -233,32 +234,10 @@ OVERLOADABLE float half_sqrt(float x);
 OVERLOADABLE float half_tan(float x);
 
 //------- double -----------
-OVERLOADABLE double ceil(double x);
-OVERLOADABLE double copysign(double x, double y);
-OVERLOADABLE double fabs(double x);
-OVERLOADABLE double fdim(double x, double y);
-OVERLOADABLE double floor(double x);
-OVERLOADABLE double fmax(double a, double b);
-OVERLOADABLE double fmin(double a, double b);
-OVERLOADABLE double fmod (double x, double y);
 OVERLOADABLE double fract(double x, global double *p);
 OVERLOADABLE double fract(double x, local double *p);
 OVERLOADABLE double fract(double x, private double *p);
 OVERLOADABLE double frexp(double x, global int *exp);
 OVERLOADABLE double frexp(double x, local int *exp);
 OVERLOADABLE double frexp(double x, private int *exp);
-OVERLOADABLE double log(double x);
-OVERLOADABLE double log2(double x);
-OVERLOADABLE double log10(double x);
-OVERLOADABLE double log1p(double x);
-OVERLOADABLE double logb(double x);
-OVERLOADABLE int ilogb(double x);
-OVERLOADABLE double mad(double a, double b, double c);
-OVERLOADABLE double nan(ulong code);
-OVERLOADABLE double nextafter(double x, double y);
-OVERLOADABLE double rint(double x);
-OVERLOADABLE double round(double x);
-OVERLOADABLE double trunc(double x);
-
-
 
diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
index a084a88..f1fa4eb 100644
--- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
@@ -3802,712 +3802,57 @@ OVERLOADABLE half rootn(half x, int n) {
 
 
 //-----------------double -----------------------
+INLINE int  __HI(double x){
+    long x64 = as_long(x);
+    int high = convert_int((x64 >> 32) & 0xFFFFFFFF);
+    return high;
+};
 
-OVERLOADABLE double ceil(double x)
-{
-    double ret;
-    ulong lval = as_ulong(x);
-    int exp = ((lval >> 52) & 0x7FF) - 1023;
-    int sign = (lval >> 63) & 0x1;
-
-    long i = (1L << (52 - exp));
-    long mask = 0x10000000000000 - i;
-    unsigned long uv = 0xFFF0000000000000 + mask;
-    ulong vv = lval & uv;
-    double	dp = as_double(vv);
-    ret = ((vv != lval) && !sign) ? dp +1.0:dp;
-
-    ret = ((exp < 0) && sign) ? 0:ret;
-    double tmp = (lval & DF_ABS_MASK) ? 1.0:0.0;
-    ret = ((exp < 0) && !sign) ? tmp:ret;
-    ret = (exp >= 52) ? x:ret;
-
-     return ret;
-}
-
-OVERLOADABLE double copysign(double x, double y)
-{
-	ulong uy = as_ulong(y);
-	ulong sign = uy & DF_SIGN_MASK;
-	ulong ux = as_ulong(x);
-	ux = (ux & DF_ABS_MASK) | sign;
-	return as_double(ux);
-}
-
-OVERLOADABLE double fabs(double x)
-{
-    long  qw = as_ulong(x);
-    qw &= 0x7FFFFFFFFFFFFFFF;
-    return as_double(qw);
-}
-
-OVERLOADABLE double fdim(double x, double y)
-{
-	if(isnan(x))
-		return x;
-	if(isnan(y))
-		return y;
-	return x > y ? (x - y) : +0.f;
-}
-
-OVERLOADABLE double floor(double x)
-{
-    ulong lval = as_ulong(x);
-    int exp = ((lval >> 52) & 0x7FF) - 1023;
-    int sign = (lval >> 63) & 0x1;
-    if(exp < 0)
-    {
-        if(sign)
-	{
-		if(lval & DF_ABS_MASK)
-			return -1L;
-		else
-			return 0.0;
-	}
-        else return 0.0;
-    }
-    else
-    {
-        if(exp >= 52)
-            return x;
-         long i = (1L << (52 - exp));
-         i = 0x10000000000000 - i;
-         unsigned long uv = 0xFFF0000000000000 + i;
-         ulong vv = lval & uv;
-         double  dp = as_double(vv);
-         if(vv != lval)
-            dp -= sign;
-
-         return dp;
-    }
-}
+INLINE int  __LO(double x){
+    long x64 = as_long(x);
+    int low = convert_int(x64  & 0xFFFFFFFFUL);
+    return low;
+};
 
-OVERLOADABLE double fract(double x, global double *p)
-{
-    double ret = floor(x);
-    *p =  ret;
-    return x -ret;
-}
+INLINE void  __setHigh(double *x, int val){
+    long x64 = as_long(*x);
+    long high = x64  & 0x00000000FFFFFFFF;
+    high |= ((long)val << 32);
+    *x = as_double(high);
+};
 
-OVERLOADABLE double fract(double x, local double *p)
-{
-    double ret = floor(x);
-    *p =  ret;
-    return x -ret;
-}
+INLINE void  __setLow(double *x, unsigned int val){
+    long x64 = as_long(*x);
+    long low = x64  & 0xFFFFFFFF00000000;
+    low |= (long)val;
+    *x = as_double(low);
+};
 
-OVERLOADABLE double fract(double x, private double *p)
+OVERLOADABLE double fract(double x, double *p)
 {
     double ret = floor(x);
     *p =  ret;
     return x -ret;
 }
 
-#define BODY \
-double two54 =  1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */ \
-int  hx, ix, lx; \
-hx = __HI(x); \
-ix = 0x7fffffff&hx; \
-lx = __LO(x); \
-*exp = 0; \
-if(ix>=0x7ff00000||((ix|lx)==0)) return x;  /* 0,inf,nan */ \
-if (ix<0x00100000) {        /* subnormal */ \
-    x *= two54; \
-    hx = __HI(x); \
-    ix = hx&0x7fffffff; \
-    *exp = -54; \
-} \
-*exp += (ix>>20)-1022; \
-hx = (hx&0x800fffff)|0x3fe00000; \
-__setHigh(&x, hx); \
-return x;
-
-OVERLOADABLE double frexp(double x, global int *exp){BODY;}
-OVERLOADABLE double frexp(double x, local int *exp){BODY;}
-OVERLOADABLE double frexp(double x, private int *exp){BODY;}
-#undef BODY
-
-/* @(#)e_log.c 1.3 95/01/18 */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-OVERLOADABLE double log(double x)
-{
-	double ln2_hi	=  6.93147180369123816490e-01,	/* 3fe62e42 fee00000 */
-	ln2_lo	=  1.90821492927058770002e-10,	/* 3dea39ef 35793c76 */
-	two54	=  1.80143985094819840000e+16,	/* 43500000 00000000 */
-	Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
-	Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
-	Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
-	Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
-	Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
-	Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
-	Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
-
-	double zero = 0;
-	double hfsq,f,s,z,R,w,t1,t2,dk;
-	int k,hx,i,j;
-	unsigned lx;
-
-	hx = __HI(x);		/* high word of x */
-	lx = __LO(x);		/* low	word of x */
-
-	k=0;
-	if (hx < 0x00100000)
-	{			/* x < 2**-1022  */
-		if (((hx&0x7fffffff)|lx)==0)
-		return -two54/zero; 	/* log(+-0)=-inf */
-		if (hx<0)
-			return (x-x)/zero;	/* log(-#) = NaN */
-		k -= 54; x *= two54; /* subnormal number, scale up x */
-		hx = __HI(x);		/* high word of x */
-	}
-	if (hx >= 0x7ff00000) return x+x;
-	k += (hx>>20)-1023;
-	hx &= 0x000fffff;
-	i = (hx+0x95f64)&0x100000;
-	__setHigh(&x, (hx|(i^0x3ff00000))); /* normalize x or x/2 */
-	k += (i>>20);
-	f = x-1.0;
-	if((0x000fffff&(2+hx))<3) { /* |f| < 2**-20 */
-		if(f==zero)
-		{
-			if(k==0) return zero;
-			else
-			{
-				dk=(double)k;
-				return dk*ln2_hi+dk*ln2_lo;
-			}
-		}
-
-		R = f*f*(0.5-0.33333333333333333*f);
-		if(k==0)
-			return f-R;
-		else {dk=(double)k;
-			return dk*ln2_hi-((R-dk*ln2_lo)-f);}
-	}
-	s = f/(2.0+f);
-	dk = (double)k;
-	z = s*s;
-	i = hx-0x6147a;
-	w = z*z;
-	j = 0x6b851-hx;
-	t1= w*(Lg2+w*(Lg4+w*Lg6));
-	t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
-	i |= j;
-	R = t2+t1;
-	if(i>0) {
-	hfsq=0.5*f*f;
-	if(k==0)
-		return f-(hfsq-s*(hfsq+R));
-	else
-		return dk*ln2_hi-((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f);
-	}
-	else
-	{
-		if(k==0)
-			return f-s*(f-R);
-		else
-			return dk*ln2_hi-((s*(f-R)-dk*ln2_lo)-f);
-	}
-
-}
-
-OVERLOADABLE double log2(double x)
-{
-	double ln2 = 0.69314718055994530942,
-	zero = 0,
-	two54   =  1.80143985094819840000e+16,  /* 43500000 00000000 */
-	Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
-	Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
-	Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
-	Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
-	Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
-	Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
-	Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
-
-	double hfsq,f,s,z,R,w,t1,t2,dk;
-	int k,hx,i,j;
-	uint lx;
-
-	hx = __HI(x);
-	lx = __LO(x);
-
-	k=0;
-	if (hx < 0x00100000)
-	{			/* x < 2**-1022  */
-		if (((hx&0x7fffffff)|lx)==0)
-			return -two54/(x-x);		/* log(+-0)=-inf */
-
-		if (hx<0) return (x-x)/(x-x);	/* log(-#) = NaN */
-
-		k -= 54; x *= two54; /* subnormal number, scale up x */
-		hx = __HI(x);
-	}
-
-	if (hx >= 0x7ff00000) return x+x;
-	k += (hx>>20)-1023;
-	hx &= 0x000fffff;
-	i = (hx+0x95f64)&0x100000;
-	__setHigh(&x,hx|(i^0x3ff00000));	/* normalize x or x/2 */
-	k += (i>>20);
-	dk = (double) k;
-	f = x-1.0;
-
-	if((0x000fffff&(2+hx))<3)
-	{	/* |f| < 2**-20 */
-		if(f==zero) return dk;
-		R = f*f*(0.5-0.33333333333333333*f);
-		return dk-(R-f)/ln2;
-	}
-
-	s = f/(2.0+f);
-	z = s*s;
-	i = hx-0x6147a;
-	w = z*z;
-	j = 0x6b851-hx;
-	t1= w*(Lg2+w*(Lg4+w*Lg6));
-	t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
-	i |= j;
-	R = t2+t1;
-	if(i>0)
-	{
-		hfsq=0.5*f*f;
-		return dk-((hfsq-(s*(hfsq+R)))-f)/ln2;
-	}
-	else
-	{
-		return dk-((s*(f-R))-f)/ln2;
-	}
-}
-
-OVERLOADABLE double log10(double x)
-{
-	double zero = 0.0,
-	two54	   =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
-	ivln10	   =  4.34294481903251816668e-01, /* 0x3FDBCB7B, 0x1526E50E */
-	log10_2hi  =  3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
-	log10_2lo  =  3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
-
-	double y,z;
-	int i,k,hx;
-	unsigned lx;
-
-	hx = __HI(x);	/* high word of x */
-	lx = __LO(x);	/* low word of x */
-
-	k=0;
-	if (hx < 0x00100000)
-	{  /* x < 2**-1022  */
-		if (((hx&0x7fffffff)|lx)==0)
-			return -two54/zero; /* log(+-0)=-inf */
-
-		if (hx<0)
-			return (x-x)/zero;/* log(-#) = NaN */
-
-		k -= 54; x *= two54; /* subnormal number, scale up x */
-		hx = __HI(x);/* high word of x */
-	}
-
-	if (hx >= 0x7ff00000) return x+x;
-	k += (hx>>20)-1023;
-	i  = ((unsigned)k&0x80000000)>>31;
-	hx = (hx&0x000fffff)|((0x3ff-i)<<20);
-	y  = (double)(k+i);
-	__setHigh(&x, hx);
-	z  = y*log10_2lo + ivln10*log(x);
-	return  z+y*log10_2hi;
-}
-
-OVERLOADABLE double log1p(double x)
+OVERLOADABLE double frexp(double x, int *exp)
 {
-	double ln2_hi  =  6.93147180369123816490e-01,	/* 3fe62e42 fee00000 */
-	ln2_lo  =  1.90821492927058770002e-10,	/* 3dea39ef 35793c76 */
-	two54   =  1.80143985094819840000e+16,  /* 43500000 00000000 */
-	Lp1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
-	Lp2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
-	Lp3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
-	Lp4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
-	Lp5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
-	Lp6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
-	Lp7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
-
-	double hfsq,f,c,s,z,R,u,zero = 0;
-	int k,hx,hu,ax;
-
-	hx = __HI(x);		/* high word of x */
-	ax = hx&0x7fffffff;
-
-	k = 1;
-	if (hx < 0x3FDA827A) {			/* x < 0.41422  */
-		if(ax>=0x3ff00000) {		/* x <= -1.0 */
-		if(x==-1.0) return -two54/zero; /* log1p(-1)=+inf */
-		else return (x-x)/(x-x);	/* log1p(x<-1)=NaN */
-		}
-		if(ax<0x3e200000) {			/* |x| < 2**-29 */
-		if(two54+x>zero			/* raise inexact */
-				&&ax<0x3c900000) 		/* |x| < 2**-54 */
-			return x;
-		else
-			return x - x*x*0.5;
-		}
-		if(hx>0||hx<=((int)0xbfd2bec3)) {
-		k=0;f=x;hu=1;}	/* -0.2929<x<0.41422 */
-	}
-	if (hx >= 0x7ff00000) return x+x;
-	if(k!=0) {
-		if(hx<0x43400000) {
-		u  = 1.0+x;
-			hu = __HI(u);		/* high word of u */
-			k  = (hu>>20)-1023;
-			c  = (k>0)? 1.0-(u-x):x-(u-1.0);/* correction term */
-		c /= u;
-		} else {
-		u  = x;
-			hu = __HI(u);		/* high word of u */
-			k  = (hu>>20)-1023;
-		c  = 0;
-		}
-		hu &= 0x000fffff;
-		if(hu<0x6a09e) {
-			__setHigh(&u, hu|0x3ff00000);	/* normalize u */
-		} else {
-			k += 1;
-			__setHigh(&u, hu|0x3fe00000);	/* normalize u/2 */
-			hu = (0x00100000-hu)>>2;
-		}
-		f = u-1.0;
-	}
-	hfsq=0.5*f*f;
-	if(hu==0) {	/* |f| < 2**-20 */
-		if(f==zero)
-		{
-			if(k==0) return zero;
-			else {c += k*ln2_lo; return k*ln2_hi+c;}
-		}
-		R = hfsq*(1.0-0.66666666666666666*f);
-		if(k==0) return f-R; else
-				 return k*ln2_hi-((R-(k*ln2_lo+c))-f);
-	}
-	s = f/(2.0+f);
-	z = s*s;
-	R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7))))));
-	if(k==0) return f-(hfsq-s*(hfsq+R)); else
-		return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
-
-}
-
-OVERLOADABLE double logb(double x)
-{
-	int lx,ix;
-	ix = (__HI(x))&0x7fffffff;	/* high |x| */
-	lx = __LO(x);			/* low x */
-	if((ix|lx)==0) return -1.0/fabs(x);
-	if(ix>=0x7ff00000) return x*x;
-	if((ix>>=20)==0) 			/* IEEE 754 logb */
-	{
-		long qx = as_long(x);
-		qx = qx & DF_ABS_MASK;
-		int msbOne = clz(qx);
-		return (double)(-1022 - (53 -(64 -msbOne)));
-	}
-	else
-		return (double) (ix-1023);
-}
-
-OVERLOADABLE int ilogb(double x)
-{
-	int hx,lx,ix;
-
-	hx  = (__HI(x))&0x7fffffff;	/* high word of x */
-	if(hx == 0x7ff00000 && __LO(x) == 0) return 0x7fffffff;
-
-	if(hx<0x00100000) {
-		lx = __LO(x);
-		if((hx|lx)==0)
-		return 0x80000000;	/* ilogb(0) = 0x80000000 */
-		else			/* subnormal x */
-		if(hx==0) {
-			for (ix = -1043; lx>0; lx<<=1) ix -=1;
-		} else {
-			for (ix = -1022,hx<<=11; hx>0; hx<<=1) ix -=1;
-		}
-		return ix;
-	}
-	else if (hx<0x7ff00000) return (hx>>20)-1023;
-	else return 0x80000000;
-}
-
-CONST OVERLOADABLE double __gen_ocl_mad(double a, double b, double c) __asm("llvm.fma" ".f64");
-OVERLOADABLE double mad(double a, double b, double c)
-{
-	return __gen_ocl_mad(a, b, c);
-}
-
-OVERLOADABLE double nan(ulong code)
-{
-	return as_double(DF_POSITIVE_INF + (code&DF_MAN_MASK));
-}
-
-OVERLOADABLE double nextafter(double x, double y)
-{
-	long hx, hy, ix, iy;
-	hx = as_long(x);
-	hy = as_long(y);
-	ix = hx & DF_ABS_MASK;
-	iy = hy & DF_ABS_MASK;
-
-	if(ix>DF_POSITIVE_INF|| iy>DF_POSITIVE_INF)
-	  return x+y;
-	if(hx == hy)
-	  return y;
-	if(ix == 0) {
-	  if(iy == 0)
-		return y;
-	  else
-		return as_double((hy&DF_SIGN_MASK) | 1);
-	}
-	if(hx >= 0) {
-	  if(hx > hy) {
-		hx -= 1;
-	  } else {
-		hx += 1;
-	  }
-	} else {
-	  if(hy >= 0 || hx > hy){
-		hx -= 1;
-	  } else {
-		hx += 1;
-	  }
-	}
-	return as_double(hx);
-}
-
-OVERLOADABLE double fmax(double a, double b)
-{
-	ulong ua = as_ulong(a);
-	ulong ub =as_ulong(b);
-
-	if((ua & DF_ABS_MASK) > DF_MAX_NORMAL) return b;
-	if((ub & DF_ABS_MASK) > DF_MAX_NORMAL) return a;
-	if(ua == DF_POSITIVE_INF) return a;
-	if(ub == DF_POSITIVE_INF) return b;
-
-	double c = a - b;
-	return (c >= 0) ? a:b;
-}
-
-OVERLOADABLE double fmin(double a, double b)
-{
-	ulong ua = as_ulong(a);
-	ulong ub =as_ulong(b);
-
-	if((ua & DF_ABS_MASK) > DF_MAX_NORMAL) return b;
-	if((ub & DF_ABS_MASK) > DF_MAX_NORMAL) return a;
-	if(ua == DF_NEGTIVE_INF) return a;
-	if(ub == DF_NEGTIVE_INF) return b;
-
-	double c = a - b;
-	return (c <= 0) ? a:b;
-}
-
-OVERLOADABLE double fmod (double x, double y)
-{
-	const double one = 1.0, Zero[] = {0.0, -0.0,};
-	int n,hx,hy,hz,ix,iy,sx,i;
-	uint lx,ly,lz;
-
-	hx = __HI(x);
-	lx = __LO(x);
-	hy = __HI(y);
-	ly = __LO(y);
-	sx = hx&0x80000000;		/* sign of x */
-	hx ^=sx;		/* |x| */
-	hy &= 0x7fffffff;	/* |y| */
-
-    /* purge off exception values */
-	if((hy|ly)==0||(hx>=0x7ff00000)||	/* y=0,or x not finite */
-	  ((hy|((ly|-ly)>>31))>0x7ff00000))	/* or y is NaN */
-	    return (x*y)/(x*y);
-	if(hx<=hy) {
-	    if((hx<hy)||(lx<ly)) return x;	/* |x|<|y| return x */
-	    if(lx==ly) 
-		return Zero[(uint)sx>>31];	/* |x|=|y| return x*0*/
-	}
-
-    /* determine ix = ilogb(x) */
-	if(hx<0x00100000) {	/* subnormal x */
-	    if(hx==0) {
-		for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
-	    } else {
-		for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
-	    }
-	} else ix = (hx>>20)-1023;
-
-    /* determine iy = ilogb(y) */
-	if(hy<0x00100000) {	/* subnormal y */
-	    if(hy==0) {
-		for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
-	    } else {
-		for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
-	    }
-	} else iy = (hy>>20)-1023;
-
-    /* set up {hx,lx}, {hy,ly} and align y to x */
-	if(ix >= -1022) 
-	    hx = 0x00100000|(0x000fffff&hx);
-	else {		/* subnormal x, shift x to normal */
-	    n = -1022-ix;
-	    if(n<=31) {
-	        hx = (hx<<n)|(lx>>(32-n));
-	        lx <<= n;
-	    } else {
-		hx = lx<<(n-32);
-		lx = 0;
-	    }
-	}
-	if(iy >= -1022) 
-	    hy = 0x00100000|(0x000fffff&hy);
-	else {		/* subnormal y, shift y to normal */
-	    n = -1022-iy;
-	    if(n<=31) {
-	        hy = (hy<<n)|(ly>>(32-n));
-	        ly <<= n;
-	    } else {
-		hy = ly<<(n-32);
-		ly = 0;
-	    }
-	}
-
-    /* fix point fmod */
-	n = ix - iy;
-	while(n--) {
-	    hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
-	    if(hz<0){hx = hx+hx+(lx>>31); lx = lx+lx;}
-	    else {
-	    	if((hz|lz)==0) 		/* return sign(x)*0 */
-		    return Zero[(uint)sx>>31];
-	    	hx = hz+hz+(lz>>31); lx = lz+lz;
-	    }
-	}
-	hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
-	if(hz>=0) {hx=hz;lx=lz;}
-
-    /* convert back to floating value and restore the sign */
-	if((hx|lx)==0) 			/* return sign(x)*0 */
-	    return Zero[(uint)sx>>31];	
-	while(hx<0x00100000) {		/* normalize x */
-	    hx = hx+hx+(lx>>31); lx = lx+lx;
-	    iy -= 1;
-	}
-	if(iy>= -1022) {	/* normalize output */
-		hx = ((hx-0x00100000)|((iy+1023)<<20));
-		__setHigh(&x,hx|sx);
-		__setLow(&x, lx);
-	} else {		/* subnormal output */
-	    n = -1022 - iy;
-	    if(n<=20) {
-		lx = (lx>>n)|((uint)hx<<(32-n));
-		hx >>= n;
-	    } else if (n<=31) {
-		lx = (hx<<(32-n))|(lx>>n); hx = sx;
-	    } else {
-		lx = hx>>(n-32); hx = sx;
-	    }
-	__setHigh(&x,hx|sx);
-	__setLow(&x, lx);
-
-	    x *= one;		/* create necessary signal */
-	}
-	return x;		/* exact output */
-
-}
-
-OVERLOADABLE double rint(double x)
-{
-	long ret;
-	long lval = as_long(x);
-	int exp = ((lval & DF_EXP_MASK) >> DF_EXP_OFFSET) - DF_EXP_BIAS;
-	long sign = (lval & DF_SIGN_MASK)?1:0;
-	long ma = (lval &DF_MAN_MASK);
-
-	if((lval & DF_ABS_MASK) == 0)
-		return as_double(sign << 63);
-
-	if(exp < -1)
-	{
-		ret = ((sign << 63)) ;
-		return as_double(ret);
-	}
-
-	if(exp > 51) return x;
-
-	long i = (1L << (52 - exp));
-	i = 0x10000000000000 - i;
-	unsigned long uv = 0xFFF0000000000000 + i;
-	ulong vv = lval & uv;
-	double	dp = as_double(vv);
-	if(exp == -1) dp = 0;
-
-	long fval = ma | DF_IMPLICITE_ONE;
-	long lastBit = (fval & (1L << (52 -exp)));
-	long roundBits = (fval & ( (1L << (52 -exp)) -1));
-
-	if(roundBits > (1L << (51 -exp)))
-		dp = (sign) ? dp-1.0:dp+1.0;
-	else if((roundBits == (1L << (51 -exp))) && lastBit)
-		dp = (sign) ? dp-1.0:dp+1.0;
-
-	return dp;
-}
-
-OVERLOADABLE double round(double x)
-{
-	long ret;
-	long lval = as_long(x);
-	int exp = ((lval & DF_EXP_MASK) >> DF_EXP_OFFSET) - DF_EXP_BIAS;
-	long sign = (lval & DF_SIGN_MASK)?1:0;
-	long ma = (lval &DF_MAN_MASK);
-
-	if((lval & DF_ABS_MASK) == 0)
-		return as_double(sign << 63);
-
-	if(exp < -1)
-	{
-		ret = ((sign << 63)) ;
-		return as_double(ret);
-	}
-
-	if(exp > 51) return x;
-
-	long i = (1L << (52 - exp));
-	i = 0x10000000000000 - i;
-	unsigned long uv = 0xFFF0000000000000 + i;
-	ulong vv = lval & uv;
-	double	dp = as_double(vv);
-	if(exp == -1) dp = 0;
-
-	long fval = ma | DF_IMPLICITE_ONE;
-	long roundBits = (fval & ( (1L << (52 -exp)) -1));
-
-	if(roundBits > (1L << (51 -exp)))
-		dp = (sign) ? dp-1.0:dp+1.0;
-	else if(roundBits == (1L << (51 -exp)))
-		dp = (sign) ? dp-1.0:dp+1.0;
-
-	return dp;
-}
-
-OVERLOADABLE double trunc(double x)
-{
-	double ret = floor(fabs(x));
-	return copysign(ret, x);
+    double two54 =  1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */
+    int  hx, ix, lx;
+    hx = __HI(x);
+    ix = 0x7fffffff&hx;
+    lx = __LO(x);
+    *exp = 0;
+    if(ix>=0x7ff00000||((ix|lx)==0)) return x;  /* 0,inf,nan */
+    if (ix<0x00100000) {        /* subnormal */
+        x *= two54;
+        hx = __HI(x);
+        ix = hx&0x7fffffff;
+        *exp = -54;
+    }
+    *exp += (ix>>20)-1022;
+    hx = (hx&0x800fffff)|0x3fe00000;
+    __setHigh(&x, hx);
+    return x;
 }
 
diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.h b/backend/src/libocl/tmpl/ocl_math_20.tmpl.h
index d503d40..48cb36e 100644
--- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.h
@@ -19,6 +19,7 @@
 #define __OCL_MATH_20_H__
 
 #include "ocl_types.h"
+#include "ocl_math_common.h"
 
 OVERLOADABLE float cospi(float x);
 OVERLOADABLE float cosh(float x);
@@ -210,31 +211,6 @@ OVERLOADABLE float half_tan(float x);
 
 
 //------- double -----------
-OVERLOADABLE double ceil(double x);
-OVERLOADABLE double copysign(double x, double y);
-OVERLOADABLE double fabs(double x);
-OVERLOADABLE double fdim(double x, double y);
-OVERLOADABLE double fmax(double a, double b);
-OVERLOADABLE double fmin(double a, double b);
-OVERLOADABLE double fmod (double x, double y);
-OVERLOADABLE double floor(double x);
-OVERLOADABLE double fract(double x, global double *p);
-OVERLOADABLE double fract(double x, local double *p);
-OVERLOADABLE double fract(double x, private double *p);
-OVERLOADABLE double frexp(double x, global int *exp);
-OVERLOADABLE double frexp(double x, local int *exp);
-OVERLOADABLE double frexp(double x, private int *exp);
-OVERLOADABLE double log(double x);
-OVERLOADABLE double log2(double x);
-OVERLOADABLE double log10(double x);
-OVERLOADABLE double log1p(double x);
-OVERLOADABLE double logb(double x);
-OVERLOADABLE int ilogb(double x);
-OVERLOADABLE double mad(double a, double b, double c);
-OVERLOADABLE double nan(ulong code);
-OVERLOADABLE double nextafter(double x, double y);
-OVERLOADABLE double rint(double x);
-OVERLOADABLE double round(double x);
-OVERLOADABLE double trunc(double x);
-
+OVERLOADABLE double fract(double x, double *p);
+OVERLOADABLE double frexp(double x, int *exp);
 
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
new file mode 100644
index 0000000..aee5769
--- /dev/null
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -0,0 +1,714 @@
+/*
+ * Copyright © 2012 - 2014 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "ocl_math_common.h"
+#include "ocl_float.h"
+#include "ocl_relational.h"
+#include "ocl_common.h"
+#include "ocl_integer.h"
+#include "ocl_convert.h"
+
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+INLINE int  __HI(double x){
+    long x64 = as_long(x);
+    int high = convert_int((x64 >> 32) & 0xFFFFFFFF);
+    return high;
+};
+
+INLINE int  __LO(double x){
+    long x64 = as_long(x);
+    int low = convert_int(x64  & 0xFFFFFFFFUL);
+    return low;
+};
+
+INLINE void  __setHigh(double *x, int val){
+    long x64 = as_long(*x);
+    long high = x64  & 0x00000000FFFFFFFF;
+    high |= ((long)val << 32);
+    *x = as_double(high);
+};
+
+INLINE void  __setLow(double *x, unsigned int val){
+    long x64 = as_long(*x);
+    long low = x64  & 0xFFFFFFFF00000000;
+    low |= (long)val;
+    *x = as_double(low);
+};
+
+OVERLOADABLE double ceil(double x)
+{
+    double ret;
+    ulong lval = as_ulong(x);
+    int exp = ((lval >> 52) & 0x7FF) - 1023;
+    int sign = (lval >> 63) & 0x1;
+
+    long i = (1L << (52 - exp));
+    long mask = 0x10000000000000 - i;
+    unsigned long uv = 0xFFF0000000000000 + mask;
+    ulong vv = lval & uv;
+    double	dp = as_double(vv);
+    ret = ((vv != lval) && !sign) ? dp +1.0:dp;
+
+    ret = ((exp < 0) && sign) ? 0:ret;
+    double tmp = (lval & DF_ABS_MASK) ? 1.0:0.0;
+    ret = ((exp < 0) && !sign) ? tmp:ret;
+    ret = (exp >= 52) ? x:ret;
+
+     return ret;
+}
+
+OVERLOADABLE double copysign(double x, double y)
+{
+	ulong uy = as_ulong(y);
+	ulong sign = uy & DF_SIGN_MASK;
+	ulong ux = as_ulong(x);
+	ux = (ux & DF_ABS_MASK) | sign;
+	return as_double(ux);
+}
+
+OVERLOADABLE double fabs(double x)
+{
+    long  qw = as_ulong(x);
+    qw &= 0x7FFFFFFFFFFFFFFF;
+    return as_double(qw);
+}
+
+OVERLOADABLE double fdim(double x, double y)
+{
+	if(isnan(x))
+		return x;
+	if(isnan(y))
+		return y;
+	return x > y ? (x - y) : +0.f;
+}
+
+OVERLOADABLE double floor(double x)
+{
+    ulong lval = as_ulong(x);
+    int exp = ((lval >> 52) & 0x7FF) - 1023;
+    int sign = (lval >> 63) & 0x1;
+    if(exp < 0)
+    {
+        if(sign)
+	{
+		if(lval & DF_ABS_MASK)
+			return -1L;
+		else
+			return 0.0;
+	}
+        else return 0.0;
+    }
+    else
+    {
+        if(exp >= 52)
+            return x;
+         long i = (1L << (52 - exp));
+         i = 0x10000000000000 - i;
+         unsigned long uv = 0xFFF0000000000000 + i;
+         ulong vv = lval & uv;
+         double  dp = as_double(vv);
+         if(vv != lval)
+            dp -= sign;
+
+         return dp;
+    }
+}
+
+OVERLOADABLE double log(double x)
+{
+	double ln2_hi	=  6.93147180369123816490e-01,	/* 3fe62e42 fee00000 */
+	ln2_lo	=  1.90821492927058770002e-10,	/* 3dea39ef 35793c76 */
+	two54	=  1.80143985094819840000e+16,	/* 43500000 00000000 */
+	Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
+	Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
+	Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
+	Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
+	Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
+	Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
+	Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
+
+	double zero = 0;
+	double hfsq,f,s,z,R,w,t1,t2,dk;
+	int k,hx,i,j;
+	unsigned lx;
+
+	hx = __HI(x);		/* high word of x */
+	lx = __LO(x);		/* low  word of x */
+
+	k=0;
+	if (hx < 0x00100000)
+	{			/* x < 2**-1022  */
+		if (((hx&0x7fffffff)|lx)==0)
+		return -two54/zero;		/* log(+-0)=-inf */
+		if (hx<0)
+			return (x-x)/zero;	/* log(-#) = NaN */
+		k -= 54; x *= two54; /* subnormal number, scale up x */
+		hx = __HI(x);		/* high word of x */
+	}
+	if (hx >= 0x7ff00000) return x+x;
+	k += (hx>>20)-1023;
+	hx &= 0x000fffff;
+	i = (hx+0x95f64)&0x100000;
+	__setHigh(&x, (hx|(i^0x3ff00000)));	/* normalize x or x/2 */
+	k += (i>>20);
+	f = x-1.0;
+	if((0x000fffff&(2+hx))<3) {	/* |f| < 2**-20 */
+		if(f==zero)
+		{
+			if(k==0) return zero;
+			else
+			{
+				dk=(double)k;
+				return dk*ln2_hi+dk*ln2_lo;
+			}
+		}
+
+		R = f*f*(0.5-0.33333333333333333*f);
+		if(k==0)
+			return f-R;
+		else {dk=(double)k;
+			return dk*ln2_hi-((R-dk*ln2_lo)-f);}
+	}
+	s = f/(2.0+f);
+	dk = (double)k;
+	z = s*s;
+	i = hx-0x6147a;
+	w = z*z;
+	j = 0x6b851-hx;
+	t1= w*(Lg2+w*(Lg4+w*Lg6));
+	t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
+	i |= j;
+	R = t2+t1;
+	if(i>0) {
+	hfsq=0.5*f*f;
+	if(k==0)
+		return f-(hfsq-s*(hfsq+R));
+	else
+		return dk*ln2_hi-((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f);
+	}
+	else
+	{
+		if(k==0)
+			return f-s*(f-R);
+		else
+			return dk*ln2_hi-((s*(f-R)-dk*ln2_lo)-f);
+	}
+
+}
+
+OVERLOADABLE double log2(double x)
+{
+	double ln2 = 0.69314718055994530942,
+	zero = 0,
+	two54   =  1.80143985094819840000e+16,  /* 43500000 00000000 */
+	Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
+	Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
+	Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
+	Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
+	Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
+	Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
+	Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
+
+	double hfsq,f,s,z,R,w,t1,t2,dk;
+	int k,hx,i,j;
+	uint lx;
+
+	hx = __HI(x);
+	lx = __LO(x);
+
+	k=0;
+	if (hx < 0x00100000)
+	{			/* x < 2**-1022  */
+		if (((hx&0x7fffffff)|lx)==0)
+			return -two54/(x-x);		/* log(+-0)=-inf */
+
+		if (hx<0) return (x-x)/(x-x);	/* log(-#) = NaN */
+
+		k -= 54; x *= two54; /* subnormal number, scale up x */
+		hx = __HI(x);
+	}
+
+	if (hx >= 0x7ff00000) return x+x;
+	k += (hx>>20)-1023;
+	hx &= 0x000fffff;
+	i = (hx+0x95f64)&0x100000;
+	__setHigh(&x,hx|(i^0x3ff00000));	/* normalize x or x/2 */
+	k += (i>>20);
+	dk = (double) k;
+	f = x-1.0;
+
+	if((0x000fffff&(2+hx))<3)
+	{	/* |f| < 2**-20 */
+		if(f==zero) return dk;
+		R = f*f*(0.5-0.33333333333333333*f);
+		return dk-(R-f)/ln2;
+	}
+
+	s = f/(2.0+f);
+	z = s*s;
+	i = hx-0x6147a;
+	w = z*z;
+	j = 0x6b851-hx;
+	t1= w*(Lg2+w*(Lg4+w*Lg6));
+	t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
+	i |= j;
+	R = t2+t1;
+	if(i>0)
+	{
+		hfsq=0.5*f*f;
+		return dk-((hfsq-(s*(hfsq+R)))-f)/ln2;
+	}
+	else
+	{
+		return dk-((s*(f-R))-f)/ln2;
+	}
+}
+
+OVERLOADABLE double log10(double x)
+{
+	double zero = 0.0,
+	two54	   =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
+	ivln10	   =  4.34294481903251816668e-01, /* 0x3FDBCB7B, 0x1526E50E */
+	log10_2hi  =  3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
+	log10_2lo  =  3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
+
+	double y,z;
+	int i,k,hx;
+	unsigned lx;
+
+	hx = __HI(x);	/* high word of x */
+	lx = __LO(x);	/* low word of x */
+
+	k=0;
+	if (hx < 0x00100000)
+	{  /* x < 2**-1022  */
+		if (((hx&0x7fffffff)|lx)==0)
+			return -two54/zero; /* log(+-0)=-inf */
+
+		if (hx<0)
+			return (x-x)/zero;/* log(-#) = NaN */
+
+		k -= 54; x *= two54; /* subnormal number, scale up x */
+		hx = __HI(x);/* high word of x */
+	}
+
+	if (hx >= 0x7ff00000) return x+x;
+	k += (hx>>20)-1023;
+	i  = ((unsigned)k&0x80000000)>>31;
+	hx = (hx&0x000fffff)|((0x3ff-i)<<20);
+	y  = (double)(k+i);
+	__setHigh(&x, hx);
+	z  = y*log10_2lo + ivln10*log(x);
+	return  z+y*log10_2hi;
+}
+
+OVERLOADABLE double log1p(double x)
+{
+	double ln2_hi  =  6.93147180369123816490e-01,	/* 3fe62e42 fee00000 */
+	ln2_lo	=  1.90821492927058770002e-10,	/* 3dea39ef 35793c76 */
+	two54	=  1.80143985094819840000e+16,	/* 43500000 00000000 */
+	Lp1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
+	Lp2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
+	Lp3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
+	Lp4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
+	Lp5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
+	Lp6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
+	Lp7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
+
+	double hfsq,f,c,s,z,R,u,zero = 0;
+	int k,hx,hu,ax;
+
+	hx = __HI(x);		/* high word of x */
+	ax = hx&0x7fffffff;
+
+	k = 1;
+	if (hx < 0x3FDA827A) {			/* x < 0.41422	*/
+		if(ax>=0x3ff00000) {		/* x <= -1.0 */
+		if(x==-1.0) return -two54/zero; /* log1p(-1)=+inf */
+		else return (x-x)/(x-x);	/* log1p(x<-1)=NaN */
+		}
+		if(ax<0x3e200000) { 		/* |x| < 2**-29 */
+		if(two54+x>zero 		/* raise inexact */
+				&&ax<0x3c900000)		/* |x| < 2**-54 */
+			return x;
+		else
+			return x - x*x*0.5;
+		}
+		if(hx>0||hx<=((int)0xbfd2bec3)) {
+		k=0;f=x;hu=1;}	/* -0.2929<x<0.41422 */
+	}
+	if (hx >= 0x7ff00000) return x+x;
+	if(k!=0) {
+		if(hx<0x43400000) {
+		u  = 1.0+x;
+			hu = __HI(u);		/* high word of u */
+			k  = (hu>>20)-1023;
+			c  = (k>0)? 1.0-(u-x):x-(u-1.0);/* correction term */
+		c /= u;
+		} else {
+		u  = x;
+			hu = __HI(u);		/* high word of u */
+			k  = (hu>>20)-1023;
+		c  = 0;
+		}
+		hu &= 0x000fffff;
+		if(hu<0x6a09e) {
+			__setHigh(&u, hu|0x3ff00000);	/* normalize u */
+		} else {
+			k += 1;
+			__setHigh(&u, hu|0x3fe00000);	/* normalize u/2 */
+			hu = (0x00100000-hu)>>2;
+		}
+		f = u-1.0;
+	}
+	hfsq=0.5*f*f;
+	if(hu==0) { /* |f| < 2**-20 */
+		if(f==zero)
+		{
+			if(k==0) return zero;
+			else {c += k*ln2_lo; return k*ln2_hi+c;}
+		}
+		R = hfsq*(1.0-0.66666666666666666*f);
+		if(k==0) return f-R; else
+				 return k*ln2_hi-((R-(k*ln2_lo+c))-f);
+	}
+	s = f/(2.0+f);
+	z = s*s;
+	R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7))))));
+	if(k==0) return f-(hfsq-s*(hfsq+R)); else
+		return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
+
+}
+
+OVERLOADABLE double logb(double x)
+{
+	int lx,ix;
+	ix = (__HI(x))&0x7fffffff;	/* high |x| */
+	lx = __LO(x);			/* low x */
+	if((ix|lx)==0) return -1.0/fabs(x);
+	if(ix>=0x7ff00000) return x*x;
+	if((ix>>=20)==0) 			/* IEEE 754 logb */
+	{
+		long qx = as_long(x);
+		qx = qx & DF_ABS_MASK;
+		int msbOne = clz(qx);
+		return (double)(-1022 - (53 -(64 -msbOne)));
+	}
+	else
+		return (double) (ix-1023);
+}
+
+OVERLOADABLE int ilogb(double x)
+{
+	int hx,lx,ix;
+
+	hx  = (__HI(x))&0x7fffffff;	/* high word of x */
+	if(hx == 0x7ff00000 && __LO(x) == 0) return 0x7fffffff;
+
+	if(hx<0x00100000) {
+		lx = __LO(x);
+		if((hx|lx)==0)
+		return 0x80000000;	/* ilogb(0) = 0x80000000 */
+		else			/* subnormal x */
+		if(hx==0) {
+			for (ix = -1043; lx>0; lx<<=1) ix -=1;
+		} else {
+			for (ix = -1022,hx<<=11; hx>0; hx<<=1) ix -=1;
+		}
+		return ix;
+	}
+	else if (hx<0x7ff00000) return (hx>>20)-1023;
+	else return 0x80000000;
+}
+
+CONST OVERLOADABLE double __gen_ocl_mad(double a, double b, double c) __asm("llvm.fma" ".f64");
+OVERLOADABLE double mad(double a, double b, double c)
+{
+	return __gen_ocl_mad(a, b, c);
+}
+
+OVERLOADABLE double nan(ulong code)
+{
+	return as_double(DF_POSITIVE_INF + (code&DF_MAN_MASK));
+}
+
+OVERLOADABLE double nextafter(double x, double y)
+{
+	long hx, hy, ix, iy;
+	hx = as_long(x);
+	hy = as_long(y);
+	ix = hx & DF_ABS_MASK;
+	iy = hy & DF_ABS_MASK;
+
+	if(ix>DF_POSITIVE_INF|| iy>DF_POSITIVE_INF)
+	  return x+y;
+	if(hx == hy)
+	  return y;
+	if(ix == 0) {
+	  if(iy == 0)
+		return y;
+	  else
+		return as_double((hy&DF_SIGN_MASK) | 1);
+	}
+	if(hx >= 0) {
+	  if(hx > hy) {
+		hx -= 1;
+	  } else {
+		hx += 1;
+	  }
+	} else {
+	  if(hy >= 0 || hx > hy){
+		hx -= 1;
+	  } else {
+		hx += 1;
+	  }
+	}
+	return as_double(hx);
+}
+
+OVERLOADABLE double fmax(double a, double b)
+{
+	ulong ua = as_ulong(a);
+	ulong ub =as_ulong(b);
+
+	if((ua & DF_ABS_MASK) > DF_MAX_NORMAL) return b;
+	if((ub & DF_ABS_MASK) > DF_MAX_NORMAL) return a;
+	if(ua == DF_POSITIVE_INF) return a;
+	if(ub == DF_POSITIVE_INF) return b;
+
+	double c = a - b;
+	return (c >= 0) ? a:b;
+}
+
+OVERLOADABLE double fmin(double a, double b)
+{
+	ulong ua = as_ulong(a);
+	ulong ub =as_ulong(b);
+
+	if((ua & DF_ABS_MASK) > DF_MAX_NORMAL) return b;
+	if((ub & DF_ABS_MASK) > DF_MAX_NORMAL) return a;
+	if(ua == DF_NEGTIVE_INF) return a;
+	if(ub == DF_NEGTIVE_INF) return b;
+
+	double c = a - b;
+	return (c <= 0) ? a:b;
+}
+
+OVERLOADABLE double fmod (double x, double y)
+{
+	const double one = 1.0, Zero[] = {0.0, -0.0,};
+	int n,hx,hy,hz,ix,iy,sx,i;
+	uint lx,ly,lz;
+
+	hx = __HI(x);
+	lx = __LO(x);
+	hy = __HI(y);
+	ly = __LO(y);
+	sx = hx&0x80000000;		/* sign of x */
+	hx ^=sx;		/* |x| */
+	hy &= 0x7fffffff;	/* |y| */
+
+    /* purge off exception values */
+	if((hy|ly)==0||(hx>=0x7ff00000)||	/* y=0,or x not finite */
+	  ((hy|((ly|-ly)>>31))>0x7ff00000))	/* or y is NaN */
+	    return (x*y)/(x*y);
+	if(hx<=hy) {
+	    if((hx<hy)||(lx<ly)) return x;	/* |x|<|y| return x */
+	    if(lx==ly) 
+		return Zero[(uint)sx>>31];	/* |x|=|y| return x*0*/
+	}
+
+    /* determine ix = ilogb(x) */
+	if(hx<0x00100000) {	/* subnormal x */
+	    if(hx==0) {
+		for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
+	    } else {
+		for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
+	    }
+	} else ix = (hx>>20)-1023;
+
+    /* determine iy = ilogb(y) */
+	if(hy<0x00100000) {	/* subnormal y */
+	    if(hy==0) {
+		for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
+	    } else {
+		for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
+	    }
+	} else iy = (hy>>20)-1023;
+
+    /* set up {hx,lx}, {hy,ly} and align y to x */
+	if(ix >= -1022) 
+	    hx = 0x00100000|(0x000fffff&hx);
+	else {		/* subnormal x, shift x to normal */
+	    n = -1022-ix;
+	    if(n<=31) {
+	        hx = (hx<<n)|(lx>>(32-n));
+	        lx <<= n;
+	    } else {
+		hx = lx<<(n-32);
+		lx = 0;
+	    }
+	}
+	if(iy >= -1022) 
+	    hy = 0x00100000|(0x000fffff&hy);
+	else {		/* subnormal y, shift y to normal */
+	    n = -1022-iy;
+	    if(n<=31) {
+	        hy = (hy<<n)|(ly>>(32-n));
+	        ly <<= n;
+	    } else {
+		hy = ly<<(n-32);
+		ly = 0;
+	    }
+	}
+
+    /* fix point fmod */
+	n = ix - iy;
+	while(n--) {
+	    hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
+	    if(hz<0){hx = hx+hx+(lx>>31); lx = lx+lx;}
+	    else {
+	    	if((hz|lz)==0) 		/* return sign(x)*0 */
+		    return Zero[(uint)sx>>31];
+	    	hx = hz+hz+(lz>>31); lx = lz+lz;
+	    }
+	}
+	hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
+	if(hz>=0) {hx=hz;lx=lz;}
+
+    /* convert back to floating value and restore the sign */
+	if((hx|lx)==0) 			/* return sign(x)*0 */
+	    return Zero[(uint)sx>>31];	
+	while(hx<0x00100000) {		/* normalize x */
+	    hx = hx+hx+(lx>>31); lx = lx+lx;
+	    iy -= 1;
+	}
+	if(iy>= -1022) {	/* normalize output */
+		hx = ((hx-0x00100000)|((iy+1023)<<20));
+		__setHigh(&x,hx|sx);
+		__setLow(&x, lx);
+	} else {		/* subnormal output */
+	    n = -1022 - iy;
+	    if(n<=20) {
+		lx = (lx>>n)|((uint)hx<<(32-n));
+		hx >>= n;
+	    } else if (n<=31) {
+		lx = (hx<<(32-n))|(lx>>n); hx = sx;
+	    } else {
+		lx = hx>>(n-32); hx = sx;
+	    }
+	__setHigh(&x,hx|sx);
+	__setLow(&x, lx);
+
+	    x *= one;		/* create necessary signal */
+	}
+	return x;		/* exact output */
+
+}
+
+OVERLOADABLE double rint(double x)
+{
+	long ret;
+	long lval = as_long(x);
+	int exp = ((lval & DF_EXP_MASK) >> DF_EXP_OFFSET) - DF_EXP_BIAS;
+	long sign = (lval & DF_SIGN_MASK)?1:0;
+	long ma = (lval &DF_MAN_MASK);
+
+	if((lval & DF_ABS_MASK) == 0)
+		return as_double(sign << 63);
+
+	if(exp < -1)
+	{
+		ret = ((sign << 63)) ;
+		return as_double(ret);
+	}
+
+	if(exp > 51) return x;
+
+	long i = (1L << (52 - exp));
+	i = 0x10000000000000 - i;
+	unsigned long uv = 0xFFF0000000000000 + i;
+	ulong vv = lval & uv;
+	double	dp = as_double(vv);
+	if(exp == -1) dp = 0;
+
+	long fval = ma | DF_IMPLICITE_ONE;
+	long lastBit = (fval & (1L << (52 -exp)));
+	long roundBits = (fval & ( (1L << (52 -exp)) -1));
+
+	if(roundBits > (1L << (51 -exp)))
+		dp = (sign) ? dp-1.0:dp+1.0;
+	else if((roundBits == (1L << (51 -exp))) && lastBit)
+		dp = (sign) ? dp-1.0:dp+1.0;
+
+	return dp;
+}
+
+OVERLOADABLE double round(double x)
+{
+	long ret;
+	long lval = as_long(x);
+	int exp = ((lval & DF_EXP_MASK) >> DF_EXP_OFFSET) - DF_EXP_BIAS;
+	long sign = (lval & DF_SIGN_MASK)?1:0;
+	long ma = (lval &DF_MAN_MASK);
+
+	if((lval & DF_ABS_MASK) == 0)
+		return as_double(sign << 63);
+
+	if(exp < -1)
+	{
+		ret = ((sign << 63)) ;
+		return as_double(ret);
+	}
+
+	if(exp > 51) return x;
+
+	long i = (1L << (52 - exp));
+	i = 0x10000000000000 - i;
+	unsigned long uv = 0xFFF0000000000000 + i;
+	ulong vv = lval & uv;
+	double	dp = as_double(vv);
+	if(exp == -1) dp = 0;
+
+	long fval = ma | DF_IMPLICITE_ONE;
+	long roundBits = (fval & ( (1L << (52 -exp)) -1));
+
+	if(roundBits > (1L << (51 -exp)))
+		dp = (sign) ? dp-1.0:dp+1.0;
+	else if(roundBits == (1L << (51 -exp)))
+		dp = (sign) ? dp-1.0:dp+1.0;
+
+	return dp;
+}
+
+OVERLOADABLE double trunc(double x)
+{
+	double ret = floor(fabs(x));
+	return copysign(ret, x);
+}
+
+
+
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
new file mode 100644
index 0000000..2f8eb29
--- /dev/null
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2012 - 2014 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#ifndef __OCL_MATH_COMMON_H__
+#define __OCL_MATH_COMMON_H__
+
+#include "ocl_types.h"
+
+OVERLOADABLE double ceil(double x);
+OVERLOADABLE double copysign(double x, double y);
+OVERLOADABLE double fabs(double x);
+OVERLOADABLE double fdim(double x, double y);
+OVERLOADABLE double floor(double x);
+OVERLOADABLE double fmax(double a, double b);
+OVERLOADABLE double fmin(double a, double b);
+OVERLOADABLE double fmod (double x, double y);
+OVERLOADABLE double log(double x);
+OVERLOADABLE double log2(double x);
+OVERLOADABLE double log10(double x);
+OVERLOADABLE double log1p(double x);
+OVERLOADABLE double logb(double x);
+OVERLOADABLE int ilogb(double x);
+OVERLOADABLE double mad(double a, double b, double c);
+OVERLOADABLE double nan(ulong code);
+OVERLOADABLE double nextafter(double x, double y);
+OVERLOADABLE double rint(double x);
+OVERLOADABLE double round(double x);
+OVERLOADABLE double trunc(double x);
+
+
+
-- 
2.7.4



More information about the Beignet mailing list