[Beignet] [PATCH] backend: add double support to fmod
rander
rander.wang at intel.com
Tue Mar 28 02:15:48 UTC 2017
copy it from fdlibm
Signed-off-by: rander <rander.wang at intel.com>
---
backend/src/libocl/tmpl/ocl_math.tmpl.cl | 112 ++++++++++++++++++++++++++++
backend/src/libocl/tmpl/ocl_math.tmpl.h | 1 +
backend/src/libocl/tmpl/ocl_math_20.tmpl.cl | 112 ++++++++++++++++++++++++++++
backend/src/libocl/tmpl/ocl_math_20.tmpl.h | 1 +
4 files changed, 226 insertions(+)
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index 95fbf0b..a50d4db 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -4442,6 +4442,118 @@ OVERLOADABLE double fmin(double a, double b)
return (c <= 0) ? a:b;
}
+OVERLOADABLE double fmod (double x, double y)
+{
+ const double one = 1.0, Zero[] = {0.0, -0.0,};
+ int n,hx,hy,hz,ix,iy,sx,i;
+ uint lx,ly,lz;
+
+ hx = __HI(x);
+ lx = __LO(x);
+ hy = __HI(y);
+ ly = __LO(y);
+ sx = hx&0x80000000; /* sign of x */
+ hx ^=sx; /* |x| */
+ hy &= 0x7fffffff; /* |y| */
+
+ /* purge off exception values */
+ if((hy|ly)==0||(hx>=0x7ff00000)|| /* y=0,or x not finite */
+ ((hy|((ly|-ly)>>31))>0x7ff00000)) /* or y is NaN */
+ return (x*y)/(x*y);
+ if(hx<=hy) {
+ if((hx<hy)||(lx<ly)) return x; /* |x|<|y| return x */
+ if(lx==ly)
+ return Zero[(uint)sx>>31]; /* |x|=|y| return x*0*/
+ }
+
+ /* determine ix = ilogb(x) */
+ if(hx<0x00100000) { /* subnormal x */
+ if(hx==0) {
+ for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
+ } else {
+ for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
+ }
+ } else ix = (hx>>20)-1023;
+
+ /* determine iy = ilogb(y) */
+ if(hy<0x00100000) { /* subnormal y */
+ if(hy==0) {
+ for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
+ } else {
+ for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
+ }
+ } else iy = (hy>>20)-1023;
+
+ /* set up {hx,lx}, {hy,ly} and align y to x */
+ if(ix >= -1022)
+ hx = 0x00100000|(0x000fffff&hx);
+ else { /* subnormal x, shift x to normal */
+ n = -1022-ix;
+ if(n<=31) {
+ hx = (hx<<n)|(lx>>(32-n));
+ lx <<= n;
+ } else {
+ hx = lx<<(n-32);
+ lx = 0;
+ }
+ }
+ if(iy >= -1022)
+ hy = 0x00100000|(0x000fffff&hy);
+ else { /* subnormal y, shift y to normal */
+ n = -1022-iy;
+ if(n<=31) {
+ hy = (hy<<n)|(ly>>(32-n));
+ ly <<= n;
+ } else {
+ hy = ly<<(n-32);
+ ly = 0;
+ }
+ }
+
+ /* fix point fmod */
+ n = ix - iy;
+ while(n--) {
+ hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
+ if(hz<0){hx = hx+hx+(lx>>31); lx = lx+lx;}
+ else {
+ if((hz|lz)==0) /* return sign(x)*0 */
+ return Zero[(uint)sx>>31];
+ hx = hz+hz+(lz>>31); lx = lz+lz;
+ }
+ }
+ hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
+ if(hz>=0) {hx=hz;lx=lz;}
+
+ /* convert back to floating value and restore the sign */
+ if((hx|lx)==0) /* return sign(x)*0 */
+ return Zero[(uint)sx>>31];
+ while(hx<0x00100000) { /* normalize x */
+ hx = hx+hx+(lx>>31); lx = lx+lx;
+ iy -= 1;
+ }
+ if(iy>= -1022) { /* normalize output */
+ hx = ((hx-0x00100000)|((iy+1023)<<20));
+ __setHigh(&x,hx|sx);
+ __setLow(&x, lx);
+ } else { /* subnormal output */
+ n = -1022 - iy;
+ if(n<=20) {
+ lx = (lx>>n)|((uint)hx<<(32-n));
+ hx >>= n;
+ } else if (n<=31) {
+ lx = (hx<<(32-n))|(lx>>n); hx = sx;
+ } else {
+ lx = hx>>(n-32); hx = sx;
+ }
+ __setHigh(&x,hx|sx);
+ __setLow(&x, lx);
+
+ x *= one; /* create necessary signal */
+ }
+ return x; /* exact output */
+
+}
+
OVERLOADABLE double rint(double x)
{
long ret;
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.h b/backend/src/libocl/tmpl/ocl_math.tmpl.h
index 2e2938a..aee332c 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.h
@@ -240,6 +240,7 @@ OVERLOADABLE double fdim(double x, double y);
OVERLOADABLE double floor(double x);
OVERLOADABLE double fmax(double a, double b);
OVERLOADABLE double fmin(double a, double b);
+OVERLOADABLE double fmod (double x, double y);
OVERLOADABLE double fract(double x, global double *p);
OVERLOADABLE double fract(double x, local double *p);
OVERLOADABLE double fract(double x, private double *p);
diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
index e639e27..a084a88 100644
--- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
@@ -4318,6 +4318,118 @@ OVERLOADABLE double fmin(double a, double b)
return (c <= 0) ? a:b;
}
+OVERLOADABLE double fmod (double x, double y)
+{
+ const double one = 1.0, Zero[] = {0.0, -0.0,};
+ int n,hx,hy,hz,ix,iy,sx,i;
+ uint lx,ly,lz;
+
+ hx = __HI(x);
+ lx = __LO(x);
+ hy = __HI(y);
+ ly = __LO(y);
+ sx = hx&0x80000000; /* sign of x */
+ hx ^=sx; /* |x| */
+ hy &= 0x7fffffff; /* |y| */
+
+ /* purge off exception values */
+ if((hy|ly)==0||(hx>=0x7ff00000)|| /* y=0,or x not finite */
+ ((hy|((ly|-ly)>>31))>0x7ff00000)) /* or y is NaN */
+ return (x*y)/(x*y);
+ if(hx<=hy) {
+ if((hx<hy)||(lx<ly)) return x; /* |x|<|y| return x */
+ if(lx==ly)
+ return Zero[(uint)sx>>31]; /* |x|=|y| return x*0*/
+ }
+
+ /* determine ix = ilogb(x) */
+ if(hx<0x00100000) { /* subnormal x */
+ if(hx==0) {
+ for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
+ } else {
+ for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
+ }
+ } else ix = (hx>>20)-1023;
+
+ /* determine iy = ilogb(y) */
+ if(hy<0x00100000) { /* subnormal y */
+ if(hy==0) {
+ for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
+ } else {
+ for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
+ }
+ } else iy = (hy>>20)-1023;
+
+ /* set up {hx,lx}, {hy,ly} and align y to x */
+ if(ix >= -1022)
+ hx = 0x00100000|(0x000fffff&hx);
+ else { /* subnormal x, shift x to normal */
+ n = -1022-ix;
+ if(n<=31) {
+ hx = (hx<<n)|(lx>>(32-n));
+ lx <<= n;
+ } else {
+ hx = lx<<(n-32);
+ lx = 0;
+ }
+ }
+ if(iy >= -1022)
+ hy = 0x00100000|(0x000fffff&hy);
+ else { /* subnormal y, shift y to normal */
+ n = -1022-iy;
+ if(n<=31) {
+ hy = (hy<<n)|(ly>>(32-n));
+ ly <<= n;
+ } else {
+ hy = ly<<(n-32);
+ ly = 0;
+ }
+ }
+
+ /* fix point fmod */
+ n = ix - iy;
+ while(n--) {
+ hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
+ if(hz<0){hx = hx+hx+(lx>>31); lx = lx+lx;}
+ else {
+ if((hz|lz)==0) /* return sign(x)*0 */
+ return Zero[(uint)sx>>31];
+ hx = hz+hz+(lz>>31); lx = lz+lz;
+ }
+ }
+ hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
+ if(hz>=0) {hx=hz;lx=lz;}
+
+ /* convert back to floating value and restore the sign */
+ if((hx|lx)==0) /* return sign(x)*0 */
+ return Zero[(uint)sx>>31];
+ while(hx<0x00100000) { /* normalize x */
+ hx = hx+hx+(lx>>31); lx = lx+lx;
+ iy -= 1;
+ }
+ if(iy>= -1022) { /* normalize output */
+ hx = ((hx-0x00100000)|((iy+1023)<<20));
+ __setHigh(&x,hx|sx);
+ __setLow(&x, lx);
+ } else { /* subnormal output */
+ n = -1022 - iy;
+ if(n<=20) {
+ lx = (lx>>n)|((uint)hx<<(32-n));
+ hx >>= n;
+ } else if (n<=31) {
+ lx = (hx<<(32-n))|(lx>>n); hx = sx;
+ } else {
+ lx = hx>>(n-32); hx = sx;
+ }
+ __setHigh(&x,hx|sx);
+ __setLow(&x, lx);
+
+ x *= one; /* create necessary signal */
+ }
+ return x; /* exact output */
+
+}
+
OVERLOADABLE double rint(double x)
{
long ret;
diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.h b/backend/src/libocl/tmpl/ocl_math_20.tmpl.h
index 4ef159c..d503d40 100644
--- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.h
@@ -216,6 +216,7 @@ OVERLOADABLE double fabs(double x);
OVERLOADABLE double fdim(double x, double y);
OVERLOADABLE double fmax(double a, double b);
OVERLOADABLE double fmin(double a, double b);
+OVERLOADABLE double fmod (double x, double y);
OVERLOADABLE double floor(double x);
OVERLOADABLE double fract(double x, global double *p);
OVERLOADABLE double fract(double x, local double *p);
--
2.7.4
More information about the Beignet
mailing list