[Beignet] [PATCH V3] [PATCH]GBE: improve precision of fmod

Yang, Rong R rong.r.yang at intel.com
Sun Dec 22 18:45:33 PST 2013


Test pass with multi-thread disabled. Thanks.

-----Original Message-----
From: beignet-bounces at lists.freedesktop.org [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Lv Meng
Sent: Wednesday, December 18, 2013 3:17 PM
To: beignet at lists.freedesktop.org
Cc: Lv, Meng
Subject: [Beignet] [PATCH V3] [PATCH]GBE: improve precision of fmod


Signed-off-by: Lv Meng <meng.lv at intel.com>
---
 backend/src/ocl_stdlib.tmpl.h |   78 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 76 insertions(+), 2 deletions(-)

diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 904904b..c99fde5 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -1877,7 +1877,6 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_round(float x) {  INLINE_OVERLOADABLE float __gen_ocl_internal_floor(float x) { return __gen_ocl_rndd(x); }  INLINE_OVERLOADABLE float __gen_ocl_internal_ceil(float x)  { return __gen_ocl_rndu(x); }  INLINE_OVERLOADABLE float powr(float x, float y) { return __gen_ocl_pow(x,y); } -INLINE_OVERLOADABLE float fmod(float x, float y) { return x-y*__gen_ocl_rndz(x/y); }  INLINE_OVERLOADABLE float remainder(float x, float y) { return x-y*__gen_ocl_rnde(x/y); }  INLINE_OVERLOADABLE float __gen_ocl_internal_rint(float x) {
   return __gen_ocl_rnde(x);
@@ -1956,6 +1955,81 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_exp(float x) {
     return y*twom100;
   }
 }
+INLINE_OVERLOADABLE float __gen_ocl_internal_fmod (float x, float y) {
+  //return x-y*__gen_ocl_rndz(x/y);
+  float one = 1.0;
+  float Zero[2];
+  int n,hx,hy,hz,ix,iy,sx,i;
+  Zero[0] = 0.0;
+  Zero[1] = -0.0;
+  GEN_OCL_GET_FLOAT_WORD(hx,x);
+  GEN_OCL_GET_FLOAT_WORD(hy,y);
+  sx = hx&0x80000000;		/* sign of x */
+  hx ^=sx;		/* |x| */
+  hy &= 0x7fffffff;	/* |y| */
+  /* purge off exception values */
+  if(hy==0||(hx>=0x7f800000)||		/* y=0,or x not finite */
+  (hy>0x7f800000))			/* or y is NaN */
+    return (x*y)/(x*y);
+  if(hx<hy) return x;			/* |x|<|y| return x */
+  if(hx==hy)
+    return Zero[(unsigned)sx>>31];	/* |x|=|y| return x*0*/
+
+  /* determine ix = ilogb(x) */
+  if(hx<0x00800000) {	/* subnormal x */
+    for (ix = -126,i=(hx<<8); i>0; i<<=1) ix -=1;  } else ix = 
+ (hx>>23)-127;
+
+  /* determine iy = ilogb(y) */
+  if(hy<0x00800000) {	/* subnormal y */
+    for (iy = -126,i=(hy<<8); i>=0; i<<=1) iy -=1;  } else iy = 
+ (hy>>23)-127;
+
+  /* set up {hx,lx}, {hy,ly} and align y to x */  if(ix >= -126)
+    hx = 0x00800000|(0x007fffff&hx);
+  else {		/* subnormal x, shift x to normal */
+    n = -126-ix;
+    hx = hx<<n;
+  }
+  if(iy >= -126)
+    hy = 0x00800000|(0x007fffff&hy);
+  else {		/* subnormal y, shift y to normal */
+    n = -126-iy;
+    hy = hy<<n;
+  }
+  /* fix point fmod */
+  n = ix - iy;
+  while(n--) {
+    hz=hx-hy;
+    if(hz<0){hx = hx+hx;}
+    else {
+      if(hz==0)		/* return sign(x)*0 */
+        return Zero[(unsigned)sx>>31];
+      hx = hz+hz;
+    }
+  }
+  hz=hx-hy;
+  if(hz>=0) {hx=hz;}
+
+    /* convert back to floating value and restore the sign */
+  if(hx==0)			/* return sign(x)*0 */
+    return Zero[(unsigned)sx>>31];
+  while(hx<0x00800000) {		/* normalize x */
+    hx = hx+hx;
+    iy -= 1;
+  }
+  if(iy>= -126) {		/* normalize output */
+    hx = ((hx-0x00800000)|((iy+127)<<23));
+	GEN_OCL_SET_FLOAT_WORD(x,hx|sx);
+   } else {		/* subnormal output */
+     n = -126 - iy;
+     hx >>= n;
+     GEN_OCL_SET_FLOAT_WORD(x,hx|sx);
+     x *= one;		/* create necessary signal */
+  }
+  return x;		/* exact output */
+}
 
 // TODO use llvm intrinsics definitions  #define cos native_cos @@ -1984,7 +2058,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_exp(float x) {  #define copysign __gen_ocl_internal_copysign  #define erf __gen_ocl_internal_erf  #define erfc __gen_ocl_internal_erfc
-
+#define fmod __gen_ocl_internal_fmod
 PURE CONST float __gen_ocl_mad(float a, float b, float c);  INLINE_OVERLOADABLE float mad(float a, float b, float c) {
   return __gen_ocl_mad(a, b, c);
--
1.7.10.4

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list