[Beignet] [PATCH] [PATCH]GBE: improve precision of ldexp

Mon Dec 30 22:04:48 PST 2013

One comment, inline below. Otherwise the patch tested OK.
-----Original Message-----
From: beignet-bounces at lists.freedesktop.org [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Lv Meng
Sent: Monday, December 23, 2013 8:21 AM
To: beignet at lists.freedesktop.org
Cc: Lv, Meng
Subject: [Beignet] [PATCH] [PATCH]GBE: improve precision of ldexp


Signed-off-by: Lv Meng <meng.lv at intel.com>
---
 backend/src/ocl_stdlib.tmpl.h |  100 +++++++++++++++++++++--------------------
 1 file changed, 51 insertions(+), 49 deletions(-)

diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 2345ecb..6ae7cd8 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -173,6 +173,12 @@ do {                                  \
 } while (0)
 #endif
 
+int __ocl_finitef (float x){
It is safe to declare this function (__ocl_finitef) as "INLINE".
+  unsigned ix;
+  GEN_OCL_GET_FLOAT_WORD (ix, x);
+  return (ix & 0x7fffffff) < 0x7f800000; }
+
 #define HUGE_VALF    (__ocl_inff())
 #define INFINITY     (__ocl_inff())
 #define NAN          (__ocl_nanf())
@@ -1651,6 +1657,14 @@ INLINE_OVERLOADABLE float native_exp10(float x) { return __gen_ocl_pow(10, x); }  INLINE_OVERLOADABLE float __gen_ocl_internal_cbrt(float x) {
   return __gen_ocl_pow(x, 0.3333333333f);  }
+INLINE_OVERLOADABLE float __gen_ocl_internal_copysign(float x, float y) 
+{
+  union { unsigned u; float f; } ux, uy;
+  ux.f = x;
+  uy.f = y;
+  ux.u = (ux.u & 0x7fffffff) | (uy.u & 0x80000000u);
+  return ux.f;
+}
+
 #define BODY \
   *cosval = native_cos(x); \
   return native_sin(x);
@@ -1688,6 +1702,37 @@ INLINE float __gen_ocl_asin_util(float x) {
   float w = p / q;
   return x + x*w;
 }
+float __gen_ocl_scalbnf (float x, int n){
+  float two25 = 3.355443200e+07,	/* 0x4c000000 */
+  twom25 = 2.9802322388e-08,	        /* 0x33000000 */
+  huge = 1.0e+30,
+  tiny = 1.0e-30;
+  int k,ix;
+  GEN_OCL_GET_FLOAT_WORD(ix,x);
+  k = (ix&0x7f800000)>>23; /* extract exponent */
+  if (k==0) {	/* 0 or subnormal x */
+    if ((ix&0x7fffffff)==0) return x; /* +-0 */
+    x *= two25;
+    GEN_OCL_GET_FLOAT_WORD(ix,x);
+    k = ((ix&0x7f800000)>>23) - 25;
+  }
+  if (k==0xff) return x+x;	/* NaN or Inf */
+  if (n< -50000)
+    return tiny*__gen_ocl_internal_copysign(tiny,x);	/*underflow*/
+  if (n> 50000 || k+n > 0xfe)
+    return huge*__gen_ocl_internal_copysign(huge,x); /* overflow  */
+  /* Now k and n are bounded we know that k = k+n does not overflow. */
+  k = k+n;
+  if (k > 0) { /* normal result */
+    GEN_OCL_SET_FLOAT_WORD(x,(ix&0x807fffff)|(k<<23));
+    return x;
+  }
+  if (k <= -25)
+    return tiny*__gen_ocl_internal_copysign(tiny,x);	/*underflow*/
+  k += 25;				/* subnormal result */
+  GEN_OCL_SET_FLOAT_WORD(x,(ix&0x807fffff)|(k<<23));
+  return x*twom25;
+}
 
 INLINE_OVERLOADABLE float __gen_ocl_internal_asin(float x) {
   uint ix;
@@ -1751,13 +1796,6 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_atanpi(float x) {  INLINE_OVERLOADABLE float __gen_ocl_internal_atanh(float x) {
   return 0.5f * native_sqrt((1 + x) / (1 - x));  } -INLINE_OVERLOADABLE float __gen_ocl_internal_copysign(float x, float y) {
-  union { unsigned u; float f; } ux, uy;
-  ux.f = x;
-  uy.f = y;
-  ux.u = (ux.u & 0x7fffffff) | (uy.u & 0x80000000u);
-  return ux.f;
-}
 INLINE_OVERLOADABLE float __gen_ocl_internal_erf(float x) {
   return M_2_SQRTPI_F * (x - __gen_ocl_pow(x, 3) / 3 + __gen_ocl_pow(x, 5) / 10 - __gen_ocl_pow(x, 7) / 42 + __gen_ocl_pow(x, 9) / 216);  } @@ -2308,6 +2346,11 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_remainder(float x, float p){
   return x;
 }
 
+INLINE_OVERLOADABLE float __gen_ocl_internal_ldexp(float x, int n) {
+  if(!__ocl_finitef(x)||x==(float)0.0) return x;
+  x = __gen_ocl_scalbnf(x,n);
+  return x;
+}
 
 // TODO use llvm intrinsics definitions  #define cos native_cos @@ -2338,6 +2381,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_remainder(float x, float p){  #define erfc __gen_ocl_internal_erfc  #define fmod __gen_ocl_internal_fmod  #define remainder __gen_ocl_internal_remainder
+#define ldexp __gen_ocl_internal_ldexp
 PURE CONST float __gen_ocl_mad(float a, float b, float c);  INLINE_OVERLOADABLE float mad(float a, float b, float c) {
   return __gen_ocl_mad(a, b, c);
@@ -2551,48 +2595,6 @@ INLINE_OVERLOADABLE float remquo(float x, float y, local int *quo) { BODY; }  INLINE_OVERLOADABLE float remquo(float x, float y, private int *quo) { BODY; }  #undef BODY  INLINE_OVERLOADABLE float native_divide(float x, float y) { return x/y; } -INLINE_OVERLOADABLE float ldexp(float x, int n) {
-  union { float f; unsigned u; } u;
-  u.f = x;
-  unsigned s = u.u & 0x80000000u, v = u.u & 0x7fffffff, d = 0;
-  if(v >= 0x7f800000)
-    return x;
-  if(v == 0)
-    return x;
-  int e = v >> 23;
-  v &= 0x7fffff;
-  if(e >= 1)
-    v |= 0x800000;
-  else {
-    v <<= 1;
-    while(v < 0x800000) {
-      v <<= 1;
-      e --;
-    }
-  }
-  e = add_sat(e, n);
-  if(e >= 255) {
-    u.u = s | 0x7f800000;
-    return u.f;
-  }
-  if(e > 0) {
-    u.u = s | (e << 23) | (v & 0x7fffff);
-    return u.f;
-  }
-  if(e <= -23) {
-    u.u = s;
-    return u.f;
-  }
-  while(e <= 0) {
-    d = (d >> 1) | (v << 31);
-    v >>= 1;
-    e ++;
-  }
-  if(d > 0x80000000u)
-    v ++;
-  u.u = s | v;
-  return u.f;
-}
 INLINE_OVERLOADABLE float pown(float x, int n) {
   if (x == 0 && n == 0)
     return 1;
--
1.7.10.4

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet