[Beignet] [PATCH] GBE: improve asin/acos precision

Ruiling Song ruiling.song at intel.com
Fri Nov 29 00:03:42 PST 2013


Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
 backend/src/ocl_stdlib.tmpl.h |   77 +++++++++++++++++++++++++----------------
 1 file changed, 47 insertions(+), 30 deletions(-)

diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 555c63c..09a6909 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -1362,51 +1362,68 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_tanh(float x) {
   return (1 - y) / (1 + y);
 }
 
-typedef union
-{
-  float value;
-  int word;
-} ieee_float_shape_type;
-
-#ifndef GET_FLOAT_WORD
-#define GET_FLOAT_WORD(i,d)         \
-do {                                \
-  ieee_float_shape_type gf_u;       \
-  gf_u.value = (d);                 \
-  (i) = gf_u.word;                  \
-} while (0)
-#endif
+INLINE float __gen_ocl_asin_util(float x) {
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+  float
+  pS0 =  1.66666666666666657415e-01,
+  pS1 = -3.25565818622400915405e-01,
+  pS2 =  2.01212532134862925881e-01,
+  pS3 = -4.00555345006794114027e-02,
+  pS4 =  7.91534994289814532176e-04,
+  pS5 =  3.47933107596021167570e-05,
+  qS1 = -2.40339491173441421878e+00,
+  qS2 =  2.02094576023350569471e+00,
+  qS3 = -6.88283971605453293030e-01,
+  qS4 =  7.70381505559019352791e-02;
+
+  float t = x*x;
+  float p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
+  float q = 1.0+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
+  float w = p / q;
+  return x + x*w;
+}
 
 INLINE_OVERLOADABLE float __gen_ocl_internal_asin(float x) {
-  int hx, ix;
-  GET_FLOAT_WORD(hx,x);
-  ix = hx&0x7fffffff;
+  uint ix;
+  union { uint i; float f; } u;
+  u.f = x;
+  ix = u.i & 0x7fffffff;
   if(ix == 0x3f800000) {
     return x * M_PI_2_F;  /* asin(|1|)=+-pi/2 with inexact */
   }
   if(ix > 0x3f800000) {            /* |x|>= 1 */
-    return (x-x) / (x-x);          /* asin(|x|>1) is NaN */
+    return  NAN;          /* asin(|x|>1) is NaN */
   }
+
   if(ix < 0x32000000) {            /* if |x| < 2**-27 */
     if(HUGE_VALF + x > FLT_ONE) return x;   /* return x with inexact if x!=0*/
   }
-  /* 1 > |x| >= 2**-27 */
-  float sum = x, c = x, m = 1.0;
-  int n = 1;
-  do
-  {
-    c *= (2 * n - 1) * x * x;
-    m *= (2 * n);
-    sum += ( c / m / (2 * n + 1));
-    n++;
-  }while( n < 30);
-  return sum;
+
+  if(x < -0.5) {
+    return 2 * __gen_ocl_asin_util(native_sqrt((1+x) / 2)) - M_PI_2_F;
+  } else if(x > 0.5) {
+    return M_PI_2_F - 2 * __gen_ocl_asin_util(native_sqrt((1-x) / 2));
+  } else {
+    return __gen_ocl_asin_util(x);
+  }
 }
 INLINE_OVERLOADABLE float __gen_ocl_internal_asinpi(float x) {
   return __gen_ocl_internal_asin(x) / M_PI_F;
 }
 INLINE_OVERLOADABLE float __gen_ocl_internal_acos(float x) {
-  return M_PI_2_F - __gen_ocl_internal_asin(x);
+  if(x > 0.5)
+    return 2 * __gen_ocl_asin_util(native_sqrt((1-x)/2));
+  else
+    return M_PI_2_F - __gen_ocl_internal_asin(x);
 }
 INLINE_OVERLOADABLE float __gen_ocl_internal_acospi(float x) {
   return __gen_ocl_internal_acos(x) / M_PI_F;
-- 
1.7.9.5



More information about the Beignet mailing list