[Beignet] [PATCH 4/5] GBE: Improve precision of atan2
Ruiling Song
ruiling.song at intel.com
Thu Jan 9 21:39:42 PST 2014
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/ocl_stdlib.tmpl.h | 95 +++++++++++++++++++++++++++++++++--------
1 file changed, 78 insertions(+), 17 deletions(-)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index ecbca20..ca53ab8 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -2414,25 +2414,86 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_erfc(float x) {
#define sqrt native_sqrt
INLINE_OVERLOADABLE float rsqrt(float x) { return native_rsqrt(x); }
INLINE_OVERLOADABLE float __gen_ocl_internal_atan2(float y, float x) {
- uint hx = *(uint *)(&x), ix = hx & 0x7FFFFFFF;
- uint hy = *(uint *)(&y), iy = hy & 0x7FFFFFFF;
- if (ix > 0x7F800000 || iy > 0x7F800000)
- return nan(0u);
- if (ix == 0) {
- if (y > 0)
- return M_PI_2_F;
- if (y < 0)
- return - M_PI_2_F;
- return nan(0u);
- } else {
- float z = __gen_ocl_internal_atan(y / x);
- if (x > 0)
- return z;
- if (y >= 0)
- return M_PI_F + z;
- return - M_PI_F + z;
+ /* copied from fdlibm */
+ float z;
+ int k,m,hx,hy,ix,iy;
+ const float
+ tiny = 1.0e-30,
+ zero = 0.0,
+ pi_o_4 = 7.8539818525e-01, /* 0x3f490fdb */
+ pi_o_2 = 1.5707963705e+00, /* 0x3fc90fdb */
+ pi = 3.1415927410e+00, /* 0x40490fdb */
+ pi_lo = -8.7422776573e-08; /* 0xb3bbbd2e */
+
+ GEN_OCL_GET_FLOAT_WORD(hx,x);
+ ix = hx&0x7fffffff;
+ GEN_OCL_GET_FLOAT_WORD(hy,y);
+ iy = hy&0x7fffffff;
+
+ if((ix>0x7f800000)||
+ (iy>0x7f800000)) /* x or y is NaN */
+ return x+y;
+ if(hx==0x3f800000) return z=__gen_ocl_internal_atan(y); /* x=1.0 */
+ m = ((hy>>31)&1)|((hx>>30)&2); /* 2*sign(x)+sign(y) */
+
+ /* when y = 0 */
+ if(iy==0) {
+ switch(m) {
+ case 0:
+ case 1: return y; /* atan(+-0,+anything)=+-0 */
+ case 2: return pi+tiny;/* atan(+0,-anything) = pi */
+ case 3: return -pi-tiny;/* atan(-0,-anything) =-pi */
+ }
+ }
+ /* when x = 0 */
+ if(ix==0) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
+
+ /* both are denorms. Gen does not support denorm, so we convert to normal float number*/
+ if(ix <= 0x7fffff && iy <= 0x7fffff) {
+ x = (float)(ix) * (1.0f - ((hx>>30) & 0x2));
+ y = (float)(iy) * (1.0f - ((hy>>30) & 0x2));
+ }
+
+ /* when x is INF */
+ if(ix==0x7f800000) {
+ if(iy==0x7f800000) {
+ switch(m) {
+ case 0: return pi_o_4+tiny;/* atan(+INF,+INF) */
+ case 1: return -pi_o_4-tiny;/* atan(-INF,+INF) */
+ case 2: return (float)3.0*pi_o_4+tiny;/*atan(+INF,-INF)*/
+ case 3: return (float)-3.0*pi_o_4-tiny;/*atan(-INF,-INF)*/
+ }
+ } else {
+ switch(m) {
+ case 0: return zero ; /* atan(+...,+INF) */
+ case 1: return -zero ; /* atan(-...,+INF) */
+ case 2: return pi+tiny ; /* atan(+...,-INF) */
+ case 3: return -pi-tiny ; /* atan(-...,-INF) */
+ }
+ }
+ }
+ /* when y is INF */
+ if(iy==0x7f800000) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
+
+ /* compute y/x */
+ k = (iy-ix)>>23;
+ if(k > 60) z=pi_o_2+(float)0.5*pi_lo; /* |y/x| > 2**60 */
+ else if(hx<0&&k<-60) z=0.0; /* |y|/x < -2**60 */
+ else z=__gen_ocl_internal_atan(__gen_ocl_fabs(y/x)); /* safe to do y/x */
+ switch (m) {
+ case 0: return z ; /* atan(+,+) */
+ case 1: {
+ uint zh;
+ GEN_OCL_GET_FLOAT_WORD(zh,z);
+ GEN_OCL_SET_FLOAT_WORD(z,zh ^ 0x80000000);
+ }
+ return z ; /* atan(-,+) */
+ case 2: return pi-(z-pi_lo);/* atan(+,-) */
+ default: /* case 3 */
+ return (z-pi_lo)-pi;/* atan(-,-) */
}
}
+
INLINE_OVERLOADABLE float __gen_ocl_internal_atan2pi(float y, float x) {
uint ix = as_uint(x), iy = as_uint(y),
pos_zero = 0, neg_zero = 0x80000000u,
--
1.7.9.5
More information about the Beignet
mailing list