[Beignet] [PATCH 3/5] GBE: Improve atan precision
Ruiling Song
ruiling.song at intel.com
Thu Jan 9 21:39:41 PST 2014
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/ocl_stdlib.tmpl.h | 85 ++++++++++++++++++++++++++++++++---------
1 file changed, 68 insertions(+), 17 deletions(-)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 24613cd..ecbca20 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -2330,24 +2330,75 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_acospi(float x) {
return __gen_ocl_internal_acos(x) / M_PI_F;
}
INLINE_OVERLOADABLE float __gen_ocl_internal_atan(float x) {
- float a = 0, c = 1;
- if (x <= -1) {
- a = - M_PI_2_F;
- x = 1 / x;
- c = -1;
- }
- if (x >= 1) {
- a = M_PI_2_F;
- x = 1 / x;
- c = -1;
- }
- a += c*x;
- int i;
- int sign;
- for(i=3, sign=-1; i<63; i+=2, sign=-sign) {
- a += c*sign*__gen_ocl_pow(x,i)/i;
+ /* copied from fdlibm: reduce |x| to a small argument around one of the tabulated points below, then evaluate a polynomial in x*x */
+ float atanhi[4]; /* leading parts of atan at the reduction points, indexed by id */
+ atanhi[0] = 4.6364760399e-01; /* atan(0.5)hi 0x3eed6338 */
+ atanhi[1] = 7.8539812565e-01; /* atan(1.0)hi 0x3f490fda */
+ atanhi[2] = 9.8279368877e-01; /* atan(1.5)hi 0x3f7b985e */
+ atanhi[3] = 1.5707962513e+00; /* atan(inf)hi 0x3fc90fda */
+
+ float atanlo[4]; /* low-order correction terms paired with atanhi[], indexed by id */
+ atanlo[0] = 5.0121582440e-09; /* atan(0.5)lo 0x31ac3769 */
+ atanlo[1] = 3.7748947079e-08; /* atan(1.0)lo 0x33222168 */
+ atanlo[2] = 3.4473217170e-08; /* atan(1.5)lo 0x33140fb4 */
+ atanlo[3] = 7.5497894159e-08; /* atan(inf)lo 0x33a22168 */
+
+ float aT[11]; /* polynomial coefficients consumed by s1 (even indices) and s2 (odd indices) below */
+ aT[0] = 3.3333334327e-01; /* 0x3eaaaaaa */
+ aT[1] = -2.0000000298e-01; /* 0xbe4ccccd */
+ aT[2] = 1.4285714924e-01; /* 0x3e124925 */
+ aT[3] = -1.1111110449e-01; /* 0xbde38e38 */
+ aT[4] = 9.0908870101e-02; /* 0x3dba2e6e */
+ aT[5] = -7.6918758452e-02; /* 0xbd9d8795 */
+ aT[6] = 6.6610731184e-02; /* 0x3d886b35 */
+ aT[7] = -5.8335702866e-02; /* 0xbd6ef16b */
+ aT[8] = 4.9768779427e-02; /* 0x3d4bda59 */
+ aT[9] = -3.6531571299e-02; /* 0xbd15a221 */
+ aT[10] = 1.6285819933e-02; /* 0x3c8569d7 */
+ const float one = 1.0, huge = 1.0e30;
+
+ float w,s1,s2,z;
+ int ix,hx,id;
+
+ GEN_OCL_GET_FLOAT_WORD(hx,x); /* presumably stores the IEEE-754 bit pattern of x in hx, like fdlibm's GET_FLOAT_WORD -- macro is defined outside this hunk, confirm */
+ ix = hx&0x7fffffff; /* drop the sign bit so the thresholds below compare against |x| */
+ if(ix>=0x50800000) { /* if |x| >= 2^34 */
+ if(ix>0x7f800000)
+ return x+x; /* NaN */
+ if(hx>0) return atanhi[3]+atanlo[3]; /* tabulated atan(inf), hi+lo */
+ else return -atanhi[3]-atanlo[3];
+ } if (ix < 0x3ee00000) { /* |x| < 0.4375; no else needed, every path of the block above returns */
+ if (ix < 0x31000000) { /* |x| < 2^-29 */
+ if(huge+x>one) return x; /* raise inexact */
+ }
+ id = -1; /* no table entry: the polynomial is applied to x directly */
+ } else {
+ x = __gen_ocl_fabs(x);
+ if (ix < 0x3f980000) { /* |x| < 1.1875 */
+ if (ix < 0x3f300000) { /* 7/16 <=|x|<11/16 */
+ id = 0; x = ((float)2.0*x-one)/((float)2.0+x);
+ } else { /* 11/16<=|x|< 19/16 */
+ id = 1; x = (x-one)/(x+one);
+ }
+ } else {
+ if (ix < 0x401c0000) { /* 19/16 <= |x| < 2.4375 */
+ id = 2; x = (x-(float)1.5)/(one+(float)1.5*x);
+ } else { /* 2.4375 <= |x| < 2^34 (larger magnitudes already returned above) */
+ id = 3; x = -(float)1.0/x;
+ }
+ }}
+ /* end of argument reduction */
+ z = x*x;
+ w = z*z;
+ /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
+ s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10])))));
+ s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9]))));
+ if (id<0) return x - x*(s1+s2); /* unreduced case: x still carries its own sign */
+ else {
+ z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x); /* table value plus polynomial result for the reduced x */
+ return (hx<0)? -z:z; /* reduction ran on the fabs'd x; the sign is taken from hx */
}
- return a;
+
}
INLINE_OVERLOADABLE float __gen_ocl_internal_atanpi(float x) {
return __gen_ocl_internal_atan(x) / M_PI_F;
--
1.7.9.5
More information about the Beignet
mailing list