[Beignet] [PATCH] GBE: improve asin/acos precision
Zhigang Gong
zhigang.gong at linux.intel.com
Wed Dec 4 23:20:16 PST 2013
Tested OK. Pushed, thanks.
On Thu, Dec 05, 2013 at 02:21:19AM +0000, Song, Ruiling wrote:
> Ping for review. Thanks
>
> -----Original Message-----
> From: Song, Ruiling
> Sent: Friday, November 29, 2013 4:04 PM
> To: beignet at lists.freedesktop.org
> Cc: Song, Ruiling
> Subject: [PATCH] GBE: improve asin/acos precision
>
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
> backend/src/ocl_stdlib.tmpl.h | 77 +++++++++++++++++++++++++----------------
> 1 file changed, 47 insertions(+), 30 deletions(-)
>
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 555c63c..09a6909 100644
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -1362,51 +1362,68 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_tanh(float x) {
> return (1 - y) / (1 + y);
> }
>
> -typedef union
> -{
> - float value;
> - int word;
> -} ieee_float_shape_type;
> -
> -#ifndef GET_FLOAT_WORD
> -#define GET_FLOAT_WORD(i,d) \
> -do { \
> - ieee_float_shape_type gf_u; \
> - gf_u.value = (d); \
> - (i) = gf_u.word; \
> -} while (0)
> -#endif
> +INLINE float __gen_ocl_asin_util(float x) {
> +/*
> + * ====================================================
> + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
> + *
> + * Developed at SunSoft, a Sun Microsystems, Inc. business.
> + * Permission to use, copy, modify, and distribute this
> + * software is freely granted, provided that this notice
> + * is preserved.
> + * ====================================================
> + */
> + float
> + pS0 = 1.66666666666666657415e-01,
> + pS1 = -3.25565818622400915405e-01,
> + pS2 = 2.01212532134862925881e-01,
> + pS3 = -4.00555345006794114027e-02,
> + pS4 = 7.91534994289814532176e-04,
> + pS5 = 3.47933107596021167570e-05,
> + qS1 = -2.40339491173441421878e+00,
> + qS2 = 2.02094576023350569471e+00,
> + qS3 = -6.88283971605453293030e-01,
> + qS4 = 7.70381505559019352791e-02;
> +
> + float t = x*x;
> + float p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
> + float q = 1.0+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
> + float w = p / q;
> + return x + x*w;
> +}
>
> INLINE_OVERLOADABLE float __gen_ocl_internal_asin(float x) {
> - int hx, ix;
> - GET_FLOAT_WORD(hx,x);
> - ix = hx&0x7fffffff;
> + uint ix;
> + union { uint i; float f; } u;
> + u.f = x;
> + ix = u.i & 0x7fffffff;
> if(ix == 0x3f800000) {
> return x * M_PI_2_F; /* asin(|1|)=+-pi/2 with inexact */
> }
> if(ix > 0x3f800000) { /* |x|>= 1 */
> - return (x-x) / (x-x); /* asin(|x|>1) is NaN */
> + return NAN; /* asin(|x|>1) is NaN */
> }
> +
> if(ix < 0x32000000) { /* if |x| < 2**-27 */
> if(HUGE_VALF + x > FLT_ONE) return x; /* return x with inexact if x!=0*/
> }
> - /* 1 > |x| >= 2**-27 */
> - float sum = x, c = x, m = 1.0;
> - int n = 1;
> - do
> - {
> - c *= (2 * n - 1) * x * x;
> - m *= (2 * n);
> - sum += ( c / m / (2 * n + 1));
> - n++;
> - }while( n < 30);
> - return sum;
> +
> + if(x < -0.5) {
> + return 2 * __gen_ocl_asin_util(native_sqrt((1+x) / 2)) - M_PI_2_F;
> + } else if(x > 0.5) {
> + return M_PI_2_F - 2 * __gen_ocl_asin_util(native_sqrt((1-x) / 2));
> + } else {
> + return __gen_ocl_asin_util(x);
> + }
> }
> INLINE_OVERLOADABLE float __gen_ocl_internal_asinpi(float x) {
> return __gen_ocl_internal_asin(x) / M_PI_F; } INLINE_OVERLOADABLE float __gen_ocl_internal_acos(float x) {
> - return M_PI_2_F - __gen_ocl_internal_asin(x);
> + if(x > 0.5)
> + return 2 * __gen_ocl_asin_util(native_sqrt((1-x)/2));
> + else
> + return M_PI_2_F - __gen_ocl_internal_asin(x);
> }
> INLINE_OVERLOADABLE float __gen_ocl_internal_acospi(float x) {
> return __gen_ocl_internal_acos(x) / M_PI_F;
> --
> 1.7.9.5
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list