[Beignet] [PATCH] Make tgamma meet the accuracy standard
Song, Ruiling
ruiling.song at intel.com
Wed Apr 22 21:30:19 PDT 2015
Hi Rebecca,
Thanks for the patch, it looks good.
I find that even with your patch the builtin_tgamma still fails. It turns out to be the issue of the test case.
So I send a patch "[PATCH] utests: fix test case builtin_tgamma." to fix it.
Thanks!
Ruiling
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Rebecca N. Palmer
> Sent: Friday, April 17, 2015 8:48 PM
> To: beignet at lists.freedesktop.org
> Subject: [Beignet] [PATCH] Make tgamma meet the accuracy standard
>
> The old tgamma=exp(lgamma) implementation had high rounding error on
> large outputs, exceeding the 16ulp specification for approx. x>8 (hence the
> test failure in strict conformance mode).
>
> Replace this with an implementation based on glibc's
> http://sources.debian.net/src/glibc/2.19-17/sysdeps/ieee754/flt-32/e_gam
> maf_r.c/
>
> Signed-off-by: Rebecca Palmer <rebecca_palmer at zoho.com>
>
> diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
> b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
> index da5b9a9..49edd95 100644
> --- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
> +++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
> @@ -1744,12 +1744,6 @@ OVERLOADABLE float
> __gen_ocl_internal_exp(float x) {
> }
> }
>
> -INLINE_OVERLOADABLE float tgamma(float x) {
> - float y;
> - int s;
> - y=lgamma_r(x,&s);
> - return __gen_ocl_internal_exp(y)*s;
> -}
>
> /* erf,erfc from glibc s_erff.c -- float version of s_erf.c.
> * Conversion to float by Ian Lance Taylor, Cygnus Support,
> ian at cygnus.com.
> @@ -3165,6 +3159,95 @@ float __gen_ocl_internal_pown(float x, int y) {
> return sn*z;
> }
>
> +OVERLOADABLE float tgamma (float x)
> +{
> + /* based on glibc __ieee754_gammaf_r by Ulrich Drepper
> +<drepper at cygnus.com> */
> +
> + unsigned int hx;
> + GEN_OCL_GET_FLOAT_WORD(hx,x);
> + if (hx == 0xff800000)
> + {
> + /* x == -Inf. According to ISO this is NaN. */
> + return NAN;
> + }
> + if ((hx & 0x7f800000) == 0x7f800000)
> + {
> + /* Positive infinity (return positive infinity) or NaN (return
> + NaN). */
> + return x;
> + }
> + if (x < 0.0f && __gen_ocl_internal_floor (x) == x)
> + {
> + /* integer x < 0 */
> + return NAN;
> + }
> +
> + if (x >= 36.0f)
> + {
> + /* Overflow. */
> + return INFINITY;
> + }
> + else if (x <= 0.0f && x >= -FLT_EPSILON / 4.0f)
> + {
> + return 1.0f / x;
> + }
> + else
> + {
> + float sinpix = __gen_ocl_internal_sinpi(x);
> + if (x <= -42.0f)
> + /* Underflow. */
> + {return 0.0f * sinpix /*for sign*/;}
> + int exp2_adj = 0;
> + float x_abs = __gen_ocl_fabs(x);
> + float gam0;
> +
> + if (x_abs < 4.0f) {
> + /* gamma = exp(lgamma) is only accurate for small lgamma */
> + float prod,x_adj;
> + if (x_abs < 0.5f) {
> + prod = 1.0f / x_abs;
> + x_adj = x_abs + 1.0f;
> + } else if (x_abs <= 1.5f) {
> + prod = 1.0f;
> + x_adj = x_abs;
> + } else if (x_abs < 2.5f) {
> + x_adj = x_abs - 1.0f;
> + prod = x_adj;
> + } else {
> + x_adj = x_abs - 2.0f;
> + prod = x_adj * (x_abs - 1.0f);
> + }
> + gam0 = __gen_ocl_internal_exp (lgamma (x_adj)) * prod;
> + }
> + else {
> + /* Compute gamma (X) using Stirling's approximation,
> + starting by computing pow (X, X) with a power of 2
> + factored out to avoid intermediate overflow. */
> + float x_int = __gen_ocl_internal_round (x_abs);
> + float x_frac = x_abs - x_int;
> + int x_log2;
> + float x_mant = frexp (x_abs, &x_log2);
> + if (x_mant < M_SQRT1_2_F)
> + {
> + x_log2--;
> + x_mant *= 2.0f;
> + }
> + exp2_adj = x_log2 * (int) x_int;
> + float ret = (__gen_ocl_internal_pow(x_mant, x_abs)
> + * exp2 (x_log2 * x_frac)
> + * __gen_ocl_internal_exp (-x_abs)
> + * sqrt (2.0f * M_PI_F / x_abs) );
> +
> + float x2 = x_abs * x_abs;
> + float bsum = (0x3.403404p-12f / x2 -0xb.60b61p-12f) / x2 +
> 0x1.555556p-4f;
> + gam0 = ret + ret * __gen_ocl_internal_expm1 (bsum / x_abs);
> + }
> + if (x > 0.0f) {return __gen_ocl_internal_ldexp (gam0, exp2_adj);}
> + float gam1 = M_PI_F / (-x * sinpix * gam0);
> + return __gen_ocl_internal_ldexp (gam1, -exp2_adj);
> + }
> +}
> +
> OVERLOADABLE float hypot(float x, float y) {
> if (__ocl_math_fastpath_flag)
> return __gen_ocl_internal_fastpath_hypot(x, y);
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list