[Beignet] [PATCH] [PATCH]GBE: improve precision of ldexp
Zhigang Gong
zhigang.gong at linux.intel.com
Mon Dec 30 21:59:49 PST 2013
Modified according to Ruiling's comment and pushed. Thanks.
On Tue, Dec 31, 2013 at 06:04:48AM +0000, Song, Ruiling wrote:
>
> One comment inline below. Otherwise, the patch tested OK.
> -----Original Message-----
> From: beignet-bounces at lists.freedesktop.org [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Lv Meng
> Sent: Monday, December 23, 2013 8:21 AM
> To: beignet at lists.freedesktop.org
> Cc: Lv, Meng
> Subject: [Beignet] [PATCH] [PATCH]GBE: improve precision of ldexp
>
>
> Signed-off-by: Lv Meng <meng.lv at intel.com>
> ---
> backend/src/ocl_stdlib.tmpl.h | 100 +++++++++++++++++++++--------------------
> 1 file changed, 51 insertions(+), 49 deletions(-)
>
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index 2345ecb..6ae7cd8 100755
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -173,6 +173,12 @@ do { \
> } while (0)
> #endif
>
> +int __ocl_finitef (float x){
> [Reviewer comment: it is safe to declare this function as "INLINE".]
> + unsigned ix;
> + GEN_OCL_GET_FLOAT_WORD (ix, x);
> + return (ix & 0x7fffffff) < 0x7f800000; }
> +
> #define HUGE_VALF (__ocl_inff())
> #define INFINITY (__ocl_inff())
> #define NAN (__ocl_nanf())
> @@ -1651,6 +1657,14 @@ INLINE_OVERLOADABLE float native_exp10(float x) { return __gen_ocl_pow(10, x); } INLINE_OVERLOADABLE float __gen_ocl_internal_cbrt(float x) {
> return __gen_ocl_pow(x, 0.3333333333f); }
> +INLINE_OVERLOADABLE float __gen_ocl_internal_copysign(float x, float y)
> +{
> + union { unsigned u; float f; } ux, uy;
> + ux.f = x;
> + uy.f = y;
> + ux.u = (ux.u & 0x7fffffff) | (uy.u & 0x80000000u);
> + return ux.f;
> +}
> +
> #define BODY \
> *cosval = native_cos(x); \
> return native_sin(x);
> @@ -1688,6 +1702,37 @@ INLINE float __gen_ocl_asin_util(float x) {
> float w = p / q;
> return x + x*w;
> }
> +float __gen_ocl_scalbnf (float x, int n){
> + float two25 = 3.355443200e+07, /* 0x4c000000 */
> + twom25 = 2.9802322388e-08, /* 0x33000000 */
> + huge = 1.0e+30,
> + tiny = 1.0e-30;
> + int k,ix;
> + GEN_OCL_GET_FLOAT_WORD(ix,x);
> + k = (ix&0x7f800000)>>23; /* extract exponent */
> + if (k==0) { /* 0 or subnormal x */
> + if ((ix&0x7fffffff)==0) return x; /* +-0 */
> + x *= two25;
> + GEN_OCL_GET_FLOAT_WORD(ix,x);
> + k = ((ix&0x7f800000)>>23) - 25;
> + }
> + if (k==0xff) return x+x; /* NaN or Inf */
> + if (n< -50000)
> + return tiny*__gen_ocl_internal_copysign(tiny,x); /*underflow*/
> + if (n> 50000 || k+n > 0xfe)
> + return huge*__gen_ocl_internal_copysign(huge,x); /* overflow */
> + /* Now k and n are bounded we know that k = k+n does not overflow. */
> + k = k+n;
> + if (k > 0) { /* normal result */
> + GEN_OCL_SET_FLOAT_WORD(x,(ix&0x807fffff)|(k<<23));
> + return x;
> + }
> + if (k <= -25)
> + return tiny*__gen_ocl_internal_copysign(tiny,x); /*underflow*/
> + k += 25; /* subnormal result */
> + GEN_OCL_SET_FLOAT_WORD(x,(ix&0x807fffff)|(k<<23));
> + return x*twom25;
> +}
>
> INLINE_OVERLOADABLE float __gen_ocl_internal_asin(float x) {
> uint ix;
> @@ -1751,13 +1796,6 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_atanpi(float x) { INLINE_OVERLOADABLE float __gen_ocl_internal_atanh(float x) {
> return 0.5f * native_sqrt((1 + x) / (1 - x)); } -INLINE_OVERLOADABLE float __gen_ocl_internal_copysign(float x, float y) {
> - union { unsigned u; float f; } ux, uy;
> - ux.f = x;
> - uy.f = y;
> - ux.u = (ux.u & 0x7fffffff) | (uy.u & 0x80000000u);
> - return ux.f;
> -}
> INLINE_OVERLOADABLE float __gen_ocl_internal_erf(float x) {
> return M_2_SQRTPI_F * (x - __gen_ocl_pow(x, 3) / 3 + __gen_ocl_pow(x, 5) / 10 - __gen_ocl_pow(x, 7) / 42 + __gen_ocl_pow(x, 9) / 216); } @@ -2308,6 +2346,11 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_remainder(float x, float p){
> return x;
> }
>
> +INLINE_OVERLOADABLE float __gen_ocl_internal_ldexp(float x, int n) {
> + if(!__ocl_finitef(x)||x==(float)0.0) return x;
> + x = __gen_ocl_scalbnf(x,n);
> + return x;
> +}
>
> // TODO use llvm intrinsics definitions #define cos native_cos @@ -2338,6 +2381,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_remainder(float x, float p){ #define erfc __gen_ocl_internal_erfc #define fmod __gen_ocl_internal_fmod #define remainder __gen_ocl_internal_remainder
> +#define ldexp __gen_ocl_internal_ldexp
> PURE CONST float __gen_ocl_mad(float a, float b, float c); INLINE_OVERLOADABLE float mad(float a, float b, float c) {
> return __gen_ocl_mad(a, b, c);
> @@ -2551,48 +2595,6 @@ INLINE_OVERLOADABLE float remquo(float x, float y, local int *quo) { BODY; } INLINE_OVERLOADABLE float remquo(float x, float y, private int *quo) { BODY; } #undef BODY INLINE_OVERLOADABLE float native_divide(float x, float y) { return x/y; } -INLINE_OVERLOADABLE float ldexp(float x, int n) {
> - union { float f; unsigned u; } u;
> - u.f = x;
> - unsigned s = u.u & 0x80000000u, v = u.u & 0x7fffffff, d = 0;
> - if(v >= 0x7f800000)
> - return x;
> - if(v == 0)
> - return x;
> - int e = v >> 23;
> - v &= 0x7fffff;
> - if(e >= 1)
> - v |= 0x800000;
> - else {
> - v <<= 1;
> - while(v < 0x800000) {
> - v <<= 1;
> - e --;
> - }
> - }
> - e = add_sat(e, n);
> - if(e >= 255) {
> - u.u = s | 0x7f800000;
> - return u.f;
> - }
> - if(e > 0) {
> - u.u = s | (e << 23) | (v & 0x7fffff);
> - return u.f;
> - }
> - if(e <= -23) {
> - u.u = s;
> - return u.f;
> - }
> - while(e <= 0) {
> - d = (d >> 1) | (v << 31);
> - v >>= 1;
> - e ++;
> - }
> - if(d > 0x80000000u)
> - v ++;
> - u.u = s | v;
> - return u.f;
> -}
> INLINE_OVERLOADABLE float pown(float x, int n) {
> if (x == 0 && n == 0)
> return 1;
> --
> 1.7.10.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list