[Beignet] [PATCH] [PATCH]GBE: improve precision of exp10

Zhigang Gong zhigang.gong at linux.intel.com
Wed Jan 15 18:59:38 PST 2014


The three patches are tested OK, pushed, thanks.

On Mon, Jan 13, 2014 at 08:54:02AM +0800, Lv Meng wrote:
> 
> Signed-off-by: Lv Meng <meng.lv at intel.com>
> ---
>  backend/src/ocl_stdlib.tmpl.h |   53 ++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 52 insertions(+), 1 deletion(-)
> 
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index 07a4fd9..907e326 100755
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -2381,6 +2381,57 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_atanh(float x) {
>    return __gen_ocl_internal_copysign(t, x);
>  }
>  
> +/* Computes 10**x in single precision.
> + * The argument is reduced to 10**x = 2**n * 10**g, 10**g is evaluated with a
> + * polynomial and the result is scaled by 2**n through ldexp.
> + * Returns NaN for NaN input, INFINITY for x > 38.6 (overflow, including +inf)
> + * and 0 for x < -MAXL10 (including -inf). */
> +INLINE_OVERLOADABLE float __gen_ocl_internal_exp10(float x){
> +  float px, qx, ans;
> +  int n, i;
> +  float MAXL10 = 38.230809449325611792;
> +  float LOG210 = 3.32192809488736234787e0;
> +  /* log10(2) split into a leading part and a small correction term */
> +  float LG102A = 3.00781250000000000000E-1;
> +  float LG102B = 2.48745663981195213739E-4;
> +  float P[6];
> +  P[0] = 2.063216740311022E-001;
> +  P[1] = 5.420251702225484E-001;
> +  P[2] = 1.171292686296281E+000;
> +  P[3] = 2.034649854009453E+000;
> +  P[4] = 2.650948748208892E+000;
> +  P[5] = 2.302585167056758E+000;
> +  /* NaN compares unequal to itself; return it before qx is cast to int */
> +  if( x != x )return x;
> +  /* 10**38.6 > FLT_MAX; also catches +inf and keeps n within int range */
> +  if( x > 38.6 )return INFINITY;
> +  /* underflow cut-off; also maps -inf to 0 */
> +  if( x < -MAXL10 )return 0.0;
> +  /* The following is necessary because range reduction blows up: */
> +  if( x == 0 )return 1.0;
> +
> +  /* Express 10**x = 10**g 2**n
> +    *	 = 10**g 10**( n log10(2) )
> +    *	 = 10**( g + n log10(2) )
> +    */
> +  px = x * LOG210;
> +  qx = __gen_ocl_internal_floor( px + 0.5 );
> +  n = qx;
> +  x -= qx * LG102A;
> +  x -= qx * LG102B;
> +
> +  /* polynomial approximation of the fractional part (Horner's rule):
> +    * 10**x - 1  ~=  x * ( P[0] x**5 + P[1] x**4 + ... + P[5] )
> +    */
> +  ans = P[0];
> +  for( i = 1; i < 6; i++ )
> +    ans = ans * x  +  P[i];
> +  px = 1.0 + x * ans;
> +
> +  /* multiply by power of 2 */
> +  return __gen_ocl_internal_ldexp( px, n );
> +}
> +
>  // TODO use llvm intrinsics definitions
>  #define cos native_cos
>  #define cospi __gen_ocl_internal_cospi
> @@ -2946,7 +2997,7 @@ DECL_HALF_ST_SPACE(__private)
>  #define log10 __gen_ocl_internal_log10
>  #define exp __gen_ocl_internal_exp
>  #define exp2 native_exp2
> -#define exp10 native_exp10
> +#define exp10 __gen_ocl_internal_exp10
>  #define expm1 __gen_ocl_internal_expm1
>  #define fmin __gen_ocl_internal_fmin
>  #define fmax __gen_ocl_internal_fmax
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list