[Beignet] [PATCH] GBE: add fast path for more math functions

Zhigang Gong zhigang.gong at linux.intel.com
Wed Feb 26 21:35:54 PST 2014


LGTM, pushed, thanks.

On Fri, Feb 21, 2014 at 05:51:33AM +0800, Guo Yejun wrote:
> 
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
>  backend/src/backend/program.cpp      |   27 ++++++-
>  backend/src/builtin_vector_proto.def |   39 ++++++++-
>  backend/src/ocl_stdlib.tmpl.h        |  147 +++++++++++++++++++++++++++++++++-
>  3 files changed, 207 insertions(+), 6 deletions(-)
> 
> diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
> index 98fcded..c2ac83d 100644
> --- a/backend/src/backend/program.cpp
> +++ b/backend/src/backend/program.cpp
> @@ -461,8 +461,33 @@ namespace gbe {
>  
>  #define REDEF_MATH_FUNC(x) "#ifdef "#x"\n#undef "#x"\n#endif\n#define "#x" __gen_ocl_internal_fastpath_"#x"\n"
>    std::string ocl_mathfunc_fastpath_str =
> -    REDEF_MATH_FUNC(sin)
> +    REDEF_MATH_FUNC(acosh)
> +    REDEF_MATH_FUNC(asinh)
> +    REDEF_MATH_FUNC(atanh)
> +    REDEF_MATH_FUNC(cbrt)
>      REDEF_MATH_FUNC(cos)
> +    REDEF_MATH_FUNC(cosh)
> +    REDEF_MATH_FUNC(cospi)
> +    REDEF_MATH_FUNC(exp)
> +    REDEF_MATH_FUNC(exp10)
> +    REDEF_MATH_FUNC(expm1)
> +    REDEF_MATH_FUNC(fmod)
> +    REDEF_MATH_FUNC(hypot)
> +    REDEF_MATH_FUNC(ilogb)
> +    REDEF_MATH_FUNC(ldexp)
> +    REDEF_MATH_FUNC(log)
> +    REDEF_MATH_FUNC(log2)
> +    REDEF_MATH_FUNC(log10)
> +    REDEF_MATH_FUNC(log1p)
> +    REDEF_MATH_FUNC(logb)
> +    REDEF_MATH_FUNC(remainder)
> +    REDEF_MATH_FUNC(rootn)
> +    REDEF_MATH_FUNC(sin)
> +    REDEF_MATH_FUNC(sincos)
> +    REDEF_MATH_FUNC(sinh)
> +    REDEF_MATH_FUNC(sinpi)
> +    REDEF_MATH_FUNC(tan)
> +    REDEF_MATH_FUNC(tanh)
>      "\n"
>    ;
>  
> diff --git a/backend/src/builtin_vector_proto.def b/backend/src/builtin_vector_proto.def
> index 7bc7c48..103e661 100644
> --- a/backend/src/builtin_vector_proto.def
> +++ b/backend/src/builtin_vector_proto.def
> @@ -130,8 +130,43 @@ gentype tgamma (gentype)
>  gentype trunc (gentype)
>  
>  ##math function fast path
> -gentype __gen_ocl_internal_fastpath_sin (gentype)
> -gentype __gen_ocl_internal_fastpath_cos (gentype)
> +gentype __gen_ocl_internal_fastpath_acosh (gentype x)
> +gentype __gen_ocl_internal_fastpath_asinh (gentype x)
> +gentype __gen_ocl_internal_fastpath_atanh (gentype x)
> +gentype __gen_ocl_internal_fastpath_cbrt (gentype x)
> +gentype __gen_ocl_internal_fastpath_cos (gentype x)
> +gentype __gen_ocl_internal_fastpath_cosh (gentype x)
> +gentype __gen_ocl_internal_fastpath_cospi (gentype x)
> +gentype __gen_ocl_internal_fastpath_exp (gentype x)
> +gentype __gen_ocl_internal_fastpath_exp10 (gentype x)
> +gentype __gen_ocl_internal_fastpath_expm1 (gentype x)
> +gentype __gen_ocl_internal_fastpath_fmod (gentype x, gentype y)
> +gentype __gen_ocl_internal_fastpath_hypot (gentype x, gentype y)
> +intn __gen_ocl_internal_fastpath_ilogb (floatn x)
> +int __gen_ocl_internal_fastpath_ilogb (float x)
> +intn __gen_ocl_internal_fastpath_ilogb (doublen x)
> +int __gen_ocl_internal_fastpath_ilogb (double x)
> +floatn __gen_ocl_internal_fastpath_ldexp (floatn x, intn k)
> +floatn __gen_ocl_internal_fastpath_ldexp (floatn x, int k)
> +float __gen_ocl_internal_fastpath_ldexp (float x, int k)
> +doublen __gen_ocl_internal_fastpath_ldexp (doublen x, intn k)
> +doublen __gen_ocl_internal_fastpath_ldexp (doublen x, int k)
> +double __gen_ocl_internal_fastpath_ldexp (double x, int k)
> +gentype __gen_ocl_internal_fastpath_log (gentype x)
> +gentype __gen_ocl_internal_fastpath_log2 (gentype x)
> +gentype __gen_ocl_internal_fastpath_log10 (gentype x)
> +gentype __gen_ocl_internal_fastpath_log1p (gentype x)
> +gentype __gen_ocl_internal_fastpath_logb (gentype x)
> +gentype __gen_ocl_internal_fastpath_remainder (gentype x, gentype y)
> +gentype __gen_ocl_internal_fastpath_rootn (gentype x, int n)
> +gentype __gen_ocl_internal_fastpath_sin (gentype x)
> +gentype __gen_ocl_internal_fastpath_sincos (gentype x, __global gentype *cosval)
> +gentype __gen_ocl_internal_fastpath_sincos (gentype x, __local gentype *cosval)
> +gentype __gen_ocl_internal_fastpath_sincos (gentype x, __private gentype *cosval)
> +gentype __gen_ocl_internal_fastpath_sinh (gentype x)
> +gentype __gen_ocl_internal_fastpath_sinpi (gentype x)
> +gentype __gen_ocl_internal_fastpath_tan (gentype x)
> +gentype __gen_ocl_internal_fastpath_tanh (gentype x)
>  
>  ##half_native_math
>  #gentype half_cos (gentype x)
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index cea4700..46aab79 100755
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -4715,14 +4715,155 @@ INLINE_OVERLOADABLE  size_t get_image_array_size(image1d_array_t image)
>    { return __gen_ocl_get_image_array_size(image); }
>  #endif
>  
> -INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin(float x) {
> -    return native_sin(x);
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_acosh (float x)
> +{
> +    return native_log(x + native_sqrt(x + 1) * native_sqrt(x - 1));
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_asinh (float x)
> +{
> +    return native_log(x + native_sqrt(x * x + 1));
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_atanh (float x)
> +{
> +    return 0.5f * native_log((1 + x) / (1 - x));
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cbrt (float x)
> +{
> +    return __gen_ocl_pow(x, 0.3333333333f);
>  }
>  
> -INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cos(float x) {
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cos (float x)
> +{
>      return native_cos(x);
>  }
>  
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cosh (float x)
> +{
> +    return (1 + native_exp(-2 * x)) / (2 * native_exp(-x));
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cospi (float x)
> +{
> +    return __gen_ocl_cos(x * M_PI_F);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_exp (float x)
> +{
> +    return native_exp(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_exp10 (float x)
> +{
> +    return native_exp10(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_expm1 (float x)
> +{
> +    return __gen_ocl_pow(M_E_F, x) - 1;
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_fmod (float x, float y)
> +{
> +    return x-y*__gen_ocl_rndz(x/y);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_hypot (float x, float y)
> +{
> +    return __gen_ocl_sqrt(x*x + y*y);
> +}
> +
> +INLINE_OVERLOADABLE int __gen_ocl_internal_fastpath_ilogb (float x)
> +{
> +    return __gen_ocl_rndd(native_log2(x));
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_ldexp (float x, int n)
> +{
> +    return __gen_ocl_pow(2, n) * x;
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log (float x)
> +{
> +    return native_log(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log2 (float x)
> +{
> +    return native_log2(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log10 (float x)
> +{
> +    return native_log10(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log1p (float x)
> +{
> +    return native_log(x + 1);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_logb (float x)
> +{
> +    return __gen_ocl_rndd(native_log2(x));
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_remainder (float x, float y)
> +{
> +    return x-y*__gen_ocl_rnde(x/y);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_rootn(float x, int n)
> +{
> +    return __gen_ocl_pow(x, 1.f / n);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin (float x)
> +{
> +    return native_sin(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __global float *cosval)
> +{
> +    *cosval = native_cos(x);
> +    return native_sin(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __local float *cosval)
> +{
> +    *cosval = native_cos(x);
> +    return native_sin(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __private float *cosval)
> +{
> +    *cosval = native_cos(x);
> +    return native_sin(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sinh (float x)
> +{
> +    return (1 - native_exp(-2 * x)) / (2 * native_exp(-x));
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sinpi (float x)
> +{
> +    return __gen_ocl_sin(x * M_PI_F);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tan (float x)
> +{
> +    return native_tan(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tanh (float x)
> +{
> +    float y = native_exp(-2 * x);
> +    return (1 - y) / (1 + y);
> +}
> +
>  #pragma OPENCL EXTENSION cl_khr_fp64 : disable
>  
>  #undef DECL_IMAGE
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list