[Beignet] [PATCH] GBE: add fast path for more math functions
Zhigang Gong
zhigang.gong at linux.intel.com
Wed Feb 26 21:35:54 PST 2014
LGTM, pushed, thanks.
On Fri, Feb 21, 2014 at 05:51:33AM +0800, Guo Yejun wrote:
>
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
> backend/src/backend/program.cpp | 27 ++++++-
> backend/src/builtin_vector_proto.def | 39 ++++++++-
> backend/src/ocl_stdlib.tmpl.h | 147 +++++++++++++++++++++++++++++++++-
> 3 files changed, 207 insertions(+), 6 deletions(-)
>
> diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
> index 98fcded..c2ac83d 100644
> --- a/backend/src/backend/program.cpp
> +++ b/backend/src/backend/program.cpp
> @@ -461,8 +461,33 @@ namespace gbe {
>
> #define REDEF_MATH_FUNC(x) "#ifdef "#x"\n#undef "#x"\n#endif\n#define "#x" __gen_ocl_internal_fastpath_"#x"\n"
> std::string ocl_mathfunc_fastpath_str =
> - REDEF_MATH_FUNC(sin)
> + REDEF_MATH_FUNC(acosh)
> + REDEF_MATH_FUNC(asinh)
> + REDEF_MATH_FUNC(atanh)
> + REDEF_MATH_FUNC(cbrt)
> REDEF_MATH_FUNC(cos)
> + REDEF_MATH_FUNC(cosh)
> + REDEF_MATH_FUNC(cospi)
> + REDEF_MATH_FUNC(exp)
> + REDEF_MATH_FUNC(exp10)
> + REDEF_MATH_FUNC(expm1)
> + REDEF_MATH_FUNC(fmod)
> + REDEF_MATH_FUNC(hypot)
> + REDEF_MATH_FUNC(ilogb)
> + REDEF_MATH_FUNC(ldexp)
> + REDEF_MATH_FUNC(log)
> + REDEF_MATH_FUNC(log2)
> + REDEF_MATH_FUNC(log10)
> + REDEF_MATH_FUNC(log1p)
> + REDEF_MATH_FUNC(logb)
> + REDEF_MATH_FUNC(remainder)
> + REDEF_MATH_FUNC(rootn)
> + REDEF_MATH_FUNC(sin)
> + REDEF_MATH_FUNC(sincos)
> + REDEF_MATH_FUNC(sinh)
> + REDEF_MATH_FUNC(sinpi)
> + REDEF_MATH_FUNC(tan)
> + REDEF_MATH_FUNC(tanh)
> "\n"
> ;
>
> diff --git a/backend/src/builtin_vector_proto.def b/backend/src/builtin_vector_proto.def
> index 7bc7c48..103e661 100644
> --- a/backend/src/builtin_vector_proto.def
> +++ b/backend/src/builtin_vector_proto.def
> @@ -130,8 +130,43 @@ gentype tgamma (gentype)
> gentype trunc (gentype)
>
> ##math function fast path
> -gentype __gen_ocl_internal_fastpath_sin (gentype)
> -gentype __gen_ocl_internal_fastpath_cos (gentype)
> +gentype __gen_ocl_internal_fastpath_acosh (gentype x)
> +gentype __gen_ocl_internal_fastpath_asinh (gentype x)
> +gentype __gen_ocl_internal_fastpath_atanh (gentype x)
> +gentype __gen_ocl_internal_fastpath_cbrt (gentype x)
> +gentype __gen_ocl_internal_fastpath_cos (gentype x)
> +gentype __gen_ocl_internal_fastpath_cosh (gentype x)
> +gentype __gen_ocl_internal_fastpath_cospi (gentype x)
> +gentype __gen_ocl_internal_fastpath_exp (gentype x)
> +gentype __gen_ocl_internal_fastpath_exp10 (gentype x)
> +gentype __gen_ocl_internal_fastpath_expm1 (gentype x)
> +gentype __gen_ocl_internal_fastpath_fmod (gentype x, gentype y)
> +gentype __gen_ocl_internal_fastpath_hypot (gentype x, gentype y)
> +intn __gen_ocl_internal_fastpath_ilogb (floatn x)
> +int __gen_ocl_internal_fastpath_ilogb (float x)
> +intn __gen_ocl_internal_fastpath_ilogb (doublen x)
> +int __gen_ocl_internal_fastpath_ilogb (double x)
> +floatn __gen_ocl_internal_fastpath_ldexp (floatn x, intn k)
> +floatn __gen_ocl_internal_fastpath_ldexp (floatn x, int k)
> +float __gen_ocl_internal_fastpath_ldexp (float x, int k)
> +doublen __gen_ocl_internal_fastpath_ldexp (doublen x, intn k)
> +doublen __gen_ocl_internal_fastpath_ldexp (doublen x, int k)
> +double __gen_ocl_internal_fastpath_ldexp (double x, int k)
> +gentype __gen_ocl_internal_fastpath_log (gentype x)
> +gentype __gen_ocl_internal_fastpath_log2 (gentype x)
> +gentype __gen_ocl_internal_fastpath_log10 (gentype x)
> +gentype __gen_ocl_internal_fastpath_log1p (gentype x)
> +gentype __gen_ocl_internal_fastpath_logb (gentype x)
> +gentype __gen_ocl_internal_fastpath_remainder (gentype x, gentype y)
> +gentype __gen_ocl_internal_fastpath_rootn (gentype x, int n)
> +gentype __gen_ocl_internal_fastpath_sin (gentype x)
> +gentype __gen_ocl_internal_fastpath_sincos (gentype x, __global gentype *cosval)
> +gentype __gen_ocl_internal_fastpath_sincos (gentype x, __local gentype *cosval)
> +gentype __gen_ocl_internal_fastpath_sincos (gentype x, __private gentype *cosval)
> +gentype __gen_ocl_internal_fastpath_sinh (gentype x)
> +gentype __gen_ocl_internal_fastpath_sinpi (gentype x)
> +gentype __gen_ocl_internal_fastpath_tan (gentype x)
> +gentype __gen_ocl_internal_fastpath_tanh (gentype x)
>
> ##half_native_math
> #gentype half_cos (gentype x)
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index cea4700..46aab79 100755
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -4715,14 +4715,155 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
> { return __gen_ocl_get_image_array_size(image); }
> #endif
>
> -INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin(float x) {
> - return native_sin(x);
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_acosh (float x)
> +{
> + return native_log(x + native_sqrt(x + 1) * native_sqrt(x - 1));
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_asinh (float x)
> +{
> + return native_log(x + native_sqrt(x * x + 1));
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_atanh (float x)
> +{
> + return 0.5f * native_sqrt((1 + x) / (1 - x));
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cbrt (float x)
> +{
> + return __gen_ocl_pow(x, 0.3333333333f);
> }
>
> -INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cos(float x) {
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cos (float x)
> +{
> return native_cos(x);
> }
>
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cosh (float x)
> +{
> + return (1 + native_exp(-2 * x)) / (2 * native_exp(-x));
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cospi (float x)
> +{
> + return __gen_ocl_cos(x * M_PI_F);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_exp (float x)
> +{
> + return native_exp(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_exp10 (float x)
> +{
> + return native_exp10(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_expm1 (float x)
> +{
> + return __gen_ocl_pow(M_E_F, x) - 1;
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_fmod (float x, float y)
> +{
> + return x-y*__gen_ocl_rndz(x/y);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_hypot (float x, float y)
> +{
> + return __gen_ocl_sqrt(x*x + y*y);
> +}
> +
> +INLINE_OVERLOADABLE int __gen_ocl_internal_fastpath_ilogb (float x)
> +{
> + return __gen_ocl_rndd(native_log2(x));
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_ldexp (float x, int n)
> +{
> + return __gen_ocl_pow(2, n) * x;
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log (float x)
> +{
> + return native_log(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log2 (float x)
> +{
> + return native_log2(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log10 (float x)
> +{
> + return native_log10(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log1p (float x)
> +{
> + return native_log(x + 1);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_logb (float x)
> +{
> + return __gen_ocl_rndd(native_log2(x));
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_remainder (float x, float y)
> +{
> + return x-y*__gen_ocl_rnde(x/y);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_rootn(float x, int n)
> +{
> + return __gen_ocl_pow(x, 1.f / n);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin (float x)
> +{
> + return native_sin(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __global float *cosval)
> +{
> + *cosval = native_cos(x);
> + return native_sin(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __local float *cosval)
> +{
> + *cosval = native_cos(x);
> + return native_sin(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __private float *cosval)
> +{
> + *cosval = native_cos(x);
> + return native_sin(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sinh (float x)
> +{
> + return (1 - native_exp(-2 * x)) / (2 * native_exp(-x));
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sinpi (float x)
> +{
> + return __gen_ocl_sin(x * M_PI_F);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tan (float x)
> +{
> + return native_tan(x);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tanh (float x)
> +{
> + float y = native_exp(-2 * x);
> + return (1 - y) / (1 + y);
> +}
> +
> #pragma OPENCL EXTENSION cl_khr_fp64 : disable
>
> #undef DECL_IMAGE
> --
> 1.7.9.5
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet mailing list