[Beignet] [PATCH] GBE: add fast path for more math functions

Guo Yejun yejun.guo at intel.com
Thu Feb 20 13:51:33 PST 2014


Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 backend/src/backend/program.cpp      |   27 ++++++-
 backend/src/builtin_vector_proto.def |   39 ++++++++-
 backend/src/ocl_stdlib.tmpl.h        |  147 +++++++++++++++++++++++++++++++++-
 3 files changed, 207 insertions(+), 6 deletions(-)

diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 98fcded..c2ac83d 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -461,8 +461,33 @@ namespace gbe {
 
 #define REDEF_MATH_FUNC(x) "#ifdef "#x"\n#undef "#x"\n#endif\n#define "#x" __gen_ocl_internal_fastpath_"#x"\n"
   std::string ocl_mathfunc_fastpath_str =
-    REDEF_MATH_FUNC(sin)
+    REDEF_MATH_FUNC(acosh) /* each entry expands to source text that #undefs <fn> if defined, then #defines <fn> as __gen_ocl_internal_fastpath_<fn> */
+    REDEF_MATH_FUNC(asinh)
+    REDEF_MATH_FUNC(atanh)
+    REDEF_MATH_FUNC(cbrt)
     REDEF_MATH_FUNC(cos)
+    REDEF_MATH_FUNC(cosh)
+    REDEF_MATH_FUNC(cospi)
+    REDEF_MATH_FUNC(exp)
+    REDEF_MATH_FUNC(exp10)
+    REDEF_MATH_FUNC(expm1)
+    REDEF_MATH_FUNC(fmod)
+    REDEF_MATH_FUNC(hypot)
+    REDEF_MATH_FUNC(ilogb)
+    REDEF_MATH_FUNC(ldexp)
+    REDEF_MATH_FUNC(log)
+    REDEF_MATH_FUNC(log2)
+    REDEF_MATH_FUNC(log10)
+    REDEF_MATH_FUNC(log1p)
+    REDEF_MATH_FUNC(logb)
+    REDEF_MATH_FUNC(remainder)
+    REDEF_MATH_FUNC(rootn)
+    REDEF_MATH_FUNC(sin)
+    REDEF_MATH_FUNC(sincos)
+    REDEF_MATH_FUNC(sinh)
+    REDEF_MATH_FUNC(sinpi)
+    REDEF_MATH_FUNC(tan)
+    REDEF_MATH_FUNC(tanh)
     "\n"
   ;
 
diff --git a/backend/src/builtin_vector_proto.def b/backend/src/builtin_vector_proto.def
index 7bc7c48..103e661 100644
--- a/backend/src/builtin_vector_proto.def
+++ b/backend/src/builtin_vector_proto.def
@@ -130,8 +130,43 @@ gentype tgamma (gentype)
 gentype trunc (gentype)
 
 ##math function fast path
-gentype __gen_ocl_internal_fastpath_sin (gentype)
-gentype __gen_ocl_internal_fastpath_cos (gentype)
+gentype __gen_ocl_internal_fastpath_acosh (gentype x)
+gentype __gen_ocl_internal_fastpath_asinh (gentype x)
+gentype __gen_ocl_internal_fastpath_atanh (gentype x)
+gentype __gen_ocl_internal_fastpath_cbrt (gentype x)
+gentype __gen_ocl_internal_fastpath_cos (gentype x)
+gentype __gen_ocl_internal_fastpath_cosh (gentype x)
+gentype __gen_ocl_internal_fastpath_cospi (gentype x)
+gentype __gen_ocl_internal_fastpath_exp (gentype x)
+gentype __gen_ocl_internal_fastpath_exp10 (gentype x)
+gentype __gen_ocl_internal_fastpath_expm1 (gentype x)
+gentype __gen_ocl_internal_fastpath_fmod (gentype x, gentype y)
+gentype __gen_ocl_internal_fastpath_hypot (gentype x, gentype y)
+intn __gen_ocl_internal_fastpath_ilogb (floatn x)
+int __gen_ocl_internal_fastpath_ilogb (float x)
+intn __gen_ocl_internal_fastpath_ilogb (doublen x)
+int __gen_ocl_internal_fastpath_ilogb (double x)
+floatn __gen_ocl_internal_fastpath_ldexp (floatn x, intn k)
+floatn __gen_ocl_internal_fastpath_ldexp (floatn x, int k)
+float __gen_ocl_internal_fastpath_ldexp (float x, int k)
+doublen __gen_ocl_internal_fastpath_ldexp (doublen x, intn k)
+doublen __gen_ocl_internal_fastpath_ldexp (doublen x, int k)
+double __gen_ocl_internal_fastpath_ldexp (double x, int k)
+gentype __gen_ocl_internal_fastpath_log (gentype x)
+gentype __gen_ocl_internal_fastpath_log2 (gentype x)
+gentype __gen_ocl_internal_fastpath_log10 (gentype x)
+gentype __gen_ocl_internal_fastpath_log1p (gentype x)
+gentype __gen_ocl_internal_fastpath_logb (gentype x)
+gentype __gen_ocl_internal_fastpath_remainder (gentype x, gentype y)
+gentype __gen_ocl_internal_fastpath_rootn (gentype x, int n)
+gentype __gen_ocl_internal_fastpath_sin (gentype x)
+gentype __gen_ocl_internal_fastpath_sincos (gentype x, __global gentype *cosval)
+gentype __gen_ocl_internal_fastpath_sincos (gentype x, __local gentype *cosval)
+gentype __gen_ocl_internal_fastpath_sincos (gentype x, __private gentype *cosval)
+gentype __gen_ocl_internal_fastpath_sinh (gentype x)
+gentype __gen_ocl_internal_fastpath_sinpi (gentype x)
+gentype __gen_ocl_internal_fastpath_tan (gentype x)
+gentype __gen_ocl_internal_fastpath_tanh (gentype x)
 
 ##half_native_math
 #gentype half_cos (gentype x)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index cea4700..46aab79 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -4715,14 +4715,155 @@ INLINE_OVERLOADABLE  size_t get_image_array_size(image1d_array_t image)
   { return __gen_ocl_get_image_array_size(image); }
 #endif
 
-INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin(float x) {
-    return native_sin(x);
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_acosh (float x) /* Fast-path acosh for float, built from native_log and native_sqrt. */
+{
+    return native_log(x + native_sqrt(x + 1) * native_sqrt(x - 1)); /* acosh(x) = ln(x + sqrt(x + 1) * sqrt(x - 1)) */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_asinh (float x) /* Fast-path asinh for float, built from native_log and native_sqrt. */
+{
+    return native_log(x + native_sqrt(x * x + 1)); /* asinh(x) = ln(x + sqrt(x^2 + 1)) */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_atanh (float x) /* Fast-path atanh for float, built from native_log. */
+{
+    return 0.5f * native_log((1 + x) / (1 - x)); /* atanh(x) = 0.5 * ln((1 + x) / (1 - x)); a logarithm is required here, not a square root */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cbrt (float x) /* Fast-path cube root for float: x raised to approximately 1/3 via __gen_ocl_pow. */
+{
+    return __gen_ocl_pow(x, 0.3333333333f); /* NOTE(review): pow with a fractional exponent presumably yields NaN for x < 0, unlike a true cbrt - confirm this is acceptable */
 }
 
-INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cos(float x) {
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cos (float x) /* Fast-path cos for float: forwards directly to native_cos. */
+{
     return native_cos(x);
 }
 
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cosh (float x) /* Fast-path cosh for float, built from native_exp. */
+{
+    return (1 + native_exp(-2 * x)) / (2 * native_exp(-x)); /* algebraically (e^x + e^-x) / 2, i.e. numerator and denominator both scaled by e^-x */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cospi (float x) /* Fast-path cospi for float: cos(pi * x). */
+{
+    return __gen_ocl_cos(x * M_PI_F); /* scales the argument by M_PI_F, then calls __gen_ocl_cos directly */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_exp (float x) /* Fast-path exp for float: forwards directly to native_exp. */
+{
+    return native_exp(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_exp10 (float x) /* Fast-path exp10 for float: forwards directly to native_exp10. */
+{
+    return native_exp10(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_expm1 (float x) /* Fast-path expm1 for float: e^x - 1 computed via __gen_ocl_pow. */
+{
+    return __gen_ocl_pow(M_E_F, x) - 1; /* NOTE(review): the subtraction cancels for x near 0, so this likely loses the precision expm1 normally provides - confirm acceptable */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_fmod (float x, float y) /* Fast-path fmod for float: x minus y times the rounded quotient x / y. */
+{
+    return x-y*__gen_ocl_rndz(x/y); /* __gen_ocl_rndz presumably rounds toward zero (truncation), giving fmod semantics - verify against its definition */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_hypot (float x, float y) /* Fast-path hypot for float: sqrt(x^2 + y^2). */
+{
+    return __gen_ocl_sqrt(x*x + y*y); /* direct formula with no rescaling of the operands, so x*x or y*y can overflow or underflow for extreme inputs */
+}
+
+INLINE_OVERLOADABLE int __gen_ocl_internal_fastpath_ilogb (float x) /* Fast-path ilogb for float: rounded log2(x), implicitly converted to int on return. */
+{
+    return __gen_ocl_rndd(native_log2(x)); /* __gen_ocl_rndd presumably rounds down (floor) - verify; NOTE(review): x is not passed through fabs, so x <= 0 likely gives a meaningless result */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_ldexp (float x, int n) /* Fast-path ldexp for float: x * 2^n. */
+{
+    return __gen_ocl_pow(2, n) * x; /* 2^n is computed with __gen_ocl_pow and then multiplied by x */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log (float x) /* Fast-path natural log for float: forwards directly to native_log. */
+{
+    return native_log(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log2 (float x) /* Fast-path base-2 log for float: forwards directly to native_log2. */
+{
+    return native_log2(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log10 (float x) /* Fast-path base-10 log for float: forwards directly to native_log10. */
+{
+    return native_log10(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log1p (float x) /* Fast-path log1p for float: ln(1 + x) via native_log. */
+{
+    return native_log(x + 1); /* NOTE(review): forming x + 1 first rounds away small x, so accuracy near 0 is likely worse than a true log1p - confirm acceptable */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_logb (float x) /* Fast-path logb for float: rounded log2(x), returned as float. */
+{
+    return __gen_ocl_rndd(native_log2(x)); /* same expression as the ilogb fast path; __gen_ocl_rndd presumably rounds down (floor) - verify; x is not passed through fabs */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_remainder (float x, float y) /* Fast-path remainder for float: x minus y times the rounded quotient x / y. */
+{
+    return x-y*__gen_ocl_rnde(x/y); /* __gen_ocl_rnde presumably rounds to nearest-even, giving IEEE remainder semantics - verify against its definition */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_rootn(float x, int n) /* Fast-path rootn for float: n-th root of x computed as x^(1/n). */
+{
+    return __gen_ocl_pow(x, 1.f / n); /* NOTE(review): pow with a fractional exponent presumably yields NaN for x < 0 even when n is odd - confirm acceptable */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin (float x) /* Fast-path sin for float: forwards directly to native_sin. */
+{
+    return native_sin(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __global float *cosval) /* Fast-path sincos, __global output pointer overload. */
+{
+    *cosval = native_cos(x); /* cosine is written through the caller-supplied pointer */
+    return native_sin(x); /* sine is the return value */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __local float *cosval) /* Fast-path sincos, __local output pointer overload. */
+{
+    *cosval = native_cos(x); /* cosine is written through the caller-supplied pointer */
+    return native_sin(x); /* sine is the return value */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __private float *cosval) /* Fast-path sincos, __private output pointer overload. */
+{
+    *cosval = native_cos(x); /* cosine is written through the caller-supplied pointer */
+    return native_sin(x); /* sine is the return value */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sinh (float x) /* Fast-path sinh for float, built from native_exp. */
+{
+    return (1 - native_exp(-2 * x)) / (2 * native_exp(-x)); /* algebraically (e^x - e^-x) / 2, i.e. numerator and denominator both scaled by e^-x */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sinpi (float x) /* Fast-path sinpi for float: sin(pi * x). */
+{
+    return __gen_ocl_sin(x * M_PI_F); /* scales the argument by M_PI_F, then calls __gen_ocl_sin directly */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tan (float x) /* Fast-path tan for float: forwards directly to native_tan. */
+{
+    return native_tan(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tanh (float x) /* Fast-path tanh for float, built from a single native_exp call. */
+{
+    float y = native_exp(-2 * x); /* y = e^(-2x), evaluated once and reused in numerator and denominator */
+    return (1 - y) / (1 + y); /* tanh(x) = (1 - e^(-2x)) / (1 + e^(-2x)) */
+}
+
 #pragma OPENCL EXTENSION cl_khr_fp64 : disable
 
 #undef DECL_IMAGE
-- 
1.7.9.5



More information about the Beignet mailing list