[Beignet] [PATCH] GBE: add fast path for more math functions
Guo Yejun
yejun.guo at intel.com
Thu Feb 20 13:51:33 PST 2014
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
backend/src/backend/program.cpp | 27 ++++++-
backend/src/builtin_vector_proto.def | 39 ++++++++-
backend/src/ocl_stdlib.tmpl.h | 147 +++++++++++++++++++++++++++++++++-
3 files changed, 207 insertions(+), 6 deletions(-)
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 98fcded..c2ac83d 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -461,8 +461,33 @@ namespace gbe {
#define REDEF_MATH_FUNC(x) "#ifdef "#x"\n#undef "#x"\n#endif\n#define "#x" __gen_ocl_internal_fastpath_"#x"\n"
std::string ocl_mathfunc_fastpath_str =
- REDEF_MATH_FUNC(sin)
+ REDEF_MATH_FUNC(acosh) /* each entry appends preprocessor text that #undefs the name if it is defined, then #defines it to __gen_ocl_internal_fastpath_<name> */
+ REDEF_MATH_FUNC(asinh)
+ REDEF_MATH_FUNC(atanh)
+ REDEF_MATH_FUNC(cbrt)
REDEF_MATH_FUNC(cos)
+ REDEF_MATH_FUNC(cosh)
+ REDEF_MATH_FUNC(cospi)
+ REDEF_MATH_FUNC(exp)
+ REDEF_MATH_FUNC(exp10)
+ REDEF_MATH_FUNC(expm1)
+ REDEF_MATH_FUNC(fmod)
+ REDEF_MATH_FUNC(hypot)
+ REDEF_MATH_FUNC(ilogb)
+ REDEF_MATH_FUNC(ldexp)
+ REDEF_MATH_FUNC(log)
+ REDEF_MATH_FUNC(log2)
+ REDEF_MATH_FUNC(log10)
+ REDEF_MATH_FUNC(log1p)
+ REDEF_MATH_FUNC(logb)
+ REDEF_MATH_FUNC(remainder)
+ REDEF_MATH_FUNC(rootn)
+ REDEF_MATH_FUNC(sin)
+ REDEF_MATH_FUNC(sincos)
+ REDEF_MATH_FUNC(sinh)
+ REDEF_MATH_FUNC(sinpi)
+ REDEF_MATH_FUNC(tan)
+ REDEF_MATH_FUNC(tanh)
"\n"
;
diff --git a/backend/src/builtin_vector_proto.def b/backend/src/builtin_vector_proto.def
index 7bc7c48..103e661 100644
--- a/backend/src/builtin_vector_proto.def
+++ b/backend/src/builtin_vector_proto.def
@@ -130,8 +130,43 @@ gentype tgamma (gentype)
gentype trunc (gentype)
##math function fast path
-gentype __gen_ocl_internal_fastpath_sin (gentype)
-gentype __gen_ocl_internal_fastpath_cos (gentype)
+gentype __gen_ocl_internal_fastpath_acosh (gentype x)
+gentype __gen_ocl_internal_fastpath_asinh (gentype x)
+gentype __gen_ocl_internal_fastpath_atanh (gentype x)
+gentype __gen_ocl_internal_fastpath_cbrt (gentype x)
+gentype __gen_ocl_internal_fastpath_cos (gentype x)
+gentype __gen_ocl_internal_fastpath_cosh (gentype x)
+gentype __gen_ocl_internal_fastpath_cospi (gentype x)
+gentype __gen_ocl_internal_fastpath_exp (gentype x)
+gentype __gen_ocl_internal_fastpath_exp10 (gentype x)
+gentype __gen_ocl_internal_fastpath_expm1 (gentype x)
+gentype __gen_ocl_internal_fastpath_fmod (gentype x, gentype y)
+gentype __gen_ocl_internal_fastpath_hypot (gentype x, gentype y)
+intn __gen_ocl_internal_fastpath_ilogb (floatn x)
+int __gen_ocl_internal_fastpath_ilogb (float x)
+intn __gen_ocl_internal_fastpath_ilogb (doublen x)
+int __gen_ocl_internal_fastpath_ilogb (double x)
+floatn __gen_ocl_internal_fastpath_ldexp (floatn x, intn k)
+floatn __gen_ocl_internal_fastpath_ldexp (floatn x, int k)
+float __gen_ocl_internal_fastpath_ldexp (float x, int k)
+doublen __gen_ocl_internal_fastpath_ldexp (doublen x, intn k)
+doublen __gen_ocl_internal_fastpath_ldexp (doublen x, int k)
+double __gen_ocl_internal_fastpath_ldexp (double x, int k)
+gentype __gen_ocl_internal_fastpath_log (gentype x)
+gentype __gen_ocl_internal_fastpath_log2 (gentype x)
+gentype __gen_ocl_internal_fastpath_log10 (gentype x)
+gentype __gen_ocl_internal_fastpath_log1p (gentype x)
+gentype __gen_ocl_internal_fastpath_logb (gentype x)
+gentype __gen_ocl_internal_fastpath_remainder (gentype x, gentype y)
+gentype __gen_ocl_internal_fastpath_rootn (gentype x, int n)
+gentype __gen_ocl_internal_fastpath_sin (gentype x)
+gentype __gen_ocl_internal_fastpath_sincos (gentype x, __global gentype *cosval)
+gentype __gen_ocl_internal_fastpath_sincos (gentype x, __local gentype *cosval)
+gentype __gen_ocl_internal_fastpath_sincos (gentype x, __private gentype *cosval)
+gentype __gen_ocl_internal_fastpath_sinh (gentype x)
+gentype __gen_ocl_internal_fastpath_sinpi (gentype x)
+gentype __gen_ocl_internal_fastpath_tan (gentype x)
+gentype __gen_ocl_internal_fastpath_tanh (gentype x)
##half_native_math
#gentype half_cos (gentype x)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index cea4700..46aab79 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -4715,14 +4715,155 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
{ return __gen_ocl_get_image_array_size(image); }
#endif
-INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin(float x) {
- return native_sin(x);
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_acosh (float x)
+{ /* Fast path: evaluates acosh(x) as ln(x + sqrt(x + 1) * sqrt(x - 1)) with the native_log / native_sqrt built-ins. */
+ return native_log(x + native_sqrt(x + 1) * native_sqrt(x - 1));
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_asinh (float x)
+{ /* Fast path: evaluates asinh(x) as ln(x + sqrt(x * x + 1)) with the native_log / native_sqrt built-ins. */
+ return native_log(x + native_sqrt(x * x + 1));
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_atanh (float x)
+{ /* Fast path: atanh(x) = 0.5 * ln((1 + x) / (1 - x)); the logarithm (not a square root) is required, e.g. so that atanh(0) == 0. */
+ return 0.5f * native_log((1 + x) / (1 - x));
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cbrt (float x)
+{ /* Fast path: cube root computed as __gen_ocl_pow(x, 0.3333333333f). NOTE(review): negative x presumably does not give the real cube root via pow with a fractional exponent - confirm. */
+ return __gen_ocl_pow(x, 0.3333333333f);
}
-INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cos(float x) {
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cos (float x)
+{ /* Fast path: forwards directly to the native_cos built-in. */
return native_cos(x);
}
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cosh (float x)
+{ /* Fast path: (1 + e^(-2x)) / (2 * e^(-x)), which is algebraically (e^x + e^(-x)) / 2, built from two native_exp calls. */
+ return (1 + native_exp(-2 * x)) / (2 * native_exp(-x));
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cospi (float x)
+{ /* Fast path: scales x by the M_PI_F constant and passes the product to __gen_ocl_cos. */
+ return __gen_ocl_cos(x * M_PI_F);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_exp (float x)
+{ /* Fast path: forwards directly to the native_exp built-in. */
+ return native_exp(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_exp10 (float x)
+{ /* Fast path: forwards directly to the native_exp10 built-in. */
+ return native_exp10(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_expm1 (float x)
+{ /* Fast path: __gen_ocl_pow(M_E_F, x) minus 1; there is no separate handling for x near 0, where the subtraction loses precision. */
+ return __gen_ocl_pow(M_E_F, x) - 1;
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_fmod (float x, float y)
+{ /* Fast path: x - y * q, where q = __gen_ocl_rndz(x / y) (presumably round-toward-zero - confirm against its declaration). */
+ return x-y*__gen_ocl_rndz(x/y);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_hypot (float x, float y)
+{ /* Fast path: __gen_ocl_sqrt of x*x + y*y, with no rescaling of the operands before squaring. */
+ return __gen_ocl_sqrt(x*x + y*y);
+}
+
+INLINE_OVERLOADABLE int __gen_ocl_internal_fastpath_ilogb (float x)
+{ /* Fast path: __gen_ocl_rndd(native_log2(x)) converted to int (rndd presumably rounds down - confirm). NOTE(review): x is used as-is, without taking its absolute value - check behaviour for negative x. */
+ return __gen_ocl_rndd(native_log2(x));
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_ldexp (float x, int n)
+{ /* Fast path: multiplies x by __gen_ocl_pow(2, n), i.e. x * 2^n. */
+ return __gen_ocl_pow(2, n) * x;
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log (float x)
+{ /* Fast path: forwards directly to the native_log built-in. */
+ return native_log(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log2 (float x)
+{ /* Fast path: forwards directly to the native_log2 built-in. */
+ return native_log2(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log10 (float x)
+{ /* Fast path: forwards directly to the native_log10 built-in. */
+ return native_log10(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log1p (float x)
+{ /* Fast path: native_log(x + 1); the addition is done in float first, so there is no special handling for x near 0. */
+ return native_log(x + 1);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_logb (float x)
+{ /* Fast path: same expression as the ilogb fast path, __gen_ocl_rndd(native_log2(x)), returned as float. NOTE(review): x is used without taking its absolute value - check negative x. */
+ return __gen_ocl_rndd(native_log2(x));
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_remainder (float x, float y)
+{ /* Fast path: x - y * q, where q = __gen_ocl_rnde(x / y) (presumably round-to-nearest-even - confirm against its declaration). */
+ return x-y*__gen_ocl_rnde(x/y);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_rootn(float x, int n)
+{ /* Fast path: n-th root computed as __gen_ocl_pow(x, 1.f / n); n == 0 makes the exponent a division by zero. */
+ return __gen_ocl_pow(x, 1.f / n);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin (float x)
+{ /* Fast path: forwards directly to the native_sin built-in. */
+ return native_sin(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __global float *cosval)
+{ /* Fast path (__global pointer overload): stores native_cos(x) through cosval and returns native_sin(x). */
+ *cosval = native_cos(x);
+ return native_sin(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __local float *cosval)
+{ /* Fast path (__local pointer overload): stores native_cos(x) through cosval and returns native_sin(x). */
+ *cosval = native_cos(x);
+ return native_sin(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __private float *cosval)
+{ /* Fast path (__private pointer overload): stores native_cos(x) through cosval and returns native_sin(x). */
+ *cosval = native_cos(x);
+ return native_sin(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sinh (float x)
+{ /* Fast path: (1 - e^(-2x)) / (2 * e^(-x)), which is algebraically (e^x - e^(-x)) / 2, built from two native_exp calls. */
+ return (1 - native_exp(-2 * x)) / (2 * native_exp(-x));
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sinpi (float x)
+{ /* Fast path: scales x by the M_PI_F constant and passes the product to __gen_ocl_sin. */
+ return __gen_ocl_sin(x * M_PI_F);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tan (float x)
+{ /* Fast path: forwards directly to the native_tan built-in. */
+ return native_tan(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tanh (float x)
+{ /* Fast path: with y = e^(-2x), returns (1 - y) / (1 + y), so only one native_exp call is needed. */
+ float y = native_exp(-2 * x);
+ return (1 - y) / (1 + y);
+}
+
#pragma OPENCL EXTENSION cl_khr_fp64 : disable
#undef DECL_IMAGE
--
1.7.9.5
More information about the Beignet
mailing list