[Beignet] [PATCH 1/2] Enabled nineteen built-in functions

Homer Hsing homer.xing at intel.com
Sun May 12 18:25:45 PDT 2013


Enabled hypot, sinpi, log1p, logb, tanpi, sinh, cosh, tanh,
asinh, acosh, atanh, remainder, rint, fdim, fract, native_divide,
pown, rootn, ldexp.

Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/ocl_stdlib.h | 102 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)

diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index 8a92248..0ebc059 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -394,11 +394,15 @@ PURE CONST float __gen_ocl_rndz(float x);
 PURE CONST float __gen_ocl_rnde(float x);
 PURE CONST float __gen_ocl_rndu(float x);
 PURE CONST float __gen_ocl_rndd(float x);
+INLINE OVERLOADABLE float hypot(float x, float y) { return __gen_ocl_sqrt(x*x + y*y); } // sqrt(x^2 + y^2), unscaled; NOTE(review): x*x + y*y can overflow or underflow before the sqrt
 INLINE OVERLOADABLE float native_cos(float x) { return __gen_ocl_cos(x); }
 INLINE OVERLOADABLE float __gen_ocl_internal_cospi(float x) {
   return __gen_ocl_cos(x * M_PI_F);
 }
 INLINE OVERLOADABLE float native_sin(float x) { return __gen_ocl_sin(x); }
+INLINE OVERLOADABLE float __gen_ocl_internal_sinpi(float x) { // sine of x * M_PI_F, mirroring __gen_ocl_internal_cospi
+  return __gen_ocl_sin(x * M_PI_F);
+}
 INLINE OVERLOADABLE float native_sqrt(float x) { return __gen_ocl_sqrt(x); }
 INLINE OVERLOADABLE float native_rsqrt(float x) { return __gen_ocl_rsqrt(x); }
 INLINE OVERLOADABLE float native_log2(float x) { return __gen_ocl_log(x); }
@@ -408,11 +412,16 @@ INLINE OVERLOADABLE float native_log(float x) {
 INLINE OVERLOADABLE float native_log10(float x) {
   return native_log2(x) * 0.3010299956f;
 }
+INLINE OVERLOADABLE float log1p(float x) { return native_log(x + 1); } // log(1 + x) computed directly; NOTE(review): x + 1 rounds away tiny x, so accuracy near 0 is limited
+INLINE OVERLOADABLE float logb(float x) { return __gen_ocl_rndd(native_log2(x < 0 ? -x : x)); } // exponent of |x|: log2 of the magnitude, rounded down; negative inputs are negated first because logb is defined on |x|
 INLINE OVERLOADABLE float native_powr(float x, float y) { return __gen_ocl_pow(x,y); }
 INLINE OVERLOADABLE float native_recip(float x) { return __gen_ocl_rcp(x); }
 INLINE OVERLOADABLE float native_tan(float x) {
   return native_sin(x) / native_cos(x);
 }
+INLINE OVERLOADABLE float __gen_ocl_internal_tanpi(float x) { // tangent of x * M_PI_F; native_tan is native_sin / native_cos
+  return native_tan(x * M_PI_F);
+}
 INLINE OVERLOADABLE float native_exp(float x) { return __gen_ocl_pow(M_E_F, x); }
 INLINE OVERLOADABLE float native_exp2(float x) { return __gen_ocl_pow(2, x); }
 INLINE OVERLOADABLE float native_exp10(float x) { return __gen_ocl_pow(10, x); }
@@ -420,6 +429,25 @@ INLINE OVERLOADABLE float __gen_ocl_internal_expm1(float x) { return __gen_ocl_p
 INLINE OVERLOADABLE float __gen_ocl_internal_cbrt(float x) {
   return __gen_ocl_pow(x, 0.3333333333f);
 }
+INLINE OVERLOADABLE float __gen_ocl_internal_sinh(float x) { // (e^x - e^-x) / 2 with numerator and denominator both scaled by e^-x
+  return (1 - native_exp(-2 * x)) / (2 * native_exp(-x));
+}
+INLINE OVERLOADABLE float __gen_ocl_internal_cosh(float x) { // (e^x + e^-x) / 2 with numerator and denominator both scaled by e^-x
+  return (1 + native_exp(-2 * x)) / (2 * native_exp(-x));
+}
+INLINE OVERLOADABLE float __gen_ocl_internal_tanh(float x) { // (1 - e^-2x) / (1 + e^-2x)
+  float y = native_exp(-2 * x); // evaluated once and shared by numerator and denominator
+  return (1 - y) / (1 + y);
+}
+INLINE OVERLOADABLE float __gen_ocl_internal_asinh(float x) { // log(x + sqrt(x^2 + 1))
+  return native_log(x + native_sqrt(x * x + 1));
+}
+INLINE OVERLOADABLE float __gen_ocl_internal_acosh(float x) { // log(x + sqrt(x + 1) * sqrt(x - 1))
+  return native_log(x + native_sqrt(x + 1) * native_sqrt(x - 1));
+}
+INLINE OVERLOADABLE float __gen_ocl_internal_atanh(float x) { // atanh(x) = 0.5 * log((1 + x) / (1 - x))
+  return 0.5f * native_log((1 + x) / (1 - x)); // natural log, not sqrt: sqrt here would give 0.5 at x == 0 instead of 0
+}
 
 // XXX work-around PTX profile
 #define sqrt native_sqrt
@@ -435,14 +463,27 @@ INLINE OVERLOADABLE float __gen_ocl_internal_log10(float x) { return native_log1
 INLINE OVERLOADABLE float __gen_ocl_internal_exp(float x)   { return native_exp(x); }
 INLINE OVERLOADABLE float powr(float x, float y) { return __gen_ocl_pow(x,y); }
 INLINE OVERLOADABLE float fmod(float x, float y) { return x-y*__gen_ocl_rndz(x/y); }
+INLINE OVERLOADABLE float remainder(float x, float y) { return x-y*__gen_ocl_rnde(x/y); } // same shape as fmod above, but the quotient goes through __gen_ocl_rnde instead of __gen_ocl_rndz
+INLINE OVERLOADABLE float __gen_ocl_internal_rint(float x) { // round x to an integral value, halfway cases to even
+  return __gen_ocl_rnde(x); // 2 * round(x / 2) could only produce even results, e.g. rint(3) came out as 4
+}
 
 // TODO use llvm intrinsics definitions
 #define cos native_cos
 #define cospi __gen_ocl_internal_cospi
+#define cosh __gen_ocl_internal_cosh
+#define acosh __gen_ocl_internal_acosh
 #define sin native_sin
+#define sinpi __gen_ocl_internal_sinpi
+#define sinh __gen_ocl_internal_sinh
+#define asinh __gen_ocl_internal_asinh
 #define tan native_tan
+#define tanpi __gen_ocl_internal_tanpi
+#define tanh __gen_ocl_internal_tanh
+#define atanh __gen_ocl_internal_atanh
 #define pow powr
 #define cbrt __gen_ocl_internal_cbrt
+#define rint __gen_ocl_internal_rint
 
 INLINE OVERLOADABLE float mad(float a, float b, float c) {
   return a*b+c;
@@ -513,6 +554,23 @@ DECL_MIN_MAX(unsigned char)
 INLINE OVERLOADABLE float __gen_ocl_internal_fmax(float a, float b) { return max(a,b); }
 INLINE OVERLOADABLE float __gen_ocl_internal_fmin(float a, float b) { return min(a,b); }
 INLINE OVERLOADABLE float mix(float x, float y, float a) { return x + (y-x)*a;}
+INLINE OVERLOADABLE float __gen_ocl_internal_fdim(float x, float y) { // positive difference: x - y when x is the larger, otherwise y - y
+  return __gen_ocl_internal_fmax(x, y) - y;
+}
+INLINE OVERLOADABLE float fract(float x, float *p) { // writes __gen_ocl_internal_floor(x) to *p and returns what is left of x
+  *p = __gen_ocl_internal_floor(x);
+  return __gen_ocl_internal_fmin(x - *p, 0x1.FFFFFep-1F); // cap at the largest float below 1.0 so the result never reaches 1
+}
+INLINE OVERLOADABLE float native_divide(float x, float y) { return x/y; } // plain float division, no special handling
+INLINE OVERLOADABLE float ldexp(float x, int n) { // x * 2^n, with 2^n obtained from __gen_ocl_pow
+  return __gen_ocl_pow(2, n) * x;
+}
+INLINE OVERLOADABLE float pown(float x, int n) { // x^n by forwarding the integer exponent to powr
+  return powr(x, n); // NOTE(review): pown must accept negative x; confirm powr/__gen_ocl_pow handles a negative base
+}
+INLINE OVERLOADABLE float rootn(float x, int n) { // x^(1/n) by forwarding to powr
+  return powr(x, 1.f / n); // NOTE(review): n == 0 makes the exponent 1.f / 0, and negative x with odd n relies on powr; confirm both
+}
 
 /////////////////////////////////////////////////////////////////////////////
 // Geometric functions (see 6.11.5 of OCL 1.1 spec)
@@ -652,11 +710,21 @@ DECL_UNTYPED_RW_ALL(float)
   }
 DECL_VECTOR_1OP(native_cos, float);
 DECL_VECTOR_1OP(__gen_ocl_internal_cospi, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_cosh, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_acosh, float);
 DECL_VECTOR_1OP(native_sin, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_sinpi, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_sinh, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_asinh, float);
 DECL_VECTOR_1OP(native_tan, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_tanpi, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_tanh, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_atanh, float);
 DECL_VECTOR_1OP(native_sqrt, float);
 DECL_VECTOR_1OP(native_rsqrt, float);
 DECL_VECTOR_1OP(native_log2, float);
+DECL_VECTOR_1OP(log1p, float);
+DECL_VECTOR_1OP(logb, float);
 DECL_VECTOR_1OP(native_recip, float);
 DECL_VECTOR_1OP(native_exp2, float);
 DECL_VECTOR_1OP(native_exp10, float);
@@ -670,6 +738,7 @@ DECL_VECTOR_1OP(__gen_ocl_internal_ceil, float);
 DECL_VECTOR_1OP(__gen_ocl_internal_log, float);
 DECL_VECTOR_1OP(__gen_ocl_internal_log2, float);
 DECL_VECTOR_1OP(__gen_ocl_internal_log10, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_rint, float);
 #undef DECL_VECTOR_1OP
 /////////////////////////////////////////////////////////////////////////////
 // Arithmetic functions
@@ -697,12 +766,43 @@ DECL_VECTOR_1OP(__gen_ocl_internal_log10, float);
     dst.s89abcdef = NAME(v0.s89abcdef, v1.s89abcdef);\
     return dst;\
   }
+DECL_VECTOR_2OP(hypot, float);
 DECL_VECTOR_2OP(min, float);
 DECL_VECTOR_2OP(max, float);
 DECL_VECTOR_2OP(__gen_ocl_internal_fmin, float);
 DECL_VECTOR_2OP(__gen_ocl_internal_fmax, float);
+DECL_VECTOR_2OP(__gen_ocl_internal_fdim, float);
 DECL_VECTOR_2OP(fmod, float);
+DECL_VECTOR_2OP(remainder, float);
 DECL_VECTOR_2OP(powr, float);
+DECL_VECTOR_2OP(native_divide, float);
+#undef DECL_VECTOR_2OP
+
+#define DECL_VECTOR_2OP(NAME, TYPE, TYPE2) /* component-wise NAME(TYPE, TYPE2) overloads for vector widths 2, 3, 4, 8 and 16 */ \
+  INLINE OVERLOADABLE TYPE##2 NAME(TYPE##2 v0, TYPE2##2 v1) { \
+    return (TYPE##2)(NAME(v0.x, v1.x), NAME(v0.y, v1.y)); /* .y lane pairs v0.y with v1.y, like every other lane */ \
+  }\
+  INLINE OVERLOADABLE TYPE##3 NAME(TYPE##3 v0, TYPE2##3 v1) { \
+    return (TYPE##3)(NAME(v0.x, v1.x), NAME(v0.y, v1.y), NAME(v0.z, v1.z)); \
+  }\
+  INLINE OVERLOADABLE TYPE##4 NAME(TYPE##4 v0, TYPE2##4 v1) { \
+    return (TYPE##4)(NAME(v0.x, v1.x), NAME(v0.y, v1.y), NAME(v0.z, v1.z), NAME(v0.w, v1.w)); \
+  }\
+  INLINE OVERLOADABLE TYPE##8 NAME(TYPE##8 v0, TYPE2##8 v1) { \
+    TYPE##8 dst;\
+    dst.s0123 = NAME(v0.s0123, v1.s0123);\
+    dst.s4567 = NAME(v0.s4567, v1.s4567);\
+    return dst;\
+  }\
+  INLINE OVERLOADABLE TYPE##16 NAME(TYPE##16 v0, TYPE2##16 v1) { \
+    TYPE##16 dst;\
+    dst.s01234567 = NAME(v0.s01234567, v1.s01234567);\
+    dst.s89abcdef = NAME(v0.s89abcdef, v1.s89abcdef);\
+    return dst;\
+  }
+DECL_VECTOR_2OP(ldexp, float, int);
+DECL_VECTOR_2OP(pown, float, int);
+DECL_VECTOR_2OP(rootn, float, int);
 #undef DECL_VECTOR_2OP
 
 #define DECL_VECTOR_3OP(NAME, TYPE) \
@@ -753,6 +853,8 @@ INLINE OVERLOADABLE float16 mix(float16 x, float16 y, float a) { return mix(x,y,
 #define expm1 __gen_ocl_internal_expm1
 #define fmin __gen_ocl_internal_fmin
 #define fmax __gen_ocl_internal_fmax
+#define fma mad // NOTE(review): the float mad in this header is plain a*b+c, so fma is not guaranteed to round only once
+#define fdim __gen_ocl_internal_fdim
 
 /////////////////////////////////////////////////////////////////////////////
 // Synchronization functions
-- 
1.8.1.2



More information about the Beignet mailing list