[Beignet] [PATCH] add built-in function "atan2"
Homer Hsing
homer.xing at intel.com
Wed Aug 28 22:41:24 PDT 2013
Also improve the accuracy of built-in function "atan" by extending its power series from 6 terms (up to x^11) to 31 terms (up to x^61).
Also add a unit test for "atan2".
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/builtin_vector_proto.def | 3 +--
backend/src/ocl_stdlib.tmpl.h | 29 +++++++++++++++++++++++-
kernels/builtin_atan2.cl | 4 ++++
utests/CMakeLists.txt | 1 +
utests/builtin_atan2.cpp | 43 ++++++++++++++++++++++++++++++++++++
5 files changed, 77 insertions(+), 3 deletions(-)
create mode 100644 kernels/builtin_atan2.cl
create mode 100644 utests/builtin_atan2.cpp
diff --git a/backend/src/builtin_vector_proto.def b/backend/src/builtin_vector_proto.def
index 2a3daf2..b24e7ea 100644
--- a/backend/src/builtin_vector_proto.def
+++ b/backend/src/builtin_vector_proto.def
@@ -6,8 +6,7 @@ gentype asin (gentype)
gentype asinh (gentype)
gentype asinpi (gentype x)
gentype atan (gentype y_over_x)
-# XXX atan2 is a builtin function
-#gentype atan2 (gentype y, gentype x)
+gentype atan2 (gentype y, gentype x)
gentype atanh (gentype)
gentype atanpi (gentype x)
#gentype atan2pi (gentype y, gentype x)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index ac1999d..7f1032f 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -1311,7 +1311,13 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_atan(float x) {
x = 1 / x;
c = -1;
}
- return a + c * (x - __gen_ocl_pow(x, 3) / 3 + __gen_ocl_pow(x, 5) / 5 - __gen_ocl_pow(x, 7) / 7 + __gen_ocl_pow(x, 9) / 9 - __gen_ocl_pow(x, 11) / 11);
+ a += c*x;
+ int i;
+ int sign;
+ for(i=3, sign=-1; i<63; i+=2, sign=-sign) {
+ a += c*sign*__gen_ocl_pow(x,i)/i;
+ }
+ return a;
}
INLINE_OVERLOADABLE float __gen_ocl_internal_atanpi(float x) {
return __gen_ocl_internal_atan(x) / M_PI_F;
@@ -1338,6 +1344,26 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_erfc(float x) {
// XXX work-around PTX profile
#define sqrt native_sqrt
INLINE_OVERLOADABLE float rsqrt(float x) { return native_rsqrt(x); }
+INLINE_OVERLOADABLE float __gen_ocl_internal_atan2(float y, float x) {  // angle of the point (x, y), built on __gen_ocl_internal_atan(y / x)
+ uint hx = *(uint *)(&x), ix = hx & 0x7FFFFFFF;  // raw bits of x; ix drops the sign bit
+ uint hy = *(uint *)(&y), iy = hy & 0x7FFFFFFF;  // raw bits of y; iy drops the sign bit
+ if (ix > 0x7F800000 || iy > 0x7F800000)  // above the infinity bit pattern: the operand is a NaN
+ return nan(0u);
+ if (ix == 0) {  // x is +0 or -0, so y / x is not evaluated on this path
+ if (y > 0)
+ return M_PI_2_F;
+ if (y < 0)
+ return - M_PI_2_F;
+ return nan(0u);  // y is +/-0 here. NOTE(review): C99 Annex F gives +/-0 or +/-pi for atan2(+/-0, +/-0), not NaN -- confirm intent
+ } else {
+ float z = __gen_ocl_internal_atan(y / x);  // NOTE(review): y / x is NaN when x and y are both infinite
+ if (x > 0)  // right half-plane: atan(y / x) is returned unchanged
+ return z;
+ if (y >= 0)  // x < 0 from here on: shift by +pi when y >= 0 ...
+ return M_PI_F + z;
+ return - M_PI_F + z;  // ... and by -pi when y < 0
+ }
+}
INLINE_OVERLOADABLE float __gen_ocl_internal_fabs(float x) { return __gen_ocl_fabs(x); }
INLINE_OVERLOADABLE float __gen_ocl_internal_trunc(float x) { return __gen_ocl_rndz(x); }
INLINE_OVERLOADABLE float __gen_ocl_internal_round(float x) { return __gen_ocl_rnde(x); }
@@ -1370,6 +1396,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_rint(float x) {
#define tanpi __gen_ocl_internal_tanpi
#define tanh __gen_ocl_internal_tanh
#define atan __gen_ocl_internal_atan
+#define atan2 __gen_ocl_internal_atan2
#define atanpi __gen_ocl_internal_atanpi
#define atanh __gen_ocl_internal_atanh
#define pow powr
diff --git a/kernels/builtin_atan2.cl b/kernels/builtin_atan2.cl
new file mode 100644
index 0000000..aba73be
--- /dev/null
+++ b/kernels/builtin_atan2.cl
@@ -0,0 +1,4 @@
+kernel void builtin_atan2(global float *y, global float *x, global float *dst) {  // test kernel: element-wise atan2 of two input buffers
+ int i = get_global_id(0);  // this work-item's global index in dimension 0
+ dst[i] = atan2(y[i], x[i]);  // one output element per work-item
+};
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 08b4e32..835524d 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -104,6 +104,7 @@ set (utests_sources
compiler_vector_load_store.cpp
compiler_cl_finish.cpp
get_cl_info.cpp
+ builtin_atan2.cpp
builtin_bitselect.cpp
builtin_frexp.cpp
builtin_mad_sat.cpp
diff --git a/utests/builtin_atan2.cpp b/utests/builtin_atan2.cpp
new file mode 100644
index 0000000..29dd7b4
--- /dev/null
+++ b/utests/builtin_atan2.cpp
@@ -0,0 +1,43 @@
+#include <cmath>
+#include "utest_helper.hpp"
+
+void builtin_atan2(void) {  // compares the kernel's atan2 results against the host atan2f
+ const int n = 1024;
+ float y[n], x[n];  // host copies of the inputs, reused below for the CPU reference
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("builtin_atan2");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);  // buf[0] holds y (kernel argument 0)
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);  // buf[1] holds x (kernel argument 1)
+ OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL);  // buf[2] holds dst (kernel argument 2)
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+ globals[0] = n;  // presumably the global work size (one work-item per element) -- confirm in utest_helper.hpp
+ locals[0] = 16;  // presumably the work-group size -- confirm in utest_helper.hpp
+
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (int i = 0; i < n; ++i) {
+ y[i] = ((float*) buf_data[0])[i] = (rand()&255) * 0.01f;  // values in [0, 2.55]
+ x[i] = ((float*) buf_data[1])[i] = (rand()&255) * 0.01f;  // NOTE(review): x and y are never negative, so only one quadrant is exercised
+ }
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+
+ OCL_NDRANGE(1);  // presumably runs the kernel over a 1-dimensional range -- confirm in utest_helper.hpp
+
+ OCL_MAP_BUFFER(2);
+ float *dst = (float*) buf_data[2];
+ for (int i = 0; i < n; ++i) {
+ float cpu = atan2f(y[i], x[i]);  // host reference value
+ float gpu = dst[i];
+ if (fabsf(cpu - gpu) >= 1e-2) {  // absolute tolerance; NOTE(review): this comparison is false when gpu is NaN, so a NaN result is not reported
+ printf("%f %f %f %f\n", y[i], x[i], cpu, gpu);
+ OCL_ASSERT(0);
+ }
+ }
+ OCL_UNMAP_BUFFER(2);
+}
+
+MAKE_UTEST_FROM_FUNCTION (builtin_atan2);  // presumably registers the function with the utest runner -- confirm in utest_helper.hpp
--
1.8.1.2
More information about the Beignet
mailing list