[Beignet] [PATCH 2/2] Add the test case for builtin step() function

junyan.he at inbox.com junyan.he at inbox.com
Thu Jul 4 00:35:34 PDT 2013


From: Junyan He <junyan.he at linux.intel.com>

The step function has two kind of prototype:
gentypen step(gentypen edge, gentypen x)
and
gentypen step(float edge, gentypen x)

The first's test name is compiler_step_floatX
The second's test name is compiler_stepf_floatX

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 kernels/compiler_step.cl |   38 ++++++
 utests/CMakeLists.txt    |    1 +
 utests/compiler_step.cpp |  338 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 377 insertions(+)
 create mode 100644 kernels/compiler_step.cl
 create mode 100644 utests/compiler_step.cpp

diff --git a/kernels/compiler_step.cl b/kernels/compiler_step.cl
new file mode 100644
index 0000000..ef77f05
--- /dev/null
+++ b/kernels/compiler_step.cl
@@ -0,0 +1,38 @@
+#define COMPILER_STEP_FUNC_N(TYPE, N) \
+    kernel void compiler_step_##TYPE##N ( \
+           global TYPE##N* edge, global TYPE##N* x, global TYPE##N* dst) { \
+        int i = get_global_id(0); \
+        dst[i] = step(edge[i], x[i]);     \
+    }
+
+kernel void compiler_step_float (global float* edge,
+                                 global float* x, global float* dst)
+{
+    int i = get_global_id(0);
+    dst[i] = step(edge[i], x[i]);
+}
+
+COMPILER_STEP_FUNC_N(float, 2)
+COMPILER_STEP_FUNC_N(float, 3)
+COMPILER_STEP_FUNC_N(float, 4)
+COMPILER_STEP_FUNC_N(float, 8)
+COMPILER_STEP_FUNC_N(float, 16)
+
+#define COMPILER_STEPF_FUNC_N(TYPE, N) \
+    kernel void compiler_stepf_##TYPE##N ( \
+           float edge, global TYPE##N* x, global TYPE##N* dst) { \
+        int i = get_global_id(0); \
+        dst[i] = step(edge, x[i]);     \
+    }
+
+kernel void compiler_stepf_float (float edge, global float* x, global float* dst)
+{
+    int i = get_global_id(0);
+    dst[i] = step(edge, x[i]);
+}
+
+COMPILER_STEPF_FUNC_N(float, 2)
+COMPILER_STEPF_FUNC_N(float, 3)
+COMPILER_STEPF_FUNC_N(float, 4)
+COMPILER_STEPF_FUNC_N(float, 8)
+COMPILER_STEPF_FUNC_N(float, 16)
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 0863071..82e0a40 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -30,6 +30,7 @@ set (utests_sources
   compiler_copy_image.cpp
   compiler_copy_image_3d.cpp
   compiler_copy_buffer_row.cpp
+  compiler_step.cpp
   compiler_fabs.cpp
   compiler_abs.cpp
   compiler_abs_diff.cpp
diff --git a/utests/compiler_step.cpp b/utests/compiler_step.cpp
new file mode 100644
index 0000000..3285dda
--- /dev/null
+++ b/utests/compiler_step.cpp
@@ -0,0 +1,338 @@
+#include "utest_helper.hpp"
+#include "string.h"
+
+template <typename T, int N>
+struct cl_vec {
+    T ptr[((N+1)/2)*2]; //align to 2 elements.
+
+    typedef cl_vec<T, N> vec_type;
+
+    cl_vec(void) {
+        memset(ptr, 0, sizeof(T) * ((N+1)/2)*2);
+    }
+    cl_vec(vec_type & other) {
+        memset(ptr, 0, sizeof(T) * ((N+1)/2)*2);
+        memcpy (this->ptr, other.ptr, sizeof(T) * N);
+    }
+
+    vec_type& operator= (vec_type & other) {
+        memset(ptr, 0, sizeof(T) * ((N+1)/2)*2);
+        memcpy (this->ptr, other.ptr, sizeof(T) * N);
+        return *this;
+    }
+
+    template <typename U> vec_type& operator= (cl_vec<U, N> & other) {
+        memset(ptr, 0, sizeof(T) * ((N+1)/2)*2);
+        memcpy (this->ptr, other.ptr, sizeof(T) * N);
+        return *this;
+    }
+
+    bool operator== (vec_type & other) {
+        return !memcmp (this->ptr, other.ptr, sizeof(T) * N);
+    }
+
+    void step (vec_type & other) {
+        int i = 0;
+        for (; i < N; i++) {
+            T a = ptr[i];
+            T edge = other.ptr[i];
+            T f = a < edge ? 0.0 : 1.0;
+            ptr[i] = f;
+        }
+    }
+
+    void step (float & edge) {
+        int i = 0;
+        for (; i < N; i++) {
+            T a = ptr[i];
+            T f = a < edge ? 0.0 : 1.0;
+            ptr[i] = f;
+        }
+    }
+};
+
+template <typename T, typename U, int N> static void cpu (int global_id,
+        cl_vec<T, N> *edge, cl_vec<T, N> *src, cl_vec<U, N> *dst)
+{
+    cl_vec<T, N> v  = src[global_id];
+    v.step(edge[global_id]);
+    dst[global_id] = v;
+}
+
+template <typename T, typename U> static void cpu(int global_id, T *edge, T *src, U *dst)
+{
+    T f = src[global_id];
+    T e = edge[global_id];
+    f = f < e ? 0.0 : 1.0;
+    dst[global_id] = (U)f;
+}
+
+template <typename T, typename U, int N> static void cpu (int global_id,
+        float edge, cl_vec<T, N> *src, cl_vec<U, N> *dst)
+{
+    cl_vec<T, N> v  = src[global_id];
+    v.step(edge);
+    dst[global_id] = v;
+}
+
+template <typename T, typename U> static void cpu(int global_id, float edge, T *src, U *dst)
+{
+    T f = src[global_id];
+    f = f < edge ? 0.0 : 1.0;
+    dst[global_id] = (U)f;
+}
+
+template <typename T, int N> static void gen_rand_val (cl_vec<T, N>& vect)
+{
+    int i = 0;
+
+    memset(vect.ptr, 0, sizeof(T) * ((N+1)/2)*2);
+    for (; i < N; i++) {
+        vect.ptr[i] = static_cast<T>(.1f * (rand() & 15) - .75f);
+    }
+}
+
+template <typename T> static void gen_rand_val (T & val)
+{
+    val = static_cast<T>(.1f * (rand() & 15) - .75f);
+}
+
+template <typename T>
+inline static void print_data (T& val)
+{
+    if (std::is_unsigned<T>::value)
+        printf(" %u", val);
+    else
+        printf(" %d", val);
+}
+
+inline static void print_data (float& val)
+{
+    printf(" %f", val);
+}
+
+template <typename T, typename U, int N> static void dump_data (cl_vec<T, N>* edge,
+        cl_vec<T, N>* src, cl_vec<U, N>* dst, int n)
+{
+    U* val = reinterpret_cast<U *>(dst);
+
+    n = n*((N+1)/2)*2;
+
+    printf("\nEdge: \n");
+    for (int32_t i = 0; i < (int32_t) n; ++i) {
+        print_data(((T *)buf_data[0])[i]);
+    }
+    printf("\nx: \n");
+    for (int32_t i = 0; i < (int32_t) n; ++i) {
+        print_data(((T *)buf_data[1])[i]);
+    }
+
+    printf("\nCPU: \n");
+    for (int32_t i = 0; i < (int32_t) n; ++i) {
+        print_data(val[i]);
+    }
+    printf("\nGPU: \n");
+    for (int32_t i = 0; i < (int32_t) n; ++i) {
+        print_data(((U *)buf_data[2])[i]);
+    }
+}
+
+template <typename T, typename U> static void dump_data (T* edge, T* src, U* dst, int n)
+{
+    printf("\nedge: \n");
+    for (int32_t i = 0; i < (int32_t) n; ++i) {
+        print_data(((T *)buf_data[0])[i]);
+    }
+
+    printf("\nx: \n");
+    for (int32_t i = 0; i < (int32_t) n; ++i) {
+        print_data(((T *)buf_data[1])[i]);
+    }
+
+    printf("\nCPU: \n");
+    for (int32_t i = 0; i < (int32_t) n; ++i) {
+        print_data(dst[i]);
+    }
+    printf("\nGPU: \n");
+    for (int32_t i = 0; i < (int32_t) n; ++i) {
+        print_data(((U *)buf_data[2])[i]);
+    }
+}
+
+template <typename T, typename U, int N> static void dump_data (float edge,
+        cl_vec<T, N>* src, cl_vec<U, N>* dst, int n)
+{
+    U* val = reinterpret_cast<U *>(dst);
+
+    n = n*((N+1)/2)*2;
+
+    printf("\nEdge: %f\n", edge);
+    printf("\nx: \n");
+    for (int32_t i = 0; i < (int32_t) n; ++i) {
+        print_data(((T *)buf_data[0])[i]);
+    }
+
+    printf("\nCPU: \n");
+    for (int32_t i = 0; i < (int32_t) n; ++i) {
+        print_data(val[i]);
+    }
+    printf("\nGPU: \n");
+    for (int32_t i = 0; i < (int32_t) n; ++i) {
+        print_data(((U *)buf_data[1])[i]);
+    }
+}
+
+template <typename T, typename U> static void dump_data (float edge, T* src, U* dst, int n)
+{
+    printf("\nedge: %f\n", edge);
+    printf("\nx: \n");
+    for (int32_t i = 0; i < (int32_t) n; ++i) {
+        print_data(((T *)buf_data[0])[i]);
+    }
+
+    printf("\nCPU: \n");
+    for (int32_t i = 0; i < (int32_t) n; ++i) {
+        print_data(dst[i]);
+    }
+    printf("\nGPU: \n");
+    for (int32_t i = 0; i < (int32_t) n; ++i) {
+        print_data(((U *)buf_data[1])[i]);
+    }
+}
+
+template <typename T> static void compiler_step_with_type(void)
+{
+    const size_t n = 16;
+    T cpu_dst[n], cpu_src[n];
+    T edge[n];
+
+    // Setup buffers
+    OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL);
+    OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL);
+    OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(T), NULL);
+    OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+    OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+    OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+    globals[0] = n;
+    locals[0] = n;
+
+    // Run random tests
+    for (uint32_t pass = 0; pass < 8; ++pass) {
+        OCL_MAP_BUFFER(0);
+        OCL_MAP_BUFFER(1);
+
+        /* Clear the dst buffer to avoid random data. */
+        OCL_MAP_BUFFER(2);
+        memset(buf_data[2], 0, sizeof(T) * n);
+        OCL_UNMAP_BUFFER(2);
+
+        for (int32_t i = 0; i < (int32_t) n; ++i) {
+            gen_rand_val(cpu_src[i]);
+            gen_rand_val(edge[i]);
+        }
+
+        memcpy(buf_data[1], cpu_src, sizeof(T) * n);
+        memcpy(buf_data[0], edge, sizeof(T) * n);
+
+        // Run the kernel on GPU
+        OCL_NDRANGE(1);
+
+        // Run on CPU
+        for (int32_t i = 0; i < (int32_t) n; ++i)
+            cpu(i, edge, cpu_src, cpu_dst);
+
+        // Compare
+        OCL_MAP_BUFFER(2);
+
+        //dump_data(edge, cpu_src, cpu_dst, n);
+
+        OCL_ASSERT(!memcmp(buf_data[2], cpu_dst, sizeof(T) * n));
+        OCL_UNMAP_BUFFER(2);
+        OCL_UNMAP_BUFFER(1);
+        OCL_UNMAP_BUFFER(0);
+    }
+}
+
+#define STEP_TEST_TYPE(TYPE) \
+	static void compiler_step_##TYPE (void) \
+        { \
+           OCL_CALL (cl_kernel_init, "compiler_step.cl", "compiler_step_"#TYPE, SOURCE, NULL);  \
+           compiler_step_with_type<TYPE>(); \
+        } \
+	MAKE_UTEST_FROM_FUNCTION(compiler_step_##TYPE);
+
+typedef cl_vec<float, 2> float2;
+typedef cl_vec<float, 3> float3;
+typedef cl_vec<float, 4> float4;
+typedef cl_vec<float, 8> float8;
+typedef cl_vec<float, 16> float16;
+STEP_TEST_TYPE(float)
+STEP_TEST_TYPE(float2)
+STEP_TEST_TYPE(float3)
+STEP_TEST_TYPE(float4)
+STEP_TEST_TYPE(float8)
+STEP_TEST_TYPE(float16)
+
+
+template <typename T> static void compiler_stepf_with_type(void)
+{
+    const size_t n = 16;
+    T cpu_dst[n], cpu_src[n];
+    float edge = (float)(.1f * (rand() & 15) - .75f);
+
+    // Setup buffers
+    OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL);
+    OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL);
+    OCL_SET_ARG(0, sizeof(float), &edge);
+    OCL_SET_ARG(1, sizeof(cl_mem), &buf[0]);
+    OCL_SET_ARG(2, sizeof(cl_mem), &buf[1]);
+    globals[0] = n;
+    locals[0] = n;
+
+    // Run random tests
+    for (uint32_t pass = 0; pass < 8; ++pass) {
+        OCL_MAP_BUFFER(0);
+
+        /* Clear the dst buffer to avoid random data. */
+        OCL_MAP_BUFFER(1);
+        memset(buf_data[1], 0, sizeof(T) * n);
+        OCL_UNMAP_BUFFER(1);
+
+        for (int32_t i = 0; i < (int32_t) n; ++i) {
+            gen_rand_val(cpu_src[i]);
+        }
+
+        memcpy(buf_data[0], cpu_src, sizeof(T) * n);
+
+        // Run the kernel on GPU
+        OCL_NDRANGE(1);
+
+        // Run on CPU
+        for (int32_t i = 0; i < (int32_t) n; ++i)
+            cpu(i, edge, cpu_src, cpu_dst);
+
+        // Compare
+        OCL_MAP_BUFFER(1);
+
+        //dump_data(edge, cpu_src, cpu_dst, n);
+
+        OCL_ASSERT(!memcmp(buf_data[1], cpu_dst, sizeof(T) * n));
+        OCL_UNMAP_BUFFER(1);
+        OCL_UNMAP_BUFFER(0);
+    }
+}
+
+#define STEPF_TEST_TYPE(TYPE) \
+	static void compiler_stepf_##TYPE (void) \
+        { \
+           OCL_CALL (cl_kernel_init, "compiler_step.cl", "compiler_stepf_"#TYPE, SOURCE, NULL);  \
+           compiler_stepf_with_type<TYPE>(); \
+        } \
+	MAKE_UTEST_FROM_FUNCTION(compiler_stepf_##TYPE);
+
+STEPF_TEST_TYPE(float)
+STEPF_TEST_TYPE(float2)
+STEPF_TEST_TYPE(float3)
+STEPF_TEST_TYPE(float4)
+STEPF_TEST_TYPE(float8)
+STEPF_TEST_TYPE(float16)
-- 
1.7.9.5



More information about the Beignet mailing list