[Beignet] [PATCH 2/3] add clz(count leading zero) utest.
xionghu.luo at intel.com
xionghu.luo at intel.com
Wed Jan 14 21:22:08 PST 2015
From: Luo Xionghu <xionghu.luo at intel.com>
This kernel calls the llvm __builtin_clz to generate the llvm.clz function,
which then calls the gen instruction clz. This is different from the test
compiler_clz_int, which uses fbh to implement it.
Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
kernels/compiler_clz.cl | 12 +++++++++
utests/CMakeLists.txt | 1 +
utests/compiler_clz.cpp | 67 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 80 insertions(+)
create mode 100644 kernels/compiler_clz.cl
create mode 100644 utests/compiler_clz.cpp
diff --git a/kernels/compiler_clz.cl b/kernels/compiler_clz.cl
new file mode 100644
index 0000000..7ab6261
--- /dev/null
+++ b/kernels/compiler_clz.cl
@@ -0,0 +1,12 @@
+#define COMPILER_CLZ(TYPE) /* defines kernel compiler_clz_<TYPE> */ \
+ kernel void compiler_clz_##TYPE(global TYPE* src, global TYPE* dst) \
+{ \
+ /* each work-item handles the element at its own global id */ \
+ const size_t gid = get_global_id(0); \
+ dst[gid] = __builtin_clz(src[gid]); \
+}
+/* One kernel per unsigned integer type; utests/compiler_clz.cpp runs each by name. */
+COMPILER_CLZ(uint)
+COMPILER_CLZ(ulong)
+COMPILER_CLZ(ushort)
+COMPILER_CLZ(uchar)
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 5b29c0b..193fef3 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -105,6 +105,7 @@ set (utests_sources
compiler_write_only_shorts.cpp
compiler_switch.cpp
compiler_bswap.cpp
+ compiler_clz.cpp
compiler_math.cpp
compiler_atomic_functions.cpp
compiler_async_copy.cpp
diff --git a/utests/compiler_clz.cpp b/utests/compiler_clz.cpp
new file mode 100644
index 0000000..901e19b
--- /dev/null
+++ b/utests/compiler_clz.cpp
@@ -0,0 +1,67 @@
+#include "utest_helper.hpp"
+
+namespace {
+// Returns the value of U with every bit set; intended for unsigned integer U.
+template <typename U>
+U get_max()
+{
+ const int bit_count = sizeof(U)*8;
+ U all_ones = 0;
+ for (int bit = 0; bit < bit_count; bit++)
+ all_ones |= (U)1 << bit; // widen to U before shifting: an int 1 shifted by 31..63 is undefined
+ return all_ones;
+}
+// Fills buf[0] with (all-ones >> i), runs kernel_name over it, then checks buf[1].
+template<typename U>
+void test(const char *kernel_name)
+{
+ const size_t n = 64;
+ const uint32_t bits = sizeof(U) * 8; // width of U in bits
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL_FROM_FILE("compiler_clz", kernel_name);
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(U), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(U), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+
+ U max = get_max<U>();
+ // Shifting by the operand width or more is undefined, so those inputs are set to 0.
+ OCL_MAP_BUFFER(0);
+ for (uint32_t i = 0; i < n; ++i)
+ ((U*)buf_data[0])[i] = (i < bits) ? (U)(max >> i) : (U)0;
+ OCL_UNMAP_BUFFER(0);
+
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(1);
+ // Only the first `bits` results are checked; expected values correspond to a clz over a 32-bit word.
+ for (uint32_t i = 0; i < n; ++i) {
+ if(sizeof(U) == 1 && i < 8 )
+ OCL_ASSERT(((U*)buf_data[1])[i] == (i+24) );
+ else if(sizeof(U) == 2 && i < 16 )
+ OCL_ASSERT(((U*)buf_data[1])[i] == (i+16) );
+ else if(sizeof(U) == 4 && i < 32 )
+ OCL_ASSERT(((U*)buf_data[1])[i] == i );
+ else if(sizeof(U) == 8 && i < 32 )
+ OCL_ASSERT(((U*)buf_data[1])[i] == 0 );
+ else if(sizeof(U) == 8 && i > 31)
+ OCL_ASSERT(((U*)buf_data[1])[i] == (i-32) );
+ }
+ OCL_UNMAP_BUFFER(1);
+
+}
+
+}
+// Defines compiler_clz_<type>(), which runs test<type> on the stringified kernel name, and passes it to MAKE_UTEST_FROM_FUNCTION.
+#define compiler_clz(type, kernel) \
+static void compiler_clz_ ##type(void)\
+{\
+ test<type>(# kernel);\
+}\
+MAKE_UTEST_FROM_FUNCTION(compiler_clz_ ## type);
+// One utest per host integer type, each paired with the kernel named after the matching OpenCL type.
+compiler_clz(uint64_t, compiler_clz_ulong)
+compiler_clz(uint32_t, compiler_clz_uint)
+compiler_clz(uint16_t, compiler_clz_ushort)
+compiler_clz(uint8_t, compiler_clz_uchar)
--
1.9.1
More information about the Beignet
mailing list