[Beignet] [PATCH 4/4] Utest: Add test case for sub_group_shuffle_down/up/xor
Xiuli Pan
xiuli.pan at intel.com
Fri Jun 3 02:43:49 UTC 2016
From: Pan Xiuli <xiuli.pan at intel.com>
Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
kernels/compiler_sub_group_shuffle_down.cl | 19 ++++++++++++
kernels/compiler_sub_group_shuffle_up.cl | 19 ++++++++++++
kernels/compiler_sub_group_shuffle_xor.cl | 19 ++++++++++++
utests/CMakeLists.txt | 3 ++
utests/compiler_sub_group_shuffle_down.cpp | 46 ++++++++++++++++++++++++++++++
utests/compiler_sub_group_shuffle_up.cpp | 46 ++++++++++++++++++++++++++++++
utests/compiler_sub_group_shuffle_xor.cpp | 46 ++++++++++++++++++++++++++++++
7 files changed, 198 insertions(+)
create mode 100644 kernels/compiler_sub_group_shuffle_down.cl
create mode 100644 kernels/compiler_sub_group_shuffle_up.cl
create mode 100644 kernels/compiler_sub_group_shuffle_xor.cl
create mode 100644 utests/compiler_sub_group_shuffle_down.cpp
create mode 100644 utests/compiler_sub_group_shuffle_up.cpp
create mode 100644 utests/compiler_sub_group_shuffle_xor.cpp
diff --git a/kernels/compiler_sub_group_shuffle_down.cl b/kernels/compiler_sub_group_shuffle_down.cl
new file mode 100644
index 0000000..769fc3f
--- /dev/null
+++ b/kernels/compiler_sub_group_shuffle_down.cl
@@ -0,0 +1,19 @@
+__kernel void compiler_sub_group_shuffle_down(global int *dst, int c)
+{
+  /* dst[0] receives the max sub-group size; work-item g then owns dst[1+4g .. 4+4g]. */
+  const int gid = get_global_id(0);
+  const int lane = get_sub_group_local_id();
+  const int width = get_max_sub_group_size();
+  if (gid == 0) {
+    dst[0] = width;
+  }
+  global int *out = dst + 1 + gid * 4;
+
+  /* Deltas exercised: the uniform argument c, lane + 1, and width - lane - 1. */
+  const int next_lane = lane + 1;
+  const int to_last = width - lane - 1;
+  out[0] = intel_sub_group_shuffle_down(123, 456, c);
+  out[1] = intel_sub_group_shuffle_down(123, gid, c);
+  out[2] = intel_sub_group_shuffle_down(gid, -gid, next_lane);
+  out[3] = intel_sub_group_shuffle_down(gid, 321, to_last);
+}
diff --git a/kernels/compiler_sub_group_shuffle_up.cl b/kernels/compiler_sub_group_shuffle_up.cl
new file mode 100644
index 0000000..5c5cee1
--- /dev/null
+++ b/kernels/compiler_sub_group_shuffle_up.cl
@@ -0,0 +1,19 @@
+__kernel void compiler_sub_group_shuffle_up(global int *dst, int c)
+{
+  /* dst[0] receives the max sub-group size; work-item g then owns dst[1+4g .. 4+4g]. */
+  const int gid = get_global_id(0);
+  const int lane = get_sub_group_local_id();
+  const int width = get_max_sub_group_size();
+  if (gid == 0) {
+    dst[0] = width;
+  }
+  global int *out = dst + 1 + gid * 4;
+
+  /* Deltas exercised: the uniform argument c, width - lane - 1, and lane + 1. */
+  const int next_lane = lane + 1;
+  const int to_last = width - lane - 1;
+  out[0] = intel_sub_group_shuffle_up(123, 456, c);
+  out[1] = intel_sub_group_shuffle_up(123, gid, c);
+  out[2] = intel_sub_group_shuffle_up(gid, -gid, to_last);
+  out[3] = intel_sub_group_shuffle_up(gid, 321, next_lane);
+}
diff --git a/kernels/compiler_sub_group_shuffle_xor.cl b/kernels/compiler_sub_group_shuffle_xor.cl
new file mode 100644
index 0000000..8bc15d3
--- /dev/null
+++ b/kernels/compiler_sub_group_shuffle_xor.cl
@@ -0,0 +1,19 @@
+__kernel void compiler_sub_group_shuffle_xor(global int *dst, int c)
+{
+  /* dst[0] receives the max sub-group size; work-item g then owns dst[1+4g .. 4+4g]. */
+  const int gid = get_global_id(0);
+  const int lane = get_sub_group_local_id();
+  const int width = get_max_sub_group_size();
+  if (gid == 0) {
+    dst[0] = width;
+  }
+  global int *out = dst + 1 + gid * 4;
+
+  /* Slot 0 records the lane id; the XOR masks are c, width - lane - 1, and lane + 1. */
+  const int next_lane = lane + 1;
+  const int to_last = width - lane - 1;
+  out[0] = lane;
+  out[1] = intel_sub_group_shuffle_xor(gid, c);
+  out[2] = intel_sub_group_shuffle_xor(gid, to_last);
+  out[3] = intel_sub_group_shuffle_xor(gid, next_lane);
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index f56c497..a711bd6 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -277,6 +277,9 @@ set (utests_sources
compiler_get_max_sub_group_size.cpp
compiler_get_sub_group_local_id.cpp
compiler_sub_group_shuffle.cpp
+ compiler_sub_group_shuffle_down.cpp
+ compiler_sub_group_shuffle_up.cpp
+ compiler_sub_group_shuffle_xor.cpp
builtin_global_linear_id.cpp
builtin_local_linear_id.cpp
compiler_mix.cpp
diff --git a/utests/compiler_sub_group_shuffle_down.cpp b/utests/compiler_sub_group_shuffle_down.cpp
new file mode 100644
index 0000000..27bfcac
--- /dev/null
+++ b/utests/compiler_sub_group_shuffle_down.cpp
@@ -0,0 +1,46 @@
+#include "utest_helper.hpp"
+
+void compiler_sub_group_shuffle_down(void)
+{
+  const size_t n = 32;
+  const int32_t buf_size = 4 * n + 1;
+  int c = 13;
+  // One int for the reported sub-group size, then four result ints per work-item.
+  OCL_CREATE_KERNEL("compiler_sub_group_shuffle_down");
+  OCL_CREATE_BUFFER(buf[0], 0, buf_size * sizeof(int), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(int), &c);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Pre-fill every slot with -1 before the kernel runs.
+  OCL_MAP_BUFFER(0);
+  int *fill = (int *)buf_data[0];
+  for (int32_t slot = 0; slot < buf_size; ++slot) {
+    fill[slot] = -1;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_NDRANGE(1);  // Run the kernel
+
+  // Compare the kernel output with values computed on the host.
+  OCL_MAP_BUFFER(0);
+  const int *mapped = (int *)buf_data[0];
+  const int sg_size = mapped[0];
+  OCL_ASSERT(sg_size == 8 || sg_size == 16);
+
+  for (int32_t gid = 0; gid < (int32_t) n; ++gid) {
+    const int *got = mapped + 1 + 4 * gid;
+    const int lane = gid % sg_size;
+    const int base = (gid / sg_size) * sg_size;  // first global id of the sub-group, as this test assumes
+    const bool wraps_c = lane + c >= sg_size;
+    const int src_c = base + (gid + c) % sg_size;
+    const int src_k = base + (gid + lane + 1) % sg_size;
+    OCL_ASSERT(got[0] == (wraps_c ? 456 : 123));
+    OCL_ASSERT(got[1] == (wraps_c ? src_c : 123));
+    OCL_ASSERT(got[2] == (lane + lane + 1 >= sg_size ? -src_k : src_k));
+    OCL_ASSERT(got[3] == base + (sg_size - 1));
+  }
+  OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_sub_group_shuffle_down);
diff --git a/utests/compiler_sub_group_shuffle_up.cpp b/utests/compiler_sub_group_shuffle_up.cpp
new file mode 100644
index 0000000..a3fcadb
--- /dev/null
+++ b/utests/compiler_sub_group_shuffle_up.cpp
@@ -0,0 +1,46 @@
+#include "utest_helper.hpp"
+
+void compiler_sub_group_shuffle_up(void)
+{
+  const size_t n = 32;
+  const int32_t buf_size = 4 * n + 1;
+  int c = 13;
+  // One int for the reported sub-group size, then four result ints per work-item.
+  OCL_CREATE_KERNEL("compiler_sub_group_shuffle_up");
+  OCL_CREATE_BUFFER(buf[0], 0, buf_size * sizeof(int), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(int), &c);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Pre-fill every slot with -1 before the kernel runs.
+  OCL_MAP_BUFFER(0);
+  int *fill = (int *)buf_data[0];
+  for (int32_t slot = 0; slot < buf_size; ++slot) {
+    fill[slot] = -1;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_NDRANGE(1);  // Run the kernel
+
+  // Compare the kernel output with values computed on the host.
+  OCL_MAP_BUFFER(0);
+  const int *mapped = (int *)buf_data[0];
+  const int sg_size = mapped[0];
+  OCL_ASSERT(sg_size == 8 || sg_size == 16);
+
+  for (int32_t gid = 0; gid < (int32_t) n; ++gid) {
+    const int *got = mapped + 1 + 4 * gid;
+    const int lane = gid % sg_size;
+    const int base = (gid / sg_size) * sg_size;  // first global id of the sub-group, as this test assumes
+    const bool before_c = (c - lane) > 0;
+    const bool before_k = (sg_size - lane - 1 - lane) > 0;
+    const int src_k = gid + lane + 1;
+    OCL_ASSERT(got[0] == (before_c ? 123 : 456));
+    OCL_ASSERT(got[1] == (before_c ? 123 : (gid - c)));
+    OCL_ASSERT(got[2] == (before_k ? src_k : -(src_k - sg_size)));
+    OCL_ASSERT(got[3] == base + (sg_size - 1));
+  }
+  OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_sub_group_shuffle_up);
diff --git a/utests/compiler_sub_group_shuffle_xor.cpp b/utests/compiler_sub_group_shuffle_xor.cpp
new file mode 100644
index 0000000..3a3c16a
--- /dev/null
+++ b/utests/compiler_sub_group_shuffle_xor.cpp
@@ -0,0 +1,46 @@
+#include "utest_helper.hpp"
+
+void compiler_sub_group_shuffle_xor(void)
+{
+  const size_t n = 32;
+  const int32_t buf_size = 4 * n + 1;
+  int c = 3;
+  // One int for the reported sub-group size, then four result ints per work-item.
+  OCL_CREATE_KERNEL("compiler_sub_group_shuffle_xor");
+  OCL_CREATE_BUFFER(buf[0], 0, buf_size * sizeof(int), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(int), &c);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Pre-fill every slot with -1 before the kernel runs.
+  OCL_MAP_BUFFER(0);
+  int *fill = (int *)buf_data[0];
+  for (int32_t slot = 0; slot < buf_size; ++slot) {
+    fill[slot] = -1;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_NDRANGE(1);  // Run the kernel
+
+  // Compare the kernel output with values computed on the host.
+  OCL_MAP_BUFFER(0);
+  const int *mapped = (int *)buf_data[0];
+  const int sg_size = mapped[0];
+  OCL_ASSERT(sg_size == 8 || sg_size == 16);
+
+  for (int32_t gid = 0; gid < (int32_t) n; ++gid) {
+    const int *got = mapped + 1 + 4 * gid;
+    const int lane = gid % sg_size;
+    const int base = (gid / sg_size) * sg_size;  // first global id of the sub-group, as this test assumes
+    const int mirror = sg_size - lane - 1;
+    // The lane + 1 mask is reduced modulo the sub-group size, so the last lane expects itself.
+    const int next = (lane + 1) % sg_size;
+    OCL_ASSERT(got[0] == lane);
+    OCL_ASSERT(got[1] == base + (c ^ lane));
+    OCL_ASSERT(got[2] == base + (lane ^ mirror));
+    OCL_ASSERT(got[3] == base + (lane ^ next));
+  }
+  OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_sub_group_shuffle_xor);
--
2.7.4
More information about the Beignet
mailing list