[Beignet] [PATCH 13/15] Utests: Add test cases for workgroup reduce max/min.

Pan Xiuli xiuli.pan at intel.com
Wed Jan 20 22:51:53 PST 2016


From: Junyan He <junyan.he at linux.intel.com>

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
Reviewed-by: Yang Rong <rong.r.yang at intel.com>
---
 kernels/compiler_workgroup_reduce.cl |  28 ++++++
 utests/CMakeLists.txt                |   1 +
 utests/compiler_workgroup_reduce.cpp | 174 +++++++++++++++++++++++++++++++++++
 3 files changed, 203 insertions(+)
 create mode 100644 kernels/compiler_workgroup_reduce.cl
 create mode 100644 utests/compiler_workgroup_reduce.cpp

diff --git a/kernels/compiler_workgroup_reduce.cl b/kernels/compiler_workgroup_reduce.cl
new file mode 100644
index 0000000..27d306b
--- /dev/null
+++ b/kernels/compiler_workgroup_reduce.cl
@@ -0,0 +1,28 @@
+kernel void compiler_workgroup_reduce_min_uniform(uint src, global uint *dst) {
+   /* src is a scalar argument; each work-item stores the reduced minimum at its local id. */
+   dst[get_local_id(0)] = work_group_reduce_min(src);
+}
+
+kernel void compiler_workgroup_reduce_min_uint(global uint *src, global uint *dst) {
+   /* Each work-item reads src at its local id and writes the reduced minimum to dst there. */
+   const size_t lid = get_local_id(0);
+   dst[lid] = work_group_reduce_min(src[lid]);
+}
+
+kernel void compiler_workgroup_reduce_max_uint(global uint *src, global uint *dst) {
+   /* Each work-item reads src at its local id and writes the reduced maximum to dst there. */
+   const size_t lid = get_local_id(0);
+   dst[lid] = work_group_reduce_max(src[lid]);
+}
+
+kernel void compiler_workgroup_reduce_min_float(global float *src, global float *dst) {
+   /* Each work-item reads src at its local id and writes the reduced minimum to dst there. */
+   const size_t lid = get_local_id(0);
+   dst[lid] = work_group_reduce_min(src[lid]);
+}
+
+kernel void compiler_workgroup_reduce_max_float(global float *src, global float *dst) {
+   /* Each work-item reads src at its local id and writes the reduced maximum to dst there. */
+   const size_t lid = get_local_id(0);
+   dst[lid] = work_group_reduce_max(src[lid]);
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 78442cb..fac303c 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -121,6 +121,7 @@ set (utests_sources
   compiler_atomic_functions.cpp
   compiler_async_copy.cpp
   compiler_workgroup_broadcast.cpp
+  compiler_workgroup_reduce.cpp
   compiler_async_stride_copy.cpp
   compiler_insn_selection_min.cpp
   compiler_insn_selection_max.cpp
diff --git a/utests/compiler_workgroup_reduce.cpp b/utests/compiler_workgroup_reduce.cpp
new file mode 100644
index 0000000..6340cb2
--- /dev/null
+++ b/utests/compiler_workgroup_reduce.cpp
@@ -0,0 +1,174 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+
+void compiler_workgroup_reduce_min_uniform(void)
+{
+  // Every work-item is handed the same scalar; global and local sizes are both 17.
+  const size_t group_size = 17;
+  uint32_t uniform_val = 253;
+
+  // Build the kernel: arg 0 is the scalar input, arg 1 the output buffer
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce", "compiler_workgroup_reduce_min_uniform");
+  OCL_CREATE_BUFFER(buf[0], 0, group_size * sizeof(uint32_t), NULL);
+  OCL_SET_ARG(0, sizeof(uint32_t), &uniform_val);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[0]);
+  globals[0] = group_size;
+  locals[0] = group_size;
+
+  // Execute the kernel
+  OCL_NDRANGE(1);
+
+  // Every output slot must hold the value that was passed in
+  OCL_MAP_BUFFER(0);
+  for (size_t i = 0; i != group_size; ++i) {
+    OCL_ASSERT(((uint32_t *)buf_data[0])[i] == 253);
+  }
+  OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_min_uniform);
+
+static uint32_t test_array_uint[64] = {23, 34, 16, 91, 25, 133, 7787, 134, 987, 9853, 33, 21, 865, 1441, 9083, 812,  // [0..15]
+                                  10, 43435, 63, 445, 253, 65, 24, 30, 76, 989, 120 ,113 ,133, 41, 18, 91,  // [16..31]; [17] = 43435 is the largest value
+                                  8321, 6712, 881, 911, 5, 788, 8991, 88, 19, 1110, 1231, 1341, 1983, 1983, 91, 212,  // [32..47]; [36] = 5 is the smallest value
+                                  712, 31, 881, 963, 6801, 651, 9810, 77, 98, 5, 16, 1888, 141, 1613, 1771, 16};  // [48..63]; the uint tests copy only [0..59]
+
+void compiler_workgroup_reduce_min_uint(void)
+{
+  // Only the first 60 of the 64 table entries are uploaded and reduced.
+  const size_t count = 60;
+  const size_t bytes = count * sizeof(uint32_t);
+
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce", "compiler_workgroup_reduce_min_uint");
+  OCL_CREATE_BUFFER(buf[0], 0, bytes, NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, bytes, NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = count;
+  locals[0] = count;
+
+  // Upload the input values into buf[0]
+  OCL_MAP_BUFFER(0);
+  memcpy(buf_data[0], test_array_uint, bytes);
+  OCL_UNMAP_BUFFER(0);
+
+  // Execute the kernel
+  OCL_NDRANGE(1);
+
+  // 5 is the smallest uploaded value; every slot of buf[1] must hold it
+  OCL_MAP_BUFFER(1);
+  for (size_t i = 0; i != count; ++i) {
+    OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 5);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_min_uint);
+
+void compiler_workgroup_reduce_max_uint(void)
+{
+  // Only the first 60 of the 64 table entries are uploaded and reduced.
+  const size_t count = 60;
+  const size_t bytes = count * sizeof(uint32_t);
+
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce", "compiler_workgroup_reduce_max_uint");
+  OCL_CREATE_BUFFER(buf[0], 0, bytes, NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, bytes, NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = count;
+  locals[0] = count;
+
+  // Upload the input values into buf[0]
+  OCL_MAP_BUFFER(0);
+  memcpy(buf_data[0], test_array_uint, bytes);
+  OCL_UNMAP_BUFFER(0);
+
+  // Execute the kernel
+  OCL_NDRANGE(1);
+
+  // 43435 is the largest uploaded value; every slot of buf[1] must hold it
+  OCL_MAP_BUFFER(1);
+  for (size_t i = 0; i != count; ++i) {
+    OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 43435);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_max_uint);
+
+static float test_array_float[64] =  // the float tests copy only the first 60 entries
+  {1.0234f, 0.34e32f, -13441.4334f, 1893.21f, -9999.0f, -88.00f, 1.3f, 1.0f,
+   2.33f, 134.44f, 263.0f, 1.0f, 0.0f, 344.900043f, 0.1e30f, 1.0e10f,
+
+   10.0f, 43.435f, 6.3f, 44.545f, 0.253f, 6.5f, 0.24f, 10.30f,
+   1312.76f, -0.00989f, 124213.120f, 1.13f, 1.33f, 4.1f, 1.8f, 3234.91f,
+
+   3.21e38f, 6.712f, 0.881f, 12.91f, 5.0f, 7.88f, 128991.0f, 8.8f,  // [32] = 3.21e38f is the largest value
+   0.0019f, -0.1110f, 12.0e31f, -3.3E38f, 1.983f, 1.983f, 10091.0f, 2.12f,  // [43] = -3.3E38f is the smallest value
+
+   0.88712f, 1e31f, -881.0f, -196e3f, 68.01f, -651.121f, 9.810f, -0.77f,
+   100.98f, 50.0f, 1000.16f, -18e18f, 0.141f, 1613.0f, 1.771f, -16.13f};
+
+void compiler_workgroup_reduce_min_float(void)
+{
+  // Only the first 60 of the 64 table entries are uploaded and reduced.
+  const size_t count = 60;
+  const size_t bytes = count * sizeof(float);
+
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce", "compiler_workgroup_reduce_min_float");
+  OCL_CREATE_BUFFER(buf[0], 0, bytes, NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, bytes, NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = count;
+  locals[0] = count;
+
+  // Upload the input values into buf[0]
+  OCL_MAP_BUFFER(0);
+  memcpy(buf_data[0], test_array_float, bytes);
+  OCL_UNMAP_BUFFER(0);
+
+  // Execute the kernel
+  OCL_NDRANGE(1);
+
+  // -3.3E38f is the smallest uploaded value; every slot of buf[1] must hold it
+  OCL_MAP_BUFFER(1);
+  for (size_t i = 0; i != count; ++i) {
+    OCL_ASSERT(((float *)buf_data[1])[i] == -3.3E38f);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_min_float);
+
+void compiler_workgroup_reduce_max_float(void)
+{
+  // Only the first 60 of the 64 table entries are uploaded and reduced.
+  const size_t count = 60;
+  const size_t bytes = count * sizeof(float);
+
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce", "compiler_workgroup_reduce_max_float");
+  OCL_CREATE_BUFFER(buf[0], 0, bytes, NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, bytes, NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = count;
+  locals[0] = count;
+
+  // Upload the input values into buf[0]
+  OCL_MAP_BUFFER(0);
+  memcpy(buf_data[0], test_array_float, bytes);
+  OCL_UNMAP_BUFFER(0);
+
+  // Execute the kernel
+  OCL_NDRANGE(1);
+
+  // 3.21e38f is the largest uploaded value; every slot of buf[1] must hold it
+  OCL_MAP_BUFFER(1);
+  for (size_t i = 0; i != count; ++i) {
+    OCL_ASSERT(((float *)buf_data[1])[i] == 3.21e38f);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_max_float);
+
-- 
2.5.0



More information about the Beignet mailing list