[Beignet] [PATCH 4/4] Utest: Add subgroup work item test cases

Xiuli Pan xiuli.pan at intel.com
Mon May 16 01:55:23 UTC 2016


From: Pan Xiuli <xiuli.pan at intel.com>

Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
 kernels/builtin_max_sub_group_size.cl |  7 ++++
 kernels/builtin_num_sub_groups.cl     |  7 ++++
 kernels/builtin_sub_group_id.cl       |  7 ++++
 kernels/builtin_sub_group_size.cl     |  7 ++++
 utests/CMakeLists.txt                 |  4 +++
 utests/builtin_max_sub_group_size.cpp | 60 ++++++++++++++++++++++++++++++++++
 utests/builtin_num_sub_groups.cpp     | 60 ++++++++++++++++++++++++++++++++++
 utests/builtin_sub_group_id.cpp       | 61 +++++++++++++++++++++++++++++++++++
 utests/builtin_sub_group_size.cpp     | 61 +++++++++++++++++++++++++++++++++++
 9 files changed, 274 insertions(+)
 create mode 100644 kernels/builtin_max_sub_group_size.cl
 create mode 100644 kernels/builtin_num_sub_groups.cl
 create mode 100644 kernels/builtin_sub_group_id.cl
 create mode 100644 kernels/builtin_sub_group_size.cl
 create mode 100644 utests/builtin_max_sub_group_size.cpp
 create mode 100644 utests/builtin_num_sub_groups.cpp
 create mode 100644 utests/builtin_sub_group_id.cpp
 create mode 100644 utests/builtin_sub_group_size.cpp

diff --git a/kernels/builtin_max_sub_group_size.cl b/kernels/builtin_max_sub_group_size.cl
new file mode 100644
index 0000000..c2f3b5e
--- /dev/null
+++ b/kernels/builtin_max_sub_group_size.cl
@@ -0,0 +1,7 @@
+__kernel void builtin_max_sub_group_size(global int *dst)
+{  // Each work-item stores get_max_sub_group_size() into its own element of dst.
+  int lid = get_local_linear_id();  // linear index of this work-item inside its work-group
+  int lsz = get_local_size(0) * get_local_size(1) * get_local_size(2);  // work-items per work-group
+  int gid = lid + lsz*(get_num_groups(1) * get_num_groups(0) * get_group_id(2) + get_num_groups(0) * get_group_id(1) + get_group_id(0));  // slot = local index + group size * linearized work-group index
+  dst[gid] = get_max_sub_group_size();
+}
diff --git a/kernels/builtin_num_sub_groups.cl b/kernels/builtin_num_sub_groups.cl
new file mode 100644
index 0000000..08b5673
--- /dev/null
+++ b/kernels/builtin_num_sub_groups.cl
@@ -0,0 +1,7 @@
+__kernel void builtin_num_sub_groups(global int *dst)
+{  // Each work-item stores get_num_sub_groups() into its own element of dst.
+  int lid = get_local_linear_id();  // linear index of this work-item inside its work-group
+  int lsz = get_local_size(0) * get_local_size(1) * get_local_size(2);  // work-items per work-group
+  int gid = lid + lsz*(get_num_groups(1) * get_num_groups(0) * get_group_id(2) + get_num_groups(0) * get_group_id(1) + get_group_id(0));  // slot = local index + group size * linearized work-group index
+  dst[gid] = get_num_sub_groups();
+}
diff --git a/kernels/builtin_sub_group_id.cl b/kernels/builtin_sub_group_id.cl
new file mode 100644
index 0000000..accf3ad
--- /dev/null
+++ b/kernels/builtin_sub_group_id.cl
@@ -0,0 +1,7 @@
+__kernel void builtin_sub_group_id(global int *dst)
+{  // Each work-item stores get_sub_group_id() into its own element of dst.
+  int lid = get_local_linear_id();  // linear index of this work-item inside its work-group
+  int lsz = get_local_size(0) * get_local_size(1) * get_local_size(2);  // work-items per work-group
+  int gid = lid + lsz*(get_num_groups(1) * get_num_groups(0) * get_group_id(2) + get_num_groups(0) * get_group_id(1) + get_group_id(0));  // slot = local index + group size * linearized work-group index
+  dst[gid] = get_sub_group_id();
+}
diff --git a/kernels/builtin_sub_group_size.cl b/kernels/builtin_sub_group_size.cl
new file mode 100644
index 0000000..1e034bb
--- /dev/null
+++ b/kernels/builtin_sub_group_size.cl
@@ -0,0 +1,7 @@
+__kernel void builtin_sub_group_size(global int *dst)
+{  // Each work-item stores get_sub_group_size() into its own element of dst.
+  int lid = get_local_linear_id();  // linear index of this work-item inside its work-group
+  int lsz = get_local_size(0) * get_local_size(1) * get_local_size(2);  // work-items per work-group
+  int gid = lid + lsz*(get_num_groups(1) * get_num_groups(0) * get_group_id(2) + get_num_groups(0) * get_group_id(1) + get_group_id(0));  // slot = local index + group size * linearized work-group index
+  dst[gid] = get_sub_group_size();
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 26a2264..76b50a5 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -206,6 +206,10 @@ set (utests_sources
   builtin_global_id.cpp
   builtin_num_groups.cpp
   builtin_local_id.cpp
+  builtin_sub_group_size.cpp
+  builtin_max_sub_group_size.cpp
+  builtin_num_sub_groups.cpp
+  builtin_sub_group_id.cpp
   builtin_acos_asin.cpp
   builtin_pow.cpp
   builtin_exp.cpp
diff --git a/utests/builtin_max_sub_group_size.cpp b/utests/builtin_max_sub_group_size.cpp
new file mode 100644
index 0000000..bb1423b
--- /dev/null
+++ b/utests/builtin_max_sub_group_size.cpp
@@ -0,0 +1,60 @@
+/*
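+Test for get_max_sub_group_size() (sub-group built-in; see the OpenCL cl_intel_subgroups extension).
+The local and global sizes are defined as follows; the 1-D and 2-D runs use only the leading entries: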
+  globals[0] = 4;
+  globals[1] = 9;
+  globals[2] = 16;
+  locals[0] = 2;
+  locals[1] = 3;
+  locals[2] = 4;
+*/
+
+#define udebug 0
+#include "utest_helper.hpp"
+static void builtin_max_sub_group_size(void)
+{
+  // Compares get_max_sub_group_size() as written by every work-item with the host-side query result.
+  // Setup kernel and buffers
+  size_t dim, i, buf_len = 1;
+  OCL_CREATE_KERNEL("builtin_max_sub_group_size");
+  size_t sub_sz;
+
+  // 576 = 4 * 9 * 16 ints, the element count of the largest (3-D) launch below
+  OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int)*576, NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+  for( dim=1; dim <= 3; dim++ )
+  {
+    // Used dimension i (1-based) gets local size i+1 and global size (i+1)^2.
+    buf_len = 1;
+    for(i=1; i <= dim; i++)
+    {
+      locals[i - 1] = i + 1;
+      globals[i - 1] = (i + 1) * (i + 1);
+      buf_len *= ((i + 1) * (i + 1));
+    }
+    // Dimensions not used by this launch get size 0.
+    for(i = dim+1; i <= 3; i++)
+    {
+      globals[i - 1] = 0;
+      locals[i - 1] = 0;
+    }
+
+    OCL_CALL(clGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*dim,locals,sizeof(size_t),&sub_sz,NULL);
+    // Run the kernel
+    OCL_NDRANGE( dim );
+    clFinish(queue);
+
+    OCL_MAP_BUFFER(0);
+
+    for( i = 0; i < buf_len; i++) {
+#if udebug
+      printf("got %u expect %zu\n", ((uint32_t*)buf_data[0])[i], sub_sz);
+#endif
+      OCL_ASSERT( ((uint32_t*)buf_data[0])[i] == sub_sz);
+    }
+    OCL_UNMAP_BUFFER(0);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(builtin_max_sub_group_size);
diff --git a/utests/builtin_num_sub_groups.cpp b/utests/builtin_num_sub_groups.cpp
new file mode 100644
index 0000000..78acb13
--- /dev/null
+++ b/utests/builtin_num_sub_groups.cpp
@@ -0,0 +1,60 @@
+/*
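+Test for get_num_sub_groups() (sub-group built-in; see the OpenCL cl_intel_subgroups extension).
+The local and global sizes are defined as follows; the 1-D and 2-D runs use only the leading entries: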
+  globals[0] = 4;
+  globals[1] = 9;
+  globals[2] = 16;
+  locals[0] = 2;
+  locals[1] = 3;
+  locals[2] = 4;
+*/
+
+#define udebug 0
+#include "utest_helper.hpp"
+static void builtin_num_sub_groups(void)
+{
+  // Compares get_num_sub_groups() as written by every work-item with the host-side sub-group count query.
+  // Setup kernel and buffers
+  size_t dim, i,local_sz = 1,buf_len = 1;  // NOTE(review): local_sz is updated below but never read in this test
+  OCL_CREATE_KERNEL("builtin_num_sub_groups");
+  size_t num_sub;
+
+  // 576 = 4 * 9 * 16 ints, the element count of the largest (3-D) launch below
+  OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int)*576, NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+  for( dim=1; dim <= 3; dim++ )
+  {
+    buf_len = 1;
+    local_sz = 1;
+    for(i=1; i <= dim; i++)  // used dimension i (1-based): local size i+1, global size (i+1)^2
+    {
+      locals[i - 1] = i + 1;
+      globals[i - 1] = (i + 1) * (i + 1);
+      buf_len *= ((i + 1) * (i + 1));
+      local_sz *= i + 1;
+    }
+    for(i = dim+1; i <= 3; i++)  // dimensions not used by this launch get size 0
+    {
+      globals[i - 1] = 0;
+      locals[i - 1] = 0;
+    }
+    // Host-side reference value for the local size chosen above
+    OCL_CALL(clGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR ,sizeof(size_t)*dim,locals,sizeof(size_t),&num_sub,NULL);
+    // Run the kernel
+    OCL_NDRANGE( dim );
+    clFinish(queue);
+
+    OCL_MAP_BUFFER(0);
+    // Every one of the buf_len work-items must have stored the host-reported count
+    for( i = 0; i < buf_len; i++) {
+#if udebug
+      printf("%zu get %d, expect %zu\n",i, ((uint32_t*)buf_data[0])[i], num_sub);
+#endif
+      OCL_ASSERT( ((uint32_t*)buf_data[0])[i] == num_sub);
+    }
+    OCL_UNMAP_BUFFER(0);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(builtin_num_sub_groups);
diff --git a/utests/builtin_sub_group_id.cpp b/utests/builtin_sub_group_id.cpp
new file mode 100644
index 0000000..e81d173
--- /dev/null
+++ b/utests/builtin_sub_group_id.cpp
@@ -0,0 +1,61 @@
+/*
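+Test for get_sub_group_id() (sub-group built-in; see the OpenCL cl_intel_subgroups extension).
+The local and global sizes are defined as follows; the 1-D and 2-D runs use only the leading entries: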
+  globals[0] = 4;
+  globals[1] = 9;
+  globals[2] = 16;
+  locals[0] = 2;
+  locals[1] = 3;
+  locals[2] = 4;
+*/
+
+#define udebug 0
+#include "utest_helper.hpp"
+static void builtin_sub_group_id(void)
+{
+  // Checks the get_sub_group_id() value written by every work-item against an id derived from the host-side query.
+  // Setup kernel and buffers
+  size_t dim, i,local_sz = 1,buf_len = 1;
+  OCL_CREATE_KERNEL("builtin_sub_group_id");
+  size_t max_sub_sz;
+
+  // 576 = 4 * 9 * 16 ints, the element count of the largest (3-D) launch below
+  OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int)*576, NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+  for( dim=1; dim <= 3; dim++ )
+  {
+    buf_len = 1;
+    local_sz = 1;
+    for(i=1; i <= dim; i++)  // used dimension i (1-based): local size i+1, global size (i+1)^2
+    {
+      locals[i - 1] = i + 1;
+      globals[i - 1] = (i + 1) * (i + 1);
+      buf_len *= ((i + 1) * (i + 1));
+      local_sz *= i + 1;
+    }
+    for(i = dim+1; i <= 3; i++)  // dimensions not used by this launch get size 0
+    {
+      globals[i - 1] = 0;
+      locals[i - 1] = 0;
+    }
+    // Host-side maximum sub-group size for the local size chosen above
+    OCL_CALL(clGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*dim,locals,sizeof(size_t),&max_sub_sz,NULL);
+    // Run the kernel
+    OCL_NDRANGE( dim );
+    clFinish(queue);
+
+    OCL_MAP_BUFFER(0);
+    // i % local_sz is the work-item's linear local index; the test expects sub-groups of max_sub_sz items in that order
+    for( i = 0; i < buf_len; i++) {
+      size_t expect_id = (i % local_sz) / max_sub_sz;
+#if udebug
+      printf("%zu get %d, expect %zu\n",i, ((uint32_t*)buf_data[0])[i], expect_id);
+#endif
+      OCL_ASSERT( ((uint32_t*)buf_data[0])[i] == expect_id);
+    }
+    OCL_UNMAP_BUFFER(0);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(builtin_sub_group_id);
diff --git a/utests/builtin_sub_group_size.cpp b/utests/builtin_sub_group_size.cpp
new file mode 100644
index 0000000..1dc24ed
--- /dev/null
+++ b/utests/builtin_sub_group_size.cpp
@@ -0,0 +1,61 @@
+/*
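+Test for get_sub_group_size() (sub-group built-in; see the OpenCL cl_intel_subgroups extension).
+The local and global sizes are defined as follows; the 1-D and 2-D runs use only the leading entries: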
+  globals[0] = 4;
+  globals[1] = 9;
+  globals[2] = 16;
+  locals[0] = 2;
+  locals[1] = 3;
+  locals[2] = 4;
+*/
+
+#define udebug 0
+#include "utest_helper.hpp"
+static void builtin_sub_group_size(void)
+{
+  // Checks the get_sub_group_size() value written by every work-item against a size derived from the host-side query.
+  // Setup kernel and buffers
+  size_t dim, i,local_sz = 1,buf_len = 1;
+  OCL_CREATE_KERNEL("builtin_sub_group_size");
+  size_t max_sub_sz;
+
+  // 576 = 4 * 9 * 16 ints, the element count of the largest (3-D) launch below
+  OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int)*576, NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+  for( dim=1; dim <= 3; dim++ )
+  {
+    buf_len = 1;
+    local_sz = 1;
+    for(i=1; i <= dim; i++)  // used dimension i (1-based): local size i+1, global size (i+1)^2
+    {
+      locals[i - 1] = i + 1;
+      globals[i - 1] = (i + 1) * (i + 1);
+      buf_len *= ((i + 1) * (i + 1));
+      local_sz *= i + 1;
+    }
+    for(i = dim+1; i <= 3; i++)  // dimensions not used by this launch get size 0
+    {
+      globals[i - 1] = 0;
+      locals[i - 1] = 0;
+    }
+    // Host-side maximum sub-group size for the local size chosen above
+    OCL_CALL(clGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*dim,locals,sizeof(size_t),&max_sub_sz,NULL);
+    // Run the kernel
+    OCL_NDRANGE( dim );
+    clFinish(queue);
+
+    OCL_MAP_BUFFER(0);
+    // Local indices below the last multiple of max_sub_sz expect max_sub_sz; the rest expect local_sz % max_sub_sz
+    for( i = 0; i < buf_len; i++) {
+      size_t expect_sz = (i % local_sz) < (local_sz / max_sub_sz * max_sub_sz) ? max_sub_sz : (local_sz % max_sub_sz);
+#if udebug
+      printf("%zu get %d, expect %zu\n",i, ((uint32_t*)buf_data[0])[i], expect_sz);
+#endif
+      OCL_ASSERT( ((uint32_t*)buf_data[0])[i] == expect_sz);
+    }
+    OCL_UNMAP_BUFFER(0);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(builtin_sub_group_size);
-- 
2.7.4



More information about the Beignet mailing list