[Beignet] [PATCH 04/12] Utest: Add subgroup work item test cases
Xiuli Pan
xiuli.pan at intel.com
Thu May 26 03:14:19 UTC 2016
From: Pan Xiuli <xiuli.pan at intel.com>
Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
kernels/builtin_max_sub_group_size.cl | 7 ++++
kernels/builtin_num_sub_groups.cl | 7 ++++
kernels/builtin_sub_group_id.cl | 7 ++++
kernels/builtin_sub_group_size.cl | 7 ++++
utests/CMakeLists.txt | 4 +++
utests/builtin_max_sub_group_size.cpp | 60 ++++++++++++++++++++++++++++++++++
utests/builtin_num_sub_groups.cpp | 60 ++++++++++++++++++++++++++++++++++
utests/builtin_sub_group_id.cpp | 61 +++++++++++++++++++++++++++++++++++
utests/builtin_sub_group_size.cpp | 61 +++++++++++++++++++++++++++++++++++
9 files changed, 274 insertions(+)
create mode 100644 kernels/builtin_max_sub_group_size.cl
create mode 100644 kernels/builtin_num_sub_groups.cl
create mode 100644 kernels/builtin_sub_group_id.cl
create mode 100644 kernels/builtin_sub_group_size.cl
create mode 100644 utests/builtin_max_sub_group_size.cpp
create mode 100644 utests/builtin_num_sub_groups.cpp
create mode 100644 utests/builtin_sub_group_id.cpp
create mode 100644 utests/builtin_sub_group_size.cpp
diff --git a/kernels/builtin_max_sub_group_size.cl b/kernels/builtin_max_sub_group_size.cl
new file mode 100644
index 0000000..c2f3b5e
--- /dev/null
+++ b/kernels/builtin_max_sub_group_size.cl
@@ -0,0 +1,7 @@
+__kernel void builtin_max_sub_group_size(global int *dst)
+{
+ int lid = get_local_linear_id();
+ int lsz = get_local_size(0) * get_local_size(1) * get_local_size(2);
+ int gid = lid + lsz*(get_num_groups(1) * get_num_groups(0) * get_group_id(2) + get_num_groups(0) * get_group_id(1) + get_group_id(0));
+ dst[gid] = get_max_sub_group_size();
+}
diff --git a/kernels/builtin_num_sub_groups.cl b/kernels/builtin_num_sub_groups.cl
new file mode 100644
index 0000000..08b5673
--- /dev/null
+++ b/kernels/builtin_num_sub_groups.cl
@@ -0,0 +1,7 @@
+__kernel void builtin_num_sub_groups(global int *dst)
+{
+ int lid = get_local_linear_id();
+ int lsz = get_local_size(0) * get_local_size(1) * get_local_size(2);
+ int gid = lid + lsz*(get_num_groups(1) * get_num_groups(0) * get_group_id(2) + get_num_groups(0) * get_group_id(1) + get_group_id(0));
+ dst[gid] = get_num_sub_groups();
+}
diff --git a/kernels/builtin_sub_group_id.cl b/kernels/builtin_sub_group_id.cl
new file mode 100644
index 0000000..accf3ad
--- /dev/null
+++ b/kernels/builtin_sub_group_id.cl
@@ -0,0 +1,7 @@
+__kernel void builtin_sub_group_id(global int *dst)
+{
+ int lid = get_local_linear_id();
+ int lsz = get_local_size(0) * get_local_size(1) * get_local_size(2);
+ int gid = lid + lsz*(get_num_groups(1) * get_num_groups(0) * get_group_id(2) + get_num_groups(0) * get_group_id(1) + get_group_id(0));
+ dst[gid] = get_sub_group_id();
+}
diff --git a/kernels/builtin_sub_group_size.cl b/kernels/builtin_sub_group_size.cl
new file mode 100644
index 0000000..1e034bb
--- /dev/null
+++ b/kernels/builtin_sub_group_size.cl
@@ -0,0 +1,7 @@
+__kernel void builtin_sub_group_size(global int *dst)
+{
+ int lid = get_local_linear_id();
+ int lsz = get_local_size(0) * get_local_size(1) * get_local_size(2);
+ int gid = lid + lsz*(get_num_groups(1) * get_num_groups(0) * get_group_id(2) + get_num_groups(0) * get_group_id(1) + get_group_id(0));
+ dst[gid] = get_sub_group_size();
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 26a2264..76b50a5 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -206,6 +206,10 @@ set (utests_sources
builtin_global_id.cpp
builtin_num_groups.cpp
builtin_local_id.cpp
+ builtin_sub_group_size.cpp
+ builtin_max_sub_group_size.cpp
+ builtin_num_sub_groups.cpp
+ builtin_sub_group_id.cpp
builtin_acos_asin.cpp
builtin_pow.cpp
builtin_exp.cpp
diff --git a/utests/builtin_max_sub_group_size.cpp b/utests/builtin_max_sub_group_size.cpp
new file mode 100644
index 0000000..bb1423b
--- /dev/null
+++ b/utests/builtin_max_sub_group_size.cpp
@@ -0,0 +1,60 @@
+/*
+According to the OpenCL cl_intel_subgroups.
+Now define local and global size as following:
+ globals[0] = 4;
+ globals[1] = 9;
+ globals[2] = 16;
+ locals[0] = 2;
+ locals[1] = 3;
+ locals[2] = 4;
+*/
+
+#define udebug 0
+#include "utest_helper.hpp"
+static void builtin_max_sub_group_size(void)
+{
+
+ // Setup kernel and buffers
+ size_t dim, i,local_sz = 1,buf_len = 1;
+ OCL_CREATE_KERNEL("builtin_max_sub_group_size");
+ size_t sub_sz;
+
+
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int)*576, NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+ for( dim=1; dim <= 3; dim++ )
+ {
+ buf_len = 1;
+ local_sz = 1;
+ for(i=1; i <= dim; i++)
+ {
+ locals[i - 1] = i + 1;
+ globals[i - 1] = (i + 1) * (i + 1);
+ buf_len *= ((i + 1) * (i + 1));
+ local_sz *= i + 1;
+ }
+ for(i = dim+1; i <= 3; i++)
+ {
+ globals[i - 1] = 0;
+ locals[i - 1] = 0;
+ }
+
+ OCL_CALL(clGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*dim,locals,sizeof(size_t),&sub_sz,NULL);
+ // Run the kernel
+ OCL_NDRANGE( dim );
+ clFinish(queue);
+
+ OCL_MAP_BUFFER(0);
+
+ for( i = 0; i < buf_len; i++) {
+#if udebug
+ printf("got %d expect %d\n", ((uint32_t*)buf_data[0])[i], sub_sz);
+#endif
+ OCL_ASSERT( ((uint32_t*)buf_data[0])[i] == sub_sz);
+ }
+ OCL_UNMAP_BUFFER(0);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(builtin_max_sub_group_size);
diff --git a/utests/builtin_num_sub_groups.cpp b/utests/builtin_num_sub_groups.cpp
new file mode 100644
index 0000000..78acb13
--- /dev/null
+++ b/utests/builtin_num_sub_groups.cpp
@@ -0,0 +1,60 @@
+/*
+According to the OpenCL cl_intel_subgroups.
+Now define local and global size as following:
+ globals[0] = 4;
+ globals[1] = 9;
+ globals[2] = 16;
+ locals[0] = 2;
+ locals[1] = 3;
+ locals[2] = 4;
+*/
+
+#define udebug 0
+#include "utest_helper.hpp"
+static void builtin_num_sub_groups(void)
+{
+
+ // Setup kernel and buffers
+ size_t dim, i,local_sz = 1,buf_len = 1;
+ OCL_CREATE_KERNEL("builtin_num_sub_groups");
+ size_t num_sub;
+
+
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int)*576, NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+ for( dim=1; dim <= 3; dim++ )
+ {
+ buf_len = 1;
+ local_sz = 1;
+ for(i=1; i <= dim; i++)
+ {
+ locals[i - 1] = i + 1;
+ globals[i - 1] = (i + 1) * (i + 1);
+ buf_len *= ((i + 1) * (i + 1));
+ local_sz *= i + 1;
+ }
+ for(i = dim+1; i <= 3; i++)
+ {
+ globals[i - 1] = 0;
+ locals[i - 1] = 0;
+ }
+
+ OCL_CALL(clGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR ,sizeof(size_t)*dim,locals,sizeof(size_t),&num_sub,NULL);
+ // Run the kernel
+ OCL_NDRANGE( dim );
+ clFinish(queue);
+
+ OCL_MAP_BUFFER(0);
+
+ for( i = 0; i < buf_len; i++) {
+#if udebug
+ printf("%zu get %d, expect %zu\n",i, ((uint32_t*)buf_data[0])[i], num_sub);
+#endif
+ OCL_ASSERT( ((uint32_t*)buf_data[0])[i] == num_sub);
+ }
+ OCL_UNMAP_BUFFER(0);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(builtin_num_sub_groups);
diff --git a/utests/builtin_sub_group_id.cpp b/utests/builtin_sub_group_id.cpp
new file mode 100644
index 0000000..e81d173
--- /dev/null
+++ b/utests/builtin_sub_group_id.cpp
@@ -0,0 +1,61 @@
+/*
+According to the OpenCL cl_intel_subgroups.
+Now define local and global size as following:
+ globals[0] = 4;
+ globals[1] = 9;
+ globals[2] = 16;
+ locals[0] = 2;
+ locals[1] = 3;
+ locals[2] = 4;
+*/
+
+#define udebug 0
+#include "utest_helper.hpp"
+static void builtin_sub_group_id(void)
+{
+
+ // Setup kernel and buffers
+ size_t dim, i,local_sz = 1,buf_len = 1;
+ OCL_CREATE_KERNEL("builtin_sub_group_id");
+ size_t max_sub_sz;
+
+
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int)*576, NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+ for( dim=1; dim <= 3; dim++ )
+ {
+ buf_len = 1;
+ local_sz = 1;
+ for(i=1; i <= dim; i++)
+ {
+ locals[i - 1] = i + 1;
+ globals[i - 1] = (i + 1) * (i + 1);
+ buf_len *= ((i + 1) * (i + 1));
+ local_sz *= i + 1;
+ }
+ for(i = dim+1; i <= 3; i++)
+ {
+ globals[i - 1] = 0;
+ locals[i - 1] = 0;
+ }
+
+ OCL_CALL(clGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*dim,locals,sizeof(size_t),&max_sub_sz,NULL);
+ // Run the kernel
+ OCL_NDRANGE( dim );
+ clFinish(queue);
+
+ OCL_MAP_BUFFER(0);
+
+ for( i = 0; i < buf_len; i++) {
+ size_t expect_id = (i % local_sz) / max_sub_sz;
+#if udebug
+ printf("%zu get %d, expect %zu\n",i, ((uint32_t*)buf_data[0])[i], expect_id);
+#endif
+ OCL_ASSERT( ((uint32_t*)buf_data[0])[i] == expect_id);
+ }
+ OCL_UNMAP_BUFFER(0);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(builtin_sub_group_id);
diff --git a/utests/builtin_sub_group_size.cpp b/utests/builtin_sub_group_size.cpp
new file mode 100644
index 0000000..1dc24ed
--- /dev/null
+++ b/utests/builtin_sub_group_size.cpp
@@ -0,0 +1,61 @@
+/*
+According to the OpenCL cl_intel_subgroups.
+Now define local and global size as following:
+ globals[0] = 4;
+ globals[1] = 9;
+ globals[2] = 16;
+ locals[0] = 2;
+ locals[1] = 3;
+ locals[2] = 4;
+*/
+
+#define udebug 0
+#include "utest_helper.hpp"
+static void builtin_sub_group_size(void)
+{
+
+ // Setup kernel and buffers
+ size_t dim, i,local_sz = 1,buf_len = 1;
+ OCL_CREATE_KERNEL("builtin_sub_group_size");
+ size_t max_sub_sz;
+
+
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int)*576, NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+ for( dim=1; dim <= 3; dim++ )
+ {
+ buf_len = 1;
+ local_sz = 1;
+ for(i=1; i <= dim; i++)
+ {
+ locals[i - 1] = i + 1;
+ globals[i - 1] = (i + 1) * (i + 1);
+ buf_len *= ((i + 1) * (i + 1));
+ local_sz *= i + 1;
+ }
+ for(i = dim+1; i <= 3; i++)
+ {
+ globals[i - 1] = 0;
+ locals[i - 1] = 0;
+ }
+
+ OCL_CALL(clGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*dim,locals,sizeof(size_t),&max_sub_sz,NULL);
+ // Run the kernel
+ OCL_NDRANGE( dim );
+ clFinish(queue);
+
+ OCL_MAP_BUFFER(0);
+
+ for( i = 0; i < buf_len; i++) {
+ size_t expect_sz = (i % local_sz) < (local_sz / max_sub_sz * max_sub_sz) ? max_sub_sz : (local_sz % max_sub_sz);
+#if udebug
+ printf("%zu get %d, expect %zu\n",i, ((uint32_t*)buf_data[0])[i], expect_sz);
+#endif
+ OCL_ASSERT( ((uint32_t*)buf_data[0])[i] == expect_sz);
+ }
+ OCL_UNMAP_BUFFER(0);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(builtin_sub_group_size);
--
2.7.4
More information about the Beignet
mailing list