[Beignet] [PATCH 12/12] Utest: Add test case for block read/write image

Xiuli Pan xiuli.pan at intel.com
Thu May 26 03:14:27 UTC 2016


From: Pan Xiuli <xiuli.pan at intel.com>

Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
 kernels/compiler_subgroup_image_block_read.cl  |  31 ++++
 kernels/compiler_subgroup_image_block_write.cl |  27 ++++
 utests/CMakeLists.txt                          |   2 +
 utests/compiler_subgroup_image_block_read.cpp  | 189 ++++++++++++++++++++++++
 utests/compiler_subgroup_image_block_write.cpp | 193 +++++++++++++++++++++++++
 5 files changed, 442 insertions(+)
 create mode 100644 kernels/compiler_subgroup_image_block_read.cl
 create mode 100644 kernels/compiler_subgroup_image_block_write.cl
 create mode 100644 utests/compiler_subgroup_image_block_read.cpp
 create mode 100644 utests/compiler_subgroup_image_block_write.cpp

diff --git a/kernels/compiler_subgroup_image_block_read.cl b/kernels/compiler_subgroup_image_block_read.cl
new file mode 100644
index 0000000..d5df6db
--- /dev/null
+++ b/kernels/compiler_subgroup_image_block_read.cl
@@ -0,0 +1,31 @@
+__kernel void compiler_subgroup_image_block_read1(image2d_t src, global uint *dst)
+{
+  /* x offset of this sub-group, scaled by sizeof(uint); y is always row 0 */
+  int2 origin = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
+  int gid = get_global_id(0);
+  dst[gid] = intel_sub_group_block_read(src,origin);
+}
+
+__kernel void compiler_subgroup_image_block_read2(image2d_t src, global uint2 *dst)
+{
+  /* x offset of this sub-group, scaled by sizeof(uint); y is always row 0 */
+  int2 origin = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
+  int gid = get_global_id(0);
+  dst[gid] = intel_sub_group_block_read2(src,origin);
+}
+
+__kernel void compiler_subgroup_image_block_read4(image2d_t src, global uint4 *dst)
+{
+  /* x offset of this sub-group, scaled by sizeof(uint); y is always row 0 */
+  int2 origin = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
+  int gid = get_global_id(0);
+  dst[gid] = intel_sub_group_block_read4(src,origin);
+}
+
+__kernel void compiler_subgroup_image_block_read8(image2d_t src, global uint8 *dst)
+{
+  /* x offset of this sub-group, scaled by sizeof(uint); y is always row 0 */
+  int2 origin = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
+  int gid = get_global_id(0);
+  dst[gid] = intel_sub_group_block_read8(src,origin);
+}
diff --git a/kernels/compiler_subgroup_image_block_write.cl b/kernels/compiler_subgroup_image_block_write.cl
new file mode 100644
index 0000000..d9b3717
--- /dev/null
+++ b/kernels/compiler_subgroup_image_block_write.cl
@@ -0,0 +1,27 @@
+__kernel void compiler_subgroup_image_block_write1(image2d_t dst, global uint *src)
+{
+  /* x offset of this sub-group, scaled by sizeof(uint); y is always row 0 */
+  int2 origin = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
+  intel_sub_group_block_write(dst,origin, src[get_global_id(0)]);
+}
+
+__kernel void compiler_subgroup_image_block_write2(image2d_t dst, global uint2 *src)
+{
+  /* x offset of this sub-group, scaled by sizeof(uint); y is always row 0 */
+  int2 origin = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
+  intel_sub_group_block_write2(dst,origin, src[get_global_id(0)]);
+}
+
+__kernel void compiler_subgroup_image_block_write4(image2d_t dst, global uint4 *src)
+{
+  /* x offset of this sub-group, scaled by sizeof(uint); y is always row 0 */
+  int2 origin = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
+  intel_sub_group_block_write4(dst,origin, src[get_global_id(0)]);
+}
+
+__kernel void compiler_subgroup_image_block_write8(image2d_t dst, global uint8 *src)
+{
+  /* x offset of this sub-group, scaled by sizeof(uint); y is always row 0 */
+  int2 origin = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
+  intel_sub_group_block_write8(dst,origin, src[get_global_id(0)]);
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 8765775..f56c497 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -172,6 +172,8 @@ set (utests_sources
   compiler_subgroup_scan_inclusive.cpp
   compiler_subgroup_buffer_block_read.cpp
   compiler_subgroup_buffer_block_write.cpp
+  compiler_subgroup_image_block_read.cpp
+  compiler_subgroup_image_block_write.cpp
   compiler_async_stride_copy.cpp
   compiler_insn_selection_min.cpp
   compiler_insn_selection_max.cpp
diff --git a/utests/compiler_subgroup_image_block_read.cpp b/utests/compiler_subgroup_image_block_read.cpp
new file mode 100644
index 0000000..daccaa5
--- /dev/null
+++ b/utests/compiler_subgroup_image_block_read.cpp
@@ -0,0 +1,189 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+
+using namespace std;
+
+/* set to 1 for debug, output of input-expected data */
+#define DEBUG_STDOUT    0
+
+/* NDRANGE */
+#define WG_GLOBAL_SIZE  32
+#define WG_LOCAL_SIZE   32
+/*
+ * Media block read reference: expected[i*VEC_SIZE + j] = texel (column i, row j).
+ */
+template<class T>
+static void compute_expected(T* input,
+                             T* expected,
+                             size_t VEC_SIZE)
+{
+  for(uint32_t row = 0; row < VEC_SIZE; row++)            /* image row == vector component */
+    for(uint32_t lane = 0; lane < WG_GLOBAL_SIZE; lane++) /* image column == work-item */
+      expected[lane * VEC_SIZE + row] = input[WG_GLOBAL_SIZE * row + lane];
+}
+
+/*
+ * Generic input-expected generate function for media block read.
+ * Allocates input and expected with new[] (WG_GLOBAL_SIZE * VEC_SIZE elements
+ * each); the caller owns both arrays and must release them with delete[].
+ */
+template<class T>
+static void generate_data(T* &input,
+                          T* &expected,
+                          size_t VEC_SIZE)
+{
+  /* allocate input and expected arrays */
+  input = new T[WG_GLOBAL_SIZE * VEC_SIZE];
+  expected = new T[WG_GLOBAL_SIZE * VEC_SIZE];
+
+  /* base value for all data types; shift in 64 bits so an 8-byte T stays defined */
+  T base_val = (uint64_t)7 << (sizeof(T) * 5 - 3);
+
+  /* seed for random inputs */
+  srand (time(NULL));
+
+#if DEBUG_STDOUT
+    cout << endl << "IN: " << endl;
+#endif
+  /* generate inputs and expected values */
+  for(uint32_t gid = 0; gid < WG_GLOBAL_SIZE * VEC_SIZE; gid++)
+  {
+    /* either 0 or -base_val (wraps for unsigned T), plus an offset in [0, 111] */
+    input[gid] = ((rand() % 2 - 1) * base_val) + (rand() % 112);
+#if DEBUG_STDOUT
+    /* output generated input */
+    cout << setw(4) << input[gid] << ", " ;
+    if((gid + 1) % 8 == 0)
+          cout << endl;
+#endif
+  }
+  /* expected values */
+  compute_expected(input, expected, VEC_SIZE);
+
+#if DEBUG_STDOUT
+  /* output expected values, one tuple per work-item */
+  cout << endl << "EXP: " << endl;
+  for(uint32_t gid = 0; gid < WG_GLOBAL_SIZE; gid++)
+  {
+    cout << "(";
+    for(uint32_t vsz = 0; vsz < VEC_SIZE; vsz++)
+      cout << setw(4) << expected[gid* VEC_SIZE + vsz] << ", " ;
+    cout << ")";
+    if((gid + 1) % 8 == 0)
+        cout << endl;
+    cout << endl;
+  }
+#endif
+}
+
+/*
+ * Generic subgroup utest function for media block read.
+ * Uploads random data as a WG_GLOBAL_SIZE x VEC_SIZE CL_R/CL_UNSIGNED_INT32
+ * image, runs the current kernel and compares buf[1] with the expectation.
+ * input/expected are allocated by generate_data() and released in here.
+ */
+template<class T>
+static void subgroup_generic(T* input,
+                             T* expected,
+                             size_t VEC_SIZE)
+{
+  cl_image_format format;
+  cl_image_desc desc;
+  memset(&desc, 0x0, sizeof(cl_image_desc));
+  memset(&format, 0x0, sizeof(cl_image_format));
+
+  /* get simd size */
+  globals[0] = WG_GLOBAL_SIZE;
+  locals[0] = WG_LOCAL_SIZE;
+  size_t SIMD_SIZE = 0;
+  OCL_CALL(clGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*1,locals,sizeof(size_t),&SIMD_SIZE,NULL);
+
+  size_t buf_sz = VEC_SIZE * WG_GLOBAL_SIZE;
+  /* input and expected data */
+  generate_data(input, expected, VEC_SIZE);
+
+  /* prepare input for datatype */
+  format.image_channel_order = CL_R;
+  format.image_channel_data_type = CL_UNSIGNED_INT32;
+  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+  desc.image_width = WG_GLOBAL_SIZE;
+  desc.image_height = VEC_SIZE;
+  desc.image_row_pitch = WG_GLOBAL_SIZE * sizeof(uint32_t);
+
+  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, input);
+  OCL_CREATE_BUFFER(buf[1], 0, buf_sz * sizeof(T), NULL);
+
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+
+  /* run the kernel on GPU */
+  OCL_NDRANGE(1);
+
+  /* check if mismatch */
+  OCL_MAP_BUFFER(1);
+  uint32_t mismatches = 0;
+
+  for (uint32_t i = 0; i < buf_sz; i++)
+    if(((T *)buf_data[1])[i] != *(expected + i))
+    {
+      /* found mismatch, increment */
+      mismatches++;
+#if DEBUG_STDOUT
+      /* output mismatch */
+      cout << "Err at " << i << ", " <<
+        ((T *)buf_data[1])[i] << " != " << *(expected + i) << endl;
+#endif
+    }
+#if DEBUG_STDOUT
+  /* output mismatch count */
+  cout << "mismatches " << mismatches << endl;
+#endif
+  OCL_UNMAP_BUFFER(1);
+
+  /* arrays come from new[]: use delete[], and before OCL_ASSERT in case it does not return */
+  delete[] input;
+  delete[] expected;
+  OCL_ASSERT(mismatches == 0);
+}
+
+/*
+ * sub_group image block read functions
+ */
+void compiler_subgroup_image_block_read1(void)
+{
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_read",
+                              "compiler_subgroup_image_block_read1");
+  /* typed NULLs only pick T = cl_uint; subgroup_generic allocates the data */
+  cl_uint *in = NULL, *ref = NULL;
+  subgroup_generic(in, ref, 1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read1);
+void compiler_subgroup_image_block_read2(void)
+{
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_read",
+                              "compiler_subgroup_image_block_read2");
+  /* typed NULLs only pick T = cl_uint; subgroup_generic allocates the data */
+  cl_uint *in = NULL, *ref = NULL;
+  subgroup_generic(in, ref, 2);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read2);
+void compiler_subgroup_image_block_read4(void)
+{
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_read",
+                              "compiler_subgroup_image_block_read4");
+  /* typed NULLs only pick T = cl_uint; subgroup_generic allocates the data */
+  cl_uint *in = NULL, *ref = NULL;
+  subgroup_generic(in, ref, 4);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read4);
+void compiler_subgroup_image_block_read8(void)
+{
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_read",
+                              "compiler_subgroup_image_block_read8");
+  /* typed NULLs only pick T = cl_uint; subgroup_generic allocates the data */
+  cl_uint *in = NULL, *ref = NULL;
+  subgroup_generic(in, ref, 8);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read8);
diff --git a/utests/compiler_subgroup_image_block_write.cpp b/utests/compiler_subgroup_image_block_write.cpp
new file mode 100644
index 0000000..9ef2127
--- /dev/null
+++ b/utests/compiler_subgroup_image_block_write.cpp
@@ -0,0 +1,193 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+
+using namespace std;
+
+/* set to 1 for debug, output of input-expected data */
+#define DEBUG_STDOUT    0
+
+/* NDRANGE */
+#define WG_GLOBAL_SIZE  32
+#define WG_LOCAL_SIZE   32
+/*
+ * Media block write reference: texel (column i, row j) = input[i*VEC_SIZE + j].
+ */
+template<class T>
+static void compute_expected(T* input,
+                             T* expected,
+                             size_t VEC_SIZE)
+{
+  for(uint32_t row = 0; row < VEC_SIZE; row++)            /* image row == vector component */
+    for(uint32_t lane = 0; lane < WG_GLOBAL_SIZE; lane++) /* image column == work-item */
+      expected[WG_GLOBAL_SIZE * row + lane] = input[lane * VEC_SIZE + row];
+}
+
+/*
+ * Generic input-expected generate function for media block write.
+ * Allocates input and expected with new[] (WG_GLOBAL_SIZE * VEC_SIZE elements
+ * each); the caller owns both arrays and must release them with delete[].
+ */
+template<class T>
+static void generate_data(T* &input,
+                          T* &expected,
+                          size_t VEC_SIZE)
+{
+  /* allocate input and expected arrays */
+  input = new T[WG_GLOBAL_SIZE * VEC_SIZE];
+  expected = new T[WG_GLOBAL_SIZE * VEC_SIZE];
+
+  /* base value for all data types; shifted in 64 bits even where long is 32-bit */
+  T base_val = (uint64_t)7 << (sizeof(T) * 5 - 3);
+
+  /* seed for random inputs */
+  srand (time(NULL));
+
+#if DEBUG_STDOUT
+    cout << endl << "IN: " << endl;
+#endif
+  /* generate inputs and expected values */
+  for(uint32_t gid = 0; gid < WG_GLOBAL_SIZE * VEC_SIZE; gid++)
+  {
+    /* either 0 or -base_val (wraps for unsigned T), plus an offset in [0, 111] */
+    input[gid] = ((rand() % 2 - 1) * base_val) + (rand() % 112);
+#if DEBUG_STDOUT
+    /* output generated input */
+    cout << setw(4) << input[gid] << ", " ;
+    if((gid + 1) % 8 == 0)
+          cout << endl;
+#endif
+  }
+  /* expected values */
+  compute_expected(input, expected, VEC_SIZE);
+
+#if DEBUG_STDOUT
+  /* output expected image, one tuple per work-item (expected is row-major) */
+  cout << endl << "EXP: " << endl;
+  for(uint32_t gid = 0; gid < WG_GLOBAL_SIZE; gid++)
+  {
+    cout << "(";
+    for(uint32_t vsz = 0; vsz < VEC_SIZE; vsz++)
+      cout << setw(4) << expected[WG_GLOBAL_SIZE * vsz + gid] << ", " ;
+    cout << ")";
+    if((gid + 1) % 8 == 0)
+        cout << endl;
+    cout << endl;
+  }
+#endif
+}
+
+/*
+ * Generic subgroup utest function for media block write.
+ * Runs the current kernel on random data in buf[1] and compares image buf[0],
+ * row by row, with the expectation. input/expected are released in here.
+ */
+template<class T>
+static void subgroup_generic(T* input,
+                             T* expected,
+                             size_t VEC_SIZE)
+{
+  cl_image_format format;
+  cl_image_desc desc;
+  memset(&desc, 0x0, sizeof(cl_image_desc));
+  memset(&format, 0x0, sizeof(cl_image_format));
+
+  /* get simd size */
+  globals[0] = WG_GLOBAL_SIZE;
+  locals[0] = WG_LOCAL_SIZE;
+  size_t SIMD_SIZE = 0;
+  OCL_CALL(clGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*1,locals,sizeof(size_t),&SIMD_SIZE,NULL);
+
+  size_t buf_sz = VEC_SIZE * WG_GLOBAL_SIZE;
+  /* input and expected data */
+  generate_data(input, expected, VEC_SIZE);
+
+  /* prepare input for datatype */
+  format.image_channel_order = CL_R;
+  format.image_channel_data_type = CL_UNSIGNED_INT32;
+  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+  desc.image_width = WG_GLOBAL_SIZE;
+  desc.image_height = VEC_SIZE;
+  desc.image_row_pitch = 0;
+
+  OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, buf_sz * sizeof(T), NULL);
+
+  /* set input data for GPU */
+  OCL_MAP_BUFFER(1);
+  memcpy(buf_data[1], input,  buf_sz* sizeof(T));
+  OCL_UNMAP_BUFFER(1);
+
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+
+  /* run the kernel on GPU */
+  OCL_NDRANGE(1);
+
+  /* check if mismatch; rows are image_row_pitch apart, converted to elements of T */
+  OCL_MAP_BUFFER_GTT(0);
+  uint32_t mismatches = 0;
+  size_t image_row_pitch = 0;
+  OCL_CALL(clGetImageInfo, buf[0], CL_IMAGE_ROW_PITCH, sizeof(image_row_pitch), &image_row_pitch, NULL);
+  image_row_pitch /= sizeof(T);
+  T *out = (T *)buf_data[0];
+
+  for (uint32_t vsz = 0; vsz < VEC_SIZE; vsz++)
+    for (uint32_t i = 0; i < WG_GLOBAL_SIZE; i++)
+      if (out[vsz * image_row_pitch + i] != expected[WG_GLOBAL_SIZE * vsz + i]) {
+        /* found mismatch, increment */
+        mismatches++;
+#if DEBUG_STDOUT
+        /* output mismatch */
+        cout << "Err at " << WG_GLOBAL_SIZE * vsz + i << ", " << out[vsz * image_row_pitch + i]
+             << " != " << expected[WG_GLOBAL_SIZE * vsz + i] << endl;
+#endif
+      }
+  OCL_UNMAP_BUFFER_GTT(0);
+
+  /* arrays come from new[]: use delete[], and before OCL_ASSERT in case it does not return */
+  delete[] input;
+  delete[] expected;
+  OCL_ASSERT(mismatches == 0);
+}
+
+/*
+ * sub_group image block write functions
+ */
+void compiler_subgroup_image_block_write1(void)
+{
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_write",
+                              "compiler_subgroup_image_block_write1");
+  /* typed NULLs only pick T = cl_uint; subgroup_generic allocates the data */
+  cl_uint *in = NULL, *ref = NULL;
+  subgroup_generic(in, ref, 1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write1);
+void compiler_subgroup_image_block_write2(void)
+{
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_write",
+                              "compiler_subgroup_image_block_write2");
+  /* typed NULLs only pick T = cl_uint; subgroup_generic allocates the data */
+  cl_uint *in = NULL, *ref = NULL;
+  subgroup_generic(in, ref, 2);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write2);
+void compiler_subgroup_image_block_write4(void)
+{
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_write",
+                              "compiler_subgroup_image_block_write4");
+  /* typed NULLs only pick T = cl_uint; subgroup_generic allocates the data */
+  cl_uint *in = NULL, *ref = NULL;
+  subgroup_generic(in, ref, 4);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write4);
+void compiler_subgroup_image_block_write8(void)
+{
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_write",
+                              "compiler_subgroup_image_block_write8");
+  /* typed NULLs only pick T = cl_uint; subgroup_generic allocates the data */
+  cl_uint *in = NULL, *ref = NULL;
+  subgroup_generic(in, ref, 8);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write8);
-- 
2.7.4



More information about the Beignet mailing list