[Beignet] [PATCH 1/2] Utest: Add workgroup broadcast tests

Grigore Lupescu grigore.lupescu at intel.com
Tue Apr 5 18:06:51 UTC 2016


From: Grigore Lupescu <grigore.lupescu at intel.com>

Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
 kernels/compiler_workgroup_broadcast.cl | 128 +++++++++++++-
 utests/compiler_workgroup_broadcast.cpp | 289 ++++++++++++++++++++++++++++----
 2 files changed, 380 insertions(+), 37 deletions(-)

diff --git a/kernels/compiler_workgroup_broadcast.cl b/kernels/compiler_workgroup_broadcast.cl
index 4df74e3..47ff0b7 100644
--- a/kernels/compiler_workgroup_broadcast.cl
+++ b/kernels/compiler_workgroup_broadcast.cl
@@ -1,9 +1,121 @@
-kernel void compiler_workgroup_broadcast(global uint *src, global uint *dst) {
-    uint val = src[get_group_id(0)*(get_local_size(1) * get_local_size(0))
-	+ get_group_id(1)*(get_local_size(1) * get_local_size(0) * get_num_groups(0))
-	+ get_local_id(1)* get_local_size(0) + get_local_id(0)];
-    uint bv = work_group_broadcast(val, 8, 3);
-    dst[get_group_id(0)*(get_local_size(1) * get_local_size(0))
-	+ get_group_id(1)*(get_local_size(1) * get_local_size(0) * get_num_groups(0))
-	+ get_local_id(1)* get_local_size(0) + get_local_id(0)] = bv;
+/*
+ * Workgroup broadcast 1D functions
+ */
+
+kernel void compiler_workgroup_broadcast_1D_int(global int *src,
+                                                global int *dst,
+                                                uint wg_local_x,
+                                                uint wg_local_y,
+                                                uint wg_local_z)
+{
+  uint offset = 0;
+  uint index = offset + get_global_id(0);
+
+  int val = src[index];
+  int broadcast_val = work_group_broadcast(val,
+                                            wg_local_x);
+  dst[index] = broadcast_val;
+}
+
+kernel void compiler_workgroup_broadcast_1D_long(global long *src,
+                                                global long *dst,
+                                                uint wg_local_x,
+                                                uint wg_local_y,
+                                                uint wg_local_z)
+{
+  uint offset = 0;
+  uint index = offset + get_global_id(0);
+
+  long val = src[index];
+  long broadcast_val = work_group_broadcast(val,
+                                            wg_local_x);
+  dst[index] = broadcast_val;
+}
+
+/*
+ * Workgroup broadcast 2D functions
+ */
+kernel void compiler_workgroup_broadcast_2D_int(global int *src,
+                                                global int *dst,
+                                                uint wg_local_x,
+                                                uint wg_local_y,
+                                                uint wg_local_z)
+{
+  uint lsize = get_local_size(0) * get_local_size(1);
+  uint offset = get_group_id(0) * lsize +
+      get_group_id(1) * get_num_groups(0) * lsize;
+  uint index = offset + get_local_id(0) +
+      get_local_id(1) * get_local_size(0);
+
+  int val = src[index];
+  int broadcast_val = work_group_broadcast(val,
+                                            wg_local_x,
+                                            wg_local_y);
+  dst[index] = broadcast_val;
+}
+
+kernel void compiler_workgroup_broadcast_2D_long(global long *src,
+                                                global long *dst,
+                                                uint wg_local_x,
+                                                uint wg_local_y,
+                                                uint wg_local_z)
+{
+  uint lsize = get_local_size(0) * get_local_size(1);
+  uint offset = get_group_id(0) * lsize +
+      get_group_id(1) * get_num_groups(0) * lsize;
+  uint index = offset + get_local_id(0) +
+      get_local_id(1) * get_local_size(0);
+
+  long val = src[index];
+  long broadcast_val = work_group_broadcast(val,
+                                            wg_local_x,
+                                            wg_local_y);
+  dst[index] = broadcast_val;
+}
+
+/*
+ * Workgroup broadcast 3D functions
+ */
+kernel void compiler_workgroup_broadcast_3D_int(global int *src,
+                                                global int *dst,
+                                                uint wg_local_x,
+                                                uint wg_local_y,
+                                                uint wg_local_z)
+{
+  uint lsize = get_local_size(0) * get_local_size(1) * get_local_size(2);
+  uint offset = get_group_id(0) * lsize +
+      get_group_id(1) * get_num_groups(0) * lsize +
+      get_group_id(2) * get_num_groups(1) * get_num_groups(0) * lsize;
+  uint index = offset + get_local_id(0) +
+      get_local_id(1) * get_local_size(0) +
+      get_local_id(2) * get_local_size(1) * get_local_size(0);
+
+  int val = src[index];
+  int broadcast_val = work_group_broadcast(val,
+                                            wg_local_x,
+                                            wg_local_y,
+                                            wg_local_z);
+  dst[index] = broadcast_val;
+}
+
+kernel void compiler_workgroup_broadcast_3D_long(global long *src,
+                                                global long *dst,
+                                                uint wg_local_x,
+                                                uint wg_local_y,
+                                                uint wg_local_z)
+{
+  uint lsize = get_local_size(0) * get_local_size(1) * get_local_size(2);
+  uint offset = get_group_id(0) * lsize +
+      get_group_id(1) * get_num_groups(0) * lsize +
+      get_group_id(2) * get_num_groups(0) * get_num_groups(1) * lsize;
+  uint index = offset + get_local_id(0) +
+      get_local_id(1) * get_local_size(0) +
+      get_local_id(2) * get_local_size(1) * get_local_size(0);
+
+  long val = src[index];
+  long broadcast_val = work_group_broadcast(val,
+                                            wg_local_x,
+                                            wg_local_y,
+                                            wg_local_z);
+  dst[index] = broadcast_val;
 }
diff --git a/utests/compiler_workgroup_broadcast.cpp b/utests/compiler_workgroup_broadcast.cpp
index d45e5d8..0a45015 100644
--- a/utests/compiler_workgroup_broadcast.cpp
+++ b/utests/compiler_workgroup_broadcast.cpp
@@ -3,45 +3,276 @@
 #include <iostream>
 #include "utest_helper.hpp"
 
-void compiler_workgroup_broadcast(void)
+using namespace std;
+
+/* NDRANGE */
+#define WG_GLOBAL_SIZE_X        16
+#define WG_GLOBAL_SIZE_Y        4
+#define WG_GLOBAL_SIZE_Z        4
+
+#define WG_LOCAL_SIZE_X         16
+#define WG_LOCAL_SIZE_Y         2
+#define WG_LOCAL_SIZE_Z         2
+
+/* TODO debug bellow case, lid2 always stays 0, instead of 0 and 1
+ *
+ * #define WG_GLOBAL_SIZE_X        16
+ * #define WG_GLOBAL_SIZE_Y        1
+ * #define WG_GLOBAL_SIZE_Z        4
+ *
+ * #define WG_LOCAL_SIZE_X         16
+ * #define WG_LOCAL_SIZE_Y         1
+ * #define WG_LOCAL_SIZE_Z         2
+ */
+
+#define WG_LOCAL_X    5
+#define WG_LOCAL_Y    0
+#define WG_LOCAL_Z    0
+
+enum WG_BROADCAST
+{
+  WG_BROADCAST_1D,
+  WG_BROADCAST_2D,
+  WG_BROADCAST_3D
+};
+
+/*
+ * Generic compute-expected on CPU function for any workgroup type
+ * and any variable type
+ */
+template<class T>
+static void compute_expected(WG_BROADCAST wg_broadcast,
+                             T* input,
+                             T* expected,
+                             uint32_t wg_global_size,
+                             uint32_t wg_local_size)
 {
-  const size_t n0 = 32;
-  const size_t n1 = 16;
-  const size_t n = n0 * n1;
-  uint32_t src[n];
+  if(wg_broadcast == WG_BROADCAST_1D)
+  {
+    for(uint32_t i = 0; i < wg_local_size; i++)
+      expected[i] = input[WG_LOCAL_X];
+  }
+  else if(wg_broadcast == WG_BROADCAST_2D)
+  {
+    for(uint32_t i = 0; i < wg_local_size; i++)
+      expected[i] =
+          input[WG_LOCAL_X +
+                WG_LOCAL_Y * WG_LOCAL_SIZE_X];
+  }
+  else if(wg_broadcast == WG_BROADCAST_3D)
+  {
+    for(uint32_t i = 0; i < wg_local_size; i++)
+      expected[i] =
+        input[WG_LOCAL_X +
+              WG_LOCAL_Y * WG_LOCAL_SIZE_X +
+              WG_LOCAL_Z * WG_LOCAL_SIZE_X * WG_LOCAL_SIZE_Y];
+  }
+}
+
+/*
+ * Generic input-expected generate function for any workgroup type
+ * and any variable type
+ */
+template<class T>
+static void generate_data(WG_BROADCAST wg_broadcast,
+                   T* &input,
+                   T* &expected,
+                   uint32_t &wg_global_size,
+                   uint32_t &wg_local_size)
+{
+  if(wg_broadcast == WG_BROADCAST_1D)
+  {
+    wg_global_size = WG_GLOBAL_SIZE_X;
+    wg_local_size = WG_LOCAL_SIZE_X;
+  }
+  else if(wg_broadcast == WG_BROADCAST_2D)
+  {
+    wg_global_size = WG_GLOBAL_SIZE_X * WG_GLOBAL_SIZE_Y;
+    wg_local_size = WG_LOCAL_SIZE_X * WG_LOCAL_SIZE_Y;
+  }
+  else if(wg_broadcast == WG_BROADCAST_3D)
+  {
+    wg_global_size = WG_GLOBAL_SIZE_X * WG_GLOBAL_SIZE_Y * WG_GLOBAL_SIZE_Z;
+    wg_local_size = WG_LOCAL_SIZE_X * WG_LOCAL_SIZE_Y * WG_LOCAL_SIZE_Z;
+  }
+
+  /* allocate input and expected arrays */
+  input = new T[wg_global_size];
+  expected = new T[wg_global_size];
+
+  /* base value for all datatypes */
+  T base_val = (long)7 << (sizeof(T) * 5 - 3);
+
+  /* seed for random inputs */
+  srand (time(NULL));
+
+  /* generate inputs and expected values */
+  for(uint32_t gid = 0; gid < wg_global_size; gid += wg_local_size)
+  {
+    /* input values */
+    cout << endl << "IN: " << endl;
+    for(uint32_t lid = 0; lid < wg_local_size; lid++){
+      input[gid + lid] = (rand() % 2 - 1) * base_val + (rand() % 112);
+      cout << setw(4) << input[gid + lid] << ", " ;
+      if((lid + 1) % 8 == 0)
+        cout << endl;
+    }
+
+    /* expected values */
+    cout << endl << "EXP: " << endl;
+    compute_expected(wg_broadcast, input + gid, expected + gid, wg_global_size, wg_local_size);
+    for(uint32_t lid = 0; lid < wg_local_size; lid++){
+      cout << setw(4) << expected[gid + lid] << ", " ;
+      if((lid + 1) % 8 == 0)
+        cout << endl;
+    }
+  }
+}
+
+/*
+ * Generic workgroup utest function for any workgroup type
+ * and any variable type
+ */
+template<class T>
+static void workgroup_generic(WG_BROADCAST wg_broadcast,
+                       T* input,
+                       T* expected)
+{
+  uint32_t wg_global_size = 0;
+  uint32_t wg_local_size = 0;
+
+  cl_uint wg_local_x = WG_LOCAL_X;
+  cl_uint wg_local_y = WG_LOCAL_Y;
+  cl_uint wg_local_z = WG_LOCAL_Z;
+
+  /* input and expected data */
+  generate_data(wg_broadcast, input, expected, wg_global_size, wg_local_size);
 
-  // Setup kernel and buffers
-  OCL_CREATE_KERNEL("compiler_workgroup_broadcast");
-  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
-  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+  /* prepare input for datatype */
+  OCL_CREATE_BUFFER(buf[0], 0, wg_global_size * sizeof(T), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, wg_global_size * sizeof(T), NULL);
   OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
   OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
-  globals[0] = n0;
-  globals[1] = n1;
-  locals[0] = 16;
-  locals[1] = 16;
+  OCL_SET_ARG(2, sizeof(cl_uint), &wg_local_x);
+  OCL_SET_ARG(3, sizeof(cl_uint), &wg_local_y);
+  OCL_SET_ARG(4, sizeof(cl_uint), &wg_local_z);
 
-  for (int32_t i = 0; i < (int32_t) n; ++i) {
-    src[i] = i;
-  }
+  /* set input data for GPU */
   OCL_MAP_BUFFER(0);
-  memcpy(buf_data[0], src, sizeof(src));
+  memcpy(buf_data[0], input, wg_global_size * sizeof(T));
   OCL_UNMAP_BUFFER(0);
 
-  // Run the kernel on GPU
-  OCL_NDRANGE(2);
-
-  // Compare
-  OCL_MAP_BUFFER(1);
-  for (int32_t i = 0; i < (int32_t) n/2; ++i) {
-//    printf("%u ", ((uint32_t *)buf_data[1])[i]);
-    OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 56);
+  /* run the kernel on GPU */
+  if(wg_broadcast == WG_BROADCAST_1D)
+  {
+    globals[0] = WG_GLOBAL_SIZE_X;
+    locals[0] = WG_LOCAL_SIZE_X;
+    OCL_NDRANGE(1);
+  }
+  else if(wg_broadcast == WG_BROADCAST_2D)
+  {
+    globals[0] = WG_GLOBAL_SIZE_X;
+    locals[0] = WG_LOCAL_SIZE_X;
+    globals[1] = WG_GLOBAL_SIZE_Y;
+    locals[1] = WG_LOCAL_SIZE_Y;
+    OCL_NDRANGE(2);
   }
-  for (int32_t i = n/2; i < (int32_t) n; ++i) {
-    //	printf("%u ", ((uint32_t *)buf_data[1])[i]);
-    OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 312);
+  else if(wg_broadcast == WG_BROADCAST_3D)
+  {
+    globals[0] = WG_GLOBAL_SIZE_X;
+    locals[0] = WG_LOCAL_SIZE_X;
+    globals[1] = WG_GLOBAL_SIZE_Y;
+    locals[1] = WG_LOCAL_SIZE_Y;
+    globals[2] = WG_GLOBAL_SIZE_Z;
+    locals[2] = WG_LOCAL_SIZE_Y;
+    OCL_NDRANGE(3);
   }
+
+  /* check if mismatch */
+  OCL_MAP_BUFFER(1);
+  uint32_t mistmatches = 0;
+  cout << endl << endl << "CHECK" << endl;
+  for (uint32_t i = 0; i < wg_global_size; i++)
+    if(((T *)buf_data[1])[i] != *(expected + i)){
+      cout << "Err at " << i << ", " <<
+        ((T *)buf_data[1])[i] << " != " << *(expected + i) << endl;
+      mistmatches++;
+    }
+  cout << "MISTMATCHES " << mistmatches << endl;
+
+  cout << std::flush;
   OCL_UNMAP_BUFFER(1);
+
+  OCL_ASSERT(mistmatches == 0);
 }
 
-MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast);
+
+/*
+ * Workgroup broadcast 1D functions
+ */
+void compiler_workgroup_broadcast_1D_int(void)
+{
+  cl_int *input = NULL;
+  cl_int *expected = NULL;
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
+                              "compiler_workgroup_broadcast_1D_int");
+  workgroup_generic(WG_BROADCAST_1D, input, expected);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_1D_int);
+
+void compiler_workgroup_broadcast_1D_long(void)
+{
+  cl_long *input = NULL;
+  cl_long *expected = NULL;
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
+                              "compiler_workgroup_broadcast_1D_long");
+  workgroup_generic(WG_BROADCAST_1D, input, expected);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_1D_long);
+
+/*
+ * Workgroup broadcast 2D functions
+ */
+void compiler_workgroup_broadcast_2D_int(void)
+{
+  cl_int *input = NULL;
+  cl_int *expected = NULL;
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
+                              "compiler_workgroup_broadcast_2D_int");
+  workgroup_generic(WG_BROADCAST_2D, input, expected);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_2D_int);
+
+void compiler_workgroup_broadcast_2D_long(void)
+{
+  cl_int *input = NULL;
+  cl_int *expected = NULL;
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
+                              "compiler_workgroup_broadcast_2D_long");
+  workgroup_generic(WG_BROADCAST_2D, input, expected);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_2D_long);
+
+
+/*
+ * Workgroup broadcast 3D functions
+ */
+void compiler_workgroup_broadcast_3D_int(void)
+{
+  cl_int *input = NULL;
+  cl_int *expected = NULL;
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
+                              "compiler_workgroup_broadcast_3D_int");
+  workgroup_generic(WG_BROADCAST_3D, input, expected);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_3D_int);
+
+void compiler_workgroup_broadcast_3D_long(void)
+{
+  cl_int *input = NULL;
+  cl_int *expected = NULL;
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
+                              "compiler_workgroup_broadcast_3D_long");
+  workgroup_generic(WG_BROADCAST_3D, input, expected);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_3D_long);
-- 
2.5.0



More information about the Beignet mailing list