[Beignet] [PATCH 3/4] Add atomic test case.

Yang Rong rong.r.yang at intel.com
Thu Jun 27 01:47:57 PDT 2013


The test case include local memory and global memory, atomic operations from
different threads and different work groups.

Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 kernels/compiler_atomic_functions.cl |   55 ++++++++++++++++-----
 utests/CMakeLists.txt                |    1 +
 utests/compiler_atomic_functions.cpp |   87 ++++++++++++++++++++++++++++++++--
 3 files changed, 127 insertions(+), 16 deletions(-)

diff --git a/kernels/compiler_atomic_functions.cl b/kernels/compiler_atomic_functions.cl
index 23f3e73..24f17c2 100644
--- a/kernels/compiler_atomic_functions.cl
+++ b/kernels/compiler_atomic_functions.cl
@@ -1,14 +1,43 @@
-/* test OpenCL 1.1 Atomic Functions (section 6.11.1, 9.4) */
-__kernel void compiler_atomic_functions(global int *a, global int *b) {
-  atomic_add(a, *b);
-  atomic_sub(a, *b);
-  atomic_xchg(a, *b);
-  atomic_inc(a);
-  atomic_dec(a);
-  atomic_cmpxchg(a, b, 100);
-  atomic_min(a, *b);
-  atomic_max(a, *b);
-  atomic_and(a, *b);
-  atomic_or(a, *b);
-  atomic_xor(a, *b);
+__kernel void compiler_atomic_functions(__global int *dst, __local int *tmp, __global int *src) {
+  int lid = get_local_id(0);
+  int i = lid % 12;
+  atomic_xchg(&tmp[4], -1);
+  switch(i) {
+    case 0: atomic_inc(&tmp[i]); break;
+    case 1: atomic_dec(&tmp[i]); break;
+    case 2: atomic_add(&tmp[i], src[lid]); break;
+    case 3: atomic_sub(&tmp[i], src[lid]); break;
+    case 4: atomic_and(&tmp[i], ~(src[lid]<<(lid>>2))); break;
+    case 5: atomic_or (&tmp[i], src[lid]<<(lid>>2)); break;
+    case 6: atomic_xor(&tmp[i], src[lid]); break;
+    case 7: atomic_min(&tmp[i], -src[lid]); break;
+    case 8: atomic_max(&tmp[i], src[lid]); break;
+    case 9: atomic_min((__local unsigned int *)&tmp[i], -src[lid]); break;
+    case 10: atomic_max((__local unsigned int *)&tmp[i], src[lid]); break;
+    case 11: atomic_cmpxchg(&(tmp[i]), 0, src[10]); break;
+    default:  break;
+  }
+
+  switch(i) {
+    case 0: atomic_inc(&dst[i]); break;
+    case 1: atomic_dec(&dst[i]); break;
+    case 2: atomic_add(&dst[i], src[lid]); break;
+    case 3: atomic_sub(&dst[i], src[lid]); break;
+    case 4: atomic_and(&dst[i], ~(src[lid]<<(lid>>2))); break;
+    case 5: atomic_or (&dst[i], src[lid]<<(lid>>2)); break;
+    case 6: atomic_xor(&dst[i], src[lid]); break;
+    case 7: atomic_min(&dst[i], -src[lid]); break;
+    case 8: atomic_max(&dst[i], src[lid]); break;
+    case 9: atomic_min((__global unsigned int *)&dst[i], -src[lid]); break;
+    case 10: atomic_max((__global unsigned int *)&dst[i], src[lid]); break;
+    case 11: atomic_cmpxchg(&dst[i], 0, src[10]); break;
+    default:  break;
+  }
+
+  barrier(CLK_GLOBAL_MEM_FENCE);
+
+  if(get_global_id(0) == 0) {
+    for(i=0; i<12; i=i+1)
+      atomic_add(&dst[i], tmp[i]);
+  }
 }
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index c009d99..d4d0c6f 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -71,6 +71,7 @@ set (utests_sources
   compiler_write_only_shorts.cpp
   compiler_switch.cpp
   compiler_math.cpp
+  compiler_atomic_functions.cpp
   compiler_insn_selection_min.cpp
   compiler_insn_selection_max.cpp
   compiler_insn_selection_masked_min_max.cpp
diff --git a/utests/compiler_atomic_functions.cpp b/utests/compiler_atomic_functions.cpp
index 20202da..71e8384 100644
--- a/utests/compiler_atomic_functions.cpp
+++ b/utests/compiler_atomic_functions.cpp
@@ -1,10 +1,91 @@
 #include "utest_helper.hpp"
+#include <cmath>
+#include <algorithm>
 
-void compiler_atomic_functions(void)
+#define GROUP_NUM 16
+#define LOCAL_SIZE 64
+static void cpu_compiler_atomic(int *dst, int *src)
 {
-  OCL_CREATE_KERNEL("compiler_atomic_functions");
+  dst[4] = 0xffffffff;
+  int tmp[16] = { 0 };
+
+  for(int j=0; j<LOCAL_SIZE; j++) {
+    int i = j % 12;
+
+    switch(i) {
+      case 0: tmp[i] += 1; break;
+      case 1: tmp[i] -= 1; break;
+      case 2: tmp[i] += src[j]; break;
+      case 3: tmp[i] -= src[j]; break;
+      case 4: tmp[i] &= ~(src[j]<<(j>>2)); break;
+      case 5: tmp[i] |= src[j]<<(j>>2); break;
+      case 6: tmp[i] ^= src[j]; break;
+      case 7: tmp[i] = tmp[i] < -src[j] ? tmp[i] : -src[j]; break;
+      case 8: tmp[i] = tmp[i] > src[j] ? tmp[i] : src[j]; break;
+      case 9: tmp[i] = (unsigned int)tmp[i] < (unsigned int)(-src[j]) ? tmp[i] : -src[j]; break;
+      case 10: tmp[i] = (unsigned int)tmp[i] > (unsigned int)(src[j]) ? tmp[i] : src[j]; break;
+      case 11:  tmp[i] = src[10]; break;
+      default:  break;
+    }
+  }
+
+  for(int k=0; k<GROUP_NUM; k++) {
+    for(int j=0; j<LOCAL_SIZE; j++) {
+      int i = j % 12;
+
+      switch(i) {
+        case 0: dst[i] += 1; break;
+        case 1: dst[i] -= 1; break;
+        case 2: dst[i] += src[j]; break;
+        case 3: dst[i] -= src[j]; break;
+        case 4: dst[i] &= ~(src[j]<<(j>>2)); break;
+        case 5: dst[i] |= src[j]<<(j>>2); break;
+        case 6: dst[i] ^= src[j]; break;
+        case 7: dst[i] = dst[i] < -src[j] ? dst[i] : -src[j]; break;
+        case 8: dst[i] = dst[i] > src[j] ? dst[i] : src[j]; break;
+        case 9: dst[i] = (unsigned int)dst[i] < (unsigned int)(-src[j]) ? dst[i] : -src[j]; break;
+        case 10: dst[i] = (unsigned int)dst[i] > (unsigned int)(src[j]) ? dst[i] : src[j]; break;
+        case 11:  dst[i] = src[10]; break;
+        default:  break;
+      }
+    }
+  }
+
+  for(int i=0; i<12; i++)
+    dst[i] += tmp[i];
 }
 
-MAKE_UTEST_FROM_FUNCTION(compiler_atomic_functions);
+static void compiler_atomic_functions(void)
+{
+  const size_t n = GROUP_NUM * LOCAL_SIZE;
+  int cpu_dst[16] = {0}, cpu_src[256];
 
+  globals[0] = n;
+  locals[0] = LOCAL_SIZE;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_atomic_functions");
+  OCL_CREATE_BUFFER(buf[0], 0, 16 * sizeof(int), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, locals[0] * sizeof(int), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, 16 * sizeof(int), NULL);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[1]);
+
+  OCL_MAP_BUFFER(1);
+  for (uint32_t i = 0; i < locals[0]; ++i)
+      cpu_src[i] = ((int*)buf_data[1])[i] = rand() & 0xff;
+  cpu_compiler_atomic(cpu_dst, cpu_src);
+  OCL_UNMAP_BUFFER(1);
+  OCL_NDRANGE(1);
+
+  OCL_MAP_BUFFER(0);
+
+  // Check results
+  for(int i=0; i<12; i++) {
+    //printf("The dst(%d) gpu(0x%x) cpu(0x%x)\n", i, ((uint32_t *)buf_data[0])[i], cpu_dst[i]);
+    OCL_ASSERT(((int *)buf_data[0])[i] == cpu_dst[i]);
+  }
+  OCL_UNMAP_BUFFER(0);
+}
 
+MAKE_UTEST_FROM_FUNCTION(compiler_atomic_functions)
-- 
1.7.10.4



More information about the Beignet mailing list