[Beignet] [PATCH 4/4] rename __gen_ocl_simd_any/all to sub_group_any/all

Guo Yejun yejun.guo at intel.com
Tue May 12 01:28:11 PDT 2015


it is defined in https://www.khronos.org/registry/cl/extensions/intel/cl_intel_subgroups.txt

Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp |  2 ++
 backend/src/libocl/include/ocl_misc.h      |  8 ------
 backend/src/libocl/tmpl/ocl_simd.tmpl.h    |  2 ++
 backend/src/llvm/llvm_gen_backend.cpp      |  4 +--
 backend/src/llvm/llvm_gen_ocl_function.hxx |  4 +--
 kernels/compiler_simd_all.cl               | 12 ---------
 kernels/compiler_simd_any.cl               | 15 -----------
 kernels/compiler_sub_group_all.cl          | 12 +++++++++
 kernels/compiler_sub_group_any.cl          | 15 +++++++++++
 utests/CMakeLists.txt                      |  4 +--
 utests/compiler_simd_all.cpp               | 43 ------------------------------
 utests/compiler_simd_any.cpp               | 43 ------------------------------
 utests/compiler_sub_group_all.cpp          | 43 ++++++++++++++++++++++++++++++
 utests/compiler_sub_group_any.cpp          | 43 ++++++++++++++++++++++++++++++
 14 files changed, 123 insertions(+), 127 deletions(-)
 delete mode 100644 kernels/compiler_simd_all.cl
 delete mode 100644 kernels/compiler_simd_any.cl
 create mode 100644 kernels/compiler_sub_group_all.cl
 create mode 100644 kernels/compiler_sub_group_any.cl
 delete mode 100644 utests/compiler_simd_all.cpp
 delete mode 100644 utests/compiler_simd_any.cpp
 create mode 100644 utests/compiler_sub_group_all.cpp
 create mode 100644 utests/compiler_sub_group_any.cpp

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 98d8780..105983c 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2170,6 +2170,8 @@ namespace gbe
         return insnType;
       if (opcode == ir::OP_FBH || opcode == ir::OP_FBL || opcode == ir::OP_LZD)
         return ir::TYPE_U32;
+      if (opcode == ir::OP_SIMD_ANY || opcode == ir::OP_SIMD_ALL)
+        return ir::TYPE_S32;
       if (insnType == ir::TYPE_S16 || insnType == ir::TYPE_U16)
         return insnType;
       if (insnType == ir::TYPE_BOOL)
diff --git a/backend/src/libocl/include/ocl_misc.h b/backend/src/libocl/include/ocl_misc.h
index aa3f504..359025b 100644
--- a/backend/src/libocl/include/ocl_misc.h
+++ b/backend/src/libocl/include/ocl_misc.h
@@ -128,14 +128,6 @@ DEF(ulong)
 #undef DEC16
 #undef DEC16X
 
-
-/* Temp to add the SIMD functions here. */
-/////////////////////////////////////////////////////////////////////////////
-// SIMD level function
-/////////////////////////////////////////////////////////////////////////////
-short __gen_ocl_simd_any(short);
-short __gen_ocl_simd_all(short);
-
 struct time_stamp {
   // time tick
   ulong tick;
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index 14e5750..67a1cee 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -23,6 +23,8 @@
 /////////////////////////////////////////////////////////////////////////////
 // SIMD level function
 /////////////////////////////////////////////////////////////////////////////
+int sub_group_any(int);
+int sub_group_all(int);
 
 uint get_sub_group_size(void);
 uint get_sub_group_id(void);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index f5743ba..fadc97b 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -3063,14 +3063,14 @@ namespace gbe
           {
             const ir::Register src = this->getRegister(*AI);
             const ir::Register dst = this->getRegister(&I);
-            ctx.ALU1(ir::OP_SIMD_ALL, ir::TYPE_S16, dst, src);
+            ctx.ALU1(ir::OP_SIMD_ALL, ir::TYPE_S32, dst, src);
             break;
           }
           case GEN_OCL_SIMD_ANY:
           {
             const ir::Register src = this->getRegister(*AI);
             const ir::Register dst = this->getRegister(&I);
-            ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S16, dst, src);
+            ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S32, dst, src);
             break;
           }
           case GEN_OCL_READ_TM:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index a0e0b94..671e785 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -152,8 +152,8 @@ DECL_LLVM_GEN_FUNCTION(CONV_F16_TO_F32, __gen_ocl_f16to32)
 DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16)
 
 // SIMD level function for internal usage
-DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any)
-DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all)
+DECL_LLVM_GEN_FUNCTION(SIMD_ANY, sub_group_any)
+DECL_LLVM_GEN_FUNCTION(SIMD_ALL, sub_group_all)
 DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, get_sub_group_size)
 DECL_LLVM_GEN_FUNCTION(SIMD_ID, get_sub_group_id)
 DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, intel_sub_group_shuffle)
diff --git a/kernels/compiler_simd_all.cl b/kernels/compiler_simd_all.cl
deleted file mode 100644
index 504710b..0000000
--- a/kernels/compiler_simd_all.cl
+++ /dev/null
@@ -1,12 +0,0 @@
-__kernel void compiler_simd_all(global int *src, global int *dst)
-{
-  int i = get_global_id(0);
-  if (i % 2 == 1) {
-    if (__gen_ocl_simd_all((src[i] < 12) && (src[i] > 0)))
-      dst[i] = 1;
-    else
-      dst[i] = 2;
-  }
-  else
-    dst[i] = 3;
-}
diff --git a/kernels/compiler_simd_any.cl b/kernels/compiler_simd_any.cl
deleted file mode 100644
index 3b04f82..0000000
--- a/kernels/compiler_simd_any.cl
+++ /dev/null
@@ -1,15 +0,0 @@
-__kernel void compiler_simd_any(global int *src, global int *dst)
-{
-  int i = get_global_id(0);
-
-  if (i % 2 == 1) {
-    if (__gen_ocl_simd_any(src[i] == 5) || __gen_ocl_simd_any(src[i] == 9))
-      dst[i] = 1;
-    else if (__gen_ocl_simd_any(src[i] == 6))
-      dst[i] = 0;
-    else
-      dst[i] = 2;
-  }
-  else
-    dst[i] = 3;
-}
diff --git a/kernels/compiler_sub_group_all.cl b/kernels/compiler_sub_group_all.cl
new file mode 100644
index 0000000..30db5bc
--- /dev/null
+++ b/kernels/compiler_sub_group_all.cl
@@ -0,0 +1,12 @@
+__kernel void compiler_sub_group_all(global int *src, global int *dst)
+{
+  int i = get_global_id(0);
+  if (i % 2 == 1) {
+    if (sub_group_all((src[i] < 12) && (src[i] > 0)))
+      dst[i] = 1;
+    else
+      dst[i] = 2;
+  }
+  else
+    dst[i] = 3;
+}
diff --git a/kernels/compiler_sub_group_any.cl b/kernels/compiler_sub_group_any.cl
new file mode 100644
index 0000000..15702db
--- /dev/null
+++ b/kernels/compiler_sub_group_any.cl
@@ -0,0 +1,15 @@
+__kernel void compiler_sub_group_any(global int *src, global int *dst)
+{
+  int i = get_global_id(0);
+
+  if (i % 2 == 1) {
+    if (sub_group_any(src[i] == 5) || sub_group_any(src[i] == 9))
+      dst[i] = 1;
+    else if (sub_group_any(src[i] == 6))
+      dst[i] = 0;
+    else
+      dst[i] = 2;
+  }
+  else
+    dst[i] = 3;
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 977e459..899b52c 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -187,8 +187,8 @@ set (utests_sources
   compiler_private_const.cpp
   compiler_private_data_overflow.cpp
   compiler_getelementptr_bitcast.cpp
-  compiler_simd_any.cpp
-  compiler_simd_all.cpp
+  compiler_sub_group_any.cpp
+  compiler_sub_group_all.cpp
   compiler_time_stamp.cpp
   compiler_double_precision.cpp
   load_program_from_gen_bin.cpp
diff --git a/utests/compiler_simd_all.cpp b/utests/compiler_simd_all.cpp
deleted file mode 100644
index 086c54f..0000000
--- a/utests/compiler_simd_all.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-#include "utest_helper.hpp"
-
-void compiler_simd_all(void)
-{
-  const size_t n = 40;
-
-  // Setup kernel and buffers
-  OCL_CREATE_KERNEL("compiler_simd_all");
-  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL);
-  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL);
-  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
-  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
-
-  globals[0] = n;
-  locals[0] = 10;
-
-  OCL_MAP_BUFFER(0);
-  for (int32_t i = 0; i < (int32_t) n; ++i)
-    ((int*)buf_data[0])[i] = i;
-  OCL_UNMAP_BUFFER(0);
-
-  // Run the kernel on GPU
-  OCL_NDRANGE(1);
-
-  // Run on CPU
-
-  // Compare
-  OCL_MAP_BUFFER(1);
-  for (int32_t i = 0; i < (int32_t) n; ++i) {
-    //printf("%d %d\n", i, ((int *)buf_data[1])[i]);
-    if (i % 2 == 1) {
-      if (i < (int32_t)locals[0])
-        OCL_ASSERT(((int *)buf_data[1])[i] == 1);
-      else
-        OCL_ASSERT(((int *)buf_data[1])[i] == 2);
-    }
-    else
-      OCL_ASSERT(((int *)buf_data[1])[i] == 3);
-  }
-  OCL_UNMAP_BUFFER(1);
-}
-
-MAKE_UTEST_FROM_FUNCTION(compiler_simd_all);
diff --git a/utests/compiler_simd_any.cpp b/utests/compiler_simd_any.cpp
deleted file mode 100644
index dcc5ef1..0000000
--- a/utests/compiler_simd_any.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-#include "utest_helper.hpp"
-
-void compiler_simd_any(void)
-{
-  const size_t n = 40;
-
-  // Setup kernel and buffers
-  OCL_CREATE_KERNEL("compiler_simd_any");
-  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL);
-  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL);
-  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
-  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
-
-  globals[0] = n;
-  locals[0] = 10;
-
-  OCL_MAP_BUFFER(0);
-  for (int32_t i = 0; i < (int32_t) n; ++i)
-    ((int*)buf_data[0])[i] = i;
-  OCL_UNMAP_BUFFER(0);
-
-  // Run the kernel on GPU
-  OCL_NDRANGE(1);
-
-  // Run on CPU
-
-  // Compare
-  OCL_MAP_BUFFER(1);
-  for (int32_t i = 0; i < (int32_t) n; ++i){
-    //printf("%d %d\n", i, ((int *)buf_data[1])[i]);
-    if (i % 2 == 1) {
-      if (i < (int32_t)locals[0])
-        OCL_ASSERT(((int *)buf_data[1])[i] == 1);
-      else
-        OCL_ASSERT(((int *)buf_data[1])[i] == 2);
-    }
-    else
-      OCL_ASSERT(((int *)buf_data[1])[i] == 3);
-  }
-  OCL_UNMAP_BUFFER(1);
-}
-
-MAKE_UTEST_FROM_FUNCTION(compiler_simd_any);
diff --git a/utests/compiler_sub_group_all.cpp b/utests/compiler_sub_group_all.cpp
new file mode 100644
index 0000000..d8e4130
--- /dev/null
+++ b/utests/compiler_sub_group_all.cpp
@@ -0,0 +1,43 @@
+#include "utest_helper.hpp"
+
+void compiler_sub_group_all(void)
+{
+  const size_t n = 40;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_sub_group_all");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+
+  globals[0] = n;
+  locals[0] = 10;
+
+  OCL_MAP_BUFFER(0);
+  for (int32_t i = 0; i < (int32_t) n; ++i)
+    ((int*)buf_data[0])[i] = i;
+  OCL_UNMAP_BUFFER(0);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Run on CPU
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    //printf("%d %d\n", i, ((int *)buf_data[1])[i]);
+    if (i % 2 == 1) {
+      if (i < (int32_t)locals[0])
+        OCL_ASSERT(((int *)buf_data[1])[i] == 1);
+      else
+        OCL_ASSERT(((int *)buf_data[1])[i] == 2);
+    }
+    else
+      OCL_ASSERT(((int *)buf_data[1])[i] == 3);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_sub_group_all);
diff --git a/utests/compiler_sub_group_any.cpp b/utests/compiler_sub_group_any.cpp
new file mode 100644
index 0000000..98b1bdd
--- /dev/null
+++ b/utests/compiler_sub_group_any.cpp
@@ -0,0 +1,43 @@
+#include "utest_helper.hpp"
+
+void compiler_sub_group_any(void)
+{
+  const size_t n = 40;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_sub_group_any");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+
+  globals[0] = n;
+  locals[0] = 10;
+
+  OCL_MAP_BUFFER(0);
+  for (int32_t i = 0; i < (int32_t) n; ++i)
+    ((int*)buf_data[0])[i] = i;
+  OCL_UNMAP_BUFFER(0);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Run on CPU
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < (int32_t) n; ++i){
+    //printf("%d %d\n", i, ((int *)buf_data[1])[i]);
+    if (i % 2 == 1) {
+      if (i < (int32_t)locals[0])
+        OCL_ASSERT(((int *)buf_data[1])[i] == 1);
+      else
+        OCL_ASSERT(((int *)buf_data[1])[i] == 2);
+    }
+    else
+      OCL_ASSERT(((int *)buf_data[1])[i] == 3);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_sub_group_any);
-- 
1.9.1



More information about the Beignet mailing list