[Beignet] [PATCH 1/4] rename __gen_ocl_get_simd_id/size to get_sub_group_id/size

Guo Yejun yejun.guo at intel.com
Tue May 12 01:25:56 PDT 2015


Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 backend/src/libocl/tmpl/ocl_simd.tmpl.h    |  8 ++++++--
 backend/src/llvm/llvm_gen_ocl_function.hxx |  5 +++--
 kernels/compiler_get_simd_id.cl            |  8 --------
 kernels/compiler_get_simd_size.cl          |  5 -----
 kernels/compiler_get_sub_group_id.cl       |  8 ++++++++
 src/cl_command_queue_gen7.c                |  2 +-
 utests/CMakeLists.txt                      |  5 +++--
 utests/compiler_get_simd_id.cpp            | 33 ------------------------------
 utests/compiler_get_simd_size.cpp          | 32 -----------------------------
 utests/compiler_get_sub_group_id.cpp       | 33 ++++++++++++++++++++++++++++++
 utests/compiler_get_sub_group_size.cpp     | 32 +++++++++++++++++++++++++++++
 11 files changed, 86 insertions(+), 85 deletions(-)
 delete mode 100644 kernels/compiler_get_simd_id.cl
 delete mode 100644 kernels/compiler_get_simd_size.cl
 create mode 100644 kernels/compiler_get_sub_group_id.cl
 delete mode 100644 utests/compiler_get_simd_id.cpp
 delete mode 100644 utests/compiler_get_simd_size.cpp
 create mode 100644 utests/compiler_get_sub_group_id.cpp
 create mode 100644 utests/compiler_get_sub_group_size.cpp

diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index 620e329..14e5750 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -24,5 +24,9 @@
 // SIMD level function
 /////////////////////////////////////////////////////////////////////////////
 
-uint __gen_ocl_get_simd_size(void);
-uint __gen_ocl_get_simd_id(void);
+uint get_sub_group_size(void);
+uint get_sub_group_id(void);
+
+OVERLOADABLE float intel_sub_group_shuffle(float x, uint c);
+OVERLOADABLE int intel_sub_group_shuffle(int x, uint c);
+OVERLOADABLE uint intel_sub_group_shuffle(uint x, uint c);
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index e2bffde..a0e0b94 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -154,8 +154,9 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16)
 // SIMD level function for internal usage
 DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any)
 DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all)
-DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, __gen_ocl_get_simd_size)
-DECL_LLVM_GEN_FUNCTION(SIMD_ID, __gen_ocl_get_simd_id)
+DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, get_sub_group_size)
+DECL_LLVM_GEN_FUNCTION(SIMD_ID, get_sub_group_id)
+DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, intel_sub_group_shuffle)
 
 DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
 DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
diff --git a/kernels/compiler_get_simd_id.cl b/kernels/compiler_get_simd_id.cl
deleted file mode 100644
index dfe625a..0000000
--- a/kernels/compiler_get_simd_id.cl
+++ /dev/null
@@ -1,8 +0,0 @@
-__kernel void compiler_get_simd_id(global int *dst)
-{
-  int i = get_global_id(0);
-  if (i == 0)
-    dst[0] = __gen_ocl_get_simd_size();
-
-  dst[i+1] = __gen_ocl_get_simd_id();
-}
diff --git a/kernels/compiler_get_simd_size.cl b/kernels/compiler_get_simd_size.cl
deleted file mode 100644
index 6e303a3..0000000
--- a/kernels/compiler_get_simd_size.cl
+++ /dev/null
@@ -1,5 +0,0 @@
-__kernel void compiler_get_simd_size(global int *dst)
-{
-  int i = get_global_id(0);
-  dst[i] = __gen_ocl_get_simd_size();
-}
diff --git a/kernels/compiler_get_sub_group_id.cl b/kernels/compiler_get_sub_group_id.cl
new file mode 100644
index 0000000..10033ff
--- /dev/null
+++ b/kernels/compiler_get_sub_group_id.cl
@@ -0,0 +1,8 @@
+__kernel void compiler_get_sub_group_id(global int *dst)
+{
+  int i = get_global_id(0);
+  if (i == 0)
+    dst[0] = get_sub_group_size();
+
+  dst[i+1] = get_sub_group_id();
+}
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index e27a211..89f39b3 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -210,7 +210,7 @@ cl_curbe_fill(cl_kernel ker,
   UPLOAD(GBE_CURBE_WORK_DIM, work_dim);
 #undef UPLOAD
 
-  /* __gen_ocl_get_simd_id needs it */
+  /* get_sub_group_id needs it */
   if ((offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LANE_ID, 0)) >= 0) {
     const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
     uint32_t *laneid = (uint32_t *) (ker->curbe + offset);
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index dcb3385..977e459 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -209,8 +209,9 @@ set (utests_sources
   vload_bench.cpp
   runtime_use_host_ptr_buffer.cpp
   runtime_alloc_host_ptr_buffer.cpp
-  compiler_get_simd_size.cpp
-  compiler_get_simd_id.cpp)
+  compiler_get_sub_group_size.cpp
+  compiler_get_sub_group_id.cpp
+  compiler_sub_group_shuffle.cpp)
 
 if (LLVM_VERSION_NODOT VERSION_GREATER 34)
   SET(utests_sources
diff --git a/utests/compiler_get_simd_id.cpp b/utests/compiler_get_simd_id.cpp
deleted file mode 100644
index ad10bf7..0000000
--- a/utests/compiler_get_simd_id.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "utest_helper.hpp"
-
-void compiler_get_simd_id(void)
-{
-  const size_t n = 256;
-
-  // Setup kernel and buffers
-  OCL_CREATE_KERNEL("compiler_get_simd_id");
-  OCL_CREATE_BUFFER(buf[0], 0, (n+1) * sizeof(int), NULL);
-  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
-
-  globals[0] = n;
-  locals[0] = 16;
-
-  OCL_MAP_BUFFER(0);
-  for (int32_t i = 0; i < (int32_t) (n+1); ++i)
-    ((int*)buf_data[0])[i] = -1;
-  OCL_UNMAP_BUFFER(0);
-
-  // Run the kernel on GPU
-  OCL_NDRANGE(1);
-
-  // Compare
-  OCL_MAP_BUFFER(0);
-  int* dst = (int *)buf_data[0];
-  OCL_ASSERT(8 == dst[0] || 16 == dst[0]);
-  for (int32_t i = 1; i < (int32_t) n; ++i){
-    OCL_ASSERT((i-1) % dst[0] == dst[i]);
-  }
-  OCL_UNMAP_BUFFER(0);
-}
-
-MAKE_UTEST_FROM_FUNCTION(compiler_get_simd_id);
diff --git a/utests/compiler_get_simd_size.cpp b/utests/compiler_get_simd_size.cpp
deleted file mode 100644
index ea70cd9..0000000
--- a/utests/compiler_get_simd_size.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-#include "utest_helper.hpp"
-
-void compiler_get_simd_size(void)
-{
-  const size_t n = 256;
-
-  // Setup kernel and buffers
-  OCL_CREATE_KERNEL("compiler_get_simd_size");
-  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL);
-  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
-
-  globals[0] = n;
-  locals[0] = 16;
-
-  OCL_MAP_BUFFER(0);
-  for (int32_t i = 0; i < (int32_t) n; ++i)
-    ((int*)buf_data[0])[i] = -1;
-  OCL_UNMAP_BUFFER(0);
-
-  // Run the kernel on GPU
-  OCL_NDRANGE(1);
-
-  // Compare
-  OCL_MAP_BUFFER(0);
-  int* dst = (int *)buf_data[0];
-  for (int32_t i = 0; i < (int32_t) n; ++i){
-    OCL_ASSERT(8 == dst[i] || 16 == dst[i]);
-  }
-  OCL_UNMAP_BUFFER(0);
-}
-
-MAKE_UTEST_FROM_FUNCTION(compiler_get_simd_size);
diff --git a/utests/compiler_get_sub_group_id.cpp b/utests/compiler_get_sub_group_id.cpp
new file mode 100644
index 0000000..0d88d29
--- /dev/null
+++ b/utests/compiler_get_sub_group_id.cpp
@@ -0,0 +1,33 @@
+#include "utest_helper.hpp"
+
+void compiler_get_sub_group_id(void)
+{
+  const size_t n = 256;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_get_sub_group_id");
+  OCL_CREATE_BUFFER(buf[0], 0, (n+1) * sizeof(int), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+  globals[0] = n;
+  locals[0] = 16;
+
+  OCL_MAP_BUFFER(0);
+  for (int32_t i = 0; i < (int32_t) (n+1); ++i)
+    ((int*)buf_data[0])[i] = -1;
+  OCL_UNMAP_BUFFER(0);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare
+  OCL_MAP_BUFFER(0);
+  int* dst = (int *)buf_data[0];
+  OCL_ASSERT(8 == dst[0] || 16 == dst[0]);
+  for (int32_t i = 1; i < (int32_t) n; ++i){
+    OCL_ASSERT((i-1) % dst[0] == dst[i]);
+  }
+  OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_get_sub_group_id);
diff --git a/utests/compiler_get_sub_group_size.cpp b/utests/compiler_get_sub_group_size.cpp
new file mode 100644
index 0000000..20339d7
--- /dev/null
+++ b/utests/compiler_get_sub_group_size.cpp
@@ -0,0 +1,32 @@
+#include "utest_helper.hpp"
+
+void compiler_get_sub_group_size(void)
+{
+  const size_t n = 256;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_get_sub_group_size");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+  globals[0] = n;
+  locals[0] = 16;
+
+  OCL_MAP_BUFFER(0);
+  for (int32_t i = 0; i < (int32_t) n; ++i)
+    ((int*)buf_data[0])[i] = -1;
+  OCL_UNMAP_BUFFER(0);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare
+  OCL_MAP_BUFFER(0);
+  int* dst = (int *)buf_data[0];
+  for (int32_t i = 0; i < (int32_t) n; ++i){
+    OCL_ASSERT(8 == dst[i] || 16 == dst[i]);
+  }
+  OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_get_sub_group_size);
-- 
1.9.1



More information about the Beignet mailing list