[Beignet] [PATCH 1/2] change built-in function name from get_sub_group_size to get_max_sub_group_size

Guo Yejun yejun.guo at intel.com
Tue Dec 22 16:15:08 PST 2015


Fix bug at https://bugs.freedesktop.org/show_bug.cgi?id=93469

The function is mapped to OP_SIMD_SIZE, which returns the constant
SIMD width, so the correct function name is get_max_sub_group_size.

contributor: Georg Kolling <georg.kolling at gmail.com>
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 backend/src/libocl/tmpl/ocl_simd.tmpl.h            |  2 +-
 backend/src/llvm/llvm_gen_ocl_function.hxx         |  2 +-
 kernels/compiler_get_max_sub_group_size.cl         |  5 ++++
 kernels/compiler_get_sub_group_id.cl               |  2 +-
 kernels/compiler_get_sub_group_size.cl             |  5 ----
 kernels/compiler_sub_group_shuffle.cl              |  4 +--
 .../cl_internal_block_motion_estimate_intel.cl     |  2 +-
 utests/CMakeLists.txt                              |  2 +-
 utests/compiler_get_max_sub_group_size.cpp         | 32 ++++++++++++++++++++++
 utests/compiler_get_sub_group_size.cpp             | 32 ----------------------
 10 files changed, 44 insertions(+), 44 deletions(-)
 create mode 100644 kernels/compiler_get_max_sub_group_size.cl
 delete mode 100644 kernels/compiler_get_sub_group_size.cl
 create mode 100644 utests/compiler_get_max_sub_group_size.cpp
 delete mode 100644 utests/compiler_get_sub_group_size.cpp

diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index 67a1cee..4055070 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -26,7 +26,7 @@
 int sub_group_any(int);
 int sub_group_all(int);
 
-uint get_sub_group_size(void);
+uint get_max_sub_group_size(void);
 uint get_sub_group_id(void);
 
 OVERLOADABLE float intel_sub_group_shuffle(float x, uint c);
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 8023744..046e1ae 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -161,7 +161,7 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_F16_TO_U32, _Z16convert_uint_satDh)
 // SIMD level function for internal usage
 DECL_LLVM_GEN_FUNCTION(SIMD_ANY, sub_group_any)
 DECL_LLVM_GEN_FUNCTION(SIMD_ALL, sub_group_all)
-DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, get_sub_group_size)
+DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, get_max_sub_group_size)
 DECL_LLVM_GEN_FUNCTION(SIMD_ID, get_sub_group_id)
 DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, intel_sub_group_shuffle)
 
diff --git a/kernels/compiler_get_max_sub_group_size.cl b/kernels/compiler_get_max_sub_group_size.cl
new file mode 100644
index 0000000..8fb263b
--- /dev/null
+++ b/kernels/compiler_get_max_sub_group_size.cl
@@ -0,0 +1,5 @@
+__kernel void compiler_get_max_sub_group_size(global int *dst)
+{
+  int i = get_global_id(0);
+  dst[i] = get_max_sub_group_size();
+}
diff --git a/kernels/compiler_get_sub_group_id.cl b/kernels/compiler_get_sub_group_id.cl
index 10033ff..afaa2a6 100644
--- a/kernels/compiler_get_sub_group_id.cl
+++ b/kernels/compiler_get_sub_group_id.cl
@@ -2,7 +2,7 @@ __kernel void compiler_get_sub_group_id(global int *dst)
 {
   int i = get_global_id(0);
   if (i == 0)
-    dst[0] = get_sub_group_size();
+    dst[0] = get_max_sub_group_size();
 
   dst[i+1] = get_sub_group_id();
 }
diff --git a/kernels/compiler_get_sub_group_size.cl b/kernels/compiler_get_sub_group_size.cl
deleted file mode 100644
index 4d5e3eb..0000000
--- a/kernels/compiler_get_sub_group_size.cl
+++ /dev/null
@@ -1,5 +0,0 @@
-__kernel void compiler_get_sub_group_size(global int *dst)
-{
-  int i = get_global_id(0);
-  dst[i] = get_sub_group_size();
-}
diff --git a/kernels/compiler_sub_group_shuffle.cl b/kernels/compiler_sub_group_shuffle.cl
index 75adde3..a171faa 100644
--- a/kernels/compiler_sub_group_shuffle.cl
+++ b/kernels/compiler_sub_group_shuffle.cl
@@ -2,11 +2,11 @@ __kernel void compiler_sub_group_shuffle(global int *dst, int c)
 {
   int i = get_global_id(0);
   if (i == 0)
-    dst[0] = get_sub_group_size();
+    dst[0] = get_max_sub_group_size();
   dst++;
 
   int from = i;
-  int j = get_sub_group_size() - get_sub_group_id() - 1;
+  int j = get_max_sub_group_size() - get_sub_group_id() - 1;
   int o0 = get_sub_group_id();
   int o1 = intel_sub_group_shuffle(from, c);
   int o2 = intel_sub_group_shuffle(from, 5);
diff --git a/src/kernels/cl_internal_block_motion_estimate_intel.cl b/src/kernels/cl_internal_block_motion_estimate_intel.cl
index 1f28f4e..23c5488 100644
--- a/src/kernels/cl_internal_block_motion_estimate_intel.cl
+++ b/src/kernels/cl_internal_block_motion_estimate_intel.cl
@@ -262,7 +262,7 @@ void block_motion_estimate_intel(accelerator_intel_t accel,
   ushort res[16];
 
   uint write_back_dwx;
-  uint simd_width = get_sub_group_size();
+  uint simd_width = get_max_sub_group_size();
 
   /* In simd 8 mode, one kernel variable 'uint' map to 8 dword.
    * In simd 16 mode, one kernel variable 'uint' map to 16 dword.
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index d846b7b..2c6aea4 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -220,7 +220,7 @@ set (utests_sources
   runtime_use_host_ptr_buffer.cpp
   runtime_alloc_host_ptr_buffer.cpp
   runtime_use_host_ptr_image.cpp
-  compiler_get_sub_group_size.cpp
+  compiler_get_max_sub_group_size.cpp
   compiler_get_sub_group_id.cpp
   compiler_sub_group_shuffle.cpp
   builtin_global_linear_id.cpp
diff --git a/utests/compiler_get_max_sub_group_size.cpp b/utests/compiler_get_max_sub_group_size.cpp
new file mode 100644
index 0000000..debdf94
--- /dev/null
+++ b/utests/compiler_get_max_sub_group_size.cpp
@@ -0,0 +1,32 @@
+#include "utest_helper.hpp"
+
+void compiler_get_max_sub_group_size(void)
+{
+  const size_t n = 256;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_get_max_sub_group_size");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+  globals[0] = n;
+  locals[0] = 16;
+
+  OCL_MAP_BUFFER(0);
+  for (int32_t i = 0; i < (int32_t) n; ++i)
+    ((int*)buf_data[0])[i] = -1;
+  OCL_UNMAP_BUFFER(0);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare
+  OCL_MAP_BUFFER(0);
+  int* dst = (int *)buf_data[0];
+  for (int32_t i = 0; i < (int32_t) n; ++i){
+    OCL_ASSERT(8 == dst[i] || 16 == dst[i]);
+  }
+  OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_get_max_sub_group_size);
diff --git a/utests/compiler_get_sub_group_size.cpp b/utests/compiler_get_sub_group_size.cpp
deleted file mode 100644
index 20339d7..0000000
--- a/utests/compiler_get_sub_group_size.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-#include "utest_helper.hpp"
-
-void compiler_get_sub_group_size(void)
-{
-  const size_t n = 256;
-
-  // Setup kernel and buffers
-  OCL_CREATE_KERNEL("compiler_get_sub_group_size");
-  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL);
-  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
-
-  globals[0] = n;
-  locals[0] = 16;
-
-  OCL_MAP_BUFFER(0);
-  for (int32_t i = 0; i < (int32_t) n; ++i)
-    ((int*)buf_data[0])[i] = -1;
-  OCL_UNMAP_BUFFER(0);
-
-  // Run the kernel on GPU
-  OCL_NDRANGE(1);
-
-  // Compare
-  OCL_MAP_BUFFER(0);
-  int* dst = (int *)buf_data[0];
-  for (int32_t i = 0; i < (int32_t) n; ++i){
-    OCL_ASSERT(8 == dst[i] || 16 == dst[i]);
-  }
-  OCL_UNMAP_BUFFER(0);
-}
-
-MAKE_UTEST_FROM_FUNCTION(compiler_get_sub_group_size);
-- 
1.9.1



More information about the Beignet mailing list