[Beignet] [PATCH 2/2] change built-in function name from get_sub_group_id to get_sub_group_local_id
Guo, Yejun
yejun.guo at intel.com
Wed Jan 6 16:51:32 PST 2016
ping for review, thanks.
-----Original Message-----
From: Guo, Yejun
Sent: Wednesday, December 23, 2015 8:18 AM
To: beignet at lists.freedesktop.org
Cc: Guo, Yejun
Subject: [PATCH 2/2] change built-in function name from get_sub_group_id to get_sub_group_local_id
Fix bug at https://bugs.freedesktop.org/show_bug.cgi?id=93469
The function is mapped to OP_SIMD_ID, which returns the SIMD lane ID.
However, the SIMD lane ID is the equivalent of get_sub_group_local_id(), not get_sub_group_id().
contributor: Georg Kolling <georg.kolling at gmail.com>
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
backend/src/libocl/tmpl/ocl_simd.tmpl.h | 2 +-
backend/src/llvm/llvm_gen_ocl_function.hxx | 2 +-
kernels/compiler_get_sub_group_id.cl | 8 --------
kernels/compiler_get_sub_group_local_id.cl | 8 ++++++++
kernels/compiler_sub_group_shuffle.cl | 4 ++--
utests/CMakeLists.txt | 2 +-
utests/compiler_get_sub_group_id.cpp | 33 ------------------------------
utests/compiler_get_sub_group_local_id.cpp | 33 ++++++++++++++++++++++++++++++
8 files changed, 46 insertions(+), 46 deletions(-)
delete mode 100644 kernels/compiler_get_sub_group_id.cl
create mode 100644 kernels/compiler_get_sub_group_local_id.cl
delete mode 100644 utests/compiler_get_sub_group_id.cpp
create mode 100644 utests/compiler_get_sub_group_local_id.cpp
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index 4055070..9d9404b 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -27,7 +27,7 @@ int sub_group_any(int); int sub_group_all(int);
uint get_max_sub_group_size(void);
-uint get_sub_group_id(void);
+uint get_sub_group_local_id(void);
OVERLOADABLE float intel_sub_group_shuffle(float x, uint c);
OVERLOADABLE int intel_sub_group_shuffle(int x, uint c);
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 046e1ae..e3d89a3 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -162,7 +162,7 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_F16_TO_U32, _Z16convert_uint_satDh)
DECL_LLVM_GEN_FUNCTION(SIMD_ANY, sub_group_any)
DECL_LLVM_GEN_FUNCTION(SIMD_ALL, sub_group_all)
DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, get_max_sub_group_size)
-DECL_LLVM_GEN_FUNCTION(SIMD_ID, get_sub_group_id)
+DECL_LLVM_GEN_FUNCTION(SIMD_ID, get_sub_group_local_id)
DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, intel_sub_group_shuffle)
DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
diff --git a/kernels/compiler_get_sub_group_id.cl b/kernels/compiler_get_sub_group_id.cl
deleted file mode 100644
index afaa2a6..0000000
--- a/kernels/compiler_get_sub_group_id.cl
+++ /dev/null
@@ -1,8 +0,0 @@
-__kernel void compiler_get_sub_group_id(global int *dst)
-{
- int i = get_global_id(0);
- if (i == 0)
- dst[0] = get_max_sub_group_size();
-
- dst[i+1] = get_sub_group_id();
-}
diff --git a/kernels/compiler_get_sub_group_local_id.cl b/kernels/compiler_get_sub_group_local_id.cl
new file mode 100644
index 0000000..0a28285
--- /dev/null
+++ b/kernels/compiler_get_sub_group_local_id.cl
@@ -0,0 +1,8 @@
+__kernel void compiler_get_sub_group_local_id(global int *dst)
+{
+ int i = get_global_id(0);
+ if (i == 0)
+ dst[0] = get_max_sub_group_size();
+
+ dst[i+1] = get_sub_group_local_id();
+}
diff --git a/kernels/compiler_sub_group_shuffle.cl b/kernels/compiler_sub_group_shuffle.cl
index a171faa..322da74 100644
--- a/kernels/compiler_sub_group_shuffle.cl
+++ b/kernels/compiler_sub_group_shuffle.cl
@@ -6,8 +6,8 @@ __kernel void compiler_sub_group_shuffle(global int *dst, int c)
dst++;
int from = i;
- int j = get_max_sub_group_size() - get_sub_group_id() - 1;
- int o0 = get_sub_group_id();
+ int j = get_max_sub_group_size() - get_sub_group_local_id() - 1;
+ int o0 = get_sub_group_local_id();
int o1 = intel_sub_group_shuffle(from, c);
int o2 = intel_sub_group_shuffle(from, 5);
int o3 = intel_sub_group_shuffle(from, j);
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 2c6aea4..db62e38 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -221,7 +221,7 @@ set (utests_sources
runtime_alloc_host_ptr_buffer.cpp
runtime_use_host_ptr_image.cpp
compiler_get_max_sub_group_size.cpp
- compiler_get_sub_group_id.cpp
+ compiler_get_sub_group_local_id.cpp
compiler_sub_group_shuffle.cpp
builtin_global_linear_id.cpp
builtin_local_linear_id.cpp
diff --git a/utests/compiler_get_sub_group_id.cpp b/utests/compiler_get_sub_group_id.cpp
deleted file mode 100644
index 0d88d29..0000000
--- a/utests/compiler_get_sub_group_id.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "utest_helper.hpp"
-
-void compiler_get_sub_group_id(void)
-{
- const size_t n = 256;
-
- // Setup kernel and buffers
- OCL_CREATE_KERNEL("compiler_get_sub_group_id");
- OCL_CREATE_BUFFER(buf[0], 0, (n+1) * sizeof(int), NULL);
- OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
-
- globals[0] = n;
- locals[0] = 16;
-
- OCL_MAP_BUFFER(0);
- for (int32_t i = 0; i < (int32_t) (n+1); ++i)
- ((int*)buf_data[0])[i] = -1;
- OCL_UNMAP_BUFFER(0);
-
- // Run the kernel on GPU
- OCL_NDRANGE(1);
-
- // Compare
- OCL_MAP_BUFFER(0);
- int* dst = (int *)buf_data[0];
- OCL_ASSERT(8 == dst[0] || 16 == dst[0]);
- for (int32_t i = 1; i < (int32_t) n; ++i){
- OCL_ASSERT((i-1) % dst[0] == dst[i]);
- }
- OCL_UNMAP_BUFFER(0);
-}
-
-MAKE_UTEST_FROM_FUNCTION(compiler_get_sub_group_id);
diff --git a/utests/compiler_get_sub_group_local_id.cpp b/utests/compiler_get_sub_group_local_id.cpp
new file mode 100644
index 0000000..2df4e9b
--- /dev/null
+++ b/utests/compiler_get_sub_group_local_id.cpp
@@ -0,0 +1,33 @@
+#include "utest_helper.hpp"
+
+void compiler_get_sub_group_local_id(void)
+{
+ const size_t n = 256;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_get_sub_group_local_id");
+ OCL_CREATE_BUFFER(buf[0], 0, (n+1) * sizeof(int), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+ globals[0] = n;
+ locals[0] = 16;
+
+ OCL_MAP_BUFFER(0);
+ for (int32_t i = 0; i < (int32_t) (n+1); ++i)
+ ((int*)buf_data[0])[i] = -1;
+ OCL_UNMAP_BUFFER(0);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Compare
+ OCL_MAP_BUFFER(0);
+ int* dst = (int *)buf_data[0];
+ OCL_ASSERT(8 == dst[0] || 16 == dst[0]);
+ for (int32_t i = 1; i < (int32_t) n; ++i){
+ OCL_ASSERT((i-1) % dst[0] == dst[i]);
+ }
+ OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_get_sub_group_local_id);
--
1.9.1
More information about the Beignet
mailing list