[Beignet] [PATCH 1/5 OpenCL-2.0] libocl: Add the module for work_group functions.
junyan.he at inbox.com
junyan.he at inbox.com
Mon Apr 20 23:11:01 PDT 2015
From: Junyan He <junyan.he at linux.intel.com>
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/libocl/CMakeLists.txt | 2 +-
backend/src/libocl/include/ocl.h | 1 +
backend/src/libocl/include/ocl_work_group.h | 118 +++++++++++++++++++++++++
backend/src/libocl/src/ocl_work_group.cl | 126 +++++++++++++++++++++++++++
4 files changed, 246 insertions(+), 1 deletion(-)
create mode 100644 backend/src/libocl/include/ocl_work_group.h
create mode 100644 backend/src/libocl/src/ocl_work_group.cl
diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt
index 16f00ee..4601c54 100644
--- a/backend/src/libocl/CMakeLists.txt
+++ b/backend/src/libocl/CMakeLists.txt
@@ -52,7 +52,7 @@ FOREACH(M ${OCL_COPY_HEADERS})
COPY_THE_HEADER(${M})
ENDFOREACH(M)
-SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_misc ocl_vload ocl_geometric ocl_image)
+SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_misc ocl_vload ocl_geometric ocl_image ocl_work_group)
FOREACH(M ${OCL_COPY_MODULES})
COPY_THE_HEADER(${M})
COPY_THE_SOURCE(${M})
diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h
index e886670..30b5a85 100644
--- a/backend/src/libocl/include/ocl.h
+++ b/backend/src/libocl/include/ocl.h
@@ -36,6 +36,7 @@
#include "ocl_sync.h"
#include "ocl_vload.h"
#include "ocl_workitem.h"
+#include "ocl_work_group.h"
#pragma OPENCL EXTENSION cl_khr_fp64 : disable
#endif
diff --git a/backend/src/libocl/include/ocl_work_group.h b/backend/src/libocl/include/ocl_work_group.h
new file mode 100644
index 0000000..ebd264f
--- /dev/null
+++ b/backend/src/libocl/include/ocl_work_group.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright © 2012 - 2014 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#ifndef __OCL_WORK_GROUP_H__
+#define __OCL_WORK_GROUP_H__
+#include "ocl_types.h"
+
+int work_group_all(int predicate);
+int work_group_any(int predicate);
+
+/* broadcast */
+OVERLOADABLE int work_group_broadcast(int a, size_t local_id);
+OVERLOADABLE uint work_group_broadcast(uint a, size_t local_id);
+OVERLOADABLE long work_group_broadcast(long a, size_t local_id);
+OVERLOADABLE ulong work_group_broadcast(ulong a, size_t local_id);
+OVERLOADABLE float work_group_broadcast(float a, size_t local_id);
+OVERLOADABLE double work_group_broadcast(double a, size_t local_id);
+
+OVERLOADABLE int work_group_broadcast(int a, size_t local_id_x, size_t local_id_y);
+OVERLOADABLE uint work_group_broadcast(uint a, size_t local_id_x, size_t local_id_y);
+OVERLOADABLE long work_group_broadcast(long a, size_t local_id_x, size_t local_id_y);
+OVERLOADABLE ulong work_group_broadcast(ulong a, size_t local_id_x, size_t local_id_y);
+OVERLOADABLE float work_group_broadcast(float a, size_t local_id_x, size_t local_id_y);
+OVERLOADABLE double work_group_broadcast(double a, size_t local_id_x, size_t local_id_y);
+
+OVERLOADABLE int work_group_broadcast(int a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
+OVERLOADABLE uint work_group_broadcast(uint a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
+OVERLOADABLE long work_group_broadcast(long a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
+OVERLOADABLE ulong work_group_broadcast(ulong a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
+OVERLOADABLE float work_group_broadcast(float a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
+OVERLOADABLE double work_group_broadcast(double a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
+
+/* reduce add */
+OVERLOADABLE int work_group_reduce_add(int x);
+OVERLOADABLE uint work_group_reduce_add(uint x);
+OVERLOADABLE long work_group_reduce_add(long x);
+OVERLOADABLE ulong work_group_reduce_add(ulong x);
+OVERLOADABLE float work_group_reduce_add(float x);
+OVERLOADABLE double work_group_reduce_add(double x);
+
+/* reduce min */
+OVERLOADABLE int work_group_reduce_min(int x);
+OVERLOADABLE uint work_group_reduce_min(uint x);
+OVERLOADABLE long work_group_reduce_min(long x);
+OVERLOADABLE ulong work_group_reduce_min(ulong x);
+OVERLOADABLE float work_group_reduce_min(float x);
+OVERLOADABLE double work_group_reduce_min(double x);
+
+/* reduce max */
+OVERLOADABLE int work_group_reduce_max(int x);
+OVERLOADABLE uint work_group_reduce_max(uint x);
+OVERLOADABLE long work_group_reduce_max(long x);
+OVERLOADABLE ulong work_group_reduce_max(ulong x);
+OVERLOADABLE float work_group_reduce_max(float x);
+OVERLOADABLE double work_group_reduce_max(double x);
+
+/* scan_inclusive add */
+OVERLOADABLE int work_group_scan_inclusive_add(int x);
+OVERLOADABLE uint work_group_scan_inclusive_add(uint x);
+OVERLOADABLE long work_group_scan_inclusive_add(long x);
+OVERLOADABLE ulong work_group_scan_inclusive_add(ulong x);
+OVERLOADABLE float work_group_scan_inclusive_add(float x);
+OVERLOADABLE double work_group_scan_inclusive_add(double x);
+
+/* scan_inclusive min */
+OVERLOADABLE int work_group_scan_inclusive_min(int x);
+OVERLOADABLE uint work_group_scan_inclusive_min(uint x);
+OVERLOADABLE long work_group_scan_inclusive_min(long x);
+OVERLOADABLE ulong work_group_scan_inclusive_min(ulong x);
+OVERLOADABLE float work_group_scan_inclusive_min(float x);
+OVERLOADABLE double work_group_scan_inclusive_min(double x);
+
+/* scan_inclusive max */
+OVERLOADABLE int work_group_scan_inclusive_max(int x);
+OVERLOADABLE uint work_group_scan_inclusive_max(uint x);
+OVERLOADABLE long work_group_scan_inclusive_max(long x);
+OVERLOADABLE ulong work_group_scan_inclusive_max(ulong x);
+OVERLOADABLE float work_group_scan_inclusive_max(float x);
+OVERLOADABLE double work_group_scan_inclusive_max(double x);
+
+/* scan_exclusive add */
+OVERLOADABLE int work_group_scan_exclusive_add(int x);
+OVERLOADABLE uint work_group_scan_exclusive_add(uint x);
+OVERLOADABLE long work_group_scan_exclusive_add(long x);
+OVERLOADABLE ulong work_group_scan_exclusive_add(ulong x);
+OVERLOADABLE float work_group_scan_exclusive_add(float x);
+OVERLOADABLE double work_group_scan_exclusive_add(double x);
+
+/* scan_exclusive min */
+OVERLOADABLE int work_group_scan_exclusive_min(int x);
+OVERLOADABLE uint work_group_scan_exclusive_min(uint x);
+OVERLOADABLE long work_group_scan_exclusive_min(long x);
+OVERLOADABLE ulong work_group_scan_exclusive_min(ulong x);
+OVERLOADABLE float work_group_scan_exclusive_min(float x);
+OVERLOADABLE double work_group_scan_exclusive_min(double x);
+
+/* scan_exclusive max */
+OVERLOADABLE int work_group_scan_exclusive_max(int x);
+OVERLOADABLE uint work_group_scan_exclusive_max(uint x);
+OVERLOADABLE long work_group_scan_exclusive_max(long x);
+OVERLOADABLE ulong work_group_scan_exclusive_max(ulong x);
+OVERLOADABLE float work_group_scan_exclusive_max(float x);
+OVERLOADABLE double work_group_scan_exclusive_max(double x);
+#endif /* __OCL_WORK_GROUP_H__ */
diff --git a/backend/src/libocl/src/ocl_work_group.cl b/backend/src/libocl/src/ocl_work_group.cl
new file mode 100644
index 0000000..065b223
--- /dev/null
+++ b/backend/src/libocl/src/ocl_work_group.cl
@@ -0,0 +1,126 @@
+/*
+ * Copyright © 2012 - 2014 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "ocl_work_group.h"
+
+int __gen_ocl_work_group_all(int predicate);
+int work_group_all(int predicate) {
+ return __gen_ocl_work_group_all(predicate);
+}
+
+int __gen_ocl_work_group_any(int predicate);
+int work_group_any(int predicate) {
+ return __gen_ocl_work_group_any(predicate);
+}
+
+/* broadcast */
+#define BROADCAST_IMPL(GEN_TYPE) \
+ OVERLOADABLE GEN_TYPE __gen_ocl_work_group_broadcast(GEN_TYPE a, size_t local_id); \
+ OVERLOADABLE GEN_TYPE work_group_broadcast(GEN_TYPE a, size_t local_id) { \
+ return __gen_ocl_work_group_broadcast(a, local_id); \
+ } \
+ OVERLOADABLE GEN_TYPE __gen_ocl_work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y); \
+ OVERLOADABLE GEN_TYPE work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y) { \
+ return __gen_ocl_work_group_broadcast(a, local_id_x, local_id_y); \
+ } \
+ OVERLOADABLE GEN_TYPE __gen_ocl_work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y, size_t local_id_z); \
+ OVERLOADABLE GEN_TYPE work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y, size_t local_id_z) { \
+ return __gen_ocl_work_group_broadcast(a, local_id_x, local_id_y, local_id_z); \
+ }
+
+BROADCAST_IMPL(int)
+BROADCAST_IMPL(uint)
+BROADCAST_IMPL(long)
+BROADCAST_IMPL(ulong)
+BROADCAST_IMPL(float)
+BROADCAST_IMPL(double)
+#undef BROADCAST_IMPL
+
+
+#define RANGE_OP(RANGE, OP, GEN_TYPE) \
+ OVERLOADABLE GEN_TYPE __gen_ocl_work_group_##RANGE##_##OP(GEN_TYPE x); \
+ OVERLOADABLE GEN_TYPE work_group_##RANGE##_##OP(GEN_TYPE x) { \
+ return __gen_ocl_work_group_##RANGE##_##OP(x); \
+ }
+
+/* reduce add */
+RANGE_OP(reduce, add, int)
+RANGE_OP(reduce, add, uint)
+RANGE_OP(reduce, add, long)
+RANGE_OP(reduce, add, ulong)
+RANGE_OP(reduce, add, float)
+RANGE_OP(reduce, add, double)
+/* reduce min */
+RANGE_OP(reduce, min, int)
+RANGE_OP(reduce, min, uint)
+RANGE_OP(reduce, min, long)
+RANGE_OP(reduce, min, ulong)
+RANGE_OP(reduce, min, float)
+RANGE_OP(reduce, min, double)
+/* reduce max */
+RANGE_OP(reduce, max, int)
+RANGE_OP(reduce, max, uint)
+RANGE_OP(reduce, max, long)
+RANGE_OP(reduce, max, ulong)
+RANGE_OP(reduce, max, float)
+RANGE_OP(reduce, max, double)
+
+/* scan_inclusive add */
+RANGE_OP(scan_inclusive, add, int)
+RANGE_OP(scan_inclusive, add, uint)
+RANGE_OP(scan_inclusive, add, long)
+RANGE_OP(scan_inclusive, add, ulong)
+RANGE_OP(scan_inclusive, add, float)
+RANGE_OP(scan_inclusive, add, double)
+/* scan_inclusive min */
+RANGE_OP(scan_inclusive, min, int)
+RANGE_OP(scan_inclusive, min, uint)
+RANGE_OP(scan_inclusive, min, long)
+RANGE_OP(scan_inclusive, min, ulong)
+RANGE_OP(scan_inclusive, min, float)
+RANGE_OP(scan_inclusive, min, double)
+/* scan_inclusive max */
+RANGE_OP(scan_inclusive, max, int)
+RANGE_OP(scan_inclusive, max, uint)
+RANGE_OP(scan_inclusive, max, long)
+RANGE_OP(scan_inclusive, max, ulong)
+RANGE_OP(scan_inclusive, max, float)
+RANGE_OP(scan_inclusive, max, double)
+
+/* scan_exclusive add */
+RANGE_OP(scan_exclusive, add, int)
+RANGE_OP(scan_exclusive, add, uint)
+RANGE_OP(scan_exclusive, add, long)
+RANGE_OP(scan_exclusive, add, ulong)
+RANGE_OP(scan_exclusive, add, float)
+RANGE_OP(scan_exclusive, add, double)
+/* scan_exclusive min */
+RANGE_OP(scan_exclusive, min, int)
+RANGE_OP(scan_exclusive, min, uint)
+RANGE_OP(scan_exclusive, min, long)
+RANGE_OP(scan_exclusive, min, ulong)
+RANGE_OP(scan_exclusive, min, float)
+RANGE_OP(scan_exclusive, min, double)
+/* scan_exclusive max */
+RANGE_OP(scan_exclusive, max, int)
+RANGE_OP(scan_exclusive, max, uint)
+RANGE_OP(scan_exclusive, max, long)
+RANGE_OP(scan_exclusive, max, ulong)
+RANGE_OP(scan_exclusive, max, float)
+RANGE_OP(scan_exclusive, max, double)
+
+#undef RANGE_OP
--
1.7.9.5
More information about the Beignet
mailing list