[Beignet] [PATCH 02/10] OCL20: add device enqueue builtins.
Yang Rong
rong.r.yang at intel.com
Thu Mar 17 10:53:50 UTC 2016
Add three helper function calls to support them. The ndrange info is stored on the stack,
and the device enqueue info is written to the auxiliary global buffer.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
backend/src/backend/program.cpp | 1 +
backend/src/libocl/CMakeLists.txt | 4 +-
backend/src/libocl/include/ocl.h | 1 +
backend/src/libocl/include/ocl_enqueue.h | 67 +++++++++++++
backend/src/libocl/src/ocl_enqueue.cl | 156 +++++++++++++++++++++++++++++++
5 files changed, 227 insertions(+), 2 deletions(-)
create mode 100644 backend/src/libocl/include/ocl_enqueue.h
create mode 100644 backend/src/libocl/src/ocl_enqueue.cl
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 0119670..1580fe8 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -546,6 +546,7 @@ namespace gbe {
// FIXME we haven't implement those builtin functions,
// so disable it currently.
args.push_back("-fno-builtin");
+ args.push_back("-fblocks");
args.push_back("-disable-llvm-optzns");
if(bFastMath)
args.push_back("-D __FAST_RELAXED_MATH__=1");
diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt
index d7ed841..3b0d5f8 100644
--- a/backend/src/libocl/CMakeLists.txt
+++ b/backend/src/libocl/CMakeLists.txt
@@ -53,7 +53,7 @@ FOREACH(M ${OCL_COPY_HEADERS})
ENDFOREACH(M)
SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_memcpy
- ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image ocl_work_group ocl_pipe)
+ ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image ocl_work_group ocl_pipe ocl_enqueue)
FOREACH(M ${OCL_COPY_MODULES})
COPY_THE_HEADER(${M})
COPY_THE_SOURCE(${M})
@@ -130,7 +130,7 @@ FOREACH(M ${OCL_BASH_GENERATED_MODULES})
ENDFOREACH(M)
-SET (CLANG_OCL_FLAGS -fno-builtin -ffp-contract=off -cl-kernel-arg-info -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND "-cl-std=CL2.0")
+SET (CLANG_OCL_FLAGS -fno-builtin -ffp-contract=off -cl-kernel-arg-info -fblocks -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND "-cl-std=CL2.0")
MACRO(ADD_CL_TO_BC_TARGET _file)
# CMake seems can not add pattern rule, use MACRO to replace.
STRING(REGEX REPLACE "${LIBOCL_BINARY_DIR}/src/\(o.*\)\\.cl" "${OCL_OBJECT_DIR}/\\1.bc" output_name ${_file})
diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h
index e2918c6..852a523 100644
--- a/backend/src/libocl/include/ocl.h
+++ b/backend/src/libocl/include/ocl.h
@@ -41,6 +41,7 @@
#include "ocl_simd.h"
#include "ocl_work_group.h"
#include "ocl_pipe.h"
+#include "ocl_enqueue.h"
#pragma OPENCL EXTENSION cl_khr_fp64 : disable
#pragma OPENCL EXTENSION cl_khr_fp16 : disable
#endif
diff --git a/backend/src/libocl/include/ocl_enqueue.h b/backend/src/libocl/include/ocl_enqueue.h
new file mode 100644
index 0000000..a578846
--- /dev/null
+++ b/backend/src/libocl/include/ocl_enqueue.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright © 2012 - 2014 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef __OCL_ENQUEUE_H__
+#define __OCL_ENQUEUE_H__
+
+#include "ocl_types.h"
+#define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0 /* NOTE(review): the only enqueue flag this header defines */
+#define CLK_SUCCESS 0 /* numerically equal to the value enqueue_kernel() in ocl_enqueue.cl returns */
+
+struct ndrange_info_t { /* ND-range descriptor filled in by the ndrange_1D/2D/3D builtins and copied out by enqueue_kernel() */
+ int type; /* as assigned by ndrange_*D(): high nibble = dimensions - 1; low nibble = 1 (global size only), 2 (+ local size), 3 (+ global offset) */
+ int global_work_size[3]; /* only the first <dimensions> entries are written; size_t arguments are narrowed to int */
+ int local_work_size[3]; /* written only by the overloads that take a local size (low nibble 2 or 3) */
+ int global_work_offset[3]; /* written only by the overloads that take an offset (low nibble 3) */
+};
+
+struct Block_literal { // view that enqueue_kernel() casts a block pointer to; presumably mirrors Clang's Block ABI layout -- TODO confirm
+ void *isa; // initialized to &_NSConcreteStackBlock or &_NSConcreteGlobalBlock
+ int flags;
+ int reserved;
+ __global void (*invoke)(void *, ...);
+ struct Block_descriptor_1 {
+ unsigned long int reserved; // NULL
+ unsigned long int size; // sizeof(struct Block_literal); enqueue_kernel() copies this many bytes starting at the block pointer
+ // optional helper functions (declared unconditionally in this struct)
+ void (*copy_helper)(void *dst, void *src); // per the Block ABI notes: IFF flags & (1<<25)
+ void (*dispose_helper)(void *src); // per the Block ABI notes: IFF flags & (1<<25)
+ // required ABI.2010.3.16
+ const char *signature; // per the Block ABI notes: IFF flags & (1<<30)
+ } *descriptor;
+ // imported (captured) variables follow here; they are not declared as members
+};
+
+int enqueue_kernel(queue_t q, int flag, ndrange_t ndrange, void (^block)(void)); /* device-side enqueue of a block over ndrange; implemented in ocl_enqueue.cl */
+
+queue_t get_default_queue(void); /* implemented in ocl_enqueue.cl */
+int __gen_enqueue_kernel(queue_t q, int flag, ndrange_t ndrange, void (^block)(void), int size); /* NOTE(review): declared only; ocl_enqueue.cl provides no definition */
+
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size); /* 1-D: global size only */
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size, size_t local_work_size); /* 1-D: global + local size */
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_offset, size_t global_work_size, size_t local_work_size); /* 1-D: offset + global + local size */
+
+OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_size[2]); /* 2-D: global size only */
+OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_size[2], size_t local_work_size[2]); /* 2-D: global + local size */
+OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_offset[2], size_t global_work_size[2], size_t local_work_size[2]); /* 2-D: offset + global + local size */
+
+OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_size[3]); /* 3-D: global size only */
+OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_size[3], size_t local_work_size[3]); /* 3-D: global + local size */
+OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_offset[3], size_t global_work_size[3], size_t local_work_size[3]); /* 3-D: offset + global + local size */
+
+#endif
diff --git a/backend/src/libocl/src/ocl_enqueue.cl b/backend/src/libocl/src/ocl_enqueue.cl
new file mode 100644
index 0000000..78c39c4
--- /dev/null
+++ b/backend/src/libocl/src/ocl_enqueue.cl
@@ -0,0 +1,156 @@
+/*
+ * Copyright © 2012 - 2014 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "ocl_types.h"
+#include "ocl_enqueue.h"
+#include "ocl_workitem.h"
+#include "ocl_atom.h"
+
+/*
+ * get_default_queue builtin.
+ *
+ * Hands back a queue_t that is declared but never assigned; the original
+ * author describes this value as the NULL queue.
+ * NOTE(review): the value is formally uninitialized -- confirm that the
+ * backend treats it as a null queue.
+ */
+queue_t get_default_queue(void)
+{
+  queue_t default_queue;
+  return default_queue;
+}
+
+ndrange_t __gen_ocl_set_ndrange_info(__private struct ndrange_info_t *info); /* defined outside this file; called by every ndrange_*D() to turn a private descriptor into an ndrange_t */
+__private struct ndrange_info_t* __gen_ocl_get_ndrange_info(ndrange_t info); /* defined outside this file; presumably the inverse of the setter -- TODO confirm */
+__global int* __gen_ocl_get_enqueue_info_addr(void); /* defined outside this file; enqueue_kernel() treats the result as the base of its output buffer */
+
+/*
+ * enqueue_kernel builtin.
+ *
+ * Appends one record to the buffer returned by
+ * __gen_ocl_get_enqueue_info_addr(): a struct ndrange_info_t followed by the
+ * raw bytes of the block literal (descriptor->size bytes).
+ *
+ * The int at the start of that buffer serves as a running byte offset: it is
+ * atomically advanced by the record size, and its previous value selects
+ * where this record is written. Records start sizeof(int) bytes into the
+ * buffer, i.e. right after that counter.
+ *
+ * q and flag are accepted but not used here. Always returns CLK_SUCCESS (0).
+ * NOTE(review): nothing here checks the record against the buffer capacity.
+ */
+int enqueue_kernel(queue_t q, int flag, ndrange_t ndrange, void (^block)(void))
+{
+  struct Block_literal *blk = (struct Block_literal *)block;
+  uchar *blk_bytes = (uchar *)block;
+  int blk_size = blk->descriptor->size;
+
+  /* Reserve space for this record and learn where it starts. */
+  __global int *base = __gen_ocl_get_enqueue_info_addr();
+  int offset = atomic_add(base, blk_size + sizeof(struct ndrange_info_t));
+
+  __global uchar *record = (__global uchar *)base + offset + sizeof(int);
+  __global uchar *payload = record + sizeof(struct ndrange_info_t);
+
+  /* The ND-range descriptor goes first ... */
+  __private struct ndrange_info_t *range = __gen_ocl_get_ndrange_info(ndrange);
+  *((__global struct ndrange_info_t *)record) = *range;
+
+  /* ... followed by a byte-wise copy of the block literal. */
+  for (int n = 0; n < blk_size; n++) {
+    payload[n] = blk_bytes[n];
+  }
+
+  return CLK_SUCCESS;
+}
+
+/*
+ * ndrange_1D(global_work_size): 1-D range described by its global size only.
+ * Sets type 0x1 and global_work_size[0].
+ * NOTE(review): every other descriptor field is left unassigned.
+ */
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size)
+{
+  struct ndrange_info_t desc;
+
+  desc.global_work_size[0] = global_work_size;
+  desc.type = 0x1;
+
+  return __gen_ocl_set_ndrange_info(&desc);
+}
+
+/*
+ * ndrange_1D(global_work_size, local_work_size): 1-D range with global and
+ * local sizes. Sets type 0x2 plus element 0 of both size arrays.
+ * NOTE(review): every other descriptor field is left unassigned.
+ */
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size, size_t local_work_size)
+{
+  struct ndrange_info_t desc;
+
+  desc.local_work_size[0] = local_work_size;
+  desc.global_work_size[0] = global_work_size;
+  desc.type = 0x2;
+
+  return __gen_ocl_set_ndrange_info(&desc);
+}
+
+
+/*
+ * ndrange_1D(global_work_offset, global_work_size, local_work_size): 1-D
+ * range with offset, global size and local size. Sets type 0x3 plus
+ * element 0 of all three arrays.
+ * NOTE(review): elements 1 and 2 of each array are left unassigned.
+ */
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_offset, size_t global_work_size, size_t local_work_size)
+{
+  struct ndrange_info_t desc;
+
+  desc.global_work_offset[0] = global_work_offset;
+  desc.local_work_size[0] = local_work_size;
+  desc.global_work_size[0] = global_work_size;
+  desc.type = 0x3;
+
+  return __gen_ocl_set_ndrange_info(&desc);
+}
+
+/*
+ * ndrange_2D(global_work_size): 2-D range described by its global size only.
+ * Sets type 0x11 and the first two global_work_size entries.
+ * NOTE(review): every other descriptor field is left unassigned.
+ */
+OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_size[2])
+{
+  struct ndrange_info_t desc;
+
+  desc.type = 0x11;
+  for (int dim = 0; dim < 2; dim++) {
+    desc.global_work_size[dim] = global_work_size[dim];
+  }
+
+  return __gen_ocl_set_ndrange_info(&desc);
+}
+
+/*
+ * ndrange_2D(global_work_size, local_work_size): 2-D range with global and
+ * local sizes. Sets type 0x12 and the first two entries of both size arrays.
+ * NOTE(review): every other descriptor field is left unassigned.
+ */
+OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_size[2], size_t local_work_size[2])
+{
+  struct ndrange_info_t desc;
+
+  desc.type = 0x12;
+  for (int dim = 0; dim < 2; dim++) {
+    desc.global_work_size[dim] = global_work_size[dim];
+    desc.local_work_size[dim] = local_work_size[dim];
+  }
+
+  return __gen_ocl_set_ndrange_info(&desc);
+}
+
+
+/*
+ * ndrange_2D(global_work_offset, global_work_size, local_work_size): 2-D
+ * range with offset, global size and local size. Sets type 0x13 and the
+ * first two entries of all three arrays.
+ * NOTE(review): the third entry of each array is left unassigned.
+ */
+OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_offset[2], size_t global_work_size[2], size_t local_work_size[2])
+{
+  struct ndrange_info_t desc;
+
+  desc.type = 0x13;
+  for (int dim = 0; dim < 2; dim++) {
+    desc.global_work_size[dim] = global_work_size[dim];
+    desc.local_work_size[dim] = local_work_size[dim];
+    desc.global_work_offset[dim] = global_work_offset[dim];
+  }
+
+  return __gen_ocl_set_ndrange_info(&desc);
+}
+
+/*
+ * ndrange_3D(global_work_size): 3-D range described by its global size only.
+ * Sets type 0x21 and all three global_work_size entries.
+ * NOTE(review): local_work_size and global_work_offset are left unassigned.
+ */
+OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_size[3])
+{
+  struct ndrange_info_t desc;
+
+  desc.type = 0x21;
+  for (int dim = 0; dim < 3; dim++) {
+    desc.global_work_size[dim] = global_work_size[dim];
+  }
+
+  return __gen_ocl_set_ndrange_info(&desc);
+}
+
+/*
+ * ndrange_3D(global_work_size, local_work_size): 3-D range with global and
+ * local sizes. Sets type 0x22 and all three entries of both size arrays.
+ * NOTE(review): global_work_offset is left unassigned.
+ */
+OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_size[3], size_t local_work_size[3])
+{
+  struct ndrange_info_t desc;
+
+  desc.type = 0x22;
+  for (int dim = 0; dim < 3; dim++) {
+    desc.global_work_size[dim] = global_work_size[dim];
+    desc.local_work_size[dim] = local_work_size[dim];
+  }
+
+  return __gen_ocl_set_ndrange_info(&desc);
+}
+
+/*
+ * ndrange_3D(global_work_offset, global_work_size, local_work_size): 3-D
+ * range with offset, global size and local size. Sets type 0x23 and fills
+ * every entry of all three arrays.
+ */
+OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_offset[3], size_t global_work_size[3], size_t local_work_size[3])
+{
+  struct ndrange_info_t desc;
+
+  desc.type = 0x23;
+  for (int dim = 0; dim < 3; dim++) {
+    desc.global_work_size[dim] = global_work_size[dim];
+    desc.local_work_size[dim] = local_work_size[dim];
+    desc.global_work_offset[dim] = global_work_offset[dim];
+  }
+
+  return __gen_ocl_set_ndrange_info(&desc);
+}
--
1.9.1
More information about the Beignet
mailing list