[Beignet] [PATCH 1/3] [opencl-1.2] Add the kernels used by the clEnqueueFillBuffer API
junyan.he at inbox.com
junyan.he at inbox.com
Wed Apr 23 01:35:18 PDT 2014
From: Junyan He <junyan.he at linux.intel.com>
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
src/CMakeLists.txt | 5 ++++-
src/kernels/cl_internal_fill_buf_align128.cl | 9 +++++++++
src/kernels/cl_internal_fill_buf_align2.cl | 8 ++++++++
src/kernels/cl_internal_fill_buf_align4.cl | 8 ++++++++
src/kernels/cl_internal_fill_buf_align8.cl | 14 ++++++++++++++
src/kernels/cl_internal_fill_buf_unalign.cl | 8 ++++++++
6 files changed, 51 insertions(+), 1 deletion(-)
create mode 100644 src/kernels/cl_internal_fill_buf_align128.cl
create mode 100644 src/kernels/cl_internal_fill_buf_align2.cl
create mode 100644 src/kernels/cl_internal_fill_buf_align4.cl
create mode 100644 src/kernels/cl_internal_fill_buf_align8.cl
create mode 100644 src/kernels/cl_internal_fill_buf_unalign.cl
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8164a44..a3bac02 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -20,7 +20,10 @@ endmacro (MakeKernelBinStr)
set (KERNEL_STR_FILES)
set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4
cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset
-cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset)
+cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset
+cl_internal_fill_buf_align8 cl_internal_fill_buf_align4
+cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign
+cl_internal_fill_buf_align128) # fill_buf_* entries: kernels for the OpenCL 1.2 buffer-fill path
MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
set(OPENCL_SRC
diff --git a/src/kernels/cl_internal_fill_buf_align128.cl b/src/kernels/cl_internal_fill_buf_align128.cl
new file mode 100644
index 0000000..552820c
--- /dev/null
+++ b/src/kernels/cl_internal_fill_buf_align128.cl
@@ -0,0 +1,9 @@
+/* Each work-item below `size` stores pattern0 then pattern1 into two consecutive float16 slots of dst. */
+kernel void __cl_fill_region_align128(global float16 *dst, float16 pattern0,
+                                      unsigned int offset, unsigned int size, float16 pattern1)
+{
+    const int gid = get_global_id(0);
+    if (gid >= size) return;  /* surplus work-items write nothing */
+    dst[gid*2+offset] = pattern0;
+    dst[gid*2+offset+1] = pattern1;
+}
diff --git a/src/kernels/cl_internal_fill_buf_align2.cl b/src/kernels/cl_internal_fill_buf_align2.cl
new file mode 100644
index 0000000..0b9a4cf
--- /dev/null
+++ b/src/kernels/cl_internal_fill_buf_align2.cl
@@ -0,0 +1,8 @@
+/* Each work-item below `size` stores one char2 `pattern` at dst[id + offset] (offset counted in char2 elements). */
+kernel void __cl_fill_region_align2(global char2 *dst, char2 pattern,
+                                    unsigned int offset, unsigned int size)
+{
+    const int gid = get_global_id(0);
+    if (gid >= size) return;  /* surplus work-items write nothing */
+    dst[gid+offset] = pattern;
+}
diff --git a/src/kernels/cl_internal_fill_buf_align4.cl b/src/kernels/cl_internal_fill_buf_align4.cl
new file mode 100644
index 0000000..aefd92f
--- /dev/null
+++ b/src/kernels/cl_internal_fill_buf_align4.cl
@@ -0,0 +1,8 @@
+/* Each work-item below `size` stores one float `pattern` at dst[id + offset] (offset counted in float elements). */
+kernel void __cl_fill_region_align4(global float *dst, float pattern,
+                                    unsigned int offset, unsigned int size)
+{
+    const int gid = get_global_id(0);
+    if (gid >= size) return;  /* surplus work-items write nothing */
+    dst[gid+offset] = pattern;
+}
diff --git a/src/kernels/cl_internal_fill_buf_align8.cl b/src/kernels/cl_internal_fill_buf_align8.cl
new file mode 100644
index 0000000..edaff77
--- /dev/null
+++ b/src/kernels/cl_internal_fill_buf_align8.cl
@@ -0,0 +1,14 @@
+/* Stamps out one fill kernel per float vector width N; each work-item below `size` stores `pattern` at dst[id + offset]. */
+#define FILL_REGION_ALIGN8_KERNEL(N) \
+    kernel void __cl_fill_region_align8_##N(global float##N *dst, float##N pattern, \
+                                            unsigned int offset, unsigned int size) { \
+        const int gid = get_global_id(0); \
+        if (gid >= size) return; \
+        dst[gid+offset] = pattern; \
+    }
+
+
+FILL_REGION_ALIGN8_KERNEL(2)
+FILL_REGION_ALIGN8_KERNEL(4)
+FILL_REGION_ALIGN8_KERNEL(8)
+FILL_REGION_ALIGN8_KERNEL(16)
diff --git a/src/kernels/cl_internal_fill_buf_unalign.cl b/src/kernels/cl_internal_fill_buf_unalign.cl
new file mode 100644
index 0000000..90762b0
--- /dev/null
+++ b/src/kernels/cl_internal_fill_buf_unalign.cl
@@ -0,0 +1,8 @@
+/* Each work-item below `size` stores one char `pattern` at dst[id + offset] (offset counted in bytes). */
+kernel void __cl_fill_region_unalign(global char *dst, char pattern,
+                                     unsigned int offset, unsigned int size)
+{
+    const int gid = get_global_id(0);
+    if (gid >= size) return;  /* surplus work-items write nothing */
+    dst[gid+offset] = pattern;
+}
--
1.8.3.2
More information about the Beignet
mailing list