[Beignet] [PATCH] [PATCH_V2] GBE: Improve the clEnqueueCopyBuffer performance in the not-aligned case

Lv Meng meng.lv at intel.com
Wed Mar 19 23:40:51 PDT 2014


Signed-off-by: Lv Meng <meng.lv at intel.com>
---
 src/kernels/cl_internel_copy_buf_dword_copy.cl | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100755 src/kernels/cl_internel_copy_buf_dword_copy.cl

diff --git a/src/kernels/cl_internel_copy_buf_dword_copy.cl b/src/kernels/cl_internel_copy_buf_dword_copy.cl
new file mode 100755
index 0000000..55a76d0
--- /dev/null
+++ b/src/kernels/cl_internel_copy_buf_dword_copy.cl
@@ -0,0 +1,19 @@
+kernel void dword_copy(__global unsigned int*src,int srcoffset,__global unsigned int*dst,int dstalignoffset,int size){ // Each work-item stores one unsigned int into dst, built from src starting srcoffset/4 elements in and skewed by srcoffset%4 bytes.
+    unsigned int outdata = 0; // value this work-item will store
+    unsigned char lsm[8]; // byte staging buffer: room for two consecutive unsigned ints read from src
+    unsigned int* li = lsm; // unsigned-int view of lsm; NOTE(review): unsigned char* is converted to unsigned int* without a cast and lsm's alignment is not forced - confirm the compiler accepts this
+    int lsmoffset = srcoffset%4; // remainder of srcoffset modulo 4, used below as a byte index into lsm; presumably srcoffset is a non-negative byte offset - TODO confirm with caller
+    __global unsigned int *src_algin = src+(srcoffset/4); // src advanced by srcoffset/4 whole unsigned int elements
+    __global unsigned int *dst_align = dst+dstalignoffset; // dstalignoffset is applied in unsigned int elements (pointer arithmetic on unsigned int*)
+    int gid = get_global_id(0); // index of this work-item in dimension 0
+    if(gid<size){ // size is compared against the work-item index, so it bounds the number of elements written
+     *li = src_algin[gid]; // fill lsm[0..3] with source element gid
+     if(lsmoffset){ // non-zero remainder: the output word straddles two source elements
+       *(li+1) = src_algin[gid+1]; // fill lsm[4..7] with the next element; NOTE(review): for gid == size-1 this reads index size - verify the caller guarantees it is readable
+       outdata = (lsm[lsmoffset])|(lsm[lsmoffset+1]<<8)|(lsm[lsmoffset+2]<<16)|(lsm[lsmoffset+3]<<24); // pack the 4 bytes starting at lsm[lsmoffset], first byte in the low 8 bits; NOTE(review): matches memory order only on a little-endian device, and <<24 acts on a promoted int - confirm
+     }
+     else
+       outdata = *li; // zero remainder: pass the source element through unchanged
+     dst_align[gid] = outdata; // one element written per work-item
+    }
+}
\ No newline at end of file
-- 
1.8.3.2



More information about the Beignet mailing list