[Beignet] [PATCH] [PATCH_V2] GBE: Improve the clEnqueueCopyBuffer performance in not-aligned case
Lv Meng
meng.lv at intel.com
Wed Mar 19 23:40:51 PDT 2014
Signed-off-by: Lv Meng <meng.lv at intel.com>
---
src/kernels/cl_internel_copy_buf_dword_copy.cl | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
create mode 100755 src/kernels/cl_internel_copy_buf_dword_copy.cl
diff --git a/src/kernels/cl_internel_copy_buf_dword_copy.cl b/src/kernels/cl_internel_copy_buf_dword_copy.cl
new file mode 100755
index 0000000..55a76d0
--- /dev/null
+++ b/src/kernels/cl_internel_copy_buf_dword_copy.cl
@@ -0,0 +1,19 @@
+/* Each work-item gid < size writes one dword to dst[dstalignoffset + gid], taken from the */
+/* 4 source bytes starting at byte (srcoffset + 4*gid) of src. srcoffset is assumed to be */
+/* non-negative and the device little-endian (byte k of a dword is bits 8k..8k+7). */
+kernel void dword_copy(__global unsigned int*src,int srcoffset,__global unsigned int*dst,int dstalignoffset,int size){
+  int shift = (srcoffset%4)*8; /* bit offset of the first wanted byte inside its dword */
+  __global unsigned int *src_align = src+(srcoffset/4);
+  __global unsigned int *dst_align = dst+dstalignoffset;
+  int gid = get_global_id(0);
+  if(gid<size){
+    unsigned int lo = src_align[gid];
+    if(shift){
+      /* Unaligned: splice the tail of this dword with the head of the next (reads gid+1). */
+      unsigned int hi = src_align[gid+1];
+      dst_align[gid] = (lo>>shift)|(hi<<(32-shift));
+    }
+    else
+      dst_align[gid] = lo;
+  }
+}
\ No newline at end of file
--
1.8.3.2
More information about the Beignet
mailing list