[Beignet] [PATCH 17/18] Add memcpy and memset function into the linkage
junyan.he at inbox.com
junyan.he at inbox.com
Tue Aug 12 00:33:05 PDT 2014
From: Junyan He <junyan.he at linux.intel.com>
Because the memcpy and memset function suites would be
dropped during module linkage, we need to add them
explicitly to the module linkage so that they are kept.
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/libocl/Makefile.in | 2 +-
backend/src/libocl/lib/ocl_memcpy.ll | 336 +++++++++++++++++++++++++++++++++
backend/src/libocl/lib/ocl_memset.ll | 127 +++++++++++++
backend/src/llvm/llvm_bitcode_link.cpp | 15 ++
backend/src/ocl_memcpy.ll | 336 ---------------------------------
backend/src/ocl_memset.ll | 127 -------------
6 files changed, 479 insertions(+), 464 deletions(-)
create mode 100644 backend/src/libocl/lib/ocl_memcpy.ll
create mode 100644 backend/src/libocl/lib/ocl_memset.ll
delete mode 100644 backend/src/ocl_memcpy.ll
delete mode 100644 backend/src/ocl_memset.ll
diff --git a/backend/src/libocl/Makefile.in b/backend/src/libocl/Makefile.in
index 2b9de51..47a92f2 100644
--- a/backend/src/libocl/Makefile.in
+++ b/backend/src/libocl/Makefile.in
@@ -8,7 +8,7 @@ GENERATED_HEADERS=ocl_defines.h ocl_as.h ocl_convert.h ocl_common.h ocl_relation
GENERATED_CL_SRCS=$(addprefix lib/, $(GENERATED_FILES))
GENERATED_CL_HEADERS=$(addprefix include/, $(GENERATED_HEADERS))
CL_FILE_NAMES=ocl_workitem.cl ocl_atom.cl ocl_async.cl ocl_sync.cl ocl_misc.cl ocl_vload.cl ocl_geometric.cl ocl_image.cl $(GENERATED_FILES)
-LL_FILE_NAMES=ocl_barrier.ll
+LL_FILE_NAMES=ocl_barrier.ll ocl_memcpy.ll ocl_memset.ll
CL_SRCS=$(addprefix lib/, $(CL_FILE_NAMES))
LL_SRCS=$(addprefix lib/, $(LL_FILE_NAMES))
CL_BITCODES=$(patsubst %.cl, %.bc, $(CL_SRCS))
diff --git a/backend/src/libocl/lib/ocl_memcpy.ll b/backend/src/libocl/lib/ocl_memcpy.ll
new file mode 100644
index 0000000..476033e
--- /dev/null
+++ b/backend/src/libocl/lib/ocl_memcpy.ll
@@ -0,0 +1,336 @@
+;The memcpy's source code.
+; INLINE_OVERLOADABLE void __gen_memcpy(uchar* dst, uchar* src, size_t size) {
+; size_t index = 0;
+; while((index + 4) <= size) {
+; *((uint *)(dst + index)) = *((uint *)(src + index));
+; index += 4;
+; }
+; while(index < size) {
+; dst[index] = src[index];
+; index++;
+; }
+; }
+
+define void @__gen_memcpy_gg(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+entry: ; copy %size bytes from %src to %dst: 32-bit words first, then a byte tail
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ %add = add i32 %index.0, 4
+ %cmp = icmp ugt i32 %add, %size ; true once fewer than 4 bytes remain
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
+ %1 = load i32 addrspace(1)* %0, align 4 ; NOTE(review): align 4 assumes 4-byte-aligned %src/%dst - confirm callers
+ %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
+ store i32 %1, i32 addrspace(1)* %2, align 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size ; byte tail runs while index < size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1
+ %3 = load i8 addrspace(1)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+define void @__gen_memcpy_gp(i8 addrspace(1)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+entry: ; copy %size bytes from %src to %dst: 32-bit words first, then a byte tail
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ %add = add i32 %index.0, 4
+ %cmp = icmp ugt i32 %add, %size ; true once fewer than 4 bytes remain
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)*
+ %1 = load i32 addrspace(0)* %0, align 4 ; NOTE(review): align 4 assumes 4-byte-aligned %src/%dst - confirm callers
+ %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
+ store i32 %1, i32 addrspace(1)* %2, align 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size ; byte tail runs while index < size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1
+ %3 = load i8 addrspace(0)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+define void @__gen_memcpy_gl(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+entry: ; copy %size bytes from %src to %dst: 32-bit words first, then a byte tail
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ %add = add i32 %index.0, 4
+ %cmp = icmp ugt i32 %add, %size ; true once fewer than 4 bytes remain
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
+ %1 = load i32 addrspace(3)* %0, align 4 ; NOTE(review): align 4 assumes 4-byte-aligned %src/%dst - confirm callers
+ %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
+ store i32 %1, i32 addrspace(1)* %2, align 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size ; byte tail runs while index < size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1
+ %3 = load i8 addrspace(3)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+define void @__gen_memcpy_pg(i8 addrspace(0)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+entry: ; copy %size bytes from %src to %dst: 32-bit words first, then a byte tail
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ %add = add i32 %index.0, 4
+ %cmp = icmp ugt i32 %add, %size ; true once fewer than 4 bytes remain
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
+ %1 = load i32 addrspace(1)* %0, align 4 ; NOTE(review): align 4 assumes 4-byte-aligned %src/%dst - confirm callers
+ %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
+ store i32 %1, i32 addrspace(0)* %2, align 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size ; byte tail runs while index < size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1
+ %3 = load i8 addrspace(1)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+define void @__gen_memcpy_pp(i8 addrspace(0)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+entry: ; copy %size bytes from %src to %dst: 32-bit words first, then a byte tail
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ %add = add i32 %index.0, 4
+ %cmp = icmp ugt i32 %add, %size ; true once fewer than 4 bytes remain
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)*
+ %1 = load i32 addrspace(0)* %0, align 4 ; NOTE(review): align 4 assumes 4-byte-aligned %src/%dst - confirm callers
+ %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
+ store i32 %1, i32 addrspace(0)* %2, align 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size ; byte tail runs while index < size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1
+ %3 = load i8 addrspace(0)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+define void @__gen_memcpy_pl(i8 addrspace(0)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+entry: ; copy %size bytes from %src to %dst: 32-bit words first, then a byte tail
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ %add = add i32 %index.0, 4
+ %cmp = icmp ugt i32 %add, %size ; true once fewer than 4 bytes remain
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
+ %1 = load i32 addrspace(3)* %0, align 4 ; NOTE(review): align 4 assumes 4-byte-aligned %src/%dst - confirm callers
+ %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
+ store i32 %1, i32 addrspace(0)* %2, align 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size ; byte tail runs while index < size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1
+ %3 = load i8 addrspace(3)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+define void @__gen_memcpy_lg(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+entry: ; copy %size bytes from %src to %dst: 32-bit words first, then a byte tail
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ %add = add i32 %index.0, 4
+ %cmp = icmp ugt i32 %add, %size ; true once fewer than 4 bytes remain
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
+ %1 = load i32 addrspace(1)* %0, align 4 ; NOTE(review): align 4 assumes 4-byte-aligned %src/%dst - confirm callers
+ %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
+ store i32 %1, i32 addrspace(3)* %2, align 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size ; byte tail runs while index < size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1
+ %3 = load i8 addrspace(1)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+define void @__gen_memcpy_lp(i8 addrspace(3)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+entry: ; copy %size bytes from %src to %dst: 32-bit words first, then a byte tail
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ %add = add i32 %index.0, 4
+ %cmp = icmp ugt i32 %add, %size ; true once fewer than 4 bytes remain
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)*
+ %1 = load i32 addrspace(0)* %0, align 4 ; NOTE(review): align 4 assumes 4-byte-aligned %src/%dst - confirm callers
+ %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
+ store i32 %1, i32 addrspace(3)* %2, align 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size ; byte tail runs while index < size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1
+ %3 = load i8 addrspace(0)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+define void @__gen_memcpy_ll(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+entry: ; copy %size bytes from %src to %dst: 32-bit words first, then a byte tail
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ %add = add i32 %index.0, 4
+ %cmp = icmp ugt i32 %add, %size ; true once fewer than 4 bytes remain
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
+ %1 = load i32 addrspace(3)* %0, align 4 ; NOTE(review): align 4 assumes 4-byte-aligned %src/%dst - confirm callers
+ %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
+ store i32 %1, i32 addrspace(3)* %2, align 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size ; byte tail runs while index < size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1
+ %3 = load i8 addrspace(3)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
diff --git a/backend/src/libocl/lib/ocl_memset.ll b/backend/src/libocl/lib/ocl_memset.ll
new file mode 100644
index 0000000..addf9f5
--- /dev/null
+++ b/backend/src/libocl/lib/ocl_memset.ll
@@ -0,0 +1,127 @@
+;The memset's source code.
+; INLINE_OVERLOADABLE void __gen_memset(uchar* dst, uchar val, size_t size) {
+; size_t index = 0;
+; uint v = (val << 24) | (val << 16) | (val << 8) | val;
+; while((index + 4) <= size) {
+; *((uint *)(dst + index)) = v;
+; index += 4;
+; }
+; while(index < size) {
+; dst[index] = val;
+; index++;
+; }
+; }
+
+define void @__gen_memset_p(i8* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline {
+entry: ; fill %size bytes at %dst with %val: 32-bit words first, then a byte tail
+ %conv = zext i8 %val to i32
+ %shl = shl nuw i32 %conv, 24
+ %shl2 = shl nuw nsw i32 %conv, 16
+ %or = or i32 %shl, %shl2
+ %shl4 = shl nuw nsw i32 %conv, 8
+ %or5 = or i32 %or, %shl4
+ %or7 = or i32 %or5, %conv ; %val replicated into all four bytes of the word
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ %add = add i32 %index.0, 4
+ %cmp = icmp ugt i32 %add, %size ; true once fewer than 4 bytes remain
+ br i1 %cmp, label %while.cond10, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8* %dst, i32 %index.0
+ %0 = bitcast i8* %add.ptr to i32*
+ store i32 %or7, i32* %0, align 4 ; NOTE(review): align 4 assumes 4-byte-aligned %dst - confirm callers
+ br label %while.cond
+
+while.cond10: ; preds = %while.cond, %while.body13
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ]
+ %cmp11 = icmp ult i32 %index.1, %size ; byte tail runs while index < size
+ br i1 %cmp11, label %while.body13, label %while.end14
+
+while.body13: ; preds = %while.cond10
+ %arrayidx = getelementptr inbounds i8* %dst, i32 %index.1
+ store i8 %val, i8* %arrayidx, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond10
+
+while.end14: ; preds = %while.cond10
+ ret void
+}
+
+define void @__gen_memset_g(i8 addrspace(1)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline {
+entry: ; fill %size bytes at %dst with %val: 32-bit words first, then a byte tail
+ %conv = zext i8 %val to i32
+ %shl = shl nuw i32 %conv, 24
+ %shl2 = shl nuw nsw i32 %conv, 16
+ %or = or i32 %shl, %shl2
+ %shl4 = shl nuw nsw i32 %conv, 8
+ %or5 = or i32 %or, %shl4
+ %or7 = or i32 %or5, %conv ; %val replicated into all four bytes of the word
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ %add = add i32 %index.0, 4
+ %cmp = icmp ugt i32 %add, %size ; true once fewer than 4 bytes remain
+ br i1 %cmp, label %while.cond10, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+ %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
+ store i32 %or7, i32 addrspace(1)* %0, align 4 ; NOTE(review): align 4 assumes 4-byte-aligned %dst - confirm callers
+ br label %while.cond
+
+while.cond10: ; preds = %while.cond, %while.body13
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ]
+ %cmp11 = icmp ult i32 %index.1, %size ; byte tail runs while index < size
+ br i1 %cmp11, label %while.body13, label %while.end14
+
+while.body13: ; preds = %while.cond10
+ %arrayidx = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+ store i8 %val, i8 addrspace(1)* %arrayidx, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond10
+
+while.end14: ; preds = %while.cond10
+ ret void
+}
+
+define void @__gen_memset_l(i8 addrspace(3)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline {
+entry: ; fill %size bytes at %dst with %val: 32-bit words first, then a byte tail
+ %conv = zext i8 %val to i32
+ %shl = shl nuw i32 %conv, 24
+ %shl2 = shl nuw nsw i32 %conv, 16
+ %or = or i32 %shl, %shl2
+ %shl4 = shl nuw nsw i32 %conv, 8
+ %or5 = or i32 %or, %shl4
+ %or7 = or i32 %or5, %conv ; %val replicated into all four bytes of the word
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ %add = add i32 %index.0, 4
+ %cmp = icmp ugt i32 %add, %size ; true once fewer than 4 bytes remain
+ br i1 %cmp, label %while.cond10, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+ %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
+ store i32 %or7, i32 addrspace(3)* %0, align 4 ; NOTE(review): align 4 assumes 4-byte-aligned %dst - confirm callers
+ br label %while.cond
+
+while.cond10: ; preds = %while.cond, %while.body13
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ]
+ %cmp11 = icmp ult i32 %index.1, %size ; byte tail runs while index < size
+ br i1 %cmp11, label %while.body13, label %while.end14
+
+while.body13: ; preds = %while.cond10
+ %arrayidx = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+ store i8 %val, i8 addrspace(3)* %arrayidx, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond10
+
+while.end14: ; preds = %while.cond10
+ ret void
+}
diff --git a/backend/src/llvm/llvm_bitcode_link.cpp b/backend/src/llvm/llvm_bitcode_link.cpp
index 90ab4e1..615bbcb 100644
--- a/backend/src/llvm/llvm_bitcode_link.cpp
+++ b/backend/src/llvm/llvm_bitcode_link.cpp
@@ -99,6 +99,21 @@ namespace gbe
return NULL;
}
+ /* Add the memset and memcpy functions here. */
+ kernels.push_back("__gen_memcpy_gg");
+ kernels.push_back("__gen_memcpy_gp");
+ kernels.push_back("__gen_memcpy_gl");
+ kernels.push_back("__gen_memcpy_pg");
+ kernels.push_back("__gen_memcpy_pp");
+ kernels.push_back("__gen_memcpy_pl");
+ kernels.push_back("__gen_memcpy_lg");
+ kernels.push_back("__gen_memcpy_lp");
+ kernels.push_back("__gen_memcpy_ll");
+
+ kernels.push_back("__gen_memset_p");
+ kernels.push_back("__gen_memset_g");
+ kernels.push_back("__gen_memset_l");
+
Module* clonedLib = createOclBitCodeModule(ctx);
assert(clonedLib && "Can not create the beignet bitcode\n");
diff --git a/backend/src/ocl_memcpy.ll b/backend/src/ocl_memcpy.ll
deleted file mode 100644
index 476033e..0000000
--- a/backend/src/ocl_memcpy.ll
+++ /dev/null
@@ -1,336 +0,0 @@
-;The memcpy's source code.
-; INLINE_OVERLOADABLE void __gen_memcpy(uchar* dst, uchar* src, size_t size) {
-; size_t index = 0;
-; while((index + 4) >= size) {
-; *((uint *)(dst + index)) = *((uint *)(src + index));
-; index += 4;
-; }
-; while(index < size) {
-; dst[index] = src[index];
-; index++;
-; }
-; }
-
-define void @__gen_memcpy_gg(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
-entry:
- br label %while.cond
-
-while.cond: ; preds = %while.body, %entry
- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
- %add = add i32 %index.0, 4
- %cmp = icmp ult i32 %add, %size
- br i1 %cmp, label %while.cond3, label %while.body
-
-while.body: ; preds = %while.cond
- %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0
- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
- %1 = load i32 addrspace(1)* %0, align 4
- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
- store i32 %1, i32 addrspace(1)* %2, align 4
- br label %while.cond
-
-while.cond3: ; preds = %while.cond, %while.body5
- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
- %cmp4 = icmp ult i32 %index.1, %size
- br i1 %cmp4, label %while.body5, label %while.end7
-
-while.body5: ; preds = %while.cond3
- %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1
- %3 = load i8 addrspace(1)* %arrayidx, align 1
- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
- %inc = add i32 %index.1, 1
- br label %while.cond3
-
-while.end7: ; preds = %while.cond3
- ret void
-}
-
-define void @__gen_memcpy_gp(i8 addrspace(1)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
-entry:
- br label %while.cond
-
-while.cond: ; preds = %while.body, %entry
- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
- %add = add i32 %index.0, 4
- %cmp = icmp ult i32 %add, %size
- br i1 %cmp, label %while.cond3, label %while.body
-
-while.body: ; preds = %while.cond
- %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0
- %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)*
- %1 = load i32 addrspace(0)* %0, align 4
- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
- store i32 %1, i32 addrspace(1)* %2, align 4
- br label %while.cond
-
-while.cond3: ; preds = %while.cond, %while.body5
- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
- %cmp4 = icmp ult i32 %index.1, %size
- br i1 %cmp4, label %while.body5, label %while.end7
-
-while.body5: ; preds = %while.cond3
- %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1
- %3 = load i8 addrspace(0)* %arrayidx, align 1
- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
- %inc = add i32 %index.1, 1
- br label %while.cond3
-
-while.end7: ; preds = %while.cond3
- ret void
-}
-
-define void @__gen_memcpy_gl(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
-entry:
- br label %while.cond
-
-while.cond: ; preds = %while.body, %entry
- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
- %add = add i32 %index.0, 4
- %cmp = icmp ult i32 %add, %size
- br i1 %cmp, label %while.cond3, label %while.body
-
-while.body: ; preds = %while.cond
- %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0
- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
- %1 = load i32 addrspace(3)* %0, align 4
- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
- store i32 %1, i32 addrspace(1)* %2, align 4
- br label %while.cond
-
-while.cond3: ; preds = %while.cond, %while.body5
- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
- %cmp4 = icmp ult i32 %index.1, %size
- br i1 %cmp4, label %while.body5, label %while.end7
-
-while.body5: ; preds = %while.cond3
- %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1
- %3 = load i8 addrspace(3)* %arrayidx, align 1
- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
- %inc = add i32 %index.1, 1
- br label %while.cond3
-
-while.end7: ; preds = %while.cond3
- ret void
-}
-
-define void @__gen_memcpy_pg(i8 addrspace(0)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
-entry:
- br label %while.cond
-
-while.cond: ; preds = %while.body, %entry
- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
- %add = add i32 %index.0, 4
- %cmp = icmp ult i32 %add, %size
- br i1 %cmp, label %while.cond3, label %while.body
-
-while.body: ; preds = %while.cond
- %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0
- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
- %1 = load i32 addrspace(1)* %0, align 4
- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
- store i32 %1, i32 addrspace(0)* %2, align 4
- br label %while.cond
-
-while.cond3: ; preds = %while.cond, %while.body5
- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
- %cmp4 = icmp ult i32 %index.1, %size
- br i1 %cmp4, label %while.body5, label %while.end7
-
-while.body5: ; preds = %while.cond3
- %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1
- %3 = load i8 addrspace(1)* %arrayidx, align 1
- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
- %inc = add i32 %index.1, 1
- br label %while.cond3
-
-while.end7: ; preds = %while.cond3
- ret void
-}
-
-define void @__gen_memcpy_pp(i8 addrspace(0)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
-entry:
- br label %while.cond
-
-while.cond: ; preds = %while.body, %entry
- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
- %add = add i32 %index.0, 4
- %cmp = icmp ult i32 %add, %size
- br i1 %cmp, label %while.cond3, label %while.body
-
-while.body: ; preds = %while.cond
- %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0
- %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)*
- %1 = load i32 addrspace(0)* %0, align 4
- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
- store i32 %1, i32 addrspace(0)* %2, align 4
- br label %while.cond
-
-while.cond3: ; preds = %while.cond, %while.body5
- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
- %cmp4 = icmp ult i32 %index.1, %size
- br i1 %cmp4, label %while.body5, label %while.end7
-
-while.body5: ; preds = %while.cond3
- %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1
- %3 = load i8 addrspace(0)* %arrayidx, align 1
- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
- %inc = add i32 %index.1, 1
- br label %while.cond3
-
-while.end7: ; preds = %while.cond3
- ret void
-}
-
-define void @__gen_memcpy_pl(i8 addrspace(0)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
-entry:
- br label %while.cond
-
-while.cond: ; preds = %while.body, %entry
- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
- %add = add i32 %index.0, 4
- %cmp = icmp ult i32 %add, %size
- br i1 %cmp, label %while.cond3, label %while.body
-
-while.body: ; preds = %while.cond
- %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0
- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
- %1 = load i32 addrspace(3)* %0, align 4
- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
- store i32 %1, i32 addrspace(0)* %2, align 4
- br label %while.cond
-
-while.cond3: ; preds = %while.cond, %while.body5
- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
- %cmp4 = icmp ult i32 %index.1, %size
- br i1 %cmp4, label %while.body5, label %while.end7
-
-while.body5: ; preds = %while.cond3
- %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1
- %3 = load i8 addrspace(3)* %arrayidx, align 1
- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
- %inc = add i32 %index.1, 1
- br label %while.cond3
-
-while.end7: ; preds = %while.cond3
- ret void
-}
-
-define void @__gen_memcpy_lg(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
-entry:
- br label %while.cond
-
-while.cond: ; preds = %while.body, %entry
- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
- %add = add i32 %index.0, 4
- %cmp = icmp ult i32 %add, %size
- br i1 %cmp, label %while.cond3, label %while.body
-
-while.body: ; preds = %while.cond
- %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0
- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
- %1 = load i32 addrspace(1)* %0, align 4
- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
- store i32 %1, i32 addrspace(3)* %2, align 4
- br label %while.cond
-
-while.cond3: ; preds = %while.cond, %while.body5
- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
- %cmp4 = icmp ult i32 %index.1, %size
- br i1 %cmp4, label %while.body5, label %while.end7
-
-while.body5: ; preds = %while.cond3
- %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1
- %3 = load i8 addrspace(1)* %arrayidx, align 1
- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
- %inc = add i32 %index.1, 1
- br label %while.cond3
-
-while.end7: ; preds = %while.cond3
- ret void
-}
-
-define void @__gen_memcpy_lp(i8 addrspace(3)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
-entry:
- br label %while.cond
-
-while.cond: ; preds = %while.body, %entry
- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
- %add = add i32 %index.0, 4
- %cmp = icmp ult i32 %add, %size
- br i1 %cmp, label %while.cond3, label %while.body
-
-while.body: ; preds = %while.cond
- %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0
- %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)*
- %1 = load i32 addrspace(0)* %0, align 4
- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
- store i32 %1, i32 addrspace(3)* %2, align 4
- br label %while.cond
-
-while.cond3: ; preds = %while.cond, %while.body5
- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
- %cmp4 = icmp ult i32 %index.1, %size
- br i1 %cmp4, label %while.body5, label %while.end7
-
-while.body5: ; preds = %while.cond3
- %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1
- %3 = load i8 addrspace(0)* %arrayidx, align 1
- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
- %inc = add i32 %index.1, 1
- br label %while.cond3
-
-while.end7: ; preds = %while.cond3
- ret void
-}
-
-define void @__gen_memcpy_ll(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
-entry:
- br label %while.cond
-
-while.cond: ; preds = %while.body, %entry
- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
- %add = add i32 %index.0, 4
- %cmp = icmp ult i32 %add, %size
- br i1 %cmp, label %while.cond3, label %while.body
-
-while.body: ; preds = %while.cond
- %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0
- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
- %1 = load i32 addrspace(3)* %0, align 4
- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
- store i32 %1, i32 addrspace(3)* %2, align 4
- br label %while.cond
-
-while.cond3: ; preds = %while.cond, %while.body5
- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
- %cmp4 = icmp ult i32 %index.1, %size
- br i1 %cmp4, label %while.body5, label %while.end7
-
-while.body5: ; preds = %while.cond3
- %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1
- %3 = load i8 addrspace(3)* %arrayidx, align 1
- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
- %inc = add i32 %index.1, 1
- br label %while.cond3
-
-while.end7: ; preds = %while.cond3
- ret void
-}
diff --git a/backend/src/ocl_memset.ll b/backend/src/ocl_memset.ll
deleted file mode 100644
index addf9f5..0000000
--- a/backend/src/ocl_memset.ll
+++ /dev/null
@@ -1,127 +0,0 @@
-;The memset's source code.
-; INLINE_OVERLOADABLE void __gen_memset(uchar* dst, uchar val, size_t size) {
-; size_t index = 0;
-; uint v = (val << 24) | (val << 16) | (val << 8) | val;
-; while((index + 4) >= size) {
-; *((uint *)(dst + index)) = v;
-; index += 4;
-; }
-; while(index < size) {
-; dst[index] = val;
-; index++;
-; }
-; }
-
-define void @__gen_memset_p(i8* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline {
-entry:
- %conv = zext i8 %val to i32
- %shl = shl nuw i32 %conv, 24
- %shl2 = shl nuw nsw i32 %conv, 16
- %or = or i32 %shl, %shl2
- %shl4 = shl nuw nsw i32 %conv, 8
- %or5 = or i32 %or, %shl4
- %or7 = or i32 %or5, %conv
- br label %while.cond
-
-while.cond: ; preds = %while.body, %entry
- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
- %add = add i32 %index.0, 4
- %cmp = icmp ult i32 %add, %size
- br i1 %cmp, label %while.cond10, label %while.body
-
-while.body: ; preds = %while.cond
- %add.ptr = getelementptr inbounds i8* %dst, i32 %index.0
- %0 = bitcast i8* %add.ptr to i32*
- store i32 %or7, i32* %0, align 4
- br label %while.cond
-
-while.cond10: ; preds = %while.cond, %while.body13
- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ]
- %cmp11 = icmp ult i32 %index.1, %size
- br i1 %cmp11, label %while.body13, label %while.end14
-
-while.body13: ; preds = %while.cond10
- %arrayidx = getelementptr inbounds i8* %dst, i32 %index.1
- store i8 %val, i8* %arrayidx, align 1
- %inc = add i32 %index.1, 1
- br label %while.cond10
-
-while.end14: ; preds = %while.cond10
- ret void
-}
-
-define void @__gen_memset_g(i8 addrspace(1)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline {
-entry:
- %conv = zext i8 %val to i32
- %shl = shl nuw i32 %conv, 24
- %shl2 = shl nuw nsw i32 %conv, 16
- %or = or i32 %shl, %shl2
- %shl4 = shl nuw nsw i32 %conv, 8
- %or5 = or i32 %or, %shl4
- %or7 = or i32 %or5, %conv
- br label %while.cond
-
-while.cond: ; preds = %while.body, %entry
- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
- %add = add i32 %index.0, 4
- %cmp = icmp ult i32 %add, %size
- br i1 %cmp, label %while.cond10, label %while.body
-
-while.body: ; preds = %while.cond
- %add.ptr = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
- store i32 %or7, i32 addrspace(1)* %0, align 4
- br label %while.cond
-
-while.cond10: ; preds = %while.cond, %while.body13
- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ]
- %cmp11 = icmp ult i32 %index.1, %size
- br i1 %cmp11, label %while.body13, label %while.end14
-
-while.body13: ; preds = %while.cond10
- %arrayidx = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
- store i8 %val, i8 addrspace(1)* %arrayidx, align 1
- %inc = add i32 %index.1, 1
- br label %while.cond10
-
-while.end14: ; preds = %while.cond10
- ret void
-}
-
-define void @__gen_memset_l(i8 addrspace(3)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline {
-entry:
- %conv = zext i8 %val to i32
- %shl = shl nuw i32 %conv, 24
- %shl2 = shl nuw nsw i32 %conv, 16
- %or = or i32 %shl, %shl2
- %shl4 = shl nuw nsw i32 %conv, 8
- %or5 = or i32 %or, %shl4
- %or7 = or i32 %or5, %conv
- br label %while.cond
-
-while.cond: ; preds = %while.body, %entry
- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
- %add = add i32 %index.0, 4
- %cmp = icmp ult i32 %add, %size
- br i1 %cmp, label %while.cond10, label %while.body
-
-while.body: ; preds = %while.cond
- %add.ptr = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
- store i32 %or7, i32 addrspace(3)* %0, align 4
- br label %while.cond
-
-while.cond10: ; preds = %while.cond, %while.body13
- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ]
- %cmp11 = icmp ult i32 %index.1, %size
- br i1 %cmp11, label %while.body13, label %while.end14
-
-while.body13: ; preds = %while.cond10
- %arrayidx = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
- store i8 %val, i8 addrspace(3)* %arrayidx, align 1
- %inc = add i32 %index.1, 1
- br label %while.cond10
-
-while.end14: ; preds = %while.cond10
- ret void
-}
--
1.8.3.2
More information about the Beignet mailing list