[Beignet] [PATCH 1/2] GBE: Add constant pointer in the memcpy intrinsic.

Zhigang Gong zhigang.gong at linux.intel.com
Sun Dec 14 22:55:45 PST 2014


Right, the current implementation of all the memcpy/memset intrinsics is simply not good.
In the future we need to find a more graceful way to expand those intrinsics, to replace
the current, very hacky method.

One question about your comment: does it apply to this patch only, or to the whole patchset?
Thanks.

On Mon, Dec 15, 2014 at 02:44:54AM +0000, Song, Ruiling wrote:
> As a temporary solution, it looks good to me.
> I think we need to rewrite the memset/memcpy lowering pass to get rid of the .ll files. We can use C++ code to generate the IR and replace the memset/memcpy calls, although that is a little complex.
> 
> 
> > -----Original Message-----
> > From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> > Zhigang Gong
> > Sent: Monday, December 15, 2014 9:18 AM
> > To: Gong, Zhigang
> > Cc: beignet at lists.freedesktop.org
> > Subject: Re: [Beignet] [PATCH 1/2] GBE: Add constant pointer in the memcpy
> > intrinsic.
> > 
> > Ping for review.
> > 
> > On Thu, Dec 04, 2014 at 05:21:59PM +0800, Zhigang Gong wrote:
> > > From: Zhigang Gong <zhigang.gong at linux.intel.com>
> > >
> > > > Blender may generate this type of intrinsic. Fix it now.
> > > > Also fix a previous typo that kept an assert from firing when it
> > > > should.
> > >
> > > Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
> > > ---
> > >  backend/src/libocl/src/ocl_memcpy.ll         | 177
> > +++++++++++++++++++++++++++
> > >  backend/src/llvm/llvm_bitcode_link.cpp       |   7 ++
> > >  backend/src/llvm/llvm_intrinsic_lowering.cpp |   4 +-
> > >  3 files changed, 187 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/backend/src/libocl/src/ocl_memcpy.ll
> > > b/backend/src/libocl/src/ocl_memcpy.ll
> > > index fbc44d1..b3fadb2 100644
> > > --- a/backend/src/libocl/src/ocl_memcpy.ll
> > > +++ b/backend/src/libocl/src/ocl_memcpy.ll
> > > @@ -550,3 +550,180 @@
> > while.body:                                       ; preds
> > = %entry, %while.body
> > >  while.end:                                        ; preds
> > = %while.body, %entry
> > >    ret void
> > >  }
> > > +
> > > +define void @__gen_memcpy_gc_align(i8 addrspace(1)* %dst, i8
> > > +addrspace(2)* %src, i32 %size) nounwind alwaysinline {
> > > +entry:
> > > +  br label %while.cond
> > > +
> > > +while.cond:                                       ; preds
> > = %while.body, %entry
> > > +  %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
> > > +  %add = add i32 %index.0, 4
> > > +  %cmp = icmp ugt i32 %add, %size
> > > +  br i1 %cmp, label %while.cond3, label %while.body
> > > +
> > > +while.body:                                       ; preds
> > = %while.cond
> > > +  %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32
> > > +%index.0
> > > +  %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)*
> > > +  %1 = load i32 addrspace(2)* %0, align 4
> > > +  %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32
> > > +%index.0
> > > +  %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
> > > +  store i32 %1, i32 addrspace(1)* %2, align 4
> > > +  br label %while.cond
> > > +
> > > +while.cond3:                                      ; preds
> > = %while.cond, %while.body5
> > > +  %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5
> > > +]
> > > +  %cmp4 = icmp ult i32 %index.1, %size
> > > +  br i1 %cmp4, label %while.body5, label %while.end7
> > > +
> > > +while.body5:                                      ; preds
> > = %while.cond3
> > > +  %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32
> > > +%index.1
> > > +  %3 = load i8 addrspace(2)* %arrayidx, align 1
> > > +  %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32
> > > +%index.1
> > > +  store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
> > > +  %inc = add i32 %index.1, 1
> > > +  br label %while.cond3
> > > +
> > > +while.end7:                                       ; preds
> > = %while.cond3
> > > +  ret void
> > > +}
> > > +
> > > +define void @__gen_memcpy_pc_align(i8 addrspace(0)* %dst, i8
> > > +addrspace(2)* %src, i32 %size) nounwind alwaysinline {
> > > +entry:
> > > +  br label %while.cond
> > > +
> > > +while.cond:                                       ; preds
> > = %while.body, %entry
> > > +  %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
> > > +  %add = add i32 %index.0, 4
> > > +  %cmp = icmp ugt i32 %add, %size
> > > +  br i1 %cmp, label %while.cond3, label %while.body
> > > +
> > > +while.body:                                       ; preds
> > = %while.cond
> > > +  %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32
> > > +%index.0
> > > +  %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)*
> > > +  %1 = load i32 addrspace(2)* %0, align 4
> > > +  %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32
> > > +%index.0
> > > +  %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
> > > +  store i32 %1, i32 addrspace(0)* %2, align 4
> > > +  br label %while.cond
> > > +
> > > +while.cond3:                                      ; preds
> > = %while.cond, %while.body5
> > > +  %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5
> > > +]
> > > +  %cmp4 = icmp ult i32 %index.1, %size
> > > +  br i1 %cmp4, label %while.body5, label %while.end7
> > > +
> > > +while.body5:                                      ; preds
> > = %while.cond3
> > > +  %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32
> > > +%index.1
> > > +  %3 = load i8 addrspace(2)* %arrayidx, align 1
> > > +  %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32
> > > +%index.1
> > > +  store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
> > > +  %inc = add i32 %index.1, 1
> > > +  br label %while.cond3
> > > +
> > > +while.end7:                                       ; preds
> > = %while.cond3
> > > +  ret void
> > > +}
> > > +
> > > +define void @__gen_memcpy_lc_align(i8 addrspace(3)* %dst, i8
> > > +addrspace(2)* %src, i32 %size) nounwind alwaysinline {
> > > +entry:
> > > +  br label %while.cond
> > > +
> > > +while.cond:                                       ; preds
> > = %while.body, %entry
> > > +  %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
> > > +  %add = add i32 %index.0, 4
> > > +  %cmp = icmp ugt i32 %add, %size
> > > +  br i1 %cmp, label %while.cond3, label %while.body
> > > +
> > > +while.body:                                       ; preds
> > = %while.cond
> > > +  %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32
> > > +%index.0
> > > +  %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)*
> > > +  %1 = load i32 addrspace(2)* %0, align 4
> > > +  %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32
> > > +%index.0
> > > +  %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
> > > +  store i32 %1, i32 addrspace(3)* %2, align 4
> > > +  br label %while.cond
> > > +
> > > +while.cond3:                                      ; preds
> > = %while.cond, %while.body5
> > > +  %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5
> > > +]
> > > +  %cmp4 = icmp ult i32 %index.1, %size
> > > +  br i1 %cmp4, label %while.body5, label %while.end7
> > > +
> > > +while.body5:                                      ; preds
> > = %while.cond3
> > > +  %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32
> > > +%index.1
> > > +  %3 = load i8 addrspace(2)* %arrayidx, align 1
> > > +  %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32
> > > +%index.1
> > > +  store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
> > > +  %inc = add i32 %index.1, 1
> > > +  br label %while.cond3
> > > +
> > > +while.end7:                                       ; preds
> > = %while.cond3
> > > +  ret void
> > > +}
> > > +
> > > +define void @__gen_memcpy_pc(i8 addrspace(0)* %dst, i8 addrspace(2)*
> > > +%src, i32 %size) nounwind alwaysinline {
> > > +entry:
> > > +  %cmp4 = icmp eq i32 %size, 0
> > > +  br i1 %cmp4, label %while.end, label %while.body
> > > +
> > > +while.body:                                       ; preds
> > = %entry, %while.body
> > > +  %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
> > > +  %0 = ptrtoint i8 addrspace(2)* %src to i32
> > > +  %1 = add i32 %0, %index.05
> > > +  %2 = inttoptr i32 %1 to i8 addrspace(2)*
> > > +  %3 = load i8 addrspace(2)* %2, align 1
> > > +  %4 = ptrtoint i8 addrspace(0)* %dst to i32
> > > +  %5 = add i32 %4, %index.05
> > > +  %6 = inttoptr i32 %5 to i8 addrspace(0)*
> > > +  store i8 %3, i8 addrspace(0)* %6, align 1
> > > +  %inc = add i32 %index.05, 1
> > > +  %cmp = icmp ult i32 %inc, %size
> > > +  br i1 %cmp, label %while.body, label %while.end
> > > +
> > > +while.end:                                        ; preds
> > = %while.body, %entry
> > > +  ret void
> > > +}
> > > +
> > > +define void @__gen_memcpy_gc(i8 addrspace(1)* %dst, i8 addrspace(2)*
> > > +%src, i32 %size) nounwind alwaysinline {
> > > +entry:
> > > +  %cmp4 = icmp eq i32 %size, 0
> > > +  br i1 %cmp4, label %while.end, label %while.body
> > > +
> > > +while.body:                                       ; preds
> > = %entry, %while.body
> > > +  %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
> > > +  %0 = ptrtoint i8 addrspace(2)* %src to i32
> > > +  %1 = add i32 %0, %index.05
> > > +  %2 = inttoptr i32 %1 to i8 addrspace(2)*
> > > +  %3 = load i8 addrspace(2)* %2, align 1
> > > +  %4 = ptrtoint i8 addrspace(1)* %dst to i32
> > > +  %5 = add i32 %4, %index.05
> > > +  %6 = inttoptr i32 %5 to i8 addrspace(1)*
> > > +  store i8 %3, i8 addrspace(1)* %6, align 1
> > > +  %inc = add i32 %index.05, 1
> > > +  %cmp = icmp ult i32 %inc, %size
> > > +  br i1 %cmp, label %while.body, label %while.end
> > > +
> > > +while.end:                                        ; preds
> > = %while.body, %entry
> > > +  ret void
> > > +}
> > > +
> > > +define void @__gen_memcpy_lc(i8 addrspace(3)* %dst, i8 addrspace(2)*
> > > +%src, i32 %size) nounwind alwaysinline {
> > > +entry:
> > > +  %cmp4 = icmp eq i32 %size, 0
> > > +  br i1 %cmp4, label %while.end, label %while.body
> > > +
> > > +while.body:                                       ; preds
> > = %entry, %while.body
> > > +  %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
> > > +  %0 = ptrtoint i8 addrspace(2)* %src to i32
> > > +  %1 = add i32 %0, %index.05
> > > +  %2 = inttoptr i32 %1 to i8 addrspace(2)*
> > > +  %3 = load i8 addrspace(2)* %2, align 1
> > > +  %4 = ptrtoint i8 addrspace(3)* %dst to i32
> > > +  %5 = add i32 %4, %index.05
> > > +  %6 = inttoptr i32 %5 to i8 addrspace(3)*
> > > +  store i8 %3, i8 addrspace(3)* %6, align 1
> > > +  %inc = add i32 %index.05, 1
> > > +  %cmp = icmp ult i32 %inc, %size
> > > +  br i1 %cmp, label %while.body, label %while.end
> > > +
> > > +while.end:                                        ; preds
> > = %while.body, %entry
> > > +  ret void
> > > +}
> > > diff --git a/backend/src/llvm/llvm_bitcode_link.cpp
> > > b/backend/src/llvm/llvm_bitcode_link.cpp
> > > index f5e9f81..d3058d6 100644
> > > --- a/backend/src/llvm/llvm_bitcode_link.cpp
> > > +++ b/backend/src/llvm/llvm_bitcode_link.cpp
> > > @@ -170,6 +170,13 @@ namespace gbe
> > >      builtinFuncs.push_back("__gen_memset_g_align");
> > >      builtinFuncs.push_back("__gen_memset_l_align");
> > >
> > > +    builtinFuncs.push_back("__gen_memcpy_pc");
> > > +    builtinFuncs.push_back("__gen_memcpy_gc");
> > > +    builtinFuncs.push_back("__gen_memcpy_lc");
> > > +
> > > +    builtinFuncs.push_back("__gen_memcpy_pc_align");
> > > +    builtinFuncs.push_back("__gen_memcpy_gc_align");
> > > +    builtinFuncs.push_back("__gen_memcpy_lc_align");
> > >
> > >      for (Module::iterator SF = mod->begin(), E = mod->end(); SF != E;
> > ++SF) {
> > >        if (SF->isDeclaration()) continue; diff --git
> > > a/backend/src/llvm/llvm_intrinsic_lowering.cpp
> > > b/backend/src/llvm/llvm_intrinsic_lowering.cpp
> > > index 52f99c1..7d1f8f0 100644
> > > --- a/backend/src/llvm/llvm_intrinsic_lowering.cpp
> > > +++ b/backend/src/llvm/llvm_intrinsic_lowering.cpp
> > > @@ -72,10 +72,12 @@ namespace gbe {
> > >              return 'p';
> > >            case 1:
> > >              return 'g';
> > > +          case 2:
> > > +            return 'c';
> > >            case 3:
> > >              return 'l';
> > >            default:
> > > -            assert("Non support address space");
> > > +            assert(0 && "Non support address space");
> > >              return '\0';
> > >          }
> > >        }
> > > --
> > > 1.8.3.2
> > >
> > > _______________________________________________
> > > Beignet mailing list
> > > Beignet at lists.freedesktop.org
> > > http://lists.freedesktop.org/mailman/listinfo/beignet
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list