[Beignet] [PATCH 2/3] Add all atomic built-in functions support.

Yang Rong rong.r.yang at intel.com
Wed Jun 26 00:29:22 PDT 2013


Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 backend/src/llvm/llvm_gen_backend.cpp      |   76 +++++++++++++++++++++
 backend/src/llvm/llvm_gen_ocl_function.hxx |   28 ++++++++
 backend/src/ocl_stdlib.h                   |  100 +++++++++++++++++++++++++++-
 3 files changed, 203 insertions(+), 1 deletion(-)

diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 5b7754c..8faeba8 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -534,6 +534,8 @@ namespace gbe
 
     // Emit unary instructions from gen native function
     void emitUnaryCallInst(CallInst &I, CallSite &CS, ir::Opcode opcode);
+    // Emit an atomic instruction from a gen atomic built-in function call
+    void emitAtomicInst(CallInst &I, CallSite &CS, ir::Opcode opcode);
 
     // These instructions are not supported at all
     void visitVAArgInst(VAArgInst &I) {NOT_SUPPORTED;}
@@ -693,10 +695,12 @@ namespace gbe
           return doIt(uint64_t(0));
         }
       }
+
       // NULL pointers
       if(isa<ConstantPointerNull>(CPV)) {
         return doIt(uint32_t(0));
       }
+
       // Floats and doubles
       const Type::TypeID typeID = CPV->getType()->getTypeID();
       switch (typeID) {
@@ -1695,6 +1699,32 @@ namespace gbe
       case GEN_OCL_GET_IMAGE_CHANNEL_DATA_TYPE:
       case GEN_OCL_GET_IMAGE_CHANNEL_ORDER:
       case GEN_OCL_GET_IMAGE_DEPTH:
+      case GEN_OCL_ATOMIC_ADD0:
+      case GEN_OCL_ATOMIC_ADD1:
+      case GEN_OCL_ATOMIC_SUB0:
+      case GEN_OCL_ATOMIC_SUB1:
+      case GEN_OCL_ATOMIC_AND0:
+      case GEN_OCL_ATOMIC_AND1:
+      case GEN_OCL_ATOMIC_OR0:
+      case GEN_OCL_ATOMIC_OR1:
+      case GEN_OCL_ATOMIC_XOR0:
+      case GEN_OCL_ATOMIC_XOR1:
+      case GEN_OCL_ATOMIC_XCHG0:
+      case GEN_OCL_ATOMIC_XCHG1:
+      case GEN_OCL_ATOMIC_UMAX0:
+      case GEN_OCL_ATOMIC_UMAX1:
+      case GEN_OCL_ATOMIC_UMIN0:
+      case GEN_OCL_ATOMIC_UMIN1:
+      case GEN_OCL_ATOMIC_IMAX0:
+      case GEN_OCL_ATOMIC_IMAX1:
+      case GEN_OCL_ATOMIC_IMIN0:
+      case GEN_OCL_ATOMIC_IMIN1:
+      case GEN_OCL_ATOMIC_INC0:
+      case GEN_OCL_ATOMIC_INC1:
+      case GEN_OCL_ATOMIC_DEC0:
+      case GEN_OCL_ATOMIC_DEC1:
+      case GEN_OCL_ATOMIC_CMPXCHG0:
+      case GEN_OCL_ATOMIC_CMPXCHG1:
         // No structure can be returned
         this->newRegister(&I);
         break;
@@ -1779,6 +1809,26 @@ namespace gbe
     ctx.ALU1(opcode, ir::TYPE_FLOAT, dst, src);
   }
 
+  // Emit one ATOMIC instruction (opcode) for a call to a gen atomic built-in.
+  void GenWriter::emitAtomicInst(CallInst &I, CallSite &CS, ir::Opcode opcode) {
+    CallSite::arg_iterator AI = CS.arg_begin();
+    // AE also bounds the loop below, so it is declared in every build mode.
+    CallSite::arg_iterator AE = CS.arg_end();
+    GBE_ASSERT(AI != AE);
+    // The address space is taken from the type of the first call argument.
+    unsigned int llvmSpace = (*AI)->getType()->getPointerAddressSpace();
+    const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmSpace);
+    const ir::Register dst = this->getRegister(&I);
+
+    // Each call argument becomes one source register, kept in call order.
+    vector<ir::Register> src;
+    for (; AI != AE; ++AI)
+      src.push_back(this->getRegister(*AI));
+    const uint32_t srcNum = uint32_t(src.size());
+    const ir::Tuple srcTuple = ctx.arrayTuple(&src[0], srcNum);
+    ctx.ATOMIC(opcode, dst, addrSpace, srcTuple);
+  }
+
   void GenWriter::emitCallInst(CallInst &I) {
     if (Function *F = I.getCalledFunction()) {
       if (F->getIntrinsicID() != 0) {
@@ -1858,6 +1908,32 @@ namespace gbe
           case GEN_OCL_LBARRIER: ctx.SYNC(ir::syncLocalBarrier); break;
           case GEN_OCL_GBARRIER: ctx.SYNC(ir::syncGlobalBarrier); break;
           case GEN_OCL_LGBARRIER: ctx.SYNC(ir::syncLocalBarrier | ir::syncGlobalBarrier); break;
+          case GEN_OCL_ATOMIC_ADD0:
+          case GEN_OCL_ATOMIC_ADD1: this->emitAtomicInst(I,CS,ir::OP_ATOMIC_ADD); break;
+          case GEN_OCL_ATOMIC_SUB0:
+          case GEN_OCL_ATOMIC_SUB1: this->emitAtomicInst(I,CS,ir::OP_ATOMIC_SUB); break;
+          case GEN_OCL_ATOMIC_AND0:
+          case GEN_OCL_ATOMIC_AND1: this->emitAtomicInst(I,CS,ir::OP_ATOMIC_AND); break;
+          case GEN_OCL_ATOMIC_OR0:
+          case GEN_OCL_ATOMIC_OR1: this->emitAtomicInst(I,CS,ir::OP_ATOMIC_OR); break;
+          case GEN_OCL_ATOMIC_XOR0:
+          case GEN_OCL_ATOMIC_XOR1: this->emitAtomicInst(I,CS,ir::OP_ATOMIC_XOR); break;
+          case GEN_OCL_ATOMIC_XCHG0:
+          case GEN_OCL_ATOMIC_XCHG1: this->emitAtomicInst(I,CS,ir::OP_ATOMIC_XCHG); break;
+          case GEN_OCL_ATOMIC_INC0:
+          case GEN_OCL_ATOMIC_INC1: this->emitAtomicInst(I,CS,ir::OP_ATOMIC_INC); break;
+          case GEN_OCL_ATOMIC_DEC0:
+          case GEN_OCL_ATOMIC_DEC1: this->emitAtomicInst(I,CS,ir::OP_ATOMIC_DEC); break;
+          case GEN_OCL_ATOMIC_UMIN0:
+          case GEN_OCL_ATOMIC_UMIN1: this->emitAtomicInst(I,CS,ir::OP_ATOMIC_UMIN); break;
+          case GEN_OCL_ATOMIC_UMAX0:
+          case GEN_OCL_ATOMIC_UMAX1: this->emitAtomicInst(I,CS,ir::OP_ATOMIC_UMAX); break;
+          case GEN_OCL_ATOMIC_IMIN0:
+          case GEN_OCL_ATOMIC_IMIN1: this->emitAtomicInst(I,CS,ir::OP_ATOMIC_IMIN); break;
+          case GEN_OCL_ATOMIC_IMAX0:
+          case GEN_OCL_ATOMIC_IMAX1: this->emitAtomicInst(I,CS,ir::OP_ATOMIC_IMAX); break;
+          case GEN_OCL_ATOMIC_CMPXCHG0:
+          case GEN_OCL_ATOMIC_CMPXCHG1: this->emitAtomicInst(I,CS,ir::OP_ATOMIC_CMPXCHG); break;
           case GEN_OCL_GET_IMAGE_WIDTH:
           case GEN_OCL_GET_IMAGE_HEIGHT:
           case GEN_OCL_GET_IMAGE_DEPTH:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 6cd7298..99dbfec 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -78,6 +78,34 @@ DECL_LLVM_GEN_FUNCTION(GET_IMAGE_DEPTH,  __gen_ocl_get_image_depth)
 DECL_LLVM_GEN_FUNCTION(GET_IMAGE_CHANNEL_DATA_TYPE,  __gen_ocl_get_image_channel_data_type)
 DECL_LLVM_GEN_FUNCTION(GET_IMAGE_CHANNEL_ORDER,  __gen_ocl_get_image_channel_order)
 
+// atomic related functions. Suffix 0 = AS1 pointer overload, 1 = AS3 (presumably __global/__local).
+DECL_LLVM_GEN_FUNCTION(ATOMIC_ADD0, _Z20__gen_ocl_atomic_addPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_ADD1, _Z20__gen_ocl_atomic_addPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_SUB0, _Z20__gen_ocl_atomic_subPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_SUB1, _Z20__gen_ocl_atomic_subPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_AND0, _Z20__gen_ocl_atomic_andPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_AND1, _Z20__gen_ocl_atomic_andPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_OR0,  _Z19__gen_ocl_atomic_orPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_OR1,  _Z19__gen_ocl_atomic_orPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_XOR0, _Z20__gen_ocl_atomic_xorPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_XOR1, _Z20__gen_ocl_atomic_xorPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_UMIN0, _Z21__gen_ocl_atomic_uminPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_UMIN1, _Z21__gen_ocl_atomic_uminPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_UMAX0, _Z21__gen_ocl_atomic_umaxPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_UMAX1, _Z21__gen_ocl_atomic_umaxPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_IMIN0, _Z21__gen_ocl_atomic_iminPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_IMIN1, _Z21__gen_ocl_atomic_iminPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_IMAX0, _Z21__gen_ocl_atomic_imaxPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_IMAX1, _Z21__gen_ocl_atomic_imaxPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_XCHG0, _Z21__gen_ocl_atomic_xchgPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_XCHG1, _Z21__gen_ocl_atomic_xchgPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_INC0, _Z20__gen_ocl_atomic_incPU3AS1j)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_INC1, _Z20__gen_ocl_atomic_incPU3AS3j)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_DEC0, _Z20__gen_ocl_atomic_decPU3AS1j)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_DEC1, _Z20__gen_ocl_atomic_decPU3AS3j)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_CMPXCHG0, _Z24__gen_ocl_atomic_cmpxchgPU3AS1jjj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_CMPXCHG1, _Z24__gen_ocl_atomic_cmpxchgPU3AS3jjj)
+
 // saturation related functions.
 DECL_LLVM_GEN_FUNCTION(SADD_SAT_CHAR, _Z12ocl_sadd_satcc)
 DECL_LLVM_GEN_FUNCTION(SADD_SAT_SHORT, _Z12ocl_sadd_satss)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index 81a0193..0c4673c 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -5034,6 +5034,104 @@ INLINE void write_mem_fence(cl_mem_fence_flags flags) {
 }
 
 /////////////////////////////////////////////////////////////////////////////
+// Atomic functions: uint-only __gen_ocl_atomic_* entry points (__global and __local)
+/////////////////////////////////////////////////////////////////////////////
+OVERLOADABLE uint __gen_ocl_atomic_add(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_add(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_sub(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_sub(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_and(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_and(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_or(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_or(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xor(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xor(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xchg(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xchg(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_inc(__global uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_inc(__local uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_dec(__global uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_dec(__local uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_cmpxchg(__global uint *p, uint cmp, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_cmpxchg(__local uint *p, uint cmp, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imin(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imin(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imax(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imax(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umin(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umin(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umax(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umax(__local uint *p, uint val);
+
+// atomic_<NAME>(p, val): the union moves TYPE's raw 32 bits (float included).
+#define DECL_ATOMIC_OP_SPACE(NAME, TYPE, SPACE, PREFIX)                        \
+  INLINE_OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p, TYPE val) { \
+    union { TYPE t; uint u; } bits = { val }; /* raw bits, not a value cast */ \
+    bits.u = __gen_ocl_##PREFIX##NAME((SPACE uint *)p, bits.u);               \
+    return bits.t;                                                            \
+  }
+#define DECL_ATOMIC_OP_TYPE(NAME, TYPE, PREFIX) \
+  DECL_ATOMIC_OP_SPACE(NAME, TYPE, __global, PREFIX) \
+  DECL_ATOMIC_OP_SPACE(NAME, TYPE, __local, PREFIX)
+#define DECL_ATOMIC_OP(NAME) \
+  DECL_ATOMIC_OP_TYPE(NAME, uint, atomic_)              \
+  DECL_ATOMIC_OP_TYPE(NAME, int, atomic_)
+
+DECL_ATOMIC_OP(add)
+DECL_ATOMIC_OP(sub)
+DECL_ATOMIC_OP(and)
+DECL_ATOMIC_OP(or)
+DECL_ATOMIC_OP(xor)
+DECL_ATOMIC_OP(xchg)
+DECL_ATOMIC_OP_TYPE(xchg, float, atomic_)
+DECL_ATOMIC_OP_TYPE(min, int, atomic_i)
+DECL_ATOMIC_OP_TYPE(max, int, atomic_i)
+DECL_ATOMIC_OP_TYPE(min, uint, atomic_u)
+DECL_ATOMIC_OP_TYPE(max, uint, atomic_u)
+#undef DECL_ATOMIC_OP
+#undef DECL_ATOMIC_OP_TYPE
+#undef DECL_ATOMIC_OP_SPACE
+
+// atomic_inc / atomic_dec: pointer-only wrappers. Each overload views p as
+// uint* for __gen_ocl_atomic_<NAME> and casts the uint result back to TYPE.
+#define DECL_ATOMIC_UNARY(NAME, TYPE, SPACE) \
+  INLINE_OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p) { \
+    return (TYPE)__gen_ocl_atomic_##NAME((SPACE uint *)p); \
+  }
+
+// Expands NAME for uint then int, each for __global then __local.
+#define DECL_ATOMIC_UNARY_ALL(NAME) \
+  DECL_ATOMIC_UNARY(NAME, uint, __global) \
+  DECL_ATOMIC_UNARY(NAME, uint, __local) \
+  DECL_ATOMIC_UNARY(NAME, int, __global) \
+  DECL_ATOMIC_UNARY(NAME, int, __local)
+
+DECL_ATOMIC_UNARY_ALL(inc)
+DECL_ATOMIC_UNARY_ALL(dec)
+
+#undef DECL_ATOMIC_UNARY_ALL
+#undef DECL_ATOMIC_UNARY
+
+// atomic_cmpxchg(p, cmp, val) for uint and int, __global and __local.
+// Each overload views p as uint*, passes cmp and val as uint to
+// __gen_ocl_atomic_cmpxchg in that order, and casts the result to TYPE.
+#define DECL_ATOMIC_CMPXCHG(TYPE, SPACE)  \
+  INLINE_OVERLOADABLE TYPE atomic_cmpxchg (volatile SPACE TYPE *p, TYPE cmp, TYPE val) { \
+    return (TYPE)__gen_ocl_atomic_cmpxchg((SPACE uint *)p, (uint)cmp, (uint)val); \
+  }
+
+// One type at a time: the __global overload first, then the __local one.
+#define DECL_ATOMIC_CMPXCHG_TYPE(TYPE) \
+  DECL_ATOMIC_CMPXCHG(TYPE, __global) \
+  DECL_ATOMIC_CMPXCHG(TYPE, __local)
+
+DECL_ATOMIC_CMPXCHG_TYPE(uint)
+DECL_ATOMIC_CMPXCHG_TYPE(int)
+
+#undef DECL_ATOMIC_CMPXCHG_TYPE
+#undef DECL_ATOMIC_CMPXCHG
+
+/////////////////////////////////////////////////////////////////////////////
 // Force the compilation to SIMD8 or SIMD16
 /////////////////////////////////////////////////////////////////////////////
 
-- 
1.7.10.4



More information about the Beignet mailing list