[Beignet] [PATCH 2/4] Add all atomic built-in functions.
Yang Rong
rong.r.yang at intel.com
Thu Jun 27 01:47:56 PDT 2013
Treat all atomic functions' operands as unsigned int. The min/max built-ins are
the exception: signed and unsigned variants map to the separate functions
__gen_ocl_atomic_imin/__gen_ocl_atomic_imax and __gen_ocl_atomic_umin/__gen_ocl_atomic_umax.
Each built-in is overloaded for both the global and local address spaces.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
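For reference, a kernel along these lines should now resolve against the new
built-ins (illustration only, not part of this patch; the kernel name and
arguments are made up, and the semantics are assumed to follow the standard
OpenCL 1.1 atomics):

  /* Hypothetical example kernel, only to show how the overloads added in
   * ocl_stdlib.h are expected to resolve. */
  __kernel void atomic_demo(__global int *gcnt, __local uint *lcnt) {
    atomic_inc(gcnt);          /* -> __gen_ocl_atomic_inc, __global overload */
    atomic_min(gcnt, -1);      /* signed min -> __gen_ocl_atomic_imin        */
    atomic_min(lcnt, (uint)7); /* unsigned min -> __gen_ocl_atomic_umin,     */
                               /*   __local overload                         */
  }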
backend/src/llvm/llvm_gen_backend.cpp | 76 +++++++++++++++++++++
backend/src/llvm/llvm_gen_ocl_function.hxx | 28 ++++++++
backend/src/ocl_stdlib.h | 100 +++++++++++++++++++++++++++-
3 files changed, 203 insertions(+), 1 deletion(-)
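As a sanity check on the wrapper macros added to ocl_stdlib.h below,
DECL_ATOMIC_OP_SPACE(min, int, __global, atomic_i) should expand (modulo
whitespace, hand-expanded here) to the signed overload that simply casts the
pointer to uint* and forwards to the imin flavor, which is how the
"operands as unsigned int, except imin/imax" rule is implemented:

  INLINE_OVERLOADABLE int atomic_min(volatile __global int *p, int val) {
    /* int val converts implicitly to the uint parameter of the gen built-in */
    return (int)__gen_ocl_atomic_imin((__global uint *)p, val);
  }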
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 5b7754c..28dcdbe 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -534,6 +534,8 @@ namespace gbe
// Emit unary instructions from gen native function
void emitUnaryCallInst(CallInst &I, CallSite &CS, ir::Opcode opcode);
+ // Emit atomic instructions from gen native function
+ void emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode);
// These instructions are not supported at all
void visitVAArgInst(VAArgInst &I) {NOT_SUPPORTED;}
@@ -693,10 +695,12 @@ namespace gbe
return doIt(uint64_t(0));
}
}
+
// NULL pointers
if(isa<ConstantPointerNull>(CPV)) {
return doIt(uint32_t(0));
}
+
// Floats and doubles
const Type::TypeID typeID = CPV->getType()->getTypeID();
switch (typeID) {
@@ -1695,6 +1699,32 @@ namespace gbe
case GEN_OCL_GET_IMAGE_CHANNEL_DATA_TYPE:
case GEN_OCL_GET_IMAGE_CHANNEL_ORDER:
case GEN_OCL_GET_IMAGE_DEPTH:
+ case GEN_OCL_ATOMIC_ADD0:
+ case GEN_OCL_ATOMIC_ADD1:
+ case GEN_OCL_ATOMIC_SUB0:
+ case GEN_OCL_ATOMIC_SUB1:
+ case GEN_OCL_ATOMIC_AND0:
+ case GEN_OCL_ATOMIC_AND1:
+ case GEN_OCL_ATOMIC_OR0:
+ case GEN_OCL_ATOMIC_OR1:
+ case GEN_OCL_ATOMIC_XOR0:
+ case GEN_OCL_ATOMIC_XOR1:
+ case GEN_OCL_ATOMIC_XCHG0:
+ case GEN_OCL_ATOMIC_XCHG1:
+ case GEN_OCL_ATOMIC_UMAX0:
+ case GEN_OCL_ATOMIC_UMAX1:
+ case GEN_OCL_ATOMIC_UMIN0:
+ case GEN_OCL_ATOMIC_UMIN1:
+ case GEN_OCL_ATOMIC_IMAX0:
+ case GEN_OCL_ATOMIC_IMAX1:
+ case GEN_OCL_ATOMIC_IMIN0:
+ case GEN_OCL_ATOMIC_IMIN1:
+ case GEN_OCL_ATOMIC_INC0:
+ case GEN_OCL_ATOMIC_INC1:
+ case GEN_OCL_ATOMIC_DEC0:
+ case GEN_OCL_ATOMIC_DEC1:
+ case GEN_OCL_ATOMIC_CMPXCHG0:
+ case GEN_OCL_ATOMIC_CMPXCHG1:
// No structure can be returned
this->newRegister(&I);
break;
@@ -1779,6 +1809,26 @@ namespace gbe
ctx.ALU1(opcode, ir::TYPE_FLOAT, dst, src);
}
+ void GenWriter::emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode) {
+ CallSite::arg_iterator AI = CS.arg_begin();
+#if GBE_DEBUG
+ CallSite::arg_iterator AE = CS.arg_end();
+#endif /* GBE_DEBUG */
+ GBE_ASSERT(AI != AE);
+ unsigned int llvmSpace = (*AI)->getType()->getPointerAddressSpace();
+ const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmSpace);
+ const ir::Register dst = this->getRegister(&I);
+
+ vector<ir::Register> src;
+ uint32_t srcNum = 0;
+ while(AI != AE) {
+ src.push_back(this->getRegister(*(AI++)));
+ srcNum++;
+ }
+ const ir::Tuple srcTuple = ctx.arrayTuple(&src[0], srcNum);
+ ctx.ATOMIC(opcode, dst, addrSpace, srcTuple);
+ }
+
void GenWriter::emitCallInst(CallInst &I) {
if (Function *F = I.getCalledFunction()) {
if (F->getIntrinsicID() != 0) {
@@ -1858,6 +1908,32 @@ namespace gbe
case GEN_OCL_LBARRIER: ctx.SYNC(ir::syncLocalBarrier); break;
case GEN_OCL_GBARRIER: ctx.SYNC(ir::syncGlobalBarrier); break;
case GEN_OCL_LGBARRIER: ctx.SYNC(ir::syncLocalBarrier | ir::syncGlobalBarrier); break;
+ case GEN_OCL_ATOMIC_ADD0:
+ case GEN_OCL_ATOMIC_ADD1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_ADD); break;
+ case GEN_OCL_ATOMIC_SUB0:
+ case GEN_OCL_ATOMIC_SUB1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_SUB); break;
+ case GEN_OCL_ATOMIC_AND0:
+ case GEN_OCL_ATOMIC_AND1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_AND); break;
+ case GEN_OCL_ATOMIC_OR0:
+ case GEN_OCL_ATOMIC_OR1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_OR); break;
+ case GEN_OCL_ATOMIC_XOR0:
+ case GEN_OCL_ATOMIC_XOR1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_XOR); break;
+ case GEN_OCL_ATOMIC_XCHG0:
+ case GEN_OCL_ATOMIC_XCHG1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_XCHG); break;
+ case GEN_OCL_ATOMIC_INC0:
+ case GEN_OCL_ATOMIC_INC1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_INC); break;
+ case GEN_OCL_ATOMIC_DEC0:
+ case GEN_OCL_ATOMIC_DEC1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_DEC); break;
+ case GEN_OCL_ATOMIC_UMIN0:
+ case GEN_OCL_ATOMIC_UMIN1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_UMIN); break;
+ case GEN_OCL_ATOMIC_UMAX0:
+ case GEN_OCL_ATOMIC_UMAX1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_UMAX); break;
+ case GEN_OCL_ATOMIC_IMIN0:
+ case GEN_OCL_ATOMIC_IMIN1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_IMIN); break;
+ case GEN_OCL_ATOMIC_IMAX0:
+ case GEN_OCL_ATOMIC_IMAX1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_IMAX); break;
+ case GEN_OCL_ATOMIC_CMPXCHG0:
+ case GEN_OCL_ATOMIC_CMPXCHG1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_CMPXCHG); break;
case GEN_OCL_GET_IMAGE_WIDTH:
case GEN_OCL_GET_IMAGE_HEIGHT:
case GEN_OCL_GET_IMAGE_DEPTH:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 6cd7298..99dbfec 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -78,6 +78,34 @@ DECL_LLVM_GEN_FUNCTION(GET_IMAGE_DEPTH, __gen_ocl_get_image_depth)
DECL_LLVM_GEN_FUNCTION(GET_IMAGE_CHANNEL_DATA_TYPE, __gen_ocl_get_image_channel_data_type)
DECL_LLVM_GEN_FUNCTION(GET_IMAGE_CHANNEL_ORDER, __gen_ocl_get_image_channel_order)
+// atomic related functions.
+DECL_LLVM_GEN_FUNCTION(ATOMIC_ADD0, _Z20__gen_ocl_atomic_addPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_ADD1, _Z20__gen_ocl_atomic_addPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_SUB0, _Z20__gen_ocl_atomic_subPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_SUB1, _Z20__gen_ocl_atomic_subPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_AND0, _Z20__gen_ocl_atomic_andPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_AND1, _Z20__gen_ocl_atomic_andPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_OR0, _Z19__gen_ocl_atomic_orPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_OR1, _Z19__gen_ocl_atomic_orPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_XOR0, _Z20__gen_ocl_atomic_xorPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_XOR1, _Z20__gen_ocl_atomic_xorPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_UMIN0, _Z21__gen_ocl_atomic_uminPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_UMIN1, _Z21__gen_ocl_atomic_uminPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_UMAX0, _Z21__gen_ocl_atomic_umaxPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_UMAX1, _Z21__gen_ocl_atomic_umaxPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_IMIN0, _Z21__gen_ocl_atomic_iminPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_IMIN1, _Z21__gen_ocl_atomic_iminPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_IMAX0, _Z21__gen_ocl_atomic_imaxPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_IMAX1, _Z21__gen_ocl_atomic_imaxPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_XCHG0, _Z21__gen_ocl_atomic_xchgPU3AS1jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_XCHG1, _Z21__gen_ocl_atomic_xchgPU3AS3jj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_INC0, _Z20__gen_ocl_atomic_incPU3AS1j)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_INC1, _Z20__gen_ocl_atomic_incPU3AS3j)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_DEC0, _Z20__gen_ocl_atomic_decPU3AS1j)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_DEC1, _Z20__gen_ocl_atomic_decPU3AS3j)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_CMPXCHG0, _Z24__gen_ocl_atomic_cmpxchgPU3AS1jjj)
+DECL_LLVM_GEN_FUNCTION(ATOMIC_CMPXCHG1, _Z24__gen_ocl_atomic_cmpxchgPU3AS3jjj)
+
// saturation related functions.
DECL_LLVM_GEN_FUNCTION(SADD_SAT_CHAR, _Z12ocl_sadd_satcc)
DECL_LLVM_GEN_FUNCTION(SADD_SAT_SHORT, _Z12ocl_sadd_satss)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index 81a0193..0c4673c 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -5034,6 +5034,104 @@ INLINE void write_mem_fence(cl_mem_fence_flags flags) {
}
/////////////////////////////////////////////////////////////////////////////
+// Atomic functions
+/////////////////////////////////////////////////////////////////////////////
+OVERLOADABLE uint __gen_ocl_atomic_add(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_add(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_sub(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_sub(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_and(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_and(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_or(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_or(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xor(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xor(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xchg(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xchg(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_inc(__global uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_inc(__local uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_dec(__global uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_dec(__local uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_cmpxchg(__global uint *p, uint cmp, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_cmpxchg(__local uint *p, uint cmp, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imin(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imin(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imax(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imax(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umin(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umin(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umax(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umax(__local uint *p, uint val);
+
+#define DECL_ATOMIC_OP_SPACE(NAME, TYPE, SPACE, PREFIX) \
+ INLINE_OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p, TYPE val) { \
+ return (TYPE)__gen_ocl_##PREFIX##NAME((SPACE uint *)p, val); \
+ }
+
+#define DECL_ATOMIC_OP_TYPE(NAME, TYPE, PREFIX) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __global, PREFIX) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __local, PREFIX) \
+
+#define DECL_ATOMIC_OP(NAME) \
+ DECL_ATOMIC_OP_TYPE(NAME, uint, atomic_) \
+ DECL_ATOMIC_OP_TYPE(NAME, int, atomic_)
+
+DECL_ATOMIC_OP(add)
+DECL_ATOMIC_OP(sub)
+DECL_ATOMIC_OP(and)
+DECL_ATOMIC_OP(or)
+DECL_ATOMIC_OP(xor)
+DECL_ATOMIC_OP(xchg)
+DECL_ATOMIC_OP_TYPE(xchg, float, atomic_)
+DECL_ATOMIC_OP_TYPE(min, int, atomic_i)
+DECL_ATOMIC_OP_TYPE(max, int, atomic_i)
+DECL_ATOMIC_OP_TYPE(min, uint, atomic_u)
+DECL_ATOMIC_OP_TYPE(max, uint, atomic_u)
+
+#undef DECL_ATOMIC_OP
+#undef DECL_ATOMIC_OP_TYPE
+#undef DECL_ATOMIC_OP_SPACE
+
+#define DECL_ATOMIC_OP_SPACE(NAME, TYPE, SPACE) \
+ INLINE_OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p) { \
+ return (TYPE)__gen_ocl_atomic_##NAME((SPACE uint *)p); \
+ }
+
+#define DECL_ATOMIC_OP_TYPE(NAME, TYPE) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __global) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __local)
+
+#define DECL_ATOMIC_OP(NAME) \
+ DECL_ATOMIC_OP_TYPE(NAME, uint) \
+ DECL_ATOMIC_OP_TYPE(NAME, int)
+
+DECL_ATOMIC_OP(inc)
+DECL_ATOMIC_OP(dec)
+
+#undef DECL_ATOMIC_OP
+#undef DECL_ATOMIC_OP_TYPE
+#undef DECL_ATOMIC_OP_SPACE
+
+#define DECL_ATOMIC_OP_SPACE(NAME, TYPE, SPACE) \
+ INLINE_OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p, TYPE cmp, TYPE val) { \
+ return (TYPE)__gen_ocl_atomic_##NAME((SPACE uint *)p, (uint)cmp, (uint)val); \
+ }
+
+#define DECL_ATOMIC_OP_TYPE(NAME, TYPE) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __global) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __local)
+
+#define DECL_ATOMIC_OP(NAME) \
+ DECL_ATOMIC_OP_TYPE(NAME, uint) \
+ DECL_ATOMIC_OP_TYPE(NAME, int)
+
+DECL_ATOMIC_OP(cmpxchg)
+
+#undef DECL_ATOMIC_OP
+#undef DECL_ATOMIC_OP_TYPE
+#undef DECL_ATOMIC_OP_SPACE
+
+/////////////////////////////////////////////////////////////////////////////
// Force the compilation to SIMD8 or SIMD16
/////////////////////////////////////////////////////////////////////////////
--
1.7.10.4