[Beignet] [PATCH 03/22 V2] Add the atomic module into the libocl
junyan.he at inbox.com
junyan.he at inbox.com
Sun Aug 31 19:06:35 PDT 2014
From: Junyan He <junyan.he at linux.intel.com>
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/libocl/include/ocl_atom.h | 84 +++++++++++++++++++++++
backend/src/libocl/src/ocl_atom.cl | 122 +++++++++++++++++++++++++++++++++
2 files changed, 206 insertions(+)
create mode 100644 backend/src/libocl/include/ocl_atom.h
create mode 100644 backend/src/libocl/src/ocl_atom.cl
diff --git a/backend/src/libocl/include/ocl_atom.h b/backend/src/libocl/include/ocl_atom.h
new file mode 100644
index 0000000..b2cfcbf
--- /dev/null
+++ b/backend/src/libocl/include/ocl_atom.h
@@ -0,0 +1,84 @@
+#ifndef __OCL_ATOM_H__
+#define __OCL_ATOM_H__
+#include "ocl_types.h"
+
+/////////////////////////////////////////////////////////////////////////////
+// Atomic functions
+/////////////////////////////////////////////////////////////////////////////
+// Every overload takes a volatile __global or __local pointer; definitions are in ocl_atom.cl.
+OVERLOADABLE uint atomic_add(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_add(volatile __local uint *p, uint val);
+OVERLOADABLE int atomic_add(volatile __global int *p, int val);
+OVERLOADABLE int atomic_add(volatile __local int *p, int val);
+
+OVERLOADABLE uint atomic_sub(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_sub(volatile __local uint *p, uint val);
+OVERLOADABLE int atomic_sub(volatile __global int *p, int val);
+OVERLOADABLE int atomic_sub(volatile __local int *p, int val);
+
+OVERLOADABLE uint atomic_and(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_and(volatile __local uint *p, uint val);
+OVERLOADABLE int atomic_and(volatile __global int *p, int val);
+OVERLOADABLE int atomic_and(volatile __local int *p, int val);
+
+OVERLOADABLE uint atomic_or(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_or(volatile __local uint *p, uint val);
+OVERLOADABLE int atomic_or(volatile __global int *p, int val);
+OVERLOADABLE int atomic_or(volatile __local int *p, int val);
+
+OVERLOADABLE uint atomic_xor(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_xor(volatile __local uint *p, uint val);
+OVERLOADABLE int atomic_xor(volatile __global int *p, int val);
+OVERLOADABLE int atomic_xor(volatile __local int *p, int val);
+
+OVERLOADABLE uint atomic_xchg(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_xchg(volatile __local uint *p, uint val);
+OVERLOADABLE int atomic_xchg(volatile __global int *p, int val);
+OVERLOADABLE int atomic_xchg(volatile __local int *p, int val);
+
+OVERLOADABLE int atomic_min(volatile __global int *p, int val);
+OVERLOADABLE int atomic_min(volatile __local int *p, int val);
+
+OVERLOADABLE int atomic_max(volatile __global int *p, int val);
+OVERLOADABLE int atomic_max(volatile __local int *p, int val);
+
+OVERLOADABLE uint atomic_min(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_min(volatile __local uint *p, uint val);
+
+OVERLOADABLE uint atomic_max(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_max(volatile __local uint *p, uint val);
+
+OVERLOADABLE float atomic_xchg (volatile __global float *p, float val);
+OVERLOADABLE float atomic_xchg (volatile __local float *p, float val);
+
+OVERLOADABLE uint atomic_inc (volatile __global uint *p);
+OVERLOADABLE uint atomic_inc (volatile __local uint *p);
+OVERLOADABLE int atomic_inc (volatile __global int *p);
+OVERLOADABLE int atomic_inc (volatile __local int *p);
+
+OVERLOADABLE uint atomic_dec (volatile __global uint *p);
+OVERLOADABLE uint atomic_dec (volatile __local uint *p);
+OVERLOADABLE int atomic_dec (volatile __global int *p);
+OVERLOADABLE int atomic_dec (volatile __local int *p);
+
+OVERLOADABLE uint atomic_cmpxchg (volatile __global uint *p, uint cmp, uint val);
+OVERLOADABLE uint atomic_cmpxchg (volatile __local uint *p, uint cmp, uint val);
+OVERLOADABLE int atomic_cmpxchg (volatile __global int *p, int cmp, int val);
+OVERLOADABLE int atomic_cmpxchg (volatile __local int *p, int cmp, int val);
+
+
+// XXX for conformance test
+// atom_xxx are the OpenCL 1.0 names; each one is aliased to its atomic_xxx overload set.
+#define atom_add atomic_add
+#define atom_sub atomic_sub
+#define atom_and atomic_and
+#define atom_or atomic_or
+#define atom_xor atomic_xor
+#define atom_xchg atomic_xchg
+#define atom_min atomic_min
+#define atom_max atomic_max
+#define atom_inc atomic_inc
+#define atom_dec atomic_dec
+#define atom_cmpxchg atomic_cmpxchg
+
+#endif /* __OCL_ATOM_H__ */
diff --git a/backend/src/libocl/src/ocl_atom.cl b/backend/src/libocl/src/ocl_atom.cl
new file mode 100644
index 0000000..ad09d9e
--- /dev/null
+++ b/backend/src/libocl/src/ocl_atom.cl
@@ -0,0 +1,122 @@
+#include "ocl_atom.h"
+#include "ocl_as.h"
+// Helpers called by the wrappers below; all take and return uint, and none is defined in this file.
+OVERLOADABLE uint __gen_ocl_atomic_add(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_add(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_sub(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_sub(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_and(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_and(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_or(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_or(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xor(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xor(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xchg(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xchg(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_inc(__global uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_inc(__local uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_dec(__global uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_dec(__local uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_cmpxchg(__global uint *p, uint cmp, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_cmpxchg(__local uint *p, uint cmp, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imin(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imin(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imax(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imax(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umin(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umin(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umax(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umax(__local uint *p, uint val);
+
+#define DECL_ATOMIC_OP_SPACE(NAME, TYPE, SPACE, PREFIX) \
+ OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p, TYPE val) { \
+ return (TYPE)__gen_ocl_##PREFIX##NAME((SPACE uint *)p, val); \
+ }
+// The macro above defines one atomic_<NAME> wrapper: it casts p to a uint pointer, calls __gen_ocl_<PREFIX><NAME>, and casts the result to TYPE.
+#define DECL_ATOMIC_OP_TYPE(NAME, TYPE, PREFIX) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __global, PREFIX) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __local, PREFIX)
+// DECL_ATOMIC_OP emits uint and int wrappers; min/max are instantiated per type below with the atomic_i / atomic_u prefixes.
+#define DECL_ATOMIC_OP(NAME) \
+ DECL_ATOMIC_OP_TYPE(NAME, uint, atomic_) \
+ DECL_ATOMIC_OP_TYPE(NAME, int, atomic_)
+
+DECL_ATOMIC_OP(add)
+DECL_ATOMIC_OP(sub)
+DECL_ATOMIC_OP(and)
+DECL_ATOMIC_OP(or)
+DECL_ATOMIC_OP(xor)
+DECL_ATOMIC_OP(xchg)
+DECL_ATOMIC_OP_TYPE(min, int, atomic_i)
+DECL_ATOMIC_OP_TYPE(max, int, atomic_i)
+DECL_ATOMIC_OP_TYPE(min, uint, atomic_u)
+DECL_ATOMIC_OP_TYPE(max, uint, atomic_u)
+
+#undef DECL_ATOMIC_OP_SPACE
+// Float xchg variant: val goes through as_uint() and the result through as_float() instead of a C cast.
+#define DECL_ATOMIC_OP_SPACE(NAME, TYPE, SPACE, PREFIX) \
+ OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p, TYPE val) { \
+ return as_float(__gen_ocl_##PREFIX##NAME((SPACE uint *)p, as_uint(val))); \
+ }
+DECL_ATOMIC_OP_SPACE(xchg, float, __global, atomic_)
+DECL_ATOMIC_OP_SPACE(xchg, float, __local, atomic_)
+
+#undef DECL_ATOMIC_OP
+#undef DECL_ATOMIC_OP_TYPE
+#undef DECL_ATOMIC_OP_SPACE
+
+#define DECL_ATOMIC_OP_SPACE(NAME, TYPE, SPACE) \
+ OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p) { \
+ return (TYPE)__gen_ocl_atomic_##NAME((SPACE uint *)p); \
+ }
+// Pointer-only wrappers (used for inc and dec): the macro above forwards p as a uint pointer and casts the result to TYPE.
+#define DECL_ATOMIC_OP_TYPE(NAME, TYPE) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __global) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __local)
+
+#define DECL_ATOMIC_OP(NAME) \
+ DECL_ATOMIC_OP_TYPE(NAME, uint) \
+ DECL_ATOMIC_OP_TYPE(NAME, int)
+
+DECL_ATOMIC_OP(inc)
+DECL_ATOMIC_OP(dec)
+
+#undef DECL_ATOMIC_OP
+#undef DECL_ATOMIC_OP_TYPE
+#undef DECL_ATOMIC_OP_SPACE
+
+#define DECL_ATOMIC_OP_SPACE(NAME, TYPE, SPACE) \
+ OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p, TYPE cmp, TYPE val) { \
+ return (TYPE)__gen_ocl_atomic_##NAME((SPACE uint *)p, (uint)cmp, (uint)val); \
+ }
+// Three-operand wrappers (used for cmpxchg): the macro above passes p, cmp and val as uint and casts the result to TYPE.
+#define DECL_ATOMIC_OP_TYPE(NAME, TYPE) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __global) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __local)
+
+#define DECL_ATOMIC_OP(NAME) \
+ DECL_ATOMIC_OP_TYPE(NAME, uint) \
+ DECL_ATOMIC_OP_TYPE(NAME, int)
+
+DECL_ATOMIC_OP(cmpxchg)
+
+#undef DECL_ATOMIC_OP
+#undef DECL_ATOMIC_OP_TYPE
+#undef DECL_ATOMIC_OP_SPACE
+
+// XXX for conformance test
+// The following atom_xxx APIs are from the OpenCL 1.0 spec.
+// But the conformance test suite will test them anyway.
+#define atom_add atomic_add
+#define atom_sub atomic_sub
+#define atom_and atomic_and
+#define atom_or atomic_or
+#define atom_xor atomic_xor
+#define atom_xchg atomic_xchg
+#define atom_min atomic_min
+#define atom_max atomic_max
+#define atom_inc atomic_inc
+#define atom_dec atomic_dec
+#define atom_cmpxchg atomic_cmpxchg
+
+
--
1.7.9.5
More information about the Beignet mailing list