[Beignet] [PATCH 1/2] add built-in function "hadd", "rhadd"
Song, Ruiling
ruiling.song at intel.com
Thu Jun 27 22:18:35 PDT 2013
I think the implementation does not comply with SPEC.
Like for uint, if x is 0xffffffff, y is also 0xffffffff. Then (x+y) will overflow 32 bit uint.
Using the implementation, the result is 0xffffffff>>1, which is 0x7fffffff.
But the spec says "The intermediate sum does not modulo overflow".
In the example, the correct result should be 0xffffffff.
Thanks!
Ruiling
-----Original Message-----
From: beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org [mailto:beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org] On Behalf Of Homer Hsing
Sent: Friday, June 28, 2013 12:43 PM
To: beignet at lists.freedesktop.org
Cc: Xing, Homer
Subject: [Beignet] [PATCH 1/2] add built-in function "hadd", "rhadd"
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/ocl_stdlib.h | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h index 148ac4d..d816a8a 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -4388,6 +4388,36 @@ DEF(16)
#undef DEC8
#undef DEC16
+#define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort); DEF(int);
+DEF(uint) #define DEF(type) INLINE_OVERLOADABLE type hadd(type x, type
+y) { return (x + y) >> 1; } DEC #undef DEF #define DEF(type)
+INLINE_OVERLOADABLE type rhadd(type x, type y) { return (x + y + 1) >>
+1; } DEC #undef DEF #undef DEC #define DEC2(func, type)
+INLINE_OVERLOADABLE type##2 func(type##2 a, type##2 b) { return
+(func(a.s0, b.s0), func(a.s1, b.s1)); } #define DEC3(func, type)
+INLINE_OVERLOADABLE type##3 func(type##3 a, type##3 b) { return
+(func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2)); } #define
+DEC4(func, type) INLINE_OVERLOADABLE type##4 func(type##4 a, type##4 b)
+{ return (func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2),
+func(a.s3, b.s3)); } #define DEC8(func, type) INLINE_OVERLOADABLE
+type##8 func(type##8 a, type##8 b) { return (func(a.s0, b.s0),
+func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3), func(a.s4, b.s4),
+func(a.s5, b.s5), func(a.s6, b.s6), func(a.s7, b.s7)); } #define
+DEC16(func, type) INLINE_OVERLOADABLE type##16 func(type##16 a,
+type##16 b) { return (func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2,
+b.s2), func(a.s3, b.s3), func(a.s4, b.s4), func(a.s5, b.s5), func(a.s6,
+b.s6), func(a.s7, b.s7), func(a.s8, b.s8), func(a.s9, b.s9), func(a.sa,
+b.sa), func(a.sb, b.sb), func(a.sc, b.sc), func(a.sd, b.sd), func(a.se,
+b.se), func(a.sf, b.sf)); } #define DEF(func, n) DEC##n(func, char);
+DEC##n(func, uchar); DEC##n(func, short); DEC##n(func, ushort);
+DEC##n(func, int); DEC##n(func, uint) DEF(hadd, 2) DEF(hadd, 3)
+DEF(hadd, 4) DEF(hadd, 8) DEF(hadd, 16) DEF(rhadd, 2) DEF(rhadd, 3)
+DEF(rhadd, 4) DEF(rhadd, 8) DEF(rhadd, 16) #undef DEF #undef DEC2
+#undef DEC3 #undef DEC4 #undef DEC8 #undef DEC16
/////////////////////////////////////////////////////////////////////////////
// Work Items functions (see 6.11.1 of OCL 1.1 spec) /////////////////////////////////////////////////////////////////////////////
--
1.8.1.2
_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list