[Beignet] [PATCH] Add the vector3 support for builtin abs function

junyan.he at inbox.com junyan.he at inbox.com
Tue Jul 2 02:08:30 PDT 2013


From: Junyan He <junyan.he at linux.intel.com>

Add the forgotten vector3 variants of abs for all the types.
Because of the kernel input alignment, improve the test
case to match the alignment requirement.

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 backend/src/ocl_stdlib.h |    4 +++-
 kernels/compiler_abs.cl  |    1 +
 utests/compiler_abs.cpp  |   46 +++++++++++++++++++++++++++++++++++++---------
 3 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index 04984d8..9acaebe 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -4481,6 +4481,7 @@ int __gen_ocl_abs(int x);
 #define ABS_I(I, CVT)  (CVT)__gen_ocl_abs(x.s##I)
 #define ABS_VEC1(CVT)  (CVT)__gen_ocl_abs(x)
 #define ABS_VEC2(CVT)  ABS_I(0, CVT), ABS_I(1, CVT)
+#define ABS_VEC3(CVT)  ABS_I(0, CVT), ABS_I(1, CVT), ABS_I(2, CVT)
 #define ABS_VEC4(CVT)  ABS_VEC2(CVT), ABS_I(2, CVT), ABS_I(3, CVT)
 #define ABS_VEC8(CVT)  ABS_VEC4(CVT), ABS_I(4, CVT), ABS_I(5, CVT),\
 	               ABS_I(6, CVT), ABS_I(7, CVT)
@@ -4490,7 +4491,7 @@ int __gen_ocl_abs(int x);
 
 #define DEC_1(TYPE) INLINE_OVERLOADABLE u##TYPE abs(TYPE x) { return ABS_VEC1(u##TYPE); }
 #define DEC_N(TYPE, N) INLINE_OVERLOADABLE u##TYPE##N abs(TYPE##N x) { return (u##TYPE##N)(ABS_VEC##N(u##TYPE)); };
-#define DEC(TYPE) DEC_1(TYPE) DEC_N(TYPE, 2) DEC_N(TYPE, 4) DEC_N(TYPE, 8) DEC_N(TYPE, 16)
+#define DEC(TYPE) DEC_1(TYPE) DEC_N(TYPE, 2) DEC_N(TYPE, 3) DEC_N(TYPE, 4) DEC_N(TYPE, 8) DEC_N(TYPE, 16)
 
 DEC(int)
 DEC(short)
@@ -4509,6 +4510,7 @@ DEC(uchar)
 #undef ABS_I
 #undef ABS_VEC1
 #undef ABS_VEC2
+#undef ABS_VEC3
 #undef ABS_VEC4
 #undef ABS_VEC8
 #undef ABS_VEC16
diff --git a/kernels/compiler_abs.cl b/kernels/compiler_abs.cl
index 9e77c2b..549575c 100644
--- a/kernels/compiler_abs.cl
+++ b/kernels/compiler_abs.cl
@@ -15,6 +15,7 @@
 #define COMPILER_ABS(TYPE, UTYPE)  \
     COMPILER_ABS_FUNC_1(TYPE, UTYPE) \
     COMPILER_ABS_FUNC_N(TYPE, UTYPE, 2) \
+    COMPILER_ABS_FUNC_N(TYPE, UTYPE, 3) \
     COMPILER_ABS_FUNC_N(TYPE, UTYPE, 4) \
     COMPILER_ABS_FUNC_N(TYPE, UTYPE, 8) \
     COMPILER_ABS_FUNC_N(TYPE, UTYPE, 16)
diff --git a/utests/compiler_abs.cpp b/utests/compiler_abs.cpp
index 59d8365..764970a 100644
--- a/utests/compiler_abs.cpp
+++ b/utests/compiler_abs.cpp
@@ -3,7 +3,7 @@
 
 template <typename T, int N>
 struct cl_vec {
-    T ptr[N];
+    T ptr[((N+1)/2)*2]; //align to 2 elements.
 
     typedef cl_vec<T, N> vec_type;
 
@@ -11,15 +11,18 @@ struct cl_vec {
         memset(ptr, 0, sizeof(T) * N);
     }
     cl_vec(vec_type & other) {
+        memset(ptr, 0, sizeof(T) * ((N+1)/2)*2);
         memcpy (this->ptr, other.ptr, sizeof(T) * N);
     }
 
     vec_type& operator= (vec_type & other) {
+        memset(ptr, 0, sizeof(T) * ((N+1)/2)*2);
         memcpy (this->ptr, other.ptr, sizeof(T) * N);
         return *this;
     }
 
     template <typename U> vec_type& operator= (cl_vec<U, N> & other) {
+        memset(ptr, 0, sizeof(T) * ((N+1)/2)*2);
         memcpy (this->ptr, other.ptr, sizeof(T) * N);
         return *this;
     }
@@ -56,6 +59,8 @@ template <typename T, typename U> static void cpu(int global_id, T *src, U *dst)
 template <typename T, int N> static void gen_rand_val (cl_vec<T, N>& vect)
 {
     int i = 0;
+
+    memset(vect.ptr, 0, sizeof(T) * ((N+1)/2)*2);
     for (; i < N; i++) {
         vect.ptr[i] = static_cast<T>((rand() & 63) - 32);
     }
@@ -66,25 +71,34 @@ template <typename T> static void gen_rand_val (T & val)
     val = static_cast<T>((rand() & 63) - 32);
 }
 
+template <typename T>
+inline static void print_data (T& val)
+{
+    if (std::is_unsigned<T>::value)
+        printf(" %u", val);
+    else
+        printf(" %d", val);
+}
+
 template <typename T, typename U, int N> static void dump_data (cl_vec<T, N>* src,
-	cl_vec<U, N>* dst, int n)
+        cl_vec<U, N>* dst, int n)
 {
     U* val = reinterpret_cast<U *>(dst);
 
-    n = n*N;
+    n = n*((N+1)/2)*2;
 
     printf("\nRaw: \n");
     for (int32_t i = 0; i < (int32_t) n; ++i) {
-        printf(" %d", ((T *)buf_data[0])[i]);
+        print_data(((T *)buf_data[0])[i]);
     }
 
     printf("\nCPU: \n");
     for (int32_t i = 0; i < (int32_t) n; ++i) {
-        printf(" %d", val[i]);
+        print_data(val[i]);
     }
     printf("\nGPU: \n");
     for (int32_t i = 0; i < (int32_t) n; ++i) {
-        printf(" %d", ((U *)buf_data[1])[i]);
+        print_data(((U *)buf_data[1])[i]);
     }
 }
 
@@ -92,16 +106,16 @@ template <typename T, typename U> static void dump_data (T* src, U* dst, int n)
 {
     printf("\nRaw: \n");
     for (int32_t i = 0; i < (int32_t) n; ++i) {
-        printf(" %d", ((T *)buf_data[0])[i]);
+        print_data(((T *)buf_data[0])[i]);
     }
 
     printf("\nCPU: \n");
     for (int32_t i = 0; i < (int32_t) n; ++i) {
-        printf(" %d", dst[i]);
+        print_data(dst[i]);
     }
     printf("\nGPU: \n");
     for (int32_t i = 0; i < (int32_t) n; ++i) {
-        printf(" %d", ((U *)buf_data[1])[i]);
+        print_data(((U *)buf_data[1])[i]);
     }
 }
 
@@ -111,6 +125,8 @@ template <typename T, typename U> static void compiler_abs_with_type(void)
     U cpu_dst[16];
     T cpu_src[16];
 
+    printf("sizeof T, is %u, sizeof U is %u\n", (int)sizeof(T), (int)sizeof(U));
+
     // Setup buffers
     OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL);
     OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL);
@@ -166,54 +182,66 @@ ABS_TEST_TYPE(uchar, uchar)
 
 
 typedef cl_vec<int, 2> int2;
+typedef cl_vec<int, 3> int3;
 typedef cl_vec<int, 4> int4;
 typedef cl_vec<int, 8> int8;
 typedef cl_vec<int, 16> int16;
 typedef cl_vec<unsigned int, 2> uint2;
+typedef cl_vec<unsigned int, 3> uint3;
 typedef cl_vec<unsigned int, 4> uint4;
 typedef cl_vec<unsigned int, 8> uint8;
 typedef cl_vec<unsigned int, 16> uint16;
 ABS_TEST_TYPE(int2, uint2)
+ABS_TEST_TYPE(int3, uint3)
 ABS_TEST_TYPE(int4, uint4)
 ABS_TEST_TYPE(int8, uint8)
 ABS_TEST_TYPE(int16, uint16)
 ABS_TEST_TYPE(uint2, uint2)
+ABS_TEST_TYPE(uint3, uint3)
 ABS_TEST_TYPE(uint4, uint4)
 ABS_TEST_TYPE(uint8, uint8)
 ABS_TEST_TYPE(uint16, uint16)
 
 
 typedef cl_vec<char, 2> char2;
+typedef cl_vec<char, 3> char3;
 typedef cl_vec<char, 4> char4;
 typedef cl_vec<char, 8> char8;
 typedef cl_vec<char, 16> char16;
 typedef cl_vec<unsigned char, 2> uchar2;
+typedef cl_vec<unsigned char, 3> uchar3;
 typedef cl_vec<unsigned char, 4> uchar4;
 typedef cl_vec<unsigned char, 8> uchar8;
 typedef cl_vec<unsigned char, 16> uchar16;
 ABS_TEST_TYPE(char2, uchar2)
+ABS_TEST_TYPE(char3, uchar3)
 ABS_TEST_TYPE(char4, uchar4)
 ABS_TEST_TYPE(char8, uchar8)
 ABS_TEST_TYPE(char16, uchar16)
 ABS_TEST_TYPE(uchar2, uchar2)
+ABS_TEST_TYPE(uchar3, uchar3)
 ABS_TEST_TYPE(uchar4, uchar4)
 ABS_TEST_TYPE(uchar8, uchar8)
 ABS_TEST_TYPE(uchar16, uchar16)
 
 
 typedef cl_vec<short, 2> short2;
+typedef cl_vec<short, 3> short3;
 typedef cl_vec<short, 4> short4;
 typedef cl_vec<short, 8> short8;
 typedef cl_vec<short, 16> short16;
 typedef cl_vec<unsigned short, 2> ushort2;
+typedef cl_vec<unsigned short, 3> ushort3;
 typedef cl_vec<unsigned short, 4> ushort4;
 typedef cl_vec<unsigned short, 8> ushort8;
 typedef cl_vec<unsigned short, 16> ushort16;
 ABS_TEST_TYPE(short2, ushort2)
+ABS_TEST_TYPE(short3, ushort3)
 ABS_TEST_TYPE(short4, ushort4)
 ABS_TEST_TYPE(short8, ushort8)
 ABS_TEST_TYPE(short16, ushort16)
 ABS_TEST_TYPE(ushort2, ushort2)
+ABS_TEST_TYPE(ushort3, ushort3)
 ABS_TEST_TYPE(ushort4, ushort4)
 ABS_TEST_TYPE(ushort8, ushort8)
 ABS_TEST_TYPE(ushort16, ushort16)
-- 
1.7.9.5



More information about the Beignet mailing list