[Beignet] [V2 PATCH 1/3] Add the vector3 support for builtin abs function
junyan.he at inbox.com
junyan.he at inbox.com
Wed Jul 3 00:16:59 PDT 2013
From: Junyan He <junyan.he at linux.intel.com>
Add the forgotten vector3 variants of abs for all the types.
Because of the kernel input alignment requirement, adjust the test
case so that its data layout matches that alignment.
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/ocl_stdlib.h | 4 +++-
kernels/compiler_abs.cl | 1 +
utests/compiler_abs.cpp | 54 +++++++++++++++++++++++++++++++++++++---------
3 files changed, 48 insertions(+), 11 deletions(-)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index 04984d8..9acaebe 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -4481,6 +4481,7 @@ int __gen_ocl_abs(int x);
#define ABS_I(I, CVT) (CVT)__gen_ocl_abs(x.s##I)
#define ABS_VEC1(CVT) (CVT)__gen_ocl_abs(x)
#define ABS_VEC2(CVT) ABS_I(0, CVT), ABS_I(1, CVT)
+#define ABS_VEC3(CVT) ABS_I(0, CVT), ABS_I(1, CVT), ABS_I(2, CVT)
#define ABS_VEC4(CVT) ABS_VEC2(CVT), ABS_I(2, CVT), ABS_I(3, CVT)
#define ABS_VEC8(CVT) ABS_VEC4(CVT), ABS_I(4, CVT), ABS_I(5, CVT),\
ABS_I(6, CVT), ABS_I(7, CVT)
@@ -4490,7 +4491,7 @@ int __gen_ocl_abs(int x);
#define DEC_1(TYPE) INLINE_OVERLOADABLE u##TYPE abs(TYPE x) { return ABS_VEC1(u##TYPE); }
#define DEC_N(TYPE, N) INLINE_OVERLOADABLE u##TYPE##N abs(TYPE##N x) { return (u##TYPE##N)(ABS_VEC##N(u##TYPE)); };
-#define DEC(TYPE) DEC_1(TYPE) DEC_N(TYPE, 2) DEC_N(TYPE, 4) DEC_N(TYPE, 8) DEC_N(TYPE, 16)
+#define DEC(TYPE) DEC_1(TYPE) DEC_N(TYPE, 2) DEC_N(TYPE, 3) DEC_N(TYPE, 4) DEC_N(TYPE, 8) DEC_N(TYPE, 16)
DEC(int)
DEC(short)
@@ -4509,6 +4510,7 @@ DEC(uchar)
#undef ABS_I
#undef ABS_VEC1
#undef ABS_VEC2
+#undef ABS_VEC3
#undef ABS_VEC4
#undef ABS_VEC8
#undef ABS_VEC16
diff --git a/kernels/compiler_abs.cl b/kernels/compiler_abs.cl
index 9e77c2b..549575c 100644
--- a/kernels/compiler_abs.cl
+++ b/kernels/compiler_abs.cl
@@ -15,6 +15,7 @@
#define COMPILER_ABS(TYPE, UTYPE) \
COMPILER_ABS_FUNC_1(TYPE, UTYPE) \
COMPILER_ABS_FUNC_N(TYPE, UTYPE, 2) \
+ COMPILER_ABS_FUNC_N(TYPE, UTYPE, 3) \
COMPILER_ABS_FUNC_N(TYPE, UTYPE, 4) \
COMPILER_ABS_FUNC_N(TYPE, UTYPE, 8) \
COMPILER_ABS_FUNC_N(TYPE, UTYPE, 16)
diff --git a/utests/compiler_abs.cpp b/utests/compiler_abs.cpp
index 59d8365..a1b14b4 100644
--- a/utests/compiler_abs.cpp
+++ b/utests/compiler_abs.cpp
@@ -3,23 +3,26 @@
template <typename T, int N>
struct cl_vec {
- T ptr[N];
+ T ptr[((N+1)/2)*2]; //align to 2 elements.
typedef cl_vec<T, N> vec_type;
cl_vec(void) {
- memset(ptr, 0, sizeof(T) * N);
+ memset(ptr, 0, sizeof(T) * ((N+1)/2)*2);
}
cl_vec(vec_type & other) {
+ memset(ptr, 0, sizeof(T) * ((N+1)/2)*2);
memcpy (this->ptr, other.ptr, sizeof(T) * N);
}
vec_type& operator= (vec_type & other) {
+ memset(ptr, 0, sizeof(T) * ((N+1)/2)*2);
memcpy (this->ptr, other.ptr, sizeof(T) * N);
return *this;
}
template <typename U> vec_type& operator= (cl_vec<U, N> & other) {
+ memset(ptr, 0, sizeof(T) * ((N+1)/2)*2);
memcpy (this->ptr, other.ptr, sizeof(T) * N);
return *this;
}
@@ -56,6 +59,8 @@ template <typename T, typename U> static void cpu(int global_id, T *src, U *dst)
template <typename T, int N> static void gen_rand_val (cl_vec<T, N>& vect)
{
int i = 0;
+
+ memset(vect.ptr, 0, sizeof(T) * ((N+1)/2)*2);
for (; i < N; i++) {
vect.ptr[i] = static_cast<T>((rand() & 63) - 32);
}
@@ -66,25 +71,34 @@ template <typename T> static void gen_rand_val (T & val)
val = static_cast<T>((rand() & 63) - 32);
}
+template <typename T>
+inline static void print_data (T& val)
+{
+ if (std::is_unsigned<T>::value)
+ printf(" %u", val);
+ else
+ printf(" %d", val);
+}
+
template <typename T, typename U, int N> static void dump_data (cl_vec<T, N>* src,
- cl_vec<U, N>* dst, int n)
+ cl_vec<U, N>* dst, int n)
{
U* val = reinterpret_cast<U *>(dst);
- n = n*N;
+ n = n*((N+1)/2)*2;
printf("\nRaw: \n");
for (int32_t i = 0; i < (int32_t) n; ++i) {
- printf(" %d", ((T *)buf_data[0])[i]);
+ print_data(((T *)buf_data[0])[i]);
}
printf("\nCPU: \n");
for (int32_t i = 0; i < (int32_t) n; ++i) {
- printf(" %d", val[i]);
+ print_data(val[i]);
}
printf("\nGPU: \n");
for (int32_t i = 0; i < (int32_t) n; ++i) {
- printf(" %d", ((U *)buf_data[1])[i]);
+ print_data(((U *)buf_data[1])[i]);
}
}
@@ -92,16 +106,16 @@ template <typename T, typename U> static void dump_data (T* src, U* dst, int n)
{
printf("\nRaw: \n");
for (int32_t i = 0; i < (int32_t) n; ++i) {
- printf(" %d", ((T *)buf_data[0])[i]);
+ print_data(((T *)buf_data[0])[i]);
}
printf("\nCPU: \n");
for (int32_t i = 0; i < (int32_t) n; ++i) {
- printf(" %d", dst[i]);
+ print_data(dst[i]);
}
printf("\nGPU: \n");
for (int32_t i = 0; i < (int32_t) n; ++i) {
- printf(" %d", ((U *)buf_data[1])[i]);
+ print_data(((U *)buf_data[1])[i]);
}
}
@@ -111,6 +125,8 @@ template <typename T, typename U> static void compiler_abs_with_type(void)
U cpu_dst[16];
T cpu_src[16];
+ printf("sizeof T, is %u, sizeof U is %u\n", (int)sizeof(T), (int)sizeof(U));
+
// Setup buffers
OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL);
OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL);
@@ -122,6 +138,12 @@ template <typename T, typename U> static void compiler_abs_with_type(void)
// Run random tests
for (uint32_t pass = 0; pass < 8; ++pass) {
OCL_MAP_BUFFER(0);
+
+ /* Clear the dst buffer to avoid random data. */
+ OCL_MAP_BUFFER(1);
+ memset(buf_data[1], 0, sizeof(U) * n);
+ OCL_UNMAP_BUFFER(1);
+
for (int32_t i = 0; i < (int32_t) n; ++i) {
gen_rand_val(cpu_src[i]);
}
@@ -166,54 +188,66 @@ ABS_TEST_TYPE(uchar, uchar)
typedef cl_vec<int, 2> int2;
+typedef cl_vec<int, 3> int3;
typedef cl_vec<int, 4> int4;
typedef cl_vec<int, 8> int8;
typedef cl_vec<int, 16> int16;
typedef cl_vec<unsigned int, 2> uint2;
+typedef cl_vec<unsigned int, 3> uint3;
typedef cl_vec<unsigned int, 4> uint4;
typedef cl_vec<unsigned int, 8> uint8;
typedef cl_vec<unsigned int, 16> uint16;
ABS_TEST_TYPE(int2, uint2)
+ABS_TEST_TYPE(int3, uint3)
ABS_TEST_TYPE(int4, uint4)
ABS_TEST_TYPE(int8, uint8)
ABS_TEST_TYPE(int16, uint16)
ABS_TEST_TYPE(uint2, uint2)
+ABS_TEST_TYPE(uint3, uint3)
ABS_TEST_TYPE(uint4, uint4)
ABS_TEST_TYPE(uint8, uint8)
ABS_TEST_TYPE(uint16, uint16)
typedef cl_vec<char, 2> char2;
+typedef cl_vec<char, 3> char3;
typedef cl_vec<char, 4> char4;
typedef cl_vec<char, 8> char8;
typedef cl_vec<char, 16> char16;
typedef cl_vec<unsigned char, 2> uchar2;
+typedef cl_vec<unsigned char, 3> uchar3;
typedef cl_vec<unsigned char, 4> uchar4;
typedef cl_vec<unsigned char, 8> uchar8;
typedef cl_vec<unsigned char, 16> uchar16;
ABS_TEST_TYPE(char2, uchar2)
+ABS_TEST_TYPE(char3, uchar3)
ABS_TEST_TYPE(char4, uchar4)
ABS_TEST_TYPE(char8, uchar8)
ABS_TEST_TYPE(char16, uchar16)
ABS_TEST_TYPE(uchar2, uchar2)
+ABS_TEST_TYPE(uchar3, uchar3)
ABS_TEST_TYPE(uchar4, uchar4)
ABS_TEST_TYPE(uchar8, uchar8)
ABS_TEST_TYPE(uchar16, uchar16)
typedef cl_vec<short, 2> short2;
+typedef cl_vec<short, 3> short3;
typedef cl_vec<short, 4> short4;
typedef cl_vec<short, 8> short8;
typedef cl_vec<short, 16> short16;
typedef cl_vec<unsigned short, 2> ushort2;
+typedef cl_vec<unsigned short, 3> ushort3;
typedef cl_vec<unsigned short, 4> ushort4;
typedef cl_vec<unsigned short, 8> ushort8;
typedef cl_vec<unsigned short, 16> ushort16;
ABS_TEST_TYPE(short2, ushort2)
+ABS_TEST_TYPE(short3, ushort3)
ABS_TEST_TYPE(short4, ushort4)
ABS_TEST_TYPE(short8, ushort8)
ABS_TEST_TYPE(short16, ushort16)
ABS_TEST_TYPE(ushort2, ushort2)
+ABS_TEST_TYPE(ushort3, ushort3)
ABS_TEST_TYPE(ushort4, ushort4)
ABS_TEST_TYPE(ushort8, ushort8)
ABS_TEST_TYPE(ushort16, ushort16)
--
1.7.9.5
More information about the Beignet
mailing list