[Beignet] [PATCH V2 2/2] Add the test case for builtin abs() function

Song, Ruiling ruiling.song at intel.com
Thu Jun 27 23:01:21 PDT 2013


GEN IR seems wrong, maybe the bug is in llvm_gen_backend.cpp.
I think you need check the LLVM IR also.


-----Original Message-----
From: beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org [mailto:beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org] On Behalf Of He Junyan
Sent: Friday, June 28, 2013 1:52 PM
To: beignet at lists.freedesktop.org
Subject: Re: [Beignet] [PATCH V2 2/2] Add the test case for builtin abs() function

I find the problem caused by translation of LLVM IR

for the kernel like
kernel void compiler_abs_int2(global int2 *src, global int2 *dst) {
       int i = get_global_id(0);
       dst[i] = abs(src[i]);
  }



INLINE_OVERLOADABLE int2 abs(int2 x) {
      int a1 = (int)__gen_ocl_abs(x.s0);
      int a2 = (int)__gen_ocl_abs(x.s1);
      return (a1, a2);
}
the return state will generate the IR:

STORE.int32.global.aligned %34 {%33 %33}

and we should write like this:
INLINE_OVERLOADABLE int2 abs(int2 x) {
      int a1 = (int)__gen_ocl_abs(x.s0);
      int a2 = (int)__gen_ocl_abs(x.s1);
      return (int2)(a1, a2);
}


Because I copied it from other code in ocl_stdlib.h, I think others written like my abs need to be modified.


On 06/27/2013 05:17 PM, He Junyan wrote:
> on vector check,
> I find the vector has something wield,
>
>
> compiler_abs_char2:
>
> Raw:
>  6 -4 -1 -5 -7 8 3 -3 1 -2 -4 3 3 -5 8 6 -5 5 -4 -6 -6 4 4 -1 4 8 3 -6
> -4 -3 0 -7
> CPU:
>  6 4 1 5 7 8 3 3 1 2 4 3 3 5 8 6 5 5 4 6 6 4 4 1 4 8 3 6 4 3 0 7
> GPU:
>  4 4 5 5 8 8 3 3 2 2 3 3 5 5 6 6 5 5 6 6 4 4 1 1 8 8 6 6 3 3 7 7 
> compiler_abs_char2()    [FAILED]
>     Error: !memcmp(buf_data[1], cpu_dst, sizeof(T) * n)
>   at file /home/robinhe/CL/beignet/utests/compiler_abs.cpp, function 
> compiler_abs_with_type, line 136
>
> element2 seems over write the data of elemet1?
>
>
>
>
> On 06/27/2013 05:01 PM, junyan.he at inbox.com wrote:
>> From: Junyan He <junyan.he at linux.intel.com>
>>
>> All the integer value types check are supported.
>> Please use the case named compiler_abs_xxxx, where xxxx means the 
>> data type such as int2, char4
>>
>> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
>> ---
>>   backend/src/llvm/llvm_gen_ocl_function.hxx |    1 -
>>   kernels/compiler_abs.cl                    |  150 +++++++++++++++++++
>>   utests/CMakeLists.txt                      |    1 +
>>   utests/compiler_abs.cpp                    |  215 
>> ++++++++++++++++++++++++++++
>>   4 files changed, 366 insertions(+), 1 deletion(-)
>>   create mode 100644 kernels/compiler_abs.cl
>>   create mode 100644 utests/compiler_abs.cpp
>>
>> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx
>> b/backend/src/llvm/llvm_gen_ocl_function.hxx
>> index 529c4aa..e82c36e 100644
>> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
>> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
>> @@ -1,4 +1,3 @@
>> -D
>>   DECL_LLVM_GEN_FUNCTION(GET_GROUP_ID0, __gen_ocl_get_group_id0)
>>   DECL_LLVM_GEN_FUNCTION(GET_GROUP_ID1, __gen_ocl_get_group_id1)
>>   DECL_LLVM_GEN_FUNCTION(GET_GROUP_ID2, __gen_ocl_get_group_id2) diff 
>> --git a/kernels/compiler_abs.cl b/kernels/compiler_abs.cl new file 
>> mode 100644 index 0000000..0bd4187
>> --- /dev/null
>> +++ b/kernels/compiler_abs.cl
>> @@ -0,0 +1,150 @@
>> +kernel void compiler_abs_int(global int *src, global int *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_short(global short *src, global short *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_char(global char *src, global char *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_uint(global uint *src, global uint *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_ushort(global ushort *src, global ushort
>> *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_uchar(global uchar *src, global uchar *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_int2(global int2 *src, global int2 *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_int4(global int4 *src, global int4 *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_int8(global int8 *src, global int8 *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_int16(global int16 *src, global int16 *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_char2(global char2 *src, global char2 *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_char4(global char4 *src, global char4 *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_char8(global char8 *src, global char8 *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_char16(global char16 *src, global char16
>> *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_short2(global short2 *src, global short2
>> *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_short4(global short4 *src, global short4
>> *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_short8(global short8 *src, global short8
>> *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_short16(global short16 *src, global short16
>> *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +
>> +kernel void compiler_abs_uint2(global uint2 *src, global uint2 *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_uint4(global uint4 *src, global uint4 *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_uint8(global uint8 *src, global uint8 *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_uint16(global uint16 *src, global uint16
>> *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_uchar2(global uchar2 *src, global uchar2
>> *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_uchar4(global uchar4 *src, global uchar4
>> *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_uchar8(global uchar8 *src, global uchar8
>> *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_uchar16(global uchar16 *src, global uchar16
>> *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_ushort2(global ushort2 *src, global ushort2
>> *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_ushort4(global ushort4 *src, global ushort4
>> *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_ushort8(global ushort8 *src, global ushort8
>> *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> +
>> +kernel void compiler_abs_ushort16(global ushort16 *src, global
>> ushort16 *dst) {
>> +    int i = get_global_id(0);
>> +    dst[i] = abs(src[i]);
>> +}
>> diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 
>> fa36277..8eced91 100644
>> --- a/utests/CMakeLists.txt
>> +++ b/utests/CMakeLists.txt
>> @@ -34,6 +34,7 @@ set (utests_sources
>>     compiler_double_2.cpp
>>     compiler_double_3.cpp
>>     compiler_fabs.cpp
>> +  compiler_abs.cpp
>>     compiler_fill_image.cpp
>>     compiler_fill_image0.cpp
>>     compiler_fill_image_3d.cpp
>> diff --git a/utests/compiler_abs.cpp b/utests/compiler_abs.cpp new 
>> file mode 100644 index 0000000..3e617a6
>> --- /dev/null
>> +++ b/utests/compiler_abs.cpp
>> @@ -0,0 +1,215 @@
>> +#include "utest_helper.hpp"
>> +#include "string.h"
>> +
>> +template <typename T, int N>
>> +struct cl_vec {
>> +    T ptr[N];
>> +
>> +    typedef cl_vec<T, N> vec_type;
>> +
>> +    cl_vec(void) {
>> +        memset(ptr, 0, sizeof(T) * N);
>> +    }
>> +    cl_vec(vec_type & other) {
>> +        memcpy (this->ptr, other.ptr, sizeof(T) * N);
>> +    }
>> +
>> +    vec_type& operator= (vec_type & other) {
>> +        memcpy (this->ptr, other.ptr, sizeof(T) * N);
>> +        return *this;
>> +    }
>> +
>> +    bool operator== (vec_type & other) {
>> +        return !memcmp (this->ptr, other.ptr, sizeof(T) * N);
>> +    }
>> +
>> +    void abs(void) {
>> +        int i = 0;
>> +        for (; i < N; i++) {
>> +            T f = ptr[i];
>> +            f = f < 0 ? -f : f;
>> +            ptr[i] = f;
>> +        }
>> +    }
>> +};
>> +
>> +template <typename T, int N> static void cpu (int global_id,
>> +        cl_vec<T, N> *src, cl_vec<T, N> *dst) {
>> +    cl_vec<T, N> v  = src[global_id];
>> +    v.abs();
>> +    dst[global_id] = v;
>> +}
>> +
>> +template <typename T> static void cpu(int global_id, T *src, T *dst) 
>> +{
>> +    T f = src[global_id];
>> +    f = f < 0 ? -f : f;
>> +    dst[global_id] = f;
>> +}
>> +
>> +template <typename T, int N> static void gen_rand_val (cl_vec<T, N>&
>> vect)
>> +{
>> +    int i = 0;
>> +    for (; i < N; i++) {
>> +        vect.ptr[i] = static_cast<T>((rand() & 15) - 7);
>> +    }
>> +}
>> +
>> +template <typename T> static void gen_rand_val (T & val) {
>> +    val = static_cast<T>((rand() & 15) - 7); }
>> +
>> +template <typename T, int N> static void dump_data (cl_vec<T, N>*
>> vect, int n)
>> +{
>> +    T* val = reinterpret_cast<T *>(vect);
>> +
>> +    n = n*N;
>> +
>> +    printf("\nRaw: \n");
>> +    for (int32_t i = 0; i < (int32_t) n; ++i) {
>> +        printf(" %d", ((T *)buf_data[0])[i]);
>> +    }
>> +
>> +    printf("\nCPU: \n");
>> +    for (int32_t i = 0; i < (int32_t) n; ++i) {
>> +        printf(" %d", val[i]);
>> +    }
>> +    printf("\nGPU: \n");
>> +    for (int32_t i = 0; i < (int32_t) n; ++i) {
>> +        printf(" %d", ((T *)buf_data[1])[i]);
>> +    }
>> +}
>> +
>> +template <typename T> static void dump_data (T* val, int n) {
>> +    printf("\nRaw: \n");
>> +    for (int32_t i = 0; i < (int32_t) n; ++i) {
>> +        printf(" %d", ((T *)buf_data[0])[i]);
>> +    }
>> +
>> +    printf("\nCPU: \n");
>> +    for (int32_t i = 0; i < (int32_t) n; ++i) {
>> +        printf(" %d", val[i]);
>> +    }
>> +    printf("\nGPU: \n");
>> +    for (int32_t i = 0; i < (int32_t) n; ++i) {
>> +        printf(" %d", ((T *)buf_data[1])[i]);
>> +    }
>> +}
>> +
>> +template <typename T> static void compiler_abs_with_type(void) {
>> +    const size_t n = 16;
>> +    T cpu_dst[16], cpu_src[16];
>> +
>> +    // Setup buffers
>> +    OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL);
>> +    OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL);
>> +    OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
>> +    OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
>> +    globals[0] = 16;
>> +    locals[0] = 16;
>> +
>> +    // Run random tests
>> +    for (uint32_t pass = 0; pass < 8; ++pass) {
>> +        OCL_MAP_BUFFER(0);
>> +        for (int32_t i = 0; i < (int32_t) n; ++i) {
>> +            gen_rand_val(cpu_src[i]);
>> +        }
>> +
>> +        memcpy(buf_data[0], cpu_src, sizeof(T) * n);
>> +
>> +        // Run the kernel on GPU
>> +        OCL_NDRANGE(1);
>> +
>> +        // Run on CPU
>> +        for (int32_t i = 0; i < (int32_t) n; ++i)
>> +            cpu(i, cpu_src, cpu_dst);
>> +
>> +        // Compare
>> +        OCL_MAP_BUFFER(1);
>> +
>> +//      dump_data(cpu_dst, n);
>> +
>> +        OCL_ASSERT(!memcmp(buf_data[1], cpu_dst, sizeof(T) * n));
>> +        OCL_UNMAP_BUFFER(1);
>> +        OCL_UNMAP_BUFFER(0);
>> +    }
>> +}
>> +
>> +#define ABS_TEST_TYPE(TYPE) \
>> +    static void compiler_abs_##TYPE (void) \
>> +        { \
>> +           OCL_CALL (cl_kernel_init, "compiler_abs.cl",
>> "compiler_abs_"#TYPE, SOURCE, NULL);  \
>> +           compiler_abs_with_type<TYPE>(); \
>> +        } \
>> +    MAKE_UTEST_FROM_FUNCTION(compiler_abs_##TYPE);
>> +
>> +ABS_TEST_TYPE(int)
>> +ABS_TEST_TYPE(short)
>> +ABS_TEST_TYPE(char)
>> +
>> +typedef unsigned char uchar;
>> +typedef unsigned short ushort;
>> +typedef unsigned int uint;
>> +ABS_TEST_TYPE(uint)
>> +ABS_TEST_TYPE(ushort)
>> +ABS_TEST_TYPE(uchar)
>> +
>> +typedef cl_vec<int, 2> int2;
>> +typedef cl_vec<int, 4> int4;
>> +typedef cl_vec<int, 8> int8;
>> +typedef cl_vec<int, 16> int16;
>> +ABS_TEST_TYPE(int2)
>> +ABS_TEST_TYPE(int4)
>> +ABS_TEST_TYPE(int8)
>> +ABS_TEST_TYPE(int16)
>> +
>> +typedef cl_vec<char, 2> char2;
>> +typedef cl_vec<char, 4> char4;
>> +typedef cl_vec<char, 8> char8;
>> +typedef cl_vec<char, 16> char16;
>> +ABS_TEST_TYPE(char2)
>> +ABS_TEST_TYPE(char4)
>> +ABS_TEST_TYPE(char8)
>> +ABS_TEST_TYPE(char16)
>> +
>> +typedef cl_vec<short, 2> short2;
>> +typedef cl_vec<short, 4> short4;
>> +typedef cl_vec<short, 8> short8;
>> +typedef cl_vec<short, 16> short16;
>> +ABS_TEST_TYPE(short2)
>> +ABS_TEST_TYPE(short4)
>> +ABS_TEST_TYPE(short8)
>> +ABS_TEST_TYPE(short16)
>> +
>> +
>> +typedef cl_vec<unsigned int, 2> uint2; typedef cl_vec<unsigned int, 
>> +4> uint4; typedef cl_vec<unsigned int, 8> uint8; typedef 
>> +cl_vec<unsigned int, 16> uint16;
>> +ABS_TEST_TYPE(uint2)
>> +ABS_TEST_TYPE(uint4)
>> +ABS_TEST_TYPE(uint8)
>> +ABS_TEST_TYPE(uint16)
>> +
>> +typedef cl_vec<unsigned char, 2> uchar2; typedef cl_vec<unsigned 
>> +char, 4> uchar4; typedef cl_vec<unsigned char, 8> uchar8; typedef 
>> +cl_vec<unsigned char, 16> uchar16;
>> +ABS_TEST_TYPE(uchar2)
>> +ABS_TEST_TYPE(uchar4)
>> +ABS_TEST_TYPE(uchar8)
>> +ABS_TEST_TYPE(uchar16)
>> +
>> +typedef cl_vec<unsigned short, 2> ushort2; typedef cl_vec<unsigned 
>> +short, 4> ushort4; typedef cl_vec<unsigned short, 8> ushort8; 
>> +typedef cl_vec<unsigned short, 16> ushort16;
>> +ABS_TEST_TYPE(ushort2)
>> +ABS_TEST_TYPE(ushort4)
>> +ABS_TEST_TYPE(ushort8)
>> +ABS_TEST_TYPE(ushort16)
>> +
>
>
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet



_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list