[Beignet] [PATCH] GBE: enable double vector load/store support.

Tue Aug 6 22:07:52 PDT 2013

Ping for comments. Thanks.

On Tue, Aug 06, 2013 at 12:01:46PM +0800, Zhigang Gong wrote:
> We have some accurate problem for double calculation
> on GPU side. I have to change the test case for double
> type to add a tolerate error when check the double
> data result.
> 
> Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
> ---
>  backend/src/llvm/llvm_gen_backend.cpp |    2 --
>  backend/src/ocl_stdlib.tmpl.h         |    1 +
>  kernels/compiler_vector_load_store.cl |    6 +++---
>  utests/compiler_vector_load_store.cpp |   12 ++++++++----
>  4 files changed, 12 insertions(+), 9 deletions(-)
> 
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index b5963ad..18448cf 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2371,8 +2371,6 @@ namespace gbe
>      // Scalar is easy. We neednot build register tuples
>      if (isScalarType(llvmType) == true) {
>        const ir::Type type = getType(ctx, llvmType);
> -      //if(type == ir::TYPE_DOUBLE) // 64bit-float load(store) don't support SIMD16
> -      //  OCL_SIMD_WIDTH = 8;
>        const ir::Register values = this->getRegister(llvmValues);
>        if (isLoad)
>          ctx.LOAD(type, ptr, addrSpace, dwAligned, values);
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index c9e54e2..548c96f 100644
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -955,6 +955,7 @@ DECL_UNTYPED_RW_ALL(uint)
>  DECL_UNTYPED_RW_ALL(long)
>  DECL_UNTYPED_RW_ALL(ulong)
>  DECL_UNTYPED_RW_ALL(float)
> +DECL_UNTYPED_RW_ALL(double)
>  
>  #undef DECL_UNTYPED_RW_ALL
>  #undef DECL_UNTYPED_RW_ALL_SPACE
> diff --git a/kernels/compiler_vector_load_store.cl b/kernels/compiler_vector_load_store.cl
> index 30f0e1e..320194e 100644
> --- a/kernels/compiler_vector_load_store.cl
> +++ b/kernels/compiler_vector_load_store.cl
> @@ -1,5 +1,5 @@
>  /* test OpenCL 1.1 Vector Data Load/Store Functions (section 6.11.7) */
> -
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
>  #define OFFSET2(type)  (type ##2) {(type)1, (type)2}
>  #define OFFSET3(type)  (type ##3) {(type)1, (type)2, (type)3}
>  #define OFFSET4(type)  (type ##4) {(type)1, (type)2, (type)3, (type)4}
> @@ -24,10 +24,10 @@ __kernel void test_##type ##n(__global type *pin, \
>    TEST_TYPE(ushort,n)\
>    TEST_TYPE(int,n)   \
>    TEST_TYPE(uint,n)  \
> -  TEST_TYPE(float,n)
> +  TEST_TYPE(float,n) \
> +  TEST_TYPE(double,n)
>  
>  #if 0
> -  TEST_TYPE(double,n)
>    TEST_TYPE(long,n)
>    TEST_TYPE(ulong,n)
>    TEST_TYPE(half,n)
> diff --git a/utests/compiler_vector_load_store.cpp b/utests/compiler_vector_load_store.cpp
> index 79f284f..7deb7cb 100644
> --- a/utests/compiler_vector_load_store.cpp
> +++ b/utests/compiler_vector_load_store.cpp
> @@ -1,4 +1,5 @@
>  #include "utest_helper.hpp"
> +#include <string.h>
>  template<typename T>
>  static void compiler_vector_load_store(int elemNum, const char *kernelName)
>  {
> @@ -9,8 +10,8 @@ static void compiler_vector_load_store(int elemNum, const char *kernelName)
>    buf_data[0] = (T*) malloc(sizeof(T) * n);
>    for (uint32_t i = 0; i < n; ++i)
>      ((T*)buf_data[0])[i] = i;
> -  OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(float), buf_data[0]);
> -  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
> +  OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(T), buf_data[0]);
> +  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL);
>    free(buf_data[0]);
>    buf_data[0] = NULL;
>  
> @@ -27,7 +28,10 @@ static void compiler_vector_load_store(int elemNum, const char *kernelName)
>    for (uint32_t i = 0; i < n; ++i)
>    {
>      int shift = ((i % elemNum) + 1);
> -    OCL_ASSERT(((T*)buf_data[1])[i] == (T)(((T*)buf_data[0])[i] + shift));
> +    if (strstr(kernelName, "double") == NULL)
> +      OCL_ASSERT(((T*)buf_data[1])[i] == (T)(((T*)buf_data[0])[i] + shift));
> +    else
> +      OCL_ASSERT((((T*)buf_data[1])[i] - ((T)((T*)buf_data[0])[i] + shift)) < 1e-5);
>    }
>    OCL_UNMAP_BUFFER(0);
>    OCL_UNMAP_BUFFER(1);
> @@ -54,6 +58,6 @@ test_all_vector(uint16_t, ushort)
>  test_all_vector(int32_t, int)
>  test_all_vector(uint32_t, uint)
>  test_all_vector(float, float)
> -//test_all_vector(double, double)
> +test_all_vector(double, double)
>  //test_all_vector(int64_t, long)
>  //test_all_vector(uint64_t, ulong)
> -- 
> 1.7.9.5
>