[Beignet] [PATCH v2] GBE: remove the user defined macro cl_khr_fp64.

He Junyan junyan.he at inbox.com
Wed Sep 3 23:59:54 PDT 2014


LGTM,
thanks.

On Thu, 2014-09-04 at 13:59 +0800, Zhigang Gong wrote:
> This is not a predefined macro according to the spec. Let's not
> define it by default. This patch also disables fp64 when entering
> user kernels.
> 
> v2:
> Some internal .cl files require cl_khr_fp64 to be enabled. Fixed that issue
> by moving the enable macro to ocl_types.h.
> 
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> Reviewed-by: Junyan He <junyan.he at linux.intel.com>
> ---
>  backend/src/backend/program.cpp        |  2 --
>  backend/src/libocl/CMakeLists.txt      |  2 +-
>  backend/src/libocl/include/ocl.h       |  1 +
>  backend/src/libocl/include/ocl_types.h |  3 ---
>  backend/src/libocl/src/ocl_async.cl    |  1 +
>  backend/src/libocl/src/ocl_image.cl    | 26 +++++++++++++-------------
>  backend/src/libocl/src/ocl_vload.cl    |  1 +
>  7 files changed, 17 insertions(+), 19 deletions(-)
> 
> diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
> index 42cd989..98e8a34 100644
> --- a/backend/src/backend/program.cpp
> +++ b/backend/src/backend/program.cpp
> @@ -516,8 +516,6 @@ namespace gbe {
>      }
>  
>      args.push_back("-cl-kernel-arg-info");
> -    args.push_back("-Dcl_khr_fp64");
> -
>      args.push_back("-mllvm");
>      args.push_back("-inline-threshold=200000");
>  #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
> diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt
> index d4e3a53..fb93da1 100644
> --- a/backend/src/libocl/CMakeLists.txt
> +++ b/backend/src/libocl/CMakeLists.txt
> @@ -129,7 +129,7 @@ FOREACH(M ${OCL_BASH_GENERATED_MODULES})
>  ENDFOREACH(M) 
>  
> 
> -SET (CLANG_OCL_FLAGS -fno-builtin -Dcl_khr_fp64 -ffp-contract=off -cl-kernel-arg-info -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND)
> +SET (CLANG_OCL_FLAGS -fno-builtin -ffp-contract=off -cl-kernel-arg-info -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND)
>  
>  MACRO(ADD_CL_TO_BC_TARGET _file)
>      # CMake seems can not add pattern rule, use MACRO to replace.
> diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h
> index a7d03e6..d4a8805 100644
> --- a/backend/src/libocl/include/ocl.h
> +++ b/backend/src/libocl/include/ocl.h
> @@ -19,5 +19,6 @@
>  #include "ocl_sync.h"
>  #include "ocl_vload.h"
>  #include "ocl_workitem.h"
> +#pragma OPENCL EXTENSION cl_khr_fp64 : disable
>  
>  #endif
> diff --git a/backend/src/libocl/include/ocl_types.h b/backend/src/libocl/include/ocl_types.h
> index 05a2dae..87e9bf5 100644
> --- a/backend/src/libocl/include/ocl_types.h
> +++ b/backend/src/libocl/include/ocl_types.h
> @@ -1,10 +1,7 @@
>  #ifndef __OCL_TYPES_H__
>  #define __OCL_TYPES_H__
>  
> -#ifdef cl_khr_fp64
>  #pragma OPENCL EXTENSION cl_khr_fp64 : enable
> -#endif
> -
>  #include "ocl_defines.h"
>  
>  #define NULL ((void*)0)
> diff --git a/backend/src/libocl/src/ocl_async.cl b/backend/src/libocl/src/ocl_async.cl
> index 57d6859..e6f9a36 100644
> --- a/backend/src/libocl/src/ocl_async.cl
> +++ b/backend/src/libocl/src/ocl_async.cl
> @@ -1,3 +1,4 @@
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
>  #include "ocl_async.h"
>  #include "ocl_sync.h"
>  #include "ocl_workitem.h"
> diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl
> index 00c3e8f..7202802 100644
> --- a/backend/src/libocl/src/ocl_image.cl
> +++ b/backend/src/libocl/src/ocl_image.cl
> @@ -188,7 +188,7 @@ OVERLOADABLE int __gen_compute_array_index(int index, image2d_array_t image)
>  #define FIXUP_FLOAT_COORD(tmpCoord)                            \
>    {                                                            \
>      if (tmpCoord < 0 && tmpCoord > -0x1p-20f)                  \
> -      tmpCoord += -0x1p-9;                                     \
> +      tmpCoord += -0x1p-9f;                                     \
>    }
>  
>  DECL_IMAGE(GEN_FIX_1, image1d_t, int4, i)
> @@ -229,7 +229,7 @@ DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
>  #define FIXUP_FLOAT_COORD(tmpCoord)                            \
>    {                                                            \
>      if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)            \
> -      tmpCoord.s0 += -0x1p-9;                                  \
> +      tmpCoord.s0 += -0x1p-9f;                                  \
>      if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)            \
>        tmpCoord.s1 += -0x1p-9f;                                 \
>    }
> @@ -258,7 +258,7 @@ DECL_IMAGE(0, image2d_t, float4, f, 2)
>  #define FIXUP_FLOAT_COORD(tmpCoord)                            \
>    {                                                            \
>      if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)            \
> -      tmpCoord.s0 += -0x1p-9;                                  \
> +      tmpCoord.s0 += -0x1p-9f;                                  \
>    }
>  
>  DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2)
> @@ -306,12 +306,12 @@ OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
>  
>  #define FIXUP_FLOAT_COORD(tmpCoord)                             \
>    {                                                             \
> -    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20)              \
> -      tmpCoord.s0 += -0x1p-9;                                   \
> -    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20)              \
> -      tmpCoord.s1 += -0x1p-9;                                   \
> -    if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20)              \
> -      tmpCoord.s2 += -0x1p-9;                                   \
> +    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)              \
> +      tmpCoord.s0 += -0x1p-9f;                                   \
> +    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)              \
> +      tmpCoord.s1 += -0x1p-9f;                                   \
> +    if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f)              \
> +      tmpCoord.s2 += -0x1p-9f;                                   \
>    }
>  #define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
>  
> @@ -341,10 +341,10 @@ DECL_IMAGE(0, image3d_t, float4, f, 3)
>  
>  #define FIXUP_FLOAT_COORD(tmpCoord)                             \
>    {                                                             \
> -    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20)              \
> -      tmpCoord.s0 += -0x1p-9;                                   \
> -    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20)              \
> -      tmpCoord.s1 += -0x1p-9;                                   \
> +    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)              \
> +      tmpCoord.s0 += -0x1p-9f;                                   \
> +    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)              \
> +      tmpCoord.s1 += -0x1p-9f;                                   \
>    }
>  #define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
>    coord_type ai = __gen_compute_array_index(coord.s2, image);
> diff --git a/backend/src/libocl/src/ocl_vload.cl b/backend/src/libocl/src/ocl_vload.cl
> index 6bf7b8d..996ab61 100644
> --- a/backend/src/libocl/src/ocl_vload.cl
> +++ b/backend/src/libocl/src/ocl_vload.cl
> @@ -1,3 +1,4 @@
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
>  #include "ocl_vload.h"
>  #include "ocl_relational.h"
>  





More information about the Beignet mailing list