[Beignet] [PATCH v2] GBE: remove the user defined macro cl_khr_fp64.
He Junyan
junyan.he at inbox.com
Wed Sep 3 23:59:54 PDT 2014
LGTM,
thanks.
On Thu, 2014-09-04 at 13:59 +0800, Zhigang Gong wrote:
> This is not a predefined macro according to the spec, so let's not
> define it by default. This patch also disables fp64 when entering
> user kernels.
>
> v2:
> Some internal .cl files require cl_khr_fp64 to be enabled. Fixed that
> issue by moving the enable macro to ocl_types.h.
>
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> Reviewed-by: Junyan He <junyan.he at linux.intel.com>
> ---
> backend/src/backend/program.cpp | 2 --
> backend/src/libocl/CMakeLists.txt | 2 +-
> backend/src/libocl/include/ocl.h | 1 +
> backend/src/libocl/include/ocl_types.h | 3 ---
> backend/src/libocl/src/ocl_async.cl | 1 +
> backend/src/libocl/src/ocl_image.cl | 26 +++++++++++++-------------
> backend/src/libocl/src/ocl_vload.cl | 1 +
> 7 files changed, 17 insertions(+), 19 deletions(-)
>
> diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
> index 42cd989..98e8a34 100644
> --- a/backend/src/backend/program.cpp
> +++ b/backend/src/backend/program.cpp
> @@ -516,8 +516,6 @@ namespace gbe {
> }
>
> args.push_back("-cl-kernel-arg-info");
> - args.push_back("-Dcl_khr_fp64");
> -
> args.push_back("-mllvm");
> args.push_back("-inline-threshold=200000");
> #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
> diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt
> index d4e3a53..fb93da1 100644
> --- a/backend/src/libocl/CMakeLists.txt
> +++ b/backend/src/libocl/CMakeLists.txt
> @@ -129,7 +129,7 @@ FOREACH(M ${OCL_BASH_GENERATED_MODULES})
> ENDFOREACH(M)
>
>
> -SET (CLANG_OCL_FLAGS -fno-builtin -Dcl_khr_fp64 -ffp-contract=off -cl-kernel-arg-info -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND)
> +SET (CLANG_OCL_FLAGS -fno-builtin -ffp-contract=off -cl-kernel-arg-info -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND)
>
> MACRO(ADD_CL_TO_BC_TARGET _file)
> # CMake seems can not add pattern rule, use MACRO to replace.
> diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h
> index a7d03e6..d4a8805 100644
> --- a/backend/src/libocl/include/ocl.h
> +++ b/backend/src/libocl/include/ocl.h
> @@ -19,5 +19,6 @@
> #include "ocl_sync.h"
> #include "ocl_vload.h"
> #include "ocl_workitem.h"
> +#pragma OPENCL EXTENSION cl_khr_fp64 : disable
>
> #endif
> diff --git a/backend/src/libocl/include/ocl_types.h b/backend/src/libocl/include/ocl_types.h
> index 05a2dae..87e9bf5 100644
> --- a/backend/src/libocl/include/ocl_types.h
> +++ b/backend/src/libocl/include/ocl_types.h
> @@ -1,10 +1,7 @@
> #ifndef __OCL_TYPES_H__
> #define __OCL_TYPES_H__
>
> -#ifdef cl_khr_fp64
> #pragma OPENCL EXTENSION cl_khr_fp64 : enable
> -#endif
> -
> #include "ocl_defines.h"
>
> #define NULL ((void*)0)
> diff --git a/backend/src/libocl/src/ocl_async.cl b/backend/src/libocl/src/ocl_async.cl
> index 57d6859..e6f9a36 100644
> --- a/backend/src/libocl/src/ocl_async.cl
> +++ b/backend/src/libocl/src/ocl_async.cl
> @@ -1,3 +1,4 @@
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> #include "ocl_async.h"
> #include "ocl_sync.h"
> #include "ocl_workitem.h"
> diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl
> index 00c3e8f..7202802 100644
> --- a/backend/src/libocl/src/ocl_image.cl
> +++ b/backend/src/libocl/src/ocl_image.cl
> @@ -188,7 +188,7 @@ OVERLOADABLE int __gen_compute_array_index(int index, image2d_array_t image)
> #define FIXUP_FLOAT_COORD(tmpCoord) \
> { \
> if (tmpCoord < 0 && tmpCoord > -0x1p-20f) \
> - tmpCoord += -0x1p-9; \
> + tmpCoord += -0x1p-9f; \
> }
>
> DECL_IMAGE(GEN_FIX_1, image1d_t, int4, i)
> @@ -229,7 +229,7 @@ DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
> #define FIXUP_FLOAT_COORD(tmpCoord) \
> { \
> if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
> - tmpCoord.s0 += -0x1p-9; \
> + tmpCoord.s0 += -0x1p-9f; \
> if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \
> tmpCoord.s1 += -0x1p-9f; \
> }
> @@ -258,7 +258,7 @@ DECL_IMAGE(0, image2d_t, float4, f, 2)
> #define FIXUP_FLOAT_COORD(tmpCoord) \
> { \
> if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
> - tmpCoord.s0 += -0x1p-9; \
> + tmpCoord.s0 += -0x1p-9f; \
> }
>
> DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2)
> @@ -306,12 +306,12 @@ OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
>
> #define FIXUP_FLOAT_COORD(tmpCoord) \
> { \
> - if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20) \
> - tmpCoord.s0 += -0x1p-9; \
> - if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20) \
> - tmpCoord.s1 += -0x1p-9; \
> - if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20) \
> - tmpCoord.s2 += -0x1p-9; \
> + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
> + tmpCoord.s0 += -0x1p-9f; \
> + if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \
> + tmpCoord.s1 += -0x1p-9f; \
> + if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f) \
> + tmpCoord.s2 += -0x1p-9f; \
> }
> #define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
>
> @@ -341,10 +341,10 @@ DECL_IMAGE(0, image3d_t, float4, f, 3)
>
> #define FIXUP_FLOAT_COORD(tmpCoord) \
> { \
> - if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20) \
> - tmpCoord.s0 += -0x1p-9; \
> - if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20) \
> - tmpCoord.s1 += -0x1p-9; \
> + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
> + tmpCoord.s0 += -0x1p-9f; \
> + if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \
> + tmpCoord.s1 += -0x1p-9f; \
> }
> #define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
> coord_type ai = __gen_compute_array_index(coord.s2, image);
> diff --git a/backend/src/libocl/src/ocl_vload.cl b/backend/src/libocl/src/ocl_vload.cl
> index 6bf7b8d..996ab61 100644
> --- a/backend/src/libocl/src/ocl_vload.cl
> +++ b/backend/src/libocl/src/ocl_vload.cl
> @@ -1,3 +1,4 @@
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> #include "ocl_vload.h"
> #include "ocl_relational.h"
>
More information about the Beignet
mailing list