[Beignet] [PATCH v2] GBE: remove the user defined macro cl_khr_fp64.

Zhigang Gong zhigang.gong at intel.com
Wed Sep 3 22:59:31 PDT 2014


cl_khr_fp64 is not a predefined macro according to the spec, so let's
not define it by default. This patch also disables fp64 when entering
user kernels.

v2:
Some internal .cl files require cl_khr_fp64 to be enabled. Fixed that
issue by moving the enable pragma to ocl_types.h.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
Reviewed-by: Junyan He <junyan.he at linux.intel.com>
---
 backend/src/backend/program.cpp        |  2 --
 backend/src/libocl/CMakeLists.txt      |  2 +-
 backend/src/libocl/include/ocl.h       |  1 +
 backend/src/libocl/include/ocl_types.h |  3 ---
 backend/src/libocl/src/ocl_async.cl    |  1 +
 backend/src/libocl/src/ocl_image.cl    | 26 +++++++++++++-------------
 backend/src/libocl/src/ocl_vload.cl    |  1 +
 7 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 42cd989..98e8a34 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -516,8 +516,6 @@ namespace gbe {
     }
 
     args.push_back("-cl-kernel-arg-info");
-    args.push_back("-Dcl_khr_fp64");
-
     args.push_back("-mllvm");
     args.push_back("-inline-threshold=200000");
 #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt
index d4e3a53..fb93da1 100644
--- a/backend/src/libocl/CMakeLists.txt
+++ b/backend/src/libocl/CMakeLists.txt
@@ -129,7 +129,7 @@ FOREACH(M ${OCL_BASH_GENERATED_MODULES})
 ENDFOREACH(M) 
 
 
-SET (CLANG_OCL_FLAGS -fno-builtin -Dcl_khr_fp64 -ffp-contract=off -cl-kernel-arg-info -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND)
+SET (CLANG_OCL_FLAGS -fno-builtin -ffp-contract=off -cl-kernel-arg-info -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND)
 
 MACRO(ADD_CL_TO_BC_TARGET _file)
     # CMake seems can not add pattern rule, use MACRO to replace.
diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h
index a7d03e6..d4a8805 100644
--- a/backend/src/libocl/include/ocl.h
+++ b/backend/src/libocl/include/ocl.h
@@ -19,5 +19,6 @@
 #include "ocl_sync.h"
 #include "ocl_vload.h"
 #include "ocl_workitem.h"
+#pragma OPENCL EXTENSION cl_khr_fp64 : disable
 
 #endif
diff --git a/backend/src/libocl/include/ocl_types.h b/backend/src/libocl/include/ocl_types.h
index 05a2dae..87e9bf5 100644
--- a/backend/src/libocl/include/ocl_types.h
+++ b/backend/src/libocl/include/ocl_types.h
@@ -1,10 +1,7 @@
 #ifndef __OCL_TYPES_H__
 #define __OCL_TYPES_H__
 
-#ifdef cl_khr_fp64
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
-#endif
-
 #include "ocl_defines.h"
 
 #define NULL ((void*)0)
diff --git a/backend/src/libocl/src/ocl_async.cl b/backend/src/libocl/src/ocl_async.cl
index 57d6859..e6f9a36 100644
--- a/backend/src/libocl/src/ocl_async.cl
+++ b/backend/src/libocl/src/ocl_async.cl
@@ -1,3 +1,4 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
 #include "ocl_async.h"
 #include "ocl_sync.h"
 #include "ocl_workitem.h"
diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl
index 00c3e8f..7202802 100644
--- a/backend/src/libocl/src/ocl_image.cl
+++ b/backend/src/libocl/src/ocl_image.cl
@@ -188,7 +188,7 @@ OVERLOADABLE int __gen_compute_array_index(int index, image2d_array_t image)
 #define FIXUP_FLOAT_COORD(tmpCoord)                            \
   {                                                            \
     if (tmpCoord < 0 && tmpCoord > -0x1p-20f)                  \
-      tmpCoord += -0x1p-9;                                     \
+      tmpCoord += -0x1p-9f;                                     \
   }
 
 DECL_IMAGE(GEN_FIX_1, image1d_t, int4, i)
@@ -229,7 +229,7 @@ DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
 #define FIXUP_FLOAT_COORD(tmpCoord)                            \
   {                                                            \
     if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)            \
-      tmpCoord.s0 += -0x1p-9;                                  \
+      tmpCoord.s0 += -0x1p-9f;                                  \
     if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)            \
       tmpCoord.s1 += -0x1p-9f;                                 \
   }
@@ -258,7 +258,7 @@ DECL_IMAGE(0, image2d_t, float4, f, 2)
 #define FIXUP_FLOAT_COORD(tmpCoord)                            \
   {                                                            \
     if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)            \
-      tmpCoord.s0 += -0x1p-9;                                  \
+      tmpCoord.s0 += -0x1p-9f;                                  \
   }
 
 DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2)
@@ -306,12 +306,12 @@ OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
 
 #define FIXUP_FLOAT_COORD(tmpCoord)                             \
   {                                                             \
-    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20)              \
-      tmpCoord.s0 += -0x1p-9;                                   \
-    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20)              \
-      tmpCoord.s1 += -0x1p-9;                                   \
-    if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20)              \
-      tmpCoord.s2 += -0x1p-9;                                   \
+    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)              \
+      tmpCoord.s0 += -0x1p-9f;                                   \
+    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)              \
+      tmpCoord.s1 += -0x1p-9f;                                   \
+    if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f)              \
+      tmpCoord.s2 += -0x1p-9f;                                   \
   }
 #define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
 
@@ -341,10 +341,10 @@ DECL_IMAGE(0, image3d_t, float4, f, 3)
 
 #define FIXUP_FLOAT_COORD(tmpCoord)                             \
   {                                                             \
-    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20)              \
-      tmpCoord.s0 += -0x1p-9;                                   \
-    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20)              \
-      tmpCoord.s1 += -0x1p-9;                                   \
+    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)              \
+      tmpCoord.s0 += -0x1p-9f;                                   \
+    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)              \
+      tmpCoord.s1 += -0x1p-9f;                                   \
   }
 #define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
   coord_type ai = __gen_compute_array_index(coord.s2, image);
diff --git a/backend/src/libocl/src/ocl_vload.cl b/backend/src/libocl/src/ocl_vload.cl
index 6bf7b8d..996ab61 100644
--- a/backend/src/libocl/src/ocl_vload.cl
+++ b/backend/src/libocl/src/ocl_vload.cl
@@ -1,3 +1,4 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
 #include "ocl_vload.h"
 #include "ocl_relational.h"
 
-- 
1.8.3.2



More information about the Beignet mailing list