[Beignet] [PATCH v2] GBE: remove the user defined macro cl_khr_fp64.
Zhigang Gong
zhigang.gong at intel.com
Wed Sep 3 22:59:31 PDT 2014
This is not a predefined macro according to the spec, so let's not
define it by default. This patch also disables fp64 when entering
user kernels.
v2:
Some internal .cl files require cl_khr_fp64 to be enabled. Fixed that
issue by moving the enable macro to ocl_types.h.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
Reviewed-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/backend/program.cpp | 2 --
backend/src/libocl/CMakeLists.txt | 2 +-
backend/src/libocl/include/ocl.h | 1 +
backend/src/libocl/include/ocl_types.h | 3 ---
backend/src/libocl/src/ocl_async.cl | 1 +
backend/src/libocl/src/ocl_image.cl | 26 +++++++++++++-------------
backend/src/libocl/src/ocl_vload.cl | 1 +
7 files changed, 17 insertions(+), 19 deletions(-)
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 42cd989..98e8a34 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -516,8 +516,6 @@ namespace gbe {
}
args.push_back("-cl-kernel-arg-info");
- args.push_back("-Dcl_khr_fp64");
-
args.push_back("-mllvm");
args.push_back("-inline-threshold=200000");
#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt
index d4e3a53..fb93da1 100644
--- a/backend/src/libocl/CMakeLists.txt
+++ b/backend/src/libocl/CMakeLists.txt
@@ -129,7 +129,7 @@ FOREACH(M ${OCL_BASH_GENERATED_MODULES})
ENDFOREACH(M)
-SET (CLANG_OCL_FLAGS -fno-builtin -Dcl_khr_fp64 -ffp-contract=off -cl-kernel-arg-info -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND)
+SET (CLANG_OCL_FLAGS -fno-builtin -ffp-contract=off -cl-kernel-arg-info -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND)
MACRO(ADD_CL_TO_BC_TARGET _file)
# CMake seems can not add pattern rule, use MACRO to replace.
diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h
index a7d03e6..d4a8805 100644
--- a/backend/src/libocl/include/ocl.h
+++ b/backend/src/libocl/include/ocl.h
@@ -19,5 +19,6 @@
#include "ocl_sync.h"
#include "ocl_vload.h"
#include "ocl_workitem.h"
+#pragma OPENCL EXTENSION cl_khr_fp64 : disable
#endif
diff --git a/backend/src/libocl/include/ocl_types.h b/backend/src/libocl/include/ocl_types.h
index 05a2dae..87e9bf5 100644
--- a/backend/src/libocl/include/ocl_types.h
+++ b/backend/src/libocl/include/ocl_types.h
@@ -1,10 +1,7 @@
#ifndef __OCL_TYPES_H__
#define __OCL_TYPES_H__
-#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-#endif
-
#include "ocl_defines.h"
#define NULL ((void*)0)
diff --git a/backend/src/libocl/src/ocl_async.cl b/backend/src/libocl/src/ocl_async.cl
index 57d6859..e6f9a36 100644
--- a/backend/src/libocl/src/ocl_async.cl
+++ b/backend/src/libocl/src/ocl_async.cl
@@ -1,3 +1,4 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#include "ocl_async.h"
#include "ocl_sync.h"
#include "ocl_workitem.h"
diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl
index 00c3e8f..7202802 100644
--- a/backend/src/libocl/src/ocl_image.cl
+++ b/backend/src/libocl/src/ocl_image.cl
@@ -188,7 +188,7 @@ OVERLOADABLE int __gen_compute_array_index(int index, image2d_array_t image)
#define FIXUP_FLOAT_COORD(tmpCoord) \
{ \
if (tmpCoord < 0 && tmpCoord > -0x1p-20f) \
- tmpCoord += -0x1p-9; \
+ tmpCoord += -0x1p-9f; \
}
DECL_IMAGE(GEN_FIX_1, image1d_t, int4, i)
@@ -229,7 +229,7 @@ DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
#define FIXUP_FLOAT_COORD(tmpCoord) \
{ \
if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
- tmpCoord.s0 += -0x1p-9; \
+ tmpCoord.s0 += -0x1p-9f; \
if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \
tmpCoord.s1 += -0x1p-9f; \
}
@@ -258,7 +258,7 @@ DECL_IMAGE(0, image2d_t, float4, f, 2)
#define FIXUP_FLOAT_COORD(tmpCoord) \
{ \
if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
- tmpCoord.s0 += -0x1p-9; \
+ tmpCoord.s0 += -0x1p-9f; \
}
DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2)
@@ -306,12 +306,12 @@ OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
#define FIXUP_FLOAT_COORD(tmpCoord) \
{ \
- if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20) \
- tmpCoord.s0 += -0x1p-9; \
- if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20) \
- tmpCoord.s1 += -0x1p-9; \
- if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20) \
- tmpCoord.s2 += -0x1p-9; \
+ if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
+ tmpCoord.s0 += -0x1p-9f; \
+ if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \
+ tmpCoord.s1 += -0x1p-9f; \
+ if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f) \
+ tmpCoord.s2 += -0x1p-9f; \
}
#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
@@ -341,10 +341,10 @@ DECL_IMAGE(0, image3d_t, float4, f, 3)
#define FIXUP_FLOAT_COORD(tmpCoord) \
{ \
- if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20) \
- tmpCoord.s0 += -0x1p-9; \
- if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20) \
- tmpCoord.s1 += -0x1p-9; \
+ if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
+ tmpCoord.s0 += -0x1p-9f; \
+ if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \
+ tmpCoord.s1 += -0x1p-9f; \
}
#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
coord_type ai = __gen_compute_array_index(coord.s2, image);
diff --git a/backend/src/libocl/src/ocl_vload.cl b/backend/src/libocl/src/ocl_vload.cl
index 6bf7b8d..996ab61 100644
--- a/backend/src/libocl/src/ocl_vload.cl
+++ b/backend/src/libocl/src/ocl_vload.cl
@@ -1,3 +1,4 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#include "ocl_vload.h"
#include "ocl_relational.h"
--
1.8.3.2
More information about the Beignet
mailing list