[Beignet] [PATCH 1/9] gbe/libocl: define the vloada_xxx functions instead of using macros.

xionghu.luo at intel.com xionghu.luo at intel.com
Thu Nov 12 00:41:47 PST 2015


From: Luo Xionghu <xionghu.luo at intel.com>

These functions need to be overloadable for linking.

Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
 backend/src/libocl/include/ocl_vload.h | 13 ++++++-------
 backend/src/libocl/src/ocl_vload.cl    | 21 ++++++++++++++++++++-
 2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/backend/src/libocl/include/ocl_vload.h b/backend/src/libocl/include/ocl_vload.h
index b1b1a32..1e6871e 100644
--- a/backend/src/libocl/include/ocl_vload.h
+++ b/backend/src/libocl/include/ocl_vload.h
@@ -109,12 +109,17 @@ DECL_UNTYPED_RW_ALL(double)
 
 #define DECL_HALF_LD_SPACE(SPACE) \
 OVERLOADABLE float vload_half(size_t offset, const SPACE half *p);  \
+OVERLOADABLE float vloada_half(size_t offset, const SPACE half *p);  \
 OVERLOADABLE float2 vload_half2(size_t offset, const SPACE half *p); \
+OVERLOADABLE float2 vloada_half2(size_t offset, const SPACE half *p); \
 OVERLOADABLE float3 vload_half3(size_t offset, const SPACE half *p); \
 OVERLOADABLE float3 vloada_half3(size_t offset, const SPACE half *p); \
 OVERLOADABLE float4 vload_half4(size_t offset, const SPACE half *p);  \
+OVERLOADABLE float4 vloada_half4(size_t offset, const SPACE half *p);  \
 OVERLOADABLE float8 vload_half8(size_t offset, const SPACE half *p);  \
-OVERLOADABLE float16 vload_half16(size_t offset, const SPACE half *p);
+OVERLOADABLE float8 vloada_half8(size_t offset, const SPACE half *p);  \
+OVERLOADABLE float16 vload_half16(size_t offset, const SPACE half *p); \
+OVERLOADABLE float16 vloada_half16(size_t offset, const SPACE half *p); \
 
 #define DECL_HALF_ST_SPACE_ROUND(SPACE, ROUND, FUNC) \
 OVERLOADABLE void vstore_half##ROUND(float data, size_t offset, SPACE half *p);  \
@@ -151,10 +156,4 @@ DECL_HALF_ST_SPACE(__private)
 #undef DECL_HALF_ST_SPACE
 #undef DECL_HALF_ST_SPACE_ROUND
 
-#define vloada_half vload_half
-#define vloada_half2 vload_half2
-#define vloada_half4 vload_half4
-#define vloada_half8 vload_half8
-#define vloada_half16 vload_half16
-
 #endif  /* __OCL_VLOAD_H__ */
diff --git a/backend/src/libocl/src/ocl_vload.cl b/backend/src/libocl/src/ocl_vload.cl
index fa5e04f..38a87c4 100644
--- a/backend/src/libocl/src/ocl_vload.cl
+++ b/backend/src/libocl/src/ocl_vload.cl
@@ -179,10 +179,17 @@ OVERLOADABLE short f32to16_rtz(float f) {
 OVERLOADABLE float vload_half(size_t offset, const SPACE half *p) { \
   return __gen_ocl_f16to32(*(SPACE short *)(p + offset)); \
 } \
+OVERLOADABLE float vloada_half(size_t offset, const SPACE half *p) { \
+  return vload_half(offset, p); \
+} \
 OVERLOADABLE float2 vload_half2(size_t offset, const SPACE half *p) { \
   return (float2)(vload_half(offset*2, p), \
                   vload_half(offset*2 + 1, p)); \
 } \
+OVERLOADABLE float2 vloada_half2(size_t offset, const SPACE half *p) { \
+  return (float2)(vloada_half(offset*2, p), \
+                  vloada_half(offset*2 + 1, p)); \
+} \
 OVERLOADABLE float3 vload_half3(size_t offset, const SPACE half *p) { \
   return (float3)(vload_half(offset*3, p), \
                   vload_half(offset*3 + 1, p), \
@@ -197,14 +204,26 @@ OVERLOADABLE float4 vload_half4(size_t offset, const SPACE half *p) { \
   return (float4)(vload_half2(offset*2, p), \
                   vload_half2(offset*2 + 1, p)); \
 } \
+OVERLOADABLE float4 vloada_half4(size_t offset, const SPACE half *p) { \
+  return (float4)(vloada_half2(offset*2, p), \
+                  vloada_half2(offset*2 + 1, p)); \
+} \
 OVERLOADABLE float8 vload_half8(size_t offset, const SPACE half *p) { \
   return (float8)(vload_half4(offset*2, p), \
                   vload_half4(offset*2 + 1, p)); \
 } \
+OVERLOADABLE float8 vloada_half8(size_t offset, const SPACE half *p) { \
+  return (float8)(vloada_half4(offset*2, p), \
+                  vloada_half4(offset*2 + 1, p)); \
+} \
 OVERLOADABLE float16 vload_half16(size_t offset, const SPACE half *p) { \
   return (float16)(vload_half8(offset*2, p), \
                    vload_half8(offset*2 + 1, p)); \
-}
+}\
+OVERLOADABLE float16 vloada_half16(size_t offset, const SPACE half *p) { \
+  return (float16)(vloada_half8(offset*2, p), \
+                   vloada_half8(offset*2 + 1, p)); \
+}\
 
 #define DECL_HALF_ST_SPACE_ROUND(SPACE, ROUND, FUNC) \
 OVERLOADABLE void vstore_half##ROUND(float data, size_t offset, SPACE half *p) { \
-- 
1.9.1



More information about the Beignet mailing list