[Beignet] [PATCH V2 5/9] Libocl: Add half type dot
Xiuli Pan
xiuli.pan at intel.com
Mon Aug 8 03:31:23 UTC 2016
From: Pan Xiuli <xiuli.pan at intel.com>
The half-type dot() overloads are defined in the OpenCL 1.0 and 1.1 specs, but are missing from the OpenCL 1.2 spec.
Add them anyway for the use cases that still need them.
Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
backend/src/libocl/include/ocl_geometric.h | 4 ++++
backend/src/libocl/src/ocl_geometric.cl | 12 ++++++++++++
2 files changed, 16 insertions(+)
diff --git a/backend/src/libocl/include/ocl_geometric.h b/backend/src/libocl/include/ocl_geometric.h
index 86d543b..1713f8f 100644
--- a/backend/src/libocl/include/ocl_geometric.h
+++ b/backend/src/libocl/include/ocl_geometric.h
@@ -24,6 +24,10 @@ OVERLOADABLE float dot(float p0, float p1);
OVERLOADABLE float dot(float2 p0, float2 p1);
OVERLOADABLE float dot(float3 p0, float3 p1);
OVERLOADABLE float dot(float4 p0, float4 p1);
+OVERLOADABLE half dot(half p0, half p1); /* half overloads of dot(): scalar first, then half2/half3/half4 */
+OVERLOADABLE half dot(half2 p0, half2 p1);
+OVERLOADABLE half dot(half3 p0, half3 p1);
+OVERLOADABLE half dot(half4 p0, half4 p1);
OVERLOADABLE float length(float x);
OVERLOADABLE float length(float2 x);
OVERLOADABLE float length(float3 x);
diff --git a/backend/src/libocl/src/ocl_geometric.cl b/backend/src/libocl/src/ocl_geometric.cl
index 886e88c..cf98503 100644
--- a/backend/src/libocl/src/ocl_geometric.cl
+++ b/backend/src/libocl/src/ocl_geometric.cl
@@ -35,6 +35,18 @@ OVERLOADABLE float dot(float3 p0, float3 p1) {
OVERLOADABLE float dot(float4 p0, float4 p1) {
return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
}
+OVERLOADABLE half dot(half p0, half p1) { /* scalar overload: the dot product is simply the product p0 * p1 */
+ return p0 * p1;
+}
+OVERLOADABLE half dot(half2 p0, half2 p1) { /* returns the sum of the x and y component products of p0 and p1 */
+ return p0.x * p1.x + p0.y * p1.y;
+}
+OVERLOADABLE half dot(half3 p0, half3 p1) { /* returns the sum of the x, y and z component products of p0 and p1 */
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
+}
+OVERLOADABLE half dot(half4 p0, half4 p1) { /* returns the sum of the x, y, z and w component products of p0 and p1 */
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
+}
OVERLOADABLE float length(float x) { return __gen_ocl_fabs(x); }
#define BODY \
--
2.7.4
More information about the Beignet
mailing list