[Beignet] [PATCH] libocl: using mad() to implement dot()
Ruiling Song
ruiling.song at intel.com
Wed Jan 28 18:36:19 PST 2015
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/libocl/src/ocl_geometric.cl | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/backend/src/libocl/src/ocl_geometric.cl b/backend/src/libocl/src/ocl_geometric.cl
index e469ff9..cd09968 100644
--- a/backend/src/libocl/src/ocl_geometric.cl
+++ b/backend/src/libocl/src/ocl_geometric.cl
@@ -27,13 +27,13 @@ OVERLOADABLE float dot(float p0, float p1) {
return p0 * p1;
}
OVERLOADABLE float dot(float2 p0, float2 p1) {
- return p0.x * p1.x + p0.y * p1.y;
+ return mad(p0.x, p1.x, p0.y * p1.y);
}
OVERLOADABLE float dot(float3 p0, float3 p1) {
- return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
+ return mad(p0.x, p1.x, mad(p0.y, p1.y, p0.z*p1.z));
}
OVERLOADABLE float dot(float4 p0, float4 p1) {
- return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
+ return mad(p0.x, p1.x, mad(p0.y, p1.y, mad(p0.z, p1.z, p0.w * p1.w)));
}
OVERLOADABLE float length(float x) { return __gen_ocl_fabs(x); }
--
1.7.10.4
More information about the Beignet mailing list