[Beignet] [PATCH] libocl: use mad() to implement dot()

Ruiling Song ruiling.song at intel.com
Wed Jan 28 18:36:19 PST 2015


Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
 backend/src/libocl/src/ocl_geometric.cl |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/backend/src/libocl/src/ocl_geometric.cl b/backend/src/libocl/src/ocl_geometric.cl
index e469ff9..cd09968 100644
--- a/backend/src/libocl/src/ocl_geometric.cl
+++ b/backend/src/libocl/src/ocl_geometric.cl
@@ -27,13 +27,13 @@ OVERLOADABLE float dot(float p0, float p1) {
   return p0 * p1;
 }
 OVERLOADABLE float dot(float2 p0, float2 p1) {
-  return p0.x * p1.x + p0.y * p1.y;
+  return mad(p0.x, p1.x, p0.y * p1.y);
 }
 OVERLOADABLE float dot(float3 p0, float3 p1) {
-  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
+  return mad(p0.x, p1.x, mad(p0.y, p1.y, p0.z*p1.z));
 }
 OVERLOADABLE float dot(float4 p0, float4 p1) {
-  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
+  return mad(p0.x, p1.x, mad(p0.y, p1.y, mad(p0.z, p1.z, p0.w * p1.w)));
 }
 OVERLOADABLE float length(float x) { return __gen_ocl_fabs(x); }
 
-- 
1.7.10.4



More information about the Beignet mailing list