[Beignet] [PATCH 13/18] Add the geometric functions into lib ocl
junyan.he at inbox.com
junyan.he at inbox.com
Tue Aug 12 00:32:38 PDT 2014
From: Junyan He <junyan.he at linux.intel.com>
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/libocl/Makefile.in | 2 +-
backend/src/libocl/include/ocl_geometric.h | 39 ++++++++++++
backend/src/libocl/lib/ocl_geometric.cl | 96 ++++++++++++++++++++++++++++++
3 files changed, 136 insertions(+), 1 deletion(-)
create mode 100644 backend/src/libocl/include/ocl_geometric.h
create mode 100644 backend/src/libocl/lib/ocl_geometric.cl
diff --git a/backend/src/libocl/Makefile.in b/backend/src/libocl/Makefile.in
index be91ac9..e6e0350 100644
--- a/backend/src/libocl/Makefile.in
+++ b/backend/src/libocl/Makefile.in
@@ -7,7 +7,7 @@ GENERATED_FILES=ocl_as.cl ocl_convert.cl ocl_common.cl ocl_relational.cl ocl_int
GENERATED_HEADERS=ocl_defines.h ocl_as.h ocl_convert.h ocl_common.h ocl_relational.h ocl_integer.h ocl_math.h
GENERATED_CL_SRCS=$(addprefix lib/, $(GENERATED_FILES))
GENERATED_CL_HEADERS=$(addprefix include/, $(GENERATED_HEADERS))
-CL_FILE_NAMES=ocl_workitem.cl ocl_atom.cl ocl_async.cl ocl_sync.cl ocl_misc.cl ocl_vload.cl $(GENERATED_FILES)
+CL_FILE_NAMES=ocl_workitem.cl ocl_atom.cl ocl_async.cl ocl_sync.cl ocl_misc.cl ocl_vload.cl ocl_geometric.cl $(GENERATED_FILES)
LL_FILE_NAMES=
CL_SRCS=$(addprefix lib/, $(CL_FILE_NAMES))
LL_SRCS=$(addprefix lib/, $(LL_FILE_NAMES))
diff --git a/backend/src/libocl/include/ocl_geometric.h b/backend/src/libocl/include/ocl_geometric.h
new file mode 100644
index 0000000..9d953f1
--- /dev/null
+++ b/backend/src/libocl/include/ocl_geometric.h
@@ -0,0 +1,39 @@
+#ifndef __OCL_GEOMETRIC_H__
+#define __OCL_GEOMETRIC_H__
+
+#include "ocl_types.h"
+
+OVERLOADABLE float dot(float p0, float p1);
+OVERLOADABLE float dot(float2 p0, float2 p1);
+OVERLOADABLE float dot(float3 p0, float3 p1);
+OVERLOADABLE float dot(float4 p0, float4 p1);
+OVERLOADABLE float length(float x);
+OVERLOADABLE float length(float2 x);
+OVERLOADABLE float length(float3 x);
+OVERLOADABLE float length(float4 x);
+OVERLOADABLE float distance(float x, float y);
+OVERLOADABLE float distance(float2 x, float2 y);
+OVERLOADABLE float distance(float3 x, float3 y);
+OVERLOADABLE float distance(float4 x, float4 y);
+OVERLOADABLE float normalize(float x);
+OVERLOADABLE float2 normalize(float2 x);
+OVERLOADABLE float3 normalize(float3 x);
+OVERLOADABLE float4 normalize(float4 x);
+
+OVERLOADABLE float fast_length(float x);
+OVERLOADABLE float fast_length(float2 x);
+OVERLOADABLE float fast_length(float3 x);
+OVERLOADABLE float fast_length(float4 x);
+OVERLOADABLE float fast_distance(float x, float y);
+OVERLOADABLE float fast_distance(float2 x, float2 y);
+OVERLOADABLE float fast_distance(float3 x, float3 y);
+OVERLOADABLE float fast_distance(float4 x, float4 y);
+OVERLOADABLE float fast_normalize(float x);
+OVERLOADABLE float2 fast_normalize(float2 x);
+OVERLOADABLE float3 fast_normalize(float3 x);
+OVERLOADABLE float4 fast_normalize(float4 x);
+
+OVERLOADABLE float3 cross(float3 v0, float3 v1);
+OVERLOADABLE float4 cross(float4 v0, float4 v1);
+
+#endif
diff --git a/backend/src/libocl/lib/ocl_geometric.cl b/backend/src/libocl/lib/ocl_geometric.cl
new file mode 100644
index 0000000..9f92295
--- /dev/null
+++ b/backend/src/libocl/lib/ocl_geometric.cl
@@ -0,0 +1,96 @@
+#include "ocl_geometric.h"
+#include "ocl_common.h"
+#include "ocl_relational.h"
+#include "ocl_math.h"
+#include "ocl_float.h"
+
+PURE CONST float __gen_ocl_fabs(float x);
+
+OVERLOADABLE float dot(float p0, float p1) {
+ return p0 * p1;
+}
+OVERLOADABLE float dot(float2 p0, float2 p1) {
+ return p0.x * p1.x + p0.y * p1.y;
+}
+OVERLOADABLE float dot(float3 p0, float3 p1) {
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
+}
+OVERLOADABLE float dot(float4 p0, float4 p1) {
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
+}
+OVERLOADABLE float length(float x) { return __gen_ocl_fabs(x); }
+
+#define BODY \
+ if(m == 0) \
+ return 0; \
+ if(isinf(m)) \
+ return INFINITY; \
+ if(m < 1) \
+ m = 1; \
+ x /= m; \
+ return m * sqrt(dot(x,x));
+OVERLOADABLE float length(float2 x) {
+ float m = max(__gen_ocl_fabs(x.s0), __gen_ocl_fabs(x.s1));
+ BODY;
+}
+OVERLOADABLE float length(float3 x) {
+ float m = max(__gen_ocl_fabs(x.s0), max(__gen_ocl_fabs(x.s1), __gen_ocl_fabs(x.s2)));
+ BODY;
+}
+OVERLOADABLE float length(float4 x) {
+ float m = max(__gen_ocl_fabs(x.s0), max(__gen_ocl_fabs(x.s1), max(__gen_ocl_fabs(x.s2), __gen_ocl_fabs(x.s3))));
+ BODY;
+}
+#undef BODY
+OVERLOADABLE float distance(float x, float y) { return length(x-y); }
+OVERLOADABLE float distance(float2 x, float2 y) { return length(x-y); }
+OVERLOADABLE float distance(float3 x, float3 y) { return length(x-y); }
+OVERLOADABLE float distance(float4 x, float4 y) { return length(x-y); }
+OVERLOADABLE float normalize(float x) { /* scalar normalize: 0 for +/-0, NAN for NaN, otherwise +1 or -1 by sign */
+ union { float f; unsigned u; } u;
+ u.f = x;
+ if((u.u & 0x7fffffffu) == 0) /* ignore the sign bit so -0.0f is also treated as zero, like the vector overloads */
+ return 0.f;
+ if(isnan(x))
+ return NAN;
+ return u.u < 0x7fffffff ? 1.f : -1.f; /* sign bit clear -> +1, sign bit set -> -1 */
+}
+OVERLOADABLE float2 normalize(float2 x) {
+ float m = length(x);
+ if(m == 0)
+ return 0;
+ return x / m;
+}
+OVERLOADABLE float3 normalize(float3 x) {
+ float m = length(x);
+ if(m == 0)
+ return 0;
+ return x / m;
+}
+OVERLOADABLE float4 normalize(float4 x) {
+ float m = length(x);
+ if(m == 0)
+ return 0;
+ return x / m;
+}
+
+OVERLOADABLE float fast_length(float x) { return __gen_ocl_fabs(x); }
+OVERLOADABLE float fast_length(float2 x) { return sqrt(dot(x,x)); }
+OVERLOADABLE float fast_length(float3 x) { return sqrt(dot(x,x)); }
+OVERLOADABLE float fast_length(float4 x) { return sqrt(dot(x,x)); }
+OVERLOADABLE float fast_distance(float x, float y) { return fast_length(x-y); } /* fast_distance(x, y) is fast_length(x - y), not the precise length() */
+OVERLOADABLE float fast_distance(float2 x, float2 y) { return fast_length(x-y); }
+OVERLOADABLE float fast_distance(float3 x, float3 y) { return fast_length(x-y); }
+OVERLOADABLE float fast_distance(float4 x, float4 y) { return fast_length(x-y); }
+OVERLOADABLE float fast_normalize(float x) { return x > 0 ? 1.f : (x < 0 ? -1.f : 0.f); }
+OVERLOADABLE float2 fast_normalize(float2 x) { return x * rsqrt(dot(x, x)); }
+OVERLOADABLE float3 fast_normalize(float3 x) { return x * rsqrt(dot(x, x)); }
+OVERLOADABLE float4 fast_normalize(float4 x) { return x * rsqrt(dot(x, x)); }
+
+OVERLOADABLE float3 cross(float3 v0, float3 v1) {
+ return v0.yzx*v1.zxy-v0.zxy*v1.yzx;
+}
+OVERLOADABLE float4 cross(float4 v0, float4 v1) {
+ return (float4)(v0.yzx*v1.zxy-v0.zxy*v1.yzx, 0.f);
+}
+
--
1.8.3.2
More information about the Beignet
mailing list