[Beignet] [PATCH] add address space qualifier to vectorized "fract" and "frexp"

Homer Hsing homer.xing at intel.com
Wed Jul 24 22:37:00 PDT 2013


Rename the vectorized builtin functions "fract" and "frexp" to their
"__gen_ocl" internal names, then add public overloads for each address
space qualifier (global, local, private) that forward to them.

Also add a 3-component version of "fract".

Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/ocl_stdlib.tmpl.h | 121 ++++++++++++++++++++++++++----------------
 1 file changed, 74 insertions(+), 47 deletions(-)

diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 8d21de4..6efce0e 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -779,26 +779,37 @@ INLINE_OVERLOADABLE float frexp(float x, global int *exp) { return __gen_ocl_fre
 INLINE_OVERLOADABLE float frexp(float x, local int *exp) { return __gen_ocl_frexp(x, (int *)exp); }
 INLINE_OVERLOADABLE float frexp(float x, private int *exp) { return __gen_ocl_frexp(x, (int *)exp); }
 
-INLINE_OVERLOADABLE float2 frexp(float2 x, int2 *exp) {
-  return (float2)(frexp(x.s0, (int *)exp), frexp(x.s1, 1 + (int *)exp));
+INLINE_OVERLOADABLE float2 __gen_ocl_frexp(float2 x, int2 *exp) {
+  return (float2)(__gen_ocl_frexp(x.s0, (int *)exp), __gen_ocl_frexp(x.s1, 1 + (int *)exp));
 }
 
-INLINE_OVERLOADABLE float3 frexp(float3 x, int3 *exp) {
-  return (float3)(frexp(x.s0, (int *)exp), frexp(x.s1, 1 + (int *)exp), frexp(x.s2, 2 + (int *)exp));
+INLINE_OVERLOADABLE float3 __gen_ocl_frexp(float3 x, int3 *exp) {
+  return (float3)(__gen_ocl_frexp(x.s0, (int *)exp), __gen_ocl_frexp(x.s1, 1 + (int *)exp), __gen_ocl_frexp(x.s2, 2 + (int *)exp));
 }
 
-INLINE_OVERLOADABLE float4 frexp(float4 x, int4 *exp) {
-  return (float4)(frexp(x.s0, (int *)exp), frexp(x.s1, 1 + (int *)exp), frexp(x.s2, 2 + (int *)exp), frexp(x.s3, 3 + (int *)exp));
+INLINE_OVERLOADABLE float4 __gen_ocl_frexp(float4 x, int4 *exp) {
+  return (float4)(__gen_ocl_frexp(x.s0, (int *)exp), __gen_ocl_frexp(x.s1, 1 + (int *)exp), __gen_ocl_frexp(x.s2, 2 + (int *)exp), __gen_ocl_frexp(x.s3, 3 + (int *)exp));
 }
 
-INLINE_OVERLOADABLE float8 frexp(float8 x, int8 *exp) {
-  return (float8)(frexp(x.s0, (int *)exp), frexp(x.s1, 1 + (int *)exp), frexp(x.s2, 2 + (int *)exp), frexp(x.s3, 3 + (int *)exp), frexp(x.s4, 4 + (int *)exp), frexp(x.s5, 5 + (int *)exp), frexp(x.s6, 6 + (int *)exp), frexp(x.s7, 7 + (int *)exp));
+INLINE_OVERLOADABLE float8 __gen_ocl_frexp(float8 x, int8 *exp) {
+  return (float8)(__gen_ocl_frexp(x.s0, (int *)exp), __gen_ocl_frexp(x.s1, 1 + (int *)exp), __gen_ocl_frexp(x.s2, 2 + (int *)exp), __gen_ocl_frexp(x.s3, 3 + (int *)exp), __gen_ocl_frexp(x.s4, 4 + (int *)exp), __gen_ocl_frexp(x.s5, 5 + (int *)exp), __gen_ocl_frexp(x.s6, 6 + (int *)exp), __gen_ocl_frexp(x.s7, 7 + (int *)exp));
 }
 
-INLINE_OVERLOADABLE float16 frexp(float16 x, int16 *exp) {
-  return (float16)(frexp(x.s0, (int *)exp), frexp(x.s1, 1 + (int *)exp), frexp(x.s2, 2 + (int *)exp), frexp(x.s3, 3 + (int *)exp), frexp(x.s4, 4 + (int *)exp), frexp(x.s5, 5 + (int *)exp), frexp(x.s6, 6 + (int *)exp), frexp(x.s7, 7 + (int *)exp), frexp(x.s8, 8 + (int *)exp), frexp(x.s9, 9 + (int *)exp), frexp(x.sa, 10 + (int *)exp), frexp(x.sb, 11 + (int *)exp), frexp(x.sc, 12 + (int *)exp), frexp(x.sd, 13 + (int *)exp), frexp(x.se, 14 + (int *)exp), frexp(x.sf, 15 + (int *)exp));
+INLINE_OVERLOADABLE float16 __gen_ocl_frexp(float16 x, int16 *exp) {
+  return (float16)(__gen_ocl_frexp(x.s0, (int *)exp), __gen_ocl_frexp(x.s1, 1 + (int *)exp), __gen_ocl_frexp(x.s2, 2 + (int *)exp), __gen_ocl_frexp(x.s3, 3 + (int *)exp), __gen_ocl_frexp(x.s4, 4 + (int *)exp), __gen_ocl_frexp(x.s5, 5 + (int *)exp), __gen_ocl_frexp(x.s6, 6 + (int *)exp), __gen_ocl_frexp(x.s7, 7 + (int *)exp), __gen_ocl_frexp(x.s8, 8 + (int *)exp), __gen_ocl_frexp(x.s9, 9 + (int *)exp), __gen_ocl_frexp(x.sa, 10 + (int *)exp), __gen_ocl_frexp(x.sb, 11 + (int *)exp), __gen_ocl_frexp(x.sc, 12 + (int *)exp), __gen_ocl_frexp(x.sd, 13 + (int *)exp), __gen_ocl_frexp(x.se, 14 + (int *)exp), __gen_ocl_frexp(x.sf, 15 + (int *)exp));
 }
 
+#define DEF(n) \
+  INLINE_OVERLOADABLE float##n frexp(float##n x, global int##n *exp) { return __gen_ocl_frexp(x, (int##n *)exp); } \
+  INLINE_OVERLOADABLE float##n frexp(float##n x, local int##n *exp) { return __gen_ocl_frexp(x, (int##n *)exp); } \
+  INLINE_OVERLOADABLE float##n frexp(float##n x, private int##n *exp) { return __gen_ocl_frexp(x, (int##n *)exp); }
+DEF(2)
+DEF(3)
+DEF(4)
+DEF(8)
+DEF(16)
+#undef DEF
+
 INLINE_OVERLOADABLE float nextafter(float x, float y) {
   uint hx = as_uint(x), ix = hx & 0x7FFFFFFF;
   uint hy = as_uint(y), iy = hy & 0x7FFFFFFF;
@@ -903,45 +914,61 @@ INLINE_OVERLOADABLE float fract(float x, global float *p) { return __gen_ocl_fra
 INLINE_OVERLOADABLE float fract(float x, local float *p) { return __gen_ocl_fract(x, (float *)p); }
 INLINE_OVERLOADABLE float fract(float x, private float *p) { return __gen_ocl_fract(x, (float *)p); }
 
-INLINE_OVERLOADABLE float2 fract(float2 x, float2 *p) {
-  return (float2)(fract(x.s0, (float *)p),
-                  fract(x.s1, 1 + (float *)p));
-}
-INLINE_OVERLOADABLE float4 fract(float4 x, float4 *p) {
-  return (float4)(fract(x.s0, (float *)p),
-                  fract(x.s1, 1 + (float *)p),
-                  fract(x.s2, 2 + (float *)p),
-                  fract(x.s3, 3 + (float *)p));
-}
-INLINE_OVERLOADABLE float8 fract(float8 x, float8 *p) {
-  return (float8)(fract(x.s0, (float *)p),
-                  fract(x.s1, 1 + (float *)p),
-                  fract(x.s2, 2 + (float *)p),
-                  fract(x.s3, 3 + (float *)p),
-                  fract(x.s4, 4 + (float *)p),
-                  fract(x.s5, 5 + (float *)p),
-                  fract(x.s6, 6 + (float *)p),
-                  fract(x.s7, 7 + (float *)p));
-}
-INLINE_OVERLOADABLE float16 fract(float16 x, float16 *p) {
-  return (float16)(fract(x.s0, (float *)p),
-                   fract(x.s1, 1 + (float *)p),
-                   fract(x.s2, 2 + (float *)p),
-                   fract(x.s3, 3 + (float *)p),
-                   fract(x.s4, 4 + (float *)p),
-                   fract(x.s5, 5 + (float *)p),
-                   fract(x.s6, 6 + (float *)p),
-                   fract(x.s7, 7 + (float *)p),
-                   fract(x.s8, 8 + (float *)p),
-                   fract(x.s9, 9 + (float *)p),
-                   fract(x.sa, 10 + (float *)p),
-                   fract(x.sb, 11 + (float *)p),
-                   fract(x.sc, 12 + (float *)p),
-                   fract(x.sd, 13 + (float *)p),
-                   fract(x.se, 14 + (float *)p),
-                   fract(x.sf, 15 + (float *)p));
+INLINE_OVERLOADABLE float2 __gen_ocl_fract(float2 x, float2 *p) {
+  return (float2)(__gen_ocl_fract(x.s0, (float *)p),
+                  __gen_ocl_fract(x.s1, 1 + (float *)p));
+}
+INLINE_OVERLOADABLE float3 __gen_ocl_fract(float3 x, float3 *p) {
+  return (float3)(__gen_ocl_fract(x.s0, (float *)p),
+                  __gen_ocl_fract(x.s1, 1 + (float *)p),
+                  __gen_ocl_fract(x.s2, 2 + (float *)p));
+}
+INLINE_OVERLOADABLE float4 __gen_ocl_fract(float4 x, float4 *p) {
+  return (float4)(__gen_ocl_fract(x.s0, (float *)p),
+                  __gen_ocl_fract(x.s1, 1 + (float *)p),
+                  __gen_ocl_fract(x.s2, 2 + (float *)p),
+                  __gen_ocl_fract(x.s3, 3 + (float *)p));
+}
+INLINE_OVERLOADABLE float8 __gen_ocl_fract(float8 x, float8 *p) {
+  return (float8)(__gen_ocl_fract(x.s0, (float *)p),
+                  __gen_ocl_fract(x.s1, 1 + (float *)p),
+                  __gen_ocl_fract(x.s2, 2 + (float *)p),
+                  __gen_ocl_fract(x.s3, 3 + (float *)p),
+                  __gen_ocl_fract(x.s4, 4 + (float *)p),
+                  __gen_ocl_fract(x.s5, 5 + (float *)p),
+                  __gen_ocl_fract(x.s6, 6 + (float *)p),
+                  __gen_ocl_fract(x.s7, 7 + (float *)p));
+}
+INLINE_OVERLOADABLE float16 __gen_ocl_fract(float16 x, float16 *p) {
+  return (float16)(__gen_ocl_fract(x.s0, (float *)p),
+                   __gen_ocl_fract(x.s1, 1 + (float *)p),
+                   __gen_ocl_fract(x.s2, 2 + (float *)p),
+                   __gen_ocl_fract(x.s3, 3 + (float *)p),
+                   __gen_ocl_fract(x.s4, 4 + (float *)p),
+                   __gen_ocl_fract(x.s5, 5 + (float *)p),
+                   __gen_ocl_fract(x.s6, 6 + (float *)p),
+                   __gen_ocl_fract(x.s7, 7 + (float *)p),
+                   __gen_ocl_fract(x.s8, 8 + (float *)p),
+                   __gen_ocl_fract(x.s9, 9 + (float *)p),
+                   __gen_ocl_fract(x.sa, 10 + (float *)p),
+                   __gen_ocl_fract(x.sb, 11 + (float *)p),
+                   __gen_ocl_fract(x.sc, 12 + (float *)p),
+                   __gen_ocl_fract(x.sd, 13 + (float *)p),
+                   __gen_ocl_fract(x.se, 14 + (float *)p),
+                   __gen_ocl_fract(x.sf, 15 + (float *)p));
 }
 
+#define DEF(n) \
+  INLINE_OVERLOADABLE float##n fract(float##n x, global float##n *p) { return __gen_ocl_fract(x, (float##n *)p); } \
+  INLINE_OVERLOADABLE float##n fract(float##n x, local float##n *p) { return __gen_ocl_fract(x, (float##n *)p); } \
+  INLINE_OVERLOADABLE float##n fract(float##n x, private float##n *p) { return __gen_ocl_fract(x, (float##n *)p); }
+DEF(2)
+DEF(3)
+DEF(4)
+DEF(8)
+DEF(16)
+#undef DEF
+
 INLINE_OVERLOADABLE float __gen_ocl_remquo(float x, float y, int *quo) {
   uint hx = as_uint(x), ix = hx & 0x7FFFFFFF, hy = as_uint(y), iy = hy & 0x7FFFFFFF;
   if (ix > 0x7F800000 || iy > 0x7F800000 || ix == 0x7F800000 || iy == 0)
-- 
1.8.1.2



More information about the Beignet mailing list