[Beignet] [PATCH] Add memory space parameter support to the autogeneration script.

Thu Jul 25 00:07:02 PDT 2013

Enhance the Python script to support pointer parameters qualified with a
memory space (__global, __local or __private), such as:

gentype fract (gentype x, __global gentype *iptr)
gentype fract (gentype x, __local gentype *iptr)
gentype fract (gentype x, __private gentype *iptr)

This allows the following functions to be enabled in the builtin function
spec file: fract/frexp/modf/nextafter/remquo/sincos.

Remove the now-duplicated hand-written vector versions from ocl_stdlib.tmpl.h.

Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
---
 backend/src/builtin_vector_proto.def |   68 +++++++--------
 backend/src/gen_builtin_vector.py    |   69 ++++++++++++---
 backend/src/ocl_stdlib.tmpl.h        |  157 ----------------------------------
 3 files changed, 89 insertions(+), 205 deletions(-)

diff --git a/backend/src/builtin_vector_proto.def b/backend/src/builtin_vector_proto.def
index 13d7a34..d55272d 100644
--- a/backend/src/builtin_vector_proto.def
+++ b/backend/src/builtin_vector_proto.def
@@ -35,21 +35,21 @@ gentype fmin (gentype x, gentype y)
 gentypef fmin (gentypef x, float y)
 gentyped fmin (gentyped x, double y)
 gentype fmod (gentype x, gentype y)
-#gentype fract (gentype x, __global gentype *iptr)
-#gentype fract (gentype x, __local gentype *iptr)
-#gentype fract (gentype x, __private gentype *iptr)
-#floatn frexp (floatn x, __global intn *exp)
-#floatn frexp (floatn x, __local intn *exp)
-#floatn frexp (floatn x, __private intn *exp)
-#float frexp (float x, __global int *exp)
-#float frexp (float x, __local int *exp)
-#float frexp (float x, __private int *exp)
-#doublen frexp (doublen x, __global intn *exp)
-#doublen frexp (doublen x, __local intn *exp)
-#doublen frexp (doublen x, __private intn *exp)
-#double frexp (double x, __global int *exp)
-#double frexp (double x, __local int *exp)
-#double frexp (double x, __private int *exp)
+gentype fract (gentype x, __global gentype *iptr)
+gentype fract (gentype x, __local gentype *iptr)
+gentype fract (gentype x, __private gentype *iptr)
+floatn frexp (floatn x, __global intn *exp)
+floatn frexp (floatn x, __local intn *exp)
+floatn frexp (floatn x, __private intn *exp)
+float frexp (float x, __global int *exp)
+float frexp (float x, __local int *exp)
+float frexp (float x, __private int *exp)
+doublen frexp (doublen x, __global intn *exp)
+doublen frexp (doublen x, __local intn *exp)
+doublen frexp (doublen x, __private intn *exp)
+double frexp (double x, __global int *exp)
+double frexp (double x, __local int *exp)
+double frexp (double x, __private int *exp)
 gentype hypot (gentype x, gentype y)
 intn ilogb (floatn x)
 int ilogb (float x)
@@ -82,14 +82,14 @@ gentype logb (gentype x)
 gentype mad (gentype a, gentype b, gentype c)
 gentype maxmag (gentype x, gentype y)
 gentype minmag (gentype x, gentype y)
-#gentype modf (gentype x, __global gentype *iptr)
-#gentype modf (gentype x, __local gentype *iptr)
-#gentype modf (gentype x, __private gentype *iptr)
+gentype modf (gentype x, __global gentype *iptr)
+gentype modf (gentype x, __local gentype *iptr)
+gentype modf (gentype x, __private gentype *iptr)
 floatn nan (uintn nancode)
 float nan (uint nancode)
 doublen nan (ulongn nancode)
 double nan (ulong nancode)
-#gentype nextafter (gentype x, gentype y)
+gentype nextafter (gentype x, gentype y)
 gentype pow (gentype x, gentype y)
 floatn pown (floatn x, intn y)
 float pown (float x, int y)
@@ -98,18 +98,18 @@ double pown (double x, int y)
 #XXX we define powr as pow
 #gentype powr (gentype x, gentype y)
 gentype remainder (gentype x, gentype y)
-#floatn remquo (floatn x, floatn y, __global intn *quo)
-#floatn remquo (floatn x, floatn y, __local intn *quo)
-#floatn remquo (floatn x, floatn y, __private intn *quo)
-#float remquo (float x, float y, __global int *quo)
-#float remquo (float x, float y, __local int *quo)
-#float remquo (float x, float y, __private int *quo)
-#doublen remquo (doublen x, doublen y, __global intn *quo)
-#doublen remquo (doublen x, doublen y, __local intn *quo)
-#doublen remquo (doublen x, doublen y, __private intn *quo)
-#double remquo (double x, double y, __global int *quo)
-#double remquo (double x, double y, __local int *quo)
-#double remquo (double x, double y, __private int *quo)
+floatn remquo (floatn x, floatn y, __global intn *quo)
+floatn remquo (floatn x, floatn y, __local intn *quo)
+floatn remquo (floatn x, floatn y, __private intn *quo)
+float remquo (float x, float y, __global int *quo)
+float remquo (float x, float y, __local int *quo)
+float remquo (float x, float y, __private int *quo)
+doublen remquo (doublen x, doublen y, __global intn *quo)
+doublen remquo (doublen x, doublen y, __local intn *quo)
+doublen remquo (doublen x, doublen y, __private intn *quo)
+double remquo (double x, double y, __global int *quo)
+double remquo (double x, double y, __local int *quo)
+double remquo (double x, double y, __private int *quo)
 gentype rint (gentype)
 floatn rootn (floatn x, intn y)
 
@@ -118,9 +118,9 @@ doublen rootn (double x, int y)
 gentype round (gentype x)
 gentype rsqrt (gentype)
 gentype sin (gentype)
-#gentype sincos (gentype x, __global gentype *cosval)
-#gentype sincos (gentype x, __local gentype *cosval)
-#gentype sincos (gentype x, __private gentype *cosval)
+gentype sincos (gentype x, __global gentype *cosval)
+gentype sincos (gentype x, __local gentype *cosval)
+gentype sincos (gentype x, __private gentype *cosval)
 gentype sinh (gentype)
 gentype sinpi (gentype x)
 gentype sqrt (gentype)
diff --git a/backend/src/gen_builtin_vector.py b/backend/src/gen_builtin_vector.py
index f5b0f3c..b073682 100755
--- a/backend/src/gen_builtin_vector.py
+++ b/backend/src/gen_builtin_vector.py
@@ -39,6 +39,19 @@ def gen_vector_type(type_set, vector_set = all_vector):
             ret.append((t, i))
     return ret
 
+def set_vector_memspace(vector_type_set, memspace):
+    ret = []
+    if memspace == '':
+        return vector_type_set
+    for t in vector_type_set:
+        ret.append((t[0], t[1], memspace))
+    return ret
+
+# if we have 3 elements in the type tuple, we are a pointer with a memory space type
+# at the third element.
+def isPointer(t):
+    return len(t) == 3
+
 all_itype = "char","short","int","long"
 all_utype = "uchar","ushort","uint","ulong"
 all_int_type = all_itype + all_utype
@@ -98,8 +111,18 @@ def _prefix(prefix, dtype):
         return prefix + '_' + dtype
     return dtype
 
+memspaces = ["__local ", "__private ", "__global "]
+
+def stripMemSpace(t):
+    if t[0:2] == '__':
+        for memspace in memspaces :
+            if t[0:len(memspace)] == memspace:
+                return memspace, t[len(memspace):]
+    return '', t
+
 def check_type(types):
     for t in types:
+        memspace, t = stripMemSpace(t)
         if not t in type_dict:
             print t
             raise "found invalid type."
@@ -167,7 +190,6 @@ def fixup_type(dstType, srcType, n):
     print dstType, srcType
     raise "type mispatch"
 
-
 class builtinProto():
     valueTypeStr = ""
     functionName = ""
@@ -191,7 +213,6 @@ class builtinProto():
 
     def append(self, line, nextInit = ""):
         self.outputStr.append(line);
-        #print line
         return nextInit;
 
     def indentSpace(self):
@@ -203,25 +224,34 @@ class builtinProto():
 
     def init_from_line(self, t):
         self.append('//{}'.format(t))
-        line = filter(None, re.split(',| |\(', t.rstrip('\n)')))
+        line = filter(None, re.split(',| |\(', t.rstrip(')\n')))
         self.paramCount = 0
+        stripped = 0
+        memSpace = ''
         for i, text in enumerate(line):
-            if i == 0:
+            idx = i - stripped
+            if idx == 0:
                 self.valueTypeStr = _prefix(self.prefix, line[i])
                 continue
 
-            if i == 1:
+            if idx == 1:
                 self.functionName = line[i];
                 continue
 
-            if i % 2 == 0:
+            if idx % 2 == 0:
                 if line[i][0] == '(':
                     tmpType = line[i][1:]
                 else:
                     tmpType = line[i]
-
-                self.paramTypeStrs.append(_prefix(self.prefix, tmpType))
-                self.paramCount += 1;
+                if tmpType == '__local' or   \
+                   tmpType == '__private' or \
+                   tmpType == '__global':
+                   memSpace = tmpType + ' '
+                   stripped += 1
+                   continue
+                self.paramTypeStrs.append(memSpace + _prefix(self.prefix, tmpType))
+                memSpace = ''
+                self.paramCount += 1
 
     def gen_proto_str_1(self, vtypeSeq, ptypeSeqs, i):
         for n in range(0, self.paramCount):
@@ -243,10 +273,16 @@ class builtinProto():
             if vtype[1] == 1:
                 return
 
+            if isPointer(ptype):
+                formatStr += ptype[2]
+                pointerStr = '*'
+            else:
+                pointerStr = ''
+
             if ptype[1] != 1:
-                formatStr += '{}{} param{}'.format(ptype[0], ptype[1], n)
+                formatStr += '{}{} {}param{}'.format(ptype[0], ptype[1], pointerStr, n)
             else:
-                formatStr += '{} param{}'.format(ptype[0], n)
+                formatStr += '{} {}param{}'.format(ptype[0], pointerStr, n)
 
         formatStr += ')'
         formatStr = self.append(formatStr, '{{return ({}{})('.format(vtype[0], vtype[1]))
@@ -269,11 +305,15 @@ class builtinProto():
                 if vtype[1] != ptype[1]:
                     if ptype[1] != 1:
                         raise "parameter is not a scalar but has different width with result value."
-
+                    if isPointer(ptype):
+                        formatStr += '&'
                     formatStr += 'param{}'.format(n)
                     continue
 
-                formatStr += 'param{}.s{:x}'.format(n, j)
+                if (isPointer(ptype)):
+                    formatStr += '({} {} *)param{} + {:2d}'.format(ptype[2], ptype[0], n, j)
+                else:
+                    formatStr += 'param{}.s{:x}'.format(n, j)
 
             formatStr += ')'
 
@@ -296,7 +336,8 @@ class builtinProto():
         ptypeSeqs = []
         count = len(vtypeSeq);
         for t in self.paramTypeStrs:
-            ptypeSeqs.append(type_dict[t])
+            memspace,t = stripMemSpace(t)
+            ptypeSeqs.append(set_vector_memspace(type_dict[t], memspace))
             count = max(count, len(type_dict[t]))
 
         for i in range(count):
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 5badd56..cc626a1 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -548,44 +548,6 @@ INLINE_OVERLOADABLE float sincos(float x, global float *cosval) { return __gen_o
 INLINE_OVERLOADABLE float sincos(float x, local float *cosval) { return __gen_ocl_internal_sincos(x, (float*)cosval); }
 INLINE_OVERLOADABLE float sincos(float x, private float *cosval) { return __gen_ocl_internal_sincos(x, (float*)cosval); }
 
-INLINE_OVERLOADABLE float2 __gen_ocl_internal_sincos(float2 x, float2 *cosval) {
-  return (float2)(__gen_ocl_internal_sincos(x.s0, (float *)cosval),
-                  __gen_ocl_internal_sincos(x.s1, 1 + (float *)cosval));
-}
-INLINE_OVERLOADABLE float4 __gen_ocl_internal_sincos(float4 x, float4 *cosval) {
-  return (float4)(__gen_ocl_internal_sincos(x.s0, (float *)cosval),
-                  __gen_ocl_internal_sincos(x.s1, 1 + (float *)cosval),
-                  __gen_ocl_internal_sincos(x.s2, 2 + (float *)cosval),
-                  __gen_ocl_internal_sincos(x.s3, 3 + (float *)cosval));
-}
-INLINE_OVERLOADABLE float8 __gen_ocl_internal_sincos(float8 x, float8 *cosval) {
-  return (float8)(__gen_ocl_internal_sincos(x.s0, (float *)cosval),
-                  __gen_ocl_internal_sincos(x.s1, 1 + (float *)cosval),
-                  __gen_ocl_internal_sincos(x.s2, 2 + (float *)cosval),
-                  __gen_ocl_internal_sincos(x.s3, 3 + (float *)cosval),
-                  __gen_ocl_internal_sincos(x.s4, 4 + (float *)cosval),
-                  __gen_ocl_internal_sincos(x.s5, 5 + (float *)cosval),
-                  __gen_ocl_internal_sincos(x.s6, 6 + (float *)cosval),
-                  __gen_ocl_internal_sincos(x.s7, 7 + (float *)cosval));
-}
-INLINE_OVERLOADABLE float16 __gen_ocl_internal_sincos(float16 x, float16 *cosval) {
-  return (float16)(__gen_ocl_internal_sincos(x.s0, (float *)cosval),
-                   __gen_ocl_internal_sincos(x.s1, 1 + (float *)cosval),
-                   __gen_ocl_internal_sincos(x.s2, 2 + (float *)cosval),
-                   __gen_ocl_internal_sincos(x.s3, 3 + (float *)cosval),
-                   __gen_ocl_internal_sincos(x.s4, 4 + (float *)cosval),
-                   __gen_ocl_internal_sincos(x.s5, 5 + (float *)cosval),
-                   __gen_ocl_internal_sincos(x.s6, 6 + (float *)cosval),
-                   __gen_ocl_internal_sincos(x.s7, 7 + (float *)cosval),
-                   __gen_ocl_internal_sincos(x.s8, 8 + (float *)cosval),
-                   __gen_ocl_internal_sincos(x.s9, 9 + (float *)cosval),
-                   __gen_ocl_internal_sincos(x.sa, 10 + (float *)cosval),
-                   __gen_ocl_internal_sincos(x.sb, 11 + (float *)cosval),
-                   __gen_ocl_internal_sincos(x.sc, 12 + (float *)cosval),
-                   __gen_ocl_internal_sincos(x.sd, 13 + (float *)cosval),
-                   __gen_ocl_internal_sincos(x.se, 14 + (float *)cosval),
-                   __gen_ocl_internal_sincos(x.sf, 15 + (float *)cosval));
-}
 INLINE_OVERLOADABLE float __gen_ocl_internal_sinh(float x) {
   return (1 - native_exp(-2 * x)) / (2 * native_exp(-x));
 }
@@ -779,26 +741,6 @@ INLINE_OVERLOADABLE float frexp(float x, global int *exp) { return __gen_ocl_fre
 INLINE_OVERLOADABLE float frexp(float x, local int *exp) { return __gen_ocl_frexp(x, (int *)exp); }
 INLINE_OVERLOADABLE float frexp(float x, private int *exp) { return __gen_ocl_frexp(x, (int *)exp); }
 
-INLINE_OVERLOADABLE float2 frexp(float2 x, int2 *exp) {
-  return (float2)(frexp(x.s0, (int *)exp), frexp(x.s1, 1 + (int *)exp));
-}
-
-INLINE_OVERLOADABLE float3 frexp(float3 x, int3 *exp) {
-  return (float3)(frexp(x.s0, (int *)exp), frexp(x.s1, 1 + (int *)exp), frexp(x.s2, 2 + (int *)exp));
-}
-
-INLINE_OVERLOADABLE float4 frexp(float4 x, int4 *exp) {
-  return (float4)(frexp(x.s0, (int *)exp), frexp(x.s1, 1 + (int *)exp), frexp(x.s2, 2 + (int *)exp), frexp(x.s3, 3 + (int *)exp));
-}
-
-INLINE_OVERLOADABLE float8 frexp(float8 x, int8 *exp) {
-  return (float8)(frexp(x.s0, (int *)exp), frexp(x.s1, 1 + (int *)exp), frexp(x.s2, 2 + (int *)exp), frexp(x.s3, 3 + (int *)exp), frexp(x.s4, 4 + (int *)exp), frexp(x.s5, 5 + (int *)exp), frexp(x.s6, 6 + (int *)exp), frexp(x.s7, 7 + (int *)exp));
-}
-
-INLINE_OVERLOADABLE float16 frexp(float16 x, int16 *exp) {
-  return (float16)(frexp(x.s0, (int *)exp), frexp(x.s1, 1 + (int *)exp), frexp(x.s2, 2 + (int *)exp), frexp(x.s3, 3 + (int *)exp), frexp(x.s4, 4 + (int *)exp), frexp(x.s5, 5 + (int *)exp), frexp(x.s6, 6 + (int *)exp), frexp(x.s7, 7 + (int *)exp), frexp(x.s8, 8 + (int *)exp), frexp(x.s9, 9 + (int *)exp), frexp(x.sa, 10 + (int *)exp), frexp(x.sb, 11 + (int *)exp), frexp(x.sc, 12 + (int *)exp), frexp(x.sd, 13 + (int *)exp), frexp(x.se, 14 + (int *)exp), frexp(x.sf, 15 + (int *)exp));
-}
-
 INLINE_OVERLOADABLE float nextafter(float x, float y) {
   uint hx = as_uint(x), ix = hx & 0x7FFFFFFF;
   uint hy = as_uint(y), iy = hy & 0x7FFFFFFF;
@@ -829,39 +771,9 @@ INLINE_OVERLOADABLE float __gen_ocl_modf(float x, float *i) {
   return x - *i;
 }
 
-INLINE_OVERLOADABLE float2 __gen_ocl_modf(float2 x, float2 *i) {
-  return (float2)(__gen_ocl_modf(x.s0, (float *)i), __gen_ocl_modf(x.s1, 1 + (float *)i));
-}
-
-INLINE_OVERLOADABLE float3 __gen_ocl_modf(float3 x, float3 *i) {
-  return (float3)(__gen_ocl_modf(x.s0, (float *)i), __gen_ocl_modf(x.s1, 1 + (float *)i), __gen_ocl_modf(x.s2, 2 + (float *)i));
-}
-
-INLINE_OVERLOADABLE float4 __gen_ocl_modf(float4 x, float4 *i) {
-  return (float4)(__gen_ocl_modf(x.s0, (float *)i), __gen_ocl_modf(x.s1, 1 + (float *)i), __gen_ocl_modf(x.s2, 2 + (float *)i), __gen_ocl_modf(x.s3, 3 + (float *)i));
-}
-
-INLINE_OVERLOADABLE float8 __gen_ocl_modf(float8 x, float8 *i) {
-  return (float8)(__gen_ocl_modf(x.s0, (float *)i), __gen_ocl_modf(x.s1, 1 + (float *)i), __gen_ocl_modf(x.s2, 2 + (float *)i), __gen_ocl_modf(x.s3, 3 + (float *)i), __gen_ocl_modf(x.s4, 4 + (float *)i), __gen_ocl_modf(x.s5, 5 + (float *)i), __gen_ocl_modf(x.s6, 6 + (float *)i), __gen_ocl_modf(x.s7, 7 + (float *)i));
-}
-
-INLINE_OVERLOADABLE float16 __gen_ocl_modf(float16 x, float16 *i) {
-  return (float16)(__gen_ocl_modf(x.s0, (float *)i), __gen_ocl_modf(x.s1, 1 + (float *)i), __gen_ocl_modf(x.s2, 2 + (float *)i), __gen_ocl_modf(x.s3, 3 + (float *)i), __gen_ocl_modf(x.s4, 4 + (float *)i), __gen_ocl_modf(x.s5, 5 + (float *)i), __gen_ocl_modf(x.s6, 6 + (float *)i), __gen_ocl_modf(x.s7, 7 + (float *)i), __gen_ocl_modf(x.s8, 8 + (float *)i), __gen_ocl_modf(x.s9, 9 + (float *)i), __gen_ocl_modf(x.sa, 10 + (float *)i), __gen_ocl_modf(x.sb, 11 + (float *)i), __gen_ocl_modf(x.sc, 12 + (float *)i), __gen_ocl_modf(x.sd, 13 + (float *)i), __gen_ocl_modf(x.se, 14 + (float *)i), __gen_ocl_modf(x.sf, 15 + (float *)i));
-}
-
 INLINE_OVERLOADABLE float modf(float x, global float *i) { return __gen_ocl_modf(x, (float *)i); }
 INLINE_OVERLOADABLE float modf(float x, local float *i) { return __gen_ocl_modf(x, (float *)i); }
 INLINE_OVERLOADABLE float modf(float x, private float *i) { return __gen_ocl_modf(x, (float *)i); }
-#define DEF(n) \
-  INLINE_OVERLOADABLE float##n modf(float##n x, global float##n *i) { return __gen_ocl_modf(x, (float##n *)i); } \
-  INLINE_OVERLOADABLE float##n modf(float##n x, local float##n *i) { return __gen_ocl_modf(x, (float##n *)i); } \
-  INLINE_OVERLOADABLE float##n modf(float##n x, private float##n *i) { return __gen_ocl_modf(x, (float##n *)i); }
-DEF(2)
-DEF(3)
-DEF(4)
-DEF(8)
-DEF(16)
-#undef DEF
 
 INLINE_OVERLOADABLE float degrees(float radians) { return (180 / M_PI_F) * radians; }
 INLINE_OVERLOADABLE float radians(float degrees) { return (M_PI_F / 180) * degrees; }
@@ -903,45 +815,6 @@ INLINE_OVERLOADABLE float fract(float x, global float *p) { return __gen_ocl_fra
 INLINE_OVERLOADABLE float fract(float x, local float *p) { return __gen_ocl_fract(x, (float *)p); }
 INLINE_OVERLOADABLE float fract(float x, private float *p) { return __gen_ocl_fract(x, (float *)p); }
 
-INLINE_OVERLOADABLE float2 fract(float2 x, float2 *p) {
-  return (float2)(fract(x.s0, (float *)p),
-                  fract(x.s1, 1 + (float *)p));
-}
-INLINE_OVERLOADABLE float4 fract(float4 x, float4 *p) {
-  return (float4)(fract(x.s0, (float *)p),
-                  fract(x.s1, 1 + (float *)p),
-                  fract(x.s2, 2 + (float *)p),
-                  fract(x.s3, 3 + (float *)p));
-}
-INLINE_OVERLOADABLE float8 fract(float8 x, float8 *p) {
-  return (float8)(fract(x.s0, (float *)p),
-                  fract(x.s1, 1 + (float *)p),
-                  fract(x.s2, 2 + (float *)p),
-                  fract(x.s3, 3 + (float *)p),
-                  fract(x.s4, 4 + (float *)p),
-                  fract(x.s5, 5 + (float *)p),
-                  fract(x.s6, 6 + (float *)p),
-                  fract(x.s7, 7 + (float *)p));
-}
-INLINE_OVERLOADABLE float16 fract(float16 x, float16 *p) {
-  return (float16)(fract(x.s0, (float *)p),
-                   fract(x.s1, 1 + (float *)p),
-                   fract(x.s2, 2 + (float *)p),
-                   fract(x.s3, 3 + (float *)p),
-                   fract(x.s4, 4 + (float *)p),
-                   fract(x.s5, 5 + (float *)p),
-                   fract(x.s6, 6 + (float *)p),
-                   fract(x.s7, 7 + (float *)p),
-                   fract(x.s8, 8 + (float *)p),
-                   fract(x.s9, 9 + (float *)p),
-                   fract(x.sa, 10 + (float *)p),
-                   fract(x.sb, 11 + (float *)p),
-                   fract(x.sc, 12 + (float *)p),
-                   fract(x.sd, 13 + (float *)p),
-                   fract(x.se, 14 + (float *)p),
-                   fract(x.sf, 15 + (float *)p));
-}
-
 INLINE_OVERLOADABLE float __gen_ocl_remquo(float x, float y, int *quo) {
   uint hx = as_uint(x), ix = hx & 0x7FFFFFFF, hy = as_uint(y), iy = hy & 0x7FFFFFFF;
   if (ix > 0x7F800000 || iy > 0x7F800000 || ix == 0x7F800000 || iy == 0)
@@ -956,39 +829,9 @@ INLINE_OVERLOADABLE float __gen_ocl_remquo(float x, float y, int *quo) {
   return as_float(hr);
 }
 
-INLINE_OVERLOADABLE float2 __gen_ocl_remquo(float2 x, float2 y, int2 *i) {
-  return (float2)(__gen_ocl_remquo(x.s0, y.s0, (int *)i), __gen_ocl_remquo(x.s1, y.s1, 1 + (int *)i));
-}
-
-INLINE_OVERLOADABLE float3 __gen_ocl_remquo(float3 x, float3 y, int3 *i) {
-  return (float3)(__gen_ocl_remquo(x.s0, y.s0, (int *)i), __gen_ocl_remquo(x.s1, y.s1, 1 + (int *)i), __gen_ocl_remquo(x.s2, y.s2, 2 + (int *)i));
-}
-
-INLINE_OVERLOADABLE float4 __gen_ocl_remquo(float4 x, float4 y, int4 *i) {
-  return (float4)(__gen_ocl_remquo(x.s0, y.s0, (int *)i), __gen_ocl_remquo(x.s1, y.s1, 1 + (int *)i), __gen_ocl_remquo(x.s2, y.s2, 2 + (int *)i), __gen_ocl_remquo(x.s3, y.s3, 3 + (int *)i));
-}
-
-INLINE_OVERLOADABLE float8 __gen_ocl_remquo(float8 x, float8 y, int8 *i) {
-  return (float8)(__gen_ocl_remquo(x.s0, y.s0, (int *)i), __gen_ocl_remquo(x.s1, y.s1, 1 + (int *)i), __gen_ocl_remquo(x.s2, y.s2, 2 + (int *)i), __gen_ocl_remquo(x.s3, y.s3, 3 + (int *)i), __gen_ocl_remquo(x.s4, y.s4, 4 + (int *)i), __gen_ocl_remquo(x.s5, y.s5, 5 + (int *)i), __gen_ocl_remquo(x.s6, y.s6, 6 + (int *)i), __gen_ocl_remquo(x.s7, y.s7, 7 + (int *)i));
-}
-
-INLINE_OVERLOADABLE float16 __gen_ocl_remquo(float16 x, float16 y, int16 *i) {
-  return (float16)(__gen_ocl_remquo(x.s0, y.s0, (int *)i), __gen_ocl_remquo(x.s1, y.s1, 1 + (int *)i), __gen_ocl_remquo(x.s2, y.s2, 2 + (int *)i), __gen_ocl_remquo(x.s3, y.s3, 3 + (int *)i), __gen_ocl_remquo(x.s4, y.s4, 4 + (int *)i), __gen_ocl_remquo(x.s5, y.s5, 5 + (int *)i), __gen_ocl_remquo(x.s6, y.s6, 6 + (int *)i), __gen_ocl_remquo(x.s7, y.s7, 7 + (int *)i), __gen_ocl_remquo(x.s8, y.s8, 8 + (int *)i), __gen_ocl_remquo(x.s9, y.s9, 9 + (int *)i), __gen_ocl_remquo(x.sa, y.sa, 10 + (int *)i), __gen_ocl_remquo(x.sb, y.sb, 11 + (int *)i), __gen_ocl_remquo(x.sc, y.sc, 12 + (int *)i), __gen_ocl_remquo(x.sd, y.sd, 13 + (int *)i), __gen_ocl_remquo(x.se, y.se, 14 + (int *)i), __gen_ocl_remquo(x.sf, y.sf, 15 + (int *)i));
-}
-
 INLINE_OVERLOADABLE float remquo(float x, float y, global int *quo) { return __gen_ocl_remquo(x, y, (int *)quo); }
 INLINE_OVERLOADABLE float remquo(float x, float y, local int *quo) { return __gen_ocl_remquo(x, y, (int *)quo); }
 INLINE_OVERLOADABLE float remquo(float x, float y, private int *quo) { return __gen_ocl_remquo(x, y, (int *)quo); }
-#define DEF(n) \
-  INLINE_OVERLOADABLE float##n remquo(float##n x, float##n y, global int##n *quo) { return __gen_ocl_remquo(x, y, (int##n *)quo); } \
-  INLINE_OVERLOADABLE float##n remquo(float##n x, float##n y, local int##n *quo) { return __gen_ocl_remquo(x, y, (int##n *)quo); } \
-  INLINE_OVERLOADABLE float##n remquo(float##n x, float##n y, private int##n *quo) { return __gen_ocl_remquo(x, y, (int##n *)quo); }
-DEF(2)
-DEF(3)
-DEF(4)
-DEF(8)
-DEF(16)
-#undef DEF
 
 INLINE_OVERLOADABLE float native_divide(float x, float y) { return x/y; }
 INLINE_OVERLOADABLE float ldexp(float x, int n) {
-- 
1.7.9.5