[Beignet] [PATCH] improve the build performance of vector type built-in function.
Zhigang Gong
zhigang.gong at linux.intel.com
Mon Jul 28 22:00:48 PDT 2014
In practice, for vectors that have <= 4 components, it's better
to keep the original method.
For a vector with 4 or fewer components, using the complex union
and the for loop may bring too much overhead.
What's your opinion?
On Thu, Jul 24, 2014 at 04:23:24AM +0800, xionghu.luo at intel.com wrote:
> From: LuoXionghu <xionghu.luo at intel.com>
>
> expand the gentypen with loop to reduce the redundant inline.
>
> Signed-off-by: LuoXionghu <xionghu.luo at intel.com>
> ---
> backend/src/gen_builtin_vector.py | 42 +++++++++++++++++++++++++++++++++------
> 1 file changed, 36 insertions(+), 6 deletions(-)
>
> diff --git a/backend/src/gen_builtin_vector.py b/backend/src/gen_builtin_vector.py
> index b100bbf..83e2bcb 100755
> --- a/backend/src/gen_builtin_vector.py
> +++ b/backend/src/gen_builtin_vector.py
> @@ -283,9 +283,39 @@ class builtinProto():
> formatStr += '{0} {1}param{2}'.format(ptype[0], pointerStr, n)
>
> formatStr += ')'
> - formatStr = self.append(formatStr, '{{return ({0}{1})('.format(vtype[0], vtype[1]))
> - self.indent = len(formatStr)
> - for j in range(0, vtype[1]):
> + if self.functionName != 'select' and ptypeSeqs[0] == ptypeSeqs[self.paramCount-1]:
> + formatStr += '\n{ \n union{'
> + formatStr = self.append(formatStr, ' {0} va[{1}];'.format(vtype[0], vtype[1]))
> + formatStr = self.append(formatStr, ' {0}{1} vv{2};'.format(vtype[0], vtype[1], vtype[1]))
> + formatStr += '\n }uret;'
> + formatStr += '\n union{'
> + formatStr = self.append(formatStr, ' {0} pa[{1}];'.format(ptype[0], ptype[1]))
> + formatStr = self.append(formatStr, ' {0}{1} pv{2};'.format(ptype[0], ptype[1], ptype[1]))
> + formatStr += '\n }'
> + for n in range(0, self.paramCount):
> + formatStr += 'usrc{0}'.format(n)
> + if n+1 != self.paramCount:
> + formatStr +=', '
> + formatStr += ';'
> +
> + for n in range(0, self.paramCount):
> + formatStr = self.append(formatStr, ' usrc{0}.pv{1} = param{2};'.format(n, ptype[1], n))
> + formatStr = self.append(formatStr, ' for(int i =0; i < {0}; i++)'.format(ptype[1]))
> + formatStr = self.append(formatStr, ' uret.va[i] = {0}('.format(self.functionName))
> +
> + for n in range(0, self.paramCount):
> + formatStr += 'usrc{0}.pa[i]'.format(n)
> + if n+1 != self.paramCount:
> + formatStr +=', '
> + formatStr += ');'
> + formatStr = self.append(formatStr, ' return uret.vv{0};'.format(vtype[1]))
> + formatStr += '\n}'
> + formatStr = self.append(formatStr)
> + return formatStr
> + else:
> + formatStr = self.append(formatStr, '{{return ({0}{1})('.format(vtype[0], vtype[1]))
> + self.indent = len(formatStr)
> + for j in range(0, vtype[1]):
> if (j != 0):
> formatStr += ','
> if (j + 1) % 2 == 0:
> @@ -320,10 +350,10 @@ class builtinProto():
>
> formatStr += ')'
>
> - formatStr += '); }\n'
> - self.append(formatStr)
> + formatStr += '); }\n'
> + self.append(formatStr)
>
> - return formatStr
> + return formatStr
>
> def output(self):
> for line in self.outputStr:
> --
> 1.8.1.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list