[Piglit] [PATCH v3 2/3] arb_shader_precision: add framework for calculating tolerances for complex functions

Fri Feb 27 14:36:55 PST 2015

The bigfloat package is a necessary dependency for this filter, as it provides
both arbitrary-size floating point operations and it allows all floating point
values and operations in a code block to be forced into a particular precision,
leaving no question as to what precision was employed for an intermediate
calculation.  This comes in handy when running the reference versions of the
complex built-ins.  Performance impact is small (~6s for the entire
gen_shader_precision_tests.py script on IvyBridge i7 @ 2.9GHz) and is localized
to build-time.
---
CMakeLists.txt                                |   1 +
cmake/Modules/FindPythonBigfloat.cmake        |  43 ++
generated_tests/gen_shader_precision_tests.py | 603 +++++++++++++++++++++++++-
3 files changed, 633 insertions(+), 14 deletions(-)
create mode 100644 cmake/Modules/FindPythonBigfloat.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1a81eed..b13acdb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -193,6 +193,7 @@ find_package(PythonInterp 2.7 REQUIRED)
find_package(PythonNumpy 1.6.2 REQUIRED)
find_package(PythonMako 0.7.3 REQUIRED)
find_package(PythonSix REQUIRED)
+find_package(PythonBigfloat 3.1.0 REQUIRED)

# Default to compiling with debug information (`gcc -g`):
if(NOT CMAKE_BUILD_TYPE)
diff --git a/cmake/Modules/FindPythonBigfloat.cmake b/cmake/Modules/FindPythonBigfloat.cmake
new file mode 100644
index 0000000..0b65b55
--- /dev/null
+++ b/cmake/Modules/FindPythonBigfloat.cmake
@@ -0,0 +1,43 @@
+# Copyright (C) 2015 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Find bigfloat 
+execute_process(
+   COMMAND ${PYTHON_EXECUTABLE} -c "import sys, bigfloat; sys.stdout.write(bigfloat.MPFR_VERSION_STRING)"
+	OUTPUT_VARIABLE _version
+	RESULT_VARIABLE _status
+	ERROR_QUIET
+	OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+
+set(PythonBigfloat_VERSION_STRING ${_version})
+
+# A status returns 0 if everything is okay. And zero is false. To make
+# checking in the outer scope less surprising
+if (_status EQUAL 0)
+   set("PythonBigfloat_STATUS" "success")
+endif (_status EQUAL 0)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(
+	"PythonBigfloat"
+	REQUIRED_VARS "PythonBigfloat_STATUS"
+	VERSION_VAR "PythonBigfloat_VERSION_STRING"
+)
diff --git a/generated_tests/gen_shader_precision_tests.py b/generated_tests/gen_shader_precision_tests.py
index 6c011a3..3195f89 100644
--- a/generated_tests/gen_shader_precision_tests.py
+++ b/generated_tests/gen_shader_precision_tests.py
@@ -50,31 +50,606 @@ from __future__ import print_function, division, absolute_import
from builtin_function import *
import os 
import numpy
+import struct
+import bigfloat

import six
from six.moves import range

from templates import template_file

-tolerances = {'pow': 16.0, 
-              'exp': 3.0,
-              'exp2': 3.0,
-              'log': 3.0,
-              'log2': 3.0,
-              'sqrt': 3.0,
-              'inversesqrt': 2.0,
-              'op-div': 2.5,
-              'op-assign-div': 2.5,
-              }
+
+allowed_error_scale = 4.0

trig_builtins = ('sin', 'cos', 'tan', 
                 'asin', 'acos', 'atan', 
                 'sinh', 'cosh', 'tanh', 
                 'asinh', 'acosh', 'atanh')

+high_precision = bigfloat.quadruple_precision
+low_precision = bigfloat.single_precision
+
def _is_sequence(arg):
    return isinstance(arg, (collections.Sequence, numpy.ndarray))

+def _ulpsize(f):
+    """ determine _ulpsize in the direction of nearest infinity
+        which gives the worst case scenario for edge cases
+    """
+    if f >= 0:
+        return bigfloat.next_up(f) - f
+    else:
+        return f - bigfloat.next_down(f)
+
+def _listify(arg):
+    """ if arg is a scalar, convert it to an array
+        if it's an array, return it as-is
+    """
+    return arg if _is_sequence(arg) else [arg]
+
+def _to_bigfloat(arg):
+    if _is_sequence(arg):
+        for ar in arg:
+            assert(isinstance(ar, numpy.float32) or isinstance(ar, bigfloat.BigFloat))
+
+    else:
+        assert(isinstance(arg, numpy.float32) or isinstance(arg, bigfloat.BigFloat))
+
+    if isinstance(arg, bigfloat.BigFloat) or \
+       (_is_sequence(arg) and isinstance(arg[0], bigfloat.BigFloat)):
+        return arg
+    else:
+        return list(bigfloat.BigFloat('%.8e' % x) for x in arg)
+
+def _mod_ref(args):
+    """Modulus reference function.
+    Returns x  floor (x/y).
+
+    This is a scalar implementation of a vector-componentwise GLSL built-in -
+    arguments must be in scalar form (float).  Vector arguments must be
+    partitioned by the caller.
+    Arguments are converted to BigFloat type.
+    Return value is a list of a single BigFloat value.
+    
+    arguments:
+    args -- list of all arguments
+    args[0] -- x
+    args[1] -- y
+    """
+    bfargs = _to_bigfloat(args)
+    x = bfargs[0]
+    y = bfargs[1]
+    return [x - y * (bigfloat.floor(x / y))]
+
+def _smoothstep_ref(args):
+    """Smoothstep reference function.
+    Returns 0.0 if x <= edge0 and 1.0 if x >= edge1 and
+    performs smooth Hermite interpolation between 0 and 1
+    when edge0 < x < edge1. This is useful in cases where
+    you would want a threshold function with a smooth
+    transition. This is equivalent to:
+    t = clamp ((x edge0), 0, 1);
+    return t * t * (3 a) + y * a
+
+    This is a scalar implementation of a vector-componentwise GLSL built-in -
+    arguments must be in scalar form (float).  Vector arguments must be
+    partitioned by the caller.
+    Arguments are converted to BigFloat type.
+    Return value is a list of a single BigFloat value.
+    
+    arguments:
+    args -- list of all arguments
+    args[0] -- x
+    args[1] -- y
+    args[2] -- a
+    """
+    bfargs = _to_bigfloat(args)
+    x = bfargs[0]
+    y = bfargs[1]
+    a = bfargs[2]
+    return [x * (bigfloat.BigFloat(1.0) - a) + y * a]
+
+def _length_ref(args):
+    """Length reference function.
+    Returns the length of vector x, i.e.,
+    sqrt(x[0]^2 + x[1]^2 + ...)
+
+    This is a non-componentwise GLSL built-in -
+    arguments may be scalar (float) or vector form (list).
+    Arguments are converted to BigFloat type.
+    Return value is a list of a single BigFloat value.
+    
+    arguments:
+    args -- list of all arguments
+    args[0] -- x
+    """
+    component_sum = bigfloat.BigFloat(0.0)
+    x = _to_bigfloat(args[0])
+    for arg in x:
+        component_sum += bigfloat.pow(arg, 2.0)
+    return [bigfloat.sqrt(component_sum)]
+
+def _distance_ref(args):
+    """Distance reference function.
+    Returns the distance between p0 and p1, i.e.,
+    length (p0 y[1] * x[2]
+     x[2] * y[0] y[0] * x[1]]
+
+    This is a non-componentwise GLSL built-in -
+    arguments are 3-element vectors.
+    Arguments are converted to BigFloat type.
+    Return value is a 3-element list of BigFloat values.
+    
+    arguments:
+    args -- list of all arguments
+    args[0] -- x
+    args[1] -- y
+    """
+    x = _to_bigfloat(args[0])
+    y = _to_bigfloat(args[1])
+    ret = [x[1] * y[2] - y[1] * x[2], x[2] * y[0] - y[2] * x[0], x[0] * y[1] - y[0] * x[1]]
+    return ret
+
+def _normalize_ref(args):
+    """Normalize reference function.
+    Returns a vector in the same direction as x but with a
+    length of 1, ie. x/length(x).
+
+    This is a non-componentwise GLSL built-in -
+    arguments may be scalar (float) or vector form (list).
+    Arguments are converted to BigFloat type.
+    Return value is a list of BigFloat values equal in count to the number of
+    items in x.
+    
+    arguments:
+    args -- list of all arguments
+    args[0] -- x
+    """
+    bfargs = _to_bigfloat(args[0])
+    normalized_components = []
+    comp_len = _length_ref(args)
+    for component in bfargs:
+        normalized_components.append(component / comp_len[0])
+    return normalized_components
+
+def _faceforward_ref(args):
+    """Faceforward reference function.
+    If dot(Nref, I) < 0 return N, otherwise return 2  N
+    N must already be normalized in order to achieve the
+    desired result.
+
+    This is a non-componentwise GLSL built-in -
+    arguments may be scalar (float) or vector form (list).
+    Arguments are converted to BigFloat type.
+    Return value is a list of BigFloat values equal in count to the number of
+    items in I.
+    
+    arguments:
+    args -- list of all arguments
+    args[0] -- I
+    args[1] -- N
+    """
+    I = _to_bigfloat(args[0])
+    N = _to_bigfloat(args[1])
+    dotNI = _dot_ref([N,I])[0]
+    ret = []
+    for Ncomponent, Icomponent in zip(N,I):
+        ret.append(Icomponent - bigfloat.BigFloat(2.0) * dotNI * Ncomponent)
+    return ret
+
+def _refract_ref(args):
+    """Refract reference function.
+    For the incident vector I and surface normal N, and the
+    ratio of indices of refraction eta, return the refraction
+    vector. The result is computed by
+    k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I))
+    if (k < 0.0)
+    return (0.0)
+    else
+    return eta * I - (eta * dot(N, I) + sqrt(k)) * N
+    The input parameters for the incident vector I and the
+    surface normal N must already be normalized to get the
+    desired results.
+
+    This is a non-componentwise GLSL built-in -
+    arguments may be scalar (float) or vector form (list).
+    Arguments are converted to BigFloat type.
+    Return value is a list of BigFloat values equal in count to the number of
+    items in I.
+    
+    arguments:
+    args -- list of all arguments
+    args[0] -- I
+    args[1] -- N
+    args[2] -- eta
+    """
+    I = _to_bigfloat(args[0])
+    N = _to_bigfloat(args[1])
+    eta = _to_bigfloat(args[2])[0]
+    ret = []
+    k = bigfloat.BigFloat(1.0) - eta * eta * \
+        (bigfloat.BigFloat(1.0) - _dot_ref([N,I])[0] * _dot_ref([N,I])[0])
+    if k < 0.0:
+        for i in range(len(I)):
+            ret.append(bigfloat.BigFloat(0.0))
+    else:
+        Ntemp = []
+        Itemp = []
+        for Ncomponent, Icomponent in zip(N,I):
+            Ntemp = (eta * _dot_ref([N,I])[0] + bigfloat.sqrt(k)) * Ncomponent
+            Itemp = eta * Icomponent
+            ret.append(Itemp - Ntemp)
+    return ret
+
+def _vec_times_mat_ref(args):
+    """Matrix Multiplication (Vector * Matrix) reference function.
+    vec3 v, u;
+    mat3 m;
+    u = v * m;
+    u.x = dot(v, m[0]); // m[0] is the left column of m
+    u.y = dot(v, m[1]); // dot(a,b) is the inner (dot) product of a and b
+    u.z = dot(v, m[2]);
+
+    This is a non-componentwise GLSL built-in -
+    arguments are one vector (list) and one matrix (list (rows) of lists (cols)) 
+    ex. arg[0] = [a, b, c]; arg[1] = [[d, e, f], [g, h, i], [j, k, l]]; m[0] == [d, g, j]
+    Arguments are converted to BigFloat type.
+    Return value is a list of BigFloat values equal in count to the number of
+    items in v.
+    
+    arguments:
+    args -- list of all arguments
+    args[0] -- v
+    args[1] -- m
+    """
+    v = args[0]
+    m = args[1]
+    m_type = glsl_type_of(m)
+    num_cols = m_type.num_cols
+    num_rows = m_type.num_rows
+    components = num_cols
+    ret = []
+    for i in range(components):
+        m_col = []
+        for j in range(num_rows):
+            m_col.append(m[j][i])
+        ret.append(_dot_ref([v,m_col])[0])
+    return ret
+
+def _mat_times_vec_ref(args):
+    """Matrix Multiplication (Matrix * Vector) reference function.
+    u = m * v;
+    u.x = m[0].x * v.x + m[1].x * v.y + m[2].x * v.z;
+    u.y = m[0].y * v.x + m[1].y * v.y + m[2].y * v.z;
+    u.z = m[0].z * v.x + m[1].z * v.y + m[2].z * v.z;
+
+    This is a non-componentwise GLSL built-in -
+    arguments are one matrix (list (rows) of lists (cols)) and one vector (list)
+    ex. arg[0] = [[d, e, f], [g, h, i], [j, k, l]]; m[0] == [d, g, j]; arg[1] = [a, b, c]; 
+    Arguments are converted to BigFloat type.
+    Return value is a list of BigFloat values equal in count to the number of
+    items in v.
+    
+    arguments:
+    args -- list of all arguments
+    args[0] -- m
+    args[1] -- v
+    """
+    m = args[0]
+    v = args[1]
+    m_type = glsl_type_of(m)
+    num_rows = m_type.num_rows
+    components = num_rows
+    ret = []
+    for i in range(components):
+        m_col = m[i]
+        ret.append(_dot_ref([v,m_col])[0])
+    return ret
+
+def _mat_times_mat_ref(args):
+    """Matrix Multiplication (Matrix * Matrix) reference function.
+    mat3 m, n, r;
+    r = m * n;
+    r[0].x = m[0].x * n[0].x + m[1].x * n[0].y + m[2].x * n[0].z;
+    r[1].x = m[0].x * n[1].x + m[1].x * n[1].y + m[2].x * n[1].z;
+    r[2].x = m[0].x * n[2].x + m[1].x * n[2].y + m[2].x * n[2].z;
+
+    r[0].y = m[0].y * n[0].x + m[1].y * n[0].y + m[2].y * n[0].z;
+    r[1].y = m[0].y * n[1].x + m[1].y * n[1].y + m[2].y * n[1].z;
+    r[2].y = m[0].y * n[2].x + m[1].y * n[2].y + m[2].y * n[2].z;
+
+    r[0].z = m[0].z * n[0].x + m[1].z * n[0].y + m[2].z * n[0].z;
+    r[1].z = m[0].z * n[1].x + m[1].z * n[1].y + m[2].z * n[1].z;
+    r[2].z = m[0].z * n[2].x + m[1].z * n[2].y + m[2].z * n[2].z;
+
+    This is a non-componentwise GLSL built-in -
+    arguments are two matrices (list (rows) of lists (cols))
+    ex. arg[0] = [[d, e, f], [g, h, i], [j, k, l]]; m[0] == [d, g, j]
+    Arguments are converted to BigFloat type.
+    Return value is a flattened matrix in the form of a list of BigFloat values
+    equal in count to the number of rows in matrix m times the number of
+    columns in matrix n, ie. [[r0c0, r0c1, r0c2], [r1c0, r1c1, r1c2], ... ].
+    
+    arguments:
+    args -- list of all arguments
+    args[0] -- m
+    args[1] -- n
+    """
+    m = args[0]
+    n = args[1]
+    m_type = glsl_type_of(m)
+    n_type = glsl_type_of(n)
+    ret = []
+    for i in range(m_type.num_rows):
+        for j in range(n_type.num_cols):
+            n_col = []
+            for k in range(n_type.num_rows):
+                n_col.append(n[k][j])
+            ret.append(_dot_ref([m[i],n_col])[0])
+    return ret
+
+def _capture_error(precise, imprecise):
+    """Perform the legwork of calculating the difference in error of the high
+    precision and low precision runs.  Decide whether this difference in error
+    is within allowable tolerances.  The range of allowable tolerances is
+    subjective, as ARB_shader_precision (and GLSL spec as of v4.5) gives no
+    direct guidance for complex functions.  Toronto, et.  al. use quadrupled
+    error as a limit in "Practically Accurate Floating-Point Math," Computing
+    Now, Oct. 2014.  Also use the difference in error and the value of one ulp
+    at the output to calculate the tolerance range in ulps for use by the
+    shader test, should this vector pass the badlands check.
+    """
+
+    errors = []
+    badlands = []
+    component_tolerances = []
+    with high_precision:
+        error = bigfloat.abs(precise - imprecise)
+    errors.append(error)
+    with low_precision:
+        ulpsz = _ulpsize(imprecise)
+    with high_precision:
+        badlands.append(error > ulpsz * allowed_error_scale)
+        component_tolerances.append(bigfloat.round(error / ulpsz))
+    return errors, badlands, component_tolerances
+
+def _analyze_ref_fn(fn, args):
+    """Many functions contain ill-conditioned regions referred to as "badlands"
+    (see Toronto, et. al., "Practically Accurate Floating-Point Math,"
+    Computing Now, Oct. 2014).  Within these regions errors in the inputs are
+    magnified significantly, making the function impossible to test with any
+    reasonable accuracy.  A complex function that operates on floating point
+    numbers has the potential to generate such error propagation even if the
+    inputs are exact floating point numbers, since intermediate results can be
+    generated with error.  In order to identify and avoid these areas, we run
+    the function once at a lower precision and once at a higher precision, and
+    compare the outputs.  Propagating errors will be greater at lower precision
+    and less at higher precision for a given set of function inputs, allowing
+    us to identify the badlands of the function.
+    """
+
+    errors = []
+    badlands = []
+    component_tolerances = []
+    with high_precision:
+        precise = fn(args)
+    with low_precision:
+        imprecise = fn(args)
+    for i, arg in enumerate(imprecise):
+        cur_errors, cur_badlands, cur_component_tolerances = _capture_error(precise[i], arg)
+        errors.extend(cur_errors)
+        badlands.extend(cur_badlands)
+        component_tolerances.extend(cur_component_tolerances)
+    return errors, badlands, component_tolerances
+
+
+# These three dicts (simple_fns, complex_fns and mult_fns) divide the names of
+# the glsl built-in operations into three mutually exclusive groups.  They do
+# not need to cover all built-ins however, as a default tolerance is assigned
+# for names not appearing in these dicts.
+#
+# simple_fns are accompanied by a single blanket tolerance value used for all
+# calculations (these come from the EXT).
+#
+# complex_fns are accompanied by a reference to a function that can be used to
+# calculate a list of non-uniform tolerances.
+#
+# the two mult_fns (mult and assign-mult) are special in that they can act like
+# a simple function when multiplying floats or vec members together in a
+# componentwise manner, or they can act like complex functions when matrices
+# are involved and they switch to matrix multiplication rules.
+simple_fns = {'op-div': 2.5,
+              'op-assign-div': 2.5,
+              'pow': 16.0, 
+              'exp': 3.0,
+              'exp2': 3.0,
+              'log': 3.0,
+              'log2': 3.0,
+              'sqrt': 3.0,
+              'inversesqrt': 2.0}
+ 
+complex_fns = {'mod': _mod_ref,
+             'smoothstep': _smoothstep_ref,
+             'mix': _mix_ref,
+             'length': _length_ref,
+             'distance': _distance_ref,
+             'dot': _dot_ref,
+             'cross': _cross_ref,
+             'normalize': _normalize_ref,
+             'faceforward': _faceforward_ref,
+             'reflect': _reflect_ref,
+             'refract': _refract_ref}
+
+mult_fns = {'op-mult': 0.0,
+            'op-assign-mult': 0.0}
+
+# the complex functions listed here operate in a componentwise manner - the rest do not
+componentwise = ('mod', 'mix', 'smoothstep' )
+
+def _gen_tolerance(name, rettype, args):
+    """Return the tolerance that should be allowed for a function for the
+    test vector passed in.  Return -1 for any vectors that would push the
+    tolerance outside of acceptable bounds 
+    """
+    if name in simple_fns:
+        return simple_fns[name] 
+    elif name in complex_fns:
+        if name in componentwise:
+            # for componentwise functions, call the reference function
+            # for each component (scalar components multiplex here)
+            # eg. for fn(float a, vec2 b, vec2 c) call:
+            # _fn_ref(a, b[0], c[0])
+            # _fn_ref(a, b[1], c[1])
+            # and then capture the results in a list
+            errors = []
+            badlands = []
+            component_tolerances = []
+            for component in range(rettype.num_cols * rettype.num_rows):
+                current_args = []
+                for arg in args:
+                    # sanitize args so that all analyze operations can be performed on a list
+                    sanitized_arg = _listify(arg)
+                    current_args.append(sanitized_arg[component % len(sanitized_arg)])
+                cur_errors, cur_badlands, cur_component_tolerances = _analyze_ref_fn(complex_fns[name], current_args)
+                errors.extend(cur_errors)
+                badlands.extend(cur_badlands)
+                component_tolerances.extend(cur_component_tolerances)
+        else:
+            # for non-componentwise functions, all args must be passed in in a single run
+            # sanitize args so that all analyze operations can be performed on a list
+            current_args = map(_listify, args)
+            errors, badlands, component_tolerances = _analyze_ref_fn(complex_fns[name], current_args)
+        # results are in a list, and may have one or more members
+        return -1.0 if True in badlands else map(float, component_tolerances)
+    elif name in mult_fns:
+        x_type = glsl_type_of(args[0])
+        y_type = glsl_type_of(args[1])
+        # if matrix types are involved, the multiplication will be matrix multiplication
+        # and we will have to pass the args through a reference function
+        if x_type.is_vector and y_type.is_matrix:
+            mult_func = _vec_times_mat_ref
+        elif x_type.is_matrix and y_type.is_vector:
+            mult_func = _mat_times_vec_ref
+        elif x_type.is_matrix and y_type.is_matrix:
+            mult_func = _mat_times_mat_ref
+        else:
+            # float*float, float*vec, vec*float or vec*vec are all done as
+            # normal multiplication and share a single tolerance
+            return mult_fns[name] 
+        # sanitize args so that all analyze operations can be performed on a list
+        errors, badlands, component_tolerances = _analyze_ref_fn(mult_func, _listify(args))
+        # results are in a list, and may have one or more members
+        return -1.0 if True in badlands else map(float, component_tolerances)
+    else:
+        # default for any operations without specified tolerances
+        return 0.0
+
def make_indexers(signature):
   """Build a list of strings which index into every possible
   value of the result.  For example, if the result is a vec2,
@@ -116,7 +691,7 @@ def shader_runner_format(values):
        retval = ''
        for x in values:
            assert isinstance(x, (float, np.float32))
-            retval+=' {0:1.8e}'.format(x)
+            retval += ' {0:1.8e}'.format(x)
    else:
        assert isinstance(values, (float, np.float32))
        retval = '{0:1.8e}'.format(values)
@@ -126,14 +701,14 @@ def shader_runner_format(values):

def main():
    """ Main function """
-
+    
    for signature, test_vectors in sorted(six.iteritems(test_suite)):
        arg_float_check = tuple(
                        arg.base_type == glsl_float for arg in signature.argtypes)
        # Filter the test vectors down to only those which deal exclusively in float types
        #and are not trig functions or determinant()
        indexers = make_indexers(signature)
-        num_elements = signature.rettype.num_cols*signature.rettype.num_rows
+        num_elements = signature.rettype.num_cols * signature.rettype.num_rows
        invocation = signature.template.format( *['arg{0}'.format(i) 
                                                for i in range(len(signature.argtypes))])
        if (signature.rettype.base_type == glsl_float and
@@ -155,7 +730,7 @@ def main():
                with open(output_filename, 'w') as f:
                    f.write(template.render_unicode( signature=signature, 
                                                     test_vectors=test_vectors,
-                                                     tolerances=tolerances,
+                                                     tolerances=simple_fns,
                                                     invocation=invocation,
                                                     num_elements=num_elements,
                                                     indexers=indexers,
-- 
2.2.2