[Piglit] [PATCH 2/3] arb_shader_precision: add framework for calculating tolerances for complex functions

Tue Feb 24 15:49:07 PST 2015

The bigfloat package is a necessary dependency for this filter, as it provides
both arbitrary-size floating point operations and it allows all floating point
values and operations in a code block to be forced into a particular precision,
leaving no question as to what precision was employed for an intermediate
calculation.  This comes in handy when running the reference versions of the
complex built-ins.  Performance impact is small (~6s for the entire
gen_shader_precision_tests.py script on IvyBridge i7 @ 2.9GHz) and is localized
to build-time.
---
 CMakeLists.txt                                |   1 +
 cmake/Modules/FindPythonBigfloat.cmake        |  43 ++++
 generated_tests/gen_shader_precision_tests.py | 357 +++++++++++++++++++++++++-
 3 files changed, 388 insertions(+), 13 deletions(-)
 create mode 100644 cmake/Modules/FindPythonBigfloat.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1a81eed..b13acdb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -193,6 +193,7 @@ find_package(PythonInterp 2.7 REQUIRED)
 find_package(PythonNumpy 1.6.2 REQUIRED)
 find_package(PythonMako 0.7.3 REQUIRED)
 find_package(PythonSix REQUIRED)
+find_package(PythonBigfloat 3.1.0 REQUIRED)
 
 # Default to compiling with debug information (`gcc -g`):
 if(NOT CMAKE_BUILD_TYPE)
diff --git a/cmake/Modules/FindPythonBigfloat.cmake b/cmake/Modules/FindPythonBigfloat.cmake
new file mode 100644
index 0000000..0b65b55
--- /dev/null
+++ b/cmake/Modules/FindPythonBigfloat.cmake
@@ -0,0 +1,43 @@
+# Copyright (C) 2015 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Find bigfloat 
+execute_process(
+   COMMAND ${PYTHON_EXECUTABLE} -c "import sys, bigfloat; sys.stdout.write(bigfloat.MPFR_VERSION_STRING)"
+	OUTPUT_VARIABLE _version
+	RESULT_VARIABLE _status
+	ERROR_QUIET
+	OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+
+set(PythonBigfloat_VERSION_STRING ${_version})
+
+# A status returns 0 if everything is okay. And zero is false. To make
+# checking in the outer scope less surprising
+if (_status EQUAL 0)
+   set("PythonBigfloat_STATUS" "success")
+endif (_status EQUAL 0)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(
+	"PythonBigfloat"
+	REQUIRED_VARS "PythonBigfloat_STATUS"
+	VERSION_VAR "PythonBigfloat_VERSION_STRING"
+)
diff --git a/generated_tests/gen_shader_precision_tests.py b/generated_tests/gen_shader_precision_tests.py
index 6c011a3..3492c87 100644
--- a/generated_tests/gen_shader_precision_tests.py
+++ b/generated_tests/gen_shader_precision_tests.py
@@ -50,31 +50,362 @@ from __future__ import print_function, division, absolute_import
 from builtin_function import *
 import os 
 import numpy
+import struct
+import bigfloat
 
 import six
 from six.moves import range
 
 from templates import template_file
 
-tolerances = {'pow': 16.0, 
-              'exp': 3.0,
-              'exp2': 3.0,
-              'log': 3.0,
-              'log2': 3.0,
-              'sqrt': 3.0,
-              'inversesqrt': 2.0,
-              'op-div': 2.5,
-              'op-assign-div': 2.5,
-              }
+
+allowed_error_scale = 4.0
 
 trig_builtins = ('sin', 'cos', 'tan', 
                  'asin', 'acos', 'atan', 
                  'sinh', 'cosh', 'tanh', 
                  'asinh', 'acosh', 'atanh')
 
+high_precision = bigfloat.precision(113)
+low_precision = bigfloat.precision(23)
+
 def _is_sequence(arg):
     return isinstance(arg, (collections.Sequence, numpy.ndarray))
 
+def _floatToBits(f):
+    s = struct.pack('>f', f)
+    return struct.unpack('>l', s)[0]
+
+def _bitsToFloat(b):
+    s = struct.pack('>l', b)
+    return struct.unpack('>f', s)[0]
+
+def _ulpsize(f):
+    """ determine _ulpsize in the direction of nearest infinity
+        which gives the worst case scenario for edge cases
+    """
+    return bigfloat.next_up(f) - f if f >= 0.0 \
+            else f - bigfloat.next_down(f)
+
+def _sanitize_arg(arg):
+    """ if arg is a scalar, convert it to an array
+        if it's an array, return it as-is
+    """
+    return arg if _is_sequence(arg) else [arg]
+
+def _to_bigfloat_list(arg):
+    if type(arg).__name__ == 'BigFloat' or \
+       (_is_sequence(arg) and type(arg[0]).__name__ == 'BigFloat'):
+        return arg
+    else:
+        return map(bigfloat.BigFloat, map('{0:1.8e}'.format, arg))
+
+def _mod_ref(args):
+    bfargs = _to_bigfloat_list(args)
+    x = bfargs[0]
+    y = bfargs[1]
+    return [x - y * (bigfloat.floor(x / y))]
+
+def _smoothstep_ref(args):
+    bfargs = _to_bigfloat_list(args)
+    edge0 = bfargs[0]
+    edge1 = bfargs[1]
+    x = bfargs[2]
+    t = max(min((x - edge0) / (edge1 - edge0), 1.0), 0.0)
+    return [t * t * (3.0 - 2.0 * t)]
+
+def _mix_ref(args):
+    bfargs = _to_bigfloat_list(args)
+    x = bfargs[0]
+    y = bfargs[1]
+    a = bfargs[2]
+    return [x * (1.0 - a) + y * a]
+
+def _length_ref(args):
+    component_sum = bigfloat.BigFloat(0.0)
+    x = _to_bigfloat_list(args[0])
+    for arg in x:
+        component_sum += bigfloat.pow(arg, 2.0)
+    return [bigfloat.sqrt(component_sum)]
+
+def _distance_ref(args):
+    components = len(args[0])
+    difference = []
+    for i in range(components):
+        p0 = _to_bigfloat_list(args[0])[i]
+        p1 = _to_bigfloat_list(args[1])[i]
+        difference.append(p0 - p1)
+    return _length_ref([difference])
+
+def _dot_ref(args):
+    components = len(args[0])
+    x = _to_bigfloat_list(args[0])
+    y = _to_bigfloat_list(args[1])
+    product = bigfloat.BigFloat(0.0)
+    for i in range(components):
+        xcomp = x[i]
+        ycomp = y[i]
+        product += xcomp * ycomp
+    return [product]
+
+def _cross_ref(args):
+    x0 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[0][0]), precision=23)
+    x1 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[0][1]), precision=23)
+    x2 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[0][2]), precision=23)
+    y0 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[1][0]), precision=23)
+    y1 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[1][1]), precision=23)
+    y2 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[1][2]), precision=23)
+    ret = [x1 * y2 - y1 * x2, x2 * y0 - y2 * x0, x0 * y1 - y0 * x1]
+    return ret
+
+def _normalize_ref(args):
+    bfargs = _to_bigfloat_list(args[0])
+    normalized_components = []
+    comp_len = _length_ref(args)
+    for component in bfargs:
+        normalized_components.append(component / comp_len[0])
+    return normalized_components
+
+def _faceforward_ref(args):
+    N = _to_bigfloat_list(args[0])
+    I = _to_bigfloat_list(args[1])
+    Nref = _to_bigfloat_list(args[2])
+    components = len(args[0])
+    if _dot_ref([Nref,I])[0] < 0.0:
+        ret = N
+    else:
+        negN = []
+        for i in range(components):
+            negN.append(N[i] * -1.0)
+        ret = negN
+    return ret
+
+def _reflect_ref(args):
+    I = _to_bigfloat_list(args[0])
+    N = _to_bigfloat_list(args[1])
+    dotNI = _dot_ref([N,I])[0]
+    ret = []
+    for Ncomponent, Icomponent in zip(N,I):
+        ret.append(Icomponent - 2.0 * dotNI * Ncomponent)
+    return ret
+
+def _refract_ref(args):
+    I = _to_bigfloat_list(args[0])
+    N = _to_bigfloat_list(args[1])
+    eta = _to_bigfloat_list(args[2])[0]
+    ret = []
+    k = 1.0 - eta * eta * (1.0 - _dot_ref([N,I])[0] * _dot_ref([N,I])[0])
+    if k < 0.0:
+        for i in range(len(I)):
+            ret.append(bigfloat.BigFloat(0.0))
+    else:
+        Ntemp = []
+        Itemp = []
+        for Ncomponent, Icomponent in zip(N,I):
+            Ntemp = (eta * _dot_ref([N,I])[0] + bigfloat.sqrt(k)) * Ncomponent
+            Itemp = eta * Icomponent
+            ret.append(Itemp - Ntemp)
+    return ret
+
+def _vec_times_mat_ref(args):
+    v = args[0]
+    m = args[1]
+    m_type = glsl_type_of(m)
+    num_cols = m_type.num_cols
+    num_rows = m_type.num_rows
+    components = num_cols
+    ret = []
+    for i in range(components):
+        m_col = []
+        for j in range(num_rows):
+            m_col.append(m[j][i])
+        ret.append(_dot_ref([v,m_col])[0])
+    return ret
+
+def _mat_times_vec_ref(args):
+    m = args[0]
+    v = args[1]
+    m_type = glsl_type_of(m)
+    num_rows = m_type.num_rows
+    components = num_rows
+    ret = []
+    for i in range(components):
+        m_col = m[i]
+        ret.append(_dot_ref([v,m_col])[0])
+    return ret
+
+def _mat_times_mat_ref(args):
+    mx = args[0]
+    my = args[1]
+    mx_type = glsl_type_of(mx)
+    my_type = glsl_type_of(my)
+    ret = []
+    for i in range(mx_type.num_rows):
+        for j in range(my_type.num_cols):
+            my_col = []
+            for k in range(my_type.num_rows):
+                my_col.append(my[k][j])
+            ret.append(_dot_ref([mx[i],my_col])[0])
+    return ret
+
+def _capture_error(precise, imprecise):
+    """Perform the legwork of calculating the difference in error of the high
+    precision and low precision runs.  Decide whether this difference in error
+    is within allowable tolerances.  The range of allowable tolerances is
+    subjective, as ARB_shader_precision (and GLSL spec as of v4.5) gives no
+    direct guidance for complex functions.  Toronto, et.  al. use quadrupled
+    error as a limit in "Practically Accurate Floating-Point Math," Computing
+    Now, Oct. 2014.  Also use the difference in error and the value of one ulp
+    at the output to calculate the tolerance range in ulps for use by the
+    shader test, should this vector pass the badlands check.
+    """
+
+    errors = []
+    badlands = []
+    component_tolerances = []
+    with high_precision:
+        error = bigfloat.abs(precise - imprecise)
+    errors.append(error)
+    with low_precision:
+        ulpsz = _ulpsize(imprecise)
+    with high_precision:
+        badlands.append(error > ulpsz * allowed_error_scale)
+        component_tolerances.append(bigfloat.round(error / ulpsz))
+    return errors, badlands, component_tolerances
+
+def _analyze_ref_fn(fn, args):
+    """Many functions contain ill-conditioned regions referred to as "badlands"
+    (see Toronto, et. al., "Practically Accurate Floating-Point Math,"
+    Computing Now, Oct. 2014).  Within these regions errors in the inputs are
+    magnified significantly, making the function impossible to test with any
+    reasonable accuracy.  A complex function that operates on floating point
+    numbers has the potential to generate such error propagation even if the
+    inputs are exact floating point numbers, since intermediate results can be
+    generated with error.  In order to identify and avoid these areas, we run
+    the function once at a lower precision and once at a higher precision, and
+    compare the outputs.  Propagating errors will be greater at lower precision
+    and less at higher precision for a given set of function inputs, allowing
+    us to identify the badlands of the function.
+    """
+
+    errors = []
+    badlands = []
+    component_tolerances = []
+    with high_precision:
+        precise = fn(args)
+    with low_precision:
+        imprecise = fn(args)
+    for i, arg in enumerate(imprecise):
+        cur_errors, cur_badlands, cur_component_tolerances = _capture_error(precise[i], arg)
+        errors.extend(cur_errors)
+        badlands.extend(cur_badlands)
+        component_tolerances.extend(cur_component_tolerances)
+    return errors, badlands, component_tolerances
+
+
+# These three dicts (simple_fns, complex_fns and mult_fns) divide the names of
+# the glsl built-in operations into three mutually exclusive groups.  They do
+# not need to cover all built-ins however, as a default tolerance is assigned
+# for names not appearing in these dicts.
+#
+# simple_fns are accompanied by a single blanket tolerance value used for all
+# calculations (these come from the EXT).
+#
+# complex_fns are accompanied by a reference to a function that can be used to
+# calculate a list of non-uniform tolerances.
+#
+# the two mult_fns (mult and assign-mult) are special in that they can act like
+# a simple function when multiplying floats or vec members together in a
+# componentwise manner, or they can act like complex functions when matrices
+# are involved and they switch to matrix multiplication rules.
+simple_fns = {'op-div': 2.5,
+              'op-assign-div': 2.5,
+              'pow': 16.0, 
+              'exp': 3.0,
+              'exp2': 3.0,
+              'log': 3.0,
+              'log2': 3.0,
+              'sqrt': 3.0,
+              'inversesqrt': 2.0}
+ 
+complex_fns = {'mod': _mod_ref,
+             'smoothstep': _smoothstep_ref,
+             'mix': _mix_ref,
+             'length': _length_ref,
+             'distance': _distance_ref,
+             'dot': _dot_ref,
+             'cross': _cross_ref,
+             'normalize': _normalize_ref,
+             'faceforward': _faceforward_ref,
+             'reflect': _reflect_ref,
+             'refract': _refract_ref}
+
+mult_fns = {'op-mult': 0.0,
+            'op-assign-mult': 0.0}
+
+# the complex functions listed here operate in a componentwise manner - the rest do not
+componentwise = ('mod', 'mix', 'smoothstep' )
+
+def _gen_tolerance(name, rettype, args):
+    """Return the tolerance that should be allowed for a function for the
+    test vector passed in.  Return -1 for any vectors that would push the
+    tolerance outside of acceptable bounds 
+    """
+    if name in simple_fns:
+        return simple_fns[name] 
+    elif name in complex_fns:
+        if name in componentwise:
+            # for componentwise functions, call the reference function
+            # for each component (scalar components multiplex here)
+            # eg. for fn(float a, vec2 b, vec2 c) call:
+            # _fn_ref(a, b[0], c[0])
+            # _fn_ref(a, b[1], c[1])
+            # and then capture the results in a list
+            errors = []
+            badlands = []
+            component_tolerances = []
+            for component in range(rettype.num_cols * rettype.num_rows):
+                current_args = []
+                for arg in args:
+                    # sanitize args so that all analyze operations can be performed on a list
+                    sanitized_arg = _sanitize_arg(arg)
+                    current_args.append(sanitized_arg[component % len(sanitized_arg)])
+                cur_errors, cur_badlands, cur_component_tolerances = _analyze_ref_fn(complex_fns[name], current_args)
+                errors.extend(cur_errors)
+                badlands.extend(cur_badlands)
+                component_tolerances.extend(cur_component_tolerances)
+        else:
+            # for non-componentwise functions, all args must be passed in in a single run
+            current_args = []
+            for arg in args:
+                # sanitize args so that all analyze operations can be performed on a list
+                current_args.append(_sanitize_arg(arg))
+            errors, badlands, component_tolerances = _analyze_ref_fn(complex_fns[name], current_args)
+        # results are in a list, and may have one or more members
+        return -1.0 if True in badlands else map(float, component_tolerances)
+    elif name in mult_fns:
+        x_type = glsl_type_of(args[0])
+        y_type = glsl_type_of(args[1])
+        # if matrix types are involved, the multiplication will be matrix multiplication
+        # and we will have to pass the args through a reference function
+        if x_type.is_vector and y_type.is_matrix:
+            mult_func = _vec_times_mat_ref
+        elif x_type.is_matrix and y_type.is_vector:
+            mult_func = _mat_times_vec_ref
+        elif x_type.is_matrix and y_type.is_matrix:
+            mult_func = _mat_times_mat_ref
+        else:
+            # float*float, float*vec, vec*float or vec*vec are all done as
+            # normal multiplication and share a single tolerance
+            return mult_fns[name] 
+        # sanitize args so that all analyze operations can be performed on a list
+        errors, badlands, component_tolerances = _analyze_ref_fn(mult_func, _sanitize_arg(args))
+        # results are in a list, and may have one or more members
+        return -1.0 if True in badlands else map(float, component_tolerances)
+    else:
+        # default for any operations without specified tolerances
+        return 0.0
+
 def make_indexers(signature):
    """Build a list of strings which index into every possible
    value of the result.  For example, if the result is a vec2,
@@ -116,7 +447,7 @@ def shader_runner_format(values):
         retval = ''
         for x in values:
             assert isinstance(x, (float, np.float32))
-            retval+=' {0:1.8e}'.format(x)
+            retval += ' {0:1.8e}'.format(x)
     else:
         assert isinstance(values, (float, np.float32))
         retval = '{0:1.8e}'.format(values)
@@ -133,7 +464,7 @@ def main():
         # Filter the test vectors down to only those which deal exclusively in float types
         #and are not trig functions or determinant()
         indexers = make_indexers(signature)
-        num_elements = signature.rettype.num_cols*signature.rettype.num_rows
+        num_elements = signature.rettype.num_cols * signature.rettype.num_rows
         invocation = signature.template.format( *['arg{0}'.format(i) 
                                                 for i in range(len(signature.argtypes))])
         if (signature.rettype.base_type == glsl_float and
@@ -155,7 +486,7 @@ def main():
                 with open(output_filename, 'w') as f:
                     f.write(template.render_unicode( signature=signature, 
                                                      test_vectors=test_vectors,
-                                                     tolerances=tolerances,
+                                                     tolerances=simple_fns,
                                                      invocation=invocation,
                                                      num_elements=num_elements,
                                                      indexers=indexers,
-- 
2.2.2