[Piglit] [PATCH 2/3] arb_shader_precision: add framework for calculating tolerances for complex functions
Micah Fedke
micah.fedke at collabora.co.uk
Tue Feb 24 15:49:07 PST 2015
The bigfloat package is a necessary dependency for this filter, as it provides
both arbitrary-size floating point operations and it allows all floating point
values and operations in a code block to be forced into a particular precision,
leaving no question as to what precision was employed for an intermediate
calculation. This comes in handy when running the reference versions of the
complex built-ins. Performance impact is small (~6s for the entire
gen_shader_precision_tests.py script on IvyBridge i7 @ 2.9GHz) and is localized
to build-time.
---
CMakeLists.txt | 1 +
cmake/Modules/FindPythonBigfloat.cmake | 43 ++++
generated_tests/gen_shader_precision_tests.py | 357 +++++++++++++++++++++++++-
3 files changed, 388 insertions(+), 13 deletions(-)
create mode 100644 cmake/Modules/FindPythonBigfloat.cmake
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1a81eed..b13acdb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -193,6 +193,7 @@ find_package(PythonInterp 2.7 REQUIRED)
find_package(PythonNumpy 1.6.2 REQUIRED)
find_package(PythonMako 0.7.3 REQUIRED)
find_package(PythonSix REQUIRED)
+find_package(PythonBigfloat 3.1.0 REQUIRED)
# Default to compiling with debug information (`gcc -g`):
if(NOT CMAKE_BUILD_TYPE)
diff --git a/cmake/Modules/FindPythonBigfloat.cmake b/cmake/Modules/FindPythonBigfloat.cmake
new file mode 100644
index 0000000..0b65b55
--- /dev/null
+++ b/cmake/Modules/FindPythonBigfloat.cmake
@@ -0,0 +1,43 @@
+# Copyright (C) 2015 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Find bigfloat
+execute_process(
+ COMMAND ${PYTHON_EXECUTABLE} -c "import sys, bigfloat; sys.stdout.write(bigfloat.MPFR_VERSION_STRING)"
+ OUTPUT_VARIABLE _version
+ RESULT_VARIABLE _status
+ ERROR_QUIET
+ OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+
+set(PythonBigfloat_VERSION_STRING ${_version})
+
+# A status returns 0 if everything is okay. And zero is false. To make
+# checking in the outer scope less surprising
+if (_status EQUAL 0)
+ set("PythonBigfloat_STATUS" "success")
+endif (_status EQUAL 0)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(
+ "PythonBigfloat"
+ REQUIRED_VARS "PythonBigfloat_STATUS"
+ VERSION_VAR "PythonBigfloat_VERSION_STRING"
+)
diff --git a/generated_tests/gen_shader_precision_tests.py b/generated_tests/gen_shader_precision_tests.py
index 6c011a3..3492c87 100644
--- a/generated_tests/gen_shader_precision_tests.py
+++ b/generated_tests/gen_shader_precision_tests.py
@@ -50,31 +50,362 @@ from __future__ import print_function, division, absolute_import
from builtin_function import *
import os
import numpy
+import struct
+import bigfloat
import six
from six.moves import range
from templates import template_file
-tolerances = {'pow': 16.0,
- 'exp': 3.0,
- 'exp2': 3.0,
- 'log': 3.0,
- 'log2': 3.0,
- 'sqrt': 3.0,
- 'inversesqrt': 2.0,
- 'op-div': 2.5,
- 'op-assign-div': 2.5,
- }
+
+allowed_error_scale = 4.0
trig_builtins = ('sin', 'cos', 'tan',
'asin', 'acos', 'atan',
'sinh', 'cosh', 'tanh',
'asinh', 'acosh', 'atanh')
+high_precision = bigfloat.precision(113)
+low_precision = bigfloat.precision(23)
+
def _is_sequence(arg):
return isinstance(arg, (collections.Sequence, numpy.ndarray))
+def _floatToBits(f):
+ s = struct.pack('>f', f)
+ return struct.unpack('>l', s)[0]
+
+def _bitsToFloat(b):
+ s = struct.pack('>l', b)
+ return struct.unpack('>f', s)[0]
+
+def _ulpsize(f):
+ """ determine _ulpsize in the direction of nearest infinity
+ which gives the worst case scenario for edge cases
+ """
+ return bigfloat.next_up(f) - f if f >= 0.0 \
+ else f - bigfloat.next_down(f)
+
+def _sanitize_arg(arg):
+ """ if arg is a scalar, convert it to an array
+ if it's an array, return it as-is
+ """
+ return arg if _is_sequence(arg) else [arg]
+
+def _to_bigfloat_list(arg):
+ if type(arg).__name__ == 'BigFloat' or \
+ (_is_sequence(arg) and type(arg[0]).__name__ == 'BigFloat'):
+ return arg
+ else:
+ return map(bigfloat.BigFloat, map('{0:1.8e}'.format, arg))
+
+def _mod_ref(args):
+ bfargs = _to_bigfloat_list(args)
+ x = bfargs[0]
+ y = bfargs[1]
+ return [x - y * (bigfloat.floor(x / y))]
+
+def _smoothstep_ref(args):
+ bfargs = _to_bigfloat_list(args)
+ edge0 = bfargs[0]
+ edge1 = bfargs[1]
+ x = bfargs[2]
+ t = max(min((x - edge0) / (edge1 - edge0), 1.0), 0.0)
+ return [t * t * (3.0 - 2.0 * t)]
+
+def _mix_ref(args):
+ bfargs = _to_bigfloat_list(args)
+ x = bfargs[0]
+ y = bfargs[1]
+ a = bfargs[2]
+ return [x * (1.0 - a) + y * a]
+
+def _length_ref(args):
+ component_sum = bigfloat.BigFloat(0.0)
+ x = _to_bigfloat_list(args[0])
+ for arg in x:
+ component_sum += bigfloat.pow(arg, 2.0)
+ return [bigfloat.sqrt(component_sum)]
+
+def _distance_ref(args):
+ components = len(args[0])
+ difference = []
+ for i in range(components):
+ p0 = _to_bigfloat_list(args[0])[i]
+ p1 = _to_bigfloat_list(args[1])[i]
+ difference.append(p0 - p1)
+ return _length_ref([difference])
+
+def _dot_ref(args):
+ components = len(args[0])
+ x = _to_bigfloat_list(args[0])
+ y = _to_bigfloat_list(args[1])
+ product = bigfloat.BigFloat(0.0)
+ for i in range(components):
+ xcomp = x[i]
+ ycomp = y[i]
+ product += xcomp * ycomp
+ return [product]
+
+def _cross_ref(args):
+ x0 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[0][0]), precision=23)
+ x1 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[0][1]), precision=23)
+ x2 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[0][2]), precision=23)
+ y0 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[1][0]), precision=23)
+ y1 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[1][1]), precision=23)
+ y2 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[1][2]), precision=23)
+ ret = [x1 * y2 - y1 * x2, x2 * y0 - y2 * x0, x0 * y1 - y0 * x1]
+ return ret
+
+def _normalize_ref(args):
+ bfargs = _to_bigfloat_list(args[0])
+ normalized_components = []
+ comp_len = _length_ref(args)
+ for component in bfargs:
+ normalized_components.append(component / comp_len[0])
+ return normalized_components
+
+def _faceforward_ref(args):
+ N = _to_bigfloat_list(args[0])
+ I = _to_bigfloat_list(args[1])
+ Nref = _to_bigfloat_list(args[2])
+ components = len(args[0])
+ if _dot_ref([Nref,I])[0] < 0.0:
+ ret = N
+ else:
+ negN = []
+ for i in range(components):
+ negN.append(N[i] * -1.0)
+ ret = negN
+ return ret
+
+def _reflect_ref(args):
+ I = _to_bigfloat_list(args[0])
+ N = _to_bigfloat_list(args[1])
+ dotNI = _dot_ref([N,I])[0]
+ ret = []
+ for Ncomponent, Icomponent in zip(N,I):
+ ret.append(Icomponent - 2.0 * dotNI * Ncomponent)
+ return ret
+
+def _refract_ref(args):
+ I = _to_bigfloat_list(args[0])
+ N = _to_bigfloat_list(args[1])
+ eta = _to_bigfloat_list(args[2])[0]
+ ret = []
+ k = 1.0 - eta * eta * (1.0 - _dot_ref([N,I])[0] * _dot_ref([N,I])[0])
+ if k < 0.0:
+ for i in range(len(I)):
+ ret.append(bigfloat.BigFloat(0.0))
+ else:
+ Ntemp = []
+ Itemp = []
+ for Ncomponent, Icomponent in zip(N,I):
+ Ntemp = (eta * _dot_ref([N,I])[0] + bigfloat.sqrt(k)) * Ncomponent
+ Itemp = eta * Icomponent
+ ret.append(Itemp - Ntemp)
+ return ret
+
+def _vec_times_mat_ref(args):
+ v = args[0]
+ m = args[1]
+ m_type = glsl_type_of(m)
+ num_cols = m_type.num_cols
+ num_rows = m_type.num_rows
+ components = num_cols
+ ret = []
+ for i in range(components):
+ m_col = []
+ for j in range(num_rows):
+ m_col.append(m[j][i])
+ ret.append(_dot_ref([v,m_col])[0])
+ return ret
+
+def _mat_times_vec_ref(args):
+ m = args[0]
+ v = args[1]
+ m_type = glsl_type_of(m)
+ num_rows = m_type.num_rows
+ components = num_rows
+ ret = []
+ for i in range(components):
+ m_col = m[i]
+ ret.append(_dot_ref([v,m_col])[0])
+ return ret
+
+def _mat_times_mat_ref(args):
+ mx = args[0]
+ my = args[1]
+ mx_type = glsl_type_of(mx)
+ my_type = glsl_type_of(my)
+ ret = []
+ for i in range(mx_type.num_rows):
+ for j in range(my_type.num_cols):
+ my_col = []
+ for k in range(my_type.num_rows):
+ my_col.append(my[k][j])
+ ret.append(_dot_ref([mx[i],my_col])[0])
+ return ret
+
+def _capture_error(precise, imprecise):
+ """Perform the legwork of calculating the difference in error of the high
+ precision and low precision runs. Decide whether this difference in error
+ is within allowable tolerances. The range of allowable tolerances is
+ subjective, as ARB_shader_precision (and GLSL spec as of v4.5) gives no
+ direct guidance for complex functions. Toronto, et. al. use quadrupled
+ error as a limit in "Practically Accurate Floating-Point Math," Computing
+ Now, Oct. 2014. Also use the difference in error and the value of one ulp
+ at the output to calculate the tolerance range in ulps for use by the
+ shader test, should this vector pass the badlands check.
+ """
+
+ errors = []
+ badlands = []
+ component_tolerances = []
+ with high_precision:
+ error = bigfloat.abs(precise - imprecise)
+ errors.append(error)
+ with low_precision:
+ ulpsz = _ulpsize(imprecise)
+ with high_precision:
+ badlands.append(error > ulpsz * allowed_error_scale)
+ component_tolerances.append(bigfloat.round(error / ulpsz))
+ return errors, badlands, component_tolerances
+
+def _analyze_ref_fn(fn, args):
+ """Many functions contain ill-conditioned regions referred to as "badlands"
+ (see Toronto, et. al., "Practically Accurate Floating-Point Math,"
+ Computing Now, Oct. 2014). Within these regions errors in the inputs are
+ magnified significantly, making the function impossible to test with any
+ reasonable accuracy. A complex function that operates on floating point
+ numbers has the potential to generate such error propagation even if the
+ inputs are exact floating point numbers, since intermediate results can be
+ generated with error. In order to identify and avoid these areas, we run
+ the function once at a lower precision and once at a higher precision, and
+ compare the outputs. Propagating errors will be greater at lower precision
+ and less at higher precision for a given set of function inputs, allowing
+ us to identify the badlands of the function.
+ """
+
+ errors = []
+ badlands = []
+ component_tolerances = []
+ with high_precision:
+ precise = fn(args)
+ with low_precision:
+ imprecise = fn(args)
+ for i, arg in enumerate(imprecise):
+ cur_errors, cur_badlands, cur_component_tolerances = _capture_error(precise[i], arg)
+ errors.extend(cur_errors)
+ badlands.extend(cur_badlands)
+ component_tolerances.extend(cur_component_tolerances)
+ return errors, badlands, component_tolerances
+
+
+# These three dicts (simple_fns, complex_fns and mult_fns) divide the names of
+# the glsl built-in operations into three mutually exclusive groups. They do
+# not need to cover all built-ins however, as a default tolerance is assigned
+# for names not appearing in these dicts.
+#
+# simple_fns are accompanied by a single blanket tolerance value used for all
+# calculations (these come from the EXT).
+#
+# complex_fns are accompanied by a reference to a function that can be used to
+# calculate a list of non-uniform tolerances.
+#
+# the two mult_fns (mult and assign-mult) are special in that they can act like
+# a simple function when multiplying floats or vec members together in a
+# componentwise manner, or they can act like complex functions when matrices
+# are involved and they switch to matrix multiplication rules.
+simple_fns = {'op-div': 2.5,
+ 'op-assign-div': 2.5,
+ 'pow': 16.0,
+ 'exp': 3.0,
+ 'exp2': 3.0,
+ 'log': 3.0,
+ 'log2': 3.0,
+ 'sqrt': 3.0,
+ 'inversesqrt': 2.0}
+
+complex_fns = {'mod': _mod_ref,
+ 'smoothstep': _smoothstep_ref,
+ 'mix': _mix_ref,
+ 'length': _length_ref,
+ 'distance': _distance_ref,
+ 'dot': _dot_ref,
+ 'cross': _cross_ref,
+ 'normalize': _normalize_ref,
+ 'faceforward': _faceforward_ref,
+ 'reflect': _reflect_ref,
+ 'refract': _refract_ref}
+
+mult_fns = {'op-mult': 0.0,
+ 'op-assign-mult': 0.0}
+
+# the complex functions listed here operate in a componentwise manner - the rest do not
+componentwise = ('mod', 'mix', 'smoothstep' )
+
+def _gen_tolerance(name, rettype, args):
+ """Return the tolerance that should be allowed for a function for the
+ test vector passed in. Return -1 for any vectors that would push the
+ tolerance outside of acceptable bounds
+ """
+ if name in simple_fns:
+ return simple_fns[name]
+ elif name in complex_fns:
+ if name in componentwise:
+ # for componentwise functions, call the reference function
+ # for each component (scalar components multiplex here)
+ # eg. for fn(float a, vec2 b, vec2 c) call:
+ # _fn_ref(a, b[0], c[0])
+ # _fn_ref(a, b[1], c[1])
+ # and then capture the results in a list
+ errors = []
+ badlands = []
+ component_tolerances = []
+ for component in range(rettype.num_cols * rettype.num_rows):
+ current_args = []
+ for arg in args:
+ # sanitize args so that all analyze operations can be performed on a list
+ sanitized_arg = _sanitize_arg(arg)
+ current_args.append(sanitized_arg[component % len(sanitized_arg)])
+ cur_errors, cur_badlands, cur_component_tolerances = _analyze_ref_fn(complex_fns[name], current_args)
+ errors.extend(cur_errors)
+ badlands.extend(cur_badlands)
+ component_tolerances.extend(cur_component_tolerances)
+ else:
+ # for non-componentwise functions, all args must be passed in in a single run
+ current_args = []
+ for arg in args:
+ # sanitize args so that all analyze operations can be performed on a list
+ current_args.append(_sanitize_arg(arg))
+ errors, badlands, component_tolerances = _analyze_ref_fn(complex_fns[name], current_args)
+ # results are in a list, and may have one or more members
+ return -1.0 if True in badlands else map(float, component_tolerances)
+ elif name in mult_fns:
+ x_type = glsl_type_of(args[0])
+ y_type = glsl_type_of(args[1])
+ # if matrix types are involved, the multiplication will be matrix multiplication
+ # and we will have to pass the args through a reference function
+ if x_type.is_vector and y_type.is_matrix:
+ mult_func = _vec_times_mat_ref
+ elif x_type.is_matrix and y_type.is_vector:
+ mult_func = _mat_times_vec_ref
+ elif x_type.is_matrix and y_type.is_matrix:
+ mult_func = _mat_times_mat_ref
+ else:
+ # float*float, float*vec, vec*float or vec*vec are all done as
+ # normal multiplication and share a single tolerance
+ return mult_fns[name]
+ # sanitize args so that all analyze operations can be performed on a list
+ errors, badlands, component_tolerances = _analyze_ref_fn(mult_func, _sanitize_arg(args))
+ # results are in a list, and may have one or more members
+ return -1.0 if True in badlands else map(float, component_tolerances)
+ else:
+ # default for any operations without specified tolerances
+ return 0.0
+
def make_indexers(signature):
"""Build a list of strings which index into every possible
value of the result. For example, if the result is a vec2,
@@ -116,7 +447,7 @@ def shader_runner_format(values):
retval = ''
for x in values:
assert isinstance(x, (float, np.float32))
- retval+=' {0:1.8e}'.format(x)
+ retval += ' {0:1.8e}'.format(x)
else:
assert isinstance(values, (float, np.float32))
retval = '{0:1.8e}'.format(values)
@@ -133,7 +464,7 @@ def main():
# Filter the test vectors down to only those which deal exclusively in float types
#and are not trig functions or determinant()
indexers = make_indexers(signature)
- num_elements = signature.rettype.num_cols*signature.rettype.num_rows
+ num_elements = signature.rettype.num_cols * signature.rettype.num_rows
invocation = signature.template.format( *['arg{0}'.format(i)
for i in range(len(signature.argtypes))])
if (signature.rettype.base_type == glsl_float and
@@ -155,7 +486,7 @@ def main():
with open(output_filename, 'w') as f:
f.write(template.render_unicode( signature=signature,
test_vectors=test_vectors,
- tolerances=tolerances,
+ tolerances=simple_fns,
invocation=invocation,
num_elements=num_elements,
indexers=indexers,
--
2.2.2
More information about the Piglit
mailing list