[Piglit] [PATCH 2/3] arb_shader_precision: add framework for calculating tolerances for complex functions
Ilia Mirkin
imirkin at alum.mit.edu
Tue Feb 24 16:14:32 PST 2015
High-level -- I'm still confused by the whole array vs not-array
thing, I don't understand what functions receive under various
situations.
On Tue, Feb 24, 2015 at 6:49 PM, Micah Fedke
<micah.fedke at collabora.co.uk> wrote:
> The bigfloat package is a necessary dependency for this filter, as it provides
Among other things, bigfloat is not in the package repo on gentoo.
Having deps like that (which aren't easy to install) increases the
barrier to entry for piglit users. Please investigate whether you can
achieve the same effects without the bigfloat package. e.g. can you
use numpy.float96/float128?
> both arbitrary-size floating point operations and it allows all floating point
> values and operations in a code block to be forced into a particular precision,
> leaving no question as to what precision was employed for an intermediate
> calculation. This comes in handy when running the reference versions of the
> complex built-ins. Performance impact is small (~6s for the entire
> gen_shader_precision_tests.py script on IvyBridge i7 @ 2.9GHz) and is localized
> to build-time.
> ---
> CMakeLists.txt | 1 +
> cmake/Modules/FindPythonBigfloat.cmake | 43 ++++
> generated_tests/gen_shader_precision_tests.py | 357 +++++++++++++++++++++++++-
> 3 files changed, 388 insertions(+), 13 deletions(-)
> create mode 100644 cmake/Modules/FindPythonBigfloat.cmake
>
> diff --git a/CMakeLists.txt b/CMakeLists.txt
> index 1a81eed..b13acdb 100644
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -193,6 +193,7 @@ find_package(PythonInterp 2.7 REQUIRED)
> find_package(PythonNumpy 1.6.2 REQUIRED)
> find_package(PythonMako 0.7.3 REQUIRED)
> find_package(PythonSix REQUIRED)
> +find_package(PythonBigfloat 3.1.0 REQUIRED)
>
> # Default to compiling with debug information (`gcc -g`):
> if(NOT CMAKE_BUILD_TYPE)
> diff --git a/cmake/Modules/FindPythonBigfloat.cmake b/cmake/Modules/FindPythonBigfloat.cmake
> new file mode 100644
> index 0000000..0b65b55
> --- /dev/null
> +++ b/cmake/Modules/FindPythonBigfloat.cmake
> @@ -0,0 +1,43 @@
> +# Copyright (C) 2015 Intel Corporation
> +#
> +# Permission is hereby granted, free of charge, to any person obtaining
> +# a copy of this software and associated documentation files (the "Software"),
> +# to deal in the Software without restriction, including without limitation
> +# the rights to use, copy, modify, merge, publish, distribute, sublicense,
> +# and/or sell copies of the Software, and to permit persons to whom the
> +# Software is furnished to do so, subject to the following conditions:
> +#
> +# The above copyright notice and this permission notice shall be included
> +# in all copies or substantial portions of the Software.
> +#
> +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
> +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
> +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
> +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
> +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
> +# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> +
> +# Find bigfloat
> +execute_process(
> + COMMAND ${PYTHON_EXECUTABLE} -c "import sys, bigfloat; sys.stdout.write(bigfloat.MPFR_VERSION_STRING)"
> + OUTPUT_VARIABLE _version
> + RESULT_VARIABLE _status
> + ERROR_QUIET
> + OUTPUT_STRIP_TRAILING_WHITESPACE
> +)
> +
> +set(PythonBigfloat_VERSION_STRING ${_version})
> +
> +# A status returns 0 if everything is okay. And zero is false. To make
> +# checking in the outer scope less surprising
> +if (_status EQUAL 0)
> + set("PythonBigfloat_STATUS" "success")
> +endif (_status EQUAL 0)
> +
> +include(FindPackageHandleStandardArgs)
> +find_package_handle_standard_args(
> + "PythonBigfloat"
> + REQUIRED_VARS "PythonBigfloat_STATUS"
> + VERSION_VAR "PythonBigfloat_VERSION_STRING"
> +)
> diff --git a/generated_tests/gen_shader_precision_tests.py b/generated_tests/gen_shader_precision_tests.py
> index 6c011a3..3492c87 100644
> --- a/generated_tests/gen_shader_precision_tests.py
> +++ b/generated_tests/gen_shader_precision_tests.py
> @@ -50,31 +50,362 @@ from __future__ import print_function, division, absolute_import
> from builtin_function import *
> import os
> import numpy
> +import struct
> +import bigfloat
>
> import six
> from six.moves import range
>
> from templates import template_file
>
> -tolerances = {'pow': 16.0,
> - 'exp': 3.0,
> - 'exp2': 3.0,
> - 'log': 3.0,
> - 'log2': 3.0,
> - 'sqrt': 3.0,
> - 'inversesqrt': 2.0,
> - 'op-div': 2.5,
> - 'op-assign-div': 2.5,
> - }
> +
> +allowed_error_scale = 4.0
>
> trig_builtins = ('sin', 'cos', 'tan',
> 'asin', 'acos', 'atan',
> 'sinh', 'cosh', 'tanh',
> 'asinh', 'acosh', 'atanh')
>
> +high_precision = bigfloat.precision(113)
> +low_precision = bigfloat.precision(23)
> +
> def _is_sequence(arg):
> return isinstance(arg, (collections.Sequence, numpy.ndarray))
>
> +def _floatToBits(f):
> + s = struct.pack('>f', f)
> + return struct.unpack('>l', s)[0]
> +
> +def _bitsToFloat(b):
> + s = struct.pack('>l', b)
> + return struct.unpack('>f', s)[0]
Should this instead return a bigfloat?
> +
> +def _ulpsize(f):
> + """ determine _ulpsize in the direction of nearest infinity
> + which gives the worst case scenario for edge cases
> + """
> + return bigfloat.next_up(f) - f if f >= 0.0 \
> + else f - bigfloat.next_down(f)
Clever syntax, but a lot less readable than
if f >= 0:
return bigfloat.next_up(f) - f
else:
return f - bigfloat.next_down(f)
> +
> +def _sanitize_arg(arg):
How about
def _listify(arg)
or something else that conveys the nature of what you're doing
> + """ if arg is a scalar, convert it to an array
> + if it's an array, return it as-is
> + """
> + return arg if _is_sequence(arg) else [arg]
> +
> +def _to_bigfloat_list(arg):
> + if type(arg).__name__ == 'BigFloat' or \
> + (_is_sequence(arg) and type(arg[0]).__name__ == 'BigFloat'):
> + return arg
This doesn't necessarily return a list, e.g. if arg is a bigfloat.
Also, this opencoded comparison to 'BigFloat' isn't great. Can you use
isinstance?
> + else:
> + return map(bigfloat.BigFloat, map('{0:1.8e}'.format, arg))
This is a case where doing something like
return list(bigfloat.BigFloat('%.8e' % x) for x in arg)
becomes more readable. Also is .8 enough? Although I'd still go with
map if you can just apply bigfloat.BigFloat without the format step.
[I know Dylan will complain about the % for formatting, but if there's
no way to get format to accept the %e notation that _every_ other
thing in the world uses, then... it stinks. And I just can't bring
myself to care about python3.]
> +
> +def _mod_ref(args):
What is 'args' precisely here? Let's say I have
vec4 a, b;
mod(a, b)
Is args an array of 2 things, which in turn might themselves be arrays
or not? If so, I'd much rather _mod_ref take 2 args explicitly. (It'd
also mean that your _to_bigfloat_list thing won't work properly.)
> + bfargs = _to_bigfloat_list(args)
Can you not fix this up in the caller? Or is the caller that
_synthesize_bla in the other file? [In which case this is fine... you
could be fancy and use a decorator, but you don't have to.]
> + x = bfargs[0]
> + y = bfargs[1]
> + return [x - y * (bigfloat.floor(x / y))]
> +
> +def _smoothstep_ref(args):
> + bfargs = _to_bigfloat_list(args)
> + edge0 = bfargs[0]
> + edge1 = bfargs[1]
> + x = bfargs[2]
> + t = max(min((x - edge0) / (edge1 - edge0), 1.0), 0.0)
Do you need to use bigfloat.min/max? Or do the builtins return the
original values?
> + return [t * t * (3.0 - 2.0 * t)]
> +
> +def _mix_ref(args):
> + bfargs = _to_bigfloat_list(args)
> + x = bfargs[0]
> + y = bfargs[1]
> + a = bfargs[2]
> + return [x * (1.0 - a) + y * a]
> +
> +def _length_ref(args):
> + component_sum = bigfloat.BigFloat(0.0)
> + x = _to_bigfloat_list(args[0])
Everywhere else you do it on args. What's different here?
> + for arg in x:
> + component_sum += bigfloat.pow(arg, 2.0)
You can be clever and do
sum((bigfloat.pow(arg, 2.0) for arg in x), BigFloat(0.0))
> + return [bigfloat.sqrt(component_sum)]
> +
> +def _distance_ref(args):
> + components = len(args[0])
> + difference = []
> + for i in range(components):
> + p0 = _to_bigfloat_list(args[0])[i]
> + p1 = _to_bigfloat_list(args[1])[i]
> + difference.append(p0 - p1)
> + return _length_ref([difference])
> +
> +def _dot_ref(args):
> + components = len(args[0])
> + x = _to_bigfloat_list(args[0])
> + y = _to_bigfloat_list(args[1])
> + product = bigfloat.BigFloat(0.0)
> + for i in range(components):
> + xcomp = x[i]
> + ycomp = y[i]
> + product += xcomp * ycomp
> + return [product]
> +
> +def _cross_ref(args):
> + x0 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[0][0]), precision=23)
> + x1 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[0][1]), precision=23)
> + x2 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[0][2]), precision=23)
> + y0 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[1][0]), precision=23)
> + y1 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[1][1]), precision=23)
> + y2 = bigfloat.BigFloat.exact('{0:1.8e}'.format(args[1][2]), precision=23)
> + ret = [x1 * y2 - y1 * x2, x2 * y0 - y2 * x0, x0 * y1 - y0 * x1]
> + return ret
> +
> +def _normalize_ref(args):
> + bfargs = _to_bigfloat_list(args[0])
> + normalized_components = []
> + comp_len = _length_ref(args)
> + for component in bfargs:
> + normalized_components.append(component / comp_len[0])
> + return normalized_components
> +
> +def _faceforward_ref(args):
> + N = _to_bigfloat_list(args[0])
> + I = _to_bigfloat_list(args[1])
> + Nref = _to_bigfloat_list(args[2])
> + components = len(args[0])
> + if _dot_ref([Nref,I])[0] < 0.0:
> + ret = N
> + else:
> + negN = []
> + for i in range(components):
> + negN.append(N[i] * -1.0)
> + ret = negN
> + return ret
> +
> +def _reflect_ref(args):
> + I = _to_bigfloat_list(args[0])
> + N = _to_bigfloat_list(args[1])
> + dotNI = _dot_ref([N,I])[0]
> + ret = []
> + for Ncomponent, Icomponent in zip(N,I):
> + ret.append(Icomponent - 2.0 * dotNI * Ncomponent)
> + return ret
> +
> +def _refract_ref(args):
> + I = _to_bigfloat_list(args[0])
> + N = _to_bigfloat_list(args[1])
> + eta = _to_bigfloat_list(args[2])[0]
> + ret = []
> + k = 1.0 - eta * eta * (1.0 - _dot_ref([N,I])[0] * _dot_ref([N,I])[0])
> + if k < 0.0:
> + for i in range(len(I)):
> + ret.append(bigfloat.BigFloat(0.0))
> + else:
> + Ntemp = []
> + Itemp = []
> + for Ncomponent, Icomponent in zip(N,I):
> + Ntemp = (eta * _dot_ref([N,I])[0] + bigfloat.sqrt(k)) * Ncomponent
> + Itemp = eta * Icomponent
> + ret.append(Itemp - Ntemp)
> + return ret
> +
> +def _vec_times_mat_ref(args):
> + v = args[0]
> + m = args[1]
> + m_type = glsl_type_of(m)
> + num_cols = m_type.num_cols
> + num_rows = m_type.num_rows
> + components = num_cols
> + ret = []
> + for i in range(components):
> + m_col = []
> + for j in range(num_rows):
> + m_col.append(m[j][i])
> + ret.append(_dot_ref([v,m_col])[0])
> + return ret
> +
> +def _mat_times_vec_ref(args):
> + m = args[0]
> + v = args[1]
> + m_type = glsl_type_of(m)
> + num_rows = m_type.num_rows
> + components = num_rows
> + ret = []
> + for i in range(components):
> + m_col = m[i]
> + ret.append(_dot_ref([v,m_col])[0])
> + return ret
> +
> +def _mat_times_mat_ref(args):
> + mx = args[0]
> + my = args[1]
> + mx_type = glsl_type_of(mx)
> + my_type = glsl_type_of(my)
> + ret = []
> + for i in range(mx_type.num_rows):
> + for j in range(my_type.num_cols):
> + my_col = []
> + for k in range(my_type.num_rows):
> + my_col.append(my[k][j])
> + ret.append(_dot_ref([mx[i],my_col])[0])
> + return ret
> +
> +def _capture_error(precise, imprecise):
> + """Perform the legwork of calculating the difference in error of the high
> + precision and low precision runs. Decide whether this difference in error
> + is within allowable tolerances. The range of allowable tolerances is
> + subjective, as ARB_shader_precision (and GLSL spec as of v4.5) gives no
> + direct guidance for complex functions. Toronto, et. al. use quadrupled
> + error as a limit in "Practically Accurate Floating-Point Math," Computing
> + Now, Oct. 2014. Also use the difference in error and the value of one ulp
> + at the output to calculate the tolerance range in ulps for use by the
> + shader test, should this vector pass the badlands check.
> + """
> +
> + errors = []
> + badlands = []
> + component_tolerances = []
> + with high_precision:
> + error = bigfloat.abs(precise - imprecise)
> + errors.append(error)
> + with low_precision:
> + ulpsz = _ulpsize(imprecise)
> + with high_precision:
> + badlands.append(error > ulpsz * allowed_error_scale)
> + component_tolerances.append(bigfloat.round(error / ulpsz))
> + return errors, badlands, component_tolerances
> +
> +def _analyze_ref_fn(fn, args):
> + """Many functions contain ill-conditioned regions referred to as "badlands"
> + (see Toronto, et. al., "Practically Accurate Floating-Point Math,"
> + Computing Now, Oct. 2014). Within these regions errors in the inputs are
> + magnified significantly, making the function impossible to test with any
> + reasonable accuracy. A complex function that operates on floating point
> + numbers has the potential to generate such error propagation even if the
> + inputs are exact floating point numbers, since intermediate results can be
> + generated with error. In order to identify and avoid these areas, we run
> + the function once at a lower precision and once at a higher precision, and
> + compare the outputs. Propagating errors will be greater at lower precision
> + and less at higher precision for a given set of function inputs, allowing
> + us to identify the badlands of the function.
> + """
> +
> + errors = []
> + badlands = []
> + component_tolerances = []
> + with high_precision:
> + precise = fn(args)
> + with low_precision:
> + imprecise = fn(args)
> + for i, arg in enumerate(imprecise):
> + cur_errors, cur_badlands, cur_component_tolerances = _capture_error(precise[i], arg)
> + errors.extend(cur_errors)
> + badlands.extend(cur_badlands)
> + component_tolerances.extend(cur_component_tolerances)
> + return errors, badlands, component_tolerances
> +
> +
> +# These three dicts (simple_fns, complex_fns and mult_fns) divide the names of
> +# the glsl built-in operations into three mutually exclusive groups. They do
> +# not need to cover all built-ins however, as a default tolerance is assigned
> +# for names not appearing in these dicts.
> +#
> +# simple_fns are accompanied by a single blanket tolerance value used for all
> +# calculations (these come from the EXT).
> +#
> +# complex_fns are accompanied by a reference to a function that can be used to
> +# calculate a list of non-uniform tolerances.
> +#
> +# the two mult_fns (mult and assign-mult) are special in that they can act like
> +# a simple function when multiplying floats or vec members together in a
> +# componentwise manner, or they can act like complex functions when matrices
> +# are involved and they switch to matrix multiplication rules.
> +simple_fns = {'op-div': 2.5,
> + 'op-assign-div': 2.5,
> + 'pow': 16.0,
> + 'exp': 3.0,
> + 'exp2': 3.0,
> + 'log': 3.0,
> + 'log2': 3.0,
> + 'sqrt': 3.0,
> + 'inversesqrt': 2.0}
> +
> +complex_fns = {'mod': _mod_ref,
> + 'smoothstep': _smoothstep_ref,
> + 'mix': _mix_ref,
> + 'length': _length_ref,
> + 'distance': _distance_ref,
> + 'dot': _dot_ref,
> + 'cross': _cross_ref,
> + 'normalize': _normalize_ref,
> + 'faceforward': _faceforward_ref,
> + 'reflect': _reflect_ref,
> + 'refract': _refract_ref}
> +
> +mult_fns = {'op-mult': 0.0,
> + 'op-assign-mult': 0.0}
> +
> +# the complex functions listed here operate in a componentwise manner - the rest do not
> +componentwise = ('mod', 'mix', 'smoothstep' )
> +
> +def _gen_tolerance(name, rettype, args):
> + """Return the tolerance that should be allowed for a function for the
> + test vector passed in. Return -1 for any vectors that would push the
> + tolerance outside of acceptable bounds
> + """
> + if name in simple_fns:
> + return simple_fns[name]
> + elif name in complex_fns:
> + if name in componentwise:
> + # for componentwise functions, call the reference function
> + # for each component (scalar components multiplex here)
> + # eg. for fn(float a, vec2 b, vec2 c) call:
> + # _fn_ref(a, b[0], c[0])
> + # _fn_ref(a, b[1], c[1])
> + # and then capture the results in a list
> + errors = []
> + badlands = []
> + component_tolerances = []
> + for component in range(rettype.num_cols * rettype.num_rows):
> + current_args = []
> + for arg in args:
> + # sanitize args so that all analyze operations can be performed on a list
> + sanitized_arg = _sanitize_arg(arg)
> + current_args.append(sanitized_arg[component % len(sanitized_arg)])
> + cur_errors, cur_badlands, cur_component_tolerances = _analyze_ref_fn(complex_fns[name], current_args)
> + errors.extend(cur_errors)
> + badlands.extend(cur_badlands)
> + component_tolerances.extend(cur_component_tolerances)
Or you could just pass the errors/badlands/component_tolerances in as
arguments that are appended-to by the function.
> + else:
> + # for non-componentwise functions, all args must be passed in in a single run
> + current_args = []
> + for arg in args:
> + # sanitize args so that all analyze operations can be performed on a list
> + current_args.append(_sanitize_arg(arg))
This is a good case for
current_args = map(_sanitize_arg, arg)
Or even a comprehension/generator.
> + errors, badlands, component_tolerances = _analyze_ref_fn(complex_fns[name], current_args)
> + # results are in a list, and may have one or more members
> + return -1.0 if True in badlands else map(float, component_tolerances)
> + elif name in mult_fns:
> + x_type = glsl_type_of(args[0])
> + y_type = glsl_type_of(args[1])
> + # if matrix types are involved, the multiplication will be matrix multiplication
> + # and we will have to pass the args through a reference function
> + if x_type.is_vector and y_type.is_matrix:
> + mult_func = _vec_times_mat_ref
> + elif x_type.is_matrix and y_type.is_vector:
> + mult_func = _mat_times_vec_ref
> + elif x_type.is_matrix and y_type.is_matrix:
> + mult_func = _mat_times_mat_ref
> + else:
> + # float*float, float*vec, vec*float or vec*vec are all done as
> + # normal multiplication and share a single tolerance
> + return mult_fns[name]
> + # sanitize args so that all analyze operations can be performed on a list
> + errors, badlands, component_tolerances = _analyze_ref_fn(mult_func, _sanitize_arg(args))
> + # results are in a list, and may have one or more members
> + return -1.0 if True in badlands else map(float, component_tolerances)
> + else:
> + # default for any operations without specified tolerances
> + return 0.0
> +
> def make_indexers(signature):
> """Build a list of strings which index into every possible
> value of the result. For example, if the result is a vec2,
> @@ -116,7 +447,7 @@ def shader_runner_format(values):
> retval = ''
> for x in values:
> assert isinstance(x, (float, np.float32))
> - retval+=' {0:1.8e}'.format(x)
> + retval += ' {0:1.8e}'.format(x)
> else:
> assert isinstance(values, (float, np.float32))
> retval = '{0:1.8e}'.format(values)
> @@ -133,7 +464,7 @@ def main():
> # Filter the test vectors down to only those which deal exclusively in float types
> #and are not trig functions or determinant()
> indexers = make_indexers(signature)
> - num_elements = signature.rettype.num_cols*signature.rettype.num_rows
> + num_elements = signature.rettype.num_cols * signature.rettype.num_rows
> invocation = signature.template.format( *['arg{0}'.format(i)
> for i in range(len(signature.argtypes))])
> if (signature.rettype.base_type == glsl_float and
> @@ -155,7 +486,7 @@ def main():
> with open(output_filename, 'w') as f:
> f.write(template.render_unicode( signature=signature,
> test_vectors=test_vectors,
> - tolerances=tolerances,
> + tolerances=simple_fns,
> invocation=invocation,
> num_elements=num_elements,
> indexers=indexers,
> --
> 2.2.2
>
> _______________________________________________
> Piglit mailing list
> Piglit at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/piglit
More information about the Piglit
mailing list