[Mesa-dev] [PATCH] util: implement table-based + linear interpolation linear-to-srgb conversion
Zack Rusin
zackr at vmware.com
Mon Aug 5 20:40:22 PDT 2013
Looks good to me. A small comment above the disabled version noting that it's disabled because it's a bit slower might be useful for the next person who reads the code.
Reviewed-by: Zack Rusin <zackr at vmware.com>
----- Original Message -----
> From: Roland Scheidegger <sroland at vmware.com>
>
> Should be much faster, seems to work in softpipe.
> While here (although that path is now disabled), fix up the pow factor - the
> former value is what is in GL core; it is, however, not actually accurate to
> fp32 precision (the exponent is 1.0/2.4), and if someone were to do all the
> accurate math there's no reason to waste 8 mantissa bits or so...
>
> v2: use real table generating function instead of just printing the values
> (might take a bit longer as it does calculations on some 3+ million floats
> but much more descriptive obviously).
> Also fix up another pow factor (this time in the python code) - I had been
> wondering where the couple of one-bit errors came from :-(.
> ---
> src/gallium/auxiliary/util/u_format_srgb.h | 55 +++++++++++++++++++++-----
> src/gallium/auxiliary/util/u_format_srgb.py | 57 ++++++++++++++++++++++++++-
> 2 files changed, 101 insertions(+), 11 deletions(-)
>
> diff --git a/src/gallium/auxiliary/util/u_format_srgb.h b/src/gallium/auxiliary/util/u_format_srgb.h
> index 82ed957..f3e1b20 100644
> --- a/src/gallium/auxiliary/util/u_format_srgb.h
> +++ b/src/gallium/auxiliary/util/u_format_srgb.h
> @@ -39,6 +39,7 @@
>
>
> #include "pipe/p_compiler.h"
> +#include "u_pack_color.h"
> #include "u_math.h"
>
>
> @@ -51,23 +52,57 @@ util_format_srgb_to_linear_8unorm_table[256];
> extern const uint8_t
> util_format_linear_to_srgb_8unorm_table[256];
>
> +extern const unsigned
> +util_format_linear_to_srgb_helper_table[104];
> +
>
> /**
> * Convert a unclamped linear float to srgb value in the [0,255].
> - * XXX this hasn't been tested (render to srgb surface).
> - * XXX this needs optimization.
> */
> static INLINE uint8_t
> util_format_linear_float_to_srgb_8unorm(float x)
> {
> - if (x >= 1.0f)
> - return 255;
> - else if (x >= 0.0031308f)
> - return float_to_ubyte(1.055f * powf(x, 0.41666f) - 0.055f);
> - else if (x > 0.0f)
> - return float_to_ubyte(12.92f * x);
> - else
> - return 0;
> + if (0) {
> + if (x >= 1.0f)
> + return 255;
> + else if (x >= 0.0031308f)
> + return float_to_ubyte(1.055f * powf(x, 0.41666666f) - 0.055f);
> + else if (x > 0.0f)
> + return float_to_ubyte(12.92f * x);
> + else
> + return 0;
> + }
> + else {
> + /*
> + * This is taken from https://gist.github.com/rygorous/2203834
> + * Use LUT and do linear interpolation.
> + */
> + union fi almostone, minval, f;
> + unsigned tab, bias, scale, t;
> +
> + almostone.ui = 0x3f7fffff;
> + minval.ui = (127-13) << 23;
> +
> + /*
> + * Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
> + * The tests are carefully written so that NaNs map to 0, same as in the
> + * reference implementation.
> + */
> + if (!(x > minval.f))
> + x = minval.f;
> + if (x > almostone.f)
> + x = almostone.f;
> +
> + /* Do the table lookup and unpack bias, scale */
> + f.f = x;
> + tab = util_format_linear_to_srgb_helper_table[(f.ui - minval.ui) >> 20];
> + bias = (tab >> 16) << 9;
> + scale = tab & 0xffff;
> +
> + /* Grab next-highest mantissa bits and perform linear interpolation */
> + t = (f.ui >> 12) & 0xff;
> + return (uint8_t) ((bias + scale*t) >> 16);
> + }
> }
>
>
> diff --git a/src/gallium/auxiliary/util/u_format_srgb.py b/src/gallium/auxiliary/util/u_format_srgb.py
> index cd63ae7..c6c02f0 100644
> --- a/src/gallium/auxiliary/util/u_format_srgb.py
> +++ b/src/gallium/auxiliary/util/u_format_srgb.py
> @@ -40,6 +40,7 @@ CopyRight = '''
>
>
> import math
> +import struct
>
>
> def srgb_to_linear(x):
> @@ -51,10 +52,11 @@ def srgb_to_linear(x):
>
> def linear_to_srgb(x):
> if x >= 0.0031308:
> - return 1.055 * math.pow(x, 0.41666) - 0.055
> + return 1.055 * math.pow(x, 0.41666666) - 0.055
> else:
> return 12.92 * x
>
> +
> def generate_srgb_tables():
> print 'const float'
> print 'util_format_srgb_8unorm_to_linear_float_table[256] = {'
> @@ -84,6 +86,59 @@ def generate_srgb_tables():
> print '};'
> print
>
> +# calculate the table interpolation values used in float linear to unorm8 srgb
> + numexp = 13
> + mantissa_msb = 3
> +# stepshift is just used to only use every x-th float to make things faster,
> +# 5 is largest value which still gives exact same table as 0
> + stepshift = 5
> + nbuckets = numexp << mantissa_msb
> + bucketsize = (1 << (23 - mantissa_msb)) >> stepshift
> + mantshift = 12
> + valtable = []
> + sum_aa = float(bucketsize)
> + sum_ab = 0.0
> + sum_bb = 0.0
> + for i in range(0, bucketsize):
> + j = (i << stepshift) >> mantshift
> + sum_ab += j
> + sum_bb += j*j
> + inv_det = 1.0 / (sum_aa * sum_bb - sum_ab * sum_ab)
> +
> + for bucket in range(0, nbuckets):
> + start = ((127 - numexp) << 23) + bucket*(bucketsize << stepshift)
> + sum_a = 0.0
> + sum_b = 0.0
> +
> + for i in range(0, bucketsize):
> + j = (i << stepshift) >> mantshift
> + fint = start + (i << stepshift)
> + ffloat = struct.unpack('f', struct.pack('I', fint))[0]
> + val = linear_to_srgb(ffloat) * 255.0 + 0.5
> + sum_a += val
> + sum_b += j*val
> +
> + solved_a = inv_det * (sum_bb*sum_a - sum_ab*sum_b)
> + solved_b = inv_det * (sum_aa*sum_b - sum_ab*sum_a)
> +
> + scaled_a = solved_a * 65536.0 / 512.0
> + scaled_b = solved_b * 65536.0
> +
> + int_a = int(scaled_a + 0.5)
> + int_b = int(scaled_b + 0.5)
> +
> + valtable.append((int_a << 16) + int_b)
> +
> + print 'const unsigned'
> + print 'util_format_linear_to_srgb_helper_table[104] = {'
> +
> + for j in range(0, nbuckets, 4):
> + print ' ',
> + for i in range(j, j + 4):
> + print '0x%08x,' % (valtable[i],),
> + print
> + print '};'
> + print
>
> def main():
> print '/* This file is autogenerated by u_format_srgb.py. Do not edit directly. */'
> --
> 1.7.9.5
>
More information about the mesa-dev
mailing list