[Mesa-dev] [PATCH] util: implement table-based + linear interpolation linear-to-srgb conversion

Mon Aug 5 20:40:22 PDT 2013

Looks good to me. A small comment above the disabled version noting that it's disabled because it's a bit slower might be useful for the next person who reads the code.

Reviewed-by: Zack Rusin <zackr at vmware.com>

----- Original Message -----
> From: Roland Scheidegger <sroland at vmware.com>
> 
> Should be much faster, and seems to work in softpipe.
> While here (even though that code is now disabled), fix up the pow factor:
> the former value is what is in GL core, but it is not actually accurate to
> fp32 precision (the exact exponent is 1.0/2.4), and if someone does want to
> do all the accurate math there's no reason to waste 8 mantissa bits or so...
> 
> v2: use a real table-generating function instead of just printing the values
> (it might take a bit longer, as it does calculations on some 3+ million
> floats, but it is obviously much more descriptive).
> Also fix up another pow factor (this time in the python code) - I was
> wondering where the couple of one-bit errors came from :-(.
> ---
>  src/gallium/auxiliary/util/u_format_srgb.h  |   55 +++++++++++++++++++++-----
>  src/gallium/auxiliary/util/u_format_srgb.py |   57 ++++++++++++++++++++++++++-
>  2 files changed, 101 insertions(+), 11 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/util/u_format_srgb.h
> b/src/gallium/auxiliary/util/u_format_srgb.h
> index 82ed957..f3e1b20 100644
> --- a/src/gallium/auxiliary/util/u_format_srgb.h
> +++ b/src/gallium/auxiliary/util/u_format_srgb.h
> @@ -39,6 +39,7 @@
>  
>  
>  #include "pipe/p_compiler.h"
> +#include "u_pack_color.h"
>  #include "u_math.h"
>  
>  
> @@ -51,23 +52,57 @@ util_format_srgb_to_linear_8unorm_table[256];
>  extern const uint8_t
>  util_format_linear_to_srgb_8unorm_table[256];
>  
> +extern const unsigned
> +util_format_linear_to_srgb_helper_table[104];
> +
>  
>  /**
>   * Convert a unclamped linear float to srgb value in the [0,255].
> - * XXX this hasn't been tested (render to srgb surface).
> - * XXX this needs optimization.
>   */
>  static INLINE uint8_t
>  util_format_linear_float_to_srgb_8unorm(float x)
>  {
> -   if (x >= 1.0f)
> -      return 255;
> -   else if (x >= 0.0031308f)
> -      return float_to_ubyte(1.055f * powf(x, 0.41666f) - 0.055f);
> -   else if (x > 0.0f)
> -      return float_to_ubyte(12.92f * x);
> -   else
> -      return 0;
> +   if (0) {
> +      if (x >= 1.0f)
> +         return 255;
> +      else if (x >= 0.0031308f)
> +         return float_to_ubyte(1.055f * powf(x, 0.41666666f) - 0.055f);
> +      else if (x > 0.0f)
> +         return float_to_ubyte(12.92f * x);
> +      else
> +         return 0;
> +   }
> +   else {
> +      /*
> +       * This is taken from https://gist.github.com/rygorous/2203834
> +       * Use LUT and do linear interpolation.
> +       */
> +      union fi almostone, minval, f;
> +      unsigned tab, bias, scale, t;
> +
> +      almostone.ui = 0x3f7fffff;
> +      minval.ui = (127-13) << 23;
> +
> +      /*
> +       * Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
> +       * The tests are carefully written so that NaNs map to 0, same as in the
> +       * reference implementation.
> +       */
> +      if (!(x > minval.f))
> +         x = minval.f;
> +      if (x > almostone.f)
> +         x = almostone.f;
> +
> +      /* Do the table lookup and unpack bias, scale */
> +      f.f = x;
> +      tab = util_format_linear_to_srgb_helper_table[(f.ui - minval.ui) >>
> 20];
> +      bias = (tab >> 16) << 9;
> +      scale = tab & 0xffff;
> +
> +      /* Grab next-highest mantissa bits and perform linear interpolation */
> +      t = (f.ui >> 12) & 0xff;
> +      return (uint8_t) ((bias + scale*t) >> 16);
> +   }
>  }
>  
>  
> diff --git a/src/gallium/auxiliary/util/u_format_srgb.py
> b/src/gallium/auxiliary/util/u_format_srgb.py
> index cd63ae7..c6c02f0 100644
> --- a/src/gallium/auxiliary/util/u_format_srgb.py
> +++ b/src/gallium/auxiliary/util/u_format_srgb.py
> @@ -40,6 +40,7 @@ CopyRight = '''
>  
>  
>  import math
> +import struct
>  
>  
>  def srgb_to_linear(x):
> @@ -51,10 +52,11 @@ def srgb_to_linear(x):
>  
>  def linear_to_srgb(x):
>      if x >= 0.0031308:
> -        return 1.055 * math.pow(x, 0.41666) - 0.055
> +        return 1.055 * math.pow(x, 0.41666666) - 0.055
>      else:
>          return 12.92 * x
>  
> +
>  def generate_srgb_tables():
>      print 'const float'
>      print 'util_format_srgb_8unorm_to_linear_float_table[256] = {'
> @@ -84,6 +86,59 @@ def generate_srgb_tables():
>      print '};'
>      print
>  
> +# calculate the table interpolation values used in float linear to unorm8 srgb
> +    numexp = 13
> +    mantissa_msb = 3
> +# stepshift is just used to only use every x-th float to make things faster,
> +# 5 is largest value which still gives exact same table as 0
> +    stepshift = 5
> +    nbuckets = numexp << mantissa_msb
> +    bucketsize = (1 << (23 - mantissa_msb)) >> stepshift
> +    mantshift = 12
> +    valtable = []
> +    sum_aa = float(bucketsize)
> +    sum_ab = 0.0
> +    sum_bb = 0.0
> +    for i in range(0, bucketsize):
> +        j = (i << stepshift) >> mantshift
> +        sum_ab += j
> +        sum_bb += j*j
> +    inv_det = 1.0 / (sum_aa * sum_bb - sum_ab * sum_ab)
> +
> +    for bucket in range(0, nbuckets):
> +        start = ((127 - numexp) << 23) + bucket*(bucketsize << stepshift)
> +        sum_a = 0.0
> +        sum_b = 0.0
> +
> +        for i in range(0, bucketsize):
> +            j = (i << stepshift) >> mantshift
> +            fint = start + (i << stepshift)
> +            ffloat = struct.unpack('f', struct.pack('I', fint))[0]
> +            val = linear_to_srgb(ffloat) * 255.0 + 0.5
> +            sum_a += val
> +            sum_b += j*val
> +
> +        solved_a = inv_det * (sum_bb*sum_a - sum_ab*sum_b)
> +        solved_b = inv_det * (sum_aa*sum_b - sum_ab*sum_a)
> +
> +        scaled_a = solved_a * 65536.0 / 512.0
> +        scaled_b = solved_b * 65536.0
> +
> +        int_a = int(scaled_a + 0.5)
> +        int_b = int(scaled_b + 0.5)
> +
> +        valtable.append((int_a << 16) + int_b)
> +
> +    print 'const unsigned'
> +    print 'util_format_linear_to_srgb_helper_table[104] = {'
> +
> +    for j in range(0, nbuckets, 4):
> +        print '   ',
> +        for i in range(j, j + 4):
> +            print '0x%08x,' % (valtable[i],),
> +        print
> +    print '};'
> +    print
>  
>  def main():
>      print '/* This file is autogenerated by u_format_srgb.py. Do not edit
>      directly. */'
> --
> 1.7.9.5
>