[Mesa-dev] [PATCH 2/2] util: implement table-based + linear interpolation linear-to-srgb conversion

Fri Aug 2 15:38:07 PDT 2013

From: Roland Scheidegger <sroland at vmware.com>

Should be much faster, and seems to work in softpipe.
While here, fix up the pow factor (although that code path is now disabled):
the former value, 0.41666, is what the GL core spec gives, but it is not
accurate to fp32 precision (the exact value is 1.0/2.4). If someone does go to
the trouble of doing all the accurate math, there's no reason to waste 8
mantissa bits or so...
---
 src/gallium/auxiliary/util/u_format_srgb.h  |   55 ++++++++++++++++++++++-----
 src/gallium/auxiliary/util/u_format_srgb.py |   33 ++++++++++++++++
 2 files changed, 78 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_format_srgb.h b/src/gallium/auxiliary/util/u_format_srgb.h
index 82ed957..f3e1b20 100644
--- a/src/gallium/auxiliary/util/u_format_srgb.h
+++ b/src/gallium/auxiliary/util/u_format_srgb.h
@@ -39,6 +39,7 @@
 
 
 #include "pipe/p_compiler.h"
+#include "u_pack_color.h"
 #include "u_math.h"
 
 
@@ -51,23 +52,57 @@ util_format_srgb_to_linear_8unorm_table[256];
 extern const uint8_t
 util_format_linear_to_srgb_8unorm_table[256];
 
+extern const unsigned
+util_format_linear_to_srgb_helper_table[104];
+
 
 /**
  * Convert a unclamped linear float to srgb value in the [0,255].
- * XXX this hasn't been tested (render to srgb surface).
- * XXX this needs optimization.
  */
 static INLINE uint8_t
 util_format_linear_float_to_srgb_8unorm(float x)
 {
-   if (x >= 1.0f)
-      return 255;
-   else if (x >= 0.0031308f)
-      return float_to_ubyte(1.055f * powf(x, 0.41666f) - 0.055f);
-   else if (x > 0.0f)
-      return float_to_ubyte(12.92f * x);
-   else
-      return 0;
+   if (0) {
+      if (x >= 1.0f)
+         return 255;
+      else if (x >= 0.0031308f)
+         return float_to_ubyte(1.055f * powf(x, 0.41666666f) - 0.055f);
+      else if (x > 0.0f)
+         return float_to_ubyte(12.92f * x);
+      else
+         return 0;
+   }
+   else {
+      /*
+       * This is taken from https://gist.github.com/rygorous/2203834
+       * Use LUT and do linear interpolation.
+       */
+      union fi almostone, minval, f;
+      unsigned tab, bias, scale, t;
+
+      almostone.ui = 0x3f7fffff;
+      minval.ui = (127-13) << 23;
+
+      /*
+       * Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
+       * The tests are carefully written so that NaNs map to 0, same as in the
+       * reference implementation.
+       */
+      if (!(x > minval.f))
+         x = minval.f;
+      if (x > almostone.f)
+         x = almostone.f;
+
+      /* Do the table lookup and unpack bias, scale */
+      f.f = x;
+      tab = util_format_linear_to_srgb_helper_table[(f.ui - minval.ui) >> 20];
+      bias = (tab >> 16) << 9;
+      scale = tab & 0xffff;
+
+      /* Grab next-highest mantissa bits and perform linear interpolation */
+      t = (f.ui >> 12) & 0xff;
+      return (uint8_t) ((bias + scale*t) >> 16);
+   }
 }
 
 
diff --git a/src/gallium/auxiliary/util/u_format_srgb.py b/src/gallium/auxiliary/util/u_format_srgb.py
index cd63ae7..f95b22f 100644
--- a/src/gallium/auxiliary/util/u_format_srgb.py
+++ b/src/gallium/auxiliary/util/u_format_srgb.py
@@ -84,6 +84,39 @@ def generate_srgb_tables():
     print '};'
     print
 
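+    # Helper table for the LUT-based linear-to-sRGB conversion: each entry packs a bias (upper 16 bits) and a scale (lower 16 bits).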
+    print 'const unsigned'
+    print 'util_format_linear_to_srgb_helper_table[104] = {'
+    print '   0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d,'
+    print '   0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d,'
+    print '   0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a,'
+    print '   0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a,'
+    print '   0x010e0033, 0x01280033, 0x01410033, 0x015b0033,'
+    print '   0x01750033, 0x018f0033, 0x01a80033, 0x01c20033,'
+    print '   0x01dc0067, 0x020f0067, 0x02430067, 0x02760067,'
+    print '   0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067,'
+    print '   0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce,'
+    print '   0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5,'
+    print '   0x06970158, 0x07420142, 0x07e30130, 0x087b0120,'
+    print '   0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2,'
+    print '   0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180,'
+    print '   0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143,'
+    print '   0x11070264, 0x1238023e, 0x1357021d, 0x14660201,'
+    print '   0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af,'
+    print '   0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad,'
+    print '   0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240,'
+    print '   0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392,'
+    print '   0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300,'
+    print '   0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5,'
+    print '   0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401,'
+    print '   0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d,'
+    print '   0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559,'
+    print '   0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f,'
+    print '   0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723,'
+    print '};'
+    print
+
+
 
 def main():
     print '/* This file is autogenerated by u_format_srgb.py. Do not edit directly. */'
-- 
1.7.9.5