Mesa (master): llvmpipe: add support for b5g6r5_srgb

Fri Mar 21 16:24:05 UTC 2014

Module: Mesa
Branch: master
Commit: 9477d8c862b206c35de0bc957a08524188abc898
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9477d8c862b206c35de0bc957a08524188abc898

Author: Roland Scheidegger <sroland at vmware.com>
Date:   Thu Mar 20 16:43:36 2014 +0100

llvmpipe: add support for b5g6r5_srgb

The conversion code for srgb was tuned for n x 4x8bit AoS -> 4 x nxfloat SoA
(and vice versa); extend it to also handle 16bit 565-style srgb formats.
This is still not really all that generic: things like r10g10b10a2_srgb or
r4g4b4a4_srgb wouldn't work (the latter is trivial to fix; the former would
not require more work to avoid crashing but would almost certainly need some
higher-precision calculation), but they are not needed right now.
The code is not fully optimized for this (could use more direct calculation
instead of expanding to 8-bit range first) but should be good enough.

Reviewed-by: Jose Fonseca <jfonseca at vmware.com>

---

 src/gallium/auxiliary/gallivm/lp_bld_format.h      |    1 +
 src/gallium/auxiliary/gallivm/lp_bld_format_soa.c  |    3 +-
 src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c |   26 +++++++++++--
 src/gallium/drivers/llvmpipe/lp_screen.c           |    1 +
 src/gallium/drivers/llvmpipe/lp_state_fs.c         |   39 ++++++++++++++++++--
 5 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h
index a7a4ba0..1177fb2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -167,6 +167,7 @@ lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
 LLVMValueRef
 lp_build_srgb_to_linear(struct gallivm_state *gallivm,
                         struct lp_type src_type,
+                        unsigned chan_bits,
                         LLVMValueRef src);
 
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 81cd2b0..ff2887e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -165,13 +165,12 @@ lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
 
          if (type.floating) {
             if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
-               assert(width == 8);
                if (format_desc->swizzle[3] == chan) {
                   input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
                }
                else {
                   struct lp_type conv_type = lp_uint_type(type);
-                  input = lp_build_srgb_to_linear(gallivm, conv_type, input);
+                  input = lp_build_srgb_to_linear(gallivm, conv_type, width, input);
                }
             }
             else {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
index 6645151..e4849fe 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
@@ -88,11 +88,12 @@
  *   (3rd order polynomial is required for crappy but just sufficient accuracy)
  *
  * @param src   integer (vector) value(s) to convert
- *              (8 bit values unpacked to 32 bit already).
+ *              (chan_bits bit values unpacked to 32 bit already).
  */
 LLVMValueRef
 lp_build_srgb_to_linear(struct gallivm_state *gallivm,
                         struct lp_type src_type,
+                        unsigned chan_bits,
                         LLVMValueRef src)
 {
    struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32);
@@ -105,6 +106,8 @@ lp_build_srgb_to_linear(struct gallivm_state *gallivm,
    };
 
    assert(src_type.width == 32);
+   /* Technically this would work with more bits too but would be inaccurate. */
+   assert(chan_bits <= 8);
 
    lp_build_context_init(&f32_bld, gallivm, f32_type);
 
@@ -124,6 +127,12 @@ lp_build_srgb_to_linear(struct gallivm_state *gallivm,
     */
    /* doing the 1/255 mul as part of the approximation */
    srcf = lp_build_int_to_float(&f32_bld, src);
+   if (chan_bits != 8) {
+      /* could adjust all the constants instead */
+      LLVMValueRef rescale_const = lp_build_const_vec(gallivm, f32_type,
+                                                      255.0f / ((1 << chan_bits) - 1));
+      srcf = lp_build_mul(&f32_bld, srcf, rescale_const);
+   }
    lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f * 255.0f));
    part_lin = lp_build_mul(&f32_bld, srcf, lin_const);
 
@@ -150,6 +159,7 @@ lp_build_srgb_to_linear(struct gallivm_state *gallivm,
 static LLVMValueRef
 lp_build_linear_to_srgb(struct gallivm_state *gallivm,
                         struct lp_type src_type,
+                        unsigned chan_bits,
                         LLVMValueRef src)
 {
    LLVMBuilderRef builder = gallivm->builder;
@@ -292,6 +302,13 @@ lp_build_linear_to_srgb(struct gallivm_state *gallivm,
    is_linear = lp_build_compare(gallivm, src_type, PIPE_FUNC_LEQUAL, src, lin_thresh);
    tmp = lp_build_select(&f32_bld, is_linear, lin, pow_final);
 
+   if (chan_bits != 8) {
+      /* could adjust all the constants instead */
+      LLVMValueRef rescale_const = lp_build_const_vec(gallivm, src_type,
+                                                      ((1 << chan_bits) - 1) / 255.0f);
+      tmp = lp_build_mul(&f32_bld, tmp, rescale_const);
+   }
+
    f32_bld.type.sign = 0;
    return lp_build_iround(&f32_bld, tmp);
 }
@@ -300,7 +317,9 @@ lp_build_linear_to_srgb(struct gallivm_state *gallivm,
 /**
  * Convert linear float soa values to packed srgb AoS values.
  * This only handles packed formats which are 4x8bit in size
- * (rgba and rgbx plus swizzles).
+ * (rgba and rgbx plus swizzles), and 16bit 565-style formats
+ * with no alpha. (In the latter case the return values won't be
+ * fully packed, it will look like r5g6b5x16r5g6b5x16...)
  *
  * @param src   float SoA (vector) values to convert.
  */
@@ -320,7 +339,8 @@ lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
 
    /* rgb is subject to linear->srgb conversion, alpha is not */
    for (chan = 0; chan < 3; chan++) {
-      tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, src[chan]);
+      unsigned chan_bits = dst_fmt->channel[dst_fmt->swizzle[chan]].size;
+      tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, chan_bits, src[chan]);
    }
    /*
     * can't use lp_build_conv since we want to keep values as 32bit
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index c8e95fe..fe06e34 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -342,6 +342,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
 
    if (bind & PIPE_BIND_RENDER_TARGET) {
       if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+         /* this is a lie: other formats COULD actually exist where we would fail */
          if (format_desc->nr_channels < 3)
             return FALSE;
       }
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 2f9f907..5e28f0e 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -868,12 +868,12 @@ lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
    unsigned chan;
 
    if (format_expands_to_float_soa(format_desc)) {
-      /* just make this a 32bit uint */
+      /* just make this a uint with width of block */
       type->floating = false;
       type->fixed = false;
       type->sign = false;
       type->norm = false;
-      type->width = 32;
+      type->width = format_desc->block.bits;
       type->length = 1;
       return;
    }
@@ -1137,12 +1137,24 @@ convert_to_blend_type(struct gallivm_state *gallivm,
        * This is pretty suboptimal for this case blending in SoA would be much
        * better, since conversion gets us SoA values so need to convert back.
        */
-      assert(src_type.width == 32);
+      assert(src_type.width == 32 || src_type.width == 16);
       assert(dst_type.floating);
       assert(dst_type.width == 32);
       assert(dst_type.length % 4 == 0);
       assert(num_srcs % 4 == 0);
 
+      if (src_type.width == 16) {
+         /* expand 4x16bit values to 4x32bit */
+         struct lp_type type32x4 = src_type;
+         LLVMTypeRef ltype32x4;
+         unsigned num_fetch = dst_type.length == 8 ? num_srcs / 2 : num_srcs / 4;
+         type32x4.width = 32;
+         ltype32x4 = lp_build_vec_type(gallivm, type32x4);
+         for (i = 0; i < num_fetch; i++) {
+            src[i] = LLVMBuildZExt(builder, src[i], ltype32x4, "");
+         }
+         src_type.width = 32;
+      }
       for (i = 0; i < 4; i++) {
          tmpsrc[i] = src[i];
       }
@@ -1298,7 +1310,7 @@ convert_from_blend_type(struct gallivm_state *gallivm,
       assert(src_type.floating);
       assert(src_type.width == 32);
       assert(src_type.length % 4 == 0);
-      assert(dst_type.width == 32);
+      assert(dst_type.width == 32 || dst_type.width == 16);
 
       for (i = 0; i < num_srcs / 4; i++) {
          LLVMValueRef tmpsoa[4], tmpdst;
@@ -1333,6 +1345,25 @@ convert_from_blend_type(struct gallivm_state *gallivm,
             src[i] = tmpdst;
          }
       }
+      if (dst_type.width == 16) {
+         struct lp_type type16x8 = dst_type;
+         struct lp_type type32x4 = dst_type;
+         LLVMTypeRef ltype16x4, ltypei64, ltypei128;
+         unsigned num_fetch = src_type.length == 8 ? num_srcs / 2 : num_srcs / 4;
+         type16x8.length = 8;
+         type32x4.width = 32;
+         ltypei128 = LLVMIntTypeInContext(gallivm->context, 128);
+         ltypei64 = LLVMIntTypeInContext(gallivm->context, 64);
+         ltype16x4 = lp_build_vec_type(gallivm, dst_type);
+         /* We could do vector truncation but it doesn't generate very good code */
+         for (i = 0; i < num_fetch; i++) {
+            src[i] = lp_build_pack2(gallivm, type32x4, type16x8,
+                                    src[i], lp_build_zero(gallivm, type32x4));
+            src[i] = LLVMBuildBitCast(builder, src[i], ltypei128, "");
+            src[i] = LLVMBuildTrunc(builder, src[i], ltypei64, "");
+            src[i] = LLVMBuildBitCast(builder, src[i], ltype16x4, "");
+         }
+      }
       return;
    }