[Mesa-dev] [PATCH 2/2] llvmpipe: add support for b5g6r5_srgb

Thu Mar 20 10:41:17 PDT 2014

Series looks good to me.

Jose

----- Original Message -----
> From: Roland Scheidegger <sroland at vmware.com>
> 
> The conversion code for srgb was tuned for n x 4x8bit AoS -> 4 x nxfloat SoA
> (and vice versa); fix this to also handle 16bit 565-style srgb formats.
> Still not really all that generic: things like r10g10b10a2_srgb or
> r4g4b4a4_srgb wouldn't work (the latter is trivial to fix; the former would
> not need more work to avoid crashing, but would almost certainly need some
> higher-precision calculation), but that is not needed right now.
> The code is not fully optimized for this (could use more direct calculation
> instead of expanding to 8-bit range first) but should be good enough.
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_format.h      |    1 +
>  src/gallium/auxiliary/gallivm/lp_bld_format_soa.c  |    3 +-
>  src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c |   26 +++++++++++--
>  src/gallium/drivers/llvmpipe/lp_screen.c           |    1 +
>  src/gallium/drivers/llvmpipe/lp_state_fs.c         |   39
>  ++++++++++++++++++--
>  5 files changed, 61 insertions(+), 9 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h
> b/src/gallium/auxiliary/gallivm/lp_bld_format.h
> index a7a4ba0..1177fb2 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
> @@ -167,6 +167,7 @@ lp_build_float_to_srgb_packed(struct gallivm_state
> *gallivm,
>  LLVMValueRef
>  lp_build_srgb_to_linear(struct gallivm_state *gallivm,
>                          struct lp_type src_type,
> +                        unsigned chan_bits,
>                          LLVMValueRef src);
>  
>  
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
> index 81cd2b0..ff2887e 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
> @@ -165,13 +165,12 @@ lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
>  
>           if (type.floating) {
>              if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
> -               assert(width == 8);
>                 if (format_desc->swizzle[3] == chan) {
>                    input = lp_build_unsigned_norm_to_float(gallivm, width,
>                    type, input);
>                 }
>                 else {
>                    struct lp_type conv_type = lp_uint_type(type);
> -                  input = lp_build_srgb_to_linear(gallivm, conv_type,
> input);
> +                  input = lp_build_srgb_to_linear(gallivm, conv_type, width,
> input);
>                 }
>              }
>              else {
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
> b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
> index 6645151..e4849fe 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
> @@ -88,11 +88,12 @@
>   *   (3rd order polynomial is required for crappy but just sufficient
>   accuracy)
>   *
>   * @param src   integer (vector) value(s) to convert
> - *              (8 bit values unpacked to 32 bit already).
> + *              (chan_bits bit values unpacked to 32 bit already).
>   */
>  LLVMValueRef
>  lp_build_srgb_to_linear(struct gallivm_state *gallivm,
>                          struct lp_type src_type,
> +                        unsigned chan_bits,
>                          LLVMValueRef src)
>  {
>     struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32);
> @@ -105,6 +106,8 @@ lp_build_srgb_to_linear(struct gallivm_state *gallivm,
>     };
>  
>     assert(src_type.width == 32);
> +   /* Technically this would work with more bits too but would be
> inaccurate. */
> +   assert(chan_bits <= 8);
>  
>     lp_build_context_init(&f32_bld, gallivm, f32_type);
>  
> @@ -124,6 +127,12 @@ lp_build_srgb_to_linear(struct gallivm_state *gallivm,
>      */
>     /* doing the 1/255 mul as part of the approximation */
>     srcf = lp_build_int_to_float(&f32_bld, src);
> +   if (chan_bits != 8) {
> +      /* could adjust all the constants instead */
> +      LLVMValueRef rescale_const = lp_build_const_vec(gallivm, f32_type,
> +                                                      255.0f / ((1 <<
> chan_bits) - 1));
> +      srcf = lp_build_mul(&f32_bld, srcf, rescale_const);
> +   }
>     lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f *
>     255.0f));
>     part_lin = lp_build_mul(&f32_bld, srcf, lin_const);
>  
> @@ -150,6 +159,7 @@ lp_build_srgb_to_linear(struct gallivm_state *gallivm,
>  static LLVMValueRef
>  lp_build_linear_to_srgb(struct gallivm_state *gallivm,
>                          struct lp_type src_type,
> +                        unsigned chan_bits,
>                          LLVMValueRef src)
>  {
>     LLVMBuilderRef builder = gallivm->builder;
> @@ -292,6 +302,13 @@ lp_build_linear_to_srgb(struct gallivm_state *gallivm,
>     is_linear = lp_build_compare(gallivm, src_type, PIPE_FUNC_LEQUAL, src,
>     lin_thresh);
>     tmp = lp_build_select(&f32_bld, is_linear, lin, pow_final);
>  
> +   if (chan_bits != 8) {
> +      /* could adjust all the constants instead */
> +      LLVMValueRef rescale_const = lp_build_const_vec(gallivm, src_type,
> +                                                      ((1 << chan_bits) - 1)
> / 255.0f);
> +      tmp = lp_build_mul(&f32_bld, tmp, rescale_const);
> +   }
> +
>     f32_bld.type.sign = 0;
>     return lp_build_iround(&f32_bld, tmp);
>  }
> @@ -300,7 +317,9 @@ lp_build_linear_to_srgb(struct gallivm_state *gallivm,
>  /**
>   * Convert linear float soa values to packed srgb AoS values.
>   * This only handles packed formats which are 4x8bit in size
> - * (rgba and rgbx plus swizzles).
> + * (rgba and rgbx plus swizzles), and 16bit 565-style formats
> + * with no alpha. (In the latter case the return values won't be
> + * fully packed, it will look like r5g6b5x16r5g6b5x16...)
>   *
>   * @param src   float SoA (vector) values to convert.
>   */
> @@ -320,7 +339,8 @@ lp_build_float_to_srgb_packed(struct gallivm_state
> *gallivm,
>  
>     /* rgb is subject to linear->srgb conversion, alpha is not */
>     for (chan = 0; chan < 3; chan++) {
> -      tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, src[chan]);
> +      unsigned chan_bits = dst_fmt->channel[dst_fmt->swizzle[chan]].size;
> +      tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, chan_bits,
> src[chan]);
>     }
>     /*
>      * can't use lp_build_conv since we want to keep values as 32bit
> diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c
> b/src/gallium/drivers/llvmpipe/lp_screen.c
> index c8e95fe..fe06e34 100644
> --- a/src/gallium/drivers/llvmpipe/lp_screen.c
> +++ b/src/gallium/drivers/llvmpipe/lp_screen.c
> @@ -342,6 +342,7 @@ llvmpipe_is_format_supported( struct pipe_screen
> *_screen,
>  
>     if (bind & PIPE_BIND_RENDER_TARGET) {
>        if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
> +         /* this is a lie actually other formats COULD exist where we would
> fail */
>           if (format_desc->nr_channels < 3)
>              return FALSE;
>        }
> diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c
> b/src/gallium/drivers/llvmpipe/lp_state_fs.c
> index 2f9f907..5e28f0e 100644
> --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
> +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
> @@ -868,12 +868,12 @@ lp_mem_type_from_format_desc(const struct
> util_format_description *format_desc,
>     unsigned chan;
>  
>     if (format_expands_to_float_soa(format_desc)) {
> -      /* just make this a 32bit uint */
> +      /* just make this a uint with width of block */
>        type->floating = false;
>        type->fixed = false;
>        type->sign = false;
>        type->norm = false;
> -      type->width = 32;
> +      type->width = format_desc->block.bits;
>        type->length = 1;
>        return;
>     }
> @@ -1137,12 +1137,24 @@ convert_to_blend_type(struct gallivm_state *gallivm,
>         * This is pretty suboptimal for this case blending in SoA would be
>         much
>         * better, since conversion gets us SoA values so need to convert
>         back.
>         */
> -      assert(src_type.width == 32);
> +      assert(src_type.width == 32 || src_type.width == 16);
>        assert(dst_type.floating);
>        assert(dst_type.width == 32);
>        assert(dst_type.length % 4 == 0);
>        assert(num_srcs % 4 == 0);
>  
> +      if (src_type.width == 16) {
> +         /* expand 4x16bit values to 4x32bit */
> +         struct lp_type type32x4 = src_type;
> +         LLVMTypeRef ltype32x4;
> +         unsigned num_fetch = dst_type.length == 8 ? num_srcs / 2 : num_srcs
> / 4;
> +         type32x4.width = 32;
> +         ltype32x4 = lp_build_vec_type(gallivm, type32x4);
> +         for (i = 0; i < num_fetch; i++) {
> +            src[i] = LLVMBuildZExt(builder, src[i], ltype32x4, "");
> +         }
> +         src_type.width = 32;
> +      }
>        for (i = 0; i < 4; i++) {
>           tmpsrc[i] = src[i];
>        }
> @@ -1298,7 +1310,7 @@ convert_from_blend_type(struct gallivm_state *gallivm,
>        assert(src_type.floating);
>        assert(src_type.width == 32);
>        assert(src_type.length % 4 == 0);
> -      assert(dst_type.width == 32);
> +      assert(dst_type.width == 32 || dst_type.width == 16);
>  
>        for (i = 0; i < num_srcs / 4; i++) {
>           LLVMValueRef tmpsoa[4], tmpdst;
> @@ -1333,6 +1345,25 @@ convert_from_blend_type(struct gallivm_state *gallivm,
>              src[i] = tmpdst;
>           }
>        }
> +      if (dst_type.width == 16) {
> +         struct lp_type type16x8 = dst_type;
> +         struct lp_type type32x4 = dst_type;
> +         LLVMTypeRef ltype16x4, ltypei64, ltypei128;
> +         unsigned num_fetch = src_type.length == 8 ? num_srcs / 2 : num_srcs
> / 4;
> +         type16x8.length = 8;
> +         type32x4.width = 32;
> +         ltypei128 = LLVMIntTypeInContext(gallivm->context, 128);
> +         ltypei64 = LLVMIntTypeInContext(gallivm->context, 64);
> +         ltype16x4 = lp_build_vec_type(gallivm, dst_type);
> +         /* We could do vector truncation but it doesn't generate very good
> code */
> +         for (i = 0; i < num_fetch; i++) {
> +            src[i] = lp_build_pack2(gallivm, type32x4, type16x8,
> +                                    src[i], lp_build_zero(gallivm,
> type32x4));
> +            src[i] = LLVMBuildBitCast(builder, src[i], ltypei128, "");
> +            src[i] = LLVMBuildTrunc(builder, src[i], ltypei64, "");
> +            src[i] = LLVMBuildBitCast(builder, src[i], ltype16x4, "");
> +         }
> +      }
>        return;
>     }
>  
> --
> 1.7.9.5
>