[Mesa-dev] [PATCH 2/2] gallivm: fix float->SNORM conversion

Sat Jul 27 03:48:02 PDT 2013

Series looks alright AFAICT.

Jose

----- Original Message -----
> From: Roland Scheidegger <sroland at vmware.com>
> 
> Just like the UNORM case, we need to use round-to-nearest, not truncation.
> (There's also another problem: we're using the formula for SNORM->float,
> which will produce a value below -1.0 for the most negative value; according
> to both OpenGL and d3d10 this would need clamping. However, no actual
> failures have been observed due to that, hence keep cheating on that.)
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_conv.c       |   52
>  ++++++++++++++++-----
>  src/gallium/auxiliary/gallivm/lp_bld_format_soa.c |    6 +++
>  2 files changed, 47 insertions(+), 11 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
> b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
> index cbea966..56c1581 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
> @@ -257,6 +257,7 @@ lp_build_clamped_float_to_unsigned_norm(struct
> gallivm_state *gallivm,
>        bias = (double)(1ULL << (mantissa - dst_width));
>  
>        res = LLVMBuildFMul(builder, src, lp_build_const_vec(gallivm,
>        src_type, scale), "");
> +      /* instead of fadd/and could (with sse2) just use lp_build_iround */
>        res = LLVMBuildFAdd(builder, res, lp_build_const_vec(gallivm,
>        src_type, bias), "");
>        res = LLVMBuildBitCast(builder, res, int_vec_type, "");
>        res = LLVMBuildAnd(builder, res,
> @@ -742,7 +743,6 @@ lp_build_conv(struct gallivm_state *gallivm,
>        }
>        else {
>           double dst_scale = lp_const_scale(dst_type);
> -         LLVMTypeRef tmp_vec_type;
>  
>           if (dst_scale != 1.0) {
>              LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type,
>              dst_scale);
> @@ -750,19 +750,37 @@ lp_build_conv(struct gallivm_state *gallivm,
>                 tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
>           }
>  
> -         /* Use an equally sized integer for intermediate computations */
> -         tmp_type.floating = FALSE;
> -         tmp_vec_type = lp_build_vec_type(gallivm, tmp_type);
> -         for(i = 0; i < num_tmps; ++i) {
> +         /*
> +          * these functions will use fptosi in some form which won't work
> +          * with 32bit uint dst.
> +          */
> +         assert(dst_type.sign || dst_type.width < 32);
> +
> +         if (dst_type.sign && dst_type.norm && !dst_type.fixed) {
> +            struct lp_build_context bld;
> +
> +            lp_build_context_init(&bld, gallivm, tmp_type);
> +            for(i = 0; i < num_tmps; ++i) {
> +               tmp[i] = lp_build_iround(&bld, tmp[i]);
> +            }
> +            tmp_type.floating = FALSE;
> +         }
> +         else {
> +            LLVMTypeRef tmp_vec_type;
> +
> +            tmp_type.floating = FALSE;
> +            tmp_vec_type = lp_build_vec_type(gallivm, tmp_type);
> +            for(i = 0; i < num_tmps; ++i) {
>  #if 0
> -            if(dst_type.sign)
> -               tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
> -            else
> -               tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, "");
> +               if(dst_type.sign)
> +                  tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type,
> "");
> +               else
> +                  tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type,
> "");
>  #else
> -           /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
> -            tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
> +              /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
> +               tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
>  #endif
> +            }
>           }
>        }
>     }
> @@ -860,6 +878,18 @@ lp_build_conv(struct gallivm_state *gallivm,
>               for(i = 0; i < num_tmps; ++i)
>                  tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
>            }
> +
> +          /* the formula above will produce value below -1.0 for most negative
> +           * value but everything seems happy with that hence disable for now */
> +          if (0 && !src_type.fixed && src_type.norm && src_type.sign) {
> +             struct lp_build_context bld;
> +
> +             lp_build_context_init(&bld, gallivm, dst_type);
> +             for(i = 0; i < num_tmps; ++i) {
> +                tmp[i] = lp_build_max(&bld, tmp[i],
> +                                      lp_build_const_vec(gallivm, dst_type,
> -1.0f));
> +             }
> +          }
>        }
>      }
>      else {
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
> index 114ce03..81cd2b0 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
> @@ -39,6 +39,7 @@
>  #include "lp_bld_gather.h"
>  #include "lp_bld_debug.h"
>  #include "lp_bld_format.h"
> +#include "lp_bld_arit.h"
>  
>  
>  void
> @@ -221,6 +222,11 @@ lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
>                 double scale = 1.0 / ((1 << (format_desc->channel[chan].size
>                 - 1)) - 1);
>                 LLVMValueRef scale_val = lp_build_const_vec(gallivm, type,
>                 scale);
>                 input = LLVMBuildFMul(builder, input, scale_val, "");
> +               /* the formula above will produce value below -1.0 for most negative
> +                * value but everything seems happy with that hence disable for now */
> +               if (0)
> +                  input = lp_build_max(&bld, input,
> +                                       lp_build_const_vec(gallivm, type,
> -1.0f));
>              }
>           }
>           else if (format_desc->channel[chan].pure_integer) {
> --
> 1.7.9.5
>