[Mesa-dev] [PATCH] gallivm: Fix lp_build_float_to_half.

Thu Nov 29 07:50:48 PST 2012

----- Original Message -----
> On 11/29/2012 08:32 AM, jfonseca at vmware.com wrote:
> > From: José Fonseca<jfonseca at vmware.com>
> >
> > The current implementation was close by not fully correct: several
> > operations that should be done in floating point were being done in
> > integer.
> >
> > Fixes piglit fbo-clear-formats GL_ARB_texture_float
> > ---
> >   src/gallium/auxiliary/gallivm/lp_bld_conv.c |  103
> >   +++++++++++++++++++--------
> >   1 file changed, 73 insertions(+), 30 deletions(-)
> >
> > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
> > b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
> > index cd18b0c..a48b053 100644
> > --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
> > +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
> > @@ -63,15 +63,18 @@
> >
> >   #include "util/u_debug.h"
> >   #include "util/u_math.h"
> > +#include "util/u_half.h"
> >   #include "util/u_cpu_detect.h"
> >
> >   #include "lp_bld_type.h"
> >   #include "lp_bld_const.h"
> >   #include "lp_bld_arit.h"
> > +#include "lp_bld_bitarit.h"
> >   #include "lp_bld_pack.h"
> >   #include "lp_bld_conv.h"
> >   #include "lp_bld_logic.h"
> >   #include "lp_bld_intr.h"
> > +#include "lp_bld_printf.h"
> >
> >
> >
> > @@ -220,64 +223,104 @@ lp_build_half_to_float(struct gallivm_state
> > *gallivm,
> >    *
> >    * ref
> >    http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
> >    * ref https://gist.github.com/2156668
> > + *
> > + * XXX: This is approximation. It is is faster certain NaNs are
> > converted to
> > + * infinity, and rounding is not correct.
> 
> I can't quite parse that comment.

Oops. Will correct.

> >    */
> >   LLVMValueRef
> >   lp_build_float_to_half(struct gallivm_state *gallivm,
> >                          LLVMValueRef src)
> >   {
> > -   struct lp_type i32_type = lp_type_int_vec(32, 32 *
> > LLVMGetVectorSize(LLVMTypeOf(src)));
> > -
> >      LLVMBuilderRef builder = gallivm->builder;
> > -   LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm,
> > i32_type);
> > -
> > -   struct lp_build_context bld;
> > -
> > +   LLVMTypeRef f32_vec_type = LLVMTypeOf(src);
> > +   unsigned length = LLVMGetTypeKind(f32_vec_type) ==
> > LLVMVectorTypeKind
> > +                   ? LLVMGetVectorSize(f32_vec_type) : 1;
> > +   struct lp_type f32_type = lp_type_float_vec(32, 32 * length);
> > +   struct lp_type u32_type = lp_type_uint_vec(32, 32 * length);
> > +   struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
> > +   LLVMTypeRef u32_vec_type = lp_build_vec_type(gallivm,
> > u32_type);
> > +   LLVMTypeRef i16_vec_type = lp_build_vec_type(gallivm,
> > i16_type);
> > +   struct lp_build_context f32_bld;
> > +   struct lp_build_context u32_bld;
> >      LLVMValueRef result;
> >
> > -   lp_build_context_init(&bld, gallivm, i32_type);
> > +   lp_build_context_init(&f32_bld, gallivm, f32_type);
> > +   lp_build_context_init(&u32_bld, gallivm, u32_type);
> >
> > -   /* Extra scope because lp_build_min needs a build context, le
> > sigh */
> >      {
> >         /* Constants */
> > -      LLVMValueRef i32_13        = lp_build_const_int_vec(gallivm,
> > i32_type, 13);
> > -      LLVMValueRef i32_16        = lp_build_const_int_vec(gallivm,
> > i32_type, 16);
> > -      LLVMValueRef i32_mask_fabs = lp_build_const_int_vec(gallivm,
> > i32_type, 0x7fffffff);
> > -      LLVMValueRef i32_f32infty  = lp_build_const_int_vec(gallivm,
> > i32_type, 0xff<<  23);
> > -      LLVMValueRef i32_expinf    = lp_build_const_int_vec(gallivm,
> > i32_type, 0xe0<<  23);
> > -      LLVMValueRef i32_f16max    = lp_build_const_int_vec(gallivm,
> > i32_type, 0x8f<<  23);
> > -      LLVMValueRef i32_magic     = lp_build_const_int_vec(gallivm,
> > i32_type, 0x0f<<  23);
> > +      LLVMValueRef u32_f32inf    = lp_build_const_int_vec(gallivm,
> > u32_type, 0xff<<  23);
> > +      LLVMValueRef u32_expinf    = lp_build_const_int_vec(gallivm,
> > u32_type, 0xe0<<  23);
> > +      LLVMValueRef f32_f16max    = lp_build_const_vec(gallivm,
> > f32_type, 65536.0); // 0x8f<<  23
> > +      LLVMValueRef f32_magic     = lp_build_const_vec(gallivm,
> > f32_type, 1.92592994e-34); // 0x0f<<  23
> >
> >         /* Cast from float32 to int32 */
> > -      LLVMValueRef f             = LLVMBuildBitCast(builder, src,
> > int_vec_type, "");
> > +      LLVMValueRef f             = LLVMBuildBitCast(builder, src,
> > u32_vec_type, "");
> >
> >         /* Remove sign */
> > -      LLVMValueRef fabs          = LLVMBuildAnd(builder,
> > i32_mask_fabs, f, "");
> > +      LLVMValueRef srcabs         = lp_build_abs(&f32_bld, src);
> > +      LLVMValueRef fabs           = LLVMBuildBitCast(builder,
> > srcabs, u32_vec_type, "");
> >
> >         /* Magic conversion */
> > -      LLVMValueRef clamped       = lp_build_min(&bld, i32_f16max,
> > fabs);
> > -      LLVMValueRef scaled        = LLVMBuildMul(builder, clamped,
> > i32_magic, "");
> > -
> > +      LLVMValueRef clamped       = lp_build_min(&f32_bld,
> > f32_f16max, srcabs);
> > +      LLVMValueRef scaled        = LLVMBuildBitCast(builder,
> > +
> >                                                    LLVMBuildFMul(builder,
> > +
> >                                                                  clamped,
> > +
> >                                                                  f32_magic,
> > +
> >                                                                  ""),
> > +                                                    u32_vec_type,
> > +                                                    "");
> >         /* Make sure Inf/NaN and unormalised survive */
> > -      LLVMValueRef infnancase    = LLVMBuildXor(builder,
> > i32_expinf, fabs, "");
> > -      LLVMValueRef b_notnormal   = lp_build_compare(gallivm,
> > i32_type, PIPE_FUNC_GREATER, fabs, i32_f32infty);
> > +      LLVMValueRef infnancase    = LLVMBuildXor(builder,
> > u32_expinf, fabs, "");
> > +      LLVMValueRef b_notnormal   = lp_build_compare(gallivm,
> > f32_type, PIPE_FUNC_GEQUAL,
> > +                                                    srcabs,
> > +
> >                                                    LLVMBuildBitCast(builder,
> > u32_f32inf, f32_vec_type, ""));
> >
> >         /* Merge normal / unnormal case */
> > -      LLVMValueRef merge1        = LLVMBuildAnd(builder,
> > infnancase, b_notnormal, "");
> > -      LLVMValueRef merge2        = LLVMBuildNot(builder,
> > LLVMBuildAnd(builder, b_notnormal, scaled, ""), "");
> > -      LLVMValueRef merged        = LLVMBuildOr(builder, merge1,
> > merge2, "");
> > -      LLVMValueRef shifted       = LLVMBuildLShr(builder, merged,
> > i32_13, "");
> > +      LLVMValueRef merged        = lp_build_select(&u32_bld,
> > b_notnormal, infnancase, scaled);
> > +      LLVMValueRef shifted       = lp_build_shr_imm(&u32_bld,
> > merged, 13);
> >
> >         /* Sign bit */
> >         LLVMValueRef justsign      = LLVMBuildXor(builder, f, fabs,
> >         "");
> > -      LLVMValueRef signshifted   = LLVMBuildLShr(builder,
> > justsign, i32_16, "");
> > +      LLVMValueRef signshifted   = lp_build_shr_imm(&u32_bld,
> > justsign, 16);
> >
> >         /* Combine result */
> >         result                     = LLVMBuildOr(builder, shifted,
> >         signshifted, "");
> >      }
> >
> > -   /* Truncate from 32 bit to 16 bit */
> > -   i32_type.width = 16;
> > -   return LLVMBuildTrunc(builder, result,
> > lp_build_vec_type(gallivm, i32_type), "");
> > +   result = LLVMBuildTrunc(builder, result, i16_vec_type, "");
> > +
> 
> The following if (0) block is just debug code, right?  

Yes.

> It's a little
> suspect because it manipulates the 'result' return value.  Maybe add
> a
> comment about what's going on.

I'm sure we'll need to revisit this code, due to the liberties it takes converting, which is why I left the debug code here. I'll add a comment to this effect.

> 
> > +   if (0) {
> > +     LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
> > +     LLVMTypeRef i16t = LLVMInt16TypeInContext(gallivm->context);
> > +     LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
> > +     unsigned i;
> > +
> > +     LLVMTypeRef func_type = LLVMFunctionType(i16t,&f32t, 1, 0);
> > +     LLVMValueRef func = lp_build_const_int_pointer(gallivm,
> > func_to_pointer((func_pointer)util_float_to_half));
> > +     func = LLVMBuildBitCast(builder, func,
> > LLVMPointerType(func_type, 0), "util_float_to_half");
> > +
> > +     lp_build_print_value(gallivm, "src  = ", src);
> > +     lp_build_print_value(gallivm, "util = ", result);
> > +     lp_build_print_value(gallivm, "llvm = ", result);
> > +
> > +     result = LLVMGetUndef(LLVMVectorType(i16t, length));
> > +     for (i = 0; i<  length; ++i) {
> > +        LLVMValueRef index = LLVMConstInt(i32t, i, 0);
> > +        LLVMValueRef f32 = LLVMBuildExtractElement(builder, src,
> > index, "");
> > +#if 0
> > +        /* XXX: not really supported by backends */
> > +        LLVMValueRef f16 = lp_build_intrinsic_unary(builder,
> > "llvm.convert.to.fp16", i16t, f32);
> > +#else
> > +        LLVMValueRef f16 = LLVMBuildCall(builder, func,&f32, 1,
> > "");
> > +#endif
> > +        result = LLVMBuildInsertElement(builder, result, f16,
> > index, "");
> > +     }
> > +
> > +     lp_build_printf(gallivm, "\n");
> > +  }
> > +
> > +   return result;
> >   }
> >
> >
> 
> I didn't study the code in detail (it's pretty dense), but it looks
> OK
> on the surface.  Roland?
> 
> Reviewed-by: Brian Paul <brianp at vmware.com>

Thanks.

Jose