[Mesa-dev] [PATCH] gallivm: Fix lp_build_float_to_half.
Brian Paul
brianp at vmware.com
Thu Nov 29 07:44:31 PST 2012
On 11/29/2012 08:32 AM, jfonseca at vmware.com wrote:
> From: José Fonseca<jfonseca at vmware.com>
>
> The current implementation was close by not fully correct: several
> operations that should be done in floating point were being done in
> integer.
>
> Fixes piglit fbo-clear-formats GL_ARB_texture_float
> ---
> src/gallium/auxiliary/gallivm/lp_bld_conv.c | 103 +++++++++++++++++++--------
> 1 file changed, 73 insertions(+), 30 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
> index cd18b0c..a48b053 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
> @@ -63,15 +63,18 @@
>
> #include "util/u_debug.h"
> #include "util/u_math.h"
> +#include "util/u_half.h"
> #include "util/u_cpu_detect.h"
>
> #include "lp_bld_type.h"
> #include "lp_bld_const.h"
> #include "lp_bld_arit.h"
> +#include "lp_bld_bitarit.h"
> #include "lp_bld_pack.h"
> #include "lp_bld_conv.h"
> #include "lp_bld_logic.h"
> #include "lp_bld_intr.h"
> +#include "lp_bld_printf.h"
>
>
>
> @@ -220,64 +223,104 @@ lp_build_half_to_float(struct gallivm_state *gallivm,
> *
> * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
> * ref https://gist.github.com/2156668
> + *
> + * XXX: This is approximation. It is is faster certain NaNs are converted to
> + * infinity, and rounding is not correct.
I can't quite parse that comment.
> */
> LLVMValueRef
> lp_build_float_to_half(struct gallivm_state *gallivm,
> LLVMValueRef src)
> {
> - struct lp_type i32_type = lp_type_int_vec(32, 32 * LLVMGetVectorSize(LLVMTypeOf(src)));
> -
> LLVMBuilderRef builder = gallivm->builder;
> - LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type);
> -
> - struct lp_build_context bld;
> -
> + LLVMTypeRef f32_vec_type = LLVMTypeOf(src);
> + unsigned length = LLVMGetTypeKind(f32_vec_type) == LLVMVectorTypeKind
> + ? LLVMGetVectorSize(f32_vec_type) : 1;
> + struct lp_type f32_type = lp_type_float_vec(32, 32 * length);
> + struct lp_type u32_type = lp_type_uint_vec(32, 32 * length);
> + struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
> + LLVMTypeRef u32_vec_type = lp_build_vec_type(gallivm, u32_type);
> + LLVMTypeRef i16_vec_type = lp_build_vec_type(gallivm, i16_type);
> + struct lp_build_context f32_bld;
> + struct lp_build_context u32_bld;
> LLVMValueRef result;
>
> - lp_build_context_init(&bld, gallivm, i32_type);
> + lp_build_context_init(&f32_bld, gallivm, f32_type);
> + lp_build_context_init(&u32_bld, gallivm, u32_type);
>
> - /* Extra scope because lp_build_min needs a build context, le sigh */
> {
> /* Constants */
> - LLVMValueRef i32_13 = lp_build_const_int_vec(gallivm, i32_type, 13);
> - LLVMValueRef i32_16 = lp_build_const_int_vec(gallivm, i32_type, 16);
> - LLVMValueRef i32_mask_fabs = lp_build_const_int_vec(gallivm, i32_type, 0x7fffffff);
> - LLVMValueRef i32_f32infty = lp_build_const_int_vec(gallivm, i32_type, 0xff<< 23);
> - LLVMValueRef i32_expinf = lp_build_const_int_vec(gallivm, i32_type, 0xe0<< 23);
> - LLVMValueRef i32_f16max = lp_build_const_int_vec(gallivm, i32_type, 0x8f<< 23);
> - LLVMValueRef i32_magic = lp_build_const_int_vec(gallivm, i32_type, 0x0f<< 23);
> + LLVMValueRef u32_f32inf = lp_build_const_int_vec(gallivm, u32_type, 0xff<< 23);
> + LLVMValueRef u32_expinf = lp_build_const_int_vec(gallivm, u32_type, 0xe0<< 23);
> + LLVMValueRef f32_f16max = lp_build_const_vec(gallivm, f32_type, 65536.0); // 0x8f<< 23
> + LLVMValueRef f32_magic = lp_build_const_vec(gallivm, f32_type, 1.92592994e-34); // 0x0f<< 23
>
> /* Cast from float32 to int32 */
> - LLVMValueRef f = LLVMBuildBitCast(builder, src, int_vec_type, "");
> + LLVMValueRef f = LLVMBuildBitCast(builder, src, u32_vec_type, "");
>
> /* Remove sign */
> - LLVMValueRef fabs = LLVMBuildAnd(builder, i32_mask_fabs, f, "");
> + LLVMValueRef srcabs = lp_build_abs(&f32_bld, src);
> + LLVMValueRef fabs = LLVMBuildBitCast(builder, srcabs, u32_vec_type, "");
>
> /* Magic conversion */
> - LLVMValueRef clamped = lp_build_min(&bld, i32_f16max, fabs);
> - LLVMValueRef scaled = LLVMBuildMul(builder, clamped, i32_magic, "");
> -
> + LLVMValueRef clamped = lp_build_min(&f32_bld, f32_f16max, srcabs);
> + LLVMValueRef scaled = LLVMBuildBitCast(builder,
> + LLVMBuildFMul(builder,
> + clamped,
> + f32_magic,
> + ""),
> + u32_vec_type,
> + "");
> /* Make sure Inf/NaN and unormalised survive */
> - LLVMValueRef infnancase = LLVMBuildXor(builder, i32_expinf, fabs, "");
> - LLVMValueRef b_notnormal = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER, fabs, i32_f32infty);
> + LLVMValueRef infnancase = LLVMBuildXor(builder, u32_expinf, fabs, "");
> + LLVMValueRef b_notnormal = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GEQUAL,
> + srcabs,
> + LLVMBuildBitCast(builder, u32_f32inf, f32_vec_type, ""));
>
> /* Merge normal / unnormal case */
> - LLVMValueRef merge1 = LLVMBuildAnd(builder, infnancase, b_notnormal, "");
> - LLVMValueRef merge2 = LLVMBuildNot(builder, LLVMBuildAnd(builder, b_notnormal, scaled, ""), "");
> - LLVMValueRef merged = LLVMBuildOr(builder, merge1, merge2, "");
> - LLVMValueRef shifted = LLVMBuildLShr(builder, merged, i32_13, "");
> + LLVMValueRef merged = lp_build_select(&u32_bld, b_notnormal, infnancase, scaled);
> + LLVMValueRef shifted = lp_build_shr_imm(&u32_bld, merged, 13);
>
> /* Sign bit */
> LLVMValueRef justsign = LLVMBuildXor(builder, f, fabs, "");
> - LLVMValueRef signshifted = LLVMBuildLShr(builder, justsign, i32_16, "");
> + LLVMValueRef signshifted = lp_build_shr_imm(&u32_bld, justsign, 16);
>
> /* Combine result */
> result = LLVMBuildOr(builder, shifted, signshifted, "");
> }
>
> - /* Truncate from 32 bit to 16 bit */
> - i32_type.width = 16;
> - return LLVMBuildTrunc(builder, result, lp_build_vec_type(gallivm, i32_type), "");
> + result = LLVMBuildTrunc(builder, result, i16_vec_type, "");
> +
The following if (0) block is just debug code, right? It's a little
suspect because it manipulates the 'result' return value. Maybe add a
comment about what's going on.
> + if (0) {
> + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
> + LLVMTypeRef i16t = LLVMInt16TypeInContext(gallivm->context);
> + LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
> + unsigned i;
> +
> + LLVMTypeRef func_type = LLVMFunctionType(i16t,&f32t, 1, 0);
> + LLVMValueRef func = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer)util_float_to_half));
> + func = LLVMBuildBitCast(builder, func, LLVMPointerType(func_type, 0), "util_float_to_half");
> +
> + lp_build_print_value(gallivm, "src = ", src);
> + lp_build_print_value(gallivm, "util = ", result);
> + lp_build_print_value(gallivm, "llvm = ", result);
> +
> + result = LLVMGetUndef(LLVMVectorType(i16t, length));
> + for (i = 0; i< length; ++i) {
> + LLVMValueRef index = LLVMConstInt(i32t, i, 0);
> + LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, "");
> +#if 0
> + /* XXX: not really supported by backends */
> + LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32);
> +#else
> + LLVMValueRef f16 = LLVMBuildCall(builder, func,&f32, 1, "");
> +#endif
> + result = LLVMBuildInsertElement(builder, result, f16, index, "");
> + }
> +
> + lp_build_printf(gallivm, "\n");
> + }
> +
> + return result;
> }
>
>
I didn't study the code in detail (it's pretty dense), but it looks OK
on the surface. Roland?
Reviewed-by: Brian Paul <brianp at vmware.com>
More information about the mesa-dev
mailing list