[Mesa-dev] [PATCH] gallivm, llvmpipe: fix float->srgb conversion to handle NaNs
Jose Fonseca
jfonseca at vmware.com
Wed Nov 13 12:21:24 PST 2013
The introduction and use of lp_build_clamp_zero_one_nanzero looks good.
I can't comment on the changes to the existing paths though, as they are too subtle for me. As long there are no regressions I'm good.
Jose
----- Original Message -----
> From: Roland Scheidegger <sroland at vmware.com>
>
> d3d10 requires us to convert NaNs to zero for any float->int conversion.
> We don't really do that but mostly seems to work. In particular I suspect the
> very common float->unorm8 path only really passes because it relies on sse2
> pack intrinsics which just happen to work by luck for NaNs (float->int
> conversion in hw gives integer indeterminate value, which just happens to be
> -0x80000000 hence gets converted to zero in the end after pack intrinsics).
> However, float->srgb didn't get so lucky, because we need to clamp before
> blending and clamping resulted in NaN behavior being undefined (and actually
> got converted to 1.0 by clamping with sse2). Fix this by using a zero/one
> clamp
> with defined nan behavior as we can handle the NaN for free this way.
> I suspect there's more bugs lurking in this area (e.g. converting floats to
> snorm) as we don't really use defined NaN behavior everywhere but this seems
> to be good enough.
> While here respecify nan behavior modes a bit, in particular the
> return_second
> mode didn't really do what we wanted. From the caller's perspective, we
> really
> wanted to say we need the non-nan result, but we already know the second arg
> isn't a NaN. So we use this now instead, which means that cpu architectures
> which actually implement min/max by always returning non-nan (that is
> adhering
> to ieee754-2008 rules) don't need to bend over backwards for nothing.
> ---
> src/gallium/auxiliary/gallivm/lp_bld_arit.c | 44
> +++++++++++++-------
> src/gallium/auxiliary/gallivm/lp_bld_arit.h | 12 ++++--
> src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c | 2 +-
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 11 ++---
> src/gallium/drivers/llvmpipe/lp_state_fs.c | 4 +-
> 5 files changed, 45 insertions(+), 28 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> index 00052ed..70929e7 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> @@ -123,8 +123,10 @@ lp_build_min_simple(struct lp_build_context *bld,
> }
> }
> else if (type.floating && util_cpu_caps.has_altivec) {
> - debug_printf("%s: altivec doesn't support nan behavior modes\n",
> - __FUNCTION__);
> + if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
> + debug_printf("%s: altivec doesn't support nan return nan
> behavior\n",
> + __FUNCTION__);
> + }
> if (type.width == 32 && type.length == 4) {
> intrinsic = "llvm.ppc.altivec.vminfp";
> intr_size = 128;
> @@ -159,8 +161,6 @@ lp_build_min_simple(struct lp_build_context *bld,
> }
> } else if (util_cpu_caps.has_altivec) {
> intr_size = 128;
> - debug_printf("%s: altivec doesn't support nan behavior modes\n",
> - __FUNCTION__);
> if (type.width == 8) {
> if (!type.sign) {
> intrinsic = "llvm.ppc.altivec.vminub";
> @@ -191,7 +191,7 @@ lp_build_min_simple(struct lp_build_context *bld,
> */
> if (util_cpu_caps.has_sse && type.floating &&
> nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
> - nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
> + nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
> LLVMValueRef isnan, max;
> max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
> type,
> @@ -227,7 +227,7 @@ lp_build_min_simple(struct lp_build_context *bld,
> return lp_build_select(bld, cond, a, b);
> }
> break;
> - case GALLIVM_NAN_RETURN_SECOND:
> + case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
> cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b);
> return lp_build_select(bld, cond, a, b);
> case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
> @@ -299,8 +299,10 @@ lp_build_max_simple(struct lp_build_context *bld,
> }
> }
> else if (type.floating && util_cpu_caps.has_altivec) {
> - debug_printf("%s: altivec doesn't support nan behavior modes\n",
> - __FUNCTION__);
> + if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
> + debug_printf("%s: altivec doesn't support nan return nan
> behavior\n",
> + __FUNCTION__);
> + }
> if (type.width == 32 || type.length == 4) {
> intrinsic = "llvm.ppc.altivec.vmaxfp";
> intr_size = 128;
> @@ -336,8 +338,6 @@ lp_build_max_simple(struct lp_build_context *bld,
> }
> } else if (util_cpu_caps.has_altivec) {
> intr_size = 128;
> - debug_printf("%s: altivec doesn't support nan behavior modes\n",
> - __FUNCTION__);
> if (type.width == 8) {
> if (!type.sign) {
> intrinsic = "llvm.ppc.altivec.vmaxub";
> @@ -362,7 +362,7 @@ lp_build_max_simple(struct lp_build_context *bld,
> if(intrinsic) {
> if (util_cpu_caps.has_sse && type.floating &&
> nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
> - nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
> + nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
> LLVMValueRef isnan, min;
> min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
> type,
> @@ -398,7 +398,7 @@ lp_build_max_simple(struct lp_build_context *bld,
> return lp_build_select(bld, cond, a, b);
> }
> break;
> - case GALLIVM_NAN_RETURN_SECOND:
> + case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
> cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b);
> return lp_build_select(bld, cond, a, b);
> case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
> @@ -1399,6 +1399,7 @@ lp_build_max_ext(struct lp_build_context *bld,
>
> /**
> * Generate clamp(a, min, max)
> + * NaN behavior (for any of a, min, max) is undefined.
> * Do checks for special cases.
> */
> LLVMValueRef
> @@ -1418,6 +1419,20 @@ lp_build_clamp(struct lp_build_context *bld,
>
>
> /**
> + * Generate clamp(a, 0, 1)
> + * A NaN will get converted to zero.
> + */
> +LLVMValueRef
> +lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld,
> + LLVMValueRef a)
> +{
> + a = lp_build_max_ext(bld, a, bld->zero,
> GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
> + a = lp_build_min(bld, a, bld->one);
> + return a;
> +}
> +
> +
> +/**
> * Generate abs(a)
> */
> LLVMValueRef
> @@ -3029,9 +3044,8 @@ lp_build_exp2(struct lp_build_context *bld,
> /* We want to preserve NaN and make sure than for exp2 if x > 128,
> * the result is INF and if it's smaller than -126.9 the result is 0 */
> x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type, 128.0),
> x,
> - GALLIVM_NAN_RETURN_SECOND);
> - x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type,
> -126.99999), x,
> - GALLIVM_NAN_RETURN_SECOND);
> + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
> + x = lp_build_max(bld, lp_build_const_vec(bld->gallivm, type, -126.99999),
> x);
>
> /* ipart = floor(x) */
> /* fpart = x - ipart */
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> index 49d4e2c..75bf89e 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> @@ -142,9 +142,11 @@ enum gallivm_nan_behavior {
> GALLIVM_NAN_RETURN_NAN,
> /* If one of the inputs is NaN, the other operand is returned */
> GALLIVM_NAN_RETURN_OTHER,
> - /* If one of the inputs is NaN, the second operand is returned.
> - * In min/max it will be as fast as undefined with sse opcodes */
> - GALLIVM_NAN_RETURN_SECOND
> + /* If one of the inputs is NaN, the other operand is returned,
> + * but we guarantee the second operand is not a NaN.
> + * In min/max it will be as fast as undefined with sse opcodes,
> + * and archs having native return_other can benefit too. */
> + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN
> };
>
> LLVMValueRef
> @@ -176,6 +178,10 @@ lp_build_clamp(struct lp_build_context *bld,
> LLVMValueRef max);
>
> LLVMValueRef
> +lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld,
> + LLVMValueRef a);
> +
> +LLVMValueRef
> lp_build_abs(struct lp_build_context *bld,
> LLVMValueRef a);
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
> b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
> index 2b1fe64..6645151 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
> @@ -326,7 +326,7 @@ lp_build_float_to_srgb_packed(struct gallivm_state
> *gallivm,
> * can't use lp_build_conv since we want to keep values as 32bit
> * here so we can interleave with rgb to go from SoA->AoS.
> */
> - alpha = lp_build_clamp(&f32_bld, src[3], f32_bld.zero, f32_bld.one);
> + alpha = lp_build_clamp_zero_one_nanzero(&f32_bld, src[3]);
> alpha = lp_build_mul(&f32_bld, alpha,
> lp_build_const_vec(gallivm, src_type, 255.0f));
> tmpsrgb[3] = lp_build_iround(&f32_bld, alpha);
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 5f81066..5fc47ed 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -1384,21 +1384,18 @@ emit_store_chan(
> assert(dtype == TGSI_TYPE_FLOAT ||
> dtype == TGSI_TYPE_UNTYPED);
> value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
> - value = lp_build_max_ext(float_bld, value, float_bld->zero,
> - GALLIVM_NAN_RETURN_SECOND);
> - value = lp_build_min_ext(float_bld, value, float_bld->one,
> - GALLIVM_NAN_BEHAVIOR_UNDEFINED);
> + value = lp_build_clamp_zero_one_nanzero(float_bld, value);
> break;
>
> case TGSI_SAT_MINUS_PLUS_ONE:
> assert(dtype == TGSI_TYPE_FLOAT ||
> dtype == TGSI_TYPE_UNTYPED);
> value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
> + /* This will give -1.0 for NaN which is probably not what we want. */
> value = lp_build_max_ext(float_bld, value,
> lp_build_const_vec(gallivm, float_bld->type,
> -1.0),
> - GALLIVM_NAN_RETURN_SECOND);
> - value = lp_build_min_ext(float_bld, value, float_bld->one,
> - GALLIVM_NAN_BEHAVIOR_UNDEFINED);
> + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
> + value = lp_build_min(float_bld, value, float_bld->one);
> break;
>
> default:
> diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c
> b/src/gallium/drivers/llvmpipe/lp_state_fs.c
> index 8223d2a..b5816e0 100644
> --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
> +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
> @@ -1760,11 +1760,11 @@ generate_unswizzled_blend(struct gallivm_state
> *gallivm,
> assert(row_type.floating);
> lp_build_context_init(&f32_bld, gallivm, row_type);
> for (i = 0; i < src_count; i++) {
> - src[i] = lp_build_clamp(&f32_bld, src[i], f32_bld.zero,
> f32_bld.one);
> + src[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src[i]);
> }
> if (dual_source_blend) {
> for (i = 0; i < src_count; i++) {
> - src1[i] = lp_build_clamp(&f32_bld, src1[i], f32_bld.zero,
> f32_bld.one);
> + src1[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src1[i]);
> }
> }
> /* probably can't be different than row_type but better safe than
> sorry... */
> --
> 1.7.9.5
>
More information about the mesa-dev
mailing list