[Mesa-dev] [PATCH] gallivm: fix somewhat broken NaN behavior for exp2

sroland at vmware.com sroland at vmware.com
Thu Aug 28 20:14:54 PDT 2014


From: Roland Scheidegger <sroland at vmware.com>

I actually screwed that up in 754319490f6946a9ad5ee619822d5fe4254e6759,
mistakenly thinking the code actually wanted the non-nan result before.
So, introduce that missing nan behavior case and use that instead.
For sse, there's no actual change in the resulting code at all, the fallback
code wouldn't have done the right thing though.
Of course, the actual issue I saw with pow() was completely unrelated...
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c | 28 +++++++++++++++++-----------
 src/gallium/auxiliary/gallivm/lp_bld_arit.h | 10 ++++++++--
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 3d34144..82eebfb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -134,7 +134,8 @@ lp_build_min_simple(struct lp_build_context *bld,
       }
    }
    else if (type.floating && util_cpu_caps.has_altivec) {
-      if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
+      if (nan_behavior == GALLIVM_NAN_RETURN_NAN ||
+          nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
          debug_printf("%s: altivec doesn't support nan return nan behavior\n",
                       __FUNCTION__);
       }
@@ -202,7 +203,8 @@ lp_build_min_simple(struct lp_build_context *bld,
        */
       if (util_cpu_caps.has_sse && type.floating &&
           nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
-          nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
+          nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN &&
+          nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
          LLVMValueRef isnan, max;
          max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
                                                    type,
@@ -241,6 +243,9 @@ lp_build_min_simple(struct lp_build_context *bld,
       case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
          cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b);
          return lp_build_select(bld, cond, a, b);
+      case GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN:
+         cond = lp_build_cmp(bld, PIPE_FUNC_LESS, b, a);
+         return lp_build_select(bld, cond, b, a);
       case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
          cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
          return lp_build_select(bld, cond, a, b);
@@ -310,7 +315,8 @@ lp_build_max_simple(struct lp_build_context *bld,
       }
    }
    else if (type.floating && util_cpu_caps.has_altivec) {
-      if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
+      if (nan_behavior == GALLIVM_NAN_RETURN_NAN ||
+          nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
          debug_printf("%s: altivec doesn't support nan return nan behavior\n",
                       __FUNCTION__);
       }
@@ -373,7 +379,8 @@ lp_build_max_simple(struct lp_build_context *bld,
    if(intrinsic) {
       if (util_cpu_caps.has_sse && type.floating &&
           nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
-          nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
+          nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN &&
+          nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
          LLVMValueRef isnan, min;
          min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
                                                    type,
@@ -412,6 +419,9 @@ lp_build_max_simple(struct lp_build_context *bld,
       case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
          cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b);
          return lp_build_select(bld, cond, a, b);
+      case GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN:
+         cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, b, a);
+         return lp_build_select(bld, cond, b, a);
       case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
          cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
          return lp_build_select(bld, cond, a, b);
@@ -3040,7 +3050,6 @@ lp_build_exp2(struct lp_build_context *bld,
 
    assert(lp_check_value(bld->type, x));
 
-
    /* TODO: optimize the constant case */
    if (gallivm_debug & GALLIVM_DEBUG_PERF &&
        LLVMIsConstant(x)) {
@@ -3053,15 +3062,14 @@ lp_build_exp2(struct lp_build_context *bld,
    /* We want to preserve NaN and make sure than for exp2 if x > 128,
     * the result is INF  and if it's smaller than -126.9 the result is 0 */
    x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type,  128.0), x,
-                        GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
-   x = lp_build_max(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x);
+                        GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN);
+   x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, -126.99999),
+                        x, GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN);
 
    /* ipart = floor(x) */
    /* fpart = x - ipart */
    lp_build_ifloor_fract(bld, x, &ipart, &fpart);
 
-
-
    /* expipart = (float) (1 << ipart) */
    expipart = LLVMBuildAdd(builder, ipart,
                            lp_build_const_int_vec(bld->gallivm, type, 127), "");
@@ -3069,13 +3077,11 @@ lp_build_exp2(struct lp_build_context *bld,
                            lp_build_const_int_vec(bld->gallivm, type, 23), "");
    expipart = LLVMBuildBitCast(builder, expipart, vec_type, "");
 
-
    expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
                                   Elements(lp_build_exp2_polynomial));
 
    res = LLVMBuildFMul(builder, expipart, expfpart, "");
 
-
    return res;
 }
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 9d29093..e76977c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -138,7 +138,7 @@ lp_build_lerp_3d(struct lp_build_context *bld,
 enum gallivm_nan_behavior {
    /* Results are undefined with NaN. Results in fastest code */
    GALLIVM_NAN_BEHAVIOR_UNDEFINED,
-   /* If input is NaN, NaN is returned */
+   /* If one of the inputs is NaN, NaN is returned */
    GALLIVM_NAN_RETURN_NAN,
    /* If one of the inputs is NaN, the other operand is returned */
    GALLIVM_NAN_RETURN_OTHER,
@@ -146,7 +146,13 @@ enum gallivm_nan_behavior {
     * but we guarantee the second operand is not a NaN.
     * In min/max it will be as fast as undefined with sse opcodes,
     * and archs having native return_other can benefit too. */
-   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN
+   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN,
+   /* If one of the inputs is NaN, NaN is returned,
+    * but we guarantee the first operand is not a NaN.
+    * In min/max it will be as fast as undefined with sse opcodes,
+    * and archs having native return_nan can benefit too. */
+   GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN,
+
 };
 
 LLVMValueRef
-- 
1.9.1


More information about the mesa-dev mailing list