[Mesa-dev] [PATCH 2/3] gallivm: Prefer backend agnostic intrinsic for rounding.

Sun Apr 3 15:20:22 UTC 2016

Am 03.04.2016 um 12:24 schrieb Jose Fonseca:
> We could unconditionally use these instrinsics, but performance with SSE2
> would suck, as LLVM falls back to calling libm.
Would that really work now? Last time I checked, calls out to libm from
the jitted code weren't just slow, they simply crashed.

> 
> lp_test_arit.
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_arit.c | 46 ++++++++++++++++++++++++-----
>  1 file changed, 39 insertions(+), 7 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> index 01c6ba9..1277743 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> @@ -1675,13 +1675,13 @@ enum lp_build_round_mode
>   * result is the even value.  That is, rounding 2.5 will be 2.0, and not 3.0.
>   */
>  static inline LLVMValueRef
> -lp_build_round_sse41(struct lp_build_context *bld,
> -                     LLVMValueRef a,
> -                     enum lp_build_round_mode mode)
> +lp_build_nearest_sse41(struct lp_build_context *bld,
> +                       LLVMValueRef a)
>  {
>     LLVMBuilderRef builder = bld->gallivm->builder;
>     const struct lp_type type = bld->type;
>     LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
> +   LLVMValueRef mode = LLVMConstNull(i32t);
>     const char *intrinsic;
>     LLVMValueRef res;
>  
> @@ -1714,7 +1714,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
>  
>        args[0] = undef;
>        args[1] = LLVMBuildInsertElement(builder, undef, a, index0, "");
> -      args[2] = LLVMConstInt(i32t, mode, 0);
> +      args[2] = mode;
>  
>        res = lp_build_intrinsic(builder, intrinsic,
>                                 vec_type, args, Elements(args), 0);
> @@ -1754,7 +1754,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
>  
>        res = lp_build_intrinsic_binary(builder, intrinsic,
>                                        bld->vec_type, a,
> -                                      LLVMConstInt(i32t, mode, 0));
> +                                      mode);
>     }
>  
>     return res;
> @@ -1856,8 +1856,40 @@ lp_build_round_arch(struct lp_build_context *bld,
>                      LLVMValueRef a,
>                      enum lp_build_round_mode mode)
>  {
> -   if (util_cpu_caps.has_sse4_1)
> -     return lp_build_round_sse41(bld, a, mode);
> +   if (util_cpu_caps.has_sse4_1) {
> +      LLVMBuilderRef builder = bld->gallivm->builder;
> +      const struct lp_type type = bld->type;
> +      const char *intrinsic_root;
> +      char intrinsic[32];
> +
> +      assert(type.floating);
> +      assert(lp_check_value(type, a));
> +      (void)type;
> +
> +      switch (mode) {
> +      case LP_BUILD_ROUND_NEAREST:
> +         if (HAVE_LLVM >= 0x0304) {
> +            intrinsic_root = "llvm.round";
> +         } else {
> +            return lp_build_nearest_sse41(bld, a);
> +         }
> +         break;
> +      case LP_BUILD_ROUND_FLOOR:
> +         intrinsic_root = "llvm.floor";
> +         break;
> +      case LP_BUILD_ROUND_CEIL:
> +         intrinsic_root = "llvm.ceil";
> +         break;
> +      case LP_BUILD_ROUND_TRUNCATE:
> +         intrinsic_root = "llvm.trunc";
> +         break;
> +      }
> +
> +      util_snprintf(intrinsic, sizeof intrinsic, "%s.v%uf%u",
> +                    intrinsic_root, type.length, type.width);
> +
> +      return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
> +   }
>     else /* (util_cpu_caps.has_altivec) */
>       return lp_build_round_altivec(bld, a, mode);
>  }
> 

Looks good to me. I'd guess it would work for altivec too, though it's
better to let someone handle that who can test it :-).
Those were all new in llvm 3.3, too bad they forgot the round...

Reviewed-by: Roland Scheidegger <sroland at vmware.com>