[Mesa-dev] [PATCH] PowerPC: Altivec IROUND operation
Jose Fonseca
jfonseca at vmware.com
Tue Apr 2 07:45:16 PDT 2013
I don't see the need for, or any benefit in, mixing "iround" (i.e., float -> int) with "round" (i.e., float -> float).
If this is a one-off, then you should just call
lp_build_intrinsic_unary(builder, "llvm.ppc.altivec.vctsxs", ...)
If you really need a generic intrinsic helper for iround, then please add a new
lp_build_iround_foo(..., enum lp_build_round_mode mode)
which takes an enum lp_build_round_mode and maps each mode as follows:
LP_BUILD_ROUND_NEAREST -> iround
LP_BUILD_ROUND_FLOOR -> ifloor
LP_BUILD_ROUND_CEIL -> iceil
LP_BUILD_ROUND_TRUNCATE -> itrunc
Jose
----- Original Message -----
> From: Adhemerval Zanella <azanella at linux.vnet.ibm.com>
>
> This adds another rounding mode to the enum, which happens otherwise to
> match SSE4.1's rounding modes. This should be safe as long as the
> IROUND case never hits the SSE4.1 path.
>
> Reviewed-by: Adam Jackson <ajax at redhat.com>
> Signed-off-by: Adhemerval Zanella <azanella at linux.vnet.ibm.com>
> ---
> src/gallium/auxiliary/gallivm/lp_bld_arit.c | 29
> +++++++++++++++++++----------
> 1 file changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> index ec05026..021cd6e 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> @@ -1360,10 +1360,17 @@ lp_build_int_to_float(struct lp_build_context *bld,
> static boolean
> arch_rounding_available(const struct lp_type type)
> {
> + /* SSE4 vector rounding. */
> if ((util_cpu_caps.has_sse4_1 &&
> (type.length == 1 || type.width*type.length == 128)) ||
> (util_cpu_caps.has_avx && type.width*type.length == 256))
> return TRUE;
> + /* SSE2 vector to word. */
> + else if ((util_cpu_caps.has_sse2 &&
> + ((type.width == 32) && (type.length == 1 || type.length == 4)))
> ||
> + (util_cpu_caps.has_avx && type.width == 32 && type.length == 8))
> + return TRUE;
> + /* Altivec rounding and vector to word. */
> else if ((util_cpu_caps.has_altivec &&
> (type.width == 32 && type.length == 4)))
> return TRUE;
> @@ -1376,7 +1383,8 @@ enum lp_build_round_mode
> LP_BUILD_ROUND_NEAREST = 0,
> LP_BUILD_ROUND_FLOOR = 1,
> LP_BUILD_ROUND_CEIL = 2,
> - LP_BUILD_ROUND_TRUNCATE = 3
> + LP_BUILD_ROUND_TRUNCATE = 3,
> + LP_BUILD_IROUND = 4
> };
>
> /**
> @@ -1400,6 +1408,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
>
> assert(lp_check_value(type, a));
> assert(util_cpu_caps.has_sse4_1);
> + assert(mode != LP_BUILD_IROUND);
>
> if (type.length == 1) {
> LLVMTypeRef vec_type;
> @@ -1526,8 +1535,6 @@ lp_build_iround_nearest_sse2(struct lp_build_context
> *bld,
> }
>
>
> -/*
> - */
> static INLINE LLVMValueRef
> lp_build_round_altivec(struct lp_build_context *bld,
> LLVMValueRef a,
> @@ -1536,8 +1543,10 @@ lp_build_round_altivec(struct lp_build_context *bld,
> LLVMBuilderRef builder = bld->gallivm->builder;
> const struct lp_type type = bld->type;
> const char *intrinsic = NULL;
> + LLVMTypeRef ret_type = bld->vec_type;
>
> assert(type.floating);
> + assert(type.width == 32);
>
> assert(lp_check_value(type, a));
> assert(util_cpu_caps.has_altivec);
> @@ -1555,9 +1564,12 @@ lp_build_round_altivec(struct lp_build_context *bld,
> case LP_BUILD_ROUND_TRUNCATE:
> intrinsic = "llvm.ppc.altivec.vrfiz";
> break;
> + case LP_BUILD_IROUND:
> + ret_type = lp_build_int_vec_type(bld->gallivm, bld->type);
> + intrinsic = "llvm.ppc.altivec.vctsxs";
> }
>
> - return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
> + return lp_build_intrinsic_unary(builder, intrinsic, ret_type, a);
> }
>
> static INLINE LLVMValueRef
> @@ -1565,7 +1577,9 @@ lp_build_round_arch(struct lp_build_context *bld,
> LLVMValueRef a,
> enum lp_build_round_mode mode)
> {
> - if (util_cpu_caps.has_sse4_1)
> + if (util_cpu_caps.has_sse2 && (mode == LP_BUILD_IROUND))
> + return lp_build_iround_nearest_sse2(bld, a);
> + else if (util_cpu_caps.has_sse4_1)
> return lp_build_round_sse41(bld, a, mode);
> else /* (util_cpu_caps.has_altivec) */
> return lp_build_round_altivec(bld, a, mode);
> @@ -1893,11 +1907,6 @@ lp_build_iround(struct lp_build_context *bld,
>
> assert(lp_check_value(type, a));
>
> - if ((util_cpu_caps.has_sse2 &&
> - ((type.width == 32) && (type.length == 1 || type.length == 4))) ||
> - (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) {
> - return lp_build_iround_nearest_sse2(bld, a);
> - }
> if (arch_rounding_available(type)) {
> res = lp_build_round_arch(bld, a, LP_BUILD_ROUND_NEAREST);
> }
> --
> 1.7.11.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
More information about the mesa-dev
mailing list