[Mesa-dev] [PATCH] PowerPC: Altivec IROUND operation

Adam Jackson ajax at redhat.com
Mon Apr 1 05:48:43 PDT 2013


From: Adhemerval Zanella <azanella at linux.vnet.ibm.com>

This adds another rounding mode (LP_BUILD_IROUND) to the enum, whose
other values happen to match SSE4.1's rounding modes.  This should be
safe as long as the IROUND case never hits the SSE4.1 path.

Reviewed-by: Adam Jackson <ajax at redhat.com>
Signed-off-by: Adhemerval Zanella <azanella at linux.vnet.ibm.com>
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index ec05026..021cd6e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1360,10 +1360,17 @@ lp_build_int_to_float(struct lp_build_context *bld,
 static boolean
 arch_rounding_available(const struct lp_type type)
 {
+   /* SSE4 vector rounding. */
    if ((util_cpu_caps.has_sse4_1 &&
        (type.length == 1 || type.width*type.length == 128)) ||
        (util_cpu_caps.has_avx && type.width*type.length == 256))
       return TRUE;
+   /* SSE2 vector to word. */
+   else if ((util_cpu_caps.has_sse2 &&
+            ((type.width == 32) && (type.length == 1 || type.length == 4))) ||
+            (util_cpu_caps.has_avx && type.width == 32 && type.length == 8))
+      return TRUE;
+   /* Altivec rounding and vector to word. */
    else if ((util_cpu_caps.has_altivec &&
             (type.width == 32 && type.length == 4)))
       return TRUE;
@@ -1376,7 +1383,8 @@ enum lp_build_round_mode
    LP_BUILD_ROUND_NEAREST = 0,
    LP_BUILD_ROUND_FLOOR = 1,
    LP_BUILD_ROUND_CEIL = 2,
-   LP_BUILD_ROUND_TRUNCATE = 3
+   LP_BUILD_ROUND_TRUNCATE = 3,
+   LP_BUILD_IROUND = 4
 };
 
 /**
@@ -1400,6 +1408,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
 
    assert(lp_check_value(type, a));
    assert(util_cpu_caps.has_sse4_1);
+   assert(mode != LP_BUILD_IROUND);
 
    if (type.length == 1) {
       LLVMTypeRef vec_type;
@@ -1526,8 +1535,6 @@ lp_build_iround_nearest_sse2(struct lp_build_context *bld,
 }
 
 
-/*
- */
 static INLINE LLVMValueRef
 lp_build_round_altivec(struct lp_build_context *bld,
                        LLVMValueRef a,
@@ -1536,8 +1543,10 @@ lp_build_round_altivec(struct lp_build_context *bld,
    LLVMBuilderRef builder = bld->gallivm->builder;
    const struct lp_type type = bld->type;
    const char *intrinsic = NULL;
+   LLVMTypeRef ret_type = bld->vec_type;
 
    assert(type.floating);
+   assert(type.width == 32);
 
    assert(lp_check_value(type, a));
    assert(util_cpu_caps.has_altivec);
@@ -1555,9 +1564,12 @@ lp_build_round_altivec(struct lp_build_context *bld,
    case LP_BUILD_ROUND_TRUNCATE:
       intrinsic = "llvm.ppc.altivec.vrfiz";
       break;
+   case LP_BUILD_IROUND:
+      ret_type = lp_build_int_vec_type(bld->gallivm, bld->type);
+      intrinsic = "llvm.ppc.altivec.vctsxs";
    }
 
-   return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
+   return lp_build_intrinsic_unary(builder, intrinsic, ret_type, a);
 }
 
 static INLINE LLVMValueRef
@@ -1565,7 +1577,9 @@ lp_build_round_arch(struct lp_build_context *bld,
                     LLVMValueRef a,
                     enum lp_build_round_mode mode)
 {
-   if (util_cpu_caps.has_sse4_1)
+   if (util_cpu_caps.has_sse2 && (mode == LP_BUILD_IROUND))
+     return lp_build_iround_nearest_sse2(bld, a);
+   else if (util_cpu_caps.has_sse4_1)
      return lp_build_round_sse41(bld, a, mode);
    else /* (util_cpu_caps.has_altivec) */
      return lp_build_round_altivec(bld, a, mode);
@@ -1893,11 +1907,6 @@ lp_build_iround(struct lp_build_context *bld,
 
    assert(lp_check_value(type, a));
 
-   if ((util_cpu_caps.has_sse2 &&
-       ((type.width == 32) && (type.length == 1 || type.length == 4))) ||
-       (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) {
-      return lp_build_iround_nearest_sse2(bld, a);
-   }
    if (arch_rounding_available(type)) {
       res = lp_build_round_arch(bld, a, LP_BUILD_ROUND_NEAREST);
    }
-- 
1.7.11.4



More information about the mesa-dev mailing list