[Liboil-commit] liboil/sse

David Schleef ds at kemper.freedesktop.org
Thu Mar 13 13:51:20 PDT 2008


 liboil/sse/math_sse.c         |    9 ++++-----
 liboil/sse/math_sse_unroll2.c |   12 +++++-------
 2 files changed, 9 insertions(+), 12 deletions(-)

New commits:
commit f38029458f36490833fe2563f939b103319c7957
Author: David Schleef <ds at ginger.bigkitten.com>
Date:   Thu Mar 13 13:53:18 2008 -0700

    Fix scalarmultiply_f64_ns SSE code

diff --git a/liboil/sse/math_sse.c b/liboil/sse/math_sse.c
index 70952b4..e5d238d 100644
--- a/liboil/sse/math_sse.c
+++ b/liboil/sse/math_sse.c
@@ -338,7 +338,6 @@ scalarmultiply_f32_ns_sse (float *dest, float *src1, float *val, int n)
 }
 OIL_DEFINE_IMPL_FULL (scalarmultiply_f32_ns_sse, scalarmultiply_f32_ns, OIL_IMPL_FLAG_SSE);
 
-#ifdef ENABLE_BROKEN_IMPLS
 static void
 scalarmultiply_f64_ns_sse2 (double *dest, double *src1, double *val, int n)
 {
@@ -349,17 +348,17 @@ scalarmultiply_f64_ns_sse2 (double *dest, double *src1, double *val, int n)
     *dest++ = *src1++ * *val;
   }
   xmm1 = _mm_load_pd1(val);
-  for (; n >= 8; n -= 8) {
+  for (; n >= 2; n -= 2) {
     __m128d xmm0;
     xmm0 = _mm_loadu_pd(src1);
     xmm0 = _mm_mul_pd(xmm0, xmm1);
     _mm_store_pd(dest, xmm0);
-    dest += 8;
-    src1 += 8;
+    dest += 2;
+    src1 += 2;
   }
   for (; n > 0; n--) {
     *dest++ = *src1++ * *val;
   }
 }
 OIL_DEFINE_IMPL_FULL (scalarmultiply_f64_ns_sse2, scalarmultiply_f64_ns, OIL_IMPL_FLAG_SSE2);
-#endif
+
diff --git a/liboil/sse/math_sse_unroll2.c b/liboil/sse/math_sse_unroll2.c
index b340031..51dca09 100644
--- a/liboil/sse/math_sse_unroll2.c
+++ b/liboil/sse/math_sse_unroll2.c
@@ -311,7 +311,6 @@ scalarmultiply_f32_ns_sse_unroll2 (float *dest, float *src1, float *val, int n)
 }
 OIL_DEFINE_IMPL_FULL (scalarmultiply_f32_ns_sse_unroll2, scalarmultiply_f32_ns, OIL_IMPL_FLAG_SSE);
 
-#ifdef ENABLE_BROKEN_IMPLS
 static void
 scalarmultiply_f64_ns_sse2_unroll2 (double *dest, double *src1, double *val, int n)
 {
@@ -322,21 +321,20 @@ scalarmultiply_f64_ns_sse2_unroll2 (double *dest, double *src1, double *val, int
     *dest++ = *src1++ * *val;
   }
   xmm1 = _mm_load_pd1(val);
-  for (; n >= 8; n -= 8) {
+  for (; n >= 4; n -= 4) {
     __m128d xmm0;
     xmm0 = _mm_loadu_pd(src1);
     xmm0 = _mm_mul_pd(xmm0, xmm1);
     _mm_store_pd(dest, xmm0);
-    xmm0 = _mm_loadu_pd(src1 + 4);
+    xmm0 = _mm_loadu_pd(src1 + 2);
     xmm0 = _mm_mul_pd(xmm0, xmm1);
-    _mm_store_pd(dest + 4, xmm0);
-    dest += 8;
-    src1 += 8;
+    _mm_store_pd(dest + 2, xmm0);
+    dest += 4;
+    src1 += 4;
   }
   for (; n > 0; n--) {
     *dest++ = *src1++ * *val;
   }
 }
 OIL_DEFINE_IMPL_FULL (scalarmultiply_f64_ns_sse2_unroll2, scalarmultiply_f64_ns, OIL_IMPL_FLAG_SSE2);
-#endif
 


More information about the Liboil-commit mailing list