[Liboil-commit] liboil/sse
David Schleef
ds at kemper.freedesktop.org
Thu Mar 13 13:51:20 PDT 2008
liboil/sse/math_sse.c | 9 ++++-----
liboil/sse/math_sse_unroll2.c | 12 +++++-------
2 files changed, 9 insertions(+), 12 deletions(-)
New commits:
commit f38029458f36490833fe2563f939b103319c7957
Author: David Schleef <ds at ginger.bigkitten.com>
Date: Thu Mar 13 13:53:18 2008 -0700
Fix scalarmultiply_f64_ns SSE code
diff --git a/liboil/sse/math_sse.c b/liboil/sse/math_sse.c
index 70952b4..e5d238d 100644
--- a/liboil/sse/math_sse.c
+++ b/liboil/sse/math_sse.c
@@ -338,7 +338,6 @@ scalarmultiply_f32_ns_sse (float *dest, float *src1, float *val, int n)
}
OIL_DEFINE_IMPL_FULL (scalarmultiply_f32_ns_sse, scalarmultiply_f32_ns, OIL_IMPL_FLAG_SSE);
-#ifdef ENABLE_BROKEN_IMPLS
static void
scalarmultiply_f64_ns_sse2 (double *dest, double *src1, double *val, int n)
{
@@ -349,17 +348,17 @@ scalarmultiply_f64_ns_sse2 (double *dest, double *src1, double *val, int n)
*dest++ = *src1++ * *val;
}
xmm1 = _mm_load_pd1(val);
- for (; n >= 8; n -= 8) {
+ for (; n >= 2; n -= 2) {
__m128d xmm0;
xmm0 = _mm_loadu_pd(src1);
xmm0 = _mm_mul_pd(xmm0, xmm1);
_mm_store_pd(dest, xmm0);
- dest += 8;
- src1 += 8;
+ dest += 2;
+ src1 += 2;
}
for (; n > 0; n--) {
*dest++ = *src1++ * *val;
}
}
OIL_DEFINE_IMPL_FULL (scalarmultiply_f64_ns_sse2, scalarmultiply_f64_ns, OIL_IMPL_FLAG_SSE2);
-#endif
+
diff --git a/liboil/sse/math_sse_unroll2.c b/liboil/sse/math_sse_unroll2.c
index b340031..51dca09 100644
--- a/liboil/sse/math_sse_unroll2.c
+++ b/liboil/sse/math_sse_unroll2.c
@@ -311,7 +311,6 @@ scalarmultiply_f32_ns_sse_unroll2 (float *dest, float *src1, float *val, int n)
}
OIL_DEFINE_IMPL_FULL (scalarmultiply_f32_ns_sse_unroll2, scalarmultiply_f32_ns, OIL_IMPL_FLAG_SSE);
-#ifdef ENABLE_BROKEN_IMPLS
static void
scalarmultiply_f64_ns_sse2_unroll2 (double *dest, double *src1, double *val, int n)
{
@@ -322,21 +321,20 @@ scalarmultiply_f64_ns_sse2_unroll2 (double *dest, double *src1, double *val, int
*dest++ = *src1++ * *val;
}
xmm1 = _mm_load_pd1(val);
- for (; n >= 8; n -= 8) {
+ for (; n >= 4; n -= 4) {
__m128d xmm0;
xmm0 = _mm_loadu_pd(src1);
xmm0 = _mm_mul_pd(xmm0, xmm1);
_mm_store_pd(dest, xmm0);
- xmm0 = _mm_loadu_pd(src1 + 4);
+ xmm0 = _mm_loadu_pd(src1 + 2);
xmm0 = _mm_mul_pd(xmm0, xmm1);
- _mm_store_pd(dest + 4, xmm0);
- dest += 8;
- src1 += 8;
+ _mm_store_pd(dest + 2, xmm0);
+ dest += 4;
+ src1 += 4;
}
for (; n > 0; n--) {
*dest++ = *src1++ * *val;
}
}
OIL_DEFINE_IMPL_FULL (scalarmultiply_f64_ns_sse2_unroll2, scalarmultiply_f64_ns, OIL_IMPL_FLAG_SSE2);
-#endif
More information about the Liboil-commit mailing list