[Libreoffice-commits] core.git: sc/source
dante (via logerrit)
logerrit at kemper.freedesktop.org
Mon May 10 10:20:49 UTC 2021
sc/source/core/tool/arraysumSSE2.cxx | 52 ++++++++++++++++++++++++++++++-----
1 file changed, 46 insertions(+), 6 deletions(-)
New commits:
commit a25c9a2925e64f3adb46a749ce18393aa01b1870
Author: dante <dante19031999 at gmail.com>
AuthorDate: Tue May 4 21:09:57 2021 +0200
Commit: Tomaž Vajngerl <quikee at gmail.com>
CommitDate: Mon May 10 12:20:11 2021 +0200
tdf#137679 Use KahanSum for SSE2
Change-Id: I97970cbb7a9562081f9a84b1d81423c80ed7f7f7
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/115113
Tested-by: Jenkins
Reviewed-by: Tomaž Vajngerl <quikee at gmail.com>
diff --git a/sc/source/core/tool/arraysumSSE2.cxx b/sc/source/core/tool/arraysumSSE2.cxx
index 894675335834..e69f672b6014 100644
--- a/sc/source/core/tool/arraysumSSE2.cxx
+++ b/sc/source/core/tool/arraysumSSE2.cxx
@@ -27,25 +27,65 @@ double ArraySumFunctor::executeSSE2(size_t& i, const double* pCurrent) const
__m128d sum3 = _mm_setzero_pd();
__m128d sum4 = _mm_setzero_pd();
+ __m128d err1 = _mm_setzero_pd();
+ __m128d err2 = _mm_setzero_pd();
+ __m128d err3 = _mm_setzero_pd();
+ __m128d err4 = _mm_setzero_pd();
+
+ __m128d y, t;
+
for (; i < nUnrolledSize; i += 8)
{
+ // Kahan sum 1
__m128d load1 = _mm_load_pd(pCurrent);
- sum1 = _mm_add_pd(sum1, load1);
+ y = _mm_sub_pd(load1, err1);
+ t = _mm_add_pd(sum1, y);
+ err1 = _mm_sub_pd(_mm_sub_pd(t, sum1), y);
+ sum1 = t;
pCurrent += 2;
+ // Kahan sum 2
__m128d load2 = _mm_load_pd(pCurrent);
- sum2 = _mm_add_pd(sum2, load2);
+ y = _mm_sub_pd(load2, err2);
+ t = _mm_add_pd(sum2, y);
+ err2 = _mm_sub_pd(_mm_sub_pd(t, sum2), y);
+ sum2 = t;
pCurrent += 2;
+ // Kahan sum 3
__m128d load3 = _mm_load_pd(pCurrent);
- sum3 = _mm_add_pd(sum3, load3);
+ y = _mm_sub_pd(load3, err3);
+ t = _mm_add_pd(sum3, y);
+ err3 = _mm_sub_pd(_mm_sub_pd(t, sum3), y);
+ sum3 = t;
pCurrent += 2;
+ // Kahan sum 4
__m128d load4 = _mm_load_pd(pCurrent);
- sum4 = _mm_add_pd(sum4, load4);
+ y = _mm_sub_pd(load4, err4);
+ t = _mm_add_pd(sum4, y);
+ err4 = _mm_sub_pd(_mm_sub_pd(t, sum4), y);
+ sum4 = t;
pCurrent += 2;
}
- sum1 = _mm_add_pd(_mm_add_pd(sum1, sum2), _mm_add_pd(sum3, sum4));
+
+ // Now we combine pairwise summation with Kahan summation
+
+ // sum 1 + sum 2
+ y = _mm_sub_pd(sum2, err1);
+ t = _mm_add_pd(sum1, y);
+ err1 = _mm_sub_pd(_mm_sub_pd(t, sum1), y);
+ sum1 = t;
+
+ // sum 3 + sum 4
+ y = _mm_sub_pd(sum4, err3);
+ t = _mm_add_pd(sum3, y);
+ sum3 = t;
+
+ // sum 1 + sum 3
+ y = _mm_sub_pd(sum3, err1);
+ t = _mm_add_pd(sum1, y);
+ sum1 = t;
double temp;
@@ -62,4 +102,4 @@ double ArraySumFunctor::executeSSE2(size_t& i, const double* pCurrent) const
return 0.0;
#endif
}
-}
\ No newline at end of file
+}
More information about the Libreoffice-commits
mailing list