[Libreoffice-commits] core.git: sc/source

dante (via logerrit) logerrit at kemper.freedesktop.org
Mon May 10 10:20:49 UTC 2021


 sc/source/core/tool/arraysumSSE2.cxx |   52 ++++++++++++++++++++++++++++++-----
 1 file changed, 46 insertions(+), 6 deletions(-)

New commits:
commit a25c9a2925e64f3adb46a749ce18393aa01b1870
Author:     dante <dante19031999 at gmail.com>
AuthorDate: Tue May 4 21:09:57 2021 +0200
Commit:     Tomaž Vajngerl <quikee at gmail.com>
CommitDate: Mon May 10 12:20:11 2021 +0200

    tdf#137679 Use KahanSum for SSE2
    
    Change-Id: I97970cbb7a9562081f9a84b1d81423c80ed7f7f7
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/115113
    Tested-by: Jenkins
    Reviewed-by: Tomaž Vajngerl <quikee at gmail.com>

diff --git a/sc/source/core/tool/arraysumSSE2.cxx b/sc/source/core/tool/arraysumSSE2.cxx
index 894675335834..e69f672b6014 100644
--- a/sc/source/core/tool/arraysumSSE2.cxx
+++ b/sc/source/core/tool/arraysumSSE2.cxx
@@ -27,25 +27,65 @@ double ArraySumFunctor::executeSSE2(size_t& i, const double* pCurrent) const
         __m128d sum3 = _mm_setzero_pd();
         __m128d sum4 = _mm_setzero_pd();
 
+        __m128d err1 = _mm_setzero_pd();
+        __m128d err2 = _mm_setzero_pd();
+        __m128d err3 = _mm_setzero_pd();
+        __m128d err4 = _mm_setzero_pd();
+
+        __m128d y, t;
+
         for (; i < nUnrolledSize; i += 8)
         {
+            // Kahan sum 1
             __m128d load1 = _mm_load_pd(pCurrent);
-            sum1 = _mm_add_pd(sum1, load1);
+            y = _mm_sub_pd(load1, err1);
+            t = _mm_add_pd(sum1, y);
+            err1 = _mm_sub_pd(_mm_sub_pd(t, sum1), y);
+            sum1 = t;
             pCurrent += 2;
 
+            // Kahan sum 2
             __m128d load2 = _mm_load_pd(pCurrent);
-            sum2 = _mm_add_pd(sum2, load2);
+            y = _mm_sub_pd(load2, err2);
+            t = _mm_add_pd(sum2, y);
+            err2 = _mm_sub_pd(_mm_sub_pd(t, sum2), y);
+            sum2 = t;
             pCurrent += 2;
 
+            // Kahan sum 3
             __m128d load3 = _mm_load_pd(pCurrent);
-            sum3 = _mm_add_pd(sum3, load3);
+            y = _mm_sub_pd(load3, err3);
+            t = _mm_add_pd(sum3, y);
+            err3 = _mm_sub_pd(_mm_sub_pd(t, sum3), y);
+            sum3 = t;
             pCurrent += 2;
 
+            // Kahan sum 4
             __m128d load4 = _mm_load_pd(pCurrent);
-            sum4 = _mm_add_pd(sum4, load4);
+            y = _mm_sub_pd(load4, err4);
+            t = _mm_add_pd(sum4, y);
+            err4 = _mm_sub_pd(_mm_sub_pd(t, sum4), y);
+            sum4 = t;
             pCurrent += 2;
         }
-        sum1 = _mm_add_pd(_mm_add_pd(sum1, sum2), _mm_add_pd(sum3, sum4));
+
+        // Now we combine pairwise summation with Kahan summation
+
+        // sum 1 + sum 2
+        y = _mm_sub_pd(sum2, err1);
+        t = _mm_add_pd(sum1, y);
+        err1 = _mm_sub_pd(_mm_sub_pd(t, sum1), y);
+        sum1 = t;
+
+        // sum 3 + sum 4
+        y = _mm_sub_pd(sum4, err3);
+        t = _mm_add_pd(sum3, y);
+        sum3 = t;
+
+        // sum 1 + sum 3
+        y = _mm_sub_pd(sum3, err1);
+        t = _mm_add_pd(sum1, y);
+        sum1 = t;
 
         double temp;
 
@@ -62,4 +102,4 @@ double ArraySumFunctor::executeSSE2(size_t& i, const double* pCurrent) const
     return 0.0;
 #endif
 }
-}
\ No newline at end of file
+}


More information about the Libreoffice-commits mailing list