[Libreoffice-commits] core.git: Branch 'feature/fixes11' - include/tools sc/source tools/Library_tl.mk tools/source
Tomaž Vajngerl
tomaz.vajngerl at collabora.com
Tue Nov 3 01:02:12 PST 2015
include/tools/cpuid.hxx | 28 +++++++
sc/source/core/inc/arraysumfunctor.hxx | 118 +++++++++++++++++++++++++++++++++
sc/source/core/tool/interpr6.cxx | 18 +----
tools/Library_tl.mk | 1
tools/source/misc/cpuid.cxx | 63 +++++++++++++++++
5 files changed, 217 insertions(+), 11 deletions(-)
New commits:
commit e59e6c572f3e7531800b396f7e4ad5f52f98d987
Author: Tomaž Vajngerl <tomaz.vajngerl at collabora.com>
Date: Tue Nov 3 09:55:42 2015 +0100
arraysumfunctor: fast sum a double array, use for SUM() in Calc
This adds an array sum functor which sums a double array as
fast as possible. There are 2 implementations: SSE2 and
a simple unrolled implementation. The SSE2 implementation is used
if SSE2 is detected at runtime.
Change-Id: I729203b8af203db6b72ba5151c630544755e9689
diff --git a/include/tools/cpuid.hxx b/include/tools/cpuid.hxx
new file mode 100644
index 0000000..316e656
--- /dev/null
+++ b/include/tools/cpuid.hxx
@@ -0,0 +1,28 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#ifndef INCLUDED_TOOLS_CPUID_HXX
+#define INCLUDED_TOOLS_CPUID_HXX
+
+#include <sal/config.h>
+#include <tools/toolsdllapi.h>
+
+namespace tools
+{
+namespace cpuid // runtime CPU feature queries
+{
+ TOOLS_DLLPUBLIC bool hasSSE(); ///< true if CPUID reports SSE support; false on builds where CPUID is not queried
+ TOOLS_DLLPUBLIC bool hasSSE2(); ///< true if CPUID reports SSE2 support; false on builds where CPUID is not queried
+}
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/inc/arraysumfunctor.hxx b/sc/source/core/inc/arraysumfunctor.hxx
new file mode 100644
index 0000000..9e4ce97
--- /dev/null
+++ b/sc/source/core/inc/arraysumfunctor.hxx
@@ -0,0 +1,118 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#ifndef INCLUDED_SC_SOURCE_CORE_INC_ARRAYSUMFUNCTOR_HXX
+#define INCLUDED_SC_SOURCE_CORE_INC_ARRAYSUMFUNCTOR_HXX
+
+#include <emmintrin.h>
+#include <tools/cpuid.hxx>
+
+namespace sc
+{
+
+/** Sums a contiguous array of doubles.
+ *
+ * The bulk of the array is processed four elements per iteration, either
+ * with SSE2 intrinsics (when the CPU reports SSE2 at runtime) or with a
+ * plain unrolled loop; the remaining 0-3 elements are added one by one.
+ *
+ * Partial sums are kept in independent accumulators, so the rounding of
+ * the result can differ from a strict left-to-right summation.
+ */
+struct ArraySumFunctor
+{
+private:
+    const double* mpArray; ///< first element of the array; not owned
+    size_t mnSize;         ///< number of elements in mpArray
+
+public:
+    /** @param pArray pointer to the first element to sum; only read when
+     *                nSize > 0, and must stay valid while the functor is used
+     *  @param nSize  number of elements to sum
+     */
+    ArraySumFunctor(const double* pArray, size_t nSize)
+        : mpArray(pArray)
+        , mnSize(nSize)
+    {
+    }
+
+    /** @return the sum of all elements; 0.0 for an empty array */
+    double operator() () const
+    {
+        // The CPU capability is queried once and cached for later calls.
+        static const bool bHasSSE2 = tools::cpuid::hasSSE2();
+
+        double fSum = 0.0;
+        size_t i = 0;
+
+        // Both helpers advance i past the elements they have consumed.
+        if (bHasSSE2)
+            fSum += executeSSE2(i);
+        else
+            fSum += executeUnrolled(i);
+
+        // Add the tail that did not fill a complete group of four.
+        for (; i < mnSize; ++i)
+            fSum += mpArray[i];
+
+        return fSum;
+    }
+
+private:
+    /** Sums complete groups of four elements using two SSE2 accumulators.
+     *
+     *  @param i in: index to start at; out: index of the first element
+     *           that was not summed
+     *  @return the sum of the consumed elements, 0.0 if there were none
+     */
+    double executeSSE2(size_t& i) const
+    {
+        const size_t nUnrolledSize = mnSize - (mnSize % 4);
+
+        __m128d sum1 = _mm_setzero_pd();
+        __m128d sum2 = _mm_setzero_pd();
+
+        // Unaligned loads: nothing guarantees 16-byte alignment of mpArray.
+        for (const double* pCurrent = mpArray + i; i < nUnrolledSize; i += 4, pCurrent += 4)
+        {
+            sum1 = _mm_add_pd(sum1, _mm_loadu_pd(pCurrent));
+            sum2 = _mm_add_pd(sum2, _mm_loadu_pd(pCurrent + 2));
+        }
+        sum1 = _mm_add_pd(sum1, sum2);
+
+        // Horizontal add: low lane first, then high lane.
+        double fSum = 0.0;
+        double fLane;
+
+        _mm_storel_pd(&fLane, sum1);
+        fSum += fLane;
+
+        _mm_storeh_pd(&fLane, sum1);
+        fSum += fLane;
+
+        return fSum;
+    }
+
+    /** Sums complete groups of four elements using four scalar accumulators.
+     *
+     *  @param i in: index to start at; out: index of the first element
+     *           that was not summed
+     *  @return the sum of the consumed elements, 0.0 if there were none
+     */
+    double executeUnrolled(size_t& i) const
+    {
+        const size_t nUnrolledSize = mnSize - (mnSize % 4);
+
+        double sum0 = 0.0;
+        double sum1 = 0.0;
+        double sum2 = 0.0;
+        double sum3 = 0.0;
+
+        for (const double* pCurrent = mpArray + i; i < nUnrolledSize; i += 4, pCurrent += 4)
+        {
+            sum0 += pCurrent[0];
+            sum1 += pCurrent[1];
+            sum2 += pCurrent[2];
+            sum3 += pCurrent[3];
+        }
+        return sum0 + sum1 + sum2 + sum3;
+    }
+};
+
+} // end namespace sc
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/interpr6.cxx b/sc/source/core/tool/interpr6.cxx
index 50c0768a..c673fdb 100644
--- a/sc/source/core/tool/interpr6.cxx
+++ b/sc/source/core/tool/interpr6.cxx
@@ -26,8 +26,11 @@
#include "mtvcellfunc.hxx"
#include "scmatrix.hxx"
+#include "arraysumfunctor.hxx"
+
#include <formula/token.hxx>
+
using namespace formula;
double const fHalfMachEps = 0.5 * ::std::numeric_limits<double>::epsilon();
@@ -224,18 +227,11 @@ public:
{
const puncture_mdds_encap *pBlock = static_cast<const puncture_mdds_encap *>(rNode.data);
const double *p = pBlock->getPtr(nOffset);
- size_t i, nUnrolled = (nDataSize & 0x3) >> 2;
- // Try to encourage the compiler/CPU to do something sensible (?)
- for (i = 0; i < nUnrolled; i+=4)
- {
- mfSum += p[i];
- mfSum += p[i+1];
- mfSum += p[i+2];
- mfSum += p[i+3];
- }
- for (; i < nDataSize; ++i)
- mfSum += p[i];
+ sc::ArraySumFunctor functor(p, nDataSize);
+
+ mfSum += functor();
+
break;
}
diff --git a/tools/Library_tl.mk b/tools/Library_tl.mk
index 2d105cd..65ba17c 100644
--- a/tools/Library_tl.mk
+++ b/tools/Library_tl.mk
@@ -69,6 +69,7 @@ $(eval $(call gb_Library_add_exception_objects,tl,\
tools/source/memtools/multisel \
tools/source/memtools/unqidx \
tools/source/misc/appendunixshellword \
+ tools/source/misc/cpuid \
tools/source/misc/extendapplicationenvironment \
tools/source/misc/getprocessworkingdir \
tools/source/misc/solarmutex \
diff --git a/tools/source/misc/cpuid.cxx b/tools/source/misc/cpuid.cxx
new file mode 100644
index 0000000..1d0518c
--- /dev/null
+++ b/tools/source/misc/cpuid.cxx
@@ -0,0 +1,63 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include <tools/cpuid.hxx>
+#include <cstdint>
+
+// CPUID is only queried on x86 / x64 with an MSVC- or GCC-compatible compiler.
+// The intrinsic headers are included at file scope: including them from
+// inside a namespace would declare their contents in that namespace.
+#if (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
+#define TOOLS_CPUID_SUPPORTED
+#if defined(_MSC_VER)
+#include <intrin.h>
+#else
+#include <cpuid.h>
+#endif
+#endif
+
+namespace tools
+{
+namespace cpuid
+{
+
+#if defined(TOOLS_CPUID_SUPPORTED)
+
+namespace
+{
+
+// Feature flag positions in the EDX register of CPUID leaf 1.
+const uint32_t EDX_BIT_SSE = 25;
+const uint32_t EDX_BIT_SSE2 = 26;
+
+/** Returns the EDX register of CPUID leaf 1, or 0 (no features) if the
+    leaf cannot be queried. */
+uint32_t getFeatureFlagsEDX()
+{
+#if defined(_MSC_VER)
+    // __cpuid fills the array with EAX, EBX, ECX, EDX in that order.
+    int aInfo[4] = {0, 0, 0, 0};
+    __cpuid(aInfo, 1);
+    return static_cast<uint32_t>(aInfo[3]);
+#else
+    unsigned int nEAX = 0;
+    unsigned int nEBX = 0;
+    unsigned int nECX = 0;
+    unsigned int nEDX = 0;
+    // __get_cpuid returns 0 when the requested leaf is not supported.
+    if (!__get_cpuid(1, &nEAX, &nEBX, &nECX, &nEDX))
+        return 0;
+    return nEDX;
+#endif
+}
+
+/** Tests a single feature bit of CPUID leaf 1 / EDX. */
+bool hasFeatureEDX(uint32_t nBit)
+{
+    return (getFeatureFlagsEDX() & (uint32_t(1) << nBit)) != 0;
+}
+
+}
+
+/** @return true if the CPU reports SSE support. */
+bool hasSSE()
+{
+    return hasFeatureEDX(EDX_BIT_SSE);
+}
+
+/** @return true if the CPU reports SSE2 support. */
+bool hasSSE2()
+{
+    return hasFeatureEDX(EDX_BIT_SSE2);
+}
+
+#else
+
+// No CPUID available on this compiler / architecture: report no features.
+bool hasSSE() { return false; }
+bool hasSSE2() { return false; }
+
+#endif
+
+}
+}
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
More information about the Libreoffice-commits
mailing list