[Libreoffice-commits] core.git: Branch 'feature/cpu_intrinsics_support' - vcl/inc vcl/Library_vcl.mk vcl/source
Tomaž Vajngerl
tomaz.vajngerl at collabora.co.uk
Thu Jul 13 20:17:40 UTC 2017
vcl/Library_vcl.mk | 16 ++++++
vcl/inc/ScanlineTools.hxx | 28 +++++++++++
vcl/source/bitmap/ScanlineTools.cxx | 30 ++++++++++++
vcl/source/bitmap/ScanlineToolsAVX2.cxx | 64 ++++++++++++++++++++++++++
vcl/source/bitmap/ScanlineToolsSSE2.cxx | 76 +++++++++++++++++++++++++++++++
vcl/source/bitmap/ScanlineToolsSSSE3.cxx | 61 ++++++++++++++++++++++++
6 files changed, 275 insertions(+)
New commits:
commit 81910410d22c060f9901b129697ea43a25cfbd99
Author: Tomaž Vajngerl <tomaz.vajngerl at collabora.co.uk>
Date: Thu Jul 13 22:02:19 2017 +0200
scanline tools: convert RGBA <-> BGRA + vectorized fast paths
This adds a tool to convert a scanline from RGBA color channel
order to BGRA color channel order and back. It also includes the
vectorized fast path to accelerate it with SSE2 (~1.7x faster),
SSSE3 (~4x faster), AVX2 (~8x faster).
Change-Id: Ic427eed15d3cef40f9ad87220fb6b71770673c92
diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index a0113a2e85d4..93981505a600 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -411,6 +411,22 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\
vcl/backendtest/outputdevice/rectangle \
))
+# Scanline channel-swap tools. The plain C++ implementation gets only the
+# exception flags; each SIMD variant lives in its own object so it can be
+# compiled with additional instruction-set flags.
+$(eval $(call gb_Library_add_cxxobjects,vcl,\
+ vcl/source/bitmap/ScanlineTools, $(gb_LinkTarget_EXCEPTIONFLAGS) \
+))
+
+# NOTE(review): the three SIMD objects below all receive the same
+# $(INTRINSICS_CXXFLAGS), which is not defined in this diff. If it enables
+# the widest instruction set for every object, the compiler may emit those
+# instructions in the SSE2/SSSE3 objects too - confirm per-object flags.
+$(eval $(call gb_Library_add_cxxobjects,vcl,\
+ vcl/source/bitmap/ScanlineToolsSSE2, $(gb_LinkTarget_EXCEPTIONFLAGS) $(INTRINSICS_CXXFLAGS)\
+))
+
+$(eval $(call gb_Library_add_cxxobjects,vcl,\
+ vcl/source/bitmap/ScanlineToolsSSSE3, $(gb_LinkTarget_EXCEPTIONFLAGS) $(INTRINSICS_CXXFLAGS)\
+))
+
+$(eval $(call gb_Library_add_cxxobjects,vcl,\
+ vcl/source/bitmap/ScanlineToolsAVX2, $(gb_LinkTarget_EXCEPTIONFLAGS) $(INTRINSICS_CXXFLAGS)\
+))
+
$(eval $(call gb_Library_add_cobjects,vcl,\
vcl/source/filter/jpeg/transupp \
))
diff --git a/vcl/inc/ScanlineTools.hxx b/vcl/inc/ScanlineTools.hxx
new file mode 100644
index 000000000000..898019903a15
--- /dev/null
+++ b/vcl/inc/ScanlineTools.hxx
@@ -0,0 +1,28 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#ifndef INCLUDED_VCL_INC_SCANLINETOOLS_HXX
+#define INCLUDED_VCL_INC_SCANLINETOOLS_HXX
+
+#include <vcl/dllapi.h>
+
+namespace vcl {
+namespace scanline {
+
+/** Copy a scanline of 4-byte pixels from pSource to pDestination, exchanging
+    bytes 0 and 2 of every pixel (ABCD -> CBAD, e.g. RGBA <-> BGRA). Bytes 1
+    and 3 are copied unchanged.
+
+    nScanlineSize is a byte count; the implementations step through the data
+    4 bytes at a time, so it is expected to be a multiple of 4.
+
+    The plain variant always returns true. The _SSE2, _SSSE3 and _AVX2
+    variants return true when they were compiled with the matching
+    LO_*_AVAILABLE macro defined; otherwise they are stubs that return false
+    without touching either buffer. None of them checks CPU capabilities at
+    run time.
+*/
+bool VCL_DLLPUBLIC swapABCDtoCBAD(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize);
+bool VCL_DLLPUBLIC swapABCDtoCBAD_SSE2(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize);
+bool VCL_DLLPUBLIC swapABCDtoCBAD_SSSE3(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize);
+bool VCL_DLLPUBLIC swapABCDtoCBAD_AVX2(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize);
+
+}} // end vcl::scanline
+
+#endif // INCLUDED_VCL_INC_SCANLINETOOLS_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/bitmap/ScanlineTools.cxx b/vcl/source/bitmap/ScanlineTools.cxx
new file mode 100644
index 000000000000..ceefd0f2c30f
--- /dev/null
+++ b/vcl/source/bitmap/ScanlineTools.cxx
@@ -0,0 +1,30 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include "ScanlineTools.hxx"
+
+namespace vcl {
+namespace scanline {
+
+/** Copy a scanline of 4-byte pixels while exchanging bytes 0 and 2 of every
+    pixel (ABCD -> CBAD, e.g. RGBA <-> BGRA); bytes 1 and 3 are copied as-is.
+
+    This is the plain C++ implementation without SIMD intrinsics.
+
+    @param pSource        first byte of the input scanline
+    @param pDestination   first byte of the output scanline; may be the same
+                          pointer as pSource for an in-place conversion
+    @param nScanlineSize  number of bytes to convert; only whole 4-byte pixels
+                          are processed, a trailing partial pixel (1-3 bytes)
+                          is left untouched
+    @return always true
+*/
+bool swapABCDtoCBAD(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize)
+{
+    // "nScanlineSize - i >= 4" guarantees that all four bytes of the pixel are
+    // inside the scanline, so a size that is not a multiple of 4 cannot cause
+    // an access past the end of either buffer.
+    for (sal_Int32 i = 0; nScanlineSize - i >= 4; i += 4, pSource += 4, pDestination += 4)
+    {
+        // Fetch both swapped bytes before storing anything: when source and
+        // destination are the same buffer, writing byte 0 first would
+        // destroy the value that has to end up in byte 2.
+        const sal_uInt8 nByte0 = pSource[0];
+        const sal_uInt8 nByte2 = pSource[2];
+
+        pDestination[0] = nByte2;
+        pDestination[1] = pSource[1];
+        pDestination[2] = nByte0;
+        pDestination[3] = pSource[3];
+    }
+    return true;
+}
+
+}} // end vcl::scanline
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/bitmap/ScanlineToolsAVX2.cxx b/vcl/source/bitmap/ScanlineToolsAVX2.cxx
new file mode 100644
index 000000000000..1ef386f8256b
--- /dev/null
+++ b/vcl/source/bitmap/ScanlineToolsAVX2.cxx
@@ -0,0 +1,64 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include "ScanlineTools.hxx"
+
+#include <tools/simdsupport.hxx>
+
+#if defined(LO_AVX2_AVAILABLE)
+#include <immintrin.h>
+#endif
+
+namespace vcl {
+namespace scanline {
+
+#if defined(LO_AVX2_AVAILABLE)
+/** AVX2 variant of swapABCDtoCBAD: exchanges bytes 0 and 2 of every 4-byte
+    pixel (e.g. RGBA <-> BGRA), 32 bytes (8 pixels) per step, and handles the
+    remaining whole pixels with a scalar loop.
+
+    No CPU capability check is done here; the caller has to make sure the
+    processor supports AVX2 before calling.
+
+    @param pSource        first byte of the input scanline (no alignment needed)
+    @param pDestination   first byte of the output scanline (no alignment
+                          needed); may be the same pointer as pSource
+    @param nScanlineSize  number of bytes to convert; only whole 4-byte pixels
+                          are processed, a trailing partial pixel is left untouched
+    @return true (this build has AVX2 support compiled in)
+*/
+bool swapABCDtoCBAD_AVX2(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize)
+{
+    // Arguments of _mm256_set_epi8 run from byte 31 down to byte 0, so read
+    // right to left: output bytes 0,1,2,3 take input bytes 2,1,0,3, and so on.
+    // _mm256_shuffle_epi8 shuffles inside each 128-bit lane and uses only the
+    // low 4 bits of an index, so 16..31 address bytes 0..15 of the upper lane.
+    const __m256i aShuffleMask = _mm256_set_epi8(31, 28, 29, 30, 27, 24, 25, 26,
+                                                 23, 20, 21, 22, 19, 16, 17, 18,
+                                                 15, 12, 13, 14, 11, 8, 9, 10,
+                                                 7, 4, 5, 6, 3, 0, 1, 2);
+
+    const sal_Int32 nBlocks = nScanlineSize / 32;
+    sal_Int32 nConverted = 0; // bytes already handled by the vector loop
+
+    if (nBlocks > 0)
+    {
+        const __m256i* pSource256 = reinterpret_cast<const __m256i*>(pSource);
+        __m256i* pDestination256 = reinterpret_cast<__m256i*>(pDestination);
+
+        for (sal_Int32 x = 0; x < nBlocks; ++x, ++pDestination256, ++pSource256)
+        {
+            // Unaligned load/store; the block is fully loaded before it is
+            // stored, so converting in place is fine.
+            _mm256_storeu_si256(pDestination256, _mm256_shuffle_epi8(_mm256_loadu_si256(pSource256), aShuffleMask));
+        }
+
+        // Advance only when blocks were processed, so a negative size never
+        // moves the pointers in front of the buffers.
+        nConverted = nBlocks * 32;
+        pSource += nConverted;
+        pDestination += nConverted;
+    }
+
+    // Scalar tail for the pixels that do not fill a 32-byte block. The
+    // condition keeps all four bytes of a pixel inside the scanline.
+    for (sal_Int32 i = nConverted; nScanlineSize - i >= 4; i += 4, pSource += 4, pDestination += 4)
+    {
+        // Read both swapped bytes first so that pSource == pDestination works.
+        const sal_uInt8 nByte0 = pSource[0];
+        const sal_uInt8 nByte2 = pSource[2];
+
+        pDestination[0] = nByte2;
+        pDestination[1] = pSource[1];
+        pDestination[2] = nByte0;
+        pDestination[3] = pSource[3];
+    }
+    return true;
+}
+#else
+/** Stub used when AVX2 support is not compiled in: does nothing and returns
+    false so the caller can fall back to another implementation. */
+bool swapABCDtoCBAD_AVX2(sal_uInt8* /*pSource*/, sal_uInt8* /*pDestination*/, sal_Int32 /*nScanlineSize*/)
+{
+    return false;
+}
+#endif
+
+}} // end vcl::scanline
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/bitmap/ScanlineToolsSSE2.cxx b/vcl/source/bitmap/ScanlineToolsSSE2.cxx
new file mode 100644
index 000000000000..1f52fb6c5ec7
--- /dev/null
+++ b/vcl/source/bitmap/ScanlineToolsSSE2.cxx
@@ -0,0 +1,76 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include "ScanlineTools.hxx"
+
+#include <tools/simdsupport.hxx>
+
+#if defined(LO_SSE2_AVAILABLE)
+#include <emmintrin.h>
+#endif
+
+namespace vcl {
+namespace scanline {
+
+#if defined(LO_SSE2_AVAILABLE)
+/** SSE2 variant of swapABCDtoCBAD: exchanges bytes 0 and 2 of every 4-byte
+    pixel (e.g. RGBA <-> BGRA), 16 bytes (4 pixels) per step, and handles the
+    remaining whole pixels with a scalar loop.
+
+    SSE2 has no byte shuffle, so the bytes that stay in place (1 and 3) are
+    masked out, the other two are swapped as 16-bit words, and both halves
+    are OR-ed back together.
+
+    No CPU capability check is done here; the caller has to make sure the
+    processor supports SSE2 before calling.
+
+    @param pSource        first byte of the input scanline (no alignment needed)
+    @param pDestination   first byte of the output scanline (no alignment
+                          needed); may be the same pointer as pSource
+    @param nScanlineSize  number of bytes to convert; only whole 4-byte pixels
+                          are processed, a trailing partial pixel is left untouched
+    @return true (this build has SSE2 support compiled in)
+*/
+bool swapABCDtoCBAD_SSE2(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize)
+{
+    const sal_Int32 nBlocks = nScanlineSize / 16;
+    sal_Int32 nConverted = 0; // bytes already handled by the vector loop
+
+    if (nBlocks > 0)
+    {
+        const __m128i* pSource128 = reinterpret_cast<const __m128i*>(pSource);
+        __m128i* pDestination128 = reinterpret_cast<__m128i*>(pDestination);
+
+        // Selects bytes 1 and 3 (G and A) of every little-endian 32-bit pixel.
+        // The cast makes the unsigned -> int conversion explicit.
+        const __m128i agmask = _mm_set1_epi32(static_cast<int>(0xFF00FF00));
+
+        for (sal_Int32 x = 0; x < nBlocks; ++x, ++pDestination128, ++pSource128)
+        {
+            // RGBA RGBA RGBA RGBA
+            const __m128i rgba = _mm_loadu_si128(pSource128);
+
+            // 0G0A 0G0A 0G0A 0G0A
+            const __m128i ag = _mm_and_si128(agmask, rgba);
+            // R0B0 R0B0 R0B0 R0B0
+            const __m128i rb = _mm_andnot_si128(agmask, rgba);
+
+            // Swap R and B by exchanging neighbouring 16-bit words, first in
+            // the low and then in the high 64 bits.
+            // B0R0 B0R0 B0R0 B0R0
+            const __m128i br = _mm_shufflehi_epi16(_mm_shufflelo_epi16(rb, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
+
+            //    B0R0 B0R0 B0R0 B0R0
+            // or 0G0A 0G0A 0G0A 0G0A
+            _mm_storeu_si128(pDestination128, _mm_or_si128(ag, br));
+        }
+
+        // Advance only when blocks were processed, so a negative size never
+        // moves the pointers in front of the buffers.
+        nConverted = nBlocks * 16;
+        pSource += nConverted;
+        pDestination += nConverted;
+    }
+
+    // Scalar tail for the pixels that do not fill a 16-byte block. The
+    // condition keeps all four bytes of a pixel inside the scanline.
+    for (sal_Int32 i = nConverted; nScanlineSize - i >= 4; i += 4, pSource += 4, pDestination += 4)
+    {
+        // Read both swapped bytes first so that pSource == pDestination works.
+        const sal_uInt8 nByte0 = pSource[0];
+        const sal_uInt8 nByte2 = pSource[2];
+
+        pDestination[0] = nByte2;
+        pDestination[1] = pSource[1];
+        pDestination[2] = nByte0;
+        pDestination[3] = pSource[3];
+    }
+    return true;
+}
+#else
+/** Stub used when SSE2 support is not compiled in: does nothing and returns
+    false so the caller can fall back to another implementation. */
+bool swapABCDtoCBAD_SSE2(sal_uInt8* /*pSource*/, sal_uInt8* /*pDestination*/, sal_Int32 /*nScanlineSize*/)
+{
+    return false;
+}
+#endif
+
+}} // end vcl::scanline
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/bitmap/ScanlineToolsSSSE3.cxx b/vcl/source/bitmap/ScanlineToolsSSSE3.cxx
new file mode 100644
index 000000000000..bced320b4809
--- /dev/null
+++ b/vcl/source/bitmap/ScanlineToolsSSSE3.cxx
@@ -0,0 +1,61 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include "ScanlineTools.hxx"
+
+#include <tools/simdsupport.hxx>
+
+#if defined(LO_SSSE3_AVAILABLE)
+#include <tmmintrin.h>
+#endif
+
+namespace vcl {
+namespace scanline {
+
+#if defined(LO_SSSE3_AVAILABLE)
+/** SSSE3 variant of swapABCDtoCBAD: exchanges bytes 0 and 2 of every 4-byte
+    pixel (e.g. RGBA <-> BGRA) with a single byte shuffle per 16 bytes
+    (4 pixels), and handles the remaining whole pixels with a scalar loop.
+
+    No CPU capability check is done here; the caller has to make sure the
+    processor supports SSSE3 before calling.
+
+    @param pSource        first byte of the input scanline (no alignment needed)
+    @param pDestination   first byte of the output scanline (no alignment
+                          needed); may be the same pointer as pSource
+    @param nScanlineSize  number of bytes to convert; only whole 4-byte pixels
+                          are processed, a trailing partial pixel is left untouched
+    @return true (this build has SSSE3 support compiled in)
+*/
+bool swapABCDtoCBAD_SSSE3(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize)
+{
+    // Arguments of _mm_set_epi8 run from byte 15 down to byte 0, so read
+    // right to left: output bytes 0,1,2,3 take input bytes 2,1,0,3, and so on.
+    const __m128i aShuffleMask = _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2);
+
+    const sal_Int32 nBlocks = nScanlineSize / 16;
+    sal_Int32 nConverted = 0; // bytes already handled by the vector loop
+
+    if (nBlocks > 0)
+    {
+        const __m128i* pSource128 = reinterpret_cast<const __m128i*>(pSource);
+        __m128i* pDestination128 = reinterpret_cast<__m128i*>(pDestination);
+
+        for (sal_Int32 x = 0; x < nBlocks; ++x, ++pDestination128, ++pSource128)
+        {
+            // _mm_lddqu_si128 is the SSE3 unaligned load (available here
+            // because SSSE3 implies SSE3). The block is fully loaded before
+            // it is stored, so converting in place is fine.
+            _mm_storeu_si128(pDestination128, _mm_shuffle_epi8(_mm_lddqu_si128(pSource128), aShuffleMask));
+        }
+
+        // Advance only when blocks were processed, so a negative size never
+        // moves the pointers in front of the buffers.
+        nConverted = nBlocks * 16;
+        pSource += nConverted;
+        pDestination += nConverted;
+    }
+
+    // Scalar tail for the pixels that do not fill a 16-byte block. The
+    // condition keeps all four bytes of a pixel inside the scanline.
+    for (sal_Int32 i = nConverted; nScanlineSize - i >= 4; i += 4, pSource += 4, pDestination += 4)
+    {
+        // Read both swapped bytes first so that pSource == pDestination works.
+        const sal_uInt8 nByte0 = pSource[0];
+        const sal_uInt8 nByte2 = pSource[2];
+
+        pDestination[0] = nByte2;
+        pDestination[1] = pSource[1];
+        pDestination[2] = nByte0;
+        pDestination[3] = pSource[3];
+    }
+    return true;
+}
+#else
+/** Stub used when SSSE3 support is not compiled in: does nothing and returns
+    false so the caller can fall back to another implementation. */
+bool swapABCDtoCBAD_SSSE3(sal_uInt8* /*pSource*/, sal_uInt8* /*pDestination*/, sal_Int32 /*nScanlineSize*/)
+{
+    return false;
+}
+#endif
+}} // end vcl::scanline
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
More information about the Libreoffice-commits
mailing list