[Libreoffice-commits] core.git: sw/CppunitTest_sw_tox.mk sw/inc sw/Library_sw.mk sw/Module_sw.mk sw/qa sw/source
Tobias Lippert
drtl at fastmail.fm
Thu Jun 5 09:26:25 PDT 2014
sw/CppunitTest_sw_tox.mk | 50 +++++++
sw/Library_sw.mk | 1
sw/Module_sw.mk | 4
sw/inc/ToxWhitespaceStripper.hxx | 52 +++++++
sw/qa/cppunit/tox/test_ToxWhitespaceStripper.cxx | 150 +++++++++++++++++++++++
sw/source/core/tox/ToxTextGenerator.cxx | 36 -----
sw/source/core/tox/ToxWhitespaceStripper.cxx | 62 +++++++++
7 files changed, 324 insertions(+), 31 deletions(-)
New commits:
commit e5345f62bf525b6258736f1ce11a61b5e638e0ff
Author: Tobias Lippert <drtl at fastmail.fm>
Date: Sun Jun 1 14:15:35 2014 +0200
Unittest the whitespace stripping logic for tox text generation
The separate class allows this functionality to be unit tested in isolation.
Change-Id: I1e5eddfb455ca85a662ea38c03302883decc5d58
Reviewed-on: https://gerrit.libreoffice.org/9608
Tested-by: Caolán McNamara <caolanm at redhat.com>
Reviewed-by: Caolán McNamara <caolanm at redhat.com>
diff --git a/sw/CppunitTest_sw_tox.mk b/sw/CppunitTest_sw_tox.mk
new file mode 100644
index 0000000..f372442
--- /dev/null
+++ b/sw/CppunitTest_sw_tox.mk
@@ -0,0 +1,50 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+# This file contains the unit test definition for classes in the sw/source/core/tox subfolder
+# The macro which defines the main method is contained in test_ToxWhitespaceStripper.cxx
+
+$(eval $(call gb_CppunitTest_CppunitTest,sw_tox_test))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,sw_tox_test, \
+ sw/qa/cppunit/tox/test_ToxWhitespaceStripper \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,sw_tox_test, \
+ comphelper \
+ cppu \
+ cppuhelper \
+ sal \
+ svt \
+ sw \
+ test \
+ unotest \
+ vcl \
+ tl \
+ utl \
+ $(gb_UWINAPI) \
+))
+
+$(eval $(call gb_CppunitTest_use_externals,sw_tox_test, \
+ boost_headers \
+ libxml2 \
+))
+
+$(eval $(call gb_CppunitTest_use_api,sw_tox_test,\
+ offapi \
+ udkapi \
+))
+
+$(eval $(call gb_CppunitTest_set_include,sw_tox_test,\
+ -I$(SRCDIR)/sw/inc \
+ -I$(SRCDIR)/sw/source/core/inc \
+ $$(INCLUDE) \
+))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sw/Library_sw.mk b/sw/Library_sw.mk
index c9804a0..a7500ad 100644
--- a/sw/Library_sw.mk
+++ b/sw/Library_sw.mk
@@ -389,6 +389,7 @@ $(eval $(call gb_Library_add_exception_objects,sw,\
sw/source/core/tox/toxhlp \
sw/source/core/tox/txmsrt \
sw/source/core/tox/ToxTextGenerator \
+ sw/source/core/tox/ToxWhitespaceStripper \
sw/source/core/txtnode/SwGrammarContact \
sw/source/core/txtnode/atrfld \
sw/source/core/txtnode/atrflyin \
diff --git a/sw/Module_sw.mk b/sw/Module_sw.mk
index 53a19bb..7e71e2c 100644
--- a/sw/Module_sw.mk
+++ b/sw/Module_sw.mk
@@ -46,6 +46,10 @@ $(eval $(call gb_Module_add_targets,sw,\
endif
+$(eval $(call gb_Module_add_check_targets,sw,\
+ CppunitTest_sw_tox \
+))
+
$(eval $(call gb_Module_add_slowcheck_targets,sw,\
CppunitTest_sw_uwriter \
CppunitTest_sw_htmlexport \
diff --git a/sw/inc/ToxWhitespaceStripper.hxx b/sw/inc/ToxWhitespaceStripper.hxx
new file mode 100644
index 0000000..96b254b
--- /dev/null
+++ b/sw/inc/ToxWhitespaceStripper.hxx
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+
+#ifndef TOXWHITESPACESTRIPPER_HXX_
+#define TOXWHITESPACESTRIPPER_HXX_
+
+#include "rtl/ustring.hxx"
+
+#include <vector>
+
+namespace sw {
+
+/** This class removes unwanted whitespace from a string for use in a Tox.
+ *
+ * The new string will have
+ * - Newlines changed to spaces
+ * - Consecutive spaces merged
+ * - Trailing spaces removed
+ *
+ * It also maps positions in the input string to the corresponding positions in the stripped string.
+ * This is important for attributes which might have to be imported, e.g., it helps to answer the question:
+ * "The 3rd character of the input string is subscript; which character in the output string is that?"
+ *
+ * @note One leading whitespace is preserved.
+ */
+class SAL_DLLPUBLIC ToxWhitespaceStripper {
+public:
+ ToxWhitespaceStripper(const OUString&);
+
+ sal_Int32
+ GetPositionInStrippedString(sal_Int32 pos) const;
+
+ OUString
+ GetStrippedString() const;
+
+private:
+ OUString mStripped;
+ std::vector<sal_Int32> mNewPositions;
+};
+
+} // end namespace sw
+
+
+
+#endif /* TOXWHITESPACESTRIPPER_HXX_ */
diff --git a/sw/qa/cppunit/tox/test_ToxWhitespaceStripper.cxx b/sw/qa/cppunit/tox/test_ToxWhitespaceStripper.cxx
new file mode 100644
index 0000000..fdbd47c
--- /dev/null
+++ b/sw/qa/cppunit/tox/test_ToxWhitespaceStripper.cxx
@@ -0,0 +1,150 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <stdexcept>
+
+#include <sal/types.h>
+
+#include <rtl/ustring.hxx>
+
+#include <ToxWhitespaceStripper.hxx>
+
+#include <cppunit/TestAssert.h>
+#include <cppunit/TestFixture.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/plugin/TestPlugIn.h>
+
+using namespace sw;
+
+class ToxWhitespaceStripperTest : public CppUnit::TestFixture
+{
+ void
+ MappingCharactersToVariousStrippedStringsWorks();
+
+ void
+ StrippingWhitespacesFromVariousStringsWorks();
+
+ void
+ PositionAfterStringCanBeRequested();
+
+ CPPUNIT_TEST_SUITE(ToxWhitespaceStripperTest);
+ CPPUNIT_TEST(MappingCharactersToVariousStrippedStringsWorks);
+ CPPUNIT_TEST(StrippingWhitespacesFromVariousStringsWorks);
+ CPPUNIT_TEST(PositionAfterStringCanBeRequested);
+
+ CPPUNIT_TEST_SUITE_END();
+
+};
+
+void
+ToxWhitespaceStripperTest::MappingCharactersToVariousStrippedStringsWorks()
+{
+ {
+ OUString test("abc\n");
+ ToxWhitespaceStripper sut(test);
+ CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
+ CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(1));
+ CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(2));
+ CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(3));
+ }
+ {
+ OUString test("abc\n\n");
+ ToxWhitespaceStripper sut(test);
+ CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
+ CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(1));
+ CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(2));
+ CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(3));
+ CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(4));
+ }
+ {
+ OUString test("abc\ndef");
+ ToxWhitespaceStripper sut(test);
+ CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
+ CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(1));
+ CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(2));
+ CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(3));
+ CPPUNIT_ASSERT_EQUAL(4, sut.GetPositionInStrippedString(4));
+ CPPUNIT_ASSERT_EQUAL(5, sut.GetPositionInStrippedString(5));
+ CPPUNIT_ASSERT_EQUAL(6, sut.GetPositionInStrippedString(6));
+ }
+ {
+ // 012345 6789
+ OUString test(" abc \ndef");
+ // 01234567
+ // " abc def"
+ ToxWhitespaceStripper sut(test);
+ CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
+ CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(1));
+ CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(2));
+ CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(3));
+ CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(4));
+ CPPUNIT_ASSERT_EQUAL(4, sut.GetPositionInStrippedString(5));
+ CPPUNIT_ASSERT_EQUAL(4, sut.GetPositionInStrippedString(6));
+ CPPUNIT_ASSERT_EQUAL(5, sut.GetPositionInStrippedString(7));
+ CPPUNIT_ASSERT_EQUAL(6, sut.GetPositionInStrippedString(8));
+ CPPUNIT_ASSERT_EQUAL(7, sut.GetPositionInStrippedString(9));
+ }
+}
+
+void
+ToxWhitespaceStripperTest::StrippingWhitespacesFromVariousStringsWorks()
+{
+ {
+ OUString test("abc\n");
+ OUString expected("abc");
+ ToxWhitespaceStripper sut(test);
+ CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
+ }
+ {
+ OUString test("abc\n\n");
+ OUString expected("abc");
+ ToxWhitespaceStripper sut(test);
+ CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
+ }
+ {
+ OUString test("abc\ndef");
+ OUString expected("abc def");
+ ToxWhitespaceStripper sut(test);
+ CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
+ }
+ {
+ OUString test(" abc \ndef");
+ OUString expected(" abc def");
+ ToxWhitespaceStripper sut(test);
+ CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
+ }
+ {
+ OUString test(" ");
+ OUString expected("");
+ ToxWhitespaceStripper sut(test);
+ CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
+ }
+ {
+ OUString test("d ");
+ OUString expected("d");
+ ToxWhitespaceStripper sut(test);
+ CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
+ }
+}
+
+void
+ToxWhitespaceStripperTest::PositionAfterStringCanBeRequested()
+{
+ OUString test("abc");
+ ToxWhitespaceStripper sut(test);
+ sal_Int32 expected = test.getLength();
+ CPPUNIT_ASSERT_EQUAL(expected, sut.GetPositionInStrippedString(test.getLength()));
+}
+
+// Put the test suite in the registry
+CPPUNIT_TEST_SUITE_REGISTRATION(ToxWhitespaceStripperTest);
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/core/tox/ToxTextGenerator.cxx b/sw/source/core/tox/ToxTextGenerator.cxx
index 54d7b6a..8554c88 100644
--- a/sw/source/core/tox/ToxTextGenerator.cxx
+++ b/sw/source/core/tox/ToxTextGenerator.cxx
@@ -33,6 +33,7 @@
#include "fmtpdsc.hxx"
#include "DocumentSettingManager.hxx"
#include "SwStyleNameMapper.hxx"
+#include "ToxWhitespaceStripper.hxx"
#include "editeng/tstpitem.hxx"
#include "editeng/lrspitem.hxx"
@@ -49,32 +50,6 @@ struct LinkStruct
nEndTextPos(nEnd) {}
};
-/// Generate String with newlines changed to spaces, consecutive spaces changed
-/// to a single space, and trailing space removed.
-OUString lcl_RemoveLineBreaks(const OUString &rRet)
-{
- if (rRet.isEmpty())
- return rRet;
- sal_Int32 nOffset = 0;
- OUStringBuffer sRet(rRet.replace('\n', ' '));
- for (sal_Int32 i = 1; i < sRet.getLength(); ++i)
- {
- if ( sRet[i - 1] == ' ' && sRet[i] == ' ' )
- {
- nOffset += 1;
- }
- else
- {
- sRet[i - nOffset] = sRet[i];
- }
- }
- if (sRet[sRet.getLength() - 1] == ' ')
- {
- nOffset += 1;
- }
- return sRet.copy(0, sRet.getLength() - nOffset).toString();
-}
-
/// Generate String according to the Form and remove the
/// special characters 0-31 and 255.
static OUString lcl_GetNumString( const SwTOXSortTabBase& rBase, bool bUsePrefix, sal_uInt8 nLevel )
@@ -144,8 +119,8 @@ void ToxTextGenerator::GenerateText(SwDoc* pDoc, const std::vector<SwTOXSortTabB
case TOKEN_ENTRY_TEXT:
{
SwIndex aIdx( pTOXNd, std::min(pTOXNd->GetTxt().getLength(),rTxt.getLength()) );
- rBase.FillText( *pTOXNd, aIdx );
- rTxt = lcl_RemoveLineBreaks(rTxt);
+ ToxWhitespaceStripper stripper(rBase.GetTxt().sText);
+ pTOXNd->InsertText(stripper.GetStrippedString(), aIdx);
}
break;
@@ -153,10 +128,9 @@ void ToxTextGenerator::GenerateText(SwDoc* pDoc, const std::vector<SwTOXSortTabB
{
// for TOC numbering
rTxt += lcl_GetNumString( rBase, true, MAXLEVEL );
-
SwIndex aIdx( pTOXNd, rTxt.getLength() );
- rBase.FillText( *pTOXNd, aIdx );
- rTxt = lcl_RemoveLineBreaks(rTxt);
+ ToxWhitespaceStripper stripper(rBase.GetTxt().sText);
+ pTOXNd->InsertText(stripper.GetStrippedString(), aIdx);
}
break;
diff --git a/sw/source/core/tox/ToxWhitespaceStripper.cxx b/sw/source/core/tox/ToxWhitespaceStripper.cxx
new file mode 100644
index 0000000..b01c92c
--- /dev/null
+++ b/sw/source/core/tox/ToxWhitespaceStripper.cxx
@@ -0,0 +1,62 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "ToxWhitespaceStripper.hxx"
+
+#include "rtl/ustrbuf.hxx"
+#include <boost/numeric/conversion/cast.hpp>
+
+namespace sw {
+
+ToxWhitespaceStripper::ToxWhitespaceStripper(const OUString& inputString)
+{
+ OUStringBuffer buffer;
+
+ bool lastCharacterWasWhitespace = false;
+ for (sal_Int32 pos = 0; pos < inputString.getLength(); ++pos) {
+ sal_Unicode cur = inputString[pos];
+
+ if (cur == ' ' || cur == '\n') {
+ // merge consecutive whitespaces (and translate them to spaces)
+ if (!lastCharacterWasWhitespace) {
+ buffer.append(' ');
+ }
+ lastCharacterWasWhitespace = true;
+ }
+ else {
+ buffer.append(cur);
+ lastCharacterWasWhitespace = false;
+ }
+ mNewPositions.push_back(buffer.getLength()-1);
+ }
+ // Add one position if the position after the stripped string is requested, e.g., for attributes which
+ // extend beyond the string.
+ mNewPositions.push_back(buffer.getLength());
+ // strip the last whitespace (if there was one)
+ if (lastCharacterWasWhitespace) {
+ buffer.truncate(buffer.getLength() - 1);
+ }
+ mStripped = buffer.getStr();
+}
+
+
+sal_Int32
+ToxWhitespaceStripper::GetPositionInStrippedString(sal_Int32 pos) const
+{
+ size_t upos = boost::numeric_cast<size_t>(pos);
+ return mNewPositions.at(upos);
+}
+
+OUString
+ToxWhitespaceStripper::GetStrippedString() const
+{
+ return mStripped;
+}
+
+}
More information about the Libreoffice-commits
mailing list