[Libreoffice-commits] core.git: sw/CppunitTest_sw_tox.mk sw/inc sw/Library_sw.mk sw/Module_sw.mk sw/qa sw/source

Tobias Lippert drtl at fastmail.fm
Thu Jun 5 09:26:25 PDT 2014


 sw/CppunitTest_sw_tox.mk                         |   50 +++++++
 sw/Library_sw.mk                                 |    1 
 sw/Module_sw.mk                                  |    4 
 sw/inc/ToxWhitespaceStripper.hxx                 |   52 +++++++
 sw/qa/cppunit/tox/test_ToxWhitespaceStripper.cxx |  150 +++++++++++++++++++++++
 sw/source/core/tox/ToxTextGenerator.cxx          |   36 -----
 sw/source/core/tox/ToxWhitespaceStripper.cxx     |   62 +++++++++
 7 files changed, 324 insertions(+), 31 deletions(-)

New commits:
commit e5345f62bf525b6258736f1ce11a61b5e638e0ff
Author: Tobias Lippert <drtl at fastmail.fm>
Date:   Sun Jun 1 14:15:35 2014 +0200

    Unittest the whitespace stripping logic for tox text generation
    
    The separate class allows unit testing this functionality in isolation.
    
    Change-Id: I1e5eddfb455ca85a662ea38c03302883decc5d58
    Reviewed-on: https://gerrit.libreoffice.org/9608
    Tested-by: Caolán McNamara <caolanm at redhat.com>
    Reviewed-by: Caolán McNamara <caolanm at redhat.com>

diff --git a/sw/CppunitTest_sw_tox.mk b/sw/CppunitTest_sw_tox.mk
new file mode 100644
index 0000000..f372442
--- /dev/null
+++ b/sw/CppunitTest_sw_tox.mk
@@ -0,0 +1,50 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+# This file contains the unit test definition for classes in the sw/source/core/tox subfolder
+# The macro which defines the main method is contained in test_ToxWhitespaceStripper.cxx
+
+$(eval $(call gb_CppunitTest_CppunitTest,sw_tox_test))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,sw_tox_test, \
+	sw/qa/cppunit/tox/test_ToxWhitespaceStripper \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,sw_tox_test, \
+	comphelper \
+	cppu \
+	cppuhelper \
+	sal \
+	svt \
+	sw \
+	test \
+	unotest \
+	vcl \
+	tl \
+	utl \
+	$(gb_UWINAPI) \
+))
+
+$(eval $(call gb_CppunitTest_use_externals,sw_tox_test, \
+	boost_headers \
+	libxml2 \
+))
+
+$(eval $(call gb_CppunitTest_use_api,sw_tox_test,\
+	offapi \
+	udkapi \
+))
+
+$(eval $(call gb_CppunitTest_set_include,sw_tox_test,\
+    -I$(SRCDIR)/sw/inc \
+    -I$(SRCDIR)/sw/source/core/inc \
+    $$(INCLUDE) \
+))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sw/Library_sw.mk b/sw/Library_sw.mk
index c9804a0..a7500ad 100644
--- a/sw/Library_sw.mk
+++ b/sw/Library_sw.mk
@@ -389,6 +389,7 @@ $(eval $(call gb_Library_add_exception_objects,sw,\
     sw/source/core/tox/toxhlp \
     sw/source/core/tox/txmsrt \
     sw/source/core/tox/ToxTextGenerator \
+    sw/source/core/tox/ToxWhitespaceStripper \
     sw/source/core/txtnode/SwGrammarContact \
     sw/source/core/txtnode/atrfld \
     sw/source/core/txtnode/atrflyin \
diff --git a/sw/Module_sw.mk b/sw/Module_sw.mk
index 53a19bb..7e71e2c 100644
--- a/sw/Module_sw.mk
+++ b/sw/Module_sw.mk
@@ -46,6 +46,10 @@ $(eval $(call gb_Module_add_targets,sw,\
 
 endif
 
+$(eval $(call gb_Module_add_check_targets,sw,\
+    CppunitTest_sw_tox \
+))
+
 $(eval $(call gb_Module_add_slowcheck_targets,sw,\
     CppunitTest_sw_uwriter \
     CppunitTest_sw_htmlexport \
diff --git a/sw/inc/ToxWhitespaceStripper.hxx b/sw/inc/ToxWhitespaceStripper.hxx
new file mode 100644
index 0000000..96b254b
--- /dev/null
+++ b/sw/inc/ToxWhitespaceStripper.hxx
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+
+#ifndef TOXWHITESPACESTRIPPER_HXX_
+#define TOXWHITESPACESTRIPPER_HXX_
+
+#include "rtl/ustring.hxx"
+
+#include <vector>
+
+namespace sw {
+
+/** This class helps to remove unwanted whitespaces from a string to use in a Tox.
+ *
+ * The new string will have
+ * - Newlines changed to spaces
+ * - Consecutive spaces merged
+ * - Trailing spaces removed
+ *
+ * It also allows finding, for each position in the input string, the corresponding position in the stripped string.
+ * This is important for attributes which might have to be imported, e.g., it helps to answer the question:
+ * The 3rd character of the input string is subscript, which character in the output string is that?
+ *
+ * @note One leading whitespace is preserved.
+ */
+class SAL_DLLPUBLIC ToxWhitespaceStripper {
+public:
+    ToxWhitespaceStripper(const OUString&);
+
+    sal_Int32
+    GetPositionInStrippedString(sal_Int32 pos) const;
+
+    OUString
+    GetStrippedString() const;
+
+private:
+    OUString mStripped;
+    std::vector<sal_Int32> mNewPositions;
+};
+
+} // end namespace sw
+
+
+
+#endif /* TOXWHITESPACESTRIPPER_HXX_ */
diff --git a/sw/qa/cppunit/tox/test_ToxWhitespaceStripper.cxx b/sw/qa/cppunit/tox/test_ToxWhitespaceStripper.cxx
new file mode 100644
index 0000000..fdbd47c
--- /dev/null
+++ b/sw/qa/cppunit/tox/test_ToxWhitespaceStripper.cxx
@@ -0,0 +1,150 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <stdexcept>
+
+#include <sal/types.h>
+
+#include <rtl/ustring.hxx>
+
+#include <ToxWhitespaceStripper.hxx>
+
+#include <cppunit/TestAssert.h>
+#include <cppunit/TestFixture.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/plugin/TestPlugIn.h>
+
+using namespace sw;
+
+class ToxWhitespaceStripperTest : public CppUnit::TestFixture
+{
+    void
+    MappingCharactersToVariousStrippedStringsWorks();
+
+    void
+    StrippingWhitespacesFromVariousStringsWorks();
+
+    void
+    PositionAfterStringCanBeRequested();
+
+    CPPUNIT_TEST_SUITE(ToxWhitespaceStripperTest);
+    CPPUNIT_TEST(MappingCharactersToVariousStrippedStringsWorks);
+    CPPUNIT_TEST(StrippingWhitespacesFromVariousStringsWorks);
+    CPPUNIT_TEST(PositionAfterStringCanBeRequested);
+
+    CPPUNIT_TEST_SUITE_END();
+
+};
+
+void
+ToxWhitespaceStripperTest::MappingCharactersToVariousStrippedStringsWorks()
+{
+    {
+        OUString test("abc\n");
+        ToxWhitespaceStripper sut(test);
+        CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
+        CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(1));
+        CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(2));
+        CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(3));
+    }
+    {
+        OUString test("abc\n\n");
+        ToxWhitespaceStripper sut(test);
+        CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
+        CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(1));
+        CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(2));
+        CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(3));
+        CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(4));
+    }
+    {
+        OUString test("abc\ndef");
+        ToxWhitespaceStripper sut(test);
+        CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
+        CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(1));
+        CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(2));
+        CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(3));
+        CPPUNIT_ASSERT_EQUAL(4, sut.GetPositionInStrippedString(4));
+        CPPUNIT_ASSERT_EQUAL(5, sut.GetPositionInStrippedString(5));
+        CPPUNIT_ASSERT_EQUAL(6, sut.GetPositionInStrippedString(6));
+    }
+    {
+        //             012345 6789
+        OUString test("  abc \ndef");
+        //             01234567
+        //            " abc def"
+        ToxWhitespaceStripper sut(test);
+        CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
+        CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(1));
+        CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(2));
+        CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(3));
+        CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(4));
+        CPPUNIT_ASSERT_EQUAL(4, sut.GetPositionInStrippedString(5));
+        CPPUNIT_ASSERT_EQUAL(4, sut.GetPositionInStrippedString(6));
+        CPPUNIT_ASSERT_EQUAL(5, sut.GetPositionInStrippedString(7));
+        CPPUNIT_ASSERT_EQUAL(6, sut.GetPositionInStrippedString(8));
+        CPPUNIT_ASSERT_EQUAL(7, sut.GetPositionInStrippedString(9));
+    }
+}
+
+void
+ToxWhitespaceStripperTest::StrippingWhitespacesFromVariousStringsWorks()
+{
+    {
+        OUString test("abc\n");
+        OUString expected("abc");
+        ToxWhitespaceStripper sut(test);
+        CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
+    }
+    {
+        OUString test("abc\n\n");
+        OUString expected("abc");
+        ToxWhitespaceStripper sut(test);
+        CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
+    }
+    {
+        OUString test("abc\ndef");
+        OUString expected("abc def");
+        ToxWhitespaceStripper sut(test);
+        CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
+    }
+    {
+        OUString test("  abc \ndef");
+        OUString expected(" abc def");
+        ToxWhitespaceStripper sut(test);
+        CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
+    }
+    {
+        OUString test("  ");
+        OUString expected("");
+        ToxWhitespaceStripper sut(test);
+        CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
+    }
+    {
+        OUString test("d  ");
+        OUString expected("d");
+        ToxWhitespaceStripper sut(test);
+        CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
+    }
+}
+
+void
+ToxWhitespaceStripperTest::PositionAfterStringCanBeRequested()
+{
+    OUString test("abc");
+    ToxWhitespaceStripper sut(test);
+    sal_Int32 expected = test.getLength();
+    CPPUNIT_ASSERT_EQUAL(expected, sut.GetPositionInStrippedString(test.getLength()));
+}
+
+// Put the test suite in the registry
+CPPUNIT_TEST_SUITE_REGISTRATION(ToxWhitespaceStripperTest);
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/core/tox/ToxTextGenerator.cxx b/sw/source/core/tox/ToxTextGenerator.cxx
index 54d7b6a..8554c88 100644
--- a/sw/source/core/tox/ToxTextGenerator.cxx
+++ b/sw/source/core/tox/ToxTextGenerator.cxx
@@ -33,6 +33,7 @@
 #include "fmtpdsc.hxx"
 #include "DocumentSettingManager.hxx"
 #include "SwStyleNameMapper.hxx"
+#include "ToxWhitespaceStripper.hxx"
 
 #include "editeng/tstpitem.hxx"
 #include "editeng/lrspitem.hxx"
@@ -49,32 +50,6 @@ struct LinkStruct
         nEndTextPos(nEnd) {}
 };
 
-/// Generate String with newlines changed to spaces, consecutive spaces changed
-/// to a single space, and trailing space removed.
-OUString lcl_RemoveLineBreaks(const OUString &rRet)
-{
-    if (rRet.isEmpty())
-        return rRet;
-    sal_Int32 nOffset = 0;
-    OUStringBuffer sRet(rRet.replace('\n', ' '));
-    for (sal_Int32 i = 1; i < sRet.getLength(); ++i)
-    {
-        if ( sRet[i - 1] == ' ' && sRet[i] == ' ' )
-        {
-            nOffset += 1;
-        }
-        else
-        {
-            sRet[i - nOffset] = sRet[i];
-        }
-    }
-    if (sRet[sRet.getLength() - 1] == ' ')
-    {
-        nOffset += 1;
-    }
-    return sRet.copy(0, sRet.getLength() - nOffset).toString();
-}
-
 /// Generate String according to the Form and remove the
 /// special characters 0-31 and 255.
 static OUString lcl_GetNumString( const SwTOXSortTabBase& rBase, bool bUsePrefix, sal_uInt8 nLevel )
@@ -144,8 +119,8 @@ void ToxTextGenerator::GenerateText(SwDoc* pDoc, const std::vector<SwTOXSortTabB
             case TOKEN_ENTRY_TEXT:
                 {
                     SwIndex aIdx( pTOXNd, std::min(pTOXNd->GetTxt().getLength(),rTxt.getLength()) );
-                    rBase.FillText( *pTOXNd, aIdx );
-                    rTxt = lcl_RemoveLineBreaks(rTxt);
+                    ToxWhitespaceStripper stripper(rBase.GetTxt().sText);
+                    pTOXNd->InsertText(stripper.GetStrippedString(), aIdx);
                 }
                 break;
 
@@ -153,10 +128,9 @@ void ToxTextGenerator::GenerateText(SwDoc* pDoc, const std::vector<SwTOXSortTabB
                 {
                     // for TOC numbering
                     rTxt += lcl_GetNumString( rBase, true, MAXLEVEL );
-
                     SwIndex aIdx( pTOXNd, rTxt.getLength() );
-                    rBase.FillText( *pTOXNd, aIdx );
-                    rTxt = lcl_RemoveLineBreaks(rTxt);
+                    ToxWhitespaceStripper stripper(rBase.GetTxt().sText);
+                    pTOXNd->InsertText(stripper.GetStrippedString(), aIdx);
                 }
                 break;
 
diff --git a/sw/source/core/tox/ToxWhitespaceStripper.cxx b/sw/source/core/tox/ToxWhitespaceStripper.cxx
new file mode 100644
index 0000000..b01c92c
--- /dev/null
+++ b/sw/source/core/tox/ToxWhitespaceStripper.cxx
@@ -0,0 +1,62 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "ToxWhitespaceStripper.hxx"
+
+#include "rtl/ustrbuf.hxx"
+#include <boost/numeric/conversion/cast.hpp>
+
+namespace sw {
+
+ToxWhitespaceStripper::ToxWhitespaceStripper(const OUString& inputString)
+{
+    OUStringBuffer buffer;
+
+    bool lastCharacterWasWhitespace = false;
+    for (sal_Int32 pos = 0; pos < inputString.getLength(); ++pos) {
+        sal_Unicode cur = inputString[pos];
+
+        if (cur == ' ' || cur == '\n') {
+            // merge consecutive whitespaces (and translate them to spaces)
+            if (!lastCharacterWasWhitespace) {
+                buffer.append(' ');
+            }
+            lastCharacterWasWhitespace = true;
+        }
+        else {
+            buffer.append(cur);
+            lastCharacterWasWhitespace = false;
+        }
+        mNewPositions.push_back(buffer.getLength()-1);
+    }
+    // Add one position if the position after the stripped string is requested, e.g., for attributes which
+    // extend beyond the string.
+    mNewPositions.push_back(buffer.getLength());
+    // strip the last whitespace (if there was one)
+    if (lastCharacterWasWhitespace) {
+        buffer.truncate(buffer.getLength() - 1);
+    }
+    mStripped = buffer.getStr();
+}
+
+
+sal_Int32
+ToxWhitespaceStripper::GetPositionInStrippedString(sal_Int32 pos) const
+{
+    size_t upos = boost::numeric_cast<size_t>(pos);
+    return mNewPositions.at(upos);
+}
+
+OUString
+ToxWhitespaceStripper::GetStrippedString() const
+{
+    return mStripped;
+}
+
+}


More information about the Libreoffice-commits mailing list