[Libreoffice-commits] core.git: Branch 'distro/collabora/cp-5.3' - sw/qa writerfilter/source

Mike Kaganski mike.kaganski at collabora.com
Wed Nov 15 11:31:21 UTC 2017


 sw/qa/extras/ooxmlexport/data/tdf111964.docx          |binary
 sw/qa/extras/ooxmlexport/ooxmlexport9.cxx             |   10 ++++++
 writerfilter/source/ooxml/OOXMLFastContextHandler.cxx |   28 +++++++++++++++++-
 3 files changed, 37 insertions(+), 1 deletion(-)

New commits:
commit 1abb4d3469b4ca982602a199578b929cf02d3cdc
Author: Mike Kaganski <mike.kaganski at collabora.com>
Date:   Wed Aug 23 09:09:57 2017 +0300

    tdf#111964: only trim XML whitespace
    
    OUString::trim() uses rtl_uString_newTrim, which relies upon
    rtl_ImplIsWhitespace. The latter treats as whitespace not only
    characters with values less than or equal to 32, but also the spaces
    in the Unicode General Punctuation block and some control characters.
    Thus, using OUString::trim() is incorrect when the goal is to trim XML
    whitespace, which is defined as one of 0x09, 0x0A, 0x0D, 0x20.
    
    A unit test is included.
    
    Change-Id: I45a132be923a52dcd5a4c35aeecb53d423b49fec
    Reviewed-on: https://gerrit.libreoffice.org/41444
    Reviewed-by: Mike Kaganski <mike.kaganski at collabora.com>
    Tested-by: Mike Kaganski <mike.kaganski at collabora.com>
    Reviewed-on: https://gerrit.libreoffice.org/44746
    Reviewed-by: Aron Budea <aron.budea at collabora.com>
    Tested-by: Aron Budea <aron.budea at collabora.com>

diff --git a/sw/qa/extras/ooxmlexport/data/tdf111964.docx b/sw/qa/extras/ooxmlexport/data/tdf111964.docx
new file mode 100644
index 000000000000..7cb85a1d87df
Binary files /dev/null and b/sw/qa/extras/ooxmlexport/data/tdf111964.docx differ
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport9.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport9.cxx
index 313a8e3d4e3c..a7b36683f943 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport9.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport9.cxx
@@ -564,6 +564,16 @@ DECLARE_OOXMLEXPORT_TEST(tdf112169, "tdf112169.odt")
     // LO crashed while export because of chararacter background color handling
 }
 
+DECLARE_OOXMLEXPORT_TEST(testTdf111964, "tdf111964.docx")
+{
+    xmlDocPtr pXmlDoc = parseExport("word/document.xml");
+    if (!pXmlDoc) // no parsed export available: nothing to verify
+        return;
+    // Unicode spaces that are not XML whitespace must not be trimmed from the run text
+    const sal_Unicode sWSReference [] { 0x2002, 0x2002, 0x2002, 0x2002, 0x2002, 0 }; // five U+2002 characters, NUL-terminated
+    assertXPathContent(pXmlDoc, "/w:document/w:body/w:p/w:r[4]/w:t", sWSReference); // text of the 4th run must be exactly the reference
+}
+
 CPPUNIT_PLUGIN_IMPLEMENT();
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
index 3b39eaa1a954..388f02603f85 100644
--- a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
+++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
@@ -620,6 +620,32 @@ void OOXMLFastContextHandler::endTxbxContent()
     mpParserState->endTxbxContent();
 }
 
+namespace {
+// Returns true iff cChar is XML white space, which is exactly one of four characters:
+// #x9 (tab), #xA (line feed), #xD (carriage return), or #x20 (space); nothing else counts
+bool IsXMLWhitespace(sal_Unicode cChar)
+{
+    return cChar == 0x9 || cChar == 0xA || cChar == 0xD || cChar == 0x20;
+}
+
+OUString TrimXMLWhitespace(const OUString & sText) // returns sText without leading/trailing XML whitespace (see IsXMLWhitespace)
+{
+    sal_Int32 nTrimmedStart = 0; // index of the first character to keep
+    const sal_Int32 nLen = sText.getLength();
+    sal_Int32 nTrimmedEnd = nLen - 1; // index of the last character to keep; -1 when sText is empty
+    while (nTrimmedStart < nLen && IsXMLWhitespace(sText[nTrimmedStart]))
+        ++nTrimmedStart; // skip leading XML whitespace
+    while (nTrimmedStart <= nTrimmedEnd && IsXMLWhitespace(sText[nTrimmedEnd]))
+        --nTrimmedEnd; // skip trailing XML whitespace, never moving past nTrimmedStart - 1
+    if ((nTrimmedStart == 0) && (nTrimmedEnd == nLen - 1))
+        return sText; // nothing was trimmed (this also covers empty input)
+    else if (nTrimmedStart > nTrimmedEnd)
+        return OUString(); // the input consisted solely of XML whitespace
+    else
+        return sText.copy(nTrimmedStart, nTrimmedEnd-nTrimmedStart+1); // inclusive range [nTrimmedStart, nTrimmedEnd]
+}
+}
+
 void OOXMLFastContextHandler::text(const OUString & sText)
 {
     if (isForwardEvents())
@@ -631,7 +657,7 @@ void OOXMLFastContextHandler::text(const OUString & sText)
         // tabs are converted to spaces
         if (!IsPreserveSpace())
         {
-            sNormalizedText = sNormalizedText.trim().replaceAll("\t", " ");
+            sNormalizedText = TrimXMLWhitespace(sNormalizedText).replaceAll("\t", " ");
         }
         mpStream->utext(reinterpret_cast < const sal_uInt8 * >
                         (sNormalizedText.getStr()),


More information about the Libreoffice-commits mailing list