[Libreoffice-commits] core.git: sw/qa sw/source

Adam Kovacs (via logerrit) logerrit at kemper.freedesktop.org
Mon May 6 08:30:39 UTC 2019


 sw/qa/extras/ooxmlexport/data/tdf113483_crossreflink_nonascii_bookmarkname.docx |binary
 sw/qa/extras/ooxmlexport/ooxmlexport13.cxx                                      |   10 ++++++++++
 sw/source/filter/ww8/docxattributeoutput.cxx                                    |    9 ++++++++-
 3 files changed, 18 insertions(+), 1 deletion(-)

New commits:
commit b9afb9959c31c3c57d0f2fe91107a92abfd82cdb
Author:     Adam Kovacs <christo161 at gmail.com>
AuthorDate: Tue Apr 30 10:53:08 2019 +0200
Commit:     László Németh <nemeth at numbertext.org>
CommitDate: Mon May 6 10:29:48 2019 +0200

    tdf#113483: DOCX: fix encoding of bookmarks with non-ASCII letters
    
    Non-ASCII letters were stored using percent-encoding, resulting
    in broken bookmark names after export/import. For example, the word
    "Első" became the wrong "Els%C5%91". Now only the reserved ASCII
    characters are stored in percent-encoding.
    
    For example, the name "Első!" is now stored in the following form:
    
    <w:bookmarkStart w:name="Első%21" w:id="0"/>
    <w:instrText> REF Első%21 \h </w:instrText>
    
    Change-Id: I65168e071b6baa12385c0aaa12d9f2ae4ccf9f98
    Reviewed-on: https://gerrit.libreoffice.org/71299
    Reviewed-by: László Németh <nemeth at numbertext.org>
    Tested-by: László Németh <nemeth at numbertext.org>

diff --git a/sw/qa/extras/ooxmlexport/data/tdf113483_crossreflink_nonascii_bookmarkname.docx b/sw/qa/extras/ooxmlexport/data/tdf113483_crossreflink_nonascii_bookmarkname.docx
new file mode 100644
index 000000000000..ec129909bc01
Binary files /dev/null and b/sw/qa/extras/ooxmlexport/data/tdf113483_crossreflink_nonascii_bookmarkname.docx differ
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx
index dfde7c2f399a..aae0813900ed 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx
@@ -261,6 +261,16 @@ DECLARE_OOXMLIMPORT_TEST(testTdf123460, "tdf123460.docx")
     CPPUNIT_ASSERT_EQUAL(true, bCaught);
 }
 
+//tdf#113483: fix handling of non-ascii characters in bookmark names and instrText xml tags
+DECLARE_OOXMLEXPORT_TEST(testTdf113483, "tdf113483_crossreflink_nonascii_bookmarkname.docx")
+{
+    xmlDocPtr pXmlDoc = parseExport("word/document.xml");
+    if (!pXmlDoc)
+        return;
+    assertXPath(pXmlDoc, "/w:document/w:body/w:p[1]/w:bookmarkStart[1]", "name", OUString::fromUtf8("Els\u0151"));
+    assertXPathContent(pXmlDoc, "/w:document/w:body/w:p[5]/w:r[2]/w:instrText[1]", OUString::fromUtf8(" REF Els\u0151 \\h "));
+}
+
 CPPUNIT_PLUGIN_IMPLEMENT();
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/filter/ww8/docxattributeoutput.cxx b/sw/source/filter/ww8/docxattributeoutput.cxx
index bc20d072a38c..d351101cf4a7 100644
--- a/sw/source/filter/ww8/docxattributeoutput.cxx
+++ b/sw/source/filter/ww8/docxattributeoutput.cxx
@@ -96,6 +96,7 @@
 #include <svl/grabbagitem.hxx>
 #include <sfx2/sfxbasemodel.hxx>
 #include <tools/datetimeutils.hxx>
+#include <tools/urlobj.hxx>
 #include <svl/whiter.hxx>
 #include <rtl/tencinfo.h>
 #include <sal/log.hxx>
@@ -1611,7 +1612,7 @@ void DocxAttributeOutput::DoWriteBookmarkTagStart(const OUString & bookmarkName)
 {
     m_pSerializer->singleElementNS(XML_w, XML_bookmarkStart,
         FSNS(XML_w, XML_id), OString::number(m_nNextBookmarkId),
-        FSNS(XML_w, XML_name), BookmarkToWord(bookmarkName).toUtf8());
+        FSNS(XML_w, XML_name), INetURLObject::decode(BookmarkToWord(bookmarkName), INetURLObject::DecodeMechanism::Unambiguous, RTL_TEXTENCODING_UTF8).toUtf8());
 }
 
 void DocxAttributeOutput::DoWriteBookmarkTagEnd(const OUString & bookmarkName)
@@ -1980,6 +1981,12 @@ void DocxAttributeOutput::CmdField_Impl( const SwTextNode* pNode, sal_Int32 nPos
                sToken = sToken.replaceAll("NNNN", "dddd");
                sToken = sToken.replaceAll("NN", "ddd");
             }
+            //tdf#113483: fix non-ascii characters inside instrText xml tags
+            else if ( rInfos.eType ==  ww::eREF
+              || rInfos.eType ==  ww::ePAGEREF )
+            {
+                sToken = INetURLObject::decode(sToken, INetURLObject::DecodeMechanism::Unambiguous, RTL_TEXTENCODING_UTF8);
+            }
 
             // Write the Field command
             DoWriteCmd( sToken );


More information about the Libreoffice-commits mailing list