[Libreoffice-commits] core.git: Branch 'libreoffice-4-0' - filter/inc filter/source sw/qa sw/source writerfilter/source

Miklos Vajna vmiklos at suse.cz
Thu Feb 28 07:12:17 PST 2013


 filter/inc/filter/msfilter/rtfutil.hxx         |   23 ++++++++++--
 filter/source/msfilter/rtfutil.cxx             |   47 ++++++++++++++++++++++---
 sw/qa/extras/rtfexport/data/fdo61507.rtf       |   12 ++++++
 sw/qa/extras/rtfexport/rtfexport.cxx           |   19 ++++++++++
 sw/source/filter/ww8/rtfexport.cxx             |   15 +++++--
 sw/source/filter/ww8/rtfexport.hxx             |    2 -
 writerfilter/source/rtftok/rtfdocumentimpl.cxx |   25 ++++++++++---
 writerfilter/source/rtftok/rtfdocumentimpl.hxx |    1 
 8 files changed, 125 insertions(+), 19 deletions(-)

New commits:
commit 3209bc81245452ef6976c77818186b05c3026438
Author: Miklos Vajna <vmiklos at suse.cz>
Date:   Tue Feb 26 15:36:09 2013 +0100

    fdo#61507 import/export RTF_UPR and RTF_UD
    
    In short, these commits make the RTF filter import and export Unicode
    characters in the document title properly. Previously we failed to import
    such files from Word, and the export result caused problems in Wordpad
    (Word handled it fine).
    
    (cherry picked from commits 0805b222f87bf99ec0c53ca678d1c670eb5293a2,
    3a934d928e455eca38f124072c20a624a64aa225 and
    5de52551a963b932cc23c2ea75f709fa1924520b)
    
    Change-Id: Ic9417d0f23d44149acb3ae3dc9d4c281058a1b36
    Reviewed-on: https://gerrit.libreoffice.org/2436
    Tested-by: Caolán McNamara <caolanm at redhat.com>
    Reviewed-by: Caolán McNamara <caolanm at redhat.com>

diff --git a/filter/inc/filter/msfilter/rtfutil.hxx b/filter/inc/filter/msfilter/rtfutil.hxx
index 6f5d82c..aa842fd 100644
--- a/filter/inc/filter/msfilter/rtfutil.hxx
+++ b/filter/inc/filter/msfilter/rtfutil.hxx
@@ -41,10 +41,27 @@ namespace rtfutil {
 MSFILTER_DLLPUBLIC OString OutHex(sal_uLong nHex, sal_uInt8 nLen);
 
 /// Handles correct unicode and legacy export of a single character.
-MSFILTER_DLLPUBLIC OString OutChar(sal_Unicode c, int *pUCMode, rtl_TextEncoding eDestEnc);
+MSFILTER_DLLPUBLIC OString OutChar(sal_Unicode c, int *pUCMode, rtl_TextEncoding eDestEnc, bool* pSuccess = 0, bool bUnicode = true);
 
-/// Handles correct unicode and legacy export of a string.
-MSFILTER_DLLPUBLIC OString OutString(const String &rStr, rtl_TextEncoding eDestEnc);
+/**
+ * Handles correct unicode and legacy export of a string.
+ *
+ * @param rStr the string to export
+ * @param eDestEnc the legacy encoding to use
+ * @param bUnicode if unicode output is wanted as well, or just legacy
+ */
+MSFILTER_DLLPUBLIC OString OutString(const String &rStr, rtl_TextEncoding eDestEnc, bool bUnicode = true);
+
+/**
+ * Handles correct unicode and legacy export of a string, when a
+ * '{' \upr '{' keyword ansi_text '}{\*' \ud '{' keyword Unicode_text '}}}'
+ * construct should be used.
+ *
+ * @param pToken the keyword
+ * @param rStr the text to export
+ * @param eDestEnc the legacy encoding to use
+ */
+MSFILTER_DLLPUBLIC OString OutStringUpr(const sal_Char *pToken, const String &rStr, rtl_TextEncoding eDestEnc);
 
 }
 }
diff --git a/filter/source/msfilter/rtfutil.cxx b/filter/source/msfilter/rtfutil.cxx
index ebb72bd..3e2dfb0 100644
--- a/filter/source/msfilter/rtfutil.cxx
+++ b/filter/source/msfilter/rtfutil.cxx
@@ -53,8 +53,10 @@ OString OutHex(sal_uLong nHex, sal_uInt8 nLen)
     return OString(pStr);
 }
 
-OString OutChar(sal_Unicode c, int *pUCMode, rtl_TextEncoding eDestEnc)
+OString OutChar(sal_Unicode c, int *pUCMode, rtl_TextEncoding eDestEnc, bool* pSuccess, bool bUnicode)
 {
+    if (pSuccess)
+        *pSuccess = true;
     OStringBuffer aBuf;
     const sal_Char* pStr = 0;
     // 0x0b instead of \n, etc because of the replacements in SwWW8AttrIter::GetSnippet()
@@ -91,10 +93,13 @@ OString OutChar(sal_Unicode c, int *pUCMode, rtl_TextEncoding eDestEnc)
             else {
                 OUString sBuf(&c, 1);
                 OString sConverted;
-                sBuf.convertToString(&sConverted, eDestEnc, OUSTRING_TO_OSTRING_CVTFLAGS);
+                if (pSuccess)
+                    *pSuccess &= sBuf.convertToString(&sConverted, eDestEnc, RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR);
+                else
+                    sBuf.convertToString(&sConverted, eDestEnc, OUSTRING_TO_OSTRING_CVTFLAGS);
                 const sal_Int32 nLen = sConverted.getLength();
 
-                if (pUCMode)
+                if (pUCMode && bUnicode)
                 {
                     if (*pUCMode != nLen)
                     {
@@ -130,13 +135,13 @@ OString OutChar(sal_Unicode c, int *pUCMode, rtl_TextEncoding eDestEnc)
     return aBuf.makeStringAndClear();
 }
 
-OString OutString(const String &rStr, rtl_TextEncoding eDestEnc)
+OString OutString(const String &rStr, rtl_TextEncoding eDestEnc, bool bUnicode)
 {
     SAL_INFO("filter.ms", OSL_THIS_FUNC << ", rStr = '" << OUString(rStr) << "'");
     OStringBuffer aBuf;
     int nUCMode = 1;
     for (xub_StrLen n = 0; n < rStr.Len(); ++n)
-        aBuf.append(OutChar(rStr.GetChar(n), &nUCMode, eDestEnc));
+        aBuf.append(OutChar(rStr.GetChar(n), &nUCMode, eDestEnc, 0, bUnicode));
     if (nUCMode != 1) {
         aBuf.append(OOO_STRING_SVTOOLS_RTF_UC);
         aBuf.append((sal_Int32)1);
@@ -145,6 +150,38 @@ OString OutString(const String &rStr, rtl_TextEncoding eDestEnc)
     return aBuf.makeStringAndClear();
 }
 
+/// Checks if lossless conversion of the string to eDestEnc is possible or not.
+static bool TryOutString(const String &rStr, rtl_TextEncoding eDestEnc)
+{
+    int nUCMode = 1;
+    for (xub_StrLen n = 0; n < rStr.Len(); ++n)
+    {
+        bool bRet;
+        OutChar(rStr.GetChar(n), &nUCMode, eDestEnc, &bRet);
+        if (!bRet)
+            return false;
+    }
+    return true;
+}
+
+OString OutStringUpr(const sal_Char *pToken, const String &rStr, rtl_TextEncoding eDestEnc)
+{
+    if (TryOutString(rStr, eDestEnc))
+        return OString("{") + pToken + " " + OutString(rStr, eDestEnc) + "}";
+
+    OStringBuffer aRet;
+    aRet.append("{" OOO_STRING_SVTOOLS_RTF_UPR "{");
+    aRet.append(pToken);
+    aRet.append(" ");
+    aRet.append(OutString(rStr, eDestEnc, /*bUnicode =*/ false));
+    aRet.append("}{" OOO_STRING_SVTOOLS_RTF_IGNORE OOO_STRING_SVTOOLS_RTF_UD "{");
+    aRet.append(pToken);
+    aRet.append(" ");
+    aRet.append(OutString(rStr, eDestEnc));
+    aRet.append("}}}");
+    return aRet.makeStringAndClear();
+}
+
 }
 }
 
diff --git a/sw/qa/extras/rtfexport/data/fdo61507.rtf b/sw/qa/extras/rtfexport/data/fdo61507.rtf
new file mode 100644
index 0000000..1fe8654
--- /dev/null
+++ b/sw/qa/extras/rtfexport/data/fdo61507.rtf
@@ -0,0 +1,12 @@
+{\rtf1
+{\info
+{\upr
+{\title \'c9\'c1???}
+{\*\ud\uc0
+{\title \'c9\'c1
+{\uc1\u336 O\u368 U\u8749 ?}
+}
+}
+}
+}
+Hello.}
diff --git a/sw/qa/extras/rtfexport/rtfexport.cxx b/sw/qa/extras/rtfexport/rtfexport.cxx
index 4f92fbb..18ddf1b 100644
--- a/sw/qa/extras/rtfexport/rtfexport.cxx
+++ b/sw/qa/extras/rtfexport/rtfexport.cxx
@@ -70,6 +70,7 @@ public:
     void testTextFrames();
     void testFdo53604();
     void testFdo52286();
+    void testFdo61507();
 
     CPPUNIT_TEST_SUITE(Test);
 #if !defined(MACOSX) && !defined(WNT)
@@ -114,6 +115,7 @@ void Test::run()
         {"textframes.odt", &Test::testTextFrames},
         {"fdo53604.odt", &Test::testFdo53604},
         {"fdo52286.odt", &Test::testFdo52286},
+        {"fdo61507.rtf", &Test::testFdo61507},
     };
     // Don't test the first import of these, for some reason those tests fail
     const char* aBlacklist[] = {
@@ -469,6 +471,23 @@ void Test::testFdo52286()
     CPPUNIT_ASSERT_EQUAL(sal_Int32(58), getProperty<sal_Int32>(getRun(getParagraph(2), 2), "CharEscapementHeight"));
 }
 
+void Test::testFdo61507()
+{
+    /*
+     * Unicode-only characters in \title confused Wordpad. Once the exporter
+     * was fixed to guard the problematic characters with \upr and \ud, the
+     * importer didn't cope with these new keywords.
+     */
+
+    uno::Reference<document::XDocumentPropertiesSupplier> xDocumentPropertiesSupplier(mxComponent, uno::UNO_QUERY);
+    uno::Reference<document::XDocumentProperties> xDocumentProperties(xDocumentPropertiesSupplier->getDocumentProperties());
+    OUString aExpected = OUString("ÉÁŐŰ∭", 11, RTL_TEXTENCODING_UTF8);
+    CPPUNIT_ASSERT_EQUAL(aExpected, xDocumentProperties->getTitle());
+
+    // Only "Hello.", no additional characters.
+    CPPUNIT_ASSERT_EQUAL(6, getLength());
+}
+
 CPPUNIT_TEST_SUITE_REGISTRATION(Test);
 
 CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sw/source/filter/ww8/rtfexport.cxx b/sw/source/filter/ww8/rtfexport.cxx
index 8259d59..fc1afec 100644
--- a/sw/source/filter/ww8/rtfexport.cxx
+++ b/sw/source/filter/ww8/rtfexport.cxx
@@ -395,7 +395,7 @@ void RtfExport::WriteInfo()
     }
 
     if (xDocProps.is()) {
-        OutUnicode(OOO_STRING_SVTOOLS_RTF_TITLE, xDocProps->getTitle());
+        OutUnicode(OOO_STRING_SVTOOLS_RTF_TITLE, xDocProps->getTitle(), true);
         OutUnicode(OOO_STRING_SVTOOLS_RTF_SUBJECT, xDocProps->getSubject());
 
         OutUnicode(OOO_STRING_SVTOOLS_RTF_KEYWORDS,
@@ -791,13 +791,18 @@ SvStream& RtfExport::OutLong( long nVal )
     return m_pWriter->OutLong( Strm(), nVal );
 }
 
-void RtfExport::OutUnicode(const sal_Char *pToken, const String &rContent)
+void RtfExport::OutUnicode(const sal_Char *pToken, const String &rContent, bool bUpr)
 {
     if (rContent.Len())
     {
-        Strm() << '{' << pToken << ' ';
-        Strm() << msfilter::rtfutil::OutString( rContent, eCurrentEncoding ).getStr();
-        Strm() << '}';
+        if (!bUpr)
+        {
+            Strm() << '{' << pToken << ' ';
+            Strm() << msfilter::rtfutil::OutString( rContent, eCurrentEncoding ).getStr();
+            Strm() << '}';
+        }
+        else
+            Strm() << msfilter::rtfutil::OutStringUpr(pToken, rContent, eCurrentEncoding).getStr();
     }
 }
 
diff --git a/sw/source/filter/ww8/rtfexport.hxx b/sw/source/filter/ww8/rtfexport.hxx
index c21a5e9..30d4c79 100644
--- a/sw/source/filter/ww8/rtfexport.hxx
+++ b/sw/source/filter/ww8/rtfexport.hxx
@@ -156,7 +156,7 @@ public:
     SvStream& Strm();
     SvStream& OutULong( sal_uLong nVal );
     SvStream& OutLong( long nVal );
-    void OutUnicode(const sal_Char *pToken, const String &rContent);
+    void OutUnicode(const sal_Char *pToken, const String &rContent, bool bUpr = false);
     void OutDateTime(const sal_Char* pStr, const util::DateTime& rDT );
     void OutPageDescription( const SwPageDesc& rPgDsc, sal_Bool bWriteReset, sal_Bool bCheckForFirstPage );
 
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index cf1a1aa..d966275 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -1516,7 +1516,11 @@ int RTFDocumentImpl::dispatchDestination(RTFKeyword nKeyword)
             m_aStates.top().nDestinationState = DESTINATION_PARAGRAPHNUMBERING_TEXTBEFORE;
             break;
         case RTF_TITLE:
-            m_aStates.top().nDestinationState = DESTINATION_TITLE;
+            // \title inside \upr but outside \ud should be ignored.
+            if (m_aStates.top().nDestinationState != DESTINATION_UPR)
+                m_aStates.top().nDestinationState = DESTINATION_TITLE;
+            else
+                m_aStates.top().nDestinationState = DESTINATION_SKIP;
             break;
         case RTF_SUBJECT:
             m_aStates.top().nDestinationState = DESTINATION_SUBJECT;
@@ -1614,6 +1618,13 @@ int RTFDocumentImpl::dispatchDestination(RTFKeyword nKeyword)
         OPEN_M_TOKEN(SPREPR, sPrePr);
         OPEN_M_TOKEN(BOX, box);
         OPEN_M_TOKEN(EQARR, eqArr);
+        case RTF_UPR:
+            m_aStates.top().nDestinationState = DESTINATION_UPR;
+            break;
+        case RTF_UD:
+            // Anything inside \ud is just normal Unicode content.
+            m_aStates.top().nDestinationState = DESTINATION_NORMAL;
+            break;
         default:
             SAL_INFO("writerfilter", OSL_THIS_FUNC << ": TODO handle destination '" << lcl_RtfToString(nKeyword) << "'");
             // Make sure we skip destinations (even without \*) till we don't handle them
@@ -3807,10 +3818,6 @@ int RTFDocumentImpl::popState()
     if (m_xDocumentProperties.is())
         m_xDocumentProperties->setGenerator(m_aStates.top().aDestinationText.makeStringAndClear());
     break;
-    case DESTINATION_TITLE:
-    if (m_xDocumentProperties.is())
-        m_xDocumentProperties->setTitle(m_aStates.top().aDestinationText.makeStringAndClear());
-    break;
     case DESTINATION_SUBJECT:
     if (m_xDocumentProperties.is())
         m_xDocumentProperties->setSubject(m_aStates.top().aDestinationText.makeStringAndClear());
@@ -4233,6 +4240,14 @@ int RTFDocumentImpl::popState()
             aState.nDestinationState == DESTINATION_SHPPICT ||
             aState.nDestinationState == DESTINATION_SHAPE)
         m_aStates.top().aFrame = aState.aFrame;
+    else if (aState.nDestinationState == DESTINATION_TITLE)
+    {
+        if (m_aStates.top().nDestinationState == DESTINATION_TITLE)
+            // The parent is a title as well, just append what we have so far.
+            m_aStates.top().aDestinationText.append(aState.aDestinationText.makeStringAndClear());
+        else if (m_xDocumentProperties.is())
+            m_xDocumentProperties->setTitle(aState.aDestinationText.makeStringAndClear());
+    }
     if (m_pCurrentBuffer == &m_aSuperBuffer)
     {
         if (!m_bHasFootnote)
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
index 82490eb..a26d3ff 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
@@ -178,6 +178,7 @@ namespace writerfilter {
             DESTINATION_MGROW,
             DESTINATION_MBOX,
             DESTINATION_MEQARR,
+            DESTINATION_UPR,
         };
 
         enum RTFBorderState


More information about the Libreoffice-commits mailing list