[Libreoffice-commits] core.git: Branch 'libreoffice-4-2' - sw/qa writerfilter/source

Michael Stahl mstahl at redhat.com
Wed Jun 4 00:35:31 PDT 2014


 sw/qa/extras/rtfimport/data/fdo79384.rtf       |    9 ++++++
 sw/qa/extras/rtfimport/rtfimport.cxx           |    8 +++++
 writerfilter/source/rtftok/rtfdocumentimpl.cxx |   34 ++++++++++++++++++++++---
 writerfilter/source/rtftok/rtfdocumentimpl.hxx |    2 -
 4 files changed, 48 insertions(+), 5 deletions(-)

New commits:
commit d15eb9f09c8854bd58fecd3dc6a31fa678e392a1
Author: Michael Stahl <mstahl at redhat.com>
Date:   Mon Jun 2 23:57:13 2014 +0200

    fdo#79384: RTF import: fix literal Shift-JIS text
    
    This is a variable-length encoding, and the second byte may be an RTF
    syntax character like \, {, }.
    
    (cherry picked from commit 061190a62fcdbfb3a0b266d5afffbd257a3e692e)
    
    Conflicts:
    	writerfilter/source/rtftok/rtfdocumentimpl.cxx
    	writerfilter/source/rtftok/rtfdocumentimpl.hxx
    
    Change-Id: I813ccafda18388af3bf05eb7ce9a0253c627b1c4
    Reviewed-on: https://gerrit.libreoffice.org/9632
    Reviewed-by: Miklos Vajna <vmiklos at collabora.co.uk>
    Tested-by: Miklos Vajna <vmiklos at collabora.co.uk>

diff --git a/sw/qa/extras/rtfimport/data/fdo79384.rtf b/sw/qa/extras/rtfimport/data/fdo79384.rtf
new file mode 100644
index 0000000..2a90085
--- /dev/null
+++ b/sw/qa/extras/rtfimport/data/fdo79384.rtf
@@ -0,0 +1,9 @@
+{\rtf1\ansi
+{\fonttbl{\f5\fnil\fprq0\fcharset128 OpenSymbol{\*\falt Arial Unicode MS};}}
+
+\pard\plain
+
+\dbch\f5 „M„p„‚„{„u„‚„ „ƒ„„y„ƒ„{„p
+„}„\
+
+\par }
diff --git a/sw/qa/extras/rtfimport/rtfimport.cxx b/sw/qa/extras/rtfimport/rtfimport.cxx
index 562b9df..e8f40c8 100644
--- a/sw/qa/extras/rtfimport/rtfimport.cxx
+++ b/sw/qa/extras/rtfimport/rtfimport.cxx
@@ -286,6 +286,14 @@ DECLARE_RTFIMPORT_TEST(testN751020, "n751020.rtf")
     CPPUNIT_ASSERT_EQUAL(sal_Int32(TWIP_TO_MM100(200)), getProperty<sal_Int32>(xParaEnum->nextElement(), "ParaBottomMargin"));
 }
 
+DECLARE_RTFIMPORT_TEST(testFdo79384, "fdo79384.rtf")
+{
+    uno::Reference<text::XTextRange> xTextRange = getRun(getParagraph(1), 1);
+
+    CPPUNIT_ASSERT_EQUAL(OUString("Маркеры спискамЫ", 31, RTL_TEXTENCODING_UTF8),
+            xTextRange->getString());
+}
+
 DECLARE_RTFIMPORT_TEST(testFdo47326, "fdo47326.rtf")
 {
     // This was 15 only, as \super buffered text, then the contents of it got lost.
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index e8316ff..9946de2 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -963,9 +963,33 @@ int RTFDocumentImpl::resolveChars(char ch)
                 m_aStates.top().nCharsToSkip--;
             }
         }
+
         // read a single char if we're in hex mode
         if (m_aStates.top().nInternalState == INTERNAL_HEX)
             break;
+
+        if (RTFParserState::DBCH == m_aStates.top().eRunType &&
+            RTL_TEXTENCODING_MS_932 == m_aStates.top().nCurrentEncoding)
+        {
+            unsigned char uch = ch;
+            if ((uch >= 0x80 && uch <= 0x9F) || uch >= 0xE0)
+            {
+                // read second byte of 2-byte Shift-JIS - may be \ { }
+                Strm() >> ch;
+                if (m_aStates.top().nCharsToSkip == 0)
+                {
+                    assert(bUnicodeChecked);
+                    aBuf.append(ch);
+                }
+                else
+                {
+                    assert(bSkipped);
+                    // anybody who uses \ucN with Shift-JIS is insane
+                    m_aStates.top().nCharsToSkip--;
+                }
+            }
+        }
+
         Strm() >> ch;
     }
     if (m_aStates.top().nInternalState != INTERNAL_HEX && !Strm().IsEof())
@@ -2747,12 +2771,13 @@ int RTFDocumentImpl::dispatchFlag(RTFKeyword nKeyword)
             break;
         case RTF_LOCH:
             // Noop, dmapper detects this automatically.
+            m_aStates.top().eRunType = RTFParserState::LOCH;
             break;
         case RTF_HICH:
-            m_aStates.top().bIsCjk = true;
+            m_aStates.top().eRunType = RTFParserState::HICH;
             break;
         case RTF_DBCH:
-            m_aStates.top().bIsCjk = false;
+            m_aStates.top().eRunType = RTFParserState::DBCH;
             break;
         case RTF_TITLEPG:
             {
@@ -3165,7 +3190,8 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
             if (nKeyword == RTF_F)
                 nSprm = NS_sprm::LN_CRgFtc0;
             else
-                nSprm = (m_aStates.top().bIsCjk ? NS_sprm::LN_CRgFtc1 : NS_sprm::LN_CRgFtc2);
+                nSprm = (m_aStates.top().eRunType == RTFParserState::HICH
+                    ? NS_sprm::LN_CRgFtc1 : NS_sprm::LN_CRgFtc2);
             if (m_aStates.top().nDestinationState == DESTINATION_FONTTABLE || m_aStates.top().nDestinationState == DESTINATION_FONTENTRY)
             {
                 m_aFontIndexes.push_back(nParam);
@@ -5171,7 +5197,7 @@ RTFParserState::RTFParserState(RTFDocumentImpl *pDocumentImpl)
     aShape(),
     aDrawingObject(),
     aFrame(this),
-    bIsCjk(false),
+    eRunType(LOCH),
     nYear(0),
     nMonth(0),
     nDay(0),
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
index 1d10b6f7..a6e4406 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
@@ -254,7 +254,7 @@ namespace writerfilter {
                 RTFFrame aFrame;
 
                 /// CJK or CTL?
-                bool bIsCjk;
+                enum { LOCH, HICH, DBCH } eRunType;
 
                 // Info group.
                 int nYear;


More information about the Libreoffice-commits mailing list