[Libreoffice-commits] core.git: 4 commits - sw/qa writerfilter/source

Michael Stahl mstahl at redhat.com
Tue Jun 3 11:55:12 PDT 2014


 sw/qa/core/data/rtf/pass/fdo79384.rtf          |    1 
 sw/qa/extras/rtfimport/data/fdo79384.rtf       |    9 +++
 sw/qa/extras/rtfimport/rtfimport.cxx           |    8 ++
 writerfilter/source/rtftok/rtfdocumentimpl.cxx |   72 ++++++++++++++++++-------
 writerfilter/source/rtftok/rtfdocumentimpl.hxx |    4 +
 5 files changed, 75 insertions(+), 19 deletions(-)

New commits:
commit c087b60b0dd70c4a711ba1b4d556206a136fa468
Author: Michael Stahl <mstahl at redhat.com>
Date:   Tue Jun 3 20:32:13 2014 +0200

    (related: bnc#823675) RTF import: get rid of hacks for \f in LISTLEVEL
    
    These weird hacks were apparently needed only because \loch, \hich and
    \dbch were mapped wrongly; for the list level destination it's only
    important that the existing fonts are not overwritten.
    
    Change-Id: Ie2b9adf332b74c2744e9b1dbc4e878638e5ee078

diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index 89d2294..77f0379 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -3518,14 +3518,11 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
         {
             RTFSprms aFontAttributes;
             aFontAttributes.set(nSprm, RTFValue::Pointer_t(new RTFValue(m_aFontNames[getFontIndex(nParam)])));
-            // In the context of listlevels, \af seems to imply \f.
-            if (nKeyword == RTF_AF)
-                aFontAttributes.set(NS_ooxml::LN_CT_Fonts_ascii, RTFValue::Pointer_t(new RTFValue(m_aFontNames[getFontIndex(nParam)])));
             RTFSprms aRunPropsSprms;
             aRunPropsSprms.set(NS_ooxml::LN_EG_RPrBase_rFonts, RTFValue::Pointer_t(new RTFValue(aFontAttributes)));
-            // If there are multiple \f or \af tokens, only handle the first one.
-            if (!m_aStates.top().aTableSprms.find(NS_ooxml::LN_CT_Lvl_rPr))
-                m_aStates.top().aTableSprms.set(NS_ooxml::LN_CT_Lvl_rPr, RTFValue::Pointer_t(new RTFValue(RTFSprms(), aRunPropsSprms)));
+            m_aStates.top().aTableSprms.set(NS_ooxml::LN_CT_Lvl_rPr,
+                RTFValue::Pointer_t(new RTFValue(RTFSprms(), aRunPropsSprms)),
+                OVERWRITE_NO_APPEND);
         }
         else
         {
commit 36246aa9fb57c9fe4e546c91a8274d8828b1424e
Author: Michael Stahl <mstahl at redhat.com>
Date:   Tue Jun 3 20:18:59 2014 +0200

    RTF import: fix handling of \loch \hich \dbch \ltrch \rtlch
    
    The logic is not immediately obvious from the RTF spec; let's do what
    the editengine RTF import does, but without the unnecessary complexity.
    
    Change-Id: I60e69130e6e5aed1f5d237f64b1656c3141e402a

diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index 6334031..89d2294 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -2825,8 +2825,10 @@ int RTFDocumentImpl::dispatchFlag(RTFKeyword nKeyword)
         break;
     case RTF_LTRCH:
         // dmapper does not support this.
+        m_aStates.top().isRightToLeft = false;
         break;
     case RTF_RTLCH:
+        m_aStates.top().isRightToLeft = true;
         if (m_aDefaultState.nCurrentEncoding == RTL_TEXTENCODING_MS_1255)
             m_aStates.top().nCurrentEncoding = m_aDefaultState.nCurrentEncoding;
         break;
@@ -3493,11 +3495,20 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
     {
     case RTF_F:
     case RTF_AF:
-        if (nKeyword == RTF_F)
-            nSprm = NS_ooxml::LN_CT_Fonts_ascii;
+        if (m_aStates.top().isRightToLeft
+            || m_aStates.top().eRunType == RTFParserState::HICH)
+        {
+            nSprm = NS_ooxml::LN_CT_Fonts_cs;
+        }
+        else if (m_aStates.top().eRunType == RTFParserState::DBCH)
+        {
+            nSprm = NS_ooxml::LN_CT_Fonts_eastAsia;
+        }
         else
-            nSprm = (m_aStates.top().eRunType == RTFParserState::HICH
-                ? NS_ooxml::LN_CT_Fonts_eastAsia : NS_ooxml::LN_CT_Fonts_cs);
+        {
+            assert(m_aStates.top().eRunType == RTFParserState::LOCH);
+            nSprm = NS_ooxml::LN_CT_Fonts_ascii;
+        }
         if (m_aStates.top().nDestinationState == DESTINATION_FONTTABLE || m_aStates.top().nDestinationState == DESTINATION_FONTENTRY)
         {
             m_aFontIndexes.push_back(nParam);
@@ -3521,7 +3532,8 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
             m_nCurrentFontIndex = getFontIndex(nParam);
             RTFValue::Pointer_t pValue(new RTFValue(getFontName(m_nCurrentFontIndex)));
             lcl_putNestedAttribute(m_aStates.top().aCharacterSprms, NS_ooxml::LN_EG_RPrBase_rFonts, nSprm, pValue);
-            m_aStates.top().nCurrentEncoding = getEncoding(m_nCurrentFontIndex);
+            if (nKeyword == RTF_F)
+                m_aStates.top().nCurrentEncoding = getEncoding(m_nCurrentFontIndex);
         }
         break;
     case RTF_RED:
@@ -5741,6 +5753,7 @@ RTFParserState::RTFParserState(RTFDocumentImpl* pDocumentImpl)
       aDrawingObject(),
       aFrame(this),
       eRunType(LOCH),
+      isRightToLeft(false),
       nYear(0),
       nMonth(0),
       nDay(0),
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
index d069baa..b86c54f 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
@@ -257,6 +257,8 @@ public:
 
     /// CJK or CTL?
     enum { LOCH, HICH, DBCH } eRunType;
+    /// ltrch or rtlch
+    bool isRightToLeft;
 
     // Info group.
     int nYear;
commit d71387ca81b61416b9a7b82cd6cf67d496b81fc2
Author: Michael Stahl <mstahl at redhat.com>
Date:   Tue Jun 3 19:32:10 2014 +0200

    fdo#79384: replace the work-around with a different one
    
    Word will reject Shift-JIS following \loch, but apparently OOo could read
    and (worse) write such documents, so accept Shift-JIS regardless of run
    charset type.
    
    Change-Id: Ib181956e9f218548a52037dd76fa1d3ecdc006bd

diff --git a/sw/qa/core/data/rtf/pass/fdo79384.rtf b/sw/qa/core/data/rtf/pass/fdo79384.rtf
index 84875a9..c9d6b33 100644
--- a/sw/qa/core/data/rtf/pass/fdo79384.rtf
+++ b/sw/qa/core/data/rtf/pass/fdo79384.rtf
@@ -1,4 +1,5 @@
 {\rtf1
+{\fonttbl{\f5\fnil\fprq0\fcharset128 OpenSymbol;}}
 {\stylesheet
 {\*\cs35\snext35\hich\af5\dbch\af5\loch\f5 „M„p„‚„{„u„‚„ „ƒ„„y„ƒ„{„p;}
 }
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index d852be8..6334031 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -957,11 +957,8 @@ int RTFDocumentImpl::resolveChars(char ch)
     bool bUnicodeChecked = false;
     bool bSkipped = false;
 
-    // Workaround for buggy input: if we're inside a style entry, then ignore
-    // the fact that '{' without a matching '}' is invalid.
-    bool bStyleEntry = m_aStates.top().nDestinationState == DESTINATION_STYLEENTRY;
-
-    while (!Strm().IsEof() && (m_aStates.top().nInternalState == INTERNAL_HEX || ((ch != '{' || bStyleEntry) && ch != '}' && ch != '\\')))
+    while (!Strm().IsEof() && (m_aStates.top().nInternalState == INTERNAL_HEX
+                               || (ch != '{' && ch != '}' && ch != '\\')))
     {
         if (m_aStates.top().nInternalState == INTERNAL_HEX || (ch != 0x0d && ch != 0x0a))
         {
@@ -985,9 +982,12 @@ int RTFDocumentImpl::resolveChars(char ch)
         if (m_aStates.top().nInternalState == INTERNAL_HEX)
             break;
 
-        if (RTFParserState::DBCH == m_aStates.top().eRunType &&
-            RTL_TEXTENCODING_MS_932 == m_aStates.top().nCurrentEncoding)
+        if (RTL_TEXTENCODING_MS_932 == m_aStates.top().nCurrentEncoding)
         {
+            // fdo#79384: Word will reject Shift-JIS following \loch
+            // but apparently OOo could read and (worse) write such documents
+            SAL_INFO_IF(m_aStates.top().eRunType != RTFParserState::DBCH,
+                "writerfilter.rtftok", "invalid Shift-JIS without DBCH");
             unsigned char uch = ch;
             if ((uch >= 0x80 && uch <= 0x9F) || uch >= 0xE0)
             {
commit 061190a62fcdbfb3a0b266d5afffbd257a3e692e
Author: Michael Stahl <mstahl at redhat.com>
Date:   Mon Jun 2 23:57:13 2014 +0200

    fdo#79384: RTF import: fix literal Shift-JIS text
    
    This is a variable-length encoding, and the second byte may be an RTF
    syntax character like \, {, }.
    
    Change-Id: I813ccafda18388af3bf05eb7ce9a0253c627b1c4

diff --git a/sw/qa/extras/rtfimport/data/fdo79384.rtf b/sw/qa/extras/rtfimport/data/fdo79384.rtf
new file mode 100644
index 0000000..2a90085
--- /dev/null
+++ b/sw/qa/extras/rtfimport/data/fdo79384.rtf
@@ -0,0 +1,9 @@
+{\rtf1\ansi
+{\fonttbl{\f5\fnil\fprq0\fcharset128 OpenSymbol{\*\falt Arial Unicode MS};}}
+
+\pard\plain
+
+\dbch\f5 „M„p„‚„{„u„‚„ „ƒ„„y„ƒ„{„p
+„}„\
+
+\par }
diff --git a/sw/qa/extras/rtfimport/rtfimport.cxx b/sw/qa/extras/rtfimport/rtfimport.cxx
index b9ea9d8..1654922 100644
--- a/sw/qa/extras/rtfimport/rtfimport.cxx
+++ b/sw/qa/extras/rtfimport/rtfimport.cxx
@@ -290,6 +290,14 @@ DECLARE_RTFIMPORT_TEST(testN751020, "n751020.rtf")
     CPPUNIT_ASSERT_EQUAL(sal_Int32(convertTwipToMm100(200)), getProperty<sal_Int32>(xParaEnum->nextElement(), "ParaBottomMargin"));
 }
 
+DECLARE_RTFIMPORT_TEST(testFdo79384, "fdo79384.rtf")
+{
+    uno::Reference<text::XTextRange> xTextRange = getRun(getParagraph(1), 1);
+
+    CPPUNIT_ASSERT_EQUAL(OUString("Маркеры спискамЫ", 31, RTL_TEXTENCODING_UTF8),
+            xTextRange->getString());
+}
+
 DECLARE_RTFIMPORT_TEST(testFdo47326, "fdo47326.rtf")
 {
     // This was 15 only, as \super buffered text, then the contents of it got lost.
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index 079f43e..d852be8 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -980,9 +980,33 @@ int RTFDocumentImpl::resolveChars(char ch)
                 m_aStates.top().nCharsToSkip--;
             }
         }
+
         // read a single char if we're in hex mode
         if (m_aStates.top().nInternalState == INTERNAL_HEX)
             break;
+
+        if (RTFParserState::DBCH == m_aStates.top().eRunType &&
+            RTL_TEXTENCODING_MS_932 == m_aStates.top().nCurrentEncoding)
+        {
+            unsigned char uch = ch;
+            if ((uch >= 0x80 && uch <= 0x9F) || uch >= 0xE0)
+            {
+                // read second byte of 2-byte Shift-JIS - may be \ { }
+                Strm().ReadChar(ch);
+                if (m_aStates.top().nCharsToSkip == 0)
+                {
+                    assert(bUnicodeChecked);
+                    aBuf.append(ch);
+                }
+                else
+                {
+                    assert(bSkipped);
+                    // anybody who uses \ucN with Shift-JIS is insane
+                    m_aStates.top().nCharsToSkip--;
+                }
+            }
+        }
+
         Strm().ReadChar(ch);
     }
     if (m_aStates.top().nInternalState != INTERNAL_HEX && !Strm().IsEof())
@@ -2980,12 +3004,13 @@ int RTFDocumentImpl::dispatchFlag(RTFKeyword nKeyword)
         break;
     case RTF_LOCH:
         // Noop, dmapper detects this automatically.
+        m_aStates.top().eRunType = RTFParserState::LOCH;
         break;
     case RTF_HICH:
-        m_aStates.top().bIsCjk = true;
+        m_aStates.top().eRunType = RTFParserState::HICH;
         break;
     case RTF_DBCH:
-        m_aStates.top().bIsCjk = false;
+        m_aStates.top().eRunType = RTFParserState::DBCH;
         break;
     case RTF_TITLEPG:
     {
@@ -3471,7 +3496,8 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
         if (nKeyword == RTF_F)
             nSprm = NS_ooxml::LN_CT_Fonts_ascii;
         else
-            nSprm = (m_aStates.top().bIsCjk ? NS_ooxml::LN_CT_Fonts_eastAsia : NS_ooxml::LN_CT_Fonts_cs);
+            nSprm = (m_aStates.top().eRunType == RTFParserState::HICH
+                ? NS_ooxml::LN_CT_Fonts_eastAsia : NS_ooxml::LN_CT_Fonts_cs);
         if (m_aStates.top().nDestinationState == DESTINATION_FONTTABLE || m_aStates.top().nDestinationState == DESTINATION_FONTENTRY)
         {
             m_aFontIndexes.push_back(nParam);
@@ -5714,7 +5740,7 @@ RTFParserState::RTFParserState(RTFDocumentImpl* pDocumentImpl)
       aShape(),
       aDrawingObject(),
       aFrame(this),
-      bIsCjk(false),
+      eRunType(LOCH),
       nYear(0),
       nMonth(0),
       nDay(0),
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
index 3c7d2f3..d069baa 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
@@ -256,7 +256,7 @@ public:
     RTFFrame aFrame;
 
     /// CJK or CTL?
-    bool bIsCjk;
+    enum { LOCH, HICH, DBCH } eRunType;
 
     // Info group.
     int nYear;


More information about the Libreoffice-commits mailing list