[Libreoffice-commits] core.git: 4 commits - sw/qa writerfilter/source
Michael Stahl
mstahl at redhat.com
Tue Jun 3 11:55:12 PDT 2014
sw/qa/core/data/rtf/pass/fdo79384.rtf | 1
sw/qa/extras/rtfimport/data/fdo79384.rtf | 9 +++
sw/qa/extras/rtfimport/rtfimport.cxx | 8 ++
writerfilter/source/rtftok/rtfdocumentimpl.cxx | 72 ++++++++++++++++++-------
writerfilter/source/rtftok/rtfdocumentimpl.hxx | 4 +
5 files changed, 75 insertions(+), 19 deletions(-)
New commits:
commit c087b60b0dd70c4a711ba1b4d556206a136fa468
Author: Michael Stahl <mstahl at redhat.com>
Date: Tue Jun 3 20:32:13 2014 +0200
(related: bnc#823675) RTF import: get rid of hacks for \f in LISTLEVEL
These weird hacks are apparently needed only because the \loch \hich
\dbch were mapped wrongly; for the list level destination it's only
important that the existing fonts are not overwritten.
Change-Id: Ie2b9adf332b74c2744e9b1dbc4e878638e5ee078
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index 89d2294..77f0379 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -3518,14 +3518,11 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
{
RTFSprms aFontAttributes;
aFontAttributes.set(nSprm, RTFValue::Pointer_t(new RTFValue(m_aFontNames[getFontIndex(nParam)])));
- // In the context of listlevels, \af seems to imply \f.
- if (nKeyword == RTF_AF)
- aFontAttributes.set(NS_ooxml::LN_CT_Fonts_ascii, RTFValue::Pointer_t(new RTFValue(m_aFontNames[getFontIndex(nParam)])));
RTFSprms aRunPropsSprms;
aRunPropsSprms.set(NS_ooxml::LN_EG_RPrBase_rFonts, RTFValue::Pointer_t(new RTFValue(aFontAttributes)));
- // If there are multiple \f or \af tokens, only handle the first one.
- if (!m_aStates.top().aTableSprms.find(NS_ooxml::LN_CT_Lvl_rPr))
- m_aStates.top().aTableSprms.set(NS_ooxml::LN_CT_Lvl_rPr, RTFValue::Pointer_t(new RTFValue(RTFSprms(), aRunPropsSprms)));
+ m_aStates.top().aTableSprms.set(NS_ooxml::LN_CT_Lvl_rPr,
+ RTFValue::Pointer_t(new RTFValue(RTFSprms(), aRunPropsSprms)),
+ OVERWRITE_NO_APPEND);
}
else
{
commit 36246aa9fb57c9fe4e546c91a8274d8828b1424e
Author: Michael Stahl <mstahl at redhat.com>
Date: Tue Jun 3 20:18:59 2014 +0200
RTF import: fix handling of \loch \hich \dbch \ltrch \rtlch
The logic is not immediately obvious from the RTF spec; let's do what
the editengine RTF import does, but without the unnecessary complexity.
Change-Id: I60e69130e6e5aed1f5d237f64b1656c3141e402a
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index 6334031..89d2294 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -2825,8 +2825,10 @@ int RTFDocumentImpl::dispatchFlag(RTFKeyword nKeyword)
break;
case RTF_LTRCH:
// dmapper does not support this.
+ m_aStates.top().isRightToLeft = false;
break;
case RTF_RTLCH:
+ m_aStates.top().isRightToLeft = true;
if (m_aDefaultState.nCurrentEncoding == RTL_TEXTENCODING_MS_1255)
m_aStates.top().nCurrentEncoding = m_aDefaultState.nCurrentEncoding;
break;
@@ -3493,11 +3495,20 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
{
case RTF_F:
case RTF_AF:
- if (nKeyword == RTF_F)
- nSprm = NS_ooxml::LN_CT_Fonts_ascii;
+ if (m_aStates.top().isRightToLeft
+ || m_aStates.top().eRunType == RTFParserState::HICH)
+ {
+ nSprm = NS_ooxml::LN_CT_Fonts_cs;
+ }
+ else if (m_aStates.top().eRunType == RTFParserState::DBCH)
+ {
+ nSprm = NS_ooxml::LN_CT_Fonts_eastAsia;
+ }
else
- nSprm = (m_aStates.top().eRunType == RTFParserState::HICH
- ? NS_ooxml::LN_CT_Fonts_eastAsia : NS_ooxml::LN_CT_Fonts_cs);
+ {
+ assert(m_aStates.top().eRunType == RTFParserState::LOCH);
+ nSprm = NS_ooxml::LN_CT_Fonts_ascii;
+ }
if (m_aStates.top().nDestinationState == DESTINATION_FONTTABLE || m_aStates.top().nDestinationState == DESTINATION_FONTENTRY)
{
m_aFontIndexes.push_back(nParam);
@@ -3521,7 +3532,8 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
m_nCurrentFontIndex = getFontIndex(nParam);
RTFValue::Pointer_t pValue(new RTFValue(getFontName(m_nCurrentFontIndex)));
lcl_putNestedAttribute(m_aStates.top().aCharacterSprms, NS_ooxml::LN_EG_RPrBase_rFonts, nSprm, pValue);
- m_aStates.top().nCurrentEncoding = getEncoding(m_nCurrentFontIndex);
+ if (nKeyword == RTF_F)
+ m_aStates.top().nCurrentEncoding = getEncoding(m_nCurrentFontIndex);
}
break;
case RTF_RED:
@@ -5741,6 +5753,7 @@ RTFParserState::RTFParserState(RTFDocumentImpl* pDocumentImpl)
aDrawingObject(),
aFrame(this),
eRunType(LOCH),
+ isRightToLeft(false),
nYear(0),
nMonth(0),
nDay(0),
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
index d069baa..b86c54f 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
@@ -257,6 +257,8 @@ public:
/// CJK or CTL?
enum { LOCH, HICH, DBCH } eRunType;
+ /// ltrch or rtlch
+ bool isRightToLeft;
// Info group.
int nYear;
commit d71387ca81b61416b9a7b82cd6cf67d496b81fc2
Author: Michael Stahl <mstahl at redhat.com>
Date: Tue Jun 3 19:32:10 2014 +0200
fdo#79384: replace the work-around with a different one
Word will reject Shift-JIS following \loch, but apparently OOo could read
and (worse) write such documents, so accept Shift-JIS regardless of run
charset type.
Change-Id: Ib181956e9f218548a52037dd76fa1d3ecdc006bd
diff --git a/sw/qa/core/data/rtf/pass/fdo79384.rtf b/sw/qa/core/data/rtf/pass/fdo79384.rtf
index 84875a9..c9d6b33 100644
--- a/sw/qa/core/data/rtf/pass/fdo79384.rtf
+++ b/sw/qa/core/data/rtf/pass/fdo79384.rtf
@@ -1,4 +1,5 @@
{\rtf1
+{\fonttbl{\f5\fnil\fprq0\fcharset128 OpenSymbol;}}
{\stylesheet
{\*\cs35\snext35\hich\af5\dbch\af5\loch\f5 Mp{u y{p;}
}
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index d852be8..6334031 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -957,11 +957,8 @@ int RTFDocumentImpl::resolveChars(char ch)
bool bUnicodeChecked = false;
bool bSkipped = false;
- // Workaround for buggy input: if we're inside a style entry, then ignore
- // the fact that '{' without a matching '}' is invalid.
- bool bStyleEntry = m_aStates.top().nDestinationState == DESTINATION_STYLEENTRY;
-
- while (!Strm().IsEof() && (m_aStates.top().nInternalState == INTERNAL_HEX || ((ch != '{' || bStyleEntry) && ch != '}' && ch != '\\')))
+ while (!Strm().IsEof() && (m_aStates.top().nInternalState == INTERNAL_HEX
+ || (ch != '{' && ch != '}' && ch != '\\')))
{
if (m_aStates.top().nInternalState == INTERNAL_HEX || (ch != 0x0d && ch != 0x0a))
{
@@ -985,9 +982,12 @@ int RTFDocumentImpl::resolveChars(char ch)
if (m_aStates.top().nInternalState == INTERNAL_HEX)
break;
- if (RTFParserState::DBCH == m_aStates.top().eRunType &&
- RTL_TEXTENCODING_MS_932 == m_aStates.top().nCurrentEncoding)
+ if (RTL_TEXTENCODING_MS_932 == m_aStates.top().nCurrentEncoding)
{
+ // fdo#79384: Word will reject Shift-JIS following \loch
+ // but apparently OOo could read and (worse) write such documents
+ SAL_INFO_IF(m_aStates.top().eRunType != RTFParserState::DBCH,
+ "writerfilter.rtftok", "invalid Shift-JIS without DBCH");
unsigned char uch = ch;
if ((uch >= 0x80 && uch <= 0x9F) || uch >= 0xE0)
{
commit 061190a62fcdbfb3a0b266d5afffbd257a3e692e
Author: Michael Stahl <mstahl at redhat.com>
Date: Mon Jun 2 23:57:13 2014 +0200
fdo#79384: RTF import: fix literal Shift-JIS text
This is a variable-length encoding, and the second byte may be an RTF
syntax character like \, {, }.
Change-Id: I813ccafda18388af3bf05eb7ce9a0253c627b1c4
diff --git a/sw/qa/extras/rtfimport/data/fdo79384.rtf b/sw/qa/extras/rtfimport/data/fdo79384.rtf
new file mode 100644
index 0000000..2a90085
--- /dev/null
+++ b/sw/qa/extras/rtfimport/data/fdo79384.rtf
@@ -0,0 +1,9 @@
+{\rtf1\ansi
+{\fonttbl{\f5\fnil\fprq0\fcharset128 OpenSymbol{\*\falt Arial Unicode MS};}}
+
+\pard\plain
+
+\dbch\f5 Mp{u y{p
+}\
+
+\par }
diff --git a/sw/qa/extras/rtfimport/rtfimport.cxx b/sw/qa/extras/rtfimport/rtfimport.cxx
index b9ea9d8..1654922 100644
--- a/sw/qa/extras/rtfimport/rtfimport.cxx
+++ b/sw/qa/extras/rtfimport/rtfimport.cxx
@@ -290,6 +290,14 @@ DECLARE_RTFIMPORT_TEST(testN751020, "n751020.rtf")
CPPUNIT_ASSERT_EQUAL(sal_Int32(convertTwipToMm100(200)), getProperty<sal_Int32>(xParaEnum->nextElement(), "ParaBottomMargin"));
}
+DECLARE_RTFIMPORT_TEST(testFdo79384, "fdo79384.rtf")
+{
+ uno::Reference<text::XTextRange> xTextRange = getRun(getParagraph(1), 1);
+
+ CPPUNIT_ASSERT_EQUAL(OUString("Маркеры спискамЫ", 31, RTL_TEXTENCODING_UTF8),
+ xTextRange->getString());
+}
+
DECLARE_RTFIMPORT_TEST(testFdo47326, "fdo47326.rtf")
{
// This was 15 only, as \super buffered text, then the contents of it got lost.
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index 079f43e..d852be8 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -980,9 +980,33 @@ int RTFDocumentImpl::resolveChars(char ch)
m_aStates.top().nCharsToSkip--;
}
}
+
// read a single char if we're in hex mode
if (m_aStates.top().nInternalState == INTERNAL_HEX)
break;
+
+ if (RTFParserState::DBCH == m_aStates.top().eRunType &&
+ RTL_TEXTENCODING_MS_932 == m_aStates.top().nCurrentEncoding)
+ {
+ unsigned char uch = ch;
+ if ((uch >= 0x80 && uch <= 0x9F) || uch >= 0xE0)
+ {
+ // read second byte of 2-byte Shift-JIS - may be \ { }
+ Strm().ReadChar(ch);
+ if (m_aStates.top().nCharsToSkip == 0)
+ {
+ assert(bUnicodeChecked);
+ aBuf.append(ch);
+ }
+ else
+ {
+ assert(bSkipped);
+ // anybody who uses \ucN with Shift-JIS is insane
+ m_aStates.top().nCharsToSkip--;
+ }
+ }
+ }
+
Strm().ReadChar(ch);
}
if (m_aStates.top().nInternalState != INTERNAL_HEX && !Strm().IsEof())
@@ -2980,12 +3004,13 @@ int RTFDocumentImpl::dispatchFlag(RTFKeyword nKeyword)
break;
case RTF_LOCH:
// Noop, dmapper detects this automatically.
+ m_aStates.top().eRunType = RTFParserState::LOCH;
break;
case RTF_HICH:
- m_aStates.top().bIsCjk = true;
+ m_aStates.top().eRunType = RTFParserState::HICH;
break;
case RTF_DBCH:
- m_aStates.top().bIsCjk = false;
+ m_aStates.top().eRunType = RTFParserState::DBCH;
break;
case RTF_TITLEPG:
{
@@ -3471,7 +3496,8 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
if (nKeyword == RTF_F)
nSprm = NS_ooxml::LN_CT_Fonts_ascii;
else
- nSprm = (m_aStates.top().bIsCjk ? NS_ooxml::LN_CT_Fonts_eastAsia : NS_ooxml::LN_CT_Fonts_cs);
+ nSprm = (m_aStates.top().eRunType == RTFParserState::HICH
+ ? NS_ooxml::LN_CT_Fonts_eastAsia : NS_ooxml::LN_CT_Fonts_cs);
if (m_aStates.top().nDestinationState == DESTINATION_FONTTABLE || m_aStates.top().nDestinationState == DESTINATION_FONTENTRY)
{
m_aFontIndexes.push_back(nParam);
@@ -5714,7 +5740,7 @@ RTFParserState::RTFParserState(RTFDocumentImpl* pDocumentImpl)
aShape(),
aDrawingObject(),
aFrame(this),
- bIsCjk(false),
+ eRunType(LOCH),
nYear(0),
nMonth(0),
nDay(0),
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
index 3c7d2f3..d069baa 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
@@ -256,7 +256,7 @@ public:
RTFFrame aFrame;
/// CJK or CTL?
- bool bIsCjk;
+ enum { LOCH, HICH, DBCH } eRunType;
// Info group.
int nYear;
More information about the Libreoffice-commits
mailing list