[Libreoffice-commits] .: 2 commits - writerfilter/source

Miklos Vajna vmiklos at kemper.freedesktop.org
Mon Nov 28 15:51:42 PST 2011


 writerfilter/source/rtftok/rtfdocumentimpl.cxx |   40 +++++++++++++++++++++----
 writerfilter/source/rtftok/rtfdocumentimpl.hxx |   10 ++++--
 2 files changed, 41 insertions(+), 9 deletions(-)

New commits:
commit 4f6d80fbb8a83ef98dd3c0d746fa7fe650d71f02
Author: Miklos Vajna <vmiklos at frugalware.org>
Date:   Tue Nov 29 00:48:55 2011 +0100

    RTF: Avoid importing unicode characters one by one
    
    This improves speed of importing unicode text further. Results of a
    sample of Chinese text of 5 pages:
    
    - before: 6.692s
    - after: 1.388s

diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index 0e81b4a..1a138c0 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -291,7 +291,8 @@ RTFDocumentImpl::RTFDocumentImpl(uno::Reference<uno::XComponentContext> const& x
     m_nCurrentStyleIndex(0),
     m_bEq(false),
     m_bWasInFrame(false),
-    m_bIsInFrame(false)
+    m_bIsInFrame(false),
+    m_aUnicodeBuffer()
 {
     OSL_ASSERT(xInputStream.is());
     m_pInStream.reset(utl::UcbStreamHelper::CreateStream(xInputStream, sal_True));
@@ -339,6 +340,7 @@ bool RTFDocumentImpl::isSubstream() const
 
 void RTFDocumentImpl::finishSubstream()
 {
+    checkUnicode();
     // At the end of a footnote stream, we need to emit a run break when importing from Word.
     // We can't do so unconditionally, as Writer already writes a \par at the end of the footnote.
     if (m_bNeedCr)
@@ -697,12 +699,20 @@ int RTFDocumentImpl::resolveChars(char ch)
 {
     OStringBuffer aBuf;
 
+    bool bUnicodeChecked = false;
     while(!Strm().IsEof() && ch != '{' && ch != '}' && ch != '\\')
     {
         if (ch != 0x0d && ch != 0x0a)
         {
             if (m_aStates.top().nCharsToSkip == 0)
+            {
+                if (!bUnicodeChecked)
+                {
+                    checkUnicode();
+                    bUnicodeChecked = true;
+                }
                 aBuf.append(ch);
+            }
             else
                 m_aStates.top().nCharsToSkip--;
         }
@@ -989,6 +999,7 @@ void RTFDocumentImpl::replayBuffer(RTFBuffer_t& rBuffer)
 
 int RTFDocumentImpl::dispatchDestination(RTFKeyword nKeyword)
 {
+    checkUnicode();
     RTFSkipDestination aSkip(*this);
     switch (nKeyword)
     {
@@ -1277,6 +1288,8 @@ int RTFDocumentImpl::dispatchDestination(RTFKeyword nKeyword)
 
 int RTFDocumentImpl::dispatchSymbol(RTFKeyword nKeyword)
 {
+    if (nKeyword != RTF_HEXCHAR)
+        checkUnicode();
     RTFSkipDestination aSkip(*this);
     sal_uInt8 cCh = 0;
 
@@ -1450,6 +1463,7 @@ int RTFDocumentImpl::dispatchSymbol(RTFKeyword nKeyword)
 
 int RTFDocumentImpl::dispatchFlag(RTFKeyword nKeyword)
 {
+    checkUnicode();
     RTFSkipDestination aSkip(*this);
     int nParam = -1;
 
@@ -1905,6 +1919,8 @@ int RTFDocumentImpl::dispatchFlag(RTFKeyword nKeyword)
 
 int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
 {
+    if (nKeyword != RTF_U)
+        checkUnicode();
     RTFSkipDestination aSkip(*this);
     int nSprm = 0;
     RTFValue::Pointer_t pIntValue(new RTFValue(nParam));
@@ -2231,8 +2247,7 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
         case RTF_U:
             if ((SAL_MIN_INT16 <= nParam) && (nParam <= SAL_MAX_INT16))
             {
-                OUString aStr(static_cast<sal_Unicode>(nParam));
-                text(aStr);
+                m_aUnicodeBuffer.append(static_cast<sal_Unicode>(nParam));
                 m_aStates.top().nCharsToSkip = m_aStates.top().nUc;
             }
             break;
@@ -2550,6 +2565,7 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
 
 int RTFDocumentImpl::dispatchToggle(RTFKeyword nKeyword, bool bParam, int nParam)
 {
+    checkUnicode();
     RTFSkipDestination aSkip(*this);
     int nSprm = -1;
     RTFValue::Pointer_t pBoolValue(new RTFValue(!bParam || nParam != 0));
@@ -2650,6 +2666,7 @@ int RTFDocumentImpl::pushState()
 {
     //OSL_TRACE("%s before push: %d", OSL_THIS_FUNC, m_nGroup);
 
+    checkUnicode();
     m_nGroupStartPos = Strm().Tell();
     RTFParserState aState;
     if (m_aStates.empty())
@@ -2728,6 +2745,7 @@ int RTFDocumentImpl::popState()
 {
     //OSL_TRACE("%s before pop: m_nGroup %d, dest state: %d", OSL_THIS_FUNC, m_nGroup, m_aStates.top().nDestinationState);
 
+    checkUnicode();
     RTFSprms aSprms;
     RTFSprms aAttributes;
     OUStringBuffer aDestinationText;
@@ -3184,6 +3202,16 @@ void RTFDocumentImpl::setSkipUnknown(bool bSkipUnknown)
     m_bSkipUnknown = bSkipUnknown;
 }
 
+void RTFDocumentImpl::checkUnicode()
+{
+    if (m_aUnicodeBuffer.getLength() > 0)
+    {
+        OSL_TRACE("debug, sending collected unicode chars");
+        OUString aString = m_aUnicodeBuffer.makeStringAndClear();
+        text(aString);
+    }
+}
+
 RTFParserState::RTFParserState()
     : nInternalState(INTERNAL_NORMAL),
     nDestinationState(DESTINATION_NORMAL),
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
index 2e63604..863a6d5 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
@@ -341,6 +341,8 @@ namespace writerfilter {
                 /// If we got tokens indicating we're in a frame.
                 bool inFrame();
                 void checkChangedFrame();
+                /// If we have some unicode characters to send.
+                void checkUnicode();
 
                 uno::Reference<uno::XComponentContext> const& m_xContext;
                 uno::Reference<io::XInputStream> const& m_xInputStream;
@@ -425,6 +427,8 @@ namespace writerfilter {
                 bool m_bWasInFrame;
                 /// If a frame start token is already sent to dmapper (nesting them is not OK).
                 bool m_bIsInFrame;
+                // Unicode characters are collected here so we don't have to send them one by one.
+                rtl::OUStringBuffer m_aUnicodeBuffer;
 
         };
     } // namespace rtftok
commit 9ef6890543761a348b6152031113a832010bc253
Author: Miklos Vajna <vmiklos at frugalware.org>
Date:   Tue Nov 29 00:22:31 2011 +0100

    mark these as const

diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index 9e1be73..0e81b4a 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -332,7 +332,7 @@ void RTFDocumentImpl::setAuthor(rtl::OUString& rAuthor)
     m_aAuthor = rAuthor;
 }
 
-bool RTFDocumentImpl::isSubstream()
+bool RTFDocumentImpl::isSubstream() const
 {
     return m_bIsSubstream;
 }
@@ -3153,12 +3153,12 @@ RTFParserState& RTFDocumentImpl::getState()
     return m_aStates.top();
 }
 
-int RTFDocumentImpl::getGroup()
+int RTFDocumentImpl::getGroup() const
 {
     return m_nGroup;
 }
 
-bool RTFDocumentImpl::isEmpty()
+bool RTFDocumentImpl::isEmpty() const
 {
     return m_aStates.empty();
 }
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
index d0aa294..2e63604 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
@@ -292,15 +292,15 @@ namespace writerfilter {
                 Stream& Mapper();
                 void setSubstream(bool bIsSubtream);
                 void setAuthor(rtl::OUString& rAuthor);
-                bool isSubstream();
+                bool isSubstream() const;
                 void finishSubstream();
                 void setIgnoreFirst(rtl::OUString& rIgnoreFirst);
                 void seek(sal_uInt32 nPos);
                 uno::Reference<lang::XMultiServiceFactory> getModelFactory();
                 RTFParserState& getState();
                 /// If the stack of states is empty.
-                bool isEmpty();
-                int getGroup();
+                bool isEmpty() const;
+                int getGroup() const;
                 void setDestinationText(rtl::OUString& rString);
                 /// Resolve a picture: If not inline, then anchored.
                 int resolvePict(bool bInline);


More information about the Libreoffice-commits mailing list