[Libreoffice-commits] .: 2 commits - writerfilter/source

Miklos Vajna vmiklos at kemper.freedesktop.org
Sat Apr 21 10:15:55 PDT 2012


 writerfilter/source/filter/RtfFilter.cxx       |    4 ++-
 writerfilter/source/rtftok/rtfcontrolwords.cxx |   18 ++++++++++++++++
 writerfilter/source/rtftok/rtfcontrolwords.hxx |    4 +++
 writerfilter/source/rtftok/rtftokenizer.cxx    |   28 ++++++++++++-------------
 writerfilter/source/rtftok/rtftokenizer.hxx    |    2 +
 5 files changed, 41 insertions(+), 15 deletions(-)

New commits:
commit 22eb78b6eee38e11aec32909b6983becb309ce13
Author: Miklos Vajna <vmiklos at suse.cz>
Date:   Sat Apr 21 18:58:31 2012 +0200

    fdo#44736 speed up RTF import a bit by sorting keywords
    
    If we sort the keywords once in the constructor, then we can do binary
    search when looking up RTF keywords, and that speeds up the import by
    about 20% using the first testcase from the bug.

diff --git a/writerfilter/source/rtftok/rtfcontrolwords.cxx b/writerfilter/source/rtftok/rtfcontrolwords.cxx
index cce0c2e..11c4d3d 100644
--- a/writerfilter/source/rtftok/rtfcontrolwords.cxx
+++ b/writerfilter/source/rtftok/rtfcontrolwords.cxx
@@ -27,6 +27,7 @@
 
 #include <rtfcontrolwords.hxx>
 #include <sal/macros.h>
+#include <string.h>
 
 namespace writerfilter {
 namespace rtftok {
@@ -1856,6 +1857,23 @@ RTFSymbol aRTFControlWords[] = {
 };
 int nRTFControlWords = SAL_N_ELEMENTS(aRTFControlWords);
 
+bool RTFSymbol::operator<(const RTFSymbol& rOther) const
+{
+    return strcmp(sKeyword, rOther.sKeyword) < 0;
+}
+
+RTFSymbol::RTFSymbol(const char* pKeyword)
+    : sKeyword(pKeyword)
+{
+}
+
+RTFSymbol::RTFSymbol(const char *pKeyword, int pControlType, RTFKeyword pIndex)
+    : sKeyword(pKeyword),
+    nControlType(pControlType),
+    nIndex(pIndex)
+{
+}
+
 } // namespace rtftok
 } // namespace writerfilter
 
diff --git a/writerfilter/source/rtftok/rtfcontrolwords.hxx b/writerfilter/source/rtftok/rtfcontrolwords.hxx
index 5afb8c6..477ddb5 100644
--- a/writerfilter/source/rtftok/rtfcontrolwords.hxx
+++ b/writerfilter/source/rtftok/rtfcontrolwords.hxx
@@ -1866,6 +1866,10 @@ struct RTFSymbol
     const char *sKeyword;
     int nControlType;
     RTFKeyword nIndex;
+
+    bool operator<(const RTFSymbol& rOther) const;
+    RTFSymbol(const char* pKeyword);
+    RTFSymbol(const char *pKeyword, int pControlType, RTFKeyword pIndex);
 };
 
 extern RTFSymbol aRTFControlWords[];
diff --git a/writerfilter/source/rtftok/rtftokenizer.cxx b/writerfilter/source/rtftok/rtftokenizer.cxx
index f247317..b40a912 100644
--- a/writerfilter/source/rtftok/rtftokenizer.cxx
+++ b/writerfilter/source/rtftok/rtftokenizer.cxx
@@ -45,8 +45,10 @@ namespace rtftok {
 RTFTokenizer::RTFTokenizer(RTFDocumentImpl& rImport, SvStream* pInStream, uno::Reference<task::XStatusIndicator> const& xStatusIndicator)
     : m_rImport(rImport),
     m_pInStream(pInStream),
-    m_xStatusIndicator(xStatusIndicator)
+    m_xStatusIndicator(xStatusIndicator),
+    m_aRTFControlWords(std::vector<RTFSymbol>(aRTFControlWords, aRTFControlWords + nRTFControlWords))
 {
+    std::sort(m_aRTFControlWords.begin(), m_aRTFControlWords.end());
 }
 
 RTFTokenizer::~RTFTokenizer()
@@ -266,13 +268,10 @@ int RTFTokenizer::dispatchKeyword(OString& rKeyword, bool bParam, int nParam)
         return 0;
     /*SAL_INFO("writefilter", OSL_THIS_FUNC << ": keyword '\\" << rKeyword.getStr() <<
                "' with param? " << (bParam ? 1 : 0) <<" param val: '" << (bParam ? nParam : 0) << "'");*/
-    int i, ret;
-    for (i = 0; i < nRTFControlWords; i++)
-    {
-        if (!strcmp(rKeyword.getStr(), aRTFControlWords[i].sKeyword))
-            break;
-    }
-    if (i == nRTFControlWords)
+    RTFSymbol aSymbol(rKeyword.getStr());
+    std::vector<RTFSymbol>::iterator low = std::lower_bound(m_aRTFControlWords.begin(), m_aRTFControlWords.end(), aSymbol);
+    int i = low - m_aRTFControlWords.begin();
+    if (low == m_aRTFControlWords.end() || aSymbol < *low)
     {
         SAL_INFO("writerfilter", OSL_THIS_FUNC << ": unknown keyword '\\" << rKeyword.getStr() << "'");
         RTFSkipDestination aSkip(m_rImport);
@@ -280,35 +279,36 @@ int RTFTokenizer::dispatchKeyword(OString& rKeyword, bool bParam, int nParam)
         return 0;
     }
 
-    switch (aRTFControlWords[i].nControlType)
+    int ret;
+    switch (m_aRTFControlWords[i].nControlType)
     {
         case CONTROL_FLAG:
             // flags ignore any parameter by definition
-            ret = m_rImport.dispatchFlag(aRTFControlWords[i].nIndex);
+            ret = m_rImport.dispatchFlag(m_aRTFControlWords[i].nIndex);
             if (ret)
                 return ret;
             break;
         case CONTROL_DESTINATION:
             // same for destinations
-            ret = m_rImport.dispatchDestination(aRTFControlWords[i].nIndex);
+            ret = m_rImport.dispatchDestination(m_aRTFControlWords[i].nIndex);
             if (ret)
                 return ret;
             break;
         case CONTROL_SYMBOL:
             // and symbols
-            ret = m_rImport.dispatchSymbol(aRTFControlWords[i].nIndex);
+            ret = m_rImport.dispatchSymbol(m_aRTFControlWords[i].nIndex);
             if (ret)
                 return ret;
             break;
         case CONTROL_TOGGLE:
-            ret = m_rImport.dispatchToggle(aRTFControlWords[i].nIndex, bParam, nParam);
+            ret = m_rImport.dispatchToggle(m_aRTFControlWords[i].nIndex, bParam, nParam);
             if (ret)
                 return ret;
             break;
         case CONTROL_VALUE:
             // values require a parameter by definition
             if (bParam) {
-                ret = m_rImport.dispatchValue(aRTFControlWords[i].nIndex, nParam);
+                ret = m_rImport.dispatchValue(m_aRTFControlWords[i].nIndex, nParam);
                 if (ret)
                     return ret;
             }
diff --git a/writerfilter/source/rtftok/rtftokenizer.hxx b/writerfilter/source/rtftok/rtftokenizer.hxx
index 3b8dee0..bcaafda 100644
--- a/writerfilter/source/rtftok/rtftokenizer.hxx
+++ b/writerfilter/source/rtftok/rtftokenizer.hxx
@@ -51,6 +51,8 @@ namespace writerfilter {
                 RTFDocumentImpl& m_rImport;
                 SvStream* m_pInStream;
                 uno::Reference<task::XStatusIndicator> const& m_xStatusIndicator;
+                // This is the same as aRTFControlWords, but sorted
+                std::vector<RTFSymbol> m_aRTFControlWords;
         };
     } // namespace rtftok
 } // namespace writerfilter
commit da07d3a7bdb75efb34448dfb5ebca8b8b6135546
Author: Miklos Vajna <vmiklos at suse.cz>
Date:   Sat Apr 21 17:07:54 2012 +0200

    rtftok: make it possible to see the time spent in the filter

diff --git a/writerfilter/source/filter/RtfFilter.cxx b/writerfilter/source/filter/RtfFilter.cxx
index b57207f..0f37279 100644
--- a/writerfilter/source/filter/RtfFilter.cxx
+++ b/writerfilter/source/filter/RtfFilter.cxx
@@ -54,7 +54,7 @@ RtfFilter::~RtfFilter()
 sal_Bool RtfFilter::filter( const uno::Sequence< beans::PropertyValue >& aDescriptor )
    throw (uno::RuntimeException)
 {
-    SAL_INFO("writerfilter", OSL_THIS_FUNC);
+    sal_uInt32 nStartTime = osl_getGlobalTimer();
     if( m_xSrcDoc.is() )
     {
         uno::Reference< lang::XMultiServiceFactory > xMSF(m_xContext->getServiceManager(), uno::UNO_QUERY_THROW);
@@ -102,6 +102,8 @@ sal_Bool RtfFilter::filter( const uno::Sequence< beans::PropertyValue >& aDescri
 #endif
         if (xStatusIndicator.is())
             xStatusIndicator->end();
+        sal_uInt32 nEndTime = osl_getGlobalTimer();
+        SAL_INFO("writerfilter.profile", OSL_THIS_FUNC << " finished in " << nEndTime - nStartTime << " ms");
         return sal_True;
     }
     catch (const uno::Exception& e)


More information about the Libreoffice-commits mailing list