[Libreoffice-commits] core.git: Branch 'private/kohei/calc-data-stream' - RepositoryExternal.mk sc/inc sc/Library_sc.mk sc/Library_scqahelper.mk sc/source

Kohei Yoshida kohei.yoshida at collabora.com
Tue Dec 17 18:19:02 PST 2013


 RepositoryExternal.mk                        |    4 
 sc/Library_sc.mk                             |    6 +
 sc/Library_scqahelper.mk                     |    8 +
 sc/inc/documentstreamaccess.hxx              |   24 ++++
 sc/inc/mtvelements.hxx                       |    2 
 sc/source/core/data/documentstreamaccess.cxx |   97 ++++++++++++++++++
 sc/source/core/data/mtvelements.cxx          |    6 +
 sc/source/ui/docshell/datastream.cxx         |  143 ++++++++++++++-------------
 sc/source/ui/inc/datastream.hxx              |    6 -
 9 files changed, 222 insertions(+), 74 deletions(-)

New commits:
commit b5818b58b3906a3b5dc8c8ee18218dc72ae4b5fd
Author: Kohei Yoshida <kohei.yoshida at collabora.com>
Date:   Tue Dec 17 21:18:46 2013 -0500

    Use orcus csv parser to stream data, which has much less overhead.
    
    Also, use DocumentStreamAccess to modify ScDocument's content.
    
    Change-Id: I516260cff1f2315267afcff05e36e620798a1aed

diff --git a/RepositoryExternal.mk b/RepositoryExternal.mk
index b44927e..babfe54 100644
--- a/RepositoryExternal.mk
+++ b/RepositoryExternal.mk
@@ -2624,6 +2624,10 @@ endef
 
 define gb_LinkTarget__use_orcus-parser
 $(call gb_LinkTarget_use_external_project,$(1),liborcus)
+$(call gb_LinkTarget_set_include,$(1),\
+	-I$(call gb_UnpackedTarball_get_dir,liborcus/include) \
+	$$(INCLUDE) \
+)
 $(call gb_LinkTarget_add_libs,$(1),\
 	$(call gb_UnpackedTarball_get_dir,liborcus)/src/parser/.libs/liborcus-parser-0.6$(gb_StaticLibrary_PLAINEXT) \
 )
diff --git a/sc/Library_sc.mk b/sc/Library_sc.mk
index b60af82..01a4ab9 100644
--- a/sc/Library_sc.mk
+++ b/sc/Library_sc.mk
@@ -45,6 +45,12 @@ $(eval $(call gb_Library_use_externals,sc,\
     mdds_headers \
 ))
 
+ifeq ($(SYSTEM_LIBORCUS),YES)
+$(eval $(call gb_Library_use_externals,sc,orcus))
+else
+$(eval $(call gb_Library_use_externals,sc,orcus-parser))
+endif
+
 ifeq ($(ENABLE_TELEPATHY),TRUE)
 $(eval $(call gb_Library_use_libraries,sc,tubes))
 
diff --git a/sc/Library_scqahelper.mk b/sc/Library_scqahelper.mk
index 37b1d54..cbdc99d 100644
--- a/sc/Library_scqahelper.mk
+++ b/sc/Library_scqahelper.mk
@@ -18,11 +18,15 @@ $(eval $(call gb_Library_set_include,scqahelper,\
 $(eval $(call gb_Library_use_externals,scqahelper, \
 	boost_headers \
 	mdds_headers \
-	orcus \
-	orcus-parser \
 	cppunit \
 ))
 
+ifeq ($(SYSTEM_LIBORCUS),YES)
+$(eval $(call gb_Library_use_externals,scqahelper,orcus))
+else
+$(eval $(call gb_Library_use_externals,scqahelper,orcus-parser))
+endif
+
 $(eval $(call gb_Library_add_defs,scqahelper,\
 	-DSCQAHELPER_DLLIMPLEMENTATION \
 ))
diff --git a/sc/inc/documentstreamaccess.hxx b/sc/inc/documentstreamaccess.hxx
index 1450f21..74f8914 100644
--- a/sc/inc/documentstreamaccess.hxx
+++ b/sc/inc/documentstreamaccess.hxx
@@ -10,7 +10,11 @@
 #ifndef SC_DOCUMENTSTREAMACCESS_HXX
 #define SC_DOCUMENTSTREAMACCESS_HXX
 
+#include <rtl/ustring.hxx>
+
 class ScDocument;
+class ScAddress;
+class ScRange;
 
 namespace sc {
 
@@ -28,6 +32,26 @@ class DocumentStreamAccess
 
 public:
     DocumentStreamAccess( ScDocument& rDoc );
+
+    void setStringCell( const ScAddress& rPos, const OUString& rStr );
+
+    /**
+     * Clear its internal state, and more importantly all the block position
+     * hints currently held.
+     */
+    void reset();
+
+    /**
+     * Pop the top row inside specified range, shift all the other rows up by
+     * one, then set the bottom row empty.
+     */
+    void shiftRangeUp( const ScRange& rRange );
+
+    /**
+     * Pop the bottom row inside specified range, shift all the other rows
+     * above downward by one by inserting an empty row at the top.
+     */
+    void shiftRangeDown( const ScRange& rRange );
 };
 
 }
diff --git a/sc/inc/mtvelements.hxx b/sc/inc/mtvelements.hxx
index e5efbf1..0fcafde 100644
--- a/sc/inc/mtvelements.hxx
+++ b/sc/inc/mtvelements.hxx
@@ -145,6 +145,8 @@ public:
     ColumnBlockPositionSet(ScDocument& rDoc);
 
     ColumnBlockPosition* getBlockPosition(SCTAB nTab, SCCOL nCol);
+
+    void clear();
 };
 
 ScRefCellValue toRefCell( const sc::CellStoreType::const_iterator& itPos, size_t nOffset );
diff --git a/sc/source/core/data/documentstreamaccess.cxx b/sc/source/core/data/documentstreamaccess.cxx
index 2d427af..81ca160 100644
--- a/sc/source/core/data/documentstreamaccess.cxx
+++ b/sc/source/core/data/documentstreamaccess.cxx
@@ -9,19 +9,114 @@
 
 #include "documentstreamaccess.hxx"
 #include "document.hxx"
+#include "table.hxx"
+#include "column.hxx"
+#include "mtvelements.hxx"
+
+#include "svl/sharedstringpool.hxx"
 
 namespace sc {
 
 struct DocumentStreamAccessImpl
 {
     ScDocument& mrDoc;
+    ColumnBlockPositionSet maBlockPosSet;
 
-    DocumentStreamAccessImpl( ScDocument& rDoc ) : mrDoc(rDoc) {}
+    DocumentStreamAccessImpl( ScDocument& rDoc ) :
+        mrDoc(rDoc),
+        maBlockPosSet(rDoc)
+    {}
 };
 
 DocumentStreamAccess::DocumentStreamAccess( ScDocument& rDoc ) :
     mpImpl(new DocumentStreamAccessImpl(rDoc)) {}
 
+void DocumentStreamAccess::setStringCell( const ScAddress& rPos, const OUString& rStr )
+{
+    ScTable* pTab = mpImpl->mrDoc.FetchTable(rPos.Tab());
+    if (!pTab)
+        return;
+
+    ColumnBlockPosition* pBlockPos =
+        mpImpl->maBlockPosSet.getBlockPosition(rPos.Tab(), rPos.Col());
+
+    if (!pBlockPos)
+        return;
+
+    svl::SharedString aSS = mpImpl->mrDoc.GetSharedStringPool().intern(rStr);
+    if (!aSS.getData())
+        return;
+
+    // Set the string.
+    CellStoreType& rCells = pTab->aCol[rPos.Col()].maCells;
+    pBlockPos->miCellPos = rCells.set(pBlockPos->miCellPos, rPos.Row(), aSS);
+
+    // Be sure to set the corresponding text attribute to the default value.
+    CellTextAttrStoreType& rAttrs = pTab->aCol[rPos.Col()].maCellTextAttrs;
+    pBlockPos->miCellTextAttrPos = rAttrs.set(pBlockPos->miCellTextAttrPos, rPos.Row(), CellTextAttr());
+}
+
+void DocumentStreamAccess::reset()
+{
+    mpImpl->maBlockPosSet.clear();
+}
+
+void DocumentStreamAccess::shiftRangeUp( const ScRange& rRange )
+{
+    ScTable* pTab = mpImpl->mrDoc.FetchTable(rRange.aStart.Tab());
+    if (!pTab)
+        return;
+
+    SCROW nTopRow = rRange.aStart.Row();
+    SCROW nLastRow = rRange.aEnd.Row();
+
+    for (SCCOL nCol = rRange.aStart.Col(); nCol <= rRange.aEnd.Col(); ++nCol)
+    {
+        ColumnBlockPosition* pBlockPos =
+            mpImpl->maBlockPosSet.getBlockPosition(rRange.aStart.Tab(), nCol);
+
+        if (!pBlockPos)
+            return;
+
+        CellStoreType& rCells = pTab->aCol[nCol].maCells;
+        rCells.erase(nTopRow, nTopRow); // Erase the top, and shift the rest up.
+        pBlockPos->miCellPos = rCells.insert_empty(nLastRow, 1);
+
+        // Do the same for the text attribute storage.
+        CellTextAttrStoreType& rAttrs = pTab->aCol[nCol].maCellTextAttrs;
+        rAttrs.erase(nTopRow, nTopRow);
+        pBlockPos->miCellTextAttrPos = rAttrs.insert_empty(nLastRow, 1);
+    }
+}
+
+void DocumentStreamAccess::shiftRangeDown( const ScRange& rRange )
+{
+    ScTable* pTab = mpImpl->mrDoc.FetchTable(rRange.aStart.Tab());
+    if (!pTab)
+        return;
+
+    SCROW nTopRow = rRange.aStart.Row();
+    SCROW nLastRow = rRange.aEnd.Row();
+
+    for (SCCOL nCol = rRange.aStart.Col(); nCol <= rRange.aEnd.Col(); ++nCol)
+    {
+        ColumnBlockPosition* pBlockPos =
+            mpImpl->maBlockPosSet.getBlockPosition(rRange.aStart.Tab(), nCol);
+
+        if (!pBlockPos)
+            return;
+
+        CellStoreType& rCells = pTab->aCol[nCol].maCells;
+        rCells.erase(nLastRow, nLastRow); // Erase the bottom.
+        pBlockPos->miCellPos = rCells.insert_empty(nTopRow, 1); // insert at the top and shift everything down.
+
+        // Do the same for the text attribute storage.
+        CellTextAttrStoreType& rAttrs = pTab->aCol[nCol].maCellTextAttrs;
+        rAttrs.erase(nLastRow, nLastRow);
+        pBlockPos->miCellTextAttrPos = rAttrs.insert_empty(nTopRow, 1);
+    }
+}
+
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/data/mtvelements.cxx b/sc/source/core/data/mtvelements.cxx
index 5a94606..1110ab6 100644
--- a/sc/source/core/data/mtvelements.cxx
+++ b/sc/source/core/data/mtvelements.cxx
@@ -67,6 +67,12 @@ ColumnBlockPosition* ColumnBlockPositionSet::getBlockPosition(SCTAB nTab, SCCOL
     return &it->second;
 }
 
+void ColumnBlockPositionSet::clear()
+{
+    osl::MutexGuard aGuard(&maMtxTables);
+    maTables.clear();
+}
+
 ScRefCellValue toRefCell( const sc::CellStoreType::const_iterator& itPos, size_t nOffset )
 {
     switch (itPos->type)
diff --git a/sc/source/ui/docshell/datastream.cxx b/sc/source/ui/docshell/datastream.cxx
index f053371..a798b84 100644
--- a/sc/source/ui/docshell/datastream.cxx
+++ b/sc/source/ui/docshell/datastream.cxx
@@ -28,6 +28,15 @@
 #include <tabvwsh.hxx>
 #include <viewdata.hxx>
 
+#include <config_orcus.h>
+
+#if ENABLE_ORCUS
+#if defined WNT
+#define __ORCUS_STATIC_LIB
+#endif
+#include <orcus/csv_parser.hpp>
+#endif
+
 #include <queue>
 
 namespace sc {
@@ -316,6 +325,7 @@ void DataStream::StartImport()
 {
     if (mbRunning)
         return;
+
     if (!mxReaderThread.is())
     {
         SvStream *pStream = 0;
@@ -327,6 +337,7 @@ void DataStream::StartImport()
         mxReaderThread->launch();
     }
     mbRunning = true;
+    maDocAccess.reset();
     mxThread->maStart.set();
 }
 
@@ -334,6 +345,7 @@ void DataStream::StopImport()
 {
     if (!mbRunning)
         return;
+
     mbRunning = false;
     Repaint();
 }
@@ -362,23 +374,32 @@ void DataStream::MoveData()
     switch (meMove)
     {
         case RANGE_DOWN:
+        {
             if (mnCurRow == mpEndRange->aStart.Row())
                 meMove = MOVE_UP;
-            break;
+        }
+        break;
         case MOVE_UP:
+        {
             // Remove the top row and shift the remaining rows upward. Then
             // insert a new row at the end row position.
-            mpDoc->DeleteRow(maStartRange);
-            mpDoc->InsertRow(*mpEndRange);
-            break;
+            ScRange aRange = maStartRange;
+            aRange.aEnd = mpEndRange->aEnd;
+            maDocAccess.shiftRangeUp(aRange);
+        }
+        break;
         case MOVE_DOWN:
+        {
             // Remove the end row and shift the remaining rows downward by
             // inserting a new row at the top row.
-            mpDoc->DeleteRow(*mpEndRange);
-            mpDoc->InsertRow(maStartRange);
-            break;
+            ScRange aRange = maStartRange;
+            aRange.aEnd = mpEndRange->aEnd;
+            maDocAccess.shiftRangeDown(aRange);
+        }
+        break;
         case NO_MOVE:
-            break;
+        default:
+            ;
     }
 }
 
@@ -388,77 +409,61 @@ IMPL_LINK_NOARG(DataStream, RefreshHdl)
     return 0;
 }
 
-//  lcl_ScanString and Text2Doc is simplified version
-//  of code from sc/source/ui/docshell/impex.cxx
-const sal_Unicode* lcl_ScanString( const sal_Unicode* p, OUString& rString, sal_Unicode cStr)
+#if ENABLE_ORCUS
+
+namespace {
+
+/**
+ * This handler handles a single line CSV input.
+ */
+class CSVHandler
 {
-    const sal_Unicode* p0 = p;
-    for( ;; )
+    DocumentStreamAccess& mrDoc;
+    ScAddress maPos;
+    SCROW mnRow;
+    SCCOL mnCol;
+    SCCOL mnEndCol;
+    SCTAB mnTab;
+
+public:
+    CSVHandler( DocumentStreamAccess& rDoc, const ScAddress& rPos, SCCOL nEndCol ) :
+        mrDoc(rDoc), maPos(rPos), mnEndCol(nEndCol) {}
+
+    void begin_parse() {}
+    void end_parse() {}
+    void begin_row() {}
+    void end_row() {}
+
+    void cell(const char* p, size_t n)
     {
-        if (!*p)
-            break;
-        if (*p == cStr)
+        if (maPos.Col() <= mnEndCol)
         {
-            if (*++p != cStr)
-                break;
-            p++;
+            mrDoc.setStringCell(maPos, OUString(p, n, RTL_TEXTENCODING_UTF8));
         }
-        else
-            p++;
+        maPos.IncCol();
     }
-    if (p0 < p)
-        if (rString.getLength() + (p - p0) <= STRING_MAXLEN)
-            rString += OUString( p0, sal::static_int_cast<sal_Int32>( p - p0 ) );
-    return p;
+};
+
 }
 
 void DataStream::Text2Doc()
 {
-    sal_Unicode cSep(',');
-    sal_Unicode cStr('"');
-    SCCOL nStartCol = maStartRange.aStart.Col();
-    SCCOL nEndCol = maStartRange.aEnd.Col();
-    OUString aCell;
-    ScDocumentImport aDocImport(*mpDoc);
-
-    SCCOL nCol = nStartCol;
-    OUString sLine( OStringToOUString(ConsumeLine(), RTL_TEXTENCODING_UTF8) );
-    const sal_Unicode* p = sLine.getStr();
-    while (*p)
-    {
-        aCell = "";
-        const sal_Unicode* q = p;
-        while (*p && *p != cSep)
-        {
-            // Always look for a pairing quote and ignore separator in between.
-            while (*p && *p == cStr)
-                q = p = lcl_ScanString(p, aCell, cStr);
-            // All until next separator or quote.
-            while (*p && *p != cSep && *p != cStr)
-                ++p;
-            if (aCell.getLength() + (p - q) <= STRING_MAXLEN)
-                aCell += OUString( q, sal::static_int_cast<sal_Int32>( p - q ) );
-            q = p;
-        }
-        if (*p)
-            ++p;
-        if (nCol <= nEndCol)
-        {
-            ScAddress aAddress(nCol, mnCurRow, maStartRange.aStart.Tab());
-            if (aCell == "0" || ( aCell.indexOf(':') == -1 && aCell.toDouble() ))
-                aDocImport.setNumericCell(aAddress, aCell.toDouble());
-            else
-                aDocImport.setStringCell(aAddress, aCell);
-        }
-        ++nCol;
-    }
+    OString aLine = ConsumeLine();
+    orcus::csv_parser_config aConfig;
+    aConfig.delimiters.push_back(',');
+    aConfig.text_qualifier = '"';
+    CSVHandler aHdl(maDocAccess, ScAddress(maStartRange.aStart.Col(), mnCurRow, maStartRange.aStart.Tab()), maStartRange.aEnd.Col());
+    orcus::csv_parser<CSVHandler> parser(aLine.getStr(), aLine.getLength(), aHdl, aConfig);
+    parser.parse();
+
     ++mnRepaintCounter;
+}
 
-    aDocImport.finalize();
+#else
 
-    ScRange aBCRange(nStartCol, mnCurRow, maStartRange.aStart.Tab(), nEndCol, mnCurRow, maStartRange.aStart.Tab());
-    maBroadcastRanges.Join(aBCRange);
-}
+void DataStream::Text2Doc() {}
+
+#endif
 
 bool DataStream::ImportData()
 {
@@ -474,6 +479,7 @@ bool DataStream::ImportData()
     }
     else
     {
+#if 0 // TODO : temporarily disable this code.
         ScDocumentImport aDocImport(*mpDoc);
         // read more lines at once but not too much
         for (int i = 0; i < 10; ++i)
@@ -496,8 +502,9 @@ bool DataStream::ImportData()
             maBroadcastRanges.Join(aAddress);
         }
         aDocImport.finalize();
+#endif
     }
-    mpDocShell->SetDocumentModified();
+
     if (meMove == NO_MOVE)
         return mbRunning;
 
diff --git a/sc/source/ui/inc/datastream.hxx b/sc/source/ui/inc/datastream.hxx
index 70857dc..5f20bf4 100644
--- a/sc/source/ui/inc/datastream.hxx
+++ b/sc/source/ui/inc/datastream.hxx
@@ -78,8 +78,8 @@ private:
     void Broadcast();
 
 private:
-    ScDocShell *mpDocShell;
-    ScDocument *mpDoc;
+    ScDocShell* mpDocShell;
+    ScDocument* mpDoc;
     DocumentStreamAccess maDocAccess;
     OUString msURL;
     OUString msMove;
@@ -88,7 +88,7 @@ private:
     MoveEnum meMove;
     bool mbRunning;
     bool mbValuesInLine;
-    LinesList *mpLines;
+    LinesList* mpLines;
     size_t mnLinesCount;
     size_t mnRepaintCounter;
     SCROW mnCurRow;


More information about the Libreoffice-commits mailing list