[Libreoffice-commits] core.git: sc/Library_sc.mk sc/source

Markus Mohrhard markus.mohrhard at googlemail.com
Sat Aug 12 16:36:53 UTC 2017


 sc/Library_sc.mk                               |    1 
 sc/source/ui/dataprovider/dataprovider.cxx     |    7 
 sc/source/ui/dataprovider/htmldataprovider.cxx |  209 +++++++++++++++++++++++++
 sc/source/ui/dataprovider/htmldataprovider.hxx |   48 +++++
 4 files changed, 264 insertions(+), 1 deletion(-)

New commits:
commit 1a4dd1fa2a851f678d728ed342a59d48f8cc74ea
Author: Markus Mohrhard <markus.mohrhard at googlemail.com>
Date:   Sat Aug 12 11:30:38 2017 +0200

    external data: add html data provider
    
    Change-Id: I4ae266707f5cf3b5231f726082950f90df3ca1eb
    Reviewed-on: https://gerrit.libreoffice.org/41083
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Markus Mohrhard <markus.mohrhard at googlemail.com>

diff --git a/sc/Library_sc.mk b/sc/Library_sc.mk
index ffe9b96bc488..8df21d883a2b 100644
--- a/sc/Library_sc.mk
+++ b/sc/Library_sc.mk
@@ -385,6 +385,7 @@ $(eval $(call gb_Library_add_exception_objects,sc,\
     sc/source/ui/condformat/colorformat \
     sc/source/ui/dataprovider/csvdataprovider \
     sc/source/ui/dataprovider/dataprovider \
+    sc/source/ui/dataprovider/htmldataprovider \
     sc/source/ui/dbgui/asciiopt \
     sc/source/ui/dbgui/consdlg \
     sc/source/ui/dbgui/csvcontrol \
diff --git a/sc/source/ui/dataprovider/dataprovider.cxx b/sc/source/ui/dataprovider/dataprovider.cxx
index 4c23420a18a4..119dbdd18e9b 100644
--- a/sc/source/ui/dataprovider/dataprovider.cxx
+++ b/sc/source/ui/dataprovider/dataprovider.cxx
@@ -14,6 +14,8 @@
 #include "officecfg/Office/Calc.hxx"
 #include <rtl/strbuf.hxx>
 
+#include "htmldataprovider.hxx"
+
 using namespace com::sun::star;
 
 namespace sc {
@@ -203,13 +205,16 @@ bool DataProviderFactory::isInternalDataProvider(const OUString& rProvider)
     return rProvider.startsWith("org.libreoffice.calc");
 }
 
-std::shared_ptr<DataProvider> DataProviderFactory::getDataProvider(ScDocument* pDoc, const OUString& rProvider, const OUString& rURL, const OUString& /*rID*/, ScDBDataManager* pManager)
+std::shared_ptr<DataProvider> DataProviderFactory::getDataProvider(ScDocument* pDoc, const OUString& rProvider,
+        const OUString& rURL, const OUString& rID, ScDBDataManager* pManager)
 {
     bool bInternal = DataProviderFactory::isInternalDataProvider(rProvider);
     if (bInternal)
     {
         if (rProvider == "org.libreoffice.calc.csv")
             return std::shared_ptr<DataProvider>(new CSVDataProvider(pDoc, rURL, pManager));
+        else if (rProvider == "org.libreoffice.calc.html")
+            return std::shared_ptr<DataProvider>(new HTMLDataProvider(pDoc, rURL, pManager, rID));
     }
     else
     {
diff --git a/sc/source/ui/dataprovider/htmldataprovider.cxx b/sc/source/ui/dataprovider/htmldataprovider.cxx
new file mode 100644
index 000000000000..c73efee0260c
--- /dev/null
+++ b/sc/source/ui/dataprovider/htmldataprovider.cxx
@@ -0,0 +1,209 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "htmldataprovider.hxx"
+#include <salhelper/thread.hxx>
+
+#include <libxml/HTMLparser.h>
+#include <libxml/HTMLtree.h>
+
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
+
+#include <comphelper/string.hxx>
+
+namespace sc {
+
+/**
+ * Worker thread that fetches an HTML page, selects a node with an XPath
+ * expression and writes the rows and cells of that table into a document.
+ */
+class HTMLFetchThread : public salhelper::Thread
+{
+    /// Document that receives the imported cell strings.
+    ScDocument& mrDocument;
+    /// Location of the HTML page to fetch.
+    OUString maURL;
+    /// XPath expression evaluated against the parsed page.
+    OUString maID;
+
+    /// Idle that execute() starts after the table has been processed.
+    Idle* mpIdle;
+
+    // Parameter names match the out-of-line definitions.
+    void handleTable(xmlNodePtr pTable);
+    void handleRow(xmlNodePtr pRowNode, SCROW nRow);
+    void skipHeadBody(xmlNodePtr pSkipElement, SCROW& rRow);
+    void handleCell(xmlNodePtr pCellNode, SCROW nRow, SCCOL nCol);
+
+public:
+    HTMLFetchThread(ScDocument& rDoc, const OUString& rURL, const OUString& rID, Idle* pIdle);
+
+    virtual void execute() override;
+};
+
+/// Stores the target document, page URL, XPath expression and the idle to
+/// start afterwards. The thread itself is not launched here.
+HTMLFetchThread::HTMLFetchThread(ScDocument& rDoc, const OUString& rURL, const OUString& rID, Idle* pIdle)
+    : salhelper::Thread("HTML Fetch Thread")
+    , mrDocument(rDoc)
+    , maURL(rURL)
+    , maID(rID)
+    , mpIdle(pIdle)
+{
+}
+
+namespace {
+
+/// Copies a libxml2 string into an OString, byte for byte.
+OString toString(const xmlChar* pStr)
+{
+    const char* pChars = reinterpret_cast<const char*>(pStr);
+    const sal_Int32 nLength = xmlStrlen(pStr);
+    return OString(pChars, nLength);
+}
+
+}
+
+/**
+ * Writes the text of a table cell element into the document.
+ *
+ * Only direct text-node children are looked at. Each one is stripped of
+ * leading/trailing spaces, newlines, carriage returns and tabs and then
+ * stored at (nCol, nRow) on sheet 0, so a later text node replaces the
+ * string written for an earlier one.
+ */
+void HTMLFetchThread::handleCell(xmlNodePtr pCellNode, SCROW nRow, SCCOL nCol)
+{
+    for (xmlNodePtr pChild = pCellNode->children; pChild; pChild = pChild->next)
+    {
+        if (pChild->type != XML_TEXT_NODE)
+            continue;
+
+        OUString aText = OStringToOUString(toString(pChild->content), RTL_TEXTENCODING_UTF8);
+
+        // One pass removes each character class only from the current ends,
+        // so repeat until a pass leaves the string unchanged.
+        for (;;)
+        {
+            const OUString aBefore = aText;
+            aText = comphelper::string::strip(aText, ' ');
+            aText = comphelper::string::strip(aText, '\n');
+            aText = comphelper::string::strip(aText, '\r');
+            aText = comphelper::string::strip(aText, '\t');
+            if (aBefore == aText)
+                break;
+        }
+
+        mrDocument.SetString(nCol, nRow, 0, aText);
+    }
+}
+
+/**
+ * Imports one table row element.
+ *
+ * Every "td" or "th" child element becomes one cell of row nRow; columns
+ * are numbered from 0 in document order. All other children are ignored.
+ */
+void HTMLFetchThread::handleRow(xmlNodePtr pRowNode, SCROW nRow)
+{
+    // Counted in SCCOL, the type handleCell() takes, so that no implicit
+    // narrowing conversion happens at the call.
+    SCCOL nCol = 0;
+    for (xmlNodePtr cur_node = pRowNode->children; cur_node; cur_node = cur_node->next)
+    {
+        if (cur_node->type != XML_ELEMENT_NODE)
+            continue;
+
+        const OString aNodeName = toString(cur_node->name);
+        if (aNodeName == "td" || aNodeName == "th")
+        {
+            handleCell(cur_node, nRow, nCol);
+            ++nCol;
+        }
+    }
+}
+
+/**
+ * Imports the rows nested inside a table section element.
+ *
+ * Each "tr" child element is handed to handleRow() with the current row
+ * index; rRow is incremented once per imported row so the caller continues
+ * after the last row written here.
+ */
+void HTMLFetchThread::skipHeadBody(xmlNodePtr pSkipElement, SCROW& rRow)
+{
+    for (xmlNodePtr pChild = pSkipElement->children; pChild; pChild = pChild->next)
+    {
+        if (pChild->type != XML_ELEMENT_NODE)
+            continue;
+
+        const OString aElementName = toString(pChild->name);
+        if (aElementName != "tr")
+            continue;
+
+        handleRow(pChild, rRow);
+        ++rRow;
+    }
+}
+
+/**
+ * Imports a table element, starting at row 0.
+ *
+ * Direct "tr" children are imported as rows; "thead" and "tbody" children
+ * are descended into and their rows continue the same row numbering.
+ * Any other child is ignored.
+ */
+void HTMLFetchThread::handleTable(xmlNodePtr pTable)
+{
+    // Declared as SCROW because skipHeadBody() takes it by SCROW reference.
+    SCROW nRow = 0;
+    for (xmlNodePtr cur_node = pTable->children; cur_node; cur_node = cur_node->next)
+    {
+        if (cur_node->type != XML_ELEMENT_NODE)
+            continue;
+
+        const OString aNodeName = toString(cur_node->name);
+        if (aNodeName == "tr")
+        {
+            handleRow(cur_node, nRow);
+            ++nRow;
+        }
+        else if (aNodeName == "thead" || aNodeName == "tbody")
+        {
+            // Advances nRow by the number of rows found in the section.
+            skipHeadBody(cur_node, nRow);
+        }
+    }
+}
+
+/**
+ * Thread body: fetches maURL, parses it as HTML, evaluates maID as an XPath
+ * expression and imports the first matching node as a table. On success the
+ * idle is started while holding the solar mutex.
+ *
+ * All libxml2 objects are owned by smart pointers so they are released on
+ * every exit path. They are declared in the order document, context, result
+ * so that they are destroyed in the reverse order.
+ *
+ * NOTE(review): when parsing or the XPath lookup fails, the function returns
+ * without starting the idle, so the owner is not notified. Confirm whether
+ * failed imports should be reported.
+ */
+void HTMLFetchThread::execute()
+{
+    OStringBuffer aBuffer(64000);
+    std::unique_ptr<SvStream> pStream = DataProvider::FetchStreamFromURL(maURL, aBuffer);
+
+    std::unique_ptr<xmlDoc, decltype(&xmlFreeDoc)> pHtmlDoc(
+        htmlParseDoc(reinterpret_cast<xmlChar*>(const_cast<char*>(aBuffer.getStr())), nullptr),
+        &xmlFreeDoc);
+    if (!pHtmlDoc)
+        return;
+
+    std::unique_ptr<xmlXPathContext, decltype(&xmlXPathFreeContext)> pXPathCtx(
+        xmlXPathNewContext(pHtmlDoc.get()), &xmlXPathFreeContext);
+    if (!pXPathCtx)
+        return;
+
+    const OString aID = OUStringToOString(maID, RTL_TEXTENCODING_UTF8);
+    // xmlXPathFreeObject() releases the result together with its node set.
+    std::unique_ptr<xmlXPathObject, decltype(&xmlXPathFreeObject)> pXPathObj(
+        xmlXPathEvalExpression(BAD_CAST(aID.getStr()), pXPathCtx.get()),
+        &xmlXPathFreeObject);
+    if (!pXPathObj)
+        return; // the expression could not be evaluated
+
+    // nodesetval is null when the result is not a node set; the macro
+    // treats a null set like an empty one.
+    xmlNodeSetPtr pXmlNodes = pXPathObj->nodesetval;
+    if (xmlXPathNodeSetIsEmpty(pXmlNodes))
+        return;
+
+    handleTable(pXmlNodes->nodeTab[0]);
+
+    SolarMutexGuard aGuard;
+    mpIdle->Start();
+}
+
+/// Remembers the import parameters and connects the idle to
+/// ImportFinishedHdl. No data is fetched until Import() is called.
+HTMLDataProvider::HTMLDataProvider(ScDocument* pDoc, const OUString& rURL, ScDBDataManager* pDBManager,
+        const OUString& rID)
+    : maID(rID)
+    , maURL(rURL)
+    , mpDocument(pDoc)
+    , mpDBDataManager(pDBManager)
+    , maIdle("HTMLDataProvider CopyHandler")
+{
+    maIdle.SetInvokeHandler(LINK(this, HTMLDataProvider, ImportFinishedHdl));
+}
+
+/// Joins the fetch thread, if one is still referenced, before the members
+/// it uses are destroyed.
+HTMLDataProvider::~HTMLDataProvider()
+{
+    if (!mxHTMLFetchThread.is())
+        return;
+
+    mxHTMLFetchThread->join();
+}
+
+/**
+ * Starts an import in a background thread.
+ *
+ * A fresh clipboard-mode document is created as the import target and
+ * handed to a new HTMLFetchThread. Does nothing while such a target
+ * document still exists, i.e. while a previous import is in progress.
+ */
+void HTMLDataProvider::Import()
+{
+    const bool bImportRunning = static_cast<bool>(mpDoc);
+    if (bImportRunning)
+        return;
+
+    mpDoc.reset(new ScDocument(SCDOCMODE_CLIP));
+    mpDoc->ResetClip(mpDocument, static_cast<SCTAB>(0));
+
+    mxHTMLFetchThread = new HTMLFetchThread(*mpDoc, maURL, maID, &maIdle);
+    mxHTMLFetchThread->launch();
+}
+
+/// Idle handler: passes the imported document to the database data manager,
+/// then drops the thread reference and the temporary document so that
+/// Import() can run again.
+IMPL_LINK_NOARG(HTMLDataProvider, ImportFinishedHdl, Timer*, void)
+{
+    ScDocument& rImported = *mpDoc;
+    mpDBDataManager->WriteToDoc(rImported);
+
+    mxHTMLFetchThread.clear();
+    mpDoc.reset();
+}
+
+/// Returns the URL this provider was constructed with.
+const OUString& HTMLDataProvider::GetURL() const
+{
+    const OUString& rURL = maURL;
+    return rURL;
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/ui/dataprovider/htmldataprovider.hxx b/sc/source/ui/dataprovider/htmldataprovider.hxx
new file mode 100644
index 000000000000..adbcf37baf5e
--- /dev/null
+++ b/sc/source/ui/dataprovider/htmldataprovider.hxx
@@ -0,0 +1,48 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_SC_SOURCE_UI_DATAPROVIDER_HTMLDATAPROVIDER_HXX
+#define INCLUDED_SC_SOURCE_UI_DATAPROVIDER_HTMLDATAPROVIDER_HXX
+
+#include "dataprovider.hxx"
+
+namespace sc {
+
+class HTMLFetchThread;
+
+/**
+ * Data provider that imports a table from an HTML page.
+ *
+ * Import() fills a temporary document in a background thread; once that is
+ * done, ImportFinishedHdl passes the temporary document to the database
+ * data manager.
+ */
+class HTMLDataProvider : public DataProvider
+{
+    /// XPath expression handed to the fetch thread.
+    OUString maID;
+    /// Location of the HTML page.
+    OUString maURL;
+    /// Document the temporary import document is reset from.
+    ScDocument* mpDocument;
+    /// Receives the imported data in ImportFinishedHdl.
+    ScDBDataManager* mpDBDataManager;
+    /// Running fetch thread; cleared when an import has finished.
+    rtl::Reference<HTMLFetchThread> mxHTMLFetchThread;
+
+    /// Temporary import target; non-null while an import is in progress.
+    std::unique_ptr<ScDocument> mpDoc;
+    /// Started by the fetch thread to invoke ImportFinishedHdl.
+    Idle maIdle;
+
+public:
+    HTMLDataProvider(ScDocument* pDoc, const OUString& rURL, ScDBDataManager* pDBManager, const OUString& rID);
+    virtual ~HTMLDataProvider() override;
+
+    virtual void Import() override;
+
+    virtual const OUString& GetURL() const override;
+
+    DECL_LINK( ImportFinishedHdl, Timer*, void );
+};
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */


More information about the Libreoffice-commits mailing list