[Libreoffice-commits] core.git: sc/Library_sc.mk sc/source
Markus Mohrhard
markus.mohrhard at googlemail.com
Sat Aug 12 16:36:53 UTC 2017
sc/Library_sc.mk | 1
sc/source/ui/dataprovider/dataprovider.cxx | 7
sc/source/ui/dataprovider/htmldataprovider.cxx | 209 +++++++++++++++++++++++++
sc/source/ui/dataprovider/htmldataprovider.hxx | 48 +++++
4 files changed, 264 insertions(+), 1 deletion(-)
New commits:
commit 1a4dd1fa2a851f678d728ed342a59d48f8cc74ea
Author: Markus Mohrhard <markus.mohrhard at googlemail.com>
Date: Sat Aug 12 11:30:38 2017 +0200
external data: add html data provider
Change-Id: I4ae266707f5cf3b5231f726082950f90df3ca1eb
Reviewed-on: https://gerrit.libreoffice.org/41083
Tested-by: Jenkins <ci at libreoffice.org>
Reviewed-by: Markus Mohrhard <markus.mohrhard at googlemail.com>
diff --git a/sc/Library_sc.mk b/sc/Library_sc.mk
index ffe9b96bc488..8df21d883a2b 100644
--- a/sc/Library_sc.mk
+++ b/sc/Library_sc.mk
@@ -385,6 +385,7 @@ $(eval $(call gb_Library_add_exception_objects,sc,\
sc/source/ui/condformat/colorformat \
sc/source/ui/dataprovider/csvdataprovider \
sc/source/ui/dataprovider/dataprovider \
+ sc/source/ui/dataprovider/htmldataprovider \
sc/source/ui/dbgui/asciiopt \
sc/source/ui/dbgui/consdlg \
sc/source/ui/dbgui/csvcontrol \
diff --git a/sc/source/ui/dataprovider/dataprovider.cxx b/sc/source/ui/dataprovider/dataprovider.cxx
index 4c23420a18a4..119dbdd18e9b 100644
--- a/sc/source/ui/dataprovider/dataprovider.cxx
+++ b/sc/source/ui/dataprovider/dataprovider.cxx
@@ -14,6 +14,8 @@
#include "officecfg/Office/Calc.hxx"
#include <rtl/strbuf.hxx>
+#include "htmldataprovider.hxx"
+
using namespace com::sun::star;
namespace sc {
@@ -203,13 +205,16 @@ bool DataProviderFactory::isInternalDataProvider(const OUString& rProvider)
return rProvider.startsWith("org.libreoffice.calc");
}
-std::shared_ptr<DataProvider> DataProviderFactory::getDataProvider(ScDocument* pDoc, const OUString& rProvider, const OUString& rURL, const OUString& /*rID*/, ScDBDataManager* pManager)
+std::shared_ptr<DataProvider> DataProviderFactory::getDataProvider(ScDocument* pDoc, const OUString& rProvider,
+ const OUString& rURL, const OUString& rID, ScDBDataManager* pManager)
{
bool bInternal = DataProviderFactory::isInternalDataProvider(rProvider);
if (bInternal)
{
if (rProvider == "org.libreoffice.calc.csv")
return std::shared_ptr<DataProvider>(new CSVDataProvider(pDoc, rURL, pManager));
+ else if (rProvider == "org.libreoffice.calc.html")
+ return std::shared_ptr<DataProvider>(new HTMLDataProvider(pDoc, rURL, pManager, rID));
}
else
{
diff --git a/sc/source/ui/dataprovider/htmldataprovider.cxx b/sc/source/ui/dataprovider/htmldataprovider.cxx
new file mode 100644
index 000000000000..c73efee0260c
--- /dev/null
+++ b/sc/source/ui/dataprovider/htmldataprovider.cxx
@@ -0,0 +1,209 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "htmldataprovider.hxx"
+#include <salhelper/thread.hxx>
+
+#include <libxml/HTMLparser.h>
+#include <libxml/HTMLtree.h>
+
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
+
+#include <comphelper/string.hxx>
+
+namespace sc {
+
+/**
+ * Worker thread that imports one HTML table into a document.
+ *
+ * execute() fetches the content behind maURL, parses it as HTML, evaluates
+ * maID as an XPath expression and copies the cells of the first matching
+ * node into sheet 0 of mrDocument. When that is done it starts *mpIdle
+ * while holding the SolarMutex.
+ */
+class HTMLFetchThread : public salhelper::Thread
+{
+private:
+    /// Document that receives the imported cells.
+    ScDocument& mrDocument;
+    /// Location of the HTML document to fetch.
+    OUString maURL;
+    /// XPath expression that selects the table node.
+    OUString maID;
+
+    /// Idle that is started after a successful import; never deleted by this class.
+    Idle* mpIdle;
+
+    /// Walks the children of a table node and imports its rows.
+    void handleTable(xmlNodePtr pTable);
+    /// Imports the <td>/<th> children of one row into row nRow.
+    void handleRow(xmlNodePtr pRow, SCROW nRow);
+    /// Imports the <tr> children of a <thead>/<tbody> node, advancing rRow per row.
+    void skipHeadBody(xmlNodePtr pSkip, SCROW& rRow);
+    /// Writes the text of one cell node to (nCol, nRow) on sheet 0.
+    void handleCell(xmlNodePtr pCell, SCROW nRow, SCCOL nCol);
+
+public:
+    HTMLFetchThread(ScDocument& rDoc, const OUString& rURL, const OUString& rID, Idle* pIdle);
+
+    /// Thread body: fetch, parse, import, then start the idle.
+    virtual void execute() override;
+};
+
+/// Stores the target document, the source URL, the XPath expression and the
+/// idle to start on completion. The thread itself is not launched here.
+HTMLFetchThread::HTMLFetchThread(
+        ScDocument& rDoc, const OUString& rURL, const OUString& rID, Idle* pIdle)
+    : salhelper::Thread("HTML Fetch Thread")
+    , mrDocument(rDoc)
+    , maURL(rURL)
+    , maID(rID)
+    , mpIdle(pIdle)
+{
+}
+
+namespace {
+
+/// Copies a libxml2 string into an OString. The length is taken from
+/// xmlStrlen(), i.e. it is counted in bytes, not in characters.
+OString toString(const xmlChar* pStr)
+{
+    const char* const pBytes = reinterpret_cast<const char*>(pStr);
+    const sal_Int32 nByteCount = xmlStrlen(pStr);
+    return OString(pBytes, nByteCount);
+}
+
+}
+
+/**
+ * Writes the text of one table cell to sheet 0 of the target document.
+ *
+ * The children of the cell are visited in document order. Text nodes
+ * contribute their content; element nodes (for example <b>, <a> or <span>)
+ * contribute their complete text content. Each fragment is stripped of
+ * leading and trailing spaces, newlines, carriage returns and tabs, and the
+ * fragments are concatenated without a separator.
+ *
+ * Earlier every direct text node overwrote the previous one and nested
+ * elements were ignored, so "<td>a<br>b</td>" produced "b" and
+ * "<td><b>a</b></td>" produced nothing.
+ *
+ * Nothing is written when the collected text is empty.
+ *
+ * @param pCellNode the cell element whose children are read
+ * @param nRow      target row
+ * @param nCol      target column
+ */
+void HTMLFetchThread::handleCell(xmlNodePtr pCellNode, SCROW nRow, SCCOL nCol)
+{
+    // strip() removes a single kind of character per call, so the four calls
+    // are repeated until the string no longer changes.
+    auto trim = [](OUString aText) -> OUString
+    {
+        OUString aPrevious;
+        do
+        {
+            aPrevious = aText;
+            aText = comphelper::string::strip(aText, ' ');
+            aText = comphelper::string::strip(aText, '\n');
+            aText = comphelper::string::strip(aText, '\r');
+            aText = comphelper::string::strip(aText, '\t');
+        }
+        while (aPrevious != aText);
+        return aText;
+    };
+
+    OUString aCellText;
+    for (xmlNodePtr pChild = pCellNode->children; pChild; pChild = pChild->next)
+    {
+        if (pChild->type == XML_TEXT_NODE)
+        {
+            aCellText += trim(OStringToOUString(toString(pChild->content), RTL_TEXTENCODING_UTF8));
+        }
+        else if (pChild->type == XML_ELEMENT_NODE)
+        {
+            // xmlNodeGetContent() hands back a newly allocated string; copy it
+            // and release it right away with xmlFree().
+            xmlChar* pContent = xmlNodeGetContent(pChild);
+            if (pContent)
+            {
+                const OString aRaw = toString(pContent);
+                xmlFree(pContent);
+                aCellText += trim(OStringToOUString(aRaw, RTL_TEXTENCODING_UTF8));
+            }
+        }
+    }
+
+    if (!aCellText.isEmpty())
+        mrDocument.SetString(nCol, nRow, 0, aCellText);
+}
+
+/**
+ * Imports one table row.
+ *
+ * Every <td> or <th> element child of pRowNode is passed to handleCell();
+ * the column index starts at 0 and grows by one per cell. Other children
+ * are ignored and do not consume a column.
+ *
+ * @param pRowNode the row element
+ * @param nRow     target row for all cells of this element
+ */
+void HTMLFetchThread::handleRow(xmlNodePtr pRowNode, SCROW nRow)
+{
+    sal_Int32 nColumn = 0;
+    for (xmlNodePtr pChild = pRowNode->children; pChild; pChild = pChild->next)
+    {
+        if (pChild->type != XML_ELEMENT_NODE)
+            continue;
+
+        const OString aElementName = toString(pChild->name);
+        if (aElementName != "td" && aElementName != "th")
+            continue;
+
+        handleCell(pChild, nRow, nColumn);
+        ++nColumn;
+    }
+}
+
+/**
+ * Descends into a wrapper element and imports the rows it contains.
+ *
+ * Every <tr> element child of pSkipElement is handed to handleRow() with
+ * the current value of rRow, after which rRow is incremented.
+ *
+ * @param pSkipElement wrapper element whose <tr> children are imported
+ * @param rRow         in/out: next free target row
+ */
+void HTMLFetchThread::skipHeadBody(xmlNodePtr pSkipElement, SCROW& rRow)
+{
+    for (xmlNodePtr pChild = pSkipElement->children; pChild; pChild = pChild->next)
+    {
+        if (pChild->type != XML_ELEMENT_NODE)
+            continue;
+
+        const OString aElementName = toString(pChild->name);
+        if (aElementName != "tr")
+            continue;
+
+        handleRow(pChild, rRow);
+        ++rRow;
+    }
+}
+
+/**
+ * Imports a table node starting at row 0.
+ *
+ * Direct <tr> children are imported one row each; <thead> and <tbody>
+ * children are descended into via skipHeadBody(), which continues the same
+ * row counter. All other children are ignored.
+ *
+ * @param pTable node whose children are scanned for rows
+ */
+void HTMLFetchThread::handleTable(xmlNodePtr pTable)
+{
+    sal_Int32 nNextRow = 0;
+    for (xmlNodePtr pChild = pTable->children; pChild; pChild = pChild->next)
+    {
+        if (pChild->type != XML_ELEMENT_NODE)
+            continue;
+
+        const OString aElementName = toString(pChild->name);
+        if (aElementName == "thead" || aElementName == "tbody")
+        {
+            skipHeadBody(pChild, nNextRow);
+            continue;
+        }
+
+        if (aElementName == "tr")
+        {
+            handleRow(pChild, nNextRow);
+            ++nNextRow;
+        }
+    }
+}
+
+/**
+ * Thread body: fetch the URL, parse it as HTML, select a node with the
+ * XPath expression in maID and import it as a table.
+ *
+ * All libxml2 objects are held by scoped owners so that they are released
+ * on every path. Earlier the early return leaked the XPath object and
+ * context, the parsed document was never freed, and a failed parse, an
+ * invalid XPath expression or a result without a node set led to a null
+ * pointer dereference.
+ *
+ * The idle is started, under the SolarMutex, only after a node has been
+ * imported.
+ */
+void HTMLFetchThread::execute()
+{
+    OStringBuffer aBuffer(64000);
+    // The returned stream is not read here; the parser below consumes aBuffer.
+    std::unique_ptr<SvStream> pStream = DataProvider::FetchStreamFromURL(maURL, aBuffer);
+
+    // Declared first so that it is destroyed last: the XPath result refers to
+    // nodes that live inside this document.
+    std::unique_ptr<xmlDoc, decltype(&xmlFreeDoc)> pHtmlDoc(
+        htmlParseDoc(reinterpret_cast<xmlChar*>(const_cast<char*>(aBuffer.getStr())), nullptr),
+        &xmlFreeDoc);
+    // NOTE(review): on every early return below the idle is not started, as
+    // before; the owning provider therefore keeps its "already importing"
+    // state - confirm whether a failure notification is wanted.
+    if (!pHtmlDoc)
+        return;
+
+    std::unique_ptr<xmlXPathContext, decltype(&xmlXPathFreeContext)> pXPathCtx(
+        xmlXPathNewContext(pHtmlDoc.get()), &xmlXPathFreeContext);
+    if (!pXPathCtx)
+        return;
+
+    const OString aID = OUStringToOString(maID, RTL_TEXTENCODING_UTF8);
+    // xmlXPathFreeObject() releases the result object together with its node set.
+    std::unique_ptr<xmlXPathObject, decltype(&xmlXPathFreeObject)> pXPathObj(
+        xmlXPathEvalExpression(BAD_CAST(aID.getStr()), pXPathCtx.get()), &xmlXPathFreeObject);
+    if (!pXPathObj)
+        return;
+
+    // nodesetval may be null when the expression matched nothing or did not
+    // evaluate to a node set.
+    const xmlNodeSetPtr pNodes = pXPathObj->nodesetval;
+    if (!pNodes || pNodes->nodeNr == 0)
+        return;
+
+    // Only the first match is imported.
+    handleTable(pNodes->nodeTab[0]);
+
+    SolarMutexGuard aGuard;
+    mpIdle->Start();
+}
+
+/**
+ * Remembers the source URL, the XPath expression, the target document and
+ * the database manager, and connects the idle to ImportFinishedHdl.
+ * No import is started here.
+ */
+HTMLDataProvider::HTMLDataProvider(
+        ScDocument* pDoc, const OUString& rURL, ScDBDataManager* pDBManager, const OUString& rID)
+    : maID(rID)
+    , maURL(rURL)
+    , mpDocument(pDoc)
+    , mpDBDataManager(pDBManager)
+    , maIdle("HTMLDataProvider CopyHandler")
+{
+    maIdle.SetInvokeHandler(LINK(this, HTMLDataProvider, ImportFinishedHdl));
+}
+
+/// Waits for a fetch thread that is still referenced, if there is one.
+HTMLDataProvider::~HTMLDataProvider()
+{
+    if (!mxHTMLFetchThread.is())
+        return;
+
+    mxHTMLFetchThread->join();
+}
+
+/**
+ * Starts an import in a background thread.
+ *
+ * A clipboard-mode scratch document is created and handed to a new
+ * HTMLFetchThread together with the URL, the XPath expression and the idle.
+ * The call does nothing while a scratch document from an earlier call
+ * still exists.
+ */
+void HTMLDataProvider::Import()
+{
+    // an existing scratch document means an import is still in flight
+    if (mpDoc != nullptr)
+        return;
+
+    mpDoc.reset(new ScDocument(SCDOCMODE_CLIP));
+    mpDoc->ResetClip(mpDocument, static_cast<SCTAB>(0));
+
+    mxHTMLFetchThread = new HTMLFetchThread(*mpDoc, maURL, maID, &maIdle);
+    mxHTMLFetchThread->launch();
+}
+
+// Handler invoked by maIdle (connected in the constructor). The fetch thread
+// starts that idle after it has filled the scratch document.
+IMPL_LINK_NOARG(HTMLDataProvider, ImportFinishedHdl, Timer*, void)
+{
+ // Hand the imported cells to the database manager while the scratch
+ // document still exists.
+ mpDBDataManager->WriteToDoc(*mpDoc);
+ // Drop this object's reference to the finished thread.
+ mxHTMLFetchThread.clear();
+ // Import() treats a null mpDoc as "no import running", so this re-enables it.
+ mpDoc.reset();
+}
+
+/// Returns the URL that was passed to the constructor.
+const OUString& HTMLDataProvider::GetURL() const
+{
+ return maURL;
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/ui/dataprovider/htmldataprovider.hxx b/sc/source/ui/dataprovider/htmldataprovider.hxx
new file mode 100644
index 000000000000..adbcf37baf5e
--- /dev/null
+++ b/sc/source/ui/dataprovider/htmldataprovider.hxx
@@ -0,0 +1,48 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_SC_SOURCE_UI_DATAPROVIDER_HTMLDATAPROVIDER_HXX
+#define INCLUDED_SC_SOURCE_UI_DATAPROVIDER_HTMLDATAPROVIDER_HXX
+
+#include "dataprovider.hxx"
+
+namespace sc {
+
+class HTMLFetchThread;
+
+/**
+ * Data provider that imports a table from an HTML document.
+ *
+ * Import() fills a scratch document in a background HTMLFetchThread;
+ * ImportFinishedHdl then passes that document to the ScDBDataManager.
+ */
+class HTMLDataProvider : public DataProvider
+{
+private:
+
+ // XPath expression handed to the fetch thread to select the table.
+ OUString maID;
+ // Location of the HTML document.
+ OUString maURL;
+ // Document passed as the source to ResetClip() of the scratch document.
+ ScDocument* mpDocument;
+ // Receives the imported scratch document via WriteToDoc().
+ ScDBDataManager* mpDBDataManager;
+ // Fetch thread of the current import; cleared when the import finishes.
+ rtl::Reference<HTMLFetchThread> mxHTMLFetchThread;
+
+ // Scratch document of the current import; null while no import is running.
+ std::unique_ptr<ScDocument> mpDoc;
+ // Started by the fetch thread; invokes ImportFinishedHdl.
+ Idle maIdle;
+
+public:
+
+ HTMLDataProvider(ScDocument* pDoc, const OUString& rURL, ScDBDataManager* pDBManager, const OUString& rID);
+ // Joins the fetch thread if one is still referenced.
+ virtual ~HTMLDataProvider() override;
+
+ // Starts a background import unless one is already running.
+ virtual void Import() override;
+
+ // Returns the URL given at construction.
+ virtual const OUString& GetURL() const override;
+
+ // Handler invoked by maIdle once the fetch thread has finished importing.
+ DECL_LINK( ImportFinishedHdl, Timer*, void );
+};
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
More information about the Libreoffice-commits
mailing list