[Libreoffice-commits] .: Branch 'feature/calc-xml-source' - sc/inc sc/source

Libreoffice Gerrit user logerrit at kemper.freedesktop.org
Wed Nov 21 12:22:46 PST 2012


 sc/inc/orcusxml.hxx                     |    4 ++
 sc/source/core/tool/orcusxml.cxx        |    9 +++++
 sc/source/filter/orcus/xmlcontext.cxx   |   54 +++++++++++++++++++++++++++-----
 sc/source/ui/xmlsource/xmlsourcedlg.cxx |   31 ++++++++++++++----
 4 files changed, 84 insertions(+), 14 deletions(-)

New commits:
commit ae9da93d4b4cbcd349e19c2bf4f9e7a27f31965f
Author: Kohei Yoshida <kohei.yoshida at gmail.com>
Date:   Wed Nov 21 15:21:41 2012 -0500

    Correctly handle xml namespaces when importing xml content.
    
    But it's still not working. I need to dig some more.
    
    Change-Id: I06d4d70e1a4234b031741a496f2651d016c35ecc

diff --git a/sc/inc/orcusxml.hxx b/sc/inc/orcusxml.hxx
index c3a981d..87be8a2 100644
--- a/sc/inc/orcusxml.hxx
+++ b/sc/inc/orcusxml.hxx
@@ -29,6 +29,7 @@ struct ScOrcusXMLTreeParam
     /** Custom data stored with each tree item. */
     struct EntryData
     {
+        size_t mnNamespaceID; /// numerical ID for xml namespace
         EntryType meType;
         ScAddress maLinkedPos; /// linked cell position (invalid if unlinked)
         bool mbRangeParent:1;
@@ -72,8 +73,11 @@ struct ScOrcusImportXMLParam
     typedef std::vector<CellLink> CellLinksType;
     typedef std::vector<RangeLink> RangeLinksType;
 
+    std::vector<size_t> maNamespaces;
     CellLinksType maCellLinks;
     RangeLinksType maRangeLinks;
+
+    SC_DLLPUBLIC static rtl::OString getShortNamespaceName(size_t nIndex);
 };
 
 #endif
diff --git a/sc/source/core/tool/orcusxml.cxx b/sc/source/core/tool/orcusxml.cxx
index 3f3c93c..cef0333 100644
--- a/sc/source/core/tool/orcusxml.cxx
+++ b/sc/source/core/tool/orcusxml.cxx
@@ -10,6 +10,7 @@
 #include "orcusxml.hxx"
 
 #include "svtools/treelistbox.hxx"
+#include "rtl/strbuf.hxx"
 
 ScOrcusXMLTreeParam::EntryData::EntryData(EntryType eType) :
     meType(eType), maLinkedPos(ScAddress::INITIALIZE_INVALID), mbRangeParent(false), mbLeafNode(true) {}
@@ -27,4 +28,12 @@ const ScOrcusXMLTreeParam::EntryData* ScOrcusXMLTreeParam::getUserData(const SvT
 ScOrcusImportXMLParam::CellLink::CellLink(const ScAddress& rPos, const OString& rPath) :
     maPos(rPos), maPath(rPath) {}
 
+OString ScOrcusImportXMLParam::getShortNamespaceName(size_t nIndex)
+{
+    OStringBuffer aBuf;
+    aBuf.append("ns");
+    aBuf.append(static_cast<sal_Int32>(nIndex));
+    return aBuf.makeStringAndClear();
+}
+
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/filter/orcus/xmlcontext.cxx b/sc/source/filter/orcus/xmlcontext.cxx
index 86865cd..3ad6951 100644
--- a/sc/source/filter/orcus/xmlcontext.cxx
+++ b/sc/source/filter/orcus/xmlcontext.cxx
@@ -31,6 +31,13 @@ ScOrcusXMLTreeParam::EntryData& setUserDataToEntry(
     return rStore.back();
 }
 
+void setEntityNameToUserData(
+    ScOrcusXMLTreeParam::EntryData& rEntryData,
+    const orcus::xml_structure_tree::entity_name& entity, const orcus::xml_structure_tree::walker& walker)
+{
+    rEntryData.mnNamespaceID = walker.get_xmlns_index(entity.ns);
+}
+
 OUString toString(const orcus::xml_structure_tree::entity_name& entity, const orcus::xml_structure_tree::walker& walker)
 {
     OUStringBuffer aBuf;
@@ -43,9 +50,10 @@ OUString toString(const orcus::xml_structure_tree::entity_name& entity, const or
             aBuf.append("???");
         else
         {
-            aBuf.append("ns");
-            aBuf.append(static_cast<sal_Int32>(index));
+            OString aName = ScOrcusImportXMLParam::getShortNamespaceName(index);
+            aBuf.append(OUString(aName.getStr(), aName.getLength(), RTL_TEXTENCODING_UTF8));
         }
+
         aBuf.append(':');
     }
     aBuf.append(OUString(entity.name.get(), entity.name.size(), RTL_TEXTENCODING_UTF8));
@@ -66,6 +74,8 @@ void populateTree(
         *pEntry, rParam.maUserDataStore,
         bRepeat ? ScOrcusXMLTreeParam::ElementRepeat : ScOrcusXMLTreeParam::ElementDefault);
 
+    setEntityNameToUserData(rEntryData, rElemName, rWalker);
+
     if (bRepeat)
     {
         // Recurring elements use different icon.
@@ -84,13 +94,16 @@ void populateTree(
     orcus::xml_structure_tree::entity_names_type::const_iterator itEnd = aNames.end();
     for (; it != itEnd; ++it)
     {
-        orcus::xml_structure_tree::entity_name aAttrName = *it;
-        SvTreeListEntry* pAttr = rTreeCtrl.InsertEntry(toString(aAttrName, rWalker), pEntry);
+        const orcus::xml_structure_tree::entity_name& rAttrName = *it;
+        SvTreeListEntry* pAttr = rTreeCtrl.InsertEntry(toString(rAttrName, rWalker), pEntry);
 
         if (!pAttr)
             continue;
 
-        setUserDataToEntry(*pAttr, rParam.maUserDataStore, ScOrcusXMLTreeParam::Attribute);
+        ScOrcusXMLTreeParam::EntryData& rAttrData =
+            setUserDataToEntry(*pAttr, rParam.maUserDataStore, ScOrcusXMLTreeParam::Attribute);
+        setEntityNameToUserData(rAttrData, rAttrName, rWalker);
+
         rTreeCtrl.SetExpandedEntryBmp(pAttr, rParam.maImgAttribute);
         rTreeCtrl.SetCollapsedEntryBmp(pAttr, rParam.maImgAttribute);
     }
@@ -183,6 +196,30 @@ bool ScOrcusXMLContextImpl::loadXMLStructure(SvTreeListBox& rTreeCtrl, ScOrcusXM
     return true;
 }
 
+namespace {
+
+class SetNamespaceAlias : std::unary_function<size_t, void>
+{
+    orcus::orcus_xml& mrFilter;
+    orcus::xmlns_repository& mrNsRepo;
+public:
+    SetNamespaceAlias(orcus::orcus_xml& filter, orcus::xmlns_repository& repo) :
+        mrFilter(filter), mrNsRepo(repo) {}
+
+    void operator() (size_t index)
+    {
+        orcus::xmlns_id_t nsid = mrNsRepo.get_identifier(index);
+        if (nsid == orcus::XMLNS_UNKNOWN_ID)
+            return;
+
+        OString aAlias = ScOrcusImportXMLParam::getShortNamespaceName(index);
+        mrFilter.set_namespace_alias(aAlias.getStr(), nsid);
+    }
+};
+
+
+}
+
 bool ScOrcusXMLContextImpl::importXML(const ScOrcusImportXMLParam& rParam)
 {
     ScOrcusFactory aFactory(mrDoc);
@@ -192,8 +229,11 @@ bool ScOrcusXMLContextImpl::importXML(const ScOrcusImportXMLParam& rParam)
     {
         orcus::orcus_xml filter(maNsRepo, &aFactory, NULL);
 
-        // Set cell links.
+        // Define all used namespaces.
+        std::for_each(rParam.maNamespaces.begin(), rParam.maNamespaces.end(), SetNamespaceAlias(filter, maNsRepo));
+
         {
+            // Set cell links.
             ScOrcusImportXMLParam::CellLinksType::const_iterator it = rParam.maCellLinks.begin();
             ScOrcusImportXMLParam::CellLinksType::const_iterator itEnd = rParam.maCellLinks.end();
             for (; it != itEnd; ++it)
@@ -208,8 +248,8 @@ bool ScOrcusXMLContextImpl::importXML(const ScOrcusImportXMLParam& rParam)
             }
         }
 
-        // Set range links.
         {
+            // Set range links.
             ScOrcusImportXMLParam::RangeLinksType::const_iterator it = rParam.maRangeLinks.begin();
             ScOrcusImportXMLParam::RangeLinksType::const_iterator itEnd = rParam.maRangeLinks.end();
             for (; it != itEnd; ++it)
diff --git a/sc/source/ui/xmlsource/xmlsourcedlg.cxx b/sc/source/ui/xmlsource/xmlsourcedlg.cxx
index 5019306..38a1d09 100644
--- a/sc/source/ui/xmlsource/xmlsourcedlg.cxx
+++ b/sc/source/ui/xmlsource/xmlsourcedlg.cxx
@@ -39,7 +39,8 @@ bool isAttribute(const SvTreeListEntry& rEntry)
     return pUserData->meType == ScOrcusXMLTreeParam::Attribute;
 }
 
-OUString getXPath(const SvTreeListBox& rTree, const SvTreeListEntry& rEntry)
+OUString getXPath(
+    const SvTreeListBox& rTree, const SvTreeListEntry& rEntry, std::vector<size_t>& rNamespaces)
 {
     OUStringBuffer aBuf;
     for (const SvTreeListEntry* p = &rEntry; p; p = rTree.GetParent(p))
@@ -48,6 +49,11 @@ OUString getXPath(const SvTreeListBox& rTree, const SvTreeListEntry& rEntry)
         if (!pItem)
             continue;
 
+        // Collect used namespace.
+        const ScOrcusXMLTreeParam::EntryData* pData = ScOrcusXMLTreeParam::getUserData(*p);
+        if (pData)
+            rNamespaces.push_back(pData->mnNamespaceID);
+
         const SvLBoxString* pStr = static_cast<const SvLBoxString*>(pItem);
         aBuf.insert(0, pStr->GetText());
         aBuf.insert(0, isAttribute(*p) ? '@' : '/');
@@ -428,7 +434,9 @@ namespace {
 /**
  * Pick only the leaf elements.
  */
-void getFieldLinks(ScOrcusImportXMLParam::RangeLink& rRangeLink, const SvTreeListBox& rTree, const SvTreeListEntry& rEntry)
+void getFieldLinks(
+    ScOrcusImportXMLParam::RangeLink& rRangeLink, std::vector<size_t>& rNamespaces,
+    const SvTreeListBox& rTree, const SvTreeListEntry& rEntry)
 {
     const SvTreeListEntries& rChildren = rEntry.GetChildEntries();
     if (rChildren.empty())
@@ -439,7 +447,7 @@ void getFieldLinks(ScOrcusImportXMLParam::RangeLink& rRangeLink, const SvTreeLis
     for (; it != itEnd; ++it)
     {
         const SvTreeListEntry& rChild = *it;
-        OUString aPath = getXPath(rTree, rChild);
+        OUString aPath = getXPath(rTree, rChild, rNamespaces);
         const ScOrcusXMLTreeParam::EntryData* pUserData = ScOrcusXMLTreeParam::getUserData(rChild);
 
         if (pUserData && pUserData->mbLeafNode)
@@ -450,10 +458,17 @@ void getFieldLinks(ScOrcusImportXMLParam::RangeLink& rRangeLink, const SvTreeLis
         }
 
         // Walk recursively.
-        getFieldLinks(rRangeLink, rTree, rChild);
+        getFieldLinks(rRangeLink, rNamespaces, rTree, rChild);
     }
 }
 
+void removeDuplicates(std::vector<size_t>& rArray)
+{
+    std::sort(rArray.begin(), rArray.end());
+    std::vector<size_t>::iterator it = std::unique(rArray.begin(), rArray.end());
+    rArray.erase(it, rArray.end());
+}
+
 }
 
 void ScXMLSourceDlg::OkPressed()
@@ -471,7 +486,7 @@ void ScXMLSourceDlg::OkPressed()
         for (; it != itEnd; ++it)
         {
             const SvTreeListEntry& rEntry = **it;
-            OUString aPath = getXPath(maLbTree, rEntry);
+            OUString aPath = getXPath(maLbTree, rEntry, aParam.maNamespaces);
             const ScOrcusXMLTreeParam::EntryData* pUserData = ScOrcusXMLTreeParam::getUserData(rEntry);
 
             aParam.maCellLinks.push_back(
@@ -493,14 +508,16 @@ void ScXMLSourceDlg::OkPressed()
             aRangeLink.maPos = pUserData->maLinkedPos;
 
             // Go through all its child elements.
-            getFieldLinks(aRangeLink, maLbTree, rEntry);
+            getFieldLinks(aRangeLink, aParam.maNamespaces, maLbTree, rEntry);
 
             aParam.maRangeLinks.push_back(aRangeLink);
         }
     }
 
-    // Now do the import.
+    // Remove duplicate namespace IDs.
+    removeDuplicates(aParam.maNamespaces);
 
+    // Now do the import.
     mpXMLContext->importXML(aParam);
 
     // Don't forget to broadcast the change.


More information about the Libreoffice-commits mailing list