[Libreoffice-commits] libvisio.git: src/lib src/test

Mihai Varga mihai.varga at collabora.com
Fri Feb 27 05:53:46 PST 2015


 src/lib/VSDMetaData.cpp  |  152 +++++++++++++++++++++++++++++++++++++----------
 src/lib/VSDMetaData.h    |    4 -
 src/lib/VSDParser.cpp    |   26 +++++---
 src/lib/VSDXMetaData.cpp |   21 ++++++
 src/lib/VSDXParser.cpp   |   38 ++++++++---
 src/lib/VSDXParser.h     |    2 
 src/lib/tokens.txt       |    5 +
 src/test/Makefile.am     |    2 
 src/test/data/dwg.vsd    |binary
 src/test/data/dwg.vsdx   |binary
 src/test/importtest.cpp  |   21 ++++++
 11 files changed, 219 insertions(+), 52 deletions(-)

New commits:
commit 3a8a08caebd5c8fbbc28ff34b0d7d0979662a56a
Author: Mihai Varga <mihai.varga at collabora.com>
Date:   Fri Feb 27 14:32:11 2015 +0200

    More metadata extracted from VSD/VSDX files
    
    Template, language, company and category metadata are extracted from
    VSD/VSDX files. Company and category are set as custom properties.
    I've also added unit tests for those 4 new document properties.
    
    Change-Id: Ic14bfa11a2a8253c79dd4c4466afc7f6b2ce4ea9
    Signed-off-by: Miklos Vajna <vmiklos at collabora.co.uk>

diff --git a/src/lib/VSDMetaData.cpp b/src/lib/VSDMetaData.cpp
index 4235b56..6449384 100644
--- a/src/lib/VSDMetaData.cpp
+++ b/src/lib/VSDMetaData.cpp
@@ -9,6 +9,8 @@
 
 #include "VSDMetaData.h"
 #include <cmath>
+#include <cstring>
+#include <string>
 #include <unicode/ucnv.h>
 #include <ctime>
 
@@ -21,6 +23,60 @@ libvisio::VSDMetaData::~VSDMetaData()
 {
 }
 
+enum PIDDSI
+{
+  PIDDSI_CODEPAGE          = 0x00000001,
+  PIDDSI_CATEGORY          = 0x00000002,
+  PIDDSI_PRESFORMAT        = 0x00000003,
+  PIDDSI_BYTECOUNT         = 0x00000004,
+  PIDDSI_LINECOUNT         = 0x00000005,
+  PIDDSI_PARACOUNT         = 0x00000006,
+  PIDDSI_SLIDECOUNT        = 0x00000007,
+  PIDDSI_NOTECOUNT         = 0x00000008,
+  PIDDSI_HIDDENCOUNT       = 0x00000009,
+  PIDDSI_MMCLIPCOUNT       = 0x0000000A,
+  PIDDSI_SCALE             = 0x0000000B,
+  PIDDSI_HEADINGPAIR       = 0x0000000C,
+  PIDDSI_DOCPARTS          = 0x0000000D,
+  PIDDSI_MANAGER           = 0x0000000E,
+  PIDDSI_COMPANY           = 0x0000000F,
+  PIDDSI_LINKSDIRTY        = 0x00000010,
+  PIDDSI_CCHWITHSPACES     = 0x00000011,
+  PIDDSI_SHAREDDOC         = 0x00000013,
+  PIDDSI_LINKBASE          = 0x00000014,
+  PIDDSI_HLINKS            = 0x00000015,
+  PIDDSI_HYPERLINKSCHANGED = 0x00000016,
+  PIDDSI_VERSION           = 0x00000017,
+  PIDDSI_DIGSIG            = 0x00000018,
+  PIDDSI_CONTENTTYPE       = 0x0000001A,
+  PIDDSI_CONTENTSTATUS     = 0x0000001B,
+  PIDDSI_LANGUAGE          = 0x0000001C,
+  PIDDSI_DOCVERSION        = 0x0000001D
+};
+
+enum PIDSI
+{
+  CODEPAGE_PROPERTY_IDENTIFIER = 0x00000001,
+  PIDSI_TITLE                  = 0x00000002,
+  PIDSI_SUBJECT                = 0x00000003,
+  PIDSI_AUTHOR                 = 0x00000004,
+  PIDSI_KEYWORDS               = 0x00000005,
+  PIDSI_COMMENTS               = 0x00000006,
+  PIDSI_TEMPLATE               = 0x00000007,
+  PIDSI_LASTAUTHOR             = 0x00000008,
+  PIDSI_REVNUMBER              = 0x00000009,
+  PIDSI_EDITTIME               = 0x0000000A,
+  PIDSI_LASTPRINTED            = 0x0000000B,
+  PIDSI_CREATE_DTM             = 0x0000000C,
+  PIDSI_LASTSAVE_DTM           = 0x0000000D,
+  PIDSI_PAGECOUNT              = 0x0000000E,
+  PIDSI_WORDCOUNT              = 0x0000000F,
+  PIDSI_CHARCOUNT              = 0x00000010,
+  PIDSI_THUMBNAIL              = 0x00000011,
+  PIDSI_APPNAME                = 0x00000012,
+  PIDSI_DOC_SECURITY           = 0x00000013
+};
+
 bool libvisio::VSDMetaData::parse(librevenge::RVNGInputStream *input)
 {
   if (!input)
@@ -44,12 +100,24 @@ void libvisio::VSDMetaData::readPropertySetStream(librevenge::RVNGInputStream *i
  // NumPropertySets
  input->seek(4, librevenge::RVNG_SEEK_CUR);
  // FMTID0
-  input->seek(16, librevenge::RVNG_SEEK_CUR);
+  // Read the 16 FMTID bytes field by field (instead of skipping them) and render them as GUID text
+  uint32_t data1 = readU32(input);
+  uint16_t data2 = readU16(input);
+  uint16_t data3 = readU16(input);
+  uint8_t data4[8];
+  for (int i = 0; i < 8; i++)
+  {
+    data4[i] = readU8(input);
+  }
+  char FMTID0[37]; // 36 characters of GUID text plus the terminating NUL that sprintf appends
+  sprintf(FMTID0, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", data1, data2, data3,
+          data4[0], data4[1], data4[2], data4[3], data4[4], data4[5], data4[6], data4[7]);
+
  uint32_t offset0 = readU32(input);
-  readPropertySet(input, offset0);
+  readPropertySet(input, offset0, FMTID0);
 }
 
-void libvisio::VSDMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset)
+void libvisio::VSDMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset, char *FMTID)
 {
  input->seek(offset, librevenge::RVNG_SEEK_SET);
 
@@ -62,17 +130,15 @@ void libvisio::VSDMetaData::readPropertySet(librevenge::RVNGInputStream *input,
   {
     if (i >= m_idsAndOffsets.size())
       break;
-    readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second);
+    readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second, FMTID);
   }
 }
 
-#define CODEPAGE_PROPERTY_IDENTIFIER 0x00000001
-
 uint32_t libvisio::VSDMetaData::getCodePage()
 {
   for (size_t i = 0; i < m_idsAndOffsets.size(); ++i)
   {
-    if (m_idsAndOffsets[i].first == CODEPAGE_PROPERTY_IDENTIFIER)
+    if (m_idsAndOffsets[i].first == PIDSI::CODEPAGE_PROPERTY_IDENTIFIER)
     {
       if (i >= m_typedPropertyValues.size())
         break;
@@ -93,13 +159,10 @@ void libvisio::VSDMetaData::readPropertyIdentifierAndOffset(librevenge::RVNGInpu
 #define VT_I2 0x0002
 #define VT_LPSTR 0x001E
 
-#define PIDSI_TITLE 0x00000002
-#define PIDSI_SUBJECT 0x00000003
-#define PIDSI_AUTHOR 0x00000004
-#define PIDSI_KEYWORDS 0x00000005
-#define PIDSI_COMMENTS 0x00000006
-
-void libvisio::VSDMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset)
+void libvisio::VSDMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input,
+                                                   uint32_t index,
+                                                   uint32_t offset,
+                                                   char *FMTID)
 {
   input->seek(offset, librevenge::RVNG_SEEK_SET);
   uint16_t type = readU16(input);
@@ -119,24 +182,53 @@ void libvisio::VSDMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *
       if (index >= m_idsAndOffsets.size())
         return;
 
-      switch (m_idsAndOffsets[index].first)
+      if (!strcmp(FMTID, "f29f85e0-4ff9-1068-ab91-08002b27b3d9")) // FMTID of the SummaryInformation property set
       {
-      case PIDSI_TITLE:
-        m_metaData.insert("dc:title", string);
-        break;
-      case PIDSI_SUBJECT:
-        m_metaData.insert("dc:subject", string);
-        break;
-      case PIDSI_AUTHOR:
-        m_metaData.insert("meta:initial-creator", string);
-        m_metaData.insert("dc:creator", string);
-        break;
-      case PIDSI_KEYWORDS:
-        m_metaData.insert("meta:keyword", string);
-        break;
-      case PIDSI_COMMENTS:
-        m_metaData.insert("dc:description", string);
-        break;
+        switch (m_idsAndOffsets[index].first)
+        {
+        case PIDSI::PIDSI_TITLE:
+          m_metaData.insert("dc:title", string);
+          break;
+        case PIDSI::PIDSI_SUBJECT:
+          m_metaData.insert("dc:subject", string);
+          break;
+        case PIDSI::PIDSI_AUTHOR:
+          m_metaData.insert("meta:initial-creator", string);
+          m_metaData.insert("dc:creator", string);
+          break;
+        case PIDSI::PIDSI_KEYWORDS:
+          m_metaData.insert("meta:keyword", string);
+          break;
+        case PIDSI::PIDSI_COMMENTS:
+          m_metaData.insert("dc:description", string);
+          break;
+        case PIDSI::PIDSI_TEMPLATE:
+        {
+          // keep only the file name: drop everything up to the last '/' or '\'
+          std::string templateHref(string.cstr());
+          size_t found = templateHref.find_last_of("/\\");
+          if (found != std::string::npos)
+            string = librevenge::RVNGString(templateHref.substr(found+1).c_str());
+          m_metaData.insert("librevenge:template", string);
+          break;
+        }
+        }
+      }
+      else if (!strcmp(FMTID, "d5cdd502-2e9c-101b-9397-08002b2cf9ae")) // FMTID of the DocumentSummaryInformation property set
+      {
+        switch (m_idsAndOffsets[index].first)
+        {
+        case PIDDSI::PIDDSI_CATEGORY:
+          m_metaData.insert("librevenge:category", string);
+          break;
+        case PIDDSI::PIDDSI_LINECOUNT:
+          // NOTE: by name this should be PIDDSI::PIDDSI_COMPANY, but
+          // PIDDSI_LINECOUNT is the id the company string is actually mapped to
+          m_metaData.insert("librevenge:company", string);
+          break;
+        case PIDDSI::PIDDSI_LANGUAGE:
+          m_metaData.insert("dc:language", string);
+          break;
+        }
       }
     }
   }
diff --git a/src/lib/VSDMetaData.h b/src/lib/VSDMetaData.h
index 581b0a2..dcb06ee 100644
--- a/src/lib/VSDMetaData.h
+++ b/src/lib/VSDMetaData.h
@@ -34,9 +34,9 @@ private:
   VSDMetaData &operator=(const VSDMetaData &);
 
   void readPropertySetStream(librevenge::RVNGInputStream *input);
-  void readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset);
+  void readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset, char *FMTID);
   void readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input);
-  void readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset);
+  void readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset, char *FMTID);
   librevenge::RVNGString readCodePageString(librevenge::RVNGInputStream *input);
 
   uint32_t getCodePage();
diff --git a/src/lib/VSDParser.cpp b/src/lib/VSDParser.cpp
index 388e84d..5da88c7 100644
--- a/src/lib/VSDParser.cpp
+++ b/src/lib/VSDParser.cpp
@@ -154,18 +154,31 @@ bool libvisio::VSDParser::parseMetaData()
   m_container->seek(0, librevenge::RVNG_SEEK_SET);
   if (!m_container->isStructured())
     return false;
-  librevenge::RVNGInputStream *stream = m_container->getSubStreamByName("\x05SummaryInformation");
-  if (!stream)
-    return false;
-
+  bool result = false;
   VSDMetaData metaData;
-  metaData.parse(stream);
+
+  librevenge::RVNGInputStream *summaryInfo = m_container->getSubStreamByName("\x05SummaryInformation");
+  if (summaryInfo)
+  {
+    result = true;
+    metaData.parse(summaryInfo);
+    delete summaryInfo;
+  }
+
+  // octal escape on purpose: in "\x05Document..." the 'D' would be consumed as a hex digit of the escape
+  librevenge::RVNGInputStream *docSummaryInfo = m_container->getSubStreamByName("\005DocumentSummaryInformation");
+  if (docSummaryInfo)
+  {
+    result = true;
+    metaData.parse(docSummaryInfo);
+    delete docSummaryInfo;
+  }
+
   m_container->seek(0, librevenge::RVNG_SEEK_SET);
   metaData.parseTimes(m_container);
   m_collector->collectMetaData(metaData.getMetaData());
 
-  delete stream;
-  return true;
+  return result;
 }
 
 bool libvisio::VSDParser::parseDocument(librevenge::RVNGInputStream *input, unsigned shift)
diff --git a/src/lib/VSDXMetaData.cpp b/src/lib/VSDXMetaData.cpp
index 4987cb2..94b1f99 100644
--- a/src/lib/VSDXMetaData.cpp
+++ b/src/lib/VSDXMetaData.cpp
@@ -10,6 +10,7 @@
 #include "VSDXMetaData.h"
 #include "VSDXMLTokenMap.h"
 #include "libvisio_utils.h"
+#include <string>
 
 libvisio::VSDXMetaData::VSDXMetaData()
   : m_metaData()
@@ -82,14 +83,31 @@ void libvisio::VSDXMetaData::readCoreProperties(xmlTextReaderPtr reader)
     case XML_CP_LASTMODIFIEDBY:
       m_metaData.insert("dc:creator", readString(reader, XML_CP_LASTMODIFIEDBY));
       break;
+    case XML_DC_LANGUAGE:
+      m_metaData.insert("dc:language", readString(reader, XML_DC_LANGUAGE));
+      break;
     case XML_CP_CATEGORY:
       m_metaData.insert("librevenge:category", readString(reader, XML_CP_CATEGORY));
       break;
+    case XML_COMPANY:
+      m_metaData.insert("librevenge:company", readString(reader, XML_COMPANY));
+      break;
+    case XML_TEMPLATE:
+    {
+      librevenge::RVNGString templateHrefRVNG = readString(reader, XML_TEMPLATE);
+      std::string templateHref(templateHrefRVNG.cstr());
+      size_t found = templateHref.find_last_of("/\\");
+      if (found != std::string::npos)
+        templateHrefRVNG = librevenge::RVNGString(templateHref.substr(found+1).c_str());
+      m_metaData.insert("librevenge:template", templateHrefRVNG);
+      break;
+    }
     default:
       break;
     }
   }
-  while ((XML_CP_COREPROPERTIES != tokenId || XML_READER_TYPE_END_ELEMENT != tokenType) && 1 == ret);
+  while (((XML_CP_COREPROPERTIES != tokenId && XML_PROPERTIES != tokenId) ||
+          XML_READER_TYPE_END_ELEMENT != tokenType) && 1 == ret); // stop at the end tag of either root element
 }
 
 bool libvisio::VSDXMetaData::parse(librevenge::RVNGInputStream *input)
@@ -110,6 +128,7 @@ bool libvisio::VSDXMetaData::parse(librevenge::RVNGInputStream *input)
       switch (tokenId)
       {
       case XML_CP_COREPROPERTIES:
+      case XML_PROPERTIES:
         readCoreProperties(reader);
         break;
       default:
diff --git a/src/lib/VSDXParser.cpp b/src/lib/VSDXParser.cpp
index d694650..f11d778 100644
--- a/src/lib/VSDXParser.cpp
+++ b/src/lib/VSDXParser.cpp
@@ -93,9 +93,7 @@ bool libvisio::VSDXParser::parseMain()
 
     VSDContentCollector contentCollector(m_painter, groupXFormsSequence, groupMembershipsSequence, documentPageShapeOrders, styles, m_stencils);
     m_collector = &contentCollector;
-    const libvisio::VSDXRelationship *metaDataRel = rootRels.getRelationshipByType("http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties");
-    if (metaDataRel)
-      parseMetaData(m_input, metaDataRel->getTarget().c_str());
+    parseMetaData(m_input, rootRels);
 
     if (!parseDocument(m_input, rel->getTarget().c_str()))
       return false;
@@ -280,23 +278,43 @@ bool libvisio::VSDXParser::parseTheme(librevenge::RVNGInputStream *input, const
   return true;
 }
 
-bool libvisio::VSDXParser::parseMetaData(librevenge::RVNGInputStream *input, const char *name)
+bool libvisio::VSDXParser::parseMetaData(librevenge::RVNGInputStream *input, libvisio::VSDXRelationships &rels)
 {
   if (!input)
     return false;
   input->seek(0, librevenge::RVNG_SEEK_SET);
   if (!input->isStructured())
     return false;
-  librevenge::RVNGInputStream *stream = input->getSubStreamByName(name);
-  if (!stream)
-    return false;
+
+  bool result = false;
 
   VSDXMetaData metaData;
-  metaData.parse(stream);
+  const libvisio::VSDXRelationship *coreProp = rels.getRelationshipByType("http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties");
+  if (coreProp)
+  {
+    librevenge::RVNGInputStream *stream = input->getSubStreamByName(coreProp->getTarget().c_str());
+    if (stream)
+    {
+      result = true;
+      metaData.parse(stream);
+      delete stream;
+    }
+  }
+
+  const libvisio::VSDXRelationship *extendedProp = rels.getRelationshipByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties");
+  if (extendedProp)
+  {
+    librevenge::RVNGInputStream *stream = input->getSubStreamByName(extendedProp->getTarget().c_str());
+    if (stream)
+    {
+      result = true;
+      metaData.parse(stream);
+      delete stream;
+    }
+  }
   m_collector->collectMetaData(metaData.getMetaData());
 
-  delete stream;
-  return true;
+  return result;
 }
 
 void libvisio::VSDXParser::processXmlDocument(librevenge::RVNGInputStream *input, VSDXRelationships &rels)
diff --git a/src/lib/VSDXParser.h b/src/lib/VSDXParser.h
index 8566403..069204c 100644
--- a/src/lib/VSDXParser.h
+++ b/src/lib/VSDXParser.h
@@ -54,7 +54,7 @@ private:
   bool parsePages(librevenge::RVNGInputStream *input, const char *name);
   bool parsePage(librevenge::RVNGInputStream *input, const char *name);
   bool parseTheme(librevenge::RVNGInputStream *input, const char *name);
-  bool parseMetaData(librevenge::RVNGInputStream *input, const char *name);
+  bool parseMetaData(librevenge::RVNGInputStream *input, VSDXRelationships &rels);
   void processXmlDocument(librevenge::RVNGInputStream *input, VSDXRelationships &rels);
   void processXmlNode(xmlTextReaderPtr reader);
 
diff --git a/src/lib/tokens.txt b/src/lib/tokens.txt
index 6b01690..c165c0f 100644
--- a/src/lib/tokens.txt
+++ b/src/lib/tokens.txt
@@ -234,5 +234,10 @@ dc:title
 dcterms:created
 dcterms:modified
 dc:description
+dc:template
 cp:lastModifiedBy
 cp:category
+Company
+Properties
+Template
+dc:language
diff --git a/src/test/Makefile.am b/src/test/Makefile.am
index 9285cbf..619302a 100644
--- a/src/test/Makefile.am
+++ b/src/test/Makefile.am
@@ -23,6 +23,8 @@ EXTRA_DIST = \
 	     data/fdo86664.vsdx \
 	     data/fdo86729-ms1252.vsd \
 	     data/fdo86729-utf8.vsd \
+	     data/dwg.vsd \
+	     data/dwg.vsdx \
 	     $(test_SOURCES)
 
 TESTS = test
diff --git a/src/test/data/dwg.vsd b/src/test/data/dwg.vsd
new file mode 100644
index 0000000..bea1075
Binary files /dev/null and b/src/test/data/dwg.vsd differ
diff --git a/src/test/data/dwg.vsdx b/src/test/data/dwg.vsdx
new file mode 100644
index 0000000..6642f8c
Binary files /dev/null and b/src/test/data/dwg.vsdx differ
diff --git a/src/test/importtest.cpp b/src/test/importtest.cpp
index 29b99fd..7aba7bc 100644
--- a/src/test/importtest.cpp
+++ b/src/test/importtest.cpp
@@ -124,11 +124,15 @@ class ImportTest : public CPPUNIT_NS::TestFixture
   CPPUNIT_TEST(testVsdxMetadataTitle);
   CPPUNIT_TEST(testVsdMetadataTitleMs1252);
   CPPUNIT_TEST(testVsdMetadataTitleUtf8);
+  CPPUNIT_TEST(testVsdUserDefinedMetadata);
+  CPPUNIT_TEST(testVsdxUserDefinedMetadata);
   CPPUNIT_TEST_SUITE_END();
 
   void testVsdxMetadataTitle();
   void testVsdMetadataTitleMs1252();
   void testVsdMetadataTitleUtf8();
+  void testVsdUserDefinedMetadata();
+  void testVsdxUserDefinedMetadata();
 
   xmlBufferPtr m_buffer;
   xmlDocPtr m_doc;
@@ -203,6 +207,23 @@ void ImportTest::testVsdMetadataTitleUtf8()
   assertXPath(m_doc, "/document/setDocumentMetaData", "date", "2014-11-26T09:24:56Z");
 }
 
+void ImportTest::testVsdUserDefinedMetadata()
+{
+  m_doc = parse("dwg.vsd", m_buffer);
+  assertXPath(m_doc, "/document/setDocumentMetaData", "category", "Category test");
+  assertXPath(m_doc, "/document/setDocumentMetaData", "company", "Company test");
+  assertXPath(m_doc, "/document/setDocumentMetaData", "template", "BASICD_M.VSTX");
+}
+
+void ImportTest::testVsdxUserDefinedMetadata()
+{
+  m_doc = parse("dwg.vsdx", m_buffer);
+  assertXPath(m_doc, "/document/setDocumentMetaData", "category", "Category test");
+  assertXPath(m_doc, "/document/setDocumentMetaData", "company", "Company test");
+  assertXPath(m_doc, "/document/setDocumentMetaData", "language", "en-US");
+  assertXPath(m_doc, "/document/setDocumentMetaData", "template", "BASICD_M.VSTX");
+}
+
 CPPUNIT_TEST_SUITE_REGISTRATION(ImportTest);
 
 /* vim:set shiftwidth=2 softtabstop=2 expandtab: */


More information about the Libreoffice-commits mailing list