[Libreoffice-commits] libvisio.git: src/lib src/test
Mihai Varga
mihai.varga at collabora.com
Fri Feb 27 05:53:46 PST 2015
src/lib/VSDMetaData.cpp | 152 +++++++++++++++++++++++++++++++++++++----------
src/lib/VSDMetaData.h | 4 -
src/lib/VSDParser.cpp | 26 +++++---
src/lib/VSDXMetaData.cpp | 21 ++++++
src/lib/VSDXParser.cpp | 38 ++++++++---
src/lib/VSDXParser.h | 2
src/lib/tokens.txt | 5 +
src/test/Makefile.am | 2
src/test/data/dwg.vsd |binary
src/test/data/dwg.vsdx |binary
src/test/importtest.cpp | 21 ++++++
11 files changed, 219 insertions(+), 52 deletions(-)
New commits:
commit 3a8a08caebd5c8fbbc28ff34b0d7d0979662a56a
Author: Mihai Varga <mihai.varga at collabora.com>
Date: Fri Feb 27 14:32:11 2015 +0200
More metadata extracted from VSD/VSDX files
Template, language, company and category metadata are extracted from
VSD/VSDX files. Company and category are set as custom properties.
I've also added unit tests for those 4 new document properties.
Change-Id: Ic14bfa11a2a8253c79dd4c4466afc7f6b2ce4ea9
Signed-off-by: Miklos Vajna <vmiklos at collabora.co.uk>
diff --git a/src/lib/VSDMetaData.cpp b/src/lib/VSDMetaData.cpp
index 4235b56..6449384 100644
--- a/src/lib/VSDMetaData.cpp
+++ b/src/lib/VSDMetaData.cpp
@@ -9,6 +9,8 @@
#include "VSDMetaData.h"
#include <cmath>
+#include <cstring>
+#include <string>
#include <unicode/ucnv.h>
#include <ctime>
@@ -21,6 +23,60 @@ libvisio::VSDMetaData::~VSDMetaData()
{
}
+enum PIDDSI // property ids switched on when the property set FMTID is d5cdd502-2e9c-101b-9397-08002b2cf9ae
+{
+ PIDDSI_CODEPAGE = 0x00000001,
+ PIDDSI_CATEGORY = 0x00000002, // inserted as "librevenge:category"
+ PIDDSI_PRESFORMAT = 0x00000003,
+ PIDDSI_BYTECOUNT = 0x00000004,
+ PIDDSI_LINECOUNT = 0x00000005, // NOTE(review): readTypedPropertyValue inserts this id as "librevenge:company"
+ PIDDSI_PARACOUNT = 0x00000006,
+ PIDDSI_SLIDECOUNT = 0x00000007,
+ PIDDSI_NOTECOUNT = 0x00000008,
+ PIDDSI_HIDDENCOUNT = 0x00000009,
+ PIDDSI_MMCLIPCOUNT = 0x0000000A,
+ PIDDSI_SCALE = 0x0000000B,
+ PIDDSI_HEADINGPAIR = 0x0000000C,
+ PIDDSI_DOCPARTS = 0x0000000D,
+ PIDDSI_MANAGER = 0x0000000E,
+ PIDDSI_COMPANY = 0x0000000F, // not matched by readTypedPropertyValue in this patch; see PIDDSI_LINECOUNT
+ PIDDSI_LINKSDIRTY = 0x00000010,
+ PIDDSI_CCHWITHSPACES = 0x00000011,
+ PIDDSI_SHAREDDOC = 0x00000013,
+ PIDDSI_LINKBASE = 0x00000014,
+ PIDDSI_HLINKS = 0x00000015,
+ PIDDSI_HYPERLINKSCHANGED = 0x00000016,
+ PIDDSI_VERSION = 0x00000017,
+ PIDDSI_DIGSIG = 0x00000018,
+ PIDDSI_CONTENTTYPE = 0x0000001A,
+ PIDDSI_CONTENTSTATUS = 0x0000001B,
+ PIDDSI_LANGUAGE = 0x0000001C, // inserted as "dc:language"
+ PIDDSI_DOCVERSION = 0x0000001D
+};
+
+enum PIDSI // property ids switched on when the property set FMTID is f29f85e0-4ff9-1068-ab91-08002b27b3d9
+{
+ CODEPAGE_PROPERTY_IDENTIFIER = 0x00000001, // looked up by getCodePage(); replaces the former #define of the same name
+ PIDSI_TITLE = 0x00000002, // inserted as "dc:title"
+ PIDSI_SUBJECT = 0x00000003, // inserted as "dc:subject"
+ PIDSI_AUTHOR = 0x00000004, // inserted as both "meta:initial-creator" and "dc:creator"
+ PIDSI_KEYWORDS = 0x00000005, // inserted as "meta:keyword"
+ PIDSI_COMMENTS = 0x00000006, // inserted as "dc:description"
+ PIDSI_TEMPLATE = 0x00000007, // inserted as "librevenge:template" after dropping everything up to the last '/' or '\'
+ PIDSI_LASTAUTHOR = 0x00000008,
+ PIDSI_REVNUMBER = 0x00000009,
+ PIDSI_EDITTIME = 0x0000000A,
+ PIDSI_LASTPRINTED = 0x0000000B,
+ PIDSI_CREATE_DTM = 0x0000000C,
+ PIDSI_LASTSAVE_DTM = 0x0000000D,
+ PIDSI_PAGECOUNT = 0x0000000E,
+ PIDSI_WORDCOUNT = 0x0000000F,
+ PIDSI_CHARCOUNT = 0x00000010,
+ PIDSI_THUMBNAIL = 0x00000011,
+ PIDSI_APPNAME = 0x00000012,
+ PIDSI_DOC_SECURITY = 0x00000013
+};
+
bool libvisio::VSDMetaData::parse(librevenge::RVNGInputStream *input)
{
if (!input)
@@ -44,12 +100,24 @@ void libvisio::VSDMetaData::readPropertySetStream(librevenge::RVNGInputStream *i
// NumPropertySets
input->seek(4, librevenge::RVNG_SEEK_CUR);
// FMTID0
- input->seek(16, librevenge::RVNG_SEEK_CUR);
+ //input->seek(16, librevenge::RVNG_SEEK_CUR);
+ uint32_t data1 = readU32(input);
+ uint16_t data2 = readU16(input);
+ uint16_t data3 = readU16(input);
+ uint8_t data4[8];
+ for (int i = 0; i < 8; i++)
+ {
+ data4[i] = readU8(input);
+ }
+ char FMTID0[36];
+ sprintf(FMTID0, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", data1, data2, data3,
+ data4[0], data4[1], data4[2], data4[3], data4[4], data4[5], data4[6], data4[7]);
+
uint32_t offset0 = readU32(input);
- readPropertySet(input, offset0);
+ readPropertySet(input, offset0, FMTID0);
}
-void libvisio::VSDMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset)
+void libvisio::VSDMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset, char *FMTID)
{
input->seek(offset, librevenge::RVNG_SEEK_SET);
@@ -62,17 +130,15 @@ void libvisio::VSDMetaData::readPropertySet(librevenge::RVNGInputStream *input,
{
if (i >= m_idsAndOffsets.size())
break;
- readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second);
+ readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second, FMTID);
}
}
-#define CODEPAGE_PROPERTY_IDENTIFIER 0x00000001
-
uint32_t libvisio::VSDMetaData::getCodePage()
{
for (size_t i = 0; i < m_idsAndOffsets.size(); ++i)
{
- if (m_idsAndOffsets[i].first == CODEPAGE_PROPERTY_IDENTIFIER)
+ if (m_idsAndOffsets[i].first == PIDSI::CODEPAGE_PROPERTY_IDENTIFIER)
{
if (i >= m_typedPropertyValues.size())
break;
@@ -93,13 +159,10 @@ void libvisio::VSDMetaData::readPropertyIdentifierAndOffset(librevenge::RVNGInpu
#define VT_I2 0x0002
#define VT_LPSTR 0x001E
-#define PIDSI_TITLE 0x00000002
-#define PIDSI_SUBJECT 0x00000003
-#define PIDSI_AUTHOR 0x00000004
-#define PIDSI_KEYWORDS 0x00000005
-#define PIDSI_COMMENTS 0x00000006
-
-void libvisio::VSDMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset)
+void libvisio::VSDMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input,
+ uint32_t index,
+ uint32_t offset,
+ char *FMTID)
{
input->seek(offset, librevenge::RVNG_SEEK_SET);
uint16_t type = readU16(input);
@@ -119,24 +182,51 @@ void libvisio::VSDMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *
if (index >= m_idsAndOffsets.size())
return;
- switch (m_idsAndOffsets[index].first)
+ if (!strcmp(FMTID, "f29f85e0-4ff9-1068-ab91-08002b27b3d9"))
{
- case PIDSI_TITLE:
- m_metaData.insert("dc:title", string);
- break;
- case PIDSI_SUBJECT:
- m_metaData.insert("dc:subject", string);
- break;
- case PIDSI_AUTHOR:
- m_metaData.insert("meta:initial-creator", string);
- m_metaData.insert("dc:creator", string);
- break;
- case PIDSI_KEYWORDS:
- m_metaData.insert("meta:keyword", string);
- break;
- case PIDSI_COMMENTS:
- m_metaData.insert("dc:description", string);
- break;
+ switch (m_idsAndOffsets[index].first)
+ {
+ case PIDSI::PIDSI_TITLE:
+ m_metaData.insert("dc:title", string);
+ break;
+ case PIDSI::PIDSI_SUBJECT:
+ m_metaData.insert("dc:subject", string);
+ break;
+ case PIDSI::PIDSI_AUTHOR:
+ m_metaData.insert("meta:initial-creator", string);
+ m_metaData.insert("dc:creator", string);
+ break;
+ case PIDSI::PIDSI_KEYWORDS:
+ m_metaData.insert("meta:keyword", string);
+ break;
+ case PIDSI::PIDSI_COMMENTS:
+ m_metaData.insert("dc:description", string);
+ break;
+ case PIDSI::PIDSI_TEMPLATE:
+ std::string templateHref(string.cstr());
+ size_t found = templateHref.find_last_of("/\\");
+ if (found != std::string::npos)
+ string = librevenge::RVNGString(templateHref.substr(found+1).c_str());
+ m_metaData.insert("librevenge:template", string);
+ break;
+ }
+ }
+ else if (!strcmp(FMTID,"d5cdd502-2e9c-101b-9397-08002b2cf9ae"))
+ {
+ switch (m_idsAndOffsets[index].first)
+ {
+ case PIDDSI::PIDDSI_CATEGORY:
+ m_metaData.insert("librevenge:category", string);
+ break;
+ case PIDDSI::PIDDSI_LINECOUNT:
+ // this should actually be PIDDSI::PIDDSI_COMPANY but this
+ // is what company is mapped to
+ m_metaData.insert("librevenge:company", string);
+ break;
+ case PIDDSI::PIDDSI_LANGUAGE:
+ m_metaData.insert("dc:language", string);
+ break;
+ }
}
}
}
diff --git a/src/lib/VSDMetaData.h b/src/lib/VSDMetaData.h
index 581b0a2..dcb06ee 100644
--- a/src/lib/VSDMetaData.h
+++ b/src/lib/VSDMetaData.h
@@ -34,9 +34,9 @@ private:
VSDMetaData &operator=(const VSDMetaData &);
void readPropertySetStream(librevenge::RVNGInputStream *input);
- void readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset);
+ void readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset, char *FMTID);
void readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input);
- void readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset);
+ void readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset, char *FMTID);
librevenge::RVNGString readCodePageString(librevenge::RVNGInputStream *input);
uint32_t getCodePage();
diff --git a/src/lib/VSDParser.cpp b/src/lib/VSDParser.cpp
index 388e84d..5da88c7 100644
--- a/src/lib/VSDParser.cpp
+++ b/src/lib/VSDParser.cpp
@@ -154,18 +154,30 @@ bool libvisio::VSDParser::parseMetaData()
m_container->seek(0, librevenge::RVNG_SEEK_SET);
if (!m_container->isStructured())
return false;
- librevenge::RVNGInputStream *stream = m_container->getSubStreamByName("\x05SummaryInformation");
- if (!stream)
- return false;
-
+ bool result = false;
VSDMetaData metaData;
- metaData.parse(stream);
+
+ librevenge::RVNGInputStream *sumaryInfo = m_container->getSubStreamByName("\x05SummaryInformation");
+ if (sumaryInfo)
+ {
+ result = true;
+ metaData.parse(sumaryInfo);
+ delete sumaryInfo;
+ }
+
+ librevenge::RVNGInputStream *docSumaryInfo = m_container->getSubStreamByName("\005DocumentSummaryInformation");
+ if (docSumaryInfo)
+ {
+ result = true;
+ metaData.parse(docSumaryInfo);
+ delete docSumaryInfo;
+ }
+
m_container->seek(0, librevenge::RVNG_SEEK_SET);
metaData.parseTimes(m_container);
m_collector->collectMetaData(metaData.getMetaData());
- delete stream;
- return true;
+ return result;
}
bool libvisio::VSDParser::parseDocument(librevenge::RVNGInputStream *input, unsigned shift)
diff --git a/src/lib/VSDXMetaData.cpp b/src/lib/VSDXMetaData.cpp
index 4987cb2..94b1f99 100644
--- a/src/lib/VSDXMetaData.cpp
+++ b/src/lib/VSDXMetaData.cpp
@@ -10,6 +10,7 @@
#include "VSDXMetaData.h"
#include "VSDXMLTokenMap.h"
#include "libvisio_utils.h"
+#include <string>
libvisio::VSDXMetaData::VSDXMetaData()
: m_metaData()
@@ -82,14 +83,31 @@ void libvisio::VSDXMetaData::readCoreProperties(xmlTextReaderPtr reader)
case XML_CP_LASTMODIFIEDBY:
m_metaData.insert("dc:creator", readString(reader, XML_CP_LASTMODIFIEDBY));
break;
+ case XML_DC_LANGUAGE:
+ m_metaData.insert("dc:language", readString(reader, XML_DC_LANGUAGE));
+ break;
case XML_CP_CATEGORY:
m_metaData.insert("librevenge:category", readString(reader, XML_CP_CATEGORY));
break;
+ case XML_COMPANY:
+ m_metaData.insert("librevenge:company", readString(reader, XML_COMPANY));
+ break;
+ case XML_TEMPLATE:
+ {
+ librevenge::RVNGString templateHrefRVNG = readString(reader, XML_TEMPLATE);
+ std::string templateHref(templateHrefRVNG.cstr());
+ size_t found = templateHref.find_last_of("/\\");
+ if (found != std::string::npos)
+ templateHrefRVNG = librevenge::RVNGString(templateHref.substr(found+1).c_str());
+ m_metaData.insert("librevenge:template", templateHrefRVNG);
+ break;
+ }
default:
break;
}
}
- while ((XML_CP_COREPROPERTIES != tokenId || XML_READER_TYPE_END_ELEMENT != tokenType) && 1 == ret);
+ while ((XML_CP_COREPROPERTIES != tokenId || XML_READER_TYPE_END_ELEMENT != tokenType ||
+ XML_PROPERTIES != tokenId) && 1 == ret);
}
bool libvisio::VSDXMetaData::parse(librevenge::RVNGInputStream *input)
@@ -110,6 +128,7 @@ bool libvisio::VSDXMetaData::parse(librevenge::RVNGInputStream *input)
switch (tokenId)
{
case XML_CP_COREPROPERTIES:
+ case XML_PROPERTIES:
readCoreProperties(reader);
break;
default:
diff --git a/src/lib/VSDXParser.cpp b/src/lib/VSDXParser.cpp
index d694650..f11d778 100644
--- a/src/lib/VSDXParser.cpp
+++ b/src/lib/VSDXParser.cpp
@@ -93,9 +93,7 @@ bool libvisio::VSDXParser::parseMain()
VSDContentCollector contentCollector(m_painter, groupXFormsSequence, groupMembershipsSequence, documentPageShapeOrders, styles, m_stencils);
m_collector = &contentCollector;
- const libvisio::VSDXRelationship *metaDataRel = rootRels.getRelationshipByType("http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties");
- if (metaDataRel)
- parseMetaData(m_input, metaDataRel->getTarget().c_str());
+ parseMetaData(m_input, rootRels);
if (!parseDocument(m_input, rel->getTarget().c_str()))
return false;
@@ -280,23 +278,43 @@ bool libvisio::VSDXParser::parseTheme(librevenge::RVNGInputStream *input, const
return true;
}
-bool libvisio::VSDXParser::parseMetaData(librevenge::RVNGInputStream *input, const char *name)
+bool libvisio::VSDXParser::parseMetaData(librevenge::RVNGInputStream *input, libvisio::VSDXRelationships &rels) // parses the core- and extended-properties parts named in rels into one VSDXMetaData and passes it to m_collector
{
if (!input)
return false;
input->seek(0, librevenge::RVNG_SEEK_SET);
if (!input->isStructured())
return false;
- librevenge::RVNGInputStream *stream = input->getSubStreamByName(name);
- if (!stream)
- return false;
+
+ bool result = false; // flips to true as soon as one properties sub-stream could be opened
VSDXMetaData metaData;
- metaData.parse(stream);
+ const libvisio::VSDXRelationship *coreProp = rels.getRelationshipByType("http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties");
+ if (coreProp) // a missing relationship is not an error; the part is simply skipped
+ {
+ librevenge::RVNGInputStream *stream = input->getSubStreamByName(coreProp->getTarget().c_str());
+ if (stream)
+ {
+ result = true;
+ metaData.parse(stream); // parse()'s own return value is ignored here
+ delete stream; // release the sub-stream once it has been parsed
+ }
+ }
+
+ const libvisio::VSDXRelationship *extendedProp = rels.getRelationshipByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties");
+ if (extendedProp) // same handling as the core-properties part, accumulating into the same metaData
+ {
+ librevenge::RVNGInputStream *stream = input->getSubStreamByName(extendedProp->getTarget().c_str());
+ if (stream)
+ {
+ result = true;
+ metaData.parse(stream); // parse()'s own return value is ignored here
+ delete stream; // release the sub-stream once it has been parsed
+ }
+ }
m_collector->collectMetaData(metaData.getMetaData());
- delete stream;
- return true;
+ return result; // false when neither part could be opened; the metadata was still handed to the collector above
}
void libvisio::VSDXParser::processXmlDocument(librevenge::RVNGInputStream *input, VSDXRelationships &rels)
diff --git a/src/lib/VSDXParser.h b/src/lib/VSDXParser.h
index 8566403..069204c 100644
--- a/src/lib/VSDXParser.h
+++ b/src/lib/VSDXParser.h
@@ -54,7 +54,7 @@ private:
bool parsePages(librevenge::RVNGInputStream *input, const char *name);
bool parsePage(librevenge::RVNGInputStream *input, const char *name);
bool parseTheme(librevenge::RVNGInputStream *input, const char *name);
- bool parseMetaData(librevenge::RVNGInputStream *input, const char *name);
+ bool parseMetaData(librevenge::RVNGInputStream *input, VSDXRelationships &rels);
void processXmlDocument(librevenge::RVNGInputStream *input, VSDXRelationships &rels);
void processXmlNode(xmlTextReaderPtr reader);
diff --git a/src/lib/tokens.txt b/src/lib/tokens.txt
index 6b01690..c165c0f 100644
--- a/src/lib/tokens.txt
+++ b/src/lib/tokens.txt
@@ -234,5 +234,10 @@ dc:title
dcterms:created
dcterms:modified
dc:description
+dc:template
cp:lastModifiedBy
cp:category
+Company
+Properties
+Template
+dc:language
diff --git a/src/test/Makefile.am b/src/test/Makefile.am
index 9285cbf..619302a 100644
--- a/src/test/Makefile.am
+++ b/src/test/Makefile.am
@@ -23,6 +23,8 @@ EXTRA_DIST = \
data/fdo86664.vsdx \
data/fdo86729-ms1252.vsd \
data/fdo86729-utf8.vsd \
+ data/dwg.vsd \
+ data/dwg.vsdx \
$(test_SOURCES)
TESTS = test
diff --git a/src/test/data/dwg.vsd b/src/test/data/dwg.vsd
new file mode 100644
index 0000000..bea1075
Binary files /dev/null and b/src/test/data/dwg.vsd differ
diff --git a/src/test/data/dwg.vsdx b/src/test/data/dwg.vsdx
new file mode 100644
index 0000000..6642f8c
Binary files /dev/null and b/src/test/data/dwg.vsdx differ
diff --git a/src/test/importtest.cpp b/src/test/importtest.cpp
index 29b99fd..7aba7bc 100644
--- a/src/test/importtest.cpp
+++ b/src/test/importtest.cpp
@@ -124,11 +124,15 @@ class ImportTest : public CPPUNIT_NS::TestFixture
CPPUNIT_TEST(testVsdxMetadataTitle);
CPPUNIT_TEST(testVsdMetadataTitleMs1252);
CPPUNIT_TEST(testVsdMetadataTitleUtf8);
+ CPPUNIT_TEST(testVsdUserDefinedMetadata);
+ CPPUNIT_TEST(testVsdxUserDefinedMetadata);
CPPUNIT_TEST_SUITE_END();
void testVsdxMetadataTitle();
void testVsdMetadataTitleMs1252();
void testVsdMetadataTitleUtf8();
+ void testVsdUserDefinedMetadata();
+ void testVsdxUserDefinedMetadata();
xmlBufferPtr m_buffer;
xmlDocPtr m_doc;
@@ -203,6 +207,23 @@ void ImportTest::testVsdMetadataTitleUtf8()
assertXPath(m_doc, "/document/setDocumentMetaData", "date", "2014-11-26T09:24:56Z");
}
+void ImportTest::testVsdUserDefinedMetadata() // checks the category, company and template attributes emitted for the binary dwg.vsd sample
+{
+ m_doc = parse("dwg.vsd", m_buffer);
+ assertXPath(m_doc, "/document/setDocumentMetaData", "category", "Category test");
+ assertXPath(m_doc, "/document/setDocumentMetaData", "company", "Company test");
+ assertXPath(m_doc, "/document/setDocumentMetaData", "template", "BASICD_M.VSTX"); // NOTE(review): "language" is asserted only in the .vsdx test, not here
+}
+
+void ImportTest::testVsdxUserDefinedMetadata() // checks the category, company, language and template attributes emitted for the dwg.vsdx sample
+{
+ m_doc = parse("dwg.vsdx", m_buffer);
+ assertXPath(m_doc, "/document/setDocumentMetaData", "category", "Category test");
+ assertXPath(m_doc, "/document/setDocumentMetaData", "company", "Company test");
+ assertXPath(m_doc, "/document/setDocumentMetaData", "language", "en-US");
+ assertXPath(m_doc, "/document/setDocumentMetaData", "template", "BASICD_M.VSTX"); // expected value is a bare file name, i.e. without any directory part
+}
+
CPPUNIT_TEST_SUITE_REGISTRATION(ImportTest);
/* vim:set shiftwidth=2 softtabstop=2 expandtab: */
More information about the Libreoffice-commits
mailing list