[Libreoffice-commits] libmspub.git: 5 commits - src/lib
David Tardon
dtardon at redhat.com
Sat Feb 6 09:20:50 UTC 2016
src/lib/MSPUBMetaData.cpp | 155 +++++++++++++++++++++++++++++++++++++---------
src/lib/MSPUBMetaData.h | 4 -
src/lib/MSPUBParser.cpp | 28 +++++---
src/lib/MSPUBParser.h | 2
4 files changed, 147 insertions(+), 42 deletions(-)
New commits:
commit e625c6e60185100438edcb06566a5f8a2a137deb
Author: David Tardon <dtardon at redhat.com>
Date: Sat Jul 25 09:12:40 2015 +0200
add missing include
Change-Id: Ibd003515271ad6211189e12b85eeddd980319770
diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp
index 52fdac4..c298bd1 100644
--- a/src/lib/MSPUBMetaData.cpp
+++ b/src/lib/MSPUBMetaData.cpp
@@ -8,6 +8,7 @@
*/
#include <cmath>
+#include <cstdio>
#include <cstring>
#include <ctime>
commit 39a9a9ecea325c66fb9d5f503848553ac0e14dcf
Author: David Tardon <dtardon at redhat.com>
Date: Tue Aug 25 16:12:25 2015 +0200
afl: avoid out of bounds access to vector
Change-Id: I51fdad6cca395bb5aadc916ef452ee020f666607
diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp
index 9d0446b..52fdac4 100644
--- a/src/lib/MSPUBMetaData.cpp
+++ b/src/lib/MSPUBMetaData.cpp
@@ -237,6 +237,9 @@ librevenge::RVNGString libmspub::MSPUBMetaData::readCodePageString(librevenge::R
{
uint32_t size = readU32(input);
+ if (size == 0)
+ return librevenge::RVNGString();
+
std::vector<unsigned char> characters;
for (uint32_t i = 0; i < size; ++i)
characters.push_back(readU8(input));
commit 0a83689e2f13d0bb584fb004c9065463271ac9e4
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Tue Jul 21 09:32:10 2015 +0200
need space for the terminating null-character
Change-Id: Ie9cab1687dd3187819ec8e3e89b3e9355da9b255
diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp
index ca8b756..9d0446b 100644
--- a/src/lib/MSPUBMetaData.cpp
+++ b/src/lib/MSPUBMetaData.cpp
@@ -109,7 +109,8 @@ void libmspub::MSPUBMetaData::readPropertySetStream(librevenge::RVNGInputStream
{
data4[i] = readU8(input);
}
- char FMTID0[36];
+ // Pretty-printed GUID is 36 bytes + the terminating null-character.
+ char FMTID0[37];
sprintf(FMTID0, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", data1, data2, data3,
data4[0], data4[1], data4[2], data4[3], data4[4], data4[5], data4[6], data4[7]);
commit 8eaa9d24d235b1390402c232bed49e81ab2d40f4
Author: David Tardon <dtardon at redhat.com>
Date: Sat Jul 25 09:18:10 2015 +0200
C++11 syntax is not allowed
Change-Id: Ia705f789b3f9d600b03d25db90972a315e782302
diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp
index 96e58a0..ca8b756 100644
--- a/src/lib/MSPUBMetaData.cpp
+++ b/src/lib/MSPUBMetaData.cpp
@@ -215,15 +215,15 @@ void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream
{
switch (m_idsAndOffsets[index].first)
{
- case PIDDSI::PIDDSI_CATEGORY:
+ case PIDDSI_CATEGORY:
m_metaData.insert("librevenge:category", string);
break;
- case PIDDSI::PIDDSI_LINECOUNT:
- // this should actually be PIDDSI::PIDDSI_COMPANY but this
+ case PIDDSI_LINECOUNT:
+ // this should actually be PIDDSI_COMPANY but this
// is what company is mapped to
m_metaData.insert("librevenge:company", string);
break;
- case PIDDSI::PIDDSI_LANGUAGE:
+ case PIDDSI_LANGUAGE:
m_metaData.insert("dc:language", string);
break;
}
commit 207e6da1240c4255d2b4c5c28d405ace84ed042f
Author: David Tardon <dtardon at redhat.com>
Date: Sat Feb 6 07:45:22 2016 +0100
extract more metadata
Template, language, company and category metadata are extracted from
MSPUB files. Company and category are set as custom properties.
Change-Id: Ic14bfa11a2a8253c79dd4c4466afc7f6b2ce4ea9
diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp
index 1234fe2..96e58a0 100644
--- a/src/lib/MSPUBMetaData.cpp
+++ b/src/lib/MSPUBMetaData.cpp
@@ -8,6 +8,7 @@
*/
#include <cmath>
+#include <cstring>
#include <ctime>
#include "libmspub_utils.h"
@@ -22,6 +23,60 @@ libmspub::MSPUBMetaData::~MSPUBMetaData()
{
}
+enum PIDDSI
+{
+ PIDDSI_CODEPAGE = 0x00000001,
+ PIDDSI_CATEGORY = 0x00000002,
+ PIDDSI_PRESFORMAT = 0x00000003,
+ PIDDSI_BYTECOUNT = 0x00000004,
+ PIDDSI_LINECOUNT = 0x00000005,
+ PIDDSI_PARACOUNT = 0x00000006,
+ PIDDSI_SLIDECOUNT = 0x00000007,
+ PIDDSI_NOTECOUNT = 0x00000008,
+ PIDDSI_HIDDENCOUNT = 0x00000009,
+ PIDDSI_MMCLIPCOUNT = 0x0000000A,
+ PIDDSI_SCALE = 0x0000000B,
+ PIDDSI_HEADINGPAIR = 0x0000000C,
+ PIDDSI_DOCPARTS = 0x0000000D,
+ PIDDSI_MANAGER = 0x0000000E,
+ PIDDSI_COMPANY = 0x0000000F,
+ PIDDSI_LINKSDIRTY = 0x00000010,
+ PIDDSI_CCHWITHSPACES = 0x00000011,
+ PIDDSI_SHAREDDOC = 0x00000013,
+ PIDDSI_LINKBASE = 0x00000014,
+ PIDDSI_HLINKS = 0x00000015,
+ PIDDSI_HYPERLINKSCHANGED = 0x00000016,
+ PIDDSI_VERSION = 0x00000017,
+ PIDDSI_DIGSIG = 0x00000018,
+ PIDDSI_CONTENTTYPE = 0x0000001A,
+ PIDDSI_CONTENTSTATUS = 0x0000001B,
+ PIDDSI_LANGUAGE = 0x0000001C,
+ PIDDSI_DOCVERSION = 0x0000001D
+};
+
+enum PIDSI
+{
+ CODEPAGE_PROPERTY_IDENTIFIER = 0x00000001,
+ PIDSI_TITLE = 0x00000002,
+ PIDSI_SUBJECT = 0x00000003,
+ PIDSI_AUTHOR = 0x00000004,
+ PIDSI_KEYWORDS = 0x00000005,
+ PIDSI_COMMENTS = 0x00000006,
+ PIDSI_TEMPLATE = 0x00000007,
+ PIDSI_LASTAUTHOR = 0x00000008,
+ PIDSI_REVNUMBER = 0x00000009,
+ PIDSI_EDITTIME = 0x0000000A,
+ PIDSI_LASTPRINTED = 0x0000000B,
+ PIDSI_CREATE_DTM = 0x0000000C,
+ PIDSI_LASTSAVE_DTM = 0x0000000D,
+ PIDSI_PAGECOUNT = 0x0000000E,
+ PIDSI_WORDCOUNT = 0x0000000F,
+ PIDSI_CHARCOUNT = 0x00000010,
+ PIDSI_THUMBNAIL = 0x00000011,
+ PIDSI_APPNAME = 0x00000012,
+ PIDSI_DOC_SECURITY = 0x00000013
+};
+
bool libmspub::MSPUBMetaData::parse(librevenge::RVNGInputStream *input)
{
if (!input)
@@ -45,12 +100,24 @@ void libmspub::MSPUBMetaData::readPropertySetStream(librevenge::RVNGInputStream
// NumPropertySets
input->seek(4, librevenge::RVNG_SEEK_CUR);
// FMTID0
- input->seek(16, librevenge::RVNG_SEEK_CUR);
+ //input->seek(16, librevenge::RVNG_SEEK_CUR);
+ uint32_t data1 = readU32(input);
+ uint16_t data2 = readU16(input);
+ uint16_t data3 = readU16(input);
+ uint8_t data4[8];
+ for (int i = 0; i < 8; i++)
+ {
+ data4[i] = readU8(input);
+ }
+ char FMTID0[36];
+ sprintf(FMTID0, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", data1, data2, data3,
+ data4[0], data4[1], data4[2], data4[3], data4[4], data4[5], data4[6], data4[7]);
+
uint32_t offset0 = readU32(input);
- readPropertySet(input, offset0);
+ readPropertySet(input, offset0, FMTID0);
}
-void libmspub::MSPUBMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset)
+void libmspub::MSPUBMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset, char *FMTID)
{
input->seek(offset, librevenge::RVNG_SEEK_SET);
@@ -63,17 +130,15 @@ void libmspub::MSPUBMetaData::readPropertySet(librevenge::RVNGInputStream *input
{
if (i >= m_idsAndOffsets.size())
break;
- readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second);
+ readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second, FMTID);
}
}
-#define CODEPAGE_PROPERTY_IDENTIFIER 0x00000001
-
uint32_t libmspub::MSPUBMetaData::getCodePage()
{
for (size_t i = 0; i < m_idsAndOffsets.size(); ++i)
{
- if (m_idsAndOffsets[i].first == CODEPAGE_PROPERTY_IDENTIFIER)
+ if (m_idsAndOffsets[i].first == PIDSI::CODEPAGE_PROPERTY_IDENTIFIER)
{
if (i >= m_typedPropertyValues.size())
break;
@@ -94,13 +159,10 @@ void libmspub::MSPUBMetaData::readPropertyIdentifierAndOffset(librevenge::RVNGIn
#define VT_I2 0x0002
#define VT_LPSTR 0x001E
-#define PIDSI_TITLE 0x00000002
-#define PIDSI_SUBJECT 0x00000003
-#define PIDSI_AUTHOR 0x00000004
-#define PIDSI_KEYWORDS 0x00000005
-#define PIDSI_COMMENTS 0x00000006
-
-void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset)
+void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input,
+ uint32_t index,
+ uint32_t offset,
+ char *FMTID)
{
input->seek(offset, librevenge::RVNG_SEEK_SET);
uint16_t type = readU16(input);
@@ -120,23 +182,51 @@ void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream
if (index >= m_idsAndOffsets.size())
return;
- switch (m_idsAndOffsets[index].first)
+ if (!strcmp(FMTID, "f29f85e0-4ff9-1068-ab91-08002b27b3d9"))
{
- case PIDSI_TITLE:
- m_metaData.insert("dc:title", string);
- break;
- case PIDSI_SUBJECT:
- m_metaData.insert("dc:subject", string);
- break;
- case PIDSI_AUTHOR:
- m_metaData.insert("meta:initial-creator", string);
- break;
- case PIDSI_KEYWORDS:
- m_metaData.insert("meta:keyword", string);
- break;
- case PIDSI_COMMENTS:
- m_metaData.insert("dc:description", string);
- break;
+ switch (m_idsAndOffsets[index].first)
+ {
+ case PIDSI::PIDSI_TITLE:
+ m_metaData.insert("dc:title", string);
+ break;
+ case PIDSI::PIDSI_SUBJECT:
+ m_metaData.insert("dc:subject", string);
+ break;
+ case PIDSI::PIDSI_AUTHOR:
+ m_metaData.insert("meta:initial-creator", string);
+ m_metaData.insert("dc:creator", string);
+ break;
+ case PIDSI::PIDSI_KEYWORDS:
+ m_metaData.insert("meta:keyword", string);
+ break;
+ case PIDSI::PIDSI_COMMENTS:
+ m_metaData.insert("dc:description", string);
+ break;
+ case PIDSI::PIDSI_TEMPLATE:
+ std::string templateHref(string.cstr());
+ size_t found = templateHref.find_last_of("/\\");
+ if (found != std::string::npos)
+ string = librevenge::RVNGString(templateHref.substr(found+1).c_str());
+ m_metaData.insert("librevenge:template", string);
+ break;
+ }
+ }
+ else if (!strcmp(FMTID,"d5cdd502-2e9c-101b-9397-08002b2cf9ae"))
+ {
+ switch (m_idsAndOffsets[index].first)
+ {
+ case PIDDSI::PIDDSI_CATEGORY:
+ m_metaData.insert("librevenge:category", string);
+ break;
+ case PIDDSI::PIDDSI_LINECOUNT:
+ // this should actually be PIDDSI::PIDDSI_COMPANY but this
+ // is what company is mapped to
+ m_metaData.insert("librevenge:company", string);
+ break;
+ case PIDDSI::PIDDSI_LANGUAGE:
+ m_metaData.insert("dc:language", string);
+ break;
+ }
}
}
}
diff --git a/src/lib/MSPUBMetaData.h b/src/lib/MSPUBMetaData.h
index 18b14a0..f0f994c 100644
--- a/src/lib/MSPUBMetaData.h
+++ b/src/lib/MSPUBMetaData.h
@@ -34,9 +34,9 @@ private:
MSPUBMetaData &operator=(const MSPUBMetaData &);
void readPropertySetStream(librevenge::RVNGInputStream *input);
- void readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset);
+ void readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset, char *FMTID);
void readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input);
- void readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset);
+ void readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset, char *FMTID);
librevenge::RVNGString readCodePageString(librevenge::RVNGInputStream *input);
uint32_t getCodePage();
diff --git a/src/lib/MSPUBParser.cpp b/src/lib/MSPUBParser.cpp
index 6b79243..9c746d7 100644
--- a/src/lib/MSPUBParser.cpp
+++ b/src/lib/MSPUBParser.cpp
@@ -124,11 +124,8 @@ bool MSPUBParser::parse()
MSPUB_DEBUG_MSG(("***NOTE***: Where applicable, the meanings of block/chunk IDs and Types printed below may be found in:\n\t***MSPUBBlockType.h\n\t***MSPUBBlockID.h\n\t***MSPUBContentChunkType.h\n*****\n"));
if (!m_input->isStructured())
return false;
- librevenge::RVNGInputStream *metaData = m_input->getSubStreamByName("\x05SummaryInformation");
- if (metaData)
- // No check: metadata are not important enough to fail if they can't be parsed
- parseMetaData(metaData);
- delete metaData;
+ // No check: metadata are not important enough to fail if they can't be parsed
+ parseMetaData();
librevenge::RVNGInputStream *quill = m_input->getSubStreamByName("Quill/QuillSub/CONTENTS");
if (!quill)
{
@@ -2530,12 +2527,25 @@ void MSPUBParser::parsePaletteEntry(librevenge::RVNGInputStream *input, MSPUBBlo
}
}
-bool MSPUBParser::parseMetaData(librevenge::RVNGInputStream *const input)
+bool MSPUBParser::parseMetaData()
{
- assert(input);
-
+ m_input->seek(0, librevenge::RVNG_SEEK_SET);
MSPUBMetaData metaData;
- metaData.parse(input);
+
+ librevenge::RVNGInputStream *sumaryInfo = m_input->getSubStreamByName("\x05SummaryInformation");
+ if (sumaryInfo)
+ {
+ metaData.parse(sumaryInfo);
+ delete sumaryInfo;
+ }
+
+ librevenge::RVNGInputStream *docSumaryInfo = m_input->getSubStreamByName("\005DocumentSummaryInformation");
+ if (docSumaryInfo)
+ {
+ metaData.parse(docSumaryInfo);
+ delete docSumaryInfo;
+ }
+
m_input->seek(0, librevenge::RVNG_SEEK_SET);
metaData.parseTimes(m_input);
m_collector->collectMetaData(metaData.getMetaData());
diff --git a/src/lib/MSPUBParser.h b/src/lib/MSPUBParser.h
index b6145ce..e1edad2 100644
--- a/src/lib/MSPUBParser.h
+++ b/src/lib/MSPUBParser.h
@@ -91,7 +91,7 @@ protected:
MSPUBParser(const MSPUBParser &);
MSPUBParser &operator=(const MSPUBParser &);
virtual bool parseContents(librevenge::RVNGInputStream *input);
- bool parseMetaData(librevenge::RVNGInputStream *input);
+ bool parseMetaData();
bool parseQuill(librevenge::RVNGInputStream *input);
bool parseEscher(librevenge::RVNGInputStream *input);
bool parseEscherDelay(librevenge::RVNGInputStream *input);
More information about the Libreoffice-commits mailing list