[Libreoffice-commits] libmspub.git: 4 commits - src/lib
David Tardon
dtardon at redhat.com
Tue Dec 30 04:04:48 PST 2014
src/lib/MSPUBCollector.cpp | 9 +
src/lib/MSPUBCollector.h | 4
src/lib/MSPUBMetaData.cpp | 236 +++++++++++++++++++++++++++++++++++++++++++++
src/lib/MSPUBMetaData.h | 53 ++++++++++
src/lib/MSPUBParser.cpp | 30 +++++
src/lib/MSPUBParser.h | 1
src/lib/Makefile.am | 62 ++++++-----
7 files changed, 363 insertions(+), 32 deletions(-)
New commits:
commit 7d50db7ceeda663451ef2fe6cbc9ea0d25668e1e
Author: David Tardon <dtardon at redhat.com>
Date: Tue Dec 30 12:57:58 2014 +0100
fix parsing of escher values
The list of (ID, value) pairs is terminated by ID == 0 without a value.
So, the shortest possible record has length 8: 2 byte ID, followed by 4
byte value, followed by 2 byte terminator ID.
Change-Id: I4501e42164b4376f16feca21e32221a4084f22f4
diff --git a/src/lib/MSPUBParser.cpp b/src/lib/MSPUBParser.cpp
index fac39cb..0dca90c 100644
--- a/src/lib/MSPUBParser.cpp
+++ b/src/lib/MSPUBParser.cpp
@@ -2311,6 +2311,12 @@ std::map<unsigned short, unsigned> MSPUBParser::extractEscherValues(librevenge::
while (stillReading(input, record.contentsOffset + record.contentsLength))
{
unsigned short id = readU16(input);
+ if (id == 0)
+ {
+ if (!stillReading(input, record.contentsOffset + record.contentsLength))
+ break;
+ MSPUB_DEBUG_MSG(("found escher value with ID 0!\n"));
+ }
unsigned value = readU32(input);
ret[id] = value;
}
commit a5aa054c0996cc4e8c428a88c7f14c18e4e4e57e
Author: David Tardon <dtardon at redhat.com>
Date: Tue Dec 30 12:05:57 2014 +0100
import metadata
Change-Id: I64dc77ef7f2e12a5d9f2358b1ebaa5e593282414
diff --git a/src/lib/MSPUBCollector.cpp b/src/lib/MSPUBCollector.cpp
index 71e3846..e2cd967 100644
--- a/src/lib/MSPUBCollector.cpp
+++ b/src/lib/MSPUBCollector.cpp
@@ -237,6 +237,11 @@ void mapTableTextToCells(
} // anonymous namespace
+// Stores a copy of the document metadata property list in m_metaData.
+// go() later hands the stored list to the painter through
+// setDocumentMetaData().
+void MSPUBCollector::collectMetaData(const librevenge::RVNGPropertyList &metaData)
+{
+ m_metaData = metaData;
+}
+
void MSPUBCollector::addEOTFont(const librevenge::RVNGString &name, const librevenge::RVNGBinaryData &data)
{
m_embeddedFonts.push_back(EmbeddedFontInfo(name, data));
@@ -381,7 +386,8 @@ MSPUBCollector::MSPUBCollector(librevenge::RVNGDrawingInterface *painter) :
m_tableCellTextEndsByTextId(), m_stringOffsetsByTextId(),
m_calculationValuesSeen(), m_pageSeqNumsOrdered(),
m_encodingHeuristic(false), m_allText(),
- m_calculatedEncoding()
+ m_calculatedEncoding(),
+ m_metaData()
{
}
@@ -1699,6 +1705,7 @@ bool MSPUBCollector::go()
addBlackToPaletteIfNecessary();
assignShapesToPages();
m_painter->startDocument(librevenge::RVNGPropertyList());
+ m_painter->setDocumentMetaData(m_metaData);
for (std::list<EmbeddedFontInfo>::const_iterator i = m_embeddedFonts.begin(); i != m_embeddedFonts.end(); ++i)
{
diff --git a/src/lib/MSPUBCollector.h b/src/lib/MSPUBCollector.h
index 7e4b953..5d96c05 100644
--- a/src/lib/MSPUBCollector.h
+++ b/src/lib/MSPUBCollector.h
@@ -57,6 +57,8 @@ public:
virtual ~MSPUBCollector();
// collector functions
+ void collectMetaData(const librevenge::RVNGPropertyList &metaData);
+
bool addPage(unsigned seqNum);
bool addTextString(const std::vector<TextParagraph> &str, unsigned id);
void addTextShape(unsigned stringId, unsigned seqNum);
@@ -172,6 +174,8 @@ private:
bool m_encodingHeuristic;
std::vector<unsigned char> m_allText;
mutable boost::optional<const char *> m_calculatedEncoding;
+ librevenge::RVNGPropertyList m_metaData;
+
// helper functions
std::vector<int> getShapeAdjustValues(const ShapeInfo &info) const;
boost::optional<unsigned> getMasterPageSeqNum(unsigned pageSeqNum) const;
diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp
new file mode 100644
index 0000000..eca048b
--- /dev/null
+++ b/src/lib/MSPUBMetaData.cpp
@@ -0,0 +1,236 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * This file is part of the libmspub project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <cmath>
+#include <ctime>
+
+#include "libmspub_utils.h"
+#include "MSPUBMetaData.h"
+
+// Creates a reader with no property identifiers, no stored typed values
+// and an empty metadata property list.
+libmspub::MSPUBMetaData::MSPUBMetaData()
+ : m_idsAndOffsets(), m_typedPropertyValues(), m_metaData()
+{
+}
+
+// Nothing to release explicitly: the body is empty and the members are
+// destroyed by their own destructors.
+libmspub::MSPUBMetaData::~MSPUBMetaData()
+{
+}
+
+// Reads a property set stream and collects the recognised properties
+// into the metadata list.
+//
+// input: the stream to read from; may be null.
+// Returns false when no stream was supplied and true otherwise; the
+// return value says nothing about how many properties were found.
+bool libmspub::MSPUBMetaData::parse(librevenge::RVNGInputStream *input)
+{
+  if (input)
+  {
+    readPropertySetStream(input);
+    return true;
+  }
+
+  return false;
+}
+
+// Skips the fixed part of the property set stream header, reads the
+// offset of the first property set and parses that set.
+//
+// input: the stream, read from its current position.
+void libmspub::MSPUBMetaData::readPropertySetStream(librevenge::RVNGInputStream *input)
+{
+  // Sizes, in bytes, of the header fields that are skipped unread, in
+  // stream order: ByteOrder, Version, SystemIdentifier, CLSID,
+  // NumPropertySets, FMTID0.
+  static const long skippedFieldSizes[] = { 2, 2, 4, 16, 4, 16 };
+  const unsigned skippedFieldCount = sizeof(skippedFieldSizes) / sizeof(skippedFieldSizes[0]);
+
+  for (unsigned field = 0; field != skippedFieldCount; ++field)
+    input->seek(skippedFieldSizes[field], librevenge::RVNG_SEEK_CUR);
+
+  // What follows is the offset of the first property set.
+  const uint32_t firstSetOffset = readU32(input);
+  readPropertySet(input, firstSetOffset);
+}
+
+// Parses one property set: first the table of (identifier, offset)
+// pairs, then the typed value of each listed property.
+//
+// input:  the stream holding the property set.
+// offset: absolute position of the property set in the stream; the
+//         per-property offsets are added to it.
+void libmspub::MSPUBMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset)
+{
+  input->seek(offset, librevenge::RVNG_SEEK_SET);
+  // Skip the 4-byte Size field.
+  input->seek(4, librevenge::RVNG_SEEK_CUR);
+
+  const uint32_t propertyCount = readU32(input);
+
+  // First pass: collect the identifier/offset table.
+  for (uint32_t remaining = propertyCount; remaining != 0; --remaining)
+    readPropertyIdentifierAndOffset(input);
+
+  // Second pass: read the values, never indexing past the table.
+  for (uint32_t idx = 0; idx < propertyCount && idx < m_idsAndOffsets.size(); ++idx)
+    readTypedPropertyValue(input, idx, offset + m_idsAndOffsets[idx].second);
+}
+
+#define CODEPAGE_PROPERTY_IDENTIFIER 0x00000001
+
+// Looks up the value stored for the CodePage property
+// (CODEPAGE_PROPERTY_IDENTIFIER).
+//
+// Returns the stored code page, or 0 if the property is not listed or
+// no value has been stored for it.
+uint32_t libmspub::MSPUBMetaData::getCodePage()
+{
+  for (size_t i = 0; i < m_idsAndOffsets.size(); ++i)
+  {
+    if (m_idsAndOffsets[i].first != CODEPAGE_PROPERTY_IDENTIFIER)
+      continue;
+
+    // m_typedPropertyValues is a map keyed by property index that only
+    // holds the properties whose value was stored. It therefore has to
+    // be searched by key: comparing the index against size() is wrong
+    // for a sparse map, and operator[] would insert a bogus zero entry.
+    const std::map<uint16_t, uint16_t>::const_iterator value =
+      m_typedPropertyValues.find(static_cast<uint16_t>(i));
+    if (value != m_typedPropertyValues.end())
+      return value->second;
+    break;
+  }
+
+  return 0;
+}
+
+// Reads one entry of the property table (two consecutive 32-bit values:
+// the property identifier, then its offset) and appends it to
+// m_idsAndOffsets.
+void libmspub::MSPUBMetaData::readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input)
+{
+  std::pair<uint32_t, uint32_t> entry;
+  // The two reads are kept as separate statements so that the
+  // identifier is always consumed before the offset.
+  entry.first = readU32(input);
+  entry.second = readU32(input);
+  m_idsAndOffsets.push_back(entry);
+}
+
+#define VT_I2 0x0002
+#define VT_LPSTR 0x001E
+
+#define PIDSI_TITLE 0x00000002
+#define PIDSI_SUBJECT 0x00000003
+#define PIDSI_AUTHOR 0x00000004
+#define PIDSI_KEYWORDS 0x00000005
+#define PIDSI_COMMENTS 0x00000006
+
+// Reads the typed value of one property and records it.
+//
+// VT_I2 values are stored in m_typedPropertyValues under the property
+// index. Non-empty VT_LPSTR values are added to the metadata list when
+// the property is one of the PIDSI_* identifiers handled below. Values
+// of any other type are ignored.
+//
+// input:  the stream holding the property set.
+// index:  position of the property in m_idsAndOffsets.
+// offset: absolute stream position of the typed value.
+void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset)
+{
+  input->seek(offset, librevenge::RVNG_SEEK_SET);
+  const uint16_t valueType = readU16(input);
+  // Padding
+  input->seek(2, librevenge::RVNG_SEEK_CUR);
+
+  if (valueType == VT_I2)
+  {
+    // Read first, store afterwards, so nothing is stored if the read fails.
+    const uint16_t number = readU16(input);
+    m_typedPropertyValues[index] = number;
+    return;
+  }
+
+  if (valueType != VT_LPSTR)
+    return;
+
+  const librevenge::RVNGString text = readCodePageString(input);
+  if (text.empty() || index >= m_idsAndOffsets.size())
+    return;
+
+  // Map the property identifier to the metadata key it is exported as.
+  const char *key = 0;
+  switch (m_idsAndOffsets[index].first)
+  {
+  case PIDSI_TITLE:
+    key = "dc:title";
+    break;
+  case PIDSI_SUBJECT:
+    key = "dc:subject";
+    break;
+  case PIDSI_AUTHOR:
+    key = "meta:initial-creator";
+    break;
+  case PIDSI_KEYWORDS:
+    key = "meta:keyword";
+    break;
+  case PIDSI_COMMENTS:
+    key = "dc:description";
+    break;
+  default:
+    break;
+  }
+
+  if (key)
+    m_metaData.insert(key, text);
+}
+
+// Reads a length-prefixed string (32-bit byte count followed by that
+// many bytes) and converts it according to the code page reported by
+// getCodePage().
+//
+// Returns the converted text; the result is empty when the code page is
+// not one of those handled below.
+librevenge::RVNGString libmspub::MSPUBMetaData::readCodePageString(librevenge::RVNGInputStream *input)
+{
+  const uint32_t byteCount = readU32(input);
+
+  std::vector<unsigned char> bytes;
+  for (uint32_t remaining = byteCount; remaining != 0; --remaining)
+    bytes.push_back(readU8(input));
+
+  const uint32_t codepage = getCodePage();
+  librevenge::RVNGString text;
+
+  switch (codepage)
+  {
+  case 65001:
+    // http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx
+    // says this is UTF-8, so the bytes are appended unchanged.
+    for (size_t pos = 0; pos != bytes.size(); ++pos)
+      text.append(static_cast<char>(bytes[pos]));
+    break;
+  case 1252:
+    // http://msdn.microsoft.com/en-us/goglobal/bb964654
+    appendCharacters(text, bytes, "windows-1252");
+    break;
+  default:
+    MSPUB_DEBUG_MSG(("MSPUBMetaData::readCodePageString: Unknown codepage %u found\n", unsigned(codepage)));
+    break;
+  }
+
+  return text;
+}
+
+// Reads the modification time of the compound file's Root Directory
+// Entry and stores it, formatted as an ISO 8601 UTC time stamp, under
+// both "meta:creation-date" and "dc:date".
+//
+// input: the compound file itself, positioned at the start of its
+//        header (all header skips below are relative to that position).
+// Returns true if a time stamp was stored, false if the header is not
+// usable, no time was recorded, or the time could not be formatted.
+bool libmspub::MSPUBMetaData::parseTimes(librevenge::RVNGInputStream *input)
+{
+  // Parse the header
+  // HeaderSignature: 8 bytes
+  // HeaderCLSID: 16 bytes
+  // MinorVersion: 2 bytes
+  // MajorVersion: 2 bytes
+  // ByteOrder: 2 bytes
+  input->seek(30, librevenge::RVNG_SEEK_CUR);
+  const uint16_t sectorShift = readU16(input);
+  // MiniSectorShift: 2 bytes
+  // Reserved: 6 bytes
+  // NumDirectorySectors: 4 bytes
+  // NumFATSectors: 4 bytes
+  input->seek(16, librevenge::RVNG_SEEK_CUR);
+  const uint32_t firstDirSectorLocation = readU32(input);
+
+  // The sector size is 2^sectorShift. The shift comes straight from the
+  // file, so refuse values that cannot be a real sector size rather
+  // than shifting out of range; this also keeps the product below
+  // within 64 bits.
+  if (sectorShift > 30)
+    return false;
+  const uint64_t sectorSize = uint64_t(1) << sectorShift;
+
+  // Seek to the Root Directory Entry. The sum is formed in 64 bits so
+  // that a sector number of 0xFFFFFFFF cannot wrap around to 0.
+  const uint64_t rootEntryPosition = (uint64_t(firstDirSectorLocation) + 1) * sectorSize;
+  input->seek(static_cast<long>(rootEntryPosition), librevenge::RVNG_SEEK_SET);
+  // DirectoryEntryName: 64 bytes
+  // DirectoryEntryNameLength: 2 bytes
+  // ObjectType: 1 byte
+  // ColorFlag: 1 byte
+  // LeftSiblingID: 4 bytes
+  // RightSiblingID: 4 bytes
+  // ChildID: 4 bytes
+  // CLSID: 16 bytes
+  // StateBits: 4 bytes
+  // CreationTime: 8 bytes
+  input->seek(108, librevenge::RVNG_SEEK_CUR);
+  const uint64_t modifiedTime = readU64(input);
+
+  // modifiedTime is number of 100ns since Jan 1 1601; epoch is the
+  // number of seconds between that date and Jan 1 1970. It is written
+  // as a product because the plain literal does not fit a 32-bit long.
+  static const uint64_t epoch = uint64_t(116444736UL) * 100;
+  const uint64_t secondsSince1601 = modifiedTime / 10000000;
+  // A zeroed (or otherwise pre-1970) stamp would underflow the unsigned
+  // subtraction below; treat it as "no time recorded".
+  if (secondsSince1601 < epoch)
+    return false;
+  const time_t sec = static_cast<time_t>(secondsSince1601 - epoch);
+
+  // The format string ends in 'Z', i.e. the result claims to be UTC, so
+  // the time must be broken down with gmtime(), not localtime().
+  const struct tm *const utcTime = gmtime(&sec);
+  if (!utcTime)
+    return false;
+
+  static const int MAX_BUFFER = 1024;
+  char buffer[MAX_BUFFER];
+  // strftime() returns 0 and leaves the buffer unspecified on failure.
+  if (strftime(&buffer[0], MAX_BUFFER, "%Y-%m-%dT%H:%M:%SZ", utcTime) == 0)
+    return false;
+
+  librevenge::RVNGString result;
+  result.append(buffer);
+  // The modification time is used for both purposes.
+  m_metaData.insert("meta:creation-date", result);
+  m_metaData.insert("dc:date", result);
+  return true;
+}
+
+// Returns a reference to the metadata collected so far by parse() and
+// parseTimes(). The reference refers to a member of this object.
+const librevenge::RVNGPropertyList &libmspub::MSPUBMetaData::getMetaData()
+{
+ return m_metaData;
+}
+
+/* vim:set shiftwidth=2 softtabstop=2 expandtab: */
diff --git a/src/lib/MSPUBMetaData.h b/src/lib/MSPUBMetaData.h
new file mode 100644
index 0000000..18b14a0
--- /dev/null
+++ b/src/lib/MSPUBMetaData.h
@@ -0,0 +1,53 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * This file is part of the libmspub project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef __MSPUBMETADATA_H__
+#define __MSPUBMETADATA_H__
+
+#include <vector>
+#include <utility>
+#include <map>
+#include <librevenge-stream/librevenge-stream.h>
+#include <librevenge/librevenge.h>
+#include "libmspub_utils.h"
+
+namespace libmspub
+{
+
+// Collects document metadata (title, subject, author, keywords,
+// comments and a modification date) into a librevenge property list.
+class MSPUBMetaData
+{
+public:
+ MSPUBMetaData();
+ ~MSPUBMetaData();
+ // Reads a property set stream; returns false only if input is null.
+ bool parse(librevenge::RVNGInputStream *input);
+ // Reads the modification time of the compound file's Root Directory
+ // Entry; returns true if a date was added to the metadata.
+ bool parseTimes(librevenge::RVNGInputStream *input);
+ // Returns the metadata collected so far.
+ const librevenge::RVNGPropertyList &getMetaData();
+
+private:
+ // Declared private and not defined in MSPUBMetaData.cpp: instances
+ // are not meant to be copied.
+ MSPUBMetaData(const MSPUBMetaData &);
+ MSPUBMetaData &operator=(const MSPUBMetaData &);
+
+ void readPropertySetStream(librevenge::RVNGInputStream *input);
+ void readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset);
+ void readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input);
+ void readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset);
+ librevenge::RVNGString readCodePageString(librevenge::RVNGInputStream *input);
+
+ // Returns the stored value of the CodePage property, or 0 if none.
+ uint32_t getCodePage();
+
+ // (property identifier, offset) pairs in the order they were read;
+ // the offset is relative to the start of the property set.
+ std::vector< std::pair<uint32_t, uint32_t> > m_idsAndOffsets;
+ // VT_I2 property values, keyed by the property's index in
+ // m_idsAndOffsets.
+ std::map<uint16_t, uint16_t> m_typedPropertyValues;
+ // The metadata gathered by parse() and parseTimes().
+ librevenge::RVNGPropertyList m_metaData;
+};
+
+} // namespace libmspub
+
+#endif // __MSPUBMETADATA_H__
+
+/* vim:set shiftwidth=2 softtabstop=2 expandtab: */
diff --git a/src/lib/MSPUBParser.cpp b/src/lib/MSPUBParser.cpp
index 6ebe098..fac39cb 100644
--- a/src/lib/MSPUBParser.cpp
+++ b/src/lib/MSPUBParser.cpp
@@ -7,6 +7,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+#include <cassert>
#include <set>
#include <sstream>
#include <string>
@@ -14,6 +15,8 @@
#include <string.h>
#include <librevenge-stream/librevenge-stream.h>
#include <zlib.h>
+
+#include "MSPUBMetaData.h"
#include "MSPUBParser.h"
#include "MSPUBCollector.h"
#include "MSPUBBlockID.h"
@@ -115,6 +118,11 @@ bool MSPUBParser::parse()
MSPUB_DEBUG_MSG(("***NOTE***: Where applicable, the meanings of block/chunk IDs and Types printed below may be found in:\n\t***MSPUBBlockType.h\n\t***MSPUBBlockID.h\n\t***MSPUBContentChunkType.h\n*****\n"));
if (!m_input->isStructured())
return false;
+ librevenge::RVNGInputStream *metaData = m_input->getSubStreamByName("\x05SummaryInformation");
+ if (metaData)
+ // No check: metadata are not important enough to fail if they can't be parsed
+ parseMetaData(metaData);
+ delete metaData;
librevenge::RVNGInputStream *quill = m_input->getSubStreamByName("Quill/QuillSub/CONTENTS");
if (!quill)
{
@@ -2525,6 +2533,20 @@ void MSPUBParser::parsePaletteEntry(librevenge::RVNGInputStream *input, MSPUBBlo
}
}
+// Extracts document metadata and hands it to the collector.
+//
+// input: the summary information stream; must not be null.
+// Returns true if both the property set and the time stamp were read
+// without an error being raised, false otherwise. Whatever metadata
+// could be gathered is passed to the collector in either case.
+//
+// Metadata are optional, so a failure while reading them must not abort
+// the import of the document: anything thrown while reading (for
+// example on a truncated stream) is contained here instead of
+// propagating to the caller.
+bool MSPUBParser::parseMetaData(librevenge::RVNGInputStream *const input)
+{
+  assert(input);
+
+  MSPUBMetaData metaData;
+  bool succeeded = true;
+
+  try
+  {
+    metaData.parse(input);
+  }
+  catch (...)
+  {
+    succeeded = false;
+  }
+
+  // The time stamp lives in the compound file itself, not in the
+  // summary stream, and is read independently of the properties above.
+  try
+  {
+    m_input->seek(0, librevenge::RVNG_SEEK_SET);
+    metaData.parseTimes(m_input);
+  }
+  catch (...)
+  {
+    succeeded = false;
+  }
+
+  m_collector->collectMetaData(metaData.getMetaData());
+
+  return succeeded;
+}
+
+
}
/* vim:set shiftwidth=2 softtabstop=2 expandtab: */
diff --git a/src/lib/MSPUBParser.h b/src/lib/MSPUBParser.h
index 3d97ffd..54e41aa 100644
--- a/src/lib/MSPUBParser.h
+++ b/src/lib/MSPUBParser.h
@@ -91,6 +91,7 @@ protected:
MSPUBParser(const MSPUBParser &);
MSPUBParser &operator=(const MSPUBParser &);
virtual bool parseContents(librevenge::RVNGInputStream *input);
+ bool parseMetaData(librevenge::RVNGInputStream *input);
bool parseQuill(librevenge::RVNGInputStream *input);
bool parseEscher(librevenge::RVNGInputStream *input);
bool parseEscherDelay(librevenge::RVNGInputStream *input);
diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
index c626442..f54feaa 100644
--- a/src/lib/Makefile.am
+++ b/src/lib/Makefile.am
@@ -38,6 +38,8 @@ libmspub_ at MSPUB_MAJOR_VERSION@_ at MSPUB_MINOR_VERSION@_la_SOURCES = \
MSPUBConstants.h \
MSPUBContentChunkType.h \
MSPUBDocument.cpp \
+ MSPUBMetaData.cpp \
+ MSPUBMetaData.h \
MSPUBParser.cpp \
MSPUBParser.h \
MSPUBParser2k.cpp \
commit 09a84ccf477902196fc94c71f9bc244e2e28ae5c
Author: David Tardon <dtardon at redhat.com>
Date: Tue Dec 30 11:36:28 2014 +0100
keep the sources list sorted
Change-Id: I61061f8f43be40cc3f886a29a74c31dc39d4893f
diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
index 89c06e1..c626442 100644
--- a/src/lib/Makefile.am
+++ b/src/lib/Makefile.am
@@ -16,54 +16,54 @@ libmspub_ at MSPUB_MAJOR_VERSION@_ at MSPUB_MINOR_VERSION@_la_LIBADD = $(REVENGE_LIBS
libmspub_ at MSPUB_MAJOR_VERSION@_ at MSPUB_MINOR_VERSION@_la_DEPENDENCIES = @LIBMSPUB_WIN32_RESOURCE@
libmspub_ at MSPUB_MAJOR_VERSION@_ at MSPUB_MINOR_VERSION@_la_LDFLAGS = $(version_info) -export-dynamic -no-undefined
libmspub_ at MSPUB_MAJOR_VERSION@_ at MSPUB_MINOR_VERSION@_la_SOURCES = \
- MSPUBCollector.cpp \
- MSPUBDocument.cpp \
- MSPUBParser.cpp \
- MSPUBParser2k.cpp \
- Fill.cpp \
- libmspub_utils.cpp \
- PolygonUtils.cpp \
- ShapeGroupElement.cpp \
+ Arrow.h \
+ BorderArtInfo.h \
ColorReference.cpp \
- VectorTransformation2D.cpp \
- MSPUBParser97.cpp \
+ ColorReference.h \
+ Coordinate.h \
Dash.cpp \
- Shadow.cpp \
+ Dash.h \
+ EmbeddedFontInfo.h \
EscherContainerType.h \
EscherFieldIds.h \
+ Fill.cpp \
+ Fill.h \
FillType.h \
+ Line.h \
+ ListInfo.h \
MSPUBBlockID.h \
MSPUBBlockType.h \
+ MSPUBCollector.cpp \
MSPUBCollector.h \
MSPUBConstants.h \
MSPUBContentChunkType.h \
+ MSPUBDocument.cpp \
+ MSPUBParser.cpp \
MSPUBParser.h \
+ MSPUBParser2k.cpp \
MSPUBParser2k.h \
+ MSPUBParser97.cpp \
+ MSPUBParser97.h \
MSPUBTypes.h \
- libmspub_utils.h \
- ShapeFlags.h \
- ShapeType.h \
- Fill.h \
- ColorReference.h \
+ Margins.h \
+ NumberingDelimiter.h \
+ NumberingType.h \
+ PolygonUtils.cpp \
PolygonUtils.h \
- Shapes.h \
- VectorTransformation2D.h \
- Coordinate.h \
+ Shadow.cpp \
+ Shadow.h \
+ ShapeFlags.h \
+ ShapeGroupElement.cpp \
ShapeGroupElement.h \
ShapeInfo.h \
- Line.h \
- Margins.h \
- MSPUBParser97.h \
- BorderArtInfo.h \
- NumberingType.h \
- NumberingDelimiter.h \
- ListInfo.h \
- Dash.h \
+ ShapeType.h \
+ Shapes.h \
TableInfo.h \
- Arrow.h \
+ VectorTransformation2D.cpp \
+ VectorTransformation2D.h \
VerticalAlign.h \
- EmbeddedFontInfo.h \
- Shadow.h
+ libmspub_utils.cpp \
+ libmspub_utils.h
if OS_WIN32
commit 3b6a71216cab7172b947dbb6946f8dcb8b81792d
Author: David Tardon <dtardon at redhat.com>
Date: Tue Dec 30 12:07:54 2014 +0100
fix warning
Change-Id: I77492d801c43d178be210323ef1dafc7eefdacb6
diff --git a/src/lib/MSPUBParser.cpp b/src/lib/MSPUBParser.cpp
index 1810152..6ebe098 100644
--- a/src/lib/MSPUBParser.cpp
+++ b/src/lib/MSPUBParser.cpp
@@ -821,7 +821,7 @@ bool MSPUBParser::parseShape(librevenge::RVNGInputStream *input,
if (bool(cellCount) && (get(cellCount) != ti.m_cells.size()))
{
- MSPUB_DEBUG_MSG(("%u cell records expected, but read %u\n", get(cellCount), ti.m_cells.size()));
+ MSPUB_DEBUG_MSG(("%u cell records expected, but read %u\n", get(cellCount), unsigned(ti.m_cells.size())));
}
}
More information about the Libreoffice-commits
mailing list