[Libreoffice-commits] libmspub.git: 4 commits - src/lib

David Tardon dtardon at redhat.com
Tue Dec 30 04:04:48 PST 2014


 src/lib/MSPUBCollector.cpp |    9 +
 src/lib/MSPUBCollector.h   |    4 
 src/lib/MSPUBMetaData.cpp  |  236 +++++++++++++++++++++++++++++++++++++++++++++
 src/lib/MSPUBMetaData.h    |   53 ++++++++++
 src/lib/MSPUBParser.cpp    |   30 +++++
 src/lib/MSPUBParser.h      |    1 
 src/lib/Makefile.am        |   62 ++++++-----
 7 files changed, 363 insertions(+), 32 deletions(-)

New commits:
commit 7d50db7ceeda663451ef2fe6cbc9ea0d25668e1e
Author: David Tardon <dtardon at redhat.com>
Date:   Tue Dec 30 12:57:58 2014 +0100

    fix parsing of escher values
    
    The list of (ID, value) pairs is terminated by ID == 0 without a value.
    So, the shortest possible record has length 8: 2 byte ID, followed by 4
    byte value, followed by 2 byte terminator ID.
    
    Change-Id: I4501e42164b4376f16feca21e32221a4084f22f4

diff --git a/src/lib/MSPUBParser.cpp b/src/lib/MSPUBParser.cpp
index fac39cb..0dca90c 100644
--- a/src/lib/MSPUBParser.cpp
+++ b/src/lib/MSPUBParser.cpp
@@ -2311,6 +2311,12 @@ std::map<unsigned short, unsigned> MSPUBParser::extractEscherValues(librevenge::
   while (stillReading(input, record.contentsOffset + record.contentsLength))
   {
     unsigned short id = readU16(input);
+    if (id == 0)
+    {
+      if (!stillReading(input, record.contentsOffset + record.contentsLength))
+        break;
+      MSPUB_DEBUG_MSG(("found escher value with ID 0!\n"));
+    }
     unsigned value = readU32(input);
     ret[id] = value;
   }
commit a5aa054c0996cc4e8c428a88c7f14c18e4e4e57e
Author: David Tardon <dtardon at redhat.com>
Date:   Tue Dec 30 12:05:57 2014 +0100

    import metadata
    
    Change-Id: I64dc77ef7f2e12a5d9f2358b1ebaa5e593282414

diff --git a/src/lib/MSPUBCollector.cpp b/src/lib/MSPUBCollector.cpp
index 71e3846..e2cd967 100644
--- a/src/lib/MSPUBCollector.cpp
+++ b/src/lib/MSPUBCollector.cpp
@@ -237,6 +237,11 @@ void mapTableTextToCells(
 
 } // anonymous namespace
 
+void MSPUBCollector::collectMetaData(const librevenge::RVNGPropertyList &metaData)
+{
+  m_metaData = metaData;
+}
+
 void MSPUBCollector::addEOTFont(const librevenge::RVNGString &name, const librevenge::RVNGBinaryData &data)
 {
   m_embeddedFonts.push_back(EmbeddedFontInfo(name, data));
@@ -381,7 +386,8 @@ MSPUBCollector::MSPUBCollector(librevenge::RVNGDrawingInterface *painter) :
   m_tableCellTextEndsByTextId(), m_stringOffsetsByTextId(),
   m_calculationValuesSeen(), m_pageSeqNumsOrdered(),
   m_encodingHeuristic(false), m_allText(),
-  m_calculatedEncoding()
+  m_calculatedEncoding(),
+  m_metaData()
 {
 }
 
@@ -1699,6 +1705,7 @@ bool MSPUBCollector::go()
   addBlackToPaletteIfNecessary();
   assignShapesToPages();
   m_painter->startDocument(librevenge::RVNGPropertyList());
+  m_painter->setDocumentMetaData(m_metaData);
 
   for (std::list<EmbeddedFontInfo>::const_iterator i = m_embeddedFonts.begin(); i != m_embeddedFonts.end(); ++i)
   {
diff --git a/src/lib/MSPUBCollector.h b/src/lib/MSPUBCollector.h
index 7e4b953..5d96c05 100644
--- a/src/lib/MSPUBCollector.h
+++ b/src/lib/MSPUBCollector.h
@@ -57,6 +57,8 @@ public:
   virtual ~MSPUBCollector();
 
   // collector functions
+  void collectMetaData(const librevenge::RVNGPropertyList &metaData);
+
   bool addPage(unsigned seqNum);
   bool addTextString(const std::vector<TextParagraph> &str, unsigned id);
   void addTextShape(unsigned stringId, unsigned seqNum);
@@ -172,6 +174,8 @@ private:
   bool m_encodingHeuristic;
   std::vector<unsigned char> m_allText;
   mutable boost::optional<const char *> m_calculatedEncoding;
+  librevenge::RVNGPropertyList m_metaData;
+
   // helper functions
   std::vector<int> getShapeAdjustValues(const ShapeInfo &info) const;
   boost::optional<unsigned> getMasterPageSeqNum(unsigned pageSeqNum) const;
diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp
new file mode 100644
index 0000000..eca048b
--- /dev/null
+++ b/src/lib/MSPUBMetaData.cpp
@@ -0,0 +1,236 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * This file is part of the libmspub project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <cmath>
+#include <ctime>
+
+#include "libmspub_utils.h"
+#include "MSPUBMetaData.h"
+
+libmspub::MSPUBMetaData::MSPUBMetaData()
+  : m_idsAndOffsets(), m_typedPropertyValues(), m_metaData()
+{
+}
+
+libmspub::MSPUBMetaData::~MSPUBMetaData()
+{
+}
+
+bool libmspub::MSPUBMetaData::parse(librevenge::RVNGInputStream *input)
+{
+  if (!input)
+    return false;
+
+  readPropertySetStream(input);
+
+  return true;
+}
+
+void libmspub::MSPUBMetaData::readPropertySetStream(librevenge::RVNGInputStream *input)
+{
+  // ByteOrder
+  input->seek(2, librevenge::RVNG_SEEK_CUR);
+  // Version
+  input->seek(2, librevenge::RVNG_SEEK_CUR);
+  // SystemIdentifier
+  input->seek(4, librevenge::RVNG_SEEK_CUR);
+  // CLSID
+  input->seek(16, librevenge::RVNG_SEEK_CUR);
+  // NumPropertySets
+  input->seek(4, librevenge::RVNG_SEEK_CUR);
+  // FMTID0
+  input->seek(16, librevenge::RVNG_SEEK_CUR);
+  uint32_t offset0 = readU32(input);
+  readPropertySet(input, offset0);
+}
+
+void libmspub::MSPUBMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset)
+{
+  input->seek(offset, librevenge::RVNG_SEEK_SET);
+
+  // Size
+  input->seek(4, librevenge::RVNG_SEEK_CUR);
+  uint32_t numProperties = readU32(input);
+  for (uint32_t i = 0; i < numProperties; ++i)
+    readPropertyIdentifierAndOffset(input);
+  for (uint32_t i = 0; i < numProperties; ++i)
+  {
+    if (i >= m_idsAndOffsets.size())
+      break;
+    readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second);
+  }
+}
+
+#define CODEPAGE_PROPERTY_IDENTIFIER 0x00000001
+
+uint32_t libmspub::MSPUBMetaData::getCodePage()
+{
+  for (size_t i = 0; i < m_idsAndOffsets.size(); ++i)
+  {
+    if (m_idsAndOffsets[i].first == CODEPAGE_PROPERTY_IDENTIFIER)
+    {
+      if (i >= m_typedPropertyValues.size())
+        break;
+      return m_typedPropertyValues[i];
+    }
+  }
+
+  return 0;
+}
+
+void libmspub::MSPUBMetaData::readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input)
+{
+  uint32_t propertyIdentifier = readU32(input);
+  uint32_t offset = readU32(input);
+  m_idsAndOffsets.push_back(std::make_pair(propertyIdentifier, offset));
+}
+
+#define VT_I2 0x0002
+#define VT_LPSTR 0x001E
+
+#define PIDSI_TITLE 0x00000002
+#define PIDSI_SUBJECT 0x00000003
+#define PIDSI_AUTHOR 0x00000004
+#define PIDSI_KEYWORDS 0x00000005
+#define PIDSI_COMMENTS 0x00000006
+
+void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset)
+{
+  input->seek(offset, librevenge::RVNG_SEEK_SET);
+  uint16_t type = readU16(input);
+  // Padding
+  input->seek(2, librevenge::RVNG_SEEK_CUR);
+
+  if (type == VT_I2)
+  {
+    uint16_t value = readU16(input);
+    m_typedPropertyValues[index] = value;
+  }
+  else if (type == VT_LPSTR)
+  {
+    librevenge::RVNGString string = readCodePageString(input);
+    if (!string.empty())
+    {
+      if (index >= m_idsAndOffsets.size())
+        return;
+
+      switch (m_idsAndOffsets[index].first)
+      {
+      case PIDSI_TITLE:
+        m_metaData.insert("dc:title", string);
+        break;
+      case PIDSI_SUBJECT:
+        m_metaData.insert("dc:subject", string);
+        break;
+      case PIDSI_AUTHOR:
+        m_metaData.insert("meta:initial-creator", string);
+        break;
+      case PIDSI_KEYWORDS:
+        m_metaData.insert("meta:keyword", string);
+        break;
+      case PIDSI_COMMENTS:
+        m_metaData.insert("dc:description", string);
+        break;
+      }
+    }
+  }
+}
+
+librevenge::RVNGString libmspub::MSPUBMetaData::readCodePageString(librevenge::RVNGInputStream *input)
+{
+  uint32_t size = readU32(input);
+
+  std::vector<unsigned char> characters;
+  for (uint32_t i = 0; i < size; ++i)
+    characters.push_back(readU8(input));
+
+  uint32_t codepage = getCodePage();
+  librevenge::RVNGString string;
+
+  if (codepage == 65001)
+  {
+    // http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx
+    // says this is UTF-8.
+    for (std::vector<unsigned char>::const_iterator i = characters.begin(); i != characters.end(); ++i)
+      string.append((const char)*i);
+  }
+  else
+  {
+    switch (codepage)
+    {
+    case 1252:
+      // http://msdn.microsoft.com/en-us/goglobal/bb964654
+      appendCharacters(string, characters, "windows-1252");
+      break;
+    default:
+      MSPUB_DEBUG_MSG(("MSPUBMetaData::readCodePageString: Unknown codepage %u found\n", unsigned(codepage)));
+    }
+  }
+
+  return string;
+}
+
+bool libmspub::MSPUBMetaData::parseTimes(librevenge::RVNGInputStream *input)
+{
+  // Parse the header
+  // HeaderSignature: 8 bytes
+  // HeaderCLSID: 16 bytes
+  // MinorVersion: 2 bytes
+  // MajorVersion: 2 bytes
+  // ByteOrder: 2 bytes
+  input->seek(30, librevenge::RVNG_SEEK_CUR);
+  uint16_t sectorShift = readU16(input);
+  // MiniSectorShift: 2 bytes
+  // Reserved: 6 bytes
+  // NumDirectorySectors: 4 bytes
+  // NumFATSectors: 4 bytes
+  input->seek(16, librevenge::RVNG_SEEK_CUR);
+  uint32_t firstDirSectorLocation = readU32(input);
+
+  // Seek to the Root Directory Entry
+  size_t sectorSize = pow(2, sectorShift);
+  input->seek((firstDirSectorLocation + 1) * sectorSize, librevenge::RVNG_SEEK_SET);
+  // DirectoryEntryName: 64 bytes
+  // DirectoryEntryNameLength: 2 bytes
+  // ObjectType: 1 byte
+  // ColorFlag: 1 byte
+  // LeftSiblingID: 4 bytes
+  // RightSiblingID: 4 bytes
+  // ChildID: 4 bytes
+  // CLSID: 16 bytes
+  // StateBits: 4 bytes
+  // CreationTime: 8 bytes
+  input->seek(108, librevenge::RVNG_SEEK_CUR);
+  uint64_t modifiedTime = readU64(input);
+
+  // modifiedTime is number of 100ns since Jan 1 1601
+  static const uint64_t epoch = 11644473600;
+  time_t sec = (modifiedTime / 10000000) - epoch;
+  const struct tm *time = localtime(&sec);
+  if (time)
+  {
+    static const int MAX_BUFFER = 1024;
+    char buffer[MAX_BUFFER];
+    strftime(&buffer[0], MAX_BUFFER-1, "%Y-%m-%dT%H:%M:%SZ", time);
+    librevenge::RVNGString result;
+    result.append(buffer);
+    // Visio UI uses modifiedTime for both purposes.
+    m_metaData.insert("meta:creation-date", result);
+    m_metaData.insert("dc:date", result);
+    return true;
+  }
+  return false;
+}
+
+const librevenge::RVNGPropertyList &libmspub::MSPUBMetaData::getMetaData()
+{
+  return m_metaData;
+}
+
+/* vim:set shiftwidth=2 softtabstop=2 expandtab: */
diff --git a/src/lib/MSPUBMetaData.h b/src/lib/MSPUBMetaData.h
new file mode 100644
index 0000000..18b14a0
--- /dev/null
+++ b/src/lib/MSPUBMetaData.h
@@ -0,0 +1,53 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * This file is part of the libmspub project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef __MSPUBMETADATA_H__
+#define __MSPUBMETADATA_H__
+
+#include <vector>
+#include <utility>
+#include <map>
+#include <librevenge-stream/librevenge-stream.h>
+#include <librevenge/librevenge.h>
+#include "libmspub_utils.h"
+
+namespace libmspub
+{
+
+class MSPUBMetaData
+{
+public:
+  MSPUBMetaData();
+  ~MSPUBMetaData();
+  bool parse(librevenge::RVNGInputStream *input);
+  bool parseTimes(librevenge::RVNGInputStream *input);
+  const librevenge::RVNGPropertyList &getMetaData();
+
+private:
+  MSPUBMetaData(const MSPUBMetaData &);
+  MSPUBMetaData &operator=(const MSPUBMetaData &);
+
+  void readPropertySetStream(librevenge::RVNGInputStream *input);
+  void readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset);
+  void readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input);
+  void readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset);
+  librevenge::RVNGString readCodePageString(librevenge::RVNGInputStream *input);
+
+  uint32_t getCodePage();
+
+  std::vector< std::pair<uint32_t, uint32_t> > m_idsAndOffsets;
+  std::map<uint16_t, uint16_t> m_typedPropertyValues;
+  librevenge::RVNGPropertyList m_metaData;
+};
+
+} // namespace libmspub
+
+#endif // __MSPUBMETADATA_H__
+
+/* vim:set shiftwidth=2 softtabstop=2 expandtab: */
diff --git a/src/lib/MSPUBParser.cpp b/src/lib/MSPUBParser.cpp
index 6ebe098..fac39cb 100644
--- a/src/lib/MSPUBParser.cpp
+++ b/src/lib/MSPUBParser.cpp
@@ -7,6 +7,7 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
  */
 
+#include <cassert>
 #include <set>
 #include <sstream>
 #include <string>
@@ -14,6 +15,8 @@
 #include <string.h>
 #include <librevenge-stream/librevenge-stream.h>
 #include <zlib.h>
+
+#include "MSPUBMetaData.h"
 #include "MSPUBParser.h"
 #include "MSPUBCollector.h"
 #include "MSPUBBlockID.h"
@@ -115,6 +118,11 @@ bool MSPUBParser::parse()
   MSPUB_DEBUG_MSG(("***NOTE***: Where applicable, the meanings of block/chunk IDs and Types printed below may be found in:\n\t***MSPUBBlockType.h\n\t***MSPUBBlockID.h\n\t***MSPUBContentChunkType.h\n*****\n"));
   if (!m_input->isStructured())
     return false;
+  librevenge::RVNGInputStream *metaData = m_input->getSubStreamByName("\x05SummaryInformation");
+  if (metaData)
+    // No check: metadata are not important enough to fail if they can't be parsed
+    parseMetaData(metaData);
+  delete metaData;
   librevenge::RVNGInputStream *quill = m_input->getSubStreamByName("Quill/QuillSub/CONTENTS");
   if (!quill)
   {
@@ -2525,6 +2533,20 @@ void MSPUBParser::parsePaletteEntry(librevenge::RVNGInputStream *input, MSPUBBlo
   }
 }
 
+bool MSPUBParser::parseMetaData(librevenge::RVNGInputStream *const input)
+{
+  assert(input);
+
+  MSPUBMetaData metaData;
+  metaData.parse(input);
+  m_input->seek(0, librevenge::RVNG_SEEK_SET);
+  metaData.parseTimes(m_input);
+  m_collector->collectMetaData(metaData.getMetaData());
+
+  return true;
+}
+
+
 }
 
 /* vim:set shiftwidth=2 softtabstop=2 expandtab: */
diff --git a/src/lib/MSPUBParser.h b/src/lib/MSPUBParser.h
index 3d97ffd..54e41aa 100644
--- a/src/lib/MSPUBParser.h
+++ b/src/lib/MSPUBParser.h
@@ -91,6 +91,7 @@ protected:
   MSPUBParser(const MSPUBParser &);
   MSPUBParser &operator=(const MSPUBParser &);
   virtual bool parseContents(librevenge::RVNGInputStream *input);
+  bool parseMetaData(librevenge::RVNGInputStream *input);
   bool parseQuill(librevenge::RVNGInputStream *input);
   bool parseEscher(librevenge::RVNGInputStream *input);
   bool parseEscherDelay(librevenge::RVNGInputStream *input);
diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
index c626442..f54feaa 100644
--- a/src/lib/Makefile.am
+++ b/src/lib/Makefile.am
@@ -38,6 +38,8 @@ libmspub_@MSPUB_MAJOR_VERSION@_@MSPUB_MINOR_VERSION@_la_SOURCES = \
 	MSPUBConstants.h \
 	MSPUBContentChunkType.h \
 	MSPUBDocument.cpp \
+	MSPUBMetaData.cpp \
+	MSPUBMetaData.h \
 	MSPUBParser.cpp \
 	MSPUBParser.h \
 	MSPUBParser2k.cpp \
commit 09a84ccf477902196fc94c71f9bc244e2e28ae5c
Author: David Tardon <dtardon at redhat.com>
Date:   Tue Dec 30 11:36:28 2014 +0100

    keep the sources list sorted
    
    Change-Id: I61061f8f43be40cc3f886a29a74c31dc39d4893f

diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
index 89c06e1..c626442 100644
--- a/src/lib/Makefile.am
+++ b/src/lib/Makefile.am
@@ -16,54 +16,54 @@ libmspub_@MSPUB_MAJOR_VERSION@_@MSPUB_MINOR_VERSION@_la_LIBADD  = $(REVENGE_LIBS
 libmspub_@MSPUB_MAJOR_VERSION@_@MSPUB_MINOR_VERSION@_la_DEPENDENCIES = @LIBMSPUB_WIN32_RESOURCE@
 libmspub_@MSPUB_MAJOR_VERSION@_@MSPUB_MINOR_VERSION@_la_LDFLAGS = $(version_info) -export-dynamic -no-undefined
 libmspub_@MSPUB_MAJOR_VERSION@_@MSPUB_MINOR_VERSION@_la_SOURCES = \
-	MSPUBCollector.cpp \
-	MSPUBDocument.cpp \
-	MSPUBParser.cpp \
-	MSPUBParser2k.cpp \
-	Fill.cpp \
-	libmspub_utils.cpp \
-	PolygonUtils.cpp \
-	ShapeGroupElement.cpp \
+	Arrow.h \
+	BorderArtInfo.h \
 	ColorReference.cpp \
-	VectorTransformation2D.cpp \
-	MSPUBParser97.cpp \
+	ColorReference.h \
+	Coordinate.h \
 	Dash.cpp \
-	Shadow.cpp \
+	Dash.h \
+	EmbeddedFontInfo.h \
 	EscherContainerType.h \
 	EscherFieldIds.h \
+	Fill.cpp \
+	Fill.h \
 	FillType.h \
+	Line.h \
+	ListInfo.h \
 	MSPUBBlockID.h \
 	MSPUBBlockType.h \
+	MSPUBCollector.cpp \
 	MSPUBCollector.h \
 	MSPUBConstants.h \
 	MSPUBContentChunkType.h \
+	MSPUBDocument.cpp \
+	MSPUBParser.cpp \
 	MSPUBParser.h \
+	MSPUBParser2k.cpp \
 	MSPUBParser2k.h \
+	MSPUBParser97.cpp \
+	MSPUBParser97.h \
 	MSPUBTypes.h \
-	libmspub_utils.h \
-	ShapeFlags.h \
-	ShapeType.h \
-	Fill.h \
-	ColorReference.h \
+	Margins.h \
+	NumberingDelimiter.h \
+	NumberingType.h \
+	PolygonUtils.cpp \
 	PolygonUtils.h \
-	Shapes.h \
-	VectorTransformation2D.h \
-	Coordinate.h \
+	Shadow.cpp \
+	Shadow.h \
+	ShapeFlags.h \
+	ShapeGroupElement.cpp \
 	ShapeGroupElement.h \
 	ShapeInfo.h \
-	Line.h \
-	Margins.h \
-	MSPUBParser97.h \
-	BorderArtInfo.h \
-	NumberingType.h \
-	NumberingDelimiter.h \
-	ListInfo.h \
-	Dash.h \
+	ShapeType.h \
+	Shapes.h \
 	TableInfo.h \
-	Arrow.h \
+	VectorTransformation2D.cpp \
+	VectorTransformation2D.h \
 	VerticalAlign.h \
-	EmbeddedFontInfo.h \
-	Shadow.h
+	libmspub_utils.cpp \
+	libmspub_utils.h
 
 if OS_WIN32
 
commit 3b6a71216cab7172b947dbb6946f8dcb8b81792d
Author: David Tardon <dtardon at redhat.com>
Date:   Tue Dec 30 12:07:54 2014 +0100

    fix warning
    
    Change-Id: I77492d801c43d178be210323ef1dafc7eefdacb6

diff --git a/src/lib/MSPUBParser.cpp b/src/lib/MSPUBParser.cpp
index 1810152..6ebe098 100644
--- a/src/lib/MSPUBParser.cpp
+++ b/src/lib/MSPUBParser.cpp
@@ -821,7 +821,7 @@ bool MSPUBParser::parseShape(librevenge::RVNGInputStream *input,
 
         if (bool(cellCount) && (get(cellCount) != ti.m_cells.size()))
         {
-          MSPUB_DEBUG_MSG(("%u cell records expected, but read %u\n", get(cellCount), ti.m_cells.size()));
+          MSPUB_DEBUG_MSG(("%u cell records expected, but read %u\n", get(cellCount), unsigned(ti.m_cells.size())));
         }
       }
 


More information about the Libreoffice-commits mailing list