[Libreoffice-commits] libmspub.git: 5 commits - src/lib

David Tardon dtardon at redhat.com
Sat Feb 6 09:20:50 UTC 2016


 src/lib/MSPUBMetaData.cpp |  155 +++++++++++++++++++++++++++++++++++++---------
 src/lib/MSPUBMetaData.h   |    4 -
 src/lib/MSPUBParser.cpp   |   28 +++++---
 src/lib/MSPUBParser.h     |    2 
 4 files changed, 147 insertions(+), 42 deletions(-)

New commits:
commit e625c6e60185100438edcb06566a5f8a2a137deb
Author: David Tardon <dtardon at redhat.com>
Date:   Sat Jul 25 09:12:40 2015 +0200

    add missing include
    
    Change-Id: Ibd003515271ad6211189e12b85eeddd980319770

diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp
index 52fdac4..c298bd1 100644
--- a/src/lib/MSPUBMetaData.cpp
+++ b/src/lib/MSPUBMetaData.cpp
@@ -8,6 +8,7 @@
  */
 
 #include <cmath>
+#include <cstdio>
 #include <cstring>
 #include <ctime>
 
commit 39a9a9ecea325c66fb9d5f503848553ac0e14dcf
Author: David Tardon <dtardon at redhat.com>
Date:   Tue Aug 25 16:12:25 2015 +0200

    afl: avoid out of bounds access to vector
    
    Change-Id: I51fdad6cca395bb5aadc916ef452ee020f666607

diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp
index 9d0446b..52fdac4 100644
--- a/src/lib/MSPUBMetaData.cpp
+++ b/src/lib/MSPUBMetaData.cpp
@@ -237,6 +237,9 @@ librevenge::RVNGString libmspub::MSPUBMetaData::readCodePageString(librevenge::R
 {
   uint32_t size = readU32(input);
 
+  if (size == 0)
+    return librevenge::RVNGString();
+
   std::vector<unsigned char> characters;
   for (uint32_t i = 0; i < size; ++i)
     characters.push_back(readU8(input));
commit 0a83689e2f13d0bb584fb004c9065463271ac9e4
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Tue Jul 21 09:32:10 2015 +0200

    need space for the terminating null-character
    
    Change-Id: Ie9cab1687dd3187819ec8e3e89b3e9355da9b255

diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp
index ca8b756..9d0446b 100644
--- a/src/lib/MSPUBMetaData.cpp
+++ b/src/lib/MSPUBMetaData.cpp
@@ -109,7 +109,8 @@ void libmspub::MSPUBMetaData::readPropertySetStream(librevenge::RVNGInputStream
   {
     data4[i] = readU8(input);
   }
-  char FMTID0[36];
+  // Pretty-printed GUID is 36 bytes + the terminating null-character.
+  char FMTID0[37];
   sprintf(FMTID0, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", data1, data2, data3,
           data4[0], data4[1], data4[2], data4[3], data4[4], data4[5], data4[6], data4[7]);
 
commit 8eaa9d24d235b1390402c232bed49e81ab2d40f4
Author: David Tardon <dtardon at redhat.com>
Date:   Sat Jul 25 09:18:10 2015 +0200

    C++11 syntax is not allowed
    
    Change-Id: Ia705f789b3f9d600b03d25db90972a315e782302

diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp
index 96e58a0..ca8b756 100644
--- a/src/lib/MSPUBMetaData.cpp
+++ b/src/lib/MSPUBMetaData.cpp
@@ -215,15 +215,15 @@ void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream
       {
         switch (m_idsAndOffsets[index].first)
         {
-        case PIDDSI::PIDDSI_CATEGORY:
+        case PIDDSI_CATEGORY:
           m_metaData.insert("librevenge:category", string);
           break;
-        case PIDDSI::PIDDSI_LINECOUNT:
-          // this should actually be PIDDSI::PIDDSI_COMPANY but this
+        case PIDDSI_LINECOUNT:
+          // this should actually be PIDDSI_COMPANY but this
           // is what company is mapped to
           m_metaData.insert("librevenge:company", string);
           break;
-        case PIDDSI::PIDDSI_LANGUAGE:
+        case PIDDSI_LANGUAGE:
           m_metaData.insert("dc:language", string);
           break;
         }
commit 207e6da1240c4255d2b4c5c28d405ace84ed042f
Author: David Tardon <dtardon at redhat.com>
Date:   Sat Feb 6 07:45:22 2016 +0100

    extract more metadata
    
    Template, language, company and category metadata are extracted from
    MSPUB files. Company and category are set as custom properties.
    
    Change-Id: Ic14bfa11a2a8253c79dd4c4466afc7f6b2ce4ea9

diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp
index 1234fe2..96e58a0 100644
--- a/src/lib/MSPUBMetaData.cpp
+++ b/src/lib/MSPUBMetaData.cpp
@@ -8,6 +8,7 @@
  */
 
 #include <cmath>
+#include <cstring>
 #include <ctime>
 
 #include "libmspub_utils.h"
@@ -22,6 +23,60 @@ libmspub::MSPUBMetaData::~MSPUBMetaData()
 {
 }
 
+enum PIDDSI
+{
+  PIDDSI_CODEPAGE          = 0x00000001,
+  PIDDSI_CATEGORY          = 0x00000002,
+  PIDDSI_PRESFORMAT        = 0x00000003,
+  PIDDSI_BYTECOUNT         = 0x00000004,
+  PIDDSI_LINECOUNT         = 0x00000005,
+  PIDDSI_PARACOUNT         = 0x00000006,
+  PIDDSI_SLIDECOUNT        = 0x00000007,
+  PIDDSI_NOTECOUNT         = 0x00000008,
+  PIDDSI_HIDDENCOUNT       = 0x00000009,
+  PIDDSI_MMCLIPCOUNT       = 0x0000000A,
+  PIDDSI_SCALE             = 0x0000000B,
+  PIDDSI_HEADINGPAIR       = 0x0000000C,
+  PIDDSI_DOCPARTS          = 0x0000000D,
+  PIDDSI_MANAGER           = 0x0000000E,
+  PIDDSI_COMPANY           = 0x0000000F,
+  PIDDSI_LINKSDIRTY        = 0x00000010,
+  PIDDSI_CCHWITHSPACES     = 0x00000011,
+  PIDDSI_SHAREDDOC         = 0x00000013,
+  PIDDSI_LINKBASE          = 0x00000014,
+  PIDDSI_HLINKS            = 0x00000015,
+  PIDDSI_HYPERLINKSCHANGED = 0x00000016,
+  PIDDSI_VERSION           = 0x00000017,
+  PIDDSI_DIGSIG            = 0x00000018,
+  PIDDSI_CONTENTTYPE       = 0x0000001A,
+  PIDDSI_CONTENTSTATUS     = 0x0000001B,
+  PIDDSI_LANGUAGE          = 0x0000001C,
+  PIDDSI_DOCVERSION        = 0x0000001D
+};
+
+enum PIDSI
+{
+  CODEPAGE_PROPERTY_IDENTIFIER = 0x00000001,
+  PIDSI_TITLE                  = 0x00000002,
+  PIDSI_SUBJECT                = 0x00000003,
+  PIDSI_AUTHOR                 = 0x00000004,
+  PIDSI_KEYWORDS               = 0x00000005,
+  PIDSI_COMMENTS               = 0x00000006,
+  PIDSI_TEMPLATE               = 0x00000007,
+  PIDSI_LASTAUTHOR             = 0x00000008,
+  PIDSI_REVNUMBER              = 0x00000009,
+  PIDSI_EDITTIME               = 0x0000000A,
+  PIDSI_LASTPRINTED            = 0x0000000B,
+  PIDSI_CREATE_DTM             = 0x0000000C,
+  PIDSI_LASTSAVE_DTM           = 0x0000000D,
+  PIDSI_PAGECOUNT              = 0x0000000E,
+  PIDSI_WORDCOUNT              = 0x0000000F,
+  PIDSI_CHARCOUNT              = 0x00000010,
+  PIDSI_THUMBNAIL              = 0x00000011,
+  PIDSI_APPNAME                = 0x00000012,
+  PIDSI_DOC_SECURITY           = 0x00000013
+};
+
 bool libmspub::MSPUBMetaData::parse(librevenge::RVNGInputStream *input)
 {
   if (!input)
@@ -45,12 +100,24 @@ void libmspub::MSPUBMetaData::readPropertySetStream(librevenge::RVNGInputStream
   // NumPropertySets
   input->seek(4, librevenge::RVNG_SEEK_CUR);
   // FMTID0
-  input->seek(16, librevenge::RVNG_SEEK_CUR);
+  //input->seek(16, librevenge::RVNG_SEEK_CUR);
+  uint32_t data1 = readU32(input);
+  uint16_t data2 = readU16(input);
+  uint16_t data3 = readU16(input);
+  uint8_t data4[8];
+  for (int i = 0; i < 8; i++)
+  {
+    data4[i] = readU8(input);
+  }
+  char FMTID0[36];
+  sprintf(FMTID0, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", data1, data2, data3,
+          data4[0], data4[1], data4[2], data4[3], data4[4], data4[5], data4[6], data4[7]);
+
   uint32_t offset0 = readU32(input);
-  readPropertySet(input, offset0);
+  readPropertySet(input, offset0, FMTID0);
 }
 
-void libmspub::MSPUBMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset)
+void libmspub::MSPUBMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset, char *FMTID)
 {
   input->seek(offset, librevenge::RVNG_SEEK_SET);
 
@@ -63,17 +130,15 @@ void libmspub::MSPUBMetaData::readPropertySet(librevenge::RVNGInputStream *input
   {
     if (i >= m_idsAndOffsets.size())
       break;
-    readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second);
+    readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second, FMTID);
   }
 }
 
-#define CODEPAGE_PROPERTY_IDENTIFIER 0x00000001
-
 uint32_t libmspub::MSPUBMetaData::getCodePage()
 {
   for (size_t i = 0; i < m_idsAndOffsets.size(); ++i)
   {
-    if (m_idsAndOffsets[i].first == CODEPAGE_PROPERTY_IDENTIFIER)
+    if (m_idsAndOffsets[i].first == PIDSI::CODEPAGE_PROPERTY_IDENTIFIER)
     {
       if (i >= m_typedPropertyValues.size())
         break;
@@ -94,13 +159,10 @@ void libmspub::MSPUBMetaData::readPropertyIdentifierAndOffset(librevenge::RVNGIn
 #define VT_I2 0x0002
 #define VT_LPSTR 0x001E
 
-#define PIDSI_TITLE 0x00000002
-#define PIDSI_SUBJECT 0x00000003
-#define PIDSI_AUTHOR 0x00000004
-#define PIDSI_KEYWORDS 0x00000005
-#define PIDSI_COMMENTS 0x00000006
-
-void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset)
+void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input,
+                                                     uint32_t index,
+                                                     uint32_t offset,
+                                                     char *FMTID)
 {
   input->seek(offset, librevenge::RVNG_SEEK_SET);
   uint16_t type = readU16(input);
@@ -120,23 +182,51 @@ void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream
       if (index >= m_idsAndOffsets.size())
         return;
 
-      switch (m_idsAndOffsets[index].first)
+      if (!strcmp(FMTID, "f29f85e0-4ff9-1068-ab91-08002b27b3d9"))
       {
-      case PIDSI_TITLE:
-        m_metaData.insert("dc:title", string);
-        break;
-      case PIDSI_SUBJECT:
-        m_metaData.insert("dc:subject", string);
-        break;
-      case PIDSI_AUTHOR:
-        m_metaData.insert("meta:initial-creator", string);
-        break;
-      case PIDSI_KEYWORDS:
-        m_metaData.insert("meta:keyword", string);
-        break;
-      case PIDSI_COMMENTS:
-        m_metaData.insert("dc:description", string);
-        break;
+        switch (m_idsAndOffsets[index].first)
+        {
+        case PIDSI::PIDSI_TITLE:
+          m_metaData.insert("dc:title", string);
+          break;
+        case PIDSI::PIDSI_SUBJECT:
+          m_metaData.insert("dc:subject", string);
+          break;
+        case PIDSI::PIDSI_AUTHOR:
+          m_metaData.insert("meta:initial-creator", string);
+          m_metaData.insert("dc:creator", string);
+          break;
+        case PIDSI::PIDSI_KEYWORDS:
+          m_metaData.insert("meta:keyword", string);
+          break;
+        case PIDSI::PIDSI_COMMENTS:
+          m_metaData.insert("dc:description", string);
+          break;
+        case PIDSI::PIDSI_TEMPLATE:
+          std::string templateHref(string.cstr());
+          size_t found = templateHref.find_last_of("/\\");
+          if (found != std::string::npos)
+            string = librevenge::RVNGString(templateHref.substr(found+1).c_str());
+          m_metaData.insert("librevenge:template", string);
+          break;
+        }
+      }
+      else if (!strcmp(FMTID,"d5cdd502-2e9c-101b-9397-08002b2cf9ae"))
+      {
+        switch (m_idsAndOffsets[index].first)
+        {
+        case PIDDSI::PIDDSI_CATEGORY:
+          m_metaData.insert("librevenge:category", string);
+          break;
+        case PIDDSI::PIDDSI_LINECOUNT:
+          // this should actually be PIDDSI::PIDDSI_COMPANY but this
+          // is what company is mapped to
+          m_metaData.insert("librevenge:company", string);
+          break;
+        case PIDDSI::PIDDSI_LANGUAGE:
+          m_metaData.insert("dc:language", string);
+          break;
+        }
       }
     }
   }
diff --git a/src/lib/MSPUBMetaData.h b/src/lib/MSPUBMetaData.h
index 18b14a0..f0f994c 100644
--- a/src/lib/MSPUBMetaData.h
+++ b/src/lib/MSPUBMetaData.h
@@ -34,9 +34,9 @@ private:
   MSPUBMetaData &operator=(const MSPUBMetaData &);
 
   void readPropertySetStream(librevenge::RVNGInputStream *input);
-  void readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset);
+  void readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset, char *FMTID);
   void readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input);
-  void readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset);
+  void readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset, char *FMTID);
   librevenge::RVNGString readCodePageString(librevenge::RVNGInputStream *input);
 
   uint32_t getCodePage();
diff --git a/src/lib/MSPUBParser.cpp b/src/lib/MSPUBParser.cpp
index 6b79243..9c746d7 100644
--- a/src/lib/MSPUBParser.cpp
+++ b/src/lib/MSPUBParser.cpp
@@ -124,11 +124,8 @@ bool MSPUBParser::parse()
   MSPUB_DEBUG_MSG(("***NOTE***: Where applicable, the meanings of block/chunk IDs and Types printed below may be found in:\n\t***MSPUBBlockType.h\n\t***MSPUBBlockID.h\n\t***MSPUBContentChunkType.h\n*****\n"));
   if (!m_input->isStructured())
     return false;
-  librevenge::RVNGInputStream *metaData = m_input->getSubStreamByName("\x05SummaryInformation");
-  if (metaData)
-    // No check: metadata are not important enough to fail if they can't be parsed
-    parseMetaData(metaData);
-  delete metaData;
+  // No check: metadata are not important enough to fail if they can't be parsed
+  parseMetaData();
   librevenge::RVNGInputStream *quill = m_input->getSubStreamByName("Quill/QuillSub/CONTENTS");
   if (!quill)
   {
@@ -2530,12 +2527,25 @@ void MSPUBParser::parsePaletteEntry(librevenge::RVNGInputStream *input, MSPUBBlo
   }
 }
 
-bool MSPUBParser::parseMetaData(librevenge::RVNGInputStream *const input)
+bool MSPUBParser::parseMetaData()
 {
-  assert(input);
-
+  m_input->seek(0, librevenge::RVNG_SEEK_SET);
   MSPUBMetaData metaData;
-  metaData.parse(input);
+
+  librevenge::RVNGInputStream *sumaryInfo = m_input->getSubStreamByName("\x05SummaryInformation");
+  if (sumaryInfo)
+  {
+    metaData.parse(sumaryInfo);
+    delete sumaryInfo;
+  }
+
+  librevenge::RVNGInputStream *docSumaryInfo = m_input->getSubStreamByName("\005DocumentSummaryInformation");
+  if (docSumaryInfo)
+  {
+    metaData.parse(docSumaryInfo);
+    delete docSumaryInfo;
+  }
+
   m_input->seek(0, librevenge::RVNG_SEEK_SET);
   metaData.parseTimes(m_input);
   m_collector->collectMetaData(metaData.getMetaData());
diff --git a/src/lib/MSPUBParser.h b/src/lib/MSPUBParser.h
index b6145ce..e1edad2 100644
--- a/src/lib/MSPUBParser.h
+++ b/src/lib/MSPUBParser.h
@@ -91,7 +91,7 @@ protected:
   MSPUBParser(const MSPUBParser &);
   MSPUBParser &operator=(const MSPUBParser &);
   virtual bool parseContents(librevenge::RVNGInputStream *input);
-  bool parseMetaData(librevenge::RVNGInputStream *input);
+  bool parseMetaData();
   bool parseQuill(librevenge::RVNGInputStream *input);
   bool parseEscher(librevenge::RVNGInputStream *input);
   bool parseEscherDelay(librevenge::RVNGInputStream *input);


More information about the Libreoffice-commits mailing list