[Libreoffice-commits] libvisio.git: 2 commits - src/lib

Miklos Vajna vmiklos at collabora.co.uk
Mon Dec 1 06:03:28 PST 2014


 src/lib/Makefile.am             |    2 
 src/lib/VSDContentCollector.cpp |   28 ------
 src/lib/VSDMetaData.cpp         |  180 ++++++++++++++++++++++++++++++++++++++++
 src/lib/VSDMetaData.h           |   51 +++++++++++
 src/lib/VSDParser.cpp           |   27 +++++-
 src/lib/VSDParser.h             |    5 -
 src/lib/VisioDocument.cpp       |    2 
 src/lib/libvisio_utils.cpp      |   14 +++
 src/lib/libvisio_utils.h        |    3 
 9 files changed, 283 insertions(+), 29 deletions(-)

New commits:
commit ccfd70d06cdd47965bccf24b6dfa6ca3d472517e
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Mon Dec 1 15:02:16 2014 +0100

    fdo#86729 VSD: import metadata
    
    Only title as a start.
    
    Change-Id: If876d0985bb68d87600d805368aff404dfd6fba4

diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
index e2daff8..f07c5ac 100644
--- a/src/lib/Makefile.am
+++ b/src/lib/Makefile.am
@@ -39,6 +39,7 @@ libvisio_@VSD_MAJOR_VERSION@_@VSD_MINOR_VERSION@_la_SOURCES = \
 	VSDPages.cpp \
 	VSDParagraphList.cpp \
 	VSDParser.cpp \
+	VSDMetaData.cpp \
 	VSDShapeList.cpp \
 	VSDStencils.cpp \
 	VSDStyles.cpp \
@@ -58,6 +59,7 @@ libvisio_@VSD_MAJOR_VERSION@_@VSD_MINOR_VERSION@_la_SOURCES = \
 	VSDPages.h \
 	VSDParagraphList.h \
 	VSDParser.h \
+	VSDMetaData.h \
 	VSDShapeList.h \
 	VSDStencils.h \
 	VSDStyles.h \
diff --git a/src/lib/VSDMetaData.cpp b/src/lib/VSDMetaData.cpp
new file mode 100644
index 0000000..209cc34
--- /dev/null
+++ b/src/lib/VSDMetaData.cpp
@@ -0,0 +1,180 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * This file is part of the libvisio project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "VSDMetaData.h"
+#include <unicode/ucnv.h>
+
+libvisio::VSDMetaData::VSDMetaData()
+{
+}
+
+libvisio::VSDMetaData::~VSDMetaData()
+{
+}
+
+bool libvisio::VSDMetaData::parse(librevenge::RVNGInputStream *input)
+{
+  if (!input)
+    return false;
+
+  readPropertySetStream(input);
+
+  return true;
+}
+
+void libvisio::VSDMetaData::readPropertySetStream(librevenge::RVNGInputStream *input)
+{
+  // ByteOrder
+  input->seek(2, librevenge::RVNG_SEEK_CUR);
+  // Version
+  input->seek(2, librevenge::RVNG_SEEK_CUR);
+  // SystemIdentifier
+  input->seek(4, librevenge::RVNG_SEEK_CUR);
+  // CLSID
+  input->seek(16, librevenge::RVNG_SEEK_CUR);
+  // NumPropertySets
+  input->seek(4, librevenge::RVNG_SEEK_CUR);
+  // FMTID0
+  input->seek(16, librevenge::RVNG_SEEK_CUR);
+  uint32_t offset0 = readU32(input);
+  readPropertySet(input, offset0);
+}
+
+void libvisio::VSDMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset)
+{
+  input->seek(offset, librevenge::RVNG_SEEK_SET);
+
+  // Size
+  input->seek(4, librevenge::RVNG_SEEK_CUR);
+  uint32_t numProperties = readU32(input);
+  for (uint32_t i = 0; i < numProperties; ++i)
+    readPropertyIdentifierAndOffset(input);
+  for (uint32_t i = 0; i < numProperties; ++i)
+  {
+    if (i >= m_idsAndOffsets.size())
+      break;
+    readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second);
+  }
+}
+
+#define CODEPAGE_PROPERTY_IDENTIFIER 0x00000001
+
+uint32_t libvisio::VSDMetaData::getCodePage()
+{
+  for (size_t i = 0; i < m_idsAndOffsets.size(); ++i)
+  {
+    if (m_idsAndOffsets[i].first == CODEPAGE_PROPERTY_IDENTIFIER)
+    {
+      if (i >= m_typedPropertyValues.size())
+        break;
+      return m_typedPropertyValues[i];
+    }
+  }
+
+  return 0;
+}
+
+void libvisio::VSDMetaData::readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input)
+{
+  uint32_t propertyIdentifier = readU32(input);
+  uint32_t offset = readU32(input);
+  m_idsAndOffsets.push_back(std::make_pair(propertyIdentifier, offset));
+}
+
+#define VT_I2 0x0002
+#define VT_LPSTR 0x001E
+
+#define PIDSI_TITLE 0x00000002
+
+void libvisio::VSDMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset)
+{
+  input->seek(offset, librevenge::RVNG_SEEK_SET);
+  uint16_t type = readU16(input);
+  // Padding
+  input->seek(2, librevenge::RVNG_SEEK_CUR);
+
+  if (type == VT_I2)
+  {
+    uint16_t value = readU16(input);
+    m_typedPropertyValues[index] = value;
+  }
+  else if (type == VT_LPSTR)
+  {
+    librevenge::RVNGString string = readCodePageString(input);
+    if (!string.empty())
+    {
+      if (index >= m_idsAndOffsets.size())
+        return;
+
+      switch (m_idsAndOffsets[index].first)
+      {
+      case PIDSI_TITLE:
+        m_metaData.insert("dc:title", string);
+        break;
+      }
+    }
+  }
+}
+
+librevenge::RVNGString libvisio::VSDMetaData::readCodePageString(librevenge::RVNGInputStream *input)
+{
+  uint32_t size = readU32(input);
+
+  std::vector<unsigned char> characters;
+  for (uint32_t i = 0; i < size; ++i)
+    characters.push_back(readU8(input));
+
+  uint32_t codepage = getCodePage();
+  librevenge::RVNGString string;
+
+  if (codepage == 65001)
+  {
+    // http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx
+    // says this is UTF-8.
+    for (std::vector<unsigned char>::const_iterator i = characters.begin(); i != characters.end(); ++i)
+      string.append((const char)*i);
+  }
+  else
+  {
+    UErrorCode status = U_ZERO_ERROR;
+    UConverter *conv = 0;
+
+    switch (codepage)
+    {
+    case 1252:
+      // http://msdn.microsoft.com/en-us/goglobal/bb964654
+      conv = ucnv_open("windows-1252", &status);
+      break;
+    }
+
+    if (U_SUCCESS(status) && conv)
+    {
+      const char *src = (const char *)&characters[0];
+      const char *srcLimit = (const char *)src + characters.size();
+      while (src < srcLimit)
+      {
+        UChar32 ucs4Character = ucnv_getNextUChar(conv, &src, srcLimit, &status);
+        if (U_SUCCESS(status) && U_IS_UNICODE_CHAR(ucs4Character))
+          appendUCS4(string, ucs4Character);
+      }
+    }
+
+    if (conv)
+      ucnv_close(conv);
+  }
+
+  return string;
+}
+
+const librevenge::RVNGPropertyList &libvisio::VSDMetaData::getMetaData()
+{
+  return m_metaData;
+}
+
+/* vim:set shiftwidth=2 softtabstop=2 expandtab: */
diff --git a/src/lib/VSDMetaData.h b/src/lib/VSDMetaData.h
new file mode 100644
index 0000000..c185894
--- /dev/null
+++ b/src/lib/VSDMetaData.h
@@ -0,0 +1,51 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * This file is part of the libvisio project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef __VSDMETADATA_H__
+#define __VSDMETADATA_H__
+
+#include <vector>
+#include <utility>
+#include <map>
+#include <librevenge-stream/librevenge-stream.h>
+#include <librevenge/librevenge.h>
+#include "libvisio_utils.h"
+
+namespace libvisio
+{
+
+class VSDMetaData
+{
+public:
+  VSDMetaData();
+  ~VSDMetaData();
+  bool parse(librevenge::RVNGInputStream *input);
+  const librevenge::RVNGPropertyList &getMetaData();
+
+private:
+  VSDMetaData(const VSDMetaData &);
+  VSDMetaData &operator=(const VSDMetaData &);
+
+  void readPropertySetStream(librevenge::RVNGInputStream *input);
+  void readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset);
+  void readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input);
+  void readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset);
+  librevenge::RVNGString readCodePageString(librevenge::RVNGInputStream *input);
+
+  uint32_t getCodePage();
+
+  std::vector< std::pair<uint32_t, uint32_t> > m_idsAndOffsets;
+  std::map<uint16_t, uint16_t> m_typedPropertyValues;
+  librevenge::RVNGPropertyList m_metaData;
+};
+
+} // namespace libvisio
+
+#endif // __VSDMETADATA_H__
+/* vim:set shiftwidth=2 softtabstop=2 expandtab: */
diff --git a/src/lib/VSDParser.cpp b/src/lib/VSDParser.cpp
index c5c7541..b69e5fd 100644
--- a/src/lib/VSDParser.cpp
+++ b/src/lib/VSDParser.cpp
@@ -19,9 +19,10 @@
 #include "VSDDocumentStructure.h"
 #include "VSDContentCollector.h"
 #include "VSDStylesCollector.h"
+#include "VSDMetaData.h"
 
-libvisio::VSDParser::VSDParser(librevenge::RVNGInputStream *input, librevenge::RVNGDrawingInterface *painter)
-  : m_input(input), m_painter(painter), m_header(), m_collector(0), m_shapeList(), m_currentLevel(0),
+libvisio::VSDParser::VSDParser(librevenge::RVNGInputStream *input, librevenge::RVNGDrawingInterface *painter, librevenge::RVNGInputStream *container)
+  : m_input(input), m_painter(painter), m_container(container), m_header(), m_collector(0), m_shapeList(), m_currentLevel(0),
     m_stencils(), m_currentStencil(0), m_shape(), m_isStencilStarted(false), m_isInStyles(false),
     m_currentShapeLevel(0), m_currentShapeID(MINUS_ONE), m_extractStencils(false), m_colours(),
     m_isBackgroundPage(false), m_isShapeStarted(false), m_shadowOffsetX(0.0), m_shadowOffsetY(0.0),
@@ -139,6 +140,9 @@ bool libvisio::VSDParser::parseMain()
 
   VSDContentCollector contentCollector(m_painter, groupXFormsSequence, groupMembershipsSequence, documentPageShapeOrders, styles, m_stencils);
   m_collector = &contentCollector;
+  if (m_container)
+    parseMetaData();
+
   VSD_DEBUG_MSG(("VSDParser::parseMain 2nd pass\n"));
   if (!parseDocument(&trailerStream, shift))
     return false;
@@ -146,6 +150,25 @@ bool libvisio::VSDParser::parseMain()
   return true;
 }
 
+bool libvisio::VSDParser::parseMetaData()
+{
+  if (!m_container)
+    return false;
+  m_container->seek(0, librevenge::RVNG_SEEK_SET);
+  if (!m_container->isStructured())
+    return false;
+  librevenge::RVNGInputStream *stream = m_container->getSubStreamByName("\x05SummaryInformation");
+  if (!stream)
+    return false;
+
+  VSDMetaData metaData;
+  metaData.parse(stream);
+  m_collector->collectMetaData(metaData.getMetaData());
+
+  delete stream;
+  return true;
+}
+
 bool libvisio::VSDParser::parseDocument(librevenge::RVNGInputStream *input, unsigned shift)
 {
   try
diff --git a/src/lib/VSDParser.h b/src/lib/VSDParser.h
index 8df2cac..e8da164 100644
--- a/src/lib/VSDParser.h
+++ b/src/lib/VSDParser.h
@@ -46,7 +46,7 @@ struct Pointer
 class VSDParser
 {
 public:
-  explicit VSDParser(librevenge::RVNGInputStream *input, librevenge::RVNGDrawingInterface *painter);
+  explicit VSDParser(librevenge::RVNGInputStream *input, librevenge::RVNGDrawingInterface *painter, librevenge::RVNGInputStream *container = 0);
   virtual ~VSDParser();
   bool parseMain();
   bool extractStencils();
@@ -115,6 +115,8 @@ protected:
   // parser of one pass
   bool parseDocument(librevenge::RVNGInputStream *input, unsigned shift);
 
+  bool parseMetaData();
+
   // Stream handlers
   void handleStreams(librevenge::RVNGInputStream *input, unsigned ptrType, unsigned shift, unsigned level);
   void handleStream(const Pointer &ptr, unsigned idx, unsigned level);
@@ -135,6 +137,7 @@ protected:
 
   librevenge::RVNGInputStream *m_input;
   librevenge::RVNGDrawingInterface *m_painter;
+  librevenge::RVNGInputStream *m_container;
   ChunkHeader m_header;
   VSDCollector *m_collector;
   VSDShapeList m_shapeList;
diff --git a/src/lib/VisioDocument.cpp b/src/lib/VisioDocument.cpp
index 951fb28..5b27404 100644
--- a/src/lib/VisioDocument.cpp
+++ b/src/lib/VisioDocument.cpp
@@ -158,7 +158,7 @@ static bool parseBinaryVisioDocument(librevenge::RVNGInputStream *input, libreve
       parser = new libvisio::VSD6Parser(docStream, painter);
       break;
     case 11:
-      parser = new libvisio::VSDParser(docStream, painter);
+      parser = new libvisio::VSDParser(docStream, painter, input);
       break;
     default:
       break;
commit 07566047a76defe96447c7fd0757c6dda0ba5b2a
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Mon Dec 1 12:39:13 2014 +0100

    Move _appendUCS4() to libvisio_utils
    
    Change-Id: Ie924018aa6fa0bc758ee034f8522f32a87fff391

diff --git a/src/lib/VSDContentCollector.cpp b/src/lib/VSDContentCollector.cpp
index e617244..d7365b0 100644
--- a/src/lib/VSDContentCollector.cpp
+++ b/src/lib/VSDContentCollector.cpp
@@ -11,7 +11,6 @@
 #include <stack>
 #include <boost/spirit/include/classic.hpp>
 #include <unicode/ucnv.h>
-#include <unicode/utypes.h>
 #include <unicode/utf8.h>
 
 #include "VSDContentCollector.h"
@@ -33,27 +32,6 @@ static unsigned bitmapId = 0;
 
 #define SURROGATE_VALUE(h,l) (((h) - 0xd800) * 0x400 + (l) - 0xdc00 + 0x10000)
 
-namespace
-{
-
-static void _appendUCS4(librevenge::RVNGString &text, UChar32 ucs4Character)
-{
-  // Convert carriage returns to new line characters
-  // Writerperfect/LibreOffice will replace them by <text:line-break>
-  if (ucs4Character == (UChar32) 0x0d || ucs4Character == (UChar32) 0x0e)
-    ucs4Character = (UChar32) '\n';
-
-  unsigned char outbuf[U8_MAX_LENGTH+1];
-  int i = 0;
-  U8_APPEND_UNSAFE(&outbuf[0], i, ucs4Character);
-  outbuf[i] = 0;
-
-  text.append((char *)outbuf);
-}
-
-} // anonymous namespace
-
-
 libvisio::VSDContentCollector::VSDContentCollector(
   librevenge::RVNGDrawingInterface *painter,
   std::vector<std::map<unsigned, XForm> > &groupXFormsSequence,
@@ -2876,7 +2854,7 @@ void libvisio::VSDContentCollector::appendCharacters(librevenge::RVNGString &tex
         ucs4Character = 0x20;
       else
         ucs4Character = symbolmap[*iter - 0x20];
-      _appendUCS4(text, ucs4Character);
+      appendUCS4(text, ucs4Character);
     }
   }
   else
@@ -2940,7 +2918,7 @@ void libvisio::VSDContentCollector::appendCharacters(librevenge::RVNGString &tex
           if (0x1e == ucs4Character)
             _appendField(text);
           else
-            _appendUCS4(text, ucs4Character);
+            appendUCS4(text, ucs4Character);
         }
       }
     }
@@ -2966,7 +2944,7 @@ void libvisio::VSDContentCollector::appendCharacters(librevenge::RVNGString &tex
         if (0xfffc == ucs4Character)
           _appendField(text);
         else
-          _appendUCS4(text, ucs4Character);
+          appendUCS4(text, ucs4Character);
       }
     }
   }
diff --git a/src/lib/libvisio_utils.cpp b/src/lib/libvisio_utils.cpp
index b137e24..e622417 100644
--- a/src/lib/libvisio_utils.cpp
+++ b/src/lib/libvisio_utils.cpp
@@ -107,6 +107,20 @@ const librevenge::RVNGString libvisio::getColourString(const Colour &c)
   return sColour;
 }
 
+void libvisio::appendUCS4(librevenge::RVNGString &text, UChar32 ucs4Character)
+{
+  // Convert carriage returns to new line characters
+  // Writerperfect/LibreOffice will replace them by <text:line-break>
+  if (ucs4Character == (UChar32) 0x0d || ucs4Character == (UChar32) 0x0e)
+    ucs4Character = (UChar32) '\n';
+
+  unsigned char outbuf[U8_MAX_LENGTH+1];
+  int i = 0;
+  U8_APPEND_UNSAFE(&outbuf[0], i, ucs4Character);
+  outbuf[i] = 0;
+
+  text.append((char *)outbuf);
+}
 
 
 /* vim:set shiftwidth=2 softtabstop=2 expandtab: */
diff --git a/src/lib/libvisio_utils.h b/src/lib/libvisio_utils.h
index 08ebb04..60be13f 100644
--- a/src/lib/libvisio_utils.h
+++ b/src/lib/libvisio_utils.h
@@ -47,6 +47,7 @@ typedef unsigned __int64 uint64_t;
 
 #include <librevenge/librevenge.h>
 #include <librevenge-stream/librevenge-stream.h>
+#include <unicode/utypes.h>
 
 // debug message includes source file and line number
 //#define VERBOSE_DEBUG 1
@@ -79,6 +80,8 @@ double readDouble(librevenge::RVNGInputStream *input);
 
 const librevenge::RVNGString getColourString(const Colour &c);
 
+void appendUCS4(librevenge::RVNGString &text, UChar32 ucs4Character);
+
 class EndOfStreamException
 {
 };


More information about the Libreoffice-commits mailing list