[Libreoffice-commits] libvisio.git: 2 commits - src/lib
Miklos Vajna
vmiklos at collabora.co.uk
Mon Dec 1 06:03:28 PST 2014
src/lib/Makefile.am | 2
src/lib/VSDContentCollector.cpp | 28 ------
src/lib/VSDMetaData.cpp | 180 ++++++++++++++++++++++++++++++++++++++++
src/lib/VSDMetaData.h | 51 +++++++++++
src/lib/VSDParser.cpp | 27 +++++-
src/lib/VSDParser.h | 5 -
src/lib/VisioDocument.cpp | 2
src/lib/libvisio_utils.cpp | 14 +++
src/lib/libvisio_utils.h | 3
9 files changed, 283 insertions(+), 29 deletions(-)
New commits:
commit ccfd70d06cdd47965bccf24b6dfa6ca3d472517e
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Mon Dec 1 15:02:16 2014 +0100
fdo#86729 VSD: import metadata
Only title as a start.
Change-Id: If876d0985bb68d87600d805368aff404dfd6fba4
diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
index e2daff8..f07c5ac 100644
--- a/src/lib/Makefile.am
+++ b/src/lib/Makefile.am
@@ -39,6 +39,7 @@ libvisio_@VSD_MAJOR_VERSION@_@VSD_MINOR_VERSION@_la_SOURCES = \
VSDPages.cpp \
VSDParagraphList.cpp \
VSDParser.cpp \
+ VSDMetaData.cpp \
VSDShapeList.cpp \
VSDStencils.cpp \
VSDStyles.cpp \
@@ -58,6 +59,7 @@ libvisio_@VSD_MAJOR_VERSION@_@VSD_MINOR_VERSION@_la_SOURCES = \
VSDPages.h \
VSDParagraphList.h \
VSDParser.h \
+ VSDMetaData.h \
VSDShapeList.h \
VSDStencils.h \
VSDStyles.h \
diff --git a/src/lib/VSDMetaData.cpp b/src/lib/VSDMetaData.cpp
new file mode 100644
index 0000000..209cc34
--- /dev/null
+++ b/src/lib/VSDMetaData.cpp
@@ -0,0 +1,180 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * This file is part of the libvisio project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "VSDMetaData.h"
+#include <unicode/ucnv.h>
+
+// Starts out with no property identifiers, no typed values and no metadata.
+libvisio::VSDMetaData::VSDMetaData()
+  : m_idsAndOffsets(), m_typedPropertyValues(), m_metaData()
+{
+}
+
+// Nothing to release by hand: every member cleans up after itself.
+libvisio::VSDMetaData::~VSDMetaData()
+{
+}
+
+// Parses the property set stream in `input` and fills the metadata list.
+// Returns false when `input` is null, true otherwise.
+bool libvisio::VSDMetaData::parse(librevenge::RVNGInputStream *input)
+{
+  const bool hasInput = (input != 0);
+  if (hasInput)
+    readPropertySetStream(input);
+
+  return hasInput;
+}
+
+// Skips the fixed-size header of the property set stream, then reads the
+// offset of the first property set and parses that set.
+void libvisio::VSDMetaData::readPropertySetStream(librevenge::RVNGInputStream *input)
+{
+  // Sizes in bytes of the header fields that are skipped, in stream order:
+  // ByteOrder, Version, SystemIdentifier, CLSID, NumPropertySets, FMTID0.
+  static const long headerFieldSizes[] = { 2, 2, 4, 16, 4, 16 };
+  const size_t fieldCount = sizeof(headerFieldSizes) / sizeof(headerFieldSizes[0]);
+
+  for (size_t field = 0; field < fieldCount; ++field)
+    input->seek(headerFieldSizes[field], librevenge::RVNG_SEEK_CUR);
+
+  // Offset0: absolute position of the first property set.
+  const uint32_t firstSetOffset = readU32(input);
+  readPropertySet(input, firstSetOffset);
+}
+
+// Reads the property set that starts at `offset`: first the table of
+// (identifier, offset) pairs, then the typed value of every property.
+void libvisio::VSDMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset)
+{
+  input->seek(offset, librevenge::RVNG_SEEK_SET);
+
+  // Step over the 4-byte Size field.
+  input->seek(4, librevenge::RVNG_SEEK_CUR);
+  const uint32_t numProperties = readU32(input);
+
+  for (uint32_t entry = 0; entry < numProperties; ++entry)
+    readPropertyIdentifierAndOffset(input);
+
+  // Property offsets are relative to the start of the property set.
+  for (uint32_t index = 0; index < numProperties && index < m_idsAndOffsets.size(); ++index)
+    readTypedPropertyValue(input, index, offset + m_idsAndOffsets[index].second);
+}
+
+#define CODEPAGE_PROPERTY_IDENTIFIER 0x00000001
+
+// Returns the code page stored for the property set, or 0 when the CodePage
+// property is missing or its value has not been read (yet).
+uint32_t libvisio::VSDMetaData::getCodePage()
+{
+  for (size_t i = 0; i < m_idsAndOffsets.size(); ++i)
+  {
+    if (m_idsAndOffsets[i].first != CODEPAGE_PROPERTY_IDENTIFIER)
+      continue;
+
+    // m_typedPropertyValues is keyed by property index and only holds the
+    // VT_I2 values, so its size says nothing about whether index i is
+    // present. Look the key up instead; this also avoids operator[]
+    // inserting a default entry as a side effect.
+    const std::map<uint16_t, uint16_t>::const_iterator value = m_typedPropertyValues.find(static_cast<uint16_t>(i));
+    if (value == m_typedPropertyValues.end())
+      break;
+    return value->second;
+  }
+
+  return 0;
+}
+
+// Reads one (property identifier, offset) entry and appends it to
+// m_idsAndOffsets. The identifier is stored first in the stream.
+void libvisio::VSDMetaData::readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input)
+{
+  std::pair<uint32_t, uint32_t> entry;
+  entry.first = readU32(input);
+  entry.second = readU32(input);
+  m_idsAndOffsets.push_back(entry);
+}
+
+#define VT_I2 0x0002
+#define VT_LPSTR 0x001E
+
+#define PIDSI_TITLE 0x00000002
+
+// Reads the typed value located at the absolute position `offset`.
+// VT_I2 values are remembered under `index`; a non-empty VT_LPSTR value is
+// stored as "dc:title" when property `index` is the title. Any other type is
+// ignored.
+void libvisio::VSDMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset)
+{
+  input->seek(offset, librevenge::RVNG_SEEK_SET);
+  const uint16_t valueType = readU16(input);
+  // Two bytes of padding follow the type.
+  input->seek(2, librevenge::RVNG_SEEK_CUR);
+
+  switch (valueType)
+  {
+  case VT_I2:
+  {
+    // Read before touching the map, so a failed read stores nothing.
+    const uint16_t number = readU16(input);
+    m_typedPropertyValues[index] = number;
+    break;
+  }
+  case VT_LPSTR:
+  {
+    librevenge::RVNGString text = readCodePageString(input);
+    if (text.empty() || index >= m_idsAndOffsets.size())
+      break;
+
+    if (m_idsAndOffsets[index].first == PIDSI_TITLE)
+      m_metaData.insert("dc:title", text);
+    break;
+  }
+  default:
+    break;
+  }
+}
+
+// Reads a CodePageString: a 32-bit byte count followed by that many 8-bit
+// characters in the property set's code page. Returns the text as UTF-8, or
+// an empty string when there is no text or the code page is not handled
+// (only 65001/UTF-8 and 1252 are).
+librevenge::RVNGString libvisio::VSDMetaData::readCodePageString(librevenge::RVNGInputStream *input)
+{
+  const uint32_t size = readU32(input);
+
+  // Consume all `size` bytes, but keep only what precedes the first NUL: the
+  // characters are null-terminated, and the terminator (or anything after
+  // it) must not end up inside the returned string.
+  std::vector<unsigned char> characters;
+  bool terminated = false;
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    const unsigned char character = readU8(input);
+    if (character == 0)
+      terminated = true;
+    if (!terminated)
+      characters.push_back(character);
+  }
+
+  librevenge::RVNGString string;
+
+  // Nothing to convert. Returning here also keeps &characters[0] below from
+  // being evaluated on an empty vector.
+  if (characters.empty())
+    return string;
+
+  const uint32_t codepage = getCodePage();
+
+  if (codepage == 65001)
+  {
+    // http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx
+    // says this is UTF-8.
+    for (std::vector<unsigned char>::const_iterator i = characters.begin(); i != characters.end(); ++i)
+      string.append((const char)*i);
+  }
+  else
+  {
+    UErrorCode status = U_ZERO_ERROR;
+    UConverter *conv = 0;
+
+    switch (codepage)
+    {
+    case 1252:
+      // http://msdn.microsoft.com/en-us/goglobal/bb964654
+      conv = ucnv_open("windows-1252", &status);
+      break;
+    }
+
+    if (U_SUCCESS(status) && conv)
+    {
+      const char *src = reinterpret_cast<const char *>(&characters[0]);
+      const char *srcLimit = src + characters.size();
+      while (src < srcLimit)
+      {
+        UChar32 ucs4Character = ucnv_getNextUChar(conv, &src, srcLimit, &status);
+        // Once status holds a failure, ICU calls return immediately without
+        // advancing src, so continuing would loop forever.
+        if (U_FAILURE(status))
+          break;
+        if (U_IS_UNICODE_CHAR(ucs4Character))
+          appendUCS4(string, ucs4Character);
+      }
+    }
+
+    if (conv)
+      ucnv_close(conv);
+  }
+
+  return string;
+}
+
+const librevenge::RVNGPropertyList &libvisio::VSDMetaData::getMetaData() // Read-only access to the collected properties; "dc:title" is the only key this class inserts.
+{
+ return m_metaData;
+}
+
+/* vim:set shiftwidth=2 softtabstop=2 expandtab: */
diff --git a/src/lib/VSDMetaData.h b/src/lib/VSDMetaData.h
new file mode 100644
index 0000000..c185894
--- /dev/null
+++ b/src/lib/VSDMetaData.h
@@ -0,0 +1,51 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * This file is part of the libvisio project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef __VSDMETADATA_H__
+#define __VSDMETADATA_H__
+
+#include <vector>
+#include <utility>
+#include <map>
+#include <librevenge-stream/librevenge-stream.h>
+#include <librevenge/librevenge.h>
+#include "libvisio_utils.h"
+
+namespace libvisio
+{
+
+class VSDMetaData // Parses a property set stream and collects the document metadata found in it.
+{
+public:
+ VSDMetaData();
+ ~VSDMetaData();
+ bool parse(librevenge::RVNGInputStream *input); // Returns false only when input is null; otherwise parses the stream and returns true.
+ const librevenge::RVNGPropertyList &getMetaData(); // Properties gathered by parse(); currently only "dc:title".
+
+private:
+ VSDMetaData(const VSDMetaData &); // Declared private and given no definition in VSDMetaData.cpp: copying is not supported.
+ VSDMetaData &operator=(const VSDMetaData &); // Likewise for assignment.
+
+ void readPropertySetStream(librevenge::RVNGInputStream *input); // Skips the stream header, then reads the first property set.
+ void readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset); // offset: absolute position of the property set.
+ void readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input); // Appends one entry to m_idsAndOffsets.
+ void readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset); // index: position in m_idsAndOffsets; offset: absolute position of the value.
+ librevenge::RVNGString readCodePageString(librevenge::RVNGInputStream *input); // Reads a length-prefixed string and converts it according to getCodePage().
+
+ uint32_t getCodePage(); // Value of the CodePage property, or 0 when it is not available.
+
+ std::vector< std::pair<uint32_t, uint32_t> > m_idsAndOffsets; // (property identifier, offset) pairs in the order they were read.
+ std::map<uint16_t, uint16_t> m_typedPropertyValues; // VT_I2 values, keyed by the property's index in m_idsAndOffsets.
+ librevenge::RVNGPropertyList m_metaData; // Result handed out by getMetaData().
+};
+
+} // namespace libvisio
+
+#endif // __VSDMETADATA_H__
+/* vim:set shiftwidth=2 softtabstop=2 expandtab: */
diff --git a/src/lib/VSDParser.cpp b/src/lib/VSDParser.cpp
index c5c7541..b69e5fd 100644
--- a/src/lib/VSDParser.cpp
+++ b/src/lib/VSDParser.cpp
@@ -19,9 +19,10 @@
#include "VSDDocumentStructure.h"
#include "VSDContentCollector.h"
#include "VSDStylesCollector.h"
+#include "VSDMetaData.h"
-libvisio::VSDParser::VSDParser(librevenge::RVNGInputStream *input, librevenge::RVNGDrawingInterface *painter)
- : m_input(input), m_painter(painter), m_header(), m_collector(0), m_shapeList(), m_currentLevel(0),
+libvisio::VSDParser::VSDParser(librevenge::RVNGInputStream *input, librevenge::RVNGDrawingInterface *painter, librevenge::RVNGInputStream *container)
+ : m_input(input), m_painter(painter), m_container(container), m_header(), m_collector(0), m_shapeList(), m_currentLevel(0),
m_stencils(), m_currentStencil(0), m_shape(), m_isStencilStarted(false), m_isInStyles(false),
m_currentShapeLevel(0), m_currentShapeID(MINUS_ONE), m_extractStencils(false), m_colours(),
m_isBackgroundPage(false), m_isShapeStarted(false), m_shadowOffsetX(0.0), m_shadowOffsetY(0.0),
@@ -139,6 +140,9 @@ bool libvisio::VSDParser::parseMain()
VSDContentCollector contentCollector(m_painter, groupXFormsSequence, groupMembershipsSequence, documentPageShapeOrders, styles, m_stencils);
m_collector = &contentCollector;
+ if (m_container)
+ parseMetaData();
+
VSD_DEBUG_MSG(("VSDParser::parseMain 2nd pass\n"));
if (!parseDocument(&trailerStream, shift))
return false;
@@ -146,6 +150,25 @@ bool libvisio::VSDParser::parseMain()
return true;
}
+// Reads the "\x05SummaryInformation" sub-stream of the container and passes
+// the metadata found there to the current collector.
+// Returns false when there is no container, the container is not structured
+// or the sub-stream does not exist; true once the metadata was collected.
+// Exceptions thrown while parsing or collecting are propagated to the caller.
+bool libvisio::VSDParser::parseMetaData()
+{
+  if (!m_container)
+    return false;
+  m_container->seek(0, librevenge::RVNG_SEEK_SET);
+  if (!m_container->isStructured())
+    return false;
+  librevenge::RVNGInputStream *stream = m_container->getSubStreamByName("\x05SummaryInformation");
+  if (!stream)
+    return false;
+
+  try
+  {
+    VSDMetaData metaData;
+    metaData.parse(stream);
+    m_collector->collectMetaData(metaData.getMetaData());
+  }
+  catch (...)
+  {
+    // The sub-stream is owned here; do not leak it when parsing throws.
+    delete stream;
+    throw;
+  }
+
+  delete stream;
+  return true;
+}
+
bool libvisio::VSDParser::parseDocument(librevenge::RVNGInputStream *input, unsigned shift)
{
try
diff --git a/src/lib/VSDParser.h b/src/lib/VSDParser.h
index 8df2cac..e8da164 100644
--- a/src/lib/VSDParser.h
+++ b/src/lib/VSDParser.h
@@ -46,7 +46,7 @@ struct Pointer
class VSDParser
{
public:
- explicit VSDParser(librevenge::RVNGInputStream *input, librevenge::RVNGDrawingInterface *painter);
+ explicit VSDParser(librevenge::RVNGInputStream *input, librevenge::RVNGDrawingInterface *painter, librevenge::RVNGInputStream *container = 0);
virtual ~VSDParser();
bool parseMain();
bool extractStencils();
@@ -115,6 +115,8 @@ protected:
// parser of one pass
bool parseDocument(librevenge::RVNGInputStream *input, unsigned shift);
+ bool parseMetaData();
+
// Stream handlers
void handleStreams(librevenge::RVNGInputStream *input, unsigned ptrType, unsigned shift, unsigned level);
void handleStream(const Pointer &ptr, unsigned idx, unsigned level);
@@ -135,6 +137,7 @@ protected:
librevenge::RVNGInputStream *m_input;
librevenge::RVNGDrawingInterface *m_painter;
+ librevenge::RVNGInputStream *m_container;
ChunkHeader m_header;
VSDCollector *m_collector;
VSDShapeList m_shapeList;
diff --git a/src/lib/VisioDocument.cpp b/src/lib/VisioDocument.cpp
index 951fb28..5b27404 100644
--- a/src/lib/VisioDocument.cpp
+++ b/src/lib/VisioDocument.cpp
@@ -158,7 +158,7 @@ static bool parseBinaryVisioDocument(librevenge::RVNGInputStream *input, libreve
parser = new libvisio::VSD6Parser(docStream, painter);
break;
case 11:
- parser = new libvisio::VSDParser(docStream, painter);
+ parser = new libvisio::VSDParser(docStream, painter, input);
break;
default:
break;
commit 07566047a76defe96447c7fd0757c6dda0ba5b2a
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Mon Dec 1 12:39:13 2014 +0100
Move _appendUCS4() to libvisio_utils
Change-Id: Ie924018aa6fa0bc758ee034f8522f32a87fff391
diff --git a/src/lib/VSDContentCollector.cpp b/src/lib/VSDContentCollector.cpp
index e617244..d7365b0 100644
--- a/src/lib/VSDContentCollector.cpp
+++ b/src/lib/VSDContentCollector.cpp
@@ -11,7 +11,6 @@
#include <stack>
#include <boost/spirit/include/classic.hpp>
#include <unicode/ucnv.h>
-#include <unicode/utypes.h>
#include <unicode/utf8.h>
#include "VSDContentCollector.h"
@@ -33,27 +32,6 @@ static unsigned bitmapId = 0;
#define SURROGATE_VALUE(h,l) (((h) - 0xd800) * 0x400 + (l) - 0xdc00 + 0x10000)
-namespace
-{
-
-static void _appendUCS4(librevenge::RVNGString &text, UChar32 ucs4Character)
-{
- // Convert carriage returns to new line characters
- // Writerperfect/LibreOffice will replace them by <text:line-break>
- if (ucs4Character == (UChar32) 0x0d || ucs4Character == (UChar32) 0x0e)
- ucs4Character = (UChar32) '\n';
-
- unsigned char outbuf[U8_MAX_LENGTH+1];
- int i = 0;
- U8_APPEND_UNSAFE(&outbuf[0], i, ucs4Character);
- outbuf[i] = 0;
-
- text.append((char *)outbuf);
-}
-
-} // anonymous namespace
-
-
libvisio::VSDContentCollector::VSDContentCollector(
librevenge::RVNGDrawingInterface *painter,
std::vector<std::map<unsigned, XForm> > &groupXFormsSequence,
@@ -2876,7 +2854,7 @@ void libvisio::VSDContentCollector::appendCharacters(librevenge::RVNGString &tex
ucs4Character = 0x20;
else
ucs4Character = symbolmap[*iter - 0x20];
- _appendUCS4(text, ucs4Character);
+ appendUCS4(text, ucs4Character);
}
}
else
@@ -2940,7 +2918,7 @@ void libvisio::VSDContentCollector::appendCharacters(librevenge::RVNGString &tex
if (0x1e == ucs4Character)
_appendField(text);
else
- _appendUCS4(text, ucs4Character);
+ appendUCS4(text, ucs4Character);
}
}
}
@@ -2966,7 +2944,7 @@ void libvisio::VSDContentCollector::appendCharacters(librevenge::RVNGString &tex
if (0xfffc == ucs4Character)
_appendField(text);
else
- _appendUCS4(text, ucs4Character);
+ appendUCS4(text, ucs4Character);
}
}
}
diff --git a/src/lib/libvisio_utils.cpp b/src/lib/libvisio_utils.cpp
index b137e24..e622417 100644
--- a/src/lib/libvisio_utils.cpp
+++ b/src/lib/libvisio_utils.cpp
@@ -107,6 +107,20 @@ const librevenge::RVNGString libvisio::getColourString(const Colour &c)
return sColour;
}
+// Appends one code point to `text`, encoded as UTF-8.
+void libvisio::appendUCS4(librevenge::RVNGString &text, UChar32 ucs4Character)
+{
+  // 0x0d and 0x0e are turned into new line characters;
+  // Writerperfect/LibreOffice will replace them by <text:line-break>
+  const bool isLineBreak = (ucs4Character == (UChar32) 0x0d) || (ucs4Character == (UChar32) 0x0e);
+  const UChar32 codePoint = isLineBreak ? (UChar32) '\n' : ucs4Character;
+
+  // Room for the longest UTF-8 sequence plus the terminating NUL.
+  unsigned char utf8[U8_MAX_LENGTH+1];
+  int length = 0;
+  U8_APPEND_UNSAFE(utf8, length, codePoint);
+  utf8[length] = 0;
+
+  text.append((char *)utf8);
+}
/* vim:set shiftwidth=2 softtabstop=2 expandtab: */
diff --git a/src/lib/libvisio_utils.h b/src/lib/libvisio_utils.h
index 08ebb04..60be13f 100644
--- a/src/lib/libvisio_utils.h
+++ b/src/lib/libvisio_utils.h
@@ -47,6 +47,7 @@ typedef unsigned __int64 uint64_t;
#include <librevenge/librevenge.h>
#include <librevenge-stream/librevenge-stream.h>
+#include <unicode/utypes.h>
// debug message includes source file and line number
//#define VERBOSE_DEBUG 1
@@ -79,6 +80,8 @@ double readDouble(librevenge::RVNGInputStream *input);
const librevenge::RVNGString getColourString(const Colour &c);
+void appendUCS4(librevenge::RVNGString &text, UChar32 ucs4Character);
+
class EndOfStreamException
{
};
More information about the Libreoffice-commits
mailing list