[Libreoffice-commits] mso-dumper.git: 7 commits - msodumper/vsdstream.py
Miklos Vajna
vmiklos at collabora.co.uk
Wed Nov 26 04:11:35 PST 2014
msodumper/vsdstream.py | 202 +++++++++++++++++++++++++++++++++++++++----------
1 file changed, 164 insertions(+), 38 deletions(-)
New commits:
commit 18e52f3e1c61d1d644b427cd354d694f6a457f34
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Wed Nov 26 12:46:20 2014 +0100
VSD: start dumping the user-defined set
diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py
index 8dcd1bf..d2403a3 100644
--- a/msodumper/vsdstream.py
+++ b/msodumper/vsdstream.py
@@ -209,19 +209,22 @@ class PropertySetStream(DOCDirStream):
self.printAndSet("CLSID2", self.readuInt32())
self.printAndSet("CLSID3", self.readuInt32())
self.printAndSet("NumPropertySets", self.readuInt32())
- GUID(self, "FMTID").dump()
+ GUID(self, "FMTID0").dump()
self.printAndSet("Offset0", self.readuInt32())
+ PropertySet(self, self.Offset0).dump()
if self.NumPropertySets == 0x00000002:
- print '<todo what="PropertySetStream::dump: handle NumPropertySets == 0x00000002"/>'
- PropertySet(self).dump()
+ GUID(self, "FMTID1").dump()
+ self.printAndSet("Offset1", self.readuInt32())
+ self.propertyIds = {}
+ PropertySet(self, self.Offset1).dump()
print '</propertySetStream>'
class PropertySet(DOCDirStream):
- def __init__(self, parent):
+ def __init__(self, parent, offset):
DOCDirStream.__init__(self, parent.bytes)
self.parent = parent
- self.pos = parent.Offset0
+ self.pos = offset
def getCodePage(self):
for index, idAndOffset in enumerate(self.idsAndOffsets):
commit c2cd85eddf02c74e0f26955e6e847ebc61d79593
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Wed Nov 26 12:36:52 2014 +0100
VSD: dump PIDDSI
diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py
index 380c7ef..8dcd1bf 100644
--- a/msodumper/vsdstream.py
+++ b/msodumper/vsdstream.py
@@ -119,7 +119,34 @@ def createVSDFile(chars, params):
return VSDFile(chars, params)
-PropertyIdentifierDocumentSummaryInformation = {
+PIDDSI = {
+ 0x00000001: "PIDDSI_CODEPAGE",
+ 0x00000002: "PIDDSI_CATEGORY",
+ 0x00000003: "PIDDSI_PRESFORMAT",
+ 0x00000004: "PIDDSI_BYTECOUNT",
+ 0x00000005: "PIDDSI_LINECOUNT",
+ 0x00000006: "PIDDSI_PARACOUNT",
+ 0x00000007: "PIDDSI_SLIDECOUNT",
+ 0x00000008: "PIDDSI_NOTECOUNT",
+ 0x00000009: "PIDDSI_HIDDENCOUNT",
+ 0x0000000A: "PIDDSI_MMCLIPCOUNT",
+ 0x0000000B: "PIDDSI_SCALE",
+ 0x0000000C: "PIDDSI_HEADINGPAIR",
+ 0x0000000D: "PIDDSI_DOCPARTS",
+ 0x0000000E: "PIDDSI_MANAGER",
+ 0x0000000F: "PIDDSI_COMPANY",
+ 0x00000010: "PIDDSI_LINKSDIRTY",
+ 0x00000011: "PIDDSI_CCHWITHSPACES",
+ 0x00000013: "PIDDSI_SHAREDDOC",
+ 0x00000014: "PIDDSI_LINKBASE",
+ 0x00000015: "PIDDSI_HLINKS",
+ 0x00000016: "PIDDSI_HYPERLINKSCHANGED",
+ 0x00000017: "PIDDSI_VERSION",
+ 0x00000018: "PIDDSI_DIGSIG",
+ 0x0000001A: "PIDDSI_CONTENTTYPE",
+ 0x0000001B: "PIDDSI_CONTENTSTATUS",
+ 0x0000001C: "PIDDSI_LANGUAGE",
+ 0x0000001D: "PIDDSI_DOCVERSION",
}
@@ -129,11 +156,11 @@ class DocumentSummaryInformationStream(DOCDirStream):
def dump(self):
print '<stream name="\\x05DocumentSummaryInformation" size="%d">' % self.size
- PropertySetStream(self, PropertyIdentifierDocumentSummaryInformation).dump()
+ PropertySetStream(self, PIDDSI).dump()
print '</stream>'
-PropertyIdentifierSummaryInformation = {
+PIDSI = {
0x00000001: "CODEPAGE_PROPERTY_IDENTIFIER",
0x00000002: "PIDSI_TITLE",
0x00000003: "PIDSI_SUBJECT",
@@ -162,7 +189,7 @@ class SummaryInformationStream(DOCDirStream):
def dump(self):
print '<stream name="\\x05SummaryInformation" size="%d">' % self.size
- PropertySetStream(self, PropertyIdentifierSummaryInformation).dump()
+ PropertySetStream(self, PIDSI).dump()
print '</stream>'
commit 7432d469dd0333bdb7156484c6fb32fb03e1d173
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Wed Nov 26 11:17:04 2014 +0100
VSD: dump GUIDs properly
diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py
index 481f086..380c7ef 100644
--- a/msodumper/vsdstream.py
+++ b/msodumper/vsdstream.py
@@ -182,10 +182,7 @@ class PropertySetStream(DOCDirStream):
self.printAndSet("CLSID2", self.readuInt32())
self.printAndSet("CLSID3", self.readuInt32())
self.printAndSet("NumPropertySets", self.readuInt32())
- self.printAndSet("FMTID00", self.readuInt32())
- self.printAndSet("FMTID01", self.readuInt32())
- self.printAndSet("FMTID02", self.readuInt32())
- self.printAndSet("FMTID03", self.readuInt32())
+ GUID(self, "FMTID").dump()
self.printAndSet("Offset0", self.readuInt32())
if self.NumPropertySets == 0x00000002:
print '<todo what="PropertySetStream::dump: handle NumPropertySets == 0x00000002"/>'
@@ -363,4 +360,23 @@ class CodePageString(DOCDirStream):
print '<todo what="CodePageString::dump: unhandled codepage %s"/>' % codepage
print '</%s>' % self.name
+
+class GUID(DOCDirStream):
+ def __init__(self, parent, name):
+ DOCDirStream.__init__(self, parent.bytes)
+ self.pos = parent.pos
+ self.parent = parent
+ self.name = name
+
+ def dump(self):
+ Data1 = self.readuInt32()
+ Data2 = self.readuInt16()
+ Data3 = self.readuInt16()
+ Data4 = []
+ for i in range(8):
+ Data4.append(self.readuInt8())
+ value = "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x" % (Data1, Data2, Data3, Data4[0], Data4[1], Data4[2], Data4[3], Data4[4], Data4[5], Data4[6], Data4[7])
+ print '<%s type="GUID" value="%s"/>' % (self.name, value)
+ self.parent.pos = self.pos
+
# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
commit c4c94fa5d494c105f17d41e3d5e1e3973cf44e8c
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Wed Nov 26 10:37:52 2014 +0100
vsd: complete PropertyType enumeration
diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py
index 5132c87..481f086 100644
--- a/msodumper/vsdstream.py
+++ b/msodumper/vsdstream.py
@@ -119,12 +119,17 @@ def createVSDFile(chars, params):
return VSDFile(chars, params)
+PropertyIdentifierDocumentSummaryInformation = {
+}
+
+
class DocumentSummaryInformationStream(DOCDirStream):
def __init__(self, bytes, params, doc):
DOCDirStream.__init__(self, bytes, params, "\x05DocumentSummaryInformation", doc=doc)
def dump(self):
print '<stream name="\\x05DocumentSummaryInformation" size="%d">' % self.size
+ PropertySetStream(self, PropertyIdentifierDocumentSummaryInformation).dump()
print '</stream>'
@@ -264,6 +269,45 @@ PropertyType = {
0x0047: "VT_CF",
0x0048: "VT_CLSID",
0x0049: "VT_VERSIONED_STREAM",
+ 0x1002: "VT_VECTOR | VT_I2",
+ 0x1003: "VT_VECTOR | VT_I4",
+ 0x1004: "VT_VECTOR | VT_R4",
+ 0x1005: "VT_VECTOR | VT_R8",
+ 0x1006: "VT_VECTOR | VT_CY",
+ 0x1007: "VT_VECTOR | VT_DATE",
+ 0x1008: "VT_VECTOR | VT_BSTR",
+ 0x100A: "VT_VECTOR | VT_ERROR",
+ 0x100B: "VT_VECTOR | VT_BOOL",
+ 0x100C: "VT_VECTOR | VT_VARIANT",
+ 0x1010: "VT_VECTOR | VT_I1",
+ 0x1011: "VT_VECTOR | VT_UI1",
+ 0x1012: "VT_VECTOR | VT_UI2",
+ 0x1013: "VT_VECTOR | VT_UI4",
+ 0x1014: "VT_VECTOR | VT_I8",
+ 0x1015: "VT_VECTOR | VT_UI8",
+ 0x101E: "VT_VECTOR | VT_LPSTR",
+ 0x101F: "VT_VECTOR | VT_LPWSTR",
+ 0x1040: "VT_VECTOR | VT_FILETIME",
+ 0x1047: "VT_VECTOR | VT_CF",
+ 0x1048: "VT_VECTOR | VT_CLSID",
+ 0x2002: "VT_ARRAY | VT_I2",
+ 0x2003: "VT_ARRAY | VT_I4",
+ 0x2004: "VT_ARRAY | VT_R4",
+ 0x2005: "VT_ARRAY | VT_R8",
+ 0x2006: "VT_ARRAY | VT_CY",
+ 0x2007: "VT_ARRAY | VT_DATE",
+ 0x2008: "VT_ARRAY | VT_BSTR",
+ 0x200A: "VT_ARRAY | VT_ERROR",
+ 0x200B: "VT_ARRAY | VT_BOOL",
+ 0x200C: "VT_ARRAY | VT_VARIANT",
+ 0x200E: "VT_ARRAY | VT_DECIMAL",
+ 0x2010: "VT_ARRAY | VT_I1",
+ 0x2011: "VT_ARRAY | VT_UI1",
+ 0x2012: "VT_ARRAY | VT_UI2",
+ 0x2013: "VT_ARRAY | VT_UI4",
+ 0x2016: "VT_ARRAY | VT_INT",
+ 0x2017: "VT_ARRAY | VT_UINT",
+
}
commit afbd91f0a3d180def770c4a070ddbd57cc6849c8
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Wed Nov 26 10:25:58 2014 +0100
vsd: PIDSI is specific to the SummaryInformation stream
diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py
index f91380c..5132c87 100644
--- a/msodumper/vsdstream.py
+++ b/msodumper/vsdstream.py
@@ -46,6 +46,8 @@ class VSDFile:
def getStreamFromBytes(self, name, bytes):
if name == "\x05SummaryInformation":
return SummaryInformationStream(bytes, self.params, doc=self)
+ elif name == "\x05DocumentSummaryInformation":
+ return DocumentSummaryInformationStream(bytes, self.params, doc=self)
else:
return DOCDirStream(bytes, self.params, name, doc=self)
@@ -117,20 +119,53 @@ def createVSDFile(chars, params):
return VSDFile(chars, params)
+class DocumentSummaryInformationStream(DOCDirStream):
+ def __init__(self, bytes, params, doc):
+ DOCDirStream.__init__(self, bytes, params, "\x05DocumentSummaryInformation", doc=doc)
+
+ def dump(self):
+ print '<stream name="\\x05DocumentSummaryInformation" size="%d">' % self.size
+ print '</stream>'
+
+
+PropertyIdentifierSummaryInformation = {
+ 0x00000001: "CODEPAGE_PROPERTY_IDENTIFIER",
+ 0x00000002: "PIDSI_TITLE",
+ 0x00000003: "PIDSI_SUBJECT",
+ 0x00000004: "PIDSI_AUTHOR",
+ 0x00000005: "PIDSI_KEYWORDS",
+ 0x00000006: "PIDSI_COMMENTS",
+ 0x00000007: "PIDSI_TEMPLATE",
+ 0x00000008: "PIDSI_LASTAUTHOR",
+ 0x00000009: "PIDSI_REVNUMBER",
+ 0x0000000A: "PIDSI_EDITTIME",
+ 0x0000000B: "PIDSI_LASTPRINTED",
+ 0x0000000C: "PIDSI_CREATE_DTM",
+ 0x0000000D: "PIDSI_LASTSAVE_DTM",
+ 0x0000000E: "PIDSI_PAGECOUNT",
+ 0x0000000F: "PIDSI_WORDCOUNT",
+ 0x00000010: "PIDSI_CHARCOUNT",
+ 0x00000011: "PIDSI_THUMBNAIL",
+ 0x00000012: "PIDSI_APPNAME",
+ 0x00000013: "PIDSI_DOC_SECURITY",
+}
+
+
class SummaryInformationStream(DOCDirStream):
def __init__(self, bytes, params, doc):
DOCDirStream.__init__(self, bytes, params, "\x05SummaryInformation", doc=doc)
def dump(self):
print '<stream name="\\x05SummaryInformation" size="%d">' % self.size
- PropertySetStream(self).dump()
+ PropertySetStream(self, PropertyIdentifierSummaryInformation).dump()
print '</stream>'
class PropertySetStream(DOCDirStream):
- def __init__(self, parent):
+ def __init__(self, parent, PropertyIds):
DOCDirStream.__init__(self, parent.bytes)
self.parent = parent
+ self.propertyIds = PropertyIds
def dump(self):
print '<propertySetStream type="PropertySetStream" offset="%s">' % self.pos
@@ -181,28 +216,6 @@ class PropertySet(DOCDirStream):
self.typedPropertyValues.append(typedPropertyValue)
print '</propertySet>'
-PropertyIdentifier = {
- 0x00000001: "CODEPAGE_PROPERTY_IDENTIFIER",
- 0x00000002: "PIDSI_TITLE",
- 0x00000003: "PIDSI_SUBJECT",
- 0x00000004: "PIDSI_AUTHOR",
- 0x00000005: "PIDSI_KEYWORDS",
- 0x00000006: "PIDSI_COMMENTS",
- 0x00000007: "PIDSI_TEMPLATE",
- 0x00000008: "PIDSI_LASTAUTHOR",
- 0x00000009: "PIDSI_REVNUMBER",
- 0x0000000A: "PIDSI_EDITTIME",
- 0x0000000B: "PIDSI_LASTPRINTED",
- 0x0000000C: "PIDSI_CREATE_DTM",
- 0x0000000D: "PIDSI_LASTSAVE_DTM",
- 0x0000000E: "PIDSI_PAGECOUNT",
- 0x0000000F: "PIDSI_WORDCOUNT",
- 0x00000010: "PIDSI_CHARCOUNT",
- 0x00000011: "PIDSI_THUMBNAIL",
- 0x00000012: "PIDSI_APPNAME",
- 0x00000013: "PIDSI_DOC_SECURITY",
-}
-
class PropertyIdentifierAndOffset(DOCDirStream):
def __init__(self, parent, index):
@@ -213,7 +226,7 @@ class PropertyIdentifierAndOffset(DOCDirStream):
def dump(self):
print '<propertyIdentifierAndOffset%s type="PropertyIdentifierAndOffset" offset="%s">' % (self.index, self.pos)
- self.printAndSet("PropertyIdentifier", self.readuInt32(), dict=PropertyIdentifier)
+ self.printAndSet("PropertyIdentifier", self.readuInt32(), dict=self.parent.parent.propertyIds)
self.printAndSet("Offset", self.readuInt32())
print '</propertyIdentifierAndOffset%s>' % self.index
self.parent.pos = self.pos
commit 84940f4150fa56256ddff631fe6dc671e10f5b93
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Wed Nov 26 09:59:03 2014 +0100
vsdstream: dump utf8 titles
diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py
index 4e99aa5..f91380c 100644
--- a/msodumper/vsdstream.py
+++ b/msodumper/vsdstream.py
@@ -290,12 +290,20 @@ class CodePageString(DOCDirStream):
if c == 0:
break
bytes.append(c)
+ codepage = self.parent.parent.getCodePage()
+ if codepage < 0:
+ codepage += 2 ** 16 # signed -> unsigned
encoding = ""
- if self.parent.parent.getCodePage() == 1252:
+ if codepage == 1252:
# http://msdn.microsoft.com/en-us/goglobal/bb964654
encoding = "latin1"
+ elif codepage == 65001:
+ # http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx
+ encoding = "utf-8"
if len(encoding):
print '<Characters value="%s"/>' % "".join(map(lambda c: chr(c), bytes)).decode(encoding).encode('utf-8')
+ else:
+ print '<todo what="CodePageString::dump: unhandled codepage %s"/>' % codepage
print '</%s>' % self.name
# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
commit ae210089c3a5d6dd0932564c6f600be88739db45
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Wed Nov 26 09:41:38 2014 +0100
vsdstream: dump latin1 titles
diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py
index 36c279a..4e99aa5 100644
--- a/msodumper/vsdstream.py
+++ b/msodumper/vsdstream.py
@@ -159,17 +159,26 @@ class PropertySet(DOCDirStream):
self.parent = parent
self.pos = parent.Offset0
+ def getCodePage(self):
+ for index, idAndOffset in enumerate(self.idsAndOffsets):
+ if idAndOffset.PropertyIdentifier == 0x00000001: # CODEPAGE_PROPERTY_IDENTIFIER
+ return self.typedPropertyValues[index].Value
+
def dump(self):
self.posOrig = self.pos
print '<propertySet type="PropertySet" offset="%s">' % self.pos
self.printAndSet("Size", self.readuInt32())
self.printAndSet("NumProperties", self.readuInt32())
- self.idsAndOffsets = {}
+ self.idsAndOffsets = []
for i in range(self.NumProperties):
- self.idsAndOffsets[i] = PropertyIdentifierAndOffset(self, i)
- self.idsAndOffsets[i].dump()
+ idAndOffset = PropertyIdentifierAndOffset(self, i)
+ idAndOffset.dump()
+ self.idsAndOffsets.append(idAndOffset)
+ self.typedPropertyValues = []
for i in range(self.NumProperties):
- TypedPropertyValue(self, i).dump()
+ typedPropertyValue = TypedPropertyValue(self, i)
+ typedPropertyValue.dump()
+ self.typedPropertyValues.append(typedPropertyValue)
print '</propertySet>'
PropertyIdentifier = {
@@ -269,6 +278,7 @@ class CodePageString(DOCDirStream):
def __init__(self, parent, name):
DOCDirStream.__init__(self, parent.bytes)
self.pos = parent.pos
+ self.parent = parent
self.name = name
def dump(self):
@@ -280,7 +290,12 @@ class CodePageString(DOCDirStream):
if c == 0:
break
bytes.append(c)
- print '<Characters value="%s"/>' % "".join(map(lambda c: chr(c), bytes))
+ encoding = ""
+ if self.parent.parent.getCodePage() == 1252:
+ # http://msdn.microsoft.com/en-us/goglobal/bb964654
+ encoding = "latin1"
+ if len(encoding):
+ print '<Characters value="%s"/>' % "".join(map(lambda c: chr(c), bytes)).decode(encoding).encode('utf-8')
print '</%s>' % self.name
# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
More information about the Libreoffice-commits mailing list