[Libreoffice-commits] mso-dumper.git: 12 commits - compress.py decompress.py man/ppt-dump.py.1 misc/mso-dumper.spec msodumper/formula.py msodumper/globals.py msodumper/msocrypto.py msodumper/msodraw.py msodumper/node.py msodumper/ole.py msodumper/olestream.py msodumper/oletool.py msodumper/pptrecord.py msodumper/pptstream.py msodumper/vbahelper.py ppt-dump.py setup.py test/doc vbadump.py xls-dump.py

Tue Dec 3 10:04:28 PST 2013

compress.py            |   31 +------
 decompress.py          |   29 +------
 man/ppt-dump.py.1      |   51 +++++++++++++
 misc/mso-dumper.spec   |   15 ++-
 msodumper/formula.py   |   30 +------
 msodumper/globals.py   |   51 ++++++-------
 msodumper/msocrypto.py |    5 -
 msodumper/msodraw.py   |   28 +------
 msodumper/node.py      |   30 +------
 msodumper/ole.py       |  190 ++++++++++++++++++++++---------------------------
 msodumper/olestream.py |  108 +++++++++++----------------
 msodumper/oletool.py   |   29 +------
 msodumper/pptrecord.py |   22 ++++-
 msodumper/pptstream.py |   60 +++++++++------
 msodumper/vbahelper.py |   29 +------
 ppt-dump.py            |   77 ++++++++++---------
 setup.py               |   13 +++
 test/doc/test.py       |    2 
 vbadump.py             |   76 +++++++------------
 xls-dump.py            |   14 +--
 20 files changed, 405 insertions(+), 485 deletions(-)

New commits:
commit 0d2411758d0aa9e724f98921e7e008214c66b61f
Author: Thorsten Behrens <thb at documentfoundation.org>
Date:   Tue Dec 3 18:59:29 2013 +0100

    Cleanup trailing whitespace some more.

diff --git a/compress.py b/compress.py
index f23e62f..e6b7ed7 100755
--- a/compress.py
+++ b/compress.py
@@ -20,7 +20,7 @@ def main():
     decompressed = vbahelper.UnCompressedVBAStream( chars, offset )
     compressed = decompressed.compress()
     sys.stdout.write(compressed)
-    
+
     exit(0)
 
 if __name__ == '__main__':
diff --git a/msodumper/globals.py b/msodumper/globals.py
index df112c0..c5d89d8 100644
--- a/msodumper/globals.py
+++ b/msodumper/globals.py
@@ -35,7 +35,7 @@ class Params(object):
         self.dumpText = False
         self.dumpedIds = []
         self.noRawDump = False
-        
+
 # Global parameters / run configuration, to be set up by the main
 # program during initialization
 params = Params()
@@ -138,7 +138,7 @@ def output (msg, recordType = -1):
 
 def outputln(msg, recordType = -1):
     output(msg + "\n", recordType)
-    
+
 def error (msg):
     sys.stderr.write("Error: " + msg)
 
diff --git a/msodumper/pptrecord.py b/msodumper/pptrecord.py
index a6c5407..c1458c8 100644
--- a/msodumper/pptrecord.py
+++ b/msodumper/pptrecord.py
@@ -52,7 +52,7 @@ append a line to be displayed.
         # to params. Have to use a global if we want to keep a minimal
         # modification
         globals.textdump += text + "\n"
-        
+
     def appendLine (self, line):
         self.lines.append(line)
 
@@ -138,7 +138,7 @@ class UniString(BaseRecordHandler):
         self.appendProperty(name)
         self.appendLine("text: '%s'"%name)
         self.appendText(name)
-            
+
 def ShapeUniString (*args):
     args += "ShapeText",
     return UniString(*args)
diff --git a/test/doc/test.py b/test/doc/test.py
index b6c4d36..1812d53 100755
--- a/test/doc/test.py
+++ b/test/doc/test.py
@@ -111,7 +111,7 @@ class Test(unittest.TestCase):
         self.dump('comment')
         comments = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfandTxt/plcfandTxt/aCP')
         self.assertEqual(2, len(comments))
-        
+
         self.assertEqual('This is a comment.\\x0D', comments[0].findall('transformed')[0].attrib['value'])
         self.assertEqual('This is also commented.\\x0D', comments[1].findall('transformed')[0].attrib['value'])
 
diff --git a/vbadump.py b/vbadump.py
index 439ca36..7aa049d 100755
--- a/vbadump.py
+++ b/vbadump.py
@@ -109,13 +109,13 @@ class CodePageReader(StdReader):
         if codePageMap.has_key( self.reader.codepage ):
             self.reader.codepageName = codePageMap[  self.reader.codepage ]
         print("  codepage: %i"%self.reader.codepage)
-     
+
 class ProjectNameReader(StdReader):
     def parse(self):
         size = self.reader.readUnsignedInt( 4 )
         bytes = self.reader.readBytes( size )
         print("  ProjectName: %s"%bytes.decode(self.reader.codepageName))
-                 
+
 class DocStringRecordReader(StdReader):
     def parse(self):
         size = self.reader.readUnsignedInt( 4 )
@@ -238,8 +238,8 @@ class ModuleOffSetReader(StdReader):
         size = self.reader.readUnsignedInt( 4 )
         moduleInfo = self.reader.CurrentModule
         moduleInfo.offset = self.reader.readUnsignedInt( size )
-     
-        print("  Offset: 0x%x"%moduleInfo.offset) 
+
+        print("  Offset: 0x%x"%moduleInfo.offset)
 
 class ProjectModuleTermReader(StdReader):
     def parse(self):
@@ -266,13 +266,13 @@ class SysKindReader(StdReader):
     def parse(self):
         size = self.reader.readUnsignedInt( 4 )
         val = self.reader.readUnsignedInt( size )
-        sysKind = "Unknown" 
+        sysKind = "Unknown"
         if val == 0:
-           sysKind = "16 bit windows" 
+           sysKind = "16 bit windows"
         elif val == 1:
-           sysKind = "32 bit windows" 
+           sysKind = "32 bit windows"
         elif val == 2:
-           sysKind = "Macintosh" 
+           sysKind = "Macintosh"
         print("  SysType: %s"%sysKind)
 
 class LcidReader(StdReader):
@@ -280,13 +280,12 @@ class LcidReader(StdReader):
         size = self.reader.readUnsignedInt( 4 )
         val = self.reader.readUnsignedInt( size )
         print("  LCID: 0x%x ( expected 0x409 )"%val)
-   
+
 class LcidInvokeReader(StdReader):
     def parse(self):
         size = self.reader.readUnsignedInt( 4 )
         val = self.reader.readUnsignedInt( size )
         print("  LCIDINVOKE: 0x%x ( expected 0x409 )"%val)
-   
 
 class LibFlagReader(StdReader):
     def parse(self):
@@ -324,8 +323,8 @@ class ReferenceControlReaderPart1(StdReader):
         sizeOfLibidTwiddled =  self.reader.readUnsignedInt( 4 )
         sLibidTwiddledBytes =  self.reader.readBytes( sizeOfLibidTwiddled )
         print("  LibIdTwiddled: %s"%sLibidTwiddledBytes.decode( self.reader.codepageName))
-        
-        # Reserved1 & Reserved2 ( suppose we could really read these and assert if 
+
+        # Reserved1 & Reserved2 ( suppose we could really read these and assert if
         # they don't conform to expected values ( 0x00000000 & 0x00000000 )
         self.reader.readBytes( 6 )
 
@@ -335,8 +334,8 @@ class ReferenceControlReaderPart2(StdReader):
         sizeOfLibidExtended =  self.reader.readUnsignedInt( 4 )
         sLibidExtendedBytes =  self.reader.readBytes( sizeOfLibidExtended )
         print("  LibidExtended: %s"%sLibidExtendedBytes.decode( self.reader.codepageName))
-        
-        # Reserved4 & Reserved5 ( suppose we could really read these and assert if 
+
+        # Reserved4 & Reserved5 ( suppose we could really read these and assert if
         # they don't conform to expected values ( 0x00000000 & 0x00000000 )
         self.reader.readBytes( 6 )
         origTypeLib = self.reader.readBytes( 16 )
@@ -393,16 +392,16 @@ dirRecordData = {
     0x0022: ["MODULETYPE", "ModuleTypeDocClassOrDesgn", ModuleTypeOtherReader],
     0x0025: ["MODULEREADONLY", "ModuleReadOnly"],
     0x0028: ["MODULEPRIVATE", "ModulePrivate"],
-}    
+}
 
 
 class ModuleInfo:
     def __init__(self):
         self.name = ""
         self.offset = 0
-        self.streamname = "" 
-        
-class DirStreamReader( globals.ByteStream ): 
+        self.streamname = ""
+
+class DirStreamReader( globals.ByteStream ):
     def __init__ (self, bytes ):
         globals.ByteStream.__init__(self, bytes)
         self.Modules = []
@@ -432,7 +431,7 @@ class DirStreamReader( globals.ByteStream ):
             else:
                 size = self.readUnsignedInt( 4 )
                 if size:
-                    self.readBytes(size)        
+                    self.readBytes(size)
 
 class VBAContainer:
     def __init__( self, root ):
@@ -477,7 +476,7 @@ class VBAContainer:
         dirName = self.vbaRoot.getHierarchicalName() + "VBA/dir"
         dirNode = self.__findNodeByHierarchicalName( self.vbaRoot, dirName )
         if dirNode != None:
-            #decompress 
+            #decompress
             bytes = dirNode.getStream()
             compressed = vbahelper.CompressedVBAStream( bytes, 0 )
             bytes = compressed.decompress()
@@ -496,7 +495,7 @@ class VBAContainer:
                         #straight text file
                         print("%s"%bytes.decode(reader.codepageName))
                     else:
-                        globals.dumpBytes( bytes, 512) 
+                        globals.dumpBytes( bytes, 512)
             for module in reader.Modules:
                 fullStreamName = self.vbaRoot.getHierarchicalName() + "VBA/" + module.streamname
                 moduleNode = self.__findNodeByHierarchicalName( self.vbaRoot, fullStreamName )
@@ -515,17 +514,17 @@ def main():
 
     if ( len ( sys.argv ) <= 1 ):
         print("usage: vbadump: file")
-        sys.exit(1) 
+        sys.exit(1)
     # prepare for supporting more options
     options, args = parser.parse_args()
 
-    params = globals.Params()    
+    params = globals.Params()
 
     container = ole.OleContainer( args[ 0 ], params )
 
     container.read()
     root = container.getRoot()
-    vba = VBAContainer( root ) 
+    vba = VBAContainer( root )
     vba.dump()
 
     exit(0)
diff --git a/xls-dump.py b/xls-dump.py
index 219430e..a50d0ad 100755
--- a/xls-dump.py
+++ b/xls-dump.py
@@ -2,7 +2,7 @@
 ########################################################################
 #
 #  Copyright (c) 2010 Kohei Yoshida
-#  
+#
 #  Permission is hereby granted, free of charge, to any person
 #  obtaining a copy of this software and associated documentation
 #  files (the "Software"), to deal in the Software without
@@ -11,10 +11,10 @@
 #  copies of the Software, and to permit persons to whom the
 #  Software is furnished to do so, subject to the following
 #  conditions:
-#  
+#
 #  The above copyright notice and this permission notice shall be
 #  included in all copies or substantial portions of the Software.
-#  
+#
 #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 #  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@@ -85,7 +85,7 @@ class XLDumper(object):
         dirs = self.strm.getDirectoryEntries()
         docroot = node.Root()
         root = docroot.appendElement('xls-dump')
-        
+
         for d in dirs:
             if d.Name != "Workbook":
                 # for now, we only dump the Workbook directory stream.
@@ -112,7 +112,7 @@ class XLDumper(object):
             wbmodel = self.__buildWorkbookModel(dirstrm)
             wbmodel.encrypted = self.strmData.encrypted
             root.appendChild(wbmodel.createDOM())
-        
+
         node.prettyPrint(sys.stdout, docroot)
 
     def dump (self):
@@ -136,7 +136,7 @@ class XLDumper(object):
 
             elif dirname == "Workbook":
                 success = True
-                while success: 
+                while success:
                     success = self.__readSubStream(dirstrm)
 
             elif dirname == "Revision Log":
@@ -196,7 +196,7 @@ class XLDumper(object):
                 self.__dumpDataAsXML(x, root)
         else:
             pass # we're skipping all unknown elems
-        
+
     def __readSubStreamXML (self, strm):
         handlers = []
         try:
commit 1df9101b6605319dcc581f7fd5d2a354b173b857
Author: Thorsten Behrens <thb at documentfoundation.org>
Date:   Tue Dec 3 18:54:33 2013 +0100

    Update author email, align packaging files with new license.

diff --git a/misc/mso-dumper.spec b/misc/mso-dumper.spec
index 9a8f63e..6fa0923 100644
--- a/misc/mso-dumper.spec
+++ b/misc/mso-dumper.spec
@@ -5,8 +5,8 @@ BuildRequires:  python
 Version:        0.2
 Release:        1
 BuildArch:      noarch
-License:        LGPL v2 or later
-Packager:       Kohei Yoshida  <kyoshida at novell.com>
+License:        MPLv2
+Packager:       Kohei Yoshida  <kohei.yoshida at collabora.com>
 Source:         mso-dumper-0.2.tar.gz
 Group:          Development/Tools/Debuggers
 Summary:        Dumper tool for Microsoft Office binary file format
@@ -21,9 +21,14 @@ formats are supported.
 
 Authors:
 --------
-    Kohei Yoshida <kyoshida at novell.com>
-    Thorsten Behrens <tbehrens at novell.com>
-    Fredrik Ekholdt (for xmlpp.py)  
+    Kohei Yoshida <kohei.yoshida at collabora.com>
+    Thorsten Behrens <tbehrens at suse.com>
+    Miklos Vajna <vmiklos at collabora.com>
+    Noel Power <nopower at suse.com>
+    Jean-Francois Dockes <jf at dockes.org>
+    Sergey Kishchenko <voidwrk at gmail.com>
+    Fredrik Ekholdt (for xmlpp.py)
+    and contributors
 
 %prep
 %setup -q
diff --git a/msodumper/pptstream.py b/msodumper/pptstream.py
index 3f966e4..c3d8115 100644
--- a/msodumper/pptstream.py
+++ b/msodumper/pptstream.py
@@ -34,7 +34,7 @@ class PPTFile(object):
 
     def printStreamInfo (self):
         self.__printSep('=', 68)
-        globals.outputln("PPT File Format Dumper by Kohei Yoshida & Thorsten Behrens")
+        globals.outputln("PPT File Format Dumper by K. Yoshida, T. Behrens & contributors")
         globals.outputln("  total stream size: %d bytes"%self.size)
         self.__printSep('=', 68)
         globals.outputln('')
diff --git a/setup.py b/setup.py
index 1e00cc3..4d5f691 100644
--- a/setup.py
+++ b/setup.py
@@ -5,9 +5,9 @@ setup(
     packages = ['msodumper'],
     scripts = ['ppt-dump.py'],
     data_files=[('man/man1', ['man/ppt-dump.py.1'])],
-    author = "Kohei Yoshida <kyoshida at novell.com>, Thorsten Behrens <tbehrens at novell.com>",
+    author = "Kohei Yoshida <kohei.yoshida at collabora.com>, Thorsten Behrens <tbehrens at suse.com>",
     author_email = "libreoffice at lists.freedesktop.org",
     description = "A package for analysing and dumping MS office formats",
-    license = "MPL/LGPLv3+",
+    license = "MPL",
     url = "http://cgit.freedesktop.org/libreoffice/contrib/mso-dumper/"
 )
commit 94199e25686552c0836cf5677b0ff34e5200d094
Author: Thorsten Behrens <thb at documentfoundation.org>
Date:   Tue Dec 3 18:53:45 2013 +0100

    Fix typo in global dump param.

diff --git a/msodumper/globals.py b/msodumper/globals.py
index f929b6e..df112c0 100644
--- a/msodumper/globals.py
+++ b/msodumper/globals.py
@@ -256,7 +256,7 @@ Note the following:
 
 
 def dumpBytes (chars, subDivide=None):
-    if params.noStructOutput or params.noRawDumps:
+    if params.noStructOutput or params.noRawDump:
         return
     line = 0
     subDivideLine = None
commit 9a1d4381d31ffa4ef587941142821008d66612ae
Author: Jean-Francois Dockes <jf at dockes.org>
Date:   Wed Nov 27 16:55:37 2013 +0100

    usage string and man page: the forgotten parts

diff --git a/ppt-dump.py b/ppt-dump.py
index 37df99b..0a753e2 100755
--- a/ppt-dump.py
+++ b/ppt-dump.py
@@ -15,9 +15,11 @@ def usage (exname):
 
 Options:
   --help        displays this help message.
-  --no-struct-output suppress normal disassembly output
-  --dump-text   print the textual content
-"""%exname
+  --no-struct-output suppress normal structure analysis output
+  --dump-text   extract and print the textual content
+  --no-raw-dumps suppress raw hex dumps of uninterpreted areas
+  --id-select=id1[,id2 ...] limit output to selected record Ids
+""" % exname
     print msg
 
 
diff --git a/setup.py b/setup.py
index c1dd84e..1e00cc3 100644
--- a/setup.py
+++ b/setup.py
@@ -1,10 +1,10 @@
-from setuptools import setup, find_packages
+from distutils.core import setup
 setup(
     name = "msodumper",
     version = "0.3.0",
-    packages = find_packages(),
+    packages = ['msodumper'],
     scripts = ['ppt-dump.py'],
-    zip_safe = True,
+    data_files=[('man/man1', ['man/ppt-dump.py.1'])],
     author = "Kohei Yoshida <kyoshida at novell.com>, Thorsten Behrens <tbehrens at novell.com>",
     author_email = "libreoffice at lists.freedesktop.org",
     description = "A package for analysing and dumping MS office formats",
commit 4367aefc30559dc9ef5cfefa437f1448b691b788
Author: Jean-Francois Dockes <jf at dockes.org>
Date:   Wed Nov 27 16:45:03 2013 +0100

    usage string and man page

diff --git a/man/ppt-dump.py.1 b/man/ppt-dump.py.1
new file mode 100644
index 0000000..8c25653
--- /dev/null
+++ b/man/ppt-dump.py.1
@@ -0,0 +1,51 @@
+.TH PPT-DUMP.PY 1 "27 November 2013"
+.SH NAME
+ppt-dump.py \- PowerPoint 97-2007 file analysis and text extraction
+.SH SYNOPSIS
+.B ppt\-dump.py
+[
+.B \-\-help
+]
+.br
+.B ppt\-dump.py
+[
+.B \-\-no\-struct\-output
+]
+[
+.B \-\-dump\-text
+]
+[
+.B \-\-no\-raw\-dumps
+]
+[
+.B \-\-id\-select=id1[,id2...]
+]
+<filename.ppt>
+
+.SH DESCRIPTION
+The 
+.B ppt-dump.py
+command will analyse a PowerPoint 97-2007 file and print selected
+elements. By default the command will print out a detailed description of
+the file data and structure. In this mode the
+.B \-\-id\-select
+option will allow to select what specific records to print, and the 
+.B \-\-no\-raw\-dumps
+option will suppress hexadecimal dumps of uninterpreted data.
+.P
+If option
+.B \-\-no\-struct\-output
+is given, the default output will be suppressed. In this mode, if option
+.B \-\-dump\-text
+is given, the command will print out the file text content, encoded as UTF-8.
+.SH EXAMPLES
+Printing out only the header, directory and record types 4000 and 4008:
+.RS
+ppt\-dump.py \-\-no\-raw\-dumps \-\-id\-select=4000,4008 myfile.ppt
+.RE
+.PP
+Extracting the slides text as UTF\-8:
+.RS
+ppt\-dump.py \-\-no\-struct\-output \-\-dump\-text myfile.ppt
+.RE
+
commit 7fdcf440ebc128f0501cc70a87b252da555bfb8e
Author: Jean-Francois Dockes <jf at dockes.org>
Date:   Wed Nov 27 10:37:56 2013 +0100

    Set up a tentative output selection mechanism.
    
    This is based on an integer parameter, in practice the record id
    from pptstream.py:recData, and uses a command-line option to set
    the list of record types which should be printed. Also added a
    command-line option to suppress raw dumpBytes output, and stored the
    command-line options in a global object, as this is the only
    practical way to get the values down into the low-level routines.

diff --git a/msodumper/globals.py b/msodumper/globals.py
index 4cf9bed..f929b6e 100644
--- a/msodumper/globals.py
+++ b/msodumper/globals.py
@@ -33,6 +33,12 @@ class Params(object):
         self.showStreamPos = False
         self.noStructOutput = False
         self.dumpText = False
+        self.dumpedIds = []
+        self.noRawDump = False
+        
+# Global parameters / run configuration, to be set up by the main
+# program during initialization
+params = Params()
 
 class ByteStream(object):
 
@@ -123,22 +129,15 @@ def getValueOrUnknown (list, idx, errmsg='(unknown)'):
 
 textdump = b""
 
-_noOutput = 0
-
-def muteOutput(onoff):
-    global _noOutput
-    if onoff:
-        _noOutput = 1
-    else:
-        _noOutput = 0
-
-def output (msg):
-    global _noOutput
-    if _noOutput == 0:
+def output (msg, recordType = -1):
+    if params.noStructOutput:
+        return
+    if recordType == -1 or not params.dumpedIds or \
+           recordType in params.dumpedIds:
         sys.stdout.write(msg)
 
-def outputln(msg):
-    output(msg + "\n")
+def outputln(msg, recordType = -1):
+    output(msg + "\n", recordType)
     
 def error (msg):
     sys.stderr.write("Error: " + msg)
@@ -257,7 +256,7 @@ Note the following:
 
 
 def dumpBytes (chars, subDivide=None):
-    if _noOutput:
+    if params.noStructOutput or params.noRawDumps:
         return
     line = 0
     subDivideLine = None
diff --git a/msodumper/pptrecord.py b/msodumper/pptrecord.py
index b1150ca..a6c5407 100644
--- a/msodumper/pptrecord.py
+++ b/msodumper/pptrecord.py
@@ -34,9 +34,10 @@ append a line to be displayed.
 
     def __print (self, text):
         try:
-            globals.outputln(self.prefix + text)
+            globals.outputln(self.prefix + text, recordType = self.recordType)
         except UnicodeEncodeError:
-            globals.outputln(self.prefix + "<%d invalid chars>"%len(text))
+            globals.outputln(self.prefix + "<%d invalid chars>"%len(text),
+                             recordType = self.recordType)
 
     def output (self):
         self.parseBytes()
diff --git a/msodumper/pptstream.py b/msodumper/pptstream.py
index e5ec3da..3f966e4 100644
--- a/msodumper/pptstream.py
+++ b/msodumper/pptstream.py
@@ -98,19 +98,18 @@ class PPTDirStream(object):
         return globals.getUnsignedInt(self.readBytes(size))
 
 
-    def __print (self, text):
-        globals.outputln(self.prefix + text)
+    def __print (self, text, recordType = -1):
+        globals.outputln(self.prefix + text, recordType = recordType)
 
 
-    def __printSep (self, c='-', w=68, prefix=''):
-        self.__print(prefix + c*w)
+    def __printSep (self, c='-', w=68, prefix='', recordType = -1):
+        self.__print(prefix + c*w, recordType)
 
 
     def readRecords (self):
         try:
             # read until data is exhausted (min record size: 8 bytes)
             while self.pos+8 < self.size:
-                globals.outputln("")
                 self.readRecord()
             return True
         except EndOfStream:
@@ -120,30 +119,30 @@ class PPTDirStream(object):
     def printRecordHeader (self, startPos, recordInstance, recordVersion, recordType, size):
         if self.params.noStructOutput and self.params.dumpText:
             return
-        self.__printSep('=')
+        self.__printSep('=', recordType = recordType)
         if recordType in recData:
-            self.__print("[%s]"%recData[recordType][0])
+            self.__print("[%s]"%recData[recordType][0], recordType)
         else:
-            self.__print("[anon record]")
+            self.__print("[anon record]", recordType)
         self.__print("(type: %4.4Xh (%d) inst: %4.4Xh (%d), vers: %4.4Xh, start: %d, size: %d)"%
-              (recordType, recordType, recordInstance, recordInstance, recordVersion, startPos, size))
-        self.__printSep('=')
+              (recordType, recordType, recordInstance, recordInstance, recordVersion, startPos, size), recordType)
+        self.__printSep('=', recordType = recordType)
 
 
     def printRecordDump (self, bytes, recordType):
         if self.params.noStructOutput and self.params.dumpText:
             return
         size = len(bytes)
-        self.__printSep('-', 61, "%4.4Xh: "%recordType)
+        self.__printSep('-', 61, "%4.4Xh: "%recordType, recordType = recordType)
         for i in xrange(0, size):
             if (i+1) % 16 == 1:
-                output(self.prefix + "%4.4Xh: "%recordType)
-            output("%2.2X "%ord(bytes[i]))
+                output(self.prefix + "%4.4Xh: "%recordType, recordType = recordType)
+            output("%2.2X "%ord(bytes[i]), recordType = recordType)
             if (i+1) % 16 == 0 and i != size-1:
-                globals.outputln("")
+                globals.outputln("", recordType = recordType)
         if size > 0:
-            globals.outputln("")
-            self.__printSep('-', 61, "%4.4Xh: "%recordType)
+            globals.outputln("", recordType = recordType)
+            self.__printSep('-', 61, "%4.4Xh: "%recordType, recordType = recordType)
 
 
     def readRecord (self):
@@ -154,6 +153,7 @@ class PPTDirStream(object):
         recordType = self.readUnsignedInt(2)
         size = self.readUnsignedInt(4)
 
+        globals.outputln("", recordType = recordType)
         self.printRecordHeader(startPos, recordInstance, recordVersion, recordType, size)
         bytes = self.readBytes(size)
 
@@ -173,13 +173,13 @@ class PPTDirStream(object):
                 self.handlePPT10BinaryTags(bytes,recordInfo)
         elif recordInfo is not None:
             handler = recordInfo[1](recordType, recordInstance, size, bytes, self.properties, self.prefix)
-            globals.outputln("")
+            globals.outputln("", recordType = recordType)
             # call special record handler, if any
             if handler is not None:
                 handler.output()
             self.printRecordDump(bytes, recordType)
         elif size > 0:
-            globals.outputln("")
+            globals.outputln("", recordType = recordType)
             self.printRecordDump(bytes, recordType)
 
     def checkPPT10BinaryTag (recordType, recordInstance, size, bytes, properties, prefix):
diff --git a/ppt-dump.py b/ppt-dump.py
index 658e7a5..37df99b 100755
--- a/ppt-dump.py
+++ b/ppt-dump.py
@@ -81,24 +81,29 @@ def main (args):
         usage(exname)
         return
 
-    params = globals.Params()
     try:
         opts, args = getopt.getopt(args, "h",
                                    ["help", "debug", "show-sector-chain",
-                                    "no-struct-output", "dump-text"])
+                                    "no-struct-output", "dump-text",
+                                    "id-select=", "no-raw-dumps"])
         for opt, arg in opts:
             if opt in ['-h', '--help']:
                 usage(exname)
                 return
             elif opt in ['--debug']:
-                params.debug = True
+                globals.params.debug = True
             elif opt in ['--show-sector-chain']:
-                params.showSectorChain = True
+                globals.params.showSectorChain = True
             elif opt in ['--no-struct-output']:
-                globals.muteOutput(1)
-                params.noStructOutput = True
+                globals.params.noStructOutput = True
             elif opt in ['--dump-text']:
-                params.dumpText = True
+                globals.params.dumpText = True
+            elif opt in ['--no-raw-dumps']:
+                globals.params.noRawDumps = True
+            elif opt in ['--id-select']:
+                globals.params.dumpedIds = arg.split(",")
+                globals.params.dumpedIds = \
+                    set([int(val) for val in globals.params.dumpedIds if val])
             else:
                 error("unknown option %s\n"%opt)
                 usage()
@@ -108,10 +113,10 @@ def main (args):
         usage(exname)
         return
 
-    dumper = PPTDumper(args[0], params)
+    dumper = PPTDumper(args[0], globals.params)
     if not dumper.dump():
         error("FAILURE\n")
-    if params.dumpText:
+    if globals.params.dumpText:
         print(globals.textdump.replace("\r", "\n"))
 
 if __name__ == '__main__':
commit 20c4d7fecefda39d8977e1d25715d5920ec2152f
Author: Jean-Francois Dockes <jf at dockes.org>
Date:   Tue Nov 26 14:05:03 2013 +0100

    Added basic setup.py. Only installs the package and ppt-dump.py for now

diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..c1dd84e
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,13 @@
+from setuptools import setup, find_packages
+setup(
+    name = "msodumper",
+    version = "0.3.0",
+    packages = find_packages(),
+    scripts = ['ppt-dump.py'],
+    zip_safe = True,
+    author = "Kohei Yoshida <kyoshida at novell.com>, Thorsten Behrens <tbehrens at novell.com>",
+    author_email = "libreoffice at lists.freedesktop.org",
+    description = "A package for analysing and dumping MS office formats",
+    license = "MPL/LGPLv3+",
+    url = "http://cgit.freedesktop.org/libreoffice/contrib/mso-dumper/"
+)
commit 3b53afc62352743c81c97c4f26ecdd552593bc26
Author: Jean-Francois Dockes <jf at dockes.org>
Date:   Thu Nov 21 12:43:35 2013 +0100

    Check for infinite loop inside getSectorIDChain(). This could happen on
    corrupted or invalid files.

diff --git a/msodumper/ole.py b/msodumper/ole.py
index 518e624..dc284fb 100644
--- a/msodumper/ole.py
+++ b/msodumper/ole.py
@@ -411,6 +411,9 @@ class SAT(object):
         nextID = self.array[initID]
         while nextID != -2:
             chain.append(nextID)
+            if nextID == self.array[nextID]:
+                # Beware of infinite loop: happens on bad files.
+                break
             nextID = self.array[nextID]
         return chain
 
commit 98ccb94f1bea2451a7d1c515b40267eae213702c
Author: Jean-Francois Dockes <jf at dockes.org>
Date:   Tue Nov 19 14:20:56 2013 +0100

    PPT text extractor: optimizations and error catching

diff --git a/msodumper/globals.py b/msodumper/globals.py
index 0af10d8..4cf9bed 100644
--- a/msodumper/globals.py
+++ b/msodumper/globals.py
@@ -257,6 +257,8 @@ Note the following:
 
 
 def dumpBytes (chars, subDivide=None):
+    if _noOutput:
+        return
     line = 0
     subDivideLine = None
     if subDivide != None:
diff --git a/msodumper/pptrecord.py b/msodumper/pptrecord.py
index 7e3ac6d..b1150ca 100644
--- a/msodumper/pptrecord.py
+++ b/msodumper/pptrecord.py
@@ -517,7 +517,11 @@ class AnimationInfo(BaseRecordHandler):
                      "graph by series","graph by category","element in series",
                      "element in category"]
         buildType = self.readUnsignedInt(1)
-        self.appendLine("build type: %s"%buildDesc[buildType])
+        try:
+            # can fail with index out of range
+            self.appendLine("build type: %s"%buildDesc[buildType])
+        except Exception, err:
+            error("AnimationInfo::parsebytes: %s: %s" % (str(buildType),str(err)))
 
         flyDesc = ["none","random","blinds","checker","cover","dissolve",
                    "fade","pull","random bar","strips","wipe","zoom","fly",
diff --git a/msodumper/pptstream.py b/msodumper/pptstream.py
index d5ed635..e5ec3da 100644
--- a/msodumper/pptstream.py
+++ b/msodumper/pptstream.py
@@ -19,6 +19,11 @@ class PPTFile(object):
         self.version = None
         self.params = params
 
+        # If we are a text dumper, skip irrelevant records
+        global recData, textRecData
+        if params.noStructOutput and params.dumpText:
+            recData = textRecData
+
         self.header = ole.Header(self.chars, self.params)
         self.pos = self.header.parse()
 
@@ -113,6 +118,8 @@ class PPTDirStream(object):
 
 
     def printRecordHeader (self, startPos, recordInstance, recordVersion, recordType, size):
+        if self.params.noStructOutput and self.params.dumpText:
+            return
         self.__printSep('=')
         if recordType in recData:
             self.__print("[%s]"%recData[recordType][0])
@@ -124,6 +131,8 @@ class PPTDirStream(object):
 
 
     def printRecordDump (self, bytes, recordType):
+        if self.params.noStructOutput and self.params.dumpText:
+            return
         size = len(bytes)
         self.__printSep('-', 61, "%4.4Xh: "%recordType)
         for i in xrange(0, size):
@@ -189,6 +198,13 @@ class PPTDirStream(object):
 #
 # opcode: [canonical name, handler (optional)]
 
+# A shorter recData which we use when extracting text (speeds things up by
+# skipping irrelevant records)
+textRecData = {
+    4000:  ["DFF_PST_TextCharsAtom", pptrecord.ShapeUniString],
+    4008:  ["DFF_PST_TextBytesAtom", pptrecord.ShapeString],
+    }
+
 recData = {
 
     0:  ["DFF_PST_Unknown"],
diff --git a/ppt-dump.py b/ppt-dump.py
index 59884f8..658e7a5 100755
--- a/ppt-dump.py
+++ b/ppt-dump.py
@@ -47,7 +47,14 @@ class PPTDumper(object):
             if len(dirname) == 0 or dirname == 'Root Entry':
                 continue
 
-            dirstrm = strm.getDirectoryStreamByName(dirname)
+            try:
+                dirstrm = strm.getDirectoryStreamByName(dirname)
+            except Exception, err:
+                error("getDirectoryStreamByName(%s): %s\n" % (dirname,str(err)))
+                # The previous version was killed by the exception
+                # here, so the equivalent is to break, but maybe there
+                # is no reason to do so.
+                break
             self.__printDirHeader(dirname, len(dirstrm.bytes))
             if  dirname == "PowerPoint Document":
                 if not self.__readSubStream(dirstrm):
commit a03632ad5a99d346b334e10b8ee85d028a8f4cee
Author: Jean-Francois Dockes <jf at dockes.org>
Date:   Tue Nov 19 14:18:47 2013 +0100

    PPT text extractor: command line option and behaviour switching

diff --git a/msodumper/globals.py b/msodumper/globals.py
index 27bb36b..0af10d8 100644
--- a/msodumper/globals.py
+++ b/msodumper/globals.py
@@ -31,7 +31,8 @@ class Params(object):
         self.debug = False
         self.showSectorChain = False
         self.showStreamPos = False
-
+        self.noStructOutput = False
+        self.dumpText = False
 
 class ByteStream(object):
 
@@ -120,13 +121,25 @@ def getValueOrUnknown (list, idx, errmsg='(unknown)'):
 
     return errmsg
 
+textdump = b""
+
+_noOutput = 0
+
+def muteOutput(onoff):
+    global _noOutput
+    if onoff:
+        _noOutput = 1
+    else:
+        _noOutput = 0
 
 def output (msg):
-    sys.stdout.write(msg)
+    global _noOutput
+    if _noOutput == 0:
+        sys.stdout.write(msg)
 
 def outputln(msg):
     output(msg + "\n")
-
+    
 def error (msg):
     sys.stderr.write("Error: " + msg)
 
diff --git a/msodumper/pptrecord.py b/msodumper/pptrecord.py
index 79bc059..7e3ac6d 100644
--- a/msodumper/pptrecord.py
+++ b/msodumper/pptrecord.py
@@ -44,6 +44,14 @@ append a line to be displayed.
         for line in self.lines:
             self.__print("%4.4Xh: %s"%(self.recordType, line))
 
+    def appendText(self, text):
+        global textdump
+        # unfortunately we have no access to the globals.params, so we
+        # can't keep the text local and let output() behave according
+        # to params. Have to use a global if we want to keep a minimal
+        # modification
+        globals.textdump += text + "\n"
+        
     def appendLine (self, line):
         self.lines.append(line)
 
@@ -103,6 +111,7 @@ append a line to be displayed.
         yratio = self.readRatio()
         return "(%s,%s)"%(xratio, yratio)
 
+
 class String(BaseRecordHandler):
     """Textual content."""
 
@@ -114,6 +123,7 @@ class String(BaseRecordHandler):
         # this transformation.
         name = name.decode('cp1252').encode('UTF-8')
         self.appendLine("text: '%s'"%name)
+        self.appendText(name)
 
 def ShapeString (*args):
     args += "ShapeText",
@@ -126,7 +136,8 @@ class UniString(BaseRecordHandler):
         name = globals.getUTF8FromUTF16(globals.getTextBytes(self.readRemainingBytes()))
         self.appendProperty(name)
         self.appendLine("text: '%s'"%name)
-
+        self.appendText(name)
+            
 def ShapeUniString (*args):
     args += "ShapeText",
     return UniString(*args)
diff --git a/ppt-dump.py b/ppt-dump.py
index 2148ca8..59884f8 100755
--- a/ppt-dump.py
+++ b/ppt-dump.py
@@ -15,6 +15,8 @@ def usage (exname):
 
 Options:
   --help        displays this help message.
+  --no-struct-output suppress normal disassembly output
+  --dump-text   print the textual content
 """%exname
     print msg
 
@@ -74,7 +76,9 @@ def main (args):
 
     params = globals.Params()
     try:
-        opts, args = getopt.getopt(args, "h", ["help", "debug", "show-sector-chain"])
+        opts, args = getopt.getopt(args, "h",
+                                   ["help", "debug", "show-sector-chain",
+                                    "no-struct-output", "dump-text"])
         for opt, arg in opts:
             if opt in ['-h', '--help']:
                 usage(exname)
@@ -83,6 +87,11 @@ def main (args):
                 params.debug = True
             elif opt in ['--show-sector-chain']:
                 params.showSectorChain = True
+            elif opt in ['--no-struct-output']:
+                globals.muteOutput(1)
+                params.noStructOutput = True
+            elif opt in ['--dump-text']:
+                params.dumpText = True
             else:
                 error("unknown option %s\n"%opt)
                 usage()
@@ -95,7 +104,8 @@ def main (args):
     dumper = PPTDumper(args[0], params)
     if not dumper.dump():
         error("FAILURE\n")
-
+    if params.dumpText:
+        print(globals.textdump.replace("\r", "\n"))
 
 if __name__ == '__main__':
     main(sys.argv)
commit ee100db9ab39fd19a96f0e92abf1e658e434e090
Author: Jean-Francois Dockes <jf at dockes.org>
Date:   Mon Dec 2 02:32:02 2013 +0100

    PPT text dumper: change all direct print calls to go through output() instead

diff --git a/msodumper/globals.py b/msodumper/globals.py
index d19feaf..27bb36b 100644
--- a/msodumper/globals.py
+++ b/msodumper/globals.py
@@ -124,6 +124,9 @@ def getValueOrUnknown (list, idx, errmsg='(unknown)'):
 def output (msg):
     sys.stdout.write(msg)
 
+def outputln(msg):
+    output(msg + "\n")
+
 def error (msg):
     sys.stderr.write("Error: " + msg)
 
diff --git a/msodumper/ole.py b/msodumper/ole.py
index 523de80..518e624 100644
--- a/msodumper/ole.py
+++ b/msodumper/ole.py
@@ -96,10 +96,10 @@ class Header(object):
             output("\n")
 
         def printSep (c, w, prefix=''):
-            print(prefix + c*w)
+            globals.outputln(prefix + c*w)
 
         printSep('=', globals.OutputWidth)
-        print("Compound Document Header")
+        globals.outputln("Compound Document Header")
         printSep('-', globals.OutputWidth)
 
         if self.params.debug:
@@ -113,47 +113,47 @@ class Header(object):
         printRawBytes(self.uId)
 
         # revision and version
-        print("Revision: %d  Version: %d"%(self.revision, self.version))
+        globals.outputln("Revision: %d  Version: %d"%(self.revision, self.version))
 
         # byte order
         output("Byte order: ")
         if self.byteOrder == ByteOrder.LittleEndian:
-            print("little endian")
+            globals.outputln("little endian")
         elif self.byteOrder == ByteOrder.BigEndian:
-            print("big endian")
+            globals.outputln("big endian")
         else:
-            print("unknown")
+            globals.outputln("unknown")
 
         # sector size (usually 512 bytes)
-        print("Sector size: %d (%d)"%(2**self.secSize, self.secSize))
+        globals.outputln("Sector size: %d (%d)"%(2**self.secSize, self.secSize))
 
         # short sector size (usually 64 bytes)
-        print("Short sector size: %d (%d)"%(2**self.secSizeShort, self.secSizeShort))
+        globals.outputln("Short sector size: %d (%d)"%(2**self.secSizeShort, self.secSizeShort))
 
         # total number of sectors in SAT (equals the number of sector IDs
         # stored in the MSAT).
-        print("Total number of sectors used in SAT: %d"%self.numSecSAT)
+        globals.outputln("Total number of sectors used in SAT: %d"%self.numSecSAT)
 
-        print("Sector ID of the first sector of the directory stream: %d"%
+        globals.outputln("Sector ID of the first sector of the directory stream: %d"%
               self.__secIDFirstDirStrm)
 
-        print("Minimum stream size: %d"%self.minStreamSize)
+        globals.outputln("Minimum stream size: %d"%self.minStreamSize)
 
         if self.__secIDFirstSSAT == -2:
-            print("Sector ID of the first SSAT sector: [none]")
+            globals.outputln("Sector ID of the first SSAT sector: [none]")
         else:
-            print("Sector ID of the first SSAT sector: %d"%self.__secIDFirstSSAT)
+            globals.outputln("Sector ID of the first SSAT sector: %d"%self.__secIDFirstSSAT)
 
-        print("Total number of sectors used in SSAT: %d"%self.numSecSSAT)
+        globals.outputln("Total number of sectors used in SSAT: %d"%self.numSecSSAT)
 
         if self.__secIDFirstMSAT == -2:
             # There is no more sector ID stored outside the header.
-            print("Sector ID of the first MSAT sector: [end of chain]")
+            globals.outputln("Sector ID of the first MSAT sector: [end of chain]")
         else:
             # There is more sector IDs than 109 IDs stored in the header.
-            print("Sector ID of the first MSAT sector: %d"%(self.__secIDFirstMSAT))
+            globals.outputln("Sector ID of the first MSAT sector: %d"%(self.__secIDFirstMSAT))
 
-        print("Total number of sectors used to store additional MSAT: %d"%self.numSecMSAT)
+        globals.outputln("Total number of sectors used to store additional MSAT: %d"%self.numSecMSAT)
 
 
     def parse (self):
@@ -285,13 +285,13 @@ all the sectors pointed by the sector IDs in order of occurrence.
         self.secIDs.append(id)
 
     def output (self):
-        print('')
-        print("="*globals.OutputWidth)
-        print("Master Sector Allocation Table (MSAT)")
-        print("-"*globals.OutputWidth)
+        globals.outputln('')
+        globals.outputln("="*globals.OutputWidth)
+        globals.outputln("Master Sector Allocation Table (MSAT)")
+        globals.outputln("-"*globals.OutputWidth)
 
         for id in self.secIDs:
-            print("sector ID: %5d   (pos: %7d)"%(id, 512+id*self.sectorSize))
+            globals.outputln("sector ID: %5d   (pos: %7d)"%(id, 512+id*self.sectorSize))
 
     def getSATSectorPosList (self):
         list = []
@@ -379,27 +379,27 @@ class SAT(object):
                 sectorMElse += 1
             else:
                 sectorLiveTotal += 1
-        print("total sector count:          %4d"%sectorTotal)
-        print("* live sector count:         %4d"%sectorP)
-        print("* end-of-chain sector count: %4d"%sectorM2)  # end-of-chain is also live
+        globals.outputln("total sector count:          %4d"%sectorTotal)
+        globals.outputln("* live sector count:         %4d"%sectorP)
+        globals.outputln("* end-of-chain sector count: %4d"%sectorM2)  # end-of-chain is also live
 
-        print("* free sector count:         %4d"%sectorM1)
-        print("* SAT sector count:          %4d"%sectorM3)
-        print("* MSAT sector count:         %4d"%sectorM4)
-        print("* other sector count:        %4d"%sectorMElse)
+        globals.outputln("* free sector count:         %4d"%sectorM1)
+        globals.outputln("* SAT sector count:          %4d"%sectorM3)
+        globals.outputln("* MSAT sector count:         %4d"%sectorM4)
+        globals.outputln("* other sector count:        %4d"%sectorMElse)
 
 
     def output (self):
-        print('')
-        print("="*globals.OutputWidth)
-        print("Sector Allocation Table (SAT)")
-        print("-"*globals.OutputWidth)
+        globals.outputln('')
+        globals.outputln("="*globals.OutputWidth)
+        globals.outputln("Sector Allocation Table (SAT)")
+        globals.outputln("-"*globals.OutputWidth)
         if self.params.debug:
             self.outputRawBytes()
-            print("-"*globals.OutputWidth)
+            globals.outputln("-"*globals.OutputWidth)
             for i in xrange(0, len(self.array)):
-                print("%5d: %5d"%(i, self.array[i]))
-            print("-"*globals.OutputWidth)
+                globals.outputln("%5d: %5d"%(i, self.array[i]))
+            globals.outputln("-"*globals.OutputWidth)
 
         self.outputArrayStats()
 
@@ -428,13 +428,13 @@ sectors are contained in the SAT as a sector ID chain.
 """
 
     def output (self):
-        print('')
-        print("="*globals.OutputWidth)
-        print("Short Sector Allocation Table (SSAT)")
-        print("-"*globals.OutputWidth)
+        globals.outputln('')
+        globals.outputln("="*globals.OutputWidth)
+        globals.outputln("Short Sector Allocation Table (SSAT)")
+        globals.outputln("-"*globals.OutputWidth)
         if self.params.debug:
             self.outputRawBytes()
-            print("-"*globals.OutputWidth)
+            globals.outputln("-"*globals.OutputWidth)
             for i in xrange(0, len(self.array)):
                 item = self.array[i]
                 output("%3d : %3d\n"%(i, item))
@@ -556,21 +556,21 @@ entire file stream.
 
 
     def output (self, debug=False):
-        print('')
-        print("="*globals.OutputWidth)
-        print("Directory")
+        globals.outputln('')
+        globals.outputln("="*globals.OutputWidth)
+        globals.outputln("Directory")
 
         if debug:
-            print("-"*globals.OutputWidth)
-            print("sector(s) used:")
+            globals.outputln("-"*globals.OutputWidth)
+            globals.outputln("sector(s) used:")
             for secID in self.sectorIDs:
-                print("  sector %d"%secID)
+                globals.outputln("  sector %d"%secID)
 
-            print("")
+            globals.outputln("")
             for secID in self.sectorIDs:
-                print("-"*globals.OutputWidth)
-                print("  Raw Hex Dump (sector %d)"%secID)
-                print("-"*globals.OutputWidth)
+                globals.outputln("-"*globals.OutputWidth)
+                globals.outputln("  Raw Hex Dump (sector %d)"%secID)
+                globals.outputln("-"*globals.OutputWidth)
                 pos = globals.getSectorPos(secID, self.sectorSize)
                 globals.dumpBytes(self.bytes[pos:pos+self.sectorSize], 128)
 
@@ -578,45 +578,45 @@ entire file stream.
             self.__outputEntry(entry, debug)
 
     def __outputEntry (self, entry, debug):
-        print("-"*globals.OutputWidth)
+        globals.outputln("-"*globals.OutputWidth)
         if len(entry.Name) > 0:
             name = entry.Name
             if ord(name[0]) <= 5:
                 name = "<%2.2Xh>%s"%(ord(name[0]), name[1:])
-            print("name: %s   (name buffer size: %d bytes)"%(name, entry.CharBufferSize))
+            globals.outputln("name: %s   (name buffer size: %d bytes)"%(name, entry.CharBufferSize))
         else:
-            print("name: [empty]   (name buffer size: %d bytes)"%entry.CharBufferSize)
+            globals.outputln("name: [empty]   (name buffer size: %d bytes)"%entry.CharBufferSize)
 
         if self.params.debug:
-            print("-"*globals.OutputWidth)
+            globals.outputln("-"*globals.OutputWidth)
             globals.dumpBytes(entry.bytes)
-            print("-"*globals.OutputWidth)
+            globals.outputln("-"*globals.OutputWidth)
 
         output("type: ")
         if entry.Type == Directory.Type.Empty:
-            print("empty")
+            globals.outputln("empty")
         elif entry.Type == Directory.Type.LockBytes:
-            print("lock bytes")
+            globals.outputln("lock bytes")
         elif entry.Type == Directory.Type.Property:
-            print("property")
+            globals.outputln("property")
         elif entry.Type == Directory.Type.RootStorage:
-            print("root storage")
+            globals.outputln("root storage")
         elif entry.Type == Directory.Type.UserStorage:
-            print("user storage")
+            globals.outputln("user storage")
         elif entry.Type == Directory.Type.UserStream:
-            print("user stream")
+            globals.outputln("user stream")
         else:
-            print("[unknown type]")
+            globals.outputln("[unknown type]")
 
         output("node color: ")
         if entry.NodeColor == Directory.NodeColor.Red:
-            print("red")
+            globals.outputln("red")
         elif entry.NodeColor == Directory.NodeColor.Black:
-            print("black")
+            globals.outputln("black")
         elif entry.NodeColor == Directory.NodeColor.Unknown:
-            print("[unknown color]")
+            globals.outputln("[unknown color]")
 
-        print("linked dir entries: left: %d; right: %d; root: %d"%
+        globals.outputln("linked dir entries: left: %d; right: %d; root: %d"%
               (entry.DirIDLeft, entry.DirIDRight, entry.DirIDRoot))
 
         self.__outputRaw("unique ID",  entry.UniqueID)
@@ -626,12 +626,12 @@ entire file stream.
 
         output("stream info: ")
         if entry.StreamSectorID < 0 or entry.StreamSize == 0:
-            print("[empty stream]")
+            globals.outputln("[empty stream]")
         else:
             strmLoc = "SAT"
             if entry.StreamLocation == StreamLocation.SSAT:
                 strmLoc = "SSAT"
-            print("(first sector ID: %d; size: %d; location: %s)"%
+            globals.outputln("(first sector ID: %d; size: %d; location: %s)"%
                   (entry.StreamSectorID, entry.StreamSize, strmLoc))
 
             satObj = None
@@ -644,8 +644,8 @@ entire file stream.
                 secSize = self.header.getShortSectorSize()
             if satObj != None:
                 chain = satObj.getSectorIDChain(entry.StreamSectorID)
-                print("sector count: %d"%len(chain))
-                print("total sector size: %d"%(len(chain)*secSize))
+                globals.outputln("sector count: %d"%len(chain))
+                globals.outputln("total sector size: %d"%(len(chain)*secSize))
                 if self.params.showSectorChain:
                     self.__outputSectorChain(chain)
 
@@ -657,7 +657,7 @@ entire file stream.
             frag = "%d, "%id
             fragLen = len(frag)
             if lineLen + fragLen > 68:
-                print(line)
+                globals.outputln(line)
                 line = frag
                 lineLen = fragLen
             else:
@@ -667,7 +667,7 @@ entire file stream.
             line = line[:-2]
             lineLen -= 2
         if lineLen > 0:
-            print(line)
+            globals.outputln(line)
 
 
     def __outputRaw (self, name, bytes):
@@ -677,7 +677,7 @@ entire file stream.
         output("%s: "%name)
         for byte in bytes:
             output("%2.2X "%ord(byte))
-        print("")
+        globals.outputln("")
 
     def getDirectoryEntries (self):
         return self.entries
@@ -864,16 +864,16 @@ class OleContainer:
         dateInfo = self.__getModifiedTime( treeNode.Entry )
 
         if len( treeNode.HierachicalName ) > 0 :
-            print '{0:8d}  {1:0<2d}-{2:0<2d}-{3:0<2d} {4:0<2d}:{5:0<2d}   {6}'.format(treeNode.Entry.StreamSize, dateInfo.day, dateInfo.month, dateInfo.year, dateInfo.hour, dateInfo.second, treeNode.HierachicalName )
+            globals.outputln('{0:8d}  {1:0<2d}-{2:0<2d}-{3:0<2d} {4:0<2d}:{5:0<2d}   {6}'.format(treeNode.Entry.StreamSize, dateInfo.day, dateInfo.month, dateInfo.year, dateInfo.hour, dateInfo.second, treeNode.HierachicalName ))
 
         for node in treeNode.Nodes:
             # ignore the root
             self.__printListReport( node )
 
     def __printHeader(self):
-        print ("OLE: %s")%self.filePath
-        print (" Length     Date   Time    Name")
-        print ("--------    ----   ----    ----")
+        globals.outputln("OLE: %s"%self.filePath)
+        globals.outputln(" Length     Date   Time    Name")
+        globals.outputln("--------    ----   ----    ----")
 
     def list(self):
         # need to share the inititialisation and parse stuff between the different options
@@ -907,7 +907,7 @@ class OleContainer:
             file.write( bytes )
             file.close
         else:
-            print("failed to initialise ole container")
+            globals.outputln("failed to initialise ole container")
 
     def read(self):
         self.__parseFile()
diff --git a/msodumper/olestream.py b/msodumper/olestream.py
index 928dca9..9148d70 100644
--- a/msodumper/olestream.py
+++ b/msodumper/olestream.py
@@ -15,12 +15,12 @@ class MonikerStream(object):
         self.strm = globals.ByteStream(bytes)
 
     def read (self):
-        print ("moniker size: %d"%(len(self.strm.bytes)-16))
+        globals.outputln("moniker size: %d"%(len(self.strm.bytes)-16))
         clsID = self.strm.readBytes(16)
-        print ("CLS ID: %s"%globals.getRawBytes(clsID, True, False))
-        print ("stream data (implemention specific):")
+        globals.outputln("CLS ID: %s"%globals.getRawBytes(clsID, True, False))
+        globals.outputln("stream data (implemention specific):")
         globals.dumpBytes(self.strm.readRemainingBytes())
-        print ("")
+        globals.outputln("")
 
 class OLEStream(object):
 
@@ -29,13 +29,13 @@ class OLEStream(object):
 
     def read (self):
         ver = self.strm.readUnsignedInt(4)
-        print ("version: 0x%8.8X"%ver)
+        globals.outputln("version: 0x%8.8X"%ver)
         flags = self.strm.readUnsignedInt(4)
-        print ("flags: %d"%flags)
+        globals.outputln("flags: %d"%flags)
         linkUpdateOption = self.strm.readUnsignedInt(4)
-        print ("link update option: %d"%linkUpdateOption)
+        globals.outputln("link update option: %d"%linkUpdateOption)
         reserved = self.strm.readUnsignedInt(4)
-        print ("")
+        globals.outputln("")
 
         # Reserved moniker (must be ignored)
         monikerSize = self.strm.readUnsignedInt(4)
@@ -56,9 +56,9 @@ class OLEStream(object):
             strm.read()
 
         clsIDIndicator = self.strm.readSignedInt(4)
-        print ("cls ID indicator: %d"%clsIDIndicator)
+        globals.outputln("cls ID indicator: %d"%clsIDIndicator)
         clsID = self.strm.readBytes(16)
-        print ("CLS ID: %s"%globals.getRawBytes(clsID, True, False))
+        globals.outputln("CLS ID: %s"%globals.getRawBytes(clsID, True, False))
 #       globals.dumpBytes(self.strm.bytes, 512)
 
 class CompObjStream(object):
@@ -71,7 +71,7 @@ class CompObjStream(object):
         reserved = self.strm.readBytes(4)
         ver = self.strm.readUnsignedInt(4)
         reserved = self.strm.readBytes(20)
-        print ("version: 0x%4.4X"%ver)
+        globals.outputln("version: 0x%4.4X"%ver)
 
         # LengthPrefixedAnsiString
         length = self.strm.readUnsignedInt(4)
@@ -80,7 +80,7 @@ class CompObjStream(object):
             # must be null-terminated.
             raise CompObjStreamError()
 
-        print ("display name: " + displayName[:-1])
+        globals.outputln("display name: " + displayName[:-1])
 
         # ClipboardFormatOrAnsiString
         marker = self.strm.readUnsignedInt(4)
@@ -89,13 +89,13 @@ class CompObjStream(object):
             pass
         elif marker == 0xFFFFFFFF or marker == 0xFFFFFFFE:
             clipFormatID = self.strm.readUnsignedInt(4)
-            print ("clipboard format ID: %d"%clipFormatID)
+            globals.outputln("clipboard format ID: %d"%clipFormatID)
         else:
             clipName = self.strm.readBytes(marker)
             if ord(clipName[-1]) != 0x00:
                 # must be null-terminated.
                 raise CompObjStreamError()
-            print ("clipboard format name: %s"%clipName[:-1])
+            globals.outputln("clipboard format name: %s"%clipName[:-1])
 
         # LengthPrefixedAnsiString
         length = self.strm.readUnsignedInt(4)
@@ -108,7 +108,7 @@ class CompObjStream(object):
             # must be null-terminated.
             raise CompObjStreamError()
 
-        print ("reserved name : %s"%reserved[:-1])
+        globals.outputln("reserved name : %s"%reserved[:-1])
         unicodeMarker = self.strm.readUnsignedInt(4)
         if unicodeMarker != 0x71B239F4:
             raise CompObjStreamError()
@@ -117,7 +117,7 @@ class CompObjStream(object):
         length = self.strm.readUnsignedInt(4)
         if length > 0:
             s = globals.getUTF8FromUTF16(self.strm.readBytes(length*2))
-            print ("display name (unicode): %s"%s)
+            globals.outputln("display name (unicode): %s"%s)
 
         # ClipboardFormatOrAnsiString
         marker = self.strm.readUnsignedInt(4)
@@ -126,13 +126,13 @@ class CompObjStream(object):
             pass
         elif marker == 0xFFFFFFFF or marker == 0xFFFFFFFE:
             clipFormatID = self.strm.readUnsignedInt(4)
-            print ("clipboard format ID: %d"%clipFormatID)
+            globals.outputln("clipboard format ID: %d"%clipFormatID)
         else:
             clipName = globals.getUTF8FromUTF16(self.strm.readBytes(marker*2))
             if ord(clipName[-1]) != 0x00:
                 # must be null-terminated.
                 raise CompObjStreamError()
-            print ("clipboard format name: %s"%clipName[:-1])
+            globals.outputln("clipboard format name: %s"%clipName[:-1])
 
 class PropertySetStream(object):
 
@@ -144,59 +144,59 @@ class PropertySetStream(object):
         if byteorder != 0xFFFE:
             raise PropertySetStreamError()
         ver = self.strm.readUnsignedInt(2)
-        print ("version: 0x%4.4X"%ver)
+        globals.outputln("version: 0x%4.4X"%ver)
         sID = self.strm.readUnsignedInt(4)
-        print ("system identifier: 0x%4.4X"%sID)
+        globals.outputln("system identifier: 0x%4.4X"%sID)
         clsID = self.strm.readBytes(16)
-        print ("CLS ID: %s"%globals.getRawBytes(clsID, True, False))
+        globals.outputln("CLS ID: %s"%globals.getRawBytes(clsID, True, False))
         sets = self.strm.readUnsignedInt(4)
-        print ("number of property sets: 0x%4.4X"%sets)
+        globals.outputln("number of property sets: 0x%4.4X"%sets)
         fmtID0 = self.strm.readBytes(16)
-        print ("FMT ID 0: %s"%globals.getRawBytes(fmtID0, True, False))
+        globals.outputln("FMT ID 0: %s"%globals.getRawBytes(fmtID0, True, False))
         offset0 = self.strm.readUnsignedInt(4)
-        print ("offset 0: 0x%4.4X"%offset0)
+        globals.outputln("offset 0: 0x%4.4X"%offset0)
         if sets > 1:
             fmtID1 = self.strm.readBytes(16)
-            print ("FMT ID 1: %s"%globals.getRawBytes(fmtID0, True, False))
+            globals.outputln("FMT ID 1: %s"%globals.getRawBytes(fmtID0, True, False))
             offset1 = self.strm.readUnsignedInt(4)
-            print ("offset 1: 0x%4.4X\n"%offset1)
+            globals.outputln("offset 1: 0x%4.4X\n"%offset1)
         self.readSet(offset0)
         if sets > 1:
             self.strm.setCurrentPos(offset1);
             self.readSet(offset1)
 
     def readSet (self, setOffset):
-        print ("-----------------------------")
-        print ("Property set")
-        print ("-----------------------------")
+        globals.outputln("-----------------------------")
+        globals.outputln("Property set")
+        globals.outputln("-----------------------------")
         size = self.strm.readUnsignedInt(4)
-        print ("size: 0x%4.4X"%size)
+        globals.outputln("size: 0x%4.4X"%size)
         props = self.strm.readUnsignedInt(4)
-        print ("number of properties: 0x%4.4X"%props)
+        globals.outputln("number of properties: 0x%4.4X"%props)
         pos = 0
         while pos < props:
             self.strm.setCurrentPos(setOffset + 8 + pos*8);
             id = self.strm.readUnsignedInt(4)
             offset = self.strm.readUnsignedInt(4)
-            print ("ID: 0x%4.4X offset: 0x%4.4X"%(id, offset))
+            globals.outputln("ID: 0x%4.4X offset: 0x%4.4X"%(id, offset))
             self.strm.setCurrentPos(setOffset + offset);
             type = self.strm.readUnsignedInt(2)
             padding = self.strm.readUnsignedInt(2)
             if padding != 0:
                 raise PropertySetStreamError()
-            print ("type: 0x%4.4X"%type)
+            globals.outputln("type: 0x%4.4X"%type)
             if type == 2:
                 value = self.strm.readSignedInt(2)
-                print ("VT_I2: %d"%value)
+                globals.outputln("VT_I2: %d"%value)
             elif type == 0x41:
                 blobSize = self.strm.readUnsignedInt(4)
-                print ("VT_BLOB size: 0x%4.4X"%blobSize)
-                print ("------------------------------------------------------------------------")
+                globals.outputln("VT_BLOB size: 0x%4.4X"%blobSize)
+                globals.outputln("------------------------------------------------------------------------")
                 globals.dumpBytes(self.strm.bytes[self.strm.pos:self.strm.pos+blobSize], blobSize)
-                print ("------------------------------------------------------------------------")
+                globals.outputln("------------------------------------------------------------------------")
             else:
-                print ("unknown type")
+                globals.outputln("unknown type")
             pos += 1
-        print ("")
+        globals.outputln("")
 
 # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
diff --git a/msodumper/pptrecord.py b/msodumper/pptrecord.py
index 51bb160..79bc059 100644
--- a/msodumper/pptrecord.py
+++ b/msodumper/pptrecord.py
@@ -34,9 +34,9 @@ append a line to be displayed.
 
     def __print (self, text):
         try:
-            print(self.prefix + text)
+            globals.outputln(self.prefix + text)
         except UnicodeEncodeError:
-            print(self.prefix + "<%d invalid chars>"%len(text))
+            globals.outputln(self.prefix + "<%d invalid chars>"%len(text))
 
     def output (self):
         self.parseBytes()
diff --git a/msodumper/pptstream.py b/msodumper/pptstream.py
index d1ad3f1..d5ed635 100644
--- a/msodumper/pptstream.py
+++ b/msodumper/pptstream.py
@@ -24,15 +24,15 @@ class PPTFile(object):
 
 
     def __printSep (self, c='-', w=68, prefix=''):
-        print(prefix + c*w)
+        globals.outputln(prefix + c*w)
 
 
     def printStreamInfo (self):
         self.__printSep('=', 68)
-        print("PPT File Format Dumper by Kohei Yoshida & Thorsten Behrens")
-        print("  total stream size: %d bytes"%self.size)
+        globals.outputln("PPT File Format Dumper by Kohei Yoshida & Thorsten Behrens")
+        globals.outputln("  total stream size: %d bytes"%self.size)
         self.__printSep('=', 68)
-        print('')
+        globals.outputln('')
 
 
     def printHeader (self):
@@ -94,7 +94,7 @@ class PPTDirStream(object):
 
 
     def __print (self, text):
-        print(self.prefix + text)
+        globals.outputln(self.prefix + text)
 
 
     def __printSep (self, c='-', w=68, prefix=''):
@@ -105,7 +105,7 @@ class PPTDirStream(object):
         try:
             # read until data is exhausted (min record size: 8 bytes)
             while self.pos+8 < self.size:
-                print("")
+                globals.outputln("")
                 self.readRecord()
             return True
         except EndOfStream:
@@ -131,9 +131,9 @@ class PPTDirStream(object):
                 output(self.prefix + "%4.4Xh: "%recordType)
             output("%2.2X "%ord(bytes[i]))
             if (i+1) % 16 == 0 and i != size-1:
-                print("")
+                globals.outputln("")
         if size > 0:
-            print("")
+            globals.outputln("")
             self.__printSep('-', 61, "%4.4Xh: "%recordType)
 
 
@@ -164,13 +164,13 @@ class PPTDirStream(object):
                 self.handlePPT10BinaryTags(bytes,recordInfo)
         elif recordInfo is not None:
             handler = recordInfo[1](recordType, recordInstance, size, bytes, self.properties, self.prefix)
-            print("")
+            globals.outputln("")
             # call special record handler, if any
             if handler is not None:
                 handler.output()
             self.printRecordDump(bytes, recordType)
         elif size > 0:
-            print("")
+            globals.outputln("")
             self.printRecordDump(bytes, recordType)
 
     def checkPPT10BinaryTag (recordType, recordInstance, size, bytes, properties, prefix):
diff --git a/ppt-dump.py b/ppt-dump.py
index f3c64c2..2148ca8 100755
--- a/ppt-dump.py
+++ b/ppt-dump.py
@@ -27,10 +27,10 @@ class PPTDumper(object):
 
     def __printDirHeader (self, dirname, byteLen):
         dirname = globals.encodeName(dirname)
-        print("")
-        print("="*68)
-        print("%s (size: %d bytes)"%(dirname, byteLen))
-        print("-"*68)
+        globals.outputln("")
+        globals.outputln("="*68)
+        globals.outputln("%s (size: %d bytes)"%(dirname, byteLen))
+        globals.outputln("-"*68)
 
     def dump (self):
         file = open(self.filepath, 'rb')
commit d46b85c396e0a7c7d355fe73010ca39c25f92d49
Author: Thorsten Behrens <tbehrens at suse.com>
Date:   Mon Dec 2 02:04:16 2013 +0100

    Move codebase to MPL.
    
    SUSE-contributed code is MPLv2, other patch contributors have
    consented to new license.
    
    Signed-off-by: Kohei Yoshida <kohei.yoshida at gmail.com>
    Signed-off-by: Miklos Vajna <vmiklos at suse.cz>
    Signed-off-by: Noel Power <noel.power at suse.com>

diff --git a/compress.py b/compress.py
index 72a0ad8..f23e62f 100755
--- a/compress.py
+++ b/compress.py
@@ -1,30 +1,9 @@
 #!/usr/bin/env python2
-########################################################################
 #
-#  Copyright (c) 2013 Noel Power
-#  
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#  
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#  
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-########################################################################
 
 import sys, os.path, optparse
 
@@ -46,3 +25,5 @@ def main():
 
 if __name__ == '__main__':
     main()
+
+# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
diff --git a/decompress.py b/decompress.py
index 5b4b0fb..cb2264c 100755
--- a/decompress.py
+++ b/decompress.py
@@ -1,30 +1,9 @@
 #!/usr/bin/env python2
-########################################################################
 #
-#  Copyright (c) 2013 Noel Power
-#  
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#  
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#  
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-########################################################################
 
 import sys, os.path, optparse
 
@@ -46,3 +25,5 @@ def main():
 
 if __name__ == '__main__':
     main()
+
+# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
diff --git a/msodumper/formula.py b/msodumper/formula.py
index 4faeb01..9c16b56 100644
--- a/msodumper/formula.py
+++ b/msodumper/formula.py
@@ -1,29 +1,9 @@
-########################################################################
+# -*- tab-width: 4; indent-tabs-mode: nil -*-
 #
-#  Copyright (c) 2010-2013 Kohei Yoshida
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
 
 import struct, sys
 import globals
@@ -725,3 +705,5 @@ associated token classes will be without the leading underscore (_)."""
 
     def getTokens (self):
         return self.tokens
+
+# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
diff --git a/msodumper/globals.py b/msodumper/globals.py
index a3fa6a1..d19feaf 100644
--- a/msodumper/globals.py
+++ b/msodumper/globals.py
@@ -1,29 +1,9 @@
-########################################################################
+# -*- tab-width: 4; indent-tabs-mode: nil -*-
 #
-#  Copyright (c) 2010 Kohei Yoshida
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
 
 import sys, struct, math, zipfile, xmlpp, StringIO
 
@@ -477,3 +457,5 @@ def stringizeColorRef(colorRef, colorName="color"):
         return "%s = colorschemecolor(%d)"%(colorName, colorValue)
     else:
         return "%s = <unidentified color>(%4.4Xh)"%(colorName, colorValue)
+
+# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
diff --git a/msodumper/msocrypto.py b/msodumper/msocrypto.py
index 541dd75..5516fb2 100644
--- a/msodumper/msocrypto.py
+++ b/msodumper/msocrypto.py
@@ -1,10 +1,9 @@
-########################################################################
+# -*- tab-width: 4; indent-tabs-mode: nil -*-
 #
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-########################################################################
 
 import globals
 
@@ -75,3 +74,5 @@ class EncryptionInfo(object):
 
     def outputAgile (self):
         print (self.bytes)
+
+# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
diff --git a/msodumper/msodraw.py b/msodumper/msodraw.py
index 5c40562..0b4f99a 100644
--- a/msodumper/msodraw.py
+++ b/msodumper/msodraw.py
@@ -1,29 +1,9 @@
-########################################################################
+# -*- tab-width: 4; indent-tabs-mode: nil -*-
 #
-#  Copyright (c) 2010 Kohei Yoshida
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
 
 import globals, xlsmodel
 import sys
diff --git a/msodumper/node.py b/msodumper/node.py
index e139122..c376ac6 100644
--- a/msodumper/node.py
+++ b/msodumper/node.py
@@ -1,29 +1,9 @@
-########################################################################
+# -*- tab-width: 4; indent-tabs-mode: nil -*-
 #
-#  Copyright (c) 2010 Kohei Yoshida
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
 
 # This file (node.py) gets copied in several of my projects.  Find out a way
 # to avoid making duplicate copies in each of my projects.
@@ -217,3 +197,5 @@ def printNode (fd, node, level, breakLine):
         content = encodeString(content)
         if len(content) > 0:
             fd.write (indent + content + lf)
+
+# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
diff --git a/msodumper/ole.py b/msodumper/ole.py
index 736c6e9..523de80 100644
--- a/msodumper/ole.py
+++ b/msodumper/ole.py
@@ -1,29 +1,9 @@
-########################################################################
+# -*- tab-width: 4; indent-tabs-mode: nil -*-
 #
-#  Copyright (c) 2010 Kohei Yoshida
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
 
 import sys
 import globals
@@ -936,3 +916,4 @@ class OleContainer:
         self.__parseFile()
         return self.rootNode
 
+# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
diff --git a/msodumper/olestream.py b/msodumper/olestream.py
index ef458ad..928dca9 100644
--- a/msodumper/olestream.py
+++ b/msodumper/olestream.py
@@ -1,29 +1,9 @@
-########################################################################
+# -*- tab-width: 4; indent-tabs-mode: nil -*-
 #
-#  Copyright (c) 2011 Kohei Yoshida
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
 
 import sys
 import globals
@@ -218,3 +198,5 @@ class PropertySetStream(object):
                 print ("unknown type")
             pos += 1
         print ("")
+
+# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
diff --git a/msodumper/oletool.py b/msodumper/oletool.py
index 5236902..dfab178 100755
--- a/msodumper/oletool.py
+++ b/msodumper/oletool.py
@@ -1,30 +1,9 @@
 #!/usr/bin/env python2
-########################################################################
 #
-#  Copyright (c) 2013 Noel Power
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
 
 import sys, os.path, optparse
 sys.path.append(sys.path[0]+"/src")
@@ -63,3 +42,5 @@ def main ():
 
 if __name__ == '__main__':
     main()
+
+# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
diff --git a/msodumper/vbahelper.py b/msodumper/vbahelper.py
index c032d20..f36b736 100644
--- a/msodumper/vbahelper.py
+++ b/msodumper/vbahelper.py
@@ -1,29 +1,9 @@
-########################################################################
+# -*- tab-width: 4; indent-tabs-mode: nil -*-
 #
-#  Copyright (c) 2013 Noel Power
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
 
 import  sys, struct
 
@@ -278,3 +258,4 @@ class CompressedVBAStream(VBAStreamBase):
 
         return None
 
+# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
diff --git a/ppt-dump.py b/ppt-dump.py
index b7bcc32..f3c64c2 100755
--- a/ppt-dump.py
+++ b/ppt-dump.py
@@ -1,30 +1,9 @@
 #!/usr/bin/env python2
-########################################################################
 #
-#  Copyright (c) 2010 Kohei Yoshida, Thorsten Behrens
-#  
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#  
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#  
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-########################################################################
 
 import sys, os.path, getopt
 from msodumper import ole, pptstream, globals, olestream
@@ -120,3 +99,5 @@ def main (args):
 
 if __name__ == '__main__':
     main(sys.argv)
+
+# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
diff --git a/vbadump.py b/vbadump.py
index 2c930ec..439ca36 100755
--- a/vbadump.py
+++ b/vbadump.py
@@ -1,30 +1,9 @@
 #!/usr/bin/env python2
-########################################################################
 #
-#  Copyright (c) 2013 Noel Power
-#  
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#  
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#  
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-########################################################################
 
 import sys, os.path, optparse, math
 
@@ -553,3 +532,5 @@ def main():
 
 if __name__ == '__main__':
     main()
+
+# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: