[Libreoffice-commits] mso-dumper.git: 3 commits - src/docdirstream.py src/docrecord.py src/docstream.py src/formula.py src/globals.py src/msodraw.py src/node.py src/ole.py src/olestream.py src/oletool.py src/pptrecord.py src/vbahelper.py src/xlsmodel.py src/xlsparser.py src/xmlpp.py

Thorsten Behrens thorsten at kemper.freedesktop.org
Sun Nov 24 14:20:08 PST 2013


 src/docdirstream.py |    2 
 src/docrecord.py    |   10 +-
 src/docstream.py    |   60 ++++++------
 src/formula.py      |    8 -
 src/globals.py      |   46 ++++-----
 src/msodraw.py      |   10 +-
 src/node.py         |   14 +-
 src/ole.py          |   60 ++++++------
 src/olestream.py    |    6 -
 src/oletool.py      |   12 +-
 src/pptrecord.py    |   22 ++--
 src/vbahelper.py    |   34 +++---
 src/xlsmodel.py     |   12 +-
 src/xlsparser.py    |  256 +++++++++++++++++++++++++---------------------------
 src/xmlpp.py        |   52 +++++-----
 15 files changed, 299 insertions(+), 305 deletions(-)

New commits:
commit 74619c34ce4e0db46f068136b907b6aa0a57b2cf
Author: Thorsten Behrens <thb at documentfoundation.org>
Date:   Sun Nov 24 23:14:57 2013 +0100

    Cleanup trailing whitespace.

diff --git a/src/docdirstream.py b/src/docdirstream.py
index e73c935..060a520 100644
--- a/src/docdirstream.py
+++ b/src/docdirstream.py
@@ -20,7 +20,7 @@ class DOCDirStream:
         self.name = name
         self.mainStream = mainStream
         self.doc = doc
-    
+
     def printAndSet(self, key, value, hexdump = True, end = True, offset = False, silent = False, dict = None, default = None):
         setattr(self, key, value)
         if silent:
diff --git a/src/docrecord.py b/src/docrecord.py
index a0d3ec3..a6e63c4 100644
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -257,7 +257,7 @@ class Sepx(DOCDirStream):
             prl.dump()
             pos += prl.getSize()
         print '</sepx>'
-    
+
 class Sed(DOCDirStream):
     """The Sed structure specifies the location of the section properties."""
     size = 12 # defined by 2.8.26
@@ -665,7 +665,7 @@ class SPgbPropOperand(DOCDirStream):
     def __init__(self, parent):
         DOCDirStream.__init__(self, parent.bytes)
         self.pos = parent.pos
-    
+
     def dump(self):
         print '<sPgbPropOperand type="SPgbPropOperand" offset="%d">' % self.pos
         buf = self.readuInt8()
@@ -1318,7 +1318,7 @@ class Chpx(DOCDirStream):
         for prl in self.prls:
             prl.dump()
         print '</chpx>'
-    
+
 class PapxInFkp(DOCDirStream):
     """The PapxInFkp structure specifies a set of text properties."""
     def __init__(self, bytes, mainStream, offset):
@@ -1335,7 +1335,7 @@ class PapxInFkp(DOCDirStream):
             grpPrlAndIstd = GrpPrlAndIstd(self.bytes, self.pos, self.cb)
         grpPrlAndIstd.dump()
         print '</papxInFkp>'
-    
+
 class BxPap(DOCDirStream):
     """The BxPap structure specifies the offset of a PapxInFkp in PapxFkp."""
     size = 13 # in bytes, see 2.9.23
@@ -2742,7 +2742,7 @@ class StshiLsd(DOCDirStream):
         DOCDirStream.__init__(self, bytes)
         self.stshi = stshi
         self.pos = offset
-    
+
     def dump(self):
         print '<stshiLsd type="StshiLsd" offset="%d">' % (self.pos)
         self.printAndSet("cbLSD", self.readuInt16())
diff --git a/src/docstream.py b/src/docstream.py
index d7afe4c..7a8e995 100644
--- a/src/docstream.py
+++ b/src/docstream.py
@@ -716,36 +716,36 @@ class WordDocumentStream(DOCDirStream):
     def __dumpFibRgFcLcb2000(self):
         self.__dumpFibRgFcLcb97()
         fields = [
-            "fcPlcfTch", 
-            "lcbPlcfTch", 
-            "fcRmdThreading", 
-            "lcbRmdThreading", 
-            "fcMid", 
-            "lcbMid", 
-            "fcSttbRgtplc", 
-            "lcbSttbRgtplc", 
-            "fcMsoEnvelope", 
-            "lcbMsoEnvelope", 
-            "fcPlcfLad", 
-            "lcbPlcfLad", 
-            "fcRgDofr", 
-            "lcbRgDofr", 
-            "fcPlcosl", 
-            "lcbPlcosl", 
-            "fcPlcfCookieOld", 
-            "lcbPlcfCookieOld", 
-            "fcPgdMotherOld", 
-            "lcbPgdMotherOld", 
-            "fcBkdMotherOld", 
-            "lcbBkdMotherOld", 
-            "fcPgdFtnOld", 
-            "lcbPgdFtnOld", 
-            "fcBkdFtnOld", 
-            "lcbBkdFtnOld", 
-            "fcPgdEdnOld", 
-            "lcbPgdEdnOld", 
-            "fcBkdEdnOld", 
-            "lcbBkdEdnOld", 
+            "fcPlcfTch",
+            "lcbPlcfTch",
+            "fcRmdThreading",
+            "lcbRmdThreading",
+            "fcMid",
+            "lcbMid",
+            "fcSttbRgtplc",
+            "lcbSttbRgtplc",
+            "fcMsoEnvelope",
+            "lcbMsoEnvelope",
+            "fcPlcfLad",
+            "lcbPlcfLad",
+            "fcRgDofr",
+            "lcbRgDofr",
+            "fcPlcosl",
+            "lcbPlcosl",
+            "fcPlcfCookieOld",
+            "lcbPlcfCookieOld",
+            "fcPgdMotherOld",
+            "lcbPgdMotherOld",
+            "fcBkdMotherOld",
+            "lcbBkdMotherOld",
+            "fcPgdFtnOld",
+            "lcbPgdFtnOld",
+            "fcBkdFtnOld",
+            "lcbBkdFtnOld",
+            "fcPgdEdnOld",
+            "lcbPgdEdnOld",
+            "fcBkdEdnOld",
+            "lcbBkdEdnOld",
                 ]
         for i in fields:
             self.printAndSet(i, self.readuInt32())
diff --git a/src/formula.py b/src/formula.py
index 963fb96..4faeb01 100644
--- a/src/formula.py
+++ b/src/formula.py
@@ -1,7 +1,7 @@
 ########################################################################
 #
 #  Copyright (c) 2010-2013 Kohei Yoshida
-#  
+#
 #  Permission is hereby granted, free of charge, to any person
 #  obtaining a copy of this software and associated documentation
 #  files (the "Software"), to deal in the Software without
@@ -10,10 +10,10 @@
 #  copies of the Software, and to permit persons to whom the
 #  Software is furnished to do so, subject to the following
 #  conditions:
-#  
+#
 #  The above copyright notice and this permission notice shall be
 #  included in all copies or substantial portions of the Software.
-#  
+#
 #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 #  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@@ -697,7 +697,7 @@ _tokenMap = {
 class FormulaParser(object):
     """This is a new formula parser that will eventually replace the old one.
 
-Once replaced, I'll change the name to FormulaParser and the names of the 
+Once replaced, I'll change the name to FormulaParser and the names of the
 associated token classes will be without the leading underscore (_)."""
 
 
diff --git a/src/globals.py b/src/globals.py
index 3bcba17..a3fa6a1 100644
--- a/src/globals.py
+++ b/src/globals.py
@@ -1,7 +1,7 @@
 ########################################################################
 #
 #  Copyright (c) 2010 Kohei Yoshida
-#  
+#
 #  Permission is hereby granted, free of charge, to any person
 #  obtaining a copy of this software and associated documentation
 #  files (the "Software"), to deal in the Software without
@@ -10,10 +10,10 @@
 #  copies of the Software, and to permit persons to whom the
 #  Software is furnished to do so, subject to the following
 #  conditions:
-#  
+#
 #  The above copyright notice and this permission notice shall be
 #  included in all copies or substantial portions of the Software.
-#  
+#
 #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 #  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@@ -193,7 +193,7 @@ def getUnicodeRichExtText (bytes):
     phoneticBytes = 0
     if hasPhonetic:
         phoneticBytes = strm.readUnsignedInt(4)
-        
+
     if isDoubleByte:
         # double-byte string (UTF-16)
         text = ''
@@ -221,9 +221,9 @@ def getRichText (bytes, textLen=None):
 Note the following:
 
   * The 1st byte always contains flag.
-  * The actual number of bytes read may differ depending on the values of the 
-    flags, so the client code should pass an open-ended stream of bytes and 
-    always query for the actual bytes read to adjust for the new stream 
+  * The actual number of bytes read may differ depending on the values of the
+    flags, so the client code should pass an open-ended stream of bytes and
+    always query for the actual bytes read to adjust for the new stream
     position when this function returns.
 """
 
@@ -460,13 +460,13 @@ def outputZipContent (bytes, printer, width=80):
             wrapper.flush()
         else:
             dumpBytes(contents)
-            
+
     zipFile.close()
 
 def stringizeColorRef(colorRef, colorName="color"):
     def split (packedColor):
         return ((packedColor & 0xFF0000) // 0x10000, (packedColor & 0xFF00) / 0x100, (packedColor & 0xFF))
-    
+
     colorValue = colorRef & 0xFFFFFF
     if colorRef & 0xFE000000 == 0xFE000000 or colorRef & 0xFF000000 == 0:
         colors = split(colorValue)
diff --git a/src/msodraw.py b/src/msodraw.py
index 7af5809..5c40562 100644
--- a/src/msodraw.py
+++ b/src/msodraw.py
@@ -1,7 +1,7 @@
 ########################################################################
 #
 #  Copyright (c) 2010 Kohei Yoshida
-#  
+#
 #  Permission is hereby granted, free of charge, to any person
 #  obtaining a copy of this software and associated documentation
 #  files (the "Software"), to deal in the Software without
@@ -10,10 +10,10 @@
 #  copies of the Software, and to permit persons to whom the
 #  Software is furnished to do so, subject to the following
 #  conditions:
-#  
+#
 #  The above copyright notice and this permission notice shall be
 #  included in all copies or substantial portions of the Software.
-#  
+#
 #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 #  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@@ -144,7 +144,7 @@ class RecordHeader:
 class ColorRef:
     def __init__ (self, byte):
         self.red   = (byte & 0x000000FF)
-        self.green = (byte & 0x0000FF00) / 256 
+        self.green = (byte & 0x0000FF00) / 256
         self.blue  = (byte & 0x00FF0000) / 65536
         self.flag  = (byte & 0xFF000000) / 16777216
 
@@ -264,7 +264,7 @@ class FDGGBlock:
         self.head = FDGG(strm)
         self.idcls = []
         # NOTE: The spec says head.cidcl stores the number of IDCL's, but each
-        # FDGGBlock only contains bytes enough to store (head.cidcl - 1) of 
+        # FDGGBlock only contains bytes enough to store (head.cidcl - 1) of
         # IDCL's.
         for i in xrange(0, self.head.cidcl-1):
             idcl = IDCL(strm)
diff --git a/src/node.py b/src/node.py
index 777655d..e139122 100644
--- a/src/node.py
+++ b/src/node.py
@@ -1,7 +1,7 @@
 ########################################################################
 #
 #  Copyright (c) 2010 Kohei Yoshida
-#  
+#
 #  Permission is hereby granted, free of charge, to any person
 #  obtaining a copy of this software and associated documentation
 #  files (the "Software"), to deal in the Software without
@@ -10,10 +10,10 @@
 #  copies of the Software, and to permit persons to whom the
 #  Software is furnished to do so, subject to the following
 #  conditions:
-#  
+#
 #  The above copyright notice and this permission notice shall be
 #  included in all copies or substantial portions of the Software.
-#  
+#
 #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 #  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@@ -25,7 +25,7 @@
 #
 ########################################################################
 
-# This file (node.py) gets copied in several of my projects.  Find out a way 
+# This file (node.py) gets copied in several of my projects.  Find out a way
 # to avoid making duplicate copies in each of my projects.
 
 import sys
@@ -33,9 +33,9 @@ import sys
 class NodeType:
     # unknown node type.
     Unknown = 0
-    # the document root - typically has only one child element, but it can 
+    # the document root - typically has only one child element, but it can
     # have multiple children.
-    Root    = 1 
+    Root    = 1
     # node that has name and attributes, and may have child nodes.
     Element = 2
     # node that only has textural content.
@@ -183,7 +183,7 @@ def printNode (fd, node, level, breakLine):
     elif node.nodeType == NodeType.Element:
         hasChildren = len(node.getChildNodes()) > 0
 
-        # We add '<' and '>' (or '/>') after the element content gets 
+        # We add '<' and '>' (or '/>') after the element content gets
         # encoded.
         line = node.name
         if len(node.attrs) > 0:
diff --git a/src/ole.py b/src/ole.py
index cc4c584..736c6e9 100644
--- a/src/ole.py
+++ b/src/ole.py
@@ -1,7 +1,7 @@
 ########################################################################
 #
 #  Copyright (c) 2010 Kohei Yoshida
-#  
+#
 #  Permission is hereby granted, free of charge, to any person
 #  obtaining a copy of this software and associated documentation
 #  files (the "Software"), to deal in the Software without
@@ -10,10 +10,10 @@
 #  copies of the Software, and to permit persons to whom the
 #  Software is furnished to do so, subject to the following
 #  conditions:
-#  
+#
 #  The above copyright notice and this permission notice shall be
 #  included in all copies or substantial portions of the Software.
-#  
+#
 #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 #  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@@ -150,7 +150,7 @@ class Header(object):
         # short sector size (usually 64 bytes)
         print("Short sector size: %d (%d)"%(2**self.secSizeShort, self.secSizeShort))
 
-        # total number of sectors in SAT (equals the number of sector IDs 
+        # total number of sectors in SAT (equals the number of sector IDs
         # stored in the MSAT).
         print("Total number of sectors used in SAT: %d"%self.numSecSAT)
 
@@ -195,7 +195,7 @@ class Header(object):
         # short sector size (usually 64 bytes)
         self.secSizeShort = getSignedInt(self.bytes[32:34])
 
-        # total number of sectors in SAT (equals the number of sector IDs 
+        # total number of sectors in SAT (equals the number of sector IDs
         # stored in the MSAT).
         self.numSecSAT = getSignedInt(self.bytes[44:48])
 
@@ -241,7 +241,7 @@ class Header(object):
                     else:
                         self.MSAT.appendSectorID(id)
 
-        return 512 
+        return 512
 
 
     def getMSAT (self):
@@ -288,9 +288,9 @@ class Header(object):
 class MSAT(object):
     """Master Sector Allocation Table (MSAT)
 
-This class represents the master sector allocation table (MSAT) that stores 
-sector IDs that point to all the sectors that are used by the sector 
-allocation table (SAT).  The actual SAT are to be constructed by combining 
+This class represents the master sector allocation table (MSAT) that stores
+sector IDs that point to all the sectors that are used by the sector
+allocation table (SAT).  The actual SAT are to be constructed by combining
 all the sectors pointed by the sector IDs in order of occurrence.
 """
     def __init__ (self, sectorSize, bytes, params):
@@ -438,12 +438,12 @@ class SAT(object):
 class SSAT(SAT):
     """Short Sector Allocation Table (SSAT)
 
-SSAT contains an array of sector ID chains of all short streams, as oppposed 
+SSAT contains an array of sector ID chains of all short streams, as oppposed
 to SAT which contains an array of sector ID chains of all standard streams.
 The sector IDs included in the SSAT point to the short sectors in the short
 stream container stream.
 
-The first sector ID of SSAT is in the header, and the IDs of the remaining 
+The first sector ID of SSAT is in the header, and the IDs of the remaining
 sectors are contained in the SAT as a sector ID chain.
 """
 
@@ -481,7 +481,7 @@ entire file stream.
         Red = 0
         Black = 1
         Unknown = 99
-        
+
     class Entry:
         def __init__ (self):
             self.Name = ''
@@ -653,7 +653,7 @@ entire file stream.
                 strmLoc = "SSAT"
             print("(first sector ID: %d; size: %d; location: %s)"%
                   (entry.StreamSectorID, entry.StreamSize, strmLoc))
-    
+
             satObj = None
             secSize = 0
             if entry.StreamLocation == StreamLocation.SAT:
@@ -765,7 +765,7 @@ entire file stream.
 class DateTime:
     def __init__(self):
         self.day = 0
-        self.month = 0 
+        self.month = 0
         self.year = 0
         self.hour = 0
         self.second = 0
@@ -787,7 +787,7 @@ class DirNode:
         return self.HierachicalName
 
     def getChildren(self):
-        return self.Nodes  
+        return self.Nodes
 
     def getStream(self):
         return self.OleContainer.getStreamForEntry( self.Entry )
@@ -799,14 +799,14 @@ class OleContainer:
         self.header = None
         self.rootNode = None
         self.params = params
-        
+
     def __getModifiedTime(self, entry):
         # need parse/decode Entry.TimeModified
         # ( although the documentation indicates that it might not be
         # worth it 'cause they are not universally used
         modified  = DateTime
         modified.day = 0
-        modified.month = 0 
+        modified.month = 0
         modified.year = 0
         modified.hour = 0
         modified.second = 0
@@ -816,13 +816,13 @@ class OleContainer:
         if self.rootNode == None:
             file = open(self.filePath, 'rb')
             self.chars = file.read()
-            file.close()    
+            file.close()
             self.header = Header(self.chars, self.params)
             self.header.parse()
             self.obj = self.header.getDirectory()
             self.obj.parseDirEntries()
             count = 0
-            self.rootNode = self.__buildTree( self.obj.entries )   
+            self.rootNode = self.__buildTree( self.obj.entries )
 
     def __addSiblings( self, entries, parent, child ):
         # add left siblings
@@ -834,18 +834,18 @@ class OleContainer:
             if  newEntry.Entry.DirIDRoot > 0:
                 newEntry.HierachicalName = newEntry.HierachicalName + '/'
 
-            self.__addSiblings( entries, parent, newEntry ) 
+            self.__addSiblings( entries, parent, newEntry )
             parent.Nodes.insert( 0, newEntry )
 
         nextRight = child.Entry.DirIDRight
-        # add children to the right 
+        # add children to the right
         if ( nextRight > 0 ):
             newEntry = DirNode( entries[ nextRight ], self )
 #            newEntry.HierachicalName = parent.HierachicalName + globals.encodeName( newEntry.Entry.Name )
             newEntry.HierachicalName = parent.HierachicalName + newEntry.Entry.Name
             if  newEntry.Entry.DirIDRoot > 0:
                 newEntry.HierachicalName = newEntry.HierachicalName + '/'
-            self.__addSiblings( entries, parent, newEntry ) 
+            self.__addSiblings( entries, parent, newEntry )
             parent.Nodes.append( newEntry )
 
     def __buildTreeImpl(self, entries, parent ):
@@ -859,13 +859,13 @@ class OleContainer:
 
             self.__addSiblings( entries, parent, newEntry )
             parent.Nodes.append( newEntry )
-            
+
         for child in parent.Nodes:
             if child.Entry.DirIDRoot > 0:
                 self.__buildTreeImpl( entries, child )
 
     def __buildTree(self, entries ):
-        treeRoot = DirNode( entries[0], self ) 
+        treeRoot = DirNode( entries[0], self )
         self.__buildTreeImpl( entries, treeRoot )
         return treeRoot
 
@@ -876,8 +876,8 @@ class OleContainer:
             for child in node.Nodes:
                 result = self.__findEntryByHierachicalName( child, name )
                 if result != None:
-                    return result 
-        return None 
+                    return result
+        return None
 
     def __printListReport( self, treeNode ):
 
@@ -885,7 +885,7 @@ class OleContainer:
 
         if len( treeNode.HierachicalName ) > 0 :
             print '{0:8d}  {1:0<2d}-{2:0<2d}-{3:0<2d} {4:0<2d}:{5:0<2d}   {6}'.format(treeNode.Entry.StreamSize, dateInfo.day, dateInfo.month, dateInfo.year, dateInfo.hour, dateInfo.second, treeNode.HierachicalName )
-     
+
         for node in treeNode.Nodes:
             # ignore the root
             self.__printListReport( node )
@@ -897,7 +897,7 @@ class OleContainer:
 
     def list(self):
         # need to share the inititialisation and parse stuff between the different options
-       
+
         self.__parseFile()
         if  self.rootNode != None:
             self.__printHeader()
@@ -923,7 +923,7 @@ class OleContainer:
         if  self.rootNode != None:
             entry = self.__findEntryByHierachicalName( self.rootNode, name )
             bytes = self.getStreamForEntry( entry )
-            file = open(entry.Name, 'wb') 
+            file = open(entry.Name, 'wb')
             file.write( bytes )
             file.close
         else:
@@ -934,5 +934,5 @@ class OleContainer:
 
     def getRoot(self):
         self.__parseFile()
-        return self.rootNode 
+        return self.rootNode
 
diff --git a/src/olestream.py b/src/olestream.py
index a622082..ef458ad 100644
--- a/src/olestream.py
+++ b/src/olestream.py
@@ -1,7 +1,7 @@
 ########################################################################
 #
 #  Copyright (c) 2011 Kohei Yoshida
-#  
+#
 #  Permission is hereby granted, free of charge, to any person
 #  obtaining a copy of this software and associated documentation
 #  files (the "Software"), to deal in the Software without
@@ -10,10 +10,10 @@
 #  copies of the Software, and to permit persons to whom the
 #  Software is furnished to do so, subject to the following
 #  conditions:
-#  
+#
 #  The above copyright notice and this permission notice shall be
 #  included in all copies or substantial portions of the Software.
-#  
+#
 #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 #  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
diff --git a/src/oletool.py b/src/oletool.py
index b11d78e..5236902 100755
--- a/src/oletool.py
+++ b/src/oletool.py
@@ -2,7 +2,7 @@
 ########################################################################
 #
 #  Copyright (c) 2013 Noel Power
-#  
+#
 #  Permission is hereby granted, free of charge, to any person
 #  obtaining a copy of this software and associated documentation
 #  files (the "Software"), to deal in the Software without
@@ -11,10 +11,10 @@
 #  copies of the Software, and to permit persons to whom the
 #  Software is furnished to do so, subject to the following
 #  conditions:
-#  
+#
 #  The above copyright notice and this permission notice shall be
 #  included in all copies or substantial portions of the Software.
-#  
+#
 #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 #  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@@ -53,13 +53,13 @@ def main ():
     container = ole.OleContainer( args[ 0 ], params )
 
     if params.list == True:
-        container.list() 
+        container.list()
     if params.extract:
        files = args
        files.pop(0)
-           
+
        for file in files:
-           container.extract( file ) 
+           container.extract( file )
 
 if __name__ == '__main__':
     main()
diff --git a/src/pptrecord.py b/src/pptrecord.py
index f29d590..cbbf32d 100644
--- a/src/pptrecord.py
+++ b/src/pptrecord.py
@@ -41,7 +41,7 @@ append a line to be displayed.
         pass
 
     def __print (self, text):
-        try:    
+        try:
             print(self.prefix + text)
         except UnicodeEncodeError:
             print(self.prefix + "<%d invalid chars>"%len(text))
@@ -105,7 +105,7 @@ append a line to be displayed.
         numer = self.readSignedInt(4)
         denom = self.readSignedInt(4)
         return "%d/%d"%(numer, denom)
-    
+
     def readScaling (self):
         xratio = self.readRatio()
         yratio = self.readRatio()
@@ -504,7 +504,7 @@ class AnimationInfo(BaseRecordHandler):
         orderID = self.readSignedInt(2)
         if orderID == -2:
             self.appendLine("order: follow master slide")
-        else:    
+        else:
             self.appendLine("order: ID %4.4Xh"%orderID)
 
         self.appendLine("num slides to play object: %d"%self.readUnsignedInt(2))
@@ -570,7 +570,7 @@ class AnimNode(BaseRecordHandler):
             self.appendLine("nodeKind: animation")
         elif nodeKind == 24:
             self.appendLine("nodeKind: transitionFilter")
-        else:    
+        else:
             self.appendLine("nodeKind: unknown/ignore")
 
 
@@ -579,17 +579,17 @@ class AnimAttributeValue(BaseRecordHandler):
 
     def handleByte (self):
         self.appendLine("byte value: %2.2Xh"%self.readUnsignedInt(1))
-        
+
     def handleLong (self):
         self.appendLine("long value: %d"%self.readUnsignedInt(4))
-        
+
     def handleFloat (self):
         self.appendLine("float value: %f"%globals.getFloat(self.readBytes(4)))
-        
+
     def handleString (self):
         value = globals.getUTF8FromUTF16(globals.getTextBytes(self.readRemainingBytes()))
         self.appendLine("text value: '%s'"%value)
-    
+
     valueHandlers=[handleByte,handleLong,handleFloat,handleString]
 
     def parseBytes (self):
@@ -653,7 +653,7 @@ class AnimValue(BaseRecordHandler):
     def handleAutoReverse (self):
         if self.readUnsignedInt(4) != 0:
             self.appendLine("autoReverse is on")
-    
+
     valueHandlers=[handleRepeat,handleDefault,handleDefault,handleAccelerate,handleDecelerate,handleAutoReverse]
 
     def parseBytes (self):
diff --git a/src/vbahelper.py b/src/vbahelper.py
index c47e6ef..c032d20 100644
--- a/src/vbahelper.py
+++ b/src/vbahelper.py
@@ -1,7 +1,7 @@
 ########################################################################
 #
 #  Copyright (c) 2013 Noel Power
-#  
+#
 #  Permission is hereby granted, free of charge, to any person
 #  obtaining a copy of this software and associated documentation
 #  files (the "Software"), to deal in the Software without
@@ -10,10 +10,10 @@
 #  copies of the Software, and to permit persons to whom the
 #  Software is furnished to do so, subject to the following
 #  conditions:
-#  
+#
 #  The above copyright notice and this permission notice shall be
 #  included in all copies or substantial portions of the Software.
-#  
+#
 #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 #  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@@ -26,12 +26,13 @@
 ########################################################################
 
 import  sys, struct
+
 class VBAStreamBase:
     CHUNKSIZE = 4096
     def __init__(self, chars, offset):
         self.mnOffset = offset
         self.chars = chars
- 
+
     def copyTokenHelp(self):
         difference = self.DecompressedCurrent - self.DecompressedChunkStart
         bitCount = 0
@@ -70,7 +71,7 @@ class UnCompressedVBAStream(VBAStreamBase):
             padCount = padCount - 1
 
         for index in xrange( 0, padCount ):
-            self.CompressedContainer[ self.CompressedCurrent ] = 0x0;   
+            self.CompressedContainer[ self.CompressedCurrent ] = 0x0;
             self.CompressedCurrent = self.CompressedCurrent + 1
 
     def __matching( self, decompressedEnd ):
@@ -92,7 +93,7 @@ class UnCompressedVBAStream(VBAStreamBase):
             lengthMask, offSetMask, bitCount, maximumLength = self.copyTokenHelp()
             length = bestLength
             if ( maximumLength < bestLength ):
-                length = maximumLength 
+                length = maximumLength
             offset = self.DecompressedCurrent - bestCandidate
         else:
             length = 0
@@ -128,7 +129,7 @@ class UnCompressedVBAStream(VBAStreamBase):
         flagByteIndex = self.CompressedCurrent
         tokenFlags = 0
         self.CompressedCurrent = self.CompressedCurrent + 1
-        for index in xrange(0,8): 
+        for index in xrange(0,8):
             if ( ( self.DecompressedCurrent < decompressedEnd )
                 and (self.CompressedCurrent < compressedEnd) ):
 
@@ -188,7 +189,6 @@ class UnCompressedVBAStream(VBAStreamBase):
         return self.CompressedContainer
 
 class CompressedVBAStream(VBAStreamBase):
-    
     def __decompressRawChunk (self):
         for i in xrange(0,self.CHUNKSIZE):
             self.DecompressedChunk[ self.DecompressedCurrent + i ] =  self.chars[self.CompressedCurrent + i ]
@@ -196,7 +196,7 @@ class CompressedVBAStream(VBAStreamBase):
         self.DecompressedCurrent += self.CHUNKSIZE
 
     def __unPackCopyToken (self, copyToken ):
-       lengthMask, offSetMask, bitCount, maximumLength = self.copyTokenHelp() 
+       lengthMask, offSetMask, bitCount, maximumLength = self.copyTokenHelp()
        length = ( copyToken & lengthMask ) + 3
        temp1 = copyToken & offSetMask
        temp2 = 16 - bitCount
@@ -204,15 +204,14 @@ class CompressedVBAStream(VBAStreamBase):
        return offSet, length
 
     def __byteCopy( self, srcOffSet, dstOffSet, length ):
- 
         destSize = len( self.DecompressedChunk )
         srcCurrent = srcOffSet
-        dstCurrent = dstOffSet 
+        dstCurrent = dstOffSet
         for i in xrange( 0, length ):
             self.DecompressedChunk[ dstCurrent ] = self.DecompressedChunk[ srcCurrent ]
             srcCurrent +=1
             dstCurrent +=1
-                
+
     def __decompressToken (self, index, flagByte):
         flagBit = ( ( flagByte >> index ) & 1 )
         if flagBit == 0:
@@ -220,7 +219,7 @@ class CompressedVBAStream(VBAStreamBase):
             self.CompressedCurrent += 1
             self.DecompressedCurrent += 1
         else:
-            copyToken = struct.unpack_from("<H", self.chars, self.CompressedCurrent)[0] 
+            copyToken = struct.unpack_from("<H", self.chars, self.CompressedCurrent)[0]
             offSet, length = self.__unPackCopyToken( copyToken )
             copySource =  self.DecompressedCurrent - offSet
             self.__byteCopy( copySource, self.DecompressedCurrent, length )
@@ -234,17 +233,16 @@ class CompressedVBAStream(VBAStreamBase):
             for i in xrange(0,8):
                 if  self.CompressedCurrent < self.CompressedEnd:
                     self.__decompressToken(i,flagByte)
- 
+
     def decompressCompressedChunk (self):
-        
         header = struct.unpack_from("<H",self.chars, self.CompressedChunkStart)[0]
         #extract size from header
-        size = header & 0xFFF 
+        size = header & 0xFFF
         size = size + 3
         #extract chunk sig from header
         sigflag = header >> 12
         sig = sigflag & 0x7
-        #extract chunk flag from sig 
+        #extract chunk flag from sig
         compressedChunkFlag = (( sigflag & 0x8 ) ==  0x8)
         self.DecompressedChunk = bytearray(self.CHUNKSIZE);
         self.DecompressedChunkStart = 0
@@ -277,6 +275,6 @@ class CompressedVBAStream(VBAStreamBase):
             return self.DecompressedContainer
         else:
             raise Exception("error decompressing container invalid signature byte %i"%val)
-         
+
         return None
 
diff --git a/src/xlsmodel.py b/src/xlsmodel.py
index 5094ee5..bb4288a 100644
--- a/src/xlsmodel.py
+++ b/src/xlsmodel.py
@@ -1,7 +1,7 @@
 ########################################################################
 #
 #  Copyright (c) 2010 Kohei Yoshida
-#  
+#
 #  Permission is hereby granted, free of charge, to any person
 #  obtaining a copy of this software and associated documentation
 #  files (the "Software"), to deal in the Software without
@@ -10,10 +10,10 @@
 #  copies of the Software, and to permit persons to whom the
 #  Software is furnished to do so, subject to the following
 #  conditions:
-#  
+#
 #  The above copyright notice and this permission notice shall be
 #  included in all copies or substantial portions of the Software.
-#  
+#
 #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 #  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@@ -50,12 +50,12 @@ class Workbook(ModelBase):
         # private members
         self.__sheets = []
 
-    
+
     def appendSheet (self, sheetType):
         def raiseError(cause):
             def errorFunc():
                 raise Exception(cause)
-                
+
         HANDLERS = { 0x0005: WorkbookGlobal,
                      0x0006: raiseError("Unsupported sheet type: Visual Basic module"),
                      0x0010: lambda: Worksheet(len(self.__sheets)),
@@ -429,7 +429,7 @@ class Worksheet(SheetBase):
         elem = baseNode.appendElement('hidden-rows')
         for rowRange in self.__hiddenRows.getAllRanges():
             elem.appendElement('range').setAttr('span', "%d:%d"%(rowRange[0]+1, rowRange[1]+1))
-        
+
     def __appendAutoFilterNode (self, wb, baseNode):
         if len(self.__autoFilterArrows) <= 0:
             # No autofilter in this sheet.
diff --git a/src/xlsparser.py b/src/xlsparser.py
index d1a0f80..2a58285 100644
--- a/src/xlsparser.py
+++ b/src/xlsparser.py
@@ -2,7 +2,7 @@
 ########################################################################
 #
 #  Copyright (c) 2011 Sergey Kishchenko
-#  
+#
 #  Permission is hereby granted, free of charge, to any person
 #  obtaining a copy of this software and associated documentation
 #  files (the "Software"), to deal in the Software without
@@ -11,10 +11,10 @@
 #  copies of the Software, and to permit persons to whom the
 #  Software is furnished to do so, subject to the following
 #  conditions:
-#  
+#
 #  The above copyright notice and this permission notice shall be
 #  included in all copies or substantial portions of the Software.
-#  
+#
 #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 #  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@@ -36,7 +36,7 @@ class TokenStream(object):
     def __init__(self, tokens):
         self.tokens = tokens
         self.currentIndex = 0
-    
+
     def readToken(self):
         if self.currentIndex >= len(self.tokens):
             return None
@@ -58,16 +58,16 @@ class BaseParser(object):
             return "NONIMPL"
         else:
             return str(parser)
-        
+
     def __lshift__(self, other):
         if isinstance(self, Seq):
             self.appendParser(other)
             return self
         else:
             return Seq(self, other)
-        
+
 def safeParse(parser, stream):
-    #print "TRACE:[%s,%s]" % (str(parser), str(stream.tokens[stream.currentIndex]))  
+    #print "TRACE:[%s,%s]" % (str(parser), str(stream.tokens[stream.currentIndex]))
 
     parsed = None
     try:
@@ -89,7 +89,7 @@ def getParsedOrNone(parser, stream):
 class Term(BaseParser):
     def __init__(self, tokenType):
         self.__tokenType = tokenType
-        
+
     def parse(self, stream):
         curIndex = stream.currentIndex
         token = stream.readToken()
@@ -98,7 +98,7 @@ class Term(BaseParser):
         else:
             stream.currentIndex = curIndex
             return None
-        
+
     def __str__(self):
         return 'Term(%s)' % str(self.__tokenType)
 
@@ -108,31 +108,31 @@ class Opt(BaseParser):
 
     def parse(self, stream):
         return getParsedOrNone(self.__parser, stream)
-    
+
     def __str__(self):
         return 'Opt(%s)' % str(self.__parser)
 
 class Req(BaseParser):
     def __init__(self, parser):
         self.__parser = parser
-    
+
     def parse(self, stream):
         parsed = safeParse(self.__parser, stream)
         if parsed is None:
             currentToken = "<<<End Of Token Stream>>>"
             if stream.currentIndex < len(stream.tokens):
                 currentToken = stream.tokens[stream.currentIndex]
-            raise ParseException("%s failed but it is required, next token is [%s]" % 
+            raise ParseException("%s failed but it is required, next token is [%s]" %
                                  (str(self.__parser), str(currentToken)))
         return parsed
-    
+
     def __str__(self):
         return 'Req(%s)' % str(self.__parser)
-            
+
 class AnyButThis(BaseParser):
     def __init__(self, parser):
         self.__parser = parser
-    
+
     def parse(self, stream):
         curIndex = stream.currentIndex
         parsed = getParsedOrNone(self.__parser, stream)
@@ -151,7 +151,7 @@ class Many(BaseParser):
         self.__parser = parser
         self.__min = min
         self.__max = max
-        
+
     def parse(self, stream):
         if self.__min == 0 and self.__max == 0:
             return None
@@ -175,21 +175,21 @@ class Many(BaseParser):
 class OneOf(BaseParser):
     def __init__(self, *args):
         self.__parsers = args
-        
+
     def parse(self, stream):
         for parser in self.__parsers:
             parsed = getParsedOrNone(parser, stream)
             if not parsed is None:
                 return parsed
         raise ParseException("No suitable options: [%s]" % ','.join(str(x) for x in self.__parsers))
-    
+
     def __str__(self):
         return 'OneOf(%s)' % ','.join(str(x) for x in self.__parsers)
 
 class Seq(BaseParser):
     def __init__(self, *args):
         self.__parsers = list(args)
-        
+
     def parse(self, stream):
         parsedList = []
         for parser in self.__parsers:
@@ -197,10 +197,10 @@ class Seq(BaseParser):
             if not parsed is None:
                 parsedList.append(parsed)
         return parsedList
-    
+
     def appendParser(self, parser):
         self.__parsers.append(parser)
-        
+
     def __str__(self):
         return 'Seq(%s)' % ','.join(str(x) for x in self.__parsers)
 
@@ -208,14 +208,14 @@ class Group(BaseParser):
     def __init__(self, name, parser):
         self.__name = name
         self.__parser = parser
-    
+
     def parse(self, stream):
         parsed = self.__parser.parse(stream)
         if not parsed is None:
             return (self.__name, parsed)
         else:
             return None
-    
+
     def __str__(self):
         return 'Group(%s, %s)' % (self.__name, str(self.__parser))
 
@@ -239,14 +239,14 @@ class VCenter(BaseParser):
     PARSER = Term(xlsrecord.VCenter)
 
 class MarginBaseParser(BaseParser): pass
-class LeftMargin(MarginBaseParser): pass        
-class RightMargin(MarginBaseParser): pass        
-class TopMargin(MarginBaseParser): pass        
-class BottomMargin(MarginBaseParser): pass        
-class Pls(BaseParser): 
+class LeftMargin(MarginBaseParser): pass
+class RightMargin(MarginBaseParser): pass
+class TopMargin(MarginBaseParser): pass
+class BottomMargin(MarginBaseParser): pass
+class Pls(BaseParser):
     PARSER = Term(xlsrecord.Pls)
 
-class Continue(BaseParser): pass        
+class Continue(BaseParser): pass
 
 class Setup(BaseParser):
     PARSER = Term(xlsrecord.Setup)
@@ -255,43 +255,43 @@ class PAGESETUP(BaseParser):
     #PAGESETUP = Header Footer HCenter VCenter [LeftMargin] [RightMargin] [TopMargin]
     #[BottomMargin] [Pls *Continue] Setup
     PARSER = Group('page-setup', Req(Header()) << Req(Footer()) << Req(HCenter()) << Req(VCenter()) <<
-                   LeftMargin() << RightMargin() << TopMargin() << BottomMargin() << 
+                   LeftMargin() << RightMargin() << TopMargin() << BottomMargin() <<
                    Seq(Pls(), Many('continues', Continue())) << Setup())
 
 
 class PrintSize(BaseParser):
     PARSER = Term(xlsrecord.PrintSize)
 
-class HeaderFooter(BaseParser): pass        
-class BACKGROUND(BaseParser): pass        
+class HeaderFooter(BaseParser): pass
+class BACKGROUND(BaseParser): pass
 
 class Fbi(BaseParser):
     PARSER = Term(xlsrecord.Fbi)
-    
-class Fbi2(BaseParser): pass        
-class ClrtClient(BaseParser): pass        
+
+class Fbi2(BaseParser): pass
+class ClrtClient(BaseParser): pass
 
 class PROTECTION(BaseParser):
     PARSER = Term(xlsrecord.Protect)
 
-class Palette(BaseParser): pass        
-class SXViewLink(BaseParser): pass        
-class PivotChartBits(BaseParser): pass        
-class SBaseRef(BaseParser): pass        
-class MsoDrawingGroup(BaseParser): pass        
+class Palette(BaseParser): pass
+class SXViewLink(BaseParser): pass
+class PivotChartBits(BaseParser): pass
+class SBaseRef(BaseParser): pass
+class MsoDrawingGroup(BaseParser): pass
 
 
-class MSODRAWING(BaseParser): pass        
-class TEXTOBJECT(BaseParser): pass        
-class OBJ(BaseParser): pass        
-class MsoDrawingSelection(BaseParser): pass        
+class MSODRAWING(BaseParser): pass
+class TEXTOBJECT(BaseParser): pass
+class OBJ(BaseParser): pass
+class MsoDrawingSelection(BaseParser): pass
 
 class OBJECTS(BaseParser):
     #*(MSODRAWING *(TEXTOBJECT / OBJ)) [MsoDrawingSelection]
-    PARSER = Group('objects', Many('drawings', Seq(Req(MSODRAWING()), 
-                                                   Many('obj-list', 
-                                                        OneOf(TEXTOBJECT(), OBJ())))) << 
-                              MsoDrawingSelection()) 
+    PARSER = Group('objects', Many('drawings', Seq(Req(MSODRAWING()),
+                                                   Many('obj-list',
+                                                        OneOf(TEXTOBJECT(), OBJ())))) <<
+                              MsoDrawingSelection())
 
 class Units(BaseParser):
     PARSER = Term(xlsrecord.Units)
@@ -311,7 +311,7 @@ class PlotArea(BaseParser):
 class CrtLink(BaseParser):
     PARSER = Term(xlsrecord.CrtLink)
 
-class FONTLIST(BaseParser): pass        
+class FONTLIST(BaseParser): pass
 
 class Frame(BaseParser):
     PARSER = Term(xlsrecord.Frame)
@@ -321,18 +321,18 @@ class LineFormat(BaseParser):
 
 class AreaFormat(BaseParser):
     PARSER = Term(xlsrecord.AreaFormat)
-    
+
 class PICF(BaseParser): pass # PICF = Begin PicF End
 
 class GelFrame(BaseParser):
     PARSER = Term(xlsrecord.GelFrame)
 
-class GELFRAME(BaseParser): 
+class GELFRAME(BaseParser):
     #GELFRAME = 1*2GelFrame *Continue [PICF]
-    PARSER = Group('gel-frame-root', Many('gel-frame-list', GelFrame(), min=1, max=2) << 
+    PARSER = Group('gel-frame-root', Many('gel-frame-list', GelFrame(), min=1, max=2) <<
                                      Many('continue-list', Continue()) << Opt(PICF()))
 
-class SHAPEPROPS(BaseParser): pass        
+class SHAPEPROPS(BaseParser): pass
 
 class FRAME(BaseParser):
     PARSER = Group('frame', Req(Frame()) << Req(Begin()) << Req(LineFormat()) << Req(AreaFormat()) <<
@@ -357,13 +357,13 @@ class BRAI(BaseParser):
 class AI(BaseParser):
     PARSER = Req(BRAI()) << SeriesText()
 
-class SerParent(BaseParser): pass        
-class SerAuxTrend(BaseParser): pass        
-class SerAuxErrBar(BaseParser): pass        
-class SerToCrt(BaseParser): 
+class SerParent(BaseParser): pass
+class SerAuxTrend(BaseParser): pass
+class SerAuxErrBar(BaseParser): pass
+class SerToCrt(BaseParser):
     PARSER = Term(xlsrecord.SerToCrt)
-    
-class LegendException(BaseParser): pass        
+
+class LegendException(BaseParser): pass
 
 class DataFormat(BaseParser):
     PARSER = Term(xlsrecord.DataFormat)
@@ -371,13 +371,13 @@ class DataFormat(BaseParser):
 class Chart3DBarShape(BaseParser):
     PARSER = Term(xlsrecord.Chart3DBarShape)
 
-class PieFormat(BaseParser): 
+class PieFormat(BaseParser):
     PARSER = Term(xlsrecord.PieFormat)
 
-class SerFmt(BaseParser):  
+class SerFmt(BaseParser):
     PARSER = Term(xlsrecord.SerFmt)
 
-class MarkerFormat(BaseParser): 
+class MarkerFormat(BaseParser):
     PARSER = Term(xlsrecord.MarkerFormat)
 
 class Text(BaseParser):
@@ -391,10 +391,10 @@ class FontX(BaseParser):
 
 class AlRuns(BaseParser): pass
 
-class ObjectLink(BaseParser): 
+class ObjectLink(BaseParser):
     PARSER = Term(xlsrecord.ObjectLink)
-    
-class DataLabExtContents(BaseParser): 
+
+class DataLabExtContents(BaseParser):
     PARSER = Term(xlsrecord.DataLabExtContents)
 
 class CrtLayout12(BaseParser): pass
@@ -414,9 +414,9 @@ class ATTACHEDLABEL(BaseParser):
 class SS(BaseParser):
     #SS = DataFormat Begin [Chart3DBarShape] [LineFormat AreaFormat PieFormat] [SerFmt]
     #[GELFRAME] [MarkerFormat] [AttachedLabel] *2SHAPEPROPS [CRTMLFRT] End
-    PARSER = Group('ss', Seq(Req(DataFormat()), Req(Begin()), Chart3DBarShape(), 
+    PARSER = Group('ss', Seq(Req(DataFormat()), Req(Begin()), Chart3DBarShape(),
                              Opt(Seq(Req(LineFormat()), Req(AreaFormat()), Req(PieFormat()))),
-                             SerFmt(), Opt(GELFRAME()), MarkerFormat(), AttachedLabel(), 
+                             SerFmt(), Opt(GELFRAME()), MarkerFormat(), AttachedLabel(),
                              Many('shape-props-list', SHAPEPROPS(), max=2), CRTMLFRT(),
                              Req(End())))
 
@@ -431,13 +431,13 @@ class SERIESFORMAT(BaseParser):
     #*(LegendException [Begin ATTACHEDLABEL [TEXTPROPS] End]) End
     PARSER = Group('series-fmt', Req(Series()) << Req(Begin()) << Many('ai-list', AI(), min=4, max=4) <<
                 Many('ss-list', SS()) << OneOf(SerToCrt(), Seq(SerParent(), OneOf(SerAuxTrend(), SerAuxErrBar()))) <<
-                Many('legend-exceptions', Group('legend-exception-root', 
-                                                Seq(Req(LegendException()), 
+                Many('legend-exceptions', Group('legend-exception-root',
+                                                Seq(Req(LegendException()),
                                                     Seq(Req(Begin()), Req(ATTACHEDLABEL()), TEXTPROPS(), Req(End()))))) <<
                 EndBlock() << Req(End()))
 
 
-        
+
 class ShtProps(BaseParser):
     PARSER = Term(xlsrecord.CHProperties)
 
@@ -469,7 +469,7 @@ class CatSerRange(BaseParser):
 class AxcExt(BaseParser):
     PARSER = Term(xlsrecord.AxcExt)
 
-class CatLab(BaseParser): 
+class CatLab(BaseParser):
     PARSER = Term(xlsrecord.CatLab)
 
 class IFmtRecord(BaseParser): pass
@@ -491,9 +491,9 @@ class ChartFrtInfo(BaseParser):
 class AXS(BaseParser):
     # AXS = [IFmtRecord] [Tick] [FontX] *4(AxisLine LineFormat) [AreaFormat] [GELFRAME]
     # *4SHAPEPROPS [TextPropsStream *ContinueFrt12]
-    PARSER = Group('axs', IFmtRecord() << Tick() << FontX() << 
+    PARSER = Group('axs', IFmtRecord() << Tick() << FontX() <<
                 Many('axis-lines', Seq(Req(AxisLine()), Req(LineFormat())), max=4) <<
-                AreaFormat() << Opt(GELFRAME()) << Many('shape-props-list', SHAPEPROPS(), max=4) << 
+                AreaFormat() << Opt(GELFRAME()) << Many('shape-props-list', SHAPEPROPS(), max=4) <<
                 Opt(Seq(Req(TextPropsStream()), Many('continue-frt12-list', ContinueFrt12()))))
 
 class IVAXIS(BaseParser):
@@ -501,10 +501,10 @@ class IVAXIS(BaseParser):
     # IVAXIS = Axis Begin [CatSerRange] AxcExt [CatLab] AXS [CRTMLFRT] End
     # it seems it's usual to have several future records indicators just after AxcExt and before the End:
     # IVAXIS = Axis Begin [CatSerRange] AxcExt ([ChartFrtInfo] *StartBlock) [CatLab] AXS [CRTMLFRT] [EndBlock] End
-    
-    PARSER = Group('ivaxis', Req(Axis()) << Req(Begin()) << CatSerRange() << Req(AxcExt()) << 
-                Group('future', Seq(ChartFrtInfo(), 
-                                    Many('start-blocks', StartBlock()))) << CatLab() << 
+
+    PARSER = Group('ivaxis', Req(Axis()) << Req(Begin()) << CatSerRange() << Req(AxcExt()) <<
+                Group('future', Seq(ChartFrtInfo(),
+                                    Many('start-blocks', StartBlock()))) << CatLab() <<
                 Req(AXS()) << Opt(CRTMLFRT()) << EndBlock() << Req(End()))
 
 class ValueRange(BaseParser):
@@ -514,19 +514,19 @@ class AXM(BaseParser): pass
 
 class DVAXIS(BaseParser):
     #DVAXIS = Axis Begin [ValueRange] [AXM] AXS [CRTMLFRT] End
-    PARSER = Group('dvaxis', Req(Axis()) << Req(Begin()) << ValueRange() << AXM() << Req(AXS()) << CRTMLFRT() << 
+    PARSER = Group('dvaxis', Req(Axis()) << Req(Begin()) << ValueRange() << AXM() << Req(AXS()) << CRTMLFRT() <<
                 Req(End()))
 
-class SERIESAXIS(BaseParser): 
+class SERIESAXIS(BaseParser):
     #SERIESAXIS = Axis Begin [CatSerRange] AXS [CRTMLFRT] End
-    PARSER = Group('series-axis', Req(Axis()) << Req(Begin()) << CatSerRange() << 
+    PARSER = Group('series-axis', Req(Axis()) << Req(Begin()) << CatSerRange() <<
                                   Req(AXS()) << Opt(CRTMLFRT()) << Req(End()))
 
 class AXES(BaseParser):
     #AXES = [IVAXIS DVAXIS [SERIESAXIS] / DVAXIS DVAXIS] *3ATTACHEDLABEL [PlotArea FRAME]
     # TODO: recheck it. The rule above leaks some brackets :(
-    PARSER = Group('axes', Seq(OneOf(Seq(Req(IVAXIS()), Req(DVAXIS()), Opt(SERIESAXIS())), 
-                                     Seq(Req(DVAXIS()), Req(DVAXIS()))), 
+    PARSER = Group('axes', Seq(OneOf(Seq(Req(IVAXIS()), Req(DVAXIS()), Opt(SERIESAXIS())),
+                                     Seq(Req(DVAXIS()), Req(DVAXIS()))),
                                Many('attached-labels', ATTACHEDLABEL(), max=3),
                                Opt(Seq(Req(PlotArea()), Req(FRAME())))))
 
@@ -534,7 +534,7 @@ class AXES(BaseParser):
 class ChartFormat(BaseParser):
     PARSER = Term(xlsrecord.ChartFormat)
 
-class BobPop(BaseParser): 
+class BobPop(BaseParser):
     PARSER = Term(xlsrecord.BobPop)
 
 class BobPopCustom(BaseParser): pass
@@ -542,30 +542,30 @@ class BobPopCustom(BaseParser): pass
 class Bar(BaseParser):
     PARSER = Term(xlsrecord.CHBar)
 
-class Line(BaseParser): 
+class Line(BaseParser):
     PARSER = Term(xlsrecord.CHLine)
-    
+
 class Pie(BaseParser):
     PARSER = Term(xlsrecord.CHPie)
-    
+
 class Area(BaseParser):
     PARSER = Term(xlsrecord.CHArea)
-    
-class Scatter(BaseParser): 
+
+class Scatter(BaseParser):
     PARSER = Term(xlsrecord.CHScatter)
-    
-class Radar(BaseParser): 
+
+class Radar(BaseParser):
     PARSER = Term(xlsrecord.CHRadar)
-    
+
 class RadarArea(BaseParser): pass
 
-class Surf(BaseParser): 
+class Surf(BaseParser):
      PARSER = Term(xlsrecord.CHSurf)
 
-class SeriesList(BaseParser):  
+class SeriesList(BaseParser):
      PARSER = Term(xlsrecord.SeriesList)
-     
-class Chart3d(BaseParser): 
+
+class Chart3d(BaseParser):
     PARSER = Term(xlsrecord.Chart3d)
 
 
@@ -574,26 +574,26 @@ class Legend(BaseParser):
 
 class LD(BaseParser):
     #LD = Legend Begin Pos ATTACHEDLABEL [FRAME] [CrtLayout12] [TEXTPROPS] [CRTMLFRT] End
-    PARSER = Group('ld', Req(Legend()) << Req(Begin()) << Req(Pos()) << Req(ATTACHEDLABEL()) << 
+    PARSER = Group('ld', Req(Legend()) << Req(Begin()) << Req(Pos()) << Req(ATTACHEDLABEL()) <<
                 Opt(FRAME()) << CrtLayout12() << TEXTPROPS() << CRTMLFRT() << Req(End()))
 
 class DropBar(BaseParser):
     PARSER = Term(xlsrecord.DropBar)
 
-class DROPBAR(BaseParser): 
+class DROPBAR(BaseParser):
     # DROPBAR = DropBar Begin LineFormat AreaFormat [GELFRAME] [SHAPEPROPS] End
-    PARSER = Group('drop-bar-root', Req(DropBar()) << Req(Begin()) << Req(LineFormat()) << 
-                                    Req(AreaFormat()) << Opt(GELFRAME()) << Opt(SHAPEPROPS()) << 
+    PARSER = Group('drop-bar-root', Req(DropBar()) << Req(Begin()) << Req(LineFormat()) <<
+                                    Req(AreaFormat()) << Opt(GELFRAME()) << Opt(SHAPEPROPS()) <<
                                     Req(End()))
-class CrtLine(BaseParser): 
+class CrtLine(BaseParser):
     PARSER = Term(xlsrecord.CrtLine)
-    
+
 class CrtLayout12A(BaseParser): pass
 
-class Dat(BaseParser): 
+class Dat(BaseParser):
     PARSER = Term(xlsrecord.Dat)
 
-class DAT(BaseParser): 
+class DAT(BaseParser):
     #DAT = Dat Begin LD End
     PARSER = Group('dat-root', Req(Dat()) << Req(Begin()) << Req(LD()) << Req(End()))
 
@@ -605,15 +605,15 @@ class CRT(BaseParser):
     #*2DFTTEXT [DataLabExtContents] [SS] *4SHAPEPROPS End
     # It seems there are optional StartBlock and EndBlock on the last line:
     #*2DFTTEXT [StartBlock] [DataLabExtContents]  [SS] *4SHAPEPROPS [EndBlock] End
-    
-    
+
+
     PARSER = Group('crt', Req(ChartFormat()) << Req(Begin()) << OneOf(Bar(), Line(), Opt(Seq(Req(BobPop()), BobPopCustom())),
                                                                   Pie(), Area(), Scatter(), Radar(),
                                                                   RadarArea(), Surf()) <<
-                Req(CrtLink()) << SeriesList() << Chart3d() << Opt(LD()) << Many('drop-bars', DROPBAR(), max=2) << 
-                Many('crt-lines', Seq(Req(CrtLine()), 
+                Req(CrtLink()) << SeriesList() << Chart3d() << Opt(LD()) << Many('drop-bars', DROPBAR(), max=2) <<
+                Many('crt-lines', Seq(Req(CrtLine()),
                                       Req(LineFormat()))) << Many('dft-texts', DFTTEXT()) <<
-                StartBlock() << DataLabExtContents() << Opt(SS()) << 
+                StartBlock() << DataLabExtContents() << Opt(SS()) <<
                 Many('shape-props-list', SHAPEPROPS(), max=4) << EndBlock() << Req(End()))
 
 class AXISPARENT(BaseParser):
@@ -641,9 +641,9 @@ class CHARTFORMATS(BaseParser):
                 Many('ss-list', SS()) << Req(ShtProps()) << Many('dft-texts', DFTTEXT(), max=2) <<
                 Req(AxesUsed()) << Many('axis-roots', AXISPARENT(), min=1, max=2) <<
                 CrtLayout12A() << Opt(DAT()) << Many('attached-labels', ATTACHEDLABEL()) <<
-                Opt(CRTMLFRT()) << Many('datalab-exts', Seq(Opt(Seq(Req(DataLabExt()), 
-                                                                    Req(StartObject()))), 
-                                                            Req(ATTACHEDLABEL()), 
+                Opt(CRTMLFRT()) << Many('datalab-exts', Seq(Opt(Seq(Req(DataLabExt()),
+                                                                    Req(StartObject()))),
+                                                            Req(ATTACHEDLABEL()),
                                                             EndObject())) <<
                 Opt(TEXTPROPS()) << Many('crtmlfrt-list', CRTMLFRT()) << EndBlock() << Req(End()))
 
@@ -665,11 +665,11 @@ class Label(BaseParser):
 
 class SERIESDATA(BaseParser):
     #SERIESDATA = Dimensions 3(SIIndex *(Number / BoolErr / Blank / Label))
-    PARSER = Group('series-data', Req(Dimensions()) << Many('si-index-list', 
-                                                          Seq(Req(SIIndex()), 
-                                                              Many('values', 
-                                                                   OneOf(Number(), BoolErr(), 
-                                                                         Blank(), Label()))), 
+    PARSER = Group('series-data', Req(Dimensions()) << Many('si-index-list',
+                                                          Seq(Req(SIIndex()),
+                                                              Many('values',
+                                                                   OneOf(Number(), BoolErr(),
+                                                                         Blank(), Label()))),
                                                           min=3, max=3))
 
 class CodeName(BaseParser): pass
@@ -690,15 +690,15 @@ class CHARTSHEETCONTENT(BaseParser):
     PARSER = Group('chart', WriteProtect() << SheetExt() << WebPub() << Many('hf-pictures', HFPicture()) <<
               Req(PAGESETUP()) << Req(PrintSize()) << HeaderFooter() << BACKGROUND() <<
               Many('fbi-list', Fbi()) << Many('fbi2-list', Fbi2()) <<
-              ClrtClient() << PROTECTION() << Palette() << SXViewLink() << PivotChartBits() << 
-              SBaseRef() << MsoDrawingGroup() << Req(OBJECTS()) << Req(Units()) << 
+              ClrtClient() << PROTECTION() << Palette() << SXViewLink() << PivotChartBits() <<
+              SBaseRef() << MsoDrawingGroup() << Req(OBJECTS()) << Req(Units()) <<
               Req(CHARTFORMATS()) << Req(SERIESDATA()) << Many('windows', WINDOW()) <<
               Many('custom-views', CUSTOMVIEW()) << CodeName() << CRTMLFRT() << Req(EOF()))
-    
+
 class XlsParser(BaseParser):
     def __init__(self, tokens):
         self.__tokenStream = TokenStream(tokens)
-        
+
     def parse(self, stream):
         PARSERS = {0x0005: None, # WorkbookGlobal
                    0x0006: None,# Visual Basic module,
@@ -709,18 +709,18 @@ class XlsParser(BaseParser):
                    }
         parsedList = []
         bofParser = Req(BOF())
-        
+
         while True:
             bof = None
             try:
                 bof = safeParse(bofParser, stream)
             except ParseException:
-                pass 
+                pass
             if bof is None: # we should break only in case stream is ended
                 break
             bof.dumpData() # we need to dump data to make it parse the record
             parser = PARSERS[bof.dataType]
-            
+
             try:
                 if not parser is None:
                     parsed = (parser[0], parser[1]().parse(stream))
@@ -730,11 +730,11 @@ class XlsParser(BaseParser):
                     parsed = parser.parse(stream) # skipping the unknown stream
                     parsedList.append(parsed)
             except ParseException:
-                print ("Parse failed, previous token is [%s], next tokens are [%s]" % (stream.tokens[stream.currentIndex-1], 
+                print ("Parse failed, previous token is [%s], next tokens are [%s]" % (stream.tokens[stream.currentIndex-1],
                                                                                        ','.join(map(str,stream.tokens[stream.currentIndex:stream.currentIndex+5]))))
                 raise
         return parsedList
-    
+
     def __dumpRoot(self, parsed):
         if parsed is None:
             return None
@@ -744,12 +744,10 @@ class XlsParser(BaseParser):
             return map(self.__dumpRoot, parsed)
         else:
             return parsed.dumpData()
-        
+
     def dumpData(self):
         parsed = self.parse(self.__tokenStream)
         if parsed is None:
             return None
         return self.__dumpRoot(parsed)
-        
-    
-    
+
diff --git a/src/xmlpp.py b/src/xmlpp.py
index fd3ef62..8975689 100755
--- a/src/xmlpp.py
+++ b/src/xmlpp.py
@@ -5,30 +5,30 @@ LICENCE:
 Copyright (c) 2008, Fredrik Ekholdt
 All rights reserved.
 
-Redistribution and use in source and binary forms, with or without 
+Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 
-* Redistributions of source code must retain the above copyright notice, 
+* Redistributions of source code must retain the above copyright notice,
 this list of conditions and the following disclaimer.
 
-* Redistributions in binary form must reproduce the above copyright notice, 
-this list of conditions and the following disclaimer in the documentation 
+* Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
 and/or other materials provided with the distribution.
 
-* Neither the name of None nor the names of its contributors may be used to 
-endorse or promote products derived from this software without specific prior 
+* Neither the name of None nor the names of its contributors may be used to
+endorse or promote products derived from this software without specific prior
 written permission.
 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE."""
 
 import sys as _sys
@@ -47,7 +47,7 @@ def _pprint_line(indent_level, line, width=100, output=_sys.stdout):
             number_chars = number_chars + 1
         try:
             elem_start = _re.findall("(\<\W{0,1}\w+) ?", line)[0]
-            elem_finished = _re.findall("([?|\]\]]*\>)", line)[0] 
+            elem_finished = _re.findall("([?|\]\]]*\>)", line)[0]
             #should not have *
             attrs = _re.findall("(\S*?\=\".*?\")", line)
             output.write(start + elem_start)
@@ -59,7 +59,7 @@ def _pprint_line(indent_level, line, width=100, output=_sys.stdout):
                     output.write("\n")
                     for i in range(len(start + elem_start) + 1):
                         output.write(" ")
-                    number_chars = len(start + elem_start) + 1 
+                    number_chars = len(start + elem_start) + 1
                 else:
                     output.write(" ")
                     number_chars = number_chars + 1
@@ -69,7 +69,7 @@ def _pprint_line(indent_level, line, width=100, output=_sys.stdout):
         except IndexError:
             #give up pretty print this line
             output.write(start + line + "\n")
-                
+
 
 def _pprint_elem_content(indent_level, line, output=_sys.stdout):
     if line.strip():
@@ -81,7 +81,7 @@ def _get_next_elem(data):
     start_pos = data.find("<")
     end_pos = data.find(">") + 1
     retval = data[start_pos:end_pos]
-    stopper = retval.rfind("/") 
+    stopper = retval.rfind("/")
     if stopper < retval.rfind("\""):
         stopper = -1
     single = (stopper > -1 and ((retval.find(">") - stopper) < (stopper - retval.find("<"))))
@@ -99,7 +99,7 @@ def _get_next_elem(data):
     elif ignore_question:
         end_pos = data.find("?>") + len("?>")
     ignore = ignore_excl or ignore_question
-    
+
     no_indent = ignore or single
 
     #print retval, end_pos, start_pos, stopper > -1, no_indent
@@ -109,20 +109,20 @@ def _get_next_elem(data):
            no_indent
 
 def pprint(xml, output=_sys.stdout, indent=4, width=80):
-    """Pretty print xml. 
+    """Pretty print xml.
     Use output to select output stream. Default is sys.stdout
     Use indent to select indentation level. Default is 4   """
     data = xml
     indent_level = 0
     start_pos, end_pos, is_stop, no_indent  = _get_next_elem(data)
     while ((start_pos > -1 and end_pos > -1)):
-        _pprint_elem_content(indent_level, data[:start_pos].strip(), 
+        _pprint_elem_content(indent_level, data[:start_pos].strip(),
                              output=output)
         data = data[start_pos:]
         if is_stop and not no_indent:
             indent_level = indent_level - indent
-        _pprint_line(indent_level, 
-                     data[:end_pos - start_pos], 
+        _pprint_line(indent_level,
+                     data[:end_pos - start_pos],
                      width=width,
                      output=output)
         data = data[end_pos - start_pos:]
@@ -133,7 +133,7 @@ def pprint(xml, output=_sys.stdout, indent=4, width=80):
             break
         else:
             start_pos, end_pos, is_stop, no_indent  = _get_next_elem(data)
-    
+
 
 if __name__ == "__main__":
     if "-h" in _sys.argv or "--help" in _sys.argv:
commit a993def31b1fb6ce9371f276d79cb2e71abcbf90
Author: Jean-Francois Dockes <jf at dockes.org>
Date:   Mon Nov 18 10:31:43 2013 +0100

    Account for 8bit characters inside TextBytesAtom
    
    The Microsoft documentation does not specify an encoding for TextBytesAtom,
    but it says that the bytes are the "low byte of a character in the Unicode
    character set with the high byte considered equal to zero". As characters
    from the latin1 set are the only ones which would not be changed by this
    operation, it seems reasonable to assume that the encoding is actually
    cp1252/windows-1252, which is what is used by the new version.

diff --git a/src/pptrecord.py b/src/pptrecord.py
index ae7bad7..f29d590 100644
--- a/src/pptrecord.py
+++ b/src/pptrecord.py
@@ -117,6 +117,10 @@ class String(BaseRecordHandler):
     def parseBytes (self):
         name = globals.getTextBytes(self.readRemainingBytes())
         self.appendProperty(name)
+        # The MS doc says that the bytes are the low bytes of unicode
+        # chars, with the high byte ignored. Only latin1 could stand
+        # this transformation.
+        name = name.decode('cp1252').encode('UTF-8')
         self.appendLine("text: '%s'"%name)
 
 def ShapeString (*args):
commit 8030151594791650cee71246165e78e27fcf7313
Author: Jean-Francois Dockes <jf at dockes.org>
Date:   Mon Nov 18 09:56:11 2013 +0100

    Convert from UTF16 for real in getUTF8FromUTF16().
    
    The Microsoft documentation for the PowerPoint format ("Microsoft Office
    PowerPoint 97-2007 binary file format specification") says that
    TextCharsAtoms contain "The actual characters of the text" "stored in the
    Unicode character set". It does not actually specify UTF-16, but it also
    says that most Unicode characters take 2 bytes, while some (surrogates)
    will take 4 bytes, which comes close.

diff --git a/src/globals.py b/src/globals.py
index 68aae93..3bcba17 100644
--- a/src/globals.py
+++ b/src/globals.py
@@ -411,28 +411,22 @@ def getDouble (bytes):
     text = toTextBytes(bytes)
     return struct.unpack('<d', text)[0]
 
-
 def getUTF8FromUTF16 (bytes):
     # little endian utf-16 strings
     byteCount = len(bytes)
     loopCount = int(byteCount/2)
-    text = ''
+
+    # Truncate input to first null doublet
     for i in xrange(0, loopCount):
-        code = ''
-        lsbZero = bytes[i*2] == '\x00'
-        msbZero = bytes[i*2+1] == '\x00'
-        if msbZero and lsbZero:
-            return text
-        
-        if not msbZero:
-            code += bytes[i*2+1]
-        if not lsbZero:
-            code += bytes[i*2]
-        try:    
-            text += unicode(code, 'utf-8')
-        except UnicodeDecodeError:
-            text += "<%d invalid chars>"%len(code)
-    return text
+        if bytes[i*2] == '\x00':
+            if bytes[i*2+1] == '\x00':
+                bytes = bytes[0:i*2]
+                break
+
+    # Convert from utf-16 and return utf-8, using markers for
+    # conversion errors
+    text = unicode(bytes, 'UTF-16LE', errors='replace')
+    return text.encode('UTF-8')
 
 class StreamWrap(object):
     def __init__ (self,printer):


More information about the Libreoffice-commits mailing list