[Libreoffice-commits] .: scratch/mso-dumper

Thu Feb 17 18:15:23 PST 2011

 scratch/mso-dumper/misc/mso-dumper.spec   |   76 
 scratch/mso-dumper/misc/package.sh        |   11 
 scratch/mso-dumper/oletool.diff           |  230 --
 scratch/mso-dumper/ppt-dump.py            |  121 -
 scratch/mso-dumper/src/formula.py         |  859 -------
 scratch/mso-dumper/src/globals.py         |  472 ----
 scratch/mso-dumper/src/msodraw.py         |  607 -----
 scratch/mso-dumper/src/node.py            |  219 -
 scratch/mso-dumper/src/ole.py             |  757 ------
 scratch/mso-dumper/src/pptrecord.py       | 1603 --------------
 scratch/mso-dumper/src/pptstream.py       |  447 ----
 scratch/mso-dumper/src/xlsmodel.py        |  627 -----
 scratch/mso-dumper/src/xlsrecord.py       | 3350 ------------------------------
 scratch/mso-dumper/src/xlsstream.py       |  518 ----
 scratch/mso-dumper/src/xmlpp.py           |  149 -
 scratch/mso-dumper/xls-dump.py            |  178 -
 scratch/mso-dumper/xls_sheetpass_hash.cxx |   85 
 17 files changed, 10309 deletions(-)

New commits:
commit 9db0f8d44426f9981058108af1981b1e2f08b3dd
Author: Kohei Yoshida <kyoshida at novell.com>
Date:   Thu Feb 17 21:14:45 2011 -0500

    Removed mso-dumper directory.
    
    This project has been relocated to libreoffice/contrib/mso-dumper.

diff --git a/scratch/mso-dumper/misc/mso-dumper.spec b/scratch/mso-dumper/misc/mso-dumper.spec
deleted file mode 100644
index 9a8f63e..0000000
--- a/scratch/mso-dumper/misc/mso-dumper.spec
+++ /dev/null
@@ -1,76 +0,0 @@
-Name:           mso-dumper
-# List of additional build dependencies
-Requires:       python
-BuildRequires:  python
-Version:        0.2
-Release:        1
-BuildArch:      noarch
-License:        LGPL v2 or later
-Packager:       Kohei Yoshida  <kyoshida at novell.com>
-Source:         mso-dumper-0.2.tar.gz
-Group:          Development/Tools/Debuggers
-Summary:        Dumper tool for Microsoft Office binary file format
-
-BuildRoot:      %{_tmppath}/%{name}-%{version}-build
-
-%description
-This package provides dumper scripts for dumping the contents of Microsoft
-Office binary file formats.  Currently only the Excel and PowerPoint file
-formats are supported.
-
-
-Authors:
---------
-    Kohei Yoshida <kyoshida at novell.com>
-    Thorsten Behrens <tbehrens at novell.com>
-    Fredrik Ekholdt (for xmlpp.py)  
-
-%prep
-%setup -q
-
-%build
-mkdir -p %buildroot/usr/bin
-mkdir -p %buildroot/usr/share/mso-dumper-%version/src
-mkdir -p %buildroot/usr/share/doc/packages/mso-dumper-%version
-cp xls-dump.py %buildroot/usr/share/mso-dumper-%version/
-cp ppt-dump.py %buildroot/usr/share/mso-dumper-%version/
-cp src/*.py %buildroot/usr/share/mso-dumper-%version/src
-
-%install
-install -d %buildroot/usr/share/mso-dumper-%version
-#install -d %buildroot/usr/share/doc/packages/mso-dumper-%version
-cd %buildroot/usr/bin
-ln -s ../share/mso-dumper-%version/xls-dump.py xls-dump
-ln -s ../share/mso-dumper-%version/ppt-dump.py ppt-dump
-
-%clean
-rm -rf %buildroot
-
-%files
-%defattr(-,root,root)
-%dir /usr/share/doc/packages/mso-dumper-%version
-%dir /usr/share/mso-dumper-%version
-%dir /usr/share/mso-dumper-%version/src
-
-/usr/bin/xls-dump
-/usr/bin/ppt-dump
-/usr/share/mso-dumper-%version/xls-dump.py
-/usr/share/mso-dumper-%version/ppt-dump.py
-/usr/share/mso-dumper-%version/src/ole.py
-/usr/share/mso-dumper-%version/src/formula.py
-/usr/share/mso-dumper-%version/src/globals.py
-/usr/share/mso-dumper-%version/src/node.py
-/usr/share/mso-dumper-%version/src/pptstream.py
-/usr/share/mso-dumper-%version/src/pptrecord.py
-/usr/share/mso-dumper-%version/src/xlsstream.py
-/usr/share/mso-dumper-%version/src/xlsmodel.py
-/usr/share/mso-dumper-%version/src/xlsrecord.py
-/usr/share/mso-dumper-%version/src/xmlpp.py
-
-#%doc AUTHORS COPYING ChangeLog INSTALL NEWS README README-WIN32 TODO
-
-%changelog
-* Wed Mar 17 2010 kyoshida at novell.com
-- Initial package version 0.2.
-
-  
diff --git a/scratch/mso-dumper/misc/package.sh b/scratch/mso-dumper/misc/package.sh
deleted file mode 100755
index 38ab04d..0000000
--- a/scratch/mso-dumper/misc/package.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-# Execute this in the parent directory of the 'mso-dumper' directory.
-
-VER=0.2
-DESTDIR=mso-dumper-$VER
-mkdir -p $DESTDIR/src
-cp mso-dumper/*.py $DESTDIR/
-cp mso-dumper/src/*.py $DESTDIR/src/
-tar cvf $DESTDIR.tar.gz $DESTDIR
-rm -rf $DESTDIR
-
diff --git a/scratch/mso-dumper/oletool.diff b/scratch/mso-dumper/oletool.diff
deleted file mode 100644
index c882229..0000000
--- a/scratch/mso-dumper/oletool.diff
+++ /dev/null
@@ -1,230 +0,0 @@
---- /dev/null	2011-01-31 08:27:50.166383946 +0000
-+++ scratch/mso-dumper/src/oletool.py 2011-01-31 09:15:32.000000000 +0000
-@@ -0,0 +1,204 @@
-+#!/usr/bin/env python
-+import sys, os.path, optparse
-+
-+sys.path.append(sys.path[0]+"/src")
-+
-+import ole, globals
-+
-+from globals import encodeName
-+class DateTime:
-+    def __init__(self):
-+        self.day = 0
-+        self.month = 0 
-+        self.year = 0
-+        self.hour = 0
-+        self.second = 0
-+
-+class DirNode:
-+
-+    def __init__(self, entry):
-+        self.Nodes = []
-+        self.Entry = entry;
-+        self.HierachicalName = ''
-+
-+    def isStorage():
-+        return entry.Type == Directory.Type.RootStorage
-+
-+class OleContainer:
-+
-+    def __init__(self,filePath, params ):
-+        self.filePath = filePath
-+        self.header = None
-+        self.params = params
-+        self.pos = None
-+        
-+    def __getModifiedTime(self, entry):
-+        # need parse/decode Entry.TimeModified
-+        # ( although the documentation indicates that it might not be
-+        # worth it 'cause they are not universally used
-+        modified  = DateTime
-+        modified.day = 0
-+        modified.month = 0 
-+        modified.year = 0
-+        modified.hour = 0
-+        modified.second = 0
-+        return modified
-+
-+    def __parseFile (self):
-+        file = open(self.filePath, 'rb')
-+        self.strmData = globals.StreamData()
-+        self.chars = file.read()
-+        file.close()    
-+
-+    def __addSiblings( self, entries, parent, child ):
-+        # add left siblings
-+        nextLeft = child.Entry.DirIDLeft
-+        if ( nextLeft > 0 ):
-+            newEntry = DirNode( entries[ nextLeft ] )
-+            newEntry.HierachicalName = parent.HierachicalName + globals.encodeName( newEntry.Entry.Name )
-+            if  newEntry.Entry.DirIDRoot > 0:
-+                newEntry.HierachicalName = newEntry.HierachicalName + '/'
-+
-+            self.__addSiblings( entries, parent, newEntry ) 
-+            parent.Nodes.insert( 0, newEntry )
-+
-+        nextRight = child.Entry.DirIDRight
-+        # add children to the right 
-+        if ( nextRight > 0 ):
-+            newEntry = DirNode( entries[ nextRight ] )
-+            newEntry.HierachicalName = parent.HierachicalName + globals.encodeName( newEntry.Entry.Name )
-+            if  newEntry.Entry.DirIDRoot > 0:
-+                newEntry.HierachicalName = newEntry.HierachicalName + '/'
-+            self.__addSiblings( entries, parent, newEntry ) 
-+            parent.Nodes.append( newEntry )
-+
-+    def __buildTreeImpl(self, entries, parent ):
-+
-+        if ( parent.Entry.DirIDRoot > 0 ):
-+            newEntry = DirNode( entries[ parent.Entry.DirIDRoot ] )
-+            newEntry.HierachicalName = parent.HierachicalName + globals.encodeName( newEntry.Entry.Name )
-+            if ( newEntry.Entry.DirIDRoot > 0 ):
-+                newEntry.HierachicalName =  newEntry.HierachicalName + '/'
-+
-+            self.__addSiblings( entries, parent, newEntry )
-+            parent.Nodes.append( newEntry )
-+            
-+        for child in parent.Nodes:
-+            if child.Entry.DirIDRoot > 0:
-+                self.__buildTreeImpl( entries, child )
-+
-+    def __buildTree(self, entries ):
-+        treeRoot = DirNode( entries[0] ) 
-+        self.__buildTreeImpl( entries, treeRoot )
-+        return treeRoot
-+
-+    def __findEntryByHierachicalName( self, node, name ):
-+        if node.HierachicalName == name:
-+            return node.Entry
-+        else:
-+            for child in node.Nodes:
-+                result = self.__findEntryByHierachicalName( child, name )
-+                if result != None:
-+                    return result 
-+        return None 
-+
-+    def __printListReport( self, treeNode, stats ):
-+
-+        dateInfo = self.__getModifiedTime( treeNode.Entry )
-+
-+        if len( treeNode.HierachicalName ) > 0 :
-+            print '{0:8d}  {1:0<2d}-{2:0<2d}-{3:0<2d} {4:0<2d}:{5:0<2d}   {6}'.format(treeNode.Entry.StreamSize, dateInfo.day, dateInfo.month, dateInfo.year, dateInfo.hour, dateInfo.second, treeNode.HierachicalName )
-+     
-+        for node in treeNode.Nodes:
-+            # ignore the root
-+            self.__printListReport( node, stats )
-+
-+    def __printHeader(self):
-+        print ("OLE: %s")%self.filePath
-+        print (" Length     Date   Time    Name")
-+        print ("--------    ----   ----    ----")
-+
-+    def listEntries(self):
-+        self.__parseFile()
-+        #if self.header == None:
-+        #    self.header = ole.Header(self.chars, self.params)
-+        #    self.pos = self.header.parse()
-+        self.header = ole.Header(self.chars, self.params)
-+        self.pos = self.header.parse()
-+        obj =  self.header.getDirectory()
-+        if obj != None:
-+            obj.parseDirEntries()
-+            count = 0
-+            for entry in obj.entries:
-+                print("Entry [0x%x] Name %s  Root 0x%x Left 0x%x Right %x")%( count, entry.Name, entry.DirIDRoot, entry.DirIDLeft, entry.DirIDRight )
-+                count = count + 1
-+    def list(self):
-+        # need to share the inititialisation and parse stuff between the different options
-+        self.__parseFile()
-+        if self.header == None:
-+            self.header = ole.Header(self.chars, self.params)
-+            self.pos = self.header.parse()
-+        obj =  self.header.getDirectory()
-+        if obj != None:
-+            obj.parseDirEntries()
-+            count = 0
-+            rootNode = self.__buildTree( obj.entries )            
-+
-+            self.__printHeader()
-+            self.__printListReport( rootNode, obj.entries )
-+            # need to print a footer ( total bytes, total files like unzip )
-+
-+    def extract(self, name):
-+        if  self.header == None:
-+            self.__parseFile()
-+            self.header = ole.Header(self.chars, self.params)
-+            self.pos = self.header.parse()
-+
-+        obj =  self.header.getDirectory()
-+        if obj != None:
-+            obj.parseDirEntries()
-+     
-+        root = self.__buildTree( obj.entries )
-+        entry = self.__findEntryByHierachicalName( root, name )
-+
-+        if  entry == None or entry.DirIDRoot > 0 :
-+            print "can't extract %s"%name
-+            return
-+
-+        bytes = obj.getRawStreamByEntry( entry )
-+
-+        file = open(entry.Name, 'wb') 
-+        file.write( bytes )
-+        file.close
-+def main ():
-+    parser = optparse.OptionParser()
-+    parser.add_option("-l", "--list", action="store_true", dest="list", default=False, help="lists ole contents")
-+    parser.add_option("-x", "--extract", action="store_true", dest="extract", default=False, help="extract file")
-+
-+
-+    options, args = parser.parse_args()
-+
-+    params = globals.Params()
-+
-+    params.list =  options.list
-+    params.extract =  options.extract
-+
-+    if len(args) < 1:
-+        globals.error("takes at least one arguments\n")
-+        parser.print_help()
-+        sys.exit(1)
-+
-+    container =  OleContainer( args[ 0 ], params )
-+
-+    if params.list == True:
-+        container.list() 
-+    if params.extract:
-+       files = args
-+       files.pop(0)
-+           
-+       for file in files:
-+           container.extract( file ) 
-+#        container.listEntries() 
-+
-+if __name__ == '__main__':
-+    main()
-diff --git a/scratch/mso-dumper/src/ole.py b/scratch/mso-dumper/src/ole.py
-index 9b01928..3db2458 100644
---- a/scratch/mso-dumper/src/ole.py
-+++ b/scratch/mso-dumper/src/ole.py
-@@ -526,7 +526,8 @@ entire file stream.
-             self.RootStorageBytes += self.header.bytes[pos:pos+self.sectorSize]
- 
- 
--    def __getRawStream (self, entry):
-+    def getRawStreamByEntry (self, entry):
-+
-         chain = []
-         if entry.StreamLocation == StreamLocation.SAT:
-             chain = self.header.getSAT().getSectorIDChain(entry.StreamSectorID)
-@@ -561,7 +562,7 @@ entire file stream.
-         bytes = []
-         for entry in self.entries:
-             if entry.Name == name:
--                bytes = self.__getRawStream(entry)
-+                bytes = self.getRawStreamByEntry(entry)
-                 break
-         return bytes
- 
diff --git a/scratch/mso-dumper/ppt-dump.py b/scratch/mso-dumper/ppt-dump.py
deleted file mode 100755
index f303fb2..0000000
--- a/scratch/mso-dumper/ppt-dump.py
+++ /dev/null
@@ -1,121 +0,0 @@
-#!/usr/bin/env python
-########################################################################
-#
-#  Copyright (c) 2010 Kohei Yoshida, Thorsten Behrens
-#  
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#  
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#  
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
-
-import sys, os.path, getopt
-sys.path.append(sys.path[0]+"/src")
-import ole, pptstream, globals
-
-from globals import error
-
-def usage (exname):
-    exname = os.path.basename(exname)
-    msg = """Usage: %s [options] [ppt file]
-
-Options:
-  --help        displays this help message.
-"""%exname
-    print msg
-
-
-class PPTDumper(object):
-
-    def __init__ (self, filepath, params):
-        self.filepath = filepath
-        self.params = params
-
-    def __printDirHeader (self, dirname, byteLen):
-        dirname = globals.encodeName(dirname)
-        print("")
-        print("="*68)
-        print("%s (size: %d bytes)"%(dirname, byteLen))
-        print("-"*68)
-
-    def dump (self):
-        file = open(self.filepath, 'rb')
-        strm = pptstream.PPTFile(file.read(), self.params)
-        file.close()
-        strm.printStreamInfo()
-        strm.printHeader()
-        strm.printDirectory()
-        dirnames = strm.getDirectoryNames()
-        result = True
-        for dirname in dirnames:
-            if len(dirname) == 0 or dirname == 'Root Entry':
-                continue
-
-            dirstrm = strm.getDirectoryStreamByName(dirname)
-            self.__printDirHeader(dirname, len(dirstrm.bytes))
-            if  dirname == "PowerPoint Document":
-                if not self.__readSubStream(dirstrm):
-                    result = False
-            elif  dirname == "Current User":
-                if not self.__readSubStream(dirstrm):
-                    result = False
-            else:
-                globals.dumpBytes(dirstrm.bytes, 512)
-        return result
-
-    def __readSubStream (self, strm):
-        # read all records in substream
-        return strm.readRecords()
-
-
-def main (args):
-    exname, args = args[0], args[1:]
-    if len(args) < 1:
-        print("takes at least one argument")
-        usage(exname)
-        return
-
-    params = globals.Params()
-    try:
-        opts, args = getopt.getopt(args, "h", ["help", "debug", "show-sector-chain"])
-        for opt, arg in opts:
-            if opt in ['-h', '--help']:
-                usage(exname)
-                return
-            elif opt in ['--debug']:
-                params.debug = True
-            elif opt in ['--show-sector-chain']:
-                params.showSectorChain = True
-            else:
-                error("unknown option %s\n"%opt)
-                usage()
-
-    except getopt.GetoptError:
-        error("error parsing input options\n")
-        usage(exname)
-        return
-
-    dumper = PPTDumper(args[0], params)
-    if not dumper.dump():
-        error("FAILURE\n")
-
-
-if __name__ == '__main__':
-    main(sys.argv)
diff --git a/scratch/mso-dumper/src/formula.py b/scratch/mso-dumper/src/formula.py
deleted file mode 100644
index 85bb6bf..0000000
--- a/scratch/mso-dumper/src/formula.py
+++ /dev/null
@@ -1,859 +0,0 @@
-########################################################################
-#
-#  Copyright (c) 2010 Kohei Yoshida
-#  
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#  
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#  
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
-
-import struct, sys
-import globals
-
-class InvalidCellAddress(Exception): pass
-
-def toColName (colID):
-    if colID > 255:
-        globals.error("Column ID greater than 255")
-        raise InvalidCellAddress
-    n1 = colID % 26
-    n2 = int(colID/26)
-    name = struct.pack('b', n1 + ord('A'))
-    if n2 > 0:
-        name += struct.pack('b', n2 + ord('A'))
-    return name
-
-def toAbsName (name, isRelative):
-    if not isRelative:
-        name = '$' + name
-    return name
-
-class CellAddress(object):
-    def __init__ (self, col=0, row=0, colRel=False, rowRel=False):
-        self.col = col
-        self.row = row
-        self.isColRelative = colRel
-        self.isRowRelative = rowRel
-
-    def toString (self):
-        return self.getName()
-
-    def getName (self):
-        colName = toAbsName(toColName(self.col), self.isColRelative)
-        rowName = toAbsName("%d"%(self.row+1),   self.isRowRelative)
-        return colName + rowName
-
-class CellRange(object):
-    def __init__ (self):
-        self.firstRow = 0
-        self.lastRow = 0
-        self.firstCol = 0
-        self.lastCol = 0
-        self.isFirstRowRelative = False
-        self.isLastRowRelative = False
-        self.isFirstColRelative = False
-        self.isLastColRelative = False
-
-    def toString (self):
-        return self.getName()
-
-    def getName (self):
-        col1 = toColName(self.firstCol)
-        col2 = toColName(self.lastCol)
-        row1 = "%d"%(self.firstRow+1)
-        row2 = "%d"%(self.lastRow+1)
-        col1 = toAbsName(col1, self.isFirstColRelative)
-        col2 = toAbsName(col2, self.isLastColRelative)
-        row1 = toAbsName(row1, self.isFirstRowRelative)
-        row2 = toAbsName(row2, self.isLastRowRelative)
-        return col1 + row1 + ':' + col2 + row2
-
-def parseCellAddress (bytes):
-    if len(bytes) != 4:
-        globals.error("Byte size is %d but expected 4 bytes for cell address.\n"%len(bytes))
-        raise InvalidCellAddress
-
-    row = globals.getSignedInt(bytes[0:2])
-    col = globals.getSignedInt(bytes[2:4])
-    colRelative = ((col & 0x4000) != 0)
-    rowRelative = ((col & 0x8000) != 0)
-    col = (col & 0x00FF)
-    obj = CellAddress(col, row, colRelative, rowRelative)
-    return obj
-
-def parseCellRangeAddress (bytes):
-    if len(bytes) != 8:
-        raise InvalidCellAddress
-
-    obj = CellRange()
-    obj.firstRow = globals.getSignedInt(bytes[0:2])
-    obj.lastRow  = globals.getSignedInt(bytes[2:4])
-    obj.firstCol = globals.getSignedInt(bytes[4:6])
-    obj.lastCol  = globals.getSignedInt(bytes[6:8])
-
-    obj.isFirstColRelative = ((obj.firstCol & 0x4000) != 0)
-    obj.isFirstRowRelative = ((obj.firstCol & 0x8000) != 0)
-    obj.firstCol = (obj.firstCol & 0x00FF)
-
-    obj.isLastColRelative = ((obj.lastCol & 0x4000) != 0)
-    obj.isLastRowRelative = ((obj.lastCol & 0x8000) != 0)
-    obj.lastCol = (obj.lastCol & 0x00FF)
-    return obj
-
-
-def makeSheetName (sheet1, sheet2):
-    if sheet2 == None or sheet1 == sheet2:
-        sheetName = "sheetID='%d'"%sheet1
-    else:
-        sheetName = "sheetID='%d-%d'"%(sheet1, sheet2)
-    return sheetName
-
-
-class TokenBase(object):
-    """base class for token handler
-
-Derive a class from this base class to create a token handler for a formula
-token.  
-
-The parse method takes the token array position that points to the first 
-token to be processed, and returns the position of the laste token that has 
-been processed.  So, if the handler processes only one token, it should 
-return the same value it receives without incrementing it.  
-
-"""
-    def __init__ (self, header, tokens):
-        self.header = header
-        self.tokens = tokens
-        self.size = len(self.tokens)
-        self.init()
-
-    def init (self):
-        """initializer for a derived class"""
-        pass
-
-    def parse (self, i):
-        return i
-
-    def getText (self):
-        return ''
-
-class Add(TokenBase): pass
-class Sub(TokenBase): pass
-class Mul(TokenBase): pass
-class Div(TokenBase): pass
-class Power(TokenBase): pass
-class Concat(TokenBase): pass
-class LT(TokenBase): pass
-class LE(TokenBase): pass
-class EQ(TokenBase): pass
-class GE(TokenBase): pass
-class GT(TokenBase): pass
-class NE(TokenBase): pass
-class Isect(TokenBase): pass
-class List(TokenBase): pass
-class Range(TokenBase): pass
-
-class Plus(TokenBase): pass
-class Minus(TokenBase): pass
-class Percent(TokenBase): pass
-
-class NameX(TokenBase):
-    """external name"""
-
-    def parse (self, i):
-        i += 1
-        self.refID = globals.getSignedInt(self.tokens[i:i+2])
-        i += 2
-        self.nameID = globals.getSignedInt(self.tokens[i:i+2])
-        i += 2
-        return i
-
-    def getText (self):
-        return "<externname externSheetID='%d' nameID='%d'>"%(self.refID, self.nameID)
-
-
-class Ref3dR(TokenBase):
-    """3D reference or external reference to a cell"""
-
-    def init (self):
-        self.cell = None
-        self.sheet1 = None
-        self.sheet2 = None
-
-    def parse (self, i):
-        try:
-            i += 1
-            self.sheet1 = globals.getSignedInt(self.tokens[i:i+2])
-            i += 2
-            if self.header == 0x0023:
-                # 3A in EXTERNNAME expects a 2nd sheet index
-                self.sheet2 = globals.getSignedInt(self.tokens[i:i+2])
-                i += 2
-            self.cell = parseCellAddress(self.tokens[i:i+4])
-            i += 4
-        except InvalidCellAddress:
-            pass
-        return i
-
-    def getText (self):
-        if self.cell == None:
-            return ''
-        cellName = self.cell.getName()
-        sheetName = makeSheetName(self.sheet1, self.sheet2)
-        return "<3dref %s cellAddress='%s'>"%(sheetName, cellName)
-
-
-class Ref3dV(TokenBase):
-    """3D reference or external reference to a cell"""
-
-    def init (self):
-        self.cell = None
-
-    def parse (self, i):
-        try:
-            i += 1
-            self.extSheetId = globals.getSignedInt(self.tokens[i:i+2])
-            i += 2
-            self.cell = parseCellAddress(self.tokens[i:i+4])
-            i += 4
-        except InvalidCellAddress:
-            pass
-        return i
-
-    def getText (self):
-        if self.cell == None:
-            return ''
-        cellName = self.cell.getName()
-        return "<3dref externSheetID=%d cellAddress='%s'>"%(self.extSheetId, cellName)
-
-
-class Ref3dA(Ref3dV):
-    def __init__ (self, header, tokens):
-        Ref3dV.__init__(self, header, tokens)
-
-
-class Area3d(TokenBase):
-
-    def parse (self, i):
-        self.cellrange = None
-        try:
-            op = self.tokens[i]
-            i += 1
-            self.extSheetId = globals.getSignedInt(self.tokens[i:i+2])
-            i += 2
-            self.cellrange = parseCellRangeAddress(self.tokens[i:i+8])
-        except InvalidCellAddress:
-            pass
-        return i
-
-    def getText (self):
-        if self.cellrange == None:
-            return ''
-        cellRangeName = self.cellrange.getName()
-        return "<3drange externSheetID=%d rangeAddress='%s'>"%(self.extSheetId, cellRangeName)
-
-class Error(TokenBase):
-
-    def parse (self, i):
-        i += 1 # skip opcode
-        self.errorNum = globals.getSignedInt(self.tokens[i:i+1])
-        i += 1
-        return i
-
-    def getText (self):
-        errorText = ''
-        if self.errorNum == 0x17:
-            errorText = '#REF!'
-        return "<error code='0x%2.2X' text='%s'>"%(self.errorNum, errorText)
-
-tokenMap = {
-    # binary operator
-    0x03: Add,
-    0x04: Sub,
-    0x05: Mul,
-    0x06: Div,
-    0x07: Power,
-    0x08: Concat,
-    0x09: LT,
-    0x0A: LE,
-    0x0B: EQ,
-    0x0C: GE,
-    0x0D: GT,
-    0x0E: NE,
-    0x0F: Isect,
-    0x10: List,
-    0x11: Range,
-
-    # unary operator
-    0x12: Plus,
-    0x13: Minus,
-    0x14: Percent,
-
-    # operand tokens
-    0x39: NameX,
-    0x59: NameX,
-    0x79: NameX,
-
-    # 3d reference (TODO: There is a slight variation in how a cell reference
-    # is represented between 0x3A and 0x5A).
-    0x3A: Ref3dR,
-    0x5A: Ref3dV,
-    0x7A: Ref3dA,
-
-    0x3B: Area3d,
-    0x5B: Area3d,
-    0x7B: Area3d,
-
-    0x1C: Error,
-
-    # last item
-  0xFFFF: None
-}
-
-class FormulaParser(object):
-    """formula parser for token bytes
-
-This class receives a series of bytes that represent formula tokens through
-the constructor.  That series of bytes must also include the formula length
-which is usually the first 2 bytes.
-"""
-    def __init__ (self, header, tokens, sizeField=True):
-        self.header = header
-        self.strm = globals.ByteStream(tokens)
-        self.text = ''
-        self.sizeField = sizeField
-
-    def parse (self):
-        length = self.strm.getSize()
-        if self.sizeField:
-            # first 2-bytes contain the length of the formula tokens
-            length = self.strm.readUnsignedInt(2)
-            if length <= 0:
-                return
-            ftokens = self.strm.readBytes(length)
-            length = len(ftokens)
-        else:
-            ftokens = self.strm.readRemainingBytes()
-
-        i = 0
-        while i < length:
-            tk = ftokens[i]
-
-            if type(tk) == type('c'):
-                # get the ordinal of the character.
-                tk = ord(tk)
-
-            if not tokenMap.has_key(tk):
-                # no token handler
-                i += 1
-                continue
-
-            # token handler exists.
-            o = tokenMap[tk](self.header, ftokens)
-            i = o.parse(i)
-            self.text += o.getText() + ' '
-
-            i += 1
-
-
-    def getText (self):
-        return self.text
-
-# ============================================================================
-
-class TokenType:
-    Area3d = 0
-    Unknown = 9999
-
-class _TokenBase(object):
-    def __init__ (self, strm, opcode1, opcode2=None):
-        self.opcode1 = opcode1
-        self.opcode2 = opcode2
-        self.strm = strm
-        self.tokenType = TokenType.Unknown
-
-    def parse (self):
-        self.parseBytes()
-        self.strm = None # no need to hold reference to the stream.
-
-    def parseBytes (self):
-        # derived class should overwrite this method.
-        pass
-
-    def getText (self):
-        return ''
-
-class _Int(_TokenBase):
-    def parseBytes (self):
-        self.value = self.strm.readUnsignedInt(2)
-
-    def getText (self):
-        return "%d"%self.value
-
-class _Area3d(_TokenBase):
-    def parseBytes (self):
-        self.xti = self.strm.readUnsignedInt(2)
-        self.cellRange = parseCellRangeAddress(self.strm.readBytes(8))
-        self.tokenType = TokenType.Area3d
-
-    def getText (self):
-        return "(xti=%d,"%self.xti + self.cellRange.getName() + ")"
-
-class _FuncVar(_TokenBase):
-
-    funcTab = {
-        0x0000: 'COUNT',
-        0x0001: 'IF',
-        0x0002: 'ISNA',
-        0x0003: 'ISERROR',
-        0x0004: 'SUM',
-        0x0005: 'AVERAGE',
-        0x0006: 'MIN',
-        0x0007: 'MAX',
-        0x0008: 'ROW',
-        0x0009: 'COLUMN',
-        0x000A: 'NA',
-        0x000B: 'NPV',
-        0x000C: 'STDEV',
-        0x000D: 'DOLLAR',
-        0x000E: 'FIXED',
-        0x000F: 'SIN',
-        0x0010: 'COS',
-        0x0011: 'TAN',
-        0x0012: 'ATAN',
-        0x0013: 'PI',
-        0x0014: 'SQRT',
-        0x0015: 'EXP',
-        0x0016: 'LN',
-        0x0017: 'LOG10',
-        0x0018: 'ABS',
-        0x0019: 'INT',
-        0x001A: 'SIGN',
-        0x001B: 'ROUND',
-        0x001C: 'LOOKUP',
-        0x001D: 'INDEX',
-        0x001E: 'REPT',
-        0x001F: 'MID',
-        0x0020: 'LEN',
-        0x0021: 'VALUE',
-        0x0022: 'TRUE',
-        0x0023: 'FALSE',
-        0x0024: 'AND',
-        0x0025: 'OR',
-        0x0026: 'NOT',
-        0x0027: 'MOD',
-        0x0028: 'DCOUNT',
-        0x0029: 'DSUM',
-        0x002A: 'DAVERAGE',
-        0x002B: 'DMIN',
-        0x002C: 'DMAX',
-        0x002D: 'DSTDEV',
-        0x002E: 'VAR',
-        0x002F: 'DVAR',
-        0x0030: 'TEXT',
-        0x0031: 'LINEST',
-        0x0032: 'TREND',
-        0x0033: 'LOGEST',
-        0x0034: 'GROWTH',
-        0x0035: 'GOTO',
-        0x0036: 'HALT',
-        0x0037: 'RETURN',
-        0x0038: 'PV',
-        0x0039: 'FV',
-        0x003A: 'NPER',
-        0x003B: 'PMT',
-        0x003C: 'RATE',
-        0x003D: 'MIRR',
-        0x003E: 'IRR',
-        0x003F: 'RAND',
-        0x0040: 'MATCH',
-        0x0041: 'DATE',
-        0x0042: 'TIME',
-        0x0043: 'DAY',
-        0x0044: 'MONTH',
-        0x0045: 'YEAR',
-        0x0046: 'WEEKDAY',
-        0x0047: 'HOUR',
-        0x0048: 'MINUTE',
-        0x0049: 'SECOND',
-        0x004A: 'NOW',
-        0x004B: 'AREAS',
-        0x004C: 'ROWS',
-        0x004D: 'COLUMNS',
-        0x004E: 'OFFSET',
-        0x004F: 'ABSREF',
-        0x0050: 'RELREF',
-        0x0051: 'ARGUMENT',
-        0x0052: 'SEARCH',
-        0x0053: 'TRANSPOSE',
-        0x0054: 'ERROR',
-        0x0055: 'STEP',
-        0x0056: 'TYPE',
-        0x0057: 'ECHO',
-        0x0058: 'SET.NAME',
-        0x0059: 'CALLER',
-        0x005A: 'DEREF',
-        0x005B: 'WINDOWS',
-        0x005C: 'SERIES',
-        0x005D: 'DOCUMENTS',
-        0x005E: 'ACTIVE.CELL',
-        0x005F: 'SELECTION',
-        0x0060: 'RESULT',
-        0x0061: 'ATAN2',
-        0x0062: 'ASIN',
-        0x0063: 'ACOS',
-        0x0064: 'CHOOSE',
-        0x0065: 'HLOOKUP',
-        0x0066: 'VLOOKUP',
-        0x0067: 'LINKS',
-        0x0068: 'INPUT',
-        0x0069: 'ISREF',
-        0x006A: 'GET.FORMULA',
-        0x006B: 'GET.NAME',
-        0x006C: 'SET.VALUE',
-        0x006D: 'LOG',
-        0x006E: 'EXEC',
-        0x006F: 'CHAR',
-        0x0070: 'LOWER',
-        0x0071: 'UPPER',
-        0x0072: 'PROPER',
-        0x0073: 'LEFT',
-        0x0074: 'RIGHT',
-        0x0075: 'EXACT',
-        0x0076: 'TRIM',
-        0x0077: 'REPLACE',
-        0x0078: 'SUBSTITUTE',
-        0x0079: 'CODE',
-        0x007A: 'NAMES',
-        0x007B: 'DIRECTORY',
-        0x007C: 'FIND',
-        0x007D: 'CELL',
-        0x007E: 'ISERR',
-        0x007F: 'ISTEXT',
-        0x0080: 'ISNUMBER',
-        0x0081: 'ISBLANK',
-        0x0082: 'T',
-        0x0083: 'N',
-        0x0084: 'FOPEN',
-        0x0085: 'FCLOSE',
-        0x0086: 'FSIZE',
-        0x0087: 'FREADLN',
-        0x0088: 'FREAD',
-        0x0089: 'FWRITELN',
-        0x008A: 'FWRITE',
-        0x008B: 'FPOS',
-        0x008C: 'DATEVALUE',
-        0x008D: 'TIMEVALUE',
-        0x008E: 'SLN',
-        0x008F: 'SYD',
-        0x0090: 'DDB',
-        0x0091: 'GET.DEF',
-        0x0092: 'REFTEXT',
-        0x0093: 'TEXTREF',
-        0x0094: 'INDIRECT',
-        0x0095: 'REGISTER',
-        0x0096: 'CALL',
-        0x0097: 'ADD.BAR',
-        0x0098: 'ADD.MENU',
-        0x0099: 'ADD.COMMAND',
-        0x009A: 'ENABLE.COMMAND',
-        0x009B: 'CHECK.COMMAND',
-        0x009C: 'RENAME.COMMAND',
-        0x009D: 'SHOW.BAR',
-        0x009E: 'DELETE.MENU',
-        0x009F: 'DELETE.COMMAND',
-        0x00A0: 'GET.CHART.ITEM',
-        0x00A1: 'DIALOG.BOX',
-        0x00A2: 'CLEAN',
-        0x00A3: 'MDETERM',
-        0x00A4: 'MINVERSE',
-        0x00A5: 'MMULT',
-        0x00A6: 'FILES',
-        0x00A7: 'IPMT',
-        0x00A8: 'PPMT',
-        0x00A9: 'COUNTA',
-        0x00AA: 'CANCEL.KEY',
-        0x00AB: 'FOR',
-        0x00AC: 'WHILE',
-        0x00AD: 'BREAK',
-        0x00AE: 'NEXT',
-        0x00AF: 'INITIATE',
-        0x00B0: 'REQUEST',
-        0x00B1: 'POKE',
-        0x00B2: 'EXECUTE',
-        0x00B3: 'TERMINATE',
-        0x00B4: 'RESTART',
-        0x00B5: 'HELP',
-        0x00B6: 'GET.BAR',
-        0x00B7: 'PRODUCT',
-        0x00B8: 'FACT',
-        0x00B9: 'GET.CELL',
-        0x00BA: 'GET.WORKSPACE',
-        0x00BB: 'GET.WINDOW',
-        0x00BC: 'GET.DOCUMENT',
-        0x00BD: 'DPRODUCT',
-        0x00BE: 'ISNONTEXT',
-        0x00BF: 'GET.NOTE',
-        0x00C0: 'NOTE',
-        0x00C1: 'STDEVP',
-        0x00C2: 'VARP',
-        0x00C3: 'DSTDEVP',
-        0x00C4: 'DVARP',
-        0x00C5: 'TRUNC',
-        0x00C6: 'ISLOGICAL',
-        0x00C7: 'DCOUNTA',
-        0x00C8: 'DELETE.BAR',
-        0x00C9: 'UNREGISTER',
-        0x00CC: 'USDOLLAR',
-        0x00CD: 'FINDB',
-        0x00CE: 'SEARCHB',
-        0x00CF: 'REPLACEB',
-        0x00D0: 'LEFTB',
-        0x00D1: 'RIGHTB',
-        0x00D2: 'MIDB',
-        0x00D3: 'LENB',
-        0x00D4: 'ROUNDUP',
-        0x00D5: 'ROUNDDOWN',
-        0x00D6: 'ASC',
-        0x00D7: 'DBCS',
-        0x00D8: 'RANK',
-        0x00DB: 'ADDRESS',
-        0x00DC: 'DAYS360',
-        0x00DD: 'TODAY',
-        0x00DE: 'VDB',
-        0x00DF: 'ELSE',
-        0x00E0: 'ELSE.IF',
-        0x00E1: 'END.IF',
-        0x00E2: 'FOR.CELL',
-        0x00E3: 'MEDIAN',
-        0x00E4: 'SUMPRODUCT',
-        0x00E5: 'SINH',
-        0x00E6: 'COSH',
-        0x00E7: 'TANH',
-        0x00E8: 'ASINH',
-        0x00E9: 'ACOSH',
-        0x00EA: 'ATANH',
-        0x00EB: 'DGET',
-        0x00EC: 'CREATE.OBJECT',
-        0x00ED: 'VOLATILE',
-        0x00EE: 'LAST.ERROR',
-        0x00EF: 'CUSTOM.UNDO',
-        0x00F0: 'CUSTOM.REPEAT',
-        0x00F1: 'FORMULA.CONVERT',
-        0x00F2: 'GET.LINK.INFO',
-        0x00F3: 'TEXT.BOX',
-        0x00F4: 'INFO',
-        0x00F5: 'GROUP',
-        0x00F6: 'GET.OBJECT',
-        0x00F7: 'DB',
-        0x00F8: 'PAUSE',
-        0x00FB: 'RESUME',
-        0x00FC: 'FREQUENCY',
-        0x00FD: 'ADD.TOOLBAR',
-        0x00FE: 'DELETE.TOOLBAR',
-        0x00FF: 'User Defined Function',
-        0x0100: 'RESET.TOOLBAR',
-        0x0101: 'EVALUATE',
-        0x0102: 'GET.TOOLBAR',
-        0x0103: 'GET.TOOL',
-        0x0104: 'SPELLING.CHECK',
-        0x0105: 'ERROR.TYPE',
-        0x0106: 'APP.TITLE',
-        0x0107: 'WINDOW.TITLE',
-        0x0108: 'SAVE.TOOLBAR',
-        0x0109: 'ENABLE.TOOL',
-        0x010A: 'PRESS.TOOL',
-        0x010B: 'REGISTER.ID',
-        0x010C: 'GET.WORKBOOK',
-        0x010D: 'AVEDEV',
-        0x010E: 'BETADIST',
-        0x010F: 'GAMMALN',
-        0x0110: 'BETAINV',
-        0x0111: 'BINOMDIST',
-        0x0112: 'CHIDIST',
-        0x0113: 'CHIINV',
-        0x0114: 'COMBIN',
-        0x0115: 'CONFIDENCE',
-        0x0116: 'CRITBINOM',
-        0x0117: 'EVEN',
-        0x0118: 'EXPONDIST',
-        0x0119: 'FDIST',
-        0x011A: 'FINV',
-        0x011B: 'FISHER',
-        0x011C: 'FISHERINV',
-        0x011D: 'FLOOR',
-        0x011E: 'GAMMADIST',
-        0x011F: 'GAMMAINV',
-        0x0120: 'CEILING',
-        0x0121: 'HYPGEOMDIST',
-        0x0122: 'LOGNORMDIST',
-        0x0123: 'LOGINV',
-        0x0124: 'NEGBINOMDIST',
-        0x0125: 'NORMDIST',
-        0x0126: 'NORMSDIST',
-        0x0127: 'NORMINV',
-        0x0128: 'NORMSINV',
-        0x0129: 'STANDARDIZE',
-        0x012A: 'ODD',
-        0x012B: 'PERMUT',
-        0x012C: 'POISSON',
-        0x012D: 'TDIST',
-        0x012E: 'WEIBULL',
-        0x012F: 'SUMXMY2',
-        0x0130: 'SUMX2MY2',
-        0x0131: 'SUMX2PY2',
-        0x0132: 'CHITEST',
-        0x0133: 'CORREL',
-        0x0134: 'COVAR',
-        0x0135: 'FORECAST',
-        0x0136: 'FTEST',
-        0x0137: 'INTERCEPT',
-        0x0138: 'PEARSON',
-        0x0139: 'RSQ',
-        0x013A: 'STEYX',
-        0x013B: 'SLOPE',
-        0x013C: 'TTEST',
-        0x013D: 'PROB',
-        0x013E: 'DEVSQ',
-        0x013F: 'GEOMEAN',
-        0x0140: 'HARMEAN',
-        0x0141: 'SUMSQ',
-        0x0142: 'KURT',
-        0x0143: 'SKEW',
-        0x0144: 'ZTEST',
-        0x0145: 'LARGE',
-        0x0146: 'SMALL',
-        0x0147: 'QUARTILE',
-        0x0148: 'PERCENTILE',
-        0x0149: 'PERCENTRANK',
-        0x014A: 'MODE',
-        0x014B: 'TRIMMEAN',
-        0x014C: 'TINV',
-        0x014E: 'MOVIE.COMMAND',
-        0x014F: 'GET.MOVIE',
-        0x0150: 'CONCATENATE',
-        0x0151: 'POWER',
-        0x0152: 'PIVOT.ADD.DATA',
-        0x0153: 'GET.PIVOT.TABLE',
-        0x0154: 'GET.PIVOT.FIELD',
-        0x0155: 'GET.PIVOT.ITEM',
-        0x0156: 'RADIANS',
-        0x0157: 'DEGREES',
-        0x0158: 'SUBTOTAL',
-        0x0159: 'SUMIF',
-        0x015A: 'COUNTIF',
-        0x015B: 'COUNTBLANK',
-        0x015C: 'SCENARIO.GET',
-        0x015D: 'OPTIONS.LISTS.GET',
-        0x015E: 'ISPMT',
-        0x015F: 'DATEDIF',
-        0x0160: 'DATESTRING',
-        0x0161: 'NUMBERSTRING',
-        0x0162: 'ROMAN',
-        0x0163: 'OPEN.DIALOG',
-        0x0164: 'SAVE.DIALOG',
-        0x0165: 'VIEW.GET',
-        0x0166: 'GETPIVOTDATA',
-        0x0167: 'HYPERLINK',
-        0x0168: 'PHONETIC',
-        0x0169: 'AVERAGEA',
-        0x016A: 'MAXA',
-        0x016B: 'MINA',
-        0x016C: 'STDEVPA',
-        0x016D: 'VARPA',
-        0x016E: 'STDEVA',
-        0x016F: 'VARA',
-        0x0170: 'BAHTTEXT',
-        0x0171: 'THAIDAYOFWEEK',
-        0x0172: 'THAIDIGIT',
-        0x0173: 'THAIMONTHOFYEAR',
-        0x0174: 'THAINUMSOUND',
-        0x0175: 'THAINUMSTRING',
-        0x0176: 'THAISTRINGLENGTH',
-        0x0177: 'ISTHAIDIGIT',
-        0x0178: 'ROUNDBAHTDOWN',
-        0x0179: 'ROUNDBAHTUP',
-        0x017A: 'THAIYEAR',
-        0x017B: 'RTD'
-    }
-
-    def parseBytes (self):
-        self.dataType = (self.opcode1 & 0x60)/32  # 0x1 = reference, 0x2 = value, 0x3 = array
-        self.argCount = self.strm.readUnsignedInt(1)
-        tab = self.strm.readUnsignedInt(2)
-        self.funcType = (tab & 0x7FFF)
-        self.isCeTab = (tab & 0x8000) != 0
-
-    def getText (self):
-        if self.isCeTab:
-            # I'll support this later.
-            return ''
-
-        if not _FuncVar.funcTab.has_key(self.funcType):
-            # unknown function name
-            return '#NAME!'
-
-        if self.argCount > 0:
-            # I'll support functions with arguments later.
-            return ''
-
-        return _FuncVar.funcTab[self.funcType] + "()"
-
-_tokenMap = {
-    0x1E: _Int,
-    0x3B: _Area3d,
-    0x5B: _Area3d,
-    0x7B: _Area3d,
-
-    0x42: _FuncVar
-}
-
-class FormulaParser2(object):
-    """This is a new formula parser that will eventually replace the old one.
-
-Once replaced, I'll change the name to FormulaParser and the names of the 
-associated token classes will be without the leading underscore (_)."""
-
-
-    def __init__ (self, header, bytes):
-        self.header = header
-        self.tokens = []
-        self.strm = globals.ByteStream(bytes)
-
-    def parse (self):
-        while not self.strm.isEndOfRecord():
-            b = self.strm.readUnsignedInt(1)
-            if not _tokenMap.has_key(b):
-                # Unknown token.  Stop parsing.
-                return
-
-            token = _tokenMap[b](self.strm, b)
-            token.parse()
-            self.tokens.append(token)
-
-    def getText (self):
-        s = ''
-        for tk in self.tokens:
-            s += tk.getText()
-        return s
-
-    def getTokens (self):
-        return self.tokens
diff --git a/scratch/mso-dumper/src/globals.py b/scratch/mso-dumper/src/globals.py
deleted file mode 100644
index de4f199..0000000
--- a/scratch/mso-dumper/src/globals.py
+++ /dev/null
@@ -1,472 +0,0 @@
-########################################################################
-#
-#  Copyright (c) 2010 Kohei Yoshida
-#  
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#  
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#  
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
-
-import sys, struct, math, zipfile, xmlpp, StringIO
-
-class ByteConvertError(Exception): pass
-
-class ByteStreamError(Exception): pass
-
-
-class ModelBase(object):
-
-    class HostAppType:
-        Word       = 0
-        Excel      = 1
-        PowerPoint = 2
-
-    def __init__ (self, hostApp):
-        self.hostApp = hostApp
-
-
-class Params(object):
-    """command-line parameters."""
-    def __init__ (self):
-        self.debug = False
-        self.showSectorChain = False
-        self.showStreamPos = False
-
-
-class StreamData(object):
-    """run-time stream data."""
-    def __init__ (self):
-        self.encrypted = False
-        self.pivotCacheIDs = {}
-
-    def appendPivotCacheId (self, newId):
-        # must be 4-digit with leading '0's.
-        strId = "%.4d"%newId
-        self.pivotCacheIDs[strId] = True
-
-    def isPivotCacheStream (self, name):
-        return self.pivotCacheIDs.has_key(name)
-
-
-class ByteStream(object):
-
-    def __init__ (self, bytes):
-        self.bytes = bytes
-        self.pos = 0
-        self.size = len(bytes)
-
-    def getSize (self):
-        return self.size
-
-    def readBytes (self, length):
-        if self.pos + length > self.size:
-            error("reading %d bytes from position %d would exceed the current size of %d\n"%
-                (length, self.pos, self.size))
-            raise ByteStreamError()
-        r = self.bytes[self.pos:self.pos+length]
-        self.pos += length
-        return r
-
-    def readRemainingBytes (self):
-        r = self.bytes[self.pos:]
-        self.pos = self.size
-        return r
-
-    def getCurrentPos (self):
-        return self.pos
-
-    def setCurrentPos (self, pos):
-        self.pos = pos
-
-    def isEndOfRecord (self):
-        return (self.pos == self.size)
-
-    def readUnsignedInt (self, length):
-        bytes = self.readBytes(length)
-        return getUnsignedInt(bytes)
-
-    def readSignedInt (self, length):
-        bytes = self.readBytes(length)
-        return getSignedInt(bytes)
-
-    def readDouble (self):
-        # double is always 8 bytes.
-        bytes = self.readBytes(8)
-        return getDouble(bytes)
-
-    def readUnicodeString (self):
-        # First 2-bytes contains the text length, followed by a 1-byte flag.
-        textLen = self.readUnsignedInt(2)
-        bytes = self.bytes[self.pos:]
-        text, byteLen = getRichText(bytes, textLen)
-        self.moveForward (byteLen)
-        return text
-
-    def moveBack (self, byteCount):
-        self.pos -= byteCount
-        if self.pos < 0:
-            self.pos = 0
-
-    def moveForward (self, byteCount):
-        self.pos += byteCount
-        if self.pos > self.size:
-            self.pos = self.size
-
-
-def getValueOrUnknown (list, idx, errmsg='(unknown)'):
-    listType = type(list)
-    if listType == type([]):
-        # list
-        if idx < len(list):
-            return list[idx]
-    elif listType == type({}):
-        # dictionary
-        if list.has_key(idx):
-            return list[idx]
-
-    return errmsg
-
-
-def output (msg):
-    sys.stdout.write(msg)
-
-def error (msg):
-    sys.stderr.write("Error: " + msg)
-
-def debug (msg):
-    sys.stderr.write("DEBUG: %s\n"%msg)
-
-
-def encodeName (name):
-    """Encode name that contains unprintable characters."""
-
-    n = len(name)
-    if n == 0:
-        return name
-
-    newname = ''
-    for i in xrange(0, n):
-        if ord(name[i]) <= 20 or ord(name[i]) >= 127:
-            newname += "\\x%2.2X"%ord(name[i])
-        else:
-            newname += name[i]
-
-    return newname
-
-
-class UnicodeRichExtText(object):
-    def __init__ (self):
-        self.baseText = ''
-        self.phoneticBytes = []
-
-
-def getUnicodeRichExtText (bytes):
-    ret = UnicodeRichExtText()
-    strm = ByteStream(bytes)
-    textLen = strm.readUnsignedInt(2)
-    flags = strm.readUnsignedInt(1)
-    #  0 0 0 0 0 0 0 0
-    # |-------|D|C|B|A|
-    isDoubleByte = (flags & 0x01) > 0 # A
-    ignored      = (flags & 0x02) > 0 # B
-    hasPhonetic  = (flags & 0x04) > 0 # C
-    isRichStr    = (flags & 0x08) > 0 # D
-
-    numElem = 0
-    if isRichStr:
-        numElem = strm.readUnsignedInt(2)
-
-    phoneticBytes = 0
-    if hasPhonetic:
-        phoneticBytes = strm.readUnsignedInt(4)
-        
-    if isDoubleByte:
-        # double-byte string (UTF-16)
-        text = ''
-        for i in xrange(0, textLen):
-            text += toTextBytes(strm.readBytes(2)).decode('utf-16')
-        ret.baseText = text
-    else:
-        # single-byte string
-        ret.baseText = toTextBytes(strm.readBytes(textLen))
-
-    if isRichStr:
-        for i in xrange(0, numElem):
-            posChar = strm.readUnsignedInt(2)
-            fontIdx = strm.readUnsignedInt(2)
-
-    if hasPhonetic:
-        ret.phoneticBytes = strm.readBytes(phoneticBytes)
-
-    return ret, strm.getCurrentPos()
-
-
-def getRichText (bytes, textLen=None):
-    """parse a string of the rich-text format that Excel uses.
-
-Note the following:
-
-  * The 1st byte always contains flag.
-  * The actual number of bytes read may differ depending on the values of the 
-    flags, so the client code should pass an open-ended stream of bytes and 
-    always query for the actual bytes read to adjust for the new stream 
-    position when this function returns.
-"""
-
-    strm = ByteStream(bytes)
-    flags = strm.readUnsignedInt(1)
-    if type(flags) == type('c'):
-        flags = ord(flags)
-    is16Bit   = (flags & 0x01)
-    isFarEast = (flags & 0x04)
-    isRich    = (flags & 0x08)
-
-    formatRuns = 0
-    if isRich:
-        formatRuns = strm.readUnsignedInt(2)
-
-    extInfo = 0
-    if isFarEast:
-        extInfo = strm.readUnsignedInt(4)
-
-    extraBytes = 0
-    if textLen == None:
-        extraBytes = formatRuns*4 + extInfo
-        textLen = len(bytes) - extraBytes
-
-    totalByteLen = strm.getCurrentPos() + textLen + extraBytes
-    if is16Bit:
-        totalByteLen += textLen # double the text length since each char is 2 bytes.
-        text = ''
-        for i in xrange(0, textLen):
-            text += toTextBytes(strm.readBytes(2)).decode('utf-16')
-    else:
-        text = toTextBytes(strm.readBytes(textLen))
-
-    return (text, totalByteLen)
-
-
-def dumpBytes (chars, subDivide=None):
-    line = 0
-    subDivideLine = None
-    if subDivide != None:
-        subDivideLine = subDivide/16
-
-    charLen = len(chars)
-    if charLen == 0:
-        # no bytes to dump.
-        return
-
-    labelWidth = int(math.ceil(math.log(charLen, 10)))
-    flushBytes = False
-    for i in xrange(0, charLen):
-        if (i+1)%16 == 1:
-            # print line header with seek position
-            fmt = "%%%d.%dd: "%(labelWidth, labelWidth)
-            output(fmt%i)
-
-        byte = ord(chars[i])
-        output("%2.2X "%byte)
-        flushBytes = True
-
-        if (i+1)%4 == 0:
-            # put extra space at every 4 bytes.
-            output(" ")
-
-        if (i+1)%16 == 0:
-            output("\n")
-            flushBytes = False
-            if subDivideLine != None and (line+1)%subDivideLine == 0:
-                output("\n")
-            line += 1
-
-    if flushBytes:
-        output("\n")
-
-
-def getSectorPos (secID, secSize):
-    return 512 + secID*secSize
-
-
-def getRawBytes (bytes, spaced=True, reverse=True):
-    text = ''
-    for b in bytes:
-        if type(b) == type(''):
-            b = ord(b)
-        if len(text) == 0:
-            text = "%2.2X"%b
-        elif spaced:
-            if reverse:
-                text = "%2.2X "%b + text
-            else:
-                text += " %2.2X"%b
-        else:
-            if reverse:
-                text = "%2.2X"%b + text
-            else:
-                text += "%2.2X"%b
-    return text
-
-
-def getTextBytes (bytes):
-    return toTextBytes(bytes)
-
-
-def toTextBytes (bytes):
-    n = len(bytes)
-    text = ''
-    for i in xrange(0, n):
-        b = bytes[i]
-        if type(b) == type(0x00):
-            b = struct.pack('B', b)
-        text += b
-    return text
-
-
-def getSignedInt (bytes):
-    # little endian
-    n = len(bytes)
-    if n == 0:
-        return 0
-
-    text = toTextBytes(bytes)
-    if n == 1:
-        # byte - 1 byte
-        return struct.unpack('b', text)[0]
-    elif n == 2:
-        # short - 2 bytes
-        return struct.unpack('<h', text)[0]
-    elif n == 4:
-        # int, long - 4 bytes
-        return struct.unpack('<l', text)[0]
-
-    raise ByteConvertError
-
-
-def getUnsignedInt (bytes):
-    # little endian
-    n = len(bytes)
-    if n == 0:
-        return 0
-
-    text = toTextBytes(bytes)
-    if n == 1:
-        # byte - 1 byte
-        return struct.unpack('B', text)[0]
-    elif n == 2:
-        # short - 2 bytes
-        return struct.unpack('<H', text)[0]
-    elif n == 4:
-        # int, long - 4 bytes
-        return struct.unpack('<L', text)[0]
-
-    raise ByteConvertError
-
-
-def getFloat (bytes):
-    n = len(bytes)
-    if n == 0:
-        return 0.0
-
-    text = toTextBytes(bytes)
-    return struct.unpack('<f', text)[0]
-
-
-def getDouble (bytes):
-    n = len(bytes)
-    if n == 0:
-        return 0.0
-
-    text = toTextBytes(bytes)
-    return struct.unpack('<d', text)[0]
-
-
-def getUTF8FromUTF16 (bytes):
-    # little endian utf-16 strings
-    byteCount = len(bytes)
-    loopCount = int(byteCount/2)
-    text = ''
-    for i in xrange(0, loopCount):
-        code = ''
-        lsbZero = bytes[i*2] == '\x00'
-        msbZero = bytes[i*2+1] == '\x00'
-        if msbZero and lsbZero:
-            return text
-        
-        if not msbZero:
-            code += bytes[i*2+1]
-        if not lsbZero:
-            code += bytes[i*2]
-        try:    
-            text += unicode(code, 'utf-8')
-        except UnicodeDecodeError:
-            text += "<%d invalid chars>"%len(code)
-    return text
-
-class StreamWrap(object):
-    def __init__ (self,printer):
-        self.printer = printer
-        self.buffer = ""
-    def write (self,string):
-        self.buffer += string
-    def flush (self):
-        for line in self.buffer.splitlines():
-            self.printer(line)
-
-def outputZipContent (bytes, printer, width=80):
-    printer("Zipped content:")
-    rawFile = StringIO.StringIO(bytes)
-    zipFile = zipfile.ZipFile(rawFile)
-    i = 0
-    # TODO: when 2.6/3.0 is in widespread use, change to infolist
-    # here, names might be ambiguous
-    for filename in zipFile.namelist():
-        if i > 0:
-            printer('-'*width)
-        i += 1
-        printer("")
-        printer(filename + ":")
-        printer('-'*width)
-
-        contents = zipFile.read(filename)
-        if filename.endswith(".xml") or contents.startswith("<?xml"):
-            wrapper = StreamWrap(printer)
-            xmlpp.pprint(contents,wrapper,1,80)
-            wrapper.flush()
-        else:
-            dumpBytes(contents)
-            
-    zipFile.close()
-
-def stringizeColorRef(colorRef, colorName="color"):
-    def split (packedColor):
-        return ((packedColor & 0xFF0000) // 0x10000, (packedColor & 0xFF00) / 0x100, (packedColor & 0xFF))
-    
-    colorValue = colorRef & 0xFFFFFF
-    if colorRef & 0xFE000000 == 0xFE000000 or colorRef & 0xFF000000 == 0:
-        colors = split(colorValue)
-        return "%s = (%d,%d,%d)"%(colorName, colors[0], colors[1], colors[2])
-    elif colorRef & 0x08000000 or colorRef & 0x10000000:
-        return "%s = schemecolor(%d)"%(colorName, colorValue)
-    elif colorRef & 0x04000000:
-        return "%s = colorschemecolor(%d)"%(colorName, colorValue)
-    else:
-        return "%s = <unidentified color>(%4.4Xh)"%(colorName, colorValue)
diff --git a/scratch/mso-dumper/src/msodraw.py b/scratch/mso-dumper/src/msodraw.py
deleted file mode 100644
index 26835ff..0000000
--- a/scratch/mso-dumper/src/msodraw.py
+++ /dev/null
@@ -1,607 +0,0 @@
-########################################################################
-#
-#  Copyright (c) 2010 Kohei Yoshida
-#  
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#  
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#  
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
-
-import globals, xlsmodel
-import sys
-
-def indent (level):
-    return '  '*level
-
-def headerLine ():
-    return "+ " + "-"*58 + "+"
-
-
-class RecordHeader:
-
-    class Type:
-        dggContainer            = 0xF000
-        dgContainer             = 0xF002
-        spgrContainer           = 0xF003
-        spContainer             = 0xF004
-        solverContainer         = 0xF005
-        FDGGBlock               = 0xF006
-        FDG                     = 0xF008
-        FSPGR                   = 0xF009
-        FSP                     = 0xF00A
-        FOPT                    = 0xF00B
-        FClientAnchor           = 0xF010
-        FClientData             = 0xF011
-        FConnectorRule          = 0xF012
-        FDGSL                   = 0xF119
-        SplitMenuColorContainer = 0xF11E
-
-    containerTypeNames = {
-        Type.dggContainer:            'OfficeArtDggContainer',
-        Type.dgContainer:             'OfficeArtDgContainer',
-        Type.spContainer:             'OfficeArtSpContainer',
-        Type.spgrContainer:           'OfficeArtSpgrContainer',
-        Type.solverContainer:         'OfficeArtSolverContainer',
-        Type.FDG:                     'OfficeArtFDG',
-        Type.FDGGBlock:               'OfficeArtFDGGBlock',
-        Type.FOPT:                    'OfficeArtFOPT',
-        Type.FClientAnchor:           'OfficeArtClientAnchor',
-        Type.FClientData:             'OfficeArtClientData',
-        Type.FSP:                     'OfficeArtFSP',
-        Type.FSPGR:                   'OfficeArtFSPGR',
-        Type.FConnectorRule:          'OfficeArtFConnectorRule',
-        Type.FDGSL:                   'OfficeArtFDGSL',
-        Type.SplitMenuColorContainer: 'OfficeArtSplitMenuColorContainer'
-    }
-
-    @staticmethod
-    def getRecTypeName (recType):
-        if RecordHeader.containerTypeNames.has_key(recType):
-            return RecordHeader.containerTypeNames[recType]
-        return 'unknown'
-
-    @staticmethod
-    def appendHeaderLine (recHdl, line):
-        n = len(line)
-        if n < 60:
-            line += ' '*(60-n)
-            line += '|'
-        recHdl.appendLine(line)
-
-    def __init__ (self, strm):
-        mixed = strm.readUnsignedInt(2)
-        self.recVer = (mixed & 0x000F)
-        self.recInstance = (mixed & 0xFFF0) / 16
-        self.recType = strm.readUnsignedInt(2)
-        self.recLen  = strm.readUnsignedInt(4)
-
-    def appendLines (self, recHdl, level=0):
-        pre = "| "
-        RecordHeader.appendHeaderLine(recHdl, pre + "Record type: 0x%4.4X (%s)"%(self.recType, RecordHeader.getRecTypeName(self.recType)))
-        RecordHeader.appendHeaderLine(recHdl, pre + "  version: 0x%1.1X   instance: 0x%3.3X   size: %d"%
-            (self.recVer, self.recInstance, self.recLen))
-
-
-class ColorRef:
-    def __init__ (self, byte):
-        self.red   = (byte & 0x000000FF)
-        self.green = (byte & 0x0000FF00) / 256 
-        self.blue  = (byte & 0x00FF0000) / 65536
-        self.flag  = (byte & 0xFF000000) / 16777216
-
-        self.paletteIndex = (self.flag & 0x01) != 0
-        self.paletteRGB   = (self.flag & 0x02) != 0
-        self.systemRGB    = (self.flag & 0x04) != 0
-        self.schemeIndex  = (self.flag & 0x08) != 0
-        self.sysIndex     = (self.flag & 0x10) != 0
-
-    def appendLine (self, recHdl, level):
-        if self.paletteIndex:
-            # red and green and used as an unsigned index into the current color palette.
-            paletteId = self.green * 256 + self.red
-            recHdl.appendLine(indent(level) + "color index in current palette: %d"%paletteId)
-        if self.sysIndex:
-            # red and green are used as an unsigned 16-bit index into the system color table.
-            sysId = self.green * 256 + self.red
-            recHdl.appendLine(indent(level) + "system index: %d"%sysId)
-        elif self.schemeIndex:
-            # the red value is used as as a color scheme index
-            recHdl.appendLine(indent(level) + "color scheme index: %d"%self.red)
-
-        else:
-            recHdl.appendLine(indent(level) + "color: (red=%d, green=%d, blue=%d)    flag: 0x%2.2X"%
-                (self.red, self.green, self.blue, self.flag))
-            recHdl.appendLine(indent(level) + "palette index: %s"%recHdl.getTrueFalse(self.paletteIndex))
-            recHdl.appendLine(indent(level) + "palette RGB: %s"%recHdl.getTrueFalse(self.paletteRGB))
-            recHdl.appendLine(indent(level) + "system RGB: %s"%recHdl.getTrueFalse(self.systemRGB))
-            recHdl.appendLine(indent(level) + "system RGB: %s"%recHdl.getTrueFalse(self.systemRGB))
-            recHdl.appendLine(indent(level) + "scheme index: %s"%recHdl.getTrueFalse(self.schemeIndex))
-            recHdl.appendLine(indent(level) + "system index: %s"%recHdl.getTrueFalse(self.sysIndex))
-
-
-
-class FDG:
-    def __init__ (self, strm):
-        self.shapeCount  = strm.readUnsignedInt(4)
-        self.lastShapeID = strm.readUnsignedInt(4)
-
-    def appendLines (self, recHdl, rh):
-        recHdl.appendLine("FDG content (drawing data):")
-        recHdl.appendLine("  ID of this shape: %d"%rh.recInstance)
-        recHdl.appendLine("  shape count: %d"%self.shapeCount)
-        recHdl.appendLine("  last shape ID: %d"%self.lastShapeID)
-
-
-class IDCL:
-    def __init__ (self, strm):
-        self.dgid = strm.readUnsignedInt(4)
-        self.cspidCur = strm.readUnsignedInt(4)
-
-    def appendLines (self, recHdl, rh):
-        recHdl.appendLine("IDCL content:")
-        recHdl.appendLine("  drawing ID: %d"%self.dgid)
-        recHdl.appendLine("  cspidCur: 0x%8.8X"%self.cspidCur)
-
-class FDGG:
-    def __init__ (self, strm):
-        self.spidMax  = strm.readUnsignedInt(4) # current max shape ID
-        self.cidcl    = strm.readUnsignedInt(4) # number of OfficeArtIDCL's.
-        self.cspSaved = strm.readUnsignedInt(4) # total number of shapes in all drawings
-        self.cdgSaved = strm.readUnsignedInt(4) # total number of drawings saved in the file
-
-    def appendLines (self, recHdl, rh):
-        recHdl.appendLine("FDGG content:")
-        recHdl.appendLine("  current max shape ID: %d"%self.spidMax)
-        recHdl.appendLine("  number of OfficeArtIDCL's: %d"%self.cidcl)
-        recHdl.appendLine("  total number of shapes in all drawings: %d"%self.cspSaved)
-        recHdl.appendLine("  total number of drawings in the file: %d"%self.cdgSaved)
-
-class FDGGBlock:
-    def __init__ (self, strm):
-        self.head = FDGG(strm)
-        self.idcls = []
-        # NOTE: The spec says head.cidcl stores the number of IDCL's, but each
-        # FDGGBlock only contains bytes enough to store (head.cidcl - 1) of 
-        # IDCL's.
-        for i in xrange(0, self.head.cidcl-1):
-            idcl = IDCL(strm)
-            self.idcls.append(idcl)
-
-    def appendLines (self, recHdl, rh):
-        self.head.appendLines(recHdl, rh)
-        for idcl in self.idcls:
-            idcl.appendLines(recHdl, rh)
-
-
-class FDGSL:  # selection record: selection mode, focused shape ID, list of selected shape IDs
-    selectionMode = {  # dgslk value -> human-readable description (looked up in appendLines)
-        0x00000000: 'default state',
-        0x00000001: 'ready to rotate',
-        0x00000002: 'ready to change the curvature of line shapes',
-        0x00000007: 'ready to crop the picture'
-    }
-
-    def __init__ (self, strm):  # strm must provide readUnsignedInt(n) and getSize()
-        self.cpsp = strm.readUnsignedInt(4)  # the spec says undefined; read but never dumped.
-        self.dgslk = strm.readUnsignedInt(4) # selection mode
-        self.shapeFocus = strm.readUnsignedInt(4) # shape ID in focus
-        self.shapesSelected = []
-        shapeCount = (strm.getSize() - 20)/4  # NOTE(review): 20 is presumably record header + the 12 bytes read above, and this assumes the stream holds only this record -- confirm
-        for i in xrange(0, shapeCount):
-            spid = strm.readUnsignedInt(4)
-            self.shapesSelected.append(spid)
-
-    def appendLines (self, recHdl, rh):  # rh is accepted for interface parity but not used here
-        recHdl.appendLine("FDGSL content:")
-        recHdl.appendLine("  selection mode: %s"%
-            globals.getValueOrUnknown(FDGSL.selectionMode, self.dgslk))
-        recHdl.appendLine("  ID of shape in focus: %d"%self.shapeFocus)
-        for shape in self.shapesSelected:
-            recHdl.appendLine("  ID of shape selected: %d"%shape)
-
-
-class FOPT:
-    """property table for a shape instance"""
-
-    class TextBoolean:
-
-        def appendLines (self, recHdl, prop, level):
-            A = (prop.value & 0x00000001) != 0
-            B = (prop.value & 0x00000002) != 0
-            C = (prop.value & 0x00000004) != 0
-            D = (prop.value & 0x00000008) != 0
-            E = (prop.value & 0x00000010) != 0
-            F = (prop.value & 0x00010000) != 0
-            G = (prop.value & 0x00020000) != 0
-            H = (prop.value & 0x00040000) != 0
-            I = (prop.value & 0x00080000) != 0
-            J = (prop.value & 0x00100000) != 0
-            recHdl.appendLineBoolean(indent(level) + "fit shape to text",     B)
-            recHdl.appendLineBoolean(indent(level) + "auto text margin",      D)
-            recHdl.appendLineBoolean(indent(level) + "select text",           E)
-            recHdl.appendLineBoolean(indent(level) + "use fit shape to text", G)
-            recHdl.appendLineBoolean(indent(level) + "use auto text margin",  I)
-            recHdl.appendLineBoolean(indent(level) + "use select text",       J)
-
-    class CXStyle:
-        style = [
-            'straight connector',     # 0x00000000
-            'elbow-shaped connector', # 0x00000001
-            'curved connector',       # 0x00000002
-            'no connector'            # 0x00000003
-        ]
-
-        def appendLines (self, recHdl, prop, level):
-            styleName = globals.getValueOrUnknown(FOPT.CXStyle.style, prop.value)
-            recHdl.appendLine(indent(level) + "connector style: %s (0x%8.8X)"%(styleName, prop.value))
-
-    class FillColor:
-
-        def appendLines (self, recHdl, prop, level):
-            color = ColorRef(prop.value)
-            color.appendLine(recHdl, level)
-
-    class FillStyle:  # dumps the bit flags of 'Fill Style Boolean Properties' (0x01BF)
-
-        def appendLines (self, recHdl, prop, level):
-            # Decode the already-parsed property value, as the sibling handlers do;
-            # recHdl is the output sink and must not be consumed as an input stream.
-            flag1 = prop.value & 0xFF          # low byte of the lower 16 bits: the flags
-            flag2 = (prop.value >> 16) & 0xFF  # low byte of the upper 16 bits: the "fUse..." bits
-            A = (flag1 & 0x01) != 0 # fNoFillHitTest
-            B = (flag1 & 0x02) != 0 # fillUseRect
-            C = (flag1 & 0x04) != 0 # fillShape
-            D = (flag1 & 0x08) != 0 # fHitTestFill
-            E = (flag1 & 0x10) != 0 # fFilled
-            F = (flag1 & 0x20) != 0 # fUseShapeAnchor
-            G = (flag1 & 0x40) != 0 # fRecolorFillAsPicture
-
-            H = (flag2 & 0x01) != 0 # fUseNoFillHitTest
-            I = (flag2 & 0x02) != 0 # fUsefillUseRect
-            J = (flag2 & 0x04) != 0 # fUsefillShape
-            K = (flag2 & 0x08) != 0 # fUsefHitTestFill
-            L = (flag2 & 0x10) != 0 # fUsefFilled
-            M = (flag2 & 0x20) != 0 # fUsefUseShapeAnchor
-            N = (flag2 & 0x40) != 0 # fUsefRecolorFillAsPicture
-
-            recHdl.appendLine(indent(level)+"fNoFillHitTest            : %s"%recHdl.getTrueFalse(A))
-            recHdl.appendLine(indent(level)+"fillUseRect               : %s"%recHdl.getTrueFalse(B))
-            recHdl.appendLine(indent(level)+"fillShape                 : %s"%recHdl.getTrueFalse(C))
-            recHdl.appendLine(indent(level)+"fHitTestFill              : %s"%recHdl.getTrueFalse(D))
-            recHdl.appendLine(indent(level)+"fFilled                   : %s"%recHdl.getTrueFalse(E))
-            recHdl.appendLine(indent(level)+"fUseShapeAnchor           : %s"%recHdl.getTrueFalse(F))
-            recHdl.appendLine(indent(level)+"fRecolorFillAsPicture     : %s"%recHdl.getTrueFalse(G))
-
-            recHdl.appendLine(indent(level)+"fUseNoFillHitTest         : %s"%recHdl.getTrueFalse(H))
-            recHdl.appendLine(indent(level)+"fUsefillUseRect           : %s"%recHdl.getTrueFalse(I))
-            recHdl.appendLine(indent(level)+"fUsefillShape             : %s"%recHdl.getTrueFalse(J))
-            recHdl.appendLine(indent(level)+"fUsefHitTestFill          : %s"%recHdl.getTrueFalse(K))
-            recHdl.appendLine(indent(level)+"fUsefFilled               : %s"%recHdl.getTrueFalse(L))
-            recHdl.appendLine(indent(level)+"fUsefUseShapeAnchor       : %s"%recHdl.getTrueFalse(M))
-            recHdl.appendLine(indent(level)+"fUsefRecolorFillAsPicture : %s"%recHdl.getTrueFalse(N))
-
-    class LineColor:
-
-        def appendLines (self, recHdl, prop, level):
-            color = ColorRef(prop.value)
-            color.appendLine(recHdl, level)
-
-    class GroupShape:
-
-        flagNames = [
-            'fPrint',                 # A
-            'fHidden',                # B
-            'fOneD',                  # C
-            'fIsButton',              # D
-            'fOnDblClickNotify',      # E
-            'fBehindDocument',        # F
-            'fEditedWrap',            # G
-            'fScriptAnchor',          # H
-            'fReallyHidden',          # I
-            'fAllowOverlap',          # J
-            'fUserDrawn',             # K
-            'fHorizRule',             # L
-            'fNoshadeHR',             # M
-            'fStandardHR',            # N
-            'fIsBullet',              # O
-            'fLayoutInCell',          # P
-            'fUsefPrint',             # Q
-            'fUsefHidden',            # R
-            'fUsefOneD',              # S
-            'fUsefIsButton',          # T
-            'fUsefOnDblClickNotify',  # U
-            'fUsefBehindDocument',    # V
-            'fUsefEditedWrap',        # W
-            'fUsefScriptAnchor',      # X
-            'fUsefReallyHidden',      # Y
-            'fUsefAllowOverlap',      # Z
-            'fUsefUserDrawn',         # a
-            'fUsefHorizRule',         # b
-            'fUsefNoshadeHR',         # c
-            'fUsefStandardHR',        # d
-            'fUsefIsBullet',          # e
-            'fUsefLayoutInCell'       # f
-        ]
-
-        def appendLines (self, recHdl, prop, level):
-            flag = prop.value
-            flagCount = len(FOPT.GroupShape.flagNames)
-            recHdl.appendLine(indent(level)+"flag: 0x%8.8X"%flag)
-            for i in xrange(0, flagCount):
-                bval = (flag & 0x00000001)
-                recHdl.appendLine(indent(level)+"%s: %s"%(FOPT.GroupShape.flagNames[i], recHdl.getTrueFalse(bval)))
-                flag /= 2
-
-    propTable = {
-        0x00BF: ['Text Boolean Properties', TextBoolean],
-        0x0181: ['Fill Color', FillColor],
-        0x01BF: ['Fill Style Boolean Properties', FillStyle],
-        0x01C0: ['Line Color', LineColor],
-        0x0303: ['Connector Shape Style (cxstyle)', CXStyle],
-        0x03BF: ['Group Shape Boolean Properties', GroupShape]
-    }
-
-    class E:
-        """single property entry in a property table"""
-        def __init__ (self):
-            self.ID          = None
-            self.flagBid     = False
-            self.flagComplex = False
-            self.value       = None
-            self.extra       = None
-
-    def __init__ (self):
-        self.properties = []
-
-    def appendLines (self, recHdl, rh):  # dump each parsed property to recHdl; rh.recInstance is the declared count
-        recHdl.appendLine("FOPT content (property table):")
-        recHdl.appendLine("  property count: %d"%rh.recInstance)
-        # Slice instead of indexing so a table with fewer parsed entries than declared cannot raise IndexError.
-        for prop in self.properties[:rh.recInstance]:
-            recHdl.appendLine("    "+"-"*57)
-            if prop.ID in FOPT.propTable:
-                # We have a handler for this property.
-                # propHdl is expected to have two elements: name (0) and handler class (1).
-                propHdl = FOPT.propTable[prop.ID]
-                recHdl.appendLine("    property name: %s (0x%4.4X)"%(propHdl[0], prop.ID))
-                propHdl[1]().appendLines(recHdl, prop, 2)
-            else:
-                recHdl.appendLine("    property ID: 0x%4.4X"%prop.ID)
-                if prop.flagComplex:
-                    recHdl.appendLine("    complex property: %s"%globals.getRawBytes(prop.extra, True, False))
-                elif prop.flagBid:
-                    recHdl.appendLine("    blip ID: %d"%prop.value)
-                else:
-                    # regular property value
-                    recHdl.appendLine("    property value: 0x%8.8X"%prop.value)
-
-
-class FRIT:  # record made of two 2-byte unsigned group IDs; parsed so the stream advances, never dumped
-    def __init__ (self, strm):  # strm must provide readUnsignedInt(n)
-        self.lastGroupID = strm.readUnsignedInt(2)
-        self.secondLastGroupID = strm.readUnsignedInt(2)
-
-    def appendLines (self, recHdl, rh):  # same signature as the other records; emits no output
-        pass
-
-
-class FSP:
-    def __init__ (self, strm):
-        self.spid = strm.readUnsignedInt(4)
-        self.flag = strm.readUnsignedInt(4)
-
-    def appendLines (self, recHdl, rh):
-        recHdl.appendLine("FSP content (instance of a shape):")
-        recHdl.appendLine("  ID of this shape: %d"%self.spid)
-        groupShape     = (self.flag & 0x0001) != 0
-        childShape     = (self.flag & 0x0002) != 0
-        topMostInGroup = (self.flag & 0x0004) != 0
-        deleted        = (self.flag & 0x0008) != 0
-        oleObject      = (self.flag & 0x0010) != 0
-        haveMaster     = (self.flag & 0x0020) != 0
-        flipHorizontal = (self.flag & 0x0040) != 0
-        flipVertical   = (self.flag & 0x0080) != 0
-        isConnector    = (self.flag & 0x0100) != 0
-        haveAnchor     = (self.flag & 0x0200) != 0
-        background     = (self.flag & 0x0400) != 0
-        haveProperties = (self.flag & 0x0800) != 0
-        recHdl.appendLineBoolean("  group shape", groupShape)
-        recHdl.appendLineBoolean("  child shape", childShape)
-        recHdl.appendLineBoolean("  topmost in group", topMostInGroup)
-        recHdl.appendLineBoolean("  deleted", deleted)
-        recHdl.appendLineBoolean("  OLE object shape", oleObject)
-        recHdl.appendLineBoolean("  have valid master", haveMaster)
-        recHdl.appendLineBoolean("  horizontally flipped", flipHorizontal)
-        recHdl.appendLineBoolean("  vertically flipped", flipVertical)
-        recHdl.appendLineBoolean("  connector shape", isConnector)
-        recHdl.appendLineBoolean("  have anchor", haveAnchor)
-        recHdl.appendLineBoolean("  background shape", background)
-        recHdl.appendLineBoolean("  have shape type property", haveProperties)
-
-
-class FSPGR:
-    def __init__ (self, strm):
-        self.left   = strm.readSignedInt(4)
-        self.top    = strm.readSignedInt(4)
-        self.right  = strm.readSignedInt(4)
-        self.bottom = strm.readSignedInt(4)
-
-    def appendLines (self, recHdl, rh):
-        recHdl.appendLine("FSPGR content (coordinate system of group shape):")
-        recHdl.appendLine("  left boundary: %d"%self.left)
-        recHdl.appendLine("  top boundary: %d"%self.top)
-        recHdl.appendLine("  right boundary: %d"%self.right)
-        recHdl.appendLine("  bottom boundary: %d"%self.bottom)
-
-
-class FConnectorRule:  # connector rule: six 4-byte unsigned fields tying a connector shape to its two end shapes
-    def __init__ (self, strm):  # strm must provide readUnsignedInt(n)
-        self.ruleID = strm.readUnsignedInt(4)
-        self.spIDA = strm.readUnsignedInt(4)  # shape where the connector starts
-        self.spIDB = strm.readUnsignedInt(4)  # shape where the connector ends
-        self.spIDC = strm.readUnsignedInt(4)  # the connector shape itself
-        self.conSiteIDA = strm.readUnsignedInt(4)
-        self.conSiteIDB = strm.readUnsignedInt(4)
-
-    def appendLines (self, recHdl, rh):  # rh is accepted for interface parity but not used here
-        recHdl.appendLine("FConnectorRule content:")
-        recHdl.appendLine("  rule ID: %d"%self.ruleID)
-        recHdl.appendLine("  ID of the shape where the connector starts: %d"%self.spIDA)
-        recHdl.appendLine("  ID of the shape where the connector ends: %d"%self.spIDB)
-        recHdl.appendLine("  ID of the connector shape: %d"%self.spIDC)  # was spIDB, which repeated the end shape
-        recHdl.appendLine("  ID of the connection site in the begin shape: %d"%self.conSiteIDA)
-        recHdl.appendLine("  ID of the connection site in the end shape: %d"%self.conSiteIDB)
-
-
-class MSOCR:
-    def __init__ (self, strm):
-        self.red = strm.readUnsignedInt(1)
-        self.green = strm.readUnsignedInt(1)
-        self.blue = strm.readUnsignedInt(1)
-        flag = strm.readUnsignedInt(1)
-        self.isSchemeIndex = (flag & 0x08) != 0
-
-    def appendLines (self, recHdl, rh):
-        recHdl.appendLine("MSOCR content (color index)")
-        if self.isSchemeIndex:
-            recHdl.appendLine("  scheme index: %d"%self.red)
-        else:
-            recHdl.appendLine("  RGB color: (red=%d, green=%d, blue=%d)"%(self.red, self.green, self.blue))
-
-class SplitMenuColorContainer:
-    def __init__ (self, strm):
-        self.smca = []
-        # this container contains 4 MSOCR records.
-        for i in xrange(0, 4):
-            msocr = MSOCR(strm)
-            self.smca.append(msocr)
-
-    def appendLines (self, recHdl, rh):
-        for msocr in self.smca:
-            msocr.appendLines(recHdl, rh)
-
-
-class FClientAnchorSheet:
-    """Excel-specific anchor data (OfficeArtClientAnchorSheet)"""
-
-    def __init__ (self, strm):
-        # dx is 1/1024th of the underlying cell's width.
-        # dy is 1/1024th of the underlying cell's height.
-        flag = strm.readUnsignedInt(2)
-        self.moveWithCells   = (flag & 0x0001) != 0
-        self.resizeWithCells = (flag & 0x0002 != 0)
-        self.col1 = strm.readUnsignedInt(2)
-        self.dx1 = strm.readUnsignedInt(2)
-        self.row1 = strm.readUnsignedInt(2)
-        self.dy1 = strm.readUnsignedInt(2)
-        self.col2 = strm.readUnsignedInt(2)
-        self.dx2 = strm.readUnsignedInt(2)
-        self.row2 = strm.readUnsignedInt(2)
-        self.dy2 = strm.readUnsignedInt(2)
-
-    def appendLines (self, recHdl, rh):
-        recHdl.appendLine("Client anchor (Excel):")
-        recHdl.appendLine("  cols: %d-%d   rows: %d-%d"%(self.col1, self.col2, self.row1, self.row2))
-        recHdl.appendLine("  dX1: %d  dY1: %d"%(self.dx1, self.dy1))
-        recHdl.appendLine("  dX2: %d  dY2: %d"%(self.dx2, self.dy2))
-        recHdl.appendLineBoolean("  move with cells", self.moveWithCells)
-        recHdl.appendLineBoolean("  resize with cells", self.resizeWithCells)
-
-    def fillModel (self, model, sheet):
-        obj = xlsmodel.Shape(self.col1, self.row1, self.dx1, self.dy1, self.col2, self.row2, self.dx2, self.dy2)
-        sheet.addShape(obj)
-
-# ----------------------------------------------------------------------------
-
-recData = {
-    RecordHeader.Type.FDG: FDG,
-    RecordHeader.Type.FSPGR: FSPGR,
-    RecordHeader.Type.FSP: FSP,
-    RecordHeader.Type.FDGGBlock: FDGGBlock,
-    RecordHeader.Type.FConnectorRule: FConnectorRule,
-    RecordHeader.Type.FDGSL: FDGSL,
-    RecordHeader.Type.FClientAnchor: FClientAnchorSheet,
-    RecordHeader.Type.SplitMenuColorContainer: SplitMenuColorContainer
-}
-
-class MSODrawHandler(globals.ByteStream):
-
-    def __init__ (self, bytes, parent):
-        """The 'parent' instance must have appendLine() method that takes one string argument."""
-
-        globals.ByteStream.__init__(self, bytes)
-        self.parent = parent
-
-    def readFOPT (self, rh):  # parse one FOPT record body (rh.recLen bytes) into a FOPT property table
-        fopt = FOPT()
-        strm = globals.ByteStream(self.readBytes(rh.recLen))
-        # Pass 1: the fixed-size entries (2-byte ID/flags + 4-byte value); rh.recInstance is their count.
-        while len(fopt.properties) < rh.recInstance and not strm.isEndOfRecord():
-            entry = FOPT.E()
-            val = strm.readUnsignedInt(2)
-            entry.ID          = (val & 0x3FFF)
-            entry.flagBid     = (val & 0x4000) # if true, the value is a blip ID.
-            entry.flagComplex = (val & 0x8000) # if true, the value stores the size of the extra bytes.
-            entry.value = strm.readSignedInt(4)
-            fopt.properties.append(entry)
-        for entry in [e for e in fopt.properties if e.flagComplex]:  # pass 2: complex data follows the whole entry array, in entry order
-            entry.extra = strm.readBytes(entry.value)
-        return fopt
-
-    def parseBytes (self):
-        while not self.isEndOfRecord():
-            self.parent.appendLine(headerLine())
-            rh = RecordHeader(self)
-            rh.appendLines(self.parent, 0)
-            # if rh.recType == Type.dgContainer:
-            if rh.recVer == 0xF:
-                # container
-                continue
-
-            self.parent.appendLine(headerLine())
-            if recData.has_key(rh.recType):
-                obj = recData[rh.recType](self)
-                obj.appendLines(self.parent, rh)
-            elif rh.recType == RecordHeader.Type.FOPT:
-                fopt = self.readFOPT(rh)
-                fopt.appendLines(self.parent, rh)
-            else:
-                # unknown object
-                bytes = self.readBytes(rh.recLen)
-                self.parent.appendLine(globals.getRawBytes(bytes, True, False))
-
-    def fillModel (self, model):
-        sheet = model.getCurrentSheet()
-        while not self.isEndOfRecord():
-            rh = RecordHeader(self)
-            if rh.recVer == 0xF:
-                # container
-                continue
-
-            if rh.recType == RecordHeader.Type.FClientAnchor and \
-                model.hostApp == globals.ModelBase.HostAppType.Excel:
-                obj = FClientAnchorSheet(self)
-                obj.fillModel(model, sheet)
-            else:
-                # unknown object
-                bytes = self.readBytes(rh.recLen)
-
-
diff --git a/scratch/mso-dumper/src/node.py b/scratch/mso-dumper/src/node.py
deleted file mode 100644
index 777655d..0000000
--- a/scratch/mso-dumper/src/node.py
+++ /dev/null
@@ -1,219 +0,0 @@
-########################################################################
-#
-#  Copyright (c) 2010 Kohei Yoshida
-#  
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#  
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#  
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
-
-# This file (node.py) gets copied in several of my projects.  Find out a way 
-# to avoid making duplicate copies in each of my projects.
-
-import sys
-
-class NodeType:  # integer tags stored in NodeBase.nodeType and switched on by printNode
-    # unknown node type.
-    Unknown = 0
-    # the document root - typically has only one child element, but it can 
-    # have multiple children.
-    Root    = 1 
-    # node that has name and attributes, and may have child nodes.
-    Element = 2
-    # node that only has textual content.
-    Content = 3
-
-class NodeBase:
-    def __init__ (self, nodeType = NodeType.Unknown):
-        self.parent = None
-        self.nodeType = nodeType
-
-        self.__children = []
-        self.__hasContent = False
-
-    def appendChild (self, node):
-        self.__children.append(node)
-        node.parent = self
-
-    def appendElement (self, name):
-        node = Element(name)
-        self.appendChild(node)
-        return node
-
-    def hasContent (self):
-        return self.__hasContent
-
-    def appendContent (self, text):
-        node = Content(text)
-        self.appendChild(node)
-        self.__hasContent = True
-        return node
-
-    def firstChild (self):
-        return self.__children[0]
-
-    def setChildNodes (self, children):
-        self.__children = children
-
-    def getChildNodes (self):
-        return self.__children
-
-    def firstChildByName (self, name):
-        for child in self.__children:
-            if child.nodeType == NodeType.Element and child.name == name:
-                return child
-        return None
-
-    def getChildByName (self, name):
-        children = []
-        for child in self.__children:
-            if child.nodeType == NodeType.Element and child.name == name:
-                children.append(child)
-        return children
-
-class Root(NodeBase):
-    def __init__ (self):
-        NodeBase.__init__(self, NodeType.Root)
-
-class Content(NodeBase):
-    def __init__ (self, content):
-        NodeBase.__init__(self, NodeType.Content)
-        self.content = content
-
-class Element(NodeBase):
-    def __init__ (self, name, attrs=None):
-        NodeBase.__init__(self, NodeType.Element)
-        self.name = name
-        self.attrs = attrs
-        if self.attrs == None:
-            self.attrs = {}
-
-    def getContent (self):
-        text = ''
-        first = True
-        for child in self.getChildNodes():
-            if first:
-                first = False
-            else:
-                text += ' '
-            if child.nodeType == NodeType.Content:
-                text += child.content
-            elif child.nodeType == NodeType.Element:
-                text += child.getContent()
-        return text
-
-    def getAttr (self, name):
-        if not self.attrs.has_key(name):
-            return None
-        return self.attrs[name]
-
-    def setAttr (self, name, val):
-        self.attrs[name] = val
-
-    def hasAttr (self, name):
-        return self.attrs.has_key(name)
-
-encodeTable = {
-    '>': 'gt',
-    '<': 'lt',
-    '&': 'amp',
-    '"': 'quot',
-    '\'': 'apos'
-}
-
-def encodeString (sin):
-    sout = ''
-    for c in sin:
-        if ord(c) >= 128:
-            # encode non-ascii ranges.
-            sout += "\\x%2.2x"%ord(c)
-        elif encodeTable.has_key(c):
-            # encode html symbols.
-            sout += '&' + encodeTable[c] + ';'
-        else:
-            sout += c
-
-    return sout
-
-def convertAttrValue (val):
-    if type(val) == type(True):
-        if val:
-            val = "true"
-        else:
-            val = "false"
-    elif type(val) == type(0) or type(val) == type(0L):
-        val = "%d"%val
-    elif type(val) == type(0.0):
-        val = "%g"%val
-
-    return val
-
-def prettyPrint (fd, node):
-    printNode(fd, node, 0, True)
-
-def printNode (fd, node, level, breakLine):
-    singleIndent = ''
-    lf = ''
-    if breakLine:
-        singleIndent = ' '*4
-        lf = "\n"
-    indent = singleIndent*level
-    if node.nodeType == NodeType.Root:
-        # root node itself only contains child nodes.
-        for child in node.getChildNodes():
-            printNode(fd, child, level, True)
-    elif node.nodeType == NodeType.Element:
-        hasChildren = len(node.getChildNodes()) > 0
-
-        # We add '<' and '>' (or '/>') after the element content gets 
-        # encoded.
-        line = node.name
-        if len(node.attrs) > 0:
-            keys = node.attrs.keys()
-            keys.sort()
-            for key in keys:
-                val = node.attrs[key]
-                if val == None:
-                    continue
-                val = convertAttrValue(val)
-                line += " " + key + '="' + encodeString(val) + '"'
-
-        if hasChildren:
-            breakChildren = breakLine and not node.hasContent()
-            line = "<%s>"%line
-            if breakChildren:
-                line += "\n"
-            fd.write (indent + line)
-            for child in node.getChildNodes():
-                printNode(fd, child, level+1, breakChildren)
-            line = "</%s>%s"%(node.name, lf)
-            if breakChildren:
-                line = indent + line
-            fd.write (line)
-        else:
-            line = "<%s/>%s"%(line, lf)
-            fd.write (indent + line)
-
-    elif node.nodeType == NodeType.Content:
-        content = node.content
-        content = encodeString(content)
-        if len(content) > 0:
-            fd.write (indent + content + lf)
diff --git a/scratch/mso-dumper/src/ole.py b/scratch/mso-dumper/src/ole.py
deleted file mode 100644
index 9b01928..0000000
--- a/scratch/mso-dumper/src/ole.py
+++ /dev/null
@@ -1,757 +0,0 @@
-########################################################################
-#
-#  Copyright (c) 2010 Kohei Yoshida
-#  
-#  Permission is hereby granted, free of charge, to any person
-#  obtaining a copy of this software and associated documentation
-#  files (the "Software"), to deal in the Software without
-#  restriction, including without limitation the rights to use,
-#  copy, modify, merge, publish, distribute, sublicense, and/or sell
-#  copies of the Software, and to permit persons to whom the
-#  Software is furnished to do so, subject to the following
-#  conditions:
-#  
-#  The above copyright notice and this permission notice shall be
-#  included in all copies or substantial portions of the Software.
-#  
-#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-#  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-#  OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
-
-import sys
-import globals
-from globals import getSignedInt
-# ----------------------------------------------------------------------------
-# Reference: The Microsoft Compound Document File Format by Daniel Rentz
-# http://sc.openoffice.org/compdocfileformat.pdf
-# ----------------------------------------------------------------------------
-
-from globals import output
-
-
-class NoRootStorage(Exception): pass
-
-class ByteOrder:
-    LittleEndian = 0
-    BigEndian    = 1
-    Unknown      = 2
-
-class BlockType:
-    MSAT      = 0
-    SAT       = 1
-    SSAT      = 2
-    Directory = 3
-
-class StreamLocation:
-    SAT  = 0
-    SSAT = 1
-
-class Header(object):
-
-    @staticmethod
-    def byteOrder (chars):
-        b1, b2 = ord(chars[0]), ord(chars[1])
-        if b1 == 0xFE and b2 == 0xFF:
-            return ByteOrder.LittleEndian
-        elif b1 == 0xFF and b2 == 0xFE:
-            return ByteOrder.BigEndian
-        else:
-            return ByteOrder.Unknown
-
-
-    def __init__ (self, bytes, params):  # bytes: raw file content; params: options object (output() reads params.debug)
-        self.bytes = bytes
-        self.MSAT = None
-
-        self.docId = None
-        self.uId = None
-        self.revision = 0
-        self.version = 0
-        self.byteOrder = ByteOrder.Unknown  # NOTE: shadows the static byteOrder() on instances; parse() calls it via the class
-        self.minStreamSize = 0
-
-        self.numSecMSAT = 0
-        self.numSecSSAT = 0
-        self.numSecSAT = 0
-
-        self.__secIDFirstMSAT = -2
-        self.__secIDFirstDirStrm = -2
-        self.__secIDFirstSSAT = -2
-
-        self.secSize = 9       # exponent, not byte count: sector size is 2**9 = 512 bytes
-        self.secSizeShort = 6  # exponent, not byte count: short sector size is 2**6 = 64 bytes
-
-        self.params = params
-
-    def getSectorSize (self):
-        return 2**self.secSize
-
-
-    def getShortSectorSize (self):
-        return 2**self.secSizeShort
-
-
-    def getFirstSectorID (self, blockType):
-        if blockType == BlockType.MSAT:
-            return self.__secIDFirstMSAT
-        elif blockType == BlockType.SSAT:
-            return self.__secIDFirstSSAT
-        elif blockType == BlockType.Directory:
-            return self.__secIDFirstDirStrm
-        return -2
-
-
-    def output (self):
-
-        def printRawBytes (bytes):
-            for b in bytes:
-                output("%2.2X "%ord(b))
-            output("\n")
-
-        def printSep (c='-', w=68, prefix=''):
-            print(prefix + c*w)
-
-        printSep('=', 68)
-        print("Compound Document Header")
-        printSep('-', 68)
-
-        if self.params.debug:
-            globals.dumpBytes(self.bytes[0:512])
-            printSep('-', 68)
-
-        # document ID and unique ID
-        output("Document ID: ")
-        printRawBytes(self.docId)
-        output("Unique ID: ")
-        printRawBytes(self.uId)
-
-        # revision and version
-        print("Revision: %d  Version: %d"%(self.revision, self.version))
-
-        # byte order
-        output("Byte order: ")
-        if self.byteOrder == ByteOrder.LittleEndian:
-            print("little endian")
-        elif self.byteOrder == ByteOrder.BigEndian:
-            print("big endian")
-        else:
-            print("unknown")
-
-        # sector size (usually 512 bytes)
-        print("Sector size: %d (%d)"%(2**self.secSize, self.secSize))
-
-        # short sector size (usually 64 bytes)
-        print("Short sector size: %d (%d)"%(2**self.secSizeShort, self.secSizeShort))
-
-        # total number of sectors in SAT (equals the number of sector IDs 
-        # stored in the MSAT).
-        print("Total number of sectors used in SAT: %d"%self.numSecSAT)
-
-        print("Sector ID of the first sector of the directory stream: %d"%
-              self.__secIDFirstDirStrm)
-
-        print("Minimum stream size: %d"%self.minStreamSize)
-
-        if self.__secIDFirstSSAT == -2:
-            print("Sector ID of the first SSAT sector: [none]")
-        else:
-            print("Sector ID of the first SSAT sector: %d"%self.__secIDFirstSSAT)
-
-        print("Total number of sectors used in SSAT: %d"%self.numSecSSAT)
-
-        if self.__secIDFirstMSAT == -2:
-            # There is no more sector ID stored outside the header.
-            print("Sector ID of the first MSAT sector: [end of chain]")
-        else:
-            # There is more sector IDs than 109 IDs stored in the header.
-            print("Sector ID of the first MSAT sector: %d"%(self.__secIDFirstMSAT))
-
-        print("Total number of sectors used to store additional MSAT: %d"%self.numSecMSAT)
-
-
-    def parse (self):
-
-        # document ID and unique ID
-        self.docId = self.bytes[0:8]
-        self.uId = self.bytes[8:24]
-
-        # revision and version
-        self.revision = getSignedInt(self.bytes[24:26])
-        self.version = getSignedInt(self.bytes[26:28])
-
-        # byte order
-        self.byteOrder = Header.byteOrder(self.bytes[28:30])
-
-        # sector size (usually 512 bytes)
-        self.secSize = getSignedInt(self.bytes[30:32])
-
-        # short sector size (usually 64 bytes)
-        self.secSizeShort = getSignedInt(self.bytes[32:34])
-
-        # total number of sectors in SAT (equals the number of sector IDs 
-        # stored in the MSAT).
-        self.numSecSAT = getSignedInt(self.bytes[44:48])
-
-        self.__secIDFirstDirStrm = getSignedInt(self.bytes[48:52])
-        self.minStreamSize = getSignedInt(self.bytes[56:60])
-        self.__secIDFirstSSAT = getSignedInt(self.bytes[60:64])
-        self.numSecSSAT = getSignedInt(self.bytes[64:68])
-        self.__secIDFirstMSAT = getSignedInt(self.bytes[68:72])
-        self.numSecMSAT = getSignedInt(self.bytes[72:76])
-
-        # master sector allocation table
-        self.MSAT = MSAT(2**self.secSize, self.bytes, self.params)
-
-        # First part of MSAT consisting of an array of up to 109 sector IDs.
-        # Each sector ID is 4 bytes in length.
-        for i in xrange(0, 109):
-            pos = 76 + i*4
-            id = getSignedInt(self.bytes[pos:pos+4])
-            if id == -1:
-                break
-
-            self.MSAT.appendSectorID(id)
-
-        if self.__secIDFirstMSAT != -2:
-            # additional sectors are used to store more SAT sector IDs.
-            secID = self.__secIDFirstMSAT
-            size = self.getSectorSize()
-            inLoop = True
-            while inLoop:
-                pos = 512 + secID*size
-                bytes = self.bytes[pos:pos+size]
-                n = int(size/4)
-                for i in xrange(0, n):
-                    pos = i*4
-                    id = getSignedInt(bytes[pos:pos+4])
-                    if id < 0:
-                        inLoop = False
-                        break
-                    elif i == n-1:
-                        # last sector ID - points to the next MSAT sector.
-                        secID = id
-                        break
-                    else:
-                        self.MSAT.appendSectorID(id)
-
-        return 512 
-
-
-    def getMSAT (self):
-        return self.MSAT
-
-
-    def getSAT (self):
-        return self.MSAT.getSAT()
-
-
-    def getSSAT (self):
-        ssatID = self.getFirstSectorID(BlockType.SSAT)
-        if ssatID < 0:
-            return None
-        chain = self.getSAT().getSectorIDChain(ssatID)
-        if len(chain) == 0:
-            return None
-        obj = SSAT(2**self.secSize, self.bytes, self.params)
-        for secID in chain:
-            obj.addSector(secID)
-        obj.buildArray()
-        return obj
-
-
-    def getDirectory (self):
-        dirID = self.getFirstSectorID(BlockType.Directory)
-        if dirID < 0:
-            return None
-        chain = self.getSAT().getSectorIDChain(dirID)
-        if len(chain) == 0:
-            return None
-        obj = Directory(self, self.params)
-        for secID in chain:
-            obj.addSector(secID)
-        return obj
-
-
-    def dummy ():
-        pass
-
-
-
-
-class MSAT(object):
-    """Master Sector Allocation Table (MSAT)
-
-This class represents the master sector allocation table (MSAT) that stores 
-sector IDs that point to all the sectors that are used by the sector 
-allocation table (SAT).  The actual SAT are to be constructed by combining 
-all the sectors pointed by the sector IDs in order of occurrence.
-"""
-    def __init__ (self, sectorSize, bytes, params):
-        self.sectorSize = sectorSize
-        self.secIDs = []
-        self.bytes = bytes
-        self.__SAT = None
-
-        self.params = params
-
-    def appendSectorID (self, id):
-        self.secIDs.append(id)
-
-    def output (self):
-        print('')
-        print("="*68)
-        print("Master Sector Allocation Table (MSAT)")
-        print("-"*68)
-
-        for id in self.secIDs:
-            print("sector ID: %5d   (pos: %7d)"%(id, 512+id*self.sectorSize))
-
-    def getSATSectorPosList (self):
-        list = []
-        for id in self.secIDs:
-            pos = 512 + id*self.sectorSize
-            list.append([id, pos])
-        return list
-
-    def getSAT (self):
-        if self.__SAT != None:
-            return self.__SAT
-
-        obj = SAT(self.sectorSize, self.bytes, self.params)
-        for id in self.secIDs:
-            obj.addSector(id)
-        obj.buildArray()
-        self.__SAT = obj
-        return self.__SAT
-
-
-class SAT(object):
-    """Sector Allocation Table (SAT)
-"""
-    def __init__ (self, sectorSize, bytes, params):
-        self.sectorSize = sectorSize
-        self.sectorIDs = []
-        self.bytes = bytes
-        self.array = []
-        self.params = params
-
-
-    def getSectorSize (self):
-        return self.sectorSize
-
-
-    def addSector (self, id):
-        self.sectorIDs.append(id)
-
-
-    def buildArray (self):
-        if len(self.array) > 0:
-            # array already built.
-            return
-
-        numItems = int(self.sectorSize/4)
-        self.array = []
-        for secID in self.sectorIDs:
-            pos = 512 + secID*self.sectorSize
-            for i in xrange(0, numItems):
-                beginPos = pos + i*4
-                id = getSignedInt(self.bytes[beginPos:beginPos+4])
-                self.array.append(id)
-
-
-    def outputRawBytes (self):
-        bytes = ""
-        for secID in self.sectorIDs:
-            pos = 512 + secID*self.sectorSize
-            bytes += self.bytes[pos:pos+self.sectorSize]
-        globals.dumpBytes(bytes, 512)
-
-
-    def outputArrayStats (self):
-        sectorTotal = len(self.array)
-        sectorP  = 0       # >= 0
-        sectorM1 = 0       # -1
-        sectorM2 = 0       # -2
-        sectorM3 = 0       # -3
-        sectorM4 = 0       # -4
-        sectorMElse = 0    # < -4
-        sectorLiveTotal = 0
-        for i in xrange(0, len(self.array)):
-            item = self.array[i]
-            if item >= 0:
-                sectorP += 1
-            elif item == -1:
-                sectorM1 += 1
-            elif item == -2:
-                sectorM2 += 1
-            elif item == -3:
-                sectorM3 += 1
-            elif item == -4:
-                sectorM4 += 1

... etc. - the rest is truncated