[Libreoffice-commits] .: scratch/mso-dumper
Kohei Yoshida
kohei at kemper.freedesktop.org
Thu Feb 17 18:15:23 PST 2011
scratch/mso-dumper/misc/mso-dumper.spec | 76
scratch/mso-dumper/misc/package.sh | 11
scratch/mso-dumper/oletool.diff | 230 --
scratch/mso-dumper/ppt-dump.py | 121 -
scratch/mso-dumper/src/formula.py | 859 -------
scratch/mso-dumper/src/globals.py | 472 ----
scratch/mso-dumper/src/msodraw.py | 607 -----
scratch/mso-dumper/src/node.py | 219 -
scratch/mso-dumper/src/ole.py | 757 ------
scratch/mso-dumper/src/pptrecord.py | 1603 --------------
scratch/mso-dumper/src/pptstream.py | 447 ----
scratch/mso-dumper/src/xlsmodel.py | 627 -----
scratch/mso-dumper/src/xlsrecord.py | 3350 ------------------------------
scratch/mso-dumper/src/xlsstream.py | 518 ----
scratch/mso-dumper/src/xmlpp.py | 149 -
scratch/mso-dumper/xls-dump.py | 178 -
scratch/mso-dumper/xls_sheetpass_hash.cxx | 85
17 files changed, 10309 deletions(-)
New commits:
commit 9db0f8d44426f9981058108af1981b1e2f08b3dd
Author: Kohei Yoshida <kyoshida at novell.com>
Date: Thu Feb 17 21:14:45 2011 -0500
Removed mso-dumper directory.
This project has been relocated to libreoffice/contrib/mso-dumper.
diff --git a/scratch/mso-dumper/misc/mso-dumper.spec b/scratch/mso-dumper/misc/mso-dumper.spec
deleted file mode 100644
index 9a8f63e..0000000
--- a/scratch/mso-dumper/misc/mso-dumper.spec
+++ /dev/null
@@ -1,76 +0,0 @@
-Name: mso-dumper
-# List of additional build dependencies
-Requires: python
-BuildRequires: python
-Version: 0.2
-Release: 1
-BuildArch: noarch
-License: LGPL v2 or later
-Packager: Kohei Yoshida <kyoshida at novell.com>
-Source: mso-dumper-0.2.tar.gz
-Group: Development/Tools/Debuggers
-Summary: Dumper tool for Microsoft Office binary file format
-
-BuildRoot: %{_tmppath}/%{name}-%{version}-build
-
-%description
-This package provides dumper scripts for dumping the contents of Microsoft
-Office binary file formats. Currently only the Excel and PowerPoint file
-formats are supported.
-
-
-Authors:
---------
- Kohei Yoshida <kyoshida at novell.com>
- Thorsten Behrens <tbehrens at novell.com>
- Fredrik Ekholdt (for xmlpp.py)
-
-%prep
-%setup -q
-
-%build
-mkdir -p %buildroot/usr/bin
-mkdir -p %buildroot/usr/share/mso-dumper-%version/src
-mkdir -p %buildroot/usr/share/doc/packages/mso-dumper-%version
-cp xls-dump.py %buildroot/usr/share/mso-dumper-%version/
-cp ppt-dump.py %buildroot/usr/share/mso-dumper-%version/
-cp src/*.py %buildroot/usr/share/mso-dumper-%version/src
-
-%install
-install -d %buildroot/usr/share/mso-dumper-%version
-#install -d %buildroot/usr/share/doc/packages/mso-dumper-%version
-cd %buildroot/usr/bin
-ln -s ../share/mso-dumper-%version/xls-dump.py xls-dump
-ln -s ../share/mso-dumper-%version/ppt-dump.py ppt-dump
-
-%clean
-rm -rf %buildroot
-
-%files
-%defattr(-,root,root)
-%dir /usr/share/doc/packages/mso-dumper-%version
-%dir /usr/share/mso-dumper-%version
-%dir /usr/share/mso-dumper-%version/src
-
-/usr/bin/xls-dump
-/usr/bin/ppt-dump
-/usr/share/mso-dumper-%version/xls-dump.py
-/usr/share/mso-dumper-%version/ppt-dump.py
-/usr/share/mso-dumper-%version/src/ole.py
-/usr/share/mso-dumper-%version/src/formula.py
-/usr/share/mso-dumper-%version/src/globals.py
-/usr/share/mso-dumper-%version/src/node.py
-/usr/share/mso-dumper-%version/src/pptstream.py
-/usr/share/mso-dumper-%version/src/pptrecord.py
-/usr/share/mso-dumper-%version/src/xlsstream.py
-/usr/share/mso-dumper-%version/src/xlsmodel.py
-/usr/share/mso-dumper-%version/src/xlsrecord.py
-/usr/share/mso-dumper-%version/src/xmlpp.py
-
-#%doc AUTHORS COPYING ChangeLog INSTALL NEWS README README-WIN32 TODO
-
-%changelog
-* Wed Mar 17 2010 kyoshida at novell.com
-- Initial package version 0.2.
-
-
diff --git a/scratch/mso-dumper/misc/package.sh b/scratch/mso-dumper/misc/package.sh
deleted file mode 100755
index 38ab04d..0000000
--- a/scratch/mso-dumper/misc/package.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-# Execute this in the parent directory of the 'mso-dumper' directory.
-
-VER=0.2
-DESTDIR=mso-dumper-$VER
-mkdir -p $DESTDIR/src
-cp mso-dumper/*.py $DESTDIR/
-cp mso-dumper/src/*.py $DESTDIR/src/
-tar cvf $DESTDIR.tar.gz $DESTDIR
-rm -rf $DESTDIR
-
diff --git a/scratch/mso-dumper/oletool.diff b/scratch/mso-dumper/oletool.diff
deleted file mode 100644
index c882229..0000000
--- a/scratch/mso-dumper/oletool.diff
+++ /dev/null
@@ -1,230 +0,0 @@
---- /dev/null 2011-01-31 08:27:50.166383946 +0000
-+++ scratch/mso-dumper/src/oletool.py 2011-01-31 09:15:32.000000000 +0000
-@@ -0,0 +1,204 @@
-+#!/usr/bin/env python
-+import sys, os.path, optparse
-+
-+sys.path.append(sys.path[0]+"/src")
-+
-+import ole, globals
-+
-+from globals import encodeName
-+class DateTime:
-+ def __init__(self):
-+ self.day = 0
-+ self.month = 0
-+ self.year = 0
-+ self.hour = 0
-+ self.second = 0
-+
-+class DirNode:
-+
-+ def __init__(self, entry):
-+ self.Nodes = []
-+ self.Entry = entry;
-+ self.HierachicalName = ''
-+
-+ def isStorage():
-+ return entry.Type == Directory.Type.RootStorage
-+
-+class OleContainer:
-+
-+ def __init__(self,filePath, params ):
-+ self.filePath = filePath
-+ self.header = None
-+ self.params = params
-+ self.pos = None
-+
-+ def __getModifiedTime(self, entry):
-+ # need parse/decode Entry.TimeModified
-+ # ( although the documentation indicates that it might not be
-+ # worth it 'cause they are not universally used
-+ modified = DateTime
-+ modified.day = 0
-+ modified.month = 0
-+ modified.year = 0
-+ modified.hour = 0
-+ modified.second = 0
-+ return modified
-+
-+ def __parseFile (self):
-+ file = open(self.filePath, 'rb')
-+ self.strmData = globals.StreamData()
-+ self.chars = file.read()
-+ file.close()
-+
-+ def __addSiblings( self, entries, parent, child ):
-+ # add left siblings
-+ nextLeft = child.Entry.DirIDLeft
-+ if ( nextLeft > 0 ):
-+ newEntry = DirNode( entries[ nextLeft ] )
-+ newEntry.HierachicalName = parent.HierachicalName + globals.encodeName( newEntry.Entry.Name )
-+ if newEntry.Entry.DirIDRoot > 0:
-+ newEntry.HierachicalName = newEntry.HierachicalName + '/'
-+
-+ self.__addSiblings( entries, parent, newEntry )
-+ parent.Nodes.insert( 0, newEntry )
-+
-+ nextRight = child.Entry.DirIDRight
-+ # add children to the right
-+ if ( nextRight > 0 ):
-+ newEntry = DirNode( entries[ nextRight ] )
-+ newEntry.HierachicalName = parent.HierachicalName + globals.encodeName( newEntry.Entry.Name )
-+ if newEntry.Entry.DirIDRoot > 0:
-+ newEntry.HierachicalName = newEntry.HierachicalName + '/'
-+ self.__addSiblings( entries, parent, newEntry )
-+ parent.Nodes.append( newEntry )
-+
-+ def __buildTreeImpl(self, entries, parent ):
-+
-+ if ( parent.Entry.DirIDRoot > 0 ):
-+ newEntry = DirNode( entries[ parent.Entry.DirIDRoot ] )
-+ newEntry.HierachicalName = parent.HierachicalName + globals.encodeName( newEntry.Entry.Name )
-+ if ( newEntry.Entry.DirIDRoot > 0 ):
-+ newEntry.HierachicalName = newEntry.HierachicalName + '/'
-+
-+ self.__addSiblings( entries, parent, newEntry )
-+ parent.Nodes.append( newEntry )
-+
-+ for child in parent.Nodes:
-+ if child.Entry.DirIDRoot > 0:
-+ self.__buildTreeImpl( entries, child )
-+
-+ def __buildTree(self, entries ):
-+ treeRoot = DirNode( entries[0] )
-+ self.__buildTreeImpl( entries, treeRoot )
-+ return treeRoot
-+
-+ def __findEntryByHierachicalName( self, node, name ):
-+ if node.HierachicalName == name:
-+ return node.Entry
-+ else:
-+ for child in node.Nodes:
-+ result = self.__findEntryByHierachicalName( child, name )
-+ if result != None:
-+ return result
-+ return None
-+
-+ def __printListReport( self, treeNode, stats ):
-+
-+ dateInfo = self.__getModifiedTime( treeNode.Entry )
-+
-+ if len( treeNode.HierachicalName ) > 0 :
-+ print '{0:8d} {1:0<2d}-{2:0<2d}-{3:0<2d} {4:0<2d}:{5:0<2d} {6}'.format(treeNode.Entry.StreamSize, dateInfo.day, dateInfo.month, dateInfo.year, dateInfo.hour, dateInfo.second, treeNode.HierachicalName )
-+
-+ for node in treeNode.Nodes:
-+ # ignore the root
-+ self.__printListReport( node, stats )
-+
-+ def __printHeader(self):
-+ print ("OLE: %s")%self.filePath
-+ print (" Length Date Time Name")
-+ print ("-------- ---- ---- ----")
-+
-+ def listEntries(self):
-+ self.__parseFile()
-+ #if self.header == None:
-+ # self.header = ole.Header(self.chars, self.params)
-+ # self.pos = self.header.parse()
-+ self.header = ole.Header(self.chars, self.params)
-+ self.pos = self.header.parse()
-+ obj = self.header.getDirectory()
-+ if obj != None:
-+ obj.parseDirEntries()
-+ count = 0
-+ for entry in obj.entries:
-+ print("Entry [0x%x] Name %s Root 0x%x Left 0x%x Right %x")%( count, entry.Name, entry.DirIDRoot, entry.DirIDLeft, entry.DirIDRight )
-+ count = count + 1
-+ def list(self):
-+ # need to share the inititialisation and parse stuff between the different options
-+ self.__parseFile()
-+ if self.header == None:
-+ self.header = ole.Header(self.chars, self.params)
-+ self.pos = self.header.parse()
-+ obj = self.header.getDirectory()
-+ if obj != None:
-+ obj.parseDirEntries()
-+ count = 0
-+ rootNode = self.__buildTree( obj.entries )
-+
-+ self.__printHeader()
-+ self.__printListReport( rootNode, obj.entries )
-+ # need to print a footer ( total bytes, total files like unzip )
-+
-+ def extract(self, name):
-+ if self.header == None:
-+ self.__parseFile()
-+ self.header = ole.Header(self.chars, self.params)
-+ self.pos = self.header.parse()
-+
-+ obj = self.header.getDirectory()
-+ if obj != None:
-+ obj.parseDirEntries()
-+
-+ root = self.__buildTree( obj.entries )
-+ entry = self.__findEntryByHierachicalName( root, name )
-+
-+ if entry == None or entry.DirIDRoot > 0 :
-+ print "can't extract %s"%name
-+ return
-+
-+ bytes = obj.getRawStreamByEntry( entry )
-+
-+ file = open(entry.Name, 'wb')
-+ file.write( bytes )
-+ file.close
-+def main ():
-+ parser = optparse.OptionParser()
-+ parser.add_option("-l", "--list", action="store_true", dest="list", default=False, help="lists ole contents")
-+ parser.add_option("-x", "--extract", action="store_true", dest="extract", default=False, help="extract file")
-+
-+
-+ options, args = parser.parse_args()
-+
-+ params = globals.Params()
-+
-+ params.list = options.list
-+ params.extract = options.extract
-+
-+ if len(args) < 1:
-+ globals.error("takes at least one arguments\n")
-+ parser.print_help()
-+ sys.exit(1)
-+
-+ container = OleContainer( args[ 0 ], params )
-+
-+ if params.list == True:
-+ container.list()
-+ if params.extract:
-+ files = args
-+ files.pop(0)
-+
-+ for file in files:
-+ container.extract( file )
-+# container.listEntries()
-+
-+if __name__ == '__main__':
-+ main()
-diff --git a/scratch/mso-dumper/src/ole.py b/scratch/mso-dumper/src/ole.py
-index 9b01928..3db2458 100644
---- a/scratch/mso-dumper/src/ole.py
-+++ b/scratch/mso-dumper/src/ole.py
-@@ -526,7 +526,8 @@ entire file stream.
- self.RootStorageBytes += self.header.bytes[pos:pos+self.sectorSize]
-
-
-- def __getRawStream (self, entry):
-+ def getRawStreamByEntry (self, entry):
-+
- chain = []
- if entry.StreamLocation == StreamLocation.SAT:
- chain = self.header.getSAT().getSectorIDChain(entry.StreamSectorID)
-@@ -561,7 +562,7 @@ entire file stream.
- bytes = []
- for entry in self.entries:
- if entry.Name == name:
-- bytes = self.__getRawStream(entry)
-+ bytes = self.getRawStreamByEntry(entry)
- break
- return bytes
-
diff --git a/scratch/mso-dumper/ppt-dump.py b/scratch/mso-dumper/ppt-dump.py
deleted file mode 100755
index f303fb2..0000000
--- a/scratch/mso-dumper/ppt-dump.py
+++ /dev/null
@@ -1,121 +0,0 @@
-#!/usr/bin/env python
-########################################################################
-#
-# Copyright (c) 2010 Kohei Yoshida, Thorsten Behrens
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
-
-import sys, os.path, getopt
-sys.path.append(sys.path[0]+"/src")
-import ole, pptstream, globals
-
-from globals import error
-
-def usage (exname):
- exname = os.path.basename(exname)
- msg = """Usage: %s [options] [ppt file]
-
-Options:
- --help displays this help message.
-"""%exname
- print msg
-
-
-class PPTDumper(object):
-
- def __init__ (self, filepath, params):
- self.filepath = filepath
- self.params = params
-
- def __printDirHeader (self, dirname, byteLen):
- dirname = globals.encodeName(dirname)
- print("")
- print("="*68)
- print("%s (size: %d bytes)"%(dirname, byteLen))
- print("-"*68)
-
- def dump (self):
- file = open(self.filepath, 'rb')
- strm = pptstream.PPTFile(file.read(), self.params)
- file.close()
- strm.printStreamInfo()
- strm.printHeader()
- strm.printDirectory()
- dirnames = strm.getDirectoryNames()
- result = True
- for dirname in dirnames:
- if len(dirname) == 0 or dirname == 'Root Entry':
- continue
-
- dirstrm = strm.getDirectoryStreamByName(dirname)
- self.__printDirHeader(dirname, len(dirstrm.bytes))
- if dirname == "PowerPoint Document":
- if not self.__readSubStream(dirstrm):
- result = False
- elif dirname == "Current User":
- if not self.__readSubStream(dirstrm):
- result = False
- else:
- globals.dumpBytes(dirstrm.bytes, 512)
- return result
-
- def __readSubStream (self, strm):
- # read all records in substream
- return strm.readRecords()
-
-
-def main (args):
- exname, args = args[0], args[1:]
- if len(args) < 1:
- print("takes at least one argument")
- usage(exname)
- return
-
- params = globals.Params()
- try:
- opts, args = getopt.getopt(args, "h", ["help", "debug", "show-sector-chain"])
- for opt, arg in opts:
- if opt in ['-h', '--help']:
- usage(exname)
- return
- elif opt in ['--debug']:
- params.debug = True
- elif opt in ['--show-sector-chain']:
- params.showSectorChain = True
- else:
- error("unknown option %s\n"%opt)
- usage()
-
- except getopt.GetoptError:
- error("error parsing input options\n")
- usage(exname)
- return
-
- dumper = PPTDumper(args[0], params)
- if not dumper.dump():
- error("FAILURE\n")
-
-
-if __name__ == '__main__':
- main(sys.argv)
diff --git a/scratch/mso-dumper/src/formula.py b/scratch/mso-dumper/src/formula.py
deleted file mode 100644
index 85bb6bf..0000000
--- a/scratch/mso-dumper/src/formula.py
+++ /dev/null
@@ -1,859 +0,0 @@
-########################################################################
-#
-# Copyright (c) 2010 Kohei Yoshida
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
-
-import struct, sys
-import globals
-
-class InvalidCellAddress(Exception): pass
-
-def toColName (colID):
- if colID > 255:
- globals.error("Column ID greater than 255")
- raise InvalidCellAddress
- n1 = colID % 26
- n2 = int(colID/26)
- name = struct.pack('b', n1 + ord('A'))
- if n2 > 0:
- name += struct.pack('b', n2 + ord('A'))
- return name
-
-def toAbsName (name, isRelative):
- if not isRelative:
- name = '$' + name
- return name
-
-class CellAddress(object):
- def __init__ (self, col=0, row=0, colRel=False, rowRel=False):
- self.col = col
- self.row = row
- self.isColRelative = colRel
- self.isRowRelative = rowRel
-
- def toString (self):
- return self.getName()
-
- def getName (self):
- colName = toAbsName(toColName(self.col), self.isColRelative)
- rowName = toAbsName("%d"%(self.row+1), self.isRowRelative)
- return colName + rowName
-
-class CellRange(object):
- def __init__ (self):
- self.firstRow = 0
- self.lastRow = 0
- self.firstCol = 0
- self.lastCol = 0
- self.isFirstRowRelative = False
- self.isLastRowRelative = False
- self.isFirstColRelative = False
- self.isLastColRelative = False
-
- def toString (self):
- return self.getName()
-
- def getName (self):
- col1 = toColName(self.firstCol)
- col2 = toColName(self.lastCol)
- row1 = "%d"%(self.firstRow+1)
- row2 = "%d"%(self.lastRow+1)
- col1 = toAbsName(col1, self.isFirstColRelative)
- col2 = toAbsName(col2, self.isLastColRelative)
- row1 = toAbsName(row1, self.isFirstRowRelative)
- row2 = toAbsName(row2, self.isLastRowRelative)
- return col1 + row1 + ':' + col2 + row2
-
-def parseCellAddress (bytes):
- if len(bytes) != 4:
- globals.error("Byte size is %d but expected 4 bytes for cell address.\n"%len(bytes))
- raise InvalidCellAddress
-
- row = globals.getSignedInt(bytes[0:2])
- col = globals.getSignedInt(bytes[2:4])
- colRelative = ((col & 0x4000) != 0)
- rowRelative = ((col & 0x8000) != 0)
- col = (col & 0x00FF)
- obj = CellAddress(col, row, colRelative, rowRelative)
- return obj
-
-def parseCellRangeAddress (bytes):
- if len(bytes) != 8:
- raise InvalidCellAddress
-
- obj = CellRange()
- obj.firstRow = globals.getSignedInt(bytes[0:2])
- obj.lastRow = globals.getSignedInt(bytes[2:4])
- obj.firstCol = globals.getSignedInt(bytes[4:6])
- obj.lastCol = globals.getSignedInt(bytes[6:8])
-
- obj.isFirstColRelative = ((obj.firstCol & 0x4000) != 0)
- obj.isFirstRowRelative = ((obj.firstCol & 0x8000) != 0)
- obj.firstCol = (obj.firstCol & 0x00FF)
-
- obj.isLastColRelative = ((obj.lastCol & 0x4000) != 0)
- obj.isLastRowRelative = ((obj.lastCol & 0x8000) != 0)
- obj.lastCol = (obj.lastCol & 0x00FF)
- return obj
-
-
-def makeSheetName (sheet1, sheet2):
- if sheet2 == None or sheet1 == sheet2:
- sheetName = "sheetID='%d'"%sheet1
- else:
- sheetName = "sheetID='%d-%d'"%(sheet1, sheet2)
- return sheetName
-
-
-class TokenBase(object):
- """base class for token handler
-
-Derive a class from this base class to create a token handler for a formula
-token.
-
-The parse method takes the token array position that points to the first
-token to be processed, and returns the position of the laste token that has
-been processed. So, if the handler processes only one token, it should
-return the same value it receives without incrementing it.
-
-"""
- def __init__ (self, header, tokens):
- self.header = header
- self.tokens = tokens
- self.size = len(self.tokens)
- self.init()
-
- def init (self):
- """initializer for a derived class"""
- pass
-
- def parse (self, i):
- return i
-
- def getText (self):
- return ''
-
-class Add(TokenBase): pass
-class Sub(TokenBase): pass
-class Mul(TokenBase): pass
-class Div(TokenBase): pass
-class Power(TokenBase): pass
-class Concat(TokenBase): pass
-class LT(TokenBase): pass
-class LE(TokenBase): pass
-class EQ(TokenBase): pass
-class GE(TokenBase): pass
-class GT(TokenBase): pass
-class NE(TokenBase): pass
-class Isect(TokenBase): pass
-class List(TokenBase): pass
-class Range(TokenBase): pass
-
-class Plus(TokenBase): pass
-class Minus(TokenBase): pass
-class Percent(TokenBase): pass
-
-class NameX(TokenBase):
- """external name"""
-
- def parse (self, i):
- i += 1
- self.refID = globals.getSignedInt(self.tokens[i:i+2])
- i += 2
- self.nameID = globals.getSignedInt(self.tokens[i:i+2])
- i += 2
- return i
-
- def getText (self):
- return "<externname externSheetID='%d' nameID='%d'>"%(self.refID, self.nameID)
-
-
-class Ref3dR(TokenBase):
- """3D reference or external reference to a cell"""
-
- def init (self):
- self.cell = None
- self.sheet1 = None
- self.sheet2 = None
-
- def parse (self, i):
- try:
- i += 1
- self.sheet1 = globals.getSignedInt(self.tokens[i:i+2])
- i += 2
- if self.header == 0x0023:
- # 3A in EXTERNNAME expects a 2nd sheet index
- self.sheet2 = globals.getSignedInt(self.tokens[i:i+2])
- i += 2
- self.cell = parseCellAddress(self.tokens[i:i+4])
- i += 4
- except InvalidCellAddress:
- pass
- return i
-
- def getText (self):
- if self.cell == None:
- return ''
- cellName = self.cell.getName()
- sheetName = makeSheetName(self.sheet1, self.sheet2)
- return "<3dref %s cellAddress='%s'>"%(sheetName, cellName)
-
-
-class Ref3dV(TokenBase):
- """3D reference or external reference to a cell"""
-
- def init (self):
- self.cell = None
-
- def parse (self, i):
- try:
- i += 1
- self.extSheetId = globals.getSignedInt(self.tokens[i:i+2])
- i += 2
- self.cell = parseCellAddress(self.tokens[i:i+4])
- i += 4
- except InvalidCellAddress:
- pass
- return i
-
- def getText (self):
- if self.cell == None:
- return ''
- cellName = self.cell.getName()
- return "<3dref externSheetID=%d cellAddress='%s'>"%(self.extSheetId, cellName)
-
-
-class Ref3dA(Ref3dV):
- def __init__ (self, header, tokens):
- Ref3dV.__init__(self, header, tokens)
-
-
-class Area3d(TokenBase):
-
- def parse (self, i):
- self.cellrange = None
- try:
- op = self.tokens[i]
- i += 1
- self.extSheetId = globals.getSignedInt(self.tokens[i:i+2])
- i += 2
- self.cellrange = parseCellRangeAddress(self.tokens[i:i+8])
- except InvalidCellAddress:
- pass
- return i
-
- def getText (self):
- if self.cellrange == None:
- return ''
- cellRangeName = self.cellrange.getName()
- return "<3drange externSheetID=%d rangeAddress='%s'>"%(self.extSheetId, cellRangeName)
-
-class Error(TokenBase):
-
- def parse (self, i):
- i += 1 # skip opcode
- self.errorNum = globals.getSignedInt(self.tokens[i:i+1])
- i += 1
- return i
-
- def getText (self):
- errorText = ''
- if self.errorNum == 0x17:
- errorText = '#REF!'
- return "<error code='0x%2.2X' text='%s'>"%(self.errorNum, errorText)
-
-tokenMap = {
- # binary operator
- 0x03: Add,
- 0x04: Sub,
- 0x05: Mul,
- 0x06: Div,
- 0x07: Power,
- 0x08: Concat,
- 0x09: LT,
- 0x0A: LE,
- 0x0B: EQ,
- 0x0C: GE,
- 0x0D: GT,
- 0x0E: NE,
- 0x0F: Isect,
- 0x10: List,
- 0x11: Range,
-
- # unary operator
- 0x12: Plus,
- 0x13: Minus,
- 0x14: Percent,
-
- # operand tokens
- 0x39: NameX,
- 0x59: NameX,
- 0x79: NameX,
-
- # 3d reference (TODO: There is a slight variation in how a cell reference
- # is represented between 0x3A and 0x5A).
- 0x3A: Ref3dR,
- 0x5A: Ref3dV,
- 0x7A: Ref3dA,
-
- 0x3B: Area3d,
- 0x5B: Area3d,
- 0x7B: Area3d,
-
- 0x1C: Error,
-
- # last item
- 0xFFFF: None
-}
-
-class FormulaParser(object):
- """formula parser for token bytes
-
-This class receives a series of bytes that represent formula tokens through
-the constructor. That series of bytes must also include the formula length
-which is usually the first 2 bytes.
-"""
- def __init__ (self, header, tokens, sizeField=True):
- self.header = header
- self.strm = globals.ByteStream(tokens)
- self.text = ''
- self.sizeField = sizeField
-
- def parse (self):
- length = self.strm.getSize()
- if self.sizeField:
- # first 2-bytes contain the length of the formula tokens
- length = self.strm.readUnsignedInt(2)
- if length <= 0:
- return
- ftokens = self.strm.readBytes(length)
- length = len(ftokens)
- else:
- ftokens = self.strm.readRemainingBytes()
-
- i = 0
- while i < length:
- tk = ftokens[i]
-
- if type(tk) == type('c'):
- # get the ordinal of the character.
- tk = ord(tk)
-
- if not tokenMap.has_key(tk):
- # no token handler
- i += 1
- continue
-
- # token handler exists.
- o = tokenMap[tk](self.header, ftokens)
- i = o.parse(i)
- self.text += o.getText() + ' '
-
- i += 1
-
-
- def getText (self):
- return self.text
-
-# ============================================================================
-
-class TokenType:
- Area3d = 0
- Unknown = 9999
-
-class _TokenBase(object):
- def __init__ (self, strm, opcode1, opcode2=None):
- self.opcode1 = opcode1
- self.opcode2 = opcode2
- self.strm = strm
- self.tokenType = TokenType.Unknown
-
- def parse (self):
- self.parseBytes()
- self.strm = None # no need to hold reference to the stream.
-
- def parseBytes (self):
- # derived class should overwrite this method.
- pass
-
- def getText (self):
- return ''
-
-class _Int(_TokenBase):
- def parseBytes (self):
- self.value = self.strm.readUnsignedInt(2)
-
- def getText (self):
- return "%d"%self.value
-
-class _Area3d(_TokenBase):
- def parseBytes (self):
- self.xti = self.strm.readUnsignedInt(2)
- self.cellRange = parseCellRangeAddress(self.strm.readBytes(8))
- self.tokenType = TokenType.Area3d
-
- def getText (self):
- return "(xti=%d,"%self.xti + self.cellRange.getName() + ")"
-
-class _FuncVar(_TokenBase):
-
- funcTab = {
- 0x0000: 'COUNT',
- 0x0001: 'IF',
- 0x0002: 'ISNA',
- 0x0003: 'ISERROR',
- 0x0004: 'SUM',
- 0x0005: 'AVERAGE',
- 0x0006: 'MIN',
- 0x0007: 'MAX',
- 0x0008: 'ROW',
- 0x0009: 'COLUMN',
- 0x000A: 'NA',
- 0x000B: 'NPV',
- 0x000C: 'STDEV',
- 0x000D: 'DOLLAR',
- 0x000E: 'FIXED',
- 0x000F: 'SIN',
- 0x0010: 'COS',
- 0x0011: 'TAN',
- 0x0012: 'ATAN',
- 0x0013: 'PI',
- 0x0014: 'SQRT',
- 0x0015: 'EXP',
- 0x0016: 'LN',
- 0x0017: 'LOG10',
- 0x0018: 'ABS',
- 0x0019: 'INT',
- 0x001A: 'SIGN',
- 0x001B: 'ROUND',
- 0x001C: 'LOOKUP',
- 0x001D: 'INDEX',
- 0x001E: 'REPT',
- 0x001F: 'MID',
- 0x0020: 'LEN',
- 0x0021: 'VALUE',
- 0x0022: 'TRUE',
- 0x0023: 'FALSE',
- 0x0024: 'AND',
- 0x0025: 'OR',
- 0x0026: 'NOT',
- 0x0027: 'MOD',
- 0x0028: 'DCOUNT',
- 0x0029: 'DSUM',
- 0x002A: 'DAVERAGE',
- 0x002B: 'DMIN',
- 0x002C: 'DMAX',
- 0x002D: 'DSTDEV',
- 0x002E: 'VAR',
- 0x002F: 'DVAR',
- 0x0030: 'TEXT',
- 0x0031: 'LINEST',
- 0x0032: 'TREND',
- 0x0033: 'LOGEST',
- 0x0034: 'GROWTH',
- 0x0035: 'GOTO',
- 0x0036: 'HALT',
- 0x0037: 'RETURN',
- 0x0038: 'PV',
- 0x0039: 'FV',
- 0x003A: 'NPER',
- 0x003B: 'PMT',
- 0x003C: 'RATE',
- 0x003D: 'MIRR',
- 0x003E: 'IRR',
- 0x003F: 'RAND',
- 0x0040: 'MATCH',
- 0x0041: 'DATE',
- 0x0042: 'TIME',
- 0x0043: 'DAY',
- 0x0044: 'MONTH',
- 0x0045: 'YEAR',
- 0x0046: 'WEEKDAY',
- 0x0047: 'HOUR',
- 0x0048: 'MINUTE',
- 0x0049: 'SECOND',
- 0x004A: 'NOW',
- 0x004B: 'AREAS',
- 0x004C: 'ROWS',
- 0x004D: 'COLUMNS',
- 0x004E: 'OFFSET',
- 0x004F: 'ABSREF',
- 0x0050: 'RELREF',
- 0x0051: 'ARGUMENT',
- 0x0052: 'SEARCH',
- 0x0053: 'TRANSPOSE',
- 0x0054: 'ERROR',
- 0x0055: 'STEP',
- 0x0056: 'TYPE',
- 0x0057: 'ECHO',
- 0x0058: 'SET.NAME',
- 0x0059: 'CALLER',
- 0x005A: 'DEREF',
- 0x005B: 'WINDOWS',
- 0x005C: 'SERIES',
- 0x005D: 'DOCUMENTS',
- 0x005E: 'ACTIVE.CELL',
- 0x005F: 'SELECTION',
- 0x0060: 'RESULT',
- 0x0061: 'ATAN2',
- 0x0062: 'ASIN',
- 0x0063: 'ACOS',
- 0x0064: 'CHOOSE',
- 0x0065: 'HLOOKUP',
- 0x0066: 'VLOOKUP',
- 0x0067: 'LINKS',
- 0x0068: 'INPUT',
- 0x0069: 'ISREF',
- 0x006A: 'GET.FORMULA',
- 0x006B: 'GET.NAME',
- 0x006C: 'SET.VALUE',
- 0x006D: 'LOG',
- 0x006E: 'EXEC',
- 0x006F: 'CHAR',
- 0x0070: 'LOWER',
- 0x0071: 'UPPER',
- 0x0072: 'PROPER',
- 0x0073: 'LEFT',
- 0x0074: 'RIGHT',
- 0x0075: 'EXACT',
- 0x0076: 'TRIM',
- 0x0077: 'REPLACE',
- 0x0078: 'SUBSTITUTE',
- 0x0079: 'CODE',
- 0x007A: 'NAMES',
- 0x007B: 'DIRECTORY',
- 0x007C: 'FIND',
- 0x007D: 'CELL',
- 0x007E: 'ISERR',
- 0x007F: 'ISTEXT',
- 0x0080: 'ISNUMBER',
- 0x0081: 'ISBLANK',
- 0x0082: 'T',
- 0x0083: 'N',
- 0x0084: 'FOPEN',
- 0x0085: 'FCLOSE',
- 0x0086: 'FSIZE',
- 0x0087: 'FREADLN',
- 0x0088: 'FREAD',
- 0x0089: 'FWRITELN',
- 0x008A: 'FWRITE',
- 0x008B: 'FPOS',
- 0x008C: 'DATEVALUE',
- 0x008D: 'TIMEVALUE',
- 0x008E: 'SLN',
- 0x008F: 'SYD',
- 0x0090: 'DDB',
- 0x0091: 'GET.DEF',
- 0x0092: 'REFTEXT',
- 0x0093: 'TEXTREF',
- 0x0094: 'INDIRECT',
- 0x0095: 'REGISTER',
- 0x0096: 'CALL',
- 0x0097: 'ADD.BAR',
- 0x0098: 'ADD.MENU',
- 0x0099: 'ADD.COMMAND',
- 0x009A: 'ENABLE.COMMAND',
- 0x009B: 'CHECK.COMMAND',
- 0x009C: 'RENAME.COMMAND',
- 0x009D: 'SHOW.BAR',
- 0x009E: 'DELETE.MENU',
- 0x009F: 'DELETE.COMMAND',
- 0x00A0: 'GET.CHART.ITEM',
- 0x00A1: 'DIALOG.BOX',
- 0x00A2: 'CLEAN',
- 0x00A3: 'MDETERM',
- 0x00A4: 'MINVERSE',
- 0x00A5: 'MMULT',
- 0x00A6: 'FILES',
- 0x00A7: 'IPMT',
- 0x00A8: 'PPMT',
- 0x00A9: 'COUNTA',
- 0x00AA: 'CANCEL.KEY',
- 0x00AB: 'FOR',
- 0x00AC: 'WHILE',
- 0x00AD: 'BREAK',
- 0x00AE: 'NEXT',
- 0x00AF: 'INITIATE',
- 0x00B0: 'REQUEST',
- 0x00B1: 'POKE',
- 0x00B2: 'EXECUTE',
- 0x00B3: 'TERMINATE',
- 0x00B4: 'RESTART',
- 0x00B5: 'HELP',
- 0x00B6: 'GET.BAR',
- 0x00B7: 'PRODUCT',
- 0x00B8: 'FACT',
- 0x00B9: 'GET.CELL',
- 0x00BA: 'GET.WORKSPACE',
- 0x00BB: 'GET.WINDOW',
- 0x00BC: 'GET.DOCUMENT',
- 0x00BD: 'DPRODUCT',
- 0x00BE: 'ISNONTEXT',
- 0x00BF: 'GET.NOTE',
- 0x00C0: 'NOTE',
- 0x00C1: 'STDEVP',
- 0x00C2: 'VARP',
- 0x00C3: 'DSTDEVP',
- 0x00C4: 'DVARP',
- 0x00C5: 'TRUNC',
- 0x00C6: 'ISLOGICAL',
- 0x00C7: 'DCOUNTA',
- 0x00C8: 'DELETE.BAR',
- 0x00C9: 'UNREGISTER',
- 0x00CC: 'USDOLLAR',
- 0x00CD: 'FINDB',
- 0x00CE: 'SEARCHB',
- 0x00CF: 'REPLACEB',
- 0x00D0: 'LEFTB',
- 0x00D1: 'RIGHTB',
- 0x00D2: 'MIDB',
- 0x00D3: 'LENB',
- 0x00D4: 'ROUNDUP',
- 0x00D5: 'ROUNDDOWN',
- 0x00D6: 'ASC',
- 0x00D7: 'DBCS',
- 0x00D8: 'RANK',
- 0x00DB: 'ADDRESS',
- 0x00DC: 'DAYS360',
- 0x00DD: 'TODAY',
- 0x00DE: 'VDB',
- 0x00DF: 'ELSE',
- 0x00E0: 'ELSE.IF',
- 0x00E1: 'END.IF',
- 0x00E2: 'FOR.CELL',
- 0x00E3: 'MEDIAN',
- 0x00E4: 'SUMPRODUCT',
- 0x00E5: 'SINH',
- 0x00E6: 'COSH',
- 0x00E7: 'TANH',
- 0x00E8: 'ASINH',
- 0x00E9: 'ACOSH',
- 0x00EA: 'ATANH',
- 0x00EB: 'DGET',
- 0x00EC: 'CREATE.OBJECT',
- 0x00ED: 'VOLATILE',
- 0x00EE: 'LAST.ERROR',
- 0x00EF: 'CUSTOM.UNDO',
- 0x00F0: 'CUSTOM.REPEAT',
- 0x00F1: 'FORMULA.CONVERT',
- 0x00F2: 'GET.LINK.INFO',
- 0x00F3: 'TEXT.BOX',
- 0x00F4: 'INFO',
- 0x00F5: 'GROUP',
- 0x00F6: 'GET.OBJECT',
- 0x00F7: 'DB',
- 0x00F8: 'PAUSE',
- 0x00FB: 'RESUME',
- 0x00FC: 'FREQUENCY',
- 0x00FD: 'ADD.TOOLBAR',
- 0x00FE: 'DELETE.TOOLBAR',
- 0x00FF: 'User Defined Function',
- 0x0100: 'RESET.TOOLBAR',
- 0x0101: 'EVALUATE',
- 0x0102: 'GET.TOOLBAR',
- 0x0103: 'GET.TOOL',
- 0x0104: 'SPELLING.CHECK',
- 0x0105: 'ERROR.TYPE',
- 0x0106: 'APP.TITLE',
- 0x0107: 'WINDOW.TITLE',
- 0x0108: 'SAVE.TOOLBAR',
- 0x0109: 'ENABLE.TOOL',
- 0x010A: 'PRESS.TOOL',
- 0x010B: 'REGISTER.ID',
- 0x010C: 'GET.WORKBOOK',
- 0x010D: 'AVEDEV',
- 0x010E: 'BETADIST',
- 0x010F: 'GAMMALN',
- 0x0110: 'BETAINV',
- 0x0111: 'BINOMDIST',
- 0x0112: 'CHIDIST',
- 0x0113: 'CHIINV',
- 0x0114: 'COMBIN',
- 0x0115: 'CONFIDENCE',
- 0x0116: 'CRITBINOM',
- 0x0117: 'EVEN',
- 0x0118: 'EXPONDIST',
- 0x0119: 'FDIST',
- 0x011A: 'FINV',
- 0x011B: 'FISHER',
- 0x011C: 'FISHERINV',
- 0x011D: 'FLOOR',
- 0x011E: 'GAMMADIST',
- 0x011F: 'GAMMAINV',
- 0x0120: 'CEILING',
- 0x0121: 'HYPGEOMDIST',
- 0x0122: 'LOGNORMDIST',
- 0x0123: 'LOGINV',
- 0x0124: 'NEGBINOMDIST',
- 0x0125: 'NORMDIST',
- 0x0126: 'NORMSDIST',
- 0x0127: 'NORMINV',
- 0x0128: 'NORMSINV',
- 0x0129: 'STANDARDIZE',
- 0x012A: 'ODD',
- 0x012B: 'PERMUT',
- 0x012C: 'POISSON',
- 0x012D: 'TDIST',
- 0x012E: 'WEIBULL',
- 0x012F: 'SUMXMY2',
- 0x0130: 'SUMX2MY2',
- 0x0131: 'SUMX2PY2',
- 0x0132: 'CHITEST',
- 0x0133: 'CORREL',
- 0x0134: 'COVAR',
- 0x0135: 'FORECAST',
- 0x0136: 'FTEST',
- 0x0137: 'INTERCEPT',
- 0x0138: 'PEARSON',
- 0x0139: 'RSQ',
- 0x013A: 'STEYX',
- 0x013B: 'SLOPE',
- 0x013C: 'TTEST',
- 0x013D: 'PROB',
- 0x013E: 'DEVSQ',
- 0x013F: 'GEOMEAN',
- 0x0140: 'HARMEAN',
- 0x0141: 'SUMSQ',
- 0x0142: 'KURT',
- 0x0143: 'SKEW',
- 0x0144: 'ZTEST',
- 0x0145: 'LARGE',
- 0x0146: 'SMALL',
- 0x0147: 'QUARTILE',
- 0x0148: 'PERCENTILE',
- 0x0149: 'PERCENTRANK',
- 0x014A: 'MODE',
- 0x014B: 'TRIMMEAN',
- 0x014C: 'TINV',
- 0x014E: 'MOVIE.COMMAND',
- 0x014F: 'GET.MOVIE',
- 0x0150: 'CONCATENATE',
- 0x0151: 'POWER',
- 0x0152: 'PIVOT.ADD.DATA',
- 0x0153: 'GET.PIVOT.TABLE',
- 0x0154: 'GET.PIVOT.FIELD',
- 0x0155: 'GET.PIVOT.ITEM',
- 0x0156: 'RADIANS',
- 0x0157: 'DEGREES',
- 0x0158: 'SUBTOTAL',
- 0x0159: 'SUMIF',
- 0x015A: 'COUNTIF',
- 0x015B: 'COUNTBLANK',
- 0x015C: 'SCENARIO.GET',
- 0x015D: 'OPTIONS.LISTS.GET',
- 0x015E: 'ISPMT',
- 0x015F: 'DATEDIF',
- 0x0160: 'DATESTRING',
- 0x0161: 'NUMBERSTRING',
- 0x0162: 'ROMAN',
- 0x0163: 'OPEN.DIALOG',
- 0x0164: 'SAVE.DIALOG',
- 0x0165: 'VIEW.GET',
- 0x0166: 'GETPIVOTDATA',
- 0x0167: 'HYPERLINK',
- 0x0168: 'PHONETIC',
- 0x0169: 'AVERAGEA',
- 0x016A: 'MAXA',
- 0x016B: 'MINA',
- 0x016C: 'STDEVPA',
- 0x016D: 'VARPA',
- 0x016E: 'STDEVA',
- 0x016F: 'VARA',
- 0x0170: 'BAHTTEXT',
- 0x0171: 'THAIDAYOFWEEK',
- 0x0172: 'THAIDIGIT',
- 0x0173: 'THAIMONTHOFYEAR',
- 0x0174: 'THAINUMSOUND',
- 0x0175: 'THAINUMSTRING',
- 0x0176: 'THAISTRINGLENGTH',
- 0x0177: 'ISTHAIDIGIT',
- 0x0178: 'ROUNDBAHTDOWN',
- 0x0179: 'ROUNDBAHTUP',
- 0x017A: 'THAIYEAR',
- 0x017B: 'RTD'
- }
-
- def parseBytes (self):
- self.dataType = (self.opcode1 & 0x60)/32 # 0x1 = reference, 0x2 = value, 0x3 = array
- self.argCount = self.strm.readUnsignedInt(1)
- tab = self.strm.readUnsignedInt(2)
- self.funcType = (tab & 0x7FFF)
- self.isCeTab = (tab & 0x8000) != 0
-
- def getText (self):
- if self.isCeTab:
- # I'll support this later.
- return ''
-
- if not _FuncVar.funcTab.has_key(self.funcType):
- # unknown function name
- return '#NAME!'
-
- if self.argCount > 0:
- # I'll support functions with arguments later.
- return ''
-
- return _FuncVar.funcTab[self.funcType] + "()"
-
-_tokenMap = {
- 0x1E: _Int,
- 0x3B: _Area3d,
- 0x5B: _Area3d,
- 0x7B: _Area3d,
-
- 0x42: _FuncVar
-}
-
-class FormulaParser2(object):
- """This is a new formula parser that will eventually replace the old one.
-
-Once replaced, I'll change the name to FormulaParser and the names of the
-associated token classes will be without the leading underscore (_)."""
-
-
- def __init__ (self, header, bytes):
- self.header = header
- self.tokens = []
- self.strm = globals.ByteStream(bytes)
-
- def parse (self):
- while not self.strm.isEndOfRecord():
- b = self.strm.readUnsignedInt(1)
- if not _tokenMap.has_key(b):
- # Unknown token. Stop parsing.
- return
-
- token = _tokenMap[b](self.strm, b)
- token.parse()
- self.tokens.append(token)
-
- def getText (self):
- s = ''
- for tk in self.tokens:
- s += tk.getText()
- return s
-
- def getTokens (self):
- return self.tokens
diff --git a/scratch/mso-dumper/src/globals.py b/scratch/mso-dumper/src/globals.py
deleted file mode 100644
index de4f199..0000000
--- a/scratch/mso-dumper/src/globals.py
+++ /dev/null
@@ -1,472 +0,0 @@
-########################################################################
-#
-# Copyright (c) 2010 Kohei Yoshida
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
-
-import sys, struct, math, zipfile, xmlpp, StringIO
-
-class ByteConvertError(Exception): pass
-
-class ByteStreamError(Exception): pass
-
-
-class ModelBase(object):
-
- class HostAppType:
- Word = 0
- Excel = 1
- PowerPoint = 2
-
- def __init__ (self, hostApp):
- self.hostApp = hostApp
-
-
-class Params(object):
- """command-line parameters."""
- def __init__ (self):
- self.debug = False
- self.showSectorChain = False
- self.showStreamPos = False
-
-
-class StreamData(object):
- """run-time stream data."""
- def __init__ (self):
- self.encrypted = False
- self.pivotCacheIDs = {}
-
- def appendPivotCacheId (self, newId):
- # must be 4-digit with leading '0's.
- strId = "%.4d"%newId
- self.pivotCacheIDs[strId] = True
-
- def isPivotCacheStream (self, name):
- return self.pivotCacheIDs.has_key(name)
-
-
-class ByteStream(object):
-
- def __init__ (self, bytes):
- self.bytes = bytes
- self.pos = 0
- self.size = len(bytes)
-
- def getSize (self):
- return self.size
-
- def readBytes (self, length):
- if self.pos + length > self.size:
- error("reading %d bytes from position %d would exceed the current size of %d\n"%
- (length, self.pos, self.size))
- raise ByteStreamError()
- r = self.bytes[self.pos:self.pos+length]
- self.pos += length
- return r
-
- def readRemainingBytes (self):
- r = self.bytes[self.pos:]
- self.pos = self.size
- return r
-
- def getCurrentPos (self):
- return self.pos
-
- def setCurrentPos (self, pos):
- self.pos = pos
-
- def isEndOfRecord (self):
- return (self.pos == self.size)
-
- def readUnsignedInt (self, length):
- bytes = self.readBytes(length)
- return getUnsignedInt(bytes)
-
- def readSignedInt (self, length):
- bytes = self.readBytes(length)
- return getSignedInt(bytes)
-
- def readDouble (self):
- # double is always 8 bytes.
- bytes = self.readBytes(8)
- return getDouble(bytes)
-
- def readUnicodeString (self):
- # First 2-bytes contains the text length, followed by a 1-byte flag.
- textLen = self.readUnsignedInt(2)
- bytes = self.bytes[self.pos:]
- text, byteLen = getRichText(bytes, textLen)
- self.moveForward (byteLen)
- return text
-
- def moveBack (self, byteCount):
- self.pos -= byteCount
- if self.pos < 0:
- self.pos = 0
-
- def moveForward (self, byteCount):
- self.pos += byteCount
- if self.pos > self.size:
- self.pos = self.size
-
-
-def getValueOrUnknown (list, idx, errmsg='(unknown)'):
- listType = type(list)
- if listType == type([]):
- # list
- if idx < len(list):
- return list[idx]
- elif listType == type({}):
- # dictionary
- if list.has_key(idx):
- return list[idx]
-
- return errmsg
-
-
-def output (msg):
- sys.stdout.write(msg)
-
-def error (msg):
- sys.stderr.write("Error: " + msg)
-
-def debug (msg):
- sys.stderr.write("DEBUG: %s\n"%msg)
-
-
-def encodeName (name):
- """Encode name that contains unprintable characters."""
-
- n = len(name)
- if n == 0:
- return name
-
- newname = ''
- for i in xrange(0, n):
- if ord(name[i]) <= 20 or ord(name[i]) >= 127:
- newname += "\\x%2.2X"%ord(name[i])
- else:
- newname += name[i]
-
- return newname
-
-
-class UnicodeRichExtText(object):
- def __init__ (self):
- self.baseText = ''
- self.phoneticBytes = []
-
-
-def getUnicodeRichExtText (bytes):
- ret = UnicodeRichExtText()
- strm = ByteStream(bytes)
- textLen = strm.readUnsignedInt(2)
- flags = strm.readUnsignedInt(1)
- # 0 0 0 0 0 0 0 0
- # |-------|D|C|B|A|
- isDoubleByte = (flags & 0x01) > 0 # A
- ignored = (flags & 0x02) > 0 # B
- hasPhonetic = (flags & 0x04) > 0 # C
- isRichStr = (flags & 0x08) > 0 # D
-
- numElem = 0
- if isRichStr:
- numElem = strm.readUnsignedInt(2)
-
- phoneticBytes = 0
- if hasPhonetic:
- phoneticBytes = strm.readUnsignedInt(4)
-
- if isDoubleByte:
- # double-byte string (UTF-16)
- text = ''
- for i in xrange(0, textLen):
- text += toTextBytes(strm.readBytes(2)).decode('utf-16')
- ret.baseText = text
- else:
- # single-byte string
- ret.baseText = toTextBytes(strm.readBytes(textLen))
-
- if isRichStr:
- for i in xrange(0, numElem):
- posChar = strm.readUnsignedInt(2)
- fontIdx = strm.readUnsignedInt(2)
-
- if hasPhonetic:
- ret.phoneticBytes = strm.readBytes(phoneticBytes)
-
- return ret, strm.getCurrentPos()
-
-
-def getRichText (bytes, textLen=None):
- """parse a string of the rich-text format that Excel uses.
-
-Note the following:
-
- * The 1st byte always contains flag.
- * The actual number of bytes read may differ depending on the values of the
- flags, so the client code should pass an open-ended stream of bytes and
- always query for the actual bytes read to adjust for the new stream
- position when this function returns.
-"""
-
- strm = ByteStream(bytes)
- flags = strm.readUnsignedInt(1)
- if type(flags) == type('c'):
- flags = ord(flags)
- is16Bit = (flags & 0x01)
- isFarEast = (flags & 0x04)
- isRich = (flags & 0x08)
-
- formatRuns = 0
- if isRich:
- formatRuns = strm.readUnsignedInt(2)
-
- extInfo = 0
- if isFarEast:
- extInfo = strm.readUnsignedInt(4)
-
- extraBytes = 0
- if textLen == None:
- extraBytes = formatRuns*4 + extInfo
- textLen = len(bytes) - extraBytes
-
- totalByteLen = strm.getCurrentPos() + textLen + extraBytes
- if is16Bit:
- totalByteLen += textLen # double the text length since each char is 2 bytes.
- text = ''
- for i in xrange(0, textLen):
- text += toTextBytes(strm.readBytes(2)).decode('utf-16')
- else:
- text = toTextBytes(strm.readBytes(textLen))
-
- return (text, totalByteLen)
-
-
-def dumpBytes (chars, subDivide=None):
- line = 0
- subDivideLine = None
- if subDivide != None:
- subDivideLine = subDivide/16
-
- charLen = len(chars)
- if charLen == 0:
- # no bytes to dump.
- return
-
- labelWidth = int(math.ceil(math.log(charLen, 10)))
- flushBytes = False
- for i in xrange(0, charLen):
- if (i+1)%16 == 1:
- # print line header with seek position
- fmt = "%%%d.%dd: "%(labelWidth, labelWidth)
- output(fmt%i)
-
- byte = ord(chars[i])
- output("%2.2X "%byte)
- flushBytes = True
-
- if (i+1)%4 == 0:
- # put extra space at every 4 bytes.
- output(" ")
-
- if (i+1)%16 == 0:
- output("\n")
- flushBytes = False
- if subDivideLine != None and (line+1)%subDivideLine == 0:
- output("\n")
- line += 1
-
- if flushBytes:
- output("\n")
-
-
-def getSectorPos (secID, secSize):
- return 512 + secID*secSize
-
-
-def getRawBytes (bytes, spaced=True, reverse=True):
- text = ''
- for b in bytes:
- if type(b) == type(''):
- b = ord(b)
- if len(text) == 0:
- text = "%2.2X"%b
- elif spaced:
- if reverse:
- text = "%2.2X "%b + text
- else:
- text += " %2.2X"%b
- else:
- if reverse:
- text = "%2.2X"%b + text
- else:
- text += "%2.2X"%b
- return text
-
-
-def getTextBytes (bytes):
- return toTextBytes(bytes)
-
-
-def toTextBytes (bytes):
- n = len(bytes)
- text = ''
- for i in xrange(0, n):
- b = bytes[i]
- if type(b) == type(0x00):
- b = struct.pack('B', b)
- text += b
- return text
-
-
-def getSignedInt (bytes):
- # little endian
- n = len(bytes)
- if n == 0:
- return 0
-
- text = toTextBytes(bytes)
- if n == 1:
- # byte - 1 byte
- return struct.unpack('b', text)[0]
- elif n == 2:
- # short - 2 bytes
- return struct.unpack('<h', text)[0]
- elif n == 4:
- # int, long - 4 bytes
- return struct.unpack('<l', text)[0]
-
- raise ByteConvertError
-
-
-def getUnsignedInt (bytes):
- # little endian
- n = len(bytes)
- if n == 0:
- return 0
-
- text = toTextBytes(bytes)
- if n == 1:
- # byte - 1 byte
- return struct.unpack('B', text)[0]
- elif n == 2:
- # short - 2 bytes
- return struct.unpack('<H', text)[0]
- elif n == 4:
- # int, long - 4 bytes
- return struct.unpack('<L', text)[0]
-
- raise ByteConvertError
-
-
-def getFloat (bytes):
- n = len(bytes)
- if n == 0:
- return 0.0
-
- text = toTextBytes(bytes)
- return struct.unpack('<f', text)[0]
-
-
-def getDouble (bytes):
- n = len(bytes)
- if n == 0:
- return 0.0
-
- text = toTextBytes(bytes)
- return struct.unpack('<d', text)[0]
-
-
-def getUTF8FromUTF16 (bytes):
- # little endian utf-16 strings
- byteCount = len(bytes)
- loopCount = int(byteCount/2)
- text = ''
- for i in xrange(0, loopCount):
- code = ''
- lsbZero = bytes[i*2] == '\x00'
- msbZero = bytes[i*2+1] == '\x00'
- if msbZero and lsbZero:
- return text
-
- if not msbZero:
- code += bytes[i*2+1]
- if not lsbZero:
- code += bytes[i*2]
- try:
- text += unicode(code, 'utf-8')
- except UnicodeDecodeError:
- text += "<%d invalid chars>"%len(code)
- return text
-
-class StreamWrap(object):
- def __init__ (self,printer):
- self.printer = printer
- self.buffer = ""
- def write (self,string):
- self.buffer += string
- def flush (self):
- for line in self.buffer.splitlines():
- self.printer(line)
-
-def outputZipContent (bytes, printer, width=80):
- printer("Zipped content:")
- rawFile = StringIO.StringIO(bytes)
- zipFile = zipfile.ZipFile(rawFile)
- i = 0
- # TODO: when 2.6/3.0 is in widespread use, change to infolist
- # here, names might be ambiguous
- for filename in zipFile.namelist():
- if i > 0:
- printer('-'*width)
- i += 1
- printer("")
- printer(filename + ":")
- printer('-'*width)
-
- contents = zipFile.read(filename)
- if filename.endswith(".xml") or contents.startswith("<?xml"):
- wrapper = StreamWrap(printer)
- xmlpp.pprint(contents,wrapper,1,80)
- wrapper.flush()
- else:
- dumpBytes(contents)
-
- zipFile.close()
-
-def stringizeColorRef(colorRef, colorName="color"):
- def split (packedColor):
- return ((packedColor & 0xFF0000) // 0x10000, (packedColor & 0xFF00) / 0x100, (packedColor & 0xFF))
-
- colorValue = colorRef & 0xFFFFFF
- if colorRef & 0xFE000000 == 0xFE000000 or colorRef & 0xFF000000 == 0:
- colors = split(colorValue)
- return "%s = (%d,%d,%d)"%(colorName, colors[0], colors[1], colors[2])
- elif colorRef & 0x08000000 or colorRef & 0x10000000:
- return "%s = schemecolor(%d)"%(colorName, colorValue)
- elif colorRef & 0x04000000:
- return "%s = colorschemecolor(%d)"%(colorName, colorValue)
- else:
- return "%s = <unidentified color>(%4.4Xh)"%(colorName, colorValue)
diff --git a/scratch/mso-dumper/src/msodraw.py b/scratch/mso-dumper/src/msodraw.py
deleted file mode 100644
index 26835ff..0000000
--- a/scratch/mso-dumper/src/msodraw.py
+++ /dev/null
@@ -1,607 +0,0 @@
-########################################################################
-#
-# Copyright (c) 2010 Kohei Yoshida
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
-
-import globals, xlsmodel
-import sys
-
-def indent (level):
- return ' '*level
-
-def headerLine ():
- return "+ " + "-"*58 + "+"
-
-
-class RecordHeader:
-
- class Type:
- dggContainer = 0xF000
- dgContainer = 0xF002
- spgrContainer = 0xF003
- spContainer = 0xF004
- solverContainer = 0xF005
- FDGGBlock = 0xF006
- FDG = 0xF008
- FSPGR = 0xF009
- FSP = 0xF00A
- FOPT = 0xF00B
- FClientAnchor = 0xF010
- FClientData = 0xF011
- FConnectorRule = 0xF012
- FDGSL = 0xF119
- SplitMenuColorContainer = 0xF11E
-
- containerTypeNames = {
- Type.dggContainer: 'OfficeArtDggContainer',
- Type.dgContainer: 'OfficeArtDgContainer',
- Type.spContainer: 'OfficeArtSpContainer',
- Type.spgrContainer: 'OfficeArtSpgrContainer',
- Type.solverContainer: 'OfficeArtSolverContainer',
- Type.FDG: 'OfficeArtFDG',
- Type.FDGGBlock: 'OfficeArtFDGGBlock',
- Type.FOPT: 'OfficeArtFOPT',
- Type.FClientAnchor: 'OfficeArtClientAnchor',
- Type.FClientData: 'OfficeArtClientData',
- Type.FSP: 'OfficeArtFSP',
- Type.FSPGR: 'OfficeArtFSPGR',
- Type.FConnectorRule: 'OfficeArtFConnectorRule',
- Type.FDGSL: 'OfficeArtFDGSL',
- Type.SplitMenuColorContainer: 'OfficeArtSplitMenuColorContainer'
- }
-
- @staticmethod
- def getRecTypeName (recType):
- if RecordHeader.containerTypeNames.has_key(recType):
- return RecordHeader.containerTypeNames[recType]
- return 'unknown'
-
- @staticmethod
- def appendHeaderLine (recHdl, line):
- n = len(line)
- if n < 60:
- line += ' '*(60-n)
- line += '|'
- recHdl.appendLine(line)
-
- def __init__ (self, strm):
- mixed = strm.readUnsignedInt(2)
- self.recVer = (mixed & 0x000F)
- self.recInstance = (mixed & 0xFFF0) / 16
- self.recType = strm.readUnsignedInt(2)
- self.recLen = strm.readUnsignedInt(4)
-
- def appendLines (self, recHdl, level=0):
- pre = "| "
- RecordHeader.appendHeaderLine(recHdl, pre + "Record type: 0x%4.4X (%s)"%(self.recType, RecordHeader.getRecTypeName(self.recType)))
- RecordHeader.appendHeaderLine(recHdl, pre + " version: 0x%1.1X instance: 0x%3.3X size: %d"%
- (self.recVer, self.recInstance, self.recLen))
-
-
-class ColorRef:
- def __init__ (self, byte):
- self.red = (byte & 0x000000FF)
- self.green = (byte & 0x0000FF00) / 256
- self.blue = (byte & 0x00FF0000) / 65536
- self.flag = (byte & 0xFF000000) / 16777216
-
- self.paletteIndex = (self.flag & 0x01) != 0
- self.paletteRGB = (self.flag & 0x02) != 0
- self.systemRGB = (self.flag & 0x04) != 0
- self.schemeIndex = (self.flag & 0x08) != 0
- self.sysIndex = (self.flag & 0x10) != 0
-
- def appendLine (self, recHdl, level):
- if self.paletteIndex:
- # red and green and used as an unsigned index into the current color palette.
- paletteId = self.green * 256 + self.red
- recHdl.appendLine(indent(level) + "color index in current palette: %d"%paletteId)
- if self.sysIndex:
- # red and green are used as an unsigned 16-bit index into the system color table.
- sysId = self.green * 256 + self.red
- recHdl.appendLine(indent(level) + "system index: %d"%sysId)
- elif self.schemeIndex:
- # the red value is used as as a color scheme index
- recHdl.appendLine(indent(level) + "color scheme index: %d"%self.red)
-
- else:
- recHdl.appendLine(indent(level) + "color: (red=%d, green=%d, blue=%d) flag: 0x%2.2X"%
- (self.red, self.green, self.blue, self.flag))
- recHdl.appendLine(indent(level) + "palette index: %s"%recHdl.getTrueFalse(self.paletteIndex))
- recHdl.appendLine(indent(level) + "palette RGB: %s"%recHdl.getTrueFalse(self.paletteRGB))
- recHdl.appendLine(indent(level) + "system RGB: %s"%recHdl.getTrueFalse(self.systemRGB))
- recHdl.appendLine(indent(level) + "system RGB: %s"%recHdl.getTrueFalse(self.systemRGB))
- recHdl.appendLine(indent(level) + "scheme index: %s"%recHdl.getTrueFalse(self.schemeIndex))
- recHdl.appendLine(indent(level) + "system index: %s"%recHdl.getTrueFalse(self.sysIndex))
-
-
-
-class FDG:
- def __init__ (self, strm):
- self.shapeCount = strm.readUnsignedInt(4)
- self.lastShapeID = strm.readUnsignedInt(4)
-
- def appendLines (self, recHdl, rh):
- recHdl.appendLine("FDG content (drawing data):")
- recHdl.appendLine(" ID of this shape: %d"%rh.recInstance)
- recHdl.appendLine(" shape count: %d"%self.shapeCount)
- recHdl.appendLine(" last shape ID: %d"%self.lastShapeID)
-
-
-class IDCL:
- def __init__ (self, strm):
- self.dgid = strm.readUnsignedInt(4)
- self.cspidCur = strm.readUnsignedInt(4)
-
- def appendLines (self, recHdl, rh):
- recHdl.appendLine("IDCL content:")
- recHdl.appendLine(" drawing ID: %d"%self.dgid)
- recHdl.appendLine(" cspidCur: 0x%8.8X"%self.cspidCur)
-
-class FDGG:
- def __init__ (self, strm):
- self.spidMax = strm.readUnsignedInt(4) # current max shape ID
- self.cidcl = strm.readUnsignedInt(4) # number of OfficeArtIDCL's.
- self.cspSaved = strm.readUnsignedInt(4) # total number of shapes in all drawings
- self.cdgSaved = strm.readUnsignedInt(4) # total number of drawings saved in the file
-
- def appendLines (self, recHdl, rh):
- recHdl.appendLine("FDGG content:")
- recHdl.appendLine(" current max shape ID: %d"%self.spidMax)
- recHdl.appendLine(" number of OfficeArtIDCL's: %d"%self.cidcl)
- recHdl.appendLine(" total number of shapes in all drawings: %d"%self.cspSaved)
- recHdl.appendLine(" total number of drawings in the file: %d"%self.cdgSaved)
-
-class FDGGBlock:
- def __init__ (self, strm):
- self.head = FDGG(strm)
- self.idcls = []
- # NOTE: The spec says head.cidcl stores the number of IDCL's, but each
- # FDGGBlock only contains bytes enough to store (head.cidcl - 1) of
- # IDCL's.
- for i in xrange(0, self.head.cidcl-1):
- idcl = IDCL(strm)
- self.idcls.append(idcl)
-
- def appendLines (self, recHdl, rh):
- self.head.appendLines(recHdl, rh)
- for idcl in self.idcls:
- idcl.appendLines(recHdl, rh)
-
-
-class FDGSL:
- selectionMode = {
- 0x00000000: 'default state',
- 0x00000001: 'ready to rotate',
- 0x00000002: 'ready to change the curvature of line shapes',
- 0x00000007: 'ready to crop the picture'
- }
-
- def __init__ (self, strm):
- self.cpsp = strm.readUnsignedInt(4) # the spec says undefined.
- self.dgslk = strm.readUnsignedInt(4) # selection mode
- self.shapeFocus = strm.readUnsignedInt(4) # shape ID in focus
- self.shapesSelected = []
- shapeCount = (strm.getSize() - 20)/4
- for i in xrange(0, shapeCount):
- spid = strm.readUnsignedInt(4)
- self.shapesSelected.append(spid)
-
- def appendLines (self, recHdl, rh):
- recHdl.appendLine("FDGSL content:")
- recHdl.appendLine(" selection mode: %s"%
- globals.getValueOrUnknown(FDGSL.selectionMode, self.dgslk))
- recHdl.appendLine(" ID of shape in focus: %d"%self.shapeFocus)
- for shape in self.shapesSelected:
- recHdl.appendLine(" ID of shape selected: %d"%shape)
-
-
-class FOPT:
- """property table for a shape instance"""
-
- class TextBoolean:
-
- def appendLines (self, recHdl, prop, level):
- A = (prop.value & 0x00000001) != 0
- B = (prop.value & 0x00000002) != 0
- C = (prop.value & 0x00000004) != 0
- D = (prop.value & 0x00000008) != 0
- E = (prop.value & 0x00000010) != 0
- F = (prop.value & 0x00010000) != 0
- G = (prop.value & 0x00020000) != 0
- H = (prop.value & 0x00040000) != 0
- I = (prop.value & 0x00080000) != 0
- J = (prop.value & 0x00100000) != 0
- recHdl.appendLineBoolean(indent(level) + "fit shape to text", B)
- recHdl.appendLineBoolean(indent(level) + "auto text margin", D)
- recHdl.appendLineBoolean(indent(level) + "select text", E)
- recHdl.appendLineBoolean(indent(level) + "use fit shape to text", G)
- recHdl.appendLineBoolean(indent(level) + "use auto text margin", I)
- recHdl.appendLineBoolean(indent(level) + "use select text", J)
-
- class CXStyle:
- style = [
- 'straight connector', # 0x00000000
- 'elbow-shaped connector', # 0x00000001
- 'curved connector', # 0x00000002
- 'no connector' # 0x00000003
- ]
-
- def appendLines (self, recHdl, prop, level):
- styleName = globals.getValueOrUnknown(FOPT.CXStyle.style, prop.value)
- recHdl.appendLine(indent(level) + "connector style: %s (0x%8.8X)"%(styleName, prop.value))
-
- class FillColor:
-
- def appendLines (self, recHdl, prop, level):
- color = ColorRef(prop.value)
- color.appendLine(recHdl, level)
-
- class FillStyle:
-
- def appendLines (self, recHdl, prop, level):
- flag1 = recHdl.readUnsignedInt(1)
- recHdl.moveForward(1)
- flag2 = recHdl.readUnsignedInt(1)
- recHdl.moveForward(1)
- A = (flag1 & 0x01) != 0 # fNoFillHitTest
- B = (flag1 & 0x02) != 0 # fillUseRect
- C = (flag1 & 0x04) != 0 # fillShape
- D = (flag1 & 0x08) != 0 # fHitTestFill
- E = (flag1 & 0x10) != 0 # fFilled
- F = (flag1 & 0x20) != 0 # fUseShapeAnchor
- G = (flag1 & 0x40) != 0 # fRecolorFillAsPicture
-
- H = (flag2 & 0x01) != 0 # fUseNoFillHitTest
- I = (flag2 & 0x02) != 0 # fUsefillUseRect
- J = (flag2 & 0x04) != 0 # fUsefillShape
- K = (flag2 & 0x08) != 0 # fUsefHitTestFill
- L = (flag2 & 0x10) != 0 # fUsefFilled
- M = (flag2 & 0x20) != 0 # fUsefUseShapeAnchor
- N = (flag2 & 0x40) != 0 # fUsefRecolorFillAsPicture
-
- recHdl.appendLine(indent(level)+"fNoFillHitTest : %s"%recHdl.getTrueFalse(A))
- recHdl.appendLine(indent(level)+"fillUseRect : %s"%recHdl.getTrueFalse(B))
- recHdl.appendLine(indent(level)+"fillShape : %s"%recHdl.getTrueFalse(C))
- recHdl.appendLine(indent(level)+"fHitTestFill : %s"%recHdl.getTrueFalse(D))
- recHdl.appendLine(indent(level)+"fFilled : %s"%recHdl.getTrueFalse(E))
- recHdl.appendLine(indent(level)+"fUseShapeAnchor : %s"%recHdl.getTrueFalse(F))
- recHdl.appendLine(indent(level)+"fRecolorFillAsPicture : %s"%recHdl.getTrueFalse(G))
-
- recHdl.appendLine(indent(level)+"fUseNoFillHitTest : %s"%recHdl.getTrueFalse(H))
- recHdl.appendLine(indent(level)+"fUsefillUseRect : %s"%recHdl.getTrueFalse(I))
- recHdl.appendLine(indent(level)+"fUsefillShape : %s"%recHdl.getTrueFalse(J))
- recHdl.appendLine(indent(level)+"fUsefHitTestFill : %s"%recHdl.getTrueFalse(K))
- recHdl.appendLine(indent(level)+"fUsefFilled : %s"%recHdl.getTrueFalse(L))
- recHdl.appendLine(indent(level)+"fUsefUseShapeAnchor : %s"%recHdl.getTrueFalse(M))
- recHdl.appendLine(indent(level)+"fUsefRecolorFillAsPicture : %s"%recHdl.getTrueFalse(N))
-
- class LineColor:
-
- def appendLines (self, recHdl, prop, level):
- color = ColorRef(prop.value)
- color.appendLine(recHdl, level)
-
- class GroupShape:
-
- flagNames = [
- 'fPrint', # A
- 'fHidden', # B
- 'fOneD', # C
- 'fIsButton', # D
- 'fOnDblClickNotify', # E
- 'fBehindDocument', # F
- 'fEditedWrap', # G
- 'fScriptAnchor', # H
- 'fReallyHidden', # I
- 'fAllowOverlap', # J
- 'fUserDrawn', # K
- 'fHorizRule', # L
- 'fNoshadeHR', # M
- 'fStandardHR', # N
- 'fIsBullet', # O
- 'fLayoutInCell', # P
- 'fUsefPrint', # Q
- 'fUsefHidden', # R
- 'fUsefOneD', # S
- 'fUsefIsButton', # T
- 'fUsefOnDblClickNotify', # U
- 'fUsefBehindDocument', # V
- 'fUsefEditedWrap', # W
- 'fUsefScriptAnchor', # X
- 'fUsefReallyHidden', # Y
- 'fUsefAllowOverlap', # Z
- 'fUsefUserDrawn', # a
- 'fUsefHorizRule', # b
- 'fUsefNoshadeHR', # c
- 'fUsefStandardHR', # d
- 'fUsefIsBullet', # e
- 'fUsefLayoutInCell' # f
- ]
-
- def appendLines (self, recHdl, prop, level):
- flag = prop.value
- flagCount = len(FOPT.GroupShape.flagNames)
- recHdl.appendLine(indent(level)+"flag: 0x%8.8X"%flag)
- for i in xrange(0, flagCount):
- bval = (flag & 0x00000001)
- recHdl.appendLine(indent(level)+"%s: %s"%(FOPT.GroupShape.flagNames[i], recHdl.getTrueFalse(bval)))
- flag /= 2
-
- propTable = {
- 0x00BF: ['Text Boolean Properties', TextBoolean],
- 0x0181: ['Fill Color', FillColor],
- 0x01BF: ['Fill Style Boolean Properties', FillStyle],
- 0x01C0: ['Line Color', LineColor],
- 0x0303: ['Connector Shape Style (cxstyle)', CXStyle],
- 0x03BF: ['Group Shape Boolean Properties', GroupShape]
- }
-
- class E:
- """single property entry in a property table"""
- def __init__ (self):
- self.ID = None
- self.flagBid = False
- self.flagComplex = False
- self.value = None
- self.extra = None
-
- def __init__ (self):
- self.properties = []
-
- def appendLines (self, recHdl, rh):
- recHdl.appendLine("FOPT content (property table):")
- recHdl.appendLine(" property count: %d"%rh.recInstance)
- for i in xrange(0, rh.recInstance):
- recHdl.appendLine(" "+"-"*57)
- prop = self.properties[i]
- if FOPT.propTable.has_key(prop.ID):
- # We have a handler for this property.
- # propData is expected to have two elements: name (0) and handler (1).
- propHdl = FOPT.propTable[prop.ID]
- recHdl.appendLine(" property name: %s (0x%4.4X)"%(propHdl[0], prop.ID))
- propHdl[1]().appendLines(recHdl, prop, 2)
- else:
- recHdl.appendLine(" property ID: 0x%4.4X"%prop.ID)
- if prop.flagComplex:
- recHdl.appendLine(" complex property: %s"%globals.getRawBytes(prop.extra, True, False))
- elif prop.flagBid:
- recHdl.appendLine(" blip ID: %d"%prop.value)
- else:
- # regular property value
- recHdl.appendLine(" property value: 0x%8.8X"%prop.value)
-
-
-class FRIT:
- def __init__ (self, strm):
- self.lastGroupID = strm.readUnsignedInt(2)
- self.secondLastGroupID = strm.readUnsignedInt(2)
-
- def appendLines (self, recHdl, rh):
- pass
-
-
-class FSP:
- def __init__ (self, strm):
- self.spid = strm.readUnsignedInt(4)
- self.flag = strm.readUnsignedInt(4)
-
- def appendLines (self, recHdl, rh):
- recHdl.appendLine("FSP content (instance of a shape):")
- recHdl.appendLine(" ID of this shape: %d"%self.spid)
- groupShape = (self.flag & 0x0001) != 0
- childShape = (self.flag & 0x0002) != 0
- topMostInGroup = (self.flag & 0x0004) != 0
- deleted = (self.flag & 0x0008) != 0
- oleObject = (self.flag & 0x0010) != 0
- haveMaster = (self.flag & 0x0020) != 0
- flipHorizontal = (self.flag & 0x0040) != 0
- flipVertical = (self.flag & 0x0080) != 0
- isConnector = (self.flag & 0x0100) != 0
- haveAnchor = (self.flag & 0x0200) != 0
- background = (self.flag & 0x0400) != 0
- haveProperties = (self.flag & 0x0800) != 0
- recHdl.appendLineBoolean(" group shape", groupShape)
- recHdl.appendLineBoolean(" child shape", childShape)
- recHdl.appendLineBoolean(" topmost in group", topMostInGroup)
- recHdl.appendLineBoolean(" deleted", deleted)
- recHdl.appendLineBoolean(" OLE object shape", oleObject)
- recHdl.appendLineBoolean(" have valid master", haveMaster)
- recHdl.appendLineBoolean(" horizontally flipped", flipHorizontal)
- recHdl.appendLineBoolean(" vertically flipped", flipVertical)
- recHdl.appendLineBoolean(" connector shape", isConnector)
- recHdl.appendLineBoolean(" have anchor", haveAnchor)
- recHdl.appendLineBoolean(" background shape", background)
- recHdl.appendLineBoolean(" have shape type property", haveProperties)
-
-
-class FSPGR:
- def __init__ (self, strm):
- self.left = strm.readSignedInt(4)
- self.top = strm.readSignedInt(4)
- self.right = strm.readSignedInt(4)
- self.bottom = strm.readSignedInt(4)
-
- def appendLines (self, recHdl, rh):
- recHdl.appendLine("FSPGR content (coordinate system of group shape):")
- recHdl.appendLine(" left boundary: %d"%self.left)
- recHdl.appendLine(" top boundary: %d"%self.top)
- recHdl.appendLine(" right boundary: %d"%self.right)
- recHdl.appendLine(" bottom boundary: %d"%self.bottom)
-
-
-class FConnectorRule:
- def __init__ (self, strm):
- self.ruleID = strm.readUnsignedInt(4)
- self.spIDA = strm.readUnsignedInt(4)
- self.spIDB = strm.readUnsignedInt(4)
- self.spIDC = strm.readUnsignedInt(4)
- self.conSiteIDA = strm.readUnsignedInt(4)
- self.conSiteIDB = strm.readUnsignedInt(4)
-
- def appendLines (self, recHdl, rh):
- recHdl.appendLine("FConnectorRule content:")
- recHdl.appendLine(" rule ID: %d"%self.ruleID)
- recHdl.appendLine(" ID of the shape where the connector starts: %d"%self.spIDA)
- recHdl.appendLine(" ID of the shape where the connector ends: %d"%self.spIDB)
- recHdl.appendLine(" ID of the connector shape: %d"%self.spIDB)
- recHdl.appendLine(" ID of the connection site in the begin shape: %d"%self.conSiteIDA)
- recHdl.appendLine(" ID of the connection site in the end shape: %d"%self.conSiteIDB)
-
-
-class MSOCR:
- def __init__ (self, strm):
- self.red = strm.readUnsignedInt(1)
- self.green = strm.readUnsignedInt(1)
- self.blue = strm.readUnsignedInt(1)
- flag = strm.readUnsignedInt(1)
- self.isSchemeIndex = (flag & 0x08) != 0
-
- def appendLines (self, recHdl, rh):
- recHdl.appendLine("MSOCR content (color index)")
- if self.isSchemeIndex:
- recHdl.appendLine(" scheme index: %d"%self.red)
- else:
- recHdl.appendLine(" RGB color: (red=%d, green=%d, blue=%d)"%(self.red, self.green, self.blue))
-
-class SplitMenuColorContainer:
- def __init__ (self, strm):
- self.smca = []
- # this container contains 4 MSOCR records.
- for i in xrange(0, 4):
- msocr = MSOCR(strm)
- self.smca.append(msocr)
-
- def appendLines (self, recHdl, rh):
- for msocr in self.smca:
- msocr.appendLines(recHdl, rh)
-
-
-class FClientAnchorSheet:
- """Excel-specific anchor data (OfficeArtClientAnchorSheet)"""
-
- def __init__ (self, strm):
- # dx is 1/1024th of the underlying cell's width.
- # dy is 1/1024th of the underlying cell's height.
- flag = strm.readUnsignedInt(2)
- self.moveWithCells = (flag & 0x0001) != 0
- self.resizeWithCells = (flag & 0x0002 != 0)
- self.col1 = strm.readUnsignedInt(2)
- self.dx1 = strm.readUnsignedInt(2)
- self.row1 = strm.readUnsignedInt(2)
- self.dy1 = strm.readUnsignedInt(2)
- self.col2 = strm.readUnsignedInt(2)
- self.dx2 = strm.readUnsignedInt(2)
- self.row2 = strm.readUnsignedInt(2)
- self.dy2 = strm.readUnsignedInt(2)
-
- def appendLines (self, recHdl, rh):
- recHdl.appendLine("Client anchor (Excel):")
- recHdl.appendLine(" cols: %d-%d rows: %d-%d"%(self.col1, self.col2, self.row1, self.row2))
- recHdl.appendLine(" dX1: %d dY1: %d"%(self.dx1, self.dy1))
- recHdl.appendLine(" dX2: %d dY2: %d"%(self.dx2, self.dy2))
- recHdl.appendLineBoolean(" move with cells", self.moveWithCells)
- recHdl.appendLineBoolean(" resize with cells", self.resizeWithCells)
-
- def fillModel (self, model, sheet):
- obj = xlsmodel.Shape(self.col1, self.row1, self.dx1, self.dy1, self.col2, self.row2, self.dx2, self.dy2)
- sheet.addShape(obj)
-
-# ----------------------------------------------------------------------------
-
-recData = {
- RecordHeader.Type.FDG: FDG,
- RecordHeader.Type.FSPGR: FSPGR,
- RecordHeader.Type.FSP: FSP,
- RecordHeader.Type.FDGGBlock: FDGGBlock,
- RecordHeader.Type.FConnectorRule: FConnectorRule,
- RecordHeader.Type.FDGSL: FDGSL,
- RecordHeader.Type.FClientAnchor: FClientAnchorSheet,
- RecordHeader.Type.SplitMenuColorContainer: SplitMenuColorContainer
-}
-
-class MSODrawHandler(globals.ByteStream):
-
- def __init__ (self, bytes, parent):
- """The 'parent' instance must have appendLine() method that takes one string argument."""
-
- globals.ByteStream.__init__(self, bytes)
- self.parent = parent
-
- def readFOPT (self, rh):
- fopt = FOPT()
- strm = globals.ByteStream(self.readBytes(rh.recLen))
- while not strm.isEndOfRecord():
- entry = FOPT.E()
- val = strm.readUnsignedInt(2)
- entry.ID = (val & 0x3FFF)
- entry.flagBid = (val & 0x4000) # if true, the value is a blip ID.
- entry.flagComplex = (val & 0x8000) # if true, the value stores the size of the extra bytes.
- entry.value = strm.readSignedInt(4)
- if entry.flagComplex:
- entry.extra = strm.readBytes(entry.value)
- fopt.properties.append(entry)
-
- return fopt
-
- def parseBytes (self):
- while not self.isEndOfRecord():
- self.parent.appendLine(headerLine())
- rh = RecordHeader(self)
- rh.appendLines(self.parent, 0)
- # if rh.recType == Type.dgContainer:
- if rh.recVer == 0xF:
- # container
- continue
-
- self.parent.appendLine(headerLine())
- if recData.has_key(rh.recType):
- obj = recData[rh.recType](self)
- obj.appendLines(self.parent, rh)
- elif rh.recType == RecordHeader.Type.FOPT:
- fopt = self.readFOPT(rh)
- fopt.appendLines(self.parent, rh)
- else:
- # unknown object
- bytes = self.readBytes(rh.recLen)
- self.parent.appendLine(globals.getRawBytes(bytes, True, False))
-
- def fillModel (self, model):
- sheet = model.getCurrentSheet()
- while not self.isEndOfRecord():
- rh = RecordHeader(self)
- if rh.recVer == 0xF:
- # container
- continue
-
- if rh.recType == RecordHeader.Type.FClientAnchor and \
- model.hostApp == globals.ModelBase.HostAppType.Excel:
- obj = FClientAnchorSheet(self)
- obj.fillModel(model, sheet)
- else:
- # unknown object
- bytes = self.readBytes(rh.recLen)
-
-
diff --git a/scratch/mso-dumper/src/node.py b/scratch/mso-dumper/src/node.py
deleted file mode 100644
index 777655d..0000000
--- a/scratch/mso-dumper/src/node.py
+++ /dev/null
@@ -1,219 +0,0 @@
-########################################################################
-#
-# Copyright (c) 2010 Kohei Yoshida
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
-
-# This file (node.py) gets copied in several of my projects. Find out a way
-# to avoid making duplicate copies in each of my projects.
-
-import sys
-
-class NodeType:
- # unknown node type.
- Unknown = 0
- # the document root - typically has only one child element, but it can
- # have multiple children.
- Root = 1
- # node that has name and attributes, and may have child nodes.
- Element = 2
- # node that only has textural content.
- Content = 3
-
-class NodeBase:
- def __init__ (self, nodeType = NodeType.Unknown):
- self.parent = None
- self.nodeType = nodeType
-
- self.__children = []
- self.__hasContent = False
-
- def appendChild (self, node):
- self.__children.append(node)
- node.parent = self
-
- def appendElement (self, name):
- node = Element(name)
- self.appendChild(node)
- return node
-
- def hasContent (self):
- return self.__hasContent
-
- def appendContent (self, text):
- node = Content(text)
- self.appendChild(node)
- self.__hasContent = True
- return node
-
- def firstChild (self):
- return self.__children[0]
-
- def setChildNodes (self, children):
- self.__children = children
-
- def getChildNodes (self):
- return self.__children
-
- def firstChildByName (self, name):
- for child in self.__children:
- if child.nodeType == NodeType.Element and child.name == name:
- return child
- return None
-
- def getChildByName (self, name):
- children = []
- for child in self.__children:
- if child.nodeType == NodeType.Element and child.name == name:
- children.append(child)
- return children
-
-class Root(NodeBase):
- def __init__ (self):
- NodeBase.__init__(self, NodeType.Root)
-
-class Content(NodeBase):
- def __init__ (self, content):
- NodeBase.__init__(self, NodeType.Content)
- self.content = content
-
-class Element(NodeBase):
- def __init__ (self, name, attrs=None):
- NodeBase.__init__(self, NodeType.Element)
- self.name = name
- self.attrs = attrs
- if self.attrs == None:
- self.attrs = {}
-
- def getContent (self):
- text = ''
- first = True
- for child in self.getChildNodes():
- if first:
- first = False
- else:
- text += ' '
- if child.nodeType == NodeType.Content:
- text += child.content
- elif child.nodeType == NodeType.Element:
- text += child.getContent()
- return text
-
- def getAttr (self, name):
- if not self.attrs.has_key(name):
- return None
- return self.attrs[name]
-
- def setAttr (self, name, val):
- self.attrs[name] = val
-
- def hasAttr (self, name):
- return self.attrs.has_key(name)
-
-encodeTable = {
- '>': 'gt',
- '<': 'lt',
- '&': 'amp',
- '"': 'quot',
- '\'': 'apos'
-}
-
-def encodeString (sin):
- sout = ''
- for c in sin:
- if ord(c) >= 128:
- # encode non-ascii ranges.
- sout += "\\x%2.2x"%ord(c)
- elif encodeTable.has_key(c):
- # encode html symbols.
- sout += '&' + encodeTable[c] + ';'
- else:
- sout += c
-
- return sout
-
-def convertAttrValue (val):
- if type(val) == type(True):
- if val:
- val = "true"
- else:
- val = "false"
- elif type(val) == type(0) or type(val) == type(0L):
- val = "%d"%val
- elif type(val) == type(0.0):
- val = "%g"%val
-
- return val
-
-def prettyPrint (fd, node):
- printNode(fd, node, 0, True)
-
-def printNode (fd, node, level, breakLine):
- singleIndent = ''
- lf = ''
- if breakLine:
- singleIndent = ' '*4
- lf = "\n"
- indent = singleIndent*level
- if node.nodeType == NodeType.Root:
- # root node itself only contains child nodes.
- for child in node.getChildNodes():
- printNode(fd, child, level, True)
- elif node.nodeType == NodeType.Element:
- hasChildren = len(node.getChildNodes()) > 0
-
- # We add '<' and '>' (or '/>') after the element content gets
- # encoded.
- line = node.name
- if len(node.attrs) > 0:
- keys = node.attrs.keys()
- keys.sort()
- for key in keys:
- val = node.attrs[key]
- if val == None:
- continue
- val = convertAttrValue(val)
- line += " " + key + '="' + encodeString(val) + '"'
-
- if hasChildren:
- breakChildren = breakLine and not node.hasContent()
- line = "<%s>"%line
- if breakChildren:
- line += "\n"
- fd.write (indent + line)
- for child in node.getChildNodes():
- printNode(fd, child, level+1, breakChildren)
- line = "</%s>%s"%(node.name, lf)
- if breakChildren:
- line = indent + line
- fd.write (line)
- else:
- line = "<%s/>%s"%(line, lf)
- fd.write (indent + line)
-
- elif node.nodeType == NodeType.Content:
- content = node.content
- content = encodeString(content)
- if len(content) > 0:
- fd.write (indent + content + lf)
diff --git a/scratch/mso-dumper/src/ole.py b/scratch/mso-dumper/src/ole.py
deleted file mode 100644
index 9b01928..0000000
--- a/scratch/mso-dumper/src/ole.py
+++ /dev/null
@@ -1,757 +0,0 @@
-########################################################################
-#
-# Copyright (c) 2010 Kohei Yoshida
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-########################################################################
-
-import sys
-import globals
-from globals import getSignedInt
-# ----------------------------------------------------------------------------
-# Reference: The Microsoft Compound Document File Format by Daniel Rentz
-# http://sc.openoffice.org/compdocfileformat.pdf
-# ----------------------------------------------------------------------------
-
-from globals import output
-
-
-class NoRootStorage(Exception): pass
-
-class ByteOrder:
- LittleEndian = 0
- BigEndian = 1
- Unknown = 2
-
-class BlockType:
- MSAT = 0
- SAT = 1
- SSAT = 2
- Directory = 3
-
-class StreamLocation:
- SAT = 0
- SSAT = 1
-
-class Header(object):
-
- @staticmethod
- def byteOrder (chars):
- b1, b2 = ord(chars[0]), ord(chars[1])
- if b1 == 0xFE and b2 == 0xFF:
- return ByteOrder.LittleEndian
- elif b1 == 0xFF and b2 == 0xFE:
- return ByteOrder.BigEndian
- else:
- return ByteOrder.Unknown
-
-
- def __init__ (self, bytes, params):
- self.bytes = bytes
- self.MSAT = None
-
- self.docId = None
- self.uId = None
- self.revision = 0
- self.version = 0
- self.byteOrder = ByteOrder.Unknown
- self.minStreamSize = 0
-
- self.numSecMSAT = 0
- self.numSecSSAT = 0
- self.numSecSAT = 0
-
- self.__secIDFirstMSAT = -2
- self.__secIDFirstDirStrm = -2
- self.__secIDFirstSSAT = -2
-
- self.secSize = 512
- self.secSizeShort = 64
-
- self.params = params
-
- def getSectorSize (self):
- return 2**self.secSize
-
-
- def getShortSectorSize (self):
- return 2**self.secSizeShort
-
-
- def getFirstSectorID (self, blockType):
- if blockType == BlockType.MSAT:
- return self.__secIDFirstMSAT
- elif blockType == BlockType.SSAT:
- return self.__secIDFirstSSAT
- elif blockType == BlockType.Directory:
- return self.__secIDFirstDirStrm
- return -2
-
-
- def output (self):
-
- def printRawBytes (bytes):
- for b in bytes:
- output("%2.2X "%ord(b))
- output("\n")
-
- def printSep (c='-', w=68, prefix=''):
- print(prefix + c*w)
-
- printSep('=', 68)
- print("Compound Document Header")
- printSep('-', 68)
-
- if self.params.debug:
- globals.dumpBytes(self.bytes[0:512])
- printSep('-', 68)
-
- # document ID and unique ID
- output("Document ID: ")
- printRawBytes(self.docId)
- output("Unique ID: ")
- printRawBytes(self.uId)
-
- # revision and version
- print("Revision: %d Version: %d"%(self.revision, self.version))
-
- # byte order
- output("Byte order: ")
- if self.byteOrder == ByteOrder.LittleEndian:
- print("little endian")
- elif self.byteOrder == ByteOrder.BigEndian:
- print("big endian")
- else:
- print("unknown")
-
- # sector size (usually 512 bytes)
- print("Sector size: %d (%d)"%(2**self.secSize, self.secSize))
-
- # short sector size (usually 64 bytes)
- print("Short sector size: %d (%d)"%(2**self.secSizeShort, self.secSizeShort))
-
- # total number of sectors in SAT (equals the number of sector IDs
- # stored in the MSAT).
- print("Total number of sectors used in SAT: %d"%self.numSecSAT)
-
- print("Sector ID of the first sector of the directory stream: %d"%
- self.__secIDFirstDirStrm)
-
- print("Minimum stream size: %d"%self.minStreamSize)
-
- if self.__secIDFirstSSAT == -2:
- print("Sector ID of the first SSAT sector: [none]")
- else:
- print("Sector ID of the first SSAT sector: %d"%self.__secIDFirstSSAT)
-
- print("Total number of sectors used in SSAT: %d"%self.numSecSSAT)
-
- if self.__secIDFirstMSAT == -2:
- # There is no more sector ID stored outside the header.
- print("Sector ID of the first MSAT sector: [end of chain]")
- else:
- # There is more sector IDs than 109 IDs stored in the header.
- print("Sector ID of the first MSAT sector: %d"%(self.__secIDFirstMSAT))
-
- print("Total number of sectors used to store additional MSAT: %d"%self.numSecMSAT)
-
-
- def parse (self):
-
- # document ID and unique ID
- self.docId = self.bytes[0:8]
- self.uId = self.bytes[8:24]
-
- # revision and version
- self.revision = getSignedInt(self.bytes[24:26])
- self.version = getSignedInt(self.bytes[26:28])
-
- # byte order
- self.byteOrder = Header.byteOrder(self.bytes[28:30])
-
- # sector size (usually 512 bytes)
- self.secSize = getSignedInt(self.bytes[30:32])
-
- # short sector size (usually 64 bytes)
- self.secSizeShort = getSignedInt(self.bytes[32:34])
-
- # total number of sectors in SAT (equals the number of sector IDs
- # stored in the MSAT).
- self.numSecSAT = getSignedInt(self.bytes[44:48])
-
- self.__secIDFirstDirStrm = getSignedInt(self.bytes[48:52])
- self.minStreamSize = getSignedInt(self.bytes[56:60])
- self.__secIDFirstSSAT = getSignedInt(self.bytes[60:64])
- self.numSecSSAT = getSignedInt(self.bytes[64:68])
- self.__secIDFirstMSAT = getSignedInt(self.bytes[68:72])
- self.numSecMSAT = getSignedInt(self.bytes[72:76])
-
- # master sector allocation table
- self.MSAT = MSAT(2**self.secSize, self.bytes, self.params)
-
- # First part of MSAT consisting of an array of up to 109 sector IDs.
- # Each sector ID is 4 bytes in length.
- for i in xrange(0, 109):
- pos = 76 + i*4
- id = getSignedInt(self.bytes[pos:pos+4])
- if id == -1:
- break
-
- self.MSAT.appendSectorID(id)
-
- if self.__secIDFirstMSAT != -2:
- # additional sectors are used to store more SAT sector IDs.
- secID = self.__secIDFirstMSAT
- size = self.getSectorSize()
- inLoop = True
- while inLoop:
- pos = 512 + secID*size
- bytes = self.bytes[pos:pos+size]
- n = int(size/4)
- for i in xrange(0, n):
- pos = i*4
- id = getSignedInt(bytes[pos:pos+4])
- if id < 0:
- inLoop = False
- break
- elif i == n-1:
- # last sector ID - points to the next MSAT sector.
- secID = id
- break
- else:
- self.MSAT.appendSectorID(id)
-
- return 512
-
-
- def getMSAT (self):
- return self.MSAT
-
-
- def getSAT (self):
- return self.MSAT.getSAT()
-
-
- def getSSAT (self):
- ssatID = self.getFirstSectorID(BlockType.SSAT)
- if ssatID < 0:
- return None
- chain = self.getSAT().getSectorIDChain(ssatID)
- if len(chain) == 0:
- return None
- obj = SSAT(2**self.secSize, self.bytes, self.params)
- for secID in chain:
- obj.addSector(secID)
- obj.buildArray()
- return obj
-
-
- def getDirectory (self):
- dirID = self.getFirstSectorID(BlockType.Directory)
- if dirID < 0:
- return None
- chain = self.getSAT().getSectorIDChain(dirID)
- if len(chain) == 0:
- return None
- obj = Directory(self, self.params)
- for secID in chain:
- obj.addSector(secID)
- return obj
-
-
- def dummy ():
- pass
-
-
-
-
-class MSAT(object):
- """Master Sector Allocation Table (MSAT)
-
-This class represents the master sector allocation table (MSAT) that stores
-sector IDs that point to all the sectors that are used by the sector
-allocation table (SAT). The actual SAT are to be constructed by combining
-all the sectors pointed by the sector IDs in order of occurrence.
-"""
- def __init__ (self, sectorSize, bytes, params):
- self.sectorSize = sectorSize
- self.secIDs = []
- self.bytes = bytes
- self.__SAT = None
-
- self.params = params
-
- def appendSectorID (self, id):
- self.secIDs.append(id)
-
- def output (self):
- print('')
- print("="*68)
- print("Master Sector Allocation Table (MSAT)")
- print("-"*68)
-
- for id in self.secIDs:
- print("sector ID: %5d (pos: %7d)"%(id, 512+id*self.sectorSize))
-
- def getSATSectorPosList (self):
- list = []
- for id in self.secIDs:
- pos = 512 + id*self.sectorSize
- list.append([id, pos])
- return list
-
- def getSAT (self):
- if self.__SAT != None:
- return self.__SAT
-
- obj = SAT(self.sectorSize, self.bytes, self.params)
- for id in self.secIDs:
- obj.addSector(id)
- obj.buildArray()
- self.__SAT = obj
- return self.__SAT
-
-
-class SAT(object):
- """Sector Allocation Table (SAT)
-"""
- def __init__ (self, sectorSize, bytes, params):
- self.sectorSize = sectorSize
- self.sectorIDs = []
- self.bytes = bytes
- self.array = []
- self.params = params
-
-
- def getSectorSize (self):
- return self.sectorSize
-
-
- def addSector (self, id):
- self.sectorIDs.append(id)
-
-
- def buildArray (self):
- if len(self.array) > 0:
- # array already built.
- return
-
- numItems = int(self.sectorSize/4)
- self.array = []
- for secID in self.sectorIDs:
- pos = 512 + secID*self.sectorSize
- for i in xrange(0, numItems):
- beginPos = pos + i*4
- id = getSignedInt(self.bytes[beginPos:beginPos+4])
- self.array.append(id)
-
-
- def outputRawBytes (self):
- bytes = ""
- for secID in self.sectorIDs:
- pos = 512 + secID*self.sectorSize
- bytes += self.bytes[pos:pos+self.sectorSize]
- globals.dumpBytes(bytes, 512)
-
-
- def outputArrayStats (self):
- sectorTotal = len(self.array)
- sectorP = 0 # >= 0
- sectorM1 = 0 # -1
- sectorM2 = 0 # -2
- sectorM3 = 0 # -3
- sectorM4 = 0 # -4
- sectorMElse = 0 # < -4
- sectorLiveTotal = 0
- for i in xrange(0, len(self.array)):
- item = self.array[i]
- if item >= 0:
- sectorP += 1
- elif item == -1:
- sectorM1 += 1
- elif item == -2:
- sectorM2 += 1
- elif item == -3:
- sectorM3 += 1
- elif item == -4:
- sectorM4 += 1
... etc. - the rest is truncated
More information about the Libreoffice-commits
mailing list