[ooo-build-commit] scratch/mso-dumper
Kohei Yoshida
kohei at kemper.freedesktop.org
Wed Dec 30 13:04:54 PST 2009
scratch/mso-dumper/src/node.py | 17 ++++
scratch/mso-dumper/src/xlsmodel.py | 126 +++++++++++++++++++++++++++++++++++-
scratch/mso-dumper/src/xlsrecord.py | 83 ++++++++++++++++-------
scratch/mso-dumper/xls-dump.py | 3
4 files changed, 200 insertions(+), 29 deletions(-)
New commits:
commit 4310ed8d381192624509ec0c1c38f4ac52bc71ee
Author: Kohei Yoshida <kyoshida at novell.com>
Date: Wed Dec 30 16:03:13 2009 -0500
[xls-dump] More work on dumping canonical XML format.
* scratch/mso-dumper/src/node.py:
* scratch/mso-dumper/src/xlsmodel.py:
* scratch/mso-dumper/src/xlsrecord.py:
* scratch/mso-dumper/xls-dump.py:
diff --git a/scratch/mso-dumper/src/node.py b/scratch/mso-dumper/src/node.py
index e8d9119..ecaf2ad 100644
--- a/scratch/mso-dumper/src/node.py
+++ b/scratch/mso-dumper/src/node.py
@@ -120,6 +120,16 @@ def encodeString (sin):
return sout
+def convertAttrValue (val):
+ if type(val) == type(True):
+ if val:
+ val = "true"
+ else:
+ val = "false"
+ elif type(val) == type(0):
+ val = "%d"%val
+
+ return val
def prettyPrint (fd, node):
printNode(fd, node, 0)
@@ -141,7 +151,12 @@ def printNode (fd, node, level):
keys = node.attrs.keys()
keys.sort()
for key in keys:
- line += " " + key + '="' + encodeString(node.attrs[key]) + '"'
+ val = node.attrs[key]
+ if val == None:
+ continue
+ val = convertAttrValue(val)
+ line += " " + key + '="' + encodeString(val) + '"'
+
if hasChildren:
line = "<%s>\n"%line
fd.write (indent + line)
diff --git a/scratch/mso-dumper/src/xlsmodel.py b/scratch/mso-dumper/src/xlsmodel.py
index 5a572d0..06b4eab 100644
--- a/scratch/mso-dumper/src/xlsmodel.py
+++ b/scratch/mso-dumper/src/xlsmodel.py
@@ -1,11 +1,133 @@
import globals, node
-class Workbook(object):
+
+class ModelType:
+ Workbook = 0
+ Unknown = 999
+
+
+class ModelBase(object):
+ def __init__ (self, modelType=ModelType.Unknown):
+ self.modelType = modelType
+
+
+class Workbook(ModelBase):
def __init__ (self):
- pass
+ ModelBase.__init__(self, ModelType.Workbook)
+
+ # public members
+ self.encrypted = False
+
+ # private members
+ self.__sheets = []
+
+ def appendSheet (self):
+ if len(self.__sheets) == 0:
+ self.__sheets.append(WorkbookGlobal())
+ else:
+ self.__sheets.append(Worksheet())
+
+ return self.getCurrentSheet()
+
+ def getWorkbookGlobal (self):
+ return self.__sheets[0]
+
+ def getCurrentSheet (self):
+ return self.__sheets[-1]
def createDOM (self):
nd = node.Element('workbook')
+ nd.setAttr('encrypted', self.encrypted)
+ n = len(self.__sheets)
+ if n == 0:
+ return
+
+ wbglobal = self.__sheets[0]
+ nd.appendChild(wbglobal.createDOM())
+ for i in xrange(1, n):
+ sheet = self.__sheets[i]
+ sheetNode = sheet.createDOM()
+ nd.appendChild(sheetNode)
+ if i > 0:
+ data = wbglobal.getSheetData(i-1)
+ sheetNode.setAttr('name', data.name)
+ sheetNode.setAttr('visible', data.visible)
+
+ return nd
+
+
+class SheetModelType:
+ WorkbookGlobal = 0
+ Worksheet = 1
+
+
+class SheetBase(object):
+ def __init__ (self, modelType):
+ self.modelType = modelType
+ self.version = None
+
+ def createDOM (self):
+ nd = node.Element('sheet')
+ return nd
+
+
+class WorkbookGlobal(SheetBase):
+ class SheetData:
+ def __init__ (self):
+ self.name = None
+ self.visible = True
+
+ def __init__ (self):
+ SheetBase.__init__(self, SheetModelType.WorkbookGlobal)
+
+ self.__sheetData = []
+
+ def createDOM (self):
+ nd = node.Element('workbook-global')
return nd
+
+ def appendSheetData (self, data):
+ self.__sheetData.append(data)
+
+ def getSheetData (self, i):
+ return self.__sheetData[i]
+
+
+class Worksheet(SheetBase):
+
+ def __init__ (self):
+ SheetBase.__init__(self, SheetModelType.Worksheet)
+ self.rows = {}
+
+ def setCell (self, col, row, cell):
+ if not self.rows.has_key(row):
+ self.rows[row] = {}
+
+ self.rows[row][col] = cell
+
+ def createDOM (self):
+ nd = node.Element('worksheet')
+ nd.setAttr('version', self.version)
+ rows = self.rows.keys()
+ rows.sort()
+ for row in rows:
+ rowNode = nd.appendElement('row')
+ rowNode.setAttr('id', row)
+ return nd
+
+
+class CellModelType:
+ Label = 0
+ Unknown = 999
+
+
+class CellBase(object):
+ def __init__ (self, modelType):
+ self.modelType = modelType
+
+class LabelCell(CellBase):
+ def __init__ (self):
+ CellBase.__init__(self, CellModelType.Label)
+
diff --git a/scratch/mso-dumper/src/xlsrecord.py b/scratch/mso-dumper/src/xlsrecord.py
index 2c0e8fc..5b2e544 100644
--- a/scratch/mso-dumper/src/xlsrecord.py
+++ b/scratch/mso-dumper/src/xlsrecord.py
@@ -1,6 +1,6 @@
import struct
-import globals, formula
+import globals, formula, xlsmodel
# -------------------------------------------------------------------
# record handler classes
@@ -153,6 +153,18 @@ class BOF(BaseRecordHandler):
lowestExcelVer = self.readSignedInt(4)
self.appendLine("earliest Excel version that can read all records: %d"%lowestExcelVer)
+ def fillModel (self, model):
+ if model.modelType != xlsmodel.ModelType.Workbook:
+ return
+
+ sheet = model.appendSheet()
+ ver = self.readUnsignedInt(2)
+ s = 'not BIFF8'
+ if ver == 0x0600:
+ s = 'BIFF8'
+ sheet.version = s
+
+
class BoundSheet(BaseRecordHandler):
@@ -177,19 +189,28 @@ class BoundSheet(BaseRecordHandler):
else:
return 'unknown'
- def parseBytes (self):
- posBOF = self.readUnsignedInt(4)
+ def __parseBytes (self):
+ self.posBOF = self.readUnsignedInt(4)
flags = self.readUnsignedInt(2)
textLen = self.readUnsignedInt(1)
- text, textLen = globals.getRichText(self.readRemainingBytes(), textLen)
- self.appendLine("BOF position in this stream: %d"%posBOF)
- self.appendLine("sheet name: %s"%text)
+ self.name, textLen = globals.getRichText(self.readRemainingBytes(), textLen)
+ self.hiddenState = (flags & 0x0003)
+ self.sheetType = (flags & 0xFF00)
- hiddenState = (flags & 0x0003)
- self.appendLine("hidden state: %s"%BoundSheet.getHiddenState(hiddenState))
+ def parseBytes (self):
+ self.__parseBytes()
+ self.appendLine("BOF position in this stream: %d"%self.posBOF)
+ self.appendLine("sheet name: %s"%self.name)
+ self.appendLine("hidden state: %s"%BoundSheet.getHiddenState(self.hiddenState))
+ self.appendLine("sheet type: %s"%BoundSheet.getSheetType(self.sheetType))
- sheetType = (flags & 0xFF00)
- self.appendLine("sheet type: %s"%BoundSheet.getSheetType(sheetType))
+ def fillModel (self, model):
+ self.__parseBytes()
+ wbglobal = model.getWorkbookGlobal()
+ data = xlsmodel.WorkbookGlobal.SheetData()
+ data.name = self.name
+ data.visible = not self.hiddenState
+ wbglobal.appendSheetData(data)
class Dimensions(BaseRecordHandler):
@@ -328,27 +349,39 @@ class Array(BaseRecordHandler):
class Label(BaseRecordHandler):
- def parseBytes (self):
- col = self.readUnsignedInt(2)
- row = self.readUnsignedInt(2)
- xfIdx = self.readUnsignedInt(2)
+ def __parseBytes (self):
+ self.col = self.readUnsignedInt(2)
+ self.row = self.readUnsignedInt(2)
+ self.xfIdx = self.readUnsignedInt(2)
textLen = self.readUnsignedInt(2)
- text, textLen = globals.getRichText(self.readRemainingBytes(), textLen)
- self.appendCellPosition(col, row)
- self.appendLine("XF record ID: %d"%xfIdx)
- self.appendLine("label text: %s"%text)
+ self.text, textLen = globals.getRichText(self.readRemainingBytes(), textLen)
+
+ def parseBytes (self):
+ self.__parseBytes()
+ self.appendCellPosition(self.col, self.row)
+ self.appendLine("XF record ID: %d"%self.xfIdx)
+ self.appendLine("label text: %s"%self.text)
class LabelSST(BaseRecordHandler):
+ def __parseBytes (self):
+ self.row = self.readUnsignedInt(2)
+ self.col = self.readUnsignedInt(2)
+ self.xfIdx = self.readUnsignedInt(2)
+ self.strId = self.readUnsignedInt(4)
+
def parseBytes (self):
- col = self.readUnsignedInt(2)
- row = self.readUnsignedInt(2)
- xfIdx = self.readUnsignedInt(2)
- strId = self.readUnsignedInt(4)
- self.appendCellPosition(col, row)
- self.appendLine("XF record ID: %d"%xfIdx)
- self.appendLine("string ID in SST: %d"%strId)
+ self.__parseBytes()
+ self.appendCellPosition(self.col, self.row)
+ self.appendLine("XF record ID: %d"%self.xfIdx)
+ self.appendLine("string ID in SST: %d"%self.strId)
+
+ def fillModel (self, model):
+ self.__parseBytes()
+ sheet = model.getCurrentSheet()
+ cell = xlsmodel.LabelCell()
+ sheet.setCell(self.col, self.row, cell)
class Number(BaseRecordHandler):
diff --git a/scratch/mso-dumper/xls-dump.py b/scratch/mso-dumper/xls-dump.py
index 56e025b..63c2672 100755
--- a/scratch/mso-dumper/xls-dump.py
+++ b/scratch/mso-dumper/xls-dump.py
@@ -51,8 +51,9 @@ class XLDumper(object):
dirstrm = self.strm.getDirectoryStreamByName(dirname)
wbmodel = self.__buildWorkbookModel(dirstrm)
+ wbmodel.encrypted = self.strmData.encrypted
root.appendChild(wbmodel.createDOM())
-
+
node.prettyPrint(sys.stdout, docroot)
def dump (self):
More information about the ooo-build-commit
mailing list