[ooo-build-commit] scratch/mso-dumper

Kohei Yoshida kohei at kemper.freedesktop.org
Wed Dec 30 13:04:54 PST 2009


 scratch/mso-dumper/src/node.py      |   17 ++++
 scratch/mso-dumper/src/xlsmodel.py  |  126 +++++++++++++++++++++++++++++++++++-
 scratch/mso-dumper/src/xlsrecord.py |   83 ++++++++++++++++-------
 scratch/mso-dumper/xls-dump.py      |    3 
 4 files changed, 200 insertions(+), 29 deletions(-)

New commits:
commit 4310ed8d381192624509ec0c1c38f4ac52bc71ee
Author: Kohei Yoshida <kyoshida at novell.com>
Date:   Wed Dec 30 16:03:13 2009 -0500

    [xls-dump] More work on dumping canonical XML format.
    
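    * scratch/mso-dumper/src/node.py: add convertAttrValue() to turn
      boolean and integer attribute values into strings, and skip
      attributes whose value is None when printing a node.
    * scratch/mso-dumper/src/xlsmodel.py: add ModelBase, make Workbook hold
      a list of sheets, and add the sheet models (WorkbookGlobal,
      Worksheet) and cell models (CellBase, LabelCell).
    * scratch/mso-dumper/src/xlsrecord.py: add fillModel() to the BOF,
      BoundSheet and LabelSST handlers, and split the byte parsing of
      BoundSheet, Label and LabelSST out of parseBytes().
    * scratch/mso-dumper/xls-dump.py: pass the stream's encrypted flag to
      the workbook model before creating its DOM.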

diff --git a/scratch/mso-dumper/src/node.py b/scratch/mso-dumper/src/node.py
index e8d9119..ecaf2ad 100644
--- a/scratch/mso-dumper/src/node.py
+++ b/scratch/mso-dumper/src/node.py
@@ -120,6 +120,16 @@ def encodeString (sin):
 
     return sout
 
+def convertAttrValue (val):
+    if type(val) == type(True):
+        if val:
+            val = "true"
+        else:
+            val = "false"
+    elif type(val) == type(0):
+        val = "%d"%val
+
+    return val
 
 def prettyPrint (fd, node):
     printNode(fd, node, 0)
@@ -141,7 +151,12 @@ def printNode (fd, node, level):
             keys = node.attrs.keys()
             keys.sort()
             for key in keys:
-                line += " " + key + '="' + encodeString(node.attrs[key]) + '"'
+                val = node.attrs[key]
+                if val == None:
+                    continue
+                val = convertAttrValue(val)
+                line += " " + key + '="' + encodeString(val) + '"'
+
         if hasChildren:
             line = "<%s>\n"%line
             fd.write (indent + line)
diff --git a/scratch/mso-dumper/src/xlsmodel.py b/scratch/mso-dumper/src/xlsmodel.py
index 5a572d0..06b4eab 100644
--- a/scratch/mso-dumper/src/xlsmodel.py
+++ b/scratch/mso-dumper/src/xlsmodel.py
@@ -1,11 +1,133 @@
 
 import globals, node
 
-class Workbook(object):
+
+class ModelType:
+    Workbook = 0
+    Unknown = 999
+
+
+class ModelBase(object):
+    def __init__ (self, modelType=ModelType.Unknown):
+        self.modelType = modelType
+
+
+class Workbook(ModelBase):
 
     def __init__ (self):
-        pass
+        ModelBase.__init__(self, ModelType.Workbook)
+
+        # public members
+        self.encrypted = False
+
+        # private members
+        self.__sheets = []
+
+    def appendSheet (self):
+        if len(self.__sheets) == 0:
+            self.__sheets.append(WorkbookGlobal())
+        else:
+            self.__sheets.append(Worksheet())
+
+        return self.getCurrentSheet()
+
+    def getWorkbookGlobal (self):
+        return self.__sheets[0]
+
+    def getCurrentSheet (self):
+        return self.__sheets[-1]
 
     def createDOM (self):
         nd = node.Element('workbook')
+        nd.setAttr('encrypted', self.encrypted)
+        n = len(self.__sheets)
+        if n == 0:
+            return
+
+        wbglobal = self.__sheets[0]
+        nd.appendChild(wbglobal.createDOM())
+        for i in xrange(1, n):
+            sheet = self.__sheets[i]
+            sheetNode = sheet.createDOM()
+            nd.appendChild(sheetNode)
+            if i > 0:
+                data = wbglobal.getSheetData(i-1)
+                sheetNode.setAttr('name', data.name)
+                sheetNode.setAttr('visible', data.visible)
+
+        return nd
+
+
+class SheetModelType:
+    WorkbookGlobal = 0
+    Worksheet = 1
+
+
+class SheetBase(object):
+    def __init__ (self, modelType):
+        self.modelType = modelType
+        self.version = None
+
+    def createDOM (self):
+        nd = node.Element('sheet')
+        return nd
+
+
+class WorkbookGlobal(SheetBase):
+    class SheetData:
+        def __init__ (self):
+            self.name = None
+            self.visible = True
+
+    def __init__ (self):
+        SheetBase.__init__(self, SheetModelType.WorkbookGlobal)
+
+        self.__sheetData = []
+
+    def createDOM (self):
+        nd = node.Element('workbook-global')
         return nd
+
+    def appendSheetData (self, data):
+        self.__sheetData.append(data)
+
+    def getSheetData (self, i):
+        return self.__sheetData[i]
+
+
+class Worksheet(SheetBase):
+
+    def __init__ (self):
+        SheetBase.__init__(self, SheetModelType.Worksheet)
+        self.rows = {}
+
+    def setCell (self, col, row, cell):
+        if not self.rows.has_key(row):
+            self.rows[row] = {}
+
+        self.rows[row][col] = cell
+
+    def createDOM (self):
+        nd = node.Element('worksheet')
+        nd.setAttr('version', self.version)
+        rows = self.rows.keys()
+        rows.sort()
+        for row in rows:
+            rowNode = nd.appendElement('row')
+            rowNode.setAttr('id', row)
+        return nd
+
+
+class CellModelType:
+    Label = 0
+    Unknown = 999
+
+
+class CellBase(object):
+    def __init__ (self, modelType):
+        self.modelType = modelType
+
+class LabelCell(CellBase):
+    def __init__ (self):
+        CellBase.__init__(self, CellModelType.Label)
+
diff --git a/scratch/mso-dumper/src/xlsrecord.py b/scratch/mso-dumper/src/xlsrecord.py
index 2c0e8fc..5b2e544 100644
--- a/scratch/mso-dumper/src/xlsrecord.py
+++ b/scratch/mso-dumper/src/xlsrecord.py
@@ -1,6 +1,6 @@
 
 import struct
-import globals, formula
+import globals, formula, xlsmodel
 
 # -------------------------------------------------------------------
 # record handler classes
@@ -153,6 +153,18 @@ class BOF(BaseRecordHandler):
         lowestExcelVer = self.readSignedInt(4)
         self.appendLine("earliest Excel version that can read all records: %d"%lowestExcelVer)
 
+    def fillModel (self, model):
+        if model.modelType != xlsmodel.ModelType.Workbook:
+            return
+
+        sheet = model.appendSheet()
+        ver = self.readUnsignedInt(2)
+        s = 'not BIFF8'
+        if ver == 0x0600:
+            s = 'BIFF8'
+        sheet.version = s
+
+
 
 class BoundSheet(BaseRecordHandler):
 
@@ -177,19 +189,28 @@ class BoundSheet(BaseRecordHandler):
         else:
             return 'unknown'
 
-    def parseBytes (self):
-        posBOF = self.readUnsignedInt(4)
+    def __parseBytes (self):
+        self.posBOF = self.readUnsignedInt(4)
         flags = self.readUnsignedInt(2)
         textLen = self.readUnsignedInt(1)
-        text, textLen = globals.getRichText(self.readRemainingBytes(), textLen)
-        self.appendLine("BOF position in this stream: %d"%posBOF)
-        self.appendLine("sheet name: %s"%text)
+        self.name, textLen = globals.getRichText(self.readRemainingBytes(), textLen)
+        self.hiddenState = (flags & 0x0003)
+        self.sheetType = (flags & 0xFF00)
 
-        hiddenState = (flags & 0x0003)
-        self.appendLine("hidden state: %s"%BoundSheet.getHiddenState(hiddenState))
+    def parseBytes (self):
+        self.__parseBytes()
+        self.appendLine("BOF position in this stream: %d"%self.posBOF)
+        self.appendLine("sheet name: %s"%self.name)
+        self.appendLine("hidden state: %s"%BoundSheet.getHiddenState(self.hiddenState))
+        self.appendLine("sheet type: %s"%BoundSheet.getSheetType(self.sheetType))
 
-        sheetType = (flags & 0xFF00)
-        self.appendLine("sheet type: %s"%BoundSheet.getSheetType(sheetType))
+    def fillModel (self, model):
+        self.__parseBytes()
+        wbglobal = model.getWorkbookGlobal()
+        data = xlsmodel.WorkbookGlobal.SheetData()
+        data.name = self.name
+        data.visible = not self.hiddenState
+        wbglobal.appendSheetData(data)
 
 
 class Dimensions(BaseRecordHandler):
@@ -328,27 +349,39 @@ class Array(BaseRecordHandler):
 
 class Label(BaseRecordHandler):
 
-    def parseBytes (self):
-        col = self.readUnsignedInt(2)
-        row = self.readUnsignedInt(2)
-        xfIdx = self.readUnsignedInt(2)
+    def __parseBytes (self):
+        self.col = self.readUnsignedInt(2)
+        self.row = self.readUnsignedInt(2)
+        self.xfIdx = self.readUnsignedInt(2)
         textLen = self.readUnsignedInt(2)
-        text, textLen = globals.getRichText(self.readRemainingBytes(), textLen)
-        self.appendCellPosition(col, row)
-        self.appendLine("XF record ID: %d"%xfIdx)
-        self.appendLine("label text: %s"%text)
+        self.text, textLen = globals.getRichText(self.readRemainingBytes(), textLen)
+
+    def parseBytes (self):
+        self.__parseBytes()
+        self.appendCellPosition(self.col, self.row)
+        self.appendLine("XF record ID: %d"%self.xfIdx)
+        self.appendLine("label text: %s"%self.text)
 
 
 class LabelSST(BaseRecordHandler):
 
+    def __parseBytes (self):
+        self.row = self.readUnsignedInt(2)
+        self.col = self.readUnsignedInt(2)
+        self.xfIdx = self.readUnsignedInt(2)
+        self.strId = self.readUnsignedInt(4)
+
     def parseBytes (self):
-        col = self.readUnsignedInt(2)
-        row = self.readUnsignedInt(2)
-        xfIdx = self.readUnsignedInt(2)
-        strId = self.readUnsignedInt(4)
-        self.appendCellPosition(col, row)
-        self.appendLine("XF record ID: %d"%xfIdx)
-        self.appendLine("string ID in SST: %d"%strId)
+        self.__parseBytes()
+        self.appendCellPosition(self.col, self.row)
+        self.appendLine("XF record ID: %d"%self.xfIdx)
+        self.appendLine("string ID in SST: %d"%self.strId)
+
+    def fillModel (self, model):
+        self.__parseBytes()
+        sheet = model.getCurrentSheet()
+        cell = xlsmodel.LabelCell()
+        sheet.setCell(self.col, self.row, cell)
 
 
 class Number(BaseRecordHandler):
diff --git a/scratch/mso-dumper/xls-dump.py b/scratch/mso-dumper/xls-dump.py
index 56e025b..63c2672 100755
--- a/scratch/mso-dumper/xls-dump.py
+++ b/scratch/mso-dumper/xls-dump.py
@@ -51,8 +51,9 @@ class XLDumper(object):
 
             dirstrm = self.strm.getDirectoryStreamByName(dirname)
             wbmodel = self.__buildWorkbookModel(dirstrm)
+            wbmodel.encrypted = self.strmData.encrypted
             root.appendChild(wbmodel.createDOM())
-
+        
         node.prettyPrint(sys.stdout, docroot)
 
     def dump (self):


More information about the ooo-build-commit mailing list