[ooo-build-commit] scratch/mso-dumper

Kohei Yoshida kohei at kemper.freedesktop.org
Thu Oct 8 21:45:34 PDT 2009


 scratch/mso-dumper/src/globals.py   |   87 ++++++++++++++++++++++++++++++++++++
 scratch/mso-dumper/src/xlsrecord.py |   27 +++++++++++
 scratch/mso-dumper/src/xlsstream.py |    4 -
 3 files changed, 116 insertions(+), 2 deletions(-)

New commits:
commit 7ec67023573376d3264fe8e252a3ead57d6320ef
Author: Kohei Yoshida <kyoshida at novell.com>
Date:   Fri Oct 9 00:44:52 2009 -0400

    [xls-dump] Added handlers for DBCELL and SST records.
    
    * scratch/mso-dumper/src/globals.py:
    * scratch/mso-dumper/src/xlsrecord.py:
    * scratch/mso-dumper/src/xlsstream.py:

diff --git a/scratch/mso-dumper/src/globals.py b/scratch/mso-dumper/src/globals.py
index df0c161..3e725c4 100644
--- a/scratch/mso-dumper/src/globals.py
+++ b/scratch/mso-dumper/src/globals.py
@@ -39,6 +39,46 @@ class StreamData(object):
         return self.pivotCacheIDs.has_key(name)
 
 
+class ByteStream(object):  # sequential, cursor-based reader over a sliceable sequence of bytes
+
+    def __init__ (self, bytes):
+        self.bytes = bytes  # data to read from; only sliced and measured by this class
+        self.pos = 0  # read cursor, starts at the beginning of the data
+        self.size = len(bytes)  # total length, cached for the end-of-data check
+
+    def readBytes (self, length):  # return the next `length` items and advance the cursor
+        r = self.bytes[self.pos:self.pos+length]  # slicing past the end silently yields fewer items
+        self.pos += length  # cursor moves by `length` even when the slice came back short
+        return r
+
+    def readRemainingBytes (self):  # return everything from the cursor to the end
+        r = self.bytes[self.pos:]
+        self.pos = self.size  # cursor is left at the end of the data
+        return r
+
+    def getCurrentPos (self):  # current cursor offset from the start of the data
+        return self.pos
+
+    def setCurrentPos (self, pos):  # move the cursor; `pos` is stored as-is, without range checks
+        self.pos = pos
+
+    def isEndOfRecord (self):  # True only when the cursor sits exactly at the end
+        return (self.pos == self.size)  # NOTE(review): strict equality - False if an over-read pushed pos past size
+
+    def readUnsignedInt (self, length):  # read `length` bytes and convert them with getUnsignedInt()
+        bytes = self.readBytes(length)
+        return getUnsignedInt(bytes)  # helper is not part of this hunk; presumably defined elsewhere in globals.py
+
+    def readSignedInt (self, length):  # read `length` bytes and convert them with getSignedInt()
+        bytes = self.readBytes(length)
+        return getSignedInt(bytes)  # helper is not part of this hunk; presumably defined elsewhere in globals.py
+
+    def readDouble (self):  # read one double and convert it with getDouble()
+        # double is always 8 bytes.
+        bytes = self.readBytes(8)
+        return getDouble(bytes)  # helper is not part of this hunk; presumably defined elsewhere in globals.py
+
+
 def output (msg):
     sys.stdout.write(msg)
 
@@ -63,6 +103,53 @@ def decodeName (name):
     return newname
 
 
+class UnicodeRichExtText(object):  # result holder that getUnicodeRichExtText() fills in and returns
+    def __init__ (self):
+        self.baseText = ''  # string characters; empty until a parser assigns them
+        self.phoneticBytes = []  # raw phonetic data; stays [] when the string carries none
+
+
+def getUnicodeRichExtText (bytes):  # parse one string from the start of `bytes`; returns (UnicodeRichExtText, bytes consumed)
+    ret = UnicodeRichExtText()
+    strm = ByteStream(bytes)
+    textLen = strm.readUnsignedInt(2)  # character count (not byte count): one or two bytes are read per character below
+    flags = strm.readUnsignedInt(1)  # option bits, laid out as in the diagram below
+    #  0 0 0 0 0 0 0 0
+    # |-------|D|C|B|A|
+    isDoubleByte = (flags     & 0x01) == 1 # A
+    ignored      = ((flags/2) & 0x01) == 1 # B  (never used afterwards; '/' relies on Python 2 integer division)
+    hasPhonetic  = ((flags/4) & 0x01) == 1 # C
+    isRichStr    = ((flags/8) & 0x01) == 1 # D
+
+    numElem = 0  # number of 4-byte formatting entries that follow the text
+    if isRichStr:
+        numElem = strm.readUnsignedInt(2)
+
+    phoneticBytes = 0  # length of the trailing phonetic block (ret.phoneticBytes holds the data itself)
+    if hasPhonetic:
+        phoneticBytes = strm.readUnsignedInt(4)
+        
+    if isDoubleByte:
+        # double-byte string (UTF-16)
+        text = ''
+        for i in xrange(0, textLen):
+            text += toTextBytes(strm.readBytes(2)).decode('utf-16')  # NOTE(review): decodes one 2-byte unit at a time; a surrogate pair or a BOM-like unit may not decode as intended - confirm
+        ret.baseText = text
+    else:
+        # single-byte string
+        ret.baseText = toTextBytes(strm.readBytes(textLen))  # stored exactly as toTextBytes() returns it, with no decode step
+
+    if isRichStr:
+        for i in xrange(0, numElem):
+            posChar = strm.readUnsignedInt(2)  # read only to advance the stream; the value is discarded
+            fontIdx = strm.readUnsignedInt(2)  # read only to advance the stream; the value is discarded
+
+    if hasPhonetic:
+        ret.phoneticBytes = strm.readBytes(phoneticBytes)  # kept raw, not interpreted here
+
+    return ret, strm.getCurrentPos()  # second item is the number of bytes consumed, so the caller can advance its own cursor
+
+
 def getRichText (bytes, textLen=None):
     """parse a string of the rich-text format that Excel uses."""
 
diff --git a/scratch/mso-dumper/src/xlsrecord.py b/scratch/mso-dumper/src/xlsrecord.py
index b317ebd..3b3e5e1 100644
--- a/scratch/mso-dumper/src/xlsrecord.py
+++ b/scratch/mso-dumper/src/xlsrecord.py
@@ -72,6 +72,9 @@ append a line to be displayed.
     def setCurrentPos (self, pos):
         self.pos = pos
 
+    def isEndOfRecord (self):  # True when the read position sits exactly at the end of the record bytes
+        return (self.pos == self.size)  # strict equality: a position beyond size would report False
+
     def getYesNo (self, boolVal):
         if boolVal:
             return 'yes'
@@ -417,6 +420,19 @@ class String(BaseRecordHandler):
         self.appendLine("string value: '%s'"%name)
 
 
+class SST(BaseRecordHandler):  # handler for the SST (Shared String Table) record
+
+    def parseBytes (self):  # display the two counts, then walk over every string entry
+        refCount = self.readSignedInt(4) # total number of references in workbook
+        strCount = self.readSignedInt(4) # total number of unique strings.
+        self.appendLine("total number of references: %d"%refCount)
+        self.appendLine("total number of unique strings: %d"%strCount)
+        for i in xrange(0, strCount):  # strings are parsed but not displayed; only the counts above are output
+            extText, bytesRead = globals.getUnicodeRichExtText(self.bytes[self.getCurrentPos():])  # parse from the current position to the end of the record
+            self.readBytes(bytesRead) # advance current position.
+        return
+
+
 class Blank(BaseRecordHandler):
 
     def parseBytes (self):
@@ -427,6 +443,17 @@ class Blank(BaseRecordHandler):
         self.appendLine("XF record ID: %d"%xf)
 
 
+class DBCell(BaseRecordHandler):  # handler for the DBCELL (Stream Offsets) record
+
+    def parseBytes (self):  # display one 4-byte offset, then 2-byte offsets until the record ends
+        rowRecOffset = self.readUnsignedInt(4)
+        self.appendLine("offset to first ROW record: %d"%rowRecOffset)
+        while not self.isEndOfRecord():  # NOTE(review): exit relies on pos == size exactly; presumably the remaining length is always even - confirm
+            cellOffset = self.readUnsignedInt(2)
+            self.appendLine("offset to CELL record: %d"%cellOffset)
+        return
+
+
 class DefColWidth(BaseRecordHandler):
 
     def parseBytes (self):
diff --git a/scratch/mso-dumper/src/xlsstream.py b/scratch/mso-dumper/src/xlsstream.py
index 78dc9d3..bef9c42 100644
--- a/scratch/mso-dumper/src/xlsstream.py
+++ b/scratch/mso-dumper/src/xlsstream.py
@@ -122,7 +122,7 @@ recData = {
     0x00D3: ["OBPROJ", "Visual Basic Project"],
     0x00D5: ["SXIDSTM", "Stream ID", xlsrecord.SXStreamID],
     0x00D6: ["RSTRING", "Cell with Character Formatting"],
-    0x00D7: ["DBCELL", "Stream Offsets"],
+    0x00D7: ["DBCELL", "Stream Offsets", xlsrecord.DBCell],
     0x00DA: ["BOOKBOOL", "Workbook Option Flag"],
     0x00DC: ["PARAMQRY", "Query Parameters"],
     0x00DC: ["SXEXT", "External Source Information"],
@@ -146,7 +146,7 @@ recData = {
     0x00F8: ["SXPAIR", "PivotTable Name Pair"],
     0x00F9: ["SXFMLA", "PivotTable Parsed Expression"],
     0x00FB: ["SXFORMAT", "PivotTable Format Record"],
-    0x00FC: ["SST", "Shared String Table"],
+    0x00FC: ["SST", "Shared String Table", xlsrecord.SST],
     0x00FD: ["LABELSST", "Cell Value", xlsrecord.LabelSST],
     0x00FF: ["EXTSST", "Extended Shared String Table"],
     0x0100: ["SXVDEX", "Extended PivotTable View Fields", xlsrecord.SXViewFieldsEx],


More information about the ooo-build-commit mailing list