[ooo-build-commit] scratch/mso-dumper
Kohei Yoshida
kohei at kemper.freedesktop.org
Thu Oct 8 21:45:34 PDT 2009
scratch/mso-dumper/src/globals.py | 87 ++++++++++++++++++++++++++++++++++++
scratch/mso-dumper/src/xlsrecord.py | 27 +++++++++++
scratch/mso-dumper/src/xlsstream.py | 4 -
3 files changed, 116 insertions(+), 2 deletions(-)
New commits:
commit 7ec67023573376d3264fe8e252a3ead57d6320ef
Author: Kohei Yoshida <kyoshida at novell.com>
Date: Fri Oct 9 00:44:52 2009 -0400
[xls-dump] Added handlers for DBCELL and SST records.
* scratch/mso-dumper/src/globals.py:
* scratch/mso-dumper/src/xlsrecord.py:
* scratch/mso-dumper/src/xlsstream.py:
diff --git a/scratch/mso-dumper/src/globals.py b/scratch/mso-dumper/src/globals.py
index df0c161..3e725c4 100644
--- a/scratch/mso-dumper/src/globals.py
+++ b/scratch/mso-dumper/src/globals.py
@@ -39,6 +39,46 @@ class StreamData(object):
return self.pivotCacheIDs.has_key(name)
class ByteStream(object):
    """Sequential reader over an indexable buffer of bytes.

    The stream keeps a cursor (``pos``) into ``bytes`` and hands out
    consecutive slices of it.  The integer and double readers only fetch the
    raw slice; the decoding itself is delegated to the module-level
    ``getUnsignedInt``, ``getSignedInt`` and ``getDouble`` helpers.
    """

    def __init__(self, bytes):
        # The parameter keeps its historical name (even though it shadows
        # the builtin) so that keyword callers continue to work.
        self.bytes = bytes
        self.pos = 0
        self.size = len(bytes)

    def readBytes(self, length):
        """Return the next ``length`` items and advance the cursor.

        The cursor always moves by ``length``; when fewer items are left the
        returned slice is simply shorter and the cursor ends up past the end
        of the buffer.
        """
        start = self.pos
        self.pos = start + length
        return self.bytes[start:self.pos]

    def readRemainingBytes(self):
        """Return everything from the cursor to the end and move to the end."""
        rest = self.bytes[self.pos:]
        self.pos = self.size
        return rest

    def getCurrentPos(self):
        """Return the current cursor position."""
        return self.pos

    def setCurrentPos(self, pos):
        """Move the cursor to ``pos`` (the value is not range-checked)."""
        self.pos = pos

    def isEndOfRecord(self):
        """Return True once the cursor is at or beyond the end of the buffer.

        The comparison is ``>=`` rather than ``==`` because readBytes() and
        setCurrentPos() can both leave the cursor past the end; with an
        equality test such a stream would never report its end, and a
        ``while not strm.isEndOfRecord()`` loop would not terminate.
        """
        return self.pos >= self.size

    def readUnsignedInt(self, length):
        """Read ``length`` bytes and decode them with getUnsignedInt()."""
        return getUnsignedInt(self.readBytes(length))

    def readSignedInt(self, length):
        """Read ``length`` bytes and decode them with getSignedInt()."""
        return getSignedInt(self.readBytes(length))

    def readDouble(self):
        """Read 8 bytes and decode them with getDouble()."""
        # double is always 8 bytes.
        return getDouble(self.readBytes(8))
+
+
def output (msg):
sys.stdout.write(msg)
@@ -63,6 +103,53 @@ def decodeName (name):
return newname
class UnicodeRichExtText(object):
    """Plain result holder for a parsed unicode rich extended string.

    ``baseText`` starts out as an empty string and ``phoneticBytes`` as an
    empty list; each instance gets its own fresh list.
    """

    def __init__(self):
        self.phoneticBytes = list()
        self.baseText = str()
+
+
def getUnicodeRichExtText(bytes):
    """Parse one unicode rich extended string from the start of ``bytes``.

    Fields are consumed in this order: a 2-byte character count, a 1-byte
    flag field, a 2-byte formatting-run count (only when the rich-string
    flag is set), a 4-byte phonetic block size (only when the phonetic flag
    is set), the characters themselves, 4 bytes per formatting run, and
    finally the phonetic block.

    Returns a ``(text, consumed)`` pair: ``text`` is a UnicodeRichExtText
    carrying the base text and the raw phonetic bytes, ``consumed`` is the
    stream position after the last field, i.e. how far the caller has to
    advance its own cursor.
    """
    ret = UnicodeRichExtText()
    strm = ByteStream(bytes)
    textLen = strm.readUnsignedInt(2)
    flags = strm.readUnsignedInt(1)
    #  0 0 0 0 0 0 0 0
    # |-------|D|C|B|A|
    # Bits are tested with masks instead of "flags/2", "flags/4", ...: the
    # division form only works under Python 2 floor division; with true
    # division it yields a float and the following "&" raises TypeError.
    isDoubleByte = (flags & 0x01) != 0  # A
    # B (0x02) is not evaluated by this parser.
    hasPhonetic = (flags & 0x04) != 0   # C
    isRichStr = (flags & 0x08) != 0     # D

    numElem = 0
    if isRichStr:
        numElem = strm.readUnsignedInt(2)

    phoneticSize = 0
    if hasPhonetic:
        phoneticSize = strm.readUnsignedInt(4)

    if isDoubleByte:
        # double-byte string (UTF-16).  All code units are decoded in one
        # call so that surrogate pairs stay together, and the byte order is
        # named explicitly: without a BOM the plain 'utf-16' codec falls
        # back to the byte order of the machine running the dumper.
        ret.baseText = toTextBytes(strm.readBytes(2 * textLen)).decode('utf-16-le')
    else:
        # single-byte string
        ret.baseText = toTextBytes(strm.readBytes(textLen))

    if isRichStr:
        # Each formatting run is a 2-byte character position followed by a
        # 2-byte font index; the values are not used, so just step over them.
        strm.readBytes(4 * numElem)

    if hasPhonetic:
        ret.phoneticBytes = strm.readBytes(phoneticSize)

    return ret, strm.getCurrentPos()
+
+
def getRichText (bytes, textLen=None):
"""parse a string of the rich-text format that Excel uses."""
diff --git a/scratch/mso-dumper/src/xlsrecord.py b/scratch/mso-dumper/src/xlsrecord.py
index b317ebd..3b3e5e1 100644
--- a/scratch/mso-dumper/src/xlsrecord.py
+++ b/scratch/mso-dumper/src/xlsrecord.py
@@ -72,6 +72,9 @@ append a line to be displayed.
def setCurrentPos (self, pos):
self.pos = pos
+ def isEndOfRecord (self):
+ return (self.pos == self.size)
+
def getYesNo (self, boolVal):
if boolVal:
return 'yes'
@@ -417,6 +420,19 @@ class String(BaseRecordHandler):
self.appendLine("string value: '%s'"%name)
class SST(BaseRecordHandler):
    """Handler for the SST (Shared String Table) record."""

    def parseBytes(self):
        """Report the two string counters and walk over the stored strings.

        The strings are parsed with globals.getUnicodeRichExtText() only to
        find out how many bytes each one occupies; their contents are not
        added to the output.
        """
        refCount = self.readSignedInt(4)  # total number of references in workbook
        strCount = self.readSignedInt(4)  # total number of unique strings.
        self.appendLine("total number of references: %d"%refCount)
        self.appendLine("total number of unique strings: %d"%strCount)
        for i in xrange(0, strCount):
            # Stop as soon as this record's payload is used up.  The declared
            # count is not a safe loop bound on its own: it may describe more
            # strings than these bytes contain (presumably when the table
            # continues in following records -- confirm against MS-XLS), and
            # parsing an empty remainder would only produce bogus entries.
            if self.getCurrentPos() >= self.size:
                break
            _, bytesRead = globals.getUnicodeRichExtText(self.bytes[self.getCurrentPos():])
            self.readBytes(bytesRead)  # advance current position.
+
+
class Blank(BaseRecordHandler):
def parseBytes (self):
@@ -427,6 +443,17 @@ class Blank(BaseRecordHandler):
self.appendLine("XF record ID: %d"%xf)
class DBCell(BaseRecordHandler):
    """Handler for the DBCELL (stream offsets) record."""

    def parseBytes(self):
        """Print the 4-byte ROW offset followed by every 2-byte cell offset."""
        rowRecOffset = self.readUnsignedInt(4)
        self.appendLine("offset to first ROW record: %d"%rowRecOffset)
        # Loop only while a complete 2-byte entry is left.  Waiting for the
        # position to hit the record size exactly would never finish if the
        # payload had a stray trailing byte and the read stepped over the end
        # (NOTE(review): assumes readBytes advances by the requested length
        # like globals.ByteStream does -- confirm in BaseRecordHandler).
        while self.size - self.getCurrentPos() >= 2:
            cellOffset = self.readUnsignedInt(2)
            self.appendLine("offset to CELL record: %d"%cellOffset)
+
+
class DefColWidth(BaseRecordHandler):
def parseBytes (self):
diff --git a/scratch/mso-dumper/src/xlsstream.py b/scratch/mso-dumper/src/xlsstream.py
index 78dc9d3..bef9c42 100644
--- a/scratch/mso-dumper/src/xlsstream.py
+++ b/scratch/mso-dumper/src/xlsstream.py
@@ -122,7 +122,7 @@ recData = {
0x00D3: ["OBPROJ", "Visual Basic Project"],
0x00D5: ["SXIDSTM", "Stream ID", xlsrecord.SXStreamID],
0x00D6: ["RSTRING", "Cell with Character Formatting"],
- 0x00D7: ["DBCELL", "Stream Offsets"],
+ 0x00D7: ["DBCELL", "Stream Offsets", xlsrecord.DBCell],
0x00DA: ["BOOKBOOL", "Workbook Option Flag"],
0x00DC: ["PARAMQRY", "Query Parameters"],
0x00DC: ["SXEXT", "External Source Information"],
@@ -146,7 +146,7 @@ recData = {
0x00F8: ["SXPAIR", "PivotTable Name Pair"],
0x00F9: ["SXFMLA", "PivotTable Parsed Expression"],
0x00FB: ["SXFORMAT", "PivotTable Format Record"],
- 0x00FC: ["SST", "Shared String Table"],
+ 0x00FC: ["SST", "Shared String Table", xlsrecord.SST],
0x00FD: ["LABELSST", "Cell Value", xlsrecord.LabelSST],
0x00FF: ["EXTSST", "Extended Shared String Table"],
0x0100: ["SXVDEX", "Extended PivotTable View Fields", xlsrecord.SXViewFieldsEx],
More information about the ooo-build-commit
mailing list