[Libreoffice-commits] mso-dumper.git: 3 commits - msodumper/globals.py msodumper/xlsrecord.py msodumper/xlsstream.py

Kohei Yoshida kohei.yoshida at gmail.com
Tue Sep 16 07:09:37 PDT 2014


 msodumper/globals.py   |   13 ++-
 msodumper/xlsrecord.py |  196 +++++++++++++++++++------------------------------
 msodumper/xlsstream.py |   38 ++++++---
 3 files changed, 113 insertions(+), 134 deletions(-)

New commits:
commit 49e89f8d3a5d362935e93d763e63bfa2ac05b8f6
Author: Kohei Yoshida <kohei.yoshida at gmail.com>
Date:   Tue Sep 16 10:00:00 2014 -0400

    Some notes about how we organize the record list & more description for FEATURE11.

diff --git a/msodumper/xlsstream.py b/msodumper/xlsstream.py
index 13370e0..63f9827 100644
--- a/msodumper/xlsstream.py
+++ b/msodumper/xlsstream.py
@@ -13,7 +13,11 @@ class EndOfStream(Exception): pass
 
 unusedRecDesc = "[unused, must be ignored]"
 
-    # opcode: [canonical name, description, handler (optional)]
+# opcode: [canonical name, description, handler (optional)]
+
+# Please use the same record name as canonical name as it appears in the
+# [MS-XLS] specification.  The description should be short enough to fit the
+# screen column width when it's dumped to stdout.
 
 recData = {
     0x0006: ["FORMULA", "Cell Formula", xlsrecord.Formula],
@@ -222,7 +226,7 @@ recData = {
     0x086B: ["DATALABEXTCONTENTS", "Contents of an extended data label", xlsrecord.DataLabExtContents],
     0x086C: ["CELLWATCH", "Reference to a watched cell"],
     0x0871: ["FEATHDR11", "Common information for all tables on a sheet"],
-    0x0872: ["FEATURE11", "Shared feature data"],
+    0x0872: ["FEATURE11", "Shared feature data for a table in a worksheet"],
     0x0873: ["DROPDOWNOBJIDS", "Object identifiers of autofilter dropdown objects"],
     0x0875: ["CONTINUEFRT11", "Continuation of the data in a preceding Future Record Type record that has data longer than 8,224 bytes"],
     0x0876: ["DCONN", "Information for a single data connection"],
commit 397cf81c7bff03e7b1f38840a6cafb4c1b8c4cdc
Author: Kohei Yoshida <kohei.yoshida at gmail.com>
Date:   Mon Sep 15 18:52:55 2014 -0400

    Work on updating the handler for the SXAddlInfo record.
    
    This is still work in progress.
    
    Also print raw bytes as characters for each record.  This helps detect
    string bytes without having to write handlers.
    
    Also, shorten the descriptions of some records so that they fit within
    a 76-character width.  We don't need to be verbose here; as long as we
    use the same record names as in the spec, we can look them up if needed.

diff --git a/msodumper/globals.py b/msodumper/globals.py
index 8e30194..aaac534 100644
--- a/msodumper/globals.py
+++ b/msodumper/globals.py
@@ -321,6 +321,11 @@ Note the following:
 
     return (text, totalByteLen)
 
+def toCharOrDot (char):
+    if 32 < ord(char) and ord(char) < 127:
+        return char
+    else:
+        return '.'
 
 def dumpBytes (chars, subDivide=None):
     if params.noStructOutput or params.noRawDump:
@@ -345,10 +350,7 @@ def dumpBytes (chars, subDivide=None):
             output(fmt%i)
 
         byte = ord(chars[i])
-        if 32 < byte and byte < 127:
-            lineBuf += chars[i]
-        else:
-            lineBuf += '.'
+        lineBuf += toCharOrDot(chars[i])
         output("%2.2X "%byte)
 
         if (i+1)%4 == 0:
diff --git a/msodumper/xlsrecord.py b/msodumper/xlsrecord.py
index b41be00..38e15af 100644
--- a/msodumper/xlsrecord.py
+++ b/msodumper/xlsrecord.py
@@ -2750,6 +2750,7 @@ class XF(BaseRecordHandler):
 
 
 class FeatureHeader(BaseRecordHandler):
+    """Beginning of a collection of records."""
 
     def parseBytes (self):
         recordType = self.readUnsignedInt(2)
@@ -2898,6 +2899,28 @@ class ShrFmla(BaseRecordHandler):
 # -------------------------------------------------------------------
 # SX - Pivot Table
 
+class FrtFlags(object):
+
+    def __init__ (self, strm):
+        grbitFrt = strm.readUnsignedInt(2)
+        self.fFrtRef   = (grbitFrt & 0x0001) # record specifies a range of cells
+        self.fFrtAlert = (grbitFrt & 0x0002) # whether to alert the user of possible problems when saving the file
+
+    def appendLines (self, hdl):
+        pass
+
+
+class XLUnicodeStringSegmentedSXAddl(object):
+
+    def __init__ (self, strm):
+        self.cchTotal = strm.readUnsignedInt(4)
+        strm.readBytes(2) # ignored
+
+    def appendLines (self, hdl):
+        if self.cchTotal <= 65535:
+            hdl.appendLineInt("cchTotal", self.cchTotal)
+
+
 class DConName(BaseRecordHandler):
 
     def __parseBytes (self):
@@ -2971,135 +2994,70 @@ class SXViewEx9(BaseRecordHandler):
 
 class SXAddlInfo(BaseRecordHandler):
 
-    sxcNameList = {
-        0x00: "sxcView",
-        0x01: "sxcField",
-        0x02: "sxcHierarchy",
-        0x03: "sxcCache",
-        0x04: "sxcCacheField",
-        0x05: "sxcQsi",
-        0x06: "sxcQuery",
-        0x07: "sxcGrpLevel",
-        0x08: "sxcGroup"
+    SxcClassList = {
+        0x00: 'sxcView',
+        0x01: 'sxcField',
+        0x02: 'sxcHierarchy',
+        0x03: 'sxcCache',
+        0x04: 'sxcCacheField',
+        0x05: 'sxcQsi',
+        0x06: 'sxcQuery',
+        0x07: 'sxcGrpLevel',
+        0x08: 'sxcGroup',
+        0x09: 'sxcCacheItem',
+        0x0C: 'sxcSxrule',
+        0x0D: 'sxcSxfilt',
+        0x10: 'sxcSxdh',
+        0x12: 'sxcAutoSort',
+        0x13: 'sxcSxmgs',
+        0x14: 'sxcSxmg',
+        0x17: 'sxcField12',
+        0x1A: 'sxcSxcondfmts',
+        0x1B: 'sxcSxcondfmt',
+        0x1C: 'sxcSxfilters12',
+        0x1D: 'sxcSxfilter12'
     }
 
-    sxdNameList = {
+    SxcViewTypes = {
         0x00: 'sxdId',
         0x01: 'sxdVerUpdInv',
         0x02: 'sxdVer10Info',
         0x03: 'sxdCalcMember',
-        0x04: 'sxdXMLSource',
-        0x05: 'sxdProperty',
-        0x05: 'sxdSrcDataFile',
-        0x06: 'sxdGrpLevelInfo',
-        0x06: 'sxdSrcConnFile',
-        0x07: 'sxdGrpInfo',
-        0x07: 'sxdReconnCond',
-        0x08: 'sxdMember',
-        0x09: 'sxdFilterMember',
         0x0A: 'sxdCalcMemString',
+        0x19: 'sxdVer12Info',
+        0x1E: 'sxdTableStyleClient',
+        0x21: 'sxdCompactRwHdr',
+        0x22: 'sxdCompactColHdr',
+        0x26: 'sxdSxpiIvmb',
         0xFF: 'sxdEnd'
     }
 
-    def parseBytes (self):
-        dummy = self.readBytes(2) # 0x0864
-        dummy = self.readBytes(2) # 0x0000
-        sxc = self.readBytes(1)[0]
-        sxd = self.readBytes(1)[0]
-        dwUserData = self.readBytes(4)
-        dummy = self.readBytes(2)
-
-        className = "(unknown)"
-        if SXAddlInfo.sxcNameList.has_key(sxc):
-            className = SXAddlInfo.sxcNameList[sxc]
-        self.appendLine("class name: %s"%className)
-        typeName = '(unknown)'
-        if SXAddlInfo.sxdNameList.has_key(sxd):
-            typeName = SXAddlInfo.sxdNameList[sxd]
-        self.appendLine("type name: %s"%typeName)
-
-        if sxd == 0x00:
-            self.__parseId(sxc, dwUserData)
-
-        elif sxd == 0x02:
-            if sxc == 0x03:
-                self.__parseSxDbSave10()
-            elif sxc == 0x00:
-                self.__parseViewFlags(dwUserData)
-
-    def __parseViewFlags (self, dwUserData):
-        flags = globals.getUnsignedInt(dwUserData)
-        viewVer = (flags & 0x000000FF)
-        verName = self.__getExcelVerName(viewVer)
-        self.appendLine("PivotTable view version: %s"%verName)
-        displayImmediateItems = (flags & 0x00000100)
-        enableDataEd          = (flags & 0x00000200)
-        disableFList          = (flags & 0x00000400)
-        reenterOnLoadOnce     = (flags & 0x00000800)
-        notViewCalcMembers    = (flags & 0x00001000)
-        notVisualTotals       = (flags & 0x00002000)
-        pageMultiItemLabel    = (flags & 0x00004000)
-        tensorFillCv          = (flags & 0x00008000)
-        hideDDData            = (flags & 0x00010000)
-
-        self.appendLine("display immediate items: %s"%self.getYesNo(displayImmediateItems))
-        self.appendLine("editing values in data area allowed: %s"%self.getYesNo(enableDataEd))
-        self.appendLine("field list disabled: %s"%self.getYesNo(disableFList))
-        self.appendLine("re-center on load once: %s"%self.getYesNo(reenterOnLoadOnce))
-        self.appendLine("hide calculated members: %s"%self.getYesNo(notViewCalcMembers))
-        self.appendLine("totals include hidden members: %s"%self.getYesNo(notVisualTotals))
-        self.appendLine("(Multiple Items) instead of (All) in page field: %s"%self.getYesNo(pageMultiItemLabel))
-        self.appendLine("background color from source: %s"%self.getYesNo(tensorFillCv))
-        self.appendLine("hide drill-down for data field: %s"%self.getYesNo(hideDDData))
-
-    def __parseId (self, sxc, dwUserData):
-        if sxc == 0x03:
-            idCache = globals.getUnsignedInt(dwUserData)
-            self.appendLine("cache ID: %d"%idCache)
-        elif sxc in [0x00, 0x01, 0x02, 0x05, 0x06, 0x07, 0x08]:
-            lenStr = globals.getUnsignedInt(dwUserData)
-            self.appendLine("length of ID string: %d"%lenStr)
-            textLen = globals.getUnsignedInt(self.readBytes(2))
-            data = self.bytes[self.getCurrentPos():]
-            if lenStr == 0:
-                self.appendLine("name (ID) string: (continued from last record)")
-            elif lenStr == len(data) - 1:
-                text, textLen = globals.getRichText(data, textLen)
-                self.appendLine("name (ID) string: %s"%text)
-            else:
-                self.appendLine("name (ID) string: (first of multiple records)")
-
-
-    def __parseSxDbSave10 (self):
-        countGhostMax = globals.getSignedInt(self.readBytes(4))
-        self.appendLine("max ghost pivot items: %d"%countGhostMax)
-
-        # version last refreshed this cache
-        lastVer = globals.getUnsignedInt(self.readBytes(1))
-        verName = self.__getExcelVerName(lastVer)
-        self.appendLine("last version refreshed: %s"%verName)
-
-        # minimum version needed to refresh this cache
-        lastVer = globals.getUnsignedInt(self.readBytes(1))
-        verName = self.__getExcelVerName(lastVer)
-        self.appendLine("minimum version needed to refresh: %s"%verName)
-
-        # date last refreshed
-        dateRefreshed = globals.getDouble(self.readBytes(8))
-        self.appendLine("date last refreshed: %g"%dateRefreshed)
-
-
-    def __getExcelVerName (self, ver):
-        verName = '(unknown)'
-        if ver == 0:
-            verName = 'Excel 9 (2000) and earlier'
-        elif ver == 1:
-            verName = 'Excel 10 (XP)'
-        elif ver == 2:
-            verName = 'Excel 11 (2003)'
-        elif ver == 3:
-            verName = 'Excel 12 (2007)'
-        return verName
+    def __parseBytes (self):
+        self.flags = FrtFlags(self) # ignored
+        self.sxc = self.readUnsignedInt(1)
+        self.sxd = self.readUnsignedInt(1)
+        if self.sxc == 0x00:
+            # SxcView
+            self.__parseBytesView()
+
+    def __parseBytesView (self):
+        assert(self.sxc == 0x00)
+        if self.sxd == 0x00:
+            # sxdId
+            self.__parseSxcViewSxdId()
+
+    def __parseSxcViewSxdId (self):
+        self.stName = XLUnicodeStringSegmentedSXAddl(self)
+
+    def parseBytes (self):
+        self.__parseBytes()
+        self.appendLineString("classs name", globals.getValueOrUnknown(SXAddlInfo.SxcClassList, self.sxc))
+        if self.sxc == 0x00:
+            # SxcView
+            self.appendLineString("record type", globals.getValueOrUnknown(SXAddlInfo.SxcViewTypes, self.sxd))
+            if self.sxd == 0x00:
+                # sxdId
+                self.stName.appendLines(self)
 
 
 class SXDb(BaseRecordHandler):
diff --git a/msodumper/xlsstream.py b/msodumper/xlsstream.py
index 19f9e04..13370e0 100644
--- a/msodumper/xlsstream.py
+++ b/msodumper/xlsstream.py
@@ -216,14 +216,14 @@ recData = {
     0x0862: ["SHEETLAYOUT", "Tab Color below Sheet Name"],
     0x0863: ["BOOKEXT", "Extra Book Info"],
     0x0864: ["SXADDL", "Pivot Table Additional Info", xlsrecord.SXAddlInfo],
-    0x0867: ["FEATHDR", "Shared Feature Header, specifies the beginning of a collection of records", xlsrecord.FeatureHeader],
+    0x0867: ["FEATHDR", "Shared Feature Header", xlsrecord.FeatureHeader],
     0x0868: ["FEAT", "Shared Feature Data (wrongly named RANGEPROTECTION elsewhere)", xlsrecord.FeatureData],
     0x086A: ["DATALABEXT", "Extended Data Label"],
     0x086B: ["DATALABEXTCONTENTS", "Contents of an extended data label", xlsrecord.DataLabExtContents],
     0x086C: ["CELLWATCH", "Reference to a watched cell"],
-    0x0871: ["FEATHDR11", "Common information for all tables on a sheet and specifies the beginning of a collection"],
-    0x0872: ["FEATURE11", "Shared feature data. The only shared feature type stored in this record is a table in a worksheet."],
-    0x0873: ["DROPDOWNOBJIDS", "Object identifiers that can be reused by the application when creating the dropdown objects for the AutoFilter at runtime in a sheet"],
+    0x0871: ["FEATHDR11", "Common information for all tables on a sheet"],
+    0x0872: ["FEATURE11", "Shared feature data"],
+    0x0873: ["DROPDOWNOBJIDS", "Object identifiers of autofilter dropdown objects"],
     0x0875: ["CONTINUEFRT11", "Continuation of the data in a preceding Future Record Type record that has data longer than 8,224 bytes"],
     0x0876: ["DCONN", "Information for a single data connection"],
     0x0877: ["LIST12", "Additional formatting information for a table"],
@@ -562,14 +562,26 @@ class XLDirStream(object):
         else:
             print("%4.4Xh:   size = %d"%(header, size))
 
+        # print the raw bytes, with 16 bytes per line.
         self.__printSep('-', globals.OutputWidth-len(headerStr), headerStr)
+        lines = []
         for i in xrange(0, size):
-            if (i+1) % 16 == 1:
-                output("%4.4Xh: "%header)
-            output("%2.2X "% ord(bytes[i]))
-            if (i+1) % 16 == 0 and i != size-1:
-                print("")
-        if size > 0:
+            if i % 16 == 0:
+                lines.append([])
+            lines[-1].append(bytes[i])
+
+        for line in lines:
+            output("%4.4Xh: "%header)
+            n = len(line)
+            for byte in line:
+                output("%2.2X "%ord(byte))
+            for i in xrange(n, 16):
+                output("   ")
+            output('  ')
+
+            for byte in line:
+                output(globals.toCharOrDot(byte))
+
             print("")
 
         if handler != None and not self.strmData.encrypted:
commit 0fd44680eca74c74e4edee341808b708acae61ba
Author: Kohei Yoshida <kohei.yoshida at gmail.com>
Date:   Mon Sep 15 13:20:21 2014 -0400

    Unpack 8-byte unsigned long long integers.

diff --git a/msodumper/globals.py b/msodumper/globals.py
index 68d8254..8e30194 100644
--- a/msodumper/globals.py
+++ b/msodumper/globals.py
@@ -449,6 +449,9 @@ def getUnsignedInt (bytes):
     elif n == 4:
         # int, long - 4 bytes
         return struct.unpack('<L', text)[0]
+    elif n == 8:
+        # long long - 8 bytes
+        return struct.unpack('<Q', text)[0]
 
     raise ByteConvertError
 


More information about the Libreoffice-commits mailing list