[Libreoffice-commits] mso-dumper.git: msodumper/xlsrecord.py msodumper/xlsstream.py

Kohei Yoshida kohei.yoshida at gmail.com
Fri Jul 11 16:02:06 PDT 2014


 msodumper/xlsrecord.py |  318 ++++++++++++++++++++++++++++++++++---------------
 msodumper/xlsstream.py |    4 
 2 files changed, 228 insertions(+), 94 deletions(-)

New commits:
commit ff0dc534560702f5af16a1a15f368f6c6e083d54
Author: Kohei Yoshida <kohei.yoshida at gmail.com>
Date:   Fri Jul 11 18:51:21 2014 -0400

    Handle change cell and ins/del rows/columns records in change tracking.
    
    The change cell record handler is a rewrite of my previous handler code
    that dumps more detail.  This is still a work in progress.

diff --git a/msodumper/xlsrecord.py b/msodumper/xlsrecord.py
index 463e783..181a2a8 100644
--- a/msodumper/xlsrecord.py
+++ b/msodumper/xlsrecord.py
@@ -15,6 +15,32 @@ class RecordError(Exception): pass
 # -------------------------------------------------------------------
 # record handler classes
 
+class ColRelU(object):
+
+    def __init__ (self, strm):
+        self.col = strm.readUnsignedInt(2)
+        self.colRelative = (self.col & 0x4000) != 0
+        self.rowRelative = (self.col & 0x8000) != 0
+        self.col = self.col & 0x3FFF
+
+
+class RgceLoc(object):
+
+    def __init__ (self, strm):
+        self.row = strm.readUnsignedInt(2)
+        self.column = ColRelU(strm)
+
+    def toString (self):
+        s = ''
+        if not self.column.colRelative:
+            s += '$'
+        s += formula.toColName(self.column.col)
+        if not self.column.rowRelative:
+            s += '$'
+        s += "%d"%(self.row+1)
+        return s
+
+
 class Ref8(object):
 
     def __init__ (self, strm):
@@ -58,6 +84,14 @@ class Ref8U(object):
         self.col1 = strm.readUnsignedInt(2)
         self.col2 = strm.readUnsignedInt(2)
 
+    def toString (self):
+        rge = formula.CellRange()
+        rge.firstRow = self.row1
+        rge.firstCol = self.col1
+        rge.lastRow = self.row2
+        rge.lastCol = self.col2
+        return rge.toString()
+
 
 class RKAuxData(object):
     """Store auxiliary data for RK value"""
@@ -4010,100 +4044,200 @@ class SXRng(BaseRecordHandler):
 # -------------------------------------------------------------------
 # CT - Change Tracking
 
-class CTCellContent(BaseRecordHandler):
-
-    EXC_CHTR_TYPE_MASK       = 0x0007
-    EXC_CHTR_TYPE_FORMATMASK = 0xFF00
-    EXC_CHTR_TYPE_EMPTY      = 0x0000
-    EXC_CHTR_TYPE_RK         = 0x0001
-    EXC_CHTR_TYPE_DOUBLE     = 0x0002
-    EXC_CHTR_TYPE_STRING     = 0x0003
-    EXC_CHTR_TYPE_BOOL       = 0x0004
-    EXC_CHTR_TYPE_FORMULA    = 0x0005
-
-    def parseBytes (self):
-        size = globals.getSignedInt(self.readBytes(4))
-        id = globals.getSignedInt(self.readBytes(4))
-        opcode = globals.getSignedInt(self.readBytes(2))
-        accept = globals.getSignedInt(self.readBytes(2))
-        tabCreateId = globals.getSignedInt(self.readBytes(2))
-        valueType = globals.getSignedInt(self.readBytes(2))
-        self.appendLine("header: (size=%d; index=%d; opcode=0x%2.2X; accept=%d)"%(size, id, opcode, accept))
-        self.appendLine("sheet creation id: %d"%tabCreateId)
-
-        oldType = (valueType/(2*2*2) & CTCellContent.EXC_CHTR_TYPE_MASK)
-        newType = (valueType & CTCellContent.EXC_CHTR_TYPE_MASK)
-        self.appendLine("value type: (old=%4.4Xh; new=%4.4Xh)"%(oldType, newType))
-        self.readBytes(2) # ignore next 2 bytes.
-
-        row = globals.getSignedInt(self.readBytes(2))
-        col = globals.getSignedInt(self.readBytes(2))
-        cell = formula.CellAddress(col, row)
-        self.appendLine("cell position: %s"%cell.getName())
-
-        oldSize = globals.getSignedInt(self.readBytes(2))
-        self.readBytes(4) # ignore 4 bytes.
-
-        fmtType = (valueType & CTCellContent.EXC_CHTR_TYPE_FORMATMASK)
-        if fmtType == 0x1100:
-            self.readBytes(16)
-        elif fmtType == 0x1300:
-            self.readBytes(8)
-
-        self.readCell(oldType, "old cell type")
-        self.readCell(newType, "new cell type")
-
-    def readCell (self, cellType, cellName):
-
-        cellTypeText = 'unknown'
-
-        if cellType == CTCellContent.EXC_CHTR_TYPE_FORMULA:
-            cellTypeText, formulaBytes, formulaText = self.readFormula()
-            self.appendLine("%s: %s"%(cellName, cellTypeText))
-            self.appendLine("formula bytes: %s"%globals.getRawBytes(formulaBytes, True, False))
-            self.appendLine("tokens: %s"%formulaText)
+class RRD(object):
+
+    RevType = {
+        0x0000: "insert row",                  # REVTINSRW
+        0x0001: "insert column",               # REVTINSCOL
+        0x0002: "delete row",                  # REVTDELRW
+        0x0003: "delete column",               # REVTDELCOL
+        0x0004: "cell move",                   # REVTMOVE
+        0x0005: "insert sheet",                # REVTINSERTSH
+        0x0007: "sort",                        # REVTSORT
+        0x0008: "cell change",                 # REVTCHANGECELL
+        0x0009: "rename sheet",                # REVTRENSHEET
+        0x000A: "defined name change",         # REVTDEFNAME
+        0x000B: "format revision",             # REVTFORMAT
+        0x000C: "autoformat revision",         # REVTAUTOFMT
+        0x000D: "comment revision",            # REVTNOTE
+        0x0020: "header (meta-data) revision", # REVTHEADER
+        0x0025: "conflict",                    # REVTCONFLICT
+        0x002B: "custom view add",             # REVTADDVIEW
+        0x002C: "custom view delete",          # REVTDELVIEW
+        0x002E: "query table field removal"    # REVTTRASHQTFIELD
+    }
+
+    def __init__ (self, parent):
+        self.parent = parent
+        self.cbMemory = parent.readUnsignedInt(4)
+        self.revid = parent.readSignedInt(4)
+        self.revt = parent.readUnsignedInt(2)
+
+        flags = parent.readUnsignedInt(2)
+        self.fAccepted        = (flags & 0x0001) != 0
+        self.fUndoAction      = (flags & 0x0002) != 0
+        unused                = (flags & 0x0004) != 0
+        self.fDelAtEdgeOfSort = (flags & 0x0008) != 0
+        self.tabid = parent.readUnsignedInt(2)
+
+    def parseBytes (self):
+        self.parent.appendLineInt("memory size", self.cbMemory)
+        self.parent.appendLine("revision ID: %d"%self.revid)
+        self.parent.appendLine("revision type: %s (0x%4.4X)"%(globals.getValueOrUnknown(RRD.RevType, self.revt), self.revt))
+        self.parent.appendLineBoolean("accepted", self.fAccepted)
+        self.parent.appendLineBoolean("undo action", self.fUndoAction)
+        self.parent.appendLineBoolean("deleted at edge of sorted range", self.fDelAtEdgeOfSort)
+        self.parent.appendLineInt("sheet index", self.tabid)
+
+
+class RRDChgCell(BaseRecordHandler):
+
+    class CellType:
+        Blank                       = 0x0000
+        RKNumber                    = 0x0001
+        Xnum                        = 0x0002
+        XLUnicodeRichExtendedString = 0x0003
+        Bes                         = 0x0004
+        CellParsedFormula           = 0x0005
+
+    CellTypes = [
+        "blank",            # 0x0
+        "RK",               # 0x1
+        "double",           # 0x2
+        "string",           # 0x3
+        "boolean or error", # 0x4
+        "formula"           # 0x5
+    ]
+
+    NumFmtTypes = {
+        0x0000: "automatic",
+        0x0004: "number, two decimal places, use the 1000 separator (,)",
+        0x000B: "currency, two decimal places, use parentheses for negative values",
+        0x000D: "percentage, zero decimal places",
+        0x000E: "percentage, two decimal places",
+        0x000F: "scientific",
+        0x0010: "engineering",
+        0x0011: "fraction, up to one digit numerator and denominator",
+        0x0012: "fraction, up to two digit numerator and denominator",
+        0x0013: "date (MM-DD-YY)",
+        0x0015: "date (DD-MMM)",
+        0x0017: "time (H:MM AM/PM)",
+        0x001B: "date/time, 24 hour format (M/D/YY H:MM)",
+        0x0022: "accounting (currency with decimal point aligned, and centered minus-sign for 0-value), two decimal places, use currency symbol"
+    }
+
+    def __parseBytes (self):
+        self.rrd = RRD(self)
+        flags = self.readUnsignedInt(2)
+        self.vt    = (flags & 0x0007)
+        flags /= 2**3 # shift 3 bits
+        self.vtOld = (flags & 0x0007)
+        flags /= 2**3 # shift 3 bits
+        self.f123Prefix  = (flags & 0x0001)
+        unused           = (flags & 0x0002)
+        self.fOldFmt     = (flags & 0x0004)
+        self.fOldFmtNull = (flags & 0x0008)
+        self.fXfDxf      = (flags & 0x0010)
+        self.fStyXfDxf   = (flags & 0x0020)
+        self.fDxf        = (flags & 0x0040)
+        self.fDxfNull    = (flags & 0x0080)
+
+        self.ifmtDisp = self.readUnsignedInt(1)
+
+        flags = self.readUnsignedInt(1)
+        self.fPhShow        = (flags & 0x01)
+        self.fPhShowOld     = (flags & 0x02)
+        self.fEOLFmlaUpdate = (flags & 0x04)
+
+        self.loc = RgceLoc(self)
+
+        self.cbOldVal = self.readUnsignedInt(4)
+        self.cetxpRst = self.readUnsignedInt(2)
+
+        if self.fOldFmt or self.fDxf:
+            # TODO : Parse DXFN
             return
 
-        if cellType == CTCellContent.EXC_CHTR_TYPE_EMPTY:
-            cellTypeText = 'empty'
-        elif cellType == CTCellContent.EXC_CHTR_TYPE_RK:
-            cellTypeText = self.readRK()
-        elif cellType == CTCellContent.EXC_CHTR_TYPE_DOUBLE:
-            cellTypeText = self.readDouble()
-        elif cellType == CTCellContent.EXC_CHTR_TYPE_STRING:
-            cellTypeText = self.readString()
-        elif cellType == CTCellContent.EXC_CHTR_TYPE_BOOL:
-            cellTypeText = self.readBool()
-        elif cellType == CTCellContent.EXC_CHTR_TYPE_FORMULA:
-            cellTypeText, formulaText = self.readFormula()
-
-        self.appendLine("%s: %s"%(cellName, cellTypeText))
-
-    def readRK (self):
-        valRK = globals.getSignedInt(self.readBytes(4))
-        return 'RK value'
-
-    def readDouble (self):
-        val = globals.getDouble(self.readBytes(4))
-        return "value %f"%val
-
-    def readString (self):
-        size = globals.getSignedInt(self.readBytes(2))
-        pos = self.getCurrentPos()
-        name, byteLen = globals.getRichText(self.bytes[pos:], size)
-        self.setCurrentPos(pos + byteLen)
-        return "string '%s'"%name
-
-    def readBool (self):
-        bool = globals.getSignedInt(self.readBytes(2))
-        return "bool (%d)"%bool
-
-    def readFormula (self):
-        size = globals.getSignedInt(self.readBytes(2))
-        fmlaBytes = self.readBytes(size)
-        o = formula.FormulaParser(self.header, fmlaBytes)
-        o.parse()
-        return "formula", fmlaBytes, o.getText()
+        if self.vtOld == RRDChgCell.CellType.Blank:
+            pass
+        elif self.vtOld == RRDChgCell.CellType.RKNumber:
+            self.rkOld = decodeRK(self.readUnsignedInt(4))
+        elif self.vtOld == RRDChgCell.CellType.Xnum:
+            self.numOld = self.readDouble()
+        else:
+            # TODO : Handle other value types.
+            return
+
+        if self.vt == RRDChgCell.CellType.Blank:
+            pass
+        elif self.vt == RRDChgCell.CellType.RKNumber:
+            self.rk = decodeRK(self.readUnsignedInt(4))
+        elif self.vt == RRDChgCell.CellType.Xnum:
+            self.num = self.readDouble()
+        else:
+            # TODO : Handle other value types.
+            return
+
+    def parseBytes (self):
+        self.__parseBytes()
+        self.rrd.parseBytes()
+        self.appendLineString("old cell type", globals.getValueOrUnknown(RRDChgCell.CellTypes,self.vtOld))
+        self.appendLineString("new cell type", globals.getValueOrUnknown(RRDChgCell.CellTypes,self.vt))
+        self.appendLineBoolean("prefix characters present", self.f123Prefix)
+        self.appendLineBoolean("old formatting available", self.fOldFmt)
+        self.appendLineBoolean("old formatting empty", self.fOldFmtNull)
+        self.appendLineBoolean("reset to cell style first", self.fXfDxf)
+        self.appendLineBoolean("clear cell format first", self.fStyXfDxf)
+        self.appendLineBoolean("format has changed", self.fDxf)
+        self.appendLineBoolean("new formatting empty", self.fDxfNull)
+        self.appendLineString("number format for new value", globals.getValueOrUnknown(RRDChgCell.NumFmtTypes,self.ifmtDisp))
+        self.appendLineBoolean("new cell has phonetic string", self.fPhShow)
+        self.appendLineBoolean("old cell has phonetic string", self.fPhShowOld)
+        self.appendLineBoolean("new cell is formula update", self.fEOLFmlaUpdate)
+        self.appendLineString("cell position", self.loc.toString())
+        self.appendLineInt("old cell content size", self.cbOldVal)
+        self.appendLineInt("number of RRDRstEtxp records", self.cetxpRst)
+
+        if self.fOldFmt or self.fDxf:
+            # TODO : Parse DXFN.
+            return
+
+        if self.vtOld == RRDChgCell.CellType.Blank:
+            self.appendLine("old value: blank")
+        elif self.vtOld == RRDChgCell.CellType.RKNumber:
+            self.appendLine("old value: %g"%self.rkOld)
+        elif self.vtOld == RRDChgCell.CellType.Xnum:
+            self.appendLine("old value: %g"%self.numOld)
+        else:
+            return
+
+        if self.vt == RRDChgCell.CellType.Blank:
+            self.appendLine("new value: blank")
+        elif self.vt == RRDChgCell.CellType.RKNumber:
+            self.appendLine("new value: %g"%self.rk)
+        elif self.vt == RRDChgCell.CellType.Xnum:
+            self.appendLine("new value: %g"%self.num)
+        else:
+            return
+
+
+class RRDInsDel(BaseRecordHandler):
+
+    def __parseBytes (self):
+        self.rrd = RRD(self)
+        flags = self.readUnsignedInt(2)
+        self.fEndOfList = (flags & 0x0001) != 0
+        self.refn = Ref8U(self)
+        self.cUcr = self.readUnsignedInt(4)
+        # TODO : parse optional undo data.
+
+    def parseBytes (self):
+        self.__parseBytes()
+        self.rrd.parseBytes()
+        self.appendLineBoolean("row inserted at bottom", self.fEndOfList)
+        self.appendLineString("range", self.refn.toString())
+        self.appendLineInt("number of items in undo data", self.cUcr)
+
 
 # -------------------------------------------------------------------
 # CH - Chart
diff --git a/msodumper/xlsstream.py b/msodumper/xlsstream.py
index a034a4c..19f9e04 100644
--- a/msodumper/xlsstream.py
+++ b/msodumper/xlsstream.py
@@ -307,9 +307,9 @@ recData = {
 }
 
 recDataRev = {
-    0x0137: ["INSERT*", "Change Track Insert"],
+    0x0137: ["RRDINSDEL", "Insertion / deletion of rows / columns", xlsrecord.RRDInsDel],
     0x0138: ["INFO*", "Change Track Info"],
-    0x013B: ["CELLCONTENT*", "Change Track Cell Content", xlsrecord.CTCellContent],
+    0x013B: ["RRDCHGCELL", "Change cell revision", xlsrecord.RRDChgCell],
     0x013D: ["SHEETID*", "Change Track Sheet Identifier"],
     0x0140: ["MOVERANGE*", "Change Track Move Range"],
     0x014D: ["INSERTSHEET*", "Change Track Insert Sheet"],


More information about the Libreoffice-commits mailing list