[ooo-build-commit] scratch/mso-dumper

Kohei Yoshida kohei at kemper.freedesktop.org
Mon Jan 4 20:35:58 PST 2010


 scratch/mso-dumper/src/formula.py   |  414 +++++++++++++++++++++++++++++++++++-
 scratch/mso-dumper/src/xlsmodel.py  |    5 
 scratch/mso-dumper/src/xlsrecord.py |    5 
 3 files changed, 413 insertions(+), 11 deletions(-)

New commits:
commit b0fbb0d49f79f31269718ea179bf2e90e83d1347
Author: Kohei Yoshida <kyoshida at novell.com>
Date:   Mon Jan 4 23:33:46 2010 -0500

    [xls-dump] Parse function tokens to display worksheet function names.
    
    For now I only support functions with no arguments.  I'll support
    functions with arguments when I learn how to parse them.
    
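    * scratch/mso-dumper/src/formula.py: add the _FuncVar token class
      (token 0x42) with a function-name table; FormulaParser2 no longer
      takes a sizeField argument and expects the token bytes only.
    * scratch/mso-dumper/src/xlsmodel.py: drop the sizeField argument;
      FormulaCell.createDOM now emits a 'formula' attribute.
    * scratch/mso-dumper/src/xlsrecord.py: Formula reads the 2-byte token
      size and then only that many token bytes; Name drops the sizeField
      argument.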

diff --git a/scratch/mso-dumper/src/formula.py b/scratch/mso-dumper/src/formula.py
index 7f422d3..0664332 100644
--- a/scratch/mso-dumper/src/formula.py
+++ b/scratch/mso-dumper/src/formula.py
@@ -378,10 +378,412 @@ class _Area3d(_TokenBase):
     def getText (self):
         return "(xti=%d,"%self.xti + self.cellRange.getName() + ")"
 
+class _FuncVar(_TokenBase):
+
+    funcTab = {
+        0x0000: 'COUNT',
+        0x0001: 'IF',
+        0x0002: 'ISNA',
+        0x0003: 'ISERROR',
+        0x0004: 'SUM',
+        0x0005: 'AVERAGE',
+        0x0006: 'MIN',
+        0x0007: 'MAX',
+        0x0008: 'ROW',
+        0x0009: 'COLUMN',
+        0x000A: 'NA',
+        0x000B: 'NPV',
+        0x000C: 'STDEV',
+        0x000D: 'DOLLAR',
+        0x000E: 'FIXED',
+        0x000F: 'SIN',
+        0x0010: 'COS',
+        0x0011: 'TAN',
+        0x0012: 'ATAN',
+        0x0013: 'PI',
+        0x0014: 'SQRT',
+        0x0015: 'EXP',
+        0x0016: 'LN',
+        0x0017: 'LOG10',
+        0x0018: 'ABS',
+        0x0019: 'INT',
+        0x001A: 'SIGN',
+        0x001B: 'ROUND',
+        0x001C: 'LOOKUP',
+        0x001D: 'INDEX',
+        0x001E: 'REPT',
+        0x001F: 'MID',
+        0x0020: 'LEN',
+        0x0021: 'VALUE',
+        0x0022: 'TRUE',
+        0x0023: 'FALSE',
+        0x0024: 'AND',
+        0x0025: 'OR',
+        0x0026: 'NOT',
+        0x0027: 'MOD',
+        0x0028: 'DCOUNT',
+        0x0029: 'DSUM',
+        0x002A: 'DAVERAGE',
+        0x002B: 'DMIN',
+        0x002C: 'DMAX',
+        0x002D: 'DSTDEV',
+        0x002E: 'VAR',
+        0x002F: 'DVAR',
+        0x0030: 'TEXT',
+        0x0031: 'LINEST',
+        0x0032: 'TREND',
+        0x0033: 'LOGEST',
+        0x0034: 'GROWTH',
+        0x0035: 'GOTO',
+        0x0036: 'HALT',
+        0x0037: 'RETURN',
+        0x0038: 'PV',
+        0x0039: 'FV',
+        0x003A: 'NPER',
+        0x003B: 'PMT',
+        0x003C: 'RATE',
+        0x003D: 'MIRR',
+        0x003E: 'IRR',
+        0x003F: 'RAND',
+        0x0040: 'MATCH',
+        0x0041: 'DATE',
+        0x0042: 'TIME',
+        0x0043: 'DAY',
+        0x0044: 'MONTH',
+        0x0045: 'YEAR',
+        0x0046: 'WEEKDAY',
+        0x0047: 'HOUR',
+        0x0048: 'MINUTE',
+        0x0049: 'SECOND',
+        0x004A: 'NOW',
+        0x004B: 'AREAS',
+        0x004C: 'ROWS',
+        0x004D: 'COLUMNS',
+        0x004E: 'OFFSET',
+        0x004F: 'ABSREF',
+        0x0050: 'RELREF',
+        0x0051: 'ARGUMENT',
+        0x0052: 'SEARCH',
+        0x0053: 'TRANSPOSE',
+        0x0054: 'ERROR',
+        0x0055: 'STEP',
+        0x0056: 'TYPE',
+        0x0057: 'ECHO',
+        0x0058: 'SET.NAME',
+        0x0059: 'CALLER',
+        0x005A: 'DEREF',
+        0x005B: 'WINDOWS',
+        0x005C: 'SERIES',
+        0x005D: 'DOCUMENTS',
+        0x005E: 'ACTIVE.CELL',
+        0x005F: 'SELECTION',
+        0x0060: 'RESULT',
+        0x0061: 'ATAN2',
+        0x0062: 'ASIN',
+        0x0063: 'ACOS',
+        0x0064: 'CHOOSE',
+        0x0065: 'HLOOKUP',
+        0x0066: 'VLOOKUP',
+        0x0067: 'LINKS',
+        0x0068: 'INPUT',
+        0x0069: 'ISREF',
+        0x006A: 'GET.FORMULA',
+        0x006B: 'GET.NAME',
+        0x006C: 'SET.VALUE',
+        0x006D: 'LOG',
+        0x006E: 'EXEC',
+        0x006F: 'CHAR',
+        0x0070: 'LOWER',
+        0x0071: 'UPPER',
+        0x0072: 'PROPER',
+        0x0073: 'LEFT',
+        0x0074: 'RIGHT',
+        0x0075: 'EXACT',
+        0x0076: 'TRIM',
+        0x0077: 'REPLACE',
+        0x0078: 'SUBSTITUTE',
+        0x0079: 'CODE',
+        0x007A: 'NAMES',
+        0x007B: 'DIRECTORY',
+        0x007C: 'FIND',
+        0x007D: 'CELL',
+        0x007E: 'ISERR',
+        0x007F: 'ISTEXT',
+        0x0080: 'ISNUMBER',
+        0x0081: 'ISBLANK',
+        0x0082: 'T',
+        0x0083: 'N',
+        0x0084: 'FOPEN',
+        0x0085: 'FCLOSE',
+        0x0086: 'FSIZE',
+        0x0087: 'FREADLN',
+        0x0088: 'FREAD',
+        0x0089: 'FWRITELN',
+        0x008A: 'FWRITE',
+        0x008B: 'FPOS',
+        0x008C: 'DATEVALUE',
+        0x008D: 'TIMEVALUE',
+        0x008E: 'SLN',
+        0x008F: 'SYD',
+        0x0090: 'DDB',
+        0x0091: 'GET.DEF',
+        0x0092: 'REFTEXT',
+        0x0093: 'TEXTREF',
+        0x0094: 'INDIRECT',
+        0x0095: 'REGISTER',
+        0x0096: 'CALL',
+        0x0097: 'ADD.BAR',
+        0x0098: 'ADD.MENU',
+        0x0099: 'ADD.COMMAND',
+        0x009A: 'ENABLE.COMMAND',
+        0x009B: 'CHECK.COMMAND',
+        0x009C: 'RENAME.COMMAND',
+        0x009D: 'SHOW.BAR',
+        0x009E: 'DELETE.MENU',
+        0x009F: 'DELETE.COMMAND',
+        0x00A0: 'GET.CHART.ITEM',
+        0x00A1: 'DIALOG.BOX',
+        0x00A2: 'CLEAN',
+        0x00A3: 'MDETERM',
+        0x00A4: 'MINVERSE',
+        0x00A5: 'MMULT',
+        0x00A6: 'FILES',
+        0x00A7: 'IPMT',
+        0x00A8: 'PPMT',
+        0x00A9: 'COUNTA',
+        0x00AA: 'CANCEL.KEY',
+        0x00AB: 'FOR',
+        0x00AC: 'WHILE',
+        0x00AD: 'BREAK',
+        0x00AE: 'NEXT',
+        0x00AF: 'INITIATE',
+        0x00B0: 'REQUEST',
+        0x00B1: 'POKE',
+        0x00B2: 'EXECUTE',
+        0x00B3: 'TERMINATE',
+        0x00B4: 'RESTART',
+        0x00B5: 'HELP',
+        0x00B6: 'GET.BAR',
+        0x00B7: 'PRODUCT',
+        0x00B8: 'FACT',
+        0x00B9: 'GET.CELL',
+        0x00BA: 'GET.WORKSPACE',
+        0x00BB: 'GET.WINDOW',
+        0x00BC: 'GET.DOCUMENT',
+        0x00BD: 'DPRODUCT',
+        0x00BE: 'ISNONTEXT',
+        0x00BF: 'GET.NOTE',
+        0x00C0: 'NOTE',
+        0x00C1: 'STDEVP',
+        0x00C2: 'VARP',
+        0x00C3: 'DSTDEVP',
+        0x00C4: 'DVARP',
+        0x00C5: 'TRUNC',
+        0x00C6: 'ISLOGICAL',
+        0x00C7: 'DCOUNTA',
+        0x00C8: 'DELETE.BAR',
+        0x00C9: 'UNREGISTER',
+        0x00CC: 'USDOLLAR',
+        0x00CD: 'FINDB',
+        0x00CE: 'SEARCHB',
+        0x00CF: 'REPLACEB',
+        0x00D0: 'LEFTB',
+        0x00D1: 'RIGHTB',
+        0x00D2: 'MIDB',
+        0x00D3: 'LENB',
+        0x00D4: 'ROUNDUP',
+        0x00D5: 'ROUNDDOWN',
+        0x00D6: 'ASC',
+        0x00D7: 'DBCS',
+        0x00D8: 'RANK',
+        0x00DB: 'ADDRESS',
+        0x00DC: 'DAYS360',
+        0x00DD: 'TODAY',
+        0x00DE: 'VDB',
+        0x00DF: 'ELSE',
+        0x00E0: 'ELSE.IF',
+        0x00E1: 'END.IF',
+        0x00E2: 'FOR.CELL',
+        0x00E3: 'MEDIAN',
+        0x00E4: 'SUMPRODUCT',
+        0x00E5: 'SINH',
+        0x00E6: 'COSH',
+        0x00E7: 'TANH',
+        0x00E8: 'ASINH',
+        0x00E9: 'ACOSH',
+        0x00EA: 'ATANH',
+        0x00EB: 'DGET',
+        0x00EC: 'CREATE.OBJECT',
+        0x00ED: 'VOLATILE',
+        0x00EE: 'LAST.ERROR',
+        0x00EF: 'CUSTOM.UNDO',
+        0x00F0: 'CUSTOM.REPEAT',
+        0x00F1: 'FORMULA.CONVERT',
+        0x00F2: 'GET.LINK.INFO',
+        0x00F3: 'TEXT.BOX',
+        0x00F4: 'INFO',
+        0x00F5: 'GROUP',
+        0x00F6: 'GET.OBJECT',
+        0x00F7: 'DB',
+        0x00F8: 'PAUSE',
+        0x00FB: 'RESUME',
+        0x00FC: 'FREQUENCY',
+        0x00FD: 'ADD.TOOLBAR',
+        0x00FE: 'DELETE.TOOLBAR',
+        0x00FF: 'User Defined Function',
+        0x0100: 'RESET.TOOLBAR',
+        0x0101: 'EVALUATE',
+        0x0102: 'GET.TOOLBAR',
+        0x0103: 'GET.TOOL',
+        0x0104: 'SPELLING.CHECK',
+        0x0105: 'ERROR.TYPE',
+        0x0106: 'APP.TITLE',
+        0x0107: 'WINDOW.TITLE',
+        0x0108: 'SAVE.TOOLBAR',
+        0x0109: 'ENABLE.TOOL',
+        0x010A: 'PRESS.TOOL',
+        0x010B: 'REGISTER.ID',
+        0x010C: 'GET.WORKBOOK',
+        0x010D: 'AVEDEV',
+        0x010E: 'BETADIST',
+        0x010F: 'GAMMALN',
+        0x0110: 'BETAINV',
+        0x0111: 'BINOMDIST',
+        0x0112: 'CHIDIST',
+        0x0113: 'CHIINV',
+        0x0114: 'COMBIN',
+        0x0115: 'CONFIDENCE',
+        0x0116: 'CRITBINOM',
+        0x0117: 'EVEN',
+        0x0118: 'EXPONDIST',
+        0x0119: 'FDIST',
+        0x011A: 'FINV',
+        0x011B: 'FISHER',
+        0x011C: 'FISHERINV',
+        0x011D: 'FLOOR',
+        0x011E: 'GAMMADIST',
+        0x011F: 'GAMMAINV',
+        0x0120: 'CEILING',
+        0x0121: 'HYPGEOMDIST',
+        0x0122: 'LOGNORMDIST',
+        0x0123: 'LOGINV',
+        0x0124: 'NEGBINOMDIST',
+        0x0125: 'NORMDIST',
+        0x0126: 'NORMSDIST',
+        0x0127: 'NORMINV',
+        0x0128: 'NORMSINV',
+        0x0129: 'STANDARDIZE',
+        0x012A: 'ODD',
+        0x012B: 'PERMUT',
+        0x012C: 'POISSON',
+        0x012D: 'TDIST',
+        0x012E: 'WEIBULL',
+        0x012F: 'SUMXMY2',
+        0x0130: 'SUMX2MY2',
+        0x0131: 'SUMX2PY2',
+        0x0132: 'CHITEST',
+        0x0133: 'CORREL',
+        0x0134: 'COVAR',
+        0x0135: 'FORECAST',
+        0x0136: 'FTEST',
+        0x0137: 'INTERCEPT',
+        0x0138: 'PEARSON',
+        0x0139: 'RSQ',
+        0x013A: 'STEYX',
+        0x013B: 'SLOPE',
+        0x013C: 'TTEST',
+        0x013D: 'PROB',
+        0x013E: 'DEVSQ',
+        0x013F: 'GEOMEAN',
+        0x0140: 'HARMEAN',
+        0x0141: 'SUMSQ',
+        0x0142: 'KURT',
+        0x0143: 'SKEW',
+        0x0144: 'ZTEST',
+        0x0145: 'LARGE',
+        0x0146: 'SMALL',
+        0x0147: 'QUARTILE',
+        0x0148: 'PERCENTILE',
+        0x0149: 'PERCENTRANK',
+        0x014A: 'MODE',
+        0x014B: 'TRIMMEAN',
+        0x014C: 'TINV',
+        0x014E: 'MOVIE.COMMAND',
+        0x014F: 'GET.MOVIE',
+        0x0150: 'CONCATENATE',
+        0x0151: 'POWER',
+        0x0152: 'PIVOT.ADD.DATA',
+        0x0153: 'GET.PIVOT.TABLE',
+        0x0154: 'GET.PIVOT.FIELD',
+        0x0155: 'GET.PIVOT.ITEM',
+        0x0156: 'RADIANS',
+        0x0157: 'DEGREES',
+        0x0158: 'SUBTOTAL',
+        0x0159: 'SUMIF',
+        0x015A: 'COUNTIF',
+        0x015B: 'COUNTBLANK',
+        0x015C: 'SCENARIO.GET',
+        0x015D: 'OPTIONS.LISTS.GET',
+        0x015E: 'ISPMT',
+        0x015F: 'DATEDIF',
+        0x0160: 'DATESTRING',
+        0x0161: 'NUMBERSTRING',
+        0x0162: 'ROMAN',
+        0x0163: 'OPEN.DIALOG',
+        0x0164: 'SAVE.DIALOG',
+        0x0165: 'VIEW.GET',
+        0x0166: 'GETPIVOTDATA',
+        0x0167: 'HYPERLINK',
+        0x0168: 'PHONETIC',
+        0x0169: 'AVERAGEA',
+        0x016A: 'MAXA',
+        0x016B: 'MINA',
+        0x016C: 'STDEVPA',
+        0x016D: 'VARPA',
+        0x016E: 'STDEVA',
+        0x016F: 'VARA',
+        0x0170: 'BAHTTEXT',
+        0x0171: 'THAIDAYOFWEEK',
+        0x0172: 'THAIDIGIT',
+        0x0173: 'THAIMONTHOFYEAR',
+        0x0174: 'THAINUMSOUND',
+        0x0175: 'THAINUMSTRING',
+        0x0176: 'THAISTRINGLENGTH',
+        0x0177: 'ISTHAIDIGIT',
+        0x0178: 'ROUNDBAHTDOWN',
+        0x0179: 'ROUNDBAHTUP',
+        0x017A: 'THAIYEAR',
+        0x017B: 'RTD'
+    }
+
+    def parseBytes (self):  # decode the fields that follow the token opcode in the stream
+        self.dataType = (self.opcode1 & 0x60)/32  # bits 5-6 of the opcode: 0x1 = reference, 0x2 = value, 0x3 = array
+        self.argCount = self.strm.readUnsignedInt(1)  # argument count (presumably a 1-byte read); getText() only renders functions where this is 0
+        tab = self.strm.readUnsignedInt(2)  # combined field (presumably 2 bytes); split into index and flag below
+        self.funcType = (tab & 0x7FFF)  # low 15 bits; used as the key into funcTab
+        self.isCeTab = (tab & 0x8000) != 0  # top bit; when set, getText() returns '' (not supported yet)
+
+    def getText (self):  # return "NAME()" for a known zero-argument function, otherwise '' or '#NAME!'
+        if self.isCeTab:
+            # TODO: the isCeTab flag (top bit of the table field) is not handled yet; emit no text.
+            return ''
+
+        if not _FuncVar.funcTab.has_key(self.funcType):
+            # Function index is not in funcTab.  NOTE(review): Excel spells this error '#NAME?' - confirm '#NAME!' is intended.
+            return '#NAME!'
+
+        if self.argCount > 0:
+            # TODO: functions with arguments are not rendered yet; emit no text.
+            return ''
+
+        return _FuncVar.funcTab[self.funcType] + "()"
+
 _tokenMap = {
     0x3B: _Area3d,
     0x5B: _Area3d,
-    0x7B: _Area3d
+    0x7B: _Area3d,
+
+    0x42: _FuncVar
 }
 
 class FormulaParser2(object):
@@ -391,20 +793,16 @@ Once replaced, I'll change the name to FormulaParser and the names of the
 associated token classes will be without the leading underscore (_)."""
 
 
-    def __init__ (self, header, bytes, sizeField=True):
+    def __init__ (self, header, bytes):
         self.header = header
         self.tokens = []
-        if sizeField:
-            # first 2-bytes contain the length of the formula bytes.
-            length = globals.getUnsignedInt(bytes[:2])
-            self.strm = globals.ByteStream(bytes[2:2+length])
-        else:
-            self.strm = globals.ByteStream(bytes)
+        self.strm = globals.ByteStream(bytes)
 
     def parse (self):
         while not self.strm.isEndOfRecord():
             b = self.strm.readUnsignedInt(1)
             if not _tokenMap.has_key(b):
+                # Unknown token.  Stop parsing.
                 return
 
             token = _tokenMap[b](self.strm, b)
diff --git a/scratch/mso-dumper/src/xlsmodel.py b/scratch/mso-dumper/src/xlsmodel.py
index b1c3a33..df13542 100644
--- a/scratch/mso-dumper/src/xlsmodel.py
+++ b/scratch/mso-dumper/src/xlsmodel.py
@@ -229,7 +229,7 @@ class Worksheet(SheetBase):
 
         wbg = wb.getWorkbookGlobal()
         tokens = wbg.getFilterRange(self.__sheetID)
-        parser = formula.FormulaParser2(None, tokens, False)
+        parser = formula.FormulaParser2(None, tokens)
         parser.parse()
         tokens = parser.getTokens()
         if len(tokens) != 1 or tokens[0].tokenType != formula.TokenType.Area3d:
@@ -298,6 +298,9 @@ class FormulaCell(CellBase):
     def createDOM (self, wb):
         nd = node.Element('formula-cell')
         if self.tokens != None:
+            parser = formula.FormulaParser2(None, self.tokens)  # no record header is passed (None); self.tokens is handed over unchanged
+            parser.parse()  # NOTE(review): parse() returns early on a token missing from _tokenMap, so the text may cover only a prefix
+            nd.setAttr('formula', parser.getText())  # text rendering of the parsed tokens, next to the raw 'token-bytes' attribute
             s = globals.getRawBytes(self.tokens, True, False)
             nd.setAttr('token-bytes', s)
         return nd
diff --git a/scratch/mso-dumper/src/xlsrecord.py b/scratch/mso-dumper/src/xlsrecord.py
index aa9b1d3..76799b0 100644
--- a/scratch/mso-dumper/src/xlsrecord.py
+++ b/scratch/mso-dumper/src/xlsrecord.py
@@ -515,7 +515,8 @@ class Formula(BaseRecordHandler):
         self.calcOnOpen     = (flags & 0x0002) != 0
         self.sharedFormula  = (flags & 0x0008) != 0
         self.appCacheInfo = self.readUnsignedInt(4) # used only for app-specific optimization.  Ignore it for now.
-        self.tokens = self.readRemainingBytes()
+        tokenSize = self.readUnsignedInt(2)
+        self.tokens = self.readBytes(tokenSize)
 
     def parseBytes (self):
         self.__parseBytes()
@@ -1069,7 +1070,7 @@ class Name(BaseRecordHandler):
         self.__parseBytes()
 
         tokenText = globals.getRawBytes(self.tokenBytes, True, False)
-        o = formula.FormulaParser2(self.header, self.tokenBytes, False)
+        o = formula.FormulaParser2(self.header, self.tokenBytes)
         o.parse()
         formulaText = o.getText()
         self.appendLine("name: %s"%globals.encodeName(self.name))


More information about the ooo-build-commit mailing list