[ooo-build-commit] scratch/mso-dumper
Kohei Yoshida
kohei at kemper.freedesktop.org
Mon Jan 4 20:35:58 PST 2010
scratch/mso-dumper/src/formula.py | 414 +++++++++++++++++++++++++++++++++++-
scratch/mso-dumper/src/xlsmodel.py | 5
scratch/mso-dumper/src/xlsrecord.py | 5
3 files changed, 413 insertions(+), 11 deletions(-)
New commits:
commit b0fbb0d49f79f31269718ea179bf2e90e83d1347
Author: Kohei Yoshida <kyoshida at novell.com>
Date: Mon Jan 4 23:33:46 2010 -0500
[xls-dump] Parse function tokens to display worksheet function names.
For now I only support functions with no arguments. I'll support
functions with arguments when I learn how to parse them.
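* scratch/mso-dumper/src/formula.py: add the _FuncVar token class (token 0x42) with a function ID-to-name table; FormulaParser2 no longer takes the sizeField argument and always treats its input as raw token bytes.
* scratch/mso-dumper/src/xlsmodel.py: adapt to the new FormulaParser2 signature; FormulaCell.createDOM now sets a 'formula' attribute from the parsed token text.
* scratch/mso-dumper/src/xlsrecord.py: the Formula record reads the 2-byte token size and then exactly that many token bytes instead of all remaining bytes; Name adapts to the new FormulaParser2 signature.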
diff --git a/scratch/mso-dumper/src/formula.py b/scratch/mso-dumper/src/formula.py
index 7f422d3..0664332 100644
--- a/scratch/mso-dumper/src/formula.py
+++ b/scratch/mso-dumper/src/formula.py
@@ -378,10 +378,412 @@ class _Area3d(_TokenBase):
def getText (self):
return "(xti=%d,"%self.xti + self.cellRange.getName() + ")"
+class _FuncVar(_TokenBase):
+
+ funcTab = {
+ 0x0000: 'COUNT',
+ 0x0001: 'IF',
+ 0x0002: 'ISNA',
+ 0x0003: 'ISERROR',
+ 0x0004: 'SUM',
+ 0x0005: 'AVERAGE',
+ 0x0006: 'MIN',
+ 0x0007: 'MAX',
+ 0x0008: 'ROW',
+ 0x0009: 'COLUMN',
+ 0x000A: 'NA',
+ 0x000B: 'NPV',
+ 0x000C: 'STDEV',
+ 0x000D: 'DOLLAR',
+ 0x000E: 'FIXED',
+ 0x000F: 'SIN',
+ 0x0010: 'COS',
+ 0x0011: 'TAN',
+ 0x0012: 'ATAN',
+ 0x0013: 'PI',
+ 0x0014: 'SQRT',
+ 0x0015: 'EXP',
+ 0x0016: 'LN',
+ 0x0017: 'LOG10',
+ 0x0018: 'ABS',
+ 0x0019: 'INT',
+ 0x001A: 'SIGN',
+ 0x001B: 'ROUND',
+ 0x001C: 'LOOKUP',
+ 0x001D: 'INDEX',
+ 0x001E: 'REPT',
+ 0x001F: 'MID',
+ 0x0020: 'LEN',
+ 0x0021: 'VALUE',
+ 0x0022: 'TRUE',
+ 0x0023: 'FALSE',
+ 0x0024: 'AND',
+ 0x0025: 'OR',
+ 0x0026: 'NOT',
+ 0x0027: 'MOD',
+ 0x0028: 'DCOUNT',
+ 0x0029: 'DSUM',
+ 0x002A: 'DAVERAGE',
+ 0x002B: 'DMIN',
+ 0x002C: 'DMAX',
+ 0x002D: 'DSTDEV',
+ 0x002E: 'VAR',
+ 0x002F: 'DVAR',
+ 0x0030: 'TEXT',
+ 0x0031: 'LINEST',
+ 0x0032: 'TREND',
+ 0x0033: 'LOGEST',
+ 0x0034: 'GROWTH',
+ 0x0035: 'GOTO',
+ 0x0036: 'HALT',
+ 0x0037: 'RETURN',
+ 0x0038: 'PV',
+ 0x0039: 'FV',
+ 0x003A: 'NPER',
+ 0x003B: 'PMT',
+ 0x003C: 'RATE',
+ 0x003D: 'MIRR',
+ 0x003E: 'IRR',
+ 0x003F: 'RAND',
+ 0x0040: 'MATCH',
+ 0x0041: 'DATE',
+ 0x0042: 'TIME',
+ 0x0043: 'DAY',
+ 0x0044: 'MONTH',
+ 0x0045: 'YEAR',
+ 0x0046: 'WEEKDAY',
+ 0x0047: 'HOUR',
+ 0x0048: 'MINUTE',
+ 0x0049: 'SECOND',
+ 0x004A: 'NOW',
+ 0x004B: 'AREAS',
+ 0x004C: 'ROWS',
+ 0x004D: 'COLUMNS',
+ 0x004E: 'OFFSET',
+ 0x004F: 'ABSREF',
+ 0x0050: 'RELREF',
+ 0x0051: 'ARGUMENT',
+ 0x0052: 'SEARCH',
+ 0x0053: 'TRANSPOSE',
+ 0x0054: 'ERROR',
+ 0x0055: 'STEP',
+ 0x0056: 'TYPE',
+ 0x0057: 'ECHO',
+ 0x0058: 'SET.NAME',
+ 0x0059: 'CALLER',
+ 0x005A: 'DEREF',
+ 0x005B: 'WINDOWS',
+ 0x005C: 'SERIES',
+ 0x005D: 'DOCUMENTS',
+ 0x005E: 'ACTIVE.CELL',
+ 0x005F: 'SELECTION',
+ 0x0060: 'RESULT',
+ 0x0061: 'ATAN2',
+ 0x0062: 'ASIN',
+ 0x0063: 'ACOS',
+ 0x0064: 'CHOOSE',
+ 0x0065: 'HLOOKUP',
+ 0x0066: 'VLOOKUP',
+ 0x0067: 'LINKS',
+ 0x0068: 'INPUT',
+ 0x0069: 'ISREF',
+ 0x006A: 'GET.FORMULA',
+ 0x006B: 'GET.NAME',
+ 0x006C: 'SET.VALUE',
+ 0x006D: 'LOG',
+ 0x006E: 'EXEC',
+ 0x006F: 'CHAR',
+ 0x0070: 'LOWER',
+ 0x0071: 'UPPER',
+ 0x0072: 'PROPER',
+ 0x0073: 'LEFT',
+ 0x0074: 'RIGHT',
+ 0x0075: 'EXACT',
+ 0x0076: 'TRIM',
+ 0x0077: 'REPLACE',
+ 0x0078: 'SUBSTITUTE',
+ 0x0079: 'CODE',
+ 0x007A: 'NAMES',
+ 0x007B: 'DIRECTORY',
+ 0x007C: 'FIND',
+ 0x007D: 'CELL',
+ 0x007E: 'ISERR',
+ 0x007F: 'ISTEXT',
+ 0x0080: 'ISNUMBER',
+ 0x0081: 'ISBLANK',
+ 0x0082: 'T',
+ 0x0083: 'N',
+ 0x0084: 'FOPEN',
+ 0x0085: 'FCLOSE',
+ 0x0086: 'FSIZE',
+ 0x0087: 'FREADLN',
+ 0x0088: 'FREAD',
+ 0x0089: 'FWRITELN',
+ 0x008A: 'FWRITE',
+ 0x008B: 'FPOS',
+ 0x008C: 'DATEVALUE',
+ 0x008D: 'TIMEVALUE',
+ 0x008E: 'SLN',
+ 0x008F: 'SYD',
+ 0x0090: 'DDB',
+ 0x0091: 'GET.DEF',
+ 0x0092: 'REFTEXT',
+ 0x0093: 'TEXTREF',
+ 0x0094: 'INDIRECT',
+ 0x0095: 'REGISTER',
+ 0x0096: 'CALL',
+ 0x0097: 'ADD.BAR',
+ 0x0098: 'ADD.MENU',
+ 0x0099: 'ADD.COMMAND',
+ 0x009A: 'ENABLE.COMMAND',
+ 0x009B: 'CHECK.COMMAND',
+ 0x009C: 'RENAME.COMMAND',
+ 0x009D: 'SHOW.BAR',
+ 0x009E: 'DELETE.MENU',
+ 0x009F: 'DELETE.COMMAND',
+ 0x00A0: 'GET.CHART.ITEM',
+ 0x00A1: 'DIALOG.BOX',
+ 0x00A2: 'CLEAN',
+ 0x00A3: 'MDETERM',
+ 0x00A4: 'MINVERSE',
+ 0x00A5: 'MMULT',
+ 0x00A6: 'FILES',
+ 0x00A7: 'IPMT',
+ 0x00A8: 'PPMT',
+ 0x00A9: 'COUNTA',
+ 0x00AA: 'CANCEL.KEY',
+ 0x00AB: 'FOR',
+ 0x00AC: 'WHILE',
+ 0x00AD: 'BREAK',
+ 0x00AE: 'NEXT',
+ 0x00AF: 'INITIATE',
+ 0x00B0: 'REQUEST',
+ 0x00B1: 'POKE',
+ 0x00B2: 'EXECUTE',
+ 0x00B3: 'TERMINATE',
+ 0x00B4: 'RESTART',
+ 0x00B5: 'HELP',
+ 0x00B6: 'GET.BAR',
+ 0x00B7: 'PRODUCT',
+ 0x00B8: 'FACT',
+ 0x00B9: 'GET.CELL',
+ 0x00BA: 'GET.WORKSPACE',
+ 0x00BB: 'GET.WINDOW',
+ 0x00BC: 'GET.DOCUMENT',
+ 0x00BD: 'DPRODUCT',
+ 0x00BE: 'ISNONTEXT',
+ 0x00BF: 'GET.NOTE',
+ 0x00C0: 'NOTE',
+ 0x00C1: 'STDEVP',
+ 0x00C2: 'VARP',
+ 0x00C3: 'DSTDEVP',
+ 0x00C4: 'DVARP',
+ 0x00C5: 'TRUNC',
+ 0x00C6: 'ISLOGICAL',
+ 0x00C7: 'DCOUNTA',
+ 0x00C8: 'DELETE.BAR',
+ 0x00C9: 'UNREGISTER',
+ 0x00CC: 'USDOLLAR',
+ 0x00CD: 'FINDB',
+ 0x00CE: 'SEARCHB',
+ 0x00CF: 'REPLACEB',
+ 0x00D0: 'LEFTB',
+ 0x00D1: 'RIGHTB',
+ 0x00D2: 'MIDB',
+ 0x00D3: 'LENB',
+ 0x00D4: 'ROUNDUP',
+ 0x00D5: 'ROUNDDOWN',
+ 0x00D6: 'ASC',
+ 0x00D7: 'DBCS',
+ 0x00D8: 'RANK',
+ 0x00DB: 'ADDRESS',
+ 0x00DC: 'DAYS360',
+ 0x00DD: 'TODAY',
+ 0x00DE: 'VDB',
+ 0x00DF: 'ELSE',
+ 0x00E0: 'ELSE.IF',
+ 0x00E1: 'END.IF',
+ 0x00E2: 'FOR.CELL',
+ 0x00E3: 'MEDIAN',
+ 0x00E4: 'SUMPRODUCT',
+ 0x00E5: 'SINH',
+ 0x00E6: 'COSH',
+ 0x00E7: 'TANH',
+ 0x00E8: 'ASINH',
+ 0x00E9: 'ACOSH',
+ 0x00EA: 'ATANH',
+ 0x00EB: 'DGET',
+ 0x00EC: 'CREATE.OBJECT',
+ 0x00ED: 'VOLATILE',
+ 0x00EE: 'LAST.ERROR',
+ 0x00EF: 'CUSTOM.UNDO',
+ 0x00F0: 'CUSTOM.REPEAT',
+ 0x00F1: 'FORMULA.CONVERT',
+ 0x00F2: 'GET.LINK.INFO',
+ 0x00F3: 'TEXT.BOX',
+ 0x00F4: 'INFO',
+ 0x00F5: 'GROUP',
+ 0x00F6: 'GET.OBJECT',
+ 0x00F7: 'DB',
+ 0x00F8: 'PAUSE',
+ 0x00FB: 'RESUME',
+ 0x00FC: 'FREQUENCY',
+ 0x00FD: 'ADD.TOOLBAR',
+ 0x00FE: 'DELETE.TOOLBAR',
+ 0x00FF: 'User Defined Function',
+ 0x0100: 'RESET.TOOLBAR',
+ 0x0101: 'EVALUATE',
+ 0x0102: 'GET.TOOLBAR',
+ 0x0103: 'GET.TOOL',
+ 0x0104: 'SPELLING.CHECK',
+ 0x0105: 'ERROR.TYPE',
+ 0x0106: 'APP.TITLE',
+ 0x0107: 'WINDOW.TITLE',
+ 0x0108: 'SAVE.TOOLBAR',
+ 0x0109: 'ENABLE.TOOL',
+ 0x010A: 'PRESS.TOOL',
+ 0x010B: 'REGISTER.ID',
+ 0x010C: 'GET.WORKBOOK',
+ 0x010D: 'AVEDEV',
+ 0x010E: 'BETADIST',
+ 0x010F: 'GAMMALN',
+ 0x0110: 'BETAINV',
+ 0x0111: 'BINOMDIST',
+ 0x0112: 'CHIDIST',
+ 0x0113: 'CHIINV',
+ 0x0114: 'COMBIN',
+ 0x0115: 'CONFIDENCE',
+ 0x0116: 'CRITBINOM',
+ 0x0117: 'EVEN',
+ 0x0118: 'EXPONDIST',
+ 0x0119: 'FDIST',
+ 0x011A: 'FINV',
+ 0x011B: 'FISHER',
+ 0x011C: 'FISHERINV',
+ 0x011D: 'FLOOR',
+ 0x011E: 'GAMMADIST',
+ 0x011F: 'GAMMAINV',
+ 0x0120: 'CEILING',
+ 0x0121: 'HYPGEOMDIST',
+ 0x0122: 'LOGNORMDIST',
+ 0x0123: 'LOGINV',
+ 0x0124: 'NEGBINOMDIST',
+ 0x0125: 'NORMDIST',
+ 0x0126: 'NORMSDIST',
+ 0x0127: 'NORMINV',
+ 0x0128: 'NORMSINV',
+ 0x0129: 'STANDARDIZE',
+ 0x012A: 'ODD',
+ 0x012B: 'PERMUT',
+ 0x012C: 'POISSON',
+ 0x012D: 'TDIST',
+ 0x012E: 'WEIBULL',
+ 0x012F: 'SUMXMY2',
+ 0x0130: 'SUMX2MY2',
+ 0x0131: 'SUMX2PY2',
+ 0x0132: 'CHITEST',
+ 0x0133: 'CORREL',
+ 0x0134: 'COVAR',
+ 0x0135: 'FORECAST',
+ 0x0136: 'FTEST',
+ 0x0137: 'INTERCEPT',
+ 0x0138: 'PEARSON',
+ 0x0139: 'RSQ',
+ 0x013A: 'STEYX',
+ 0x013B: 'SLOPE',
+ 0x013C: 'TTEST',
+ 0x013D: 'PROB',
+ 0x013E: 'DEVSQ',
+ 0x013F: 'GEOMEAN',
+ 0x0140: 'HARMEAN',
+ 0x0141: 'SUMSQ',
+ 0x0142: 'KURT',
+ 0x0143: 'SKEW',
+ 0x0144: 'ZTEST',
+ 0x0145: 'LARGE',
+ 0x0146: 'SMALL',
+ 0x0147: 'QUARTILE',
+ 0x0148: 'PERCENTILE',
+ 0x0149: 'PERCENTRANK',
+ 0x014A: 'MODE',
+ 0x014B: 'TRIMMEAN',
+ 0x014C: 'TINV',
+ 0x014E: 'MOVIE.COMMAND',
+ 0x014F: 'GET.MOVIE',
+ 0x0150: 'CONCATENATE',
+ 0x0151: 'POWER',
+ 0x0152: 'PIVOT.ADD.DATA',
+ 0x0153: 'GET.PIVOT.TABLE',
+ 0x0154: 'GET.PIVOT.FIELD',
+ 0x0155: 'GET.PIVOT.ITEM',
+ 0x0156: 'RADIANS',
+ 0x0157: 'DEGREES',
+ 0x0158: 'SUBTOTAL',
+ 0x0159: 'SUMIF',
+ 0x015A: 'COUNTIF',
+ 0x015B: 'COUNTBLANK',
+ 0x015C: 'SCENARIO.GET',
+ 0x015D: 'OPTIONS.LISTS.GET',
+ 0x015E: 'ISPMT',
+ 0x015F: 'DATEDIF',
+ 0x0160: 'DATESTRING',
+ 0x0161: 'NUMBERSTRING',
+ 0x0162: 'ROMAN',
+ 0x0163: 'OPEN.DIALOG',
+ 0x0164: 'SAVE.DIALOG',
+ 0x0165: 'VIEW.GET',
+ 0x0166: 'GETPIVOTDATA',
+ 0x0167: 'HYPERLINK',
+ 0x0168: 'PHONETIC',
+ 0x0169: 'AVERAGEA',
+ 0x016A: 'MAXA',
+ 0x016B: 'MINA',
+ 0x016C: 'STDEVPA',
+ 0x016D: 'VARPA',
+ 0x016E: 'STDEVA',
+ 0x016F: 'VARA',
+ 0x0170: 'BAHTTEXT',
+ 0x0171: 'THAIDAYOFWEEK',
+ 0x0172: 'THAIDIGIT',
+ 0x0173: 'THAIMONTHOFYEAR',
+ 0x0174: 'THAINUMSOUND',
+ 0x0175: 'THAINUMSTRING',
+ 0x0176: 'THAISTRINGLENGTH',
+ 0x0177: 'ISTHAIDIGIT',
+ 0x0178: 'ROUNDBAHTDOWN',
+ 0x0179: 'ROUNDBAHTUP',
+ 0x017A: 'THAIYEAR',
+ 0x017B: 'RTD'
+ }
+
+ def parseBytes (self):
+ self.dataType = (self.opcode1 & 0x60)/32 # 0x1 = reference, 0x2 = value, 0x3 = array
+ self.argCount = self.strm.readUnsignedInt(1)
+ tab = self.strm.readUnsignedInt(2)
+ self.funcType = (tab & 0x7FFF)
+ self.isCeTab = (tab & 0x8000) != 0
+
+ def getText (self):
+ if self.isCeTab:
+ # I'll support this later.
+ return ''
+
+ if not _FuncVar.funcTab.has_key(self.funcType):
+ # unknown function name
+ return '#NAME!'
+
+ if self.argCount > 0:
+ # I'll support functions with arguments later.
+ return ''
+
+ return _FuncVar.funcTab[self.funcType] + "()"
+
_tokenMap = {
0x3B: _Area3d,
0x5B: _Area3d,
- 0x7B: _Area3d
+ 0x7B: _Area3d,
+
+ 0x42: _FuncVar
}
class FormulaParser2(object):
@@ -391,20 +793,16 @@ Once replaced, I'll change the name to FormulaParser and the names of the
associated token classes will be without the leading underscore (_)."""
- def __init__ (self, header, bytes, sizeField=True):
+ def __init__ (self, header, bytes):
self.header = header
self.tokens = []
- if sizeField:
- # first 2-bytes contain the length of the formula bytes.
- length = globals.getUnsignedInt(bytes[:2])
- self.strm = globals.ByteStream(bytes[2:2+length])
- else:
- self.strm = globals.ByteStream(bytes)
+ self.strm = globals.ByteStream(bytes)
def parse (self):
while not self.strm.isEndOfRecord():
b = self.strm.readUnsignedInt(1)
if not _tokenMap.has_key(b):
+ # Unknown token. Stop parsing.
return
token = _tokenMap[b](self.strm, b)
diff --git a/scratch/mso-dumper/src/xlsmodel.py b/scratch/mso-dumper/src/xlsmodel.py
index b1c3a33..df13542 100644
--- a/scratch/mso-dumper/src/xlsmodel.py
+++ b/scratch/mso-dumper/src/xlsmodel.py
@@ -229,7 +229,7 @@ class Worksheet(SheetBase):
wbg = wb.getWorkbookGlobal()
tokens = wbg.getFilterRange(self.__sheetID)
- parser = formula.FormulaParser2(None, tokens, False)
+ parser = formula.FormulaParser2(None, tokens)
parser.parse()
tokens = parser.getTokens()
if len(tokens) != 1 or tokens[0].tokenType != formula.TokenType.Area3d:
@@ -298,6 +298,9 @@ class FormulaCell(CellBase):
def createDOM (self, wb):
nd = node.Element('formula-cell')
if self.tokens != None:
+ parser = formula.FormulaParser2(None, self.tokens)
+ parser.parse()
+ nd.setAttr('formula', parser.getText())
s = globals.getRawBytes(self.tokens, True, False)
nd.setAttr('token-bytes', s)
return nd
diff --git a/scratch/mso-dumper/src/xlsrecord.py b/scratch/mso-dumper/src/xlsrecord.py
index aa9b1d3..76799b0 100644
--- a/scratch/mso-dumper/src/xlsrecord.py
+++ b/scratch/mso-dumper/src/xlsrecord.py
@@ -515,7 +515,8 @@ class Formula(BaseRecordHandler):
self.calcOnOpen = (flags & 0x0002) != 0
self.sharedFormula = (flags & 0x0008) != 0
self.appCacheInfo = self.readUnsignedInt(4) # used only for app-specific optimization. Ignore it for now.
- self.tokens = self.readRemainingBytes()
+ tokenSize = self.readUnsignedInt(2)
+ self.tokens = self.readBytes(tokenSize)
def parseBytes (self):
self.__parseBytes()
@@ -1069,7 +1070,7 @@ class Name(BaseRecordHandler):
self.__parseBytes()
tokenText = globals.getRawBytes(self.tokenBytes, True, False)
- o = formula.FormulaParser2(self.header, self.tokenBytes, False)
+ o = formula.FormulaParser2(self.header, self.tokenBytes)
o.parse()
formulaText = o.getText()
self.appendLine("name: %s"%globals.encodeName(self.name))
More information about the ooo-build-commit mailing list