[Libreoffice-commits] .: 7 commits - .gitignore misc/test-files.sh src/xlsmodel.py src/xlsparser.py src/xlsrecord.py src/xlsstream.py xls-dump.py
Kohei Yoshida
kohei at kemper.freedesktop.org
Fri Sep 16 09:16:23 PDT 2011
.gitignore | 1
misc/test-files.sh | 15
src/xlsmodel.py | 46 +-
src/xlsparser.py | 671 ++++++++++++++++++++++++++++++++++++++
src/xlsrecord.py | 927 +++++++++++++++++++++++++++++++++++++++++++----------
src/xlsstream.py | 64 +--
xls-dump.py | 39 +-
7 files changed, 1534 insertions(+), 229 deletions(-)
New commits:
commit 2465c4bb135fa9ef701bb81f0ceaf64baa233bd6
Author: Sergey Kishchenko <voidwrk at gmail.com>
Date: Fri Sep 16 15:38:30 2011 +0300
xls-dump.py was tested against LibreOffice's set of test files; several small issues were fixed
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0d20b64
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.pyc
diff --git a/misc/test-files.sh b/misc/test-files.sh
new file mode 100755
index 0000000..164952d
--- /dev/null
+++ b/misc/test-files.sh
@@ -0,0 +1,15 @@
+#! /bin/sh
+
+test_dir=$1
+
+if [ ! -d $test_dir ]
+then
+ echo "Usage: test-files.sh TEST_DIR"
+ exit 1
+fi
+
+for x in `find $test_dir -name \*.xls`; do
+ (python xls-dump.py $x | grep -v "rror inter" > /dev/null) || echo "Flat dump failed for" $x
+ python xls-dump.py --dump-mode=xml $x > /dev/null || echo "Xml dump failed for" $x
+ python xls-dump.py --dump-mode=cxml $x > /dev/null || echo "CXml dump failed for" $x
+done
diff --git a/src/xlsmodel.py b/src/xlsmodel.py
index 5c10cad..5094ee5 100644
--- a/src/xlsmodel.py
+++ b/src/xlsmodel.py
@@ -79,7 +79,7 @@ class Workbook(ModelBase):
sheets = filter(lambda x: isinstance(x, Worksheet), self.__sheets)
n = len(sheets)
if n == 0:
- return
+ return nd
wbglobal = self.getWorkbookGlobal()
nd.appendChild(wbglobal.createDOM(self))
diff --git a/src/xlsparser.py b/src/xlsparser.py
index b9d5204..d105f24 100644
--- a/src/xlsparser.py
+++ b/src/xlsparser.py
@@ -119,8 +119,11 @@ class Req(BaseParser):
def parse(self, stream):
parsed = safeParse(self.__parser, stream)
if parsed is None:
+ currentToken = "<<<End Of Token Stream>>>"
+ if stream.currentIndex < len(stream.tokens):
+ currentToken = stream.tokens[stream.currentIndex]
raise ParseException("%s failed but it is required, next token is [%s]" %
- (str(self.__parser), str(stream.tokens[stream.currentIndex])))
+ (str(self.__parser), str(currentToken)))
return parsed
def __str__(self):
@@ -334,8 +337,14 @@ class PlotGrowth(BaseParser):
class Series(BaseParser):
PARSER = Term(xlsrecord.Series)
+class SeriesText(BaseParser):
+ PARSER = Term(xlsrecord.SeriesText)
+
+class BRAI(BaseParser):
+ PARSER = Term(xlsrecord.Brai)
+
class AI(BaseParser):
- PARSER = Term(xlsrecord.Brai) # TODO: we use Brai instead of AI now, fix it
+ PARSER = Req(BRAI()) << SeriesText()
class SerParent(BaseParser): pass
class SerAuxTrend(BaseParser): pass
@@ -351,9 +360,13 @@ class DataFormat(BaseParser):
class Chart3DBarShape(BaseParser):
PARSER = Term(xlsrecord.Chart3DBarShape)
-class PieFormat(BaseParser): pass
-class SerFmt(BaseParser): pass
-class MarkerFormat(BaseParser): pass
+class PieFormat(BaseParser):
+ PARSER = Term(xlsrecord.PieFormat)
+
+class SerFmt(BaseParser): pass
+
+class MarkerFormat(BaseParser):
+ PARSER = Term(xlsrecord.MarkerFormat)
class Text(BaseParser):
PARSER = Term(xlsrecord.Text)
@@ -487,7 +500,9 @@ class BobPopCustom(BaseParser): pass
class Bar(BaseParser):
PARSER = Term(xlsrecord.CHBar)
-class Line(BaseParser): pass
+class Line(BaseParser):
+ PARSER = Term(xlsrecord.CHLine)
+
class Pie(BaseParser): pass
class Area(BaseParser): pass
class Scatter(BaseParser): pass
@@ -538,15 +553,15 @@ class CHARTFORMATS(BaseParser):
#*2DFTTEXT AxesUsed 1*2AXISPARENT [CrtLayout12A] [DAT] *ATTACHEDLABEL [CRTMLFRT]
#*([DataLabExt StartObject] ATTACHEDLABEL [EndObject]) [TEXTPROPS] *2CRTMLFRT End
PARSER = Group('chart-fmt', Req(Chart()) << Req(Begin()) << Many('font-lists', FONTLIST(), max=2) <<
- Req(Scl()) << Req(PlotGrowth()) << FRAME() << Many('series-fmt-list', SERIESFORMAT()) <<
+ Req(Scl()) << Req(PlotGrowth()) << Opt(FRAME()) << Many('series-fmt-list', SERIESFORMAT()) <<
Many('ss-list', SS()) << Req(ShtProps()) << Many('dft-texts', DFTTEXT(), max=2) <<
Req(AxesUsed()) << Many('axis-roots', AXISPARENT(), min=1, max=2) <<
- CrtLayout12A() << DAT() << Many('attached-labels', ATTACHEDLABEL()) <<
- CRTMLFRT() << Many('datalab-exts', Seq(Seq(Req(DataLabExt()),
- Req(StartObject())),
- Req(ATTACHEDLABEL()),
- EndObject())) <<
- TEXTPROPS() << Many('crtmlfrt-list', CRTMLFRT()) << Req(End()))
+ CrtLayout12A() << Opt(DAT()) << Many('attached-labels', ATTACHEDLABEL()) <<
+ Opt(CRTMLFRT()) << Many('datalab-exts', Seq(Opt(Seq(Req(DataLabExt()),
+ Req(StartObject()))),
+ Req(ATTACHEDLABEL()),
+ EndObject())) <<
+ Opt(TEXTPROPS()) << Many('crtmlfrt-list', CRTMLFRT()) << Req(End()))
class Dimensions(BaseParser):
PARSER = Term(xlsrecord.Dimensions)
@@ -560,7 +575,9 @@ class Number(BaseParser):
class BoolErr(BaseParser): pass
class Blank(BaseParser): pass
-class Label(BaseParser): pass
+
+class Label(BaseParser):
+ PARSER = Term(xlsrecord.Label)
class SERIESDATA(BaseParser):
#SERIESDATA = Dimensions 3(SIIndex *(Number / BoolErr / Blank / Label))
diff --git a/src/xlsrecord.py b/src/xlsrecord.py
index 1e5b0e6..21f0beb 100644
--- a/src/xlsrecord.py
+++ b/src/xlsrecord.py
@@ -1127,6 +1127,12 @@ class Label(BaseRecordHandler):
self.appendLine("XF record ID: %d"%self.xfIdx)
self.appendLine("label text: %s"%self.text)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('label', {'col': self.col,
+ 'row': self.row,
+ 'xf-idx': self.xfIdx,
+ 'text': self.text})
class LabelSST(BaseRecordHandler):
@@ -3531,6 +3537,48 @@ class AreaFormat(BaseRecordHandler):
('icv-fore', dumpIcv(self.icvFore)),
('icv-back', dumpIcv(self.icvBack))])
+class PieFormat(BaseRecordHandler):
+ def __parseBytes(self):
+ self.pcExplode = self.readSignedInt(2)
+
+ def parseBytes (self):
+ self.__parseBytes()
+ # TODO: dump all data
+
+ def dumpData(self):
+ self.__parseBytes()
+ return ('pie-format', {'pc-explode': self.pcExplode})
+
+class MarkerFormat(BaseRecordHandler):
+ def __parseBytes(self):
+ self.rgbFore = self.readLongRGB()
+ self.rgbBack = self.readLongRGB()
+ self.imk = self.readUnsignedInt(2)
+ flags = self.readUnsignedInt(2)
+ self.auto = (flags & 0x001) != 0 # A
+ # next 3 bits are reserved
+ self.notShowInt = (flags & 0x010) != 0 # C
+ self.notShowBrd = (flags & 0x020) != 0 # D
+ self.icvFore = self.readICV()
+ self.icvBack = self.readICV()
+ self.miSize = self.readUnsignedInt(4)
+
+ def parseBytes (self):
+ self.__parseBytes()
+ # TODO: dump all data
+
+ def dumpData(self):
+ self.__parseBytes()
+ return ('marker-format', {'imk': self.imk,
+ 'auto': self.auto,
+ 'not-show-int': self.notShowInt,
+ 'not-show-brd': self.notShowBrd,
+ 'mi-size': self.miSize},
+ [('rgb-fore', dumpRgb(self.rgbFore)),
+ ('rgb-back', dumpRgb(self.rgbBack)),
+ ('icv-fore', dumpIcv(self.icvFore)),
+ ('icv-back', dumpIcv(self.icvBack))])
+
class DataFormat(BaseRecordHandler):
def __parseBytes(self):
self.xi = self.readUnsignedInt(2)
@@ -4104,15 +4152,24 @@ class CHBar(BaseRecordHandler):
class CHLine(BaseRecordHandler):
- def parseBytes (self):
+ def __parseBytes (self):
flags = globals.getUnsignedInt(self.readBytes(2))
- stacked = (flags & 0x0001)
- percent = (flags & 0x0002)
- shadow = (flags & 0x0004)
+ self.stacked = (flags & 0x0001)
+ self.percent = (flags & 0x0002)
+ self.shadow = (flags & 0x0004)
- self.appendLine("stacked: %s"%self.getYesNo(stacked))
- self.appendLine("percent: %s"%self.getYesNo(percent))
- self.appendLine("shadow: %s"%self.getYesNo(shadow))
+ def parseBytes (self):
+ self.__parseBytes()
+ self.appendLine("stacked: %s"%self.getYesNo(self.stacked))
+ self.appendLine("percent: %s"%self.getYesNo(self.percent))
+ self.appendLine("shadow: %s"%self.getYesNo(self.shadow))
+
+ def dumpData(self):
+ self.__parseBytes()
+ return ('line', {'stacked': self.stacked,
+ 'percent': self.percent,
+ 'shadow': self.shadow})
+
class Brai(BaseRecordHandler):
diff --git a/src/xlsstream.py b/src/xlsstream.py
index 5c569ad..ceb6eeb 100644
--- a/src/xlsstream.py
+++ b/src/xlsstream.py
@@ -243,9 +243,9 @@ recData = {
0x1003: ["SERIES", "Data Properties for Series, Trendlines or Error Bars", xlsrecord.Series],
0x1006: ["CHDATAFORMAT", "Data point or series that the formatting information that follows applies to (2.4.74)", xlsrecord.DataFormat],
0x1007: ["LINEFORMAT", "Appearance of A Line", xlsrecord.LineFormat],
- 0x1009: ["CHMARKERFORMAT", "?"],
+ 0x1009: ["CHMARKERFORMAT", "Color, size, and shape of the markers", xlsrecord.MarkerFormat],
0x100A: ["AREAFORMAT", "Patterns and Colors in Filled Region of Chart", xlsrecord.AreaFormat],
- 0x100B: ["CHPIEFORMAT", "?"],
+ 0x100B: ["CHPIEFORMAT", "Distance of a data point from the center", xlsrecord.PieFormat],
0x100C: ["CHATTACHEDLABEL", "?"],
0x100D: ["SERIESTEXT", "Series Category Name or Title Text in Chart", xlsrecord.SeriesText],
0x1014: ["CHTYPEGROUP", "Properties of a chart group", xlsrecord.ChartFormat],
commit d5cb3bdc85ad3e46938a357d3903980309ada171
Merge: 5545218... d9d34dd...
Author: Sergey Kishchenko <voidwrk at gmail.com>
Date: Thu Sep 15 18:41:49 2011 +0300
chart_xml_dump_refactor was merged into the master branch
commit d9d34dd0bf34d9f1b107f8c4def1df5f3ec7ae62
Author: Sergey Kishchenko <voidwrk at gmail.com>
Date: Thu Sep 15 18:33:38 2011 +0300
Small fixes for flat & cxml dump
diff --git a/src/xlsparser.py b/src/xlsparser.py
index 638f6a2..b9d5204 100644
--- a/src/xlsparser.py
+++ b/src/xlsparser.py
@@ -524,7 +524,7 @@ class CRT(BaseParser):
class AXISPARENT(BaseParser):
#AXISPARENT = AxisParent Begin Pos [AXES] 1*4CRT End
- PARSER = Group('axis-parent', Req(AxisParent()) << Req(Begin()) << Req(Pos()) <<
+ PARSER = Group('axis-root', Req(AxisParent()) << Req(Begin()) << Req(Pos()) <<
Opt(AXES()) << Many('crt-list', CRT(), min=1, max=4) <<
Req(End()))
@@ -540,7 +540,7 @@ class CHARTFORMATS(BaseParser):
PARSER = Group('chart-fmt', Req(Chart()) << Req(Begin()) << Many('font-lists', FONTLIST(), max=2) <<
Req(Scl()) << Req(PlotGrowth()) << FRAME() << Many('series-fmt-list', SERIESFORMAT()) <<
Many('ss-list', SS()) << Req(ShtProps()) << Many('dft-texts', DFTTEXT(), max=2) <<
- Req(AxesUsed()) << Many('axis-parents', AXISPARENT(), min=1, max=2) <<
+ Req(AxesUsed()) << Many('axis-roots', AXISPARENT(), min=1, max=2) <<
CrtLayout12A() << DAT() << Many('attached-labels', ATTACHEDLABEL()) <<
CRTMLFRT() << Many('datalab-exts', Seq(Seq(Req(DataLabExt()),
Req(StartObject())),
diff --git a/src/xlsrecord.py b/src/xlsrecord.py
index 3f53fd4..1516d47 100644
--- a/src/xlsrecord.py
+++ b/src/xlsrecord.py
@@ -3466,7 +3466,7 @@ class Frame(BaseRecordHandler):
def parseBytes (self):
self.__parseBytes()
- self.appendLine("frame type: %s" % ChartFrame.__frt_table[self.frt])
+ self.appendLine("frame type: %s" % Frame.__frt_table[self.frt])
self.appendLine("autoSize: %s" % self.autoSize)
self.appendLine("autoPosition: %s" % self.autoPosition)
@@ -3937,6 +3937,7 @@ class CHLabelRange(BaseRecordHandler):
self.reversed = (flags & 0x0004) != 0
def parseBytes (self):
+ self.__parseBytes()
self.appendLine("axis crossing: %d"%self.axisCross)
self.appendLine("label frequency: %d"%self.freqLabel)
self.appendLine("tick frequency: %d"%self.freqTick)
@@ -4040,7 +4041,7 @@ class CHValueRange(BaseRecordHandler):
self.bit8 = (flags & 0x0100) != 0
def parseBytes (self):
-
+ self.__parseBytes()
self.appendLine("min: %g (auto min: %s)"%(self.minVal, self.getYesNo(self.autoMin)))
self.appendLine("max: %g (auto max: %s)"%(self.maxVal, self.getYesNo(self.autoMax)))
self.appendLine("major step: %g (auto major: %s)"%
@@ -4088,11 +4089,6 @@ class CHBar(BaseRecordHandler):
self.appendLine("stacked: %s"%self.getYesNo(self.stacked))
self.appendLine("percent: %s"%self.getYesNo(self.percent))
self.appendLine("shadow: %s"%self.getYesNo(self.shadow))
-
- def fillModel(self, model):
- self.__parseBytes()
- sh = model.getCurrentSheet()
- sh.setBar(self.overlap, self.gap, self.horizontal, self.stacked, self.percent, self.shadow)
def dumpData(self):
self.__parseBytes()
diff --git a/src/xlsstream.py b/src/xlsstream.py
index 9b82608..5c569ad 100644
--- a/src/xlsstream.py
+++ b/src/xlsstream.py
@@ -476,7 +476,8 @@ class XLDirStream(object):
self.strmData.encrypted = True
def fillModel (self, model):
- handler = self.getNextRecordHandler()
+ pos, header, size, bytes = self.__readRecordBytes()
+ handler = self.__getRecordHandler(header, size, bytes)
if handler != None:
handler.fillModel(model)
self.__postReadRecord(header)
commit 883e0d274f7526f214d60ba06bbd5bc57bfb4816
Author: Sergey Kishchenko <voidwrk at gmail.com>
Date: Thu Sep 15 15:25:27 2011 +0300
xls-dump can now produce real XML dumps
diff --git a/src/xlsrecord.py b/src/xlsrecord.py
index c564868..3f53fd4 100644
--- a/src/xlsrecord.py
+++ b/src/xlsrecord.py
@@ -110,6 +110,9 @@ class ICV(object):
def toString (self):
return "color=0x%2.2X"%self.value
+def dumpIcv(icv):
+ return {'value': icv.value}
+
class BaseRecordHandler(globals.ByteStream):
def __init__ (self, header, size, bytes, strmData):
@@ -3495,9 +3498,9 @@ class LineFormat(BaseRecordHandler):
'we': self.we,
'auto': self.auto,
'axis-on': self.axisOn,
- 'auto-co': self.autoCo,
- 'icv': self.icv},
- [('rgb', dumpRgb(self.rgb))])
+ 'auto-co': self.autoCo},
+ [('rgb', dumpRgb(self.rgb)),
+ ('icv', dumpIcv(self.icv))])
class AreaFormat(BaseRecordHandler):
def __parseBytes(self):
@@ -3518,11 +3521,11 @@ class AreaFormat(BaseRecordHandler):
self.__parseBytes()
return ('area-format', {'fls': self.fls,
'auto': self.auto,
- 'invert-neg': self.invertNeg,
- 'icv-fore': self.icvFore,
- 'icv-back': self.icvBack},
+ 'invert-neg': self.invertNeg},
[('fore-color', dumpRgb(self.foreColor)),
- ('back-color', dumpRgb(self.backColor))])
+ ('back-color', dumpRgb(self.backColor)),
+ ('icv-fore', dumpIcv(self.icvFore)),
+ ('icv-back', dumpIcv(self.icvBack))])
class DataFormat(BaseRecordHandler):
def __parseBytes(self):
@@ -3701,9 +3704,9 @@ class Tick(BaseRecordHandler):
'auto-mode': self.autoMode,
'rot': self.rot,
'reading-order': self.readingOrder,
- 'icv': self.icv,
'trot': self.trot},
- [('rgb', dumpRgb(self.rgb))])
+ [('rgb', dumpRgb(self.rgb)),
+ ('icv', dumpIcv(self.icv))])
class AxisLine(BaseRecordHandler):
def __parseBytes(self):
@@ -3809,11 +3812,11 @@ class Text(BaseRecordHandler):
'show-percent': self.showPercent,
'show-bubble-sizes': self.showBubbleSizes,
'show-label': self.showLabel,
- 'icv-text-color': self.icvTextColor,
'dlp': self.dlp,
'reading-order': self.readingOrder,
'trot': self.trot},
- [('text-color', dumpRgb(self.textColor))])
+ [('text-color', dumpRgb(self.textColor)),
+ ('icv-text-color', dumpIcv(self.icvTextColor))])
class Series(BaseRecordHandler):
diff --git a/xls-dump.py b/xls-dump.py
index 0abd70f..2c7733a 100755
--- a/xls-dump.py
+++ b/xls-dump.py
@@ -94,6 +94,7 @@ class XLDumper(object):
dirstrm = self.strm.getDirectoryStream(d)
data = self.__readSubStreamXML(dirstrm)
self.__dumpDataAsXML(data, root)
+ node.prettyPrint(sys.stdout, docroot)
def dumpCanonicalXML (self):
self.__parseFile()
@@ -173,7 +174,20 @@ class XLDumper(object):
globals.error("failed to parse CompObj stream.\n")
def __dumpDataAsXML(self, data, root):
- print data
+ if isinstance(data, tuple):
+ newRoot = root.appendElement(data[0])
+ if isinstance(data[1], dict): # attrs
+ for key,val in data[1].iteritems():
+ newRoot.setAttr(key, val)
+ if len(data) > 2: # data has a list of children
+ self.__dumpDataAsXML(data[2], newRoot)
+ else:
+ self.__dumpDataAsXML(data[1], newRoot)
+ elif isinstance(data, list):
+ for x in data:
+ self.__dumpDataAsXML(x, root)
+ else:
+ pass # we're skipping all unknown elems
def __readSubStreamXML (self, strm):
handlers = []
commit eca5118e08d294171273eba004a5ee45fbf5ed57
Author: Sergey Kishchenko <voidwrk at gmail.com>
Date: Wed Sep 14 19:20:15 2011 +0300
Data dumpers were added
diff --git a/src/xlsrecord.py b/src/xlsrecord.py
index f58c464..c564868 100644
--- a/src/xlsrecord.py
+++ b/src/xlsrecord.py
@@ -98,6 +98,11 @@ class LongRGB(object):
self.green = g
self.blue = b
+def dumpRgb(rgb):
+ return {'r': rgb.red,
+ 'g': rgb.green,
+ 'b': rgb.blue}
+
class ICV(object):
def __init__ (self, value):
self.value = value
@@ -781,7 +786,13 @@ class Dimensions(BaseRecordHandler):
if not isinstance(sh, xlsmodel.Chart):
sh.setFirstDefinedCell(self.colMin, self.rowMin)
sh.setFirstFreeCell(self.colMax, self.rowMax)
-
+
+ def dumpData(self):
+ self.__parseBytes()
+ return ('dimensions', {'row-min': self.rowMin,
+ 'row-max': self.rowMax,
+ 'col-min': self.colMin,
+ 'col-max': self.colMax})
class Dv(BaseRecordHandler):
@@ -961,6 +972,13 @@ class Fbi(BaseRecordHandler):
self.appendLine("scale by: %s"%s)
self.appendLine("font ID: %d"%self.fontID)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('fbi', {'font-width': self.fontWidth,
+ 'font-height': self.fontHeight,
+ 'default-height': self.defaultHeight,
+ 'scale-type': self.scaleType,
+ 'font-id': self.fontID})
class FilePass(BaseRecordHandler):
@@ -1199,10 +1217,18 @@ class Number(BaseRecordHandler):
self.fval = self.readDouble()
def parseBytes (self):
+ self.__parseBytes()
self.appendCellPosition(self.col, self.row)
self.appendLine("XF record ID: %d"%self.xf)
self.appendLine("value: %g"%self.fval)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('number', {'row': self.row,
+ 'col': self.col,
+ 'xf': self.xf,
+ 'fval': self.fval})
+
class Obj(BaseRecordHandler):
@@ -1341,9 +1367,13 @@ class PlotGrowth(BaseRecordHandler):
self.__parseBytes()
self.appendLine("horizontal growth: %g"%self.dx)
self.appendLine("vertical growth: %g"%self.dy)
+
+ def dumpData(self):
+ self.__parseBytes()
+ return ('plot-growth', {'dx': self.dx,
+ 'dy': self.dy})
class PrintSize(BaseRecordHandler):
-
Types = [
"unchanged from the defaults in the workbook",
"resized non-proportionally to fill the entire page",
@@ -1358,8 +1388,11 @@ class PrintSize(BaseRecordHandler):
self.__parseBytes()
self.appendLine(globals.getValueOrUnknown(PrintSize.Types, self.typeID))
-class Protect(BaseRecordHandler):
+ def dumpData(self):
+ self.__parseBytes()
+ return ('print-size', {'type-id': self.typeID})
+class Protect(BaseRecordHandler):
def __parseBytes (self):
self.locked = self.readUnsignedInt(2) != 0
@@ -1367,6 +1400,10 @@ class Protect(BaseRecordHandler):
self.__parseBytes()
self.appendLineBoolean("workbook locked", self.locked)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('protect', {'locked': self.locked})
+
class RK(BaseRecordHandler):
"""Cell with encoded integer or floating-point value"""
@@ -1401,6 +1438,11 @@ class Scl(BaseRecordHandler):
val += self.numerator
val /= self.denominator
self.appendLine("zoom level: %g"%val)
+
+ def dumpData(self):
+ self.__parseBytes()
+ return ('scl', {'numer': self.numerator,
+ 'denom': self.denominator})
class SeriesText(BaseRecordHandler):
@@ -3345,33 +3387,47 @@ class CTCellContent(BaseRecordHandler):
class Header(BaseRecordHandler):
- pass
+ def dumpData(self):
+ return ('header', {})
class Footer(BaseRecordHandler):
- pass
+ def dumpData(self):
+ return ('footer', {})
class HCenter(BaseRecordHandler):
def __parseBytes(self):
self.val = self.readUnsignedInt(2)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('hcenter', {'val': self.val})
+
class VCenter(BaseRecordHandler):
def __parseBytes(self):
self.val = self.readUnsignedInt(2)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('vcenter', {'val': self.val})
+
class Setup(BaseRecordHandler):
- pass
+ def dumpData(self):
+ return ('setup', {})
class Units(BaseRecordHandler):
- pass
+ def dumpData(self):
+ return ('units', {})
class Begin(BaseRecordHandler):
pass
class PlotArea(BaseRecordHandler):
- pass
+ def dumpData(self):
+ return ('plot-area', {})
class CrtLink(BaseRecordHandler): # it's unused
- pass
+ def dumpData(self):
+ return ('crt-link', {})
class End(BaseRecordHandler):
pass
@@ -3388,6 +3444,13 @@ class Chart(BaseRecordHandler):
self.appendLine("position: (x, y) = (%d, %d)"%(self.x, self.y))
self.appendLine("size: (width, height) = (%d, %d)"%(self.w, self.h))
+ def dumpData(self):
+ self.__parseBytes()
+ return ('chart-xfrm', {'x': self.x,
+ 'y': self.y,
+ 'w': self.w,
+ 'h': self.h})
+
class Frame(BaseRecordHandler):
__frt_table = {0x0000: "frame surrounding the chart element",
0x0004: "frame with a shadow surrounding the chart element"}
@@ -3404,6 +3467,12 @@ class Frame(BaseRecordHandler):
self.appendLine("autoSize: %s" % self.autoSize)
self.appendLine("autoPosition: %s" % self.autoPosition)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('frame', {'frt': self.frt,
+ 'auto-size': self.autoSize,
+ 'auto-position': self.autoPosition})
+
class LineFormat(BaseRecordHandler):
def __parseBytes(self):
self.rgb = self.readLongRGB()
@@ -3420,6 +3489,16 @@ class LineFormat(BaseRecordHandler):
self.__parseBytes()
# TODO: dump all data
+ def dumpData(self):
+ self.__parseBytes()
+ return ('line-format', {'lns': self.lns,
+ 'we': self.we,
+ 'auto': self.auto,
+ 'axis-on': self.axisOn,
+ 'auto-co': self.autoCo,
+ 'icv': self.icv},
+ [('rgb', dumpRgb(self.rgb))])
+
class AreaFormat(BaseRecordHandler):
def __parseBytes(self):
self.foreColor = self.readLongRGB()
@@ -3435,6 +3514,16 @@ class AreaFormat(BaseRecordHandler):
self.__parseBytes()
# TODO: dump all data
+ def dumpData(self):
+ self.__parseBytes()
+ return ('area-format', {'fls': self.fls,
+ 'auto': self.auto,
+ 'invert-neg': self.invertNeg,
+ 'icv-fore': self.icvFore,
+ 'icv-back': self.icvBack},
+ [('fore-color', dumpRgb(self.foreColor)),
+ ('back-color', dumpRgb(self.backColor))])
+
class DataFormat(BaseRecordHandler):
def __parseBytes(self):
self.xi = self.readUnsignedInt(2)
@@ -3447,6 +3536,12 @@ class DataFormat(BaseRecordHandler):
self.__parseBytes()
# TODO: dump all data
+ def dumpData(self):
+ self.__parseBytes()
+ return ('data-format', {'xi': self.xi,
+ 'yi': self.yi,
+ 'iss': self.iss})
+
class ChartFormat(BaseRecordHandler):
def __parseBytes(self):
reserved1 = self.readUnsignedInt(4)
@@ -3461,6 +3556,11 @@ class ChartFormat(BaseRecordHandler):
self.__parseBytes()
# TODO: dump all data
+ def dumpData(self):
+ self.__parseBytes()
+ return ('chart-format', {'varied': self.varied,
+ 'icrt': self.icrt})
+
class Chart3DBarShape(BaseRecordHandler):
def __parseBytes(self):
self.riser = self.readUnsignedInt(1)
@@ -3470,10 +3570,19 @@ class Chart3DBarShape(BaseRecordHandler):
self.__parseBytes()
# TODO: dump all data
+ def dumpData(self):
+ self.__parseBytes()
+ return ('chart-3dbar-shape', {'riser': self.riser,
+ 'taper': self.taper})
+
class SerToCrt(BaseRecordHandler):
def __parseBytes(self):
self.id = self.readUnsignedInt(2)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('ser-to-crt', {'id': self.id})
+
class Pos(BaseRecordHandler):
def __parseBytes(self):
self.mdTopLt = self.readUnsignedInt(2)
@@ -3487,19 +3596,40 @@ class Pos(BaseRecordHandler):
self.y2 = self.readSignedInt(2)
unused = self.readUnsignedInt(2)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('pos', {'md-top-lt': self.mdTopLt,
+ 'md-bot-rt': self.mdBotRt,
+ 'x1': self.x1,
+ 'y1': self.y1,
+ 'x2': self.x2,
+ 'y2': self.y2})
+
class FontX(BaseRecordHandler):
def __parseBytes(self):
self.iFont = self.readUnsignedInt(2)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('font-x', {'i-font': self.iFont})
+
class AxesUsed(BaseRecordHandler):
def __parseBytes(self):
self.cAxes = self.readUnsignedInt(2)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('axes-used', {'c-axes': self.cAxes})
+
class AxisParent(BaseRecordHandler):
def __parseBytes(self):
self.iax = self.readUnsignedInt(2)
# 16 bytes are unused
+ def dumpData(self):
+ self.__parseBytes()
+ return ('axis-parent', {'iax': self.iax})
+
class AxcExt(BaseRecordHandler):
def __parseBytes (self):
self.catMin = self.readUnsignedInt(2)
@@ -3521,6 +3651,25 @@ class AxcExt(BaseRecordHandler):
self.autoCross = (flag & 0x0040) != 0 # G
self.autoDate = (flag & 0x0080) != 0 # H
+ def dumpData(self):
+ self.__parseBytes()
+ return ('axc-ext', {'cat-min': self.catMin,
+ 'cat-max': self.catMax,
+ 'cat-major': self.catMajor,
+ 'du-major': self.duMajor,
+ 'cat-minor': self.catMinor,
+ 'du-minor': self.duMinor,
+ 'du-base': self.duBase,
+ 'cat-cross-date': self.catCrossDate,
+ 'auto-min': self.autoMin,
+ 'auto-max': self.autoMax,
+ 'auto-major': self.autoMajor,
+ 'auto-minor': self.autoMinor,
+ 'date-axis': self.dateAxis,
+ 'auto-base': self.autoBase,
+ 'auto-cross': self.autoCross,
+ 'auto-date': self.autoDate})
+
class Tick(BaseRecordHandler):
def __parseBytes (self):
self.tktMajor = self.readUnsignedInt(1)
@@ -3542,14 +3691,36 @@ class Tick(BaseRecordHandler):
self.icv = self.readICV()
self.trot = self.readUnsignedInt(2)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('tick', {'tkt-major': self.tktMajor,
+ 'tkt-minor': self.tktMinor,
+ 'tlt': self.tlt,
+ 'w-bkg-mode': self.wBkgMode,
+ 'auto-co': self.autoCo,
+ 'auto-mode': self.autoMode,
+ 'rot': self.rot,
+ 'reading-order': self.readingOrder,
+ 'icv': self.icv,
+ 'trot': self.trot},
+ [('rgb', dumpRgb(self.rgb))])
+
class AxisLine(BaseRecordHandler):
def __parseBytes(self):
self.id = self.readUnsignedInt(2)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('axis-line', {'id': self.id})
+
class SIIndex(BaseRecordHandler):
def __parseBytes(self):
self.numIndex = self.readUnsignedInt(2)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('si-index', {'num-index': self.numIndex})
+
class DefaultText(BaseRecordHandler):
__types = [
@@ -3565,6 +3736,10 @@ class DefaultText(BaseRecordHandler):
self.__parseBytes()
self.appendLine(globals.getValueOrUnknown(DefaultText.__types, self.id))
+ def dumpData(self):
+ self.__parseBytes()
+ return ('default-text', {'id': self.id})
+
class Text(BaseRecordHandler):
__horAlign = { 1: 'left', 2: 'center', 3: 'right', 4: 'justify', 7: 'distributed' }
@@ -3615,6 +3790,31 @@ class Text(BaseRecordHandler):
# TODO : handle the rest of the data.
+ def dumpData(self):
+ self.__parseBytes()
+ return ('text', {'at': self.at,
+ 'vat': self.vat,
+ 'bkg-mode': self.bkgMode,
+ 'x': self.x,
+ 'y': self.y,
+ 'dx': self.dx,
+ 'dy': self.dy,
+ 'auto-color': self.autoColor,
+ 'show-key': self.showKey,
+ 'show-value': self.showValue,
+ 'auto-text': self.autoText,
+ 'generated': self.generated,
+ 'deleted': self.deleted,
+ 'show-label-and-perc': self.showLabelAndPerc,
+ 'show-percent': self.showPercent,
+ 'show-bubble-sizes': self.showBubbleSizes,
+ 'show-label': self.showLabel,
+ 'icv-text-color': self.icvTextColor,
+ 'dlp': self.dlp,
+ 'reading-order': self.readingOrder,
+ 'trot': self.trot},
+ [('text-color', dumpRgb(self.textColor))])
+
class Series(BaseRecordHandler):
DATE = 0
@@ -3651,6 +3851,15 @@ class Series(BaseRecordHandler):
self.appendLine("value or vertical value count: %d"%self.valCount)
self.appendLine("bubble size value count: %d"%self.bubbleCount)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('series', {'cat-type': self.catType,
+ 'val-type': self.valType,
+ 'cat-count': self.catCount,
+ 'val-count': self.valCount,
+ 'bubble-type': self.bubbleType,
+ 'bubble-count': self.bubbleCount})
+
class CHAxis(BaseRecordHandler):
axisTypeList = ['x-axis', 'y-axis', 'z-axis']
@@ -3670,6 +3879,14 @@ class CHAxis(BaseRecordHandler):
self.appendLine("axis type: unknown")
self.appendLine("area: (x, y, w, h) = (%d, %d, %d, %d) [no longer used]"%(self.x, self.y, self.w, self.h))
+ def dumpData(self):
+ self.__parseBytes()
+ return ('axis', {'axis-type': self.axisType,
+ 'x': self.x,
+ 'y': self.y,
+ 'w': self.w,
+ 'h': self.h})
+
class CHProperties(BaseRecordHandler):
def __parseBytes(self):
@@ -3696,6 +3913,14 @@ class CHProperties(BaseRecordHandler):
self.appendLine("empty value treatment: %s" % emptyValues)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('sht-props', {'empty-flags': self.emptyFlags,
+ 'manual-series': self.manualSeries,
+ 'show-vis-cells': self.showVisCells,
+ 'no-resize': self.noResize,
+ 'manual-plot-area': self.manualPlotArea})
+
class CHLabelRange(BaseRecordHandler):
@@ -3716,12 +3941,15 @@ class CHLabelRange(BaseRecordHandler):
self.appendLineBoolean("axis between categories", self.betweenCateg)
self.appendLineBoolean("other axis crosses at maximum", self.maxCross)
self.appendLineBoolean("axis reversed", self.reversed)
-
- def fillModel(self, model):
+
+ def dumpData(self):
self.__parseBytes()
- sh = model.getCurrentSheet()
- sh.setCatSerRange(self.axisCross, self.freqLabel, self.freqTick,
- self.betweenCateg, self.maxCross, self.reversed)
+ return ('cat-ser-range', {'axis-cross': self.axisCross,
+ 'freq-label': self.freqLabel,
+ 'freq-tick': self.freqTick,
+ 'between-categ': self.betweenCateg,
+ 'max-cross': self.maxCross,
+ 'reversed': self.reversed})
class Legend(BaseRecordHandler):
@@ -3773,6 +4001,20 @@ class Legend(BaseRecordHandler):
self.appendLine("")
self.appendMultiLine("NOTE: Position and size are in units of 1/4000 of chart's width or height.")
+ def dumpData(self):
+ self.__parseBytes()
+ return ('legend', {'x': self.x,
+ 'y': self.y,
+ 'w': self.w,
+ 'h': self.h,
+ 'dock-mode': self.dockMode,
+ 'spacing': self.spacing,
+ 'docked': self.docked,
+ 'auto-series': self.autoSeries,
+ 'auto-pos-x': self.autoPosX,
+ 'auto-pos-y': self.autoPosY,
+ 'stacked': self.stacked,
+ 'data-table': self.dataTable})
class CHValueRange(BaseRecordHandler):
@@ -3806,9 +4048,25 @@ class CHValueRange(BaseRecordHandler):
(self.cross, self.getYesNo(self.autoCross), self.getYesNo(self.maxCross)))
self.appendLine("biff5 or above: %s"%self.getYesNo(self.bit8))
+ def dumpData(self):
+ self.__parseBytes()
+ return ('value-range', {'min-val': self.minVal,
+ 'max-val': self.maxVal,
+ 'major-step': self.majorStep,
+ 'minor-step': self.minorStep,
+ 'cross': self.cross,
+ 'auto-min': self.autoMin,
+ 'auto-max': self.autoMax,
+ 'auto-major': self.autoMajor,
+ 'auto-minor': self.autoMinor,
+ 'auto-cross': self.autoCross,
+ 'log-scale': self.logScale,
+ 'reversed': self.reversed,
+ 'max-cross': self.maxCross,
+ 'biff5bit': self.bit8})
-class CHBar(BaseRecordHandler):
+class CHBar(BaseRecordHandler):
def __parseBytes (self):
self.overlap = globals.getSignedInt(self.readBytes(2))
self.gap = globals.getSignedInt(self.readBytes(2))
@@ -3833,6 +4091,14 @@ class CHBar(BaseRecordHandler):
sh = model.getCurrentSheet()
sh.setBar(self.overlap, self.gap, self.horizontal, self.stacked, self.percent, self.shadow)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('bar', {'overlap': self.overlap,
+ 'gap': self.gap,
+ 'horizontal': self.horizontal,
+ 'stacked': self.stacked,
+ 'shadow': self.shadow})
+
class CHLine(BaseRecordHandler):
def parseBytes (self):
@@ -3847,7 +4113,6 @@ class CHLine(BaseRecordHandler):
class Brai(BaseRecordHandler):
-
destTypes = [
'series, legend entry, trendline name, or error bars name',
'values or horizontal values',
@@ -3897,6 +4162,14 @@ class Brai(BaseRecordHandler):
else:
self.appendLine("formula parser error: %s"%self.formulaError)
+ def dumpData(self):
+ self.__parseBytes()
+ return ('brai', {'id': self.id,
+ 'rt': self.rt,
+ 'unlinked-ifmt': self.unlinkedIFmt,
+ 'i-fmt': self.iFmt,
+ 'formula': self.formula}) # assuming the formula is fine
+
class MSODrawing(BaseRecordHandler):
"""Handler for the MSODRAWING record
commit b58622fbd8b02e8b6de7a43d2c66a755c46d2c3a
Author: Sergey Kishchenko <voidwrk at gmail.com>
Date: Wed Sep 14 14:57:58 2011 +0300
First version of the combinator-based parser is ready
diff --git a/src/xlsparser.py b/src/xlsparser.py
new file mode 100644
index 0000000..638f6a2
--- /dev/null
+++ b/src/xlsparser.py
@@ -0,0 +1,654 @@
+#!/usr/bin/env python
+########################################################################
+#
+# Copyright (c) 2011 Sergey Kishchenko
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+########################################################################
+import xlsrecord
+
+
+############## Common parsers ##########################################
+class ParseException(Exception):
+    """Raised when the token stream cannot satisfy a grammar rule (see Req, Many, OneOf)."""
+
+class TokenStream(object):
+    """Sequential cursor over a list of tokens.
+
+    currentIndex is public on purpose: the parsers save and restore it to
+    backtrack.
+    """
+
+    def __init__(self, tokens):
+        self.currentIndex = 0
+        self.tokens = tokens
+
+    def readToken(self):
+        """Return the token under the cursor and advance; None once exhausted."""
+        position = self.currentIndex
+        if position < len(self.tokens):
+            token = self.tokens[position]
+            self.currentIndex = position + 1
+            return token
+        return None
+
+class BaseParser(object):
+    """Common base of all parsers.
+
+    A subclass either overrides parse() or exposes a PARSER attribute to
+    which parse() and __str__() delegate.  Without either, parse() returns
+    None and str() gives "NONIMPL".
+    """
+
+    def parse(self, stream):
+        """Delegate to PARSER; return None when no PARSER is defined."""
+        delegate = getattr(self, 'PARSER', None)
+        if delegate is not None:
+            return delegate.parse(stream)
+        return None
+
+    def __str__(self):
+        delegate = getattr(self, 'PARSER', None)
+        if delegate is not None:
+            return str(delegate)
+        return "NONIMPL"
+
+    def __lshift__(self, other):
+        """Chain parsers: a << b builds a Seq; an existing Seq is extended in place."""
+        if not isinstance(self, Seq):
+            return Seq(self, other)
+        self.appendParser(other)
+        return self
+
+def safeParse(parser, stream):
+    """Run parser on stream, rewinding the stream if a ParseException escapes.
+
+    Returns whatever parser.parse(stream) returns.  When it raises
+    ParseException, stream.currentIndex is reset to its value on entry and
+    the exception is re-raised.
+    """
+    startIndex = stream.currentIndex
+    try:
+        return parser.parse(stream)
+    except ParseException:
+        stream.currentIndex = startIndex
+        raise
+
+def getParsedOrNone(parser, stream):
+    """Like safeParse(), but a ParseException is turned into a None result."""
+    try:
+        return safeParse(parser, stream)
+    except ParseException:
+        return None
+
+class Term(BaseParser):
+    """Terminal parser: accepts one token that is an instance of tokenType."""
+
+    def __init__(self, tokenType):
+        self.__tokenType = tokenType
+
+    def parse(self, stream):
+        """Consume and return the next token if it matches, else rewind and return None."""
+        startIndex = stream.currentIndex
+        candidate = stream.readToken()
+        if candidate is not None and isinstance(candidate, self.__tokenType):
+            return candidate
+        # no match (or end of stream): leave the stream where it was
+        stream.currentIndex = startIndex
+        return None
+
+    def __str__(self):
+        typeName = str(self.__tokenType)
+        return 'Term(%s)' % typeName
+
+class Opt(BaseParser):
+    """Optional parser: a ParseException from the wrapped parser becomes None."""
+
+    def __init__(self, parser):
+        self.__parser = parser
+
+    def parse(self, stream):
+        result = getParsedOrNone(self.__parser, stream)
+        return result
+
+    def __str__(self):
+        inner = str(self.__parser)
+        return 'Opt(%s)' % inner
+
+class Req(BaseParser):
+    """Required parser: a None result from the wrapped parser becomes a ParseException."""
+
+    def __init__(self, parser):
+        self.__parser = parser
+
+    def parse(self, stream):
+        """Return the wrapped parser's result.
+
+        Raises ParseException when the wrapped parser returns None; a
+        ParseException raised by the wrapped parser itself propagates
+        unchanged (safeParse rewinds the stream first).
+        """
+        parsed = safeParse(self.__parser, stream)
+        if parsed is None:
+            # The stream may already be exhausted here.  Indexing the token
+            # list unconditionally would raise IndexError, which none of the
+            # callers that expect ParseException would catch.
+            if stream.currentIndex < len(stream.tokens):
+                nextToken = str(stream.tokens[stream.currentIndex])
+            else:
+                nextToken = 'end of stream'
+            raise ParseException("%s failed but it is required, next token is [%s]" %
+                                 (str(self.__parser), nextToken))
+        return parsed
+
+    def __str__(self):
+        return 'Req(%s)' % str(self.__parser)
+
+class AnyButThis(BaseParser):
+    """Negative parser: consumes one token unless the wrapped parser matches here."""
+
+    def __init__(self, parser):
+        self.__parser = parser
+
+    def parse(self, stream):
+        """Return ('any', token) for a non-matching token, otherwise None.
+
+        None is returned, with the stream position unchanged, both when the
+        wrapped parser matches and when the stream is exhausted.
+        """
+        curIndex = stream.currentIndex
+        if getParsedOrNone(self.__parser, stream) is not None:
+            # the excluded rule matches: undo its consumption and decline
+            stream.currentIndex = curIndex
+            return None
+        token = stream.readToken()
+        if token is None:
+            # End of stream.  Returning ('any', None) here would look like a
+            # successful match that consumes nothing, so an enclosing
+            # unbounded Many would loop forever.
+            return None
+        return ('any', token)
+
+    def __str__(self):
+        return 'AnyButThis(%s)' % str(self.__parser)
+
+class Many(BaseParser):
+    """Repetition parser: applies a parser repeatedly and collects the results.
+
+    group labels the result; min is the least number of matches required
+    and max the most that are consumed (-1 means unbounded).
+    """
+
+    def __init__(self, group, parser, min=0, max=-1):
+        self.__group = group
+        self.__parser = parser
+        self.__min = min
+        self.__max = max
+
+    def parse(self, stream):
+        """Return (group, [results]); None for the degenerate min == max == 0 case.
+
+        Raises ParseException when fewer than min repetitions matched.
+        """
+        if self.__min == 0 and self.__max == 0:
+            return None
+        matches = []
+        bounded = self.__max != -1
+        while True:
+            item = getParsedOrNone(self.__parser, stream)
+            if item is None:
+                break
+            matches.append(item)
+            if bounded and len(matches) >= self.__max:
+                break
+        if len(matches) < self.__min:
+            raise ParseException("%s should occur at least %s times" % (self.__parser,self.__min))
+        return (self.__group, matches)
+
+    def __str__(self):
+        description = (self.__group, str(self.__parser), self.__min, self.__max)
+        return 'Many(%s,%s,min=%s,max=%s)' % description
+
+class OneOf(BaseParser):
+    """Choice parser: returns the result of the first alternative that matches."""
+
+    def __init__(self, *args):
+        self.__parsers = args
+
+    def parse(self, stream):
+        """Try the alternatives in order and return the first non-None result.
+
+        Raises ParseException when no alternative matches.
+        """
+        for parser in self.__parsers:
+            # An alternative that fails with ParseException (for example a
+            # Seq whose first Req does not match) simply does not apply; it
+            # must not stop the remaining alternatives from being tried.
+            # getParsedOrNone also rewinds the stream for the next attempt.
+            parsed = getParsedOrNone(parser, stream)
+            if parsed is not None:
+                return parsed
+        raise ParseException("No suitable options: [%s]" % ','.join(str(x) for x in self.__parsers))
+
+    def __str__(self):
+        return 'OneOf(%s)' % ','.join(str(x) for x in self.__parsers)
+
+class Seq(BaseParser):
+    """Sequence parser: applies its parsers in order and collects non-None results."""
+
+    def __init__(self, *args):
+        self.__parsers = list(args)
+
+    def parse(self, stream):
+        """Return the list of results; a ParseException from any member propagates."""
+        results = [safeParse(member, stream) for member in self.__parsers]
+        return [result for result in results if result is not None]
+
+    def appendParser(self, parser):
+        """Add one more parser to the end of the sequence (used by <<)."""
+        self.__parsers.append(parser)
+
+    def __str__(self):
+        members = ','.join(str(x) for x in self.__parsers)
+        return 'Seq(%s)' % members
+
+class Group(BaseParser):
+    """Labelling parser: wraps a non-None result as (name, result)."""
+
+    def __init__(self, name, parser):
+        self.__name = name
+        self.__parser = parser
+
+    def parse(self, stream):
+        parsed = self.__parser.parse(stream)
+        if parsed is None:
+            return None
+        return (self.__name, parsed)
+
+    def __str__(self):
+        description = (self.__name, str(self.__parser))
+        return 'Group(%s, %s)' % description
+
+
+############## Specific parsers ##########################################
+# Placeholder rules: a class with no PARSER inherits BaseParser.parse(),
+# which returns None, so these never match a token.
+class WriteProtect(BaseParser): pass
+class SheetExt(BaseParser): pass
+class WebPub(BaseParser): pass
+class HFPicture(BaseParser): pass
+
+# Terminal rules: each matches one token that is an instance of the named
+# xlsrecord class (see Term.parse).
+class Header(BaseParser):
+ PARSER = Term(xlsrecord.Header)
+
+class Footer(BaseParser):
+ PARSER = Term(xlsrecord.Footer)
+
+class HCenter(BaseParser):
+ PARSER = Term(xlsrecord.HCenter)
+
+class VCenter(BaseParser):
+ PARSER = Term(xlsrecord.VCenter)
+
+# Placeholders again; MarginBaseParser adds nothing to BaseParser.
+class MarginBaseParser(BaseParser): pass
+class LeftMargin(MarginBaseParser): pass
+class RightMargin(MarginBaseParser): pass
+class TopMargin(MarginBaseParser): pass
+class BottomMargin(MarginBaseParser): pass
+class Pls(MarginBaseParser): pass
+class Continue(MarginBaseParser): pass
+
+class Setup(BaseParser):
+ PARSER = Term(xlsrecord.Setup)
+
+class PAGESETUP(BaseParser):
+ # Rule being implemented ([x] = optional, *x = repetition):
+ #PAGESETUP = Header Footer HCenter VCenter [LeftMargin] [RightMargin] [TopMargin]
+ #[BottomMargin] [Pls *Continue] Setup
+ # NOTE(review): Setup is listed as mandatory in the rule but is not wrapped
+ # in Req() below, and the Pls/Continue pair is a plain Seq, so its Many
+ # contributes a ('continues', []) entry even without Pls - confirm intent.
+ PARSER = Group('page-setup', Req(Header()) << Req(Footer()) << Req(HCenter()) << Req(VCenter()) <<
+ LeftMargin() << RightMargin() << TopMargin() << BottomMargin() <<
+ Seq(Pls(), Many('continues', Continue())) << Setup())
+
+
+# Leaf rules used by CHARTSHEETCONTENT and OBJECTS.  Classes with a PARSER
+# match one record of the named xlsrecord class; the "pass" classes are
+# placeholders that never match.
+class PrintSize(BaseParser):
+ PARSER = Term(xlsrecord.PrintSize)
+
+class HeaderFooter(BaseParser): pass
+class BACKGROUND(BaseParser): pass
+
+class Fbi(BaseParser):
+ PARSER = Term(xlsrecord.Fbi)
+
+class Fbi2(BaseParser): pass
+class ClrtClient(BaseParser): pass
+
+# matches a single xlsrecord.Protect record
+class PROTECTION(BaseParser):
+ PARSER = Term(xlsrecord.Protect)
+
+class Palette(BaseParser): pass
+class SXViewLink(BaseParser): pass
+class PivotChartBits(BaseParser): pass
+class SBaseRef(BaseParser): pass
+class MsoDrawingGroup(BaseParser): pass
+
+
+class MSODRAWING(BaseParser): pass
+class TEXTOBJECT(BaseParser): pass
+class OBJ(BaseParser): pass
+class MsoDrawingSelection(BaseParser): pass
+
+class OBJECTS(BaseParser):
+ # Rule being implemented:
+ #*(MSODRAWING *(TEXTOBJECT / OBJ)) [MsoDrawingSelection]
+ # MSODRAWING is still a placeholder, so 'drawings' is always an empty list.
+ PARSER = Group('objects', Many('drawings', Seq(Req(MSODRAWING()),
+ Many('obj-list',
+ OneOf(TEXTOBJECT(), OBJ())))) <<
+ MsoDrawingSelection())
+
+# Leaf rules for the chart substream.  Classes with a PARSER match one
+# record of the named xlsrecord class; the "pass" classes are placeholders
+# that never match.
+class Units(BaseParser):
+ PARSER = Term(xlsrecord.Units)
+
+class Chart(BaseParser):
+ PARSER = Term(xlsrecord.Chart)
+
+class Begin(BaseParser):
+ PARSER = Term(xlsrecord.Begin)
+
+class End(BaseParser):
+ PARSER = Term(xlsrecord.End)
+
+class PlotArea(BaseParser):
+ PARSER = Term(xlsrecord.PlotArea)
+
+class CrtLink(BaseParser):
+ PARSER = Term(xlsrecord.CrtLink)
+
+class FONTLIST(BaseParser): pass
+
+class Frame(BaseParser):
+ PARSER = Term(xlsrecord.Frame)
+
+class LineFormat(BaseParser):
+ PARSER = Term(xlsrecord.LineFormat)
+
+class AreaFormat(BaseParser):
+ PARSER = Term(xlsrecord.AreaFormat)
+
+class GELFRAME(BaseParser): pass
+class SHAPEPROPS(BaseParser): pass
+
+class FRAME(BaseParser):
+ # Frame Begin LineFormat AreaFormat <GELFRAME> <SHAPEPROPS> End, where the
+ # two unwrapped members are placeholders and contribute nothing.
+ # Because the rule starts with Req(), it raises ParseException when the
+ # next record is not a Frame; wrap it in Opt() where it is optional.
+ PARSER = Group('frame', Req(Frame()) << Req(Begin()) << Req(LineFormat()) << Req(AreaFormat()) <<
+ GELFRAME() << SHAPEPROPS() << Req(End()))
+
+
+# Leaf rules for series and text formatting.  Classes with a PARSER match
+# one record of the named xlsrecord class; the "pass" classes are
+# placeholders that never match.
+class Scl(BaseParser):
+ PARSER = Term(xlsrecord.Scl)
+
+class PlotGrowth(BaseParser):
+ PARSER = Term(xlsrecord.PlotGrowth)
+
+class Series(BaseParser):
+ PARSER = Term(xlsrecord.Series)
+
+class AI(BaseParser):
+ PARSER = Term(xlsrecord.Brai) # TODO: we use Brai instead of AI now, fix it
+
+class SerParent(BaseParser): pass
+class SerAuxTrend(BaseParser): pass
+class SerAuxErrBar(BaseParser): pass
+class SerToCrt(BaseParser):
+ PARSER = Term(xlsrecord.SerToCrt)
+
+class LegendException(BaseParser): pass
+
+class DataFormat(BaseParser):
+ PARSER = Term(xlsrecord.DataFormat)
+
+class Chart3DBarShape(BaseParser):
+ PARSER = Term(xlsrecord.Chart3DBarShape)
+
+class PieFormat(BaseParser): pass
+class SerFmt(BaseParser): pass
+class MarkerFormat(BaseParser): pass
+
+class Text(BaseParser):
+ PARSER = Term(xlsrecord.Text)
+
+class Pos(BaseParser):
+ PARSER = Term(xlsrecord.Pos)
+
+class FontX(BaseParser):
+ PARSER = Term(xlsrecord.FontX)
+
+class AlRuns(BaseParser): pass
+class ObjectLink(BaseParser): pass
+class DataLabExtContents(BaseParser): pass
+class CrtLayout12(BaseParser): pass
+class CRTMLFRT(BaseParser): pass
+class TEXTPROPS(BaseParser): pass
+
+
+class ATTACHEDLABEL(BaseParser):
+ # Rule being implemented ([x] = optional):
+ #ATTACHEDLABEL = Text Begin Pos [FontX] [AlRuns] AI [FRAME] [ObjectLink] [DataLabExtContents]
+ #[CrtLayout12] [TEXTPROPS] [CRTMLFRT] End
+ # Optional single records are used bare (a Term returns None when it does
+ # not match); the composite FRAME needs Opt() because it raises instead.
+ PARSER = Group('attached-label', Req(Text()) << Req(Begin()) << Req(Pos()) << FontX() << AlRuns() << Req(AI()) <<
+ Opt(FRAME()) << ObjectLink() << DataLabExtContents() << CrtLayout12() << TEXTPROPS() << CRTMLFRT() << Req(End()))
+
+class SS(BaseParser):
+ # Rule being implemented ([x] = optional, *2x = at most two):
+ #SS = DataFormat Begin [Chart3DBarShape] [LineFormat AreaFormat PieFormat] [SerFmt]
+ #[GELFRAME] [MarkerFormat] [AttachedLabel] *2SHAPEPROPS [CRTMLFRT] End
+ # NOTE(review): PieFormat is still a placeholder, so Req(PieFormat()) always
+ # fails and the optional LineFormat/AreaFormat/PieFormat group never matches.
+ PARSER = Group('ss', Seq(Req(DataFormat()), Req(Begin()), Chart3DBarShape(),
+ Opt(Seq(Req(LineFormat()), Req(AreaFormat()), Req(PieFormat()))),
+ SerFmt(), GELFRAME(), MarkerFormat(), Opt(ATTACHEDLABEL()), # ATTACHEDLABEL was used instead of AttachedLabel
+ Many('shape-props-list', SHAPEPROPS(), max=2), CRTMLFRT(),
+ Req(End())))
+
+class SERIESFORMAT(BaseParser):
+    # Rule being implemented ([x] = optional, *x = repetition, 4x = exactly four):
+    # SERIESFORMAT = Series Begin 4AI *SS (SerToCrt / (SerParent (SerAuxTrend / SerAuxErrBar)))
+    # *(LegendException [Begin ATTACHEDLABEL [TEXTPROPS] End]) End
+    #
+    # The Begin ... End block after LegendException is optional in the rule,
+    # so it is wrapped in Opt(); it starts with Req() and would otherwise
+    # raise whenever a LegendException is not followed by Begin.
+    PARSER = Group('series-fmt',
+                   Req(Series()) << Req(Begin()) << Many('ai-list', AI(), min=4, max=4) <<
+                   Many('ss-list', SS()) <<
+                   OneOf(SerToCrt(), Seq(SerParent(), OneOf(SerAuxTrend(), SerAuxErrBar()))) <<
+                   Many('legend-exceptions',
+                        Group('legend-exception-root',
+                              Seq(Req(LegendException()),
+                                  Opt(Seq(Req(Begin()),
+                                          Req(ATTACHEDLABEL()),
+                                          TEXTPROPS(),
+                                          Req(End())))))) <<
+                   Req(End()))
+
+
+
+# matches a single xlsrecord.CHProperties record
+class ShtProps(BaseParser):
+ PARSER = Term(xlsrecord.CHProperties)
+
+# placeholders: no PARSER, so they never match
+class DataLabExt(BaseParser): pass
+class StartObject(BaseParser): pass
+class EndObject(BaseParser): pass
+
+# matches a single xlsrecord.DefaultText record
+class DefaultText(BaseParser):
+ PARSER = Term(xlsrecord.DefaultText)
+
+class DFTTEXT(BaseParser):
+ # Rule being implemented ([x] = optional):
+ #DFTTEXT = [DataLabExt StartObject] DefaultText ATTACHEDLABEL [EndObject]
+ # The optional leading pair is wrapped in Opt() because its Req() members
+ # raise when they do not match.
+ PARSER = Group('dft-text', Seq(Opt(Seq(Req(DataLabExt()), Req(StartObject()))),
+ Req(DefaultText()), Req(ATTACHEDLABEL()),
+ EndObject()))
+
+# Leaf rules for axes.  Classes with a PARSER match one record of the named
+# xlsrecord class (note Axis -> CHAxis and CatSerRange -> CHLabelRange); the
+# "pass" classes are placeholders that never match.
+class AxesUsed(BaseParser):
+ PARSER = Term(xlsrecord.AxesUsed)
+
+class AxisParent(BaseParser):
+ PARSER = Term(xlsrecord.AxisParent)
+
+class Axis(BaseParser):
+ PARSER = Term(xlsrecord.CHAxis)
+
+class CatSerRange(BaseParser):
+ PARSER = Term(xlsrecord.CHLabelRange)
+
+class AxcExt(BaseParser):
+ PARSER = Term(xlsrecord.AxcExt)
+
+class CatLab(BaseParser): pass
+
+class IFmtRecord(BaseParser): pass
+
+class Tick(BaseParser):
+ PARSER = Term(xlsrecord.Tick)
+
+class AxisLine(BaseParser):
+ PARSER = Term(xlsrecord.AxisLine)
+
+
+class TextPropsStream(BaseParser): pass
+class ContinueFrt12(BaseParser): pass
+
+class AXS(BaseParser):
+ # Rule being implemented ([x] = optional, *4x = at most four):
+ # AXS = [IFmtRecord] [Tick] [FontX] *4(AxisLine LineFormat) [AreaFormat] [GELFRAME]
+ # *4SHAPEPROPS [TextPropsStream *ContinueFrt12]
+ # Every member is optional, so this rule can succeed without consuming
+ # any token (the Many members always contribute an entry).
+ PARSER = Group('axs', IFmtRecord() << Tick() << FontX() <<
+ Many('axis-lines', Seq(Req(AxisLine()), Req(LineFormat())), max=4) <<
+ AreaFormat() << GELFRAME() << Many('shape-props-list', SHAPEPROPS(), max=4) <<
+ Opt(Seq(Req(TextPropsStream()), Many('continue-frt12-list', ContinueFrt12()))))
+
+class IVAXIS(BaseParser):
+ # Rule being implemented ([x] = optional):
+ # IVAXIS = Axis Begin [CatSerRange] AxcExt [CatLab] AXS [CRTMLFRT] End
+ # The mandatory AxcExt is what tells this rule apart from DVAXIS.
+ PARSER = Group('ivaxis', Req(Axis()) << Req(Begin()) << CatSerRange() << Req(AxcExt()) <<
+ CatLab() << Req(AXS()) << CRTMLFRT() << Req(End()))
+
+# matches a single xlsrecord.CHValueRange record
+class ValueRange(BaseParser):
+ PARSER = Term(xlsrecord.CHValueRange)
+
+# placeholder: no PARSER, so it never matches
+class AXM(BaseParser): pass
+
+class DVAXIS(BaseParser):
+ # Rule being implemented ([x] = optional):
+ #DVAXIS = Axis Begin [ValueRange] [AXM] AXS [CRTMLFRT] End
+ PARSER = Group('dvaxis', Req(Axis()) << Req(Begin()) << ValueRange() << AXM() << Req(AXS()) << CRTMLFRT() <<
+ Req(End()))
+
+# placeholder; the rule it should implement one day is given in the comment
+class SERIESAXIS(BaseParser): pass #SERIESAXIS = Axis Begin [CatSerRange] AXS [CRTMLFRT] End
+
+class AXES(BaseParser):
+ # Rule being implemented ([x] = optional, / = alternative, *3x = at most three):
+ #AXES = [IVAXIS DVAXIS [SERIESAXIS] / DVAXIS DVAXIS] *3ATTACHEDLABEL [PlotArea FRAME]
+ # TODO: recheck it. The rule above lacks some brackets :(
+ # NOTE(review): the axis alternatives are optional in the rule as quoted,
+ # but the OneOf below is not wrapped in Opt() - confirm which is intended.
+ PARSER = Group('axes', Seq(OneOf(Seq(Req(IVAXIS()), Req(DVAXIS()), SERIESAXIS()),
+ Seq(Req(DVAXIS()), Req(DVAXIS()))),
+ Many('attached-labels', ATTACHEDLABEL(), max=3),
+ Opt(Seq(Req(PlotArea()), Req(FRAME())))))
+
+
+# Leaf rules for chart groups.  Classes with a PARSER match one record of
+# the named xlsrecord class; the "pass" classes are placeholders that never
+# match, so Bar is currently the only chart type CRT can recognise.
+class ChartFormat(BaseParser):
+ PARSER = Term(xlsrecord.ChartFormat)
+
+class BobPop(BaseParser): pass
+class BobPopCustom(BaseParser): pass
+
+class Bar(BaseParser):
+ PARSER = Term(xlsrecord.CHBar)
+
+class Line(BaseParser): pass
+class Pie(BaseParser): pass
+class Area(BaseParser): pass
+class Scatter(BaseParser): pass
+class Radar(BaseParser): pass
+class RadarArea(BaseParser): pass
+class Surf(BaseParser): pass
+class SeriesList(BaseParser): pass
+class Chart3d(BaseParser): pass
+
+class Legend(BaseParser):
+ PARSER = Term(xlsrecord.Legend)
+
+class LD(BaseParser):
+ # Rule being implemented ([x] = optional):
+ #LD = Legend Begin Pos ATTACHEDLABEL [FRAME] [CrtLayout12] [TEXTPROPS] [CRTMLFRT] End
+ PARSER = Group('ld', Req(Legend()) << Req(Begin()) << Req(Pos()) << Req(ATTACHEDLABEL()) <<
+ Opt(FRAME()) << CrtLayout12() << TEXTPROPS() << CRTMLFRT() << Req(End()))
+
+# placeholders: no PARSER, so they never match
+class TWODROPBAR(BaseParser): pass
+class CrtLine(BaseParser): pass
+class CrtLayout12A(BaseParser): pass
+class DAT(BaseParser): pass
+
+class CRT(BaseParser):
+    # Rule being implemented ([x] = optional, / = alternative, *Nx = at most N):
+    # CRT = ChartFormat Begin (Bar / Line / (BopPop [BopPopCustom]) / Pie / Area / Scatter / Radar /
+    # RadarArea / Surf) CrtLink [SeriesList] [Chart3d] [LD] [2DROPBAR] *4(CrtLine LineFormat)
+    # *2DFTTEXT [DataLabExtContents] [SS] *4SHAPEPROPS End
+    #
+    # The *4 and *2 bounds are passed to Many as max=..., the same way the
+    # AXS and CHARTFORMATS rules bound their repetitions.
+    PARSER = Group('crt',
+                   Req(ChartFormat()) << Req(Begin()) <<
+                   OneOf(Bar(), Line(), Seq(Req(BobPop()), BobPopCustom()),
+                         Pie(), Area(), Scatter(), Radar(),
+                         RadarArea(), Surf()) <<
+                   Req(CrtLink()) << SeriesList() << Chart3d() << Opt(LD()) << TWODROPBAR() <<
+                   Many('crt-lines', Seq(Req(CrtLine()), Req(LineFormat())), max=4) <<
+                   Many('dft-texts', DFTTEXT(), max=2) <<
+                   DataLabExtContents() << Opt(SS()) <<
+                   Many('shape-props-list', SHAPEPROPS(), max=4) << Req(End()))
+
+class AXISPARENT(BaseParser):
+ # Rule being implemented ([x] = optional, 1*4x = one to four):
+ #AXISPARENT = AxisParent Begin Pos [AXES] 1*4CRT End
+ PARSER = Group('axis-parent', Req(AxisParent()) << Req(Begin()) << Req(Pos()) <<
+ Opt(AXES()) << Many('crt-list', CRT(), min=1, max=4) <<
+ Req(End()))
+
+
+
+
+
+
+class CHARTFORMATS(BaseParser):
+    # Rule being implemented ([x] = optional, *x = repetition, 1*2x = one or two):
+    # CHARTFORMATS = Chart Begin *2FONTLIST Scl PlotGrowth [FRAME] *SERIESFORMAT *SS ShtProps
+    # *2DFTTEXT AxesUsed 1*2AXISPARENT [CrtLayout12A] [DAT] *ATTACHEDLABEL [CRTMLFRT]
+    # *([DataLabExt StartObject] ATTACHEDLABEL [EndObject]) [TEXTPROPS] *2CRTMLFRT End
+    #
+    # FRAME and the DataLabExt/StartObject pair begin with Req() members and
+    # therefore raise ParseException when absent; being optional in the rule
+    # they are wrapped in Opt(), as ATTACHEDLABEL, LD and DFTTEXT already do.
+    PARSER = Group('chart-fmt',
+                   Req(Chart()) << Req(Begin()) << Many('font-lists', FONTLIST(), max=2) <<
+                   Req(Scl()) << Req(PlotGrowth()) << Opt(FRAME()) <<
+                   Many('series-fmt-list', SERIESFORMAT()) <<
+                   Many('ss-list', SS()) << Req(ShtProps()) <<
+                   Many('dft-texts', DFTTEXT(), max=2) <<
+                   Req(AxesUsed()) << Many('axis-parents', AXISPARENT(), min=1, max=2) <<
+                   CrtLayout12A() << DAT() << Many('attached-labels', ATTACHEDLABEL()) <<
+                   CRTMLFRT() <<
+                   Many('datalab-exts',
+                        Seq(Opt(Seq(Req(DataLabExt()), Req(StartObject()))),
+                            Req(ATTACHEDLABEL()),
+                            EndObject())) <<
+                   TEXTPROPS() << Many('crtmlfrt-list', CRTMLFRT()) << Req(End()))
+
+# Leaf rules for the series data.  Classes with a PARSER match one record
+# of the named xlsrecord class; the "pass" classes are placeholders that
+# never match.
+class Dimensions(BaseParser):
+ PARSER = Term(xlsrecord.Dimensions)
+
+class SIIndex(BaseParser):
+ PARSER = Term(xlsrecord.SIIndex)
+
+class Number(BaseParser):
+ PARSER = Term(xlsrecord.Number)
+
+
+class BoolErr(BaseParser): pass
+class Blank(BaseParser): pass
+class Label(BaseParser): pass
+
+class SERIESDATA(BaseParser):
+ # Rule being implemented (3x = exactly three, *x = repetition, / = alternative):
+ #SERIESDATA = Dimensions 3(SIIndex *(Number / BoolErr / Blank / Label))
+ # Number is the only value type with a PARSER so far; the others never match.
+ PARSER = Group('series-data', Req(Dimensions()) << Many('si-index-list',
+ Seq(Req(SIIndex()),
+ Many('values',
+ OneOf(Number(), BoolErr(),
+ Blank(), Label()))),
+ min=3, max=3))
+
+# placeholders: no PARSER, so they never match
+class CodeName(BaseParser): pass
+class WINDOW(BaseParser): pass
+class CUSTOMVIEW(BaseParser): pass
+
+# substream delimiters: each matches one xlsrecord.EOF / xlsrecord.BOF record
+class EOF(BaseParser):
+ PARSER = Term(xlsrecord.EOF)
+
+class BOF(BaseParser):
+ PARSER = Term(xlsrecord.BOF)
+
+class CHARTSHEETCONTENT(BaseParser):
+ # Rule being implemented ([x] = optional, *x = repetition); the BOF record
+ # is not part of this rule, XlsParser consumes it before dispatching here.
+ #CHARTSHEETCONTENT = [WriteProtect] [SheetExt] [WebPub] *HFPicture PAGESETUP PrintSize
+ #[HeaderFooter] [BACKGROUND] *Fbi *Fbi2 [ClrtClient] [PROTECTION] [Palette] [SXViewLink]
+ #[PivotChartBits] [SBaseRef] [MsoDrawingGroup] OBJECTS Units CHARTFORMATS SERIESDATA
+ #*WINDOW *CUSTOMVIEW [CodeName] [CRTMLFRT] EOF
+ PARSER = Group('chart', WriteProtect() << SheetExt() << WebPub() << Many('hf-pictures', HFPicture()) <<
+ Req(PAGESETUP()) << Req(PrintSize()) << HeaderFooter() << BACKGROUND() <<
+ Many('fbi-list', Fbi()) << Many('fbi2-list', Fbi2()) <<
+ ClrtClient() << PROTECTION() << Palette() << SXViewLink() << PivotChartBits() <<
+ SBaseRef() << MsoDrawingGroup() << Req(OBJECTS()) << Req(Units()) <<
+ Req(CHARTFORMATS()) << Req(SERIESDATA()) << Many('windows', WINDOW()) <<
+ Many('custom-views', CUSTOMVIEW()) << CodeName() << CRTMLFRT() << Req(EOF()))
+
+class XlsParser(BaseParser):
+    """Top-level parser: splits the record list into BOF...EOF substreams.
+
+    Substream types that have a grammar are parsed with it; all others are
+    skipped up to and including their EOF record.
+    """
+
+    def __init__(self, tokens):
+        self.__tokenStream = TokenStream(tokens)
+
+    def parse(self, stream):
+        """Parse every substream of stream and return the list of results.
+
+        Parsing stops at the end of the stream or at the first token that is
+        not a BOF record.  A ParseException inside a substream is reported
+        on stdout and re-raised.
+        """
+        # BOF data type -> (result name, grammar class); None means that no
+        # grammar exists yet and the substream is skipped.
+        PARSERS = {0x0005: None, # WorkbookGlobal
+                   0x0006: None, # Visual Basic module,
+                   0x0010: None, # Worksheet
+                   0x0020: ('chart', CHARTSHEETCONTENT),
+                   0x0040: None, # Excel 4.0 macro sheet
+                   0x0100: None, # Workspace file
+                   }
+        parsedList = []
+        bofParser = Req(BOF())
+
+        # Test for end of stream explicitly instead of relying on the BOF
+        # parser failing once the tokens run out.
+        while stream.currentIndex < len(stream.tokens):
+            bof = None
+            try:
+                bof = safeParse(bofParser, stream)
+            except ParseException:
+                pass
+            if bof is None:
+                break
+            bof.dumpData() # we need to dump data to make it parse the record
+            # A data type missing from the table is handled like a known
+            # type without grammar rather than raising KeyError.
+            parser = PARSERS.get(bof.dataType)
+
+            try:
+                if parser is not None:
+                    name, grammar = parser
+                    parsed = (name, grammar().parse(stream))
+                else:
+                    # skip the unknown substream: everything up to the next
+                    # EOF or BOF record, then the EOF itself if present
+                    skipper = Many('any-list', AnyButThis(OneOf(EOF(), BOF()))) << EOF()
+                    parsed = skipper.parse(stream)
+                parsedList.append(parsed)
+            except ParseException:
+                print ("Parse failed, previous token is [%s], next tokens are [%s]" % (stream.tokens[stream.currentIndex-1],
+                       ','.join(map(str,stream.tokens[stream.currentIndex:stream.currentIndex+5]))))
+                raise
+        return parsedList
+
+    def __dumpRoot(self, parsed):
+        """Recursively replace every record in a parse result by its dumpData()."""
+        if parsed is None:
+            return None
+        elif isinstance(parsed, tuple):
+            return (parsed[0], self.__dumpRoot(parsed[1]))
+        elif isinstance(parsed, list):
+            # a comprehension yields a real list on Python 2 and 3 alike
+            return [self.__dumpRoot(item) for item in parsed]
+        else:
+            return parsed.dumpData()
+
+    def dumpData(self):
+        """Parse the tokens given to the constructor and return the dumped tree."""
+        parsed = self.parse(self.__tokenStream)
+        if parsed is None:
+            return None
+        return self.__dumpRoot(parsed)
+
+
+
diff --git a/src/xlsrecord.py b/src/xlsrecord.py
index c69cabf..f58c464 100644
--- a/src/xlsrecord.py
+++ b/src/xlsrecord.py
@@ -128,6 +128,12 @@ append a line to be displayed.
Like parseBytes(), the derived classes must overwrite this method."""
pass
+ def dumpData (self):
+ """Parse the original bytes and return the data dump as ('name', {'val1': val1,...})
+
+Like parseBytes(), the derived classes must override this method.
+This default implementation does nothing and returns None."""
+ pass
+
def __getHeaderStr (self):
return "%4.4Xh: "%self.header
@@ -411,6 +417,8 @@ class Autofilter(BaseRecordHandler):
sh.setAutoFilterArrow(self.filterIndex, obj)
# TODO: Pick up more complex states as we need them.
+class EOF(BaseRecordHandler):
+    """Handler class for EOF records; it adds nothing to BaseRecordHandler."""
class BOF(BaseRecordHandler):
@@ -461,7 +469,6 @@ class BOF(BaseRecordHandler):
def parseBytes (self):
self.__parseBytes()
# BIFF version
- ver = self.readUnsignedInt(2)
s = 'not BIFF8'
if self.ver == 0x0600:
s = 'BIFF8'
@@ -498,7 +505,21 @@ class BOF(BaseRecordHandler):
sheet.version = s
-
+    def dumpData(self):
+        """Parse the record and return it as a ('bof', {...}) pair."""
+        self.__parseBytes()
+        # (dump key, attribute name) pairs, read in this order
+        layout = (('ver', 'ver'),
+                  ('data-type', 'dataType'),
+                  ('build-id', 'buildID'),
+                  ('build-year', 'buildYear'),
+                  ('win', 'win'),
+                  ('risc', 'risc'),
+                  ('beta', 'beta'),
+                  ('win-any', 'winAny'),
+                  ('mac-any', 'macAny'),
+                  ('beta-any', 'betaAny'),
+                  ('risc-any', 'riscAny'),
+                  ('lowest-version', 'lowestExcelVer'))
+        fields = {}
+        for key, attr in layout:
+            fields[key] = getattr(self, attr)
+        return ('bof', fields)
+
class BoundSheet(BaseRecordHandler):
hiddenStates = {0x00: 'visible', 0x01: 'hidden', 0x02: 'very hidden'}
@@ -1171,14 +1192,16 @@ class MulBlank(BaseRecordHandler):
class Number(BaseRecordHandler):
+ def __parseBytes (self):
+ self.row = self.readSignedInt(2)
+ self.col = self.readSignedInt(2)
+ self.xf = self.readSignedInt(2)
+ self.fval = self.readDouble()
+
def parseBytes (self):
- row = globals.getSignedInt(self.bytes[0:2])
- col = globals.getSignedInt(self.bytes[2:4])
- xf = globals.getSignedInt(self.bytes[4:6])
- fval = globals.getDouble(self.bytes[6:14])
- self.appendCellPosition(col, row)
- self.appendLine("XF record ID: %d"%xf)
- self.appendLine("value: %g"%fval)
+ # row, col, xf and fval are only set by __parseBytes(), so it has to run
+ # before they are read below
+ self.__parseBytes()
+ self.appendCellPosition(self.col, self.row)
+ self.appendLine("XF record ID: %d"%self.xf)
+ self.appendLine("value: %g"%self.fval)
class Obj(BaseRecordHandler):
@@ -3320,16 +3343,213 @@ class CTCellContent(BaseRecordHandler):
# -------------------------------------------------------------------
# CH - Chart
+
+# Record handler classes referenced by the chart grammar in xlsparser.py.
+# The "pass" classes add nothing to BaseRecordHandler.
+class Header(BaseRecordHandler):
+ pass
+
+class Footer(BaseRecordHandler):
+ pass
+
+# HCenter and VCenter only define a private __parseBytes(); no caller of it
+# is visible in this hunk.
+class HCenter(BaseRecordHandler):
+ def __parseBytes(self):
+ self.val = self.readUnsignedInt(2)
+
+class VCenter(BaseRecordHandler):
+ def __parseBytes(self):
+ self.val = self.readUnsignedInt(2)
+
+class Setup(BaseRecordHandler):
+ pass
+
+class Units(BaseRecordHandler):
+ pass
+
+class Begin(BaseRecordHandler):
+ pass
+
+class PlotArea(BaseRecordHandler):
+ pass
+
+class CrtLink(BaseRecordHandler): # it's unused
+ pass
+
+class End(BaseRecordHandler):
+ pass
+
class Chart(BaseRecordHandler):
+ def __parseBytes(self):
+ self.x = globals.getSignedInt(self.bytes[0:4])
+ self.y = globals.getSignedInt(self.bytes[4:8])
+ self.w = globals.getSignedInt(self.bytes[8:12])
+ self.h = globals.getSignedInt(self.bytes[12:16])
+
+ def parseBytes (self):
+ self.__parseBytes()
+ self.appendLine("position: (x, y) = (%d, %d)"%(self.x, self.y))
+ self.appendLine("size: (width, height) = (%d, %d)"%(self.w, self.h))
+
+class Frame(BaseRecordHandler):
+    """Record handler that reads a frame type and two auto-layout flags."""
+
+    # frame type value -> description used in the flat dump
+    __frt_table = {0x0000: "frame surrounding the chart element",
+                   0x0004: "frame with a shadow surrounding the chart element"}
+
+    def __parseBytes(self):
+        self.frt = self.readUnsignedInt(2)
+        flags = self.readUnsignedInt(2)
+        self.autoSize = (flags & 0x001) != 0
+        self.autoPosition = (flags & 0x002) != 0
+
+    def parseBytes (self):
+        self.__parseBytes()
+        # The table has to be reached through this class: inside Frame the
+        # name is mangled to _Frame__frt_table, which an access through any
+        # other class name can never resolve.  Unknown frame types are
+        # reported instead of raising KeyError.
+        frameType = Frame.__frt_table.get(self.frt, "unknown (0x%4.4X)" % self.frt)
+        self.appendLine("frame type: %s" % frameType)
+        self.appendLine("autoSize: %s" % self.autoSize)
+        self.appendLine("autoPosition: %s" % self.autoPosition)
+
+# In the four handlers below __parseBytes() reads the fields with a
+# sequence of read*() calls in a fixed order, and parseBytes() so far only
+# triggers that parsing without emitting any lines (see the TODOs).
+class LineFormat(BaseRecordHandler):
+ def __parseBytes(self):
+ self.rgb = self.readLongRGB()
+ self.lns = self.readUnsignedInt(2)
+ self.we = self.readUnsignedInt(2)
+ flags = self.readUnsignedInt(2)
+ self.auto = (flags & 0x001) != 0 # A
+ unused = (flags & 0x002) != 0 # B (unused)
+ self.axisOn = (flags & 0x004) != 0 # C
+ self.autoCo = (flags & 0x008) != 0 # D
+ self.icv = self.readICV()
+
+ def parseBytes (self):
+ self.__parseBytes()
+ # TODO: dump all data
+
+class AreaFormat(BaseRecordHandler):
+ def __parseBytes(self):
+ self.foreColor = self.readLongRGB()
+ self.backColor = self.readLongRGB()
+ self.fls = self.readUnsignedInt(2)
+ flags = self.readUnsignedInt(2)
+ self.auto = (flags & 0x001) != 0 # A
+ self.invertNeg = (flags & 0x002) != 0 # B
+ self.icvFore = self.readICV()
+ self.icvBack = self.readICV()
+
+ def parseBytes (self):
+ self.__parseBytes()
+ # TODO: dump all data
+
+class DataFormat(BaseRecordHandler):
+ def __parseBytes(self):
+ self.xi = self.readUnsignedInt(2)
+ self.yi = self.readUnsignedInt(2)
+ self.iss = self.readUnsignedInt(2)
+ flags = self.readUnsignedInt(2)
+ unused = (flags & 0x001) != 0 # A (??? - not described in docs)
+
+ def parseBytes (self):
+ self.__parseBytes()
+ # TODO: dump all data
+class ChartFormat(BaseRecordHandler):
+ def __parseBytes(self):
+ # the four 4-byte reserved values are read only to get past them
+ reserved1 = self.readUnsignedInt(4)
+ reserved2 = self.readUnsignedInt(4)
+ reserved3 = self.readUnsignedInt(4)
+ reserved4 = self.readUnsignedInt(4)
+ flags = self.readUnsignedInt(2)
+ self.varied = (flags & 0x001) != 0 # A
+ self.icrt = self.readUnsignedInt(2)
+
+ def parseBytes (self):
+ self.__parseBytes()
+ # TODO: dump all data
+
+class Chart3DBarShape(BaseRecordHandler):
+ def __parseBytes(self):
+ self.riser = self.readUnsignedInt(1)
+ self.taper = self.readUnsignedInt(1)
+
def parseBytes (self):
- x = globals.getSignedInt(self.bytes[0:4])
- y = globals.getSignedInt(self.bytes[4:8])
- w = globals.getSignedInt(self.bytes[8:12])
- h = globals.getSignedInt(self.bytes[12:16])
- self.appendLine("position: (x, y) = (%d, %d)"%(x, y))
- self.appendLine("size: (width, height) = (%d, %d)"%(w, h))
+ self.__parseBytes()
+ # TODO: dump all data
+
+# The handlers below only define a private __parseBytes(); no parseBytes()
+# or dumpData() override and no caller of __parseBytes() is visible in this
+# hunk.
+class SerToCrt(BaseRecordHandler):
+ def __parseBytes(self):
+ self.id = self.readUnsignedInt(2)
+
+class Pos(BaseRecordHandler):
+ def __parseBytes(self):
+ self.mdTopLt = self.readUnsignedInt(2)
+ self.mdBotRt = self.readUnsignedInt(2)
+ # each coordinate is followed by a 2-byte value that is read and discarded
+ self.x1 = self.readSignedInt(2)
+ unused = self.readUnsignedInt(2)
+ self.y1 = self.readSignedInt(2)
+ unused = self.readUnsignedInt(2)
+ self.x2 = self.readSignedInt(2)
+ unused = self.readUnsignedInt(2)
+ self.y2 = self.readSignedInt(2)
+ unused = self.readUnsignedInt(2)
+
+class FontX(BaseRecordHandler):
+ def __parseBytes(self):
+ self.iFont = self.readUnsignedInt(2)
+
+class AxesUsed(BaseRecordHandler):
+ def __parseBytes(self):
+ self.cAxes = self.readUnsignedInt(2)
+
+class AxisParent(BaseRecordHandler):
+ def __parseBytes(self):
+ self.iax = self.readUnsignedInt(2)
+ # 16 bytes are unused
+
+class AxcExt(BaseRecordHandler):
+ def __parseBytes (self):
+ self.catMin = self.readUnsignedInt(2)
+ self.catMax = self.readUnsignedInt(2)
+ self.catMajor = self.readUnsignedInt(2)
+ self.duMajor = self.readUnsignedInt(2)
+ self.catMinor = self.readUnsignedInt(2)
+ self.duMinor = self.readUnsignedInt(2)
+ self.duBase = self.readUnsignedInt(2)
+ self.catCrossDate = self.readUnsignedInt(2)
+
+ # one flag bit per boolean, lowest bit first
+ flag = self.readUnsignedInt(2)
+ self.autoMin = (flag & 0x0001) != 0 # A
+ self.autoMax = (flag & 0x0002) != 0 # B
+ self.autoMajor = (flag & 0x0004) != 0 # C
+ self.autoMinor = (flag & 0x0008) != 0 # D
+ self.dateAxis = (flag & 0x0010) != 0 # E
+ self.autoBase = (flag & 0x0020) != 0 # F
+ self.autoCross = (flag & 0x0040) != 0 # G
+ self.autoDate = (flag & 0x0080) != 0 # H
+
+class Tick(BaseRecordHandler):
+ def __parseBytes (self):
+ self.tktMajor = self.readUnsignedInt(1)
+ self.tktMinor = self.readUnsignedInt(1)
+ self.tlt = self.readUnsignedInt(1)
+ self.wBkgMode = self.readUnsignedInt(1)
+ self.rgb = self.readLongRGB()
+ # the four 4-byte reserved values are read only to get past them
+ reserved1 = self.readUnsignedInt(4)
+ reserved2 = self.readUnsignedInt(4)
+ reserved3 = self.readUnsignedInt(4)
+ reserved4 = self.readUnsignedInt(4)
+ flag = self.readUnsignedInt(2)
+ # TODO: recheck it
+ self.autoCo = (flag & 0x0001) != 0 # A
+ self.autoMode = (flag & 0x0002) != 0 # B
+ # rot is the 3-bit field in bits 2-4, readingOrder the top two bits
+ self.rot = (flag & (0x4+0x8+0x10)) >> 2
+ self.readingOrder = (flag >>14)
+ self.icv = self.readICV()
+ self.trot = self.readUnsignedInt(2)
+
+class AxisLine(BaseRecordHandler):
+ def __parseBytes(self):
+ self.id = self.readUnsignedInt(2)
+
+class SIIndex(BaseRecordHandler):
+ def __parseBytes(self):
+ self.numIndex = self.readUnsignedInt(2)
+
class DefaultText(BaseRecordHandler):
__types = [
@@ -3434,71 +3654,74 @@ class Series(BaseRecordHandler):
class CHAxis(BaseRecordHandler):
axisTypeList = ['x-axis', 'y-axis', 'z-axis']
-
+
+ def __parseBytes(self):
+ self.axisType = self.readUnsignedInt(2)
+ self.x = self.readSignedInt(4)
+ self.y = self.readSignedInt(4)
+ self.w = self.readSignedInt(4)
+ self.h = self.readSignedInt(4)
+
def parseBytes (self):
- axisType = self.readUnsignedInt(2)
- x = self.readSignedInt(4)
- y = self.readSignedInt(4)
- w = self.readSignedInt(4)
- h = self.readSignedInt(4)
- if axisType < len(CHAxis.axisTypeList):
- self.appendLine("axis type: %s (%d)"%(CHAxis.axisTypeList[axisType], axisType))
+ self.__parseBytes()
+ if self.axisType < len(CHAxis.axisTypeList):
+ self.appendLine("axis type: %s (%d)"%(CHAxis.axisTypeList[self.axisType], self.axisType))
else:
self.appendLine("axis type: unknown")
- self.appendLine("area: (x, y, w, h) = (%d, %d, %d, %d) [no longer used]"%(x, y, w, h))
+ self.appendLine("area: (x, y, w, h) = (%d, %d, %d, %d) [no longer used]"%(self.x, self.y, self.w, self.h))
class CHProperties(BaseRecordHandler):
-
+ def __parseBytes(self):
+ flags = self.readUnsignedInt(2)
+ self.emptyFlags = self.readUnsignedInt(2)
+ self.manualSeries = (flags & 0x0001) != 0
+ self.showVisCells = (flags & 0x0002) != 0
+ self.noResize = (flags & 0x0004) != 0
+ self.manualPlotArea = (flags & 0x0008) != 0
+
def parseBytes (self):
- flags = globals.getSignedInt(self.bytes[0:2])
- emptyFlags = globals.getSignedInt(self.bytes[2:4])
-
- manualSeries = "false"
- showVisCells = "false"
- noResize = "false"
- manualPlotArea = "false"
-
- if (flags & 0x0001):
- manualSeries = "true"
- if (flags & 0x0002):
- showVisCells = "true"
- if (flags & 0x0004):
- noResize = "true"
- if (flags & 0x0008):
- manualPlotArea = "true"
+ self.__parseBytes()
- self.appendLine("manual series: %s"%manualSeries)
- self.appendLine("show only visible cells: %s"%showVisCells)
- self.appendLine("no resize: %s"%noResize)
- self.appendLine("manual plot area: %s"%manualPlotArea)
+ self.appendLine("manual series: %s" % self.getTrueFalse(self.manualSeries))
+ self.appendLine("show only visible cells: %s" % self.getTrueFalse(self.showVisCells))
+ self.appendLine("no resize: %s"%self.getTrueFalse(self.noResize))
+ self.appendLine("manual plot area: %s" % self.getTrueFalse(self.manualPlotArea))
emptyValues = "skip"
- if emptyFlags == 1:
+ if self.emptyFlags == 1:
emptyValues = "plot as zero"
- elif emptyFlags == 2:
+ elif self.emptyFlags == 2:
emptyValues = "interpolate empty values"
- self.appendLine("empty value treatment: %s"%emptyValues)
-
+ self.appendLine("empty value treatment: %s" % emptyValues)
class CHLabelRange(BaseRecordHandler):
+
+ def __parseBytes (self):
+ self.axisCross = self.readUnsignedInt(2)
+ self.freqLabel = self.readUnsignedInt(2)
+ self.freqTick = self.readUnsignedInt(2)
+ flags = self.readUnsignedInt(2)
+ self.betweenCateg = (flags & 0x0001) != 0
+ self.maxCross = (flags & 0x0002) != 0
+ self.reversed = (flags & 0x0004) != 0
+
def parseBytes (self):
+ # Decode the record first; every field printed below is set by __parseBytes().
+ self.__parseBytes()
- axisCross = self.readUnsignedInt(2)
- freqLabel = self.readUnsignedInt(2)
- freqTick = self.readUnsignedInt(2)
- self.appendLine("axis crossing: %d"%axisCross)
- self.appendLine("label frequency: %d"%freqLabel)
- self.appendLine("tick frequency: %d"%freqTick)
+ self.appendLine("axis crossing: %d"%self.axisCross)
+ self.appendLine("label frequency: %d"%self.freqLabel)
+ self.appendLine("tick frequency: %d"%self.freqTick)
- flags = self.readUnsignedInt(2)
- betweenCateg = (flags & 0x0001)
- maxCross = (flags & 0x0002)
- reversed = (flags & 0x0004)
- self.appendLineBoolean("axis between categories", betweenCateg)
- self.appendLineBoolean("other axis crosses at maximum", maxCross)
- self.appendLineBoolean("axis reversed", reversed)
+ self.appendLineBoolean("axis between categories", self.betweenCateg)
+ self.appendLineBoolean("other axis crosses at maximum", self.maxCross)
+ self.appendLineBoolean("axis reversed", self.reversed)
+
+ def fillModel(self, model):
+ self.__parseBytes()
+ sh = model.getCurrentSheet()
+ sh.setCatSerRange(self.axisCross, self.freqLabel, self.freqTick,
+ self.betweenCateg, self.maxCross, self.reversed)
class Legend(BaseRecordHandler):
@@ -3518,32 +3741,34 @@ class Legend(BaseRecordHandler):
else:
return '(unknown)'
- def parseBytes (self):
- x = self.readSignedInt(4)
- y = self.readSignedInt(4)
- w = self.readSignedInt(4)
- h = self.readSignedInt(4)
- dockMode = self.readUnsignedInt(1) # [MS-XLS] says unused !?
- spacing = self.readUnsignedInt(1)
+ def __parseBytes (self):
+ self.x = self.readSignedInt(4)
+ self.y = self.readSignedInt(4)
+ self.w = self.readSignedInt(4)
+ self.h = self.readSignedInt(4)
+ self.dockMode = self.readUnsignedInt(1) # [MS-XLS] says unused !?
+ self.spacing = self.readUnsignedInt(1)
flags = self.readUnsignedInt(2)
- docked = (flags & 0x0001)
- autoSeries = (flags & 0x0002)
- autoPosX = (flags & 0x0004)
- autoPosY = (flags & 0x0008)
- stacked = (flags & 0x0010)
- dataTable = (flags & 0x0020)
-
- self.appendLine("legend position: (x, y) = (%d, %d)"%(x,y))
- self.appendLine("legend size: width = %d, height = %d"%(w,h))
- self.appendLine("dock mode: %s"%self.getDockModeText(dockMode))
- self.appendLine("spacing: %s"%self.getSpacingText(spacing))
- self.appendLineBoolean("docked", docked)
- self.appendLineBoolean("auto series", autoSeries)
- self.appendLineBoolean("auto position x", autoPosX)
- self.appendLineBoolean("auto position y", autoPosY)
- self.appendLineBoolean("stacked", stacked)
- self.appendLineBoolean("data table", dataTable)
+ self.docked = (flags & 0x0001) != 0
+ self.autoSeries = (flags & 0x0002) != 0
+ self.autoPosX = (flags & 0x0004) != 0
+ self.autoPosY = (flags & 0x0008) != 0
+ self.stacked = (flags & 0x0010) != 0
+ self.dataTable = (flags & 0x0020) != 0
+
+ def parseBytes (self):
+ self.__parseBytes()
+ self.appendLine("legend position: (x, y) = (%d, %d)"%(self.x, self.y))
+ self.appendLine("legend size: width = %d, height = %d"%(self.w, self.h))
+ self.appendLine("dock mode: %s"%self.getDockModeText(self.dockMode))
+ self.appendLine("spacing: %s"%self.getSpacingText(self.spacing))
+ self.appendLineBoolean("docked", self.docked)
+ self.appendLineBoolean("auto series", self.autoSeries)
+ self.appendLineBoolean("auto position x", self.autoPosX)
+ self.appendLineBoolean("auto position y", self.autoPosY)
+ self.appendLineBoolean("stacked", self.stacked)
+ self.appendLineBoolean("data table", self.dataTable)
self.appendLine("")
self.appendMultiLine("NOTE: Position and size are in units of 1/4000 of chart's width or height.")
@@ -3551,54 +3776,62 @@ class Legend(BaseRecordHandler):
class CHValueRange(BaseRecordHandler):
- def parseBytes (self):
- minVal = globals.getDouble(self.readBytes(8))
- maxVal = globals.getDouble(self.readBytes(8))
- majorStep = globals.getDouble(self.readBytes(8))
- minorStep = globals.getDouble(self.readBytes(8))
- cross = globals.getDouble(self.readBytes(8))
+ def __parseBytes (self):
+ self.minVal = globals.getDouble(self.readBytes(8))
+ self.maxVal = globals.getDouble(self.readBytes(8))
+ self.majorStep = globals.getDouble(self.readBytes(8))
+ self.minorStep = globals.getDouble(self.readBytes(8))
+ self.cross = globals.getDouble(self.readBytes(8))
flags = globals.getSignedInt(self.readBytes(2))
- autoMin = (flags & 0x0001)
- autoMax = (flags & 0x0002)
- autoMajor = (flags & 0x0004)
- autoMinor = (flags & 0x0008)
- autoCross = (flags & 0x0010)
- logScale = (flags & 0x0020)
- reversed = (flags & 0x0040)
- maxCross = (flags & 0x0080)
- bit8 = (flags & 0x0100)
-
- self.appendLine("min: %g (auto min: %s)"%(minVal, self.getYesNo(autoMin)))
- self.appendLine("max: %g (auto max: %s)"%(maxVal, self.getYesNo(autoMax)))
+ self.autoMin = (flags & 0x0001) != 0
+ self.autoMax = (flags & 0x0002) != 0
+ self.autoMajor = (flags & 0x0004) != 0
+ self.autoMinor = (flags & 0x0008) != 0
+ self.autoCross = (flags & 0x0010) != 0
+ self.logScale = (flags & 0x0020) != 0
+ self.reversed = (flags & 0x0040) != 0
+ self.maxCross = (flags & 0x0080) != 0
+ self.bit8 = (flags & 0x0100) != 0
+
+ def parseBytes (self):
+ # Decode the record first; every field printed below is set by __parseBytes().
+ self.__parseBytes()
+ self.appendLine("min: %g (auto min: %s)"%(self.minVal, self.getYesNo(self.autoMin)))
+ self.appendLine("max: %g (auto max: %s)"%(self.maxVal, self.getYesNo(self.autoMax)))
self.appendLine("major step: %g (auto major: %s)"%
- (majorStep, self.getYesNo(autoMajor)))
+ (self.majorStep, self.getYesNo(self.autoMajor)))
self.appendLine("minor step: %g (auto minor: %s)"%
- (minorStep, self.getYesNo(autoMinor)))
+ (self.minorStep, self.getYesNo(self.autoMinor)))
self.appendLine("cross: %g (auto cross: %s) (max cross: %s)"%
- (cross, self.getYesNo(autoCross), self.getYesNo(maxCross)))
- self.appendLine("biff5 or above: %s"%self.getYesNo(bit8))
+ (self.cross, self.getYesNo(self.autoCross), self.getYesNo(self.maxCross)))
+ self.appendLine("biff5 or above: %s"%self.getYesNo(self.bit8))
class CHBar(BaseRecordHandler):
- def parseBytes (self):
- overlap = globals.getSignedInt(self.readBytes(2))
- gap = globals.getSignedInt(self.readBytes(2))
+ def __parseBytes (self):
+ self.overlap = globals.getSignedInt(self.readBytes(2))
+ self.gap = globals.getSignedInt(self.readBytes(2))
flags = globals.getUnsignedInt(self.readBytes(2))
- horizontal = (flags & 0x0001)
- stacked = (flags & 0x0002)
- percent = (flags & 0x0004)
- shadow = (flags & 0x0008)
-
- self.appendLine("overlap width: %d"%overlap)
- self.appendLine("gap: %d"%gap)
- self.appendLine("horizontal: %s"%self.getYesNo(horizontal))
- self.appendLine("stacked: %s"%self.getYesNo(stacked))
- self.appendLine("percent: %s"%self.getYesNo(percent))
- self.appendLine("shadow: %s"%self.getYesNo(shadow))
-
+ self.horizontal = (flags & 0x0001) != 0
+ self.stacked = (flags & 0x0002) != 0
+ self.percent = (flags & 0x0004) != 0
+ self.shadow = (flags & 0x0008) != 0
+
+ def parseBytes (self):
+ self.__parseBytes()
+ self.appendLine("overlap width: %d"%self.overlap)
+ self.appendLine("gap: %d"%self.gap)
+ self.appendLine("horizontal: %s"%self.getYesNo(self.horizontal))
+ self.appendLine("stacked: %s"%self.getYesNo(self.stacked))
+ self.appendLine("percent: %s"%self.getYesNo(self.percent))
+ self.appendLine("shadow: %s"%self.getYesNo(self.shadow))
+
+ def fillModel(self, model):
+ self.__parseBytes()
+ sh = model.getCurrentSheet()
+ sh.setBar(self.overlap, self.gap, self.horizontal, self.stacked, self.percent, self.shadow)
class CHLine(BaseRecordHandler):
@@ -3634,6 +3867,16 @@ class Brai(BaseRecordHandler):
self.iFmt = self.readUnsignedInt(2)
tokenBytes = self.readUnsignedInt(2)
self.formulaBytes = self.readBytes(tokenBytes)
+ self.formula = None
+ self.formulaError = None
+ if len(self.formulaBytes) > 0:
+ parser = formula.FormulaParser(self.header, self.formulaBytes)
+ try:
+ parser.parse()
+ self.formula = parser.getText()
+ except formula.FormulaParserError as e:
+ self.formulaError = e.args[0]
+
def parseBytes (self):
self.__parseBytes()
@@ -3644,17 +3887,15 @@ class Brai(BaseRecordHandler):
s += "custom format"
else:
s += "source data format"
- self.appendLine(s)
+ self.appendLine(s)
self.appendLine("number format ID: %d"%self.iFmt)
self.appendLine("formula size (bytes): %d"%len(self.formulaBytes))
- if len(self.formulaBytes) > 0:
- parser = formula.FormulaParser(self.header, self.formulaBytes)
- try:
- parser.parse()
- self.appendLine("formula: %s"%parser.getText())
- except formula.FormulaParserError as e:
- self.appendLine("formula parser error: %s"%e.args[0])
+
+ # formula and formulaError both stay None when the record has no formula
+ # bytes; print nothing in that case, as the code being replaced did.
+ if self.formula is not None:
+ self.appendLine("formula: %s"%self.formula)
+ elif self.formulaError is not None:
+ self.appendLine("formula parser error: %s"%self.formulaError)
class MSODrawing(BaseRecordHandler):
"""Handler for the MSODRAWING record
diff --git a/src/xlsstream.py b/src/xlsstream.py
index eea243a..9b82608 100644
--- a/src/xlsstream.py
+++ b/src/xlsstream.py
@@ -37,7 +37,7 @@ unusedRecDesc = "[unused, must be ignored]"
recData = {
0x0006: ["FORMULA", "Cell Formula", xlsrecord.Formula],
- 0x000A: ["EOF", "End of File"],
+ 0x000A: ["EOF", "End of File", xlsrecord.EOF],
0x000C: ["CALCCOUNT", "Iteration Count"],
0x000D: ["CALCMODE", "Calculation Mode"],
0x000E: ["PRECISION", "Precision"],
@@ -46,8 +46,8 @@ recData = {
0x0011: ["ITERATION", "Iteration Mode"],
0x0012: ["PROTECT", "Protection Flag", xlsrecord.Protect],
0x0013: ["PASSWORD", "Protection Password"],
- 0x0014: ["HEADER", "Print Header on Each Page"],
- 0x0015: ["FOOTER", "Print Footer on Each Page"],
+ 0x0014: ["HEADER", "Print Header on Each Page", xlsrecord.Header],
+ 0x0015: ["FOOTER", "Print Footer on Each Page", xlsrecord.Footer],
0x0016: ["EXTERNCOUNT", "Number of External References"],
0x0017: ["EXTERNSHEET", "External Reference", xlsrecord.ExternSheet],
0x0018: ["NAME", "Internal Defined Name", xlsrecord.Name],
@@ -92,8 +92,8 @@ recData = {
0x0080: ["GUTS", "Size of Row and Column Gutters"],
0x0081: ["WSBOOL", "Additional Workspace Information"],
0x0082: ["GRIDSET", "State Change of Gridlines Option"],
- 0x0083: ["HCENTER", "Center Between Horizontal Margins"],
- 0x0084: ["VCENTER", "Center Between Vertical Margins"],
+ 0x0083: ["HCENTER", "Center Between Horizontal Margins", xlsrecord.HCenter],
+ 0x0084: ["VCENTER", "Center Between Vertical Margins", xlsrecord.VCenter],
0x0085: ["BOUNDSHEET", "Sheet Information", xlsrecord.BoundSheet],
0x0086: ["WRITEPROT", "Workbook Is Write-Protected"],
0x0087: ["ADDIN", "Workbook Is an Add-in Macro"],
@@ -115,7 +115,7 @@ recData = {
0x009D: ["AUTOFILTERINFO", "Drop-Down Arrow Count", xlsrecord.AutofilterInfo],
0x009E: ["AUTOFILTER", "AutoFilter Data", xlsrecord.Autofilter],
0x00A0: ["SCL", "Window Zoom Magnification", xlsrecord.Scl],
- 0x00A1: ["SETUP", "Page Setup"],
+ 0x00A1: ["SETUP", "Page Setup", xlsrecord.Setup],
0x00A9: ["COORDLIST", "Polygon Object Vertex Coordinates"],
0x00AB: ["GCW", "Global Column-Width Flags"],
0x00AE: ["SCENMAN", "Scenario Output Data"],
@@ -238,17 +238,17 @@ recData = {
0x089C: ["HEADERFOOTER", "Header Footer"],
0x089B: ["COMPRESSPICTURES", "Automatic Picture Compression Mode"],
0x08A3: ["FORCEFULLCALCULATION", "Force Full Calculation Mode"],
- 0x1001: ["UNITS", unusedRecDesc],
+ 0x1001: ["UNITS", unusedRecDesc, xlsrecord.Units],
0x1002: ["CHART", "Position And Size of Chart Area", xlsrecord.Chart],
0x1003: ["SERIES", "Data Properties for Series, Trendlines or Error Bars", xlsrecord.Series],
- 0x1006: ["CHDATAFORMAT", "?"],
- 0x1007: ["LINEFORMAT", "Appearance of A Line"],
+ 0x1006: ["CHDATAFORMAT", "Data point or series that the formatting information that follows applies to (2.4.74)", xlsrecord.DataFormat],
+ 0x1007: ["LINEFORMAT", "Appearance of A Line", xlsrecord.LineFormat],
0x1009: ["CHMARKERFORMAT", "?"],
- 0x100A: ["AREAFORMAT", "Patterns and Colors in Filled Region of Chart"],
+ 0x100A: ["AREAFORMAT", "Patterns and Colors in Filled Region of Chart", xlsrecord.AreaFormat],
0x100B: ["CHPIEFORMAT", "?"],
0x100C: ["CHATTACHEDLABEL", "?"],
0x100D: ["SERIESTEXT", "Series Category Name or Title Text in Chart", xlsrecord.SeriesText],
- 0x1014: ["CHTYPEGROUP", "?"],
+ 0x1014: ["CHTYPEGROUP", "Properties of a chart group", xlsrecord.ChartFormat],
0x1015: ["LEGEND", "Legend Properties", xlsrecord.Legend],
0x1017: ["CHBAR, CHCOLUMN", "?", xlsrecord.CHBar],
0x1018: ["CHLINE", "?", xlsrecord.CHLine],
@@ -257,43 +257,44 @@ recData = {
0x101B: ["CHSCATTER", "?"],
- 0x001C: ["CHCHARTLINE", "?"],
+ 0x101C: ["CHCHARTLINE", "?"],
0x101D: ["CHAXIS", "Chart Axis", xlsrecord.CHAxis],
- 0x101E: ["CHTICK", "?"],
+ 0x101E: ["CHTICK", "Attributes of the axis labels and tick marks", xlsrecord.Tick],
0x101F: ["CHVALUERANGE", "Chart Axis Value Range", xlsrecord.CHValueRange],
0x1020: ["CHLABELRANGE", "Chart Axis Label Range", xlsrecord.CHLabelRange],
- 0x1021: ["CHAXISLINE", "?"],
- 0x1022: ["CRTLINK", unusedRecDesc],
+ 0x1021: ["CHAXISLINE", "Specifies which part of the axis is specified by the LineFormat record that follows(2.4.12)", xlsrecord.AxisLine],
+ 0x1022: ["CRTLINK", unusedRecDesc, xlsrecord.CrtLink],
0x1024: ["DEFAULTTEXT", "Default Text", xlsrecord.DefaultText],
0x1025: ["TEXT", "Label Properties", xlsrecord.Text],
- 0x1026: ["CHFONT", "?"],
+ 0x1026: ["CHFONT", "Font for a given text element", xlsrecord.FontX],
0x1027: ["CHOBJECTLINK", "?"],
- 0x1032: ["FRAME", "Type, Size and Position of the Frame around A Chart"],
- 0x1033: ["BEGIN", "Start of Chart Sheet Substream"],
- 0x1034: ["END", "End of Chart Sheet Substream"],
- 0x1035: ["CHPLOTFRAME", "Chart Plot Frame"],
+ 0x1032: ["FRAME", "Type, Size and Position of the Frame around A Chart", xlsrecord.Frame],
+ 0x1033: ["BEGIN", "Start of Chart Sheet Substream", xlsrecord.Begin],
+ 0x1034: ["END", "End of Chart Sheet Substream", xlsrecord.End],
+ 0x1035: ["CHPLOTFRAME", "Chart Plot Frame (indicates the frame that follows)", xlsrecord.PlotArea],
0x103A: ["CHCHART3D", "?"],
0x103C: ["CHPICFORMAT", "?"],
0x103D: ["CHDROPBAR", "?"],
0x103E: ["CHRADARLINE", "?"],
0x103F: ["CHSURFACE", "?"],
0x1040: ["CHRADARAREA", "?"],
- 0x1041: ["CHAXESSET", "?"],
- 0x1044: ["CHPROPERTIES", "?", xlsrecord.CHProperties],
- 0x1045: ["CHSERGROUP", "?"],
+ 0x1041: ["CHAXESSET", "Properties of an axis group", xlsrecord.AxisParent],
+ 0x1044: ["CHPROPERTIES", "Properties of a chart(2.4.261)", xlsrecord.CHProperties],
+ 0x1045: ["CHSERGROUP", "Chart group for the current series(2.4.256)", xlsrecord.SerToCrt],
+ 0x1046: ["AXESUSED", "Number of axis groups on the chart(2.4.10)", xlsrecord.AxesUsed],
0x1048: ["CHPIVOTREF", "?"],
0x104A: ["CHSERPARENT", "?"],
0x104B: ["CHSERTRENDLINE", "?"],
0x104E: ["CHFORMAT", "?"],
- 0x104F: ["CHFRAMEPOS", "?"],
+ 0x104F: ["CHFRAMEPOS", "Size and position for a legend, an attached label, or the plot area(2.4.201)", xlsrecord.Pos],
0x1050: ["CHFORMATRUNS", "?"],
0x1051: ["BRAI", "Data Source of A Chart", xlsrecord.Brai],
0x105B: ["CHSERERRORBAR", "?"],
0x105D: ["CHSERIESFORMAT", "?"],
- 0x105F: ["CH3DDATAFORMAT", "?"],
+ 0x105F: ["CH3DDATAFORMAT", "Shape of the data points(2.4.47)", xlsrecord.Chart3DBarShape],
0x1060: ["FBI", "Font Information for Chart", xlsrecord.Fbi],
0x1061: ["CHPIEEXT", "?"],
- 0x1062: ["CHLABELRANGE2", "?"],
+ 0x1062: ["AXCEXT", "Additional extension properties of a date axis(2.4.9)", xlsrecord.AxcExt],
0x1064: ["PLOTGROWTH", "Font Scaling Information in the Plot Area", xlsrecord.PlotGrowth],
- 0x1065: ["CHSIINDEX*", "?"],
+ 0x1065: ["CHSIINDEX*", "Part of a group of records which specify the data of a chart", xlsrecord.SIIndex],
0x1066: ["CHESCHERFORMAT", "?"]
}
@@ -475,19 +476,15 @@ class XLDirStream(object):
self.strmData.encrypted = True
def fillModel (self, model):
+ # Read the record here (not via getNextRecordHandler) because the header
+ # is still needed by __postReadRecord() below.
pos, header, size, bytes = self.__readRecordBytes()
handler = self.__getRecordHandler(header, size, bytes)
if handler != None:
handler.fillModel(model)
self.__postReadRecord(header)
- def readRecordXML (self):
+ def getNextRecordHandler (self):
pos, header, size, bytes = self.__readRecordBytes()
- handler = self.__getRecordHandler(header, size, bytes)
- print (recData[header][1])
- self.__postReadRecord(header)
- return header
+ return self.__getRecordHandler(header, size, bytes)
def readRecord (self):
pos, header, size, bytes = self.__readRecordBytes()
diff --git a/xls-dump.py b/xls-dump.py
index 00d5c4a..0abd70f 100755
--- a/xls-dump.py
+++ b/xls-dump.py
@@ -29,6 +29,7 @@
import sys, os.path, optparse
sys.path.append(sys.path[0]+"/src")
import ole, xlsstream, globals, node, xlsmodel, olestream
+import xlsparser
from globals import error
@@ -82,13 +83,17 @@ class XLDumper(object):
def dumpXML (self):
self.__parseFile()
dirs = self.strm.getDirectoryEntries()
+ docroot = node.Root()
+ root = docroot.appendElement('xls-dump')
+
for d in dirs:
if d.Name != "Workbook":
# for now, we only dump the Workbook directory stream.
continue
dirstrm = self.strm.getDirectoryStream(d)
- self.__readSubStreamXML(dirstrm)
+ data = self.__readSubStreamXML(dirstrm)
+ self.__dumpDataAsXML(data, root)
def dumpCanonicalXML (self):
self.__parseFile()
@@ -167,12 +172,19 @@ class XLDumper(object):
except olestream.CompObjStreamError:
globals.error("failed to parse CompObj stream.\n")
+ def __dumpDataAsXML(self, data, root):
+ print data
+
def __readSubStreamXML (self, strm):
+ handlers = []
try:
while True:
- strm.readRecordXML()
+ handler = strm.getNextRecordHandler()
+ handlers.append(handler)
except xlsstream.EndOfStream:
pass
+ parser = xlsparser.XlsParser(handlers)
+ return parser.dumpData()
def __buildWorkbookModel (self, strm):
model = xlsmodel.Workbook()
commit 74a24e29cdc0329c9e96dad87d8f122c81c459eb
Author: Sergey Kishchenko <voidwrk at gmail.com>
Date: Mon Sep 12 10:06:32 2011 +0300
xls-dump canonical xml dump fix
diff --git a/src/xlsmodel.py b/src/xlsmodel.py
index f47a088..5c10cad 100644
--- a/src/xlsmodel.py
+++ b/src/xlsmodel.py
@@ -50,17 +50,25 @@ class Workbook(ModelBase):
# private members
self.__sheets = []
- def appendSheet (self):
- n = len(self.__sheets)
- if n == 0:
- self.__sheets.append(WorkbookGlobal())
- else:
- self.__sheets.append(Worksheet(n-1))
+
+ def appendSheet (self, sheetType):
+ def raiseError(cause):
+ # Return the deferred raiser; without the return, the HANDLERS entries
+ # built from raiseError(...) below would all be None.
+ def errorFunc():
+ raise Exception(cause)
+ return errorFunc
+
+ HANDLERS = { 0x0005: WorkbookGlobal,
+ 0x0006: raiseError("Unsupported sheet type: Visual Basic module"),
+ 0x0010: lambda: Worksheet(len(self.__sheets)),
+ 0x0020: Chart,
+ 0x0040: raiseError("Unsupported sheet type: Excel 4.0 macro sheet"),
+ 0x0100: raiseError("Unsupported sheet type: Workspace file")
+ }
... etc. - the rest is truncated
More information about the Libreoffice-commits
mailing list