[Libreoffice-commits] mso-dumper.git: 3 commits - msodumper/docrecord.py test/doc
Miklos Vajna
vmiklos at collabora.co.uk
Tue Nov 4 03:07:59 PST 2014
msodumper/docrecord.py | 158 ++++++++++++++++++++++++++++++++++++++++++++++---
test/doc/formtext.doc |binary
test/doc/test.py | 9 ++
3 files changed, 159 insertions(+), 8 deletions(-)
New commits:
commit 9a72febc7f4162e55ba7e542ea51f35eb1210824
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Tue Nov 4 12:00:33 2014 +0100
dump FFData
diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index 9ad1056..1586c48 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -1045,6 +1045,91 @@ class PICF(DOCDirStream):
print '</picf>'
+# Maps the numeric iType field value to its symbolic name; passed as dict=
+# to printAndSet() in FFDataBits.dump().
+IType = {
+ 0: "iTypeText",
+ 1: "iTypeChck",
+ 2: "iTypeDrop"
+}
+
+
+# Maps the numeric iTypeTxt field value to its symbolic name; passed as dict=
+# to printAndSet() in FFDataBits.dump().
+ITypeTxt = {
+ 0: "iTypeTxtReg",
+ 1: "iTypeTxtNum",
+ 2: "iTypeTxtDate",
+ 3: "iTypeTxtCurDate",
+ 4: "iTypeTxtCurTime",
+ 5: "iTypeTxtCalc"
+}
+
+
+class FFDataBits(DOCDirStream):
+    """The FFDataBits structure specifies the type and properties for a form
+    field that is specified by a FFData.
+
+    dump() consumes two bytes starting at the parent's position and writes
+    its own final position back to the parent."""
+    def __init__(self, parent):
+        DOCDirStream.__init__(self, parent.bytes)
+        self.pos = parent.pos
+        self.parent = parent
+
+    def dump(self):
+        """Print each bit field of the two bytes inside a <FFDataBits>
+        element."""
+        print '<FFDataBits>'
+        buf = self.readuInt8()
+        self.printAndSet("iType", buf & 0x0003, dict=IType)  # 1..2nd bits
+        # Multi-bit fields that do not start at the lowest bit have to be
+        # shifted down after masking, otherwise the raw masked bits are
+        # reported instead of the field value.
+        self.printAndSet("iRes", (buf & 0x007c) >> 2)  # 3..7th bits
+        # NOTE(review): getBit() is not visible here. If it counts bits from
+        # 0, the 1-based indices used for the flags in this method are off by
+        # one and index 8 can never be set for an 8-bit value -- confirm.
+        self.printAndSet("fOwnHelp", self.getBit(buf, 8))
+        buf = self.readuInt8()
+        self.printAndSet("fOwnStat", self.getBit(buf, 1))
+        self.printAndSet("fProt", self.getBit(buf, 2))
+        self.printAndSet("iSize", self.getBit(buf, 3))
+        # Shifted so that the value matches the 0..5 keys of ITypeTxt.
+        self.printAndSet("iTypeTxt", (buf & 0x0038) >> 3, dict=ITypeTxt)  # 4..6th bits
+        self.printAndSet("fRecalc", self.getBit(buf, 7))
+        self.printAndSet("fHasListBox", self.getBit(buf, 8))
+        print '</FFDataBits>'
+        # Let the parent continue reading after this structure.
+        self.parent.pos = self.pos
+
+
+class FFData(DOCDirStream):
+    """The FFData structure specifies form field data for a text box, check
+    box, or drop-down list box. (Page 348 of [MS-DOC] spec.)"""
+    def __init__(self, parent):
+        DOCDirStream.__init__(self, parent.bytes)
+        self.pos = parent.pos
+        self.parent = parent
+
+    def _dumpXstz(self, name):
+        """Dump the Xstz at the current position as an element called `name`
+        and continue reading from where that Xstz ended."""
+        xstz = Xstz(self, name)
+        xstz.dump()
+        self.pos = xstz.pos
+
+    def dump(self):
+        """Print the fields of the structure inside a <FFData> element, in
+        the order they are read."""
+        print '<FFData>'
+        self.printAndSet("version", self.readuInt32())
+        # Kept as an attribute: the iType it sets decides which optional
+        # fields are read below.
+        self.bits = FFDataBits(self)
+        self.bits.dump()
+        self.printAndSet("cch", self.readuInt16())
+        self.printAndSet("hps", self.readuInt16())
+        self._dumpXstz("xstzName")
+        # xstzTextDef and wDef are alternatives: [MS-DOC] has the default
+        # text present only for text boxes and the default state only for
+        # check boxes and drop-down lists.
+        if self.bits.iType == 0:  # iTypeText
+            self._dumpXstz("xstzTextDef")
+        elif self.bits.iType in (1, 2):  # iTypeChck or iTypeDrop
+            self.printAndSet("wDef", self.readuInt16())
+        for name in ("xstzTextFormat", "xstzHelpText", "xstzStatText", "xstzEntryMcr", "xstzExitMcr"):
+            self._dumpXstz(name)
+        if self.bits.iType == 2:  # iTypeDrop
+            print '<todo what="FFData::dump(): handle hsttbDropList for iTypeDrop"/>'
+        print '</FFData>'
+
+
class NilPICFAndBinData(DOCDirStream):
"""The NilPICFAndBinData structure that holds header information and binary
data for a hyperlink, form field, or add-in field. The NilPICFAndBinData
@@ -1079,7 +1164,7 @@ class NilPICFAndBinData(DOCDirStream):
self.printAndSet("ignored15", self.readInt16())
fieldType = chpxFkp.transformeds[-2]
if fieldType == " FORMTEXT ":
- print '<todo what="NilPICFAndBinData::dump(): FORMTEXT"/>'
+ FFData(self).dump()
else:
print '<todo what="NilPICFAndBinData::dump(): handle %s"/>' % fieldType
print '</NilPICFAndBinData>'
@@ -3137,17 +3222,18 @@ class Xst(DOCDirStream):
class Xstz(DOCDirStream):
"""The Xstz structure is a string. The string is prepended by its length and is null-terminated."""
- def __init__(self, parent):
+ def __init__(self, parent, name="xstz"):
DOCDirStream.__init__(self, parent.bytes)
self.pos = parent.pos
+ self.name = name
def dump(self):
- print '<xstz type="Xstz" offset="%d">' % self.pos
+ print '<%s type="Xstz" offset="%d">' % (self.name, self.pos)
xst = Xst(self)
xst.dump()
self.pos = xst.pos
self.printAndSet("chTerm", self.readuInt16())
- print '</xstz>'
+ print '</%s>' % self.name
class UpxPapx(DOCDirStream):
diff --git a/test/doc/formtext.doc b/test/doc/formtext.doc
new file mode 100644
index 0000000..5f92c3e
Binary files /dev/null and b/test/doc/formtext.doc differ
diff --git a/test/doc/test.py b/test/doc/test.py
index e7c42b2..c63bc11 100755
--- a/test/doc/test.py
+++ b/test/doc/test.py
@@ -227,6 +227,15 @@ class Test(unittest.TestCase):
actual = self.root.findall(xpath)[0].attrib['value']
self.assertEqual(expected, actual)
+    def test_formtext(self):
+        self.dump('formtext')
+
+        # The cch field of the dumped FFData has to report a max length of 5 chars.
+        cchPath = 'stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfBteChpx/plcBteChpx/aFC/aPnBteChpx/chpxFkp/rgfc/chpx/prl/sprm/NilPICFAndBinData/FFData/cch'
+        firstMatch = self.root.findall(cchPath)[0]
+        self.assertEqual("0x5", firstMatch.attrib['value'])
+
if __name__ == '__main__':
unittest.main()
commit 3701f745d7f3397f4166110a3a743ec7e25dba80
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Tue Nov 4 11:19:45 2014 +0100
let NilPICFAndBinData know its field type
diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index 71e84f3..9ad1056 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -1057,7 +1057,31 @@ class NilPICFAndBinData(DOCDirStream):
def dump(self):
print '<NilPICFAndBinData>'
- print '<todo what="NilPICFAndBinData::dump()"/>'
+ # self -> sprm -> prl -> chpx -> chpxFkp
+ chpxFkp = self.parent.parent.parent.parent
+ self.printAndSet("lcb", self.readInt32())
+ self.printAndSet("cbHeader", self.readInt16())
+ self.printAndSet("ignored0", self.readInt32())
+ self.printAndSet("ignored1", self.readInt32())
+ self.printAndSet("ignored2", self.readInt32())
+ self.printAndSet("ignored3", self.readInt32())
+ self.printAndSet("ignored4", self.readInt32())
+ self.printAndSet("ignored5", self.readInt32())
+ self.printAndSet("ignored6", self.readInt32())
+ self.printAndSet("ignored7", self.readInt32())
+ self.printAndSet("ignored8", self.readInt32())
+ self.printAndSet("ignored9", self.readInt32())
+ self.printAndSet("ignored10", self.readInt32())
+ self.printAndSet("ignored11", self.readInt32())
+ self.printAndSet("ignored12", self.readInt32())
+ self.printAndSet("ignored13", self.readInt32())
+ self.printAndSet("ignored14", self.readInt32())
+ self.printAndSet("ignored15", self.readInt16())
+ fieldType = chpxFkp.transformeds[-2]
+ if fieldType == " FORMTEXT ":
+ print '<todo what="NilPICFAndBinData::dump(): FORMTEXT"/>'
+ else:
+ print '<todo what="NilPICFAndBinData::dump(): handle %s"/>' % fieldType
print '</NilPICFAndBinData>'
@@ -1461,8 +1485,9 @@ class GrpPrlAndIstd(DOCDirStream):
class Chpx(DOCDirStream):
"""The Chpx structure specifies a set of properties for text."""
- def __init__(self, bytes, mainStream, offset, transformed=None):
- DOCDirStream.__init__(self, bytes, mainStream=mainStream)
+ def __init__(self, parent, mainStream, offset, transformed=None):
+ DOCDirStream.__init__(self, parent.bytes, mainStream=mainStream)
+ self.parent = parent
self.pos = offset
self.transformed = transformed
@@ -1531,6 +1556,7 @@ class ChpxFkp(DOCDirStream):
print '<chpxFkp type="ChpxFkp" offset="%d" size="%d bytes">' % (self.pos, self.size)
self.crun = self.getuInt8(pos=self.pos + self.size - 1)
pos = self.pos
+ self.transformeds = []
for i in range(self.crun):
# rgfc
start = self.getuInt32(pos=pos)
@@ -1538,12 +1564,13 @@ class ChpxFkp(DOCDirStream):
print '<rgfc index="%d" start="%d" end="%d">' % (i, start, end)
self.transformed = self.quoteAttr(self.pnFkpChpx.mainStream.retrieveOffset(start, end))
print '<transformed value="%s"/>' % self.transformed
+ self.transformeds.append(self.transformed)
pos += 4
# rgbx
offset = PLC.getPLCOffset(self.pos, self.crun, 1, i)
chpxOffset = self.getuInt8(pos=offset) * 2
- chpx = Chpx(self.bytes, self.mainStream, self.pos + chpxOffset, self.transformed)
+ chpx = Chpx(self, self.mainStream, self.pos + chpxOffset, self.transformed)
chpx.dump()
print '</rgfc>'
commit 867133098167634e2a69362cd1a491ffb00be612
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Tue Nov 4 10:54:59 2014 +0100
doc: NilPICFAndBinData skeleton
diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index 66f5653..71e84f3 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -1045,6 +1045,22 @@ class PICF(DOCDirStream):
print '</picf>'
+class NilPICFAndBinData(DOCDirStream):
+ """The NilPICFAndBinData structure that holds header information and binary
+ data for a hyperlink, form field, or add-in field. The NilPICFAndBinData
+ structure MUST be stored in the Data Stream."""
+ def __init__(self, parent):
+ # The bytes come from the "Data" directory stream, not from the parent's own bytes.
+ dataStream = parent.mainStream.doc.getDirectoryStreamByName("Data")
+ DOCDirStream.__init__(self, dataStream.bytes)
+ # The parent's operand is used as the start position inside that stream.
+ self.pos = parent.operand
+ self.parent = parent
+
+ def dump(self):
+ # Skeleton only: prints the wrapper element with a todo marker, reads nothing.
+ print '<NilPICFAndBinData>'
+ print '<todo what="NilPICFAndBinData::dump()"/>'
+ print '</NilPICFAndBinData>'
+
+
class PICFAndOfficeArtData(DOCDirStream):
"""The PICFAndOfficeArtData structure specifies header information and
binary data for a picture."""
@@ -1268,6 +1284,7 @@ class Sprm(DOCDirStream):
def __init__(self, parent, mainStream=None, transformed=None):
DOCDirStream.__init__(self, parent.bytes, mainStream=mainStream)
self.parent = parent
+ self.transformed = transformed
self.pos = parent.pos
self.operandSizeMap = {
0: 1,
@@ -1298,8 +1315,9 @@ class Sprm(DOCDirStream):
self.operand = self.getuInt24()
elif self.getOperandSize() == 4:
self.operand = self.getuInt32()
- if self.sprm == 0x6a03 and transformed == r"\x01":
- self.ct = PICFAndOfficeArtData(self)
+ if self.sprm == 0x6a03 and transformed == r"\x01": # sprmCPicLocation
+ # Can't decide right now, depends on if there will be an sprmCFData later or not.
+ self.ct = True
elif self.sprm == 0x6646: # sprmPHugePapx
dataStream = mainStream.doc.getDirectoryStreamByName("Data")
dataStream.pos = self.operand
@@ -1368,6 +1386,17 @@ class Sprm(DOCDirStream):
attrs.append('operand="%s"' % hex(self.operand))
print '<sprm %s%s>' % (" ".join(attrs), {True: "/", False: ""}[close])
if self.ct:
+ if type(self.ct) == bool:
+ if self.sprm == 0x6a03 and self.transformed == r"\x01":
+ haveCFData = False
+ for prl in self.parent.parent.prls:
+ if prl.sprm.sprm == 0x0806: # sprmCFData
+ haveCFData = True
+ break
+ if haveCFData:
+ self.ct = NilPICFAndBinData(self)
+ else:
+ self.ct = PICFAndOfficeArtData(self)
self.ct.dump()
print '</sprm>'
More information about the Libreoffice-commits
mailing list