[Libreoffice-commits] mso-dumper.git: 3 commits - msodumper/docrecord.py test/doc

Miklos Vajna vmiklos at collabora.co.uk
Tue Nov 4 03:07:59 PST 2014


 msodumper/docrecord.py |  158 ++++++++++++++++++++++++++++++++++++++++++++++---
 test/doc/formtext.doc  |binary
 test/doc/test.py       |    9 ++
 3 files changed, 159 insertions(+), 8 deletions(-)

New commits:
commit 9a72febc7f4162e55ba7e542ea51f35eb1210824
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Tue Nov 4 12:00:33 2014 +0100

    dump FFData

diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index 9ad1056..1586c48 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -1045,6 +1045,91 @@ class PICF(DOCDirStream):
         print '</picf>'
 
 
+IType = {
+    0: "iTypeText",
+    1: "iTypeChck",
+    2: "iTypeDrop"
+}
+
+
+ITypeTxt = {
+    0: "iTypeTxtReg",
+    1: "iTypeTxtNum",
+    2: "iTypeTxtDate",
+    3: "iTypeTxtCurDate",
+    4: "iTypeTxtCurTime",
+    5: "iTypeTxtCalc"
+}
+
+
+class FFDataBits(DOCDirStream):
+    """The FFDataBits structure specifies the type and properties for a form
+    field that is specified by a FFData."""
+    def __init__(self, parent):
+        DOCDirStream.__init__(self, parent.bytes)
+        self.pos = parent.pos
+        self.parent = parent
+
+    def dump(self):
+        print '<FFDataBits>'
+        buf = self.readuInt8()
+        self.printAndSet("iType", buf & 0x0003, dict=IType)  # 1..2nd bits
+        self.printAndSet("iRes", buf & 0x007c)  # 3..7th bits
+        self.printAndSet("fOwnHelp", self.getBit(buf, 8))
+        buf = self.readuInt8()
+        self.printAndSet("fOwnStat", self.getBit(buf, 1))
+        self.printAndSet("fProt", self.getBit(buf, 2))
+        self.printAndSet("iSize", self.getBit(buf, 3))
+        self.printAndSet("iTypeTxt", buf & 0x0038, dict=ITypeTxt)  # 4..6th bits
+        self.printAndSet("fRecalc", self.getBit(buf, 7))
+        self.printAndSet("fHasListBox", self.getBit(buf, 8))
+        print '</FFDataBits>'
+        self.parent.pos = self.pos
+
+
+class FFData(DOCDirStream):
+    """The FFData structure specifies form field data for a text box, check
+    box, or drop-down list box. (Page 348 of [MS-DOC] spec.)"""
+    def __init__(self, parent):
+        DOCDirStream.__init__(self, parent.bytes)
+        self.pos = parent.pos
+        self.parent = parent
+
+    def dump(self):
+        print '<FFData>'
+        self.printAndSet("version", self.readuInt32())
+        self.bits = FFDataBits(self)
+        self.bits.dump()
+        self.printAndSet("cch", self.readuInt16())
+        self.printAndSet("hps", self.readuInt16())
+        xstzName = Xstz(self, "xstzName")
+        xstzName.dump()
+        self.pos = xstzName.pos
+        xstzTextDef = Xstz(self, "xstzTextDef")
+        xstzTextDef.dump()
+        self.pos = xstzTextDef.pos
+        if self.bits.iType == 1 or self.bits.iType == 2:  # iTypeChck or iTypeDrop
+            self.printAndSet("wDef", self.readuInt16())
+        xstzTextFormat = Xstz(self, "xstzTextFormat")
+        xstzTextFormat.dump()
+        self.pos = xstzTextFormat.pos
+        xstzHelpText = Xstz(self, "xstzHelpText")
+        xstzHelpText.dump()
+        self.pos = xstzHelpText.pos
+        xstzStatText = Xstz(self, "xstzStatText")
+        xstzStatText.dump()
+        self.pos = xstzStatText.pos
+        xstzEntryMcr = Xstz(self, "xstzEntryMcr")
+        xstzEntryMcr.dump()
+        self.pos = xstzEntryMcr.pos
+        xstzExitMcr = Xstz(self, "xstzExitMcr")
+        xstzExitMcr.dump()
+        self.pos = xstzExitMcr.pos
+        if self.bits.iType == 2:  # iTypeDrop
+            print '<todo what="FFData::dump(): handle hsttbDropList for iTypeDrop"/>'
+        print '</FFData>'
+
+
 class NilPICFAndBinData(DOCDirStream):
     """The NilPICFAndBinData structure that holds header information and binary
     data for a hyperlink, form field, or add-in field. The NilPICFAndBinData
@@ -1079,7 +1164,7 @@ class NilPICFAndBinData(DOCDirStream):
         self.printAndSet("ignored15", self.readInt16())
         fieldType = chpxFkp.transformeds[-2]
         if fieldType == " FORMTEXT ":
-            print '<todo what="NilPICFAndBinData::dump(): FORMTEXT"/>'
+            FFData(self).dump()
         else:
             print '<todo what="NilPICFAndBinData::dump(): handle %s"/>' % fieldType
         print '</NilPICFAndBinData>'
@@ -3137,17 +3222,18 @@ class Xst(DOCDirStream):
 
 class Xstz(DOCDirStream):
     """The Xstz structure is a string. The string is prepended by its length and is null-terminated."""
-    def __init__(self, parent):
+    def __init__(self, parent, name="xstz"):
         DOCDirStream.__init__(self, parent.bytes)
         self.pos = parent.pos
+        self.name = name
 
     def dump(self):
-        print '<xstz type="Xstz" offset="%d">' % self.pos
+        print '<%s type="Xstz" offset="%d">' % (self.name, self.pos)
         xst = Xst(self)
         xst.dump()
         self.pos = xst.pos
         self.printAndSet("chTerm", self.readuInt16())
-        print '</xstz>'
+        print '</%s>' % self.name
 
 
 class UpxPapx(DOCDirStream):
diff --git a/test/doc/formtext.doc b/test/doc/formtext.doc
new file mode 100644
index 0000000..5f92c3e
Binary files /dev/null and b/test/doc/formtext.doc differ
diff --git a/test/doc/test.py b/test/doc/test.py
index e7c42b2..c63bc11 100755
--- a/test/doc/test.py
+++ b/test/doc/test.py
@@ -227,6 +227,15 @@ class Test(unittest.TestCase):
         actual = self.root.findall(xpath)[0].attrib['value']
         self.assertEqual(expected, actual)
 
+    def test_formtext(self):
+        self.dump('formtext')
+
+        # make sure we find that the max length is 5 chars
+        expected = "0x5"
+        xpath = 'stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfBteChpx/plcBteChpx/aFC/aPnBteChpx/chpxFkp/rgfc/chpx/prl/sprm/NilPICFAndBinData/FFData/cch'
+        actual = self.root.findall(xpath)[0].attrib['value']
+        self.assertEqual(expected, actual)
+
 if __name__ == '__main__':
     unittest.main()
 
commit 3701f745d7f3397f4166110a3a743ec7e25dba80
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Tue Nov 4 11:19:45 2014 +0100

    let NilPICFAndBinData know its field type

diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index 71e84f3..9ad1056 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -1057,7 +1057,31 @@ class NilPICFAndBinData(DOCDirStream):
 
     def dump(self):
         print '<NilPICFAndBinData>'
-        print '<todo what="NilPICFAndBinData::dump()"/>'
+        # self -> sprm -> prl -> chpx -> chpxFkp
+        chpxFkp = self.parent.parent.parent.parent
+        self.printAndSet("lcb", self.readInt32())
+        self.printAndSet("cbHeader", self.readInt16())
+        self.printAndSet("ignored0", self.readInt32())
+        self.printAndSet("ignored1", self.readInt32())
+        self.printAndSet("ignored2", self.readInt32())
+        self.printAndSet("ignored3", self.readInt32())
+        self.printAndSet("ignored4", self.readInt32())
+        self.printAndSet("ignored5", self.readInt32())
+        self.printAndSet("ignored6", self.readInt32())
+        self.printAndSet("ignored7", self.readInt32())
+        self.printAndSet("ignored8", self.readInt32())
+        self.printAndSet("ignored9", self.readInt32())
+        self.printAndSet("ignored10", self.readInt32())
+        self.printAndSet("ignored11", self.readInt32())
+        self.printAndSet("ignored12", self.readInt32())
+        self.printAndSet("ignored13", self.readInt32())
+        self.printAndSet("ignored14", self.readInt32())
+        self.printAndSet("ignored15", self.readInt16())
+        fieldType = chpxFkp.transformeds[-2]
+        if fieldType == " FORMTEXT ":
+            print '<todo what="NilPICFAndBinData::dump(): FORMTEXT"/>'
+        else:
+            print '<todo what="NilPICFAndBinData::dump(): handle %s"/>' % fieldType
         print '</NilPICFAndBinData>'
 
 
@@ -1461,8 +1485,9 @@ class GrpPrlAndIstd(DOCDirStream):
 
 class Chpx(DOCDirStream):
     """The Chpx structure specifies a set of properties for text."""
-    def __init__(self, bytes, mainStream, offset, transformed=None):
-        DOCDirStream.__init__(self, bytes, mainStream=mainStream)
+    def __init__(self, parent, mainStream, offset, transformed=None):
+        DOCDirStream.__init__(self, parent.bytes, mainStream=mainStream)
+        self.parent = parent
         self.pos = offset
         self.transformed = transformed
 
@@ -1531,6 +1556,7 @@ class ChpxFkp(DOCDirStream):
         print '<chpxFkp type="ChpxFkp" offset="%d" size="%d bytes">' % (self.pos, self.size)
         self.crun = self.getuInt8(pos=self.pos + self.size - 1)
         pos = self.pos
+        self.transformeds = []
         for i in range(self.crun):
             # rgfc
             start = self.getuInt32(pos=pos)
@@ -1538,12 +1564,13 @@ class ChpxFkp(DOCDirStream):
             print '<rgfc index="%d" start="%d" end="%d">' % (i, start, end)
             self.transformed = self.quoteAttr(self.pnFkpChpx.mainStream.retrieveOffset(start, end))
             print '<transformed value="%s"/>' % self.transformed
+            self.transformeds.append(self.transformed)
             pos += 4
 
             # rgbx
             offset = PLC.getPLCOffset(self.pos, self.crun, 1, i)
             chpxOffset = self.getuInt8(pos=offset) * 2
-            chpx = Chpx(self.bytes, self.mainStream, self.pos + chpxOffset, self.transformed)
+            chpx = Chpx(self, self.mainStream, self.pos + chpxOffset, self.transformed)
             chpx.dump()
             print '</rgfc>'
 
commit 867133098167634e2a69362cd1a491ffb00be612
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Tue Nov 4 10:54:59 2014 +0100

    doc: NilPICFAndBinData skeleton

diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index 66f5653..71e84f3 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -1045,6 +1045,22 @@ class PICF(DOCDirStream):
         print '</picf>'
 
 
+class NilPICFAndBinData(DOCDirStream):
+    """The NilPICFAndBinData structure that holds header information and binary
+    data for a hyperlink, form field, or add-in field. The NilPICFAndBinData
+    structure MUST be stored in the Data Stream."""
+    def __init__(self, parent):
+        dataStream = parent.mainStream.doc.getDirectoryStreamByName("Data")
+        DOCDirStream.__init__(self, dataStream.bytes)
+        self.pos = parent.operand
+        self.parent = parent
+
+    def dump(self):
+        print '<NilPICFAndBinData>'
+        print '<todo what="NilPICFAndBinData::dump()"/>'
+        print '</NilPICFAndBinData>'
+
+
 class PICFAndOfficeArtData(DOCDirStream):
     """The PICFAndOfficeArtData structure specifies header information and
     binary data for a picture."""
@@ -1268,6 +1284,7 @@ class Sprm(DOCDirStream):
     def __init__(self, parent, mainStream=None, transformed=None):
         DOCDirStream.__init__(self, parent.bytes, mainStream=mainStream)
         self.parent = parent
+        self.transformed = transformed
         self.pos = parent.pos
         self.operandSizeMap = {
             0: 1,
@@ -1298,8 +1315,9 @@ class Sprm(DOCDirStream):
             self.operand = self.getuInt24()
         elif self.getOperandSize() == 4:
             self.operand = self.getuInt32()
-            if self.sprm == 0x6a03 and transformed == r"\x01":
-                self.ct = PICFAndOfficeArtData(self)
+            if self.sprm == 0x6a03 and transformed == r"\x01":  # sprmCPicLocation
+                # Can't decide right now, depends on if there will be an sprmCFData later or not.
+                self.ct = True
             elif self.sprm == 0x6646:  # sprmPHugePapx
                 dataStream = mainStream.doc.getDirectoryStreamByName("Data")
                 dataStream.pos = self.operand
@@ -1368,6 +1386,17 @@ class Sprm(DOCDirStream):
                 attrs.append('operand="%s"' % hex(self.operand))
         print '<sprm %s%s>' % (" ".join(attrs), {True: "/", False: ""}[close])
         if self.ct:
+            if type(self.ct) == bool:
+                if self.sprm == 0x6a03 and self.transformed == r"\x01":
+                    haveCFData = False
+                    for prl in self.parent.parent.prls:
+                        if prl.sprm.sprm == 0x0806:  # sprmCFData
+                            haveCFData = True
+                            break
+                    if haveCFData:
+                        self.ct = NilPICFAndBinData(self)
+                    else:
+                        self.ct = PICFAndOfficeArtData(self)
             self.ct.dump()
             print '</sprm>'
 


More information about the Libreoffice-commits mailing list