[Libreoffice-commits] mso-dumper.git: 5 commits - msodumper/docrecord.py msodumper/docstream.py

Miklos Vajna vmiklos at collabora.co.uk
Fri Dec 4 06:35:39 PST 2015


 msodumper/docrecord.py |  115 +++++++++++++++++++++++++++++++++++++++++++++++--
 msodumper/docstream.py |   12 ++++-
 2 files changed, 121 insertions(+), 6 deletions(-)

New commits:
commit 23cbc57ead1e061b57d8e31e1535d4d0b57facee
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Fri Dec 4 15:28:21 2015 +0100

    docrecord: dump FTO

diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index af88e3a..ca54445 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -4273,6 +4273,16 @@ class SttbfBkmk(DOCDirStream):
         print '</sttbfBkmk>'
 
 
+# The FTO enumerated type identifies the feature that is responsible for
+# creating a given smart tag in a document.
+FTO = {
+    0x0000: "ftoUnknown",
+    0x0001: "ftoGrammar",
+    0x0002: "ftoScanDll",
+    0x0003: "ftoVB"
+}
+
+
 class FACTOIDINFO(DOCDirStream):
     """Specified by [MS-DOC] 2.9.66, contains information about a smart tag
     bookmark in the document."""
@@ -4287,7 +4297,7 @@ class FACTOIDINFO(DOCDirStream):
         buf = self.readuInt16()
         self.printAndSet("fSubEntry", self.getBit(buf, 0))
         self.printAndSet("fUnused", (buf & 0xfffe) >> 1)  # 2..16th bits
-        self.printAndSet("fto", self.readuInt16())  # TODO dump FTO
+        self.printAndSet("fto", self.readuInt16(), dict=FTO)
         self.printAndSet("pfpb", self.readuInt32())
         print '</factoidinfo>'
         self.parent.pos = self.pos
commit fbf855c65d44291725af5476038bfaec84694a8b
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Fri Dec 4 15:24:43 2015 +0100

    docrecord: dump SttbfBkmkFactoid

diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index c521962..af88e3a 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -4272,4 +4272,47 @@ class SttbfBkmk(DOCDirStream):
         assert self.pos == self.mainStream.fcSttbfBkmk + self.size
         print '</sttbfBkmk>'
 
+
+class FACTOIDINFO(DOCDirStream):
+    """Specified by [MS-DOC] 2.9.66, contains information about a smart tag
+    bookmark in the document."""
+    def __init__(self, parent):
+        DOCDirStream.__init__(self, parent.bytes)
+        self.parent = parent
+        self.pos = parent.pos
+
+    def dump(self):
+        print '<factoidinfo>'
+        self.printAndSet("dwId", self.readuInt32())
+        buf = self.readuInt16()
+        self.printAndSet("fSubEntry", self.getBit(buf, 0))
+        self.printAndSet("fUnused", (buf & 0xfffe) >> 1)  # 2..16th bits
+        self.printAndSet("fto", self.readuInt16())  # TODO dump FTO
+        self.printAndSet("pfpb", self.readuInt32())
+        print '</factoidinfo>'
+        self.parent.pos = self.pos
+
+
+class SttbfBkmkFactoid(DOCDirStream):
+    """Specified by [MS-DOC] 2.9.281, an STTB whose strings are FACTOIDINFO structures."""
+    def __init__(self, mainStream):
+        DOCDirStream.__init__(self, mainStream.getTableStream().bytes)
+        self.pos = mainStream.fcSttbfBkmkFactoid
+        self.size = mainStream.lcbSttbfBkmkFactoid
+        self.mainStream = mainStream
+
+    def dump(self):
+        print '<sttbfBkmkFactoid type="SttbfBkmkFactoid" offset="%d" size="%d bytes">' % (self.pos, self.size)
+        self.printAndSet("fExtended", self.readuInt16())
+        assert self.fExtended == 0xffff
+        self.printAndSet("cData", self.readuInt16())
+        self.printAndSet("cbExtra", self.readuInt16())
+        assert self.cbExtra == 0
+        self.printAndSet("cchData", self.readuInt16())
+        assert self.cchData == 0x6
+        for i in range(self.cData):
+            FACTOIDINFO(self).dump()
+        assert self.pos == self.mainStream.fcSttbfBkmk + self.size
+        print '</sttbfBkmkFactoid>'
+
 # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
diff --git a/msodumper/docstream.py b/msodumper/docstream.py
index fda523f..83e2149 100644
--- a/msodumper/docstream.py
+++ b/msodumper/docstream.py
@@ -692,6 +692,10 @@ class WordDocumentStream(DOCDirStream):
     def handleLcbSttbListNames(self):
         docrecord.SttbListNames(self).dump()
 
+    def handleLcbSttbfBkmkFactoid(self):
+        self.sttbfBkmkFactoid = docrecord.SttbfBkmkFactoid(self)
+        self.sttbfBkmkFactoid.dump()
+
     def handleLcbPlcfBkfFactoid(self):
         self.plcfBkfFactoid = docrecord.PlcfBkfd(self)
         self.plcfBkfFactoid.dump()
@@ -788,7 +792,7 @@ class WordDocumentStream(DOCDirStream):
             ["fcPlrsid"],
             ["lcbPlrsid"],
             ["fcSttbfBkmkFactoid"],
-            ["lcbSttbfBkmkFactoid"],
+            ["lcbSttbfBkmkFactoid", self.handleLcbSttbfBkmkFactoid],
             ["fcPlcfBkfFactoid"],
             ["lcbPlcfBkfFactoid", self.handleLcbPlcfBkfFactoid],
             ["fcPlcfcookie"],
commit 5b305ef98708153b7474e66823e1eb55022ea172
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Fri Dec 4 14:25:13 2015 +0100

    docrecord: dump FBKLD

diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index 63e0507..c521962 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -178,6 +178,19 @@ class PlcfBkfd(DOCDirStream, PLC):
         print '</plcfBkfd>'
 
 
+class FBKLD(DOCDirStream):
+    """Specified by [MS-DOC] 2.9.72, contains information about a bookmark."""
+    def __init__(self, parent, offset):
+        DOCDirStream.__init__(self, parent.bytes)
+        self.pos = offset
+
+    def dump(self):
+        print '<aFBKLD type="FBKLD" offset="%d">' % self.pos
+        self.printAndSet("ibkf", self.readuInt16())
+        self.printAndSet("cDepth", self.readuInt16())
+        print '</aFBKFD>'
+
+
 class PlcfBkld(DOCDirStream, PLC):
     """Specified by [MS-DOC] 2.8.13, a PLC whose data elements are FBKLD structures."""
     def __init__(self, mainStream):
@@ -195,13 +208,13 @@ class PlcfBkld(DOCDirStream, PLC):
             # aCp
             start = self.getuInt32(pos=pos)
             self.aCP.append(start)
-            print '<aCP index="%d" bookmarkStart="%d">' % (i, start)
+            print '<aCP index="%d" bookmarkEnd="%d">' % (i, start)
             pos += 4
 
             # aFBKFD
-            # aFBKFD = FBKFD(self, self.getOffset(self.pos, i))
-            # aFBKFD.dump()
-            # self.aFBKFD.append(aFBKFD)
+            aFBKFD = FBKFD(self, self.getOffset(self.pos, i))
+            aFBKFD.dump()
+            self.aFBKFD.append(aFBKFD)
             print '</aCP>'
         print '</plcfBkld>'
 
commit f582306c060ab921b615c06a252c8341ae364a9c
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Fri Dec 4 13:56:00 2015 +0100

    docrecord: dump PlcfBkld

diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index 89e8c50..63e0507 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -178,6 +178,34 @@ class PlcfBkfd(DOCDirStream, PLC):
         print '</plcfBkfd>'
 
 
+class PlcfBkld(DOCDirStream, PLC):
+    """Specified by [MS-DOC] 2.8.13, a PLC whose data elements are FBKLD structures."""
+    def __init__(self, mainStream):
+        DOCDirStream.__init__(self, mainStream.getTableStream().bytes, mainStream=mainStream)
+        PLC.__init__(self, mainStream.lcbPlcfBklFactoid, 4)  # 4 is defined by the spec
+        self.pos = mainStream.fcPlcfBklFactoid
+        self.size = mainStream.lcbPlcfBklFactoid
+        self.aCP = []
+        self.aFBKFD = []
+
+    def dump(self):
+        print '<plcfBkld type="PlcfBkld" offset="%d" size="%d bytes">' % (self.pos, self.size)
+        pos = self.pos
+        for i in range(self.getElements()):
+            # aCp
+            start = self.getuInt32(pos=pos)
+            self.aCP.append(start)
+            print '<aCP index="%d" bookmarkStart="%d">' % (i, start)
+            pos += 4
+
+            # aFBKFD
+            # aFBKFD = FBKFD(self, self.getOffset(self.pos, i))
+            # aFBKFD.dump()
+            # self.aFBKFD.append(aFBKFD)
+            print '</aCP>'
+        print '</plcfBkld>'
+
+
 class Fldch(DOCDirStream):
     """The fldch structure determines the type of the field character."""
     def __init__(self, parent):
diff --git a/msodumper/docstream.py b/msodumper/docstream.py
index 4b22384..fda523f 100644
--- a/msodumper/docstream.py
+++ b/msodumper/docstream.py
@@ -696,6 +696,10 @@ class WordDocumentStream(DOCDirStream):
         self.plcfBkfFactoid = docrecord.PlcfBkfd(self)
         self.plcfBkfFactoid.dump()
 
+    def handleLcbPlcfBklFactoid(self):
+        self.plcfBklFactoid = docrecord.PlcfBkld(self)
+        self.plcfBklFactoid.dump()
+
     def handleLcbFactoidData(self):
         self.factoidData = docrecord.SmartTagData(self)
         self.factoidData.dump()
@@ -790,7 +794,7 @@ class WordDocumentStream(DOCDirStream):
             ["fcPlcfcookie"],
             ["lcbPlcfcookie"],
             ["fcPlcfBklFactoid"],
-            ["lcbPlcfBklFactoid"],
+            ["lcbPlcfBklFactoid", self.handleLcbPlcfBklFactoid],
             ["fcFactoidData"],
             ["lcbFactoidData", self.handleLcbFactoidData],
             ["fcDocUndo"],
commit 0ba8e375d10cfac35b27d21621104945d2da26fe
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Fri Dec 4 13:51:34 2015 +0100

    docrecord: dump FBKFD

diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index 7a395cb..89e8c50 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -136,6 +136,20 @@ class PlcfBkf(DOCDirStream, PLC):
         print '</plcfBkf>'
 
 
+class FBKFD(DOCDirStream):
+    """Specified by [MS-DOC] 2.9.71, contains information about a bookmark."""
+    def __init__(self, parent, offset):
+        DOCDirStream.__init__(self, parent.bytes)
+        self.pos = offset
+
+    def dump(self):
+        print '<aFBKFD type="FBKFD" offset="%d">' % self.pos
+        FBKF(self, self.pos).dump()
+        self.pos += 4
+        self.printAndSet("cDepth", self.readInt16())
+        print '</aFBKFD>'
+
+
 class PlcfBkfd(DOCDirStream, PLC):
     """Specified by [MS-DOC] 2.8.11, a PLC whose data elements are FBKFD structures."""
     def __init__(self, mainStream):
@@ -157,10 +171,9 @@ class PlcfBkfd(DOCDirStream, PLC):
             pos += 4
 
             # aFBKFD
-            # aFBKF = FBKF(self, self.getOffset(self.pos, i))
-            # aFBKF.dump()
-            # self.aFBKF.append(aFBKF)
-            pos += 6
+            aFBKFD = FBKFD(self, self.getOffset(self.pos, i))
+            aFBKFD.dump()
+            self.aFBKFD.append(aFBKFD)
             print '</aCP>'
         print '</plcfBkfd>'
 


More information about the Libreoffice-commits mailing list