[Libreoffice-commits] mso-dumper.git: 2 commits - msodumper/docrecord.py msodumper/docstream.py

Miklos Vajna vmiklos at collabora.co.uk
Thu Dec 3 08:31:49 PST 2015


 msodumper/docrecord.py |   81 ++++++++++++++++++++++++++++++
 msodumper/docstream.py |  130 +++++++++++++++++++++++++++----------------------
 2 files changed, 154 insertions(+), 57 deletions(-)

New commits:
commit 061094f9c354464b9b2b2cf105fb83df3ca8b1f5
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Thu Dec 3 17:30:31 2015 +0100

    docrecord: dump PropertyBagStore

diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index e8228b6..893dc07 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -4041,6 +4041,72 @@ class SttbListNames(DOCDirStream):
         print '</sttbListNames>'
 
 
+class PBString(DOCDirStream):
+    """Specified by [MS-OSHARED] 2.3.4.5, specifies a null-terminated string."""
+    def __init__(self, parent, name):
+        DOCDirStream.__init__(self, parent.bytes)
+        self.parent = parent
+        self.pos = parent.pos
+        self.name = name
+
+    def dump(self):
+        print '<%s type="PBString">' % self.name
+        buf = self.readuInt16()
+        self.printAndSet("cch", buf & 0x7fff)  # bits 0..14
+        self.printAndSet("fAnsiString", self.getBit(buf, 15))
+
+        # TODO support fAnsiString == 0
+        bytes = []
+        for dummy in range(self.cch):
+            c = self.readuInt8()
+            if c == 0:
+                break
+            bytes.append(c)
+        encoding = "ascii"
+        print '<rgxch value="%s"/>' % globals.encodeName("".join(map(lambda c: chr(c), bytes)).decode(encoding), lowOnly=True).encode('utf-8')
+
+        print '</%s>' % self.name
+        self.parent.pos = self.pos
+
+
+class FactoidType(DOCDirStream):
+    """Specified by [MS-OSHARED] 2.3.4.2, specifies the type of smart tag."""
+    def __init__(self, parent):
+        DOCDirStream.__init__(self, parent.bytes)
+        self.parent = parent
+        self.pos = parent.pos
+
+    def dump(self):
+        print '<factoidType>'
+        self.printAndSet("cbFactoid", self.readuInt32())
+        self.printAndSet("id", self.readuInt32())
+        PBString(self, "rgbUri").dump()
+        # TODO: rgbTag is not dumped yet
+        # TODO: rgbDownLoadURL is not dumped yet
+        print '</factoidType>'
+
+
+class PropertyBagStore(DOCDirStream):
+    """Specified by [MS-OSHARED] 2.3.4.1, specifies the shared data for the
+    smart tags embedded in the document."""
+    def __init__(self, parent):
+        DOCDirStream.__init__(self, parent.bytes)
+        self.parent = parent
+        self.pos = parent.pos
+
+    def dump(self):
+        print '<propBagStore type="PropertyBagStore" offset="%s">' % self.pos
+        self.printAndSet("cFactoidType", self.readuInt32())
+        print '<factoidTypes>'
+        self.factoidTypes = []
+        for i in range(self.cFactoidType):
+            factoidType = FactoidType(self)
+            factoidType.dump()
+            self.factoidTypes.append(factoidType)
+        print '</factoidTypes>'
+        print '</propBagStore>'
+
+
 class SmartTagData(DOCDirStream):
     """Specified by [MS-DOC] 2.9.251, stores information about all the smart
     tags in the document."""
@@ -4051,6 +4117,8 @@ class SmartTagData(DOCDirStream):
 
     def dump(self):
         print '<smartTagData type="SmartTagData" offset="%d" size="%d bytes">' % (self.pos, self.size)
+        self.propBagStore = PropertyBagStore(self)
+        self.propBagStore.dump()
         print '</smartTagData>'
 
 
commit 2fc6e88471b38cba85bf7337aa5642ec00a5bf08
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Thu Dec 3 15:53:17 2015 +0100

    docrecord: initial SmartTagData

diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index 4b6dc4f..e8228b6 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -4041,6 +4041,19 @@ class SttbListNames(DOCDirStream):
         print '</sttbListNames>'
 
 
+class SmartTagData(DOCDirStream):
+    """Specified by [MS-DOC] 2.9.251, stores information about all the smart
+    tags in the document."""
+    def __init__(self, mainStream):
+        DOCDirStream.__init__(self, mainStream.getTableStream().bytes, mainStream=mainStream)
+        self.pos = mainStream.fcFactoidData
+        self.size = mainStream.lcbFactoidData
+
+    def dump(self):
+        print '<smartTagData type="SmartTagData" offset="%d" size="%d bytes">' % (self.pos, self.size)
+        print '</smartTagData>'
+
+
 class SttbSavedBy(DOCDirStream):
     """The SttbSavedBy structure is an STTB structure that specifies the save history of this document."""
     def __init__(self, mainStream):
diff --git a/msodumper/docstream.py b/msodumper/docstream.py
index e7f8776..13dddc9 100644
--- a/msodumper/docstream.py
+++ b/msodumper/docstream.py
@@ -692,6 +692,9 @@ class WordDocumentStream(DOCDirStream):
     def handleLcbSttbListNames(self):
         docrecord.SttbListNames(self).dump()
 
+    def handleLcbFactoidData(self):
+        docrecord.SmartTagData(self).dump()
+
     def handleLcbSttbfBkmk(self):
         docrecord.SttbfBkmk(self).dump()
 
@@ -763,65 +766,78 @@ class WordDocumentStream(DOCDirStream):
     def __dumpFibRgFcLcb2002(self):
         self.__dumpFibRgFcLcb2000()
         fields = [
-            "fcUnused1",
-            "lcbUnused1",
-            "fcPlcfPgp",
-            "lcbPlcfPgp",
-            "fcPlcfuim",
-            "lcbPlcfuim",
-            "fcPlfguidUim",
-            "lcbPlfguidUim",
-            "fcAtrdExtra",
-            "lcbAtrdExtra",
-            "fcPlrsid",
-            "lcbPlrsid",
-            "fcSttbfBkmkFactoid",
-            "lcbSttbfBkmkFactoid",
-            "fcPlcfBkfFactoid",
-            "lcbPlcfBkfFactoid",
-            "fcPlcfcookie",
-            "lcbPlcfcookie",
-            "fcPlcfBklFactoid",
-            "lcbPlcfBklFactoid",
-            "fcFactoidData",
-            "lcbFactoidData",
-            "fcDocUndo",
-            "lcbDocUndo",
-            "fcSttbfBkmkFcc",
-            "lcbSttbfBkmkFcc",
-            "fcPlcfBkfFcc",
-            "lcbPlcfBkfFcc",
-            "fcPlcfBklFcc",
-            "lcbPlcfBklFcc",
-            "fcSttbfbkmkBPRepairs",
-            "lcbSttbfbkmkBPRepairs",
-            "fcPlcfbkfBPRepairs",
-            "lcbPlcfbkfBPRepairs",
-            "fcPlcfbklBPRepairs",
-            "lcbPlcfbklBPRepairs",
-            "fcPmsNew",
-            "lcbPmsNew",
-            "fcODSO",
-            "lcbODSO",
-            "fcPlcfpmiOldXP",
-            "lcbPlcfpmiOldXP",
-            "fcPlcfpmiNewXP",
-            "lcbPlcfpmiNewXP",
-            "fcPlcfpmiMixedXP",
-            "lcbPlcfpmiMixedXP",
-            "fcUnused2",
-            "lcbUnused2",
-            "fcPlcffactoid",
-            "lcbPlcffactoid",
-            "fcPlcflvcOldXP",
-            "lcbPlcflvcOldXP",
-            "fcPlcflvcNewXP",
-            "lcbPlcflvcNewXP",
-            "fcPlcflvcMixedXP",
-            "lcbPlcflvcMixedXP",
+            ["fcUnused1"],
+            ["lcbUnused1"],
+            ["fcPlcfPgp"],
+            ["lcbPlcfPgp"],
+            ["fcPlcfuim"],
+            ["lcbPlcfuim"],
+            ["fcPlfguidUim"],
+            ["lcbPlfguidUim"],
+            ["fcAtrdExtra"],
+            ["lcbAtrdExtra"],
+            ["fcPlrsid"],
+            ["lcbPlrsid"],
+            ["fcSttbfBkmkFactoid"],
+            ["lcbSttbfBkmkFactoid"],
+            ["fcPlcfBkfFactoid"],
+            ["lcbPlcfBkfFactoid"],
+            ["fcPlcfcookie"],
+            ["lcbPlcfcookie"],
+            ["fcPlcfBklFactoid"],
+            ["lcbPlcfBklFactoid"],
+            ["fcFactoidData"],
+            ["lcbFactoidData", self.handleLcbFactoidData],
+            ["fcDocUndo"],
+            ["lcbDocUndo"],
+            ["fcSttbfBkmkFcc"],
+            ["lcbSttbfBkmkFcc"],
+            ["fcPlcfBkfFcc"],
+            ["lcbPlcfBkfFcc"],
+            ["fcPlcfBklFcc"],
+            ["lcbPlcfBklFcc"],
+            ["fcSttbfbkmkBPRepairs"],
+            ["lcbSttbfbkmkBPRepairs"],
+            ["fcPlcfbkfBPRepairs"],
+            ["lcbPlcfbkfBPRepairs"],
+            ["fcPlcfbklBPRepairs"],
+            ["lcbPlcfbklBPRepairs"],
+            ["fcPmsNew"],
+            ["lcbPmsNew"],
+            ["fcODSO"],
+            ["lcbODSO"],
+            ["fcPlcfpmiOldXP"],
+            ["lcbPlcfpmiOldXP"],
+            ["fcPlcfpmiNewXP"],
+            ["lcbPlcfpmiNewXP"],
+            ["fcPlcfpmiMixedXP"],
+            ["lcbPlcfpmiMixedXP"],
+            ["fcUnused2"],
+            ["lcbUnused2"],
+            ["fcPlcffactoid"],
+            ["lcbPlcffactoid"],
+            ["fcPlcflvcOldXP"],
+            ["lcbPlcflvcOldXP"],
+            ["fcPlcflvcNewXP"],
+            ["lcbPlcflvcNewXP"],
+            ["fcPlcflvcMixedXP"],
+            ["lcbPlcflvcMixedXP"],
         ]
         for i in fields:
-            self.printAndSet(i, self.readuInt32())
+            value = self.readInt32()
+            hasHandler = len(i) > 1
+            # the spec says these must be ignored
+            needsIgnoring = []
+            # a member needs handling if it defines the size of a struct and it's non-zero
+            needsHandling = i[0].startswith("lcb") and value != 0 and (not i[0] in needsIgnoring)
+            self.printAndSet(i[0], value, end=((not hasHandler) and (not needsHandling)), offset=True)
+            if hasHandler or needsHandling:
+                if needsHandling:
+                    if hasHandler:
+                        i[1]()
+                    else:
+                        print '<todo what="value is non-zero and unhandled"/>'
+                print '</%s>' % i[0]
 
     def __dumpFibRgFcLcb2003(self):
         self.__dumpFibRgFcLcb2002()


More information about the Libreoffice-commits mailing list