[Libreoffice-commits] mso-dumper.git: 3 commits - msodumper/docrecord.py msodumper/docstream.py

Miklos Vajna vmiklos at collabora.co.uk
Fri Dec 4 04:43:17 PST 2015


 msodumper/docrecord.py |   62 ++++++++++++++++++++++++++++++++++++++++++++-----
 msodumper/docstream.py |    9 +++++--
 2 files changed, 63 insertions(+), 8 deletions(-)

New commits:
commit d42ef23578d49c25313bc14fa222fd54a80656c1
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Fri Dec 4 12:27:45 2015 +0100

    docrecord: dump PlcfBkfd

diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index 424da09..7a395cb 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -136,6 +136,35 @@ class PlcfBkf(DOCDirStream, PLC):
         print '</plcfBkf>'
 
 
+class PlcfBkfd(DOCDirStream, PLC):
+    """Specified by [MS-DOC] 2.8.11, a PLC whose data elements are FBKFD structures."""
+    def __init__(self, mainStream):
+        DOCDirStream.__init__(self, mainStream.getTableStream().bytes, mainStream=mainStream)
+        PLC.__init__(self, mainStream.lcbPlcfBkfFactoid, 6)  # 6 is defined by 2.8.10
+        self.pos = mainStream.fcPlcfBkfFactoid
+        self.size = mainStream.lcbPlcfBkfFactoid
+        self.aCP = []
+        self.aFBKFD = []
+
+    def dump(self):
+        print '<plcfBkfd type="PlcfBkfd" offset="%d" size="%d bytes">' % (self.pos, self.size)
+        pos = self.pos
+        for i in range(self.getElements()):
+            # aCp
+            start = self.getuInt32(pos=pos)
+            self.aCP.append(start)
+            print '<aCP index="%d" bookmarkStart="%d">' % (i, start)
+            pos += 4
+
+            # aFBKFD
+            # aFBKF = FBKF(self, self.getOffset(self.pos, i))
+            # aFBKF.dump()
+            # self.aFBKF.append(aFBKF)
+            pos += 6
+            print '</aCP>'
+        print '</plcfBkfd>'
+
+
 class Fldch(DOCDirStream):
     """The fldch structure determines the type of the field character."""
     def __init__(self, parent):
diff --git a/msodumper/docstream.py b/msodumper/docstream.py
index c8d99a7..4b22384 100644
--- a/msodumper/docstream.py
+++ b/msodumper/docstream.py
@@ -692,6 +692,10 @@ class WordDocumentStream(DOCDirStream):
     def handleLcbSttbListNames(self):
         docrecord.SttbListNames(self).dump()
 
+    def handleLcbPlcfBkfFactoid(self):
+        self.plcfBkfFactoid = docrecord.PlcfBkfd(self)
+        self.plcfBkfFactoid.dump()
+
     def handleLcbFactoidData(self):
         self.factoidData = docrecord.SmartTagData(self)
         self.factoidData.dump()
@@ -782,7 +786,7 @@ class WordDocumentStream(DOCDirStream):
             ["fcSttbfBkmkFactoid"],
             ["lcbSttbfBkmkFactoid"],
             ["fcPlcfBkfFactoid"],
-            ["lcbPlcfBkfFactoid"],
+            ["lcbPlcfBkfFactoid", self.handleLcbPlcfBkfFactoid],
             ["fcPlcfcookie"],
             ["lcbPlcfcookie"],
             ["fcPlcfBklFactoid"],
commit f787f514ed327b56e979dad473d1f0e9ad3d1c77
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Fri Dec 4 10:33:27 2015 +0100

    docstream: allow seeking back to FactoidData later
    
    To allow code like:
    
    self.factoidData.propBagStore.factoidTypes[0].rgbUri.rgxch
    
    or
    
    self.factoidData.propBagStore.stringTable

diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index 319e21b..424da09 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -4067,7 +4067,7 @@ class PBString(DOCDirStream):
                 break
             bytes.append(c)
         encoding = "ascii"
-        print '<rgxch value="%s"/>' % globals.encodeName("".join(map(lambda c: chr(c), bytes)).decode(encoding), lowOnly=True).encode('utf-8')
+        self.printAndSet("rgxch", globals.encodeName("".join(map(lambda c: chr(c), bytes)).decode(encoding), lowOnly=True).encode('utf-8'), hexdump=False)
 
         print '</%s>' % self.name
         self.parent.pos = self.pos
@@ -4084,9 +4084,12 @@ class FactoidType(DOCDirStream):
         print '<factoidType>'
         self.printAndSet("cbFactoid", self.readuInt32())
         self.printAndSet("id", self.readuInt32())
-        PBString(self, "rgbUri").dump()
-        PBString(self, "rgbTag").dump()
-        PBString(self, "rgbDownLoadURL").dump()
+        self.rgbUri = PBString(self, "rgbUri")
+        self.rgbUri.dump()
+        self.rgbTag = PBString(self, "rgbTag")
+        self.rgbTag.dump()
+        self.rgbDownLoadURL = PBString(self, "rgbDownLoadURL")
+        self.rgbDownLoadURL.dump()
         print '</factoidType>'
         self.parent.pos = self.pos
 
diff --git a/msodumper/docstream.py b/msodumper/docstream.py
index 13dddc9..c8d99a7 100644
--- a/msodumper/docstream.py
+++ b/msodumper/docstream.py
@@ -693,7 +693,8 @@ class WordDocumentStream(DOCDirStream):
         docrecord.SttbListNames(self).dump()
 
     def handleLcbFactoidData(self):
-        docrecord.SmartTagData(self).dump()
+        self.factoidData = docrecord.SmartTagData(self)
+        self.factoidData.dump()
 
     def handleLcbSttbfBkmk(self):
         docrecord.SttbfBkmk(self).dump()
commit 9a699098799f5b13ac5c9e8e221d4c703bfbb693
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Fri Dec 4 10:22:13 2015 +0100

    docrecord: dump PropertyBagStore's stringTable

diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py
index 893dc07..319e21b 100644
--- a/msodumper/docrecord.py
+++ b/msodumper/docrecord.py
@@ -4043,14 +4043,18 @@ class SttbListNames(DOCDirStream):
 
 class PBString(DOCDirStream):
     """Specified by [MS-OSHARED] 2.3.4.5, specifies a null-terminated string."""
-    def __init__(self, parent, name):
+    def __init__(self, parent, name, index=None):
         DOCDirStream.__init__(self, parent.bytes)
         self.parent = parent
         self.pos = parent.pos
         self.name = name
+        self.index = index
 
     def dump(self):
-        print '<%s type="PBString">' % self.name
+        if self.index is None:
+            print '<%s type="PBString">' % self.name
+        else:
+            print '<%s type="PBString" index="%s">' % (self.name, self.index)
         buf = self.readuInt16()
         self.printAndSet("cch", buf & 0x7fff)  # bits 0..15
         self.printAndSet("fAnsiString", self.getBit(buf, 15))
@@ -4081,9 +4085,10 @@ class FactoidType(DOCDirStream):
         self.printAndSet("cbFactoid", self.readuInt32())
         self.printAndSet("id", self.readuInt32())
         PBString(self, "rgbUri").dump()
-        # rgbTag
-        # rgbDownLoadURL
+        PBString(self, "rgbTag").dump()
+        PBString(self, "rgbDownLoadURL").dump()
         print '</factoidType>'
+        self.parent.pos = self.pos
 
 
 class PropertyBagStore(DOCDirStream):
@@ -4104,6 +4109,19 @@ class PropertyBagStore(DOCDirStream):
             factoidType.dump()
             self.factoidTypes.append(factoidType)
         print '</factoidTypes>'
+        self.printAndSet("cbHdr", self.readuInt16())
+        assert self.cbHdr == 0xc
+        self.printAndSet("sVer", self.readuInt16())
+        assert self.sVer == 0x0100
+        self.printAndSet("cfactoid", self.readuInt32())
+        self.printAndSet("cste", self.readuInt32())
+        print '<stringTable>'
+        self.stringTable = []
+        for i in range(self.cste):
+            string = PBString(self, "stringTable", index=i)
+            string.dump()
+            self.stringTable.append(string)
+        print '</stringTable>'
         print '</propBagStore>'
 
 


More information about the Libreoffice-commits mailing list