[Libreoffice-commits] .: src/docrecord.py src/docstream.py

Tue Nov 20 07:12:52 PST 2012

src/docrecord.py |   21 +++++++++++++++++++++
 src/docstream.py |   11 +++++++++--
 2 files changed, 30 insertions(+), 2 deletions(-)

New commits:
commit 68dadb716e6ee7f12f1ad804458896291ac67611
Author: Miklos Vajna <vmiklos at suse.cz>
Date:   Tue Nov 20 16:07:04 2012 +0100

    dump PlcfandTxt (comment text)

diff --git a/src/docrecord.py b/src/docrecord.py
index 46369d3..6c51a48 100755
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -359,6 +359,27 @@ class PlcBteChpx(DOCDirStream, PLC):
             print '</aFC>'
         print '</plcBteChpx>'
 
+class PlcfandTxt(DOCDirStream, PLC):
+    """The PlcfandTxt structure is a PLC that contains only CPs and no additional data."""
+    def __init__(self, mainStream, offset, size):  # offset and size are stored below as self.pos and self.size
+        DOCDirStream.__init__(self, mainStream.doc.getDirectoryStreamByName("1Table").bytes, mainStream=mainStream)  # this object reads from the bytes of the "1Table" directory stream
+        PLC.__init__(self, size, 0)  # NOTE(review): the 0 is presumably the per-element data size (CPs only) -- confirm against PLC.__init__
+        self.pos = offset
+        self.size = size
+
+    def dump(self):  # print this PLC as an XML-like <plcfandTxt> element containing one <aCP> element per loop iteration
+        print '<plcfandTxt type="PlcfandTxt" offset="%d" size="%d bytes">' % (self.pos, self.size)
+        offset = self.mainStream.fcMin + self.mainStream.ccpText + self.mainStream.ccpFtn + self.mainStream.ccpHdd # base added to every start/end CP before reading mainStream.bytes; TODO do this in a better way when headers are handled
+        pos = self.pos
+        for i in range(self.getElements() - 1):  # NOTE(review): loop bound depends on PLC.getElements(), which is not visible here -- confirm the final range is meant to be skipped
+            start = self.getuInt32(pos = pos)  # CP at the current position
+            end = self.getuInt32(pos = pos + 4)  # the following CP, read 4 bytes further on
+            print '<aCP index="%d" start="%d" end="%d">' % (i, start, end)
+            print '<transformed value="%s"/>' % FcCompressed.getFCTransformedValue(self.mainStream.bytes, offset+start, offset+end)
+            pos += 4  # step forward by one CP, so this iteration's end is the next iteration's start
+            print '</aCP>'
+        print '</plcfandTxt>'
+
 class PlcBtePapx(DOCDirStream, PLC):
     """The PlcBtePapx structure is a PLC that specifies paragraph, table row, or table cell properties."""
     def __init__(self, bytes, mainStream, offset, size):
diff --git a/src/docstream.py b/src/docstream.py
index 559bd0a..b98a0fc 100755
--- a/src/docstream.py
+++ b/src/docstream.py
@@ -125,7 +125,8 @@ class WordDocumentStream(DOCDirStream):
         self.pos += 2
         self.printAndSet("reserved4", self.getuInt16())
         self.pos += 2
-        self.printAndSet("reserved5", self.getuInt32())
+        # reserved5 in the spec, offset of first character of text according to LO ww8 import filter
+        self.printAndSet("fcMin", self.getuInt32())
         self.pos += 4
         self.printAndSet("reserved6", self.getuInt32())
         self.pos += 4
@@ -198,7 +199,7 @@ class WordDocumentStream(DOCDirStream):
             ["fcPlcfandRef"],
             ["lcbPlcfandRef"],
             ["fcPlcfandTxt"],
-            ["lcbPlcfandTxt"],
+            ["lcbPlcfandTxt", self.handleLcbPlcfandTxt],
             ["fcPlcfSed"],
             ["lcbPlcfSed"],
             ["fcPlcPad"],
@@ -411,6 +412,12 @@ class WordDocumentStream(DOCDirStream):
         stsh = docrecord.STSH(self.doc.getDirectoryStreamByName("1Table").bytes, self, offset, size)
         stsh.dump()
 
+    def handleLcbPlcfandTxt(self):  # handler listed next to the "lcbPlcfandTxt" field; builds a docrecord.PlcfandTxt and dumps it
+        offset = self.fcPlcfandTxt  # NOTE(review): presumably set when the "fcPlcfandTxt" field was read earlier -- confirm
+        size = self.lcbPlcfandTxt  # NOTE(review): presumably set when the "lcbPlcfandTxt" field was read -- confirm
+        plcfandTxt = docrecord.PlcfandTxt(self, offset, size)  # self is passed as the mainStream argument
+        plcfandTxt.dump()
+
     def dumpFibRgFcLcb97(self, name):
         print '<%s type="FibRgFcLcb97" size="744 bytes">' % name
         self.__dumpFibRgFcLcb97()