[Libreoffice-commits] .: src/docrecord.py src/docstream.py test/doc/bookmark.doc test/doc/bookmark.rtf test/doc/test.py

Miklos Vajna vmiklos at kemper.freedesktop.org
Sat Jan 19 05:25:33 PST 2013


 src/docrecord.py      |   27 +++++++++++++++++++++++++--
 src/docstream.py      |   23 +++++++++++++++++++----
 test/doc/bookmark.doc |binary
 test/doc/bookmark.rtf |    7 +++++++
 test/doc/test.py      |   11 +++++++++++
 5 files changed, 62 insertions(+), 6 deletions(-)

New commits:
commit 8f7af80bf491794f9b1781157cceb25783227741
Author: Miklos Vajna <vmiklos at suse.cz>
Date:   Sat Jan 19 14:25:33 2013 +0100

    doc: dump bookmarks

diff --git a/src/docrecord.py b/src/docrecord.py
index f277196..36b29c3 100644
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -202,11 +202,12 @@ class PlcFld(DOCDirStream, PLC):
 
 class PlcfBkl(DOCDirStream, PLC):
     """The Plcfbkl structure is a PLC that contains only CPs and no additional data."""
-    def __init__(self, mainStream, offset, size):
+    def __init__(self, mainStream, offset, size, start):
         DOCDirStream.__init__(self, mainStream.doc.getDirectoryStreamByName("1Table").bytes, mainStream = mainStream)
         PLC.__init__(self, size, 0) # 0 is defined by 2.8.12
         self.pos = offset
         self.size = size
+        self.start = start
 
     def dump(self):
         print '<plcfBkl type="PlcfBkl" offset="%d" size="%d bytes">' % (self.pos, self.size)
@@ -216,7 +217,7 @@ class PlcfBkl(DOCDirStream, PLC):
             # aCp
             end = offset + self.getuInt32(pos = pos)
             print '<aCP index="%d" bookmarkEnd="%d">' % (i, end)
-            start = self.mainStream.plcfAtnBkf.aCP[i]
+            start = self.start.aCP[i]
             print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveText(start, end))
             pos += 4
             print '</aCP>'
@@ -2609,4 +2610,26 @@ class SttbSavedBy(DOCDirStream):
         assert self.pos == self.mainStream.fcSttbSavedBy + self.size
         print '</sttbSavedBy>'
 
+class SttbfBkmk(DOCDirStream):
+    """The SttbfBkmk structure is an STTB structure whose strings specify the names of bookmarks."""
+    def __init__(self, mainStream):
+        DOCDirStream.__init__(self, mainStream.doc.getDirectoryStreamByName("1Table").bytes)
+        self.pos = mainStream.fcSttbfBkmk
+        self.size = mainStream.lcbSttbfBkmk
+        self.mainStream = mainStream
+
+    def dump(self):
+        print '<sttbfBkmk type="SttbfBkmk" offset="%d" size="%d bytes">' % (self.pos, self.size)
+        self.printAndSet("fExtended", self.readuInt16())
+        self.printAndSet("cData", self.readuInt16())
+        self.printAndSet("cbExtra", self.readuInt16())
+        for i in range(self.cData):
+            cchData = self.readuInt16()
+            print '<cchData index="%d" offset="%d" size="%d bytes">' % (i, self.pos, cchData)
+            print '<string value="%s"/>' % globals.encodeName(self.bytes[self.pos:self.pos+2*cchData].decode('utf-16'), lowOnly = True)
+            self.pos += 2*cchData
+            print '</cchData>'
+        assert self.pos == self.mainStream.fcSttbfBkmk + self.size
+        print '</sttbfBkmk>'
+
 # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
diff --git a/src/docstream.py b/src/docstream.py
index 7633ccc..d9003aa 100644
--- a/src/docstream.py
+++ b/src/docstream.py
@@ -253,11 +253,11 @@ class WordDocumentStream(DOCDirStream):
             ["fcPlcfFldMcr"],
             ["lcbPlcfFldMcr"],
             ["fcSttbfBkmk"],
-            ["lcbSttbfBkmk"],
+            ["lcbSttbfBkmk", self.handleLcbSttbfBkmk],
             ["fcPlcfBkf"],
-            ["lcbPlcfBkf"],
+            ["lcbPlcfBkf", self.handleLcbPlcfBkf],
             ["fcPlcfBkl"],
-            ["lcbPlcfBkl"],
+            ["lcbPlcfBkl", self.handleLcbPlcfBkl],
             ["fcCmds"],
             ["lcbCmds", self.handleLcbCmds],
             ["fcUnused1"],
@@ -489,7 +489,19 @@ class WordDocumentStream(DOCDirStream):
     def handleLcbPlcfAtnBkl(self):
         offset = self.fcPlcfAtnBkl
         size = self.lcbPlcfAtnBkl
-        plcfBkl = docrecord.PlcfBkl(self, offset, size)
+        plcfBkl = docrecord.PlcfBkl(self, offset, size, start = self.plcfAtnBkf)
+        plcfBkl.dump()
+
+    def handleLcbPlcfBkf(self):
+        offset = self.fcPlcfBkf
+        size = self.lcbPlcfBkf
+        self.plcfBkf = docrecord.PlcfBkf(self, offset, size)
+        self.plcfBkf.dump()
+
+    def handleLcbPlcfBkl(self):
+        offset = self.fcPlcfBkl
+        size = self.lcbPlcfBkl
+        plcfBkl = docrecord.PlcfBkl(self, offset, size, start = self.plcfBkf)
         plcfBkl.dump()
 
     def handleLcbPlcfSed(self):
@@ -537,6 +549,9 @@ class WordDocumentStream(DOCDirStream):
     def handleLcbSttbListNames(self):
         docrecord.SttbListNames(self).dump()
 
+    def handleLcbSttbfBkmk(self):
+        docrecord.SttbfBkmk(self).dump()
+
     def dumpFibRgFcLcb97(self, name):
         print '<%s type="FibRgFcLcb97" size="744 bytes">' % name
         self.__dumpFibRgFcLcb97()
diff --git a/test/doc/bookmark.doc b/test/doc/bookmark.doc
new file mode 100755
index 0000000..6d32ff6
Binary files /dev/null and b/test/doc/bookmark.doc differ
diff --git a/test/doc/bookmark.rtf b/test/doc/bookmark.rtf
new file mode 100644
index 0000000..05b3d42
--- /dev/null
+++ b/test/doc/bookmark.rtf
@@ -0,0 +1,7 @@
+{\rtf1
+{\*\bkmkstart firstword}
+Hello
+{\*\bkmkend firstword}
+ world!
+\par
+}
diff --git a/test/doc/test.py b/test/doc/test.py
index 6936f1a..d02f680 100755
--- a/test/doc/test.py
+++ b/test/doc/test.py
@@ -118,6 +118,17 @@ class Test(unittest.TestCase):
         self.assertEqual(1, len(authors))
         self.assertEqual('vmiklos', authors[0].attrib['value'])
 
+    def test_bookmark(self):
+        self.dump('bookmark')
+
+        bookmarkStarts = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfBkf/plcfBkf/aCP')
+        bookmarkEnds = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfBkl/plcfBkl/aCP')
+        self.assertEqual(1, len(bookmarkStarts))
+        self.assertEqual(1, len(bookmarkEnds))
+
+        # The bookmark covers Hello
+        self.assertEqual('Hello', bookmarkEnds[0].findall('transformed')[0].attrib['value'])
+
     def test_zoom(self):
         self.dump('zoom')
         dopBase = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbDop/dop/dop2007/dop2003/dop2002/dop2000/dop97/dop95/dopBase')[0]


More information about the Libreoffice-commits mailing list