[Libreoffice-commits] .: src/docrecord.py src/docstream.py test/doc
Miklos Vajna
vmiklos at kemper.freedesktop.org
Sat Jan 19 05:25:33 PST 2013
src/docrecord.py | 27 +++++++++++++++++++++++++--
src/docstream.py | 23 +++++++++++++++++++----
test/doc/bookmark.doc |binary
test/doc/bookmark.rtf | 7 +++++++
test/doc/test.py | 11 +++++++++++
5 files changed, 62 insertions(+), 6 deletions(-)
New commits:
commit 8f7af80bf491794f9b1781157cceb25783227741
Author: Miklos Vajna <vmiklos at suse.cz>
Date: Sat Jan 19 14:25:33 2013 +0100
doc: dump bookmarks
diff --git a/src/docrecord.py b/src/docrecord.py
index f277196..36b29c3 100644
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -202,11 +202,12 @@ class PlcFld(DOCDirStream, PLC):
class PlcfBkl(DOCDirStream, PLC):
"""The Plcfbkl structure is a PLC that contains only CPs and no additional data."""
- def __init__(self, mainStream, offset, size):
+ def __init__(self, mainStream, offset, size, start):
DOCDirStream.__init__(self, mainStream.doc.getDirectoryStreamByName("1Table").bytes, mainStream = mainStream)
PLC.__init__(self, size, 0) # 0 is defined by 2.8.12
self.pos = offset
self.size = size
+ self.start = start
def dump(self):
print '<plcfBkl type="PlcfBkl" offset="%d" size="%d bytes">' % (self.pos, self.size)
@@ -216,7 +217,7 @@ class PlcfBkl(DOCDirStream, PLC):
# aCp
end = offset + self.getuInt32(pos = pos)
print '<aCP index="%d" bookmarkEnd="%d">' % (i, end)
- start = self.mainStream.plcfAtnBkf.aCP[i]
+ start = self.start.aCP[i]
print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveText(start, end))
pos += 4
print '</aCP>'
@@ -2609,4 +2610,26 @@ class SttbSavedBy(DOCDirStream):
assert self.pos == self.mainStream.fcSttbSavedBy + self.size
print '</sttbSavedBy>'
+class SttbfBkmk(DOCDirStream):
+ """The SttbfBkmk structure is an STTB structure whose strings specify the names of bookmarks."""
+ def __init__(self, mainStream):
+ DOCDirStream.__init__(self, mainStream.doc.getDirectoryStreamByName("1Table").bytes)
+ self.pos = mainStream.fcSttbfBkmk
+ self.size = mainStream.lcbSttbfBkmk
+ self.mainStream = mainStream
+
+ def dump(self):
+ print '<sttbfBkmk type="SttbfBkmk" offset="%d" size="%d bytes">' % (self.pos, self.size)
+ self.printAndSet("fExtended", self.readuInt16())
+ self.printAndSet("cData", self.readuInt16())
+ self.printAndSet("cbExtra", self.readuInt16())
+ for i in range(self.cData):
+ cchData = self.readuInt16()
+ print '<cchData index="%d" offset="%d" size="%d bytes">' % (i, self.pos, cchData)
+ print '<string value="%s"/>' % globals.encodeName(self.bytes[self.pos:self.pos+2*cchData].decode('utf-16'), lowOnly = True)
+ self.pos += 2*cchData
+ print '</cchData>'
+ assert self.pos == self.mainStream.fcSttbfBkmk + self.size
+ print '</sttbfBkmk>'
+
# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
diff --git a/src/docstream.py b/src/docstream.py
index 7633ccc..d9003aa 100644
--- a/src/docstream.py
+++ b/src/docstream.py
@@ -253,11 +253,11 @@ class WordDocumentStream(DOCDirStream):
["fcPlcfFldMcr"],
["lcbPlcfFldMcr"],
["fcSttbfBkmk"],
- ["lcbSttbfBkmk"],
+ ["lcbSttbfBkmk", self.handleLcbSttbfBkmk],
["fcPlcfBkf"],
- ["lcbPlcfBkf"],
+ ["lcbPlcfBkf", self.handleLcbPlcfBkf],
["fcPlcfBkl"],
- ["lcbPlcfBkl"],
+ ["lcbPlcfBkl", self.handleLcbPlcfBkl],
["fcCmds"],
["lcbCmds", self.handleLcbCmds],
["fcUnused1"],
@@ -489,7 +489,19 @@ class WordDocumentStream(DOCDirStream):
def handleLcbPlcfAtnBkl(self):
offset = self.fcPlcfAtnBkl
size = self.lcbPlcfAtnBkl
- plcfBkl = docrecord.PlcfBkl(self, offset, size)
+ plcfBkl = docrecord.PlcfBkl(self, offset, size, start = self.plcfAtnBkf)
+ plcfBkl.dump()
+
+ def handleLcbPlcfBkf(self):
+ offset = self.fcPlcfBkf
+ size = self.lcbPlcfBkf
+ self.plcfBkf = docrecord.PlcfBkf(self, offset, size)
+ self.plcfBkf.dump()
+
+ def handleLcbPlcfBkl(self):
+ offset = self.fcPlcfBkl
+ size = self.lcbPlcfBkl
+ plcfBkl = docrecord.PlcfBkl(self, offset, size, start = self.plcfBkf)
plcfBkl.dump()
def handleLcbPlcfSed(self):
@@ -537,6 +549,9 @@ class WordDocumentStream(DOCDirStream):
def handleLcbSttbListNames(self):
docrecord.SttbListNames(self).dump()
+ def handleLcbSttbfBkmk(self):
+ docrecord.SttbfBkmk(self).dump()
+
def dumpFibRgFcLcb97(self, name):
print '<%s type="FibRgFcLcb97" size="744 bytes">' % name
self.__dumpFibRgFcLcb97()
diff --git a/test/doc/bookmark.doc b/test/doc/bookmark.doc
new file mode 100755
index 0000000..6d32ff6
Binary files /dev/null and b/test/doc/bookmark.doc differ
diff --git a/test/doc/bookmark.rtf b/test/doc/bookmark.rtf
new file mode 100644
index 0000000..05b3d42
--- /dev/null
+++ b/test/doc/bookmark.rtf
@@ -0,0 +1,7 @@
+{\rtf1
+{\*\bkmkstart firstword}
+Hello
+{\*\bkmkend firstword}
+ world!
+\par
+}
diff --git a/test/doc/test.py b/test/doc/test.py
index 6936f1a..d02f680 100755
--- a/test/doc/test.py
+++ b/test/doc/test.py
@@ -118,6 +118,17 @@ class Test(unittest.TestCase):
self.assertEqual(1, len(authors))
self.assertEqual('vmiklos', authors[0].attrib['value'])
+ def test_bookmark(self):
+ self.dump('bookmark')
+
+ bookmarkStarts = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfBkf/plcfBkf/aCP')
+ bookmarkEnds = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfBkl/plcfBkl/aCP')
+ self.assertEqual(1, len(bookmarkStarts))
+ self.assertEqual(1, len(bookmarkEnds))
+
+ # The bookmark covers Hello
+ self.assertEqual('Hello', bookmarkEnds[0].findall('transformed')[0].attrib['value'])
+
def test_zoom(self):
self.dump('zoom')
dopBase = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbDop/dop/dop2007/dop2003/dop2002/dop2000/dop97/dop95/dopBase')[0]
More information about the Libreoffice-commits mailing list