[Libreoffice-commits] .: src/docrecord.py src/docstream.py test/doc
Miklos Vajna
vmiklos at kemper.freedesktop.org
Wed Jan 9 02:38:30 PST 2013
src/docrecord.py | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++
src/docstream.py | 5 +++-
test/doc/field.doc |binary
test/doc/field.rtf | 8 ++++++
test/doc/test.py | 9 +++++++
5 files changed, 82 insertions(+), 1 deletion(-)
New commits:
commit 134ed95ebe314473ad0333394f0ee0cf1dbad49e
Author: Miklos Vajna <vmiklos at suse.cz>
Date: Wed Jan 9 11:38:36 2013 +0100
dump PlcFld
diff --git a/src/docrecord.py b/src/docrecord.py
index 0aa271d..9e30066 100644
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -142,6 +142,67 @@ class PlcfBkf(DOCDirStream, PLC):
print '</aCP>'
print '</plcfBkf>'
+class Fldch(DOCDirStream):
+ """The fldch structure determines the type of the field character."""
+ def __init__(self, parent):
+ DOCDirStream.__init__(self, parent.bytes)
+ self.pos = parent.pos
+ self.parent = parent
+
+ def dump(self):
+ print '<fldch type="fldch" offset="%d" size="1 byte">' % self.pos
+ buf = self.readuInt8()
+ self.printAndSet("ch", buf & 0x1f) # 1..5th bits
+ self.printAndSet("reserved", (buf & 0xe0) >> 5) # 6..8th bits
+ print '</fldch>'
+ self.parent.pos = self.pos
+
+class Fld(DOCDirStream):
+ """The Fld structure specifies a field character."""
+ def __init__(self, parent, offset):
+ DOCDirStream.__init__(self, parent.bytes)
+ self.pos = offset
+
+ def dump(self):
+ print '<fld type="FLD" offset="%d" size="2 bytes">' % self.pos
+ self.fldch = Fldch(self)
+ self.fldch.dump()
+ self.printAndSet("grffld", self.readuInt8()) # TODO parse flt and grffldEnd
+ print '</fld>'
+
+class PlcFld(DOCDirStream, PLC):
+ """The Plcfld structure specifies the location of fields in the document."""
+ def __init__(self, mainStream):
+ DOCDirStream.__init__(self, mainStream.doc.getDirectoryStreamByName("1Table").bytes, mainStream = mainStream)
+ PLC.__init__(self, mainStream.lcbPlcfFldMom, 2) # 2 is defined by 2.8.25
+ self.pos = mainStream.fcPlcfFldMom
+ self.size = mainStream.lcbPlcfFldMom
+
+ def dump(self):
+ print '<plcFld type="PlcFld" offset="%d" size="%d bytes">' % (self.pos, self.size)
+ offset = self.mainStream.fcMin # 2.8.25: CPs relative to the start of that document part.
+ pos = self.pos
+ aFlds = []
+ for i in range(self.getElements()):
+ # aCp
+ value = self.getuInt32(pos = pos)
+ print '<aCP index="%d" value="%d">' % (i, value)
+ pos += 4
+
+ # aFld
+ aFld = Fld(self, self.getOffset(self.pos, i))
+ aFld.dump()
+
+ # The current field character is a separator (0x14) and the previous one was a field start (0x13): display the field instructions.
+ if aFld.fldch.ch == 0x14 and aFlds[-1][1].fldch.ch == 0x13:
+ print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveText(offset + aFlds[-1][0] + 1, offset + value))
+ # The current field character is a field end (0x15) and the previous one was a separator (0x14): display the field result.
+ elif aFld.fldch.ch == 0x15 and aFlds[-1][1].fldch.ch == 0x14:
+ print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveText(offset + aFlds[-1][0] + 1, offset + value))
+ aFlds.append((value, aFld))
+ print '</aCP>'
+ print '</plcFld>'
+
class PlcfBkl(DOCDirStream, PLC):
"""The Plcfbkl structure is a PLC that contains only CPs and no additional data."""
def __init__(self, mainStream, offset, size):
diff --git a/src/docstream.py b/src/docstream.py
index 167fb9c..a0aba5e 100644
--- a/src/docstream.py
+++ b/src/docstream.py
@@ -236,7 +236,7 @@ class WordDocumentStream(DOCDirStream):
["fcSttbfFfn"],
["lcbSttbfFfn", self.handleLcbSttbfFfn],
["fcPlcfFldMom"],
- ["lcbPlcfFldMom"],
+ ["lcbPlcfFldMom", self.handleLcbPlcfFldMom],
["fcPlcfFldHdr"],
["lcbPlcfFldHdr"],
["fcPlcfFldFtn"],
@@ -509,6 +509,9 @@ class WordDocumentStream(DOCDirStream):
def handleLcbDggInfo(self):
docrecord.OfficeArtContent(self).dump()
+ def handleLcbPlcfFldMom(self):
+ docrecord.PlcFld(self).dump()
+
def dumpFibRgFcLcb97(self, name):
print '<%s type="FibRgFcLcb97" size="744 bytes">' % name
self.__dumpFibRgFcLcb97()
diff --git a/test/doc/field.doc b/test/doc/field.doc
new file mode 100644
index 0000000..247f024
Binary files /dev/null and b/test/doc/field.doc differ
diff --git a/test/doc/field.rtf b/test/doc/field.rtf
new file mode 100644
index 0000000..48e39d1
--- /dev/null
+++ b/test/doc/field.rtf
@@ -0,0 +1,8 @@
+{\rtf1
+Page number:
+{\field
+{\*\fldinst PAGE }
+{\fldrslt 1}
+}
+\par
+}
diff --git a/test/doc/test.py b/test/doc/test.py
index a97d0f5..c2b955c 100755
--- a/test/doc/test.py
+++ b/test/doc/test.py
@@ -137,6 +137,15 @@ class Test(unittest.TestCase):
# This first caused unhandled exceptions, then later invalid XML output.
self.dump('escape')
+ def test_field(self):
+ self.dump('field')
+
+ instruction = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfFldMom/plcFld/aCP[@index="1"]/transformed')
+ self.assertEqual(' PAGE ', instruction[0].attrib['value'])
+
+ result = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfFldMom/plcFld/aCP[@index="2"]/transformed')
+ self.assertEqual('1', result[0].attrib['value'])
+
if __name__ == '__main__':
unittest.main()
More information about the Libreoffice-commits
mailing list