[Libreoffice-commits] .: 2 commits - src/docdirstream.py src/docrecord.py src/docstream.py

Miklos Vajna vmiklos at kemper.freedesktop.org
Thu Nov 8 07:46:23 PST 2012


 src/docdirstream.py |   22 ++++++++++++++++++
 src/docrecord.py    |   61 ++++++++++++++++++++++++++++------------------------
 src/docstream.py    |   48 ++++++++++++++++++++--------------------
 3 files changed, 80 insertions(+), 51 deletions(-)

New commits:
commit e72cda1d059a30b5fa2e1a1e52568f011399dfc7
Author: Miklos Vajna <vmiklos at suse.cz>
Date:   Thu Nov 8 16:23:44 2012 +0100

    add DOCDirStream.getInt*() methods

diff --git a/src/docdirstream.py b/src/docdirstream.py
index 9f1ccd5..6fa4c17 100755
--- a/src/docdirstream.py
+++ b/src/docdirstream.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 import globals
+import struct
 
 class DOCDirStream:
     """Represents one single word file subdirectory, like e.g. 'WordDocument'."""
@@ -23,6 +24,27 @@ class DOCDirStream:
         else:
             print '<%s value="%s">' % (key, value)
 
+    def getInt8(self, bytes = None, pos = None):
+        if not bytes:
+            bytes = self.bytes
+        if not pos:
+            pos = self.pos
+        return ord(struct.unpack("<c", bytes[pos:pos+1])[0])
+
+    def getInt16(self, bytes = None, pos = None):
+        if not bytes:
+            bytes = self.bytes
+        if not pos:
+            pos = self.pos
+        return struct.unpack("<H", bytes[pos:pos+2])[0]
+
+    def getInt32(self, bytes = None, pos = None):
+        if not bytes:
+            bytes = self.bytes
+        if not pos:
+            pos = self.pos
+        return struct.unpack("<I", bytes[pos:pos+4])[0]
+
     def getBit(self, byte, bitNumber):
         return (byte & (1 << bitNumber)) >> bitNumber
 
diff --git a/src/docrecord.py b/src/docrecord.py
index 2141c75..44552bb 100755
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -13,7 +13,7 @@ class FcCompressed(DOCDirStream):
 
     def dump(self):
         print '<fcCompressed type="FcCompressed" offset="%d" size="%d bytes">' % (self.pos, self.size)
-        buf = struct.unpack("<I", self.bytes[self.pos:self.pos+4])[0]
+        buf = self.getInt32()
         self.pos += 4
         self.printAndSet("fc", buf & ((2**32-1) >> 2)) # bits 0..29
         self.printAndSet("fCompressed", self.getBit(buf, 30))
@@ -35,7 +35,7 @@ class Pcd(DOCDirStream):
 
     def dump(self):
         print '<pcd type="Pcd" offset="%d" size="%d bytes">' % (self.pos, self.size)
-        buf = struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]
+        buf = self.getInt16()
         self.pos += 2
         self.printAndSet("fNoParaLast", self.getBit(buf, 0))
         self.printAndSet("fR1", self.getBit(buf, 1))
@@ -75,8 +75,8 @@ class PlcPcd(DOCDirStream, PLC):
         pos = self.pos
         for i in range(self.getElements()):
             # aCp
-            start = struct.unpack("<I", self.bytes[pos:pos+4])[0]
-            end = struct.unpack("<I", self.bytes[pos+4:pos+8])[0]
+            start = self.getInt32(pos = pos)
+            end = self.getInt32(pos = pos + 4)
             print '<aCP index="%d" start="%d" end="%d">' % (i, start, end)
             pos += 4
 
@@ -105,7 +105,7 @@ class Sprm(DOCDirStream):
                 7: 3,
                 }
 
-        self.sprm = struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]
+        self.sprm = self.getInt16()
         self.pos += 2
 
         self.ispmd = (self.sprm & 0x1ff)        # 1-9th bits
@@ -114,11 +114,11 @@ class Sprm(DOCDirStream):
         self.spra  = (self.sprm & 0xe000) >> 13 # 14-16th bits
 
         if self.operandSizeMap[self.spra] == 1:
-            self.operand = ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0])
+            self.operand = self.getInt8()
         elif self.operandSizeMap[self.spra] == 2:
-            self.operand = struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]
+            self.operand = self.getInt16()
         elif self.operandSizeMap[self.spra] == 4:
-            self.operand = struct.unpack("<I", self.bytes[self.pos:self.pos+4])[0] # TODO generalize this
+            self.operand = self.getInt32()
         else:
             self.operand = "todo"
 
@@ -163,7 +163,7 @@ class GrpPrlAndIstd(DOCDirStream):
     def dump(self):
         print '<grpPrlAndIstd type="GrpPrlAndIstd" offset="%d" size="%d bytes">' % (self.pos, self.size)
         pos = self.pos
-        self.printAndSet("istd", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+        self.printAndSet("istd", self.getInt16())
         pos += 2
         while (self.size - (pos - self.pos)) > 0:
             prl = Prl(self.bytes, pos)
@@ -179,10 +179,10 @@ class PapxInFkp(DOCDirStream):
 
     def dump(self):
         print '<papxInFkp type="PapxInFkp" offset="%d">' % self.pos
-        self.printAndSet("cb", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0]))
+        self.printAndSet("cb", self.getInt8())
         self.pos += 1
         if self.cb == 0:
-            self.printAndSet("cb_", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0]))
+            self.printAndSet("cb_", self.getInt8())
             self.pos += 1
             grpPrlAndIstd = GrpPrlAndIstd(self.bytes, self.pos, 2 * self.cb_)
             grpPrlAndIstd.dump()
@@ -199,7 +199,7 @@ class BxPap(DOCDirStream):
 
     def dump(self):
         print '<bxPap type="BxPap" offset="%d" size="%d bytes">' % (self.pos, self.getSize())
-        self.printAndSet("bOffset", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0]))
+        self.printAndSet("bOffset", self.getInt8())
         papxInFkp = PapxInFkp(self.bytes, self.mainStream, self.parentpos + self.bOffset*2)
         papxInFkp.dump()
         print '</bxPap>'
@@ -217,12 +217,12 @@ class PapxFkp(DOCDirStream):
 
     def dump(self):
         print '<papxFkp type="PapxFkp" offset="%d" size="%d bytes">' % (self.pos, self.size)
-        self.cpara = ord(struct.unpack("<c", self.bytes[self.pos+self.size-1:self.pos+self.size-1+1])[0])
+        self.cpara = self.getInt8(pos = self.pos + self.size - 1)
         pos = self.pos
         for i in range(self.cpara):
             # rgfc
-            start = struct.unpack("<I", self.bytes[pos:pos+4])[0]
-            end = struct.unpack("<I", self.bytes[pos+4:pos+8])[0]
+            start = self.getInt32(pos = pos)
+            end = self.getInt32(pos = pos + 4)
             print '<rgfc index="%d" start="%d" end="%d">' % (i, start, end)
             print '<transformed value="%s"/>' % globals.encodeName(self.bytes[start:end])
             pos += 4
@@ -246,7 +246,7 @@ class PnFkpPapx(DOCDirStream):
 
     def dump(self):
         print '<%s type="PnFkpPapx" offset="%d" size="%d bytes">' % (self.name, self.pos, self.size)
-        buf = struct.unpack("<I", self.bytes[self.pos:self.pos+4])[0]
+        buf = self.getInt32()
         self.pos += 4
         self.printAndSet("pn", buf & (2**22-1))
         papxFkp = PapxFkp(self.bytes, self.mainStream, self.pn*512, 512)
@@ -266,8 +266,8 @@ class PlcBtePapx(DOCDirStream, PLC):
         pos = self.pos
         for i in range(self.getElements()):
             # aFC
-            start = struct.unpack("<I", self.bytes[pos:pos+4])[0]
-            end = struct.unpack("<I", self.bytes[pos+4:pos+8])[0]
+            start = self.getInt32(pos = pos)
+            end = self.getInt32(pos = pos + 4)
             print '<aFC index="%d" start="%d" end="%d">' % (i, start, end)
             pos += 4
 
@@ -286,9 +286,9 @@ class Pcdt(DOCDirStream):
 
     def dump(self):
         print '<pcdt type="Pcdt" offset="%d" size="%d bytes">' % (self.pos, self.size)
-        self.printAndSet("clxt", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0]))
+        self.printAndSet("clxt", self.getInt8())
         self.pos += 1
-        self.printAndSet("lcb", struct.unpack("<I", self.bytes[self.pos:self.pos+4])[0])
+        self.printAndSet("lcb", self.getInt32())
         self.pos += 4
         PlcPcd(self.bytes, self.mainStream, self.pos, self.lcb).dump()
         print '</pcdt>'
@@ -301,7 +301,7 @@ class Clx(DOCDirStream):
 
     def dump(self):
         print '<clx type="Clx" offset="%d" size="%d bytes">' % (self.pos, self.size)
-        firstByte = ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0])
+        firstByte = self.getInt8()
         if firstByte == 0x02:
             print '<info what="Array of Prc, 0 elements"/>'
             Pcdt(self.bytes, self.mainStream, self.pos, self.size).dump()
diff --git a/src/docstream.py b/src/docstream.py
index 338232d..16657df 100755
--- a/src/docstream.py
+++ b/src/docstream.py
@@ -52,38 +52,38 @@ class WordDocumentStream(DOCDirStream):
     def dumpFib(self):
         print '<fib>'
         self.dumpFibBase("base")
-        self.printAndSet("csw", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+        self.printAndSet("csw", self.getInt16())
         self.pos += 2
         self.dumpFibRgW97("fibRgW")
-        self.printAndSet("cslw", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+        self.printAndSet("cslw", self.getInt16())
         self.pos += 2
         self.dumpFibRgLw97("fibRgLw")
-        self.printAndSet("cbRgFcLcb", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+        self.printAndSet("cbRgFcLcb", self.getInt16())
         self.pos += 2
         self.dumpFibRgFcLcb("fibRgFcLcbBlob")
-        self.printAndSet("cswNew", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+        self.printAndSet("cswNew", self.getInt16())
         self.pos += 2
         print '</fib>'
 
     def dumpFibBase(self, name):
         print '<%s type="FibBase" size="32 bytes">' % name
 
-        self.printAndSet("wIndent", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+        self.printAndSet("wIndent", self.getInt16())
         self.pos += 2
 
-        self.printAndSet("nFib", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+        self.printAndSet("nFib", self.getInt16())
         self.pos += 2
 
-        self.printAndSet("unused", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+        self.printAndSet("unused", self.getInt16())
         self.pos += 2
 
-        self.printAndSet("lid", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+        self.printAndSet("lid", self.getInt16())
         self.pos += 2
 
-        self.printAndSet("pnNext", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+        self.printAndSet("pnNext", self.getInt16())
         self.pos += 2
 
-        buf = struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]
+        buf = self.getInt16()
         self.pos += 2
         self.printAndSet("fDot", self.getBit(buf, 0))
         self.printAndSet("fGlsy", self.getBit(buf, 1))
@@ -102,16 +102,16 @@ class WordDocumentStream(DOCDirStream):
         self.printAndSet("fFarEast", self.getBit(buf, 14))
         self.printAndSet("fObfuscated", self.getBit(buf, 15))
 
-        self.printAndSet("nFibBack", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+        self.printAndSet("nFibBack", self.getInt16())
         self.pos += 2
 
-        self.printAndSet("lKey", struct.unpack("<I", self.bytes[self.pos:self.pos+4])[0])
+        self.printAndSet("lKey", self.getInt32())
         self.pos += 4
 
-        self.printAndSet("envr", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0]))
+        self.printAndSet("envr", self.getInt8())
         self.pos += 1
 
-        buf = ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0])
+        buf = self.getInt8()
         self.pos += 1
 
         self.printAndSet("fMac", self.getBit(buf, 0))
@@ -121,13 +121,13 @@ class WordDocumentStream(DOCDirStream):
         self.printAndSet("reserved2", self.getBit(buf, 4))
         self.printAndSet("fSpare0",  (buf & (2**3-1)))
 
-        self.printAndSet("reserved3", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+        self.printAndSet("reserved3", self.getInt16())
         self.pos += 2
-        self.printAndSet("reserved4", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+        self.printAndSet("reserved4", self.getInt16())
         self.pos += 2
-        self.printAndSet("reserved5", struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0])
+        self.printAndSet("reserved5", self.getInt32())
         self.pos += 4
-        self.printAndSet("reserved6", struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0])
+        self.printAndSet("reserved6", self.getInt32())
         self.pos += 4
 
         print '</%s>' % name
@@ -136,9 +136,9 @@ class WordDocumentStream(DOCDirStream):
         print '<%s type="FibRgW97" size="28 bytes">' % name
 
         for i in range(13):
-            self.printAndSet("reserved%d" % (i + 1), struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+            self.printAndSet("reserved%d" % (i + 1), self.getInt16())
             self.pos += 2
-        self.printAndSet("lidFE", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+        self.printAndSet("lidFE", self.getInt16())
         self.pos += 2
 
         print '</%s>' % name
@@ -171,7 +171,7 @@ class WordDocumentStream(DOCDirStream):
                 "reserved14",
                 ]
         for i in fields:
-            self.printAndSet(i, struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0])
+            self.printAndSet(i, self.getInt32())
             self.pos += 4
 
         print '</%s>' % name
@@ -375,7 +375,7 @@ class WordDocumentStream(DOCDirStream):
             ["lcbSttbfUssr"],
                 ]
         for i in fields:
-            self.printAndSet(i[0], struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0], end = len(i) == 1)
+            self.printAndSet(i[0], self.getInt32(), end = len(i) == 1)
             self.pos += 4
             if len(i) > 1:
                 i[1]()
@@ -433,7 +433,7 @@ class WordDocumentStream(DOCDirStream):
             "lcbBkdEdnOld", 
                 ]
         for i in fields:
-            self.printAndSet(i, struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0])
+            self.printAndSet(i, self.getInt32())
             self.pos += 4
 
     def __dumpFibRgFcLcb2002(self):
@@ -497,7 +497,7 @@ class WordDocumentStream(DOCDirStream):
             "lcbPlcflvcMixedXP",
                 ]
         for i in fields:
-            self.printAndSet(i, struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0])
+            self.printAndSet(i, self.getInt32())
             self.pos += 4
 
     def dumpFibRgFcLcb2002(self, name):
commit 128a02e1b20443e9e59fb7d0a17c26801143e5b8
Author: Miklos Vajna <vmiklos at suse.cz>
Date:   Thu Nov 8 16:19:37 2012 +0100

    eliminate one more place where PLC elements are counted manually

diff --git a/src/docrecord.py b/src/docrecord.py
index 884dc10..2141c75 100755
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -56,7 +56,11 @@ class PLC:
         return (self.totalSize - 4) / (4 + self.structSize) # defined by 2.2.2
 
     def getOffset(self, pos, i):
-        return pos + (4 * (self.getElements() + 1)) + (self.structSize * i)
+        return self.getPLCOffset(pos, self.getElements(), self.structSize, i)
+
+    @staticmethod
+    def getPLCOffset(pos, elements, structSize, i):
+        return pos + (4 * (elements + 1)) + (structSize * i)
 
 class PlcPcd(DOCDirStream, PLC):
     """The PlcPcd structure is a PLC whose data elements are Pcds (8 bytes each)."""
@@ -188,19 +192,22 @@ class PapxInFkp(DOCDirStream):
     
 class BxPap(DOCDirStream):
     """The BxPap structure specifies the offset of a PapxInFkp in PapxFkp."""
-    def __init__(self, bytes, mainStream, offset, size, parentoffset):
+    def __init__(self, bytes, mainStream, offset, parentoffset):
         DOCDirStream.__init__(self, bytes)
         self.pos = offset
-        self.size = size
         self.parentpos = parentoffset
 
     def dump(self):
-        print '<bxPap type="BxPap" offset="%d" size="%d bytes">' % (self.pos, self.size)
+        print '<bxPap type="BxPap" offset="%d" size="%d bytes">' % (self.pos, self.getSize())
         self.printAndSet("bOffset", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0]))
         papxInFkp = PapxInFkp(self.bytes, self.mainStream, self.parentpos + self.bOffset*2)
         papxInFkp.dump()
         print '</bxPap>'
 
+    @staticmethod
+    def getSize():
+        return 13 # in bytes, see 2.9.23
+
 class PapxFkp(DOCDirStream):
     """The PapxFkp structure maps paragraphs, table rows, and table cells to their properties."""
     def __init__(self, bytes, mainStream, offset, size):
@@ -221,8 +228,8 @@ class PapxFkp(DOCDirStream):
             pos += 4
 
             # rgbx
-            offset = self.pos + ( 4 * ( self.cpara + 1 ) ) + ( 13 * i ) # TODO, 13 is hardwired here
-            bxPap = BxPap(self.bytes, self.mainStream, offset, 13, self.pos) # TODO 13 hardwired
+            offset = PLC.getPLCOffset(self.pos, self.cpara, BxPap.getSize(), i)
+            bxPap = BxPap(self.bytes, self.mainStream, offset, self.pos)
             bxPap.dump()
             print '</rgfc>'
 


More information about the Libreoffice-commits mailing list