[Libreoffice-commits] .: 2 commits - src/docdirstream.py src/docrecord.py src/docstream.py
Miklos Vajna
vmiklos at kemper.freedesktop.org
Thu Nov 8 07:46:23 PST 2012
src/docdirstream.py | 22 ++++++++++++++++++
src/docrecord.py | 61 ++++++++++++++++++++++++++++------------------------
src/docstream.py | 48 ++++++++++++++++++++--------------------
3 files changed, 80 insertions(+), 51 deletions(-)
New commits:
commit e72cda1d059a30b5fa2e1a1e52568f011399dfc7
Author: Miklos Vajna <vmiklos at suse.cz>
Date: Thu Nov 8 16:23:44 2012 +0100
add DOCDirStream.getInt*() methods
diff --git a/src/docdirstream.py b/src/docdirstream.py
index 9f1ccd5..6fa4c17 100755
--- a/src/docdirstream.py
+++ b/src/docdirstream.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python
import globals
+import struct
class DOCDirStream:
"""Represents one single word file subdirectory, like e.g. 'WordDocument'."""
@@ -23,6 +24,27 @@ class DOCDirStream:
else:
print '<%s value="%s">' % (key, value)
def getInt8(self, bytes=None, pos=None):
    """Return the unsigned 8-bit integer stored at offset pos of bytes.

    bytes -- buffer to read from; self.bytes is used when it is None.
    pos   -- offset into the buffer; self.pos is used when it is None.

    Raises struct.error when fewer than 1 byte is available at pos.
    """
    # Compare against None explicitly: an offset of 0 (or an empty buffer)
    # is a legitimate argument and must not be replaced by the instance
    # defaults.
    if bytes is None:
        bytes = self.bytes
    if pos is None:
        pos = self.pos
    # "B" decodes straight to an integer, no ord() round-trip needed.
    return struct.unpack("<B", bytes[pos:pos+1])[0]
+
def getInt16(self, bytes=None, pos=None):
    """Return the unsigned little-endian 16-bit integer at offset pos.

    bytes -- buffer to read from; self.bytes is used when it is None.
    pos   -- offset into the buffer; self.pos is used when it is None.

    Raises struct.error when fewer than 2 bytes are available at pos.
    """
    # Compare against None explicitly: an offset of 0 (or an empty buffer)
    # is a legitimate argument and must not be replaced by the instance
    # defaults.
    if bytes is None:
        bytes = self.bytes
    if pos is None:
        pos = self.pos
    return struct.unpack("<H", bytes[pos:pos+2])[0]
+
def getInt32(self, bytes=None, pos=None):
    """Return the unsigned little-endian 32-bit integer at offset pos.

    bytes -- buffer to read from; self.bytes is used when it is None.
    pos   -- offset into the buffer; self.pos is used when it is None.

    Raises struct.error when fewer than 4 bytes are available at pos.
    """
    # Compare against None explicitly: an offset of 0 (or an empty buffer)
    # is a legitimate argument and must not be replaced by the instance
    # defaults.
    if bytes is None:
        bytes = self.bytes
    if pos is None:
        pos = self.pos
    return struct.unpack("<I", bytes[pos:pos+4])[0]
+
def getBit(self, byte, bitNumber):
    """Return bit bitNumber of the integer byte as 0 or 1 (bit 0 is the lowest)."""
    # Move the wanted bit down to position 0, then mask everything else off.
    shifted = byte >> bitNumber
    return shifted & 1
diff --git a/src/docrecord.py b/src/docrecord.py
index 2141c75..44552bb 100755
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -13,7 +13,7 @@ class FcCompressed(DOCDirStream):
def dump(self):
print '<fcCompressed type="FcCompressed" offset="%d" size="%d bytes">' % (self.pos, self.size)
- buf = struct.unpack("<I", self.bytes[self.pos:self.pos+4])[0]
+ buf = self.getInt32()
self.pos += 4
self.printAndSet("fc", buf & ((2**32-1) >> 2)) # bits 0..29
self.printAndSet("fCompressed", self.getBit(buf, 30))
@@ -35,7 +35,7 @@ class Pcd(DOCDirStream):
def dump(self):
print '<pcd type="Pcd" offset="%d" size="%d bytes">' % (self.pos, self.size)
- buf = struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]
+ buf = self.getInt16()
self.pos += 2
self.printAndSet("fNoParaLast", self.getBit(buf, 0))
self.printAndSet("fR1", self.getBit(buf, 1))
@@ -75,8 +75,8 @@ class PlcPcd(DOCDirStream, PLC):
pos = self.pos
for i in range(self.getElements()):
# aCp
- start = struct.unpack("<I", self.bytes[pos:pos+4])[0]
- end = struct.unpack("<I", self.bytes[pos+4:pos+8])[0]
+ start = self.getInt32(pos = pos)
+ end = self.getInt32(pos = pos + 4)
print '<aCP index="%d" start="%d" end="%d">' % (i, start, end)
pos += 4
@@ -105,7 +105,7 @@ class Sprm(DOCDirStream):
7: 3,
}
- self.sprm = struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]
+ self.sprm = self.getInt16()
self.pos += 2
self.ispmd = (self.sprm & 0x1ff) # 1-9th bits
@@ -114,11 +114,11 @@ class Sprm(DOCDirStream):
self.spra = (self.sprm & 0xe000) >> 13 # 14-16th bits
if self.operandSizeMap[self.spra] == 1:
- self.operand = ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0])
+ self.operand = self.getInt8()
elif self.operandSizeMap[self.spra] == 2:
- self.operand = struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]
+ self.operand = self.getInt16()
elif self.operandSizeMap[self.spra] == 4:
- self.operand = struct.unpack("<I", self.bytes[self.pos:self.pos+4])[0] # TODO generalize this
+ self.operand = self.getInt32()
else:
self.operand = "todo"
@@ -163,7 +163,7 @@ class GrpPrlAndIstd(DOCDirStream):
def dump(self):
print '<grpPrlAndIstd type="GrpPrlAndIstd" offset="%d" size="%d bytes">' % (self.pos, self.size)
pos = self.pos
- self.printAndSet("istd", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+ self.printAndSet("istd", self.getInt16())
pos += 2
while (self.size - (pos - self.pos)) > 0:
prl = Prl(self.bytes, pos)
@@ -179,10 +179,10 @@ class PapxInFkp(DOCDirStream):
def dump(self):
print '<papxInFkp type="PapxInFkp" offset="%d">' % self.pos
- self.printAndSet("cb", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0]))
+ self.printAndSet("cb", self.getInt8())
self.pos += 1
if self.cb == 0:
- self.printAndSet("cb_", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0]))
+ self.printAndSet("cb_", self.getInt8())
self.pos += 1
grpPrlAndIstd = GrpPrlAndIstd(self.bytes, self.pos, 2 * self.cb_)
grpPrlAndIstd.dump()
@@ -199,7 +199,7 @@ class BxPap(DOCDirStream):
def dump(self):
print '<bxPap type="BxPap" offset="%d" size="%d bytes">' % (self.pos, self.getSize())
- self.printAndSet("bOffset", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0]))
+ self.printAndSet("bOffset", self.getInt8())
papxInFkp = PapxInFkp(self.bytes, self.mainStream, self.parentpos + self.bOffset*2)
papxInFkp.dump()
print '</bxPap>'
@@ -217,12 +217,12 @@ class PapxFkp(DOCDirStream):
def dump(self):
print '<papxFkp type="PapxFkp" offset="%d" size="%d bytes">' % (self.pos, self.size)
- self.cpara = ord(struct.unpack("<c", self.bytes[self.pos+self.size-1:self.pos+self.size-1+1])[0])
+ self.cpara = self.getInt8(pos = self.pos + self.size - 1)
pos = self.pos
for i in range(self.cpara):
# rgfc
- start = struct.unpack("<I", self.bytes[pos:pos+4])[0]
- end = struct.unpack("<I", self.bytes[pos+4:pos+8])[0]
+ start = self.getInt32(pos = pos)
+ end = self.getInt32(pos = pos + 4)
print '<rgfc index="%d" start="%d" end="%d">' % (i, start, end)
print '<transformed value="%s"/>' % globals.encodeName(self.bytes[start:end])
pos += 4
@@ -246,7 +246,7 @@ class PnFkpPapx(DOCDirStream):
def dump(self):
print '<%s type="PnFkpPapx" offset="%d" size="%d bytes">' % (self.name, self.pos, self.size)
- buf = struct.unpack("<I", self.bytes[self.pos:self.pos+4])[0]
+ buf = self.getInt32()
self.pos += 4
self.printAndSet("pn", buf & (2**22-1))
papxFkp = PapxFkp(self.bytes, self.mainStream, self.pn*512, 512)
@@ -266,8 +266,8 @@ class PlcBtePapx(DOCDirStream, PLC):
pos = self.pos
for i in range(self.getElements()):
# aFC
- start = struct.unpack("<I", self.bytes[pos:pos+4])[0]
- end = struct.unpack("<I", self.bytes[pos+4:pos+8])[0]
+ start = self.getInt32(pos = pos)
+ end = self.getInt32(pos = pos + 4)
print '<aFC index="%d" start="%d" end="%d">' % (i, start, end)
pos += 4
@@ -286,9 +286,9 @@ class Pcdt(DOCDirStream):
def dump(self):
print '<pcdt type="Pcdt" offset="%d" size="%d bytes">' % (self.pos, self.size)
- self.printAndSet("clxt", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0]))
+ self.printAndSet("clxt", self.getInt8())
self.pos += 1
- self.printAndSet("lcb", struct.unpack("<I", self.bytes[self.pos:self.pos+4])[0])
+ self.printAndSet("lcb", self.getInt32())
self.pos += 4
PlcPcd(self.bytes, self.mainStream, self.pos, self.lcb).dump()
print '</pcdt>'
@@ -301,7 +301,7 @@ class Clx(DOCDirStream):
def dump(self):
print '<clx type="Clx" offset="%d" size="%d bytes">' % (self.pos, self.size)
- firstByte = ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0])
+ firstByte = self.getInt8()
if firstByte == 0x02:
print '<info what="Array of Prc, 0 elements"/>'
Pcdt(self.bytes, self.mainStream, self.pos, self.size).dump()
diff --git a/src/docstream.py b/src/docstream.py
index 338232d..16657df 100755
--- a/src/docstream.py
+++ b/src/docstream.py
@@ -52,38 +52,38 @@ class WordDocumentStream(DOCDirStream):
def dumpFib(self):
print '<fib>'
self.dumpFibBase("base")
- self.printAndSet("csw", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+ self.printAndSet("csw", self.getInt16())
self.pos += 2
self.dumpFibRgW97("fibRgW")
- self.printAndSet("cslw", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+ self.printAndSet("cslw", self.getInt16())
self.pos += 2
self.dumpFibRgLw97("fibRgLw")
- self.printAndSet("cbRgFcLcb", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+ self.printAndSet("cbRgFcLcb", self.getInt16())
self.pos += 2
self.dumpFibRgFcLcb("fibRgFcLcbBlob")
- self.printAndSet("cswNew", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+ self.printAndSet("cswNew", self.getInt16())
self.pos += 2
print '</fib>'
def dumpFibBase(self, name):
print '<%s type="FibBase" size="32 bytes">' % name
- self.printAndSet("wIndent", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+ self.printAndSet("wIndent", self.getInt16())
self.pos += 2
- self.printAndSet("nFib", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+ self.printAndSet("nFib", self.getInt16())
self.pos += 2
- self.printAndSet("unused", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+ self.printAndSet("unused", self.getInt16())
self.pos += 2
- self.printAndSet("lid", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+ self.printAndSet("lid", self.getInt16())
self.pos += 2
- self.printAndSet("pnNext", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+ self.printAndSet("pnNext", self.getInt16())
self.pos += 2
- buf = struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]
+ buf = self.getInt16()
self.pos += 2
self.printAndSet("fDot", self.getBit(buf, 0))
self.printAndSet("fGlsy", self.getBit(buf, 1))
@@ -102,16 +102,16 @@ class WordDocumentStream(DOCDirStream):
self.printAndSet("fFarEast", self.getBit(buf, 14))
self.printAndSet("fObfuscated", self.getBit(buf, 15))
- self.printAndSet("nFibBack", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+ self.printAndSet("nFibBack", self.getInt16())
self.pos += 2
- self.printAndSet("lKey", struct.unpack("<I", self.bytes[self.pos:self.pos+4])[0])
+ self.printAndSet("lKey", self.getInt32())
self.pos += 4
- self.printAndSet("envr", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0]))
+ self.printAndSet("envr", self.getInt8())
self.pos += 1
- buf = ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0])
+ buf = self.getInt8()
self.pos += 1
self.printAndSet("fMac", self.getBit(buf, 0))
@@ -121,13 +121,13 @@ class WordDocumentStream(DOCDirStream):
self.printAndSet("reserved2", self.getBit(buf, 4))
self.printAndSet("fSpare0", (buf & (2**3-1)))
- self.printAndSet("reserved3", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+ self.printAndSet("reserved3", self.getInt16())
self.pos += 2
- self.printAndSet("reserved4", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+ self.printAndSet("reserved4", self.getInt16())
self.pos += 2
- self.printAndSet("reserved5", struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0])
+ self.printAndSet("reserved5", self.getInt32())
self.pos += 4
- self.printAndSet("reserved6", struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0])
+ self.printAndSet("reserved6", self.getInt32())
self.pos += 4
print '</%s>' % name
@@ -136,9 +136,9 @@ class WordDocumentStream(DOCDirStream):
print '<%s type="FibRgW97" size="28 bytes">' % name
for i in range(13):
- self.printAndSet("reserved%d" % (i + 1), struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+ self.printAndSet("reserved%d" % (i + 1), self.getInt16())
self.pos += 2
- self.printAndSet("lidFE", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0])
+ self.printAndSet("lidFE", self.getInt16())
self.pos += 2
print '</%s>' % name
@@ -171,7 +171,7 @@ class WordDocumentStream(DOCDirStream):
"reserved14",
]
for i in fields:
- self.printAndSet(i, struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0])
+ self.printAndSet(i, self.getInt32())
self.pos += 4
print '</%s>' % name
@@ -375,7 +375,7 @@ class WordDocumentStream(DOCDirStream):
["lcbSttbfUssr"],
]
for i in fields:
- self.printAndSet(i[0], struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0], end = len(i) == 1)
+ self.printAndSet(i[0], self.getInt32(), end = len(i) == 1)
self.pos += 4
if len(i) > 1:
i[1]()
@@ -433,7 +433,7 @@ class WordDocumentStream(DOCDirStream):
"lcbBkdEdnOld",
]
for i in fields:
- self.printAndSet(i, struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0])
+ self.printAndSet(i, self.getInt32())
self.pos += 4
def __dumpFibRgFcLcb2002(self):
@@ -497,7 +497,7 @@ class WordDocumentStream(DOCDirStream):
"lcbPlcflvcMixedXP",
]
for i in fields:
- self.printAndSet(i, struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0])
+ self.printAndSet(i, self.getInt32())
self.pos += 4
def dumpFibRgFcLcb2002(self, name):
commit 128a02e1b20443e9e59fb7d0a17c26801143e5b8
Author: Miklos Vajna <vmiklos at suse.cz>
Date: Thu Nov 8 16:19:37 2012 +0100
eliminate one more place where PLC elements are counted manually
diff --git a/src/docrecord.py b/src/docrecord.py
index 884dc10..2141c75 100755
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -56,7 +56,11 @@ class PLC:
return (self.totalSize - 4) / (4 + self.structSize) # defined by 2.2.2
def getOffset(self, pos, i):
- return pos + (4 * (self.getElements() + 1)) + (self.structSize * i)
+ return self.getPLCOffset(pos, self.getElements(), self.structSize, i)
+
+ @staticmethod
+ def getPLCOffset(pos, elements, structSize, i):
+ return pos + (4 * (elements + 1)) + (structSize * i)
class PlcPcd(DOCDirStream, PLC):
"""The PlcPcd structure is a PLC whose data elements are Pcds (8 bytes each)."""
@@ -188,19 +192,22 @@ class PapxInFkp(DOCDirStream):
class BxPap(DOCDirStream):
"""The BxPap structure specifies the offset of a PapxInFkp in PapxFkp."""
- def __init__(self, bytes, mainStream, offset, size, parentoffset):
+ def __init__(self, bytes, mainStream, offset, parentoffset):
DOCDirStream.__init__(self, bytes)
self.pos = offset
- self.size = size
self.parentpos = parentoffset
def dump(self):
- print '<bxPap type="BxPap" offset="%d" size="%d bytes">' % (self.pos, self.size)
+ print '<bxPap type="BxPap" offset="%d" size="%d bytes">' % (self.pos, self.getSize())
self.printAndSet("bOffset", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0]))
papxInFkp = PapxInFkp(self.bytes, self.mainStream, self.parentpos + self.bOffset*2)
papxInFkp.dump()
print '</bxPap>'
+ @staticmethod
+ def getSize():
+ return 13 # in bytes, see 2.9.23
+
class PapxFkp(DOCDirStream):
"""The PapxFkp structure maps paragraphs, table rows, and table cells to their properties."""
def __init__(self, bytes, mainStream, offset, size):
@@ -221,8 +228,8 @@ class PapxFkp(DOCDirStream):
pos += 4
# rgbx
- offset = self.pos + ( 4 * ( self.cpara + 1 ) ) + ( 13 * i ) # TODO, 13 is hardwired here
- bxPap = BxPap(self.bytes, self.mainStream, offset, 13, self.pos) # TODO 13 hardwired
+ offset = PLC.getPLCOffset(self.pos, self.cpara, BxPap.getSize(), i)
+ bxPap = BxPap(self.bytes, self.mainStream, offset, self.pos)
bxPap.dump()
print '</rgfc>'
More information about the Libreoffice-commits
mailing list