[Libreoffice-commits] .: 3 commits - doc-dump.py src/docrecord.py src/globals.py
Miklos Vajna
vmiklos at kemper.freedesktop.org
Mon Nov 19 07:29:22 PST 2012
doc-dump.py | 2 ++
src/docrecord.py | 44 ++++++++++++++++++++++++--------------------
src/globals.py | 4 ++--
3 files changed, 28 insertions(+), 22 deletions(-)
New commits:
commit b2b4859c6f8d692804a3eb33f68981b337e11022
Author: Miklos Vajna <vmiklos at suse.cz>
Date: Mon Nov 19 16:28:40 2012 +0100
FcCompressed: support fCompressed = 0
This seems to be used for non-ascii text.
diff --git a/doc-dump.py b/doc-dump.py
index ac1fba6..28e7bcb 100755
--- a/doc-dump.py
+++ b/doc-dump.py
@@ -1,6 +1,8 @@
#!/usr/bin/env python
import sys
+sys = reload(sys)
+sys.setdefaultencoding("utf-8")
sys.path.append(sys.path[0]+"/src")
import globals
import docstream
diff --git a/src/docrecord.py b/src/docrecord.py
index 64d2865..46369d3 100755
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -21,11 +21,21 @@ class FcCompressed(DOCDirStream):
self.printAndSet("r1", self.getBit(buf, 31))
print '</fcCompressed>'
- def getTransformedAddress(self):
- if self.fCompressed:
- return self.fc/2
- else:
- print '<todo what="FcCompressed: fCompressed = 0 not supported"/>'
+ def getTransformedValue(self, start, end):
+ if self.fCompressed:
+ offset = self.fc/2
+ return globals.encodeName(self.mainStream.bytes[offset:offset+end-start])
+ else:
+ offset = self.fc
+ return globals.encodeName(self.mainStream.bytes[offset:offset+end*2-start].decode('utf-16'), lowOnly = True)
+
+ @staticmethod
+ def getFCTransformedValue(bytes, start, end):
+ # This is a bit ugly, but at this state we don't know yet if the text is compressed or not.
+ try:
+ return globals.encodeName(bytes[start:end].decode('utf-16'), lowOnly = True)
+ except UnicodeDecodeError:
+ return globals.encodeName(bytes[start:end])
class Pcd(DOCDirStream):
"""The Pcd structure specifies the location of text in the WordDocument Stream and additional properties for this text."""
@@ -85,8 +95,7 @@ class PlcPcd(DOCDirStream, PLC):
aPcd = Pcd(self.bytes, self.mainStream, self.getOffset(self.pos, i), 8)
aPcd.dump()
- offset = aPcd.fc.getTransformedAddress()
- print '<transformed value="%s"/>' % globals.encodeName(self.mainStream.bytes[offset:offset+end-start])
+ print '<transformed value="%s"/>' % aPcd.fc.getTransformedValue(start, end)
print '</aCP>'
print '</plcPcd>'
@@ -251,7 +260,7 @@ class ChpxFkp(DOCDirStream):
start = self.getuInt32(pos = pos)
end = self.getuInt32(pos = pos + 4)
print '<rgfc index="%d" start="%d" end="%d">' % (i, start, end)
- print '<transformed value="%s"/>' % globals.encodeName(self.bytes[start:end])
+ print '<transformed value="%s"/>' % FcCompressed.getFCTransformedValue(self.bytes, start, end)
pos += 4
# rgbx
@@ -280,7 +289,7 @@ class PapxFkp(DOCDirStream):
start = self.getuInt32(pos = pos)
end = self.getuInt32(pos = pos + 4)
print '<rgfc index="%d" start="%d" end="%d">' % (i, start, end)
- print '<transformed value="%s"/>' % globals.encodeName(self.bytes[start:end])
+ print '<transformed value="%s"/>' % FcCompressed.getFCTransformedValue(self.bytes, start, end)
pos += 4
# rgbx
commit e6ed69713adc34f7fe554b268dda3d97eb71ae74
Author: Miklos Vajna <vmiklos at suse.cz>
Date: Mon Nov 19 16:05:14 2012 +0100
globals.encodeName: allow not encoding high characters
diff --git a/src/globals.py b/src/globals.py
index ca7a92e..dbb6690 100644
--- a/src/globals.py
+++ b/src/globals.py
@@ -151,7 +151,7 @@ def debug (msg):
sys.stderr.write("DEBUG: %s\n"%msg)
-def encodeName (name):
+def encodeName (name, lowOnly = False):
"""Encode name that contains unprintable characters."""
n = len(name)
@@ -160,7 +160,7 @@ def encodeName (name):
newname = ''
for i in xrange(0, n):
- if ord(name[i]) <= 20 or ord(name[i]) >= 127:
+ if ord(name[i]) <= 20 or ((not lowOnly) and ord(name[i]) >= 127):
newname += "\\x%2.2X"%ord(name[i])
else:
newname += name[i]
commit 52055fd71dda9407f8728b0584a2f3dc440f9f37
Author: Miklos Vajna <vmiklos at suse.cz>
Date: Tue Nov 13 11:14:14 2012 +0100
drop pointless getSize methods
diff --git a/src/docrecord.py b/src/docrecord.py
index 9931173..64d2865 100755
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -222,22 +222,19 @@ class PapxInFkp(DOCDirStream):
class BxPap(DOCDirStream):
"""The BxPap structure specifies the offset of a PapxInFkp in PapxFkp."""
+ size = 13 # in bytes, see 2.9.23
def __init__(self, bytes, mainStream, offset, parentoffset):
DOCDirStream.__init__(self, bytes)
self.pos = offset
self.parentpos = parentoffset
def dump(self):
- print '<bxPap type="BxPap" offset="%d" size="%d bytes">' % (self.pos, self.getSize())
+ print '<bxPap type="BxPap" offset="%d" size="%d bytes">' % (self.pos, self.size)
self.printAndSet("bOffset", self.getuInt8())
papxInFkp = PapxInFkp(self.bytes, self.mainStream, self.parentpos + self.bOffset*2)
papxInFkp.dump()
print '</bxPap>'
- @staticmethod
- def getSize():
- return 13 # in bytes, see 2.9.23
-
class ChpxFkp(DOCDirStream):
"""The ChpxFkp structure maps text to its character properties."""
def __init__(self, bytes, mainStream, offset, size):
@@ -287,7 +284,7 @@ class PapxFkp(DOCDirStream):
pos += 4
# rgbx
- offset = PLC.getPLCOffset(self.pos, self.cpara, BxPap.getSize(), i)
+ offset = PLC.getPLCOffset(self.pos, self.cpara, BxPap.size, i)
bxPap = BxPap(self.bytes, self.mainStream, offset, self.pos)
bxPap.dump()
print '</rgfc>'
@@ -514,9 +511,10 @@ class Stshif(DOCDirStream):
def __init__(self, bytes, mainStream, offset):
DOCDirStream.__init__(self, bytes, mainStream=mainStream)
self.pos = offset
+ self.size = 18
def dump(self):
- print '<stshif type="Stshif" offset="%d" size="%d bytes">' % (self.pos, self.getSize())
+ print '<stshif type="Stshif" offset="%d" size="%d bytes">' % (self.pos, self.size)
self.printAndSet("cstd", self.getuInt16())
self.pos += 2
self.printAndSet("cbSTDBaseInFile", self.getuInt16())
@@ -539,9 +537,6 @@ class Stshif(DOCDirStream):
self.pos += 2
print '</stshif>'
- def getSize(self):
- return 18
-
class LSD(DOCDirStream):
"""The LSD structure specifies the properties to be used for latent application-defined styles (see StshiLsd) when they are created."""
def __init__(self, bytes, offset):
@@ -588,7 +583,7 @@ class STSHI(DOCDirStream):
print '<stshi type="STSHI" offset="%d" size="%d bytes">' % (self.pos, self.size)
self.stshif = Stshif(self.bytes, self.mainStream, self.pos)
self.stshif.dump()
- self.pos += self.stshif.getSize()
+ self.pos += self.stshif.size
self.printAndSet("ftcBi", self.getuInt16())
self.pos += 2
stshiLsd = StshiLsd(self.bytes, self, self.pos)
More information about the Libreoffice-commits mailing list