[Libreoffice-commits] mso-dumper.git: src/docdirstream.py src/docrecord.py src/globals.py test/doc
Miklos Vajna
vmiklos at kemper.freedesktop.org
Tue Oct 22 08:58:04 PDT 2013
src/docdirstream.py | 20 ++++++++++++++++----
src/docrecord.py | 6 +++---
src/globals.py | 7 ++-----
test/doc/pass/novell717594-2.doc |binary
4 files changed, 21 insertions(+), 12 deletions(-)
New commits:
commit a3d4647e737f2640c96062cd481797fa71ba5ba4
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Tue Oct 22 17:29:08 2013 +0200
fix DOCDirStream::getString()
For all the other methods that read numbers, the getFoo variant just
reads from memory, while the readFoo variant also advances the stream
position. Do the same for strings: make getString() leave the stream
position untouched, add a readString() that advances it, and adapt callers.
With that, we can use the new getString() in DopTypography, and then
seek to the expected position, without trying to figure out what the new
position is.
Also, remove the xml mode of globals::getUTF8FromUTF16(): it was used by
docdirstream only, and now even docdirstream no longer uses it.
diff --git a/src/docdirstream.py b/src/docdirstream.py
index 5cf1493..e73c935 100644
--- a/src/docdirstream.py
+++ b/src/docdirstream.py
@@ -120,21 +120,33 @@ class DOCDirStream:
self.pos += 8
return ret
- def getString(self, limit = None):
+ def __getString(self, limit):
bytes = []
count = 0
+ pos = self.pos
while True:
if (not limit is None) and count == limit:
break
- i = self.readuInt8()
- j = self.readuInt8()
+ i = self.getuInt8(pos = pos)
+ pos += 1
+ j = self.getuInt8(pos = pos)
+ pos += 1
if i != 0 or j != 0:
bytes.append(i)
bytes.append(j)
else:
break
count += 1
- return globals.getUTF8FromUTF16("".join(map(lambda x: chr(x), bytes)), xml = True)
+ return (self.quoteAttr(globals.encodeName(globals.getUTF8FromUTF16("".join(map(lambda x: chr(x), bytes))))), pos)
+
+ def getString(self, limit = None):
+ ret, pos = self.__getString(limit)
+ return ret
+
+ def readString(self, limit = None):
+ ret, pos = self.__getString(limit)
+ self.pos = pos
+ return ret
def getBit(self, byte, bitNumber):
return (byte & (1 << bitNumber)) >> bitNumber
diff --git a/src/docrecord.py b/src/docrecord.py
index 04cf8c0..a0d3ec3 100644
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -2035,10 +2035,10 @@ class DopTypography(DOCDirStream):
self.printAndSet("cchLeadingPunct", self.readInt16())
self.printAndSet("rgxchFPunct", self.getString(self.cchFollowingPunct), hexdump = False)
- self.pos += (202 - 2 * self.cchFollowingPunct)
+ self.pos += 202
self.printAndSet("rgxchLPunct", self.getString(self.cchLeadingPunct), hexdump = False)
- self.pos += (102 - 2 * self.cchLeadingPunct)
+ self.pos += 102
print '</dopTypography>'
assert self.pos == self.dop.pos + DopTypography.size
@@ -2514,7 +2514,7 @@ class FFN(DOCDirStream):
self.pos += 10
FontSignature(self.bytes, self.pos).dump()
self.pos += 24
- print '<xszFfn value="%s"/>' % self.getString()
+ print '<xszFfn value="%s"/>' % self.readString()
print '</ffn>'
class SttbfFfn(DOCDirStream):
diff --git a/src/globals.py b/src/globals.py
index 31e995a..68aae93 100644
--- a/src/globals.py
+++ b/src/globals.py
@@ -412,7 +412,7 @@ def getDouble (bytes):
return struct.unpack('<d', text)[0]
-def getUTF8FromUTF16 (bytes, xml = False):
+def getUTF8FromUTF16 (bytes):
# little endian utf-16 strings
byteCount = len(bytes)
loopCount = int(byteCount/2)
@@ -431,10 +431,7 @@ def getUTF8FromUTF16 (bytes, xml = False):
try:
text += unicode(code, 'utf-8')
except UnicodeDecodeError:
- close = ""
- if xml:
- close="/"
- text += "<%d invalid chars%s>"%(len(code), close)
+ text += "<%d invalid chars>"%len(code)
return text
class StreamWrap(object):
diff --git a/test/doc/pass/novell717594-2.doc b/test/doc/pass/novell717594-2.doc
new file mode 100644
index 0000000..03c455d
Binary files /dev/null and b/test/doc/pass/novell717594-2.doc differ
More information about the Libreoffice-commits mailing list