[Libreoffice-commits] mso-dumper.git: src/docdirstream.py src/docrecord.py src/globals.py test/doc

Tue Oct 22 08:58:04 PDT 2013

 src/docdirstream.py              |   20 ++++++++++++++++----
 src/docrecord.py                 |    6 +++---
 src/globals.py                   |    7 ++-----
 test/doc/pass/novell717594-2.doc |binary
 4 files changed, 21 insertions(+), 12 deletions(-)

New commits:
commit a3d4647e737f2640c96062cd481797fa71ba5ba4
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Tue Oct 22 17:29:08 2013 +0200

    fix DOCDirStream::getString()
    
    For all the other methods that read numbers, the getFoo variant just
    reads the memory, while the readFoo version also adjusts the stream
    position. Do the same for strings: make getString() not adjust the
    stream position, add a readString() that does so and adapt callers.
    
    With that, we can use the new getString() in DopTypography, and then
    seek to the expected position, without trying to figure out what the new
    position is.
    
    Also, remove the xml mode of globals::getUTF8FromUTF16(): it was used
    only by docdirstream, which no longer needs it.

diff --git a/src/docdirstream.py b/src/docdirstream.py
index 5cf1493..e73c935 100644
--- a/src/docdirstream.py
+++ b/src/docdirstream.py
@@ -120,21 +120,33 @@ class DOCDirStream:
         self.pos += 8
         return ret
 
-    def getString(self, limit = None):
+    def __getString(self, limit):
         bytes = []
         count = 0
+        pos = self.pos
         while True:
             if (not limit is None) and count == limit:
                 break
-            i = self.readuInt8()
-            j = self.readuInt8()
+            i = self.getuInt8(pos = pos)
+            pos += 1
+            j = self.getuInt8(pos = pos)
+            pos += 1
             if i != 0 or j != 0:
                 bytes.append(i)
                 bytes.append(j)
             else:
                 break
             count += 1
-        return globals.getUTF8FromUTF16("".join(map(lambda x: chr(x), bytes)), xml = True)
+        return (self.quoteAttr(globals.encodeName(globals.getUTF8FromUTF16("".join(map(lambda x: chr(x), bytes))))), pos)
+
+    def getString(self, limit = None):
+        ret, pos = self.__getString(limit)
+        return ret
+
+    def readString(self, limit = None):
+        ret, pos = self.__getString(limit)
+        self.pos = pos
+        return ret
 
     def getBit(self, byte, bitNumber):
         return (byte & (1 << bitNumber)) >> bitNumber
diff --git a/src/docrecord.py b/src/docrecord.py
index 04cf8c0..a0d3ec3 100644
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -2035,10 +2035,10 @@ class DopTypography(DOCDirStream):
         self.printAndSet("cchLeadingPunct", self.readInt16())
 
         self.printAndSet("rgxchFPunct", self.getString(self.cchFollowingPunct), hexdump = False)
-        self.pos += (202 - 2 * self.cchFollowingPunct)
+        self.pos += 202
 
         self.printAndSet("rgxchLPunct", self.getString(self.cchLeadingPunct), hexdump = False)
-        self.pos += (102 - 2 * self.cchLeadingPunct)
+        self.pos += 102
 
         print '</dopTypography>'
         assert self.pos == self.dop.pos + DopTypography.size
@@ -2514,7 +2514,7 @@ class FFN(DOCDirStream):
         self.pos += 10
         FontSignature(self.bytes, self.pos).dump()
         self.pos += 24
-        print '<xszFfn value="%s"/>' % self.getString()
+        print '<xszFfn value="%s"/>' % self.readString()
         print '</ffn>'
 
 class SttbfFfn(DOCDirStream):
diff --git a/src/globals.py b/src/globals.py
index 31e995a..68aae93 100644
--- a/src/globals.py
+++ b/src/globals.py
@@ -412,7 +412,7 @@ def getDouble (bytes):
     return struct.unpack('<d', text)[0]
 
 
-def getUTF8FromUTF16 (bytes, xml = False):
+def getUTF8FromUTF16 (bytes):
     # little endian utf-16 strings
     byteCount = len(bytes)
     loopCount = int(byteCount/2)
@@ -431,10 +431,7 @@ def getUTF8FromUTF16 (bytes, xml = False):
         try:    
             text += unicode(code, 'utf-8')
         except UnicodeDecodeError:
-            close = ""
-            if xml:
-                close="/"
-            text += "<%d invalid chars%s>"%(len(code), close)
+            text += "<%d invalid chars>"%len(code)
     return text
 
 class StreamWrap(object):
diff --git a/test/doc/pass/novell717594-2.doc b/test/doc/pass/novell717594-2.doc
new file mode 100644
index 0000000..03c455d
Binary files /dev/null and b/test/doc/pass/novell717594-2.doc differ