[Libreoffice-commits] core.git: solenv/bin

Caolán McNamara caolanm at redhat.com
Wed Jul 12 14:26:22 UTC 2017


 solenv/bin/polib.py           | 1868 ++++++++++++++++++++++++++++++++++++++++++
 solenv/bin/update-for-gettext |   76 +
 2 files changed, 1944 insertions(+)

New commits:
commit c3d8711aa109ab9081108c77dc7af396444fd3f1
Author: Caolán McNamara <caolanm at redhat.com>
Date:   Wed Jul 12 15:18:55 2017 +0100

    add script to update translations for gettext
    
    Change-Id: I1ec48a71103aad50e8a303a5356a25f3a168d6bd
    Reviewed-on: https://gerrit.libreoffice.org/39867
    Reviewed-by: Caolán McNamara <caolanm at redhat.com>
    Tested-by: Caolán McNamara <caolanm at redhat.com>

diff --git a/solenv/bin/polib.py b/solenv/bin/polib.py
new file mode 100644
index 000000000000..b835167729e8
--- /dev/null
+++ b/solenv/bin/polib.py
@@ -0,0 +1,1868 @@
+# -*- coding: utf-8 -*-
+#
+# License: MIT (see LICENSE file provided)
+# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
+
+"""
+**polib** allows you to manipulate, create, modify gettext files (pot, po and
+mo files).  You can load existing files, iterate through its entries, add,
+modify entries, comments or metadata, etc. or create new po files from scratch.
+
+**polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
+:func:`~polib.mofile` convenience functions.
+"""
+
+__author__ = 'David Jean Louis <izimobil at gmail.com>'
+__version__ = '1.0.8'
+__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
+           'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
+
+import array
+import codecs
+import os
+import re
+import struct
+import sys
+import textwrap
+import binascii
+
+try:
+    import io
+except ImportError:
+    # replacement of io.open() for python < 2.6
+    # we use codecs instead
+    class io(object):
+        @staticmethod
+        def open(fpath, mode='r', encoding=None):
+            return codecs.open(fpath, mode, encoding)
+
+
+# the default encoding to use when encoding cannot be detected
+default_encoding = 'utf-8'
+
+# python 2/3 compatibility helpers {{{
+
+
+if sys.version_info[:2] < (3, 0):
+    PY3 = False
+    text_type = unicode
+
+    def b(s):
+        return s
+
+    def u(s):
+        return unicode(s, "unicode_escape")
+
+else:
+    PY3 = True
+    text_type = str
+
+    def b(s):
+        return s.encode("latin-1")
+
+    def u(s):
+        return s
+# }}}
+# _pofile_or_mofile {{{
+
+
+def _pofile_or_mofile(f, type, **kwargs):
+    """
+    Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
+    honor the DRY concept.
+    """
+    # get the file encoding
+    enc = kwargs.get('encoding')
+    if enc is None:
+        enc = detect_encoding(f, type == 'mofile')
+
+    # parse the file
+    kls = type == 'pofile' and _POFileParser or _MOFileParser
+    parser = kls(
+        f,
+        encoding=enc,
+        check_for_duplicates=kwargs.get('check_for_duplicates', False),
+        klass=kwargs.get('klass')
+    )
+    instance = parser.parse()
+    instance.wrapwidth = kwargs.get('wrapwidth', 78)
+    return instance
+# }}}
+# _is_file {{{
+
+
+def _is_file(filename_or_contents):
+    """
+    Safely returns the value of os.path.exists(filename_or_contents).
+
+    Arguments:
+
+    ``filename_or_contents``
+        either a filename, or a string holding the contents of some file.
+        In the latter case, this function will always return False.
+    """
+    try:
+        return os.path.exists(filename_or_contents)
+    except (ValueError, UnicodeEncodeError):
+        return False
+# }}}
+# function pofile() {{{
+
+
+def pofile(pofile, **kwargs):
+    """
+    Convenience function that parses the po or pot file ``pofile`` and returns
+    a :class:`~polib.POFile` instance.
+
+    Arguments:
+
+    ``pofile``
+        string, full or relative path to the po/pot file or its content (data).
+
+    ``wrapwidth``
+        integer, the wrap width, only useful when the ``-w`` option was passed
+        to xgettext (optional, default: ``78``).
+
+    ``encoding``
+        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
+        encoding will be auto-detected).
+
+    ``check_for_duplicates``
+        whether to check for duplicate entries when adding entries to the
+        file (optional, default: ``False``).
+
+    ``klass``
+        class which is used to instantiate the return value (optional,
+        default: ``None``, the return value will be a :class:`~polib.POFile`
+        instance).
+    """
+    return _pofile_or_mofile(pofile, 'pofile', **kwargs)
+# }}}
+# function mofile() {{{
+
+
+def mofile(mofile, **kwargs):
+    """
+    Convenience function that parses the mo file ``mofile`` and returns a
+    :class:`~polib.MOFile` instance.
+
+    Arguments:
+
+    ``mofile``
+        string, full or relative path to the mo file or its content (data).
+
+    ``wrapwidth``
+        integer, the wrap width, only useful when the ``-w`` option was passed
+        to xgettext to generate the po file that was used to format the mo file
+        (optional, default: ``78``).
+
+    ``encoding``
+        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
+        encoding will be auto-detected).
+
+    ``check_for_duplicates``
+        whether to check for duplicate entries when adding entries to the
+        file (optional, default: ``False``).
+
+    ``klass``
+        class which is used to instantiate the return value (optional,
+        default: ``None``, the return value will be a :class:`~polib.MOFile`
+        instance).
+    """
+    return _pofile_or_mofile(mofile, 'mofile', **kwargs)
+# }}}
+# function detect_encoding() {{{
+
+
+def detect_encoding(file, binary_mode=False):
+    """
+    Try to detect the encoding used by the ``file``. The ``file`` argument can
+    be a PO or MO file path or a string containing the contents of the file.
+    If the encoding cannot be detected, the function will return the value of
+    ``default_encoding``.
+
+    Arguments:
+
+    ``file``
+        string, full or relative path to the po/mo file or its content.
+
+    ``binary_mode``
+        boolean, set this to True if ``file`` is a mo file.
+    """
+    PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
+    rxt = re.compile(u(PATTERN))
+    rxb = re.compile(b(PATTERN))
+
+    def charset_exists(charset):
+        """Check whether ``charset`` is valid or not."""
+        try:
+            codecs.lookup(charset)
+        except LookupError:
+            return False
+        return True
+
+    if not _is_file(file):
+        match = rxt.search(file)
+        if match:
+            enc = match.group(1).strip()
+            if charset_exists(enc):
+                return enc
+    else:
+        # For PY3, always treat as binary
+        if binary_mode or PY3:
+            mode = 'rb'
+            rx = rxb
+        else:
+            mode = 'r'
+            rx = rxt
+        f = open(file, mode)
+        for l in f.readlines():
+            match = rx.search(l)
+            if match:
+                f.close()
+                enc = match.group(1).strip()
+                if not isinstance(enc, text_type):
+                    enc = enc.decode('utf-8')
+                if charset_exists(enc):
+                    return enc
+        f.close()
+    return default_encoding
+# }}}
+# function escape() {{{
+
+
+def escape(st):
+    """
+    Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
+    the given string ``st`` and returns it.
+    """
+    return st.replace('\\', r'\\')\
+             .replace('\t', r'\t')\
+             .replace('\r', r'\r')\
+             .replace('\n', r'\n')\
+             .replace('\"', r'\"')
+# }}}
+# function unescape() {{{
+
+
+def unescape(st):
+    """
+    Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
+    the given string ``st`` and returns it.
+    """
+    def unescape_repl(m):
+        m = m.group(1)
+        if m == 'n':
+            return '\n'
+        if m == 't':
+            return '\t'
+        if m == 'r':
+            return '\r'
+        if m == '\\':
+            return '\\'
+        return m  # handles escaped double quote
+    return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
+# }}}
+# function natural_sort() {{{
+
+
+def natural_sort(lst):
+    """
+    Sort naturally the given list.
+    Credits: http://stackoverflow.com/a/4836734
+    """
+    convert = lambda text: int(text) if text.isdigit() else text.lower() 
+    alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] 
+    return sorted(lst, key = alphanum_key)
+# }}}
+# class _BaseFile {{{
+
+
+class _BaseFile(list):
+    """
+    Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
+    classes. This class should **not** be instantiated directly.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        Constructor, accepts the following keyword arguments:
+
+        ``pofile``
+            string, the path to the po or mo file, or its content as a string.
+
+        ``wrapwidth``
+            integer, the wrap width, only useful when the ``-w`` option was
+            passed to xgettext (optional, default: ``78``).
+
+        ``encoding``
+            string, the encoding to use, defaults to ``default_encoding``
+            global variable (optional).
+
+        ``check_for_duplicates``
+            whether to check for duplicate entries when adding entries to the
+            file, (optional, default: ``False``).
+        """
+        list.__init__(self)
+        # the path of the po/mo file, if an existing one was given
+        pofile = kwargs.get('pofile', None)
+        if pofile and _is_file(pofile):
+            self.fpath = pofile
+        else:
+            self.fpath = kwargs.get('fpath')
+        # the width at which lines should be wrapped
+        self.wrapwidth = kwargs.get('wrapwidth', 78)
+        # the file encoding
+        self.encoding = kwargs.get('encoding', default_encoding)
+        # whether to check for duplicate entries or not
+        self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
+        # header
+        self.header = ''
+        # both po and mo files have metadata
+        self.metadata = {}
+        self.metadata_is_fuzzy = 0
+
+    def __unicode__(self):
+        """
+        Returns the unicode representation of the file.
+        """
+        ret = []
+        entries = [self.metadata_as_entry()] + \
+                  [e for e in self if not e.obsolete]
+        for entry in entries:
+            ret.append(entry.__unicode__(self.wrapwidth))
+        for entry in self.obsolete_entries():
+            ret.append(entry.__unicode__(self.wrapwidth))
+        ret = u('\n').join(ret)
+
+        assert isinstance(ret, text_type)
+        #if type(ret) != text_type:
+        #    return unicode(ret, self.encoding)
+        return ret
+
+    if PY3:
+        def __str__(self):
+            return self.__unicode__()
+    else:
+        def __str__(self):
+            """
+            Returns the string representation of the file.
+            """
+            return unicode(self).encode(self.encoding)
+
+    def __contains__(self, entry):
+        """
+        Overridden ``list`` method to implement the membership test (in and
+        not in).
+        The method considers that an entry is in the file if it finds an entry
+        that has the same msgid (the test is **case sensitive**) and the same
+        msgctxt (or none for both entries).
+
+        Argument:
+
+        ``entry``
+            an instance of :class:`~polib._BaseEntry`.
+        """
+        return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \
+            is not None
+
+    def __eq__(self, other):
+        return str(self) == str(other)
+
+    def append(self, entry):
+        """
+        Overridden method to check for duplicate entries: if a user tries to
+        add an entry that is already in the file, the method will raise a
+        ``ValueError`` exception.
+
+        Argument:
+
+        ``entry``
+            an instance of :class:`~polib._BaseEntry`.
+        """
+        # check_for_duplicates may not be defined (yet) when unpickling.
+        # But if pickling, we never want to check for duplicates anyway.
+        if getattr(self, 'check_for_duplicates', False) and entry in self:
+            raise ValueError('Entry "%s" already exists' % entry.msgid)
+        super(_BaseFile, self).append(entry)
+
+    def insert(self, index, entry):
+        """
+        Overridden method to check for duplicate entries: if a user tries to
+        add an entry that is already in the file, the method will raise a
+        ``ValueError`` exception.
+
+        Arguments:
+
+        ``index``
+            index at which the entry should be inserted.
+
+        ``entry``
+            an instance of :class:`~polib._BaseEntry`.
+        """
+        if self.check_for_duplicates and entry in self:
+            raise ValueError('Entry "%s" already exists' % entry.msgid)
+        super(_BaseFile, self).insert(index, entry)
+
+    def metadata_as_entry(self):
+        """
+        Returns the file metadata as a :class:`~polib.POEntry` instance.
+        """
+        e = POEntry(msgid='')
+        mdata = self.ordered_metadata()
+        if mdata:
+            strs = []
+            for name, value in mdata:
+                # Render each metadata item as a "Name: value" line
+                strs.append('%s: %s' % (name, value))
+            e.msgstr = '\n'.join(strs) + '\n'
+        if self.metadata_is_fuzzy:
+            e.flags.append('fuzzy')
+        return e
+
+    def save(self, fpath=None, repr_method='__unicode__'):
+        """
+        Saves the po file to ``fpath``.
+        If it is an existing file and no ``fpath`` is provided, then the
+        existing file is rewritten with the modified data.
+
+        Keyword arguments:
+
+        ``fpath``
+            string, full or relative path to the file.
+
+        ``repr_method``
+            string, the method to use for output.
+        """
+        if self.fpath is None and fpath is None:
+            raise IOError('You must provide a file path to save() method')
+        contents = getattr(self, repr_method)()
+        if fpath is None:
+            fpath = self.fpath
+        if repr_method == 'to_binary':
+            fhandle = open(fpath, 'wb')
+        else:
+            fhandle = io.open(fpath, 'w', encoding=self.encoding)
+            if not isinstance(contents, text_type):
+                contents = contents.decode(self.encoding)
+        fhandle.write(contents)
+        fhandle.close()
+        # set the file path if not set
+        if self.fpath is None and fpath:
+            self.fpath = fpath
+
+    def find(self, st, by='msgid', include_obsolete_entries=False,
+             msgctxt=False):
+        """
+        Find the entry whose msgid (or property identified by the ``by``
+        argument) matches the string ``st``.
+
+        Keyword arguments:
+
+        ``st``
+            string, the string to search for.
+
+        ``by``
+            string, the property to use for comparison (default: ``msgid``).
+
+        ``include_obsolete_entries``
+            boolean, whether to also search in entries that are obsolete.
+
+        ``msgctxt``
+            string, allows specifying a specific message context for the
+            search.
+        """
+        if include_obsolete_entries:
+            entries = self[:]
+        else:
+            entries = [e for e in self if not e.obsolete]
+        for e in entries:
+            if getattr(e, by) == st:
+                if msgctxt is not False and e.msgctxt != msgctxt:
+                    continue
+                return e
+        return None
+
+    def ordered_metadata(self):
+        """
+        Convenience method that returns an ordered version of the metadata
+        dictionary. The return value is a list of tuples (metadata name,
+        metadata_value).
+        """
+        # copy the dict first
+        metadata = self.metadata.copy()
+        data_order = [
+            'Project-Id-Version',
+            'Report-Msgid-Bugs-To',
+            'POT-Creation-Date',
+            'PO-Revision-Date',
+            'Last-Translator',
+            'Language-Team',
+            'Language',
+            'MIME-Version',
+            'Content-Type',
+            'Content-Transfer-Encoding',
+            'Plural-Forms'
+        ]
+        ordered_data = []
+        for data in data_order:
+            try:
+                value = metadata.pop(data)
+                ordered_data.append((data, value))
+            except KeyError:
+                pass
+        # the rest of the metadata will be alphabetically ordered since there
+        # are no specs for this AFAIK
+        for data in natural_sort(metadata.keys()):
+            value = metadata[data]
+            ordered_data.append((data, value))
+        return ordered_data
+
+    def to_binary(self):
+        """
+        Return the binary representation of the file.
+        """
+        offsets = []
+        entries = self.translated_entries()
+
+        # the keys are sorted in the .mo file
+        def cmp(_self, other):
+            # msgfmt compares entries with msgctxt if it exists
+            self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid
+            other_msgid = other.msgctxt and other.msgctxt or other.msgid
+            if self_msgid > other_msgid:
+                return 1
+            elif self_msgid < other_msgid:
+                return -1
+            else:
+                return 0
+        # add metadata entry
+        entries.sort(key=lambda o: o.msgctxt or o.msgid)
+        mentry = self.metadata_as_entry()
+        #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
+        entries = [mentry] + entries
+        entries_len = len(entries)
+        ids, strs = b(''), b('')
+        for e in entries:
+            # For each string, we need size and file offset.  Each string is
+            # NUL terminated; the NUL does not count into the size.
+            msgid = b('')
+            if e.msgctxt:
+                # Contexts are stored by storing the concatenation of the
+                # context, a <EOT> byte, and the original string
+                msgid = self._encode(e.msgctxt + '\4')
+            if e.msgid_plural:
+                msgstr = []
+                for index in sorted(e.msgstr_plural.keys()):
+                    msgstr.append(e.msgstr_plural[index])
+                msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
+                msgstr = self._encode('\0'.join(msgstr))
+            else:
+                msgid += self._encode(e.msgid)
+                msgstr = self._encode(e.msgstr)
+            offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
+            ids += msgid + b('\0')
+            strs += msgstr + b('\0')
+
+        # The header is 7 32-bit unsigned integers.
+        keystart = 7 * 4 + 16 * entries_len
+        # and the values start after the keys
+        valuestart = keystart + len(ids)
+        koffsets = []
+        voffsets = []
+        # The string table first has the list of keys, then the list of values.
+        # Each entry has first the size of the string, then the file offset.
+        for o1, l1, o2, l2 in offsets:
+            koffsets += [l1, o1 + keystart]
+            voffsets += [l2, o2 + valuestart]
+        offsets = koffsets + voffsets
+
+        output = struct.pack(
+            "Iiiiiii",
+            # Magic number
+            MOFile.MAGIC,
+            # Version
+            0,
+            # number of entries
+            entries_len,
+            # start of key index
+            7 * 4,
+            # start of value index
+            7 * 4 + entries_len * 8,
+            # size and offset of hash table, we don't use hash tables
+            0, keystart
+
+        )
+        if PY3 and sys.version_info.minor > 1:  # python 3.2 or superior
+            output += array.array("i", offsets).tobytes()
+        else:
+            output += array.array("i", offsets).tostring()
+        output += ids
+        output += strs
+        return output
+
+    def _encode(self, mixed):
+        """
+        Encodes the given ``mixed`` argument with the file encoding if and
+        only if it's a unicode string and returns the encoded string.
+        """
+        if isinstance(mixed, text_type):
+            mixed = mixed.encode(self.encoding)
+        return mixed
+# }}}
+# class POFile {{{
+
+
+class POFile(_BaseFile):
+    """
+    Po (or Pot) file reader/writer.
+    This class inherits the :class:`~polib._BaseFile` class and, by extension,
+    the python ``list`` type.
+    """
+
+    def __unicode__(self):
+        """
+        Returns the unicode representation of the po file.
+        """
+        ret, headers = '', self.header.split('\n')
+        for header in headers:
+            if not len(header):
+                ret += "#\n"
+            elif header[:1] in [',', ':']:
+                ret += '#%s\n' % header
+            else:
+                ret += '# %s\n' % header
+
+        if not isinstance(ret, text_type):
+            ret = ret.decode(self.encoding)
+
+        return ret + _BaseFile.__unicode__(self)
+
+    def save_as_mofile(self, fpath):
+        """
+        Saves the binary representation of the file to given ``fpath``.
+
+        Keyword argument:
+
+        ``fpath``
+            string, full or relative path to the mo file.
+        """
+        _BaseFile.save(self, fpath, 'to_binary')
+
+    def percent_translated(self):
+        """
+        Convenience method that returns the percentage of translated
+        messages.
+        """
+        total = len([e for e in self if not e.obsolete])
+        if total == 0:
+            return 100
+        translated = len(self.translated_entries())
+        return int(translated * 100 / float(total))
+
+    def translated_entries(self):
+        """
+        Convenience method that returns the list of translated entries.
+        """
+        return [e for e in self if e.translated()]
+
+    def untranslated_entries(self):
+        """
+        Convenience method that returns the list of untranslated entries.
+        """
+        return [e for e in self if not e.translated() and not e.obsolete
+                and not 'fuzzy' in e.flags]
+
+    def fuzzy_entries(self):
+        """
+        Convenience method that returns the list of fuzzy entries.
+        """
+        return [e for e in self if 'fuzzy' in e.flags]
+
+    def obsolete_entries(self):
+        """
+        Convenience method that returns the list of obsolete entries.
+        """
+        return [e for e in self if e.obsolete]
+
+    def merge(self, refpot):
+        """
+        Convenience method that merges the current pofile with the pot file
+        provided. It behaves exactly as the gettext msgmerge utility:
+
+        * comments of this file will be preserved, but extracted comments and
+          occurrences will be discarded;
+        * any translations or comments in the file will be discarded, however,
+          dot comments and file positions will be preserved;
+        * the fuzzy flags are preserved.
+
+        Keyword argument:
+
+        ``refpot``
+            object POFile, the reference catalog.
+        """
+        # Store entries in dict/set for faster access
+        self_entries = dict((entry.msgid, entry) for entry in self)
+        refpot_msgids = set(entry.msgid for entry in refpot)
+        # Merge entries that are in the refpot
+        for entry in refpot:
+            e = self_entries.get(entry.msgid)
+            if e is None:
+                e = POEntry()
+                self.append(e)
+            e.merge(entry)
+        # ok, now we must "obsolete" entries that are not in the refpot anymore
+        for entry in self:
+            if entry.msgid not in refpot_msgids:
+                entry.obsolete = True
+# }}}
+# class MOFile {{{
+
+
+class MOFile(_BaseFile):
+    """
+    Mo file reader/writer.
+    This class inherits the :class:`~polib._BaseFile` class and, by
+    extension, the python ``list`` type.
+    """
+    MAGIC = 0x950412de
+    MAGIC_SWAPPED = 0xde120495
+
+    def __init__(self, *args, **kwargs):
+        """
+        Constructor, accepts all keyword arguments accepted by
+        :class:`~polib._BaseFile` class.
+        """
+        _BaseFile.__init__(self, *args, **kwargs)
+        self.magic_number = None
+        self.version = 0
+
+    def save_as_pofile(self, fpath):
+        """
+        Saves the mofile as a pofile to ``fpath``.
+
+        Keyword argument:
+
+        ``fpath``
+            string, full or relative path to the file.
+        """
+        _BaseFile.save(self, fpath)
+
+    def save(self, fpath=None):
+        """
+        Saves the mofile to ``fpath``.
+
+        Keyword argument:
+
+        ``fpath``
+            string, full or relative path to the file.
+        """
+        _BaseFile.save(self, fpath, 'to_binary')
+
+    def percent_translated(self):
+        """
+        Convenience method to keep the same interface with POFile instances.
+        """
+        return 100
+
+    def translated_entries(self):
+        """
+        Convenience method to keep the same interface with POFile instances.
+        """
+        return self
+
+    def untranslated_entries(self):
+        """
+        Convenience method to keep the same interface with POFile instances.
+        """
+        return []
+
+    def fuzzy_entries(self):
+        """
+        Convenience method to keep the same interface with POFile instances.
+        """
+        return []
+
+    def obsolete_entries(self):
+        """
+        Convenience method to keep the same interface with POFile instances.
+        """
+        return []
+# }}}
+# class _BaseEntry {{{
+
+
+class _BaseEntry(object):
+    """
+    Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
+    This class should **not** be instantiated directly.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        Constructor, accepts the following keyword arguments:
+
+        ``msgid``
+            string, the entry msgid.
+
+        ``msgstr``
+            string, the entry msgstr.
+
+        ``msgid_plural``
+            string, the entry msgid_plural.
+
+        ``msgstr_plural``
+            dict, the entry msgstr_plural strings keyed by plural index.
+
+        ``msgctxt``
+            string, the entry context (msgctxt).
+
+        ``obsolete``
+            bool, whether the entry is "obsolete" or not.
+
+        ``encoding``
+            string, the encoding to use, defaults to ``default_encoding``
+            global variable (optional).
+        """
+        self.msgid = kwargs.get('msgid', '')
+        self.msgstr = kwargs.get('msgstr', '')
+        self.msgid_plural = kwargs.get('msgid_plural', '')
+        self.msgstr_plural = kwargs.get('msgstr_plural', {})
+        self.msgctxt = kwargs.get('msgctxt', None)
+        self.obsolete = kwargs.get('obsolete', False)
+        self.encoding = kwargs.get('encoding', default_encoding)
+
+    def __unicode__(self, wrapwidth=78):
+        """
+        Returns the unicode representation of the entry.
+        """
+        if self.obsolete:
+            delflag = '#~ '
+        else:
+            delflag = ''
+        ret = []
+        # write the msgctxt if any
+        if self.msgctxt is not None:
+            ret += self._str_field("msgctxt", delflag, "", self.msgctxt,
+                                   wrapwidth)
+        # write the msgid
+        ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
+        # write the msgid_plural if any
+        if self.msgid_plural:
+            ret += self._str_field("msgid_plural", delflag, "",
+                                   self.msgid_plural, wrapwidth)
+        if self.msgstr_plural:
+            # write the msgstr_plural if any
+            msgstrs = self.msgstr_plural
+            keys = list(msgstrs)
+            keys.sort()
+            for index in keys:
+                msgstr = msgstrs[index]
+                plural_index = '[%s]' % index
+                ret += self._str_field("msgstr", delflag, plural_index, msgstr,
+                                       wrapwidth)
+        else:
+            # otherwise write the msgstr
+            ret += self._str_field("msgstr", delflag, "", self.msgstr,
+                                   wrapwidth)
+        ret.append('')
+        usedirect = True
+        if type(ret[0] != unicode):
+            try:
+                usedirect = False
+                ret = u('\n').join(x.decode('utf-8') for x in ret)
+            except:
+                usedirect = True
+        if usedirect:
+            ret = u('\n').join(ret)
+        return ret
+
+    if PY3:
+        def __str__(self):
+            return self.__unicode__()
+    else:
+        def __str__(self):
+            """
+            Returns the string representation of the entry.
+            """
+            return unicode(self).encode(self.encoding)
+
+    def __eq__(self, other):
+        return str(self) == str(other)
+
+    def _str_field(self, fieldname, delflag, plural_index, field,
+                   wrapwidth=78):
+        lines = field.splitlines(True)
+        if len(lines) > 1:
+            lines = [''] + lines  # start with initial empty line
+        else:
+            escaped_field = escape(field)
+            specialchars_count = 0
+            for c in ['\\', '\n', '\r', '\t', '"']:
+                specialchars_count += field.count(c)
+            # comparison must take into account fieldname length + one space
+            # + 2 quotes (eg. msgid "<string>")
+            flength = len(fieldname) + 3
+            if plural_index:
+                flength += len(plural_index)
+            real_wrapwidth = wrapwidth - flength + specialchars_count
+            if wrapwidth > 0 and len(field) > real_wrapwidth:
+                # Wrap the line but take field name into account
+                lines = [''] + [unescape(item) for item in wrap(
+                    escaped_field,
+                    wrapwidth - 2,  # 2 for quotes ""
+                    drop_whitespace=False,
+                    break_long_words=False
+                )]
+            else:
+                lines = [field]
+        if fieldname.startswith('previous_'):
+            # quick and dirty trick to get the real field name
+            fieldname = fieldname[9:]
+
+        ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
+                                escape(lines.pop(0)))]
+        for line in lines:
+            ret.append('%s"%s"' % (delflag, escape(line)))
+        return ret
+# }}}
+# class POEntry {{{
+
+
+class POEntry(_BaseEntry):
+    """
+    Represents a po file entry.
+
+    On top of the message fields handled by the base entry class, a po
+    entry carries comments, occurrences, flags and the "previous" fields
+    written by msgmerge.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        Constructor, accepts the following keyword arguments:
+
+        ``comment``
+            string, the entry comment.
+
+        ``tcomment``
+            string, the entry translator comment.
+
+        ``occurrences``
+            list, the entry occurrences.
+
+        ``flags``
+            list, the entry flags.
+
+        ``previous_msgctxt``
+            string, the entry previous context.
+
+        ``previous_msgid``
+            string, the entry previous msgid.
+
+        ``previous_msgid_plural``
+            string, the entry previous msgid_plural.
+
+        ``linenum``
+            integer, the line number of the entry
+        """
+        _BaseEntry.__init__(self, *args, **kwargs)
+        self.comment = kwargs.get('comment', '')
+        self.tcomment = kwargs.get('tcomment', '')
+        self.occurrences = kwargs.get('occurrences', [])
+        self.flags = kwargs.get('flags', [])
+        self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
+        self.previous_msgid = kwargs.get('previous_msgid', None)
+        self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
+        self.linenum = kwargs.get('linenum', None)
+
+    def __unicode__(self, wrapwidth=0):
+        """
+        Returns the unicode representation of the entry.
+
+        ``wrapwidth``
+            integer, the maximum line width; comments and occurrences are
+            only wrapped when it is greater than 0.
+        """
+        ret = []
+        # comments first, if any (with text wrapping as xgettext does);
+        # obsolete entries only keep their translator comment
+        if self.obsolete:
+            comments = [('tcomment', '# ')]
+        else:
+            comments = [('comment', '#. '), ('tcomment', '# ')]
+        for c in comments:
+            val = getattr(self, c[0])
+            if val:
+                for comment in val.split('\n'):
+                    if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
+                        ret += wrap(
+                            comment,
+                            wrapwidth,
+                            initial_indent=c[1],
+                            subsequent_indent=c[1],
+                            break_long_words=False
+                        )
+                    else:
+                        ret.append('%s%s' % (c[1], comment))
+
+        # occurrences (with text wrapping as xgettext does)
+        if not self.obsolete and self.occurrences:
+            filelist = []
+            for fpath, lineno in self.occurrences:
+                if lineno:
+                    filelist.append('%s:%s' % (fpath, lineno))
+                else:
+                    filelist.append(fpath)
+            filestr = ' '.join(filelist)
+            if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
+                # textwrap splits words that contain a hyphen, this is not
+                # what we want for filenames, so the dirty hack is to
+                # temporarily replace hyphens with a char that a file
+                # cannot contain, like "*"
+                ret += [l.replace('*', '-') for l in wrap(
+                    filestr.replace('-', '*'),
+                    wrapwidth,
+                    initial_indent='#: ',
+                    subsequent_indent='#: ',
+                    break_long_words=False
+                )]
+            else:
+                ret.append('#: ' + filestr)
+
+        # flags (TODO: wrapping ?)
+        if self.flags:
+            ret.append('#, %s' % ', '.join(self.flags))
+
+        # previous context and previous msgid/msgid_plural
+        fields = ['previous_msgctxt', 'previous_msgid',
+                  'previous_msgid_plural']
+        if self.obsolete:
+            prefix = "#~| "
+        else:
+            prefix = "#| "
+        for f in fields:
+            val = getattr(self, f)
+            if val:
+                ret += self._str_field(f, prefix, "", val, wrapwidth)
+
+        ret.append(_BaseEntry.__unicode__(self, wrapwidth))
+        ret = u('\n').join(ret)
+        return ret
+
+    def __cmp__(self, other):
+        """
+        Three-way comparison backing all the rich comparison methods.
+
+        Returns -1, 0 or 1. Entries are ordered by obsolete flag (obsolete
+        entries first), then by their sorted occurrences, then by
+        msgid_plural and finally by msgid.
+        """
+
+        # First: Obsolete test
+        if self.obsolete != other.obsolete:
+            if self.obsolete:
+                return -1
+            else:
+                return 1
+        # Work on a copy to protect original
+        occ1 = sorted(self.occurrences[:])
+        occ2 = sorted(other.occurrences[:])
+        for pos, entry1 in enumerate(occ1):
+            try:
+                entry2 = occ2[pos]
+            except IndexError:
+                # other ran out of occurrences first
+                return 1
+            if entry1[0] != entry2[0]:
+                if entry1[0] > entry2[0]:
+                    return 1
+                else:
+                    return -1
+            if entry1[1] != entry2[1]:
+                if entry1[1] > entry2[1]:
+                    return 1
+                else:
+                    return -1
+        # Mirror of the IndexError case above: self ran out of occurrences
+        # first, so it sorts before other. Without this the ordering is not
+        # antisymmetric (a > b and b > a could both hold).
+        if len(occ2) > len(occ1):
+            return -1
+        # Compare msgid_plural if set. msgid_plural is a plain string, so
+        # compare it as a whole; indexing it with its own characters (as if
+        # it were a dict) raises TypeError.
+        if self.msgid_plural:
+            if not other.msgid_plural:
+                return 1
+            if self.msgid_plural > other.msgid_plural:
+                return 1
+            if self.msgid_plural < other.msgid_plural:
+                return -1
+        # Finally: Compare message ID
+        if self.msgid > other.msgid:
+            return 1
+        elif self.msgid < other.msgid:
+            return -1
+        return 0
+
+    def __gt__(self, other):
+        return self.__cmp__(other) > 0
+
+    def __lt__(self, other):
+        return self.__cmp__(other) < 0
+
+    def __ge__(self, other):
+        return self.__cmp__(other) >= 0
+
+    def __le__(self, other):
+        return self.__cmp__(other) <= 0
+
+    def __eq__(self, other):
+        return self.__cmp__(other) == 0
+
+    def __ne__(self, other):
+        return self.__cmp__(other) != 0
+
+    def translated(self):
+        """
+        Returns ``True`` if the entry has been translated or ``False``
+        otherwise.
+
+        Obsolete and fuzzy entries are never considered translated; a
+        plural entry is translated only when every plural form is filled.
+        """
+        if self.obsolete or 'fuzzy' in self.flags:
+            return False
+        if self.msgstr != '':
+            return True
+        if self.msgstr_plural:
+            for pos in self.msgstr_plural:
+                if self.msgstr_plural[pos] == '':
+                    return False
+            return True
+        return False
+
+    def merge(self, other):
+        """
+        Merge the current entry with the given pot entry.
+
+        Everything except the translations is taken from ``other``; an
+        existing ``fuzzy`` flag and existing plural translations are kept.
+        """
+        self.msgid = other.msgid
+        self.msgctxt = other.msgctxt
+        self.occurrences = other.occurrences
+        self.comment = other.comment
+        fuzzy = 'fuzzy' in self.flags
+        self.flags = other.flags[:]  # clone flags
+        if fuzzy:
+            self.flags.append('fuzzy')
+        self.msgid_plural = other.msgid_plural
+        self.obsolete = other.obsolete
+        self.previous_msgctxt = other.previous_msgctxt
+        self.previous_msgid = other.previous_msgid
+        self.previous_msgid_plural = other.previous_msgid_plural
+        if other.msgstr_plural:
+            for pos in other.msgstr_plural:
+                try:
+                    # keep existing translation at pos if any
+                    self.msgstr_plural[pos]
+                except KeyError:
+                    self.msgstr_plural[pos] = ''
+
+    def __hash__(self):
+        return hash((self.msgid, self.msgstr))
+# }}}
+# class MOEntry {{{
+
+
+class MOEntry(_BaseEntry):
+    """
+    Represents a mo file entry.
+    """
+    def __init__(self, *args, **kwargs):
+        """
+        Constructor, accepts the same keyword arguments as
+        :class:`~polib.POEntry` so both classes can be built the same way:
+
+        ``comment``
+        ``tcomment``
+        ``occurrences``
+        ``flags``
+        ``previous_msgctxt``
+        ``previous_msgid``
+        ``previous_msgid_plural``
+
+        Note: these keyword arguments have no meaning for MO files; the
+        matching attributes are always reset to their empty values below,
+        whatever was passed in.
+        """
+        _BaseEntry.__init__(self, *args, **kwargs)
+        # text fields
+        self.comment = self.tcomment = ''
+        # list fields (two distinct lists)
+        self.occurrences, self.flags = [], []
+        # "previous" fields
+        self.previous_msgctxt = self.previous_msgid = None
+        self.previous_msgid_plural = None
+
+    def __hash__(self):
+        key = (self.msgid, self.msgstr)
+        return hash(key)
+
+# }}}
+# class _POFileParser {{{
+
+
+class _POFileParser(object):
+    """
+    A finite state machine to parse efficiently and correctly po
+    file format.
+    """
+
+    def __init__(self, pofile, *args, **kwargs):
+        """
+        Constructor.
+
+        Keyword arguments:
+
+        ``pofile``
+            string, path to the po file or its content
+
+        ``encoding``
+            string, the encoding to use, defaults to ``default_encoding``
+            global variable (optional).
+
+        ``check_for_duplicates``
+            whether to check for duplicate entries when adding entries to the
+            file (optional, default: ``False``).
+
+        ``klass``
+            class instantiated to hold the parsed entries (optional,
+            defaults to ``POFile``).
+        """
+        enc = kwargs.get('encoding', default_encoding)
+        if _is_file(pofile):
+            try:
+                self.fhandle = io.open(pofile, 'rt', encoding=enc)
+            except LookupError:
+                # unknown encoding name: fall back to the default one
+                enc = default_encoding
+                self.fhandle = io.open(pofile, 'rt', encoding=enc)
+        else:
+            # pofile is the content itself, iterate over its lines
+            self.fhandle = pofile.splitlines()
+
+        klass = kwargs.get('klass')
+        if klass is None:
+            klass = POFile
+        self.instance = klass(
+            pofile=pofile,
+            encoding=enc,
+            check_for_duplicates=kwargs.get('check_for_duplicates', False)
+        )
+        self.transitions = {}
+        self.current_line = 0
+        self.current_entry = POEntry(linenum=self.current_line)
+        self.current_state = 'st'
+        self.current_token = None
+        # two memo flags used in handlers
+        self.msgstr_index = 0
+        self.entry_obsolete = 0
+        # Configure the state machine, by adding transitions.
+        # Meaning of the symbols:
+        #     * ST: Beginning of the file (start)
+        #     * HE: Header
+        #     * TC: a translation comment
+        #     * GC: a generated comment
+        #     * OC: a file/line occurrence
+        #     * FL: a flags line
+        #     * CT: a message context
+        #     * PC: a previous msgctxt
+        #     * PM: a previous msgid
+        #     * PP: a previous msgid_plural
+        #     * MI: a msgid
+        #     * MP: a msgid plural
+        #     * MS: a msgstr
+        #     * MX: a msgstr plural
+        #     * MC: a msgid or msgstr continuation line
+        all_states = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp',
+                      'tc', 'ms', 'mp', 'mx', 'mi']
+
+        self.add('tc', ['st', 'he'],                                     'he')
+        self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms',
+                        'mp', 'mx', 'mi'],                               'tc')
+        self.add('gc', all_states,                                       'gc')
+        self.add('oc', all_states,                                       'oc')
+        self.add('fl', all_states,                                       'fl')
+        self.add('pc', all_states,                                       'pc')
+        self.add('pm', all_states,                                       'pm')
+        self.add('pp', all_states,                                       'pp')
+        self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm',
+                        'pp', 'ms', 'mx'],                               'ct')
+        self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc',
+                 'pm', 'pp', 'ms', 'mx'],                                'mi')
+        self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'],             'mp')
+        self.add('ms', ['mi', 'mp', 'tc'],                               'ms')
+        self.add('mx', ['mi', 'mx', 'mp', 'tc'],                         'mx')
+        self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')
+
+    def parse(self):
+        """
+        Run the state machine, parse the file line by line and call process()
+        with the current matched symbol.
+
+        Returns the populated file instance. Raises ``IOError`` on a syntax
+        error; an opened file is closed in every case.
+        """
+        try:
+            self._parse_lines()
+            self._extract_metadata()
+        finally:
+            # close opened file, also when a syntax error is raised
+            if not isinstance(self.fhandle, list):  # must be file
+                self.fhandle.close()
+        return self.instance
+
+    def _parse_lines(self):
+        """
+        Feed every line to the state machine and store the resulting
+        entries in the file instance.
+        """
+        keywords = {
+            'msgctxt': 'ct',
+            'msgid': 'mi',
+            'msgstr': 'ms',
+            'msgid_plural': 'mp',
+        }
+        prev_keywords = {
+            'msgid_plural': 'pp',
+            'msgid': 'pm',
+            'msgctxt': 'pc',
+        }
+        tokens = []
+        for line in self.fhandle:
+            self.current_line += 1
+            line = line.strip()
+            if line == '':
+                continue
+
+            tokens = line.split(None, 2)
+            nb_tokens = len(tokens)
+
+            # previous fields of obsolete entries are skipped
+            if tokens[0] == '#~|':
+                continue
+
+            if tokens[0] == '#~' and nb_tokens > 1:
+                # obsolete entry: drop the marker and parse the rest as usual
+                line = line[3:].strip()
+                tokens = tokens[1:]
+                nb_tokens -= 1
+                self.entry_obsolete = 1
+            else:
+                self.entry_obsolete = 0
+
+            # Take care of keywords like
+            # msgid, msgid_plural, msgctxt & msgstr.
+            if tokens[0] in keywords and nb_tokens > 1:
+                line = line[len(tokens[0]):].lstrip()
+                if re.search(r'([^\\]|^)"', line[1:-1]):
+                    raise IOError('Syntax error in po file %s (line %s): '
+                                  'unescaped double quote found' %
+                                  (self.instance.fpath, self.current_line))
+                self.current_token = line
+                self.process(keywords[tokens[0]])
+                continue
+
+            self.current_token = line
+
+            if tokens[0] == '#:':
+                if nb_tokens <= 1:
+                    continue
+                # we are on a occurrences line
+                self.process('oc')
+
+            elif line[:1] == '"':
+                # we are on a continuation line
+                if re.search(r'([^\\]|^)"', line[1:-1]):
+                    raise IOError('Syntax error in po file %s (line %s): '
+                                  'unescaped double quote found' %
+                                  (self.instance.fpath, self.current_line))
+                self.process('mc')
+
+            elif line[:7] == 'msgstr[':
+                # we are on a msgstr plural
+                self.process('mx')
+
+            elif tokens[0] == '#,':
+                if nb_tokens <= 1:
+                    continue
+                # we are on a flags line
+                self.process('fl')
+
+            elif tokens[0] == '#' or tokens[0].startswith('##'):
+                if line == '#':
+                    line += ' '
+                # we are on a translator comment line
+                self.process('tc')
+
+            elif tokens[0] == '#.':
+                if nb_tokens <= 1:
+                    continue
+                # we are on a generated comment line
+                self.process('gc')
+
+            elif tokens[0] == '#|':
+                if nb_tokens <= 1:
+                    raise IOError('Syntax error in po file %s (line %s)' %
+                                  (self.instance.fpath, self.current_line))
+
+                # Remove the marker and any whitespace right after that.
+                line = line[2:].lstrip()
+                self.current_token = line
+
+                if tokens[1].startswith('"'):
+                    # Continuation of previous metadata.
+                    self.process('mc')
+                    continue
+
+                if nb_tokens == 2:
+                    # Invalid continuation line.
+                    raise IOError('Syntax error in po file %s (line %s): '
+                                  'invalid continuation line' %
+                                  (self.instance.fpath, self.current_line))
+
+                # we are on a "previous translation" comment line,
+                if tokens[1] not in prev_keywords:
+                    # Unknown keyword in previous translation comment.
+                    raise IOError('Syntax error in po file %s (line %s): '
+                                  'unknown keyword %s' %
+                                  (self.instance.fpath, self.current_line,
+                                   tokens[1]))
+
+                # Remove the keyword and any whitespace
+                # between it and the starting quote.
+                line = line[len(tokens[1]):].lstrip()
+                self.current_token = line
+                self.process(prev_keywords[tokens[1]])
+
+            else:
+                raise IOError('Syntax error in po file %s (line %s)' %
+                              (self.instance.fpath, self.current_line))
+
+        if self.current_entry and len(tokens) > 0 and \
+           not tokens[0].startswith('#'):
+            # since entries are added when another entry is found, we must add
+            # the last entry here (only if there are lines). Trailing comments
+            # are ignored
+            self.instance.append(self.current_entry)
+
+    def _extract_metadata(self):
+        """
+        Move the entry with an empty msgid, if any, out of the entry list
+        and into the metadata dict of the file instance.
+        """
+        metadataentry = self.instance.find('')
+        if metadataentry:  # metadata found
+            # remove the entry
+            self.instance.remove(metadataentry)
+            self.instance.metadata_is_fuzzy = metadataentry.flags
+            key = None
+            for msg in metadataentry.msgstr.splitlines():
+                try:
+                    key, val = msg.split(':', 1)
+                    self.instance.metadata[key] = val.strip()
+                except (ValueError, KeyError):
+                    # no "key: value" on this line, it continues the
+                    # value of the previous key
+                    if key is not None:
+                        self.instance.metadata[key] += '\n' + msg.strip()
+
+    def add(self, symbol, states, next_state):
+        """
+        Add a transition to the state machine.
+
+        Keyword arguments:
+
+        ``symbol``
+            string, the matched token (two chars symbol).
+
+        ``states``
+            list, a list of states (two chars symbols).
+
+        ``next_state``
+            the next state the fsm will have after the action.
+        """
+        for state in states:
+            action = getattr(self, 'handle_%s' % next_state)
+            self.transitions[(symbol, state)] = (action, next_state)
+
+    def process(self, symbol):
+        """
+        Process the transition corresponding to the current state and the
+        symbol provided.
+
+        Keyword arguments:
+
+        ``symbol``
+            string, the matched token (two chars symbol).
+
+        Raises ``IOError`` when no transition exists for the symbol in the
+        current state or when the handler fails.
+        """
+        try:
+            (action, state) = self.transitions[(symbol, self.current_state)]
+            # handlers return a false value to keep the current state
+            if action():
+                self.current_state = state
+        except Exception:
+            raise IOError('Syntax error in po file (line %s)' %
+                          self.current_line)
+
+    def _flush_entry(self):
+        """
+        Store the entry being built and start a new one when the previous
+        line belonged to a msgstr, i.e. the previous entry is complete.
+        """
+        if self.current_state in ['mc', 'ms', 'mx']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry(linenum=self.current_line)
+
+    # state handlers
+
+    def handle_he(self):
+        """Handle a header comment."""
+        if self.instance.header != '':
+            self.instance.header += '\n'
+        self.instance.header += self.current_token[2:]
+        return 1
+
+    def handle_tc(self):
+        """Handle a translator comment."""
+        self._flush_entry()
+        if self.current_entry.tcomment != '':
+            self.current_entry.tcomment += '\n'
+        tcomment = self.current_token.lstrip('#')
+        if tcomment.startswith(' '):
+            tcomment = tcomment[1:]
+        self.current_entry.tcomment += tcomment
+        return True
+
+    def handle_gc(self):
+        """Handle a generated comment."""
+        self._flush_entry()
+        if self.current_entry.comment != '':
+            self.current_entry.comment += '\n'
+        self.current_entry.comment += self.current_token[3:]
+        return True
+
+    def handle_oc(self):
+        """Handle a file:num occurrence."""
+        self._flush_entry()
+        occurrences = self.current_token[3:].split()
+        for occurrence in occurrences:
+            if occurrence != '':
+                try:
+                    fil, line = occurrence.rsplit(':', 1)
+                    if not line.isdigit():
+                        # what follows the last colon is not a line number
+                        # (e.g. "C:\foo.c"): keep the reference untouched
+                        # instead of silently dropping the colon
+                        fil = occurrence
+                        line = ''
+                    self.current_entry.occurrences.append((fil, line))
+                except (ValueError, AttributeError):
+                    # no colon at all
+                    self.current_entry.occurrences.append((occurrence, ''))
+        return True
+
+    def handle_fl(self):
+        """Handle a flags line."""
+        self._flush_entry()
+        self.current_entry.flags += [c.strip() for c in
+                                     self.current_token[3:].split(',')]
+        return True
+
+    def handle_pp(self):
+        """Handle a previous msgid_plural line."""
+        self._flush_entry()
+        self.current_entry.previous_msgid_plural = \
+            unescape(self.current_token[1:-1])
+        return True
+
+    def handle_pm(self):
+        """Handle a previous msgid line."""
+        self._flush_entry()
+        self.current_entry.previous_msgid = \
+            unescape(self.current_token[1:-1])
+        return True
+
+    def handle_pc(self):
+        """Handle a previous msgctxt line."""
+        self._flush_entry()
+        self.current_entry.previous_msgctxt = \
+            unescape(self.current_token[1:-1])
+        return True
+
+    def handle_ct(self):
+        """Handle a msgctxt."""
+        self._flush_entry()
+        self.current_entry.msgctxt = unescape(self.current_token[1:-1])
+        return True
+
+    def handle_mi(self):
+        """Handle a msgid."""
+        self._flush_entry()
+        self.current_entry.obsolete = self.entry_obsolete
+        self.current_entry.msgid = unescape(self.current_token[1:-1])
+        return True
+
+    def handle_mp(self):
+        """Handle a msgid plural."""
+        self.current_entry.msgid_plural = unescape(self.current_token[1:-1])
+        return True
+
+    def handle_ms(self):
+        """Handle a msgstr."""
+        self.current_entry.msgstr = unescape(self.current_token[1:-1])
+        return True
+
+    def handle_mx(self):
+        """Handle a msgstr plural."""
+        token = self.current_token
+        # the index is everything between "msgstr[" and the closing
+        # bracket, so indexes with more than one digit are supported
+        index = int(token[7:token.index(']')])
+        value = token[token.find('"') + 1:-1]
+        self.current_entry.msgstr_plural[index] = unescape(value)
+        self.msgstr_index = index
+        return True
+
+    def handle_mc(self):
+        """Handle a msgid or msgstr continuation line."""
+        token = unescape(self.current_token[1:-1])
+        if self.current_state == 'ct':
+            self.current_entry.msgctxt += token
+        elif self.current_state == 'mi':
+            self.current_entry.msgid += token
+        elif self.current_state == 'mp':
+            self.current_entry.msgid_plural += token
+        elif self.current_state == 'ms':
+            self.current_entry.msgstr += token
+        elif self.current_state == 'mx':
+            self.current_entry.msgstr_plural[self.msgstr_index] += token
+        elif self.current_state == 'pp':
+            self.current_entry.previous_msgid_plural += token
+        elif self.current_state == 'pm':
+            self.current_entry.previous_msgid += token
+        elif self.current_state == 'pc':
+            self.current_entry.previous_msgctxt += token
+        # don't change the current state
+        return False
+# }}}
+# class _MOFileParser {{{
+
+
+class _MOFileParser(object):
+    """
+    A class to parse binary mo files.
+    """
+
+    def __init__(self, mofile, *args, **kwargs):
+        """
+        Constructor.
+
+        Keyword arguments:
+
+        ``mofile``
+            string, path to the mo file
+
+        ``encoding``
+            string, the encoding to use, defaults to ``default_encoding``
+            global variable (optional).
+
+        ``check_for_duplicates``
+            whether to check for duplicate entries when adding entries to the
+            file (optional, default: ``False``).
+
+        ``klass``
+            class instantiated to hold the parsed entries (optional,
+            defaults to ``MOFile``).
+        """
+        self.fhandle = open(mofile, 'rb')
+
+        klass = kwargs.get('klass')
+        if klass is None:
+            klass = MOFile
+        self.instance = klass(
+            fpath=mofile,
+            encoding=kwargs.get('encoding', default_encoding),
+            check_for_duplicates=kwargs.get('check_for_duplicates', False)
+        )
+
+    def __del__(self):
+        """
+        Make sure the file is closed, this prevents warnings on unclosed file
+        when running tests with python >= 3.2.
+        """
+        # fhandle does not exist when open() failed in the constructor
+        fhandle = getattr(self, 'fhandle', None)
+        if fhandle:
+            fhandle.close()
+
+    def parse(self):
+        """
+        Build the instance with the file handle provided in the
+        constructor.
+
+        Returns the populated file instance. Raises ``IOError`` when the
+        magic number or the major revision number is not supported; the
+        file is closed in every case.
+        """
+        try:
+            self._parse_contents()
+        finally:
+            # close opened file, also when the file is invalid
+            self.fhandle.close()
+        return self.instance
+
+    def _parse_contents(self):
+        """
+        Read the header, the two string tables and every entry of the mo
+        file into the file instance.
+        """
+        # parse magic number
+        magic_number = self._readbinary('<I', 4)
+        if magic_number == MOFile.MAGIC:
+            ii = '<II'
+        elif magic_number == MOFile.MAGIC_SWAPPED:
+            ii = '>II'
+        else:
+            raise IOError('Invalid mo file, magic number is incorrect !')
+        self.instance.magic_number = magic_number
+        # parse the version number and the number of strings
+        version, numofstrings = self._readbinary(ii, 8)
+        # from MO file format specs: "A program seeing an unexpected major
+        # revision number should stop reading the MO file entirely".
+        # The major revision is stored in the upper 16 bits, the lower 16
+        # bits hold the minor revision which must not cause a rejection.
+        if (version >> 16) not in (0, 1):
+            raise IOError('Invalid mo file, unexpected major revision number')
+        self.instance.version = version
+        # original strings and translation strings hash table offset
+        msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
+        # move to msgid hash table and read length and offset of msgids
+        self.fhandle.seek(msgids_hash_offset)
+        msgids_index = []
+        for i in range(numofstrings):
+            msgids_index.append(self._readbinary(ii, 8))
+        # move to msgstr hash table and read length and offset of msgstrs
+        self.fhandle.seek(msgstrs_hash_offset)
+        msgstrs_index = []
+        for i in range(numofstrings):
+            msgstrs_index.append(self._readbinary(ii, 8))
+        # build entries
+        encoding = self.instance.encoding
+        for i in range(numofstrings):
+            self.fhandle.seek(msgids_index[i][1])
+            msgid = self.fhandle.read(msgids_index[i][0])
+
+            self.fhandle.seek(msgstrs_index[i][1])
+            msgstr = self.fhandle.read(msgstrs_index[i][0])
+            if i == 0 and not msgid:  # metadata
+                raw_metadata, metadata = msgstr.split(b('\n')), {}
+                for line in raw_metadata:
+                    tokens = line.split(b(':'), 1)
+                    if tokens[0] != b(''):
+                        try:
+                            k = tokens[0].decode(encoding)
+                            v = tokens[1].decode(encoding)
+                            metadata[k] = v.strip()
+                        except IndexError:
+                            # key without a colon: store an empty value
+                            metadata[k] = u('')
+                self.instance.metadata = metadata
+                continue
+            # test if we have a plural entry
+            msgid_tokens = msgid.split(b('\0'))
+            if len(msgid_tokens) > 1:
+                entry = self._build_entry(
+                    msgid=msgid_tokens[0],
+                    msgid_plural=msgid_tokens[1],
+                    msgstr_plural=dict((k, v) for k, v in
+                                       enumerate(msgstr.split(b('\0'))))
+                )
+            else:
+                entry = self._build_entry(msgid=msgid, msgstr=msgstr)
+            self.instance.append(entry)
+
+    def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
+                     msgstr_plural=None):
+        """
+        Build a MOEntry from raw byte strings, decoding them with the
+        encoding of the file instance.
+        """
+        # a context, when present, is stored before the msgid and
+        # separated from it by an EOT byte
+        msgctxt_msgid = msgid.split(b('\x04'))
+        encoding = self.instance.encoding
+        if len(msgctxt_msgid) > 1:
+            kwargs = {
+                'msgctxt': msgctxt_msgid[0].decode(encoding),
+                'msgid': msgctxt_msgid[1].decode(encoding),
+            }
+        else:
+            kwargs = {'msgid': msgid.decode(encoding)}
+        if msgstr:
+            kwargs['msgstr'] = msgstr.decode(encoding)
+        if msgid_plural:
+            kwargs['msgid_plural'] = msgid_plural.decode(encoding)
+        if msgstr_plural:
+            for k in msgstr_plural:
+                msgstr_plural[k] = msgstr_plural[k].decode(encoding)
+            kwargs['msgstr_plural'] = msgstr_plural
+        return MOEntry(**kwargs)
+
+    def _readbinary(self, fmt, numbytes):
+        """
+        Private method that unpack n bytes of data using format <fmt>.
+        It returns a tuple or a mixed value if the tuple length is 1.
+        """
+        data = self.fhandle.read(numbytes)
+        tup = struct.unpack(fmt, data)
+        if len(tup) == 1:
+            return tup[0]
+        return tup
+# }}}
+# class TextWrapper {{{
+
+
+class TextWrapper(textwrap.TextWrapper):
+    """
+    textwrap.TextWrapper subclass that backports the
+    drop_whitespace option.
+    """
+    def __init__(self, *args, **kwargs):
+        # take our own option out before delegating to the base class,
+        # then record it on the instance
+        drop = kwargs.pop('drop_whitespace', True)
+        textwrap.TextWrapper.__init__(self, *args, **kwargs)
+        self.drop_whitespace = drop
+
+    def _wrap_chunks(self, chunks):
+        """_wrap_chunks(chunks : [string]) -> [string]
+
+        Turn a sequence of text chunks into a list of lines no longer
+        than 'self.width'.  (With 'break_long_words' false some lines
+        may exceed it.)  A chunk is roughly a word or the whitespace
+        between two words: it is never split (modulo
+        'break_long_words'), but a line break may fall between any two
+        chunks.  Chunks should not have internal whitespace; ie. a chunk
+        is either all whitespace or a "word".  When 'drop_whitespace' is
+        set, whitespace chunks are removed from the beginning and end of
+        lines; all other whitespace is preserved.
+        """
+        lines = []
+        if self.width <= 0:
+            raise ValueError("invalid width %r (must be > 0)" % self.width)
+
+        # Use the list as a stack: once reversed, the next chunk to
+        # place is always the last item and can be popped cheaply.
+        chunks.reverse()
+
+        while chunks:
+
+            # Chunks collected for the line being built, and their
+            # total length.
+            cur_line = []
+            cur_len = 0
+
+            # Only the very first line gets the initial indent.
+            indent = self.subsequent_indent if lines else self.initial_indent
+
+            # Room left for text once the indent is accounted for.
+            width = self.width - len(indent)
+
+            # A line must not start with whitespace, except for the very
+            # first line of the text (no line stored yet).
+            if lines and self.drop_whitespace and not chunks[-1].strip():
+                del chunks[-1]
+
+            # Take chunks for as long as the next one still fits.
+            while chunks and cur_len + len(chunks[-1]) <= width:
+                piece = chunks.pop()
+                cur_line.append(piece)
+                cur_len += len(piece)
+
+            # The line is full and the next chunk would not fit on *any*
+            # line, not just this one.
+            if chunks and len(chunks[-1]) > width:
+                self._handle_long_word(chunks, cur_line, cur_len, width)
+
+            # Remove an all-whitespace chunk from the end of the line.
+            if self.drop_whitespace and cur_line and not cur_line[-1].strip():
+                del cur_line[-1]
+
+            # Join the chunks back into a string and store the line.
+            if cur_line:
+                lines.append(indent + ''.join(cur_line))
+
+        return lines
+# }}}
+# function wrap() {{{
+
+
+def wrap(text, width=70, **kwargs):
+    """
+    Split one paragraph of text into lines of at most ``width`` columns
+    and return them as a list.
+
+    Extra keyword arguments are passed on to the text wrapper.
+    """
+    if sys.version_info >= (2, 6):
+        return textwrap.wrap(text, width=width, **kwargs)
+    # python < 2.6: use the TextWrapper subclass defined in this module
+    return TextWrapper(width=width, **kwargs).wrap(text)
+
+# }}}
+
+def genKeyId(inkey):
+    """
+    Return a five character identifier derived from the CRC-32 of inkey.
+
+    ``inkey``
+        byte string or text; text is encoded as UTF-8 before hashing, so
+        the function works on python 2 and 3 and with non-ASCII input.
+    """
+    if not isinstance(inkey, bytes):
+        inkey = inkey.encode('utf-8')
+    # mask to get the same unsigned value on every python version
+    crc = binascii.crc32(inkey) & 0xffffffff
+    # Use simple ASCII characters, exclude I, l, 1 and O, 0 to avoid confusing IDs
+    symbols = "ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz23456789"
+    outkey = ""
+    # one character per 6 bit group, starting with the lowest bits
+    for _ in range(5):
+        outkey += symbols[(crc & 63) % len(symbols)]
+        crc >>= 6
+    return outkey
diff --git a/solenv/bin/update-for-gettext b/solenv/bin/update-for-gettext
new file mode 100644
index 000000000000..1234b248490f
--- /dev/null
+++ b/solenv/bin/update-for-gettext
@@ -0,0 +1,76 @@
+#!/usr/bin/python
+"""
+Merge the per-file PO catalogs of each language/module directory into one
+gettext catalog per module.
+
+Usage: update-for-gettext path/to/dir/of/languages
+
+For every ``<language>/<module>`` directory below the given path, each file
+found is parsed as a PO catalog.  Non-empty catalogs whose first entry refers
+to a ``.src`` or ``.ui`` file are merged together and the file they came from
+is deleted.  Every merged entry that has an occurrence then gets a rewritten
+msgctxt, and the last line of its comment is replaced by a key ID computed
+from ``msgctxt|msgid``.  The result is saved in the module directory as
+``messages.po`` (``messages.pot`` when the language directory is
+``templates``).
+"""
+
+import binascii
+import polib
+from os import listdir, walk, remove
+from os.path import isdir, join
+
+import sys
+
+if len(sys.argv) < 2:
+    print(" Syntax: update-for-gettext path/to/dir/of/languages")
+    sys.exit(2)
+
+langs = [f for f in listdir(sys.argv[1]) if isdir(join(sys.argv[1], f))]
+
+for lang in langs:
+    path = join(sys.argv[1], lang)
+    modules = [f for f in listdir(path) if isdir(join(path, f))]
+    for module in modules:
+        subpath = join(path, module)
+        # sys.stderr.write() rather than "print >> sys.stderr, ...": the
+        # latter only works on Python 2 and raises TypeError on Python 3.
+        sys.stderr.write("module is %s %s %s\n" % (subpath, lang, module))
+
+        # Pass 1: collect every .src/.ui derived catalog into ``messages``.
+        # The first matching catalog becomes the merge target.
+        messages = None
+        for dirpath, _dirnames, filenames in walk(subpath):
+            for filename in filenames:
+                ipath = join(dirpath, filename)
+                sys.stderr.write("file is %s\n" % ipath)
+                po = polib.pofile(ipath)
+                if len(po) == 0:
+                    continue
+                # The first entry's first occurrence is taken as
+                # representative of the whole catalog.
+                samplefile = po[0].occurrences[0][0]
+                if not samplefile.endswith((".src", ".ui")):
+                    continue
+                if messages is None:
+                    messages = po
+                else:
+                    for entry in po:
+                        messages.append(entry)
+                remove(ipath)
+
+        if messages is None:
+            # Nothing in this module qualified; leave it untouched.
+            continue
+
+        # Pass 2: rewrite the context and key ID of each located entry.
+        middle = 0
+        for entry in messages:
+            if not len(entry.occurrences):
+                continue
+            location = entry.occurrences[0][0]
+            lines = entry.msgctxt.split('\n')
+            if location.endswith(".ui"):
+                # Old .ui context: second line is the widget id, third line
+                # the property type.
+                widgetid = lines[1]
+                typeid = lines[2]
+                entry.msgctxt = location[:-3] + "|" + widgetid
+                if typeid == "tooltip_text":
+                    entry.msgctxt = entry.msgctxt + "|" + typeid
+                if entry.msgctxt == 'calloutpage|position' and entry.msgid == 'Middle':
+                    # The second such entry is renamed; presumably so the
+                    # (msgctxt, msgid) pair stays unique -- TODO confirm.
+                    middle = middle + 1
+                    if middle == 2:
+                        entry.msgid = "Center"
+            else:
+                # Other sources: the second context line is the new context,
+                # minus at most one of the known offset suffixes.
+                ctxline = lines[1]
+                if ctxline.endswith("+RID_SC_FUNC_DESCRIPTIONS_START"):
+                    ctxline = ctxline[:-len("+RID_SC_FUNC_DESCRIPTIONS_START")]
+                elif ctxline.endswith("+RID_GLOBSTR_OFFSET"):
+                    ctxline = ctxline[:-len("+RID_GLOBSTR_OFFSET")]
+                entry.msgctxt = ctxline
+            # Replace the last comment line with the freshly computed key ID.
+            comments = entry.comment.split('\n')
+            keyid = entry.msgctxt + '|' + entry.msgid
+            comments[-1] = polib.genKeyId(keyid.encode('utf-8'))
+            entry.comment = "\n".join(comments)
+
+        if lang != "templates":
+            messages.save(join(subpath, "messages.po"))
+        else:
+            messages.save(join(subpath, "messages.pot"))


More information about the Libreoffice-commits mailing list