[Libreoffice-commits] core.git: i18npool/source

László Németh nemeth at numbertext.org
Sat Mar 31 19:13:09 UTC 2018


 i18npool/source/collator/data/hu_charset.txt |   56 ++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 9 deletions(-)

New commits:
commit 34ae19b1e9ede8bdcf56e393f68a7f875e32a068
Author: László Németh <nemeth at numbertext.org>
Date:   Sat Mar 31 16:08:07 2018 +0200

    tdf#116666 Hungarian collation: casing and equality fixes
    
    Casing fixes: “CCS” is now sorted as “CSCS”, not as “cscs”.
    “Ccs” and “CCS” are capitalized versions of the simplified
    double consonant “cs”, but “CCs” is an abbreviation of words
    beginning with “C” and “Cs” (similar to “AkH.”, “MHSz.”, etc.).
    
    To avoid the comparison result “equal” we set a precedence
    between the simplified and compound-like long forms, too.
    For example, “ésszerű” (old orthography before 2015) and
    “észszerű” (in this order, not “észszerű”, “ésszerű”), or
    “mennyelv” and “menynyelv” (words with different meanings), are
    sorted as if spelled “észszerű” and “észSzerű”, or “menynyelv” and “menyNyelv”.
    
    Change-Id: If31c97262bc74429b514ede43a0384de80fe8ac5
    Reviewed-on: https://gerrit.libreoffice.org/52194
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Andras Timar <andras.timar at collabora.com>

diff --git a/i18npool/source/collator/data/hu_charset.txt b/i18npool/source/collator/data/hu_charset.txt
index 2b7288e7d8a6..2b6677e0dc1b 100644
--- a/i18npool/source/collator/data/hu_charset.txt
+++ b/i18npool/source/collator/data/hu_charset.txt
@@ -35,12 +35,50 @@
 & u < ü <<< Ü <<< ű <<< Ű
 & z < zs <<< Zs <<< ZS
 
-& cs <<< ccs / cs <<< Ccs / cs <<< CCs / cs <<< CCS / cs
-& dz <<< ddz / dz <<< Ddz / dz <<< DDz / dz <<< DDZ / dz
-& dzs <<< ddzs / dzs <<< Ddzs / dzs <<< DDzs / dzs <<< DDZs / dzs <<< DDZS / dzs
-& gy <<< ggy / gy <<< Ggy / gy <<< GGy / gy <<< GGY / gy
-& ly <<< lly / ly <<< Lly / ly <<< LLy / ly <<< LLY / ly
-& ny <<< nny / ny <<< Nny / ny <<< NNy / ny <<< NNY / ny
-& sz <<< ssz / sz <<< Ssz / sz <<< SSz / sz <<< SSZ / sz
-& ty <<< tty / ty <<< Tty / ty <<< TTy / ty <<< TTY / ty
-& zs <<< zzs / zs <<< Zzs / zs <<< ZZs / zs <<< ZZS / zs
+# We expand simplified double consonants, for example,
+# "ccs" is sorted as "cscs". This is still not an error-free
+# method, but it is better than the old method, because now
+# it's *possible* to fix all errors in a semi-automatic way,
+# using soft hyphens. Inserting them in bad or ambiguous
+# character positions will fix all automatic sortings later,
+# for example, “arccsont” -> “arc|csont” (“|” marks the soft
+# hyphen position).
+
+# Note: Ccs and CCS are capitalized versions of the simplified
+# double consonant “cs”, but CCs is an abbreviation of words
+# beginning with C and Cs (similar to “AkH.”, “MHSz.”).
+
+# To avoid the comparison result “equal” we set a precedence
+# between the simplified and compound-like long forms, too.
+# For example, “ésszerű” (old orthography before 2015) and
+# “észszerű” (in this order, not “észszerű”, “ésszerű”), or
+# “mennyelv” and “menynyelv” (words with different meanings), are
+# sorted as if spelled “észszerű” and “észSzerű”, or “menynyelv” and “menyNyelv”.
+
+& cs <<< ccs / cs <<< cscs / Cs
+& Cs <<< Ccs / cs <<< CsCs / Cs
+& CS <<< CCS / Cs <<< CSCS / CS
+& dz <<< ddz / dz <<< dzdz / Dz
+& Dz <<< Ddz / dz <<< DzDz / Dz
+& DZ <<< DDZ / Dz <<< DZDZ / DZ
+& dzs <<< ddzs / dzs <<< dzsdzs / Dzs
+& Dzs <<< Ddzs / dzs <<< DzsDzs / Dzs
+& DZS <<< DDZS / Dzs <<< DZSDZS / DZS
+& gy <<< ggy / gy <<< gygy / Gy
+& Gy <<< Ggy / gy <<< GyGy / Gy
+& GY <<< GGY / Gy <<< GYGY / GY
+& ly <<< lly / ly <<< lyly / Ly
+& Ly <<< Lly / ly <<< LyLy / Ly
+& LY <<< LLY / Ly <<< LYLY / LY
+& ny <<< nny / ny <<< nyny / Ny
+& Ny <<< Nny / ny <<< NyNy / Ny
+& NY <<< NNY / Ny <<< NYNY / NY
+& sz <<< ssz / sz <<< szsz / Sz
+& Sz <<< Ssz / sz <<< SzSz / Sz
+& SZ <<< SSZ / Sz <<< SZSZ / SZ
+& ty <<< tty / ty <<< tyty / Ty
+& Ty <<< Tty / ty <<< TyTy / Ty
+& TY <<< TTY / Ty <<< TYTY / TY
+& zs <<< zzs / zs <<< zszs / Zs
+& Zs <<< Zzs / zs <<< ZsZs / Zs
+& ZS <<< ZZS / Zs <<< ZSZS / ZS


More information about the Libreoffice-commits mailing list