[Libreoffice-commits] core.git: 2 commits - i18nlangtag/Library_i18nlangtag.mk i18nlangtag/source
Eike Rathke
erack at redhat.com
Tue Sep 3 05:30:37 PDT 2013
i18nlangtag/Library_i18nlangtag.mk | 1
i18nlangtag/source/languagetag/languagetag.cxx | 91 +++++++++++++++++--------
2 files changed, 64 insertions(+), 28 deletions(-)
New commits:
commit 7c032aa60eaccde88a9064a80bb69fe8076a040b
Author: Eike Rathke <erack at redhat.com>
Date: Tue Sep 3 14:17:54 2013 +0200
resolve all known fallbacks
Needed for rsc during build time to not pull in liblangtag and its data.
Change-Id: I1d4dd32b04ed93ec75720132a30b66ef63fec179
diff --git a/i18nlangtag/Library_i18nlangtag.mk b/i18nlangtag/Library_i18nlangtag.mk
index fd1548b..30b6e01 100644
--- a/i18nlangtag/Library_i18nlangtag.mk
+++ b/i18nlangtag/Library_i18nlangtag.mk
@@ -21,6 +21,7 @@ $(eval $(call gb_Library_use_libraries,i18nlangtag,\
))
$(eval $(call gb_Library_use_externals,i18nlangtag,\
+ boost_headers \
icu_headers \
icuuc \
))
diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx
index 65236ce..de48d06 100644
--- a/i18nlangtag/source/languagetag/languagetag.cxx
+++ b/i18nlangtag/source/languagetag/languagetag.cxx
@@ -16,6 +16,7 @@
#include <osl/file.hxx>
#include <rtl/instance.hxx>
#include <rtl/locale.h>
+#include <boost/unordered_set.hpp>
//#define erDEBUG
@@ -46,13 +47,44 @@ struct myLtError
~myLtError() { if (p) lt_error_unref( p); }
};
-
// "statics" to be returned as const reference to an empty locale and string.
namespace {
struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {};
struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {};
}
+typedef ::boost::unordered_set< OUString, OUStringHash > KnownTagSet; // set of OUString tags hashed with OUStringHash
+namespace {
+struct theKnowns : public rtl::Static< KnownTagSet, theKnowns > {}; // the set that getKnowns() fills and returns
+struct theMutex : public rtl::Static< osl::Mutex, theMutex > {}; // held by getKnowns() while it fills theKnowns
+}
+
+static const KnownTagSet & getKnowns() // Returns the set of known fallback strings, filling it first if it is still empty.
+{
+ KnownTagSet & rKnowns = theKnowns::get();
+ if (rKnowns.empty()) // first check is made without the mutex; NOTE(review): it could observe the set while another thread is still inserting below - confirm first use cannot be concurrent
+ {
+ osl::MutexGuard aGuard( theMutex::get());
+ if (rKnowns.empty()) // checked again now that the mutex is held
+ {
+ ::std::vector< MsLangId::LanguagetagMapping > aDefined( MsLangId::getDefinedLanguagetags());
+ for (::std::vector< MsLangId::LanguagetagMapping >::const_iterator it( aDefined.begin());
+ it != aDefined.end(); ++it)
+ {
+ // Construct the LanguageTag from the mapping's mnLang and not
+ // from its BCP47 string, because with a string canonicalize()
+ // would call this getKnowns() again.
+ ::std::vector< OUString > aFallbacks( LanguageTag( (*it).mnLang).getFallbackStrings());
+ for (::std::vector< OUString >::const_iterator fb( aFallbacks.begin()); fb != aFallbacks.end(); ++fb)
+ {
+ rKnowns.insert( *fb); // every fallback string of every defined tag is recorded
+ }
+ }
+ }
+ }
+ return rKnowns;
+}
+
/** A reference holder for liblangtag data de/initialization, one static
instance. Currently implemented such that the first "ref" inits and dtor
@@ -448,7 +480,7 @@ bool LanguageTag::canonicalize()
// Now this is getting funny.. we only have some BCP47 string
// and want to determine if parsing it would be possible
// without using liblangtag just to see if it is a simple known
- // locale.
+ // locale or could fall back to one.
OUString aLanguage, aScript, aCountry, aVariants;
Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
if (eExt != EXTRACTED_NONE)
@@ -512,6 +544,12 @@ bool LanguageTag::canonicalize()
}
if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
meIsLiblangtagNeeded = DECISION_NO; // known locale
+ else
+ {
+ const KnownTagSet& rKnowns = getKnowns();
+ if (rKnowns.find( maBcp47) != rKnowns.end())
+ meIsLiblangtagNeeded = DECISION_NO; // known fallback
+ }
}
}
if (bTemporaryLocale)
commit c318f19c492f76e3b7d557257b3706f05b6fed62
Author: Eike Rathke <erack at redhat.com>
Date: Tue Sep 3 11:31:39 2013 +0200
simplified simpleExtract() and made some cases work
Change-Id: I5d39a020a4bb6164c0d7695d2f2e43785869b345
diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx
index 22f82fe..65236ce 100644
--- a/i18nlangtag/source/languagetag/languagetag.cxx
+++ b/i18nlangtag/source/languagetag/languagetag.cxx
@@ -1302,9 +1302,9 @@ LanguageTag::Extraction LanguageTag::simpleExtract( const OUString& rBcp47,
Extraction eRet = EXTRACTED_NONE;
const sal_Int32 nLen = rBcp47.getLength();
const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
- const sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1));
- const sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1));
- const sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1));
+ sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1));
+ sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1));
+ sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1));
if (nLen == 1 && rBcp47[0] == '*') // * the dreaded jolly joker
{
// It's f*d up but we need to recognize this.
@@ -1362,45 +1362,42 @@ LanguageTag::Extraction LanguageTag::simpleExtract( const OUString& rBcp47,
else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 10 && nLen >= 15) // ll-Ssss-CC-vvvv[vvvv][-...]
|| (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 11 && nLen >= 16)) // lll-Ssss-CC-vvvv[vvvv][-...]
{
- if (nHyph4 < 0 || (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9))
+ if (nHyph4 < 0)
+ nHyph4 = rBcp47.getLength();
+ if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)
{
+ rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
+ rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
+ rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
rVariants = rBcp47.copy( nHyph3 + 1);
- if (nHyph4 < 0 && (rVariants.getLength() < 4 || 8 < rVariants.getLength()))
- {
- rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
- rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
- rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
- eRet = EXTRACTED_LV;
- }
+ eRet = EXTRACTED_LV;
}
}
else if ( (nHyph1 == 2 && nHyph2 == 5 && nLen >= 10) // ll-CC-vvvv[vvvv][-...]
|| (nHyph1 == 3 && nHyph2 == 6 && nLen >= 11)) // lll-CC-vvvv[vvvv][-...]
{
- if (nHyph3 < 0 || (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9))
+ if (nHyph3 < 0)
+ nHyph3 = rBcp47.getLength();
+ if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)
{
+ rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
+ rScript = OUString();
+ rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
rVariants = rBcp47.copy( nHyph2 + 1);
- if (nHyph3 < 0 && (rVariants.getLength() < 4 || 8 < rVariants.getLength()))
- {
- rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
- rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
- rScript = OUString();
- eRet = EXTRACTED_LV;
- }
+ eRet = EXTRACTED_LV;
}
}
else if ( (nHyph1 == 2 && nLen >= 8) // ll-vvvvv[vvv][-...]
|| (nHyph1 == 3 && nLen >= 9)) // lll-vvvvv[vvv][-...]
{
- if (nHyph2 < 0 || (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9))
+ if (nHyph2 < 0)
+ nHyph2 = rBcp47.getLength();
+ if (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9)
{
+ rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
+ rScript = rCountry = OUString();
rVariants = rBcp47.copy( nHyph1 + 1);
- if (nHyph2 < 0 && (rVariants.getLength() < 5 || 8 < rVariants.getLength()))
- {
- rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
- rScript = rCountry = OUString();
- eRet = EXTRACTED_LV;
- }
+ eRet = EXTRACTED_LV;
}
}
if (eRet == EXTRACTED_NONE)
More information about the Libreoffice-commits
mailing list