[uim-commit] r668 - trunk/uim
ekato at freedesktop.org
ekato at freedesktop.org
Thu Feb 17 20:56:16 PST 2005
Author: ekato
Date: 2005-02-17 20:56:13 -0800 (Thu, 17 Feb 2005)
New Revision: 668
Added:
trunk/uim/uim-encoding.h
Modified:
trunk/uim/uim-func.c
Log:
* uim/uim-encoding.h : New file. Table for codeset aliases.
* uim/uim-func.c (check_encoding_equivalence) : New function to
check whether tocode and fromcode of iconv_open(3) are
equivalent.
(uim_iconv_is_convertible) : Use check_encoding_equivalence()
instead of strcmp(). Use uim_iconv_open() instead of
iconv_open(3).
(uim_get_encoding_alias) : New function. Get list of aliases of
supplied encoding.
(uim_iconv_open) : New function. Wrapper for iconv_open(3) to
avoid system dependency.
(uim_iconv_create) : Use check_encoding_equivalence() instead of
strcmp().
(uim_iconv_code_conv) : If ic is NULL, return immediately.
Added: trunk/uim/uim-encoding.h
===================================================================
--- trunk/uim/uim-encoding.h 2005-02-17 07:26:41 UTC (rev 667)
+++ trunk/uim/uim-encoding.h 2005-02-18 04:56:13 UTC (rev 668)
@@ -0,0 +1,331 @@
+/*
+
+ Copyright (c) 2005 uim Project http://uim.freedesktop.org/
+
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ 3. Neither the name of authors nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ SUCH DAMAGE.
+
+*/
+
+#ifndef _uim_encoding_h_included_
+#define _uim_encoding_h_included_
+
+/*
+ * Mostly from http://www.openi18n.org/subgroups/sa/locnameguide/final/CodesetAliasTable.html
+ */
+static const char *alias_us[] = {
+ "US-ASCII", "ANSI_X3.4-1968", "ASCII", "CP367", "cp367", "IBM367",
+ "ISO-IR-6", "ISO646-US", "ISO-646-US", "ANSI_X3.4-1986", "iso-ir-6",
+ "ISO_646.irv:1991", "us", "csASCII", "646",
+ NULL
+};
+static const char *alias_big5[] = {
+ "Big5", "TCA-BIG5", "BIG5", "BIG5-CP950", "csBig5", "big5",
+ NULL
+};
+static const char *alias_hkscs[] = {
+ "Big5-HKSCS", "HKSCS-BIG5", "BIG5-HKSCS", "BIG5HKSCS", "big5hk",
+ "big5-hkscs:unicode 3.0",
+ NULL
+};
+static const char *alias_eucjp[] = {
+ "EUC-JP", "eucJP", "ujis",
+ "Extended_UNIX_Code_Packed_Format_for_Japanese", "euc-jp",
+ "csEUCPkdFmtJapanese",
+ NULL
+};
+static const char *alias_utf8[] = {
+ "UTF-8", "utf-8", "unicode-1-utf-8", "utf8", "UTF8",
+ NULL
+};
+static const char *alias_euckr[] = {
+ "EUC-KR", "csEUCKR", "5601", "ksc-5601", "ksc-5601-1987",
+ "ksc-5601_1987", "ksc5601",
+ NULL
+};
+static const char *alias_euctw[] = {
+ "EUC-TW", "cns11643", "ibm-euctw",
+ NULL
+};
+static const char *alias_gb18030[] = {
+ "GB-18030", "GB18030", "ibm1392", "ibm-1392", "gb18030-2000",
+ NULL
+};
+static const char *alias_gb2312[] = {
+ "GB2312", "GB-2312", "csGB2312", "EUC_CN", "gb2312-80",
+ "gb2312-1980", "euccn", "euc-cn",
+ NULL
+};
+static const char *alias_gbk[] = {
+ "GB-K", "GBK",
+ NULL
+};
+static const char *alias_iso88591[] = {
+ "ISO-8859-1", "ISO-IR-100", "ISO_8859-1:1987", "ISO_8859-1",
+ "LATIN1", "L1", "latin1", "l1", "IBM819", "CP819", "csISOLatin1"
+ "819", "iso8859-1", "8859-1", "iso8859_1", "iso_8859_1",
+ NULL
+};
+static const char *alias_iso88592[] = {
+ "ISO-8859-2", "ISO-IR-101", "ISO_8859-2:1987", "ISO_8859-2",
+ "LATIN2", "L2", "csISOLatin2", "912", "cp912", "ibm-912", "ibm912",
+ "iso8859-2", "8859-2", "iso8859_2", "iso_8859_2",
+ NULL
+};
+
+static const char *alias_iso88593[] = {
+ "ISO-8859-3", "ISO-IR-109", "ISO_8859-3:1988", "ISO_8859-3",
+ "LATIN3", "L3", "csISOLatin3", "913", "cp913", "ibm-913", "ibm913",
+ "iso8859-3", "8859-3", "iso8859_3", "iso_8859_3",
+ NULL
+};
+static const char *alias_iso88594[] = {
+ "ISO-8859-4", "ISO-IR-110", "ISO_8859-4:1988", "ISO_8859-4",
+ "LATIN4", "L4", "csISOLatin4", "914", "cp914", "ibm-914", "ibm914",
+ "iso8859-4", "8859-4", "iso8859_4", "iso_8859_4",
+ NULL
+};
+static const char *alias_iso88595[] = {
+ "ISO-8859-5", "ISO-IR-144", "ISO_8859-5:1988", "ISO_8859-5",
+ "CYRILLIC", "csISOLatinCyrillic", "915", "cp915", "ibm-915",
+ "ibm915", "iso8859-5", "8859-5", "iso8859_5", "iso_8859_5",
+ NULL
+};
+static const char *alias_iso88596[] = {
+ "ISO-8859-6", "ISO-IR-127", "ISO_8859-6:1987", "ISO_8859-6",
+ "ECMA-114", "ASMO-708", "ARABIC", "csISOLatinArabic", "1089",
+ "cp1089", "ibm-1089", "ibm1089", "iso8859-6", "8859-6", "iso8859_6",
+ "iso_8859_6",
+ NULL
+};
+static const char *alias_iso88597[] = {
+ "ISO-8859-7", "ISO-IR-126", "ISO_8859-7:1987", "ISO_8859-7",
+ "ELOT_928", "ECMA-118", "greek", "greek8", "csISOLatinGreek", "813",
+ "cp813", "ibm-813", "ibm813", "iso8859-7", "8859-7", "iso8859_7",
+ "iso_8859_7",
+ NULL
+};
+static const char *alias_iso88598[] = {
+ "ISO-8859-8", "ISO-IR-138", "ISO_8859-8:1988", "ISO_8859-8",
+ "hebrew", "csISOLatinHebrew", "916", "cp916", "ibm-916", "ibm916",
+ "iso8859-8", "8859-8", "iso8859_8", "iso_8859_8",
+ NULL
+};
+static const char *alias_iso88599[] = {
+ "ISO-8859-9", "ISO-IR-148", "ISO_8859-9:1989", "ISO_8859-9",
+ "latin5", "l5", "csISOLatin5", "920", "cp920", "ibm-920", "ibm920",
+ "iso8859-9", "8859-9", "iso8859_9", "iso_8859_9",
+ NULL
+};
+static const char *alias_iso885913[] = {
+ "ISO-8859-13", "ISO-IR-179", "LATIN7", "L7", "iso_8859-13",
+ "iso8859-13", "8859-13", "iso8859_13", "iso_8859_13",
+ NULL
+};
+static const char *alias_iso885914[] = {
+ "ISO-8859-14", "LATIN8", "L8", "ISO-8859-14", "iso-ir-199",
+ "ISO_8859-14:1998", "ISO_8859-14", "iso-celtic",
+ NULL
+};
+static const char *alias_iso885915[] = {
+ "ISO-8859-15", "csisolatin9", "csisolatin0", "latin9", "latin0",
+ "923", "cp923", "ibm-923", "ibm923", "iso8859-15", "iso_8859-15",
+ "8859-15", "iso_8859-15_FDIS", "L9",
+ NULL
+};
+static const char *alias_iso885916[] = {
+ "ISO-8859-16", "ISO-IR-226", "LATIN10", "L10",
+ NULL
+};
+static const char *alias_koi8r[] = {
+ "KOI8-R", "csKOI8R", "koi8",
+ NULL
+};
+static const char *alias_koi8u[] = {
+ "KOI-8-U",
+ NULL
+};
+static const char *alias_koi8t[] = {
+ "KOI-8-T",
+ NULL
+};
+static const char *alias_sjis[] = {
+ "Shift_JIS", "SHIFT-JIS", "SHIFTJIS", "SJIS", "sjis", "MS_Kanji",
+ "csShiftJIS", "pck", "PCK",
+ NULL
+};
+static const char *alias_viscii[] = {
+ "VISCII",
+ NULL
+};
+static const char *alias_cp437[] = {
+ "CP-437", "IBM437", "CP437", "437", "csPC8CodePage437", "ibm-437",
+ NULL
+};
+static const char *alias_cp850[] = {
+ "CP-850", "IBM850", "cp850", "850", "csPC850Multilingual",
+ "ibm-850",
+ NULL
+};
+static const char *alias_cp851[] = {
+ "CP-851", "IBM851", "cp851", "851", "csIBM851",
+ NULL
+};
+static const char *alias_cp852[] = {
+ "CP-852", "IBM852", "cp852", "852", "csPCp852", "ibm-852",
+ NULL
+};
+static const char *alias_cp855[] = {
+ "CP-855", "IBM855", "cp855", "855", "csIBM855", "cspcp855",
+ "ibm-855",
+ NULL
+};
+static const char *alias_cp857[] = {
+ "CP-857", "IBM857", "cp857", "857", "csIBM857", "ibm-857",
+ NULL
+};
+static const char *alias_cp860[] = {
+ "CP-860", "IBM860", "cp860", "860", "csIBM860", "ibm-860",
+ NULL
+};
+static const char *alias_cp861[] = {
+ "CP-861", "IBM861", "cp861", "861", "cp-is", "csIBM861", "ibm-861",
+ NULL
+};
+static const char *alias_cp862[] = {
+ "CP-862", "IBM862", "cp862", "862", "csPC862LatinHebrew", "ibm-862",
+ NULL
+};
+static const char *alias_cp863[] = {
+ "CP-863", "IBM863", "cp863", "863", "csIBM863", "ibm-863",
+ NULL
+};
+static const char *alias_cp864[] = {
+ "CP-864", "IBM864", "cp864", "csIBM864", "ibm-864",
+ NULL
+};
+static const char *alias_cp865[] = {
+ "CP-865", "IBM865", "cp865", "865", "csIBM865", "ibm-865",
+ NULL
+};
+static const char *alias_cp866[] = {
+ "CP-866", "IBM866", "cp866", "866", "csIBM866", "ibm-866",
+ NULL
+};
+static const char *alias_cp868[] = {
+ "CP-868", "IBM868", "CP868", "cp-ar", "csIBM868", "ibm-868",
+ NULL
+};
+static const char *alias_cp869[] = {
+ "CP-869", "IBM869", "cp869", "869", "cp-gr", "csIBM869",
+ NULL
+};
+static const char *alias_cp891[] = {
+ "CP-891", "IBM891", "cp891", "csIBM891",
+ NULL
+};
+static const char *alias_cp903[] = {
+ "CP-903", "IBM903", "cp903", "csIBM903",
+ NULL
+};
+static const char *alias_cp904[] = {
+ "CP-904", "IBM904", "cp904", "904", "csIBM904",
+ NULL
+};
+static const char *alias_cp1251[] = {
+ "CP-1251", "CP1251", "MS-CYRL", "windows-1251", "Cp1251",
+ NULL
+};
+static const char *alias_cp1255[] = {
+ "CP-1255", "CP1255", "MS-HEBR", "windows-1255",
+ NULL
+};
+static const char *alias_tis620[] = {
+ "TIS-620", "TIS620", "TIS620-0", "TIS620.2529-1", "TIS620.2533-0",
+ "ISO-IR-166", "TIS620.2533",
+ NULL
+};
+static const char *alias_georgianps[] = {
+ "GEORGIAN-PS",
+ NULL
+};
+
+static const char **uim_encoding_list[] = {
+ alias_us,
+ alias_big5,
+ alias_hkscs,
+ alias_eucjp,
+ alias_utf8,
+ alias_euckr,
+ alias_euctw,
+ alias_gb18030,
+ alias_gb2312,
+ alias_gbk,
+ alias_iso88591,
+ alias_iso88592,
+ alias_iso88593,
+ alias_iso88594,
+ alias_iso88595,
+ alias_iso88596,
+ alias_iso88597,
+ alias_iso88598,
+ alias_iso88599,
+ alias_iso885913,
+ alias_iso885914,
+ alias_iso885915,
+ alias_iso885916,
+ alias_koi8r,
+ alias_koi8u,
+ alias_koi8t,
+ alias_sjis,
+ alias_viscii,
+ alias_cp437,
+ alias_cp850,
+ alias_cp851,
+ alias_cp852,
+ alias_cp855,
+ alias_cp857,
+ alias_cp860,
+ alias_cp861,
+ alias_cp862,
+ alias_cp863,
+ alias_cp864,
+ alias_cp865,
+ alias_cp866,
+ alias_cp868,
+ alias_cp869,
+ alias_cp891,
+ alias_cp903,
+ alias_cp904,
+ alias_cp1251,
+ alias_cp1255,
+ alias_tis620,
+ alias_georgianps,
+ NULL,
+};
+
+#endif
Modified: trunk/uim/uim-func.c
===================================================================
--- trunk/uim/uim-func.c 2005-02-17 07:26:41 UTC (rev 667)
+++ trunk/uim/uim-func.c 2005-02-18 04:56:13 UTC (rev 668)
@@ -38,9 +38,13 @@
#include <stdarg.h>
#include "context.h"
#include "uim-scm.h"
+#include "uim-encoding.h"
#define MAX_LENGTH_OF_INT_AS_STR (((sizeof(int) == 4) ? sizeof("-2147483648") : sizeof("-9223372036854775808")) - sizeof((char)'\0'))
+static const char **uim_get_encoding_alias(const char *encoding);
+static iconv_t uim_iconv_open(const char *tocode, const char *fromcode);
+
char *uim_return_str;
char *uim_return_str_list[10]; /* XXX */
/* duplicate definition */
@@ -223,16 +227,59 @@
uc->nr_psegs = 0;
}
+static int check_encoding_equivalence(const char *tocode, const char *fromcode)
+{
+ const char **alias_tocode;
+ const char **alias_fromcode;
+ int i, j;
+ int alias_tocode_alloced = 0;
+ int alias_fromcode_alloced = 0;
+ int found = 0;
+
+ alias_tocode = uim_get_encoding_alias(tocode);
+ alias_fromcode = uim_get_encoding_alias(fromcode);
+
+ if (!alias_tocode) {
+ alias_tocode = malloc(sizeof(char *) * 2);
+ alias_tocode[0] = tocode;
+ alias_tocode[1] = NULL;
+ alias_tocode_alloced = 1;
+ }
+ if (!alias_fromcode) {
+ alias_fromcode = malloc(sizeof(char *) * 2);
+ alias_fromcode[0] = fromcode;
+ alias_fromcode[1] = NULL;
+ alias_fromcode_alloced = 1;
+ }
+
+ for (i = 0; alias_tocode[i]; i++) {
+ for (j = 0; alias_fromcode[j]; j++) {
+ if (!strcmp(alias_tocode[i], alias_fromcode[j])) {
+ found = 1;
+ break;
+ }
+ }
+ if (found)
+ break;
+ }
+
+ if (alias_tocode_alloced)
+ free(alias_tocode);
+ if (alias_fromcode_alloced)
+ free(alias_fromcode);
+ return found;
+}
+
int
uim_iconv_is_convertible(const char *tocode, const char *fromcode)
{
iconv_t ic;
- if (!strcmp("UTF-8", fromcode) || !strcmp(tocode, fromcode)) {
+ if (check_encoding_equivalence(tocode, fromcode))
return 1;
- }
+
/* TODO cache the result */
- ic = iconv_open(tocode, fromcode);
+ ic = uim_iconv_open(tocode, fromcode);
if (ic == (iconv_t)-1) {
return 0;
}
@@ -240,15 +287,70 @@
return 1;
}
+static const char**
+uim_get_encoding_alias(const char *encoding) {
+ int i, j;
+ const char **alias;
+
+ for (i = 0; (alias = uim_encoding_list[i]); i++) {
+ for (j = 0; alias[j]; j++) {
+ if (!strcmp(alias[j], encoding))
+ return alias;
+ }
+ }
+ return NULL;
+}
+
+static iconv_t
+uim_iconv_open(const char *tocode, const char *fromcode) {
+ iconv_t cd = (iconv_t)-1;
+ int i, j;
+ const char **alias_tocode, **alias_fromcode;
+ int alias_tocode_alloced = 0;
+ int alias_fromcode_alloced = 0;
+ int opened = 0;
+
+ alias_tocode = uim_get_encoding_alias(tocode);
+ alias_fromcode = uim_get_encoding_alias(fromcode);
+
+ if (!alias_tocode) {
+ alias_tocode = malloc(sizeof(char *) * 2);
+ alias_tocode[0] = tocode;
+ alias_tocode[1] = NULL;
+ alias_tocode_alloced = 1;
+ }
+ if (!alias_fromcode) {
+ alias_fromcode = malloc(sizeof(char *) * 2);
+ alias_fromcode[0] = fromcode;
+ alias_fromcode[1] = NULL;
+ alias_fromcode_alloced = 1;
+ }
+
+ for (i = 0; alias_tocode[i]; i++) {
+ for (j = 0; alias_fromcode[j]; j++) {
+ cd = iconv_open(alias_tocode[i], alias_fromcode[j]);
+ if (cd != (iconv_t)-1) {
+ opened = 1;
+ break;
+ }
+ }
+ if (opened)
+ break;
+ }
+
+ if (alias_tocode_alloced)
+ free(alias_tocode);
+ if (alias_fromcode_alloced)
+ free(alias_fromcode);
+ return cd;
+}
+
void *
uim_iconv_create(const char *tocode, const char *fromcode)
{
iconv_t ic;
- if (!strcmp(tocode, fromcode))
- return (void *) 0;
-
- ic = iconv_open(tocode, fromcode);
+ ic = uim_iconv_open(tocode, fromcode);
if (ic == (iconv_t)-1) {
ic = (iconv_t)0;
}
@@ -268,14 +370,15 @@
ic = (iconv_t)obj;
if(!str)
return NULL;
+
+ if (!ic)
+ return strdup(str);
+
len = strlen(str);
buflen = (len * 6)+3;
realbuf = alloca(buflen);
outbuf = realbuf;
inbuf = str;
- if (!ic) {
- return strdup(str);
- }
bzero(realbuf, buflen);
iconv(ic, (ICONV_CONST char **)&inbuf, &len, &outbuf, &buflen);
return strdup(realbuf);
More information about the Uim-commit
mailing list