[uim-commit] r2714 - branches/r5rs/sigscheme
yamaken at freedesktop.org
yamaken at freedesktop.org
Sun Jan 1 23:36:34 PST 2006
Author: yamaken
Date: 2006-01-01 23:36:30 -0800 (Sun, 01 Jan 2006)
New Revision: 2714
Modified:
branches/r5rs/sigscheme/encoding.c
branches/r5rs/sigscheme/encoding.h
Log:
* sigscheme/encoding.h
- (SCM_CHARCODEC_CCS, SCM_CHARCODEC_CHAR_LEN): New macros
- (enum ScmCodedCharSet, ScmCharCodecMethod_ccs,
ScmCharCodecMethod_char_len): New types
- (struct ScmCharCodecVTbl_): Add new members 'ccs' and 'char_len'
* sigscheme/encoding.c
- (eucjp_ccs, eucjp_char_len, sjis_ccs, sjis_char_len, euc_char_len,
euccn_ccs, euckr_ccs, utf8_ccs, utf8_char_len, unibyte_ccs,
unibyte_char_len): New functions
- (utf8_codec_vtbl, euccn_codec_vtbl, eucjp_codec_vtbl,
euckr_codec_vtbl, sjis_codec_vtbl, unibyte_codec_vtbl): Follow the
specification change of ScmCharCodecVTbl
Modified: branches/r5rs/sigscheme/encoding.c
===================================================================
--- branches/r5rs/sigscheme/encoding.c 2006-01-02 07:28:28 UTC (rev 2713)
+++ branches/r5rs/sigscheme/encoding.c 2006-01-02 07:36:30 UTC (rev 2714)
@@ -67,6 +67,8 @@
=======================================*/
#if SCM_USE_EUCJP
static const char *eucjp_encoding(void);
+static enum ScmCodedCharSet eucjp_ccs(void);
+static int eucjp_char_len(int ch);
static ScmMultibyteCharInfo eucjp_scan_char(ScmMultibyteString mbs);
static int eucjp_str2int(const uchar *src, size_t len,
ScmMultibyteState state);
@@ -85,6 +87,8 @@
#if SCM_USE_SJIS
static const char *sjis_encoding(void);
+ static enum ScmCodedCharSet sjis_ccs(void);
+ static int sjis_char_len(int ch);
static ScmMultibyteCharInfo sjis_scan_char(ScmMultibyteString mbs);
static uchar *sjis_int2str(uchar *dst, int ch, ScmMultibyteState state);
#endif
@@ -96,27 +100,34 @@
#if (SCM_USE_EUCCN || SCM_USE_EUCKR)
/* shared by EUCCN and EUCKR */
+static int euc_char_len(int ch);
static uchar *euc_int2str(uchar *dst, int ch, ScmMultibyteState state);
#endif
#if SCM_USE_EUCCN
static const char *euccn_encoding(void);
+static enum ScmCodedCharSet euccn_ccs(void);
static ScmMultibyteCharInfo euccn_scan_char(ScmMultibyteString mbs);
#endif
#if SCM_USE_EUCKR
static const char *euckr_encoding(void);
+static enum ScmCodedCharSet euckr_ccs(void);
static ScmMultibyteCharInfo euckr_scan_char(ScmMultibyteString mbs);
#endif
#if SCM_USE_UTF8
static const char *utf8_encoding(void);
+static enum ScmCodedCharSet utf8_ccs(void);
+static int utf8_char_len(int ch);
static ScmMultibyteCharInfo utf8_scan_char(ScmMultibyteString mbs);
static int utf8_str2int(const uchar *src, size_t len, ScmMultibyteState state);
static uchar *utf8_int2str(uchar *dst, int ch, ScmMultibyteState state);
#endif
static const char *unibyte_encoding(void);
+static enum ScmCodedCharSet unibyte_ccs(void);
+static int unibyte_char_len(int ch);
static ScmMultibyteCharInfo unibyte_scan_char(ScmMultibyteString mbs);
static int unibyte_str2int(const uchar *src, size_t len,
ScmMultibyteState state);
@@ -128,6 +139,8 @@
#if SCM_USE_UTF8
static const ScmCharCodecVTbl utf8_codec_vtbl = {
&utf8_encoding,
+ &utf8_ccs,
+ &utf8_char_len,
&utf8_scan_char,
(ScmCharCodecMethod_str2int)&utf8_str2int,
(ScmCharCodecMethod_int2str)&utf8_int2str
@@ -138,6 +151,8 @@
#if SCM_USE_EUCCN
static const ScmCharCodecVTbl euccn_codec_vtbl = {
&euccn_encoding,
+ &euccn_ccs,
+ &euc_char_len,
&euccn_scan_char,
(ScmCharCodecMethod_str2int)&dbc_str2int,
(ScmCharCodecMethod_int2str)&euc_int2str
@@ -148,6 +163,8 @@
#if SCM_USE_EUCJP
static const ScmCharCodecVTbl eucjp_codec_vtbl = {
&eucjp_encoding,
+ &eucjp_ccs,
+ &eucjp_char_len,
&eucjp_scan_char,
(ScmCharCodecMethod_str2int)&eucjp_str2int,
(ScmCharCodecMethod_int2str)&eucjp_int2str
@@ -158,6 +175,8 @@
#if SCM_USE_EUCKR
static const ScmCharCodecVTbl euckr_codec_vtbl = {
&euckr_encoding,
+ &euckr_ccs,
+ &euc_char_len,
&euckr_scan_char,
(ScmCharCodecMethod_str2int)&dbc_str2int,
(ScmCharCodecMethod_int2str)&euc_int2str
@@ -168,6 +187,8 @@
#if SCM_USE_SJIS
static const ScmCharCodecVTbl sjis_codec_vtbl = {
&sjis_encoding,
+ &sjis_ccs,
+ &sjis_char_len,
&sjis_scan_char,
(ScmCharCodecMethod_str2int)&dbc_str2int,
(ScmCharCodecMethod_int2str)&sjis_int2str
@@ -177,6 +198,8 @@
static const ScmCharCodecVTbl unibyte_codec_vtbl = {
&unibyte_encoding,
+ &unibyte_ccs,
+ &unibyte_char_len,
&unibyte_scan_char,
(ScmCharCodecMethod_str2int)&unibyte_str2int,
(ScmCharCodecMethod_int2str)&unibyte_int2str
@@ -351,6 +374,21 @@
return "EUC-JP";
}
+enum ScmCodedCharSet /* 'ccs' method installed in eucjp_codec_vtbl */
+eucjp_ccs(void)
+{
+ return SCM_CCS_JIS; /* described in encoding.h as ASCII + JIS X 0208, 0212, 0213 */
+}
+
+/* FIXME: Optimize -- 'char_len' method of eucjp_codec_vtbl; encodes ch into a scratch buffer only to measure it */
+int
+eucjp_char_len(int ch)
+{
+ char buf[SCM_MB_MAX_LEN + sizeof("")]; /* sizeof("") == 1: one extra byte, presumably for the terminating NUL */
+
+ return (eucjp_int2str((uchar *)buf, ch, SCM_MB_STATELESS)) ? strlen(buf) : 0; /* 0 when int2str returns a null pointer; strlen() assumes int2str NUL-terminates buf -- TODO confirm */
+}
+
/* G0 <- (96) ASCII (or was it JIS X 0201 Roman?)
* G1 <- (94x94) JIS X 0208 kanji/kana
* G2 <- (94) JIS X 0201 Katakana ("half-width katakana")
@@ -495,6 +533,15 @@
#endif /* (SCM_USE_EUCCN || SCM_USE_EUCKR || SCM_USE_SJIS) */
#if (SCM_USE_EUCCN || SCM_USE_EUCKR)
+/* FIXME: Optimize -- 'char_len' method shared by euccn_codec_vtbl and euckr_codec_vtbl; encodes ch only to measure it */
+int
+euc_char_len(int ch)
+{
+ char buf[SCM_MB_MAX_LEN + sizeof("")]; /* sizeof("") == 1: one extra byte, presumably for the terminating NUL */
+
+ return (euc_int2str((uchar *)buf, ch, SCM_MB_STATELESS)) ? strlen(buf) : 0; /* 0 when int2str returns a null pointer; strlen() assumes int2str NUL-terminates buf -- TODO confirm */
+}
+
static uchar *
euc_int2str(uchar *dst, int ch, ScmMultibyteState state)
{
@@ -529,6 +576,12 @@
return "EUC-CN";
}
+enum ScmCodedCharSet /* 'ccs' method installed in euccn_codec_vtbl */
+euccn_ccs(void)
+{
+ return SCM_CCS_UNKNOWN; /* enum ScmCodedCharSet has no EUC-CN specific value in this revision */
+}
+
/* FIXME: NOT TESTED!
*
* G0 <- ASCII (or GB 1988?)
@@ -563,6 +616,12 @@
#endif
#if SCM_USE_EUCKR
+enum ScmCodedCharSet /* 'ccs' method installed in euckr_codec_vtbl */
+euckr_ccs(void)
+{
+ return SCM_CCS_UNKNOWN; /* enum ScmCodedCharSet has no EUC-KR specific value in this revision */
+}
+
static const char *
euckr_encoding(void)
{
@@ -628,6 +687,21 @@
return "UTF-8";
}
+enum ScmCodedCharSet /* 'ccs' method installed in utf8_codec_vtbl */
+utf8_ccs(void)
+{
+ return SCM_CCS_UNICODE; /* the UTF-8 codec reports Unicode as its coded character set */
+}
+
+/* FIXME: Optimize -- 'char_len' method of utf8_codec_vtbl; encodes ch into a scratch buffer only to measure it */
+int
+utf8_char_len(int ch)
+{
+ char buf[SCM_MB_MAX_LEN + sizeof("")]; /* sizeof("") == 1: one extra byte, presumably for the terminating NUL */
+
+ return (utf8_int2str((uchar *)buf, ch, SCM_MB_STATELESS)) ? strlen(buf) : 0; /* 0 when int2str returns a null pointer; strlen() assumes int2str NUL-terminates buf -- TODO confirm */
+}
+
static ScmMultibyteCharInfo
utf8_scan_char(ScmMultibyteString mbs)
{
@@ -768,6 +842,21 @@
return "SHIFT_JIS";
}
+enum ScmCodedCharSet /* 'ccs' method installed in sjis_codec_vtbl */
+sjis_ccs(void)
+{
+ return SCM_CCS_UNKNOWN; /* reported as unknown in this revision; SCM_CCS_JIS is only returned by eucjp_ccs() */
+}
+
+/* FIXME: Optimize -- 'char_len' method of sjis_codec_vtbl; encodes ch into a scratch buffer only to measure it */
+int
+sjis_char_len(int ch)
+{
+ char buf[SCM_MB_MAX_LEN + sizeof("")]; /* sizeof("") == 1: one extra byte, presumably for the terminating NUL */
+
+ return (sjis_int2str((uchar *)buf, ch, SCM_MB_STATELESS)) ? strlen(buf) : 0; /* 0 when int2str returns a null pointer; strlen() assumes int2str NUL-terminates buf -- TODO confirm */
+}
+
static ScmMultibyteCharInfo
sjis_scan_char(ScmMultibyteString mbs)
{
@@ -833,6 +922,19 @@
return "ISO-8859-1";
}
+enum ScmCodedCharSet /* 'ccs' method installed in unibyte_codec_vtbl */
+unibyte_ccs(void)
+{
+ /* conventional assumption: single-byte data is taken to be ISO-8859-1 */
+ return SCM_CCS_ISO8859_1;
+}
+
+int /* 'char_len' method installed in unibyte_codec_vtbl */
+unibyte_char_len(int ch)
+{
+ return (0 < ch && ch <= 0xff) ? 1 : 0; /* 1 for ch in 1..0xff; 0 otherwise, including ch == 0 and negative values */
+}
+
static ScmMultibyteCharInfo
unibyte_scan_char(ScmMultibyteString mbs)
{
Modified: branches/r5rs/sigscheme/encoding.h
===================================================================
--- branches/r5rs/sigscheme/encoding.h 2006-01-02 07:28:28 UTC (rev 2713)
+++ branches/r5rs/sigscheme/encoding.h 2006-01-02 07:36:30 UTC (rev 2714)
@@ -91,6 +91,8 @@
SCM_MBS_SET_STATE((mbs), SCM_MBCINFO_GET_STATE(inf)))
#define SCM_CHARCODEC_ENCODING(codec) ((*(codec)->encoding)())
+#define SCM_CHARCODEC_CCS(codec) ((*(codec)->ccs)())
+#define SCM_CHARCODEC_CHAR_LEN(codec, ch) ((*(codec)->char_len)(ch))
#define SCM_CHARCODEC_SCAN_CHAR(codec, mbs) ((*(codec)->scan_char)(mbs))
#define SCM_CHARCODEC_STR2INT(codec, src, len, state) \
((*(codec)->str2int)((src), (len), (state)))
@@ -100,6 +102,13 @@
/*=======================================
Type Definitions
=======================================*/
+enum ScmCodedCharSet { /* result type of a codec's 'ccs' method (see SCM_CHARCODEC_CCS) */
+ SCM_CCS_UNKNOWN = 0, /* no specific coded character set identified */
+ SCM_CCS_UNICODE = 1, /* Unicode */
+ SCM_CCS_ISO8859_1 = 2, /* ISO 8859-1 */
+ SCM_CCS_JIS = 3 /* ASCII + JIS X 0208, 0212, 0213 */
+};
+
/* This type will actually contain some encoding-dependent enum value.
* It might as well be defined as mbstate_t if we're using libc. */
typedef int ScmMultibyteState;
@@ -137,6 +146,8 @@
/* FIXME: replace (char *) with (uchar *) once C99-independent stdint is
introduced */
typedef const char *(*ScmCharCodecMethod_encoding)(void);
+typedef enum ScmCodedCharSet (*ScmCharCodecMethod_ccs)(void);
+typedef int (*ScmCharCodecMethod_char_len)(int ch);
typedef ScmMultibyteCharInfo (*ScmCharCodecMethod_scan_char)(ScmMultibyteString mbs);
typedef int (*ScmCharCodecMethod_str2int)(const char *src, size_t len,
ScmMultibyteState state);
@@ -145,6 +156,8 @@
struct ScmCharCodecVTbl_ {
ScmCharCodecMethod_encoding encoding;
+ ScmCharCodecMethod_ccs ccs;
+ ScmCharCodecMethod_char_len char_len;
ScmCharCodecMethod_scan_char scan_char;
ScmCharCodecMethod_str2int str2int;
ScmCharCodecMethod_int2str int2str;
More information about the uim-commit
mailing list