[uim-commit] r2720 - in branches/r5rs/sigscheme: . test
yamaken at freedesktop.org
yamaken at freedesktop.org
Mon Jan 2 04:01:01 PST 2006
Author: yamaken
Date: 2006-01-02 04:00:57 -0800 (Mon, 02 Jan 2006)
New Revision: 2720
Modified:
branches/r5rs/sigscheme/TODO
branches/r5rs/sigscheme/read.c
branches/r5rs/sigscheme/test/test-enc-eucgeneric.scm
branches/r5rs/sigscheme/test/test-enc-eucjp.scm
branches/r5rs/sigscheme/test/test-enc-sjis.scm
branches/r5rs/sigscheme/test/test-enc-utf8.scm
Log:
* sigscheme/read.c
- (read_token, read_char, read_string): Reject multibyte char if not
an Unicode port
- (parse_unicode_sequence): Fix \Uxxxxxxxx
* sigscheme/test/test-enc-utf8.scm
* sigscheme/test/test-enc-eucjp.scm
* sigscheme/test/test-enc-eucgeneric.scm
* sigscheme/test/test-enc-sjis.scm
- Add test for SRFI-75
* sigscheme/TODO
- Update
Modified: branches/r5rs/sigscheme/TODO
===================================================================
--- branches/r5rs/sigscheme/TODO 2006-01-02 10:16:13 UTC (rev 2719)
+++ branches/r5rs/sigscheme/TODO 2006-01-02 12:00:57 UTC (rev 2720)
@@ -96,9 +96,6 @@
* Separate SCM_ASSERT into required validation (SCM_ENSURE) and optional
assertion (SCM_ASSERT)
-* Add CCS identity for each codec and use it to check Unicode compatibility on
- SRFI-75-related operations
-
* Add charcodec_char_len() interface to codec and simplify string operations
with it (also used for validation)
Modified: branches/r5rs/sigscheme/read.c
===================================================================
--- branches/r5rs/sigscheme/read.c 2006-01-02 10:16:13 UTC (rev 2719)
+++ branches/r5rs/sigscheme/read.c 2006-01-02 12:00:57 UTC (rev 2720)
@@ -213,6 +213,7 @@
read_token(ScmObj port,
int *err, char *buf, size_t buf_size, const char *delim)
{
+ ScmCharCodec *codec;
int c;
size_t len;
char *p;
@@ -243,9 +244,10 @@
*err = TOKEN_BUF_EXCEEDED;
break;
}
- /* FIXME: check Unicode capability of scm_current_char_codec */
- p = SCM_CHARCODEC_INT2STR(scm_current_char_codec,
- p, c, SCM_MB_STATELESS);
+ codec = scm_port_codec(port);
+ if (SCM_CHARCODEC_CCS(codec) != SCM_CCS_UCS4)
+ ERR("non-ASCII char in token on a non-Unicode port: 0x%x", c);
+ p = SCM_CHARCODEC_INT2STR(codec, p, c, SCM_MB_STATELESS);
#else
ERR("non-ASCII char in token: 0x%x", c);
#endif
@@ -444,7 +446,7 @@
case 'U':
/* #\U<x><x><x><x><x><x><x><x> : Unicode char of BMP or SMP */
- if (len != 8 || (0xd800 <= c && c <= 0xdfff) || 0x10ffff < c)
+ if (len != 9 || (0xd800 <= c && c <= 0xdfff) || 0x10ffff < c)
ERR("invalid Unicode sequence. conform \\U<x><x><x><x><x><x><x><x>");
break;
@@ -483,8 +485,10 @@
int unicode;
#endif
const ScmSpecialCharInfo *info;
+ ScmCharCodec *codec;
size_t len;
char buf[CHAR_LITERAL_LEN_MAX + sizeof("")];
+ DECLARE_INTERNAL_FUNCTION("read_char");
/* plain char (multibyte-ready) */
c = scm_port_get_char(port);
@@ -505,8 +509,12 @@
#if SCM_USE_SRFI75
unicode = parse_unicode_sequence(buf, len + 1);
- if (0 <= unicode)
+ if (0 <= unicode) {
+ codec = scm_port_codec(port);
+ if (c != 'x' && SCM_CHARCODEC_CCS(codec) != SCM_CCS_UCS4)
+ ERR_OBJ("Unicode char sequence on non-Unicode port", port);
return MAKE_CHAR(unicode);
+ }
#endif
/* named chars */
for (info = scm_special_char_table; info->esc_seq; info++) {
@@ -521,15 +529,18 @@
{
ScmObj obj;
const ScmSpecialCharInfo *info;
+ ScmCharCodec *codec;
int c;
size_t offset;
char *p;
ScmLBuf(char) lbuf;
char init_buf[SCM_INITIAL_STRING_BUF_SIZE];
+ DECLARE_INTERNAL_FUNCTION("read_string");
CDBG((SCM_DBG_PARSER, "read_string"));
LBUF_INIT(lbuf, init_buf, sizeof(init_buf));
+ codec = scm_port_codec(port);
for (offset = 0, p = LBUF_BUF(lbuf);; offset = p - LBUF_BUF(lbuf)) {
c = scm_port_get_char(port);
@@ -554,12 +565,12 @@
c = scm_port_get_char(port);
#if SCM_USE_SRFI75
if (strchr("xuU", c)) {
+ if (c != 'x' && SCM_CHARCODEC_CCS(codec) != SCM_CCS_UCS4)
+ ERR_OBJ("Unicode char sequence on non-Unicode port", port);
c = read_unicode_sequence(port, c);
LBUF_EXTEND(lbuf, SCM_LBUF_F_STRING, offset + MB_MAX_SIZE);
p = &LBUF_BUF(lbuf)[offset];
- /* FIXME: check Unicode capability of scm_current_char_codec */
- p = SCM_CHARCODEC_INT2STR(scm_current_char_codec,
- p, c, SCM_MB_STATELESS);
+ p = SCM_CHARCODEC_INT2STR(codec, p, c, SCM_MB_STATELESS);
if (!p)
ERR("invalid Unicode sequence in string: 0x%x", c);
goto found;
@@ -584,8 +595,7 @@
LBUF_EXTEND(lbuf, SCM_LBUF_F_STRING, offset + MB_MAX_SIZE);
p = &LBUF_BUF(lbuf)[offset];
/* FIXME: support stateful encoding */
- p = SCM_CHARCODEC_INT2STR(scm_current_char_codec,
- p, c, SCM_MB_STATELESS);
+ p = SCM_CHARCODEC_INT2STR(codec, p, c, SCM_MB_STATELESS);
if (!p)
ERR("invalid char in string: 0x%x", c);
break;
Modified: branches/r5rs/sigscheme/test/test-enc-eucgeneric.scm
===================================================================
--- branches/r5rs/sigscheme/test/test-enc-eucgeneric.scm 2006-01-02 10:16:13 UTC (rev 2719)
+++ branches/r5rs/sigscheme/test/test-enc-eucgeneric.scm 2006-01-02 12:00:57 UTC (rev 2720)
@@ -35,6 +35,8 @@
(load "./test/unittest.scm")
+(define tn test-name)
+
;; This file provides a fallback test unit for all EUC systems. It's
;; just a copy of test-enc-eucjp.scm with EUCJP-specific character
;; sequences removed, so some characters may be undefined in other EUC
@@ -60,4 +62,17 @@
(assert-equal? "string 2" str1 (apply string str1-list))
(assert-equal? "list->string 2" str1-list (string->list str1))
+;; SRFI-75
+(tn "SRFI-75")
+(assert-parseable (tn) "#\\x63")
+(assert-parse-error (tn) "#\\u0063")
+(assert-parse-error (tn) "#\\U00000063")
+
+(assert-parseable (tn) "\"\\x63\"")
+(assert-parse-error (tn) "\"\\u0063\"")
+(assert-parse-error (tn) "\"\\U00000063\"")
+
+(assert-parseable (tn) "'a")
+(assert-parse-error (tn) "'¤¢")
+
(total-report)
Modified: branches/r5rs/sigscheme/test/test-enc-eucjp.scm
===================================================================
--- branches/r5rs/sigscheme/test/test-enc-eucjp.scm 2006-01-02 10:16:13 UTC (rev 2719)
+++ branches/r5rs/sigscheme/test/test-enc-eucjp.scm 2006-01-02 12:00:57 UTC (rev 2720)
@@ -35,6 +35,8 @@
(load "./test/unittest.scm")
+(define tn test-name)
+
;; string?
(assert-true "string? check" (string? "¤¢¤¤¤¦¤¨¤ª"))
@@ -125,4 +127,17 @@
(assert-equal? "string 2" str1 (apply string str1-list))
(assert-equal? "list->string 2" str1-list (string->list str1))
+;; SRFI-75
+(tn "SRFI-75")
+(assert-parseable (tn) "#\\x63")
+(assert-parse-error (tn) "#\\u0063")
+(assert-parse-error (tn) "#\\U00000063")
+
+(assert-parseable (tn) "\"\\x63\"")
+(assert-parse-error (tn) "\"\\u0063\"")
+(assert-parse-error (tn) "\"\\U00000063\"")
+
+(assert-parseable (tn) "'a")
+(assert-parse-error (tn) "'¤¢")
+
(total-report)
Modified: branches/r5rs/sigscheme/test/test-enc-sjis.scm
===================================================================
--- branches/r5rs/sigscheme/test/test-enc-sjis.scm 2006-01-02 10:16:13 UTC (rev 2719)
+++ branches/r5rs/sigscheme/test/test-enc-sjis.scm 2006-01-02 12:00:57 UTC (rev 2720)
@@ -36,6 +36,8 @@
(load "./test/unittest.scm")
+(define tn test-name)
+
(assert-equal? "string 1" "ülÉÍ" (string #\ü #\l #\É #\Í))
(assert-equal? "list->string 1" "3úÅ" (list->string '(#\3 #\ú #\Å)))
(assert-equal? "string->list 1" '(#\ #\« #\é) (string->list "«é"))
@@ -75,4 +77,17 @@
(assert-equal? "JIS X 0201 kana and 0208 kana" '(#\Ë #\) (string->list "Ë"))
(assert-equal? "JIS X 0201 kana and 0208 kana" "Ë" (list->string '(#\Ë #\)))
+;; SRFI-75
+(tn "SRFI-75")
+(assert-parseable (tn) "#\\x63")
+(assert-parse-error (tn) "#\\u0063")
+(assert-parse-error (tn) "#\\U00000063")
+
+(assert-parseable (tn) "\"\\x63\"")
+(assert-parse-error (tn) "\"\\u0063\"")
+(assert-parse-error (tn) "\"\\U00000063\"")
+
+(assert-parseable (tn) "'a")
+(assert-parse-error (tn) "' ")
+
(total-report)
Modified: branches/r5rs/sigscheme/test/test-enc-utf8.scm
===================================================================
--- branches/r5rs/sigscheme/test/test-enc-utf8.scm 2006-01-02 10:16:13 UTC (rev 2719)
+++ branches/r5rs/sigscheme/test/test-enc-utf8.scm 2006-01-02 12:00:57 UTC (rev 2720)
@@ -35,6 +35,8 @@
(load "./test/unittest.scm")
+(define tn test-name)
+
(assert-equal? "string 1" "ç¾äººã«ã¯" (string #\ç¾ #\人 #\ã« #\ã¯))
(assert-equal? "list->string 1" "3æ¥ã§" (list->string '(#\3 #\æ¥ #\ã§)))
(assert-equal? "string->list 1" '(#\ã #\ã #\ã) (string->list "ããã"))
@@ -54,4 +56,17 @@
(assert-equal? "string 2" str1 (apply string str1-list))
(assert-equal? "list->string 2" str1-list (string->list str1))
+;; SRFI-75
+(tn "SRFI-75")
+(assert-parseable (tn) "#\\x63")
+(assert-parseable (tn) "#\\u0063")
+(assert-parseable (tn) "#\\U00000063")
+
+(assert-parseable (tn) "\"\\x63\"")
+(assert-parseable (tn) "\"\\u0063\"")
+(assert-parseable (tn) "\"\\U00000063\"")
+
+(assert-parseable (tn) "'a")
+(assert-parseable (tn) "'ã")
+
(total-report)
More information about the uim-commit
mailing list