[uim-commit] r3079 - branches/r5rs/sigscheme/src
yamaken at freedesktop.org
yamaken at freedesktop.org
Thu Feb 2 06:02:06 PST 2006
Author: yamaken
Date: 2006-02-02 06:02:00 -0800 (Thu, 02 Feb 2006)
New Revision: 3079
Modified:
branches/r5rs/sigscheme/src/char.c
branches/r5rs/sigscheme/src/sigschemeinternal.h
Log:
* sigscheme/src/sigschemeinternal.h
- Exclude ctype.h
- (enum ScmCharClass): New type
- (scm_char_class_table): New variable decl
- (ICHAR_ASCIIP, ICHAR_ASCII_CLASS, ICHAR_CLASS, ICHAR_ALPHABETICP,
ICHAR_NUMERICP, ICHAR_WHITESPACEP, ICHAR_UPPER_CASEP,
ICHAR_LOWER_CASEP): New macro
- (ICHAR_DOWNCASE, ICHAR_UPCASE): Make efficient
* sigscheme/src/char.c
- Exclude ctype.h and stdlib.h
- Cite "7.1.1 Lexical structure" of R5RS
- (scm_char_class_table): New variable
- (scm_p_char_alphabeticp, scm_p_char_numericp,
scm_p_char_whitespacep, scm_p_char_upper_casep,
scm_p_char_lower_casep, scm_p_integer2char): Simplify with the new
macros
Modified: branches/r5rs/sigscheme/src/char.c
===================================================================
--- branches/r5rs/sigscheme/src/char.c 2006-02-02 11:30:13 UTC (rev 3078)
+++ branches/r5rs/sigscheme/src/char.c 2006-02-02 14:02:00 UTC (rev 3079)
@@ -38,8 +38,6 @@
/*=======================================
System Include
=======================================*/
-#include <ctype.h>
-#include <stdlib.h>
/*=======================================
Local Include
@@ -58,7 +56,214 @@
/*=======================================
Variable Declarations
=======================================*/
+/*
+ * R5RS: 7.1.1 Lexical structure
+ *
+ * <token> --> <identifier> | <boolean> | <number> | <character> | <string>
+ * | ( | ) | #( | ' | ` | , | ,@ | .
+ * <delimiter> --> <whitespace> | ( | ) | " | ;
+ * <whitespace> --> <space or newline>
+ * <comment> --> ; <all subsequent characters up to a
+ * line break>
+ * <atmosphere> --> <whitespace> | <comment>
+ * <intertoken space> --> <atmosphere>*
+ *
+ * <identifier> --> <initial> <subsequent>* | <peculiar identifier>
+ * <initial> --> <letter> | <special initial>
+ * <letter> --> a | b | c | ... | z
+ *
+ * <special initial> --> ! | $ | % | & | * | / | : | < | = | > | ? | ^ | _ | ~
+ * <subsequent> --> <initial> | <digit> | <special subsequent>
+ * <digit> --> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
+ * <special subsequent> --> + | - | . | @
+ * <peculiar identifier> --> + | - | ...
+ * <syntactic keyword> --> <expression keyword>
+ * | else | => | define
+ * | unquote | unquote-splicing
+ * <expression keyword> --> quote | lambda | if
+ * | set! | begin | cond | and | or | case
+ * | let | let* | letrec | do | delay
+ * | quasiquote
+ *
+ * `<variable> => <'any <identifier> that isn't
+ * also a <syntactic keyword>>
+ *
+ * <boolean> --> #t | #f
+ * <character> --> #\ <any character>
+ * | #\ <character name>
+ * <character name> --> space | newline
+ *
+ * <string> --> " <string element>* "
+ * <string element> --> <any character other than " or \>
+ * | \" | \\
+ *
+ * <number> --> <num 2>| <num 8>
+ * | <num 10>| <num 16>
+ *
+ *
+ * <num R> --> <prefix R> <complex R>
+ * <complex R> --> <real R> | <real R> @ <real R>
+ * | <real R> + <ureal R> i | <real R> - <ureal R> i
+ * | <real R> + i | <real R> - i
+ * | + <ureal R> i | - <ureal R> i | + i | - i
+ * <real R> --> <sign> <ureal R>
+ * <ureal R> --> <uinteger R>
+ * | <uinteger R> / <uinteger R>
+ * | <decimal R>
+ * <decimal 10> --> <uinteger 10> <suffix>
+ * | . <digit 10>+ #* <suffix>
+ * | <digit 10>+ . <digit 10>* #* <suffix>
+ * | <digit 10>+ #+ . #* <suffix>
+ * <uinteger R> --> <digit R>+ #*
+ * <prefix R> --> <radix R> <exactness>
+ * | <exactness> <radix R>
+ *
+ * <suffix> --> <empty>
+ * | <exponent marker> <sign> <digit 10>+
+ * <exponent marker> --> e | s | f | d | l
+ * <sign> --> <empty> | + | -
+ * <exactness> --> <empty> | #i | #e
+ * <radix 2> --> #b
+ * <radix 8> --> #o
+ * <radix 10> --> <empty> | #d
+ * <radix 16> --> #x
+ * <digit 2> --> 0 | 1
+ * <digit 8> --> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7
+ * <digit 10> --> <digit>
+ * <digit 16> --> <digit 10> | a | b | c | d | e | f
+ */
+const unsigned char scm_char_class_table[] = {
+ SCM_CH_CONTROL, /* 0 nul */
+ SCM_CH_CONTROL, /* 1 x01 */
+ SCM_CH_CONTROL, /* 2 x02 */
+ SCM_CH_CONTROL, /* 3 x03 */
+ SCM_CH_CONTROL, /* 4 x04 */
+ SCM_CH_CONTROL, /* 5 x05 */
+ SCM_CH_CONTROL, /* 6 x06 */
+ SCM_CH_CONTROL, /* 7 alarm */
+ SCM_CH_CONTROL, /* 8 backspace */
+ SCM_CH_CONTROL | SCM_CH_WHITESPACE, /* 9 tab */
+ SCM_CH_CONTROL | SCM_CH_WHITESPACE, /* 10 newline */
+ SCM_CH_CONTROL | SCM_CH_WHITESPACE, /* 11 vtab */
+ SCM_CH_CONTROL | SCM_CH_WHITESPACE, /* 12 page */
+ SCM_CH_CONTROL | SCM_CH_WHITESPACE, /* 13 return */
+ SCM_CH_CONTROL, /* 14 x0e */
+ SCM_CH_CONTROL, /* 15 x0f */
+ SCM_CH_CONTROL, /* 16 x10 */
+ SCM_CH_CONTROL, /* 17 x11 */
+ SCM_CH_CONTROL, /* 18 x12 */
+ SCM_CH_CONTROL, /* 19 x13 */
+ SCM_CH_CONTROL, /* 20 x14 */
+ SCM_CH_CONTROL, /* 21 x15 */
+ SCM_CH_CONTROL, /* 22 x16 */
+ SCM_CH_CONTROL, /* 23 x17 */
+ SCM_CH_CONTROL, /* 24 x18 */
+ SCM_CH_CONTROL, /* 25 x19 */
+ SCM_CH_CONTROL, /* 26 x1a */
+ SCM_CH_CONTROL, /* 27 esc */
+ SCM_CH_CONTROL, /* 28 x1c */
+ SCM_CH_CONTROL, /* 29 x1d */
+ SCM_CH_CONTROL, /* 30 x1e */
+ SCM_CH_CONTROL, /* 31 x1f */
+ SCM_CH_WHITESPACE, /* 32 space */
+ SCM_CH_SPECIAL_INITIAL, /* 33 ! */
+ SCM_CH_TOKEN_INITIAL, /* 34 " */
+ SCM_CH_TOKEN_INITIAL, /* 35 # */
+ SCM_CH_SPECIAL_INITIAL, /* 36 $ */
+ SCM_CH_SPECIAL_INITIAL, /* 37 % */
+ SCM_CH_SPECIAL_INITIAL, /* 38 & */
+ SCM_CH_TOKEN_INITIAL, /* 39 ' */
+ SCM_CH_TOKEN_INITIAL, /* 40 ( */
+ SCM_CH_TOKEN_INITIAL, /* 41 ) */
+ SCM_CH_SPECIAL_INITIAL, /* 42 * */
+ SCM_CH_SPECIAL_SUBSEQUENT, /* 43 + */
+ SCM_CH_TOKEN_INITIAL, /* 44 , */
+ SCM_CH_SPECIAL_SUBSEQUENT, /* 45 - */
+ SCM_CH_SPECIAL_SUBSEQUENT | SCM_CH_TOKEN_INITIAL, /* 46 . */
+ SCM_CH_SPECIAL_INITIAL, /* 47 / */
+ SCM_CH_DIGIT, /* 48 0 */
+ SCM_CH_DIGIT, /* 49 1 */
+ SCM_CH_DIGIT, /* 50 2 */
+ SCM_CH_DIGIT, /* 51 3 */
+ SCM_CH_DIGIT, /* 52 4 */
+ SCM_CH_DIGIT, /* 53 5 */
+ SCM_CH_DIGIT, /* 54 6 */
+ SCM_CH_DIGIT, /* 55 7 */
+ SCM_CH_DIGIT, /* 56 8 */
+ SCM_CH_DIGIT, /* 57 9 */
+ SCM_CH_SPECIAL_INITIAL, /* 58 : */
+ SCM_CH_TOKEN_INITIAL, /* 59 ; */
+ SCM_CH_SPECIAL_INITIAL, /* 60 < */
+ SCM_CH_SPECIAL_INITIAL, /* 61 = */
+ SCM_CH_SPECIAL_INITIAL, /* 62 > */
+ SCM_CH_SPECIAL_INITIAL, /* 63 ? */
+ SCM_CH_SPECIAL_SUBSEQUENT, /* 64 @ */
+ SCM_CH_HEX_LETTER, /* 65 A */
+ SCM_CH_HEX_LETTER, /* 66 B */
+ SCM_CH_HEX_LETTER, /* 67 C */
+ SCM_CH_HEX_LETTER, /* 68 D */
+ SCM_CH_HEX_LETTER, /* 69 E */
+ SCM_CH_HEX_LETTER, /* 70 F */
+ SCM_CH_NONHEX_LETTER, /* 71 G */
+ SCM_CH_NONHEX_LETTER, /* 72 H */
+ SCM_CH_NONHEX_LETTER, /* 73 I */
+ SCM_CH_NONHEX_LETTER, /* 74 J */
+ SCM_CH_NONHEX_LETTER, /* 75 K */
+ SCM_CH_NONHEX_LETTER, /* 76 L */
+ SCM_CH_NONHEX_LETTER, /* 77 M */
+ SCM_CH_NONHEX_LETTER, /* 78 N */
+ SCM_CH_NONHEX_LETTER, /* 79 O */
+ SCM_CH_NONHEX_LETTER, /* 80 P */
+ SCM_CH_NONHEX_LETTER, /* 81 Q */
+ SCM_CH_NONHEX_LETTER, /* 82 R */
+ SCM_CH_NONHEX_LETTER, /* 83 S */
+ SCM_CH_NONHEX_LETTER, /* 84 T */
+ SCM_CH_NONHEX_LETTER, /* 85 U */
+ SCM_CH_NONHEX_LETTER, /* 86 V */
+ SCM_CH_NONHEX_LETTER, /* 87 W */
+ SCM_CH_NONHEX_LETTER, /* 88 X */
+ SCM_CH_NONHEX_LETTER, /* 89 Y */
+ SCM_CH_NONHEX_LETTER, /* 90 Z */
+ SCM_CH_TOKEN_INITIAL, /* 91 [ */
+ SCM_CH_CONTROL, /* 92 \\ */
+ SCM_CH_TOKEN_INITIAL, /* 93 ] */
+ SCM_CH_SPECIAL_INITIAL, /* 94 ^ */
+ SCM_CH_SPECIAL_INITIAL, /* 95 _ */
+ SCM_CH_TOKEN_INITIAL, /* 96 ` */
+ SCM_CH_HEX_LETTER, /* 97 a */
+ SCM_CH_HEX_LETTER, /* 98 b */
+ SCM_CH_HEX_LETTER, /* 99 c */
+ SCM_CH_HEX_LETTER, /* 100 d */
+ SCM_CH_HEX_LETTER, /* 101 e */
+ SCM_CH_HEX_LETTER, /* 102 f */
+ SCM_CH_NONHEX_LETTER, /* 103 g */
+ SCM_CH_NONHEX_LETTER, /* 104 h */
+ SCM_CH_NONHEX_LETTER, /* 105 i */
+ SCM_CH_NONHEX_LETTER, /* 106 j */
+ SCM_CH_NONHEX_LETTER, /* 107 k */
+ SCM_CH_NONHEX_LETTER, /* 108 l */
+ SCM_CH_NONHEX_LETTER, /* 109 m */
+ SCM_CH_NONHEX_LETTER, /* 110 n */
+ SCM_CH_NONHEX_LETTER, /* 111 o */
+ SCM_CH_NONHEX_LETTER, /* 112 p */
+ SCM_CH_NONHEX_LETTER, /* 113 q */
+ SCM_CH_NONHEX_LETTER, /* 114 r */
+ SCM_CH_NONHEX_LETTER, /* 115 s */
+ SCM_CH_NONHEX_LETTER, /* 116 t */
+ SCM_CH_NONHEX_LETTER, /* 117 u */
+ SCM_CH_NONHEX_LETTER, /* 118 v */
+ SCM_CH_NONHEX_LETTER, /* 119 w */
+ SCM_CH_NONHEX_LETTER, /* 120 x */
+ SCM_CH_NONHEX_LETTER, /* 121 y */
+ SCM_CH_NONHEX_LETTER, /* 122 z */
+ SCM_CH_TOKEN_INITIAL, /* 123 { */
+ SCM_CH_TOKEN_INITIAL, /* 124 | */
+ SCM_CH_TOKEN_INITIAL, /* 125 } */
+ SCM_CH_SPECIAL_INITIAL, /* 126 ~ */
+ SCM_CH_CONTROL, /* 127 delete */
+};
+
/*=======================================
File Local Function Declarations
=======================================*/
@@ -198,7 +403,7 @@
val = SCM_CHAR_VALUE(ch);
- return MAKE_BOOL(isascii(val) && isalpha(val));
+ return MAKE_BOOL(ICHAR_ALPHABETICP(val));
}
ScmObj
@@ -211,7 +416,7 @@
val = SCM_CHAR_VALUE(ch);
- return MAKE_BOOL(isascii(val) && isdigit(val));
+ return MAKE_BOOL(ICHAR_NUMERICP(val));
}
ScmObj
@@ -224,7 +429,7 @@
val = SCM_CHAR_VALUE(ch);
- return MAKE_BOOL(isascii(val) && isspace(val));
+ return MAKE_BOOL(ICHAR_WHITESPACEP(val));
}
ScmObj
@@ -237,7 +442,7 @@
val = SCM_CHAR_VALUE(ch);
- return MAKE_BOOL(isascii(val) && isupper(val));
+ return MAKE_BOOL(ICHAR_UPPER_CASEP(val));
}
ScmObj
@@ -250,7 +455,7 @@
val = SCM_CHAR_VALUE(ch);
- return MAKE_BOOL(isascii(val) && islower(val));
+ return MAKE_BOOL(ICHAR_LOWER_CASEP(val));
}
ScmObj
@@ -275,7 +480,7 @@
#if SCM_USE_MULTIBYTE_CHAR
if (!SCM_CHARCODEC_CHAR_LEN(scm_current_char_codec, val))
#else
- if (!isascii(val))
+ if (!ICHAR_ASCIIP(val))
#endif
ERR_OBJ("invalid char value", n);
Modified: branches/r5rs/sigscheme/src/sigschemeinternal.h
===================================================================
--- branches/r5rs/sigscheme/src/sigschemeinternal.h 2006-02-02 11:30:13 UTC (rev 3078)
+++ branches/r5rs/sigscheme/src/sigschemeinternal.h 2006-02-02 14:02:00 UTC (rev 3079)
@@ -39,7 +39,6 @@
=======================================*/
#include <stddef.h>
#include <string.h>
-#include <ctype.h> /* for char macros */
/*=======================================
Local Include
@@ -428,10 +427,50 @@
/*=======================================
Characters
=======================================*/
+enum ScmCharClass {
+ /* ASCII */
+ SCM_CH_INVALID = 0,
+ SCM_CH_CONTROL = 1 << 0, /* iscntrl(3) + backslash */
+ SCM_CH_WHITESPACE = 1 << 1, /* [ \t\n\r\v\f] */
+ SCM_CH_DIGIT = 1 << 2, /* [0-9] */
+ SCM_CH_HEX_LETTER = 1 << 3, /* [a-fA-F] */
+ SCM_CH_NONHEX_LETTER = 1 << 4, /* [g-zG-Z] */
+ SCM_CH_SPECIAL_INITIAL = 1 << 5, /* [!$%&*\/:<=>?^_~] */
+ SCM_CH_SPECIAL_SUBSEQUENT = 1 << 6, /* [-+\.@] */
+ SCM_CH_TOKEN_INITIAL = 1 << 7, /* [()#'`,\.\"\|\{\}\[\]] */
+
+ SCM_CH_LETTER = SCM_CH_HEX_LETTER | SCM_CH_NONHEX_LETTER,
+ SCM_CH_HEX_DIGIT = SCM_CH_DIGIT | SCM_CH_HEX_LETTER,
+ SCM_CH_INITIAL = SCM_CH_LETTER | SCM_CH_SPECIAL_INITIAL,
+ SCM_CH_SUBSEQUENT = SCM_CH_INITIAL | SCM_CH_DIGIT,
+ SCM_CH_PECULIAR_IDENTIFIER_CAND = SCM_CH_SPECIAL_SUBSEQUENT,
+
+ /* beyond ASCII */
+ SCM_CH_ASCII = 0 << 8,
+ SCM_CH_8BIT = 1 << 8,
+ SCM_CH_MULTIBYTE = 1 << 9,
+
+ SCM_CH_NONASCII = SCM_CH_8BIT | SCM_CH_MULTIBYTE
+};
+
+extern const unsigned char scm_char_class_table[];
+
+#define ICHAR_ASCIIP(c) (SCM_ASSERT(0 <= (c)), (c) <= 127)
+#define ICHAR_ASCII_CLASS(c) \
+ (ICHAR_ASCIIP(c) ? scm_char_class_table[c] : SCM_CH_INVALID)
+#define ICHAR_CLASS(c) \
+ (ICHAR_ASCIIP(c) ? scm_char_class_table[c] : SCM_CH_NONASCII)
+
+#define ICHAR_ALPHABETICP(c) (ICHAR_UPPER_CASEP(c) || ICHAR_LOWER_CASEP(c))
+#define ICHAR_NUMERICP(c) ('0' <= (c) && (c) <= '9')
+#define ICHAR_WHITESPACEP(c) ((c) == ' ' || ('\t' <= (c) && (c) <= '\r'))
+#define ICHAR_UPPER_CASEP(c) ('A' <= (c) && (c) <= 'Z')
+#define ICHAR_LOWER_CASEP(c) ('a' <= (c) && (c) <= 'z')
+
/*
- * SigScheme's case-insensitive comparison conforms to the foldcase'ed
- * comparison described in SRFI-75 and SRFI-13, although R5RS does not specify
- * comparison between alphabetic and non-alphabetic char.
+ * SigScheme's case-insensitive character comparison conforms to the
+ * foldcase'ed comparison described in SRFI-75 and SRFI-13, although R5RS does
+ * not define comparison between alphabetic and non-alphabetic char.
*
* This specification is needed to produce natural result on sort functions
* with these case-insensitive predicates as comparator.
@@ -447,8 +486,8 @@
* - "Case mapping and case-folding" and "Comparison" section of SRFI-13
*/
/* FIXME: support SRFI-75 */
-#define ICHAR_DOWNCASE(c) ((isascii((int)(c))) ? tolower((int)(c)) : (c))
-#define ICHAR_UPCASE(c) ((isascii((int)(c))) ? toupper((int)(c)) : (c))
+#define ICHAR_DOWNCASE(c) (ICHAR_UPPER_CASEP(c) ? (c) + ('a' - 'A') : (c))
+#define ICHAR_UPCASE(c) (ICHAR_LOWER_CASEP(c) ? (c) - ('a' - 'A') : (c))
/* foldcase for case-insensitive character comparison is done by downcase as
* described in SRFI-75. Although SRFI-13 expects (char-downcase (char-upcase
* c)), this implementation is sufficient for ASCII range. */
More information about the uim-commit
mailing list