[uim-commit] r2879 - branches/r5rs/sigscheme
yamaken at freedesktop.org
yamaken at freedesktop.org
Tue Jan 10 01:19:28 PST 2006
Author: yamaken
Date: 2006-01-10 01:19:24 -0800 (Tue, 10 Jan 2006)
New Revision: 2879
Modified:
branches/r5rs/sigscheme/config.h
branches/r5rs/sigscheme/encoding.c
branches/r5rs/sigscheme/encoding.h
branches/r5rs/sigscheme/operations-srfi6.c
branches/r5rs/sigscheme/operations.c
branches/r5rs/sigscheme/read.c
branches/r5rs/sigscheme/sigscheme.c
branches/r5rs/sigscheme/sigscheme.h
branches/r5rs/sigscheme/storage.c
branches/r5rs/sigscheme/strport.c
branches/r5rs/sigscheme/strport.h
Log:
* sigscheme/encoding.h
- (scm_mb_strlen, scm_mb_bare_c_strlen, scm_mb_substring,
scm_mb_strref): Add new arg 'codec'
* sigscheme/encoding.c
- (scm_mb_strlen, scm_mb_bare_c_strlen, scm_mb_substring): Add new
arg 'codec'
* sigscheme/config.h
- (SCM_USE_NULL_CAPABLE_STRING): New macro
* sigscheme/sigscheme.h
- (SCM_ERRMSG_NULL_IN_STRING): New macro
* sigscheme/sigscheme.c
- (scm_initialize_internal): Provide "null-capable-string" when
SCM_USE_NULL_CAPABLE_STRING
* sigscheme/read.c
- (read_string): Reject null character when
!SCM_USE_NULL_CAPABLE_STRING
* sigscheme/storage.c
- (scm_make_string_internal): Follow the API change of encoding.h
* sigscheme/strport.h
- (ScmOutputStrPort_c_strlen): New function decl
* sigscheme/strport.c
- (ScmOutputStrPort_c_strlen): New function
* sigscheme/operations-srfi6.c
- (scm_p_srfi6_get_output_string):
* Fix character codec selection
* Add partial support for SCM_USE_NULL_CAPABLE_STRING
* sigscheme/operations.c
- (scm_p_make_string, scm_p_list2string):
* Make more efficient by building the result buffer directly instead of writing through an output string port
* Support SCM_USE_NULL_CAPABLE_STRING
- (scm_p_string_length, scm_p_string_ref): Follow the API change of
encoding.h
- (scm_p_string_setd):
* Follow the API change of encoding.h
* Add partial support for SCM_USE_NULL_CAPABLE_STRING
- (scm_p_string_append, scm_p_string_copy): Add partial support for
SCM_USE_NULL_CAPABLE_STRING
- (scm_p_string2list): Support SCM_USE_NULL_CAPABLE_STRING
Modified: branches/r5rs/sigscheme/config.h
===================================================================
--- branches/r5rs/sigscheme/config.h 2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/config.h 2006-01-10 09:19:24 UTC (rev 2879)
@@ -82,6 +82,7 @@
#define SCM_ACCESSOR_ASSERT 0 /* enable strict type check with accessor */
#define SCM_USE_VALUECONS 1 /* use experimental values passing */
#define SCM_VOLATILE_OUTPUT 0 /* always flush files on write */
+#define SCM_USE_NULL_CAPABLE_STRING 1 /* accept null character in a middle of a string */
#define SCM_OBJ_COMPACT 0 /* object representation compaction (experimental) */
#define SCM_GCC4_READY_GC 1 /* use experimental gcc4-ready stack protection */
Modified: branches/r5rs/sigscheme/encoding.c
===================================================================
--- branches/r5rs/sigscheme/encoding.c 2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/encoding.c 2006-01-10 09:19:24 UTC (rev 2879)
@@ -266,7 +266,7 @@
=======================================*/
int
-scm_mb_strlen(ScmMultibyteString mbs)
+scm_mb_strlen(ScmCharCodec *codec, ScmMultibyteString mbs)
{
int len;
ScmMultibyteCharInfo c;
@@ -275,7 +275,7 @@
SCM_MBS_GET_SIZE(mbs), SCM_MBS_GET_STR(mbs)));
for (len = 0; SCM_MBS_GET_SIZE(mbs); len++) {
- c = SCM_CHARCODEC_SCAN_CHAR(scm_current_char_codec, mbs);
+ c = SCM_CHARCODEC_SCAN_CHAR(codec, mbs);
CDBG((SCM_DBG_ENCODING, "%d, %d;", SCM_MBCINFO_GET_SIZE(c), c.flag));
SCM_MBS_SKIP_CHAR(mbs, c);
}
@@ -286,16 +286,16 @@
/* FIXME: pick a better name. */
int
-scm_mb_bare_c_strlen(const char *s)
+scm_mb_bare_c_strlen(ScmCharCodec *codec, const char *s)
{
ScmMultibyteString mbs;
SCM_MBS_INIT2(mbs, s, strlen(s));
- return scm_mb_strlen(mbs);
+ return scm_mb_strlen(codec, mbs);
}
ScmMultibyteString
-scm_mb_substring(ScmMultibyteString mbs, int i, int len)
+scm_mb_substring(ScmCharCodec *codec, ScmMultibyteString mbs, int i, int len)
{
ScmMultibyteString ret, end;
ScmMultibyteCharInfo c;
@@ -303,14 +303,14 @@
ret = mbs;
while (i--) {
- c = SCM_CHARCODEC_SCAN_CHAR(scm_current_char_codec, ret);
+ c = SCM_CHARCODEC_SCAN_CHAR(codec, ret);
SCM_MBS_SKIP_CHAR(ret, c);
}
end = ret;
while (len--) {
- c = SCM_CHARCODEC_SCAN_CHAR(scm_current_char_codec, end);
+ c = SCM_CHARCODEC_SCAN_CHAR(codec, end);
SCM_MBS_SKIP_CHAR(end, c);
}
Modified: branches/r5rs/sigscheme/encoding.h
===================================================================
--- branches/r5rs/sigscheme/encoding.h 2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/encoding.h 2006-01-10 09:19:24 UTC (rev 2879)
@@ -200,10 +200,11 @@
/*=======================================
Function Declarations
=======================================*/
-int scm_mb_strlen(ScmMultibyteString mbs);
-int scm_mb_bare_c_strlen(const char *str);
-ScmMultibyteString scm_mb_substring(ScmMultibyteString str, int i, int len);
-#define scm_mb_strref(str, i) (scm_mb_substring((str), (i), 1))
+int scm_mb_strlen(ScmCharCodec *codec, ScmMultibyteString mbs);
+int scm_mb_bare_c_strlen(ScmCharCodec *codec, const char *str);
+ScmMultibyteString scm_mb_substring(ScmCharCodec *codec,
+ ScmMultibyteString str, int i, int len);
+#define scm_mb_strref(codec, str, i) (scm_mb_substring((codec), (str), (i), 1))
ScmCharCodec *scm_mb_find_codec(const char *encoding);
int scm_charcodec_read_char(ScmCharCodec *codec, ScmMultibyteString *mbs,
const char *caller);
Modified: branches/r5rs/sigscheme/operations-srfi6.c
===================================================================
--- branches/r5rs/sigscheme/operations-srfi6.c 2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/operations-srfi6.c 2006-01-10 09:19:24 UTC (rev 2879)
@@ -35,6 +35,7 @@
/*=======================================
System Include
=======================================*/
+#include <stdlib.h>
/*=======================================
Local Include
@@ -111,6 +112,12 @@
scm_p_srfi6_get_output_string(ScmObj port)
{
ScmBaseCharPort *cport;
+ const char *str;
+ char *new_str;
+ int mb_len;
+#if SCM_USE_NULL_CAPABLE_STRING
+ size_t size;
+#endif
DECLARE_FUNCTION("get-output-string", procedure_fixed_1);
ENSURE_PORT(port);
@@ -118,8 +125,18 @@
SCM_ENSURE_LIVE_PORT(port);
cport = SCM_CHARPORT_DYNAMIC_CAST(ScmBaseCharPort, SCM_PORT_IMPL(port));
- return MAKE_STRING_COPYING(ScmOutputStrPort_str(cport->bport),
- STRLEN_UNKNOWN);
+ str = ScmOutputStrPort_str(cport->bport);
+ /* FIXME: incorrect length for null-capable string */
+ mb_len = scm_mb_bare_c_strlen(scm_port_codec(port), str);
+#if SCM_USE_NULL_CAPABLE_STRING
+ size = ScmOutputStrPort_c_strlen(cport->bport) + sizeof("");
+ new_str = scm_malloc(size);
+ memcpy(new_str, str, size);
+#else
+ new_str = strdup(str);
+#endif
+
+ return MAKE_STRING(new_str, mb_len);
}
Modified: branches/r5rs/sigscheme/operations.c
===================================================================
--- branches/r5rs/sigscheme/operations.c 2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/operations.c 2006-01-10 09:19:24 UTC (rev 2879)
@@ -1241,10 +1241,14 @@
ScmObj
scm_p_make_string(ScmObj length, ScmObj args)
{
- int filler_val, len, i;
- ScmObj filler, sport;
+ ScmObj filler;
+ int filler_val, len, ch_len;
+ char *str, *dst;
+ const char *next;
+ char ch_str[SCM_MB_MAX_LEN + sizeof("")];
DECLARE_FUNCTION("make-string", procedure_variadic_1);
+ ENSURE_STATELESS_CODEC(scm_current_char_codec);
ENSURE_INT(length);
len = SCM_INT_VALUE(length);
if (len == 0)
@@ -1255,21 +1259,31 @@
/* extract filler */
if (NULLP(args)) {
filler_val = ' ';
+ ch_len = sizeof((char)' ');
} else {
filler = POP(args);
ASSERT_NO_MORE_ARG(args);
ENSURE_CHAR(filler);
filler_val = SCM_CHAR_VALUE(filler);
+ ch_len = SCM_CHARCODEC_CHAR_LEN(scm_current_char_codec, filler_val);
}
+#if !SCM_USE_NULL_CAPABLE_STRING
+ if (filler_val == '\0')
+ ERR("make-string: " SCM_ERRMSG_NULL_IN_STRING);
+#endif
- /* TODO: make efficient */
- /* fill string (multibyte-ready) */
- sport = scm_p_srfi6_open_output_string();
- for (i = 0; i < len; i++) {
- scm_port_put_char(sport, filler_val);
- }
+ next = SCM_CHARCODEC_INT2STR(scm_current_char_codec, ch_str, filler_val,
+ SCM_MB_STATELESS);
+ if (!next)
+ ERR("make-string: invalid char 0x%x for encoding %s",
+ filler_val, SCM_CHARCODEC_ENCODING(scm_current_char_codec));
- return scm_p_srfi6_get_output_string(sport);
+ str = scm_malloc(ch_len * len + sizeof(""));
+ for (dst = str; dst < &str[ch_len * len]; dst += ch_len)
+ memcpy(dst, ch_str, ch_len);
+ *dst = '\0';
+
+ return MAKE_STRING(str, len);
}
ScmObj
@@ -1288,7 +1302,7 @@
ENSURE_STRING(str);
- len = scm_mb_bare_c_strlen(SCM_STRING_STR(str));
+ len = scm_mb_bare_c_strlen(scm_current_char_codec, SCM_STRING_STR(str));
return MAKE_INT(len);
}
@@ -1308,7 +1322,7 @@
ERR_OBJ("index out of range", k);
SCM_MBS_INIT2(mbs, SCM_STRING_STR(str), strlen(SCM_STRING_STR(str)));
- mbs = scm_mb_strref(mbs, idx);
+ mbs = scm_mb_strref(scm_current_char_codec, mbs, idx);
ch = SCM_CHARCODEC_STR2INT(scm_current_char_codec, SCM_MBS_GET_STR(mbs),
SCM_MBS_GET_SIZE(mbs), SCM_MBS_GET_STATE(mbs));
@@ -1342,7 +1356,7 @@
/* point at the char that to be replaced */
SCM_MBS_INIT2(mbs_ch, c_str, strlen(c_str));
- mbs_ch = scm_mb_strref(mbs_ch, idx);
+ mbs_ch = scm_mb_strref(scm_current_char_codec, mbs_ch, idx);
orig_ch_len = SCM_MBS_GET_SIZE(mbs_ch);
prefix_len = SCM_MBS_GET_STR(mbs_ch) - c_str;
@@ -1417,14 +1431,20 @@
/* substring */
c_str = SCM_STRING_STR(str);
SCM_MBS_INIT2(mbs, c_str, strlen(c_str));
- mbs = scm_mb_substring(mbs, c_start, c_end - c_start);
+ mbs = scm_mb_substring(scm_current_char_codec,
+ mbs, c_start, c_end - c_start);
/* copy the substring */
new_str = scm_malloc(SCM_MBS_GET_SIZE(mbs) + sizeof(""));
memcpy(new_str, SCM_MBS_GET_STR(mbs), SCM_MBS_GET_SIZE(mbs));
new_str[SCM_MBS_GET_SIZE(mbs)] = '\0';
+#if SCM_USE_NULL_CAPABLE_STRING
+ /* FIXME: the result is truncated at null and incorrect */
+ return MAKE_STRING(new_str, STRLEN_UNKNOWN);
+#else
return MAKE_STRING(new_str, c_end - c_start);
+#endif
}
/* FIXME: support stateful encoding */
@@ -1459,7 +1479,12 @@
}
*dst = '\0';
+#if SCM_USE_NULL_CAPABLE_STRING
+ /* each string is chopped at first null and the result is incorrect */
+ return MAKE_STRING(new_str, STRLEN_UNKNOWN);
+#else
return MAKE_STRING(new_str, mb_len);
+#endif
}
ScmObj
@@ -1467,18 +1492,31 @@
{
ScmQueue q;
ScmObj res;
- int ch;
+ int ch, mb_len;
+ const char *c_str;
ScmMultibyteString mbs;
DECLARE_FUNCTION("string->list", procedure_fixed_1);
ENSURE_STRING(str);
- SCM_MBS_INIT2(mbs, SCM_STRING_STR(str), strlen(SCM_STRING_STR(str)));
+ c_str = SCM_STRING_STR(str);
+ mb_len = SCM_STRING_LEN(str);
+ SCM_MBS_INIT2(mbs, c_str, strlen(c_str));
res = SCM_NULL;
SCM_QUEUE_POINT_TO(q, res);
- while (SCM_MBS_GET_SIZE(mbs)) {
- ch = SCM_CHARCODEC_READ_CHAR(scm_current_char_codec, mbs);
+ while (mb_len--) {
+ if (SCM_MBS_GET_SIZE(mbs)) {
+ ch = SCM_CHARCODEC_READ_CHAR(scm_current_char_codec, mbs);
+ } else {
+#if SCM_USE_NULL_CAPABLE_STRING
+ ch = '\0';
+ c_str = &SCM_MBS_GET_STR(mbs)[1];
+ SCM_MBS_INIT2(mbs, c_str, strlen(c_str));
+#else
+ break;
+#endif
+ }
SCM_QUEUE_ADD(q, MAKE_CHAR(ch));
}
@@ -1488,24 +1526,40 @@
ScmObj
scm_p_list2string(ScmObj lst)
{
- ScmObj rest, ch, sport;
+ ScmObj rest, ch;
+ size_t str_size;
+ int ch_val, len;
+ char *str, *dst;
DECLARE_FUNCTION("list->string", procedure_fixed_1);
+ ENSURE_STATELESS_CODEC(scm_current_char_codec);
ENSURE_LIST(lst);
if (NULLP(lst))
return MAKE_STRING_COPYING("", 0);
- /* TODO: make efficient */
- sport = scm_p_srfi6_open_output_string();
+ str_size = sizeof("");
rest = lst;
+ len = 0;
FOR_EACH (ch, rest) {
ENSURE_CHAR(ch);
- scm_port_put_char(sport, SCM_CHAR_VALUE(ch));
+ ch_val = SCM_CHAR_VALUE(ch);
+ str_size += SCM_CHARCODEC_CHAR_LEN(scm_current_char_codec, ch_val);
+ len++;
}
ENSURE_PROPER_LIST_TERMINATION(rest, lst);
- return scm_p_srfi6_get_output_string(sport);
+ dst = str = scm_malloc(str_size);
+ FOR_EACH (ch, lst) {
+#if !SCM_USE_NULL_CAPABLE_STRING
+ if (ch == '\0')
+ ERR("list->string: " SCM_ERRMSG_NULL_IN_STRING);
+#endif
+ dst = SCM_CHARCODEC_INT2STR(scm_current_char_codec, dst,
+ SCM_CHAR_VALUE(ch), SCM_MB_STATELESS);
+ }
+
+ return MAKE_STRING(str, len);
}
ScmObj
@@ -1515,7 +1569,12 @@
ENSURE_STRING(str);
+#if SCM_USE_NULL_CAPABLE_STRING
+ /* result is truncated at first null and incorrect */
+ return MAKE_STRING_COPYING(SCM_STRING_STR(str), STRLEN_UNKNOWN);
+#else
return MAKE_STRING_COPYING(SCM_STRING_STR(str), SCM_STRING_LEN(str));
+#endif
}
ScmObj
Modified: branches/r5rs/sigscheme/read.c
===================================================================
--- branches/r5rs/sigscheme/read.c 2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/read.c 2006-01-10 09:19:24 UTC (rev 2879)
@@ -605,6 +605,10 @@
ERR("invalid char in string: 0x%x", c);
break;
}
+#if !SCM_USE_NULL_CAPABLE_STRING
+ if (c == '\0')
+ ERR(SCM_ERRMSG_NULL_IN_STRING);
+#endif
}
LBUF_END(lbuf)[-1] = '\0';
ERR("too long string: \"%s\"", LBUF_BUF(lbuf));
Modified: branches/r5rs/sigscheme/sigscheme.c
===================================================================
--- branches/r5rs/sigscheme/sigscheme.c 2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/sigscheme.c 2006-01-10 09:19:24 UTC (rev 2879)
@@ -193,6 +193,9 @@
#if SCM_COMPAT_SIOD_BUGS
scm_provide(CONST_STRING("siod-bugs"));
#endif
+#if SCM_USE_NULL_CAPABLE_STRING
+ scm_provide(CONST_STRING("null-capable-string"));
+#endif
scm_initialized = scm_true;
}
Modified: branches/r5rs/sigscheme/sigscheme.h
===================================================================
--- branches/r5rs/sigscheme/sigscheme.h 2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/sigscheme.h 2006-01-10 09:19:24 UTC (rev 2879)
@@ -60,6 +60,8 @@
#define SCM_ERRMSG_MEMORY_EXHAUSTED "memory exhausted"
#define SCM_ERRMSG_IMPROPER_ARGS \
"proper list required for function call but got"
+#define SCM_ERRMSG_NULL_IN_STRING \
+ "null character in a middle of string is not enabled"
#ifdef __GNUC__
#define SCM_NOINLINE __attribute__((noinline))
Modified: branches/r5rs/sigscheme/storage.c
===================================================================
--- branches/r5rs/sigscheme/storage.c 2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/storage.c 2006-01-10 09:19:24 UTC (rev 2879)
@@ -208,7 +208,7 @@
SCM_ASSERT(str);
if (len == STRLEN_UNKNOWN)
- len = scm_mb_bare_c_strlen(str);
+ len = scm_mb_bare_c_strlen(scm_current_char_codec, str);
obj = scm_alloc_cell();
SCM_ENTYPE_STRING(obj);
Modified: branches/r5rs/sigscheme/strport.c
===================================================================
--- branches/r5rs/sigscheme/strport.c 2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/strport.c 2006-01-10 09:19:24 UTC (rev 2879)
@@ -311,6 +311,16 @@
return (port->str) ? port->str : "";
}
+size_t
+ScmOutputStrPort_c_strlen(ScmBytePort *bport)
+{
+ ScmOutputStrPort *port;
+
+ port = SCM_BYTEPORT_DYNAMIC_CAST(ScmOutputStrPort, bport);
+
+ return (port->buf_size) ? port->buf_size - sizeof("") : 0;
+}
+
void **
ScmOutputStrPort_ref_opaque(ScmBytePort *bport)
{
Modified: branches/r5rs/sigscheme/strport.h
===================================================================
--- branches/r5rs/sigscheme/strport.h 2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/strport.h 2006-01-10 09:19:24 UTC (rev 2879)
@@ -82,6 +82,7 @@
ScmBytePort *ScmOutputStrPort_new(ScmOutputStrPort_finalizer finalize);
const char *ScmOutputStrPort_str(ScmBytePort *bport);
+size_t ScmOutputStrPort_c_strlen(ScmBytePort *bport);
void **ScmOutputStrPort_ref_opaque(ScmBytePort *bport);
More information about the uim-commit
mailing list