[uim-commit] r2879 - branches/r5rs/sigscheme

yamaken at freedesktop.org yamaken at freedesktop.org
Tue Jan 10 01:19:28 PST 2006


Author: yamaken
Date: 2006-01-10 01:19:24 -0800 (Tue, 10 Jan 2006)
New Revision: 2879

Modified:
   branches/r5rs/sigscheme/config.h
   branches/r5rs/sigscheme/encoding.c
   branches/r5rs/sigscheme/encoding.h
   branches/r5rs/sigscheme/operations-srfi6.c
   branches/r5rs/sigscheme/operations.c
   branches/r5rs/sigscheme/read.c
   branches/r5rs/sigscheme/sigscheme.c
   branches/r5rs/sigscheme/sigscheme.h
   branches/r5rs/sigscheme/storage.c
   branches/r5rs/sigscheme/strport.c
   branches/r5rs/sigscheme/strport.h
Log:
* sigscheme/encoding.h
  - (scm_mb_strlen, scm_mb_bare_c_strlen, scm_mb_substring,
    scm_mb_strref): Add new arg 'codec'
* sigscheme/encoding.c
  - (scm_mb_strlen, scm_mb_bare_c_strlen, scm_mb_substring): Add new
    arg 'codec'
* sigscheme/config.h
  - (SCM_USE_NULL_CAPABLE_STRING): New macro
* sigscheme/sigscheme.h
  - (SCM_ERRMSG_NULL_IN_STRING): New macro
* sigscheme/sigscheme.c
  - (scm_initialize_internal): Provide "null-capable-string" when
    SCM_USE_NULL_CAPABLE_STRING
* sigscheme/read.c
  - (read_string): Reject null character when
    !SCM_USE_NULL_CAPABLE_STRING
* sigscheme/storage.c
  - (scm_make_string_internal): Follow the API change of encoding.h
* sigscheme/strport.h
  - (ScmOutputStrPort_c_strlen): New function decl
* sigscheme/strport.c
  - (ScmOutputStrPort_c_strlen): New function
* sigscheme/operations-srfi6.c
  - (scm_p_srfi6_get_output_string):
    * Fix character codec selection (measure the length with the port's
      codec instead of scm_current_char_codec)
    * Add partial support for SCM_USE_NULL_CAPABLE_STRING
* sigscheme/operations.c
  - (scm_p_make_string, scm_p_list2string):
    * Make efficient (build the result buffer directly instead of
      writing each character through a string output port)
    * Support SCM_USE_NULL_CAPABLE_STRING
  - (scm_p_string_length, scm_p_string_ref): Follow the API change of
    encoding.h
  - (scm_p_string_setd):
    * Ditto
    * Add partial support for SCM_USE_NULL_CAPABLE_STRING
  - (scm_p_string_append, scm_p_string_copy): Add partial support for
    SCM_USE_NULL_CAPABLE_STRING
  - (scm_p_string2list): Support SCM_USE_NULL_CAPABLE_STRING


Modified: branches/r5rs/sigscheme/config.h
===================================================================
--- branches/r5rs/sigscheme/config.h	2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/config.h	2006-01-10 09:19:24 UTC (rev 2879)
@@ -82,6 +82,7 @@
 #define SCM_ACCESSOR_ASSERT     0  /* enable strict type check with accessor */
 #define SCM_USE_VALUECONS       1  /* use experimental values passing */
 #define SCM_VOLATILE_OUTPUT     0  /* always flush files on write */
+#define SCM_USE_NULL_CAPABLE_STRING 1  /* accept null character in a middle of a string */
 #define SCM_OBJ_COMPACT         0  /* object representation compaction (experimental) */
 
 #define SCM_GCC4_READY_GC       1  /* use experimental gcc4-ready stack protection */

Modified: branches/r5rs/sigscheme/encoding.c
===================================================================
--- branches/r5rs/sigscheme/encoding.c	2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/encoding.c	2006-01-10 09:19:24 UTC (rev 2879)
@@ -266,7 +266,7 @@
 =======================================*/
 
 int
-scm_mb_strlen(ScmMultibyteString mbs)
+scm_mb_strlen(ScmCharCodec *codec, ScmMultibyteString mbs)
 {
     int len;
     ScmMultibyteCharInfo c;
@@ -275,7 +275,7 @@
           SCM_MBS_GET_SIZE(mbs), SCM_MBS_GET_STR(mbs)));
 
     for (len = 0; SCM_MBS_GET_SIZE(mbs); len++) {
-        c = SCM_CHARCODEC_SCAN_CHAR(scm_current_char_codec, mbs);
+        c = SCM_CHARCODEC_SCAN_CHAR(codec, mbs);
         CDBG((SCM_DBG_ENCODING, "%d, %d;", SCM_MBCINFO_GET_SIZE(c), c.flag));
         SCM_MBS_SKIP_CHAR(mbs, c);
     }
@@ -286,16 +286,16 @@
 
 /* FIXME: pick a better name. */
 int
-scm_mb_bare_c_strlen(const char *s)
+scm_mb_bare_c_strlen(ScmCharCodec *codec, const char *s)
 {
     ScmMultibyteString mbs;
 
     SCM_MBS_INIT2(mbs, s, strlen(s));
-    return scm_mb_strlen(mbs);
+    return scm_mb_strlen(codec, mbs);
 }
 
 ScmMultibyteString
-scm_mb_substring(ScmMultibyteString mbs, int i, int len)
+scm_mb_substring(ScmCharCodec *codec, ScmMultibyteString mbs, int i, int len)
 {
     ScmMultibyteString ret, end;
     ScmMultibyteCharInfo c;
@@ -303,14 +303,14 @@
     ret = mbs;
 
     while (i--) {
-        c = SCM_CHARCODEC_SCAN_CHAR(scm_current_char_codec, ret);
+        c = SCM_CHARCODEC_SCAN_CHAR(codec, ret);
         SCM_MBS_SKIP_CHAR(ret, c);
     }
 
     end = ret;
 
     while (len--) {
-        c = SCM_CHARCODEC_SCAN_CHAR(scm_current_char_codec, end);
+        c = SCM_CHARCODEC_SCAN_CHAR(codec, end);
         SCM_MBS_SKIP_CHAR(end, c);
     }
 

Modified: branches/r5rs/sigscheme/encoding.h
===================================================================
--- branches/r5rs/sigscheme/encoding.h	2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/encoding.h	2006-01-10 09:19:24 UTC (rev 2879)
@@ -200,10 +200,11 @@
 /*=======================================
    Function Declarations
 =======================================*/
-int scm_mb_strlen(ScmMultibyteString mbs);
-int scm_mb_bare_c_strlen(const char *str);
-ScmMultibyteString scm_mb_substring(ScmMultibyteString str, int i, int len);
-#define scm_mb_strref(str, i) (scm_mb_substring((str), (i), 1))
+int scm_mb_strlen(ScmCharCodec *codec, ScmMultibyteString mbs);
+int scm_mb_bare_c_strlen(ScmCharCodec *codec, const char *str);
+ScmMultibyteString scm_mb_substring(ScmCharCodec *codec,
+                                    ScmMultibyteString str, int i, int len);
+#define scm_mb_strref(codec, str, i) (scm_mb_substring((codec), (str), (i), 1))
 ScmCharCodec *scm_mb_find_codec(const char *encoding);
 int scm_charcodec_read_char(ScmCharCodec *codec, ScmMultibyteString *mbs,
                             const char *caller);

Modified: branches/r5rs/sigscheme/operations-srfi6.c
===================================================================
--- branches/r5rs/sigscheme/operations-srfi6.c	2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/operations-srfi6.c	2006-01-10 09:19:24 UTC (rev 2879)
@@ -35,6 +35,7 @@
 /*=======================================
   System Include
 =======================================*/
+#include <stdlib.h>
 
 /*=======================================
   Local Include
@@ -111,6 +112,12 @@
 scm_p_srfi6_get_output_string(ScmObj port)
 {
     ScmBaseCharPort *cport;
+    const char *str;
+    char *new_str;
+    int mb_len;
+#if SCM_USE_NULL_CAPABLE_STRING
+    size_t size;
+#endif
     DECLARE_FUNCTION("get-output-string", procedure_fixed_1);
 
     ENSURE_PORT(port);
@@ -118,8 +125,18 @@
     SCM_ENSURE_LIVE_PORT(port);
     cport = SCM_CHARPORT_DYNAMIC_CAST(ScmBaseCharPort, SCM_PORT_IMPL(port));
 
-    return MAKE_STRING_COPYING(ScmOutputStrPort_str(cport->bport),
-                               STRLEN_UNKNOWN);
+    str = ScmOutputStrPort_str(cport->bport);
+    /* FIXME: incorrect length for null-capable string */
+    mb_len = scm_mb_bare_c_strlen(scm_port_codec(port), str);
+#if SCM_USE_NULL_CAPABLE_STRING
+    size = ScmOutputStrPort_c_strlen(cport->bport) + sizeof("");
+    new_str = scm_malloc(size);
+    memcpy(new_str, str, size);
+#else
+    new_str = strdup(str);
+#endif
+
+    return MAKE_STRING(new_str, mb_len);
 }
 
 

Modified: branches/r5rs/sigscheme/operations.c
===================================================================
--- branches/r5rs/sigscheme/operations.c	2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/operations.c	2006-01-10 09:19:24 UTC (rev 2879)
@@ -1241,10 +1241,14 @@
 ScmObj
 scm_p_make_string(ScmObj length, ScmObj args)
 {
-    int filler_val, len, i;
-    ScmObj filler, sport;
+    ScmObj filler;
+    int filler_val, len, ch_len;
+    char *str, *dst;
+    const char *next;
+    char ch_str[SCM_MB_MAX_LEN + sizeof("")];
     DECLARE_FUNCTION("make-string", procedure_variadic_1);
 
+    ENSURE_STATELESS_CODEC(scm_current_char_codec);
     ENSURE_INT(length);
     len = SCM_INT_VALUE(length);
     if (len == 0)
@@ -1255,21 +1259,31 @@
     /* extract filler */
     if (NULLP(args)) {
         filler_val = ' ';
+        ch_len = sizeof((char)' ');
     } else {
         filler = POP(args);
         ASSERT_NO_MORE_ARG(args);
         ENSURE_CHAR(filler);
         filler_val = SCM_CHAR_VALUE(filler);
+        ch_len = SCM_CHARCODEC_CHAR_LEN(scm_current_char_codec, filler_val);
     }
+#if !SCM_USE_NULL_CAPABLE_STRING
+    if (filler_val == '\0')
+        ERR("make-string: " SCM_ERRMSG_NULL_IN_STRING);
+#endif
 
-    /* TODO: make efficient */
-    /* fill string (multibyte-ready) */
-    sport = scm_p_srfi6_open_output_string();
-    for (i = 0; i < len; i++) {
-        scm_port_put_char(sport, filler_val);
-    }
+    next = SCM_CHARCODEC_INT2STR(scm_current_char_codec, ch_str, filler_val,
+                                 SCM_MB_STATELESS);
+    if (!next)
+        ERR("make-string: invalid char 0x%x for encoding %s",
+            filler_val, SCM_CHARCODEC_ENCODING(scm_current_char_codec));
 
-    return scm_p_srfi6_get_output_string(sport);
+    str = scm_malloc(ch_len * len + sizeof(""));
+    for (dst = str; dst < &str[ch_len * len]; dst += ch_len)
+        memcpy(dst, ch_str, ch_len);
+    *dst = '\0';
+
+    return MAKE_STRING(str, len);
 }
 
 ScmObj
@@ -1288,7 +1302,7 @@
 
     ENSURE_STRING(str);
 
-    len = scm_mb_bare_c_strlen(SCM_STRING_STR(str));
+    len = scm_mb_bare_c_strlen(scm_current_char_codec, SCM_STRING_STR(str));
 
     return MAKE_INT(len);
 }
@@ -1308,7 +1322,7 @@
         ERR_OBJ("index out of range", k);
 
     SCM_MBS_INIT2(mbs, SCM_STRING_STR(str), strlen(SCM_STRING_STR(str)));
-    mbs = scm_mb_strref(mbs, idx);
+    mbs = scm_mb_strref(scm_current_char_codec, mbs, idx);
 
     ch = SCM_CHARCODEC_STR2INT(scm_current_char_codec, SCM_MBS_GET_STR(mbs),
                                SCM_MBS_GET_SIZE(mbs), SCM_MBS_GET_STATE(mbs));
@@ -1342,7 +1356,7 @@
 
     /* point at the char that to be replaced */
     SCM_MBS_INIT2(mbs_ch, c_str, strlen(c_str));
-    mbs_ch = scm_mb_strref(mbs_ch, idx);
+    mbs_ch = scm_mb_strref(scm_current_char_codec, mbs_ch, idx);
     orig_ch_len = SCM_MBS_GET_SIZE(mbs_ch);
     prefix_len = SCM_MBS_GET_STR(mbs_ch) - c_str;
 
@@ -1417,14 +1431,20 @@
     /* substring */
     c_str = SCM_STRING_STR(str);
     SCM_MBS_INIT2(mbs, c_str, strlen(c_str));
-    mbs = scm_mb_substring(mbs, c_start, c_end - c_start);
+    mbs = scm_mb_substring(scm_current_char_codec,
+                           mbs, c_start, c_end - c_start);
 
     /* copy the substring */
     new_str = scm_malloc(SCM_MBS_GET_SIZE(mbs) + sizeof(""));
     memcpy(new_str, SCM_MBS_GET_STR(mbs), SCM_MBS_GET_SIZE(mbs));
     new_str[SCM_MBS_GET_SIZE(mbs)] = '\0';
 
+#if SCM_USE_NULL_CAPABLE_STRING
+    /* FIXME: the result is truncated at null and incorrect */
+    return MAKE_STRING(new_str, STRLEN_UNKNOWN);
+#else
     return MAKE_STRING(new_str, c_end - c_start);
+#endif
 }
 
 /* FIXME: support stateful encoding */
@@ -1459,7 +1479,12 @@
     }
     *dst = '\0';
 
+#if SCM_USE_NULL_CAPABLE_STRING
+    /* each string is chopped at first null and the result is incorrect */
+    return MAKE_STRING(new_str, STRLEN_UNKNOWN);
+#else
     return MAKE_STRING(new_str, mb_len);
+#endif
 }
 
 ScmObj
@@ -1467,18 +1492,31 @@
 {
     ScmQueue q;
     ScmObj res;
-    int ch;
+    int ch, mb_len;
+    const char *c_str;
     ScmMultibyteString mbs;
     DECLARE_FUNCTION("string->list", procedure_fixed_1);
 
     ENSURE_STRING(str);
 
-    SCM_MBS_INIT2(mbs, SCM_STRING_STR(str), strlen(SCM_STRING_STR(str)));
+    c_str = SCM_STRING_STR(str);
+    mb_len = SCM_STRING_LEN(str);
+    SCM_MBS_INIT2(mbs, c_str, strlen(c_str));
 
     res = SCM_NULL;
     SCM_QUEUE_POINT_TO(q, res);
-    while (SCM_MBS_GET_SIZE(mbs)) {
-        ch = SCM_CHARCODEC_READ_CHAR(scm_current_char_codec, mbs);
+    while (mb_len--) {
+        if (SCM_MBS_GET_SIZE(mbs)) {
+            ch = SCM_CHARCODEC_READ_CHAR(scm_current_char_codec, mbs);
+        } else {
+#if SCM_USE_NULL_CAPABLE_STRING
+            ch = '\0';
+            c_str = &SCM_MBS_GET_STR(mbs)[1];
+            SCM_MBS_INIT2(mbs, c_str, strlen(c_str));
+#else
+            break;
+#endif
+        }
         SCM_QUEUE_ADD(q, MAKE_CHAR(ch));
     }
 
@@ -1488,24 +1526,40 @@
 ScmObj
 scm_p_list2string(ScmObj lst)
 {
-    ScmObj rest, ch, sport;
+    ScmObj rest, ch;
+    size_t str_size;
+    int ch_val, len;
+    char *str, *dst;
     DECLARE_FUNCTION("list->string", procedure_fixed_1);
 
+    ENSURE_STATELESS_CODEC(scm_current_char_codec);
     ENSURE_LIST(lst);
 
     if (NULLP(lst))
         return MAKE_STRING_COPYING("", 0);
 
-    /* TODO: make efficient */
-    sport = scm_p_srfi6_open_output_string();
+    str_size = sizeof("");
     rest = lst;
+    len = 0;
     FOR_EACH (ch, rest) {
         ENSURE_CHAR(ch);
-        scm_port_put_char(sport, SCM_CHAR_VALUE(ch));
+        ch_val = SCM_CHAR_VALUE(ch);
+        str_size += SCM_CHARCODEC_CHAR_LEN(scm_current_char_codec, ch_val);
+        len++;
     }
     ENSURE_PROPER_LIST_TERMINATION(rest, lst);
 
-    return scm_p_srfi6_get_output_string(sport);
+    dst = str = scm_malloc(str_size);
+    FOR_EACH (ch, lst) {
+#if !SCM_USE_NULL_CAPABLE_STRING
+        if (ch == '\0')
+            ERR("list->string: " SCM_ERRMSG_NULL_IN_STRING);
+#endif
+        dst = SCM_CHARCODEC_INT2STR(scm_current_char_codec, dst,
+                                    SCM_CHAR_VALUE(ch), SCM_MB_STATELESS);
+    }
+
+    return MAKE_STRING(str, len);
 }
 
 ScmObj
@@ -1515,7 +1569,12 @@
 
     ENSURE_STRING(str);
 
+#if SCM_USE_NULL_CAPABLE_STRING
+    /* result is truncated at first null and incorrect */
+    return MAKE_STRING_COPYING(SCM_STRING_STR(str), STRLEN_UNKNOWN);
+#else
     return MAKE_STRING_COPYING(SCM_STRING_STR(str), SCM_STRING_LEN(str));
+#endif
 }
 
 ScmObj

Modified: branches/r5rs/sigscheme/read.c
===================================================================
--- branches/r5rs/sigscheme/read.c	2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/read.c	2006-01-10 09:19:24 UTC (rev 2879)
@@ -605,6 +605,10 @@
                 ERR("invalid char in string: 0x%x", c);
             break;
         }
+#if !SCM_USE_NULL_CAPABLE_STRING
+        if (c == '\0')
+            ERR(SCM_ERRMSG_NULL_IN_STRING);
+#endif
     }
     LBUF_END(lbuf)[-1] = '\0';
     ERR("too long string: \"%s\"", LBUF_BUF(lbuf));

Modified: branches/r5rs/sigscheme/sigscheme.c
===================================================================
--- branches/r5rs/sigscheme/sigscheme.c	2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/sigscheme.c	2006-01-10 09:19:24 UTC (rev 2879)
@@ -193,6 +193,9 @@
 #if SCM_COMPAT_SIOD_BUGS
     scm_provide(CONST_STRING("siod-bugs"));
 #endif
+#if SCM_USE_NULL_CAPABLE_STRING
+    scm_provide(CONST_STRING("null-capable-string"));
+#endif
     scm_initialized = scm_true;
 }
 

Modified: branches/r5rs/sigscheme/sigscheme.h
===================================================================
--- branches/r5rs/sigscheme/sigscheme.h	2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/sigscheme.h	2006-01-10 09:19:24 UTC (rev 2879)
@@ -60,6 +60,8 @@
 #define SCM_ERRMSG_MEMORY_EXHAUSTED    "memory exhausted"
 #define SCM_ERRMSG_IMPROPER_ARGS                                             \
     "proper list required for function call but got"
+#define SCM_ERRMSG_NULL_IN_STRING                                            \
+    "null character in a middle of string is not enabled"
 
 #ifdef __GNUC__
 #define SCM_NOINLINE __attribute__((noinline))

Modified: branches/r5rs/sigscheme/storage.c
===================================================================
--- branches/r5rs/sigscheme/storage.c	2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/storage.c	2006-01-10 09:19:24 UTC (rev 2879)
@@ -208,7 +208,7 @@
     SCM_ASSERT(str);
 
     if (len == STRLEN_UNKNOWN)
-        len = scm_mb_bare_c_strlen(str);
+        len = scm_mb_bare_c_strlen(scm_current_char_codec, str);
 
     obj = scm_alloc_cell();
     SCM_ENTYPE_STRING(obj);

Modified: branches/r5rs/sigscheme/strport.c
===================================================================
--- branches/r5rs/sigscheme/strport.c	2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/strport.c	2006-01-10 09:19:24 UTC (rev 2879)
@@ -311,6 +311,16 @@
     return (port->str) ? port->str : "";
 }
 
+size_t
+ScmOutputStrPort_c_strlen(ScmBytePort *bport)
+{
+    ScmOutputStrPort *port;
+
+    port = SCM_BYTEPORT_DYNAMIC_CAST(ScmOutputStrPort, bport);
+
+    return (port->buf_size) ? port->buf_size - sizeof("") : 0;
+}
+
 void **
 ScmOutputStrPort_ref_opaque(ScmBytePort *bport)
 {

Modified: branches/r5rs/sigscheme/strport.h
===================================================================
--- branches/r5rs/sigscheme/strport.h	2006-01-10 07:46:09 UTC (rev 2878)
+++ branches/r5rs/sigscheme/strport.h	2006-01-10 09:19:24 UTC (rev 2879)
@@ -82,6 +82,7 @@
 
 ScmBytePort *ScmOutputStrPort_new(ScmOutputStrPort_finalizer finalize);
 const char *ScmOutputStrPort_str(ScmBytePort *bport);
+size_t ScmOutputStrPort_c_strlen(ScmBytePort *bport);
 void **ScmOutputStrPort_ref_opaque(ScmBytePort *bport);
 
 



More information about the uim-commit mailing list