[Libburn] [PATCH] name mangling/sorting

Joe Neeman neeman at webone.com.au
Sun Jul 31 14:39:34 EST 2005


Previous versions of libisofs didn't correctly perform directory sorting 
and name mangling. For example, writing a level 1 iso filesystem with 
two files, "morethan8chars.txt" and "morethan8chars2.txt" would cause 
"MORETHAN.TXT" to appear twice in the same directory. Also the files 
"a.txt" and "A.TXU" would appear in the wrong order.

This patch fixes those problems. Now the only case (that I know of) 
which will cause an invalid .iso is if a directory contains more than 
10^8 files that start with the same 8 letters.

This patch applies on top of the structpatch that I sent earlier.
Joe
-------------- next part --------------
diff -ru configure.ac configure.ac
--- configure.ac	2005-07-31 14:31:22.000000000 +1000
+++ configure.ac	2005-07-24 11:16:54.000000000 +1000
@@ -52,6 +52,7 @@
 AM_PROG_CC_C_O
 AC_C_CONST
 AC_C_INLINE
+AC_C_BIGENDIAN
 
 AC_PROG_LIBTOOL
 AC_SUBST(LIBTOOL_DEPS)
@@ -63,8 +64,6 @@
 
 THREAD_LIBS=-lpthread
 AC_SUBST(THREAD_LIBS)
-    
-TARGET_SHIZZLE
 
 dnl Add compiler-specific flags
 
diff -ru libisofs/ecma119.c libisofs/ecma119.c
--- libisofs/ecma119.c	2005-07-31 14:31:43.000000000 +1000
+++ libisofs/ecma119.c	2005-07-31 14:16:56.000000000 +1000
@@ -141,7 +141,8 @@
 					 struct iso_tree_dir *dir);
 static void ecma119_alloc_writer_data(struct ecma119_write_target *target,
 				      struct iso_tree_dir *dir);
-static void ecma119_setup_path_tables(struct ecma119_write_target*, int flags);
+static void ecma119_setup_path_tables_iso(struct ecma119_write_target*);
+static void ecma119_setup_path_tables_joliet(struct ecma119_write_target*);
 
 /* burn_source functions */
 static int ecma119_read(struct burn_source*, unsigned char*, int);
@@ -196,6 +197,37 @@
 					    uint32_t size,
 					    int joliet);
 
+
+/* name-mangling routines */
+
+/* return a newly allocated array of pointers to all the children, in
+ * in alphabetical order and with mangled names. */
+static struct iso_tree_node **ecma119_mangle_names(struct iso_tree_dir *dir);
+
+/* mangle a single filename according to where the last '.' is:
+ * \param name the name to mangle
+ * \param num_change the number of characters to mangle
+ * \param seq_num the string ("%0<num_change>d", seq_num) to which it should
+ *	be mangled
+ */
+static void ecma119_mangle_name(char *name,
+				int num_change,
+				int seq_num);
+
+/* mangle a single filename based on the explicit positions passed. That is,
+ * the difference between this and ecma119_mangle_name_iso is that this one
+ * doesn't look for the '.' character; it changes the character at the
+ * beginning of name. */
+static void ecma119_mangle_name_priv(char *name,
+				     int num_change,
+				     int seq_num);
+
+/* return a newly allocated array of pointers to all the children, in
+ * alphabetical order. The difference between this and ecma119_mangle_names
+ * is that this function sorts the files according to joliet names and it
+ * doesn't mangle the names. */
+static struct iso_tree_node **ecma119_sort_joliet(struct iso_tree_dir *dir);
+
 /* string abstraction functions */
 static int node_namelen_iso(struct iso_tree_node *node)
 {
@@ -280,6 +312,117 @@
 		(int)file->block);
 }
 
+static struct iso_tree_node **ecma119_mangle_names(struct iso_tree_dir *dir)
+{
+	size_t retsize = dir->nfiles + dir->nchildren;
+	struct iso_tree_node **ret = calloc(1, sizeof(void*) * retsize);
+	int i, j, k;
+
+	j = 0;
+	for (i=0; i<dir->nfiles; i++) {
+		ret[j++] = ISO_NODE(dir->files[i]);
+	}
+	for (i=0; i<dir->nchildren; i++) {
+		ret[j++] = ISO_NODE(dir->children[i]);
+	}
+
+	qsort(ret, retsize, sizeof(void*), iso_node_cmp_iso);
+
+	for (i=0; i<retsize; i++) {
+		int n_change;
+		char *name;
+
+		/* find the number of consecutive equal names */
+		j = 1;
+		while (i+j < retsize && !iso_node_cmp_iso(&ret[i], &ret[i+j]))
+			j++;
+
+		if (j == 1) continue;
+
+		/* mangle the names */
+		n_change = j / 10 + 1;
+		for (k=0; k<j; k++) {
+			name = iso_tree_node_get_name_nconst(ret[i+k],
+							     ISO_NAME_ISO);
+			ecma119_mangle_name(name, n_change, k);
+		}
+
+		/* skip ahead by the number of mangled names */
+		i += j - 1;
+	}
+
+	return ret;
+}
+
+static void ecma119_mangle_name(char *name,
+				int num_change,
+				int seq_num)
+{
+	char *dot = strrchr(name, '.');
+	int dot_pos;
+	int len = strlen(name);
+	int ext_len;
+
+	if (!dot) dot = name + len;
+	dot_pos = dot - name;
+	ext_len = len - dot_pos;
+	if (dot_pos < num_change) {
+		/* we need to shift part of the mangling into the extension */
+		int digits=0, i;
+		int n_ext;
+		int nleft = num_change - ext_len;
+		char *pos = name + len - num_change;
+
+		pos = MAX(pos, dot+1);
+		n_ext = name + len - pos;
+
+		/* digits = 10^ext_len */
+		for (i=0; i<ext_len; i++) {
+			digits *= 10;
+		}
+		ecma119_mangle_name_priv(pos, n_ext, seq_num % digits);
+		ecma119_mangle_name_priv(dot-nleft, nleft, seq_num / digits);
+	} else {
+		ecma119_mangle_name_priv(dot-num_change, num_change, seq_num);
+	}
+}
+
+static void ecma119_mangle_name_priv(char *name,
+				     int num_change,
+				     int seq_num)
+{
+	char fmt[5];
+	char overwritten;
+
+	if (num_change <= 0 || num_change >= 10) {
+		return;
+	}
+
+	overwritten = name[num_change];
+	sprintf(fmt, "%%0%1dd", num_change);
+	sprintf(name, fmt, seq_num);
+	name[num_change] = overwritten;
+}
+
+static struct iso_tree_node **ecma119_sort_joliet(struct iso_tree_dir *dir)
+{
+	struct dir_write_info *inf = GET_DIR_INF(dir);
+	size_t retsize = dir->nfiles + inf->real_nchildren;
+	struct iso_tree_node **ret = malloc(retsize * sizeof(void*));
+	int i, j;
+
+	j = 0;
+	for (i=0; i<dir->nfiles; i++) {
+		ret[j++] = ISO_NODE(dir->files[i]);
+	}
+	for (i=0; i<inf->real_nchildren; i++) {
+		ret[j++] = ISO_NODE(inf->real_children[i]);
+	}
+	qsort(ret, retsize, sizeof(void*), iso_node_cmp_joliet);
+
+	return ret;
+}
+
 struct ecma119_write_target *ecma119_target_new(struct iso_volset *volset,
 						int volnum)
 {
@@ -443,7 +586,7 @@
 }
 
 /* set directory sizes recursively. Also fill out the dirlist_len and
- * filelist_len fields in the ecma119 writer. FIXME: write one for Joliet
+ * filelist_len fields in the ecma119 writer.
  */
 static void ecma119_target_rsize(struct ecma119_write_target *t,
 				 struct iso_tree_dir *dir)
@@ -581,7 +724,7 @@
 	if (t->joliet) {
 		ecma119_target_rsize_joliet(t, TARGET_ROOT(t));
 	}
-	ecma119_setup_path_tables(t, 0);
+	ecma119_setup_path_tables_iso(t);
 	t->curblock = 16 /* for the system area */
 		+ 1  /* volume desc */
 		+ 1; /* volume desc terminator */
@@ -595,7 +738,7 @@
 	t->curblock += DIV_UP(t->path_table_size, 2048);
 
 	if (t->joliet) {
-		ecma119_setup_path_tables(t, ECMA119_JOLIET);
+		ecma119_setup_path_tables_joliet(t);
 		t->l_path_table_pos_joliet = t->curblock;
 		t->curblock += DIV_UP(t->path_table_size_joliet, 2048);
 		t->m_path_table_pos_joliet = t->curblock;
@@ -1083,26 +1226,26 @@
 	struct dir_write_info *inf = GET_DIR_INF(dir);
 	int len = ROUND_UP(inf->susp_len + inf->len, 2048);
 	unsigned char *buf;
-	int i, file, child, pos;
+	int i, pos;
 	struct iso_tree_node *ch;
 
 	susp_len slen;
 	total_dirent_len dlen;
-	struct iso_tree_dir **children;
+	struct iso_tree_node **children;
 	int nchildren;
 
 	assert ( ((flags & ECMA119_JOLIET) && t->curblock == inf->joliet_block)
 			|| t->curblock == dir->block );
 
 	if (flags & ECMA119_JOLIET) {
-		children = inf->real_children;
-		nchildren = inf->real_nchildren;
+		children = ecma119_sort_joliet(dir);
+		nchildren = inf->real_nchildren + dir->nfiles;
 		len = ROUND_UP(inf->joliet_len, 2048);
 		slen = susp_len_joliet;
 		dlen = dirent_len_joliet;
 	} else {
-		children = dir->children;
-		nchildren = dir->nchildren;
+		children = ecma119_mangle_names(dir);
+		nchildren = dir->nchildren + dir->nfiles;
 		len = ROUND_UP(inf->susp_len + inf->len, 2048);
 		slen = susp_len_iso;
 		dlen = dirent_len_iso;
@@ -1119,18 +1262,15 @@
 				 RECORD_TYPE_PARENT);
 	pos += 34 + slen(&inf->parent_susp);
 
-	for (i = file = child = 0; i<(nchildren + dir->nfiles); i++) {
-		if (child < nchildren && (file == dir->nfiles ||
-					iso_node_cmp(&children[child],
-						     &dir->files[file]) < 0)) {
-			ch = ISO_NODE(children[child++]);
-		} else {
-			ch = ISO_NODE(dir->files[file++]);
+	for (i=0; i<nchildren; i++) {
+		ch = children[i];
 
-			/* the Joliet directories shouldn't list Rock Ridge
-			 * directory placeholders */
-			if (flags & ECMA119_JOLIET && GET_FILE_INF(ch)->real_me)
-				continue;
+		/* Joliet directories shouldn't list RR directory placeholders.
+		 */
+		if (flags & ECMA119_JOLIET
+				&& S_ISREG( ch->attrib.st_mode )
+				&& GET_FILE_INF(ch)->real_me) {
+			continue;
 		}
 		ecma119_write_dir_record(t, buf+pos, flags, ch,
 					 RECORD_TYPE_NORMAL);
@@ -1138,6 +1278,7 @@
 	}
 
 	if (flags & ECMA119_JOLIET) {
+		free(children);
 		return buf;
 	}
 
@@ -1146,16 +1287,12 @@
 	pos += inf->self_susp.CE_len;
 	susp_write_CE(t, &inf->parent_susp, buf+pos);
 	pos += inf->parent_susp.CE_len;
-	for (i=0; i<dir->nchildren; i++) {
-		struct dir_write_info *cinf = GET_DIR_INF(dir->children[i]);
-		susp_write_CE(t, &cinf->susp, buf+pos);
-		pos += cinf->susp.CE_len;
-	}
-	for (i=0; i<dir->nfiles; i++) {
-		struct file_write_info *cinf = GET_FILE_INF(dir->files[i]);
-		susp_write_CE(t, &cinf->susp, buf+pos);
-		pos += cinf->susp.CE_len;
+	for (i=0; i<nchildren; i++) {
+		struct node_write_info *ninf = GET_NODE_INF(children[i]);
+		susp_write_CE(t, &ninf->susp, buf+pos);
+		pos += ninf->susp.CE_len;
 	}
+	free(children);
 	return buf;
 }
 
@@ -1170,93 +1307,6 @@
 	} else {
 		ecma119_write_dir_record_iso(t, buf, node, type);
 	}
-#if 0
-	struct node_write_info *inf = GET_NODE_INF(node);
-	struct iso_tree_dir *parent;
-	const char *name;
-	int len;
-	int block, pblock, size, psize;
-
-	node_getname getname;
-	node_namelen namelen;
-	susp_len slen;
-	total_dirent_len dlen;
-
-	if (flags & ECMA119_JOLIET) {
-		getname = node_getname_joliet;
-		namelen = node_namelen_joliet;
-		slen = susp_len_joliet;
-		dlen = dirent_len_joliet;
-		if (S_ISDIR(node->attrib.st_mode)) {
-			block = DIR_INF(inf)->joliet_block;
-			size = DIR_INF(inf)->joliet_len;
-			parent = DIR_INF(inf)->real_parent;
-		} else {
-			block = node->block;
-			size = node->attrib.st_size;
-			parent = node->parent;
-		}
-		if (parent) {
-			pblock = GET_DIR_INF(parent)->joliet_block;
-			psize = GET_DIR_INF(parent)->joliet_len;
-		}
-	} else {
-		getname = node_getname_iso;
-		namelen = node_namelen_iso;
-		slen = susp_len_iso;
-		dlen = dirent_len_iso;
-		parent = node->parent;
-		size = node->attrib.st_size;
-		block = node->block;
-		if (parent) {
-			pblock = parent->block;
-			psize = parent->attrib.st_size;
-		}
-	}
-
-	buf[1] = 0; /* extended attribute length */
-	if (type == RECORD_TYPE_PARENT && parent) {
-		iso_bb(&buf[2], pblock, 4);
-		iso_bb(&buf[10], psize, 4);
-	} else {
-		iso_bb(&buf[2], block, 4);
-		iso_bb(&buf[10], size, 4);
-	}
-	iso_datetime_7(&buf[18], t->now);
-	buf[25] = S_ISDIR(node->attrib.st_mode) ? 2 : 0; /* file flags */
-	buf[26] = 0;	/* file unit size (for interleaved mode) */
-	buf[27] = 0;	/* interleave gap size */
-	iso_bb(&buf[28], 1, 2);	/* volume sequence number */
-
-	switch(type) {
-	case RECORD_TYPE_ROOT:
-		buf[0] = 34;
-		buf[32] = 1;
-		break;
-	case RECORD_TYPE_SELF:
-		buf[0] = 34 + slen(&DIR_INF(inf)->self_susp);
-		buf[32] = 1;
-		if (!(flags & ECMA119_JOLIET))
-			susp_write(t, &DIR_INF(inf)->self_susp, &buf[34]);
-		break;
-	case RECORD_TYPE_PARENT:
-		buf[0] = 34 + slen(&DIR_INF(inf)->parent_susp);
-		buf[32] = 1;
-		buf[33] = 1;
-		if (!(flags & ECMA119_JOLIET))
-			susp_write(t, &DIR_INF(inf)->parent_susp, &buf[34]);
-		break;
-	case RECORD_TYPE_NORMAL:
-		name = getname(node);
-		len = namelen(node);
-
-		buf[0] = dlen(node);
-		buf[32] = len;
-		memcpy(&buf[33], name, len);
-		if (!(flags & ECMA119_JOLIET))
-			susp_write(t, &inf->susp, &buf[node->dirent_len]);
-	}
-#endif
 }
 
 static void ecma119_write_dir_record_iso(struct ecma119_write_target *t,
@@ -1403,44 +1453,51 @@
 	buf[33] = file_id;
 }
 
-static void ecma119_setup_path_tables(struct ecma119_write_target *t, int flags)
+static void ecma119_setup_path_tables_iso(struct ecma119_write_target *t)
 {
 	int i, j, cur;
-	struct iso_tree_dir ***pathlist;
-	uint32_t *path_table_size;
-	node_namelen namelen;
-	struct iso_tree_dir **children;
-	int nchildren;
+	struct iso_tree_node **children;
 
-	if (flags & ECMA119_JOLIET) {
-		pathlist = &t->pathlist_joliet;
-		path_table_size = &t->path_table_size_joliet;
-		namelen = node_namelen_joliet;
-	} else {
-		pathlist = &t->pathlist;
-		path_table_size = &t->path_table_size;
-		namelen = node_namelen_iso;
+	t->pathlist = calloc(1, sizeof(void*) * t->dirlist_len);
+	t->pathlist[0] = TARGET_ROOT(t);
+	t->path_table_size = 10; /* root directory record */
+
+	cur = 1;
+	for (i=0; i<t->dirlist_len; i++) {
+		struct iso_tree_dir *dir = t->pathlist[i];
+		children = ecma119_mangle_names(dir);
+		for (j=0; j<dir->nchildren + dir->nfiles; j++) {
+			if (S_ISDIR(children[j]->attrib.st_mode)) {
+				int len = 8 + node_namelen_iso(children[j]);
+				t->pathlist[cur++] = ISO_DIR(children[j]);
+				t->path_table_size += len + len % 2;
+			}
+		}
+		free(children);
 	}
+}
+
+static void ecma119_setup_path_tables_joliet(struct ecma119_write_target *t)
+{
+	int i, j, cur;
+	struct iso_tree_node **children;
 
-	*pathlist = calloc(1, sizeof(void*) * t->dirlist_len);
-	(*pathlist)[0] = TARGET_ROOT(t);
-	*path_table_size = 10; /* root directory record */
+	t->pathlist_joliet = calloc(1, sizeof(void*) * t->dirlist_len);
+	t->pathlist_joliet[0] = TARGET_ROOT(t);
+	t->path_table_size_joliet = 10; /* root directory record */
 
 	cur = 1;
 	for (i=0; i<t->dirlist_len; i++) {
-		struct iso_tree_dir *dir = (*pathlist)[i];
-		if (flags & ECMA119_JOLIET) {
-			children = GET_DIR_INF(dir)->real_children;
-			nchildren = GET_DIR_INF(dir)->real_nchildren;
-		} else {
-			children = dir->children;
-			nchildren = dir->nchildren;
-		}
-		for (j=0; j<nchildren; j++) {
-			int len = 8 + namelen( ISO_NODE(children[j]) );
-			(*pathlist)[cur++] = children[j];
-			*path_table_size += len + len % 2;
+		struct iso_tree_dir *dir = t->pathlist_joliet[i];
+		struct dir_write_info *inf = GET_DIR_INF(dir);
+		children = ecma119_sort_joliet(dir);
+		for (j=0; j<inf->real_nchildren + dir->nfiles; j++) {
+			if (S_ISDIR(children[j]->attrib.st_mode)) {
+				int len = 8 + node_namelen_joliet(children[j]);
+				t->pathlist_joliet[cur++] =
+					ISO_DIR(children[j]);
+				t->path_table_size_joliet += len + len % 2;
+			}
 		}
 	}
 }
-
diff -ru libisofs/tree.c libisofs/tree.c
--- libisofs/tree.c	2005-07-31 14:31:22.000000000 +1000
+++ libisofs/tree.c	2005-07-31 13:18:04.000000000 +1000
@@ -262,8 +262,8 @@
 	file->name.joliet = iso_j_id(file->name.full);
 }
 
-const char *iso_tree_node_get_name(const struct iso_tree_node *node,
-				   enum iso_name_version ver)
+char *iso_tree_node_get_name_nconst(const struct iso_tree_node *node,
+				    enum iso_name_version ver)
 {
 	if (ver == ISO_NAME_ISO) {
 		if (node->volume->iso_level == 1) {
@@ -289,13 +289,19 @@
 	case ISO_NAME_ROCKRIDGE:
 		return node->name.rockridge;
 	case ISO_NAME_JOLIET:
-		return (const char*) node->name.joliet;
+		return (char*) node->name.joliet;
 	}
 
 	assert(0);
 	return NULL; /* just to shut up warnings */
 }
 
+const char *iso_tree_node_get_name(const struct iso_tree_node *node,
+				   enum iso_name_version ver)
+{
+	return iso_tree_node_get_name_nconst(node, ver);
+}
+
 void iso_tree_dir_set_name(struct iso_tree_dir *dir, const char *name)
 {
 	dir->name.full = strdup(name);
@@ -317,6 +323,24 @@
 		      iso_tree_node_get_name(*f2, ISO_NAME_FULL));
 }
 
+int iso_node_cmp_iso(const void *v1, const void *v2)
+{
+	struct iso_tree_node **f1 = (struct iso_tree_node **)v1;
+	struct iso_tree_node **f2 = (struct iso_tree_node **)v2;
+
+	return strcmp(iso_tree_node_get_name(*f1, ISO_NAME_ISO),
+		      iso_tree_node_get_name(*f2, ISO_NAME_ISO));
+}
+
+int iso_node_cmp_joliet(const void *v1, const void *v2)
+{
+	struct iso_tree_node **f1 = (struct iso_tree_node **)v1;
+	struct iso_tree_node **f2 = (struct iso_tree_node **)v2;
+
+	return ucscmp((uint16_t*)iso_tree_node_get_name(*f1, ISO_NAME_JOLIET),
+		      (uint16_t*)iso_tree_node_get_name(*f2, ISO_NAME_JOLIET));
+}
+
 void iso_tree_sort(struct iso_tree_dir *root)
 {
 	int i;
diff -ru libisofs/tree.h libisofs/tree.h
--- libisofs/tree.h	2005-07-31 14:31:43.000000000 +1000
+++ libisofs/tree.h	2005-07-31 13:18:04.000000000 +1000
@@ -196,6 +196,16 @@
 int iso_node_cmp(const void *v1, const void *v2);
 
 /**
+ * Compare the joliet names of 2 nodes, compatible with qsort.
+ */
+int iso_node_cmp_joliet(const void *v1, const void *v2);
+
+/**
+ * Compare the iso names of 2 nodes, compatible with qsort.
+ */
+int iso_node_cmp_iso(const void *v1, const void *v2);
+
+/**
  * A function that prints verbose information about a directory.
  *
  * \param dir The directory about which to print information.
@@ -239,4 +249,11 @@
 			    void *callback_data,
 			    int spaces);
 
+/**
+ * Get a non-const version of the node name. This is used for name-mangling
+ * iso names. It should only be used internally in libisofs; all other users
+ * should only access the const name.
+ */
+char *iso_tree_node_get_name_nconst(const struct iso_tree_node *node,
+				    enum iso_name_version ver);
 #endif /* __ISO_TREE */
diff -ru libisofs/util.c libisofs/util.c
--- libisofs/util.c	2005-07-31 14:31:43.000000000 +1000
+++ libisofs/util.c	2005-07-31 13:18:04.000000000 +1000
@@ -661,6 +661,23 @@
 	return i;
 }
 
+int ucscmp(const uint16_t *s1, const uint16_t *s2)
+{
+	int i;
+
+	for (i=0; 1; i++) {
+		if (s1[i] < s2[i]) {
+			return -1;
+		} else if (s1[i] > s2[i]) {
+			return 1;
+		} else if (s1[i] == 0 && s2[i] == 0) {
+			break;
+		}
+	}
+
+	return 0;
+}
+
 uint32_t iso_read_lsb(const uint8_t *buf, int bytes)
 {
 	int i;
diff -ru libisofs/util.h libisofs/util.h
--- libisofs/util.h	2005-07-31 14:31:43.000000000 +1000
+++ libisofs/util.h	2005-07-31 13:18:04.000000000 +1000
@@ -147,6 +147,9 @@
  * with 32-bit wchar_t */
 size_t ucslen(const uint16_t *);
 
+/* replacement for strcmp for joliet names */
+int ucscmp(const uint16_t*, const uint16_t*);
+
 #define DIV_UP(n,div) ( ((n)+(div)-1) / (div) )
 #define ROUND_UP(n,mul) ( DIV_UP(n,mul) * (mul) )
 


More information about the libburn mailing list