[HarfBuzz] harfbuzz-ng: Branch 'master' - 13 commits

Behdad Esfahbod behdad at kemper.freedesktop.org
Tue Jul 17 20:56:56 PDT 2012


 src/hb-buffer.cc                                                                                   |   76 ++++
 src/hb-buffer.h                                                                                    |   13 
 src/hb-ot-shape-complex-indic-machine.rl                                                           |   48 +--
 src/hb-ot-shape-complex-indic-private.hh                                                           |    9 
 src/hb-ot-shape-complex-indic.cc                                                                   |  159 +++++++---
 src/hb-private.hh                                                                                  |   29 +
 test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt                         |    2 
 test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/MANIFEST                |    2 
 test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt                |    7 
 test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks-invalid.txt |    4 
 test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks.txt         |    6 
 util/options.cc                                                                                    |    1 
 util/options.hh                                                                                    |    7 
 13 files changed, 287 insertions(+), 76 deletions(-)

New commits:
commit db8981f1e0e8625714568c6d0f11f0b317b11d0a
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Jul 17 18:17:30 2012 -0400

    [Indic] Position Khmer Robat
    
    It's a visual Repha.
    
    Still not positioning logical Repha as occurs in Malayalam.
    
    Another 200 Khmer failures fixed.  547 to go.  That's better than
    Devanagari!

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index b87d2df..5f565b6 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -40,33 +40,35 @@
 # Same order as enum indic_category_t.  Not sure how to avoid duplication.
 X    = 0;
 C    = 1;
-Ra   = 2;
-V    = 3;
-N    = 4;
-H    = 5;
-ZWNJ = 6;
-ZWJ  = 7;
-M    = 8;
-SM   = 9;
-VD   = 10;
-A    = 11;
-NBSP = 12;
-DOTTEDCIRCLE = 13;
-RS   = 14;
-Coeng = 15;
+V    = 2;
+N    = 3;
+H    = 4;
+ZWNJ = 5;
+ZWJ  = 6;
+M    = 7;
+SM   = 8;
+VD   = 9;
+A    = 10;
+NBSP = 11;
+DOTTEDCIRCLE = 12;
+RS   = 13;
+Coeng = 14;
+Repha = 15;
+Ra    = 16;
 
 c = C | Ra;			# is_consonant
 n = (N.N? | ZWNJ?.RS);		# is_consonant_modifier
 z = ZWJ|ZWNJ;			# is_joiner
 h = H | Coeng;			# is_halant_or_coeng
+reph = (Ra H | Repha);		# possible reph
 matra_group = M.N?.H?;
 syllable_tail = SM? (Coeng (c|V))? (VD VD?)?;
 place_holder = NBSP | DOTTEDCIRCLE;
 
 
-consonant_syllable =	(c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
-vowel_syllable =	(Ra H)? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
-standalone_cluster =	(Ra H)? place_holder.n? (z? h c)* matra_group* syllable_tail;
+consonant_syllable =	Repha? (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
+vowel_syllable =	reph? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
+standalone_cluster =	reph? place_holder.n? (z? h c)* matra_group* syllable_tail;
 other =			any;
 
 main := |*
diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index 0541738..9637018 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -47,7 +47,6 @@
 enum indic_category_t {
   OT_X = 0,
   OT_C,
-  OT_Ra, /* Not explicitly listed in the OT spec, but used in the grammar. */
   OT_V,
   OT_N,
   OT_H,
@@ -60,7 +59,9 @@ enum indic_category_t {
   OT_NBSP,
   OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
   OT_RS, /* Register Shifter, used in Khmer OT spec */
-  OT_Coeng
+  OT_Coeng,
+  OT_Repha,
+  OT_Ra /* Not explicitly listed in the OT spec, but used in the grammar. */
 };
 
 /* Visual positions in a syllable from left to right. */
@@ -92,7 +93,7 @@ enum indic_syllabic_category_t {
   INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL	= OT_C,
   INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER	= OT_NBSP,
   INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED	= OT_C,
-  INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA	= OT_C,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA	= OT_Repha,
   INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER	= OT_X,
   INDIC_SYLLABIC_CATEGORY_NUKTA			= OT_N,
   INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER	= OT_RS,
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 3c83ce6..bbf5024 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -282,6 +282,19 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,
     if (unlikely (info.codepoint == 0x17D2))
       info.indic_category() = OT_Coeng;
 
+    if (info.indic_category() == OT_Repha) {
+      /* There are two kinds of characters marked as Repha:
+       * - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer)
+       * - The ones that are GenCat=Lo are encoded logically, ie. beginning of syllable. (eg. Malayalam)
+       *
+       * We recategorize the first kind to look like a Nukta and be attached to the base directly.
+       */
+      if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
+        info.indic_category() = OT_N;
+    }
+
+
+    /* Assign positions... */
     if (is_consonant (info)) {
       info.indic_position() = consonant_position (info.codepoint);
       if (is_ra (info.codepoint))
commit 25bc489498ef7d0beb8fe9ab663e3f0b2f52c9c2
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Jul 17 17:53:03 2012 -0400

    [Indic] Better categorize Register Shifters and Khmer Various signs
    
    Down another 500 or so Khmer failures!

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 3c7193d..b87d2df 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -55,16 +55,16 @@ DOTTEDCIRCLE = 13;
 RS   = 14;
 Coeng = 15;
 
-c = C | Ra;
-n = (N.N? | ZWNJ?.RS);
-z = ZWJ|ZWNJ;
-h = H | Coeng;
-matra_group = (M | RS) N? H?;
-syllable_tail = SM? (VD VD?)?;
+c = C | Ra;			# is_consonant
+n = (N.N? | ZWNJ?.RS);		# is_consonant_modifier
+z = ZWJ|ZWNJ;			# is_joiner
+h = H | Coeng;			# is_halant_or_coeng
+matra_group = M.N?.H?;
+syllable_tail = SM? (Coeng (c|V))? (VD VD?)?;
 place_holder = NBSP | DOTTEDCIRCLE;
 
 
-consonant_syllable =	(c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? (Coeng (c|V))? syllable_tail;
+consonant_syllable =	(c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
 vowel_syllable =	(Ra H)? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
 standalone_cluster =	(Ra H)? place_holder.n? (z? h c)* matra_group* syllable_tail;
 other =			any;
diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index 0fe350f..0541738 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -59,7 +59,7 @@ enum indic_category_t {
   OT_A,
   OT_NBSP,
   OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
-  OT_RS, /* Register Shifter (and other marks), used in Khmer OT spec */
+  OT_RS, /* Register Shifter, used in Khmer OT spec */
   OT_Coeng
 };
 
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 4482dd3..3c83ce6 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -275,8 +275,8 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,
       info.indic_category() = OT_VD;
 
     if (info.indic_category() == OT_X &&
-	unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x17CB, 0x17D0)))
-      info.indic_category() = OT_RS;
+	unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x17CB, 0x17D2))) /* Khmer Various signs */
+      info.indic_category() = OT_N;
 
     /* Khmer Virama is different since it can be used to form a final consonant. */
     if (unlikely (info.codepoint == 0x17D2))
@@ -488,7 +488,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
   {
     /* Please update the Uniscribe branch when touching this! */
     for (unsigned int i = start + 1; i < end; i++)
-      if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_H))))
+      if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H))))
 	info[i].indic_position() = info[i - 1].indic_position();
   } else {
     /*
@@ -497,7 +497,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
      */
     /* Please update the non-Uniscribe branch when touching this! */
     for (unsigned int i = start + 1; i < end; i++)
-      if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_H)))) {
+      if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H)))) {
 	info[i].indic_position() = info[i - 1].indic_position();
 	if (info[i].indic_category() == OT_H && info[i].indic_position() == POS_PRE_M)
 	  for (unsigned int j = i; j > start; j--)
diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/MANIFEST b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/MANIFEST
index 29cfb2f..fde3fa1 100644
--- a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/MANIFEST
@@ -1 +1,3 @@
 misc.txt
+other-marks-invalid.txt
+other-marks.txt
diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks-invalid.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks-invalid.txt
new file mode 100644
index 0000000..213cfc2
--- /dev/null
+++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks-invalid.txt
@@ -0,0 +1,4 @@
+ព់្ឈា
+ព្ឈា៉
+ព្ឈា៌
+ព្ឈ៌ា
diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks.txt
new file mode 100644
index 0000000..0ad62e7
--- /dev/null
+++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks.txt
@@ -0,0 +1,6 @@
+ព្ឈា
+ព្ឈា់
+ព្ឈ់ា
+ព្ឈ៉ា
+ព៉្ឈា
+ព៌្ឈា
commit 39b17837b4064d59c18cebb49c1c0b5b8cc0c117
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Jul 17 17:09:29 2012 -0400

    Add hb_buffer_normalize_glyphs() and hb-shape --normalize-glyphs
    
    This reorders glyphs within the cluster to a nominal order.  This should
    have no visible effect on the output, but helps with testing, for
    getting the same hb-shape output for visually-equal glyphs for each
    cluster.

diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc
index 44571da..ec29659 100644
--- a/src/hb-buffer.cc
+++ b/src/hb-buffer.cc
@@ -887,3 +887,79 @@ hb_buffer_add_utf32 (hb_buffer_t    *buffer,
 }
 
 
+static int
+compare_info_codepoint (const hb_glyph_info_t *pa,
+			const hb_glyph_info_t *pb)
+{
+  return (int) pb->codepoint - (int) pa->codepoint;
+}
+
+static inline void
+normalize_glyphs_cluster (hb_buffer_t *buffer,
+			  unsigned int start,
+			  unsigned int end,
+			  bool backward)
+{
+  hb_glyph_position_t *pos = buffer->pos;
+
+  /* Total cluster advance */
+  hb_position_t total_x_advance = 0, total_y_advance = 0;
+  for (unsigned int i = start; i < end; i++)
+  {
+    total_x_advance += pos[i].x_advance;
+    total_y_advance += pos[i].y_advance;
+  }
+
+  hb_position_t x_advance = 0, y_advance = 0;
+  for (unsigned int i = start; i < end; i++)
+  {
+    pos[i].x_offset += x_advance;
+    pos[i].y_offset += y_advance;
+
+    x_advance += pos[i].x_advance;
+    y_advance += pos[i].y_advance;
+
+    pos[i].x_advance = 0;
+    pos[i].y_advance = 0;
+  }
+
+  if (backward)
+  {
+    /* Transfer all cluster advance to the last glyph. */
+    pos[end - 1].x_advance = total_x_advance;
+    pos[end - 1].y_advance = total_y_advance;
+
+    hb_bubble_sort (buffer->info + start, end - start - 1, compare_info_codepoint, buffer->pos + start);
+  } else {
+    /* Transfer all cluster advance to the first glyph. */
+    pos[start].x_advance += total_x_advance;
+    pos[start].y_advance += total_y_advance;
+    for (unsigned int i = start + 1; i < end; i++) {
+      pos[i].x_offset -= total_x_advance;
+      pos[i].y_offset -= total_y_advance;
+    }
+    hb_bubble_sort (buffer->info + start + 1, end - start - 1, compare_info_codepoint, buffer->pos + start + 1);
+  }
+}
+
+void
+hb_buffer_normalize_glyphs (hb_buffer_t *buffer)
+{
+  assert (buffer->have_positions);
+  /* XXX assert (buffer->have_glyphs); */
+
+  bool backward = HB_DIRECTION_IS_BACKWARD (buffer->props.direction);
+
+  unsigned int count = buffer->len;
+  if (unlikely (!count)) return;
+  hb_glyph_info_t *info = buffer->info;
+
+  unsigned int start = 0;
+  unsigned int end;
+  for (end = start + 1; end < count; end++)
+    if (info[start].cluster != info[end].cluster) {
+      normalize_glyphs_cluster (buffer, start, end, backward);
+      start = end;
+    }
+  normalize_glyphs_cluster (buffer, start, end, backward);
+}
diff --git a/src/hb-buffer.h b/src/hb-buffer.h
index 73adc2e..d67cfd6 100644
--- a/src/hb-buffer.h
+++ b/src/hb-buffer.h
@@ -193,6 +193,19 @@ hb_buffer_get_glyph_positions (hb_buffer_t  *buffer,
                                unsigned int *length);
 
 
+/* Reorders a glyph buffer to have canonical in-cluster glyph order / position.
+ * The resulting clusters should behave identically to pre-reordering clusters.
+ * NOTE: This has nothing to do with Unicode normalization. */
+void
+hb_buffer_normalize_glyphs (hb_buffer_t *buffer);
+
+/*
+ * NOT IMPLEMENTED
+void
+hb_buffer_normalize_characters (hb_buffer_t *buffer);
+*/
+
+
 HB_END_DECLS
 
 #endif /* HB_BUFFER_H */
diff --git a/src/hb-private.hh b/src/hb-private.hh
index 29cd68c..02ed9ce 100644
--- a/src/hb-private.hh
+++ b/src/hb-private.hh
@@ -737,8 +737,8 @@ hb_in_range (T u, T lo, T hi)
 #define FLAG(x) (1<<(x))
 
 
-template <typename T> inline void
-hb_bubble_sort (T *array, unsigned int len, int(*compar)(const T *, const T *))
+template <typename T, typename T2> inline void
+hb_bubble_sort (T *array, unsigned int len, int(*compar)(const T *, const T *), T2 *array2)
 {
   if (unlikely (!len))
     return;
@@ -748,11 +748,21 @@ hb_bubble_sort (T *array, unsigned int len, int(*compar)(const T *, const T *))
     unsigned int new_k = 0;
 
     for (unsigned int j = 0; j < k; j++)
-      if (compar (&array[j], &array[j+1]) > 0) {
-        T t;
-	t = array[j];
-	array[j] = array[j + 1];
-	array[j + 1] = t;
+      if (compar (&array[j], &array[j+1]) > 0)
+      {
+        {
+	  T t;
+	  t = array[j];
+	  array[j] = array[j + 1];
+	  array[j + 1] = t;
+	}
+        if (array2)
+        {
+	  T2 t;
+	  t = array2[j];
+	  array2[j] = array2[j + 1];
+	  array2[j + 1] = t;
+	}
 
 	new_k = j;
       }
@@ -760,6 +770,11 @@ hb_bubble_sort (T *array, unsigned int len, int(*compar)(const T *, const T *))
   } while (k);
 }
 
+template <typename T> inline void
+hb_bubble_sort (T *array, unsigned int len, int(*compar)(const T *, const T *))
+{
+  hb_bubble_sort (array, len, compar, (int *) NULL);
+}
 
 
 
diff --git a/util/options.cc b/util/options.cc
index db1b244..584190e 100644
--- a/util/options.cc
+++ b/util/options.cc
@@ -396,6 +396,7 @@ shape_options_t::add_options (option_parser_t *parser)
     {"language",	0, 0, G_OPTION_ARG_STRING,	&this->language,		"Set text language (default: $LANG)",	"langstr"},
     {"script",		0, 0, G_OPTION_ARG_STRING,	&this->script,			"Set text script (default: auto)",	"ISO-15924 tag"},
     {"utf8-clusters",	0, 0, G_OPTION_ARG_NONE,	&this->utf8_clusters,		"Use UTF8 byte indices, not char indices",	NULL},
+    {"normalize-glyphs",0, 0, G_OPTION_ARG_NONE,	&this->normalize_glyphs,	"Rearrange glyph clusters in nominal order",	NULL},
     {NULL}
   };
   parser->add_group (entries,
diff --git a/util/options.hh b/util/options.hh
index 9b7baa7..2485230 100644
--- a/util/options.hh
+++ b/util/options.hh
@@ -148,6 +148,7 @@ struct shape_options_t : option_group_t
     num_features = 0;
     shapers = NULL;
     utf8_clusters = false;
+    normalize_glyphs = false;
 
     add_options (parser);
   }
@@ -188,7 +189,10 @@ struct shape_options_t : option_group_t
 
   hb_bool_t shape (hb_font_t *font, hb_buffer_t *buffer)
   {
-    return hb_shape_full (font, buffer, features, num_features, shapers);
+    hb_bool_t res = hb_shape_full (font, buffer, features, num_features, shapers);
+    if (normalize_glyphs)
+      hb_buffer_normalize_glyphs (buffer);
+    return res;
   }
 
   void shape_closure (const char *text, int text_len,
@@ -208,6 +212,7 @@ struct shape_options_t : option_group_t
   unsigned int num_features;
   char **shapers;
   hb_bool_t utf8_clusters;
+  hb_bool_t normalize_glyphs;
 };
 
 
commit 25e302da9a712e6f1d63b0d243a8df0d326ddba3
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Jul 17 14:25:14 2012 -0400

    [Indic] Minor

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 2690511..4482dd3 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -372,7 +372,10 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
 
 	  /* -> or that is not a pre-base reordering Ra,
 	   *
-	   * TODO
+	   * IMPLEMENTATION NOTES:
+	   *
+	   * Our pre-base reordering Ra's are marked POS_BELOW, so will be skipped
+	   * by the logic above already.
 	   */
 
 	  /* -> or arrive at the first consonant. The consonant stopped at will
commit 5d32690a3428fa86eb26fe5fcec943a10aa95881
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Jul 17 14:23:28 2012 -0400

    [Indic] For scripts without Half forms, always choose first consonant as base
    
    In such scripts (ie. Khmer), a ZWJ/ZWNJ shouldn't stop the search for
    base.  So, instead just choose the first consonant as base directly.
    
    Test sequence:
    U+1798,200c,U+17C9,U+17D2,U+179B,U+17C1,U+17C7

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 70068ae..2690511 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -352,35 +352,46 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
       has_reph = true;
     };
 
-    /* -> starting from the end of the syllable, move backwards */
-    unsigned int i = end;
-    do {
-      i--;
-      /* -> until a consonant is found */
-      if (is_consonant (info[i]))
-      {
-	/* -> that does not have a below-base or post-base form
-	 * (post-base forms have to follow below-base forms), */
-	if (info[i].indic_position() != POS_BELOW_C &&
-	    info[i].indic_position() != POS_POST_C)
+    if (basic_mask_array[HALF])
+    {
+      /* -> starting from the end of the syllable, move backwards */
+      unsigned int i = end;
+      do {
+	i--;
+	/* -> until a consonant is found */
+	if (is_consonant (info[i]))
 	{
+	  /* -> that does not have a below-base or post-base form
+	   * (post-base forms have to follow below-base forms), */
+	  if (info[i].indic_position() != POS_BELOW_C &&
+	      info[i].indic_position() != POS_POST_C)
+	  {
+	    base = i;
+	    break;
+	  }
+
+	  /* -> or that is not a pre-base reordering Ra,
+	   *
+	   * TODO
+	   */
+
+	  /* -> or arrive at the first consonant. The consonant stopped at will
+	   * be the base. */
 	  base = i;
-	  break;
 	}
+	else
+	  if (is_joiner (info[i]))
+	    break;
+      } while (i > limit);
+    }
+    else
+    {
+      /* In scripts without half forms (eg. Khmer), the first consonant is always the base. */
 
-	/* -> or that is not a pre-base reordering Ra,
-	 *
-	 * TODO
-	 */
+      if (!has_reph)
+	base = limit;
+    }
 
-	/* -> or arrive at the first consonant. The consonant stopped at will
-	 * be the base. */
-	base = i;
-      }
-      else
-	if (is_joiner (info[i]))
-	  break;
-    } while (i > limit);
     if (base < start)
       base = start; /* Just in case... */
 
commit 34b57149065d96f7528aaccaa7654e956ce27e93
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Jul 17 14:09:32 2012 -0400

    [Indic] Treat Khmer Register Shifters more like Nuktas
    
    Except that there may be a ZWNJ before a Register Shifter.

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index e519e71..3c7193d 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -56,7 +56,7 @@ RS   = 14;
 Coeng = 15;
 
 c = C | Ra;
-n = N N?;
+n = (N.N? | ZWNJ?.RS);
 z = ZWJ|ZWNJ;
 h = H | Coeng;
 matra_group = (M | RS) N? H?;
diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
index 3396db3..c9e5443 100644
--- a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
@@ -12,3 +12,5 @@
 ង្ឈឹ
 ង្គ្រ
 ង្រ្គ
+ម៉្លេះ
+ម‌៉្លេះ
commit 11e2a601b19861b05dbb2051d2d078c3cfd75b29
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Jul 17 14:02:28 2012 -0400

    [Indic] Minor

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 675a68d..e519e71 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -65,8 +65,8 @@ place_holder = NBSP | DOTTEDCIRCLE;
 
 
 consonant_syllable =	(c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? (Coeng (c|V))? syllable_tail;
-vowel_syllable =	(Ra H)? V n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
-standalone_cluster =	(Ra H)? place_holder n? (z? h c)* matra_group* syllable_tail;
+vowel_syllable =	(Ra H)? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
+standalone_cluster =	(Ra H)? place_holder.n? (z? h c)* matra_group* syllable_tail;
 other =			any;
 
 main := |*
commit 0201e0a4649ad5b607e50bcb9605e7a5b7143812
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Jul 17 13:55:10 2012 -0400

    [Indic] Apply 'cfar' for Khmer
    
    Mark stuff after a pre-base reordering Ro with 'cfar'.  Used in Khmer.
    This allows distinguishing the following cases with MS Khmer fonts:
    
      U+1784,U+17D2,U+179A,U+17D2,U+1782
      U+1784,U+17D2,U+1782,U+17D2,U+179A

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 18a3699..70068ae 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -154,6 +154,7 @@ indic_basic_features[] =
   {HB_TAG('h','a','l','f'), false},
   {HB_TAG('a','b','v','f'), false},
   {HB_TAG('p','s','t','f'), false},
+  {HB_TAG('c','f','a','r'), false},
   {HB_TAG('c','j','c','t'), false},
   {HB_TAG('v','a','t','u'), true},
 };
@@ -169,6 +170,7 @@ enum {
   HALF,
   ABVF,
   PSTF,
+  CFAR,
   CJCT,
   VATU
 };
@@ -547,8 +549,18 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
       if (is_halant_or_coeng (info[i]) &&
 	  info[i + 1].indic_category() == OT_Ra)
       {
-	info[i].mask |= basic_mask_array[PREF];
-	info[i + 1].mask |= basic_mask_array[PREF];
+	info[i++].mask |= basic_mask_array[PREF];
+	info[i++].mask |= basic_mask_array[PREF];
+
+	/* Mark the subsequent stuff with 'cfar'.  Used in Khmer.
+	 * Read the feature spec.
+	 * This allows distinguishing the following cases with MS Khmer fonts:
+	 * U+1784,U+17D2,U+179A,U+17D2,U+1782
+	 * U+1784,U+17D2,U+1782,U+17D2,U+179A
+	 */
+	for (; i < end; i++)
+	  info[i].mask |= basic_mask_array[CFAR];
+
 	break;
       }
   }
diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
index 5a354b2..3396db3 100644
--- a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
@@ -10,3 +10,5 @@
 រ្ឥ
 ងឹ្ឈ
 ង្ឈឹ
+ង្គ្រ
+ង្រ្គ
commit 55f70ebfb95083f515d9b0044a2a65ab11484bb5
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Jul 17 12:50:13 2012 -0400

    [Indic] Position final subjoined consonants (and vowels) after matras
    
    In Khmer, a final subjoined consonant or independent vowel can occur
    after matras.  This final subjoined thing should NOT be reordered to
    before the matra even though it's subjoined.
    
    Fixes another 1k of the Khmer failures.  Not much left really.

diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index 91e288f..0fe350f 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -75,6 +75,7 @@ enum indic_position_t {
   POS_BELOW_M,
   POS_POST_C,
   POS_POST_M,
+  POS_FINAL_C,
   POS_SMVD
 };
 
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 8e738db..18a3699 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -429,8 +429,21 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
 
   for (unsigned int i = start; i < base; i++)
     info[i].indic_position() = POS_PRE_C;
+
   info[base].indic_position() = POS_BASE_C;
 
+  /* Mark final consonants.  A final consonant is one appearing after a matra,
+   * like in Khmer. */
+  for (unsigned int i = base + 1; i < end; i++)
+    if (info[i].indic_category() == OT_M) {
+      for (unsigned int j = i + 1; j < end; j++)
+        if (is_consonant (info[j])) {
+	  info[j].indic_position() = POS_FINAL_C;
+	  break;
+	}
+      break;
+    }
+
   /* Handle beginning Ra */
   if (has_reph)
     info[start].indic_position() = POS_RA_TO_BECOME_REPH;
diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
index b305cb8..5a354b2 100644
--- a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
@@ -8,3 +8,5 @@
 រ៍
 សៅ
 រ្ឥ
+ងឹ្ឈ
+ង្ឈឹ
commit c50ed71e9a3df1844f564de66d54b46a696c1356
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Jul 17 11:54:28 2012 -0400

    [Indic] Recategorize Khmer coeng sign as a separate category OT_Coeng
    
    Amend the syllable structure to allow a final subscripted consonant
    (Coeng+C) and a final subscripted independent vowel (Coeng+V).
    Fixes another 2k of Khmer failures.

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 6130529..675a68d 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -58,14 +58,15 @@ Coeng = 15;
 c = C | Ra;
 n = N N?;
 z = ZWJ|ZWNJ;
+h = H | Coeng;
 matra_group = (M | RS) N? H?;
 syllable_tail = SM? (VD VD?)?;
 place_holder = NBSP | DOTTEDCIRCLE;
 
 
-consonant_syllable =	(c.n? (H.z?|z.H))* c.n? A? (H.z? | matra_group*)? syllable_tail;
-vowel_syllable =	(Ra H)? V n? (z?.H.c | ZWJ.c)* matra_group* syllable_tail;
-standalone_cluster =	(Ra H)? place_holder n? (z? H c)* matra_group* syllable_tail;
+consonant_syllable =	(c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? (Coeng (c|V))? syllable_tail;
+vowel_syllable =	(Ra H)? V n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
+standalone_cluster =	(Ra H)? place_holder n? (z? h c)* matra_group* syllable_tail;
 other =			any;
 
 main := |*
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 039664d..8e738db 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -276,7 +276,11 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,
 	unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x17CB, 0x17D0)))
       info.indic_category() = OT_RS;
 
-    if (info.indic_category() == OT_C) {
+    /* Khmer Virama is different since it can be used to form a final consonant. */
+    if (unlikely (info.codepoint == 0x17D2))
+      info.indic_category() = OT_Coeng;
+
+    if (is_consonant (info)) {
       info.indic_position() = consonant_position (info.codepoint);
       if (is_ra (info.codepoint))
 	info.indic_category() = OT_Ra;
diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
index 8e4deb9..b305cb8 100644
--- a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
@@ -7,3 +7,4 @@
 រី
 រ៍
 សៅ
+រ្ឥ
commit deb521dee4fdca8c2124cfb39a205e6269d4a70d
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Jul 17 11:37:32 2012 -0400

    [Indic] Add a separate Coeng class
    
    No characters recategorized yet.  No semantic change.

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 77606d1..6130529 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -53,6 +53,7 @@ A    = 11;
 NBSP = 12;
 DOTTEDCIRCLE = 13;
 RS   = 14;
+Coeng = 15;
 
 c = C | Ra;
 n = N N?;
diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index ab44777..91e288f 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -59,7 +59,8 @@ enum indic_category_t {
   OT_A,
   OT_NBSP,
   OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
-  OT_RS /* Register Shifter, used in Khmer OT spec */
+  OT_RS, /* Register Shifter (and other marks), used in Khmer OT spec */
+  OT_Coeng
 };
 
 /* Visual positions in a syllable from left to right. */
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index d5036b3..039664d 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -130,6 +130,12 @@ is_consonant (const hb_glyph_info_t &info)
   return !!(FLAG (info.indic_category()) & (FLAG (OT_C) | FLAG (OT_Ra) | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE)));
 }
 
+static bool
+is_halant_or_coeng (const hb_glyph_info_t &info)
+{
+  return !!(FLAG (info.indic_category()) & (FLAG (OT_H) | FLAG (OT_Coeng)));
+}
+
 struct feature_list_t {
   hb_tag_t tag;
   hb_bool_t is_global;
@@ -472,7 +478,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
   {
     unsigned int last_halant = end;
     for (unsigned int i = base + 1; i < end; i++)
-      if (info[i].indic_category() == OT_H)
+      if (is_halant_or_coeng (info[i]))
         last_halant = i;
       else if (is_consonant (info[i])) {
 	for (unsigned int j = last_halant; j < i; j++)
@@ -521,7 +527,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
   {
     /* Find a Halant,Ra sequence and mark it fore pre-base reordering processing. */
     for (unsigned int i = base + 1; i + 1 < end; i++)
-      if (info[i].indic_category() == OT_H &&
+      if (is_halant_or_coeng (info[i]) &&
 	  info[i + 1].indic_category() == OT_Ra)
       {
 	info[i].mask |= basic_mask_array[PREF];
@@ -644,11 +650,11 @@ final_reordering_syllable (hb_buffer_t *buffer,
   {
     unsigned int new_pos = base - 1;
     while (new_pos > start &&
-	   !(FLAG (info[new_pos].indic_category()) & (FLAG (OT_M) | FLAG (OT_H))))
+	   !(FLAG (info[new_pos].indic_category()) & (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_Coeng))))
       new_pos--;
     /* If we found no Halant we are done.  Otherwise only proceed if the Halant does
      * not belong to the Matra itself! */
-    if (info[new_pos].indic_category() == OT_H &&
+    if (is_halant_or_coeng (info[new_pos]) &&
 	info[new_pos].indic_position() != POS_PRE_M) {
       /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
       if (new_pos + 1 < end && is_joiner (info[new_pos + 1]))
@@ -745,10 +751,10 @@ final_reordering_syllable (hb_buffer_t *buffer,
      */
     {
       new_reph_pos = start + 1;
-      while (new_reph_pos < base && info[new_reph_pos].indic_category() != OT_H)
+      while (new_reph_pos < base && !is_halant_or_coeng (info[new_reph_pos]))
 	new_reph_pos++;
 
-      if (new_reph_pos < base && info[new_reph_pos].indic_category() == OT_H) {
+      if (new_reph_pos < base && is_halant_or_coeng (info[new_reph_pos])) {
 	/* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
 	if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))
 	  new_reph_pos++;
@@ -814,7 +820,7 @@ final_reordering_syllable (hb_buffer_t *buffer,
        * TEST: U+0930,U+094D,U+0915,U+094B,U+094D
        */
       if (!indic_options ().uniscribe_bug_compatible &&
-	  unlikely (info[new_reph_pos].indic_category() == OT_H)) {
+	  unlikely (is_halant_or_coeng (info[new_reph_pos]))) {
 	for (unsigned int i = base + 1; i < new_reph_pos; i++)
 	  if (info[i].indic_category() == OT_M) {
 	    /* Ok, got it. */
@@ -862,10 +868,10 @@ final_reordering_syllable (hb_buffer_t *buffer,
 
 	  unsigned int new_pos = base;
 	  while (new_pos > start + 1 &&
-		 !(FLAG (info[new_pos - 1].indic_category()) & (FLAG (OT_M) | FLAG (OT_H))))
+		 !(FLAG (info[new_pos - 1].indic_category()) & (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_Coeng))))
 	    new_pos--;
 
-	  if (new_pos > start && info[new_pos - 1].indic_category() == OT_H)
+	  if (new_pos > start && is_halant_or_coeng (info[new_pos - 1]))
 	    /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
 	    if (new_pos < end && is_joiner (info[new_pos]))
 	      new_pos++;
@@ -910,7 +916,7 @@ final_reordering_syllable (hb_buffer_t *buffer,
      * Uniscribe does. */
     unsigned int cluster_start = start;
     for (unsigned int i = start + 1; i < start_of_last_cluster; i++)
-      if (info[i - 1].indic_category() == OT_H && info[i].indic_category() == OT_ZWNJ) {
+      if (is_halant_or_coeng (info[i - 1]) && info[i].indic_category() == OT_ZWNJ) {
         i++;
 	buffer->merge_clusters (cluster_start, i);
 	cluster_start = i;
commit 74ccc6a1322f8c48c5f2a05f04821783c4b87a14
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Jul 17 11:16:19 2012 -0400

    [Indic] Move Halant with after-base consonants
    
    Normally, we attach the Halant to the previous character and move it
    with it.  For after-base consonants however, the Halant "belongs" to the
    consonant after it, so attach it to that consonant instead.
    
    This fixes Bengali sequences involving post-base consonant Ya, which
    should ligate with the Halant to form Ya Phala, but previously a
    reordered matra was blocking the ligation.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 48c7a15..d5036b3 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -468,6 +468,17 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
 	    }
       }
   }
+  /* Re-attach ZWJ, ZWNJ, and halant to next char, for after-base consonants. */
+  {
+    unsigned int last_halant = end;
+    for (unsigned int i = base + 1; i < end; i++)
+      if (info[i].indic_category() == OT_H)
+        last_halant = i;
+      else if (is_consonant (info[i])) {
+	for (unsigned int j = last_halant; j < i; j++)
+	  info[j].indic_position() = info[i].indic_position();
+      }
+  }
 
   /* We do bubble-sort, skip malicious clusters attempts */
   if (end - start < 64)
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt
index 1eb2611..843ee4f 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt
@@ -48,3 +48,4 @@
 ৱ্ভ
 অৗ
 ন্ত্র
+ত্যু
commit d5c4edcdd6df32f2f23aca44f14838b4baab4d7a
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Jul 17 10:40:59 2012 -0400

    [Indic] Apply presentation-forms features all at once
    
    Seems like this is what Uniscribe is doing, and does not break any fonts
    we tested (with Devanagari, Malayalam, Khmer, and Bengali), while fixing
    some Ra Phala sequences for Bengali with Vrinda.  Fixes another 2% of
    Bengali failures (a couple more to go).

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 19bb75c..48c7a15 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -135,6 +135,7 @@ struct feature_list_t {
   hb_bool_t is_global;
 };
 
+/* These features are applied one at a time, given the order in this table. */
 static const feature_list_t
 indic_basic_features[] =
 {
@@ -166,6 +167,7 @@ enum {
   VATU
 };
 
+/* These features are applied all at once. */
 static const feature_list_t
 indic_other_features[] =
 {
@@ -211,10 +213,8 @@ _hb_ot_shape_complex_collect_features_indic (hb_ot_map_builder_t *map,
 
   map->add_gsub_pause (final_reordering, NULL);
 
-  for (unsigned int i = 0; i < ARRAY_LENGTH (indic_other_features); i++) {
+  for (unsigned int i = 0; i < ARRAY_LENGTH (indic_other_features); i++)
     map->add_bool_feature (indic_other_features[i].tag, indic_other_features[i].is_global);
-    map->add_gsub_pause (NULL, NULL);
-  }
 }
 
 void
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt
index c208625..1eb2611 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt
@@ -47,3 +47,4 @@
 ৰ্ভ
 ৱ্ভ
 অৗ
+ন্ত্র



More information about the HarfBuzz mailing list