[HarfBuzz] harfbuzz: Branch 'master' - 4 commits

Behdad Esfahbod behdad at kemper.freedesktop.org
Fri Aug 31 16:21:15 PDT 2012


 src/hb-buffer-private.hh                 |   17 ++++-
 src/hb-buffer.cc                         |   25 ++-----
 src/hb-ot-shape-complex-indic-machine.rl |   23 +++---
 src/hb-ot-shape-complex-indic-private.hh |    2 
 src/hb-ot-shape-complex-indic.cc         |  105 +++++++++++++++++++++++++++++--
 src/hb-ot-shape-normalize-private.hh     |    1 
 src/hb-ot-shape-normalize.cc             |   11 +--
 src/hb-ot-shape.cc                       |   12 ++-
 8 files changed, 153 insertions(+), 43 deletions(-)

New commits:
commit b85800f9de8976a7418ef9df467d3080c6ab0199
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Aug 31 18:12:01 2012 -0400

    [Indic] Implement dotted-circle insertion for broken clusters
    
    No panic, we really insert dotted circle when it's absolutely broken.
    
    Fixes most of the dotted-circle cases against Uniscribe. (for Devanagari
    fixes 80% of them, for Khmer 70%; the rest look like Uniscribe being
    really bogus...)
    
    I had to make a decision.  Apparently Uniscribe adds one dotted circle
    to each broken character.  I tried that, but that goes wrong easily with
    split matras.  So I made it add only one dotted circle to an entire
    broken syllable tail.  As in: "if there was a dotted circle here, this
    would have formed a correct cluster."  That works better for split
    stuff, and I like it more.

diff --git a/src/hb-buffer-private.hh b/src/hb-buffer-private.hh
index 9fa1e4b..456e1b8 100644
--- a/src/hb-buffer-private.hh
+++ b/src/hb-buffer-private.hh
@@ -152,6 +152,7 @@ struct hb_buffer_t {
   HB_INTERNAL void replace_glyph (hb_codepoint_t glyph_index);
   /* Makes a copy of the glyph at idx to output and replace glyph_index */
   HB_INTERNAL void output_glyph (hb_codepoint_t glyph_index);
+  HB_INTERNAL void output_info (hb_glyph_info_t &glyph_info);
   /* Copies glyph at idx to output but doesn't advance idx */
   HB_INTERNAL void copy_glyph (void);
   /* Copies glyph at idx to output and advance idx.
diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc
index 3f039d0..f25a8bc 100644
--- a/src/hb-buffer.cc
+++ b/src/hb-buffer.cc
@@ -268,6 +268,16 @@ hb_buffer_t::output_glyph (hb_codepoint_t glyph_index)
 }
 
 void
+hb_buffer_t::output_info (hb_glyph_info_t &glyph_info)
+{
+  if (unlikely (!make_room_for (0, 1))) return;
+
+  out_info[out_len] = glyph_info;
+
+  out_len++;
+}
+
+void
 hb_buffer_t::copy_glyph (void)
 {
   if (unlikely (!make_room_for (0, 1))) return;
diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 6c76d24..c9309e9 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -75,12 +75,14 @@ halant_or_matra_group = (final_halant_group | matra_group{0,4});
 consonant_syllable =	Repha? (cn.halant_group){0,4} cn A? halant_or_matra_group? syllable_tail;
 vowel_syllable =	reph? V.n? (ZWJ | (halant_group.cn){0,4} halant_or_matra_group? syllable_tail);
 standalone_cluster =	reph? place_holder.n? (halant_group.cn){0,4} halant_or_matra_group? syllable_tail;
+broken_cluster =	n? (halant_group.cn){0,4} halant_or_matra_group syllable_tail;
 other =			any;
 
 main := |*
 	consonant_syllable	=> { found_syllable (consonant_syllable); };
 	vowel_syllable		=> { found_syllable (vowel_syllable); };
 	standalone_cluster	=> { found_syllable (standalone_cluster); };
+	broken_cluster		=> { found_syllable (broken_cluster); *had_broken_cluster = true; };
 	other			=> { found_syllable (non_indic_cluster); };
 *|;
 
@@ -98,7 +100,7 @@ main := |*
   } HB_STMT_END
 
 static void
-find_syllables (const hb_ot_shape_plan_t *plan, hb_buffer_t *buffer)
+find_syllables (const hb_ot_shape_plan_t *plan, hb_buffer_t *buffer, bool *had_broken_cluster)
 {
   unsigned int p, pe, eof, ts, te, act;
   int cs;
diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index 79daba5..91b0be5 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -300,7 +300,7 @@ is_halant_or_coeng (const hb_glyph_info_t &info)
 }
 
 static inline void
-set_indic_properties (hb_glyph_info_t   &info)
+set_indic_properties (hb_glyph_info_t &info)
 {
   hb_codepoint_t u = info.codepoint;
   unsigned int type = get_indic_categories (u);
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 15b00b0..2417ab7 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -770,6 +770,15 @@ initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan,
 }
 
 static void
+initial_reordering_broken_cluster (const hb_ot_shape_plan_t *plan,
+				   hb_buffer_t *buffer,
+				   unsigned int start, unsigned int end)
+{
+  /* We already inserted dotted-circles, so just call the standalone_cluster. */
+  initial_reordering_standalone_cluster (plan, buffer, start, end);
+}
+
+static void
 initial_reordering_non_indic_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED,
 				      hb_buffer_t *buffer HB_UNUSED,
 				      unsigned int start HB_UNUSED, unsigned int end HB_UNUSED)
@@ -799,23 +808,63 @@ initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
   case consonant_syllable:	initial_reordering_consonant_syllable (plan, buffer, start, end); return;
   case vowel_syllable:		initial_reordering_vowel_syllable     (plan, buffer, start, end); return;
   case standalone_cluster:	initial_reordering_standalone_cluster (plan, buffer, start, end); return;
-  case broken_cluster:		initial_reordering_non_indic_cluster  (plan, buffer, start, end); return;
+  case broken_cluster:		initial_reordering_broken_cluster     (plan, buffer, start, end); return;
   case non_indic_cluster:	initial_reordering_non_indic_cluster  (plan, buffer, start, end); return;
   }
 }
 
 static void
+insert_dotted_circles (const hb_ot_shape_plan_t *plan,
+		       hb_font_t *font,
+		       hb_buffer_t *buffer)
+{
+  hb_codepoint_t dottedcircle_glyph;
+  if (!font->get_glyph (0x25CC, 0, &dottedcircle_glyph))
+    return;
+
+  hb_glyph_info_t dottedcircle;
+  dottedcircle.codepoint = 0x25CC;
+  set_indic_properties (dottedcircle);
+  dottedcircle.codepoint = dottedcircle_glyph;
+
+  buffer->clear_output ();
+
+  buffer->idx = 0;
+  unsigned int last_syllable = 0;
+  while (buffer->idx < buffer->len)
+  {
+    unsigned int syllable = buffer->cur().syllable();
+    syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
+    if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
+    {
+      hb_glyph_info_t info = dottedcircle;
+      info.cluster = buffer->cur().cluster;
+      info.mask = buffer->cur().mask;
+      info.syllable() = buffer->cur().syllable();
+      buffer->output_info (info);
+      last_syllable = syllable;
+    }
+    buffer->next_glyph ();
+  }
+
+  buffer->swap_buffers ();
+}
+
+static void
 initial_reordering (const hb_ot_shape_plan_t *plan,
 		    hb_font_t *font,
 		    hb_buffer_t *buffer)
 {
-  unsigned int count = buffer->len;
-  if (unlikely (!count)) return;
-
   update_consonant_positions (plan, font, buffer);
-  find_syllables (plan, buffer);
+
+  bool had_broken_clusters = false;
+  find_syllables (plan, buffer, &had_broken_clusters);
+  if (unlikely (had_broken_clusters))
+    insert_dotted_circles (plan, font, buffer);
 
   hb_glyph_info_t *info = buffer->info;
+  unsigned int count = buffer->len;
+  if (unlikely (!count)) return;
   unsigned int last = 0;
   unsigned int last_syllable = info[0].syllable();
   for (unsigned int i = 1; i < count; i++)
@@ -1170,6 +1219,12 @@ final_reordering (const hb_ot_shape_plan_t *plan,
 }
 
 
+static hb_ot_shape_normalization_mode_t
+normalization_preference_indic (const hb_ot_shape_plan_t *plan)
+{
+  return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT;
+}
+
 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
 {
   "indic",
@@ -1178,7 +1233,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
   data_create_indic,
   data_destroy_indic,
   NULL, /* preprocess_text */
-  NULL, /* normalization_preference */
+  normalization_preference_indic,
   setup_masks_indic,
   false, /* zero_width_attached_marks */
 };
diff --git a/src/hb-ot-shape-normalize-private.hh b/src/hb-ot-shape-normalize-private.hh
index 462b87d..c5fcbea 100644
--- a/src/hb-ot-shape-normalize-private.hh
+++ b/src/hb-ot-shape-normalize-private.hh
@@ -38,6 +38,7 @@
 enum hb_ot_shape_normalization_mode_t {
   HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED,
   HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS, /* never composes base-to-base */
+  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, /* always fully decomposes and then recompose back */
   HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL, /* including base-to-base composition */
 
   HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS
diff --git a/src/hb-ot-shape-normalize.cc b/src/hb-ot-shape-normalize.cc
index 93dd00c..f4d8330 100644
--- a/src/hb-ot-shape-normalize.cc
+++ b/src/hb-ot-shape-normalize.cc
@@ -414,10 +414,10 @@ decompose_multi_char_cluster (hb_font_t *font, hb_buffer_t *buffer, unsigned int
 }
 
 static inline bool
-decompose_cluster (hb_font_t *font, hb_buffer_t *buffer, bool recompose, unsigned int end)
+decompose_cluster (hb_font_t *font, hb_buffer_t *buffer, bool short_circuit, unsigned int end)
 {
   if (likely (buffer->idx + 1 == end))
-    return decompose_current_character (font, buffer, recompose);
+    return decompose_current_character (font, buffer, short_circuit);
   else
     return decompose_multi_char_cluster (font, buffer, end);
 }
@@ -437,7 +437,8 @@ void
 _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
 			hb_ot_shape_normalization_mode_t mode)
 {
-  bool recompose = mode != HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED;
+  bool short_circuit = mode != HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED &&
+		       mode != HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT;
   bool can_use_recompose = false;
   unsigned int count;
 
@@ -459,7 +460,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
       if (buffer->cur().cluster != buffer->info[end].cluster)
         break;
 
-    can_use_recompose = decompose_cluster (font, buffer, recompose, end) || can_use_recompose;
+    can_use_recompose = decompose_cluster (font, buffer, short_circuit, end) || can_use_recompose;
   }
   buffer->swap_buffers ();
 
@@ -495,7 +496,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
   }
 
 
-  if (!recompose)
+  if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED)
     return;
 
   /* Third round, recompose */
commit 327d14ef188396006d54af976506ab6f8bb2869a
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Aug 31 16:49:34 2012 -0400

    [Indic] Start adding dotted-circle infrastructure

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index b6a372e..6c76d24 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -78,24 +78,23 @@ standalone_cluster =	reph? place_holder.n? (halant_group.cn){0,4} halant_or_matr
 other =			any;
 
 main := |*
-	consonant_syllable	=> { process_syllable (consonant_syllable); };
-	vowel_syllable		=> { process_syllable (vowel_syllable); };
-	standalone_cluster	=> { process_syllable (standalone_cluster); };
-	other			=> { process_syllable (non_indic); };
+	consonant_syllable	=> { found_syllable (consonant_syllable); };
+	vowel_syllable		=> { found_syllable (vowel_syllable); };
+	standalone_cluster	=> { found_syllable (standalone_cluster); };
+	other			=> { found_syllable (non_indic_cluster); };
 *|;
 
 
 }%%
 
-#define process_syllable(func) \
+#define found_syllable(syllable_type) \
   HB_STMT_START { \
-    if (0) fprintf (stderr, "syllable %d..%d %s\n", last, p+1, #func); \
+    if (0) fprintf (stderr, "syllable %d..%d %s\n", last, p+1, #syllable_type); \
     for (unsigned int i = last; i < p+1; i++) \
-      info[i].syllable() = syllable_serial; \
-    PASTE (initial_reordering_, func) (plan, buffer, last, p+1); \
+      info[i].syllable() = (syllable_serial << 4) | syllable_type; \
     last = p+1; \
     syllable_serial++; \
-    if (unlikely (!syllable_serial)) syllable_serial++; \
+    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
   } HB_STMT_END
 
 static void
@@ -113,7 +112,7 @@ find_syllables (const hb_ot_shape_plan_t *plan, hb_buffer_t *buffer)
   pe = eof = buffer->len;
 
   unsigned int last = 0;
-  uint8_t syllable_serial = 1;
+  unsigned int syllable_serial = 1;
   %%{
     write exec;
   }%%
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index e39629f..15b00b0 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -770,23 +770,61 @@ initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan,
 }
 
 static void
-initial_reordering_non_indic (const hb_ot_shape_plan_t *plan HB_UNUSED,
-			      hb_buffer_t *buffer HB_UNUSED,
-			      unsigned int start HB_UNUSED, unsigned int end HB_UNUSED)
+initial_reordering_non_indic_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED,
+				      hb_buffer_t *buffer HB_UNUSED,
+				      unsigned int start HB_UNUSED, unsigned int end HB_UNUSED)
 {
   /* Nothing to do right now.  If we ever switch to using the output
    * buffer in the reordering process, we'd need to next_glyph() here. */
 }
 
+
+enum syllable_type_t {
+  consonant_syllable,
+  vowel_syllable,
+  standalone_cluster,
+  broken_cluster,
+  non_indic_cluster,
+};
+
 #include "hb-ot-shape-complex-indic-machine.hh"
 
 static void
+initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
+			     hb_buffer_t *buffer,
+			     unsigned int start, unsigned int end)
+{
+  syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
+  switch (syllable_type) {
+  case consonant_syllable:	initial_reordering_consonant_syllable (plan, buffer, start, end); return;
+  case vowel_syllable:		initial_reordering_vowel_syllable     (plan, buffer, start, end); return;
+  case standalone_cluster:	initial_reordering_standalone_cluster (plan, buffer, start, end); return;
+  case broken_cluster:		initial_reordering_non_indic_cluster  (plan, buffer, start, end); return;
+  case non_indic_cluster:	initial_reordering_non_indic_cluster  (plan, buffer, start, end); return;
+  }
+}
+
+static void
 initial_reordering (const hb_ot_shape_plan_t *plan,
 		    hb_font_t *font,
 		    hb_buffer_t *buffer)
 {
+  unsigned int count = buffer->len;
+  if (unlikely (!count)) return;
+
   update_consonant_positions (plan, font, buffer);
   find_syllables (plan, buffer);
+
+  hb_glyph_info_t *info = buffer->info;
+  unsigned int last = 0;
+  unsigned int last_syllable = info[0].syllable();
+  for (unsigned int i = 1; i < count; i++)
+    if (last_syllable != info[i].syllable()) {
+      initial_reordering_syllable (plan, buffer, last, i);
+      last = i;
+      last_syllable = info[last].syllable();
+    }
+  initial_reordering_syllable (plan, buffer, last, count);
 }
 
 static void
@@ -1110,7 +1148,7 @@ final_reordering (const hb_ot_shape_plan_t *plan,
 		  hb_buffer_t *buffer)
 {
   unsigned int count = buffer->len;
-  if (!count) return;
+  if (unlikely (!count)) return;
 
   hb_glyph_info_t *info = buffer->info;
   unsigned int last = 0;
commit 1be368e96fb7de8c77bf992874e0d5bd6b272ebe
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Aug 31 16:29:17 2012 -0400

    Minor

diff --git a/src/hb-buffer-private.hh b/src/hb-buffer-private.hh
index 91e7560..9fa1e4b 100644
--- a/src/hb-buffer-private.hh
+++ b/src/hb-buffer-private.hh
@@ -156,7 +156,21 @@ struct hb_buffer_t {
   HB_INTERNAL void copy_glyph (void);
   /* Copies glyph at idx to output and advance idx.
    * If there's no output, just advance idx. */
-  HB_INTERNAL void next_glyph (void);
+  inline void
+  next_glyph (void)
+  {
+    if (have_output)
+    {
+      if (unlikely (out_info != info || out_len != idx)) {
+	if (unlikely (!make_room_for (1, 1))) return;
+	out_info[out_len] = info[idx];
+      }
+      out_len++;
+    }
+
+    idx++;
+  }
+
   /* Advance idx without copying to output. */
   inline void skip_glyph (void) { idx++; }
 
diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc
index e9bb15e..3f039d0 100644
--- a/src/hb-buffer.cc
+++ b/src/hb-buffer.cc
@@ -290,21 +290,6 @@ hb_buffer_t::replace_glyph (hb_codepoint_t glyph_index)
   out_len++;
 }
 
-void
-hb_buffer_t::next_glyph (void)
-{
-  if (have_output)
-  {
-    if (unlikely (out_info != info || out_len != idx)) {
-      if (unlikely (!make_room_for (1, 1))) return;
-      out_info[out_len] = info[idx];
-    }
-    out_len++;
-  }
-
-  idx++;
-}
-
 
 void
 hb_buffer_t::set_masks (hb_mask_t    value,
commit 784f29d061a2939562eca0c4943feb01174aee00
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Aug 31 14:06:26 2012 -0400

    Minor

diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc
index 473bc17..929406e 100644
--- a/src/hb-ot-shape.cc
+++ b/src/hb-ot-shape.cc
@@ -487,14 +487,18 @@ hb_ot_position (hb_ot_shape_context_t *c)
 static void
 hb_ot_hide_zerowidth (hb_ot_shape_context_t *c)
 {
-  hb_codepoint_t space;
-  if (!c->font->get_glyph (' ', 0, &space))
-    return; /* No point! */
+  hb_codepoint_t space = 0;
 
   unsigned int count = c->buffer->len;
   for (unsigned int i = 0; i < count; i++)
     if (unlikely (!is_a_ligature (c->buffer->info[i]) &&
-		  _hb_glyph_info_is_zero_width (&c->buffer->info[i]))) {
+		  _hb_glyph_info_is_zero_width (&c->buffer->info[i])))
+    {
+      if (!space) {
+        /* We assume that the space glyph is not gid0. */
+        if (unlikely (!c->font->get_glyph (' ', 0, &space)) || !space)
+	return; /* No point! */
+      }
       c->buffer->info[i].codepoint = space;
       c->buffer->pos[i].x_advance = 0;
       c->buffer->pos[i].y_advance = 0;



More information about the HarfBuzz mailing list