[HarfBuzz] harfbuzz-ng: Branch 'master' - 19 commits
Behdad Esfahbod
behdad at kemper.freedesktop.org
Thu Jul 19 06:52:49 PDT 2012
src/hb-buffer-private.hh | 2
src/hb-ot-shape-complex-indic-machine.rl | 21 +-
src/hb-ot-shape-complex-indic-private.hh | 6
src/hb-ot-shape-complex-indic.cc | 43 +++--
src/hb-ot-shape-complex-misc.cc | 82 ++++------
src/hb-ot-shape-complex-private.hh | 7
src/hb-unicode.cc | 8
test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt | 5
test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST | 1
test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt | 3
test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/reph.txt | 3
test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt | 1
12 files changed, 107 insertions(+), 75 deletions(-)
New commits:
commit 422ecd2d3c198a36d07d409341cb82ea57c7ad6b
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 23:25:58 2012 -0400
[Indic] Accept a forced Rakar sequence at the end of syllable
In Sinhala, Rakar is formed by Al-Lakuna,ZWJ,Ra. If you put that at the
end of a Consonant,Matra syllable, you get a dotted-circle from
Uniscribe. Apparently adding a ZWJ before the Al-Lakuna "fixes" that.
And people have been encoding that sequence... So, allow a forced
"ZWJ,Virama,ZWJ,Ra" sequence at the end of syllables.
Fixes some 100 or more of Sinhala failures. Now at 622 only (0.23%).
diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 7fe9a68..4501773 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -63,7 +63,8 @@ h = H | Coeng; # is_halant_or_coeng
reph = (Ra H | Repha); # possible reph
cn = c.n?;
-matra_group = z*.M.N?.H?;
+forced_rakar = ZWJ H ZWJ Ra;
+matra_group = z*.M.N?.(H | forced_rakar)?;
syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?;
place_holder = NBSP | DOTTEDCIRCLE;
halant_group = (z?.h.z?);
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
index a8491bf..40b5dc5 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
@@ -5,3 +5,5 @@
à¶à·à·
à¶à·
à·à·à¶§à·âරà·
+à¶âà·âරම
+à·à·âà·âර
commit 6fc1732003d71cf90d37247482772c3da884687f
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 17:49:19 2012 -0400
[Indic] Allow joiners on both sides of Halant at the same time
The sequence <ZWJ,Al-Lakuna,ZWJ> is used in Sinhala to explicitly ask
for Rakar. Fixes two-thousand Sinhala tests. Not many left.
diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index a266a12..7fe9a68 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -66,7 +66,7 @@ cn = c.n?;
matra_group = z*.M.N?.H?;
syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?;
place_holder = NBSP | DOTTEDCIRCLE;
-halant_group = (h.z?|z.h);
+halant_group = (z?.h.z?);
halant_or_matra_group = (halant_group | matra_group*);
commit 10cdc94eee2225f14c198c015256a5a0063eecad
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 17:42:34 2012 -0400
[Indic] In final reordering, find base, even if it disappeared
POS_BASE can disappear if base ligated backward. Define base as last
with position not after base.
Fixes a few hundred of Sinhala failures with Iskoola Pota.
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index c0d56eb..2aaac54 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -705,11 +705,8 @@ final_reordering_syllable (hb_buffer_t *buffer,
/* Find base again */
unsigned int base = end;
- for (unsigned int i = start; i < end; i++)
- if (info[i].indic_position() == POS_BASE_C) {
- base = i;
- break;
- }
+ while (start < base && info[base - 1].indic_position() >= POS_BASE_C)
+ base--;
unsigned int start_of_last_cluster = base;
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
index 0d772a7..a8491bf 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
@@ -4,3 +4,4 @@
à¶à·
à¶à·à·
à¶à·
+à·à·à¶§à·âරà·
commit 9c4d24a3a677a58ec59c7fb0f8b70b8aad30a032
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 17:29:10 2012 -0400
[Indic] Minor
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index fa88d2e..c0d56eb 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -797,6 +797,7 @@ final_reordering_syllable (hb_buffer_t *buffer,
default:
case HB_SCRIPT_DEVANAGARI:
case HB_SCRIPT_GUJARATI:
+ case HB_SCRIPT_SINHALA:
reph_pos = REPH_BEFORE_POSTSCRIPT;
break;
commit 3285e107c9a83aeb552e67f9460680ff6d167d88
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 17:22:14 2012 -0400
[Indic] Implement Sinhala "Al Lakuna" Reph behavior
In Sinhala, Reph is formed only explicitly, by the presence of a ZWJ.
diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index e97fca9..ac11732 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -269,6 +269,8 @@ static const hb_codepoint_t ra_chars[] = {
0x0CB0, /* Kannada */
0x0D30, /* Malayalam */ /* No Reph, Logical Repha */
+ 0x0DBB, /* Sinhala */ /* Reph formed only with ZWJ */
+
0x179A, /* Khmer */ /* No Reph, Visual Repha */
};
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 9b5b499..fa88d2e 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -356,9 +356,14 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
start + 3 <= end &&
info[start].indic_category() == OT_Ra &&
info[start + 1].indic_category() == OT_H &&
- !is_joiner (info[start + 2]))
+ (unlikely (buffer->props.script == HB_SCRIPT_SINHALA) ?
+ info[start + 2].indic_category() == OT_ZWJ /* In Sinhala, form Reph only if ZWJ is present */:
+ !is_joiner (info[start + 2] /* In other scripts, any joiner blocks Reph formation */ )
+ ))
{
limit += 2;
+ while (limit < end && is_joiner (info[limit]))
+ limit++;
base = start;
has_reph = true;
};
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST
index 29cfb2f..3c2a4fb 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST
@@ -1 +1,2 @@
misc.txt
+reph.txt
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/reph.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/reph.txt
new file mode 100644
index 0000000..f5f2f53
--- /dev/null
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/reph.txt
@@ -0,0 +1,3 @@
+ර්ධ
+රà·âධ
+රà·âධ
commit 91cade755534c42bb826a6aefcbca8a543d94387
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 16:50:41 2012 -0400
[Indic/Unicode] Decompose Sinhala split matras the way Uniscribe likes
Makes no visual difference.
Fixes most of the failures. Down from 15% to 1.3%!
diff --git a/src/hb-unicode.cc b/src/hb-unicode.cc
index 7a5aa6c..cf46aa9 100644
--- a/src/hb-unicode.cc
+++ b/src/hb-unicode.cc
@@ -270,6 +270,14 @@ hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
/* XXX FIXME, move these to complex shapers and propagage to normalizer.*/
switch (ab) {
case 0x0AC9 : *a = 0x0AC5; *b= 0x0ABE; return true;
+
+ /* These ones have Unicode decompositions, but we do it
+ * this way to be close to what Uniscribe does. */
+ case 0x0DDA : *a = 0x0DD9; *b= 0x0DDA; return true;
+ case 0x0DDC : *a = 0x0DD9; *b= 0x0DDC; return true;
+ case 0x0DDD : *a = 0x0DD9; *b= 0x0DDD; return true;
+ case 0x0DDE : *a = 0x0DD9; *b= 0x0DDE; return true;
+
case 0x0F77 : *a = 0x0FB2; *b= 0x0F81; return true;
case 0x0F79 : *a = 0x0FB3; *b= 0x0F81; return true;
case 0x17BE : *a = 0x17C1; *b= 0x17BE; return true;
commit d8942dcbb4e3249a2d78a6455c119294ed4390bc
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 16:34:10 2012 -0400
Apply Tibetan (global) features.
Fixes all Tibetan failures. All 180k of them!
Merges back Hangul into the default shaper.
diff --git a/src/hb-ot-shape-complex-misc.cc b/src/hb-ot-shape-complex-misc.cc
index 3cea734..7a11876 100644
--- a/src/hb-ot-shape-complex-misc.cc
+++ b/src/hb-ot-shape-complex-misc.cc
@@ -29,73 +29,65 @@
/* TODO Add kana, and other small shapers here */
-/* When adding trivial shapers, eg. kana, hangul, etc, we can either
- * add a full shaper enum value for them, or switch on the script in
- * the default complex shaper. The former is faster, so I think that's
- * what we would do, and hence the default complex shaper shall remain
- * empty.
- */
-
-void
-_hb_ot_shape_complex_collect_features_default (hb_ot_map_builder_t *map HB_UNUSED,
- const hb_segment_properties_t *props HB_UNUSED)
-{
-}
-
-void
-_hb_ot_shape_complex_override_features_default (hb_ot_map_builder_t *map HB_UNUSED,
- const hb_segment_properties_t *props HB_UNUSED)
-{
-}
-hb_ot_shape_normalization_mode_t
-_hb_ot_shape_complex_normalization_preference_default (void)
-{
- return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
-}
-
-void
-_hb_ot_shape_complex_setup_masks_default (hb_ot_map_t *map HB_UNUSED,
- hb_buffer_t *buffer HB_UNUSED,
- hb_font_t *font HB_UNUSED)
-{
-}
-
-
-
-/* Hangul shaper */
+/* The default shaper *only* adds additional per-script features.*/
static const hb_tag_t hangul_features[] =
{
HB_TAG('l','j','m','o'),
HB_TAG('v','j','m','o'),
HB_TAG('t','j','m','o'),
+ HB_TAG_NONE
+};
+
+static const hb_tag_t tibetan_features[] =
+{
+ HB_TAG('a','b','v','s'),
+ HB_TAG('b','l','w','s'),
+ HB_TAG('a','b','v','m'),
+ HB_TAG('b','l','w','m'),
+ HB_TAG_NONE
};
void
-_hb_ot_shape_complex_collect_features_hangul (hb_ot_map_builder_t *map,
- const hb_segment_properties_t *props HB_UNUSED)
+_hb_ot_shape_complex_collect_features_default (hb_ot_map_builder_t *map HB_UNUSED,
+ const hb_segment_properties_t *props)
{
- for (unsigned int i = 0; i < ARRAY_LENGTH (hangul_features); i++)
- map->add_bool_feature (hangul_features[i]);
+ const hb_tag_t *script_features = NULL;
+
+ switch ((hb_tag_t) props->script)
+ {
+ /* Unicode-1.1 additions */
+ case HB_SCRIPT_HANGUL:
+ script_features = hangul_features;
+ break;
+
+ /* Unicode-2.0 additions */
+ case HB_SCRIPT_TIBETAN:
+ script_features = tibetan_features;
+ break;
+ }
+
+ for (; script_features && *script_features; script_features++)
+ map->add_bool_feature (*script_features);
}
void
-_hb_ot_shape_complex_override_features_hangul (hb_ot_map_builder_t *map,
- const hb_segment_properties_t *props HB_UNUSED)
+_hb_ot_shape_complex_override_features_default (hb_ot_map_builder_t *map HB_UNUSED,
+ const hb_segment_properties_t *props HB_UNUSED)
{
}
hb_ot_shape_normalization_mode_t
-_hb_ot_shape_complex_normalization_preference_hangul (void)
+_hb_ot_shape_complex_normalization_preference_default (void)
{
- return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL;
+ return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
}
void
-_hb_ot_shape_complex_setup_masks_hangul (hb_ot_map_t *map HB_UNUSED,
- hb_buffer_t *buffer HB_UNUSED,
- hb_font_t *font HB_UNUSED)
+_hb_ot_shape_complex_setup_masks_default (hb_ot_map_t *map HB_UNUSED,
+ hb_buffer_t *buffer HB_UNUSED,
+ hb_font_t *font HB_UNUSED)
{
}
diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh
index 7f74e34..867a3c2 100644
--- a/src/hb-ot-shape-complex-private.hh
+++ b/src/hb-ot-shape-complex-private.hh
@@ -52,7 +52,6 @@
#define HB_COMPLEX_SHAPERS_IMPLEMENT_SHAPERS \
HB_COMPLEX_SHAPER_IMPLEMENT (default) /* should be first */ \
HB_COMPLEX_SHAPER_IMPLEMENT (arabic) \
- HB_COMPLEX_SHAPER_IMPLEMENT (hangul) \
HB_COMPLEX_SHAPER_IMPLEMENT (indic) \
HB_COMPLEX_SHAPER_IMPLEMENT (thai) \
/* ^--- Add new shapers here */
@@ -89,12 +88,6 @@ hb_ot_shape_complex_categorize (const hb_segment_properties_t *props)
/* Unicode-1.1 additions */
- case HB_SCRIPT_HANGUL:
-
- return hb_ot_complex_shaper_hangul;
-
-
- /* Unicode-1.1 additions */
case HB_SCRIPT_THAI:
case HB_SCRIPT_LAO:
commit 552d19b7a11f7dff888587fce4d56d9f8e47e819
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 16:00:49 2012 -0400
[Indic] Treat Register Shifters like Nukta
Really this time.
Fixes another 18 Khmer tests.
diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 0943b4d..a266a12 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -57,7 +57,7 @@ Repha = 15;
Ra = 16;
c = (C | Ra); # is_consonant
-n = (N.N? | ZWNJ?.RS); # is_consonant_modifier
+n = ((ZWNJ?.RS)? (N.N?)?); # is_consonant_modifier
z = ZWJ|ZWNJ; # is_joiner
h = H | Coeng; # is_halant_or_coeng
reph = (Ra H | Repha); # possible reph
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 3f1faec..9b5b499 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -299,8 +299,6 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,
info.indic_position() = consonant_position (info.codepoint);
if (is_ra (info.codepoint))
info.indic_category() = OT_Ra;
- } else if (info.indic_category() == OT_RS) {
- info.indic_position() = POS_ABOVE_M;
} else if (info.indic_category() == OT_SM ||
info.indic_category() == OT_VD) {
info.indic_position() = POS_SMVD;
diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
index c9e5443..46a8073 100644
--- a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
@@ -14,3 +14,4 @@
ááááá
áááááá
áâááááá
+ááá
commit e8cd81f76d159f3ecf808952dab24bc07782497a
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 16:00:20 2012 -0400
[Indic] Minor
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index abe74b2..3f1faec 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -473,7 +473,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
/* Reorder characters */
for (unsigned int i = start; i < base; i++)
- info[i].indic_position() = MIN (POS_PRE_C, info[i].indic_position());
+ info[i].indic_position() = MIN ((unsigned int) POS_PRE_C, info[i].indic_position());
if (base < end)
info[base].indic_position() = POS_BASE_C;
commit 69f26bf39c824d6bf5b1c0d410380cc5462ad5ca
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 15:45:43 2012 -0400
[Indic] Fix Matra reordering when base is at end of syllable
For example: U+915,U+200c,U+93f
Fixes last Tamil failure!
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 1c80ea4..abe74b2 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -473,7 +473,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
/* Reorder characters */
for (unsigned int i = start; i < base; i++)
- info[i].indic_position() = POS_PRE_C;
+ info[i].indic_position() = MIN (POS_PRE_C, info[i].indic_position());
if (base < end)
info[base].indic_position() = POS_BASE_C;
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
index 23afbe0..83cac77 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
@@ -29,3 +29,4 @@
रà¥à¤
à¥â
रà¥à¤
à¥â
रà¥à¤à¥à¤°à¥
+क‌ि
commit d16ccc4ae7aa8be460881042413fa2637929fede
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 15:43:55 2012 -0400
Leave one extra item at the end of buffer allocation
Just in case, for the times we do out-of-bounds access.
jk
diff --git a/src/hb-buffer-private.hh b/src/hb-buffer-private.hh
index 9bd80e0..e06a7fa 100644
--- a/src/hb-buffer-private.hh
+++ b/src/hb-buffer-private.hh
@@ -148,7 +148,7 @@ struct hb_buffer_t {
HB_INTERNAL bool enlarge (unsigned int size);
inline bool ensure (unsigned int size)
- { return likely (size <= allocated) ? true : enlarge (size); }
+ { return likely (size < allocated) ? true : enlarge (size); }
HB_INTERNAL bool make_room_for (unsigned int num_in, unsigned int num_out);
commit 075d671f1093d2e3c58f7f45568696030f1b3efd
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 15:41:53 2012 -0400
[Indic] Fix out-of-bounds array access
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 394e3f4..1c80ea4 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -475,7 +475,8 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
for (unsigned int i = start; i < base; i++)
info[i].indic_position() = POS_PRE_C;
- info[base].indic_position() = POS_BASE_C;
+ if (base < end)
+ info[base].indic_position() = POS_BASE_C;
/* Mark final consonants. A final consonant is one appearing after a matra,
* like in Khmer. */
@@ -577,7 +578,8 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
info[i].mask |= mask;
/* Base */
mask = basic_mask_array[AKHN] | basic_mask_array[CJCT];
- info[base].mask |= mask;
+ if (base < end)
+ info[base].mask |= mask;
/* Post-base */
mask = basic_mask_array[BLWF] | basic_mask_array[ABVF] | basic_mask_array[PSTF] | basic_mask_array[CJCT];
for (unsigned int i = base + 1; i < end; i++)
commit dcb527242b1eca4db1e190a7802f9cd132aaf46e
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 15:28:25 2012 -0400
[Indic] Allow joiners before matras
Fixes 1 more Devanagari test!
diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 9ad3ec2..0943b4d 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -63,7 +63,7 @@ h = H | Coeng; # is_halant_or_coeng
reph = (Ra H | Repha); # possible reph
cn = c.n?;
-matra_group = M.N?.H?;
+matra_group = z*.M.N?.H?;
syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?;
place_holder = NBSP | DOTTEDCIRCLE;
halant_group = (h.z?|z.h);
@@ -87,7 +87,7 @@ main := |*
#define process_syllable(func) \
HB_STMT_START { \
- /* fprintf (stderr, "syllable %d..%d %s\n", last, p+1, #func); */ \
+ if (0) fprintf (stderr, "syllable %d..%d %s\n", last, p+1, #func); \
for (unsigned int i = last; i < p+1; i++) \
info[i].syllable() = syllable_serial; \
PASTE (initial_reordering_, func) (map, buffer, mask_array, last, p+1); \
commit 391cc0331749e263bdfe83a8f5f6d76f2360ee7a
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 15:10:05 2012 -0400
[Indic] Allow halant group in Vowel and placeholder syllables
Fixes 2 out of 560 Devanagari failures. AND:
Fixes 1 out of 2 Tamil failures.
diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 48bf609..9ad3ec2 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -67,11 +67,12 @@ matra_group = M.N?.H?;
syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?;
place_holder = NBSP | DOTTEDCIRCLE;
halant_group = (h.z?|z.h);
+halant_or_matra_group = (halant_group | matra_group*);
-consonant_syllable = Repha? (cn.halant_group)* cn A? (halant_group | matra_group*)? syllable_tail;
-vowel_syllable = reph? V.n? (halant_group.cn | ZWJ.cn)* matra_group* syllable_tail;
-standalone_cluster = reph? place_holder.n? (halant_group.cn)* matra_group* syllable_tail;
+consonant_syllable = Repha? (cn.halant_group)* cn A? halant_or_matra_group? syllable_tail;
+vowel_syllable = reph? V.n? (halant_group.cn | ZWJ.cn)* halant_or_matra_group? syllable_tail;
+standalone_cluster = reph? place_holder.n? (halant_group.cn)* halant_or_matra_group? syllable_tail;
other = any;
main := |*
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
index 3c2bab7..23afbe0 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
@@ -28,3 +28,4 @@
रà¥à¤
à¥
रà¥à¤
à¥â
रà¥à¤
à¥â
+रà¥à¤à¥à¤°à¥
commit ca4e3d3eab7b738c2b8e2a81696a28bca1b81495
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 15:05:40 2012 -0400
[Indic] Streamline halant/joiner in grammar
diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 54f7765..48bf609 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -69,9 +69,9 @@ place_holder = NBSP | DOTTEDCIRCLE;
halant_group = (h.z?|z.h);
-consonant_syllable = Repha? (cn halant_group)* cn A? (h.z? | matra_group*)? syllable_tail;
-vowel_syllable = reph? V.n? (z?.h.cn | ZWJ.cn)* matra_group* syllable_tail;
-standalone_cluster = reph? place_holder.n? (z? h.cn)* matra_group* syllable_tail;
+consonant_syllable = Repha? (cn.halant_group)* cn A? (halant_group | matra_group*)? syllable_tail;
+vowel_syllable = reph? V.n? (halant_group.cn | ZWJ.cn)* matra_group* syllable_tail;
+standalone_cluster = reph? place_holder.n? (halant_group.cn)* matra_group* syllable_tail;
other = any;
main := |*
commit 418d00dffddd95a1f27e9be15752d494c627d45e
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 14:57:28 2012 -0400
[Indic] Minor
diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 5609ff3..54f7765 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -66,9 +66,10 @@ cn = c.n?;
matra_group = M.N?.H?;
syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?;
place_holder = NBSP | DOTTEDCIRCLE;
+halant_group = (h.z?|z.h);
-consonant_syllable = Repha? (cn (h.z?|z.h))* cn A? (h.z? | matra_group*)? syllable_tail;
+consonant_syllable = Repha? (cn halant_group)* cn A? (h.z? | matra_group*)? syllable_tail;
vowel_syllable = reph? V.n? (z?.h.cn | ZWJ.cn)* matra_group* syllable_tail;
standalone_cluster = reph? place_holder.n? (z? h.cn)* matra_group* syllable_tail;
other = any;
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
index 0ab04d6..3c2bab7 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
@@ -25,3 +25,6 @@
à¤à¥à¤·
à¤à¥âष
à¤à¥âष
+रà¥à¤
à¥
+रà¥à¤
à¥â
+रà¥à¤
à¥â
commit 4c3691d2a32ca7e54a54f7c08098fd96fa7af39e
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 14:23:55 2012 -0400
[Indic] Hopefully minor!
Refactoring Indic machine. No semantic change.
diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 5f565b6..5609ff3 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -56,19 +56,21 @@ Coeng = 14;
Repha = 15;
Ra = 16;
-c = C | Ra; # is_consonant
+c = (C | Ra); # is_consonant
n = (N.N? | ZWNJ?.RS); # is_consonant_modifier
z = ZWJ|ZWNJ; # is_joiner
h = H | Coeng; # is_halant_or_coeng
reph = (Ra H | Repha); # possible reph
+
+cn = c.n?;
matra_group = M.N?.H?;
-syllable_tail = SM? (Coeng (c|V))? (VD VD?)?;
+syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?;
place_holder = NBSP | DOTTEDCIRCLE;
-consonant_syllable = Repha? (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
-vowel_syllable = reph? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
-standalone_cluster = reph? place_holder.n? (z? h c)* matra_group* syllable_tail;
+consonant_syllable = Repha? (cn (h.z?|z.h))* cn A? (h.z? | matra_group*)? syllable_tail;
+vowel_syllable = reph? V.n? (z?.h.cn | ZWJ.cn)* matra_group* syllable_tail;
+standalone_cluster = reph? place_holder.n? (z? h.cn)* matra_group* syllable_tail;
other = any;
main := |*
commit e092c556fb1cf38be3cea1f4b75a0d879372dfa2
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 14:09:25 2012 -0400
[Indic] Minor
diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index 9637018..e97fca9 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -267,9 +267,9 @@ static const hb_codepoint_t ra_chars[] = {
0x0BB0, /* Tamil */ /* No Reph */
0x0C30, /* Telugu */ /* No Reph */
0x0CB0, /* Kannada */
- 0x0D30, /* Malayalam */ /* No Reph */
+ 0x0D30, /* Malayalam */ /* No Reph, Logical Repha */
- 0x179A, /* Khmer */ /* No Reph */
+ 0x179A, /* Khmer */ /* No Reph, Visual Repha */
};
commit 14dbdd9e39d3a869fd1521000c889c347433d22b
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 18 13:13:03 2012 -0400
[Indic] Unbreak Tamil
Tamil has only about 150 failures now!
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index bbf5024..394e3f4 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -365,7 +365,23 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
has_reph = true;
};
- if (basic_mask_array[HALF])
+ enum base_position_t {
+ BASE_FIRST,
+ BASE_LAST
+ } base_pos;
+
+ switch ((hb_tag_t) buffer->props.script)
+ {
+ case HB_SCRIPT_KHMER:
+ base_pos = BASE_FIRST;
+ break;
+
+ default:
+ base_pos = BASE_LAST;
+ break;
+ }
+
+ if (base_pos == BASE_LAST)
{
/* -> starting from the end of the syllable, move backwards */
unsigned int i = end;
More information about the HarfBuzz
mailing list