[HarfBuzz] harfbuzz-ng: Branch 'master' - 21 commits
Behdad Esfahbod
behdad at kemper.freedesktop.org
Tue Jul 24 00:51:24 PDT 2012
src/hb-ot-layout-gsubgpos-private.hh | 27 -
src/hb-ot-layout-private.hh | 35 ++
src/hb-ot-shape-complex-indic-machine.rl | 6
src/hb-ot-shape-complex-indic-private.hh | 2
src/hb-ot-shape-complex-indic.cc | 157 +++++++---
src/hb-ot-shape.cc | 6
test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/reph.txt | 4
test/shaping/texts/in-tree/shaper-indic/indic/script-malayalam/misc/misc.txt | 1
test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt | 7
test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt | 6
10 files changed, 172 insertions(+), 79 deletions(-)
New commits:
commit 65c43accdc4d2082282d5cedba8514b8df0c18a2
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Tue Jul 24 03:36:47 2012 -0400
[Indic] Better position left-matra in Malayalam
Just put it before base, which is what's expected.
Malayalam failures down from 1559 to 1197 (0.114172%).
BENGALI: 353988 out of 354285 tests passed. 297 failed (0.0838308%)
DEVANAGARI: 693571 out of 693628 tests passed. 57 failed (0.00821766%)
GUJARATI: 366489 out of 366506 tests passed. 17 failed (0.0046384%)
GURMUKHI: 60750 out of 60809 tests passed. 59 failed (0.0970251%)
KANNADA: 950956 out of 951913 tests passed. 957 failed (0.100534%)
KHMER: 299094 out of 299124 tests passed. 30 failed (0.0100293%)
MALAYALAM: 1047219 out of 1048416 tests passed. 1197 failed (0.114172%)
ORIYA: 42320 out of 42329 tests passed. 9 failed (0.021262%)
SINHALA: 271699 out of 271847 tests passed. 148 failed (0.0544424%)
TAMIL: 1091837 out of 1091837 tests passed. 0 failed (0%)
TELUGU: 970524 out of 970573 tests passed. 49 failed (0.00504856%)
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index d90d238..d0c3c09 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -895,22 +895,37 @@ final_reordering_syllable (hb_buffer_t *buffer,
* halant, position is moved after it.
*/
- if (start < base) /* Otherwise there can't be any pre-base matra characters. */
+ if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */
{
- unsigned int new_pos = base - 1;
- while (new_pos > start &&
- !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_Coeng)))))
- new_pos--;
- /* If we found no Halant we are done (just need to update clusters).
- * Otherwise only proceed if the Halant does
- * not belong to the Matra itself! */
- if (is_halant_or_coeng (info[new_pos]) &&
- info[new_pos].indic_position() != POS_PRE_M)
+ /* If we lost track of base, alas, position before last thingy. */
+ unsigned int new_pos = base == end ? base - 2 : base - 1;
+
+ /* Malayalam does not have "half" forms or explicit virama forms.
+ * The glyphs formed by 'half' are Chillus. We want to position
+ * matra after them all.
+ */
+ if (buffer->props.script != HB_SCRIPT_MALAYALAM)
{
- /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
- if (new_pos + 1 < end && is_joiner (info[new_pos + 1]))
- new_pos++;
+ while (new_pos > start &&
+ !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_Coeng)))))
+ new_pos--;
+
+ /* If we found no Halant we are done.
+ * Otherwise only proceed if the Halant does
+ * not belong to the Matra itself! */
+ if (is_halant_or_coeng (info[new_pos]) &&
+ info[new_pos].indic_position() != POS_PRE_M)
+ {
+ /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
+ if (new_pos + 1 < end && is_joiner (info[new_pos + 1]))
+ new_pos++;
+ }
+ else
+ new_pos = start; /* No move. */
+ }
+ if (start < new_pos)
+ {
/* Now go see if there's actually any matras... */
for (unsigned int i = new_pos; i > start; i--)
if (info[i - 1].indic_position () == POS_PRE_M)
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-malayalam/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-malayalam/misc/misc.txt
index 3072b0a..ffb408d 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-malayalam/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-malayalam/misc/misc.txt
@@ -58,3 +58,4 @@
à´³àµâ
à´³àµà´¯à´
à´³àµà´³
+à´²àµâà´ªàµà´ªàµ
commit 88f413b56f2858d149e2fc067685aeecaea779ca
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Tue Jul 24 03:04:36 2012 -0400
[Indic] Implement Reph+Ya-Phalaa interaction
The sequence Ra,H,Ya in Bengali is ambiguous, and Unicode specifies that to
get Ya-Phalaa, one would place ZWJ before Halant. I.e. a ZWJ,H sequence
requests subjoining, while a H,ZWJ requests Half form. Implement that.
Bengali failures go down from 377 to 297 (0.0838308%).
Gujarati is down by 4 to 17 (0.0046384%).
Kannada is down by 226 to 957 (0.100534%).
Current status:
BENGALI: 353988 out of 354285 tests passed. 297 failed (0.0838308%)
DEVANAGARI: 693571 out of 693628 tests passed. 57 failed (0.00821766%)
GUJARATI: 366489 out of 366506 tests passed. 17 failed (0.0046384%)
GURMUKHI: 60750 out of 60809 tests passed. 59 failed (0.0970251%)
KANNADA: 950956 out of 951913 tests passed. 957 failed (0.100534%)
KHMER: 299094 out of 299124 tests passed. 30 failed (0.0100293%)
MALAYALAM: 1046857 out of 1048416 tests passed. 1559 failed (0.148701%)
ORIYA: 42320 out of 42329 tests passed. 9 failed (0.021262%)
SINHALA: 271699 out of 271847 tests passed. 148 failed (0.0544424%)
TAMIL: 1091837 out of 1091837 tests passed. 0 failed (0%)
TELUGU: 970524 out of 970573 tests passed. 49 failed (0.00504856%)
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 58b38c7..d90d238 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -553,8 +553,14 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
}
else
{
- /* A ZWJ stops the base search, and requests an explicit half form. */
- if (info[i].indic_category() == OT_ZWJ)
+ /* A ZWJ after a Halant stops the base search, and requests an explicit
+ * half form.
+ * A ZWJ before a Halant, requests a subjoined form instead, and hence
+ * search continues. This is particularly important for Bengali
+ * sequence Ra,H,Ya that shouls form Ya-Phalaa by subjoining Ya. */
+ if (start < i &&
+ info[i].indic_category() == OT_ZWJ &&
+ info[i - 1].indic_category() == OT_H)
break;
}
} while (i > limit);
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/reph.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/reph.txt
index d5d6442..9739eaa 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/reph.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/reph.txt
@@ -8,3 +8,7 @@
রà§à¦à§
রà§à¦à§
রà§à¦à§
+রà§à¦¯
+রà§âয
+রâà§à¦¯
+রà§à¦°âà§à¦¯
commit dff0ece11d61978c04e839501f179a5c3077f340
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Tue Jul 24 02:30:38 2012 -0400
[Indic] Limit matras to 4 per syllable
Also limit joiners.
This limits our syllable length to a constant, and is
closer to what Uniscribe does anyway.
Two Devanagari tests regressed, but who cares about tests with 20
joiners in a row?! Devanagari at 57 (0.00821766%) now.
diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 69e5aa1..25e308d 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -64,12 +64,12 @@ reph = (Ra H | Repha); # possible reph
cn = c.n?;
forced_rakar = ZWJ H ZWJ Ra;
-matra_group = z*.M.N?.(H | forced_rakar)?;
+matra_group = z{0,3}.M.N?.(H | forced_rakar)?;
syllable_tail = (SM.ZWNJ?)? (Coeng (cn|V))? (VD VD?)?;
place_holder = NBSP | DOTTEDCIRCLE;
halant_group = (z?.h.ZWJ?);
final_halant_group = halant_group | h.ZWNJ;
-halant_or_matra_group = (final_halant_group | matra_group*);
+halant_or_matra_group = (final_halant_group | matra_group{0,4});
consonant_syllable = Repha? (cn.halant_group){0,4} cn A? halant_or_matra_group? syllable_tail;
commit 330b329c8905a37ca88c556dea82c70d74c77458
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Tue Jul 24 02:25:26 2012 -0400
[Indic] Unmark U+17D1 KHMER SIGN VIRIAM to NOT be a Virama
Fixes another 1 Khmer failure. Down to 30 (0.0100293%) now.
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index a061d7b..58b38c7 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -256,6 +256,8 @@ set_indic_properties (hb_glyph_info_t &info, hb_ot_map_t *map, hb_font_t *font)
if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0951, 0x0954)))
cat = OT_VD;
+ if (unlikely (u == 0x17D1))
+ cat = OT_X;
if (cat == OT_X &&
unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CB, 0x17D3))) /* Khmer Various signs */
{
diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
index aa76bc5..945dd1d 100644
--- a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
@@ -20,3 +20,4 @@
áááá
áááá
áá
áá
+ááá¶
commit 6824a7194e01b77eddb95bd95a9b32e219140912
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Tue Jul 24 02:22:18 2012 -0400
[Indic] Recategorize Khmer various signs as top matras
Khmer failures down from 39 to 31 (0.0103636%).
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 20a2ad0..a061d7b 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -236,13 +236,14 @@ set_indic_properties (hb_glyph_info_t &info, hb_ot_map_t *map, hb_font_t *font)
{
hb_codepoint_t u = info.codepoint;
unsigned int type = get_indic_categories (u);
+ indic_category_t cat = (indic_category_t) (type & 0x0F);
+ indic_position_t pos = (indic_position_t) (type >> 4);
/*
- * Assign category
+ * Re-assign category
*/
- indic_category_t cat = (indic_category_t) (type & 0x0F);
/* The spec says U+0952 is OT_A. However, testing shows that Uniscribe
* treats U+0951..U+0952 all as OT_VD.
@@ -256,8 +257,12 @@ set_indic_properties (hb_glyph_info_t &info, hb_ot_map_t *map, hb_font_t *font)
cat = OT_VD;
if (cat == OT_X &&
- unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CB, 0x17D2))) /* Khmer Various signs */
- cat = OT_N;
+ unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CB, 0x17D3))) /* Khmer Various signs */
+ {
+ /* These are like Top Matras. */
+ cat = OT_M;
+ pos = POS_ABOVE_C;
+ }
if (u == 0x17C6) /* Khmer Bindu doesn't like to be repositioned. */
cat = OT_N;
@@ -280,13 +285,10 @@ set_indic_properties (hb_glyph_info_t &info, hb_ot_map_t *map, hb_font_t *font)
-
/*
- * Assign position.
+ * Re-assign position.
*/
- indic_position_t pos = (indic_position_t) (type >> 4);
-
if ((FLAG (cat) & CONSONANT_FLAGS))
{
pos = consonant_position (u, map, font);
@@ -304,11 +306,6 @@ set_indic_properties (hb_glyph_info_t &info, hb_ot_map_t *map, hb_font_t *font)
if (unlikely (u == 0x0B01)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */
- if (u == 0x17CE) /* U+17CE is not in Indic files. Likes to be treated like Top Matra */
- {
- cat = OT_M;
- pos = POS_AFTER_SUB;
- }
info.indic_category() = cat;
commit d90b8e841e0068a601c96ab184d18b0f48eec9d1
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Tue Jul 24 02:10:20 2012 -0400
[Indic] Reposition Khmer prebase-reordering Ra around split matras
In Khmer coeng model, a V,Ra can go *after* matras. If it goes after a
split matra, it should be reordered to *before* the left part of such matra.
Khmer failures down from 136 to 39 (0.0130381%).
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 0c13299..20a2ad0 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -1135,6 +1135,19 @@ final_reordering_syllable (hb_buffer_t *buffer,
!(is_one_of (info[new_pos - 1], FLAG(OT_M) | HALANT_OR_COENG_FLAGS)))
new_pos--;
+ /* In Khmer coeng model, a V,Ra can go *after* matras. If it goes after a
+ * split matra, it should be reordered to *before* the left part of such matra. */
+ if (new_pos > start && info[new_pos - 1].indic_category() == OT_M)
+ {
+ unsigned int old_pos = i;
+ for (unsigned int i = base + 1; i < old_pos; i++)
+ if (info[i].indic_category() == OT_M)
+ {
+ new_pos--;
+ break;
+ }
+ }
+
if (new_pos > start && is_halant_or_coeng (info[new_pos - 1]))
/* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
if (new_pos < end && is_joiner (info[new_pos]))
diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
index 317a79c..aa76bc5 100644
--- a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
@@ -16,3 +16,7 @@
áâááááá
ááá
ááá
+áááá
+áááá
+áááá
+áá
áá
commit 0afb84c12567ac35adac657bf8be29999b8c5a50
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Tue Jul 24 01:44:47 2012 -0400
[Indic] Fix minor bug in pre-base Ra positioning
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 17cca33..0c13299 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -1131,8 +1131,8 @@ final_reordering_syllable (hb_buffer_t *buffer,
*/
unsigned int new_pos = base;
- while (new_pos > start + 1 &&
- !(is_one_of (info[new_pos - 1], (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_Coeng)))))
+ while (new_pos > start &&
+ !(is_one_of (info[new_pos - 1], FLAG(OT_M) | HALANT_OR_COENG_FLAGS)))
new_pos--;
if (new_pos > start && is_halant_or_coeng (info[new_pos - 1]))
commit 7573799126e812a047daa5f64121ec959866b3c8
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Tue Jul 24 01:32:07 2012 -0400
[Indic] Position Khmer U+17CE
Fixes another 6 Khmer failures. Now at 136 (0.0454661%).
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 2d8e13e..17cca33 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -304,6 +304,12 @@ set_indic_properties (hb_glyph_info_t &info, hb_ot_map_t *map, hb_font_t *font)
if (unlikely (u == 0x0B01)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */
+ if (u == 0x17CE) /* U+17CE is not in Indic files. Likes to be treated like Top Matra */
+ {
+ cat = OT_M;
+ pos = POS_AFTER_SUB;
+ }
+
info.indic_category() = cat;
info.indic_position() = pos;
diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
index 46a8073..317a79c 100644
--- a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
@@ -15,3 +15,4 @@
áááááá
áâááááá
ááá
+ááá
commit 8d00e8d0e7d10f823e6975fecaffb9d557b1a99a
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Tue Jul 24 01:04:18 2012 -0400
[Indic] Don't reposition Khmer Bindu
Khmer Bindu doesn't like to move to syllable end. Leave it where it
was.
Brings down Khmer failures from 510 to 142 (0.047572%).
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index ad1cada..2d8e13e 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -258,6 +258,8 @@ set_indic_properties (hb_glyph_info_t &info, hb_ot_map_t *map, hb_font_t *font)
if (cat == OT_X &&
unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CB, 0x17D2))) /* Khmer Various signs */
cat = OT_N;
+ if (u == 0x17C6) /* Khmer Bindu doesn't like to be repositioned. */
+ cat = OT_N;
if (unlikely (u == 0x17D2)) cat = OT_Coeng; /* Khmer coeng */
else if (unlikely (u == 0x200C)) cat = OT_ZWNJ;
commit 2278eefcdb3dd0d492b9d07176fbecc1f0516bb7
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Tue Jul 24 00:26:43 2012 -0400
[Indic] In Sinhala, form forced Reph even if no other consonant found
Fixes another 10 Sinhala failures. Down to 148 (0.0544424%).
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 80bdb31..ad1cada 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -576,17 +576,17 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
info[i].indic_position() = POS_BELOW_C;
}
- if (base < start)
- base = start; /* Just in case... */
-
/* -> If the syllable starts with Ra + Halant (in a script that has Reph)
* and has more than one consonant, Ra is excluded from candidates for
- * base consonants. */
- if (has_reph && base == start) {
+ * base consonants.
+ *
+ * Only do this for unforced Reph. (ie. not for Ra,H,ZWJ. */
+ if (has_reph && base == start && start + 2 == limit) {
/* Have no other consonant, so Reph is not formed and Ra becomes base. */
has_reph = false;
}
}
+
if (base < end)
info[base].indic_position() = POS_BASE_C;
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
index a549673..c8c939a 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
@@ -35,3 +35,4 @@
නà·à¶à·
නà·âà¶à·
නâà·à¶à·
+රà·â
commit 71fd5e80ad06c8e85a1112cc89e129d6cd03f82c
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Tue Jul 24 00:21:16 2012 -0400
[Indic] Further adjust base algorithm for Sinhala
Apparently if there is C,V,ZWJ,C, the first C will be base, but if
it's C,ZWJ,V,C, the second one will be.
Note that Uniscribe implements this differently, by breaking syllable in
the case of C,ZWJ,V,C and putting the first consonant in one syllable
and the rest in the next syllable.
Sinhala failures down from 208 to 158 (0.0581209%). No changes to
Khmer.
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 83d7ab5..80bdb31 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -560,12 +560,15 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
base = limit;
/* Find the last base consonant that is not blocked by ZWJ. If there is
- * a ZWJ before a bse consonant, that would request a subjoined form. */
+ * a ZWJ right before a base consonant, that would request a subjoined form. */
for (unsigned int i = limit; i < end; i++)
if (is_consonant (info[i]) && info[i].indic_position() == POS_BASE_C)
- base = i;
- else if (info[i].indic_category() == OT_ZWJ)
- break;
+ {
+ if (limit < i && info[i - 1].indic_category() == OT_ZWJ)
+ break;
+ else
+ base = i;
+ }
/* Mark all subsequent consonants as below. */
for (unsigned int i = base + 1; i < end; i++)
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
index d6b7abd..a549673 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
@@ -32,3 +32,6 @@
à¶à·âයà·
රà·âයà·âය
à¶âඬà·
+නà·à¶à·
+නà·âà¶à·
+නâà·à¶à·
commit 73d71cc527d28fd5519c5d965c272ea1fb149a0e
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Tue Jul 24 00:09:12 2012 -0400
[Indic] End Vowel-based syllable at ZWJ
One Devanagari test regressed, plus 10 Malayalam (at 1545 now).
Fixed 120 Sinhala failures. Now at 208 (0.0765136%).
diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 01a22e8..69e5aa1 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -73,7 +73,7 @@ halant_or_matra_group = (final_halant_group | matra_group*);
consonant_syllable = Repha? (cn.halant_group){0,4} cn A? halant_or_matra_group? syllable_tail;
-vowel_syllable = reph? V.n? (halant_group.cn | ZWJ.cn){0,4} halant_or_matra_group? syllable_tail;
+vowel_syllable = reph? V.n? (ZWJ | (halant_group.cn){0,4} halant_or_matra_group? syllable_tail);
standalone_cluster = reph? place_holder.n? (halant_group.cn){0,4} halant_or_matra_group? syllable_tail;
other = any;
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
index c4f6b6b..d6b7abd 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
@@ -31,3 +31,4 @@
à·à·à¶§à·âරà·
à¶à·âයà·
රà·âයà·âය
+à¶âඬà·
commit 34c215036f5fcdc7599b1ab0591b56dbb3811902
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Mon Jul 23 23:51:29 2012 -0400
[Indic] Improve Sinhala base algorithm and reph positioning
Sinhala does not have half forms. And most (all?) consonants can be
base, except when preceded by ZWJ, which would request a subjoined form.
Hence switch the base algorithm to categorize with Khmer, start search
at start, and stop at a ZWJ.
Also, mark all pos=base consonants after base to be subjoined. Mark
base itself to have pos=base.
Finally, adjust Sinhala's reph position to after-main.
Brings down Sinhala failures from 455 to 328 (0.120656%).
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index e7b70c8..83d7ab5 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -501,6 +501,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
switch ((hb_tag_t) buffer->props.script)
{
+ case HB_SCRIPT_SINHALA:
case HB_SCRIPT_KHMER:
base_pos = BASE_FIRST;
break;
@@ -557,6 +558,19 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
if (!has_reph)
base = limit;
+
+ /* Find the last base consonant that is not blocked by ZWJ. If there is
+ * a ZWJ before a bse consonant, that would request a subjoined form. */
+ for (unsigned int i = limit; i < end; i++)
+ if (is_consonant (info[i]) && info[i].indic_position() == POS_BASE_C)
+ base = i;
+ else if (info[i].indic_category() == OT_ZWJ)
+ break;
+
+ /* Mark all subsequent consonants as below. */
+ for (unsigned int i = base + 1; i < end; i++)
+ if (is_consonant (info[i]) && info[i].indic_position() == POS_BASE_C)
+ info[i].indic_position() = POS_BELOW_C;
}
if (base < start)
@@ -570,6 +584,8 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
has_reph = false;
}
}
+ if (base < end)
+ info[base].indic_position() = POS_BASE_C;
/* 2. Decompose and reorder Matras:
@@ -931,6 +947,7 @@ final_reordering_syllable (hb_buffer_t *buffer,
{
case HB_SCRIPT_MALAYALAM:
case HB_SCRIPT_ORIYA:
+ case HB_SCRIPT_SINHALA:
reph_pos = REPH_AFTER_MAIN;
break;
@@ -945,7 +962,6 @@ final_reordering_syllable (hb_buffer_t *buffer,
default:
case HB_SCRIPT_DEVANAGARI:
case HB_SCRIPT_GUJARATI:
- case HB_SCRIPT_SINHALA:
reph_pos = REPH_BEFORE_POSTSCRIPT;
break;
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
index 03a0fae..c4f6b6b 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
@@ -30,3 +30,4 @@
à·à·âà·âර
à·à·à¶§à·âරà·
à¶à·âයà·
+රà·âයà·âය
commit 2ec934c6c25423e7af20d909a9c698a149808ea9
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Mon Jul 23 23:49:04 2012 -0400
[Indic] Change "unknown" position to end of syllable
diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index 4a4c8c0..7767ae7 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -136,7 +136,7 @@ enum indic_syllabic_category_t {
/* Categories used in IndicSMatraCategory.txt from UCD */
enum indic_matra_category_t {
- INDIC_MATRA_CATEGORY_NOT_APPLICABLE = POS_BASE_C,
+ INDIC_MATRA_CATEGORY_NOT_APPLICABLE = POS_END,
INDIC_MATRA_CATEGORY_LEFT = POS_PRE_C,
INDIC_MATRA_CATEGORY_TOP = POS_ABOVE_C,
commit b70021f7c81a0ed08475b14b07291f662cd9f905
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Mon Jul 23 20:18:17 2012 -0400
When removing zero-width marks, don't remove ligatures
If a mark ligated, it probably should NOT be removed.
diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc
index b5485bd..3b1c1d7 100644
--- a/src/hb-ot-shape.cc
+++ b/src/hb-ot-shape.cc
@@ -29,6 +29,7 @@
#include "hb-ot-shape-private.hh"
#include "hb-ot-shape-normalize-private.hh"
+#include "hb-ot-layout-private.hh"
#include "hb-font-private.hh"
#include "hb-set-private.hh"
@@ -378,15 +379,14 @@ hb_position_complex_fallback_visual (hb_ot_shape_context_t *c)
static void
hb_hide_zerowidth (hb_ot_shape_context_t *c)
{
- /* TODO Save the space character in the font? */
hb_codepoint_t space;
if (!hb_font_get_glyph (c->font, ' ', 0, &space))
return; /* No point! */
unsigned int count = c->buffer->len;
for (unsigned int i = 0; i < count; i++)
- /* TODO Do this if no ligature was formed? */
- if (unlikely (_hb_glyph_info_is_zero_width (&c->buffer->info[i]))) {
+ if (unlikely (!is_a_ligature (c->buffer->info[i]) &&
+ _hb_glyph_info_is_zero_width (&c->buffer->info[i]))) {
c->buffer->info[i].codepoint = space;
c->buffer->pos[i].x_advance = 0;
c->buffer->pos[i].y_advance = 0;
commit 49c5ec51444f27f33e1eb6aa1959c61b08fa89c0
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Mon Jul 23 20:14:13 2012 -0400
Minor refactoring
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index a26cc4f..d50c653 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -34,38 +34,6 @@
-/* unique ligature id */
-/* component number in the ligature (0 = base) */
-static inline void
-set_lig_props (hb_glyph_info_t &info, unsigned int lig_id, unsigned int lig_comp)
-{
- info.lig_props() = (lig_id << 4) | (lig_comp & 0x0F);
-}
-static inline unsigned int
-get_lig_id (const hb_glyph_info_t &info)
-{
- return info.lig_props() >> 4;
-}
-static inline unsigned int
-get_lig_comp (const hb_glyph_info_t &info)
-{
- return info.lig_props() & 0x0F;
-}
-static inline bool
-is_a_ligature (const hb_glyph_info_t &info)
-{
- return unlikely (get_lig_id (info) && ~get_lig_comp (info));
-}
-
-static inline uint8_t allocate_lig_id (hb_buffer_t *buffer) {
- uint8_t lig_id = buffer->next_serial () & 0x0F;
- if (unlikely (!lig_id))
- lig_id = allocate_lig_id (buffer); /* in case of overflow */
- return lig_id;
-}
-
-
-
#ifndef HB_DEBUG_CLOSURE
#define HB_DEBUG_CLOSURE (HB_DEBUG+0)
#endif
diff --git a/src/hb-ot-layout-private.hh b/src/hb-ot-layout-private.hh
index 366b061..7a1c7e3 100644
--- a/src/hb-ot-layout-private.hh
+++ b/src/hb-ot-layout-private.hh
@@ -64,6 +64,41 @@ _hb_ot_layout_skip_mark (hb_face_t *face,
unsigned int *property_out);
+/*
+ * GSUB/GPOS
+ */
+
+/* unique ligature id */
+/* component number in the ligature (0 = base) */
+static inline void
+set_lig_props (hb_glyph_info_t &info, unsigned int lig_id, unsigned int lig_comp)
+{
+ info.lig_props() = (lig_id << 4) | (lig_comp & 0x0F);
+}
+static inline unsigned int
+get_lig_id (const hb_glyph_info_t &info)
+{
+ return info.lig_props() >> 4;
+}
+static inline unsigned int
+get_lig_comp (const hb_glyph_info_t &info)
+{
+ return info.lig_props() & 0x0F;
+}
+static inline bool
+is_a_ligature (const hb_glyph_info_t &info)
+{
+ return unlikely (get_lig_id (info) && ~get_lig_comp (info));
+}
+
+static inline uint8_t allocate_lig_id (hb_buffer_t *buffer) {
+ uint8_t lig_id = buffer->next_serial () & 0x0F;
+ if (unlikely (!lig_id))
+ lig_id = allocate_lig_id (buffer); /* in case of overflow */
+ return lig_id;
+}
+
+
/*
* hb_ot_layout_t
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 8b1d975..e7b70c8 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -26,7 +26,7 @@
#include "hb-ot-shape-complex-indic-private.hh"
#include "hb-ot-shape-private.hh"
-#include "hb-ot-layout-gsubgpos-private.hh"
+#include "hb-ot-layout-private.hh"
#define OLD_INDIC_TAG(script) (((hb_tag_t) script) | 0x20000000)
#define IS_OLD_INDIC_TAG(tag) ( \
commit c3e6fdc3791168cf2b4c9412e751f187d58faa42
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Mon Jul 23 20:11:42 2012 -0400
[Indic] Improve check on ligatures
Only skip actual ligatures, not marks in-between ligature components.
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index c01e9c1..a26cc4f 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -51,6 +51,11 @@ get_lig_comp (const hb_glyph_info_t &info)
{
return info.lig_props() & 0x0F;
}
+static inline bool
+is_a_ligature (const hb_glyph_info_t &info)
+{
+ return unlikely (get_lig_id (info) && ~get_lig_comp (info));
+}
static inline uint8_t allocate_lig_id (hb_buffer_t *buffer) {
uint8_t lig_id = buffer->next_serial () & 0x0F;
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 7af7223..8b1d975 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -201,7 +201,7 @@ static inline bool
is_one_of (const hb_glyph_info_t &info, unsigned int flags)
{
/* If it ligated, all bets are off. */
- if (unlikely (get_lig_id (info))) return false;
+ if (is_a_ligature (info)) return false;
return !!(FLAG (info.indic_category()) & flags);
}
commit 771a8f50289e8fa458cfc3cd84f73a380ce98077
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Mon Jul 23 20:07:50 2012 -0400
[Indic] exclude ligatures when matching on Indic category
If, say, a H,ZWJ,C ligature was formed, we don't want the code to detect
that as a Halant. So, ignore ligatures when matching category in
final_reordering.
Sinhala failures down from 514 to 455 (0.167374%).
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 6b1e2fa..7af7223 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -26,6 +26,7 @@
#include "hb-ot-shape-complex-indic-private.hh"
#include "hb-ot-shape-private.hh"
+#include "hb-ot-layout-gsubgpos-private.hh"
#define OLD_INDIC_TAG(script) (((hb_tag_t) script) | 0x20000000)
#define IS_OLD_INDIC_TAG(tag) ( \
@@ -187,7 +188,7 @@ matra_position (hb_codepoint_t u, indic_position_t side)
abort ();
}
-static bool
+static inline bool
is_ra (hb_codepoint_t u)
{
return !!bsearch (&u, ra_chars,
@@ -196,36 +197,38 @@ is_ra (hb_codepoint_t u)
compare_codepoint);
}
+static inline bool
+is_one_of (const hb_glyph_info_t &info, unsigned int flags)
+{
+ /* If it ligated, all bets are off. */
+ if (unlikely (get_lig_id (info))) return false;
+ return !!(FLAG (info.indic_category()) & flags);
+}
+
#define JOINER_FLAGS (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ))
-static bool
+static inline bool
is_joiner (const hb_glyph_info_t &info)
{
- return !!(FLAG (info.indic_category()) & JOINER_FLAGS);
+ return is_one_of (info, JOINER_FLAGS);
}
+/* Note:
+ *
+ * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels
+ * cannot happen in a consonant syllable. The plus side however is, we can call the
+ * consonant syllable logic from the vowel syllable function and get it all right! */
#define CONSONANT_FLAGS (FLAG (OT_C) | FLAG (OT_Ra) | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE))
-static bool
+static inline bool
is_consonant (const hb_glyph_info_t &info)
{
- /* Note:
- *
- * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels
- * cannot happen in a consonant syllable. The plus side however is, we can call the
- * consonant syllable logic from the vowel syllable function and get it all right! */
- return !!(FLAG (info.indic_category()) & CONSONANT_FLAGS);
+ return is_one_of (info, CONSONANT_FLAGS);
}
#define HALANT_OR_COENG_FLAGS (FLAG (OT_H) | FLAG (OT_Coeng))
-static bool
+static inline bool
is_halant_or_coeng (const hb_glyph_info_t &info)
{
- return !!(FLAG (info.indic_category()) & HALANT_OR_COENG_FLAGS);
-}
-
-static bool
-is_one_of (const hb_glyph_info_t &info, unsigned int flags)
-{
- return !!(FLAG (info.indic_category()) & flags);
+ return is_one_of (info, HALANT_OR_COENG_FLAGS);
}
static inline void
@@ -726,7 +729,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
/* XXX This will not match for old-Indic spec since the Halant-Ra order is reversed already. */
if (basic_mask_array[PREF] && base + 2 < end)
{
- /* Find a Halant,Ra sequence and mark it fore pre-base reordering processing. */
+ /* Find a Halant,Ra sequence and mark it for pre-base reordering processing. */
for (unsigned int i = base + 1; i + 1 < end; i++)
if (is_halant_or_coeng (info[i]) &&
info[i + 1].indic_category() == OT_Ra)
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
index 32af26f..03a0fae 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
@@ -29,3 +29,4 @@
රà·âම
à·à·âà·âර
à·à·à¶§à·âරà·
+à¶à·âයà·
commit d1af9e82e5309158ed334ab8e21f3a3b64b9540f
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Mon Jul 23 19:55:35 2012 -0400
[GSUB/GPOS] Const correctness
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index 99a7271..c01e9c1 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -42,12 +42,12 @@ set_lig_props (hb_glyph_info_t &info, unsigned int lig_id, unsigned int lig_comp
info.lig_props() = (lig_id << 4) | (lig_comp & 0x0F);
}
static inline unsigned int
-get_lig_id (hb_glyph_info_t &info)
+get_lig_id (const hb_glyph_info_t &info)
{
return info.lig_props() >> 4;
}
static inline unsigned int
-get_lig_comp (hb_glyph_info_t &info)
+get_lig_comp (const hb_glyph_info_t &info)
{
return info.lig_props() & 0x0F;
}
commit baacd090df97610e3f6d1b2a110dc67b6c6f9f5c
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Mon Jul 23 19:51:48 2012 -0400
[Indic] Minor refactoring
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index d49e274..6b1e2fa 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -222,6 +222,12 @@ is_halant_or_coeng (const hb_glyph_info_t &info)
return !!(FLAG (info.indic_category()) & HALANT_OR_COENG_FLAGS);
}
+static bool
+is_one_of (const hb_glyph_info_t &info, unsigned int flags)
+{
+ return !!(FLAG (info.indic_category()) & flags);
+}
+
static inline void
set_indic_properties (hb_glyph_info_t &info, hb_ot_map_t *map, hb_font_t *font)
{
@@ -858,7 +864,7 @@ final_reordering_syllable (hb_buffer_t *buffer,
{
unsigned int new_pos = base - 1;
while (new_pos > start &&
- !(FLAG (info[new_pos].indic_category()) & (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_Coeng))))
+ !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_Coeng)))))
new_pos--;
/* If we found no Halant we are done (just need to update clusters).
* Otherwise only proceed if the Halant does
@@ -1096,7 +1102,7 @@ final_reordering_syllable (hb_buffer_t *buffer,
unsigned int new_pos = base;
while (new_pos > start + 1 &&
- !(FLAG (info[new_pos - 1].indic_category()) & (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_Coeng))))
+ !(is_one_of (info[new_pos - 1], (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_Coeng)))))
new_pos--;
if (new_pos > start && is_halant_or_coeng (info[new_pos - 1]))
commit c7c4de2fb9bba216e37875d79815eef55c0acc01
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Mon Jul 23 18:25:02 2012 -0400
[Indic] Remove syllable length check before sorting
We now limit syllable lengths in the machine. No need to match here.
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 6ba9c16..d49e274 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -678,8 +678,6 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
}
}
- /* We do bubble-sort, skip malicious clusters attempts */
- if (end - start < 64)
{
/* Things are out-of-control for post base positions, they may shuffle
* around like crazy, so merge clusters. For pre-base stuff, we handle
More information about the HarfBuzz mailing list