[HarfBuzz] harfbuzz: Branch 'master' - 3 commits

Behdad Esfahbod behdad at kemper.freedesktop.org
Mon Oct 1 17:36:21 UTC 2018


 src/hb-ot-shape-complex-indic.hh                                              |    2 
 src/hb-ot-shape-complex-khmer-machine.hh                                      |  300 ++++++----
 src/hb-ot-shape-complex-khmer-machine.rl                                      |   42 -
 src/hb-ot-shape-complex-khmer.cc                                              |    4 
 src/hb-ot-shape-complex-khmer.hh                                              |  111 +--
 test/shaping/data/in-house/Makefile.sources                                   |    2 
 test/shaping/data/in-house/fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf |binary
 test/shaping/data/in-house/tests/khmer-mark-order.tests                       |   25 
 8 files changed, 297 insertions(+), 189 deletions(-)

New commits:
commit ab4c37f73a7d4fcf48584cda3fff94e98a672086
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Mon Oct 1 19:35:48 2018 +0200

    [khmer] Add mark-ordering tests
    
    Fixes https://github.com/harfbuzz/harfbuzz/issues/667

diff --git a/test/shaping/data/in-house/Makefile.sources b/test/shaping/data/in-house/Makefile.sources
index c0b85f2f..e5cf77fd 100644
--- a/test/shaping/data/in-house/Makefile.sources
+++ b/test/shaping/data/in-house/Makefile.sources
@@ -26,6 +26,8 @@ TESTS = \
 	tests/indic-script-extensions.tests \
 	tests/indic-special-cases.tests \
 	tests/indic-syllable.tests \
+	tests/khmer-mark-order.tests \
+	tests/khmer-misc.tests \
 	tests/language-tags.tests \
 	tests/ligature-id.tests \
 	tests/mark-attachment.tests \
diff --git a/test/shaping/data/in-house/fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf b/test/shaping/data/in-house/fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf
new file mode 100644
index 00000000..a9dc202b
Binary files /dev/null and b/test/shaping/data/in-house/fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf differ
diff --git a/test/shaping/data/in-house/tests/khmer-mark-order.tests b/test/shaping/data/in-house/tests/khmer-mark-order.tests
new file mode 100644
index 00000000..d581dd15
--- /dev/null
+++ b/test/shaping/data/in-house/tests/khmer-mark-order.tests
@@ -0,0 +1,25 @@
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17CA,U+17BE,U+1794:[uni17C1=0+288|uni179F=0+928|uni17BB=0@-6,-26+0|uni17B8=0@-32,-29+0|uni1794=3+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17C9,U+17BE,U+17BB,U+1794:[uni17C1=0+288|uni179F=0+928|uni17C9=0@-32,-29+0|uni17B8=0@-32,237+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=4+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17C9,U+17C1,U+17B8,U+17BB,U+1794:[uni17C1=0+288|uni179F=0+928|uni17C9=0@-32,-29+0|uni17B8=0@-32,237+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=5+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17C9,U+17B8,U+17C1,U+17BB,U+1794:[uni179F=0+928|uni17C9=0@-32,-29+0|uni17B8=0@-32,237+0|uni17C1=0+288|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=5+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17C9,U+17BE,U+17BB,U+17BB,U+1794:[uni17C1=0+288|uni179F=0+928|uni17C9=0@-32,-29+0|uni17B8=0@-32,237+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=5+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17C9,U+17C1,U+17B8,U+17BB,U+17BB,U+1794:[uni17C1=0+288|uni179F=0+928|uni17C9=0@-32,-29+0|uni17B8=0@-32,237+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=6+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17C9,U+17B8,U+17C1,U+17BB,U+17BB,U+1794:[uni179F=0+928|uni17C9=0@-32,-29+0|uni17B8=0@-32,237+0|uni17C1=0+288|uni25CC=0+635|uni17BB=0@-20,-26+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=6+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17CA,U+17BE,U+17BB,U+1794:[uni17C1=0+288|uni179F=0+928|uni17BB=0@-6,-26+0|uni17B8=0@-32,-29+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=4+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17CA,U+17C1,U+17B8,U+17BB,U+1794:[uni17C1=0+288|uni179F=0+928|uni17BB=0@-6,-26+0|uni17B8=0@-32,-29+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=5+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17CA,U+17B8,U+17C1,U+17BB,U+1794:[uni179F=0+928|uni17BB=0@-6,-26+0|uni17B8=0@-32,-29+0|uni17C1=0+288|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=5+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17CA,U+17BE,U+17BB,U+17BB,U+1794:[uni17C1=0+288|uni179F=0+928|uni17BB=0@-6,-26+0|uni17B8=0@-32,-29+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=5+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17CA,U+17C1,U+17B8,U+17BB,U+17BB,U+1794:[uni17C1=0+288|uni179F=0+928|uni17BB=0@-6,-26+0|uni17B8=0@-32,-29+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=6+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17CA,U+17B8,U+17C1,U+17BB,U+17BB,U+1794:[uni179F=0+928|uni17BB=0@-6,-26+0|uni17B8=0@-32,-29+0|uni17C1=0+288|uni25CC=0+635|uni17BB=0@-20,-26+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=6+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17C9,U+17BE,U+17B8,U+1794:[uni17C1=0+288|uni179F=0+928|uni17C9=0@-32,-29+0|uni17B8=0@-32,237+0|uni25CC=0+635|uni17B8=0@-20,-84+0|uni1794=4+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17C9,U+17B8,U+17BE,U+1794:[uni179F=0+928|uni17C9=0@-32,-29+0|uni17B8=0@-32,237+0|uni17C1=0+288|uni25CC=0+635|uni17B8=0@-20,-84+0|uni1794=4+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17C9,U+17BE,U+17B8,U+17BB,U+1794:[uni17C1=0+288|uni179F=0+928|uni17C9=0@-32,-29+0|uni17B8=0@-32,237+0|uni25CC=0+635|uni17B8=0@-20,-84+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=5+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17C9,U+17B8,U+17BE,U+17BB,U+1794:[uni179F=0+928|uni17C9=0@-32,-29+0|uni17B8=0@-32,237+0|uni17C1=0+288|uni25CC=0+635|uni17B8=0@-20,-84+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=5+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17CA,U+17BE,U+17B8,U+1794:[uni17C1=0+288|uni179F=0+928|uni17BB=0@-6,-26+0|uni17B8=0@-32,-29+0|uni25CC=0+635|uni17B8=0@-20,-84+0|uni1794=4+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17CA,U+17B8,U+17BE,U+1794:[uni179F=0+928|uni17BB=0@-6,-26+0|uni17B8=0@-32,-29+0|uni17C1=0+288|uni25CC=0+635|uni17B8=0@-20,-84+0|uni1794=4+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17CA,U+17BE,U+17B8,U+17BB,U+1794:[uni17C1=0+288|uni179F=0+928|uni17BB=0@-6,-26+0|uni17B8=0@-32,-29+0|uni25CC=0+635|uni17B8=0@-20,-84+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=5+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17CA,U+17B8,U+17BE,U+17BB,U+1794:[uni179F=0+928|uni17BB=0@-6,-26+0|uni17B8=0@-32,-29+0|uni17C1=0+288|uni25CC=0+635|uni17B8=0@-20,-84+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=5+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17BE,U+17B8,U+17BB,U+1794:[uni17C1=0+288|uni179F=0+928|uni17B8=0@-32,-29+0|uni25CC=0+635|uni17B8=0@-20,-84+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=4+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17BE,U+17BB,U+17B8,U+1794:[uni17C1=0+288|uni179F=0+928|uni17B8=0@-32,-29+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni17B8=0@-20,-84+0|uni1794=4+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17B8,U+17BE,U+17BB,U+1794:[uni179F=0+928|uni17B8=0@-32,-29+0|uni17C1=0+288|uni25CC=0+635|uni17B8=0@-20,-84+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni1794=4+635]
+../fonts/b6031119874ae9ff1dd65383a335e361c0962220.ttf::U+179F,U+17B8,U+17BB,U+17BE,U+1794:[uni179F=0+928|uni17B8=0@-32,-29+0|uni25CC=0+635|uni17BB=0@-20,-26+0|uni17C1=0+288|uni25CC=0+635|uni17B8=0@-20,-84+0|uni1794=4+635]
commit 51436547162a18e88144e7125ad6ce4a69a08d4b
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Mon Oct 1 19:09:58 2018 +0200

    [khmer] Rewrite grammar completely
    
    Based on experimenting with Uniscribe to extract grammar and categories.
    
    Failures down from 44 to 35:
    
    KHMER: 299089 out of 299124 tests passed. 35 failed (0.0117008%)
    
    We still don't enforce the one-matra rule pre-decomposition, but enforce
    an order and one-matra-per-position post-decomposition.
    
    https://github.com/harfbuzz/harfbuzz/issues/667

diff --git a/src/hb-ot-shape-complex-indic.hh b/src/hb-ot-shape-complex-indic.hh
index 9e597797..fe5595f8 100644
--- a/src/hb-ot-shape-complex-indic.hh
+++ b/src/hb-ot-shape-complex-indic.hh
@@ -125,7 +125,7 @@ enum indic_syllabic_category_t {
   INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA	= OT_Repha,
   INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED		= OT_X, /* Don't care. */
   INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED		= OT_CM,
-  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA	= OT_N,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA	= OT_CM,
   INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER	= OT_CS,
   INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK		= OT_SM, /* https://github.com/harfbuzz/harfbuzz/issues/552 */
   INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER		= OT_Coeng,
diff --git a/src/hb-ot-shape-complex-khmer-machine.hh b/src/hb-ot-shape-complex-khmer-machine.hh
index a7e1711e..d013456b 100644
--- a/src/hb-ot-shape-complex-khmer-machine.hh
+++ b/src/hb-ot-shape-complex-khmer-machine.hh
@@ -34,130 +34,200 @@
 
 #line 36 "hb-ot-shape-complex-khmer-machine.hh"
 static const unsigned char _khmer_syllable_machine_trans_keys[] = {
-	7u, 7u, 1u, 16u, 13u, 13u, 1u, 16u, 7u, 13u, 7u, 7u, 1u, 16u, 13u, 13u, 
-	1u, 16u, 7u, 13u, 1u, 16u, 3u, 14u, 3u, 14u, 5u, 14u, 3u, 14u, 5u, 14u, 
-	8u, 8u, 3u, 13u, 3u, 8u, 8u, 8u, 3u, 8u, 3u, 14u, 3u, 14u, 5u, 14u, 
-	3u, 14u, 5u, 14u, 8u, 8u, 3u, 13u, 3u, 8u, 8u, 8u, 3u, 8u, 3u, 14u, 
-	3u, 14u, 7u, 13u, 7u, 7u, 1u, 16u, 0
+	5u, 26u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 16u, 5u, 21u, 5u, 26u, 5u, 21u, 
+	5u, 26u, 5u, 21u, 1u, 16u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 16u, 5u, 21u, 
+	5u, 26u, 5u, 21u, 5u, 26u, 5u, 21u, 5u, 26u, 1u, 16u, 1u, 29u, 5u, 29u, 
+	5u, 29u, 5u, 29u, 22u, 22u, 5u, 22u, 5u, 29u, 5u, 29u, 5u, 29u, 5u, 26u, 
+	5u, 29u, 5u, 29u, 22u, 22u, 5u, 22u, 5u, 29u, 5u, 29u, 1u, 16u, 5u, 29u, 
+	5u, 29u, 0
 };
 
 static const char _khmer_syllable_machine_key_spans[] = {
-	1, 16, 1, 16, 7, 1, 16, 1, 
-	16, 7, 16, 12, 12, 10, 12, 10, 
-	1, 11, 6, 1, 6, 12, 12, 10, 
-	12, 10, 1, 11, 6, 1, 6, 12, 
-	12, 7, 1, 16
+	22, 17, 22, 17, 16, 17, 22, 17, 
+	22, 17, 16, 17, 22, 17, 16, 17, 
+	22, 17, 22, 17, 22, 16, 29, 25, 
+	25, 25, 1, 18, 25, 25, 25, 22, 
+	25, 25, 1, 18, 25, 25, 16, 25, 
+	25
 };
 
 static const short _khmer_syllable_machine_index_offsets[] = {
-	0, 2, 19, 21, 38, 46, 48, 65, 
-	67, 84, 92, 109, 122, 135, 146, 159, 
-	170, 172, 184, 191, 193, 200, 213, 226, 
-	237, 250, 261, 263, 275, 282, 284, 291, 
-	304, 317, 325, 327
+	0, 23, 41, 64, 82, 99, 117, 140, 
+	158, 181, 199, 216, 234, 257, 275, 292, 
+	310, 333, 351, 374, 392, 415, 432, 462, 
+	488, 514, 540, 542, 561, 587, 613, 639, 
+	662, 688, 714, 716, 735, 761, 787, 804, 
+	830
 };
 
 static const char _khmer_syllable_machine_indicies[] = {
-	1, 0, 2, 2, 0, 0, 0, 0, 
+	1, 1, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 0, 0, 2, 
+	3, 0, 0, 0, 0, 4, 0, 1, 
+	1, 0, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 0, 0, 3, 
+	0, 1, 1, 0, 0, 0, 0, 0, 
 	0, 0, 0, 0, 0, 0, 0, 0, 
-	0, 2, 0, 3, 0, 4, 4, 0, 
+	0, 3, 0, 0, 0, 0, 4, 0, 
+	5, 5, 0, 0, 0, 0, 0, 0, 
 	0, 0, 0, 0, 0, 0, 0, 0, 
-	0, 0, 0, 0, 4, 0, 1, 0, 
-	0, 0, 0, 0, 5, 0, 7, 6, 
-	8, 8, 6, 6, 6, 6, 6, 6, 
-	6, 6, 6, 6, 6, 6, 6, 8, 
-	6, 9, 6, 10, 10, 6, 6, 6, 
-	6, 6, 6, 6, 6, 6, 6, 6, 
-	6, 6, 10, 6, 7, 6, 6, 6, 
-	6, 6, 11, 6, 4, 4, 13, 12, 
-	14, 15, 7, 16, 12, 12, 4, 4, 
-	11, 17, 12, 4, 12, 19, 18, 20, 
-	21, 1, 22, 18, 18, 18, 18, 5, 
-	23, 18, 24, 18, 21, 21, 1, 22, 
-	18, 18, 18, 18, 18, 23, 18, 21, 
-	21, 1, 22, 18, 18, 18, 18, 18, 
-	23, 18, 25, 18, 21, 21, 1, 22, 
-	18, 18, 18, 18, 18, 26, 18, 21, 
-	21, 1, 22, 18, 18, 18, 18, 18, 
-	26, 18, 27, 18, 28, 18, 29, 18, 
-	18, 22, 18, 18, 18, 18, 3, 18, 
-	30, 18, 18, 18, 18, 22, 18, 22, 
-	18, 28, 18, 18, 18, 18, 22, 18, 
-	19, 18, 21, 21, 1, 22, 18, 18, 
-	18, 18, 18, 23, 18, 32, 31, 33, 
-	33, 7, 16, 31, 31, 31, 31, 31, 
-	34, 31, 33, 33, 7, 16, 31, 31, 
-	31, 31, 31, 34, 31, 35, 31, 33, 
-	33, 7, 16, 31, 31, 31, 31, 31, 
-	36, 31, 33, 33, 7, 16, 31, 31, 
-	31, 31, 31, 36, 31, 37, 31, 38, 
-	31, 39, 31, 31, 16, 31, 31, 31, 
-	31, 9, 31, 40, 31, 31, 31, 31, 
-	16, 31, 16, 31, 38, 31, 31, 31, 
-	31, 16, 31, 13, 31, 41, 33, 7, 
-	16, 31, 31, 31, 31, 11, 34, 31, 
-	13, 31, 33, 33, 7, 16, 31, 31, 
-	31, 31, 31, 34, 31, 7, 42, 42, 
-	42, 42, 42, 11, 42, 7, 42, 10, 
-	10, 42, 42, 42, 42, 42, 42, 42, 
-	42, 42, 42, 42, 42, 42, 10, 42, 
+	4, 0, 6, 6, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 0, 0, 0, 
+	0, 6, 0, 7, 7, 0, 0, 0, 
+	0, 0, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 8, 0, 9, 9, 0, 
+	0, 0, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 10, 0, 0, 
+	0, 0, 4, 0, 9, 9, 0, 0, 
+	0, 0, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 10, 0, 11, 11, 
+	0, 0, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 0, 12, 0, 
+	0, 0, 0, 4, 0, 11, 11, 0, 
+	0, 0, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 12, 0, 13, 
+	13, 0, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 0, 13, 0, 
+	15, 15, 14, 14, 14, 14, 14, 14, 
+	14, 14, 14, 14, 14, 14, 14, 14, 
+	16, 14, 15, 15, 17, 17, 17, 17, 
+	17, 17, 17, 17, 17, 17, 17, 17, 
+	17, 17, 16, 17, 17, 17, 17, 18, 
+	17, 19, 19, 17, 17, 17, 17, 17, 
+	17, 17, 17, 17, 17, 17, 17, 17, 
+	17, 18, 17, 20, 20, 17, 17, 17, 
+	17, 17, 17, 17, 17, 17, 17, 17, 
+	17, 17, 20, 17, 21, 21, 17, 17, 
+	17, 17, 17, 17, 17, 17, 17, 17, 
+	17, 17, 17, 17, 22, 17, 23, 23, 
+	17, 17, 17, 17, 17, 17, 17, 17, 
+	17, 17, 17, 17, 17, 17, 24, 17, 
+	17, 17, 17, 18, 17, 23, 23, 17, 
+	17, 17, 17, 17, 17, 17, 17, 17, 
+	17, 17, 17, 17, 17, 24, 17, 25, 
+	25, 17, 17, 17, 17, 17, 17, 17, 
+	17, 17, 17, 17, 17, 17, 17, 26, 
+	17, 17, 17, 17, 18, 17, 25, 25, 
+	17, 17, 17, 17, 17, 17, 17, 17, 
+	17, 17, 17, 17, 17, 17, 26, 17, 
+	15, 15, 17, 17, 17, 17, 17, 17, 
+	17, 17, 17, 17, 17, 17, 17, 27, 
+	16, 17, 17, 17, 17, 18, 17, 28, 
+	28, 17, 17, 17, 17, 17, 17, 17, 
+	17, 17, 17, 17, 17, 17, 28, 17, 
+	13, 13, 29, 29, 30, 30, 29, 29, 
+	29, 29, 2, 2, 29, 31, 29, 13, 
+	29, 29, 29, 29, 16, 20, 29, 29, 
+	29, 18, 24, 26, 22, 29, 33, 33, 
+	32, 32, 32, 32, 32, 32, 32, 34, 
+	32, 32, 32, 32, 32, 2, 3, 6, 
+	32, 32, 32, 4, 10, 12, 8, 32, 
+	35, 35, 32, 32, 32, 32, 32, 32, 
+	32, 36, 32, 32, 32, 32, 32, 32, 
+	3, 6, 32, 32, 32, 4, 10, 12, 
+	8, 32, 5, 5, 32, 32, 32, 32, 
+	32, 32, 32, 36, 32, 32, 32, 32, 
+	32, 32, 4, 6, 32, 32, 32, 32, 
+	32, 32, 8, 32, 6, 32, 7, 7, 
+	32, 32, 32, 32, 32, 32, 32, 36, 
+	32, 32, 32, 32, 32, 32, 8, 6, 
+	32, 37, 37, 32, 32, 32, 32, 32, 
+	32, 32, 36, 32, 32, 32, 32, 32, 
+	32, 10, 6, 32, 32, 32, 4, 32, 
+	32, 8, 32, 38, 38, 32, 32, 32, 
+	32, 32, 32, 32, 36, 32, 32, 32, 
+	32, 32, 32, 12, 6, 32, 32, 32, 
+	4, 10, 32, 8, 32, 35, 35, 32, 
+	32, 32, 32, 32, 32, 32, 34, 32, 
+	32, 32, 32, 32, 32, 3, 6, 32, 
+	32, 32, 4, 10, 12, 8, 32, 15, 
+	15, 39, 39, 39, 39, 39, 39, 39, 
+	39, 39, 39, 39, 39, 39, 39, 16, 
+	39, 39, 39, 39, 18, 39, 41, 41, 
+	40, 40, 40, 40, 40, 40, 40, 42, 
+	40, 40, 40, 40, 40, 40, 16, 20, 
+	40, 40, 40, 18, 24, 26, 22, 40, 
+	19, 19, 40, 40, 40, 40, 40, 40, 
+	40, 42, 40, 40, 40, 40, 40, 40, 
+	18, 20, 40, 40, 40, 40, 40, 40, 
+	22, 40, 20, 40, 21, 21, 40, 40, 
+	40, 40, 40, 40, 40, 42, 40, 40, 
+	40, 40, 40, 40, 22, 20, 40, 43, 
+	43, 40, 40, 40, 40, 40, 40, 40, 
+	42, 40, 40, 40, 40, 40, 40, 24, 
+	20, 40, 40, 40, 18, 40, 40, 22, 
+	40, 44, 44, 40, 40, 40, 40, 40, 
+	40, 40, 42, 40, 40, 40, 40, 40, 
+	40, 26, 20, 40, 40, 40, 18, 24, 
+	40, 22, 40, 28, 28, 39, 39, 39, 
+	39, 39, 39, 39, 39, 39, 39, 39, 
+	39, 39, 28, 39, 45, 45, 40, 40, 
+	40, 40, 40, 40, 40, 46, 40, 40, 
+	40, 40, 40, 27, 16, 20, 40, 40, 
+	40, 18, 24, 26, 22, 40, 41, 41, 
+	40, 40, 40, 40, 40, 40, 40, 46, 
+	40, 40, 40, 40, 40, 40, 16, 20, 
+	40, 40, 40, 18, 24, 26, 22, 40, 
 	0
 };
 
 static const char _khmer_syllable_machine_trans_targs[] = {
-	10, 14, 17, 20, 11, 21, 10, 24, 
-	27, 30, 31, 32, 10, 22, 33, 34, 
-	26, 35, 10, 12, 4, 0, 16, 3, 
-	13, 15, 1, 10, 18, 2, 19, 10, 
-	23, 5, 8, 25, 6, 10, 28, 7, 
-	29, 9, 10
+	22, 1, 30, 24, 25, 3, 26, 5, 
+	27, 7, 28, 9, 29, 23, 22, 11, 
+	32, 22, 33, 13, 34, 15, 35, 17, 
+	36, 19, 37, 40, 39, 22, 31, 38, 
+	22, 0, 10, 2, 4, 6, 8, 22, 
+	22, 12, 14, 16, 18, 20, 21
 };
 
 static const char _khmer_syllable_machine_trans_actions[] = {
-	1, 2, 2, 0, 2, 2, 3, 2, 
-	2, 0, 2, 2, 6, 2, 0, 0, 
-	0, 0, 7, 2, 0, 0, 0, 0, 
-	2, 2, 0, 8, 0, 0, 0, 9, 
-	2, 0, 0, 2, 0, 10, 0, 0, 
-	0, 0, 11
+	1, 0, 2, 2, 2, 0, 0, 0, 
+	2, 0, 2, 0, 2, 2, 3, 0, 
+	4, 5, 2, 0, 0, 0, 2, 0, 
+	2, 0, 2, 4, 4, 8, 9, 0, 
+	10, 0, 0, 0, 0, 0, 0, 11, 
+	12, 0, 0, 0, 0, 0, 0
 };
 
 static const char _khmer_syllable_machine_to_state_actions[] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 
-	0, 0, 4, 0, 0, 0, 0, 0, 
 	0, 0, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 0, 6, 0, 
 	0, 0, 0, 0, 0, 0, 0, 0, 
-	0, 0, 0, 0
+	0, 0, 0, 0, 0, 0, 0, 0, 
+	0
 };
 
 static const char _khmer_syllable_machine_from_state_actions[] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 
-	0, 0, 5, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 0, 7, 0, 
 	0, 0, 0, 0, 0, 0, 0, 0, 
 	0, 0, 0, 0, 0, 0, 0, 0, 
-	0, 0, 0, 0
+	0
 };
 
 static const unsigned char _khmer_syllable_machine_eof_trans[] = {
-	1, 1, 1, 1, 1, 7, 7, 7, 
-	7, 7, 0, 19, 19, 19, 19, 19, 
-	19, 19, 19, 19, 19, 19, 32, 32, 
-	32, 32, 32, 32, 32, 32, 32, 32, 
-	32, 43, 43, 43
+	1, 1, 1, 1, 1, 1, 1, 1, 
+	1, 1, 1, 15, 18, 18, 18, 18, 
+	18, 18, 18, 18, 18, 18, 0, 33, 
+	33, 33, 33, 33, 33, 33, 33, 40, 
+	41, 41, 41, 41, 41, 41, 40, 41, 
+	41
 };
 
-static const int khmer_syllable_machine_start = 10;
-static const int khmer_syllable_machine_first_final = 10;
+static const int khmer_syllable_machine_start = 22;
+static const int khmer_syllable_machine_first_final = 22;
 static const int khmer_syllable_machine_error = -1;
 
-static const int khmer_syllable_machine_en_main = 10;
+static const int khmer_syllable_machine_en_main = 22;
 
 
 #line 36 "hb-ot-shape-complex-khmer-machine.rl"
 
 
 
-#line 74 "hb-ot-shape-complex-khmer-machine.rl"
+#line 80 "hb-ot-shape-complex-khmer-machine.rl"
 
 
 #define found_syllable(syllable_type) \
@@ -177,7 +247,7 @@ find_syllables (hb_buffer_t *buffer)
   int cs;
   hb_glyph_info_t *info = buffer->info;
   
-#line 181 "hb-ot-shape-complex-khmer-machine.hh"
+#line 251 "hb-ot-shape-complex-khmer-machine.hh"
 	{
 	cs = khmer_syllable_machine_start;
 	ts = 0;
@@ -185,7 +255,7 @@ find_syllables (hb_buffer_t *buffer)
 	act = 0;
 	}
 
-#line 95 "hb-ot-shape-complex-khmer-machine.rl"
+#line 101 "hb-ot-shape-complex-khmer-machine.rl"
 
 
   p = 0;
@@ -194,7 +264,7 @@ find_syllables (hb_buffer_t *buffer)
   unsigned int last = 0;
   unsigned int syllable_serial = 1;
   
-#line 198 "hb-ot-shape-complex-khmer-machine.hh"
+#line 268 "hb-ot-shape-complex-khmer-machine.hh"
 	{
 	int _slen;
 	int _trans;
@@ -204,11 +274,11 @@ find_syllables (hb_buffer_t *buffer)
 		goto _test_eof;
 _resume:
 	switch ( _khmer_syllable_machine_from_state_actions[cs] ) {
-	case 5:
+	case 7:
 #line 1 "NONE"
 	{ts = p;}
 	break;
-#line 212 "hb-ot-shape-complex-khmer-machine.hh"
+#line 282 "hb-ot-shape-complex-khmer-machine.hh"
 	}
 
 	_keys = _khmer_syllable_machine_trans_keys + (cs<<1);
@@ -231,47 +301,63 @@ _eof_trans:
 	{te = p+1;}
 	break;
 	case 8:
-#line 68 "hb-ot-shape-complex-khmer-machine.rl"
-	{te = p+1;{ found_syllable (consonant_syllable); }}
-	break;
-	case 10:
-#line 69 "hb-ot-shape-complex-khmer-machine.rl"
-	{te = p+1;{ found_syllable (broken_cluster); }}
-	break;
-	case 6:
-#line 70 "hb-ot-shape-complex-khmer-machine.rl"
+#line 76 "hb-ot-shape-complex-khmer-machine.rl"
 	{te = p+1;{ found_syllable (non_khmer_cluster); }}
 	break;
-	case 7:
-#line 68 "hb-ot-shape-complex-khmer-machine.rl"
+	case 10:
+#line 74 "hb-ot-shape-complex-khmer-machine.rl"
 	{te = p;p--;{ found_syllable (consonant_syllable); }}
 	break;
-	case 9:
-#line 69 "hb-ot-shape-complex-khmer-machine.rl"
+	case 12:
+#line 75 "hb-ot-shape-complex-khmer-machine.rl"
 	{te = p;p--;{ found_syllable (broken_cluster); }}
 	break;
 	case 11:
-#line 70 "hb-ot-shape-complex-khmer-machine.rl"
+#line 76 "hb-ot-shape-complex-khmer-machine.rl"
 	{te = p;p--;{ found_syllable (non_khmer_cluster); }}
 	break;
 	case 1:
-#line 68 "hb-ot-shape-complex-khmer-machine.rl"
+#line 74 "hb-ot-shape-complex-khmer-machine.rl"
 	{{p = ((te))-1;}{ found_syllable (consonant_syllable); }}
 	break;
-	case 3:
-#line 69 "hb-ot-shape-complex-khmer-machine.rl"
+	case 5:
+#line 75 "hb-ot-shape-complex-khmer-machine.rl"
 	{{p = ((te))-1;}{ found_syllable (broken_cluster); }}
 	break;
-#line 266 "hb-ot-shape-complex-khmer-machine.hh"
+	case 3:
+#line 1 "NONE"
+	{	switch( act ) {
+	case 2:
+	{{p = ((te))-1;} found_syllable (broken_cluster); }
+	break;
+	case 3:
+	{{p = ((te))-1;} found_syllable (non_khmer_cluster); }
+	break;
+	}
+	}
+	break;
+	case 4:
+#line 1 "NONE"
+	{te = p+1;}
+#line 75 "hb-ot-shape-complex-khmer-machine.rl"
+	{act = 2;}
+	break;
+	case 9:
+#line 1 "NONE"
+	{te = p+1;}
+#line 76 "hb-ot-shape-complex-khmer-machine.rl"
+	{act = 3;}
+	break;
+#line 352 "hb-ot-shape-complex-khmer-machine.hh"
 	}
 
 _again:
 	switch ( _khmer_syllable_machine_to_state_actions[cs] ) {
-	case 4:
+	case 6:
 #line 1 "NONE"
 	{ts = 0;}
 	break;
-#line 275 "hb-ot-shape-complex-khmer-machine.hh"
+#line 361 "hb-ot-shape-complex-khmer-machine.hh"
 	}
 
 	if ( ++p != pe )
@@ -287,7 +373,7 @@ _again:
 
 	}
 
-#line 104 "hb-ot-shape-complex-khmer-machine.rl"
+#line 110 "hb-ot-shape-complex-khmer-machine.rl"
 
 }
 
diff --git a/src/hb-ot-shape-complex-khmer-machine.rl b/src/hb-ot-shape-complex-khmer-machine.rl
index 7c795162..eb9f6988 100644
--- a/src/hb-ot-shape-complex-khmer-machine.rl
+++ b/src/hb-ot-shape-complex-khmer-machine.rl
@@ -40,28 +40,34 @@
 # Same order as enum khmer_category_t.  Not sure how to avoid duplication.
 C    = 1;
 V    = 2;
-N    = 3;
 ZWNJ = 5;
 ZWJ  = 6;
-M    = 7;
-SM   = 8;
 PLACEHOLDER = 11;
 DOTTEDCIRCLE = 12;
-RS    = 13;
-Coeng = 14;
-Ra    = 16;
-
-c = (C | Ra | V);		# is_consonant
-n = ((ZWNJ?.RS)? (N.N?)?);	# is_consonant_modifier
-z = ZWJ|ZWNJ;			# is_joiner
-
-cn = c.n?;
-matra_group = z?.M.N?;
-syllable_tail = (SM.SM?)?;
-
-
-broken_cluster =	n? (Coeng.cn)* matra_group* (Coeng.cn)? syllable_tail;
-consonant_syllable =	(c|PLACEHOLDER|DOTTEDCIRCLE) broken_cluster;
+Coeng= 14;
+Ra   = 16;
+Robatic = 20;
+Xgroup  = 21;
+Ygroup  = 22;
+VAbv = 26;
+VBlw = 27;
+VPre = 28;
+VPst = 29;
+
+c = (C | Ra | V);
+cn = c.((ZWJ|ZWNJ)?.Robatic)?;
+joiner = (ZWJ | ZWNJ);
+xgroup = (joiner*.Xgroup)*;
+ygroup = Ygroup*;
+
+# This grammar was experimentally extracted from what Uniscribe allows.
+
+matra_group = VPre? xgroup VBlw? xgroup (joiner?.VAbv)? xgroup VPst?;
+syllable_tail = xgroup matra_group xgroup (Coeng.c)? ygroup;
+
+
+broken_cluster =	(Coeng.cn)* syllable_tail;
+consonant_syllable =	(cn|PLACEHOLDER|DOTTEDCIRCLE) broken_cluster;
 other =			any;
 
 main := |*
diff --git a/src/hb-ot-shape-complex-khmer.cc b/src/hb-ot-shape-complex-khmer.cc
index d46f0b3a..9c766be1 100644
--- a/src/hb-ot-shape-complex-khmer.cc
+++ b/src/hb-ot-shape-complex-khmer.cc
@@ -241,7 +241,6 @@ setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
 		   hb_font_t                *font HB_UNUSED)
 {
   HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category);
-  HB_BUFFER_ALLOCATE_VAR (buffer, khmer_position);
 
   /* We cannot setup masks here.  We save information about characters
    * and setup masks later on in a pause-callback. */
@@ -330,7 +329,7 @@ reorder_consonant_syllable (const hb_ot_shape_plan_t *plan,
     }
 
     /* Reorder left matra piece. */
-    else if (info[i].khmer_position() == POS_PRE_M)
+    else if (info[i].khmer_category() == OT_VPre)
     {
       /* Move to the start. */
       buffer->merge_clusters (start, i + 1);
@@ -432,7 +431,6 @@ reorder (const hb_ot_shape_plan_t *plan,
     initial_reordering_syllable (plan, font->face, buffer, start, end);
 
   HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category);
-  HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_position);
 }
 
 static void
diff --git a/src/hb-ot-shape-complex-khmer.hh b/src/hb-ot-shape-complex-khmer.hh
index c86e7aad..4ee0b838 100644
--- a/src/hb-ot-shape-complex-khmer.hh
+++ b/src/hb-ot-shape-complex-khmer.hh
@@ -34,30 +34,22 @@
 
 /* buffer var allocations */
 #define khmer_category() indic_category() /* khmer_category_t */
-#define khmer_position() indic_position() /* khmer_position_t */
+#define khmer_position() indic_position() /* indic_position_t */
 
 
-typedef indic_category_t khmer_category_t;
-typedef indic_position_t khmer_position_t;
-
-
-static inline khmer_position_t
-matra_position_khmer (khmer_position_t side)
+/* Note: This enum is duplicated in the -machine.rl source file.
+ * Not sure how to avoid duplication. */
+enum khmer_category_t
 {
-  switch ((int) side)
-  {
-    case POS_PRE_C:
-      return POS_PRE_M;
+  OT_Robatic = 20,
+  OT_Xgroup  = 21,
+  OT_Ygroup  = 22,
 
-    case POS_POST_C:
-    case POS_ABOVE_C:
-    case POS_BELOW_C:
-      return POS_AFTER_POST;
-
-    default:
-      return side;
-  };
-}
+  OT_VAbv    = 26,
+  OT_VBlw    = 27,
+  OT_VPre    = 28,
+  OT_VPst    = 29,
+};
 
 static inline void
 set_khmer_properties (hb_glyph_info_t &info)
@@ -65,47 +57,58 @@ set_khmer_properties (hb_glyph_info_t &info)
   hb_codepoint_t u = info.codepoint;
   unsigned int type = hb_indic_get_categories (u);
   khmer_category_t cat = (khmer_category_t) (type & 0x7Fu);
-  khmer_position_t pos = (khmer_position_t) (type >> 8);
+  indic_position_t pos = (indic_position_t) (type >> 8);
 
 
   /*
    * Re-assign category
+   *
+   * These categories are experimentally extracted from what Uniscribe allows.
    */
-
-  if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */
-  else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CDu, 0x17D1u) ||
-		     u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */
+  switch (u)
   {
-    /* These can occur mid-syllable (eg. before matras), even though Unicode marks them as Syllable_Modifier.
-     * https://github.com/roozbehp/unicode-data/issues/5 */
-    cat = OT_M;
-    pos = POS_ABOVE_C;
+    case 0x179Au:
+      cat = (khmer_category_t) OT_Ra;
+      break;
+
+    case 0x17CCu:
+    case 0x17C9u:
+    case 0x17CAu:
+      cat = OT_Robatic;
+      break;
+
+    case 0x17C6u:
+    case 0x17CBu:
+    case 0x17CDu:
+    case 0x17CEu:
+    case 0x17CFu:
+    case 0x17D0u:
+    case 0x17D1u:
+      cat = OT_Xgroup;
+      break;
+
+    case 0x17C7u:
+    case 0x17C8u:
+    case 0x17DDu:
+    case 0x17D3u: /* Just guessing. Uniscribe doesn't categorize it. */
+      cat = OT_Ygroup;
+      break;
   }
-  else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) cat = OT_PLACEHOLDER;
-  else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE;
-
 
   /*
    * Re-assign position.
    */
-
-  if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS))
-  {
-    pos = POS_BASE_C;
-    if (u == 0x179Au)
-      cat = OT_Ra;
-  }
-  else if (cat == OT_M)
-  {
-    pos = matra_position_khmer (pos);
-  }
-  else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_A) | FLAG (OT_Symbol))))
-  {
-    pos = POS_SMVD;
-  }
+  if (cat == (khmer_category_t) OT_M)
+    switch ((int) pos)
+    {
+      case POS_PRE_C:	cat = OT_VPre; break;
+      case POS_BELOW_C:	cat = OT_VBlw; break;
+      case POS_ABOVE_C:	cat = OT_VAbv; break;
+      case POS_POST_C:	cat = OT_VPst; break;
+      default: assert (0);
+    };
 
   info.khmer_category() = cat;
-  info.khmer_position() = pos;
 }
 
 
commit aaaa65baa7fcfb65ae814528bdd93cc5c4ea540d
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Mon Oct 1 16:59:48 2018 +0200

    [khmer] Remove unused code

diff --git a/src/hb-ot-shape-complex-khmer.hh b/src/hb-ot-shape-complex-khmer.hh
index 3371c130..c86e7aad 100644
--- a/src/hb-ot-shape-complex-khmer.hh
+++ b/src/hb-ot-shape-complex-khmer.hh
@@ -59,18 +59,6 @@ matra_position_khmer (khmer_position_t side)
   };
 }
 
-static inline bool
-is_consonant_or_vowel (const hb_glyph_info_t &info)
-{
-  return is_one_of (info, CONSONANT_FLAGS | FLAG (OT_V));
-}
-
-static inline bool
-is_coeng (const hb_glyph_info_t &info)
-{
-  return is_one_of (info, FLAG (OT_Coeng));
-}
-
 static inline void
 set_khmer_properties (hb_glyph_info_t &info)
 {


More information about the HarfBuzz mailing list