[HarfBuzz] harfbuzz: Branch 'master' - 16 commits
Behdad Esfahbod
behdad at kemper.freedesktop.org
Tue Feb 27 02:25:28 UTC 2018
CMakeLists.txt | 2
src/Makefile.am | 8 -
src/Makefile.sources | 1
src/gen-unicode-ranges.py | 52 ++++++++
src/hb-ot-os2-table.hh | 31 ++++-
src/hb-ot-os2-unicode-ranges.hh | 247 ++++++++++++++++++++++++++++++++++++++++
src/test-unicode-ranges.cc | 67 ++++++++++
7 files changed, 404 insertions(+), 4 deletions(-)
New commits:
commit d0caf7e5e3294c25230ed0e5580c647fa0f1a2c8
Merge: 6c844ae8 44dc36dd
Author: Garret Rieger <grieger at google.com>
Date: Mon Feb 26 19:24:18 2018 -0700
Merge pull request #840 from googlefonts/os2
[subset] During os2 subsetting update ulUnicodeRange[]
commit 44dc36dd82fc948a15e2ad0d605eb4a466b3553d
Author: Garret Rieger <grieger at google.com>
Date: Mon Feb 26 17:56:23 2018 -0800
[subset] update to comment in hb-ot-os2-unicode-ranges.hh
diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh
index 2c05d895..2cf168f9 100644
--- a/src/hb-ot-os2-unicode-ranges.hh
+++ b/src/hb-ot-os2-unicode-ranges.hh
@@ -38,7 +38,7 @@ struct Range {
unsigned int bit;
};
-/* Note: The contents of this array was generated using util/generate-unicode-ranges.py. */
+/* Note: The contents of this array was generated using src/gen-unicode-ranges.py. */
static Range os2UnicodeRangesSorted[] =
{
{ 0x0, 0x7F, 0}, // Basic Latin
commit ad3f2f77dafdee524e836e732077ee9670602369
Author: Garret Rieger <grieger at google.com>
Date: Mon Feb 26 17:51:27 2018 -0800
[subset] small cleanups in hb-ot-os2-table.
diff --git a/src/hb-ot-os2-table.hh b/src/hb-ot-os2-table.hh
index 754537a6..6cb8d494 100644
--- a/src/hb-ot-os2-table.hh
+++ b/src/hb-ot-os2-table.hh
@@ -83,8 +83,9 @@ struct os2
for (unsigned int i = 0; i < codepoints.len; i++)
{
hb_codepoint_t cp = codepoints[i];
- int bit = hb_get_unicode_range_bit (cp);
- if (bit >= 0 && bit < 128) {
+ unsigned int bit = hb_get_unicode_range_bit (cp);
+ if (bit < 128)
+ {
unsigned int block = bit / 32;
unsigned int bit_in_block = bit % 32;
unsigned int mask = 1 << bit_in_block;
diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh
index f4b339ea..2c05d895 100644
--- a/src/hb-ot-os2-unicode-ranges.hh
+++ b/src/hb-ot-os2-unicode-ranges.hh
@@ -230,7 +230,7 @@ _compare_range (const void *_key, const void *_item, void *_arg)
* hb_get_unicode_range_bit:
* Returns the bit to be set in os/2 ulUnicodeRange for a given codepoint.
**/
-static int
+static unsigned int
hb_get_unicode_range_bit (hb_codepoint_t cp)
{
Range *range = (Range*) hb_bsearch_r (&cp, os2UnicodeRangesSorted,
commit f1c8fc3487d5c5efb8ee1804acb07e6e282d3bc5
Author: Garret Rieger <grieger at google.com>
Date: Mon Feb 26 17:48:51 2018 -0800
[subset] small updates to gen-unicode-ranges.py
diff --git a/src/gen-unicode-ranges.py b/src/gen-unicode-ranges.py
index e24b262a..3b59cd86 100644
--- a/src/gen-unicode-ranges.py
+++ b/src/gen-unicode-ranges.py
@@ -1,7 +1,8 @@
# -*- coding: utf-8 -*-
# Generates the code for a sorted unicode range array as used in hb-ot-os2-unicode-ranges.hh
-# Input is a tab seperated list of unicode ranges from the otspec.
+# Input is a tab seperated list of unicode ranges from the otspec
+# (https://docs.microsoft.com/en-us/typography/opentype/spec/os2#ulunicoderange1).
import io
import re
@@ -10,7 +11,8 @@ import sys
reload(sys)
sys.setdefaultencoding('utf-8')
-print (u"""static Range os2UnicodeRangesSorted[] = {""")
+print (u"""static Range os2UnicodeRangesSorted[] =
+{""")
args = sys.argv[1:]
input_file = args[0]
diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh
index a3ad4d34..f4b339ea 100644
--- a/src/hb-ot-os2-unicode-ranges.hh
+++ b/src/hb-ot-os2-unicode-ranges.hh
@@ -39,7 +39,8 @@ struct Range {
};
/* Note: The contents of this array was generated using util/generate-unicode-ranges.py. */
-static Range os2UnicodeRangesSorted[] = {
+static Range os2UnicodeRangesSorted[] =
+{
{ 0x0, 0x7F, 0}, // Basic Latin
{ 0x80, 0xFF, 1}, // Latin-1 Supplement
{ 0x100, 0x17F, 2}, // Latin Extended-A
commit 0c0fe2ff8209228f2ddfce464b7b6f1b1ee1654a
Author: Garret Rieger <grieger at google.com>
Date: Mon Feb 26 17:47:10 2018 -0800
[subset] Move util/generate-unicode-ranges.py to src/gen-unicode-ranges.py
diff --git a/util/generate-unicode-ranges.py b/src/gen-unicode-ranges.py
similarity index 100%
rename from util/generate-unicode-ranges.py
rename to src/gen-unicode-ranges.py
commit f630ae5161bfc8420f9ae0127fd8c7f447874fdd
Author: Garret Rieger <grieger at google.com>
Date: Mon Feb 26 17:46:17 2018 -0800
[subset] unsigned int -> hb_codepoint_t.
diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh
index a456ffdd..a3ad4d34 100644
--- a/src/hb-ot-os2-unicode-ranges.hh
+++ b/src/hb-ot-os2-unicode-ranges.hh
@@ -33,8 +33,8 @@
namespace OT {
struct Range {
- unsigned int start;
- unsigned int end;
+ hb_codepoint_t start;
+ hb_codepoint_t end;
unsigned int bit;
};
commit 6368ce4c927b1457cf19945d5957e91d4621dc8b
Author: Garret Rieger <grieger at google.com>
Date: Mon Feb 26 17:44:39 2018 -0800
[subset] const in _compare_range.
diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh
index 9e06c100..a456ffdd 100644
--- a/src/hb-ot-os2-unicode-ranges.hh
+++ b/src/hb-ot-os2-unicode-ranges.hh
@@ -214,12 +214,12 @@ static Range os2UnicodeRangesSorted[] = {
static int
_compare_range (const void *_key, const void *_item, void *_arg)
{
- hb_codepoint_t *cp = (hb_codepoint_t *) _key;
- Range *range = (Range *) _item;
+ hb_codepoint_t cp = *((hb_codepoint_t *) _key);
+ const Range *range = (Range *) _item;
- if (*cp < range->start)
+ if (cp < range->start)
return -1;
- else if (*cp <= range->end)
+ else if (cp <= range->end)
return 0;
else
return 1;
commit 0be9fea0a96701b159a9db190e55b1c3efc38a28
Author: Garret Rieger <grieger at google.com>
Date: Mon Feb 26 16:15:09 2018 -0800
[subset] Add comment to os2UnicodeRangesSorted.
diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh
index 4148342a..9e06c100 100644
--- a/src/hb-ot-os2-unicode-ranges.hh
+++ b/src/hb-ot-os2-unicode-ranges.hh
@@ -38,6 +38,7 @@ struct Range {
unsigned int bit;
};
+/* Note: The contents of this array was generated using util/generate-unicode-ranges.py. */
static Range os2UnicodeRangesSorted[] = {
{ 0x0, 0x7F, 0}, // Basic Latin
{ 0x80, 0xFF, 1}, // Latin-1 Supplement
commit e20ab71d12b032371b830b76462e5e979d963b58
Author: Garret Rieger <grieger at google.com>
Date: Mon Feb 26 16:13:10 2018 -0800
[subset] Fix incorrect index.
diff --git a/src/hb-ot-os2-table.hh b/src/hb-ot-os2-table.hh
index 7325e93e..754537a6 100644
--- a/src/hb-ot-os2-table.hh
+++ b/src/hb-ot-os2-table.hh
@@ -95,7 +95,7 @@ struct os2
/* the spec says that bit 57 ("Non Plane 0") implies that there's
at least one codepoint beyond the BMP; so I also include all
the non-BMP codepoints here */
- ulUnicodeRange[2].set (ulUnicodeRange[2] | (1 << 25));
+ ulUnicodeRange[1].set (ulUnicodeRange[1] | (1 << 25));
}
}
}
commit 074b5a29a166d1812abc7229a71af4a3bb311536
Author: Garret Rieger <grieger at google.com>
Date: Mon Feb 26 16:11:24 2018 -0800
[subset] Add special case handling of bit 57 in os2 ulUnicodeRange.
diff --git a/src/hb-ot-os2-table.hh b/src/hb-ot-os2-table.hh
index 63e97264..7325e93e 100644
--- a/src/hb-ot-os2-table.hh
+++ b/src/hb-ot-os2-table.hh
@@ -90,6 +90,13 @@ struct os2
unsigned int mask = 1 << bit_in_block;
ulUnicodeRange[block].set (ulUnicodeRange[block] | mask);
}
+ if (cp >= 0x10000 && cp <= 0x110000)
+ {
+ /* the spec says that bit 57 ("Non Plane 0") implies that there's
+ at least one codepoint beyond the BMP; so I also include all
+ the non-BMP codepoints here */
+ ulUnicodeRange[2].set (ulUnicodeRange[2] | (1 << 25));
+ }
}
}
diff --git a/src/test-unicode-ranges.cc b/src/test-unicode-ranges.cc
index 68dfe079..16d01ef2 100644
--- a/src/test-unicode-ranges.cc
+++ b/src/test-unicode-ranges.cc
@@ -31,9 +31,6 @@
void
test (hb_codepoint_t cp, int bit)
{
- // TODO:
- // Note: * Setting bit 57 implies that there is at least one codepoint beyond the Basic
- // Multilingual Plane that is supported by this font. (See fontTools impl.)
if (OT::hb_get_unicode_range_bit (cp) != bit)
{
fprintf (stderr, "got incorrect bit (%d) for cp 0x%X. Should have been %d.",
commit ddc4f2b9fc5566e70558a57133289f84d467cc98
Author: Garret Rieger <grieger at google.com>
Date: Mon Feb 26 15:59:32 2018 -0800
[subset] Add python util that was used to generate hb-ot-os2-unicode-ranges.hh
diff --git a/util/generate-unicode-ranges.py b/util/generate-unicode-ranges.py
new file mode 100644
index 00000000..e24b262a
--- /dev/null
+++ b/util/generate-unicode-ranges.py
@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+
+# Generates the code for a sorted unicode range array as used in hb-ot-os2-unicode-ranges.hh
+# Input is a tab seperated list of unicode ranges from the otspec.
+
+import io
+import re
+import sys
+
+reload(sys)
+sys.setdefaultencoding('utf-8')
+
+print (u"""static Range os2UnicodeRangesSorted[] = {""")
+
+args = sys.argv[1:]
+input_file = args[0]
+
+with io.open(input_file, mode="r", encoding="utf-8") as f:
+
+ all_ranges = [];
+ current_bit = 0
+ while True:
+ line = f.readline().strip()
+ if not line:
+ break
+ fields = re.split(r'\t+', line)
+ if len(fields) == 3:
+ current_bit = fields[0]
+ fields = fields[1:]
+ elif len(fields) > 3:
+ raise Error("bad input :(.")
+
+ name = fields[0]
+ ranges = re.split("-", fields[1])
+ if len(ranges) != 2:
+ raise Error("bad input :(.")
+
+ v = tuple((int(ranges[0], 16), int(ranges[1], 16), int(current_bit), name))
+ all_ranges.append(v)
+
+all_ranges = sorted(all_ranges, key=lambda t: t[0])
+
+for ranges in all_ranges:
+ start = ("0x%X" % ranges[0]).rjust(8)
+ end = ("0x%X" % ranges[1]).rjust(8)
+ bit = ("%s" % ranges[2]).rjust(3)
+
+ print " {%s, %s, %s}, // %s" % (start, end, bit, ranges[3])
+
+print (u"""};""");
commit f757757eda5f00a89a156e3427bdf8c4313611ef
Author: Garret Rieger <grieger at google.com>
Date: Mon Feb 26 15:56:36 2018 -0800
[subset] Add cmake build def for test-unicode-ranges.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f7a5d830..660da5a1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -809,7 +809,7 @@ endif ()
## src/ executables
if (NOT HB_DISABLE_TEST_PROGS)
- foreach (prog main test test-would-substitute test-size-params test-buffer-serialize hb-ot-tag)
+ foreach (prog main test test-would-substitute test-size-params test-buffer-serialize hb-ot-tag test-unicode-ranges)
set (prog_name ${prog})
if (${prog_name} STREQUAL "test")
# test can not be used as a valid executable name on cmake, lets special case it
diff --git a/src/Makefile.am b/src/Makefile.am
index 73c0c61d..2871f30f 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -389,7 +389,6 @@ test_ot_tag_CPPFLAGS = $(HBCFLAGS) -DMAIN
test_ot_tag_LDADD = libharfbuzz.la $(HBLIBS)
test_unicode_ranges_SOURCES = test-unicode-ranges.cc
-test_unicode_ranges_CPPFLAGS = $(HBCFLAGS) -DMAIN
test_unicode_ranges_LDADD = libharfbuzz.la $(HBLIBS)
TESTS_ENVIRONMENT = \
commit f82f2a3e50805503d93aa1aa1ccb27da4967a14a
Author: Garret Rieger <grieger at google.com>
Date: Mon Feb 26 15:52:01 2018 -0800
[subset] Rename hb-os2-unicode-ranges.cc to test-unicode-ranges.cc.
diff --git a/src/Makefile.am b/src/Makefile.am
index 6044366d..73c0c61d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -388,7 +388,7 @@ test_ot_tag_SOURCES = hb-ot-tag.cc
test_ot_tag_CPPFLAGS = $(HBCFLAGS) -DMAIN
test_ot_tag_LDADD = libharfbuzz.la $(HBLIBS)
-test_unicode_ranges_SOURCES = hb-ot-os2-unicode-ranges.cc
+test_unicode_ranges_SOURCES = test-unicode-ranges.cc
test_unicode_ranges_CPPFLAGS = $(HBCFLAGS) -DMAIN
test_unicode_ranges_LDADD = libharfbuzz.la $(HBLIBS)
diff --git a/src/hb-ot-os2-unicode-ranges.cc b/src/test-unicode-ranges.cc
similarity index 100%
rename from src/hb-ot-os2-unicode-ranges.cc
rename to src/test-unicode-ranges.cc
commit 4014555ca083dea3e4f42120aeaf52a2186b8a09
Author: Garret Rieger <grieger at google.com>
Date: Mon Feb 26 15:50:13 2018 -0800
[subset] set ulUnicodeRange[] in os2.
diff --git a/src/Makefile.sources b/src/Makefile.sources
index c2071643..e114a501 100644
--- a/src/Makefile.sources
+++ b/src/Makefile.sources
@@ -29,7 +29,6 @@ HB_BASE_sources = \
hb-ot-name-table.hh \
hb-ot-os2-table.hh \
hb-ot-os2-unicode-ranges.hh \
- hb-ot-os2-unicode-ranges.cc \
hb-ot-post-macroman.hh \
hb-ot-post-table.hh \
hb-ot-tag.cc \
diff --git a/src/hb-ot-os2-table.hh b/src/hb-ot-os2-table.hh
index 2d9d2149..63e97264 100644
--- a/src/hb-ot-os2-table.hh
+++ b/src/hb-ot-os2-table.hh
@@ -28,7 +28,7 @@
#define HB_OT_OS2_TABLE_HH
#include "hb-open-type-private.hh"
-
+#include "hb-ot-os2-unicode-ranges.hh"
namespace OT {
@@ -67,11 +67,32 @@ struct os2
os2_prime->usFirstCharIndex.set (min_cp);
os2_prime->usLastCharIndex.set (max_cp);
+ _update_unicode_ranges (plan->codepoints, os2_prime->ulUnicodeRange);
bool result = hb_subset_plan_add_table(plan, HB_OT_TAG_os2, os2_prime_blob);
+
hb_blob_destroy (os2_prime_blob);
return result;
}
+ inline void _update_unicode_ranges (const hb_prealloced_array_t<hb_codepoint_t> &codepoints,
+ HBUINT32 ulUnicodeRange[4]) const
+ {
+ for (unsigned int i = 0; i < 4; i++)
+ ulUnicodeRange[i].set (0);
+
+ for (unsigned int i = 0; i < codepoints.len; i++)
+ {
+ hb_codepoint_t cp = codepoints[i];
+ int bit = hb_get_unicode_range_bit (cp);
+ if (bit >= 0 && bit < 128) {
+ unsigned int block = bit / 32;
+ unsigned int bit_in_block = bit % 32;
+ unsigned int mask = 1 << bit_in_block;
+ ulUnicodeRange[block].set (ulUnicodeRange[block] | mask);
+ }
+ }
+ }
+
static inline void find_min_and_max_codepoint (const hb_prealloced_array_t<hb_codepoint_t> &codepoints,
uint16_t *min_cp, /* OUT */
uint16_t *max_cp /* OUT */)
diff --git a/src/hb-ot-os2-unicode-ranges.cc b/src/hb-ot-os2-unicode-ranges.cc
index f2c06397..68dfe079 100644
--- a/src/hb-ot-os2-unicode-ranges.cc
+++ b/src/hb-ot-os2-unicode-ranges.cc
@@ -26,225 +26,18 @@
#include "hb-private.hh"
-#include "hb-dsalgs.hh"
-
-struct Range {
- unsigned int start;
- unsigned int end;
- unsigned int bit;
-};
-
-#define NUM_RANGES 169
-static Range os2UnicodeRangesSorted[NUM_RANGES] = {
- { 0x0, 0x7F, 0}, // Basic Latin
- { 0x80, 0xFF, 1}, // Latin-1 Supplement
- { 0x100, 0x17F, 2}, // Latin Extended-A
- { 0x180, 0x24F, 3}, // Latin Extended-B
- { 0x250, 0x2AF, 4}, // IPA Extensions
- { 0x2B0, 0x2FF, 5}, // Spacing Modifier Letters
- { 0x300, 0x36F, 6}, // Combining Diacritical Marks
- { 0x370, 0x3FF, 7}, // Greek and Coptic
- { 0x400, 0x4FF, 9}, // Cyrillic
- { 0x500, 0x52F, 9}, // Cyrillic Supplement
- { 0x530, 0x58F, 10}, // Armenian
- { 0x590, 0x5FF, 11}, // Hebrew
- { 0x600, 0x6FF, 13}, // Arabic
- { 0x700, 0x74F, 71}, // Syriac
- { 0x750, 0x77F, 13}, // Arabic Supplement
- { 0x780, 0x7BF, 72}, // Thaana
- { 0x7C0, 0x7FF, 14}, // NKo
- { 0x900, 0x97F, 15}, // Devanagari
- { 0x980, 0x9FF, 16}, // Bengali
- { 0xA00, 0xA7F, 17}, // Gurmukhi
- { 0xA80, 0xAFF, 18}, // Gujarati
- { 0xB00, 0xB7F, 19}, // Oriya
- { 0xB80, 0xBFF, 20}, // Tamil
- { 0xC00, 0xC7F, 21}, // Telugu
- { 0xC80, 0xCFF, 22}, // Kannada
- { 0xD00, 0xD7F, 23}, // Malayalam
- { 0xD80, 0xDFF, 73}, // Sinhala
- { 0xE00, 0xE7F, 24}, // Thai
- { 0xE80, 0xEFF, 25}, // Lao
- { 0xF00, 0xFFF, 70}, // Tibetan
- { 0x1000, 0x109F, 74}, // Myanmar
- { 0x10A0, 0x10FF, 26}, // Georgian
- { 0x1100, 0x11FF, 28}, // Hangul Jamo
- { 0x1200, 0x137F, 75}, // Ethiopic
- { 0x1380, 0x139F, 75}, // Ethiopic Supplement
- { 0x13A0, 0x13FF, 76}, // Cherokee
- { 0x1400, 0x167F, 77}, // Unified Canadian Aboriginal Syllabics
- { 0x1680, 0x169F, 78}, // Ogham
- { 0x16A0, 0x16FF, 79}, // Runic
- { 0x1700, 0x171F, 84}, // Tagalog
- { 0x1720, 0x173F, 84}, // Hanunoo
- { 0x1740, 0x175F, 84}, // Buhid
- { 0x1760, 0x177F, 84}, // Tagbanwa
- { 0x1780, 0x17FF, 80}, // Khmer
- { 0x1800, 0x18AF, 81}, // Mongolian
- { 0x1900, 0x194F, 93}, // Limbu
- { 0x1950, 0x197F, 94}, // Tai Le
- { 0x1980, 0x19DF, 95}, // New Tai Lue
- { 0x19E0, 0x19FF, 80}, // Khmer Symbols
- { 0x1A00, 0x1A1F, 96}, // Buginese
- { 0x1B00, 0x1B7F, 27}, // Balinese
- { 0x1B80, 0x1BBF, 112}, // Sundanese
- { 0x1C00, 0x1C4F, 113}, // Lepcha
- { 0x1C50, 0x1C7F, 114}, // Ol Chiki
- { 0x1D00, 0x1D7F, 4}, // Phonetic Extensions
- { 0x1D80, 0x1DBF, 4}, // Phonetic Extensions Supplement
- { 0x1DC0, 0x1DFF, 6}, // Combining Diacritical Marks Supplement
- { 0x1E00, 0x1EFF, 29}, // Latin Extended Additional
- { 0x1F00, 0x1FFF, 30}, // Greek Extended
- { 0x2000, 0x206F, 31}, // General Punctuation
- { 0x2070, 0x209F, 32}, // Superscripts And Subscripts
- { 0x20A0, 0x20CF, 33}, // Currency Symbols
- { 0x20D0, 0x20FF, 34}, // Combining Diacritical Marks For Symbols
- { 0x2100, 0x214F, 35}, // Letterlike Symbols
- { 0x2150, 0x218F, 36}, // Number Forms
- { 0x2190, 0x21FF, 37}, // Arrows
- { 0x2200, 0x22FF, 38}, // Mathematical Operators
- { 0x2300, 0x23FF, 39}, // Miscellaneous Technical
- { 0x2400, 0x243F, 40}, // Control Pictures
- { 0x2440, 0x245F, 41}, // Optical Character Recognition
- { 0x2460, 0x24FF, 42}, // Enclosed Alphanumerics
- { 0x2500, 0x257F, 43}, // Box Drawing
- { 0x2580, 0x259F, 44}, // Block Elements
- { 0x25A0, 0x25FF, 45}, // Geometric Shapes
- { 0x2600, 0x26FF, 46}, // Miscellaneous Symbols
- { 0x2700, 0x27BF, 47}, // Dingbats
- { 0x27C0, 0x27EF, 38}, // Miscellaneous Mathematical Symbols-A
- { 0x27F0, 0x27FF, 37}, // Supplemental Arrows-A
- { 0x2800, 0x28FF, 82}, // Braille Patterns
- { 0x2900, 0x297F, 37}, // Supplemental Arrows-B
- { 0x2980, 0x29FF, 38}, // Miscellaneous Mathematical Symbols-B
- { 0x2A00, 0x2AFF, 38}, // Supplemental Mathematical Operators
- { 0x2B00, 0x2BFF, 37}, // Miscellaneous Symbols and Arrows
- { 0x2C00, 0x2C5F, 97}, // Glagolitic
- { 0x2C60, 0x2C7F, 29}, // Latin Extended-C
- { 0x2C80, 0x2CFF, 8}, // Coptic
- { 0x2D00, 0x2D2F, 26}, // Georgian Supplement
- { 0x2D30, 0x2D7F, 98}, // Tifinagh
- { 0x2D80, 0x2DDF, 75}, // Ethiopic Extended
- { 0x2DE0, 0x2DFF, 9}, // Cyrillic Extended-A
- { 0x2E00, 0x2E7F, 31}, // Supplemental Punctuation
- { 0x2E80, 0x2EFF, 59}, // CJK Radicals Supplement
- { 0x2F00, 0x2FDF, 59}, // Kangxi Radicals
- { 0x2FF0, 0x2FFF, 59}, // Ideographic Description Characters
- { 0x3000, 0x303F, 48}, // CJK Symbols And Punctuation
- { 0x3040, 0x309F, 49}, // Hiragana
- { 0x30A0, 0x30FF, 50}, // Katakana
- { 0x3100, 0x312F, 51}, // Bopomofo
- { 0x3130, 0x318F, 52}, // Hangul Compatibility Jamo
- { 0x3190, 0x319F, 59}, // Kanbun
- { 0x31A0, 0x31BF, 51}, // Bopomofo Extended
- { 0x31C0, 0x31EF, 61}, // CJK Strokes
- { 0x31F0, 0x31FF, 50}, // Katakana Phonetic Extensions
- { 0x3200, 0x32FF, 54}, // Enclosed CJK Letters And Months
- { 0x3300, 0x33FF, 55}, // CJK Compatibility
- { 0x3400, 0x4DBF, 59}, // CJK Unified Ideographs Extension A
- { 0x4DC0, 0x4DFF, 99}, // Yijing Hexagram Symbols
- { 0x4E00, 0x9FFF, 59}, // CJK Unified Ideographs
- { 0xA000, 0xA48F, 83}, // Yi Syllables
- { 0xA490, 0xA4CF, 83}, // Yi Radicals
- { 0xA500, 0xA63F, 12}, // Vai
- { 0xA640, 0xA69F, 9}, // Cyrillic Extended-B
- { 0xA700, 0xA71F, 5}, // Modifier Tone Letters
- { 0xA720, 0xA7FF, 29}, // Latin Extended-D
- { 0xA800, 0xA82F, 100}, // Syloti Nagri
- { 0xA840, 0xA87F, 53}, // Phags-pa
- { 0xA880, 0xA8DF, 115}, // Saurashtra
- { 0xA900, 0xA92F, 116}, // Kayah Li
- { 0xA930, 0xA95F, 117}, // Rejang
- { 0xAA00, 0xAA5F, 118}, // Cham
- { 0xAC00, 0xD7AF, 56}, // Hangul Syllables
- { 0xD800, 0xDFFF, 57}, // Non-Plane 0 *
- { 0xE000, 0xF8FF, 60}, // Private Use Area (plane 0)
- { 0xF900, 0xFAFF, 61}, // CJK Compatibility Ideographs
- { 0xFB00, 0xFB4F, 62}, // Alphabetic Presentation Forms
- { 0xFB50, 0xFDFF, 63}, // Arabic Presentation Forms-A
- { 0xFE00, 0xFE0F, 91}, // Variation Selectors
- { 0xFE10, 0xFE1F, 65}, // Vertical Forms
- { 0xFE20, 0xFE2F, 64}, // Combining Half Marks
- { 0xFE30, 0xFE4F, 65}, // CJK Compatibility Forms
- { 0xFE50, 0xFE6F, 66}, // Small Form Variants
- { 0xFE70, 0xFEFF, 67}, // Arabic Presentation Forms-B
- { 0xFF00, 0xFFEF, 68}, // Halfwidth And Fullwidth Forms
- { 0xFFF0, 0xFFFF, 69}, // Specials
- { 0x10000, 0x1007F, 101}, // Linear B Syllabary
- { 0x10080, 0x100FF, 101}, // Linear B Ideograms
- { 0x10100, 0x1013F, 101}, // Aegean Numbers
- { 0x10140, 0x1018F, 102}, // Ancient Greek Numbers
- { 0x10190, 0x101CF, 119}, // Ancient Symbols
- { 0x101D0, 0x101FF, 120}, // Phaistos Disc
- { 0x10280, 0x1029F, 121}, // Lycian
- { 0x102A0, 0x102DF, 121}, // Carian
- { 0x10300, 0x1032F, 85}, // Old Italic
- { 0x10330, 0x1034F, 86}, // Gothic
- { 0x10380, 0x1039F, 103}, // Ugaritic
- { 0x103A0, 0x103DF, 104}, // Old Persian
- { 0x10400, 0x1044F, 87}, // Deseret
- { 0x10450, 0x1047F, 105}, // Shavian
- { 0x10480, 0x104AF, 106}, // Osmanya
- { 0x10800, 0x1083F, 107}, // Cypriot Syllabary
- { 0x10900, 0x1091F, 58}, // Phoenician
- { 0x10920, 0x1093F, 121}, // Lydian
- { 0x10A00, 0x10A5F, 108}, // Kharoshthi
- { 0x12000, 0x123FF, 110}, // Cuneiform
- { 0x12400, 0x1247F, 110}, // Cuneiform Numbers and Punctuation
- { 0x1D000, 0x1D0FF, 88}, // Byzantine Musical Symbols
- { 0x1D100, 0x1D1FF, 88}, // Musical Symbols
- { 0x1D200, 0x1D24F, 88}, // Ancient Greek Musical Notation
- { 0x1D300, 0x1D35F, 109}, // Tai Xuan Jing Symbols
- { 0x1D360, 0x1D37F, 111}, // Counting Rod Numerals
- { 0x1D400, 0x1D7FF, 89}, // Mathematical Alphanumeric Symbols
- { 0x1F000, 0x1F02F, 122}, // Mahjong Tiles
- { 0x1F030, 0x1F09F, 122}, // Domino Tiles
- { 0x20000, 0x2A6DF, 59}, // CJK Unified Ideographs Extension B
- { 0x2F800, 0x2FA1F, 61}, // CJK Compatibility Ideographs Supplement
- { 0xE0000, 0xE007F, 92}, // Tags
- { 0xE0100, 0xE01EF, 91}, // Variation Selectors Supplement
- { 0xF0000, 0xFFFFD, 90}, // Private Use (plane 15)
- {0x100000, 0x10FFFD, 90}, // Private Use (plane 16)
-};
-
-static int
-_compare_range (const void *_key, const void *_item, void *_arg)
-{
- hb_codepoint_t *cp = (hb_codepoint_t *) _key;
- Range *range = (Range *) _item;
-
- if (*cp < range->start)
- return -1;
- else if (*cp <= range->end)
- return 0;
- else
- return 1;
-}
-
-/**
- * hb_get_unicode_range_bit:
- * Returns the bit to be set in os/2 ulUnicodeRange for a given codepoint.
- **/
-static int
-hb_get_unicode_range_bit (hb_codepoint_t cp)
-{
- Range *range = (Range*) hb_bsearch_r (&cp, os2UnicodeRangesSorted, NUM_RANGES, sizeof(Range),
- _compare_range, nullptr);
- if (range != NULL)
- return range->bit;
- return -1;
-}
-
-
-#ifdef MAIN
+#include "hb-ot-os2-unicode-ranges.hh"
void
test (hb_codepoint_t cp, int bit)
{
- if (hb_get_unicode_range_bit (cp) != bit)
+ // TODO:
+ // Note: * Setting bit 57 implies that there is at least one codepoint beyond the Basic
+ // Multilingual Plane that is supported by this font. (See fontTools impl.)
+ if (OT::hb_get_unicode_range_bit (cp) != bit)
{
fprintf (stderr, "got incorrect bit (%d) for cp 0x%X. Should have been %d.",
- hb_get_unicode_range_bit (cp),
+ OT::hb_get_unicode_range_bit (cp),
cp,
bit);
abort();
@@ -275,5 +68,3 @@ main (void)
test_get_unicode_range_bit ();
return 0;
}
-
-#endif
diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh
index 5f36b20c..4148342a 100644
--- a/src/hb-ot-os2-unicode-ranges.hh
+++ b/src/hb-ot-os2-unicode-ranges.hh
@@ -28,11 +28,217 @@
#define HB_OT_OS2_UNICODE_RANGES_HH
#include "hb-private.hh"
+#include "hb-dsalgs.hh"
namespace OT {
-HB_INTERNAL int
-hb_get_unicode_range_bit (hb_codepoint_t cp);
+struct Range {
+ unsigned int start;
+ unsigned int end;
+ unsigned int bit;
+};
+
+static Range os2UnicodeRangesSorted[] = {
+ { 0x0, 0x7F, 0}, // Basic Latin
+ { 0x80, 0xFF, 1}, // Latin-1 Supplement
+ { 0x100, 0x17F, 2}, // Latin Extended-A
+ { 0x180, 0x24F, 3}, // Latin Extended-B
+ { 0x250, 0x2AF, 4}, // IPA Extensions
+ { 0x2B0, 0x2FF, 5}, // Spacing Modifier Letters
+ { 0x300, 0x36F, 6}, // Combining Diacritical Marks
+ { 0x370, 0x3FF, 7}, // Greek and Coptic
+ { 0x400, 0x4FF, 9}, // Cyrillic
+ { 0x500, 0x52F, 9}, // Cyrillic Supplement
+ { 0x530, 0x58F, 10}, // Armenian
+ { 0x590, 0x5FF, 11}, // Hebrew
+ { 0x600, 0x6FF, 13}, // Arabic
+ { 0x700, 0x74F, 71}, // Syriac
+ { 0x750, 0x77F, 13}, // Arabic Supplement
+ { 0x780, 0x7BF, 72}, // Thaana
+ { 0x7C0, 0x7FF, 14}, // NKo
+ { 0x900, 0x97F, 15}, // Devanagari
+ { 0x980, 0x9FF, 16}, // Bengali
+ { 0xA00, 0xA7F, 17}, // Gurmukhi
+ { 0xA80, 0xAFF, 18}, // Gujarati
+ { 0xB00, 0xB7F, 19}, // Oriya
+ { 0xB80, 0xBFF, 20}, // Tamil
+ { 0xC00, 0xC7F, 21}, // Telugu
+ { 0xC80, 0xCFF, 22}, // Kannada
+ { 0xD00, 0xD7F, 23}, // Malayalam
+ { 0xD80, 0xDFF, 73}, // Sinhala
+ { 0xE00, 0xE7F, 24}, // Thai
+ { 0xE80, 0xEFF, 25}, // Lao
+ { 0xF00, 0xFFF, 70}, // Tibetan
+ { 0x1000, 0x109F, 74}, // Myanmar
+ { 0x10A0, 0x10FF, 26}, // Georgian
+ { 0x1100, 0x11FF, 28}, // Hangul Jamo
+ { 0x1200, 0x137F, 75}, // Ethiopic
+ { 0x1380, 0x139F, 75}, // Ethiopic Supplement
+ { 0x13A0, 0x13FF, 76}, // Cherokee
+ { 0x1400, 0x167F, 77}, // Unified Canadian Aboriginal Syllabics
+ { 0x1680, 0x169F, 78}, // Ogham
+ { 0x16A0, 0x16FF, 79}, // Runic
+ { 0x1700, 0x171F, 84}, // Tagalog
+ { 0x1720, 0x173F, 84}, // Hanunoo
+ { 0x1740, 0x175F, 84}, // Buhid
+ { 0x1760, 0x177F, 84}, // Tagbanwa
+ { 0x1780, 0x17FF, 80}, // Khmer
+ { 0x1800, 0x18AF, 81}, // Mongolian
+ { 0x1900, 0x194F, 93}, // Limbu
+ { 0x1950, 0x197F, 94}, // Tai Le
+ { 0x1980, 0x19DF, 95}, // New Tai Lue
+ { 0x19E0, 0x19FF, 80}, // Khmer Symbols
+ { 0x1A00, 0x1A1F, 96}, // Buginese
+ { 0x1B00, 0x1B7F, 27}, // Balinese
+ { 0x1B80, 0x1BBF, 112}, // Sundanese
+ { 0x1C00, 0x1C4F, 113}, // Lepcha
+ { 0x1C50, 0x1C7F, 114}, // Ol Chiki
+ { 0x1D00, 0x1D7F, 4}, // Phonetic Extensions
+ { 0x1D80, 0x1DBF, 4}, // Phonetic Extensions Supplement
+ { 0x1DC0, 0x1DFF, 6}, // Combining Diacritical Marks Supplement
+ { 0x1E00, 0x1EFF, 29}, // Latin Extended Additional
+ { 0x1F00, 0x1FFF, 30}, // Greek Extended
+ { 0x2000, 0x206F, 31}, // General Punctuation
+ { 0x2070, 0x209F, 32}, // Superscripts And Subscripts
+ { 0x20A0, 0x20CF, 33}, // Currency Symbols
+ { 0x20D0, 0x20FF, 34}, // Combining Diacritical Marks For Symbols
+ { 0x2100, 0x214F, 35}, // Letterlike Symbols
+ { 0x2150, 0x218F, 36}, // Number Forms
+ { 0x2190, 0x21FF, 37}, // Arrows
+ { 0x2200, 0x22FF, 38}, // Mathematical Operators
+ { 0x2300, 0x23FF, 39}, // Miscellaneous Technical
+ { 0x2400, 0x243F, 40}, // Control Pictures
+ { 0x2440, 0x245F, 41}, // Optical Character Recognition
+ { 0x2460, 0x24FF, 42}, // Enclosed Alphanumerics
+ { 0x2500, 0x257F, 43}, // Box Drawing
+ { 0x2580, 0x259F, 44}, // Block Elements
+ { 0x25A0, 0x25FF, 45}, // Geometric Shapes
+ { 0x2600, 0x26FF, 46}, // Miscellaneous Symbols
+ { 0x2700, 0x27BF, 47}, // Dingbats
+ { 0x27C0, 0x27EF, 38}, // Miscellaneous Mathematical Symbols-A
+ { 0x27F0, 0x27FF, 37}, // Supplemental Arrows-A
+ { 0x2800, 0x28FF, 82}, // Braille Patterns
+ { 0x2900, 0x297F, 37}, // Supplemental Arrows-B
+ { 0x2980, 0x29FF, 38}, // Miscellaneous Mathematical Symbols-B
+ { 0x2A00, 0x2AFF, 38}, // Supplemental Mathematical Operators
+ { 0x2B00, 0x2BFF, 37}, // Miscellaneous Symbols and Arrows
+ { 0x2C00, 0x2C5F, 97}, // Glagolitic
+ { 0x2C60, 0x2C7F, 29}, // Latin Extended-C
+ { 0x2C80, 0x2CFF, 8}, // Coptic
+ { 0x2D00, 0x2D2F, 26}, // Georgian Supplement
+ { 0x2D30, 0x2D7F, 98}, // Tifinagh
+ { 0x2D80, 0x2DDF, 75}, // Ethiopic Extended
+ { 0x2DE0, 0x2DFF, 9}, // Cyrillic Extended-A
+ { 0x2E00, 0x2E7F, 31}, // Supplemental Punctuation
+ { 0x2E80, 0x2EFF, 59}, // CJK Radicals Supplement
+ { 0x2F00, 0x2FDF, 59}, // Kangxi Radicals
+ { 0x2FF0, 0x2FFF, 59}, // Ideographic Description Characters
+ { 0x3000, 0x303F, 48}, // CJK Symbols And Punctuation
+ { 0x3040, 0x309F, 49}, // Hiragana
+ { 0x30A0, 0x30FF, 50}, // Katakana
+ { 0x3100, 0x312F, 51}, // Bopomofo
+ { 0x3130, 0x318F, 52}, // Hangul Compatibility Jamo
+ { 0x3190, 0x319F, 59}, // Kanbun
+ { 0x31A0, 0x31BF, 51}, // Bopomofo Extended
+ { 0x31C0, 0x31EF, 61}, // CJK Strokes
+ { 0x31F0, 0x31FF, 50}, // Katakana Phonetic Extensions
+ { 0x3200, 0x32FF, 54}, // Enclosed CJK Letters And Months
+ { 0x3300, 0x33FF, 55}, // CJK Compatibility
+ { 0x3400, 0x4DBF, 59}, // CJK Unified Ideographs Extension A
+ { 0x4DC0, 0x4DFF, 99}, // Yijing Hexagram Symbols
+ { 0x4E00, 0x9FFF, 59}, // CJK Unified Ideographs
+ { 0xA000, 0xA48F, 83}, // Yi Syllables
+ { 0xA490, 0xA4CF, 83}, // Yi Radicals
+ { 0xA500, 0xA63F, 12}, // Vai
+ { 0xA640, 0xA69F, 9}, // Cyrillic Extended-B
+ { 0xA700, 0xA71F, 5}, // Modifier Tone Letters
+ { 0xA720, 0xA7FF, 29}, // Latin Extended-D
+ { 0xA800, 0xA82F, 100}, // Syloti Nagri
+ { 0xA840, 0xA87F, 53}, // Phags-pa
+ { 0xA880, 0xA8DF, 115}, // Saurashtra
+ { 0xA900, 0xA92F, 116}, // Kayah Li
+ { 0xA930, 0xA95F, 117}, // Rejang
+ { 0xAA00, 0xAA5F, 118}, // Cham
+ { 0xAC00, 0xD7AF, 56}, // Hangul Syllables
+ { 0xD800, 0xDFFF, 57}, // Non-Plane 0 *
+ { 0xE000, 0xF8FF, 60}, // Private Use Area (plane 0)
+ { 0xF900, 0xFAFF, 61}, // CJK Compatibility Ideographs
+ { 0xFB00, 0xFB4F, 62}, // Alphabetic Presentation Forms
+ { 0xFB50, 0xFDFF, 63}, // Arabic Presentation Forms-A
+ { 0xFE00, 0xFE0F, 91}, // Variation Selectors
+ { 0xFE10, 0xFE1F, 65}, // Vertical Forms
+ { 0xFE20, 0xFE2F, 64}, // Combining Half Marks
+ { 0xFE30, 0xFE4F, 65}, // CJK Compatibility Forms
+ { 0xFE50, 0xFE6F, 66}, // Small Form Variants
+ { 0xFE70, 0xFEFF, 67}, // Arabic Presentation Forms-B
+ { 0xFF00, 0xFFEF, 68}, // Halfwidth And Fullwidth Forms
+ { 0xFFF0, 0xFFFF, 69}, // Specials
+ { 0x10000, 0x1007F, 101}, // Linear B Syllabary
+ { 0x10080, 0x100FF, 101}, // Linear B Ideograms
+ { 0x10100, 0x1013F, 101}, // Aegean Numbers
+ { 0x10140, 0x1018F, 102}, // Ancient Greek Numbers
+ { 0x10190, 0x101CF, 119}, // Ancient Symbols
+ { 0x101D0, 0x101FF, 120}, // Phaistos Disc
+ { 0x10280, 0x1029F, 121}, // Lycian
+ { 0x102A0, 0x102DF, 121}, // Carian
+ { 0x10300, 0x1032F, 85}, // Old Italic
+ { 0x10330, 0x1034F, 86}, // Gothic
+ { 0x10380, 0x1039F, 103}, // Ugaritic
+ { 0x103A0, 0x103DF, 104}, // Old Persian
+ { 0x10400, 0x1044F, 87}, // Deseret
+ { 0x10450, 0x1047F, 105}, // Shavian
+ { 0x10480, 0x104AF, 106}, // Osmanya
+ { 0x10800, 0x1083F, 107}, // Cypriot Syllabary
+ { 0x10900, 0x1091F, 58}, // Phoenician
+ { 0x10920, 0x1093F, 121}, // Lydian
+ { 0x10A00, 0x10A5F, 108}, // Kharoshthi
+ { 0x12000, 0x123FF, 110}, // Cuneiform
+ { 0x12400, 0x1247F, 110}, // Cuneiform Numbers and Punctuation
+ { 0x1D000, 0x1D0FF, 88}, // Byzantine Musical Symbols
+ { 0x1D100, 0x1D1FF, 88}, // Musical Symbols
+ { 0x1D200, 0x1D24F, 88}, // Ancient Greek Musical Notation
+ { 0x1D300, 0x1D35F, 109}, // Tai Xuan Jing Symbols
+ { 0x1D360, 0x1D37F, 111}, // Counting Rod Numerals
+ { 0x1D400, 0x1D7FF, 89}, // Mathematical Alphanumeric Symbols
+ { 0x1F000, 0x1F02F, 122}, // Mahjong Tiles
+ { 0x1F030, 0x1F09F, 122}, // Domino Tiles
+ { 0x20000, 0x2A6DF, 59}, // CJK Unified Ideographs Extension B
+ { 0x2F800, 0x2FA1F, 61}, // CJK Compatibility Ideographs Supplement
+ { 0xE0000, 0xE007F, 92}, // Tags
+ { 0xE0100, 0xE01EF, 91}, // Variation Selectors Supplement
+ { 0xF0000, 0xFFFFD, 90}, // Private Use (plane 15)
+ {0x100000, 0x10FFFD, 90}, // Private Use (plane 16)
+};
+
+static int
+_compare_range (const void *_key, const void *_item, void *_arg)
+{
+ hb_codepoint_t *cp = (hb_codepoint_t *) _key;
+ Range *range = (Range *) _item;
+
+ if (*cp < range->start)
+ return -1;
+ else if (*cp <= range->end)
+ return 0;
+ else
+ return 1;
+}
+
+/**
+ * hb_get_unicode_range_bit:
+ * Returns the bit to be set in os/2 ulUnicodeRange for a given codepoint.
+ **/
+static int
+hb_get_unicode_range_bit (hb_codepoint_t cp)
+{
+ Range *range = (Range*) hb_bsearch_r (&cp, os2UnicodeRangesSorted,
+ sizeof (os2UnicodeRangesSorted) / sizeof(Range),
+ sizeof(Range),
+ _compare_range, nullptr);
+ if (range != NULL)
+ return range->bit;
+ return -1;
+}
} /* namespace OT */
commit a570142d0c6d819feeeeb9e209fc90a33c1c2b48
Author: Garret Rieger <grieger at google.com>
Date: Mon Feb 26 15:20:23 2018 -0800
[subset] Move hb-ot-os2-unicode-ranges into a cc file with helper methods and tests.
diff --git a/src/Makefile.am b/src/Makefile.am
index 3f98e1db..6044366d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -381,12 +381,17 @@ dump_use_data_SOURCES = dump-use-data.cc hb-ot-shape-complex-use-table.cc
dump_use_data_CPPFLAGS = $(HBCFLAGS)
dump_use_data_LDADD = libharfbuzz.la $(HBLIBS)
-check_PROGRAMS += test-ot-tag
-TESTS += test-ot-tag
+check_PROGRAMS += test-ot-tag test-unicode-ranges
+TESTS += test-ot-tag test-unicode-ranges
+
test_ot_tag_SOURCES = hb-ot-tag.cc
test_ot_tag_CPPFLAGS = $(HBCFLAGS) -DMAIN
test_ot_tag_LDADD = libharfbuzz.la $(HBLIBS)
+test_unicode_ranges_SOURCES = hb-ot-os2-unicode-ranges.cc
+test_unicode_ranges_CPPFLAGS = $(HBCFLAGS) -DMAIN
+test_unicode_ranges_LDADD = libharfbuzz.la $(HBLIBS)
+
TESTS_ENVIRONMENT = \
srcdir="$(srcdir)" \
MAKE="$(MAKE) $(AM_MAKEFLAGS)" \
diff --git a/src/Makefile.sources b/src/Makefile.sources
index 7883412c..c2071643 100644
--- a/src/Makefile.sources
+++ b/src/Makefile.sources
@@ -28,6 +28,8 @@ HB_BASE_sources = \
hb-ot-maxp-table.hh \
hb-ot-name-table.hh \
hb-ot-os2-table.hh \
+ hb-ot-os2-unicode-ranges.hh \
+ hb-ot-os2-unicode-ranges.cc \
hb-ot-post-macroman.hh \
hb-ot-post-table.hh \
hb-ot-tag.cc \
diff --git a/src/hb-ot-os2-unicode-ranges.cc b/src/hb-ot-os2-unicode-ranges.cc
new file mode 100644
index 00000000..f2c06397
--- /dev/null
+++ b/src/hb-ot-os2-unicode-ranges.cc
@@ -0,0 +1,279 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#include "hb-private.hh"
+
+#include "hb-dsalgs.hh"
+
+/* One entry of os2UnicodeRangesSorted: a span of codepoints and the bit
+ * that hb_get_unicode_range_bit returns for codepoints inside it. */
+struct Range {
+ unsigned int start; /* first codepoint of the span (inclusive) */
+ unsigned int end; /* last codepoint of the span (inclusive, see _compare_range) */
+ unsigned int bit; /* os/2 ulUnicodeRange bit reported for this span */
+};
+
+#define NUM_RANGES 169
+static Range os2UnicodeRangesSorted[NUM_RANGES] = {
+ { 0x0, 0x7F, 0}, // Basic Latin
+ { 0x80, 0xFF, 1}, // Latin-1 Supplement
+ { 0x100, 0x17F, 2}, // Latin Extended-A
+ { 0x180, 0x24F, 3}, // Latin Extended-B
+ { 0x250, 0x2AF, 4}, // IPA Extensions
+ { 0x2B0, 0x2FF, 5}, // Spacing Modifier Letters
+ { 0x300, 0x36F, 6}, // Combining Diacritical Marks
+ { 0x370, 0x3FF, 7}, // Greek and Coptic
+ { 0x400, 0x4FF, 9}, // Cyrillic
+ { 0x500, 0x52F, 9}, // Cyrillic Supplement
+ { 0x530, 0x58F, 10}, // Armenian
+ { 0x590, 0x5FF, 11}, // Hebrew
+ { 0x600, 0x6FF, 13}, // Arabic
+ { 0x700, 0x74F, 71}, // Syriac
+ { 0x750, 0x77F, 13}, // Arabic Supplement
+ { 0x780, 0x7BF, 72}, // Thaana
+ { 0x7C0, 0x7FF, 14}, // NKo
+ { 0x900, 0x97F, 15}, // Devanagari
+ { 0x980, 0x9FF, 16}, // Bengali
+ { 0xA00, 0xA7F, 17}, // Gurmukhi
+ { 0xA80, 0xAFF, 18}, // Gujarati
+ { 0xB00, 0xB7F, 19}, // Oriya
+ { 0xB80, 0xBFF, 20}, // Tamil
+ { 0xC00, 0xC7F, 21}, // Telugu
+ { 0xC80, 0xCFF, 22}, // Kannada
+ { 0xD00, 0xD7F, 23}, // Malayalam
+ { 0xD80, 0xDFF, 73}, // Sinhala
+ { 0xE00, 0xE7F, 24}, // Thai
+ { 0xE80, 0xEFF, 25}, // Lao
+ { 0xF00, 0xFFF, 70}, // Tibetan
+ { 0x1000, 0x109F, 74}, // Myanmar
+ { 0x10A0, 0x10FF, 26}, // Georgian
+ { 0x1100, 0x11FF, 28}, // Hangul Jamo
+ { 0x1200, 0x137F, 75}, // Ethiopic
+ { 0x1380, 0x139F, 75}, // Ethiopic Supplement
+ { 0x13A0, 0x13FF, 76}, // Cherokee
+ { 0x1400, 0x167F, 77}, // Unified Canadian Aboriginal Syllabics
+ { 0x1680, 0x169F, 78}, // Ogham
+ { 0x16A0, 0x16FF, 79}, // Runic
+ { 0x1700, 0x171F, 84}, // Tagalog
+ { 0x1720, 0x173F, 84}, // Hanunoo
+ { 0x1740, 0x175F, 84}, // Buhid
+ { 0x1760, 0x177F, 84}, // Tagbanwa
+ { 0x1780, 0x17FF, 80}, // Khmer
+ { 0x1800, 0x18AF, 81}, // Mongolian
+ { 0x1900, 0x194F, 93}, // Limbu
+ { 0x1950, 0x197F, 94}, // Tai Le
+ { 0x1980, 0x19DF, 95}, // New Tai Lue
+ { 0x19E0, 0x19FF, 80}, // Khmer Symbols
+ { 0x1A00, 0x1A1F, 96}, // Buginese
+ { 0x1B00, 0x1B7F, 27}, // Balinese
+ { 0x1B80, 0x1BBF, 112}, // Sundanese
+ { 0x1C00, 0x1C4F, 113}, // Lepcha
+ { 0x1C50, 0x1C7F, 114}, // Ol Chiki
+ { 0x1D00, 0x1D7F, 4}, // Phonetic Extensions
+ { 0x1D80, 0x1DBF, 4}, // Phonetic Extensions Supplement
+ { 0x1DC0, 0x1DFF, 6}, // Combining Diacritical Marks Supplement
+ { 0x1E00, 0x1EFF, 29}, // Latin Extended Additional
+ { 0x1F00, 0x1FFF, 30}, // Greek Extended
+ { 0x2000, 0x206F, 31}, // General Punctuation
+ { 0x2070, 0x209F, 32}, // Superscripts And Subscripts
+ { 0x20A0, 0x20CF, 33}, // Currency Symbols
+ { 0x20D0, 0x20FF, 34}, // Combining Diacritical Marks For Symbols
+ { 0x2100, 0x214F, 35}, // Letterlike Symbols
+ { 0x2150, 0x218F, 36}, // Number Forms
+ { 0x2190, 0x21FF, 37}, // Arrows
+ { 0x2200, 0x22FF, 38}, // Mathematical Operators
+ { 0x2300, 0x23FF, 39}, // Miscellaneous Technical
+ { 0x2400, 0x243F, 40}, // Control Pictures
+ { 0x2440, 0x245F, 41}, // Optical Character Recognition
+ { 0x2460, 0x24FF, 42}, // Enclosed Alphanumerics
+ { 0x2500, 0x257F, 43}, // Box Drawing
+ { 0x2580, 0x259F, 44}, // Block Elements
+ { 0x25A0, 0x25FF, 45}, // Geometric Shapes
+ { 0x2600, 0x26FF, 46}, // Miscellaneous Symbols
+ { 0x2700, 0x27BF, 47}, // Dingbats
+ { 0x27C0, 0x27EF, 38}, // Miscellaneous Mathematical Symbols-A
+ { 0x27F0, 0x27FF, 37}, // Supplemental Arrows-A
+ { 0x2800, 0x28FF, 82}, // Braille Patterns
+ { 0x2900, 0x297F, 37}, // Supplemental Arrows-B
+ { 0x2980, 0x29FF, 38}, // Miscellaneous Mathematical Symbols-B
+ { 0x2A00, 0x2AFF, 38}, // Supplemental Mathematical Operators
+ { 0x2B00, 0x2BFF, 37}, // Miscellaneous Symbols and Arrows
+ { 0x2C00, 0x2C5F, 97}, // Glagolitic
+ { 0x2C60, 0x2C7F, 29}, // Latin Extended-C
+ { 0x2C80, 0x2CFF, 8}, // Coptic
+ { 0x2D00, 0x2D2F, 26}, // Georgian Supplement
+ { 0x2D30, 0x2D7F, 98}, // Tifinagh
+ { 0x2D80, 0x2DDF, 75}, // Ethiopic Extended
+ { 0x2DE0, 0x2DFF, 9}, // Cyrillic Extended-A
+ { 0x2E00, 0x2E7F, 31}, // Supplemental Punctuation
+ { 0x2E80, 0x2EFF, 59}, // CJK Radicals Supplement
+ { 0x2F00, 0x2FDF, 59}, // Kangxi Radicals
+ { 0x2FF0, 0x2FFF, 59}, // Ideographic Description Characters
+ { 0x3000, 0x303F, 48}, // CJK Symbols And Punctuation
+ { 0x3040, 0x309F, 49}, // Hiragana
+ { 0x30A0, 0x30FF, 50}, // Katakana
+ { 0x3100, 0x312F, 51}, // Bopomofo
+ { 0x3130, 0x318F, 52}, // Hangul Compatibility Jamo
+ { 0x3190, 0x319F, 59}, // Kanbun
+ { 0x31A0, 0x31BF, 51}, // Bopomofo Extended
+ { 0x31C0, 0x31EF, 61}, // CJK Strokes
+ { 0x31F0, 0x31FF, 50}, // Katakana Phonetic Extensions
+ { 0x3200, 0x32FF, 54}, // Enclosed CJK Letters And Months
+ { 0x3300, 0x33FF, 55}, // CJK Compatibility
+ { 0x3400, 0x4DBF, 59}, // CJK Unified Ideographs Extension A
+ { 0x4DC0, 0x4DFF, 99}, // Yijing Hexagram Symbols
+ { 0x4E00, 0x9FFF, 59}, // CJK Unified Ideographs
+ { 0xA000, 0xA48F, 83}, // Yi Syllables
+ { 0xA490, 0xA4CF, 83}, // Yi Radicals
+ { 0xA500, 0xA63F, 12}, // Vai
+ { 0xA640, 0xA69F, 9}, // Cyrillic Extended-B
+ { 0xA700, 0xA71F, 5}, // Modifier Tone Letters
+ { 0xA720, 0xA7FF, 29}, // Latin Extended-D
+ { 0xA800, 0xA82F, 100}, // Syloti Nagri
+ { 0xA840, 0xA87F, 53}, // Phags-pa
+ { 0xA880, 0xA8DF, 115}, // Saurashtra
+ { 0xA900, 0xA92F, 116}, // Kayah Li
+ { 0xA930, 0xA95F, 117}, // Rejang
+ { 0xAA00, 0xAA5F, 118}, // Cham
+ { 0xAC00, 0xD7AF, 56}, // Hangul Syllables
+ { 0xD800, 0xDFFF, 57}, // Non-Plane 0 *
+ { 0xE000, 0xF8FF, 60}, // Private Use Area (plane 0)
+ { 0xF900, 0xFAFF, 61}, // CJK Compatibility Ideographs
+ { 0xFB00, 0xFB4F, 62}, // Alphabetic Presentation Forms
+ { 0xFB50, 0xFDFF, 63}, // Arabic Presentation Forms-A
+ { 0xFE00, 0xFE0F, 91}, // Variation Selectors
+ { 0xFE10, 0xFE1F, 65}, // Vertical Forms
+ { 0xFE20, 0xFE2F, 64}, // Combining Half Marks
+ { 0xFE30, 0xFE4F, 65}, // CJK Compatibility Forms
+ { 0xFE50, 0xFE6F, 66}, // Small Form Variants
+ { 0xFE70, 0xFEFF, 67}, // Arabic Presentation Forms-B
+ { 0xFF00, 0xFFEF, 68}, // Halfwidth And Fullwidth Forms
+ { 0xFFF0, 0xFFFF, 69}, // Specials
+ { 0x10000, 0x1007F, 101}, // Linear B Syllabary
+ { 0x10080, 0x100FF, 101}, // Linear B Ideograms
+ { 0x10100, 0x1013F, 101}, // Aegean Numbers
+ { 0x10140, 0x1018F, 102}, // Ancient Greek Numbers
+ { 0x10190, 0x101CF, 119}, // Ancient Symbols
+ { 0x101D0, 0x101FF, 120}, // Phaistos Disc
+ { 0x10280, 0x1029F, 121}, // Lycian
+ { 0x102A0, 0x102DF, 121}, // Carian
+ { 0x10300, 0x1032F, 85}, // Old Italic
+ { 0x10330, 0x1034F, 86}, // Gothic
+ { 0x10380, 0x1039F, 103}, // Ugaritic
+ { 0x103A0, 0x103DF, 104}, // Old Persian
+ { 0x10400, 0x1044F, 87}, // Deseret
+ { 0x10450, 0x1047F, 105}, // Shavian
+ { 0x10480, 0x104AF, 106}, // Osmanya
+ { 0x10800, 0x1083F, 107}, // Cypriot Syllabary
+ { 0x10900, 0x1091F, 58}, // Phoenician
+ { 0x10920, 0x1093F, 121}, // Lydian
+ { 0x10A00, 0x10A5F, 108}, // Kharoshthi
+ { 0x12000, 0x123FF, 110}, // Cuneiform
+ { 0x12400, 0x1247F, 110}, // Cuneiform Numbers and Punctuation
+ { 0x1D000, 0x1D0FF, 88}, // Byzantine Musical Symbols
+ { 0x1D100, 0x1D1FF, 88}, // Musical Symbols
+ { 0x1D200, 0x1D24F, 88}, // Ancient Greek Musical Notation
+ { 0x1D300, 0x1D35F, 109}, // Tai Xuan Jing Symbols
+ { 0x1D360, 0x1D37F, 111}, // Counting Rod Numerals
+ { 0x1D400, 0x1D7FF, 89}, // Mathematical Alphanumeric Symbols
+ { 0x1F000, 0x1F02F, 122}, // Mahjong Tiles
+ { 0x1F030, 0x1F09F, 122}, // Domino Tiles
+ { 0x20000, 0x2A6DF, 59}, // CJK Unified Ideographs Extension B
+ { 0x2F800, 0x2FA1F, 61}, // CJK Compatibility Ideographs Supplement
+ { 0xE0000, 0xE007F, 92}, // Tags
+ { 0xE0100, 0xE01EF, 91}, // Variation Selectors Supplement
+ { 0xF0000, 0xFFFFD, 90}, // Private Use (plane 15)
+ {0x100000, 0x10FFFD, 90}, // Private Use (plane 16)
+};
+
+/* Comparison callback passed to hb_bsearch_r.
+ * _key is the codepoint being searched for and _item is the Range entry
+ * it is compared against.  The result is -1 when the codepoint sorts
+ * before the range, 1 when it sorts after it, and 0 when it falls within
+ * start..end inclusive.  _arg is ignored. */
+static int
+_compare_range (const void *_key, const void *_item, void *_arg)
+{
+  /* Both arguments are read-only here; do not cast their const away. */
+  const hb_codepoint_t *cp = (const hb_codepoint_t *) _key;
+  const Range *range = (const Range *) _item;
+
+  if (*cp < range->start)
+    return -1;
+  else if (*cp <= range->end)
+    return 0;
+  else
+    return 1;
+}
+
+/**
+ * hb_get_unicode_range_bit:
+ * Looks cp up in os2UnicodeRangesSorted with hb_bsearch_r and returns the
+ * os/2 ulUnicodeRange bit of the matching range, or -1 if cp belongs to
+ * none of the NUM_RANGES entries.
+ *
+ * NOTE(review): this definition is static and at file scope, whereas
+ * hb-ot-os2-unicode-ranges.hh declares an HB_INTERNAL function of the
+ * same name inside namespace OT -- confirm which one callers should use.
+ **/
+static int
+hb_get_unicode_range_bit (hb_codepoint_t cp)
+{
+  Range *found = (Range*) hb_bsearch_r (&cp, os2UnicodeRangesSorted,
+                                        NUM_RANGES, sizeof(Range),
+                                        _compare_range, nullptr);
+  return found ? (int) found->bit : -1;
+}
+
+
+#ifdef MAIN
+
+/* Checks that hb_get_unicode_range_bit (cp) yields the expected bit.
+ * On a mismatch, reports the returned and expected values on stderr
+ * and aborts the program. */
+void
+test (hb_codepoint_t cp, int bit)
+{
+  /* Look up once so the value reported is exactly the value compared. */
+  int actual = hb_get_unicode_range_bit (cp);
+  if (actual == bit)
+    return;
+
+  /* End the message with a newline so later output starts on its own line. */
+  fprintf (stderr, "got incorrect bit (%d) for cp 0x%X. Should have been %d.\n",
+           actual, cp, bit);
+  abort();
+}
+
+/* Runs test () over a fixed list of codepoints: both ends and the inside
+ * of a few ranges, the last table entry's end, and two codepoints that
+ * are in no range (expected result -1). */
+void
+test_get_unicode_range_bit (void)
+{
+  static const struct { hb_codepoint_t cp; int bit; } cases[] = {
+    {0x0000, 0},
+    {0x0042, 0},
+    {0x007F, 0},
+    {0x0080, 1},
+
+    {0x30A0, 50},
+    {0x30B1, 50},
+    {0x30FF, 50},
+
+    {0x10FFFD, 90},
+
+    {0x30000, -1},
+    {0x110000, -1},
+  };
+
+  for (unsigned int i = 0; i < sizeof (cases) / sizeof (cases[0]); i++)
+    test (cases[i].cp, cases[i].bit);
+}
+
+/* Entry point of the standalone test program; only compiled when MAIN is
+ * defined.  Returns 0 if test_get_unicode_range_bit () returns normally
+ * (a failing check aborts inside test ()). */
+int
+main (void)
+{
+ test_get_unicode_range_bit ();
+ return 0;
+}
+
+#endif
diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh
index 94daad2f..5f36b20c 100644
--- a/src/hb-ot-os2-unicode-ranges.hh
+++ b/src/hb-ot-os2-unicode-ranges.hh
@@ -24,196 +24,15 @@
* Google Author(s): Garret Rieger
*/
-
-
#ifndef HB_OT_OS2_UNICODE_RANGES_HH
#define HB_OT_OS2_UNICODE_RANGES_HH
-namespace OT {
-
-struct Range
-{
- unsigned int start;
- unsigned int end;
- unsigned int bit;
-};
-
-static Range os2UnicodeRangesSorted[] = {
- { 0x0, 0x7F, 0}, // Basic Latin
- { 0x80, 0xFF, 1}, // Latin-1 Supplement
- { 0x100, 0x17F, 2}, // Latin Extended-A
- { 0x180, 0x24F, 3}, // Latin Extended-B
- { 0x250, 0x2AF, 4}, // IPA Extensions
- { 0x2B0, 0x2FF, 5}, // Spacing Modifier Letters
- { 0x300, 0x36F, 6}, // Combining Diacritical Marks
- { 0x370, 0x3FF, 7}, // Greek and Coptic
- { 0x400, 0x4FF, 9}, // Cyrillic
- { 0x500, 0x52F, 9}, // Cyrillic Supplement
- { 0x530, 0x58F, 10}, // Armenian
- { 0x590, 0x5FF, 11}, // Hebrew
- { 0x600, 0x6FF, 13}, // Arabic
- { 0x700, 0x74F, 71}, // Syriac
- { 0x750, 0x77F, 13}, // Arabic Supplement
- { 0x780, 0x7BF, 72}, // Thaana
- { 0x7C0, 0x7FF, 14}, // NKo
- { 0x900, 0x97F, 15}, // Devanagari
- { 0x980, 0x9FF, 16}, // Bengali
- { 0xA00, 0xA7F, 17}, // Gurmukhi
- { 0xA80, 0xAFF, 18}, // Gujarati
- { 0xB00, 0xB7F, 19}, // Oriya
- { 0xB80, 0xBFF, 20}, // Tamil
- { 0xC00, 0xC7F, 21}, // Telugu
- { 0xC80, 0xCFF, 22}, // Kannada
- { 0xD00, 0xD7F, 23}, // Malayalam
- { 0xD80, 0xDFF, 73}, // Sinhala
- { 0xE00, 0xE7F, 24}, // Thai
- { 0xE80, 0xEFF, 25}, // Lao
- { 0xF00, 0xFFF, 70}, // Tibetan
- { 0x1000, 0x109F, 74}, // Myanmar
- { 0x10A0, 0x10FF, 26}, // Georgian
- { 0x1100, 0x11FF, 28}, // Hangul Jamo
- { 0x1200, 0x137F, 75}, // Ethiopic
- { 0x1380, 0x139F, 75}, // Ethiopic Supplement
- { 0x13A0, 0x13FF, 76}, // Cherokee
- { 0x1400, 0x167F, 77}, // Unified Canadian Aboriginal Syllabics
- { 0x1680, 0x169F, 78}, // Ogham
- { 0x16A0, 0x16FF, 79}, // Runic
- { 0x1700, 0x171F, 84}, // Tagalog
- { 0x1720, 0x173F, 84}, // Hanunoo
- { 0x1740, 0x175F, 84}, // Buhid
- { 0x1760, 0x177F, 84}, // Tagbanwa
- { 0x1780, 0x17FF, 80}, // Khmer
- { 0x1800, 0x18AF, 81}, // Mongolian
- { 0x1900, 0x194F, 93}, // Limbu
- { 0x1950, 0x197F, 94}, // Tai Le
- { 0x1980, 0x19DF, 95}, // New Tai Lue
- { 0x19E0, 0x19FF, 80}, // Khmer Symbols
- { 0x1A00, 0x1A1F, 96}, // Buginese
- { 0x1B00, 0x1B7F, 27}, // Balinese
- { 0x1B80, 0x1BBF, 112}, // Sundanese
- { 0x1C00, 0x1C4F, 113}, // Lepcha
- { 0x1C50, 0x1C7F, 114}, // Ol Chiki
- { 0x1D00, 0x1D7F, 4}, // Phonetic Extensions
- { 0x1D80, 0x1DBF, 4}, // Phonetic Extensions Supplement
- { 0x1DC0, 0x1DFF, 6}, // Combining Diacritical Marks Supplement
- { 0x1E00, 0x1EFF, 29}, // Latin Extended Additional
- { 0x1F00, 0x1FFF, 30}, // Greek Extended
- { 0x2000, 0x206F, 31}, // General Punctuation
- { 0x2070, 0x209F, 32}, // Superscripts And Subscripts
- { 0x20A0, 0x20CF, 33}, // Currency Symbols
- { 0x20D0, 0x20FF, 34}, // Combining Diacritical Marks For Symbols
- { 0x2100, 0x214F, 35}, // Letterlike Symbols
- { 0x2150, 0x218F, 36}, // Number Forms
- { 0x2190, 0x21FF, 37}, // Arrows
- { 0x2200, 0x22FF, 38}, // Mathematical Operators
- { 0x2300, 0x23FF, 39}, // Miscellaneous Technical
- { 0x2400, 0x243F, 40}, // Control Pictures
- { 0x2440, 0x245F, 41}, // Optical Character Recognition
- { 0x2460, 0x24FF, 42}, // Enclosed Alphanumerics
- { 0x2500, 0x257F, 43}, // Box Drawing
- { 0x2580, 0x259F, 44}, // Block Elements
- { 0x25A0, 0x25FF, 45}, // Geometric Shapes
- { 0x2600, 0x26FF, 46}, // Miscellaneous Symbols
- { 0x2700, 0x27BF, 47}, // Dingbats
- { 0x27C0, 0x27EF, 38}, // Miscellaneous Mathematical Symbols-A
- { 0x27F0, 0x27FF, 37}, // Supplemental Arrows-A
- { 0x2800, 0x28FF, 82}, // Braille Patterns
- { 0x2900, 0x297F, 37}, // Supplemental Arrows-B
- { 0x2980, 0x29FF, 38}, // Miscellaneous Mathematical Symbols-B
- { 0x2A00, 0x2AFF, 38}, // Supplemental Mathematical Operators
- { 0x2B00, 0x2BFF, 37}, // Miscellaneous Symbols and Arrows
- { 0x2C00, 0x2C5F, 97}, // Glagolitic
- { 0x2C60, 0x2C7F, 29}, // Latin Extended-C
- { 0x2C80, 0x2CFF, 8}, // Coptic
- { 0x2D00, 0x2D2F, 26}, // Georgian Supplement
- { 0x2D30, 0x2D7F, 98}, // Tifinagh
- { 0x2D80, 0x2DDF, 75}, // Ethiopic Extended
- { 0x2DE0, 0x2DFF, 9}, // Cyrillic Extended-A
- { 0x2E00, 0x2E7F, 31}, // Supplemental Punctuation
- { 0x2E80, 0x2EFF, 59}, // CJK Radicals Supplement
- { 0x2F00, 0x2FDF, 59}, // Kangxi Radicals
- { 0x2FF0, 0x2FFF, 59}, // Ideographic Description Characters
- { 0x3000, 0x303F, 48}, // CJK Symbols And Punctuation
- { 0x3040, 0x309F, 49}, // Hiragana
- { 0x30A0, 0x30FF, 50}, // Katakana
- { 0x3100, 0x312F, 51}, // Bopomofo
- { 0x3130, 0x318F, 52}, // Hangul Compatibility Jamo
- { 0x3190, 0x319F, 59}, // Kanbun
- { 0x31A0, 0x31BF, 51}, // Bopomofo Extended
- { 0x31C0, 0x31EF, 61}, // CJK Strokes
- { 0x31F0, 0x31FF, 50}, // Katakana Phonetic Extensions
- { 0x3200, 0x32FF, 54}, // Enclosed CJK Letters And Months
- { 0x3300, 0x33FF, 55}, // CJK Compatibility
- { 0x3400, 0x4DBF, 59}, // CJK Unified Ideographs Extension A
- { 0x4DC0, 0x4DFF, 99}, // Yijing Hexagram Symbols
- { 0x4E00, 0x9FFF, 59}, // CJK Unified Ideographs
- { 0xA000, 0xA48F, 83}, // Yi Syllables
- { 0xA490, 0xA4CF, 83}, // Yi Radicals
- { 0xA500, 0xA63F, 12}, // Vai
- { 0xA640, 0xA69F, 9}, // Cyrillic Extended-B
- { 0xA700, 0xA71F, 5}, // Modifier Tone Letters
- { 0xA720, 0xA7FF, 29}, // Latin Extended-D
- { 0xA800, 0xA82F, 100}, // Syloti Nagri
- { 0xA840, 0xA87F, 53}, // Phags-pa
- { 0xA880, 0xA8DF, 115}, // Saurashtra
- { 0xA900, 0xA92F, 116}, // Kayah Li
- { 0xA930, 0xA95F, 117}, // Rejang
- { 0xAA00, 0xAA5F, 118}, // Cham
- { 0xAC00, 0xD7AF, 56}, // Hangul Syllables
- { 0xD800, 0xDFFF, 57}, // Non-Plane 0 *
- { 0xE000, 0xF8FF, 60}, // Private Use Area (plane 0)
- { 0xF900, 0xFAFF, 61}, // CJK Compatibility Ideographs
- { 0xFB00, 0xFB4F, 62}, // Alphabetic Presentation Forms
- { 0xFB50, 0xFDFF, 63}, // Arabic Presentation Forms-A
- { 0xFE00, 0xFE0F, 91}, // Variation Selectors
- { 0xFE10, 0xFE1F, 65}, // Vertical Forms
- { 0xFE20, 0xFE2F, 64}, // Combining Half Marks
- { 0xFE30, 0xFE4F, 65}, // CJK Compatibility Forms
- { 0xFE50, 0xFE6F, 66}, // Small Form Variants
- { 0xFE70, 0xFEFF, 67}, // Arabic Presentation Forms-B
- { 0xFF00, 0xFFEF, 68}, // Halfwidth And Fullwidth Forms
- { 0xFFF0, 0xFFFF, 69}, // Specials
- { 0x10000, 0x1007F, 101}, // Linear B Syllabary
- { 0x10080, 0x100FF, 101}, // Linear B Ideograms
- { 0x10100, 0x1013F, 101}, // Aegean Numbers
- { 0x10140, 0x1018F, 102}, // Ancient Greek Numbers
- { 0x10190, 0x101CF, 119}, // Ancient Symbols
- { 0x101D0, 0x101FF, 120}, // Phaistos Disc
- { 0x10280, 0x1029F, 121}, // Lycian
- { 0x102A0, 0x102DF, 121}, // Carian
- { 0x10300, 0x1032F, 85}, // Old Italic
- { 0x10330, 0x1034F, 86}, // Gothic
- { 0x10380, 0x1039F, 103}, // Ugaritic
- { 0x103A0, 0x103DF, 104}, // Old Persian
- { 0x10400, 0x1044F, 87}, // Deseret
- { 0x10450, 0x1047F, 105}, // Shavian
- { 0x10480, 0x104AF, 106}, // Osmanya
- { 0x10800, 0x1083F, 107}, // Cypriot Syllabary
- { 0x10900, 0x1091F, 58}, // Phoenician
- { 0x10920, 0x1093F, 121}, // Lydian
- { 0x10A00, 0x10A5F, 108}, // Kharoshthi
- { 0x12000, 0x123FF, 110}, // Cuneiform
- { 0x12400, 0x1247F, 110}, // Cuneiform Numbers and Punctuation
- { 0x1D000, 0x1D0FF, 88}, // Byzantine Musical Symbols
- { 0x1D100, 0x1D1FF, 88}, // Musical Symbols
- { 0x1D200, 0x1D24F, 88}, // Ancient Greek Musical Notation
- { 0x1D300, 0x1D35F, 109}, // Tai Xuan Jing Symbols
- { 0x1D360, 0x1D37F, 111}, // Counting Rod Numerals
- { 0x1D400, 0x1D7FF, 89}, // Mathematical Alphanumeric Symbols
- { 0x1F000, 0x1F02F, 122}, // Mahjong Tiles
- { 0x1F030, 0x1F09F, 122}, // Domino Tiles
- { 0x20000, 0x2A6DF, 59}, // CJK Unified Ideographs Extension B
- { 0x2F800, 0x2FA1F, 61}, // CJK Compatibility Ideographs Supplement
- { 0xE0000, 0xE007F, 92}, // Tags
- { 0xE0100, 0xE01EF, 91}, // Variation Selectors Supplement
- { 0xF0000, 0xFFFFD, 90}, // Private Use (plane 15)
- {0x100000, 0x10FFFD, 90}, // Private Use (plane 16)
-};
+#include "hb-private.hh"
-static unsigned int hb_get_unicode_range_bit (hb_codepoint_t cp)
-{
+namespace OT {
-}
+/* Returns the bit to be set in os/2 ulUnicodeRange for the codepoint cp.
+ * NOTE(review): the definition in hb-ot-os2-unicode-ranges.cc returns -1
+ * for codepoints in no range, but it is static and outside namespace OT;
+ * confirm that this declaration actually resolves to it. */
+HB_INTERNAL int
+hb_get_unicode_range_bit (hb_codepoint_t cp);
} /* namespace OT */
commit 8e81799b32f3dfaca000fa5d42943ceed9af8d17
Author: Garret Rieger <grieger at google.com>
Date: Fri Feb 23 18:35:41 2018 -0800
[subset] Add hb-ot-os2-unicode-ranges.hh, a map of os2 unicode ranges.
diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh
new file mode 100644
index 00000000..94daad2f
--- /dev/null
+++ b/src/hb-ot-os2-unicode-ranges.hh
@@ -0,0 +1,220 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+
+
+#ifndef HB_OT_OS2_UNICODE_RANGES_HH
+#define HB_OT_OS2_UNICODE_RANGES_HH
+
+namespace OT {
+
+/* One entry of the os2UnicodeRangesSorted table. */
+struct Range
+{
+ unsigned int start; /* first codepoint of the block */
+ unsigned int end; /* last codepoint of the block (table rows are contiguous, e.g. 0x7F then 0x80) */
+ unsigned int bit; /* presumably the os/2 ulUnicodeRange bit number for the block; confirm against the spec */
+};
+
+static Range os2UnicodeRangesSorted[] = {
+ { 0x0, 0x7F, 0}, // Basic Latin
+ { 0x80, 0xFF, 1}, // Latin-1 Supplement
+ { 0x100, 0x17F, 2}, // Latin Extended-A
+ { 0x180, 0x24F, 3}, // Latin Extended-B
+ { 0x250, 0x2AF, 4}, // IPA Extensions
+ { 0x2B0, 0x2FF, 5}, // Spacing Modifier Letters
+ { 0x300, 0x36F, 6}, // Combining Diacritical Marks
+ { 0x370, 0x3FF, 7}, // Greek and Coptic
+ { 0x400, 0x4FF, 9}, // Cyrillic
+ { 0x500, 0x52F, 9}, // Cyrillic Supplement
+ { 0x530, 0x58F, 10}, // Armenian
+ { 0x590, 0x5FF, 11}, // Hebrew
+ { 0x600, 0x6FF, 13}, // Arabic
+ { 0x700, 0x74F, 71}, // Syriac
+ { 0x750, 0x77F, 13}, // Arabic Supplement
+ { 0x780, 0x7BF, 72}, // Thaana
+ { 0x7C0, 0x7FF, 14}, // NKo
+ { 0x900, 0x97F, 15}, // Devanagari
+ { 0x980, 0x9FF, 16}, // Bengali
+ { 0xA00, 0xA7F, 17}, // Gurmukhi
+ { 0xA80, 0xAFF, 18}, // Gujarati
+ { 0xB00, 0xB7F, 19}, // Oriya
+ { 0xB80, 0xBFF, 20}, // Tamil
+ { 0xC00, 0xC7F, 21}, // Telugu
+ { 0xC80, 0xCFF, 22}, // Kannada
+ { 0xD00, 0xD7F, 23}, // Malayalam
+ { 0xD80, 0xDFF, 73}, // Sinhala
+ { 0xE00, 0xE7F, 24}, // Thai
+ { 0xE80, 0xEFF, 25}, // Lao
+ { 0xF00, 0xFFF, 70}, // Tibetan
+ { 0x1000, 0x109F, 74}, // Myanmar
+ { 0x10A0, 0x10FF, 26}, // Georgian
+ { 0x1100, 0x11FF, 28}, // Hangul Jamo
+ { 0x1200, 0x137F, 75}, // Ethiopic
+ { 0x1380, 0x139F, 75}, // Ethiopic Supplement
+ { 0x13A0, 0x13FF, 76}, // Cherokee
+ { 0x1400, 0x167F, 77}, // Unified Canadian Aboriginal Syllabics
+ { 0x1680, 0x169F, 78}, // Ogham
+ { 0x16A0, 0x16FF, 79}, // Runic
+ { 0x1700, 0x171F, 84}, // Tagalog
+ { 0x1720, 0x173F, 84}, // Hanunoo
+ { 0x1740, 0x175F, 84}, // Buhid
+ { 0x1760, 0x177F, 84}, // Tagbanwa
+ { 0x1780, 0x17FF, 80}, // Khmer
+ { 0x1800, 0x18AF, 81}, // Mongolian
+ { 0x1900, 0x194F, 93}, // Limbu
+ { 0x1950, 0x197F, 94}, // Tai Le
+ { 0x1980, 0x19DF, 95}, // New Tai Lue
+ { 0x19E0, 0x19FF, 80}, // Khmer Symbols
+ { 0x1A00, 0x1A1F, 96}, // Buginese
+ { 0x1B00, 0x1B7F, 27}, // Balinese
+ { 0x1B80, 0x1BBF, 112}, // Sundanese
+ { 0x1C00, 0x1C4F, 113}, // Lepcha
+ { 0x1C50, 0x1C7F, 114}, // Ol Chiki
+ { 0x1D00, 0x1D7F, 4}, // Phonetic Extensions
+ { 0x1D80, 0x1DBF, 4}, // Phonetic Extensions Supplement
+ { 0x1DC0, 0x1DFF, 6}, // Combining Diacritical Marks Supplement
+ { 0x1E00, 0x1EFF, 29}, // Latin Extended Additional
+ { 0x1F00, 0x1FFF, 30}, // Greek Extended
+ { 0x2000, 0x206F, 31}, // General Punctuation
+ { 0x2070, 0x209F, 32}, // Superscripts And Subscripts
+ { 0x20A0, 0x20CF, 33}, // Currency Symbols
+ { 0x20D0, 0x20FF, 34}, // Combining Diacritical Marks For Symbols
+ { 0x2100, 0x214F, 35}, // Letterlike Symbols
+ { 0x2150, 0x218F, 36}, // Number Forms
+ { 0x2190, 0x21FF, 37}, // Arrows
+ { 0x2200, 0x22FF, 38}, // Mathematical Operators
+ { 0x2300, 0x23FF, 39}, // Miscellaneous Technical
+ { 0x2400, 0x243F, 40}, // Control Pictures
+ { 0x2440, 0x245F, 41}, // Optical Character Recognition
+ { 0x2460, 0x24FF, 42}, // Enclosed Alphanumerics
+ { 0x2500, 0x257F, 43}, // Box Drawing
+ { 0x2580, 0x259F, 44}, // Block Elements
+ { 0x25A0, 0x25FF, 45}, // Geometric Shapes
+ { 0x2600, 0x26FF, 46}, // Miscellaneous Symbols
+ { 0x2700, 0x27BF, 47}, // Dingbats
+ { 0x27C0, 0x27EF, 38}, // Miscellaneous Mathematical Symbols-A
+ { 0x27F0, 0x27FF, 37}, // Supplemental Arrows-A
+ { 0x2800, 0x28FF, 82}, // Braille Patterns
+ { 0x2900, 0x297F, 37}, // Supplemental Arrows-B
+ { 0x2980, 0x29FF, 38}, // Miscellaneous Mathematical Symbols-B
+ { 0x2A00, 0x2AFF, 38}, // Supplemental Mathematical Operators
+ { 0x2B00, 0x2BFF, 37}, // Miscellaneous Symbols and Arrows
+ { 0x2C00, 0x2C5F, 97}, // Glagolitic
+ { 0x2C60, 0x2C7F, 29}, // Latin Extended-C
+ { 0x2C80, 0x2CFF, 8}, // Coptic
+ { 0x2D00, 0x2D2F, 26}, // Georgian Supplement
+ { 0x2D30, 0x2D7F, 98}, // Tifinagh
+ { 0x2D80, 0x2DDF, 75}, // Ethiopic Extended
+ { 0x2DE0, 0x2DFF, 9}, // Cyrillic Extended-A
+ { 0x2E00, 0x2E7F, 31}, // Supplemental Punctuation
+ { 0x2E80, 0x2EFF, 59}, // CJK Radicals Supplement
+ { 0x2F00, 0x2FDF, 59}, // Kangxi Radicals
+ { 0x2FF0, 0x2FFF, 59}, // Ideographic Description Characters
+ { 0x3000, 0x303F, 48}, // CJK Symbols And Punctuation
+ { 0x3040, 0x309F, 49}, // Hiragana
+ { 0x30A0, 0x30FF, 50}, // Katakana
+ { 0x3100, 0x312F, 51}, // Bopomofo
+ { 0x3130, 0x318F, 52}, // Hangul Compatibility Jamo
+ { 0x3190, 0x319F, 59}, // Kanbun
+ { 0x31A0, 0x31BF, 51}, // Bopomofo Extended
+ { 0x31C0, 0x31EF, 61}, // CJK Strokes
+ { 0x31F0, 0x31FF, 50}, // Katakana Phonetic Extensions
+ { 0x3200, 0x32FF, 54}, // Enclosed CJK Letters And Months
+ { 0x3300, 0x33FF, 55}, // CJK Compatibility
+ { 0x3400, 0x4DBF, 59}, // CJK Unified Ideographs Extension A
+ { 0x4DC0, 0x4DFF, 99}, // Yijing Hexagram Symbols
+ { 0x4E00, 0x9FFF, 59}, // CJK Unified Ideographs
+ { 0xA000, 0xA48F, 83}, // Yi Syllables
+ { 0xA490, 0xA4CF, 83}, // Yi Radicals
+ { 0xA500, 0xA63F, 12}, // Vai
+ { 0xA640, 0xA69F, 9}, // Cyrillic Extended-B
+ { 0xA700, 0xA71F, 5}, // Modifier Tone Letters
+ { 0xA720, 0xA7FF, 29}, // Latin Extended-D
+ { 0xA800, 0xA82F, 100}, // Syloti Nagri
+ { 0xA840, 0xA87F, 53}, // Phags-pa
+ { 0xA880, 0xA8DF, 115}, // Saurashtra
+ { 0xA900, 0xA92F, 116}, // Kayah Li
+ { 0xA930, 0xA95F, 117}, // Rejang
+ { 0xAA00, 0xAA5F, 118}, // Cham
+ { 0xAC00, 0xD7AF, 56}, // Hangul Syllables
+ { 0xD800, 0xDFFF, 57}, // Non-Plane 0 *
+ { 0xE000, 0xF8FF, 60}, // Private Use Area (plane 0)
+ { 0xF900, 0xFAFF, 61}, // CJK Compatibility Ideographs
+ { 0xFB00, 0xFB4F, 62}, // Alphabetic Presentation Forms
+ { 0xFB50, 0xFDFF, 63}, // Arabic Presentation Forms-A
+ { 0xFE00, 0xFE0F, 91}, // Variation Selectors
+ { 0xFE10, 0xFE1F, 65}, // Vertical Forms
+ { 0xFE20, 0xFE2F, 64}, // Combining Half Marks
+ { 0xFE30, 0xFE4F, 65}, // CJK Compatibility Forms
+ { 0xFE50, 0xFE6F, 66}, // Small Form Variants
+ { 0xFE70, 0xFEFF, 67}, // Arabic Presentation Forms-B
+ { 0xFF00, 0xFFEF, 68}, // Halfwidth And Fullwidth Forms
+ { 0xFFF0, 0xFFFF, 69}, // Specials
+ { 0x10000, 0x1007F, 101}, // Linear B Syllabary
+ { 0x10080, 0x100FF, 101}, // Linear B Ideograms
+ { 0x10100, 0x1013F, 101}, // Aegean Numbers
+ { 0x10140, 0x1018F, 102}, // Ancient Greek Numbers
+ { 0x10190, 0x101CF, 119}, // Ancient Symbols
+ { 0x101D0, 0x101FF, 120}, // Phaistos Disc
+ { 0x10280, 0x1029F, 121}, // Lycian
+ { 0x102A0, 0x102DF, 121}, // Carian
+ { 0x10300, 0x1032F, 85}, // Old Italic
+ { 0x10330, 0x1034F, 86}, // Gothic
+ { 0x10380, 0x1039F, 103}, // Ugaritic
+ { 0x103A0, 0x103DF, 104}, // Old Persian
+ { 0x10400, 0x1044F, 87}, // Deseret
+ { 0x10450, 0x1047F, 105}, // Shavian
+ { 0x10480, 0x104AF, 106}, // Osmanya
+ { 0x10800, 0x1083F, 107}, // Cypriot Syllabary
+ { 0x10900, 0x1091F, 58}, // Phoenician
+ { 0x10920, 0x1093F, 121}, // Lydian
+ { 0x10A00, 0x10A5F, 108}, // Kharoshthi
+ { 0x12000, 0x123FF, 110}, // Cuneiform
+ { 0x12400, 0x1247F, 110}, // Cuneiform Numbers and Punctuation
+ { 0x1D000, 0x1D0FF, 88}, // Byzantine Musical Symbols
+ { 0x1D100, 0x1D1FF, 88}, // Musical Symbols
+ { 0x1D200, 0x1D24F, 88}, // Ancient Greek Musical Notation
+ { 0x1D300, 0x1D35F, 109}, // Tai Xuan Jing Symbols
+ { 0x1D360, 0x1D37F, 111}, // Counting Rod Numerals
+ { 0x1D400, 0x1D7FF, 89}, // Mathematical Alphanumeric Symbols
+ { 0x1F000, 0x1F02F, 122}, // Mahjong Tiles
+ { 0x1F030, 0x1F09F, 122}, // Domino Tiles
+ { 0x20000, 0x2A6DF, 59}, // CJK Unified Ideographs Extension B
+ { 0x2F800, 0x2FA1F, 61}, // CJK Compatibility Ideographs Supplement
+ { 0xE0000, 0xE007F, 92}, // Tags
+ { 0xE0100, 0xE01EF, 91}, // Variation Selectors Supplement
+ { 0xF0000, 0xFFFFD, 90}, // Private Use (plane 15)
+ {0x100000, 0x10FFFD, 90}, // Private Use (plane 16)
+};
+
+/* Returns the bit stored in os2UnicodeRangesSorted for the range that
+ * contains cp, or (unsigned int) -1 if no entry contains it.
+ * The previous empty body flowed off the end of a non-void function. */
+static unsigned int hb_get_unicode_range_bit (hb_codepoint_t cp)
+{
+  /* Binary search; relies on the table being sorted by start with
+   * non-overlapping ranges, as its name and contents indicate. */
+  unsigned int lo = 0;
+  unsigned int hi = sizeof (os2UnicodeRangesSorted) / sizeof (os2UnicodeRangesSorted[0]);
+
+  while (lo < hi)
+  {
+    unsigned int mid = lo + (hi - lo) / 2;
+    const Range &range = os2UnicodeRangesSorted[mid];
+
+    if (cp < range.start)
+      hi = mid;
+    else if (cp > range.end)
+      lo = mid + 1;
+    else
+      return range.bit;
+  }
+
+  return (unsigned int) -1;
+}
+
+} /* namespace OT */
+
+#endif /* HB_OT_OS2_UNICODE_RANGES_HH */
More information about the HarfBuzz
mailing list