[HarfBuzz] harfbuzz: Branch 'master' - 5 commits
Behdad Esfahbod
behdad at kemper.freedesktop.org
Wed Jul 16 12:37:03 PDT 2014
src/hb-buffer-private.hh | 1
src/hb-buffer.cc | 79 ++++++++-
src/hb-buffer.h | 23 ++
src/hb-ot-layout-gsubgpos-private.hh | 25 --
src/hb-utf-private.hh | 306 ++++++++++++++++++-----------------
test/api/test-buffer.c | 65 +++++++
6 files changed, 328 insertions(+), 171 deletions(-)
New commits:
commit 976c8f455221eb599d1c446eafd88d51d7d2aa65
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 16 15:34:20 2014 -0400
New API: hb_buffer_[sg]et_replacement_codepoint()
With this change, we now by default replace ill-formed UTF-8/16/32
sequences with U+FFFD. This can be changed by calling the new API on the
buffer. Previously the replacement value was (hb_codepoint_t)-1.
Note that hb_buffer_clear_contents() does NOT reset the replacement
character.
See discussion here:
https://github.com/behdad/harfbuzz/commit/6f13b6d62daae4989e3cc2fe4b168e5c59650964
New API:
hb_buffer_set_replacement_codepoint()
hb_buffer_get_replacement_codepoint()
diff --git a/src/hb-buffer-private.hh b/src/hb-buffer-private.hh
index 3a2b9ab..5eccd3c 100644
--- a/src/hb-buffer-private.hh
+++ b/src/hb-buffer-private.hh
@@ -52,6 +52,7 @@ struct hb_buffer_t {
hb_unicode_funcs_t *unicode; /* Unicode functions */
hb_segment_properties_t props; /* Script, language, direction */
hb_buffer_flags_t flags; /* BOT / EOT / etc. */
+ hb_codepoint_t replacement; /* U+FFFD or something else. */
/* Buffer contents */
diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc
index 242cded..2377ba4 100644
--- a/src/hb-buffer.cc
+++ b/src/hb-buffer.cc
@@ -178,6 +178,7 @@ hb_buffer_t::reset (void)
hb_unicode_funcs_destroy (unicode);
unicode = hb_unicode_funcs_get_default ();
+ replacement = HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT;
clear ();
}
@@ -703,6 +704,7 @@ hb_buffer_get_empty (void)
const_cast<hb_unicode_funcs_t *> (&_hb_unicode_funcs_nil),
HB_SEGMENT_PROPERTIES_DEFAULT,
HB_BUFFER_FLAG_DEFAULT,
+ HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT,
HB_BUFFER_CONTENT_TYPE_INVALID,
true, /* in_error */
@@ -1048,6 +1050,42 @@ hb_buffer_get_flags (hb_buffer_t *buffer)
/**
+ * hb_buffer_set_replacement_codepoint:
+ * @buffer: a buffer.
+ * @replacement:
+ *
+ *
+ *
+ * Since: 1.0
+ **/
+void
+hb_buffer_set_replacement_codepoint (hb_buffer_t *buffer,
+ hb_codepoint_t replacement)
+{
+ if (unlikely (hb_object_is_inert (buffer)))
+ return;
+
+ buffer->replacement = replacement;
+}
+
+/**
+ * hb_buffer_get_replacement_codepoint:
+ * @buffer: a buffer.
+ *
+ *
+ *
+ * Return value:
+ *
+ * Since: 1.0
+ **/
+hb_codepoint_t
+hb_buffer_get_replacement_codepoint (hb_buffer_t *buffer)
+{
+ return buffer->replacement;
+}
+
+
+/**
* hb_buffer_reset:
* @buffer: a buffer.
*
@@ -1299,6 +1337,7 @@ hb_buffer_add_utf (hb_buffer_t *buffer,
int item_length)
{
typedef hb_utf_t<T, true> utf_t;
+ const hb_codepoint_t replacement = buffer->replacement;
assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE ||
(!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID));
@@ -1330,7 +1369,7 @@ hb_buffer_add_utf (hb_buffer_t *buffer,
while (start < prev && buffer->context_len[0] < buffer->CONTEXT_LENGTH)
{
hb_codepoint_t u;
- prev = utf_t::prev (prev, start, &u);
+ prev = utf_t::prev (prev, start, &u, replacement);
buffer->context[0][buffer->context_len[0]++] = u;
}
}
@@ -1341,7 +1380,7 @@ hb_buffer_add_utf (hb_buffer_t *buffer,
{
hb_codepoint_t u;
const T *old_next = next;
- next = utf_t::next (next, end, &u);
+ next = utf_t::next (next, end, &u, replacement);
buffer->add (u, old_next - (const T *) text);
}
@@ -1351,7 +1390,7 @@ hb_buffer_add_utf (hb_buffer_t *buffer,
while (next < end && buffer->context_len[1] < buffer->CONTEXT_LENGTH)
{
hb_codepoint_t u;
- next = utf_t::next (next, end, &u);
+ next = utf_t::next (next, end, &u, replacement);
buffer->context[1][buffer->context_len[1]++] = u;
}
diff --git a/src/hb-buffer.h b/src/hb-buffer.h
index 777c3d9..7b0c920 100644
--- a/src/hb-buffer.h
+++ b/src/hb-buffer.h
@@ -186,12 +186,25 @@ hb_buffer_flags_t
hb_buffer_get_flags (hb_buffer_t *buffer);
+
+#define HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT 0xFFFDu
+
+/* Sets codepoint used to replace invalid UTF-8/16/32 entries.
+ * Default is 0xFFFDu. */
+void
+hb_buffer_set_replacement_codepoint (hb_buffer_t *buffer,
+ hb_codepoint_t replacement);
+
+hb_codepoint_t
+hb_buffer_get_replacement_codepoint (hb_buffer_t *buffer);
+
+
/* Resets the buffer. Afterwards it's as if it was just created,
* except that it has a larger buffer allocated perhaps... */
void
hb_buffer_reset (hb_buffer_t *buffer);
-/* Like reset, but does NOT clear unicode_funcs. */
+/* Like reset, but does NOT clear unicode_funcs and replacement_codepoint. */
void
hb_buffer_clear_contents (hb_buffer_t *buffer);
diff --git a/src/hb-utf-private.hh b/src/hb-utf-private.hh
index cbacd67..68216c4 100644
--- a/src/hb-utf-private.hh
+++ b/src/hb-utf-private.hh
@@ -40,7 +40,8 @@ struct hb_utf_t<uint8_t, true>
static inline const uint8_t *
next (const uint8_t *text,
const uint8_t *end,
- hb_codepoint_t *unicode)
+ hb_codepoint_t *unicode,
+ hb_codepoint_t replacement)
{
/* Written to only accept well-formed sequences.
* Based on ideas from ICU's U8_NEXT.
@@ -101,23 +102,24 @@ struct hb_utf_t<uint8_t, true>
return text;
error:
- *unicode = -1;
+ *unicode = replacement;
return text;
}
static inline const uint8_t *
prev (const uint8_t *text,
const uint8_t *start,
- hb_codepoint_t *unicode)
+ hb_codepoint_t *unicode,
+ hb_codepoint_t replacement)
{
const uint8_t *end = text--;
while (start < text && (*text & 0xc0) == 0x80 && end - text < 4)
text--;
- if (likely (next (text, end, unicode) == end))
+ if (likely (next (text, end, unicode, replacement) == end))
return text;
- *unicode = -1;
+ *unicode = replacement;
return end - 1;
}
@@ -137,7 +139,8 @@ struct hb_utf_t<uint16_t, true>
static inline const uint16_t *
next (const uint16_t *text,
const uint16_t *end,
- hb_codepoint_t *unicode)
+ hb_codepoint_t *unicode,
+ hb_codepoint_t replacement)
{
hb_codepoint_t c = *text++;
@@ -161,14 +164,15 @@ struct hb_utf_t<uint16_t, true>
}
/* Lonely / out-of-order surrogate. */
- *unicode = -1;
+ *unicode = replacement;
return text;
}
static inline const uint16_t *
prev (const uint16_t *text,
const uint16_t *start,
- hb_codepoint_t *unicode)
+ hb_codepoint_t *unicode,
+ hb_codepoint_t replacement)
{
const uint16_t *end = text--;
hb_codepoint_t c = *text;
@@ -182,10 +186,10 @@ struct hb_utf_t<uint16_t, true>
if (likely (start < text && hb_in_range (c, 0xDC00u, 0xDFFFu)))
text--;
- if (likely (next (text, end, unicode) == end))
+ if (likely (next (text, end, unicode, replacement) == end))
return text;
- *unicode = -1;
+ *unicode = replacement;
return end - 1;
}
@@ -208,7 +212,8 @@ struct hb_utf_t<uint32_t, validate>
static inline const uint32_t *
next (const uint32_t *text,
const uint32_t *end HB_UNUSED,
- hb_codepoint_t *unicode)
+ hb_codepoint_t *unicode,
+ hb_codepoint_t replacement)
{
hb_codepoint_t c = *text++;
if (validate && unlikely (c > 0x10FFFFu || hb_in_range (c, 0xD800u, 0xDFFFu)))
@@ -217,16 +222,17 @@ struct hb_utf_t<uint32_t, validate>
return text;
error:
- *unicode = -1;
+ *unicode = replacement;
return text;
}
static inline const uint32_t *
prev (const uint32_t *text,
const uint32_t *start HB_UNUSED,
- hb_codepoint_t *unicode)
+ hb_codepoint_t *unicode,
+ hb_codepoint_t replacement)
{
- next (text - 1, text, unicode);
+ next (text - 1, text, unicode, replacement);
return text - 1;
}
diff --git a/test/api/test-buffer.c b/test/api/test-buffer.c
index 1be6931..af73c3f 100644
--- a/test/api/test-buffer.c
+++ b/test/api/test-buffer.c
@@ -374,6 +374,7 @@ test_buffer_utf8_conversion (void)
unsigned int bytes, chars, i, j, len;
b = hb_buffer_create ();
+ hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
for (i = 0; i < G_N_ELEMENTS (utf8_conversion_tests); i++)
{
@@ -388,7 +389,7 @@ test_buffer_utf8_conversion (void)
for (chars = 0; test->codepoints[chars]; chars++)
;
- hb_buffer_reset (b);
+ hb_buffer_clear_contents (b);
hb_buffer_add_utf8 (b, test->utf8, bytes, 1, bytes - 2);
glyphs = hb_buffer_get_glyph_infos (b, &len);
@@ -660,6 +661,7 @@ test_buffer_utf8_validity (void)
unsigned int i;
b = hb_buffer_create ();
+ hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
for (i = 0; i < G_N_ELEMENTS (utf8_validity_tests); i++)
{
@@ -678,7 +680,7 @@ test_buffer_utf8_validity (void)
else
segment_bytes = test->max_len;
- hb_buffer_reset (b);
+ hb_buffer_clear_contents (b);
hb_buffer_add_utf8 (b, test->utf8, text_bytes, 0, segment_bytes);
glyphs = hb_buffer_get_glyph_infos (b, &len);
@@ -718,6 +720,7 @@ test_buffer_utf16_conversion (void)
unsigned int i;
b = hb_buffer_create ();
+ hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
for (i = 0; i < G_N_ELEMENTS (utf16_conversion_tests); i++)
{
@@ -732,7 +735,7 @@ test_buffer_utf16_conversion (void)
for (chars = 0; test->codepoints[chars]; chars++)
;
- hb_buffer_reset (b);
+ hb_buffer_clear_contents (b);
hb_buffer_add_utf16 (b, test->utf16, u_len, 1, u_len - 2);
glyphs = hb_buffer_get_glyph_infos (b, &len);
@@ -752,15 +755,15 @@ typedef struct {
/* note: we skip the first and last item from utf32 when adding to buffer */
static const utf32_conversion_test_t utf32_conversion_tests[] = {
- {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, -1, -1}},
+ {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, -3, -3}},
{{0x41, 0x004D, 0x0430, 0x4E8C, 0x10302, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
- {{0x41, 0xD800, 0xDF02, 0x61}, {-1, -1}},
- {{0x41, 0xD800, 0xDF02}, {-1}},
- {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, -1}},
- {{0x41, 0xD800, 0x61, 0xDF02}, {-1, 0x61}},
- {{0x41, 0xDF00, 0x61}, {-1}},
+ {{0x41, 0xD800, 0xDF02, 0x61}, {-3, -3}},
+ {{0x41, 0xD800, 0xDF02}, {-3}},
+ {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, -3}},
+ {{0x41, 0xD800, 0x61, 0xDF02}, {-3, 0x61}},
+ {{0x41, 0xDF00, 0x61}, {-3}},
{{0x41, 0x10FFFF, 0x61}, {0x10FFFF}},
- {{0x41, 0x110000, 0x61}, {-1}},
+ {{0x41, 0x110000, 0x61}, {-3}},
{{0x41, 0x61}, {0}}
};
@@ -771,6 +774,7 @@ test_buffer_utf32_conversion (void)
unsigned int i;
b = hb_buffer_create ();
+ hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -3);
for (i = 0; i < G_N_ELEMENTS (utf32_conversion_tests); i++)
{
@@ -785,7 +789,7 @@ test_buffer_utf32_conversion (void)
for (chars = 0; test->codepoints[chars]; chars++)
;
- hb_buffer_reset (b);
+ hb_buffer_clear_contents (b);
hb_buffer_add_utf32 (b, test->utf32, u_len, 1, u_len - 2);
glyphs = hb_buffer_get_glyph_infos (b, &len);
commit bcba8b45024e1eca8be77ca2657de1dc44dbf8fb
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 16 14:59:04 2014 -0400
New API hb_buffer_add_codepoints()
Like hb_buffer_add_utf32, but doesn't do any Unicode validation.
This is what hb_buffer_add_utf32 used to do until a couple of
commits ago.
diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc
index d920552..242cded 100644
--- a/src/hb-buffer.cc
+++ b/src/hb-buffer.cc
@@ -1290,7 +1290,7 @@ hb_buffer_guess_segment_properties (hb_buffer_t *buffer)
buffer->guess_segment_properties ();
}
-template <typename T>
+template <bool validate, typename T>
static inline void
hb_buffer_add_utf (hb_buffer_t *buffer,
const T *text,
@@ -1298,7 +1298,7 @@ hb_buffer_add_utf (hb_buffer_t *buffer,
unsigned int item_offset,
int item_length)
{
- typedef hb_utf_t<T> utf_t;
+ typedef hb_utf_t<T, true> utf_t;
assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE ||
(!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID));
@@ -1377,7 +1377,7 @@ hb_buffer_add_utf8 (hb_buffer_t *buffer,
unsigned int item_offset,
int item_length)
{
- hb_buffer_add_utf (buffer, (const uint8_t *) text, text_length, item_offset, item_length);
+ hb_buffer_add_utf<true> (buffer, (const uint8_t *) text, text_length, item_offset, item_length);
}
/**
@@ -1399,7 +1399,7 @@ hb_buffer_add_utf16 (hb_buffer_t *buffer,
unsigned int item_offset,
int item_length)
{
- hb_buffer_add_utf (buffer, text, text_length, item_offset, item_length);
+ hb_buffer_add_utf<true> (buffer, text, text_length, item_offset, item_length);
}
/**
@@ -1421,7 +1421,29 @@ hb_buffer_add_utf32 (hb_buffer_t *buffer,
unsigned int item_offset,
int item_length)
{
- hb_buffer_add_utf (buffer, text, text_length, item_offset, item_length);
+ hb_buffer_add_utf<true> (buffer, text, text_length, item_offset, item_length);
+}
+
+/**
+ * hb_buffer_add_codepoints:
+ * @buffer: a buffer.
+ * @text: (array length=text_length):
+ * @text_length:
+ * @item_offset:
+ * @item_length:
+ *
+ *
+ *
+ * Since: 1.0
+ **/
+void
+hb_buffer_add_codepoints (hb_buffer_t *buffer,
+ const hb_codepoint_t *text,
+ int text_length,
+ unsigned int item_offset,
+ int item_length)
+{
+ hb_buffer_add_utf<false> (buffer, text, text_length, item_offset, item_length);
}
diff --git a/src/hb-buffer.h b/src/hb-buffer.h
index 3086851..777c3d9 100644
--- a/src/hb-buffer.h
+++ b/src/hb-buffer.h
@@ -240,6 +240,14 @@ hb_buffer_add_utf32 (hb_buffer_t *buffer,
unsigned int item_offset,
int item_length);
+/* Like add_utf32 but does NOT check for invalid Unicode codepoints. */
+void
+hb_buffer_add_codepoints (hb_buffer_t *buffer,
+ const hb_codepoint_t *text,
+ int text_length,
+ unsigned int item_offset,
+ int item_length);
+
/* Clears any new items added at the end */
hb_bool_t
commit 625dbf141a05f1ae81a7b8cbc529996370101284
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 16 14:49:55 2014 -0400
[buffer] Templatize UTF-* functions
diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc
index 76bb10c..d920552 100644
--- a/src/hb-buffer.cc
+++ b/src/hb-buffer.cc
@@ -1298,6 +1298,8 @@ hb_buffer_add_utf (hb_buffer_t *buffer,
unsigned int item_offset,
int item_length)
{
+ typedef hb_utf_t<T> utf_t;
+
assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE ||
(!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID));
@@ -1305,7 +1307,7 @@ hb_buffer_add_utf (hb_buffer_t *buffer,
return;
if (text_length == -1)
- text_length = hb_utf_strlen (text);
+ text_length = utf_t::strlen (text);
if (item_length == -1)
item_length = text_length - item_offset;
@@ -1328,7 +1330,7 @@ hb_buffer_add_utf (hb_buffer_t *buffer,
while (start < prev && buffer->context_len[0] < buffer->CONTEXT_LENGTH)
{
hb_codepoint_t u;
- prev = hb_utf_prev (prev, start, &u);
+ prev = utf_t::prev (prev, start, &u);
buffer->context[0][buffer->context_len[0]++] = u;
}
}
@@ -1339,7 +1341,7 @@ hb_buffer_add_utf (hb_buffer_t *buffer,
{
hb_codepoint_t u;
const T *old_next = next;
- next = hb_utf_next (next, end, &u);
+ next = utf_t::next (next, end, &u);
buffer->add (u, old_next - (const T *) text);
}
@@ -1349,7 +1351,7 @@ hb_buffer_add_utf (hb_buffer_t *buffer,
while (next < end && buffer->context_len[1] < buffer->CONTEXT_LENGTH)
{
hb_codepoint_t u;
- next = hb_utf_next (next, end, &u);
+ next = utf_t::next (next, end, &u);
buffer->context[1][buffer->context_len[1]++] = u;
}
diff --git a/src/hb-utf-private.hh b/src/hb-utf-private.hh
index 398f73c..cbacd67 100644
--- a/src/hb-utf-private.hh
+++ b/src/hb-utf-private.hh
@@ -29,202 +29,215 @@
#include "hb-private.hh"
+template <typename T, bool validate=true> struct hb_utf_t;
+
/* UTF-8 */
-static inline const uint8_t *
-hb_utf_next (const uint8_t *text,
- const uint8_t *end,
- hb_codepoint_t *unicode)
+template <>
+struct hb_utf_t<uint8_t, true>
{
- /* Written to only accept well-formed sequences.
- * Based on ideas from ICU's U8_NEXT.
- * Generates a -1 for each ill-formed byte. */
+ static inline const uint8_t *
+ next (const uint8_t *text,
+ const uint8_t *end,
+ hb_codepoint_t *unicode)
+ {
+ /* Written to only accept well-formed sequences.
+ * Based on ideas from ICU's U8_NEXT.
+ * Generates a -1 for each ill-formed byte. */
- hb_codepoint_t c = *text++;
+ hb_codepoint_t c = *text++;
- if (c > 0x7Fu)
- {
- if (hb_in_range (c, 0xC2u, 0xDFu)) /* Two-byte */
+ if (c > 0x7Fu)
{
- unsigned int t1;
- if (likely (text < end &&
- (t1 = text[0] - 0x80u) <= 0x3Fu))
+ if (hb_in_range (c, 0xC2u, 0xDFu)) /* Two-byte */
{
- c = ((c&0x1Fu)<<6) | t1;
- text++;
+ unsigned int t1;
+ if (likely (text < end &&
+ (t1 = text[0] - 0x80u) <= 0x3Fu))
+ {
+ c = ((c&0x1Fu)<<6) | t1;
+ text++;
+ }
+ else
+ goto error;
}
- else
- goto error;
- }
- else if (hb_in_range (c, 0xE0u, 0xEFu)) /* Three-byte */
- {
- unsigned int t1, t2;
- if (likely (1 < end - text &&
- (t1 = text[0] - 0x80u) <= 0x3Fu &&
- (t2 = text[1] - 0x80u) <= 0x3Fu))
+ else if (hb_in_range (c, 0xE0u, 0xEFu)) /* Three-byte */
{
- c = ((c&0xFu)<<12) | (t1<<6) | t2;
- if (unlikely (c < 0x0800u || hb_in_range (c, 0xD800u, 0xDFFFu)))
+ unsigned int t1, t2;
+ if (likely (1 < end - text &&
+ (t1 = text[0] - 0x80u) <= 0x3Fu &&
+ (t2 = text[1] - 0x80u) <= 0x3Fu))
+ {
+ c = ((c&0xFu)<<12) | (t1<<6) | t2;
+ if (unlikely (c < 0x0800u || hb_in_range (c, 0xD800u, 0xDFFFu)))
+ goto error;
+ text += 2;
+ }
+ else
goto error;
- text += 2;
}
- else
- goto error;
- }
- else if (hb_in_range (c, 0xF0u, 0xF4u)) /* Four-byte */
- {
- unsigned int t1, t2, t3;
- if (likely (2 < end - text &&
- (t1 = text[0] - 0x80u) <= 0x3Fu &&
- (t2 = text[1] - 0x80u) <= 0x3Fu &&
- (t3 = text[2] - 0x80u) <= 0x3Fu))
+ else if (hb_in_range (c, 0xF0u, 0xF4u)) /* Four-byte */
{
- c = ((c&0x7u)<<18) | (t1<<12) | (t2<<6) | t3;
- if (unlikely (!hb_in_range (c, 0x10000u, 0x10FFFFu)))
+ unsigned int t1, t2, t3;
+ if (likely (2 < end - text &&
+ (t1 = text[0] - 0x80u) <= 0x3Fu &&
+ (t2 = text[1] - 0x80u) <= 0x3Fu &&
+ (t3 = text[2] - 0x80u) <= 0x3Fu))
+ {
+ c = ((c&0x7u)<<18) | (t1<<12) | (t2<<6) | t3;
+ if (unlikely (!hb_in_range (c, 0x10000u, 0x10FFFFu)))
+ goto error;
+ text += 3;
+ }
+ else
goto error;
- text += 3;
}
else
goto error;
}
- else
- goto error;
- }
-
- *unicode = c;
- return text;
-
-error:
- *unicode = -1;
- return text;
-}
-static inline const uint8_t *
-hb_utf_prev (const uint8_t *text,
- const uint8_t *start,
- hb_codepoint_t *unicode)
-{
- const uint8_t *end = text--;
- while (start < text && (*text & 0xc0) == 0x80 && end - text < 4)
- text--;
+ *unicode = c;
+ return text;
- if (likely (hb_utf_next (text, end, unicode) == end))
+ error:
+ *unicode = -1;
return text;
+ }
- *unicode = -1;
- return end - 1;
-}
+ static inline const uint8_t *
+ prev (const uint8_t *text,
+ const uint8_t *start,
+ hb_codepoint_t *unicode)
+ {
+ const uint8_t *end = text--;
+ while (start < text && (*text & 0xc0) == 0x80 && end - text < 4)
+ text--;
+ if (likely (next (text, end, unicode) == end))
+ return text;
-static inline unsigned int
-hb_utf_strlen (const uint8_t *text)
-{
- return strlen ((const char *) text);
-}
+ *unicode = -1;
+ return end - 1;
+ }
+
+ static inline unsigned int
+ strlen (const uint8_t *text)
+ {
+ return ::strlen ((const char *) text);
+ }
+};
/* UTF-16 */
-static inline const uint16_t *
-hb_utf_next (const uint16_t *text,
- const uint16_t *end,
- hb_codepoint_t *unicode)
+template <>
+struct hb_utf_t<uint16_t, true>
{
- hb_codepoint_t c = *text++;
-
- if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu)))
+ static inline const uint16_t *
+ next (const uint16_t *text,
+ const uint16_t *end,
+ hb_codepoint_t *unicode)
{
- *unicode = c;
+ hb_codepoint_t c = *text++;
+
+ if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu)))
+ {
+ *unicode = c;
+ return text;
+ }
+
+ if (likely (hb_in_range (c, 0xD800u, 0xDBFFu)))
+ {
+ /* High-surrogate in c */
+ hb_codepoint_t l;
+ if (text < end && ((l = *text), likely (hb_in_range (l, 0xDC00u, 0xDFFFu))))
+ {
+ /* Low-surrogate in l */
+ *unicode = (c << 10) + l - ((0xD800u << 10) - 0x10000u + 0xDC00u);
+ text++;
+ return text;
+ }
+ }
+
+ /* Lonely / out-of-order surrogate. */
+ *unicode = -1;
return text;
}
- if (likely (hb_in_range (c, 0xD800u, 0xDBFFu)))
+ static inline const uint16_t *
+ prev (const uint16_t *text,
+ const uint16_t *start,
+ hb_codepoint_t *unicode)
{
- /* High-surrogate in c */
- hb_codepoint_t l;
- if (text < end && ((l = *text), likely (hb_in_range (l, 0xDC00u, 0xDFFFu))))
+ const uint16_t *end = text--;
+ hb_codepoint_t c = *text;
+
+ if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu)))
{
- /* Low-surrogate in l */
- *unicode = (c << 10) + l - ((0xD800u << 10) - 0x10000u + 0xDC00u);
- text++;
- return text;
+ *unicode = c;
+ return text;
}
- }
- /* Lonely / out-of-order surrogate. */
- *unicode = -1;
- return text;
-}
+ if (likely (start < text && hb_in_range (c, 0xDC00u, 0xDFFFu)))
+ text--;
-static inline const uint16_t *
-hb_utf_prev (const uint16_t *text,
- const uint16_t *start,
- hb_codepoint_t *unicode)
-{
- const uint16_t *end = text--;
- hb_codepoint_t c = *text;
+ if (likely (next (text, end, unicode) == end))
+ return text;
- if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu)))
- {
- *unicode = c;
- return text;
+ *unicode = -1;
+ return end - 1;
}
- if (likely (start < text && hb_in_range (c, 0xDC00u, 0xDFFFu)))
- text--;
- if (likely (hb_utf_next (text, end, unicode) == end))
- return text;
+ static inline unsigned int
+ strlen (const uint16_t *text)
+ {
+ unsigned int l = 0;
+ while (*text++) l++;
+ return l;
+ }
+};
- *unicode = -1;
- return end - 1;
-}
+/* UTF-32 */
-static inline unsigned int
-hb_utf_strlen (const uint16_t *text)
+template <bool validate>
+struct hb_utf_t<uint32_t, validate>
{
- unsigned int l = 0;
- while (*text++) l++;
- return l;
-}
-
+ static inline const uint32_t *
+ next (const uint32_t *text,
+ const uint32_t *end HB_UNUSED,
+ hb_codepoint_t *unicode)
+ {
+ hb_codepoint_t c = *text++;
+ if (validate && unlikely (c > 0x10FFFFu || hb_in_range (c, 0xD800u, 0xDFFFu)))
+ goto error;
+ *unicode = c;
+ return text;
-/* UTF-32 */
+ error:
+ *unicode = -1;
+ return text;
+ }
-static inline const uint32_t *
-hb_utf_next (const uint32_t *text,
- const uint32_t *end HB_UNUSED,
- hb_codepoint_t *unicode)
-{
- hb_codepoint_t c = *text++;
- if (unlikely (c > 0x10FFFFu || hb_in_range (c, 0xD800u, 0xDFFFu)))
- goto error;
- *unicode = c;
- return text;
-
-error:
- *unicode = -1;
- return text;
-}
-
-static inline const uint32_t *
-hb_utf_prev (const uint32_t *text,
- const uint32_t *start HB_UNUSED,
- hb_codepoint_t *unicode)
-{
- hb_utf_next (text - 1, text, unicode);
- return text - 1;
-}
+ static inline const uint32_t *
+ prev (const uint32_t *text,
+ const uint32_t *start HB_UNUSED,
+ hb_codepoint_t *unicode)
+ {
+ next (text - 1, text, unicode);
+ return text - 1;
+ }
-static inline unsigned int
-hb_utf_strlen (const uint32_t *text)
-{
- unsigned int l = 0;
- while (*text++) l++;
- return l;
-}
+ static inline unsigned int
+ strlen (const uint32_t *text)
+ {
+ unsigned int l = 0;
+ while (*text++) l++;
+ return l;
+ }
+};
#endif /* HB_UTF_PRIVATE_HH */
commit e634fed4285ce440d277345727ed01757df6d779
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 16 14:17:26 2014 -0400
[buffer] Validate UTF-32 input
Same as what we do for UTF-8 and UTF-16.
diff --git a/src/hb-utf-private.hh b/src/hb-utf-private.hh
index a4c6236..398f73c 100644
--- a/src/hb-utf-private.hh
+++ b/src/hb-utf-private.hh
@@ -198,7 +198,14 @@ hb_utf_next (const uint32_t *text,
const uint32_t *end HB_UNUSED,
hb_codepoint_t *unicode)
{
- *unicode = *text++;
+ hb_codepoint_t c = *text++;
+ if (unlikely (c > 0x10FFFFu || hb_in_range (c, 0xD800u, 0xDFFFu)))
+ goto error;
+ *unicode = c;
+ return text;
+
+error:
+ *unicode = -1;
return text;
}
@@ -207,8 +214,8 @@ hb_utf_prev (const uint32_t *text,
const uint32_t *start HB_UNUSED,
hb_codepoint_t *unicode)
{
- *unicode = *--text;
- return text;
+ hb_utf_next (text - 1, text, unicode);
+ return text - 1;
}
static inline unsigned int
diff --git a/test/api/test-buffer.c b/test/api/test-buffer.c
index 1956c92..1be6931 100644
--- a/test/api/test-buffer.c
+++ b/test/api/test-buffer.c
@@ -744,6 +744,60 @@ test_buffer_utf16_conversion (void)
hb_buffer_destroy (b);
}
+
+typedef struct {
+ const uint32_t utf32[8];
+ const uint32_t codepoints[8];
+} utf32_conversion_test_t;
+
+/* note: we skip the first and last item from utf32 when adding to buffer */
+static const utf32_conversion_test_t utf32_conversion_tests[] = {
+ {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, -1, -1}},
+ {{0x41, 0x004D, 0x0430, 0x4E8C, 0x10302, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
+ {{0x41, 0xD800, 0xDF02, 0x61}, {-1, -1}},
+ {{0x41, 0xD800, 0xDF02}, {-1}},
+ {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, -1}},
+ {{0x41, 0xD800, 0x61, 0xDF02}, {-1, 0x61}},
+ {{0x41, 0xDF00, 0x61}, {-1}},
+ {{0x41, 0x10FFFF, 0x61}, {0x10FFFF}},
+ {{0x41, 0x110000, 0x61}, {-1}},
+ {{0x41, 0x61}, {0}}
+};
+
+static void
+test_buffer_utf32_conversion (void)
+{
+ hb_buffer_t *b;
+ unsigned int i;
+
+ b = hb_buffer_create ();
+
+ for (i = 0; i < G_N_ELEMENTS (utf32_conversion_tests); i++)
+ {
+ const utf32_conversion_test_t *test = &utf32_conversion_tests[i];
+ unsigned int u_len, chars, j, len;
+ hb_glyph_info_t *glyphs;
+
+ g_test_message ("UTF-32 test #%d", i);
+
+ for (u_len = 0; test->utf32[u_len]; u_len++)
+ ;
+ for (chars = 0; test->codepoints[chars]; chars++)
+ ;
+
+ hb_buffer_reset (b);
+ hb_buffer_add_utf32 (b, test->utf32, u_len, 1, u_len - 2);
+
+ glyphs = hb_buffer_get_glyph_infos (b, &len);
+ g_assert_cmpint (len, ==, chars);
+ for (j = 0; j < chars; j++)
+ g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
+ }
+
+ hb_buffer_destroy (b);
+}
+
+
static void
test_empty (hb_buffer_t *b)
{
@@ -810,6 +864,7 @@ main (int argc, char **argv)
hb_test_add (test_buffer_utf8_conversion);
hb_test_add (test_buffer_utf8_validity);
hb_test_add (test_buffer_utf16_conversion);
+ hb_test_add (test_buffer_utf32_conversion);
hb_test_add (test_buffer_empty);
return hb_test_run();
commit b98c5db32d15fcfb27ce2f6737203ce1ad124319
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Jul 16 13:44:01 2014 -0400
Minor refactoring
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index 470c353..546ff4b 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -349,11 +349,7 @@ struct hb_apply_context_t
may_skip (const hb_apply_context_t *c,
const hb_glyph_info_t &info) const
{
- unsigned int property;
-
- property = _hb_glyph_info_get_glyph_props (&info);
-
- if (!c->match_properties (info.codepoint, property, lookup_props))
+ if (!c->check_glyph_property (&info, lookup_props))
return SKIP_YES;
if (unlikely (_hb_glyph_info_is_default_ignorable (&info) &&
@@ -537,10 +533,12 @@ struct hb_apply_context_t
}
inline bool
- match_properties (hb_codepoint_t glyph,
- unsigned int glyph_props,
- unsigned int lookup_props) const
+ check_glyph_property (const hb_glyph_info_t *info,
+ unsigned int lookup_props) const
{
+ hb_codepoint_t glyph = info->codepoint;
+ unsigned int glyph_props = _hb_glyph_info_get_glyph_props (info);
+
/* Not covered, if, for example, glyph class is ligature and
* lookup_props includes LookupFlags::IgnoreLigatures
*/
@@ -553,17 +551,6 @@ struct hb_apply_context_t
return true;
}
- inline bool
- check_glyph_property (hb_glyph_info_t *info,
- unsigned int lookup_props) const
- {
- unsigned int property;
-
- property = _hb_glyph_info_get_glyph_props (info);
-
- return match_properties (info->codepoint, property, lookup_props);
- }
-
inline void _set_glyph_props (hb_codepoint_t glyph_index,
unsigned int class_guess = 0,
bool ligature = false,
More information about the HarfBuzz
mailing list