[HarfBuzz] harfbuzz: Branch 'master' - 2 commits

Behdad Esfahbod behdad at kemper.freedesktop.org
Thu Apr 11 13:31:17 PDT 2013


 src/hb-old/harfbuzz-arabic.c   |   44 ++++++++++++++++++++---------------------
 src/hb-old/harfbuzz-external.h |   18 ++++++++--------
 src/hb-old/harfbuzz-greek.c    |    2 -
 src/hb-old/harfbuzz-hebrew.c   |    2 -
 src/hb-old/harfbuzz-indic.cpp  |   10 ++++-----
 src/hb-old/harfbuzz-shaper.cpp |    6 +++--
 src/hb-old/harfbuzz-shaper.h   |    1 
 util/options.cc                |    1 
 util/options.hh                |    2 +
 util/shape-consumer.hh         |   15 ++++++++-----
 10 files changed, 55 insertions(+), 46 deletions(-)

New commits:
commit 50067e280f381918b8e90b26df9e7bf20f98f0bd
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Thu Apr 11 16:31:01 2013 -0400

    [util] Add --num-iterations
    
    Useful for profiling shapers.

diff --git a/util/options.cc b/util/options.cc
index 5e57548..66b5e15 100644
--- a/util/options.cc
+++ b/util/options.cc
@@ -276,6 +276,7 @@ shape_options_t::add_options (option_parser_t *parser)
     {"preserve-default-ignorables",0, 0, G_OPTION_ARG_NONE,	&this->preserve_default_ignorables,	"Preserve Default-Ignorable characters",	NULL},
     {"utf8-clusters",	0, 0, G_OPTION_ARG_NONE,	&this->utf8_clusters,		"Use UTF8 byte indices, not char indices",	NULL},
     {"normalize-glyphs",0, 0, G_OPTION_ARG_NONE,	&this->normalize_glyphs,	"Rearrange glyph clusters in nominal order",	NULL},
+    {"num-iterations",	0, 0, G_OPTION_ARG_INT,		&this->num_iterations,		"Run shaper N times (default: 1)",	"N"},
     {NULL}
   };
   parser->add_group (entries,
diff --git a/util/options.hh b/util/options.hh
index 35ea0bc..9302427 100644
--- a/util/options.hh
+++ b/util/options.hh
@@ -150,6 +150,7 @@ struct shape_options_t : option_group_t
     shapers = NULL;
     utf8_clusters = false;
     normalize_glyphs = false;
+    num_iterations = 1;
 
     add_options (parser);
   }
@@ -234,6 +235,7 @@ struct shape_options_t : option_group_t
   char **shapers;
   hb_bool_t utf8_clusters;
   hb_bool_t normalize_glyphs;
+  unsigned int num_iterations;
 };
 
 
diff --git a/util/shape-consumer.hh b/util/shape-consumer.hh
index bf1a22d..2b86a3c 100644
--- a/util/shape-consumer.hh
+++ b/util/shape-consumer.hh
@@ -51,14 +51,17 @@ struct shape_consumer_t
   {
     output.new_line ();
 
-    shaper.populate_buffer (buffer, text, text_len, text_before, text_after);
     output.consume_text (buffer, text, text_len, shaper.utf8_clusters);
 
-    if (!shaper.shape (font, buffer)) {
-      failed = true;
-      hb_buffer_set_length (buffer, 0);
-      output.shape_failed (buffer, text, text_len, shaper.utf8_clusters);
-      return;
+    for (unsigned int n = shaper.num_iterations; n; n--)
+    {
+      shaper.populate_buffer (buffer, text, text_len, text_before, text_after);
+      if (!shaper.shape (font, buffer)) {
+	failed = true;
+	hb_buffer_set_length (buffer, 0);
+	output.shape_failed (buffer, text, text_len, shaper.utf8_clusters);
+	return;
+      }
     }
 
     output.consume_glyphs (buffer, text, text_len, shaper.utf8_clusters);
commit 08677c250767bfc2c0dd1fc934dea92984741291
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Thu Apr 11 14:45:58 2013 -0400

    [old] Speed up Unicode properties access in hb-old shaper
    
    Just to be sure it's not punishing the old shaper performance
    numbers.  Doesn't seem to have a measurable effect.

diff --git a/src/hb-old/harfbuzz-arabic.c b/src/hb-old/harfbuzz-arabic.c
index 51f839a..206411f 100644
--- a/src/hb-old/harfbuzz-arabic.c
+++ b/src/hb-old/harfbuzz-arabic.c
@@ -237,13 +237,13 @@ static const unsigned char arabic_group[0x150] = {
     ArabicNone, Zain, Kaph, Fe,
 };
 
-static ArabicGroup arabicGroup(unsigned short uc)
+static ArabicGroup arabicGroup(hb_unicode_funcs_t *ufuncs, unsigned short uc)
 {
     if (uc >= 0x0600 && uc < 0x750)
         return (ArabicGroup) arabic_group[uc-0x600];
     else if (uc == 0x200d)
         return Center;
-    else if (HB_GetUnicodeCharCategory(uc) == HB_Separator_Space)
+    else if (HB_GetUnicodeCharCategory(ufuncs, uc) == HB_Separator_Space)
         return ArabicSpace;
     else
         return ArabicNone;
@@ -375,14 +375,14 @@ This seems to imply that we have at most one kashida point per arabic word.
 
 */
 
-static void getArabicProperties(const unsigned short *chars, int len, HB_ArabicProperties *properties)
+static void getArabicProperties(hb_unicode_funcs_t *ufuncs, const unsigned short *chars, int len, HB_ArabicProperties *properties)
 {
 /*     qDebug("arabicSyriacOpenTypeShape: properties:"); */
     int lastPos = 0;
     int lastGroup = ArabicNone;
     int i = 0;
 
-    ArabicGroup group = arabicGroup(chars[0]);
+    ArabicGroup group = arabicGroup(ufuncs, chars[0]);
     Joining j = joining_for_group[group];
     ArabicShape shape = joining_table[XIsolated][j].form2;
     properties[0].justification = HB_NoJustification;
@@ -391,7 +391,7 @@ static void getArabicProperties(const unsigned short *chars, int len, HB_ArabicP
         /* #### fix handling for spaces and punktuation */
         properties[i].justification = HB_NoJustification;
 
-        group = arabicGroup(chars[i]);
+        group = arabicGroup(ufuncs, chars[i]);
         j = joining_for_group[group];
 
         if (j == JTransparent) {
@@ -460,7 +460,7 @@ static void getArabicProperties(const unsigned short *chars, int len, HB_ArabicP
 
         case Yeh:
         case Reh:
-            if (properties[lastPos].shape == XMedial && arabicGroup(chars[lastPos]) == Beh)
+            if (properties[lastPos].shape == XMedial && arabicGroup(ufuncs, chars[lastPos]) == Beh)
                 properties[lastPos-1].justification = HB_Arabic_BaRa;
             break;
 
@@ -504,7 +504,7 @@ static Joining getNkoJoining(unsigned short uc)
     return JNone;
 }
 
-static void getNkoProperties(const unsigned short *chars, int len, HB_ArabicProperties *properties)
+static void getNkoProperties(hb_unicode_funcs_t *ufuncs, const unsigned short *chars, int len, HB_ArabicProperties *properties)
 {
     int lastPos = 0;
     int i = 0;
@@ -514,7 +514,7 @@ static void getNkoProperties(const unsigned short *chars, int len, HB_ArabicProp
     properties[0].justification = HB_NoJustification;
 
     for (i = 1; i < len; ++i) {
-        properties[i].justification = (HB_GetUnicodeCharCategory(chars[i]) == HB_Separator_Space) ?
+        properties[i].justification = (HB_GetUnicodeCharCategory(ufuncs, chars[i]) == HB_Separator_Space) ?
                                       ArabicSpace : ArabicNone;
 
         j = getNkoJoining(chars[i]);
@@ -854,13 +854,13 @@ static int getShape(hb_uint8 cell, int shape)
 /*
   Two small helper functions for arabic shaping.
 */
-static HB_UChar16 prevChar(const HB_UChar16 *str, int pos)
+static HB_UChar16 prevChar(hb_unicode_funcs_t *ufuncs, const HB_UChar16 *str, int pos)
 {
     /*qDebug("leftChar: pos=%d", pos); */
     const HB_UChar16 *ch = str + pos - 1;
     pos--;
     while(pos > -1) {
-        if(HB_GetUnicodeCharCategory(*ch) != HB_Mark_NonSpacing)
+        if(HB_GetUnicodeCharCategory(ufuncs, *ch) != HB_Mark_NonSpacing)
             return *ch;
         pos--;
         ch--;
@@ -868,13 +868,13 @@ static HB_UChar16 prevChar(const HB_UChar16 *str, int pos)
     return ReplacementCharacter;
 }
 
-static HB_UChar16 nextChar(const HB_UChar16 *str, hb_uint32 len, hb_uint32 pos)
+static HB_UChar16 nextChar(hb_unicode_funcs_t *ufuncs, const HB_UChar16 *str, hb_uint32 len, hb_uint32 pos)
 {
     const HB_UChar16 *ch = str + pos + 1;
     pos++;
     while(pos < len) {
         /*qDebug("rightChar: %d isLetter=%d, joining=%d", pos, ch.isLetter(), ch.joining()); */
-        if(HB_GetUnicodeCharCategory(*ch) != HB_Mark_NonSpacing)
+        if(HB_GetUnicodeCharCategory(ufuncs, *ch) != HB_Mark_NonSpacing)
             return *ch;
         /* assume it's a transparent char, this might not be 100% correct */
         pos++;
@@ -883,7 +883,7 @@ static HB_UChar16 nextChar(const HB_UChar16 *str, hb_uint32 len, hb_uint32 pos)
     return ReplacementCharacter;
 }
 
-static void shapedString(const HB_UChar16 *uc, hb_uint32 stringLength, hb_uint32 from, hb_uint32 len, HB_UChar16 *shapeBuffer, int *shapedLength,
+static void shapedString(hb_unicode_funcs_t *ufuncs, const HB_UChar16 *uc, hb_uint32 stringLength, hb_uint32 from, hb_uint32 len, HB_UChar16 *shapeBuffer, int *shapedLength,
                          HB_Bool reverse, HB_GlyphAttributes *attributes, unsigned short *logClusters)
 {
     HB_ArabicProperties *properties;
@@ -910,7 +910,7 @@ static void shapedString(const HB_UChar16 *uc, hb_uint32 stringLength, hb_uint32
     }
     if (f + l < stringLength)
         ++l;
-    getArabicProperties(uc+f, l, props);
+    getArabicProperties(ufuncs, uc+f, l, props);
 
     ch = uc + from;
     data = shapeBuffer;
@@ -927,7 +927,7 @@ static void shapedString(const HB_UChar16 *uc, hb_uint32 stringLength, hb_uint32
                     goto skip;
             }
             if (reverse)
-                *data = HB_GetMirroredChar(*ch);
+                *data = HB_GetMirroredChar(ufuncs, *ch);
             else
                 *data = *ch;
         } else {
@@ -939,7 +939,7 @@ static void shapedString(const HB_UChar16 *uc, hb_uint32 stringLength, hb_uint32
             hb_uint16 map;
             switch (c) {
                 case 0x44: { /* lam */
-                    const HB_UChar16 pch = nextChar(uc, stringLength, pos);
+                    const HB_UChar16 pch = nextChar(ufuncs, uc, stringLength, pos);
                     if ((pch >> 8) == 0x06) {
                         switch (pch & 0xff) {
                             case 0x22:
@@ -959,7 +959,7 @@ static void shapedString(const HB_UChar16 *uc, hb_uint32 stringLength, hb_uint32
                 case 0x23: /* alef with hamza above */
                 case 0x25: /* alef with hamza below */
                 case 0x27: /* alef */
-                    if (prevChar(uc, pos) == 0x0644) {
+                    if (prevChar(ufuncs, uc, pos) == 0x0644) {
                         /* have a lam alef ligature */
                         /*qDebug(" alef of lam-alef ligature"); */
                         goto skip;
@@ -973,7 +973,7 @@ static void shapedString(const HB_UChar16 *uc, hb_uint32 stringLength, hb_uint32
         }
         /* ##### Fixme */
         /*glyphs[gpos].attributes.zeroWidth = zeroWidth; */
-        if (HB_GetUnicodeCharCategory(*ch) == HB_Mark_NonSpacing) {
+        if (HB_GetUnicodeCharCategory(ufuncs, *ch) == HB_Mark_NonSpacing) {
             attributes[gpos].mark = TRUE;
 /*             qDebug("glyph %d (char %d) is mark!", gpos, i); */
         } else {
@@ -981,7 +981,7 @@ static void shapedString(const HB_UChar16 *uc, hb_uint32 stringLength, hb_uint32
             clusterStart = data - shapeBuffer;
         }
         attributes[gpos].clusterStart = !attributes[gpos].mark;
-        attributes[gpos].combiningClass = HB_GetUnicodeCharCombiningClass(*ch);
+        attributes[gpos].combiningClass = HB_GetUnicodeCharCombiningClass(ufuncs, *ch);
         attributes[gpos].justification = properties[i].justification;
 /*         qDebug("data[%d] = %x (from %x)", gpos, (uint)data->unicode(), ch->unicode());*/
         data++;
@@ -1063,9 +1063,9 @@ static HB_Bool arabicSyriacOpenTypeShape(HB_ShaperItem *item, HB_Bool *ot_ok)
         ++l;
     }
     if (item->item.script == HB_Script_Nko)
-        getNkoProperties(uc+f, l, props);
+        getNkoProperties(item->ufuncs, uc+f, l, props);
     else
-        getArabicProperties(uc+f, l, props);
+        getArabicProperties(item->ufuncs, uc+f, l, props);
 
     for (i = 0; i < (int)item->num_glyphs; i++) {
         apply[i] = 0;
@@ -1128,7 +1128,7 @@ HB_Bool HB_ArabicShape(HB_ShaperItem *item)
         return HB_BasicShape(item);
     }
 
-    shapedString(item->string, item->stringLength, item->item.pos, item->item.length, shapedChars, &slen,
+    shapedString(item->ufuncs, item->string, item->stringLength, item->item.pos, item->item.length, shapedChars, &slen,
                   item->item.bidiLevel % 2,
                   item->attributes, item->log_clusters);
 
diff --git a/src/hb-old/harfbuzz-external.h b/src/hb-old/harfbuzz-external.h
index 13ec15f..7a9e363 100644
--- a/src/hb-old/harfbuzz-external.h
+++ b/src/hb-old/harfbuzz-external.h
@@ -78,27 +78,27 @@ typedef enum
 } HB_CharCategory;
 
 
-static inline HB_CharCategory HB_GetUnicodeCharCategory(HB_UChar32 ch)
+static inline HB_CharCategory HB_GetUnicodeCharCategory(hb_unicode_funcs_t *funcs, HB_UChar32 ch)
 {
-  return (HB_CharCategory) hb_unicode_general_category (hb_unicode_funcs_get_default (), ch);
+  return (HB_CharCategory) hb_unicode_general_category (funcs, ch);
 }
 
-static inline int HB_GetUnicodeCharCombiningClass(HB_UChar32 ch)
+static inline int HB_GetUnicodeCharCombiningClass(hb_unicode_funcs_t *funcs, HB_UChar32 ch)
 {
-  return hb_unicode_combining_class (hb_unicode_funcs_get_default (), ch);
+  return hb_unicode_combining_class (funcs, ch);
 }
 
-static inline HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch)
+static inline HB_UChar16 HB_GetMirroredChar(hb_unicode_funcs_t *funcs, HB_UChar16 ch)
 {
-  return hb_unicode_mirroring (hb_unicode_funcs_get_default (), ch);
+  return hb_unicode_mirroring (funcs, ch);
 }
 
-static inline void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass)
+static inline void HB_GetUnicodeCharProperties(hb_unicode_funcs_t *funcs, HB_UChar32 ch, HB_CharCategory *category, int *combiningClass)
 {
   if (category)
-    *category = HB_GetUnicodeCharCategory (ch);
+    *category = HB_GetUnicodeCharCategory (funcs, ch);
   if (combiningClass)
-    *combiningClass = HB_GetUnicodeCharCombiningClass (ch);
+    *combiningClass = HB_GetUnicodeCharCombiningClass (funcs, ch);
 }
 
 HB_END_HEADER
diff --git a/src/hb-old/harfbuzz-greek.c b/src/hb-old/harfbuzz-greek.c
index 2e9b858..7d7996a 100644
--- a/src/hb-old/harfbuzz-greek.c
+++ b/src/hb-old/harfbuzz-greek.c
@@ -406,7 +406,7 @@ HB_Bool HB_GreekShape(HB_ShaperItem *shaper_item)
             HB_CharCategory category;
             int cmb;
             shapedChars[slen] = uc[i];
-            HB_GetUnicodeCharProperties(uc[i], &category, &cmb);
+            HB_GetUnicodeCharProperties(shaper_item->ufuncs, uc[i], &category, &cmb);
             if (category != HB_Mark_NonSpacing) {
                 attributes[slen].clusterStart = TRUE;
                 attributes[slen].mark = FALSE;
diff --git a/src/hb-old/harfbuzz-hebrew.c b/src/hb-old/harfbuzz-hebrew.c
index b5431a5..e3135e5 100644
--- a/src/hb-old/harfbuzz-hebrew.c
+++ b/src/hb-old/harfbuzz-hebrew.c
@@ -151,7 +151,7 @@ HB_Bool HB_HebrewShape(HB_ShaperItem *shaper_item)
                 HB_CharCategory category;
                 int cmb;
                 shapedChars[slen] = uc[i];
-                HB_GetUnicodeCharProperties(uc[i], &category, &cmb);
+                HB_GetUnicodeCharProperties(shaper_item->ufuncs, uc[i], &category, &cmb);
                 if (category != HB_Mark_NonSpacing) {
                     attributes[slen].clusterStart = TRUE;
                     attributes[slen].mark = FALSE;
diff --git a/src/hb-old/harfbuzz-indic.cpp b/src/hb-old/harfbuzz-indic.cpp
index 17e97e0..086d45c 100644
--- a/src/hb-old/harfbuzz-indic.cpp
+++ b/src/hb-old/harfbuzz-indic.cpp
@@ -30,22 +30,22 @@
 
 #define FLAG(x) (1 << (x))
 
-static HB_Bool isLetter(HB_UChar16 ucs)
+static HB_Bool isLetter(hb_unicode_funcs_t *ufuncs, HB_UChar16 ucs)
 {
     const int test = FLAG(HB_Letter_Uppercase) |
                      FLAG(HB_Letter_Lowercase) |
                      FLAG(HB_Letter_Titlecase) |
                      FLAG(HB_Letter_Modifier) |
                      FLAG(HB_Letter_Other);
-    return !!(FLAG(HB_GetUnicodeCharCategory(ucs)) & test);
+    return !!(FLAG(HB_GetUnicodeCharCategory(ufuncs, ucs)) & test);
 }
 
-static HB_Bool isMark(HB_UChar16 ucs)
+static HB_Bool isMark(hb_unicode_funcs_t *ufuncs, HB_UChar16 ucs)
 {
     const int test = FLAG(HB_Mark_NonSpacing) |
                      FLAG(HB_Mark_SpacingCombining) |
                      FLAG(HB_Mark_Enclosing);
-    return !!(FLAG(HB_GetUnicodeCharCategory(ucs)) & test);
+    return !!(FLAG(HB_GetUnicodeCharCategory(ufuncs, ucs)) & test);
 }
 
 enum Form {
@@ -1576,7 +1576,7 @@ static bool indic_shape_syllable(HB_Bool openType, HB_ShaperItem *item, bool inv
         // Ccmp always applies
         // Init
         if (item->item.pos == 0
-            || !(isLetter(item->string[item->item.pos-1]) || isMark(item->string[item->item.pos-1])))
+            || !(isLetter(item->ufuncs, item->string[item->item.pos-1]) || isMark(item->ufuncs, item->string[item->item.pos-1])))
             properties[0] &= ~InitProperty;
 
         // Nukta always applies
diff --git a/src/hb-old/harfbuzz-shaper.cpp b/src/hb-old/harfbuzz-shaper.cpp
index d1e2335..dacc36c 100644
--- a/src/hb-old/harfbuzz-shaper.cpp
+++ b/src/hb-old/harfbuzz-shaper.cpp
@@ -262,7 +262,7 @@ void HB_HeuristicSetGlyphAttributes(HB_ShaperItem *item)
     int pos = 0;
     HB_CharCategory lastCat;
     int dummy;
-    HB_GetUnicodeCharProperties(uc[0], &lastCat, &dummy);
+    HB_GetUnicodeCharProperties(item->ufuncs, uc[0], &lastCat, &dummy);
     for (i = 1; i < length; ++i) {
         if (logClusters[i] == pos)
             // same glyph
@@ -277,7 +277,7 @@ void HB_HeuristicSetGlyphAttributes(HB_ShaperItem *item)
             attributes[pos].dontPrint = true;
         HB_CharCategory cat;
         int cmb;
-        HB_GetUnicodeCharProperties(uc[i], &cat, &cmb);
+        HB_GetUnicodeCharProperties(item->ufuncs, uc[i], &cat, &cmb);
         if (cat != HB_Mark_NonSpacing) {
             attributes[pos].mark = false;
             attributes[pos].clusterStart = true;
@@ -988,6 +988,8 @@ HB_Bool HB_ShapeItem(HB_ShaperItem *shaper_item)
         return false;
     }
     assert(shaper_item->item.script < HB_ScriptCount);
+    if (!shaper_item->ufuncs)
+        shaper_item->ufuncs = hb_unicode_funcs_get_default ();
     result = HB_ScriptEngines[shaper_item->item.script].shape(shaper_item);
     shaper_item->glyphIndicesPresent = false;
     return result;
diff --git a/src/hb-old/harfbuzz-shaper.h b/src/hb-old/harfbuzz-shaper.h
index ab65004..8aa86d9 100644
--- a/src/hb-old/harfbuzz-shaper.h
+++ b/src/hb-old/harfbuzz-shaper.h
@@ -234,6 +234,7 @@ typedef struct HB_Font_ {
 typedef struct HB_ShaperItem_ HB_ShaperItem;
 
 struct HB_ShaperItem_ {
+    hb_unicode_funcs_t *ufuncs;
     const HB_UChar16 *string;               /* input: the Unicode UTF16 text to be shaped */
     hb_uint32 stringLength;                 /* input: the length of the input in 16-bit words */
     HB_ScriptItem item;                     /* input: the current run to be shaped: a run of text all in the same script that is a substring of <string> */



More information about the HarfBuzz mailing list