[HarfBuzz] harfbuzz: Branch 'master' - 2 commits

Lars Knoll lars at kemper.freedesktop.org
Tue Jun 15 07:20:50 PDT 2010


 contrib/harfbuzz-unicode.c         |   19 ----------
 src/harfbuzz-external.h            |    6 ---
 src/harfbuzz-shaper.cpp            |   13 ++-----
 src/harfbuzz-thai.c                |   64 ++++++++++++++++++++++++-------------
 tests/linebreaking/harfbuzz-qt.cpp |   25 +-------------
 5 files changed, 52 insertions(+), 75 deletions(-)

New commits:
commit 1313dc84678c74f1c24f910f702d7ed27a417607
Merge: cce760d... 96a637d...
Author: Lars Knoll <lars.knoll at nokia.com>
Date:   Tue Jun 15 16:20:26 2010 +0200

    Merge branch 'master' of ssh://git.freedesktop.org/git/harfbuzz

commit cce760d41f115fecd5b9b6b20b62883b10a9c204
Author: Lars Knoll <lars.knoll at nokia.com>
Date:   Mon Jun 14 14:14:59 2010 +0200

    Fixes for thai linebreaking
    
    * Load libthai.so.0 since libthai.so is not there on all systems
    * Remove dependency on codecs. Unicode->TIS620 is so simple we can
    simply hardcode it in harfbuzz-thai.c
    * Speed up detection of word boundaries
    * Fallback when libthai is not found is now to not break, instead of
    breaking after every character (in line with the recommendations of
    the unicode.org line breaking algorithm)
    
    Reviewed-by: Simon Hausmann

diff --git a/contrib/harfbuzz-unicode.c b/contrib/harfbuzz-unicode.c
index 51dd4ea..049e0ca 100644
--- a/contrib/harfbuzz-unicode.c
+++ b/contrib/harfbuzz-unicode.c
@@ -262,24 +262,7 @@ HB_GetMirroredChar(HB_UChar16 ch) {
 }
 
 void *
-HB_Library_Resolve(const char *library, const char *symbol) {
+HB_Library_Resolve(const char *library, int version, const char *symbol) {
   abort();
   return NULL;
 }
-
-void *
-HB_TextCodecForMib(int mib) {
-  abort();
-  return NULL;
-}
-
-char *
-HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength) {
-  abort();
-  return NULL;
-}
-
-void
-HB_TextCodec_FreeResult(char *v) {
-  abort();
-}
diff --git a/src/harfbuzz-external.h b/src/harfbuzz-external.h
index 760749b..7644f0d 100644
--- a/src/harfbuzz-external.h
+++ b/src/harfbuzz-external.h
@@ -146,11 +146,7 @@ HB_CharCategory HB_GetUnicodeCharCategory(HB_UChar32 ch);
 int HB_GetUnicodeCharCombiningClass(HB_UChar32 ch);
 HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch);
 
-void *HB_Library_Resolve(const char *library, const char *symbol);
-
-void *HB_TextCodecForMib(int mib);
-char *HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength);
-void HB_TextCodec_FreeResult(char *);
+void *HB_Library_Resolve(const char *library, int version, const char *symbol);
 
 HB_END_HEADER
 
diff --git a/src/harfbuzz-shaper.cpp b/src/harfbuzz-shaper.cpp
index 4bc53c8..ce4d4ac 100644
--- a/src/harfbuzz-shaper.cpp
+++ b/src/harfbuzz-shaper.cpp
@@ -183,18 +183,15 @@ static void calcLineBreaks(const HB_UChar16 *uc, hb_uint32 len, HB_CharAttribute
         if (ncls >= HB_LineBreak_CR)
             goto next;
 
-        // two complex chars (thai or lao), thai_attributes might override, but here we do a best guess
-	if (cls == HB_LineBreak_SA && ncls == HB_LineBreak_SA) {
-            lineBreakType = HB_Break;
-            goto next;
-        }
-
         {
             int tcls = ncls;
+            // for south east asian chars that require a complex (dictionary analysis), the unicode
+            // standard recommends to treat them as AL. thai_attributes and other attribute methods that
+            // do dictionary analysis can override
             if (tcls >= HB_LineBreak_SA)
-                tcls = HB_LineBreak_ID;
+                tcls = HB_LineBreak_AL;
             if (cls >= HB_LineBreak_SA)
-                cls = HB_LineBreak_ID;
+                cls = HB_LineBreak_AL;
 
             int brk = breakTable[cls][tcls];
             switch (brk) {
diff --git a/src/harfbuzz-thai.c b/src/harfbuzz-thai.c
index 1d1aa2f..fc2bdbf 100644
--- a/src/harfbuzz-thai.c
+++ b/src/harfbuzz-thai.c
@@ -27,57 +27,79 @@
 #include "harfbuzz-external.h"
 
 #include <assert.h>
+#include <stdio.h>
+
+typedef int (*th_brk_def)(const char*, int[], int);
+static th_brk_def th_brk = 0;
+static int libthai_resolved = 0;
+
+static void resolve_libthai()
+{
+    if (!th_brk)
+        th_brk = (th_brk_def)HB_Library_Resolve("thai", 0, "th_brk");
+    libthai_resolved = 1;
+}
+
+static void to_tis620(const HB_UChar16 *string, hb_uint32 len, const char *cstr)
+{
+    hb_uint32 i;
+    unsigned char *result = (unsigned char *)cstr;
+
+    for (i = 0; i < len; ++i) {
+        if (string[i] <= 0xa0)
+            result[i] = (unsigned char)string[i];
+        if (string[i] >= 0xe01 && string[i] <= 0xe5b)
+            result[i] = (unsigned char)(string[i] - 0xe00 + 0xa0);
+        else
+            result[i] = '?';
+    }
+}
 
 static void thaiWordBreaks(const HB_UChar16 *string, hb_uint32 len, HB_CharAttributes *attributes)
 {
-    typedef int (*th_brk_def)(const char*, int[], int);
-    static void *thaiCodec = 0;
-    static th_brk_def th_brk = 0;
-    char *cstr = 0;
+    char s[128];
+    char *cstr = s;
     int brp[128];
     int *break_positions = brp;
     hb_uint32 numbreaks;
     hb_uint32 i;
 
-    if (!thaiCodec)
-        thaiCodec = HB_TextCodecForMib(2259);
-
-    /* load libthai dynamically */
-    if (!th_brk && thaiCodec) {
-        th_brk = (th_brk_def)HB_Library_Resolve("thai", "th_brk");
-        if (!th_brk)
-            thaiCodec = 0;
-    }
+    if (!libthai_resolved)
+        resolve_libthai();
 
     if (!th_brk)
         return;
 
-    cstr = HB_TextCodec_ConvertFromUnicode(thaiCodec, string, len, 0);
-    if (!cstr)
-        return;
+    if (len > 128)
+        cstr = (char *)malloc(len*sizeof(char));
+
+    to_tis620(string, len, cstr);
 
-    break_positions = brp;
     numbreaks = th_brk(cstr, break_positions, 128);
     if (numbreaks > 128) {
         break_positions = (int *)malloc(numbreaks * sizeof(int));
         numbreaks = th_brk(cstr, break_positions, numbreaks);
     }
 
-    for (i = 0; i < len; ++i)
+    for (i = 0; i < len; ++i) {
         attributes[i].lineBreakType = HB_NoBreak;
+        attributes[i].wordBoundary = FALSE;
+    }
 
     for (i = 0; i < numbreaks; ++i) {
-        if (break_positions[i] > 0)
+        if (break_positions[i] > 0) {
             attributes[break_positions[i]-1].lineBreakType = HB_Break;
+            attributes[i].wordBoundary = TRUE;
+        }
     }
 
     if (break_positions != brp)
         free(break_positions);
 
-    HB_TextCodec_FreeResult(cstr);
+    if (len > 128)
+        free(cstr);
 }
 
-
 void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
 {
     assert(script == HB_Script_Thai);
diff --git a/tests/linebreaking/harfbuzz-qt.cpp b/tests/linebreaking/harfbuzz-qt.cpp
index ea03052..f0048b7 100644
--- a/tests/linebreaking/harfbuzz-qt.cpp
+++ b/tests/linebreaking/harfbuzz-qt.cpp
@@ -79,30 +79,9 @@ void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme,
     *lineBreak = (HB_LineBreakClass) prop->line_break_class;
 }
 
-void *HB_Library_Resolve(const char *library, const char *symbol)
+void *HB_Library_Resolve(const char *library, int version, const char *symbol)
 {
-    return QLibrary::resolve(library, symbol);
-}
-
-void *HB_TextCodecForMib(int mib)
-{
-    return QTextCodec::codecForMib(mib);
-}
-
-char *HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength)
-{
-    QByteArray data = reinterpret_cast<QTextCodec *>(codec)->fromUnicode((const QChar *)unicode, length);
-    // ### suboptimal
-    char *output = (char *)malloc(data.length() + 1);
-    memcpy(output, data.constData(), data.length() + 1);
-    if (outputLength)
-        *outputLength = data.length();
-    return output;
-}
-
-void HB_TextCodec_FreeResult(char *string)
-{
-    free(string);
+    return QLibrary::resolve(library, version, symbol);
 }
 
 }



More information about the HarfBuzz mailing list