[HarfBuzz] harfbuzz: Branch 'master'

Simon Hausmann hausmann at kemper.freedesktop.org
Tue Jan 20 10:06:49 PST 2009


 src/Makefile.am                    |    3 -
 src/harfbuzz-external.h            |    5 ++
 src/harfbuzz-shaper-all.cpp        |    1 
 src/harfbuzz-shaper-private.h      |    2 
 src/harfbuzz-shaper.cpp            |    5 --
 src/harfbuzz-thai.c                |   87 +++++++++++++++++++++++++++++++++++++
 src/harfbuzz-thai.cpp              |   85 ------------------------------------
 tests/linebreaking/harfbuzz-qt.cpp |   22 +++++++++
 tests/linebreaking/main.cpp        |   45 ++++++++++++++++++-
 9 files changed, 163 insertions(+), 92 deletions(-)

New commits:
commit 58e8d493e6abfa6e9b2d5898e5289cabc77d85ea
Author: Simon Hausmann <simon.hausmann at nokia.com>
Date:   Tue Jan 20 14:57:29 2009 +0100

    Re-added support for Thai word breaking through libthai.
    
    This commit resurrects the old code that dynamically opens libthai.

diff --git a/src/Makefile.am b/src/Makefile.am
index d46cc0e..2b0fb1d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -18,7 +18,8 @@ MAINSOURCES =  \
 	harfbuzz-hebrew.c \
 	harfbuzz-arabic.c \
 	harfbuzz-hangul.c \
-	harfbuzz-myanmar.c
+	harfbuzz-myanmar.c \
+	harfbuzz-thai.c
 
 EXTRA_SOURCES = harfbuzz.c
 
diff --git a/src/harfbuzz-external.h b/src/harfbuzz-external.h
index b961cd4..29c4631 100644
--- a/src/harfbuzz-external.h
+++ b/src/harfbuzz-external.h
@@ -146,6 +146,11 @@ HB_CharCategory HB_GetUnicodeCharCategory(HB_UChar32 ch);
 int HB_GetUnicodeCharCombiningClass(HB_UChar32 ch);
 HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch);
 
+void *HB_Library_Resolve(const char *library, const char *symbol); /* host hook: looks up symbol in the named library; thaiWordBreaks treats a null result as "not available" */
+
+void *HB_TextCodecForMib(int mib); /* host hook: returns an opaque codec handle for the given MIB number; a null result is treated as "no codec" by thaiWordBreaks */
+char *HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength); /* host hook: converts length UTF-16 units using codec; thaiWordBreaks releases the returned buffer with free() */
+
 HB_END_HEADER
 
 #endif
diff --git a/src/harfbuzz-shaper-all.cpp b/src/harfbuzz-shaper-all.cpp
index 9c26fe3..d2f902f 100644
--- a/src/harfbuzz-shaper-all.cpp
+++ b/src/harfbuzz-shaper-all.cpp
@@ -31,5 +31,6 @@ extern "C" {
 #include "harfbuzz-arabic.c"
 #include "harfbuzz-hangul.c"
 #include "harfbuzz-myanmar.c"
+#include "harfbuzz-thai.c"
 }
 
diff --git a/src/harfbuzz-shaper-private.h b/src/harfbuzz-shaper-private.h
index d698c48..80bccf8 100644
--- a/src/harfbuzz-shaper-private.h
+++ b/src/harfbuzz-shaper-private.h
@@ -116,6 +116,8 @@ extern void HB_KhmerAttributes(HB_Script script, const HB_UChar16 *string, hb_ui
 
 extern void HB_IndicAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
 
+extern void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
+
 typedef struct {
     hb_uint32 tag;
     hb_uint32 property;
diff --git a/src/harfbuzz-shaper.cpp b/src/harfbuzz-shaper.cpp
index 65e8f61..36b9282 100644
--- a/src/harfbuzz-shaper.cpp
+++ b/src/harfbuzz-shaper.cpp
@@ -583,9 +583,6 @@ HB_Bool HB_BasicShape(HB_ShaperItem *shaper_item)
     return true;
 }
 
-//static HB_AttributeFunction thai_attributes = 0;
-#define thai_attributes 0
-
 const HB_ScriptEngine HB_ScriptEngines[] = {
     // Common
     { HB_BasicShape, 0},
@@ -624,7 +621,7 @@ const HB_ScriptEngine HB_ScriptEngines[] = {
     // Sinhala
     { HB_IndicShape, HB_IndicAttributes },
     // Thai
-    { HB_BasicShape, thai_attributes },
+    { HB_BasicShape, HB_ThaiAttributes },
     // Lao
     { HB_BasicShape, 0 },
     // Tibetan
diff --git a/src/harfbuzz-thai.c b/src/harfbuzz-thai.c
new file mode 100644
index 0000000..ca55c20
--- /dev/null
+++ b/src/harfbuzz-thai.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
+ *
+ * This is part of HarfBuzz, an OpenType Layout engine library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "harfbuzz-shaper.h"
+#include "harfbuzz-shaper-private.h"
+#include "harfbuzz-external.h"
+
+#include <assert.h>
+
+static void thaiWordBreaks(const HB_UChar16 *string, hb_uint32 len, HB_CharAttributes *attributes) /* fills attributes[0..len-1].lineBreakType using libthai's th_brk; returns without touching attributes when the codec, the library or the conversion is unavailable */
+{
+    typedef int (*th_brk_def)(const char*, int[], int); /* th_brk: (text, out positions, capacity of positions) as used below */
+    static void *thaiCodec = 0; /* codec handle, cached across calls */
+    static th_brk_def th_brk = 0; /* resolved libthai entry point, cached across calls */
+    hb_uint32 cstrLength = 0; /* written by the conversion call; not read afterwards */
+    char *cstr = 0;
+    int brp[128]; /* fixed buffer for the first th_brk attempt */
+    int *break_positions = brp;
+    hb_uint32 numbreaks;
+    hb_uint32 i;
+
+    if (!thaiCodec)
+        thaiCodec = HB_TextCodecForMib(2259); /* 2259 is the IANA MIBenum of TIS-620 */
+
+    /* load libthai dynamically; only attempted while a codec is available */
+    if (!th_brk && thaiCodec) {
+        th_brk = (th_brk_def)HB_Library_Resolve("thai", "th_brk");
+        if (!th_brk)
+            thaiCodec = 0; /* forgetting the codec makes the next call repeat both lookups */
+    }
+
+    if (!th_brk)
+        return; /* no word breaker available */
+
+    cstr = HB_TextCodec_ConvertFromUnicode(thaiCodec, string, len, &cstrLength); /* buffer is released with free() below */
+    if (!cstr)
+        return;
+
+    break_positions = brp;
+    numbreaks = th_brk(cstr, break_positions, 128); /* NOTE(review): th_brk's int result is stored in an unsigned variable */
+    if (numbreaks > 128) { /* a result above the buffer size is treated as the required count: retry on the heap */
+        break_positions = (int *)malloc(numbreaks * sizeof(int)); /* NOTE(review): allocation result is not checked */
+        numbreaks = th_brk(cstr, break_positions, numbreaks);
+    }
+
+    for (i = 0; i < len; ++i)
+        attributes[i].lineBreakType = HB_NoBreak; /* default: no break anywhere */
+
+    for (i = 0; i < numbreaks; ++i) {
+        if (break_positions[i] > 0) /* position 0 has no preceding character to mark */
+            attributes[break_positions[i]-1].lineBreakType = HB_Break; /* mark the character before each position; NOTE(review): index is not checked against len */
+    }
+
+    if (break_positions != brp) /* only the heap buffer needs freeing */
+        free(break_positions);
+
+    free(cstr);
+}
+
+
+void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes) /* attribute callback for the Thai script entry: delegates line-break detection to thaiWordBreaks */
+{
+    assert(script == HB_Script_Thai); /* script is used only by this check */
+    thaiWordBreaks(text + from, len, attributes); /* text is advanced by from; attributes is passed through unshifted */
+}
+
diff --git a/src/harfbuzz-thai.cpp b/src/harfbuzz-thai.cpp
deleted file mode 100644
index 8d12786..0000000
--- a/src/harfbuzz-thai.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
- *
- * This is part of HarfBuzz, an OpenType Layout engine library.
- *
- * Permission is hereby granted, without written agreement and without
- * license or royalty fees, to use, copy, modify, and distribute this
- * software and its documentation for any purpose, provided that the
- * above copyright notice and the following two paragraphs appear in
- * all copies of this software.
- *
- * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
- * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
- * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- *
- * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
- * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
- * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
- * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
- */
-
-#if 0
-#include <qtextcodec.h>
-#include <qlibrary.h>
-
-
-static void thaiWordBreaks(const QChar *string, const int len, QCharAttributes *attributes)
-{
-#ifdef QT_NO_TEXTCODEC
-    Q_UNUSED(string);
-    Q_UNUSED(len);
-    Q_UNUSED(attributes);
-#else
-    typedef int (*th_brk_def)(const char*, int[], int);
-    static QTextCodec *thaiCodec = QTextCodec::codecForMib(2259);
-    static th_brk_def th_brk = 0;
-
-#ifndef QT_NO_LIBRARY
-    /* load libthai dynamically */
-    if (!th_brk && thaiCodec) {
-        th_brk = (th_brk_def)QLibrary::resolve(QLatin1String("thai"), "th_brk");
-        if (!th_brk)
-            thaiCodec = 0;
-    }
-#endif
-
-    if (!th_brk)
-        return;
-
-    QByteArray cstr = thaiCodec->fromUnicode(QString(string, len));
-
-    int brp[128];
-    int *break_positions = brp;
-    int numbreaks = th_brk(cstr.constData(), break_positions, 128);
-    if (numbreaks > 128) {
-        break_positions = new int[numbreaks];
-        numbreaks = th_brk(cstr.data(),break_positions, numbreaks);
-    }
-
-    for (int i = 0; i < len - 1; ++i)
-        attributes[i].lineBreakType = QCharAttributes::NoBreak;
-
-    for (int i = 0; i < numbreaks; ++i) {
-        if (break_positions[i] > 0)
-            attributes[break_positions[i]-1].lineBreakType = QCharAttributes::Break;
-    }
-
-    if (break_positions != brp)
-        delete [] break_positions;
-#endif // QT_NO_TEXTCODEC
-}
-
-
-static void thai_attributes( int script, const QString &text, int from, int len, QCharAttributes *attributes )
-{
-    Q_UNUSED(script);
-    Q_ASSERT(script == QUnicodeTables::Thai);
-    thaiWordBreaks(text.unicode() + from, len, attributes);
-}
-
-
-#endif
diff --git a/tests/linebreaking/harfbuzz-qt.cpp b/tests/linebreaking/harfbuzz-qt.cpp
index 6f65c11..71d8439 100644
--- a/tests/linebreaking/harfbuzz-qt.cpp
+++ b/tests/linebreaking/harfbuzz-qt.cpp
@@ -24,6 +24,8 @@
 
 #include <harfbuzz-external.h>
 #include <Qt/private/qunicodetables_p.h>
+#include <QLibrary>
+#include <QTextCodec>
 
 HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch)
 {
@@ -75,3 +77,23 @@ void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme,
     *lineBreak = (HB_LineBreakClass) prop->line_break_class;
 }
 
+void *HB_Library_Resolve(const char *library, const char *symbol) // Qt-backed implementation of the HarfBuzz library-lookup hook
+{
+    return QLibrary::resolve(library, symbol); // forwards both arguments unchanged to the static QLibrary::resolve
+}
+
+void *HB_TextCodecForMib(int mib) // Qt-backed implementation of the HarfBuzz codec-lookup hook
+{
+    return QTextCodec::codecForMib(mib); // the QTextCodec pointer is handed back as an opaque void *
+}
+
+char *HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength) // Qt-backed conversion hook: returns a malloc'ed copy of the encoded bytes and stores their count in *outputLength
+{
+    QByteArray data = reinterpret_cast<QTextCodec *>(codec)->fromUnicode((const QChar *)unicode, length); // codec is assumed to come from HB_TextCodecForMib; NOTE(review): it is dereferenced without a null check
+    // ### suboptimal
+    char *output = (char *)malloc(data.length() + 1); // one extra byte beyond the payload; NOTE(review): allocation result is not checked
+    memcpy(output, data.constData(), data.length() + 1); // the +1 also copies the byte after the payload, presumably QByteArray's terminating '\0' (confirm against the Qt docs)
+    *outputLength = data.length(); // reported length excludes that extra byte
+    return output; // thaiWordBreaks in harfbuzz-thai.c releases this with free()
+}
+
diff --git a/tests/linebreaking/main.cpp b/tests/linebreaking/main.cpp
index 470b119..3b2734a 100644
--- a/tests/linebreaking/main.cpp
+++ b/tests/linebreaking/main.cpp
@@ -32,13 +32,13 @@
 
 #include <harfbuzz-shaper.h>
 
-static QVector<HB_CharAttributes> getCharAttributes(const QString &str)
+static QVector<HB_CharAttributes> getCharAttributes(const QString &str, HB_Script script = HB_Script_Common)
 {
     QVector<HB_CharAttributes> attrs(str.length());
     HB_ScriptItem item;
     item.pos = 0;
     item.length = str.length();
-    item.script = HB_Script_Common;
+    item.script = script;
     HB_GetCharAttributes(str.utf16(), str.length(),
                          &item, 1,
                          attrs.data());
@@ -60,6 +60,7 @@ private slots:
     void lineBreaking();
     void charWordStopOnLineSeparator();
     void charStopForSurrogatePairs();
+    void thaiWordBreak();
 };
 
 
@@ -185,5 +186,45 @@ void tst_CharAttributes::charStopForSurrogatePairs()
     QVERIFY(attrs[3].charStop);
 }
 
+void tst_CharAttributes::thaiWordBreak() // checks the lineBreakType values produced for a Thai sentence when the item script is HB_Script_Thai
+{
+    // สวัสดีครับ นี่เป็นการทดสอบตัวเอง ("Hello, this is a self-test"), spelled out below as TIS-620 bytes
+    QTextCodec *codec = QTextCodec::codecForMib(2259); // 2259 is the IANA MIBenum of TIS-620; NOTE(review): codec is used without a null check
+    QString txt = codec->toUnicode(QByteArray("\xca\xc7\xd1\xca\xb4\xd5\xa4\xc3\xd1\xba\x20\xb9\xd5\xe8\xe0\xbb\xe7\xb9\xa1\xd2\xc3\xb7\xb4\xca\xcd\xba\xb5\xd1\xc7\xe0\xcd\xa7"));
+
+
+    QCOMPARE(txt.length(), 32); // 32 input bytes are expected to decode to 32 UTF-16 units
+    QVector<HB_CharAttributes> attrs = getCharAttributes(txt, HB_Script_Thai);
+    QVERIFY(attrs[0].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[1].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[2].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[3].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[4].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[5].lineBreakType == HB_Break); // last character of the 1st word (indices 0-5)
+    QVERIFY(attrs[6].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[7].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[8].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[9].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[10].lineBreakType == HB_Break); // the space after the 2nd word (indices 6-9)
+    QVERIFY(attrs[11].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[12].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[13].lineBreakType == HB_Break); // last character of the 3rd word (indices 11-13)
+    QVERIFY(attrs[14].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[15].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[16].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[17].lineBreakType == HB_Break); // last character of the 4th word (indices 14-17)
+    QVERIFY(attrs[18].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[19].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[20].lineBreakType == HB_Break); // last character of the 5th word (indices 18-20)
+    QVERIFY(attrs[21].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[22].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[23].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[24].lineBreakType == HB_NoBreak);
+    QVERIFY(attrs[25].lineBreakType == HB_Break); // last character of the 6th word (indices 21-25)
+    QVERIFY(attrs[26].lineBreakType == HB_NoBreak);
+    for (int i = 27; i < 32; ++i) // remainder of the final word (indices 26-31): no break expected, including the last character
+        QVERIFY(attrs[i].lineBreakType == HB_NoBreak);
+}
+
 QTEST_MAIN(tst_CharAttributes)
 #include "main.moc"



More information about the HarfBuzz mailing list