[HarfBuzz] harfbuzz: Branch 'master' - 2 commits
Lars Knoll
lars at kemper.freedesktop.org
Tue Jun 15 07:20:50 PDT 2010
contrib/harfbuzz-unicode.c | 19 ----------
src/harfbuzz-external.h | 6 ---
src/harfbuzz-shaper.cpp | 13 ++-----
src/harfbuzz-thai.c | 64 ++++++++++++++++++++++++-------------
tests/linebreaking/harfbuzz-qt.cpp | 25 +-------------
5 files changed, 52 insertions(+), 75 deletions(-)
New commits:
commit 1313dc84678c74f1c24f910f702d7ed27a417607
Merge: cce760d... 96a637d...
Author: Lars Knoll <lars.knoll at nokia.com>
Date: Tue Jun 15 16:20:26 2010 +0200
Merge branch 'master' of ssh://git.freedesktop.org/git/harfbuzz
commit cce760d41f115fecd5b9b6b20b62883b10a9c204
Author: Lars Knoll <lars.knoll at nokia.com>
Date: Mon Jun 14 14:14:59 2010 +0200
Fixes for thai linebreaking
* Load libthai.so.0 since libthai.so is not there on all systems
* Remove dependency on codecs. Unicode->TIS620 is so simple we can
simply hardcode it in harfbuzz-thai.c
* Speed up detection of word boundaries
* Fallback when libthai is not found is now to not break instead of
breaking after every character (in line with the recommendations of
the unicode.org line breaking algorithm)
Reviewed-by: Simon Hausmann
diff --git a/contrib/harfbuzz-unicode.c b/contrib/harfbuzz-unicode.c
index 51dd4ea..049e0ca 100644
--- a/contrib/harfbuzz-unicode.c
+++ b/contrib/harfbuzz-unicode.c
@@ -262,24 +262,7 @@ HB_GetMirroredChar(HB_UChar16 ch) {
}
void *
-HB_Library_Resolve(const char *library, const char *symbol) {
+HB_Library_Resolve(const char *library, int version, const char *symbol) {
abort();
return NULL;
}
-
-void *
-HB_TextCodecForMib(int mib) {
- abort();
- return NULL;
-}
-
-char *
-HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength) {
- abort();
- return NULL;
-}
-
-void
-HB_TextCodec_FreeResult(char *v) {
- abort();
-}
diff --git a/src/harfbuzz-external.h b/src/harfbuzz-external.h
index 760749b..7644f0d 100644
--- a/src/harfbuzz-external.h
+++ b/src/harfbuzz-external.h
@@ -146,11 +146,7 @@ HB_CharCategory HB_GetUnicodeCharCategory(HB_UChar32 ch);
int HB_GetUnicodeCharCombiningClass(HB_UChar32 ch);
HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch);
-void *HB_Library_Resolve(const char *library, const char *symbol);
-
-void *HB_TextCodecForMib(int mib);
-char *HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength);
-void HB_TextCodec_FreeResult(char *);
+void *HB_Library_Resolve(const char *library, int version, const char *symbol);
HB_END_HEADER
diff --git a/src/harfbuzz-shaper.cpp b/src/harfbuzz-shaper.cpp
index 4bc53c8..ce4d4ac 100644
--- a/src/harfbuzz-shaper.cpp
+++ b/src/harfbuzz-shaper.cpp
@@ -183,18 +183,15 @@ static void calcLineBreaks(const HB_UChar16 *uc, hb_uint32 len, HB_CharAttribute
if (ncls >= HB_LineBreak_CR)
goto next;
- // two complex chars (thai or lao), thai_attributes might override, but here we do a best guess
- if (cls == HB_LineBreak_SA && ncls == HB_LineBreak_SA) {
- lineBreakType = HB_Break;
- goto next;
- }
-
{
int tcls = ncls;
+ // for south east asian chars that require complex (dictionary-based) analysis, the unicode
+ // standard recommends treating them as AL. thai_attributes and other attribute methods that
+ // do dictionary analysis can override this
if (tcls >= HB_LineBreak_SA)
- tcls = HB_LineBreak_ID;
+ tcls = HB_LineBreak_AL;
if (cls >= HB_LineBreak_SA)
- cls = HB_LineBreak_ID;
+ cls = HB_LineBreak_AL;
int brk = breakTable[cls][tcls];
switch (brk) {
diff --git a/src/harfbuzz-thai.c b/src/harfbuzz-thai.c
index 1d1aa2f..fc2bdbf 100644
--- a/src/harfbuzz-thai.c
+++ b/src/harfbuzz-thai.c
@@ -27,57 +27,79 @@
#include "harfbuzz-external.h"
#include <assert.h>
+#include <stdio.h>
+
+typedef int (*th_brk_def)(const char*, int[], int);
+static th_brk_def th_brk = 0;
+static int libthai_resolved = 0;
+
+static void resolve_libthai()
+{
+ if (!th_brk)
+ th_brk = (th_brk_def)HB_Library_Resolve("thai", 0, "th_brk");
+ libthai_resolved = 1;
+}
+
+static void to_tis620(const HB_UChar16 *string, hb_uint32 len, const char *cstr)
+{
+ hb_uint32 i;
+ unsigned char *result = (unsigned char *)cstr;
+
+ for (i = 0; i < len; ++i) {
+ if (string[i] <= 0xa0)
+ result[i] = (unsigned char)string[i];
+ if (string[i] >= 0xe01 && string[i] <= 0xe5b)
+ result[i] = (unsigned char)(string[i] - 0xe00 + 0xa0);
+ else
+ result[i] = '?';
+ }
+}
static void thaiWordBreaks(const HB_UChar16 *string, hb_uint32 len, HB_CharAttributes *attributes)
{
- typedef int (*th_brk_def)(const char*, int[], int);
- static void *thaiCodec = 0;
- static th_brk_def th_brk = 0;
- char *cstr = 0;
+ char s[128];
+ char *cstr = s;
int brp[128];
int *break_positions = brp;
hb_uint32 numbreaks;
hb_uint32 i;
- if (!thaiCodec)
- thaiCodec = HB_TextCodecForMib(2259);
-
- /* load libthai dynamically */
- if (!th_brk && thaiCodec) {
- th_brk = (th_brk_def)HB_Library_Resolve("thai", "th_brk");
- if (!th_brk)
- thaiCodec = 0;
- }
+ if (!libthai_resolved)
+ resolve_libthai();
if (!th_brk)
return;
- cstr = HB_TextCodec_ConvertFromUnicode(thaiCodec, string, len, 0);
- if (!cstr)
- return;
+ if (len > 128)
+ cstr = (char *)malloc(len*sizeof(char));
+
+ to_tis620(string, len, cstr);
- break_positions = brp;
numbreaks = th_brk(cstr, break_positions, 128);
if (numbreaks > 128) {
break_positions = (int *)malloc(numbreaks * sizeof(int));
numbreaks = th_brk(cstr, break_positions, numbreaks);
}
- for (i = 0; i < len; ++i)
+ for (i = 0; i < len; ++i) {
attributes[i].lineBreakType = HB_NoBreak;
+ attributes[i].wordBoundary = FALSE;
+ }
for (i = 0; i < numbreaks; ++i) {
- if (break_positions[i] > 0)
+ if (break_positions[i] > 0) {
attributes[break_positions[i]-1].lineBreakType = HB_Break;
+ attributes[i].wordBoundary = TRUE;
+ }
}
if (break_positions != brp)
free(break_positions);
- HB_TextCodec_FreeResult(cstr);
+ if (len > 128)
+ free(cstr);
}
-
void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
{
assert(script == HB_Script_Thai);
diff --git a/tests/linebreaking/harfbuzz-qt.cpp b/tests/linebreaking/harfbuzz-qt.cpp
index ea03052..f0048b7 100644
--- a/tests/linebreaking/harfbuzz-qt.cpp
+++ b/tests/linebreaking/harfbuzz-qt.cpp
@@ -79,30 +79,9 @@ void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme,
*lineBreak = (HB_LineBreakClass) prop->line_break_class;
}
-void *HB_Library_Resolve(const char *library, const char *symbol)
+void *HB_Library_Resolve(const char *library, int version, const char *symbol)
{
- return QLibrary::resolve(library, symbol);
-}
-
-void *HB_TextCodecForMib(int mib)
-{
- return QTextCodec::codecForMib(mib);
-}
-
-char *HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength)
-{
- QByteArray data = reinterpret_cast<QTextCodec *>(codec)->fromUnicode((const QChar *)unicode, length);
- // ### suboptimal
- char *output = (char *)malloc(data.length() + 1);
- memcpy(output, data.constData(), data.length() + 1);
- if (outputLength)
- *outputLength = data.length();
- return output;
-}
-
-void HB_TextCodec_FreeResult(char *string)
-{
- free(string);
+ return QLibrary::resolve(library, version, symbol);
}
}
More information about the HarfBuzz
mailing list