[HarfBuzz] harfbuzz-ng: Branch 'master' - 5 commits
Behdad Esfahbod
behdad at kemper.freedesktop.org
Wed May 9 01:12:41 PDT 2012
test/shaping/Makefile.am | 2
test/shaping/hb-diff-ngrams | 5 +
test/shaping/hb-diff-stat | 5 +
test/shaping/hb_test_tools.py | 174 +++++++++++++++++++++++++++++++++++++++---
4 files changed, 177 insertions(+), 9 deletions(-)
New commits:
commit 2214a03900d32710573a1b05c7665195b3129761
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed May 9 09:54:54 2012 +0200
Add hb-diff-ngrams
diff --git a/test/shaping/Makefile.am b/test/shaping/Makefile.am
index 81c9991..4fb762c 100644
--- a/test/shaping/Makefile.am
+++ b/test/shaping/Makefile.am
@@ -13,6 +13,7 @@ EXTRA_DIST += \
hb-diff \
hb-diff-colorize \
hb-diff-filter-failures \
+ hb-diff-ngrams \
hb-diff-stat \
hb-manifest-read \
hb-manifest-update \
diff --git a/test/shaping/hb-diff-ngrams b/test/shaping/hb-diff-ngrams
new file mode 100755
index 0000000..a496447
--- /dev/null
+++ b/test/shaping/hb-diff-ngrams
@@ -0,0 +1,5 @@
+#!/usr/bin/python
+
+from hb_test_tools import *
+
+UtilMains.process_multiple_files (DiffSinks.print_ngrams)
diff --git a/test/shaping/hb_test_tools.py b/test/shaping/hb_test_tools.py
index a38f067..3ff75b8 100644
--- a/test/shaping/hb_test_tools.py
+++ b/test/shaping/hb_test_tools.py
@@ -155,12 +155,60 @@ class DiffFilters:
if not DiffHelpers.test_passed (lines):
for l in lines: yield l
+class Stat:
+
+ def __init__ (self):
+ self.count = 0
+ self.freq = 0
+
+ def add (self, test):
+ self.count += 1
+ self.freq += test.freq
+
+class Stats:
+
+ def __init__ (self):
+ self.passed = Stat ()
+ self.failed = Stat ()
+ self.total = Stat ()
+
+ def add (self, test):
+ self.total.add (test)
+ if test.passed:
+ self.passed.add (test)
+ else:
+ self.failed.add (test)
+
+ def mean (self):
+ return float (self.passed.count) / self.total.count
+
+ def variance (self):
+ return (float (self.passed.count) / self.total.count) * \
+ (float (self.failed.count) / self.total.count)
+
+ def stddev (self):
+ return self.variance () ** .5
+
+ def zscore (self, population):
+ """Calculate the standard score.
+ Population is the Stats for population.
+ Self is Stats for sample.
+ Returns larger absolute value if sample is highly unlikely to be random.
+ Anything outside of -3..+3 is very unlikely to be random.
+ See: http://en.wikipedia.org/wiki/Standard_score"""
+
+ return (self.mean () - population.mean ()) / population.stddev ()
+
+
+
+
class DiffSinks:
@staticmethod
def print_stat (f):
passed = 0
failed = 0
+ # XXX port to Stats, but that would really slow us down here
for key, lines in DiffHelpers.separate_test_cases (f):
if DiffHelpers.test_passed (lines):
passed += 1
@@ -172,21 +220,34 @@ class DiffSinks:
@staticmethod
def print_ngrams (f, ns=(1,2,3)):
gens = tuple (Ngram.generator (n) for n in ns)
+ allstats = Stats ()
+ allgrams = {}
for key, lines in DiffHelpers.separate_test_cases (f):
test = Test (lines)
- unicodes = test.unicodes
- del test
+ allstats.add (test)
for gen in gens:
- print "Printing %d-grams:" % gen.n
- for ngram in gen (unicodes):
- print ngram
+ for ngram in gen (test.unicodes):
+ if ngram not in allgrams:
+ allgrams[ngram] = Stats ()
+ allgrams[ngram].add (test)
+
+ importantgrams = {}
+ for ngram, stats in allgrams.iteritems ():
+ if stats.failed.count >= 30: # for statistical reasons
+ importantgrams[ngram] = stats
+ allgrams = importantgrams
+ del importantgrams
+
+ for ngram, stats in allgrams.iteritems ():
+ print "zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram))
class Test:
def __init__ (self, lines):
+ self.freq = 1
self.passed = True
self.identifier = None
self.text = None
commit 178e6dce01ad28c8708bad62ce0fb79c46e836dc
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed May 9 08:57:29 2012 +0200
Add N-gram generator
diff --git a/test/shaping/hb_test_tools.py b/test/shaping/hb_test_tools.py
index d3c0939..a38f067 100644
--- a/test/shaping/hb_test_tools.py
+++ b/test/shaping/hb_test_tools.py
@@ -169,6 +169,53 @@ class DiffSinks:
total = passed + failed
print "%d out of %d tests passed. %d failed (%g%%)" % (passed, total, failed, 100. * failed / total)
+ @staticmethod
+ def print_ngrams (f, ns=(1,2,3)):
+ gens = tuple (Ngram.generator (n) for n in ns)
+ for key, lines in DiffHelpers.separate_test_cases (f):
+ test = Test (lines)
+ unicodes = test.unicodes
+ del test
+
+ for gen in gens:
+ print "Printing %d-grams:" % gen.n
+ for ngram in gen (unicodes):
+ print ngram
+
+
+
+class Test:
+
+ def __init__ (self, lines):
+ self.passed = True
+ self.identifier = None
+ self.text = None
+ self.unicodes = None
+ self.glyphs = None
+ for l in lines:
+ symbol = l[0]
+ if symbol != ' ':
+ self.passed = False
+ i = 1
+ if ':' in l:
+ i = l.index (':')
+ if not self.identifier:
+ self.identifier = l[1:i]
+ i = i + 2 # Skip colon and space
+ j = -1
+ if l[j] == '\n':
+ j -= 1
+ brackets = l[i] + l[j]
+ l = l[i+1:-2]
+ if brackets == '()':
+ self.text = l
+ elif brackets == '<>':
+ self.unicodes = Unicode.parse (l)
+ elif brackets == '[]':
+ # XXX we don't handle failed tests here
+ self.glyphs = l
+
+
class DiffHelpers:
@staticmethod
@@ -205,6 +252,23 @@ class FilterHelpers:
return printer
+class Ngram:
+
+ @staticmethod
+ def generator (n):
+
+ def gen (f):
+ l = []
+ for x in f:
+ l.append (x)
+ if len (l) == n:
+ yield tuple (l)
+ l[:1] = []
+
+ gen.n = n
+ return gen
+
+
class UtilMains:
@staticmethod
@@ -276,10 +340,14 @@ class Unicode:
return '<' + u','.join ("U+%04X" % ord (u) for u in unicode (s, 'utf-8')).encode ('utf-8') + '>'
@staticmethod
- def encode (s):
+ def parse (s):
s = re.sub (r"[<+>,\\uU\n ]", " ", s)
s = re.sub (r"0[xX]", " ", s)
- return u''.join (unichr (int (x, 16)) for x in s.split (' ') if len (x)).encode ('utf-8')
+ return [int (x, 16) for x in s.split (' ') if len (x)]
+
+ @staticmethod
+ def encode (s):
+ return u''.join (unichr (x) for x in Unicode.parse (s)).encode ('utf-8')
shorthands = {
"ZERO WIDTH NON-JOINER": "ZWNJ",
commit 98669ceb77657d60435f2cb2e3fc18272c0a2c6a
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed May 9 08:16:15 2012 +0200
Use groupby()
diff --git a/test/shaping/hb_test_tools.py b/test/shaping/hb_test_tools.py
index 70a9ce1..d3c0939 100644
--- a/test/shaping/hb_test_tools.py
+++ b/test/shaping/hb_test_tools.py
@@ -150,7 +150,8 @@ class DiffFilters:
@staticmethod
def filter_failures (f):
- for lines in DiffHelpers.separate_test_cases (f):
+ for key, lines in DiffHelpers.separate_test_cases (f):
+ lines = list (lines)
if not DiffHelpers.test_passed (lines):
for l in lines: yield l
@@ -160,7 +161,7 @@ class DiffSinks:
def print_stat (f):
passed = 0
failed = 0
- for lines in DiffHelpers.separate_test_cases (f):
+ for key, lines in DiffHelpers.separate_test_cases (f):
if DiffHelpers.test_passed (lines):
passed += 1
else:
@@ -176,22 +177,11 @@ class DiffHelpers:
have a colon character, groups them by identifier,
yielding lists of all lines with the same identifier.'''
- acc = []
- iden = None
- for l in f:
- if ':' not in l:
- if acc: yield acc
- acc = []
- iden = None
- yield [l]
- continue
- l_iden = l[1:l.index (':')]
- if acc and iden != l_iden:
- yield acc
- acc = []
- iden = l_iden
- acc.append (l)
- if acc: yield acc
+ def identifier (l):
+ if ':' in l[1:]:
+ return l[1:l.index (':')]
+ return l
+ return groupby (f, key=identifier)
@staticmethod
def test_passed (lines):
commit c438a14b62433db488b5c90854a4a3934adf3305
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed May 9 07:45:17 2012 +0200
Add hb-diff-stat
diff --git a/test/shaping/Makefile.am b/test/shaping/Makefile.am
index f216c5d..81c9991 100644
--- a/test/shaping/Makefile.am
+++ b/test/shaping/Makefile.am
@@ -13,6 +13,7 @@ EXTRA_DIST += \
hb-diff \
hb-diff-colorize \
hb-diff-filter-failures \
+ hb-diff-stat \
hb-manifest-read \
hb-manifest-update \
hb-unicode-decode \
diff --git a/test/shaping/hb-diff-stat b/test/shaping/hb-diff-stat
new file mode 100755
index 0000000..81626e1
--- /dev/null
+++ b/test/shaping/hb-diff-stat
@@ -0,0 +1,5 @@
+#!/usr/bin/python
+
+from hb_test_tools import *
+
+UtilMains.process_multiple_files (DiffSinks.print_stat)
diff --git a/test/shaping/hb_test_tools.py b/test/shaping/hb_test_tools.py
index 17181ac..70a9ce1 100644
--- a/test/shaping/hb_test_tools.py
+++ b/test/shaping/hb_test_tools.py
@@ -151,9 +151,23 @@ class DiffFilters:
@staticmethod
def filter_failures (f):
for lines in DiffHelpers.separate_test_cases (f):
- if any (l[0] != ' ' for l in lines):
+ if not DiffHelpers.test_passed (lines):
for l in lines: yield l
+class DiffSinks:
+
+ @staticmethod
+ def print_stat (f):
+ passed = 0
+ failed = 0
+ for lines in DiffHelpers.separate_test_cases (f):
+ if DiffHelpers.test_passed (lines):
+ passed += 1
+ else:
+ failed += 1
+ total = passed + failed
+ print "%d out of %d tests passed. %d failed (%g%%)" % (passed, total, failed, 100. * failed / total)
+
class DiffHelpers:
@staticmethod
@@ -175,10 +189,14 @@ class DiffHelpers:
if acc and iden != l_iden:
yield acc
acc = []
- iden = l_iden
+ iden = l_iden
acc.append (l)
if acc: yield acc
+ @staticmethod
+ def test_passed (lines):
+ return all (l[0] == ' ' for l in lines)
+
class FilterHelpers:
commit 1058d031e2046eb80331b0950eaff75c2bf608dc
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed May 9 07:30:07 2012 +0200
Make hb-diff-filter-failures retain all test info for failed tests
diff --git a/test/shaping/hb_test_tools.py b/test/shaping/hb_test_tools.py
index 03a7710..17181ac 100644
--- a/test/shaping/hb_test_tools.py
+++ b/test/shaping/hb_test_tools.py
@@ -149,16 +149,35 @@ class ZipDiffer:
class DiffFilters:
@staticmethod
- def filter_failures (f, symbols=diff_symbols):
- for l in f:
- if l[0] in symbols:
- # TODO retain all lines of the failure
- yield l
+ def filter_failures (f):
+ for lines in DiffHelpers.separate_test_cases (f):
+ if any (l[0] != ' ' for l in lines):
+ for l in lines: yield l
+class DiffHelpers:
-class ShapeFilters:
+ @staticmethod
+ def separate_test_cases (f):
+ '''Reads lines from f, and if the lines have identifiers, ie.
+ have a colon character, groups them by identifier,
+ yielding lists of all lines with the same identifier.'''
- pass
+ acc = []
+ iden = None
+ for l in f:
+ if ':' not in l:
+ if acc: yield acc
+ acc = []
+ iden = None
+ yield [l]
+ continue
+ l_iden = l[1:l.index (':')]
+ if acc and iden != l_iden:
+ yield acc
+ acc = []
+ iden = l_iden
+ acc.append (l)
+ if acc: yield acc
class FilterHelpers:
More information about the HarfBuzz
mailing list