[PATCH libxkbcommon v4 2/2] Add xkb_keysym_from_casename() helper for case-insensitive search

Ran Benita ran234 at gmail.com
Mon Oct 8 01:00:38 PDT 2012


On Sun, Oct 07, 2012 at 03:59:09PM +0200, David Herrmann wrote:
> This adds another helper that allows finding a keysym based on a
> case-insensitive search. This should really be supported as many keysyms
> have really weird capitalization-rules.
> 
> However, as this may produce conflicts, users must be warned to only use
> this for fallback paths or error-recovery. This is also the reason why the
> internal XKB parsers still use the case-sensitive search.
> 
> This also adds some test-cases so the expected results are really
> produced.
> 
> Signed-off-by: David Herrmann <dh.herrmann at googlemail.com>
> ---
> Ran, could you check whether the new binary-size is acceptable? I get about 350K
> for the new search-arrays.
> 
> Regards
> David

Hi David,
The patch looks good to me, thanks for going through all of these
revisions! The first patch is an improvement regardless of the feature,
and the second is nice to have IMO.

A couple of comments below.

> 
>  makekeys.py           |  6 ++++
>  src/keysym.c          | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  test/keysym.c         | 29 ++++++++++++++++
>  xkbcommon/xkbcommon.h | 15 ++++++++
>  4 files changed, 144 insertions(+)
> 
> diff --git a/makekeys.py b/makekeys.py
> index 94885c0..ab75cce 100644
> --- a/makekeys.py
> +++ b/makekeys.py
> @@ -5,6 +5,7 @@ import re, sys, itertools
>  pattern = re.compile(r'^#define\s+XKB_KEY_(?P<name>\w+)\s+(?P<value>0x[0-9a-fA-F]+)\s')
>  matches = [pattern.match(line) for line in open(sys.argv[1])]
>  entries = [(m.group("name"), int(m.group("value"), 16)) for m in matches if m]
> +lentries = [(m.group("name").lower(), m.group("name")) for m in matches if m]
>  
>  print('''struct name_keysym {
>      const char *name;
> @@ -16,6 +17,11 @@ for (name, _) in sorted(entries, key=lambda e: e[0]):
>      print('    {{ "{name}", XKB_KEY_{name} }},'.format(name=name))
>  print('};\n')
>  
> +print('static const struct name_keysym case_to_keysym[] = {');
> +for (lname, name) in sorted(lentries, key=lambda e: e[0]):
> +    print('    {{ "{lname}", XKB_KEY_{name} }},'.format(lname=lname, name=name))
> +print('};\n')

I think it would be more space efficient if we don't use a new 'lentries'
list like above, but reuse the (original-cased) 'entries' list and
change the sort key from 'lambda e: e[0]' to 'lambda e: e[0].lower()'.
This will result in a table with the same strings being generated but
ordered by lowercase. If I'm reading your code correctly, it would work
nonetheless, but any half-decent compiler/linker would only store the
strings once, e.g. instead of this:
    $ strings libxkbcommon.so | grep -i lambda
    Greek_LAMBDA
    Greek_lambda
    greek_lambda
You get this:
    $ strings libxkbcommon.so | grep -i lambda
    Greek_LAMBDA
    Greek_lambda

Also maybe we want to stick a:
    locale.setlocle(locale.LC_ALL, "C")
or "en_us.UTF-8" there? I don't know if the python2/3 functions are
locale sensitive, but why not to be safe I guess.

> +
>  # *.sort() is stable so we always get the first keysym for duplicate
>  print('static const struct name_keysym keysym_to_name[] = {');
>  for (name, _) in (next(g[1]) for g in itertools.groupby(sorted(entries, key=lambda e: e[1]), key=lambda e: e[1])):
> diff --git a/src/keysym.c b/src/keysym.c
> index 85d4386..d320055 100644
> --- a/src/keysym.c
> +++ b/src/keysym.c
> @@ -65,6 +65,12 @@ static int compare_by_name(const void *a, const void *b)
>      return strcmp(key->name, entry->name);
>  }
>  
> +static int compare_by_casename(const void *a, const void *b)
> +{
> +    const struct name_keysym *key = a, *entry = b;
> +    return strcasecmp(key->name, entry->name);
> +}
> +
>  XKB_EXPORT int
>  xkb_keysym_get_name(xkb_keysym_t ks, char *buffer, size_t size)
>  {
> @@ -144,6 +150,94 @@ xkb_keysym_from_name(const char *s)
>      return XKB_KEY_NoSymbol;
>  }
>  
> +/*
> + * Find the lower-case keysym of any keysym entry.
> + * If we use bsearch() to find a keysym based on a case-insensitive search, we
> + * may get _any_ of all possible duplicates back. This function looks at all
> + * entries before and after @entry which have the same name (case insensitive)
> + * and then returns the _best_ match of all.
> + * The _best_ match is the lower-case keysym which we find with the help of
> + * xkb_keysym_is_lower().
> + */
> +static const struct name_keysym *find_lcase(const struct name_keysym *entry)
> +{
> +    const struct name_keysym *iter, *last;
> +    size_t len = sizeof(case_to_keysym) / sizeof(*case_to_keysym);
> +
> +    if (xkb_keysym_is_lower(entry->keysym))
> +        return entry;

xkb_keysym_is_lower is pretty lousy right now, it doesn't handle unicode
keysysm at all. But I will fix this later (I think we're going to have
to copy XConvertCase and UCSConvertCase from libX11:src/KeyBind.c - kind
of ugly).

Thanks,
Ran

> +
> +    for (iter = entry - 1; iter >= case_to_keysym; --iter) {
> +        if (strcasecmp(iter->name, entry->name))
> +            break;
> +        if (xkb_keysym_is_lower(iter->keysym))
> +            return iter;
> +    }
> +
> +    last = case_to_keysym + len;
> +    for (iter = entry + 1; iter < last; --iter) {
> +        if (strcasecmp(iter->name, entry->name))
> +            break;
> +        if (xkb_keysym_is_lower(iter->keysym))
> +            return iter;
> +    }
> +
> +    return entry;
> +}
> +
> +XKB_EXPORT xkb_keysym_t
> +xkb_keysym_from_casename(const char *s)
> +{
> +    const struct name_keysym search = { .name = s, .keysym = 0 };
> +    const struct name_keysym *entry;
> +    char *tmp;
> +    xkb_keysym_t val;
> +
> +    entry = bsearch(&search, case_to_keysym,
> +                    sizeof(case_to_keysym) / sizeof(*case_to_keysym),
> +                    sizeof(*case_to_keysym),
> +                    compare_by_casename);
> +    if (entry)
> +        return find_lcase(entry)->keysym;
> +
> +    if (*s == 'U' || *s == 'u') {
> +        val = strtoul(&s[1], &tmp, 16);
> +        if (tmp && *tmp != '\0')
> +            return XKB_KEY_NoSymbol;
> +
> +        if (val < 0x20 || (val > 0x7e && val < 0xa0))
> +            return XKB_KEY_NoSymbol;
> +        if (val < 0x100)
> +            return val;
> +        if (val > 0x10ffff)
> +            return XKB_KEY_NoSymbol;
> +        return val | 0x01000000;
> +    }
> +    else if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
> +        val = strtoul(&s[2], &tmp, 16);
> +        if (tmp && *tmp != '\0')
> +            return XKB_KEY_NoSymbol;
> +
> +        return val;
> +    }
> +
> +    /* Stupid inconsistency between the headers and XKeysymDB: the former has
> +     * no separating underscore, while some XF86* syms in the latter did.
> +     * As a last ditch effort, try without. */
> +    if (strncasecmp(s, "XF86_", 5) == 0) {
> +        xkb_keysym_t ret;
> +        tmp = strdup(s);
> +        if (!tmp)
> +            return XKB_KEY_NoSymbol;
> +        memmove(&tmp[4], &tmp[5], strlen(s) - 5 + 1);
> +        ret = xkb_keysym_from_casename(tmp);
> +        free(tmp);
> +        return ret;
> +    }
> +
> +    return XKB_KEY_NoSymbol;
> +}
> +
>  enum keysym_case {
>      NONE,
>      LOWERCASE,
> diff --git a/test/keysym.c b/test/keysym.c
> index 1bf704b..2eb0f78 100644
> --- a/test/keysym.c
> +++ b/test/keysym.c
> @@ -37,6 +37,19 @@ test_string(const char *string, xkb_keysym_t expected)
>  }
>  
>  static int
> +test_casestring(const char *string, xkb_keysym_t expected)
> +{
> +    xkb_keysym_t keysym;
> +
> +    keysym = xkb_keysym_from_casename(string);
> +
> +    fprintf(stderr, "Expected casestring %s -> %x\n", string, expected);
> +    fprintf(stderr, "Received casestring %s -> %x\n\n", string, keysym);
> +
> +    return keysym == expected;
> +}
> +
> +static int
>  test_keysym(xkb_keysym_t keysym, const char *expected)
>  {
>      char s[16];
> @@ -80,6 +93,22 @@ main(void)
>      assert(test_keysym(0x1008FE20, "XF86Ungrab"));
>      assert(test_keysym(0x01001234, "U1234"));
>  
> +    assert(test_casestring("Undo", 0xFF65));
> +    assert(test_casestring("UNDO", 0xFF65));
> +    assert(test_casestring("A", 0x61));
> +    assert(test_casestring("a", 0x61));
> +    assert(test_casestring("ThisKeyShouldNotExist", XKB_KEY_NoSymbol));
> +    assert(test_casestring("XF86_Switch_vT_5", 0x1008FE05));
> +    assert(test_casestring("xF86_SwitcH_VT_5", 0x1008FE05));
> +    assert(test_casestring("xF86SwiTch_VT_5", 0x1008FE05));
> +    assert(test_casestring("xF86Switch_vt_5", 0x1008FE05));
> +    assert(test_casestring("VoidSymbol", 0xFFFFFF));
> +    assert(test_casestring("vOIDsymBol", 0xFFFFFF));
> +    assert(test_casestring("U4567", 0x1004567));
> +    assert(test_casestring("u4567", 0x1004567));
> +    assert(test_casestring("0x10203040", 0x10203040));
> +    assert(test_casestring("0X10203040", 0x10203040));
> +
>      assert(test_utf8(XKB_KEY_y, "y"));
>      assert(test_utf8(XKB_KEY_u, "u"));
>      assert(test_utf8(XKB_KEY_m, "m"));
> diff --git a/xkbcommon/xkbcommon.h b/xkbcommon/xkbcommon.h
> index a6b0fa8..81c2a0b 100644
> --- a/xkbcommon/xkbcommon.h
> +++ b/xkbcommon/xkbcommon.h
> @@ -230,6 +230,21 @@ xkb_keysym_t
>  xkb_keysym_from_name(const char *name);
>  
>  /**
> + * @brief Get a keysym from its name (case-insensitive).
> + *
> + * @param name The name of a keysym.
> + *
> + * @remark This is the same as xkb_keysym_get_name() but does a case-insensitive
> + * lookup. If there are multiple keysyms with the same name, then the lower-case
> + * keysym is returned.
> + *
> + * @returns The keysym, if name is valid.  Otherwise, XKB_KEY_NoSymbol is
> + * returned.
> + */
> +xkb_keysym_t
> +xkb_keysym_from_casename(const char *name);
> +
> +/**
>   * @brief Get the Unicode/UTF-8 representation of a keysym.
>   *
>   * @param[in]  keysym The keysym.
> -- 
> 1.7.12.2
> 


More information about the wayland-devel mailing list