[Cogl] [PATCH 3/5] Add a simple internal regular expression parser
Neil Roberts
neil at linux.intel.com
Tue Mar 27 09:56:23 PDT 2012
This adds a function which can match a string against a simple form of
regular expressions. The regular expression is parsed into a tree
structure and then tried against every position in the string. The
parser does not support backtracking or capturing and only has a
limited set of operators. This will be used as a convenient way to
match patterns on the values from glGetString to determine driver
specific workarounds. GRegex is not used to avoid having to pull in a
large dependency when --disable-glib is used.
---
cogl/Makefile.am | 2 +
cogl/cogl-mini-re-private.h | 67 +++++
cogl/cogl-mini-re.c | 558 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 627 insertions(+), 0 deletions(-)
create mode 100644 cogl/cogl-mini-re-private.h
create mode 100644 cogl/cogl-mini-re.c
diff --git a/cogl/Makefile.am b/cogl/Makefile.am
index 69cb5df..b9968a7 100644
--- a/cogl/Makefile.am
+++ b/cogl/Makefile.am
@@ -183,6 +183,8 @@ cogl_sources_c = \
$(srcdir)/cogl-handle.h \
$(srcdir)/cogl-context-private.h \
$(srcdir)/cogl-context.c \
+ $(srcdir)/cogl-mini-re-private.h \
+ $(srcdir)/cogl-mini-re.c \
$(srcdir)/cogl-renderer-private.h \
$(srcdir)/cogl-renderer.h \
$(srcdir)/cogl-renderer.c \
diff --git a/cogl/cogl-mini-re-private.h b/cogl/cogl-mini-re-private.h
new file mode 100644
index 0000000..22ac6bb
--- /dev/null
+++ b/cogl/cogl-mini-re-private.h
@@ -0,0 +1,67 @@
+/*
+ * Cogl
+ *
+ * An object oriented GL/GLES Abstraction/Utility Layer
+ *
+ * Copyright (C) 2012 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ */
+
+#ifndef __COGL_MINI_RE_PRIVATE_H
+#define __COGL_MINI_RE_PRIVATE_H
+
+#include <glib.h>
+
+/*
+ * cogl_mini_re_match:
+ * @re: A 'regular expression' to match against the string
+ * @str: The string to match
+ *
+ * This function provides a very basic regular expression-like string
+ * matcher. The special symbols supported are:
+ *
+ * ^: Matches only at the beginning of the string
+ * $: Matches only at the end of the string
+ * .: Matches any character
+ * \d: Matches any of the characters 0-9
+ * \b: Matches the empty string, but only at the start or end of a word
+ * \s: Matches any whitespace character
+ * \w: Matches any alphanumeric character including _
+ * \\: Matches a literal backslash
+ * +: One or more repeats of the previous expression. Note that this
+ * operator is greedy and does not support backtracking. Ie, "a.+b"
+ * will not match "abb" because the + operator will consume all of
+ * the b's and it will not backtrack to try matches where it
+ * consumes less b's.
+ * *: Zero or more repeats of the previous expression. This is also
+ * greedy and does not support backtracking.
+ * ?: Matches zero or one repeats of the previous expression. This is
+ * also greedy and does not support backtracking.
+ * (...): Groups a regular expression
+ * |: Alternation operator. Note that this does not support
+ * backtracking.
+ *
+ * Both @re and @str are expected to be valid UTF-8 strings.
+ *
+ * Return value: %TRUE if anywhere in @str matches @re, %FALSE
+ * otherwise.
+ */
+gboolean
+_cogl_mini_re_match (const char *re,
+ const char *str);
+
+#endif /* __COGL_MINI_RE_PRIVATE_H */
diff --git a/cogl/cogl-mini-re.c b/cogl/cogl-mini-re.c
new file mode 100644
index 0000000..ef74f95
--- /dev/null
+++ b/cogl/cogl-mini-re.c
@@ -0,0 +1,558 @@
+/*
+ * Cogl
+ *
+ * An object oriented GL/GLES Abstraction/Utility Layer
+ *
+ * Copyright (C) 2012 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "cogl-mini-re-private.h"
+
+typedef enum
+{
+ COGL_MINI_RE_BEGIN,
+ COGL_MINI_RE_END,
+ COGL_MINI_RE_ANY,
+ COGL_MINI_RE_DIGIT,
+ COGL_MINI_RE_WORD_BOUNDARY,
+ COGL_MINI_RE_SPACE,
+ COGL_MINI_RE_ALNUM,
+ COGL_MINI_RE_CHARACTERS,
+ COGL_MINI_RE_REPEAT,
+ COGL_MINI_RE_GROUP,
+ COGL_MINI_RE_ALTERNATION
+} CoglMiniReNodeType;
+
+typedef struct _CoglMiniReNode CoglMiniReNode;
+
+struct _CoglMiniReNode
+{
+ CoglMiniReNodeType type;
+
+ union
+ {
+ struct
+ {
+ CoglMiniReNode *child;
+ int min;
+ int max;
+ } repeat;
+
+ struct
+ {
+ CoglMiniReNode *left;
+ CoglMiniReNode *right;
+ } alternation;
+
+ GQueue children;
+ GString *characters;
+ } d;
+};
+
+static CoglMiniReNode *
+_cogl_mini_re_compile_internal (const char *re,
+ const char **tail_ret);
+
+static void
+_cogl_mini_re_free (CoglMiniReNode *node)
+{
+ switch (node->type)
+ {
+ case COGL_MINI_RE_BEGIN:
+ case COGL_MINI_RE_END:
+ case COGL_MINI_RE_ANY:
+ case COGL_MINI_RE_DIGIT:
+ case COGL_MINI_RE_WORD_BOUNDARY:
+ case COGL_MINI_RE_SPACE:
+ case COGL_MINI_RE_ALNUM:
+ break;
+
+ case COGL_MINI_RE_CHARACTERS:
+ g_string_free (node->d.characters, TRUE);
+ break;
+
+ case COGL_MINI_RE_REPEAT:
+ _cogl_mini_re_free (node->d.repeat.child);
+ break;
+
+ case COGL_MINI_RE_GROUP:
+ g_queue_foreach (&node->d.children,
+ (GFunc) _cogl_mini_re_free,
+ NULL);
+ g_queue_clear (&node->d.children);
+ break;
+
+ case COGL_MINI_RE_ALTERNATION:
+ _cogl_mini_re_free (node->d.alternation.left);
+ _cogl_mini_re_free (node->d.alternation.right);
+ break;
+ }
+
+ g_slice_free (CoglMiniReNode, node);
+}
+
+static CoglMiniReNode *
+_cogl_mini_re_new_node (CoglMiniReNodeType type)
+{
+ CoglMiniReNode *node = g_slice_new (CoglMiniReNode);
+
+ node->type = type;
+
+ return node;
+}
+
+static CoglMiniReNode *
+_cogl_mini_re_new_group_node (void)
+{
+ CoglMiniReNode *node = _cogl_mini_re_new_node (COGL_MINI_RE_GROUP);
+
+ g_queue_init (&node->d.children);
+
+ return node;
+}
+
+static void
+_cogl_mini_re_add_node (CoglMiniReNode *root_node,
+ CoglMiniReNodeType type)
+{
+ CoglMiniReNode *node = _cogl_mini_re_new_node (type);
+
+ g_queue_push_tail (&root_node->d.children, node);
+}
+
+static void
+_cogl_mini_re_add_character (CoglMiniReNode *root_node,
+ gunichar character)
+{
+ CoglMiniReNode *node;
+
+ if (root_node->d.children.length < 1 ||
+ (node = (CoglMiniReNode *) g_queue_peek_tail (&root_node->d.children))->
+ type != COGL_MINI_RE_CHARACTERS)
+ {
+ node = _cogl_mini_re_new_node (COGL_MINI_RE_CHARACTERS);
+ node->d.characters = g_string_new (NULL);
+
+ g_queue_push_tail (&root_node->d.children, node);
+ }
+
+ g_string_append_unichar (node->d.characters, character);
+}
+
+static gboolean
+_cogl_mini_re_add_repeat (CoglMiniReNode *root_node,
+ int min,
+ int max)
+{
+ CoglMiniReNode *node;
+ CoglMiniReNode *last_node;
+ CoglMiniReNode *child_node;
+
+ if (root_node->d.children.length < 1)
+ {
+ g_warning ("Repeat specified with no previous regular expression");
+ return FALSE;
+ }
+
+ last_node = g_queue_peek_tail (&root_node->d.children);
+
+ if (last_node->type == COGL_MINI_RE_CHARACTERS &&
+ last_node->d.characters->len > 1)
+ {
+ const char *new_end =
+ g_utf8_prev_char (last_node->d.characters->str +
+ last_node->d.characters->len);
+ gunichar ch = g_utf8_get_char (new_end);
+
+ g_string_set_size (last_node->d.characters,
+ new_end - last_node->d.characters->str);
+
+ child_node = _cogl_mini_re_new_node (COGL_MINI_RE_CHARACTERS);
+ child_node->d.characters = g_string_new (NULL);
+ g_string_append_unichar (child_node->d.characters, ch);
+ }
+ else
+ child_node = g_queue_pop_tail (&root_node->d.children);
+
+ node = _cogl_mini_re_new_node (COGL_MINI_RE_REPEAT);
+ node->d.repeat.child = child_node;
+ node->d.repeat.min = min;
+ node->d.repeat.max = max;
+
+ g_queue_push_tail (&root_node->d.children, node);
+
+ return TRUE;
+}
+
+static CoglMiniReNode *
+_cogl_mini_re_compile_internal (const char *re,
+ const char **tail_ret)
+{
+ CoglMiniReNode *root_node;
+ const char *p;
+
+ root_node = _cogl_mini_re_new_group_node ();
+
+ for (p = re; *p && *p != ')'; p = g_utf8_next_char (p))
+ {
+ gunichar ch = g_utf8_get_char (p);
+
+ switch (ch)
+ {
+ case '^':
+ _cogl_mini_re_add_node (root_node, COGL_MINI_RE_BEGIN);
+ break;
+
+ case '$':
+ _cogl_mini_re_add_node (root_node, COGL_MINI_RE_BEGIN);
+ break;
+
+ case '.':
+ _cogl_mini_re_add_node (root_node, COGL_MINI_RE_ANY);
+ break;
+
+ case '\\':
+ {
+ p++;
+ ch = g_utf8_get_char (p);
+
+ switch (ch)
+ {
+ case 'd':
+ _cogl_mini_re_add_node (root_node, COGL_MINI_RE_DIGIT);
+ break;
+
+ case 'b':
+ _cogl_mini_re_add_node (root_node, COGL_MINI_RE_WORD_BOUNDARY);
+ break;
+
+ case 's':
+ _cogl_mini_re_add_node (root_node, COGL_MINI_RE_SPACE);
+ break;
+
+ case 'w':
+ _cogl_mini_re_add_node (root_node, COGL_MINI_RE_ALNUM);
+ break;
+
+ case '\0':
+ g_warning ("Unexpected end of regular expression");
+ goto error;
+
+ default:
+ _cogl_mini_re_add_character (root_node, ch);
+ break;
+ }
+ }
+ break;
+
+ case '+':
+ if (!_cogl_mini_re_add_repeat (root_node, 1, G_MAXINT))
+ goto error;
+ break;
+
+ case '*':
+ if (!_cogl_mini_re_add_repeat (root_node, 0, G_MAXINT))
+ goto error;
+ break;
+
+ case '?':
+ if (!_cogl_mini_re_add_repeat (root_node, 0, 1))
+ goto error;
+ break;
+
+ case '(':
+ {
+ CoglMiniReNode *sub_re;
+ const char *tail;
+
+ sub_re = _cogl_mini_re_compile_internal (p + 1, &tail);
+
+ if (sub_re == NULL)
+ goto error;
+
+ if (*tail != ')')
+ {
+ _cogl_mini_re_free (sub_re);
+ g_warning ("Mismatched parenthesis in regular expression");
+ goto error;
+ }
+
+ g_queue_push_tail (&root_node->d.children, sub_re);
+
+ p = tail;
+ }
+ break;
+
+ case '|':
+ {
+ CoglMiniReNode *left_re;
+ CoglMiniReNode *right_re;
+ const char *tail;
+
+ right_re = _cogl_mini_re_compile_internal (p + 1, &tail);
+ if (right_re == NULL)
+ goto error;
+
+ /* Replace the root node with the alternation node. This
+ is a bit hacky but it should work out because we've
+ just consumed the rest of the regular expression so
+ parsing of this root node should end here */
+ left_re = g_slice_dup (CoglMiniReNode, root_node);
+
+ root_node->type = COGL_MINI_RE_ALTERNATION;
+ root_node->d.alternation.left = left_re;
+ root_node->d.alternation.right = right_re;
+
+ p = tail - 1;
+ }
+ break;
+
+ default:
+ _cogl_mini_re_add_character (root_node, ch);
+ break;
+ }
+ }
+
+ *tail_ret = p;
+
+ return root_node;
+
+ error:
+ _cogl_mini_re_free (root_node);
+ return NULL;
+}
+
+static CoglMiniReNode *
+_cogl_mini_re_compile (const char *re)
+{
+ CoglMiniReNode *root_node;
+ const char *tail;
+
+ root_node = _cogl_mini_re_compile_internal (re, &tail);
+
+ if (root_node == NULL)
+ return NULL;
+
+ if (*tail != '\0')
+ {
+ g_warning ("Unbalanced parenthesis in regular expression");
+ _cogl_mini_re_free (root_node);
+ return NULL;
+ }
+
+ return root_node;
+}
+
+static gboolean
+_cogl_mini_re_match_internal (CoglMiniReNode *node,
+ const char *str,
+ gboolean is_beginning,
+ const char **tail)
+{
+ switch (node->type)
+ {
+ case COGL_MINI_RE_BEGIN:
+ if (is_beginning)
+ {
+ *tail = str;
+ return TRUE;
+ }
+
+ return FALSE;
+
+ case COGL_MINI_RE_END:
+ if (*str == '\0')
+ {
+ *tail = str;
+ return TRUE;
+ }
+
+ return FALSE;
+
+ case COGL_MINI_RE_ANY:
+ if (*str == '\0')
+ return FALSE;
+
+ *tail = g_utf8_next_char (str);
+ return TRUE;
+
+ case COGL_MINI_RE_DIGIT:
+ if (g_ascii_isdigit (*str))
+ {
+ *tail = str + 1;
+ return TRUE;
+ }
+
+ return FALSE;
+
+ case COGL_MINI_RE_WORD_BOUNDARY:
+ {
+ gboolean after_word, before_word;
+
+ after_word = !is_beginning && g_ascii_isalnum (str[-1]);
+ before_word = *str != '\0' && g_ascii_isalnum (*str);
+
+ *tail = str;
+ return after_word != before_word;
+ }
+
+ case COGL_MINI_RE_SPACE:
+ if (g_ascii_isspace (*str))
+ {
+ *tail = str + 1;
+ return TRUE;
+ }
+
+ return FALSE;
+
+ case COGL_MINI_RE_ALNUM:
+ if (g_ascii_isalnum (*str) || *str == '_')
+ {
+ *tail = str + 1;
+ return TRUE;
+ }
+
+ return FALSE;
+
+ case COGL_MINI_RE_CHARACTERS:
+ if (g_str_has_prefix (str, node->d.characters->str))
+ {
+ *tail = str + node->d.characters->len;
+ return TRUE;
+ }
+
+ return FALSE;
+
+ case COGL_MINI_RE_REPEAT:
+ {
+ const char *repeat_tail;
+ int repeat_count = 0;
+
+ while (repeat_count < node->d.repeat.max &&
+ _cogl_mini_re_match_internal (node->d.repeat.child,
+ str,
+ is_beginning,
+ &repeat_tail))
+ {
+ repeat_count++;
+ if (str != repeat_tail)
+ is_beginning = FALSE;
+ str = repeat_tail;
+ }
+
+ *tail = str;
+ return repeat_count >= node->d.repeat.min;
+ }
+
+ case COGL_MINI_RE_GROUP:
+ {
+ GList *l;
+
+ for (l = node->d.children.head; l; l = l->next)
+ {
+ const char *node_tail;
+
+ if (!_cogl_mini_re_match_internal (l->data,
+ str,
+ is_beginning,
+ &node_tail))
+ return FALSE;
+
+ if (str != node_tail)
+ is_beginning = FALSE;
+ str = node_tail;
+ }
+
+ *tail = str;
+ return TRUE;
+ }
+
+ case COGL_MINI_RE_ALTERNATION:
+ return (_cogl_mini_re_match_internal (node->d.alternation.left,
+ str,
+ is_beginning,
+ tail) ||
+ _cogl_mini_re_match_internal (node->d.alternation.right,
+ str,
+ is_beginning,
+ tail));
+ }
+
+ g_assert_not_reached ();
+}
+
+gboolean
+_cogl_mini_re_match (const char *re,
+ const char *str)
+{
+ CoglMiniReNode *root_node;
+ gboolean ret = FALSE;
+ const char *p;
+ const char *tail;
+
+ root_node = _cogl_mini_re_compile (re);
+
+ if (root_node == NULL)
+ return FALSE;
+
+ for (p = str; *p; p = g_utf8_next_char (p))
+ if (_cogl_mini_re_match_internal (root_node, p, p == str, &tail))
+ {
+ ret = TRUE;
+ break;
+ }
+
+ _cogl_mini_re_free (root_node);
+
+ return ret;
+}
+
+#ifndef CLUTTER_COMPILATION
+
+/* Testing code.. */
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+ int i;
+
+ if (argc < 2)
+ {
+ fprintf (stderr, "usage: %s <re> <test string>...\n", argv[0]);
+ return 1;
+ }
+
+ for (i = 2; i < argc; i++)
+ {
+ gboolean ret = _cogl_mini_re_match (argv[1], argv[i]);
+
+ printf ("\"%s\": %s\n",
+ argv[i],
+ ret ? "yes" : "no");
+ }
+
+ return 0;
+}
+
+#endif
--
1.7.3.16.g9464b
More information about the Cogl
mailing list