[Cogl] [PATCH 3/5] Add a simple internal regular expression parser

Neil Roberts neil at linux.intel.com
Tue Mar 27 09:56:23 PDT 2012


This adds a function which can match a string against a simple form of
regular expressions. The regular expression is parsed into a tree
structure and then tried against every position in the string. The
parser does not support backtracking or capturing and only has a
limited set of operators. This will be used as a convenient way to
match patterns on the values from glGetString to determine driver
specific workarounds. GRegex is not used to avoid having to pull in a
large dependency when --disable-glib is used.
---
 cogl/Makefile.am            |    2 +
 cogl/cogl-mini-re-private.h |   67 +++++
 cogl/cogl-mini-re.c         |  558 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 627 insertions(+), 0 deletions(-)
 create mode 100644 cogl/cogl-mini-re-private.h
 create mode 100644 cogl/cogl-mini-re.c

diff --git a/cogl/Makefile.am b/cogl/Makefile.am
index 69cb5df..b9968a7 100644
--- a/cogl/Makefile.am
+++ b/cogl/Makefile.am
@@ -183,6 +183,8 @@ cogl_sources_c = \
 	$(srcdir)/cogl-handle.h 			\
 	$(srcdir)/cogl-context-private.h		\
 	$(srcdir)/cogl-context.c			\
+	$(srcdir)/cogl-mini-re-private.h		\
+	$(srcdir)/cogl-mini-re.c			\
 	$(srcdir)/cogl-renderer-private.h		\
 	$(srcdir)/cogl-renderer.h			\
 	$(srcdir)/cogl-renderer.c			\
diff --git a/cogl/cogl-mini-re-private.h b/cogl/cogl-mini-re-private.h
new file mode 100644
index 0000000..22ac6bb
--- /dev/null
+++ b/cogl/cogl-mini-re-private.h
@@ -0,0 +1,67 @@
+/*
+ * Cogl
+ *
+ * An object oriented GL/GLES Abstraction/Utility Layer
+ *
+ * Copyright (C) 2012 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ */
+
+#ifndef __COGL_MINI_RE_PRIVATE_H
+#define __COGL_MINI_RE_PRIVATE_H
+
+#include <glib.h>
+
+/*
+ * cogl_mini_re_match:
+ * @re: A 'regular expression' to match against the string
+ * @str: The string to match
+ *
+ * This function provides a very basic regular expression-like string
+ * matcher. The special symbols supported are:
+ *
+ * ^: Matches only at the beginning of the string
+ * $: Matches only at the end of the string
+ * .: Matches any character
+ * \d: Matches any of the characters 0-9
+ * \b: Matches the empty string, but only at the start or end of a word
+ * \s: Matches any whitespace character
+ * \w: Matches any alphanumeric character including _
+ * \\: Matches a literal backslash
+ * +: One or more repeats of the previous expression. Note that this
+ *    operator is greedy and does not support backtracking. Ie, "a.+b"
+ *    will not match "abb" because the + operator will consume all of
+ *    the b's and it will not backtrack to try matches where it
+ *    consumes less b's.
+ * *: Zero or more repeats of the previous expression. This is also
+ *    greedy and does not support backtracking.
+ * ?: Matches zero or one repeats of the previous expression. This is
+ *    also greedy and does not support backtracking.
+ * (...): Groups a regular expression
+ * |: Alternation operator. Note that this does not support
+ *    backtracking.
+ *
+ * Both @re and @str are expected to be valid UTF-8 strings.
+ *
+ * Return value: %TRUE if anywhere in @str matches @re, %FALSE
+ *  otherwise.
+ */
+gboolean
+_cogl_mini_re_match (const char *re,
+                     const char *str);
+
+#endif /* __COGL_MINI_RE_PRIVATE_H */
diff --git a/cogl/cogl-mini-re.c b/cogl/cogl-mini-re.c
new file mode 100644
index 0000000..ef74f95
--- /dev/null
+++ b/cogl/cogl-mini-re.c
@@ -0,0 +1,558 @@
+/*
+ * Cogl
+ *
+ * An object oriented GL/GLES Abstraction/Utility Layer
+ *
+ * Copyright (C) 2012 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "cogl-mini-re-private.h"
+
+typedef enum
+{
+  COGL_MINI_RE_BEGIN,
+  COGL_MINI_RE_END,
+  COGL_MINI_RE_ANY,
+  COGL_MINI_RE_DIGIT,
+  COGL_MINI_RE_WORD_BOUNDARY,
+  COGL_MINI_RE_SPACE,
+  COGL_MINI_RE_ALNUM,
+  COGL_MINI_RE_CHARACTERS,
+  COGL_MINI_RE_REPEAT,
+  COGL_MINI_RE_GROUP,
+  COGL_MINI_RE_ALTERNATION
+} CoglMiniReNodeType;
+
+typedef struct _CoglMiniReNode CoglMiniReNode;
+
+struct _CoglMiniReNode
+{
+  CoglMiniReNodeType type;
+
+  union
+  {
+    struct
+    {
+      CoglMiniReNode *child;
+      int min;
+      int max;
+    } repeat;
+
+    struct
+    {
+      CoglMiniReNode *left;
+      CoglMiniReNode *right;
+    } alternation;
+
+    GQueue children;
+    GString *characters;
+  } d;
+};
+
+static CoglMiniReNode *
+_cogl_mini_re_compile_internal (const char *re,
+                                const char **tail_ret);
+
+static void
+_cogl_mini_re_free (CoglMiniReNode *node)
+{
+  switch (node->type)
+    {
+    case COGL_MINI_RE_BEGIN:
+    case COGL_MINI_RE_END:
+    case COGL_MINI_RE_ANY:
+    case COGL_MINI_RE_DIGIT:
+    case COGL_MINI_RE_WORD_BOUNDARY:
+    case COGL_MINI_RE_SPACE:
+    case COGL_MINI_RE_ALNUM:
+      break;
+
+    case COGL_MINI_RE_CHARACTERS:
+      g_string_free (node->d.characters, TRUE);
+      break;
+
+    case COGL_MINI_RE_REPEAT:
+      _cogl_mini_re_free (node->d.repeat.child);
+      break;
+
+    case COGL_MINI_RE_GROUP:
+      g_queue_foreach (&node->d.children,
+                       (GFunc) _cogl_mini_re_free,
+                       NULL);
+      g_queue_clear (&node->d.children);
+      break;
+
+    case COGL_MINI_RE_ALTERNATION:
+      _cogl_mini_re_free (node->d.alternation.left);
+      _cogl_mini_re_free (node->d.alternation.right);
+      break;
+    }
+
+  g_slice_free (CoglMiniReNode, node);
+}
+
+static CoglMiniReNode *
+_cogl_mini_re_new_node (CoglMiniReNodeType type)
+{
+  CoglMiniReNode *node = g_slice_new (CoglMiniReNode);
+
+  node->type = type;
+
+  return node;
+}
+
+static CoglMiniReNode *
+_cogl_mini_re_new_group_node (void)
+{
+  CoglMiniReNode *node = _cogl_mini_re_new_node (COGL_MINI_RE_GROUP);
+
+  g_queue_init (&node->d.children);
+
+  return node;
+}
+
+static void
+_cogl_mini_re_add_node (CoglMiniReNode *root_node,
+                        CoglMiniReNodeType type)
+{
+  CoglMiniReNode *node = _cogl_mini_re_new_node (type);
+
+  g_queue_push_tail (&root_node->d.children, node);
+}
+
+static void
+_cogl_mini_re_add_character (CoglMiniReNode *root_node,
+                             gunichar character)
+{
+  CoglMiniReNode *node;
+
+  if (root_node->d.children.length < 1 ||
+      (node = (CoglMiniReNode *) g_queue_peek_tail (&root_node->d.children))->
+      type != COGL_MINI_RE_CHARACTERS)
+    {
+      node = _cogl_mini_re_new_node (COGL_MINI_RE_CHARACTERS);
+      node->d.characters = g_string_new (NULL);
+
+      g_queue_push_tail (&root_node->d.children, node);
+    }
+
+  g_string_append_unichar (node->d.characters, character);
+}
+
+static gboolean
+_cogl_mini_re_add_repeat (CoglMiniReNode *root_node,
+                          int min,
+                          int max)
+{
+  CoglMiniReNode *node;
+  CoglMiniReNode *last_node;
+  CoglMiniReNode *child_node;
+
+  if (root_node->d.children.length < 1)
+    {
+      g_warning ("Repeat specified with no previous regular expression");
+      return FALSE;
+    }
+
+  last_node = g_queue_peek_tail (&root_node->d.children);
+
+  if (last_node->type == COGL_MINI_RE_CHARACTERS &&
+      last_node->d.characters->len > 1)
+    {
+      const char *new_end =
+        g_utf8_prev_char (last_node->d.characters->str +
+                          last_node->d.characters->len);
+      gunichar ch = g_utf8_get_char (new_end);
+
+      g_string_set_size (last_node->d.characters,
+                         new_end - last_node->d.characters->str);
+
+      child_node = _cogl_mini_re_new_node (COGL_MINI_RE_CHARACTERS);
+      child_node->d.characters = g_string_new (NULL);
+      g_string_append_unichar (child_node->d.characters, ch);
+    }
+  else
+    child_node = g_queue_pop_tail (&root_node->d.children);
+
+  node = _cogl_mini_re_new_node (COGL_MINI_RE_REPEAT);
+  node->d.repeat.child = child_node;
+  node->d.repeat.min = min;
+  node->d.repeat.max = max;
+
+  g_queue_push_tail (&root_node->d.children, node);
+
+  return TRUE;
+}
+
+static CoglMiniReNode *
+_cogl_mini_re_compile_internal (const char *re,
+                                const char **tail_ret)
+{
+  CoglMiniReNode *root_node;
+  const char *p;
+
+  root_node = _cogl_mini_re_new_group_node ();
+
+  for (p = re; *p && *p != ')'; p = g_utf8_next_char (p))
+    {
+      gunichar ch = g_utf8_get_char (p);
+
+      switch (ch)
+        {
+        case '^':
+          _cogl_mini_re_add_node (root_node, COGL_MINI_RE_BEGIN);
+          break;
+
+        case '$':
+          _cogl_mini_re_add_node (root_node, COGL_MINI_RE_BEGIN);
+          break;
+
+        case '.':
+          _cogl_mini_re_add_node (root_node, COGL_MINI_RE_ANY);
+          break;
+
+        case '\\':
+          {
+            p++;
+            ch = g_utf8_get_char (p);
+
+            switch (ch)
+              {
+              case 'd':
+                _cogl_mini_re_add_node (root_node, COGL_MINI_RE_DIGIT);
+                break;
+
+              case 'b':
+                _cogl_mini_re_add_node (root_node, COGL_MINI_RE_WORD_BOUNDARY);
+                break;
+
+              case 's':
+                _cogl_mini_re_add_node (root_node, COGL_MINI_RE_SPACE);
+                break;
+
+              case 'w':
+                _cogl_mini_re_add_node (root_node, COGL_MINI_RE_ALNUM);
+                break;
+
+              case '\0':
+                g_warning ("Unexpected end of regular expression");
+                goto error;
+
+              default:
+                _cogl_mini_re_add_character (root_node, ch);
+                break;
+              }
+          }
+          break;
+
+        case '+':
+          if (!_cogl_mini_re_add_repeat (root_node, 1, G_MAXINT))
+            goto error;
+          break;
+
+        case '*':
+          if (!_cogl_mini_re_add_repeat (root_node, 0, G_MAXINT))
+            goto error;
+          break;
+
+        case '?':
+          if (!_cogl_mini_re_add_repeat (root_node, 0, 1))
+            goto error;
+          break;
+
+        case '(':
+          {
+            CoglMiniReNode *sub_re;
+            const char *tail;
+
+            sub_re = _cogl_mini_re_compile_internal (p + 1, &tail);
+
+            if (sub_re == NULL)
+              goto error;
+
+            if (*tail != ')')
+              {
+                _cogl_mini_re_free (sub_re);
+                g_warning ("Mismatched parenthesis in regular expression");
+                goto error;
+              }
+
+            g_queue_push_tail (&root_node->d.children, sub_re);
+
+            p = tail;
+          }
+          break;
+
+        case '|':
+          {
+            CoglMiniReNode *left_re;
+            CoglMiniReNode *right_re;
+            const char *tail;
+
+            right_re = _cogl_mini_re_compile_internal (p + 1, &tail);
+            if (right_re == NULL)
+              goto error;
+
+            /* Replace the root node with the alternation node. This
+               is a bit hacky but it should work out because we've
+               just consumed the rest of the regular expression so
+               parsing of this root node should end here */
+            left_re = g_slice_dup (CoglMiniReNode, root_node);
+
+            root_node->type = COGL_MINI_RE_ALTERNATION;
+            root_node->d.alternation.left = left_re;
+            root_node->d.alternation.right = right_re;
+
+            p = tail - 1;
+          }
+          break;
+
+        default:
+          _cogl_mini_re_add_character (root_node, ch);
+          break;
+        }
+    }
+
+  *tail_ret = p;
+
+  return root_node;
+
+ error:
+  _cogl_mini_re_free (root_node);
+  return NULL;
+}
+
+static CoglMiniReNode *
+_cogl_mini_re_compile (const char *re)
+{
+  CoglMiniReNode *root_node;
+  const char *tail;
+
+  root_node = _cogl_mini_re_compile_internal (re, &tail);
+
+  if (root_node == NULL)
+    return NULL;
+
+  if (*tail != '\0')
+    {
+      g_warning ("Unbalanced parenthesis in regular expression");
+      _cogl_mini_re_free (root_node);
+      return NULL;
+    }
+
+  return root_node;
+}
+
+static gboolean
+_cogl_mini_re_match_internal (CoglMiniReNode *node,
+                              const char *str,
+                              gboolean is_beginning,
+                              const char **tail)
+{
+  switch (node->type)
+    {
+    case COGL_MINI_RE_BEGIN:
+      if (is_beginning)
+        {
+          *tail = str;
+          return TRUE;
+        }
+
+      return FALSE;
+
+    case COGL_MINI_RE_END:
+      if (*str == '\0')
+        {
+          *tail = str;
+          return TRUE;
+        }
+
+      return FALSE;
+
+    case COGL_MINI_RE_ANY:
+      if (*str == '\0')
+        return FALSE;
+
+      *tail = g_utf8_next_char (str);
+      return TRUE;
+
+    case COGL_MINI_RE_DIGIT:
+      if (g_ascii_isdigit (*str))
+        {
+          *tail = str + 1;
+          return TRUE;
+        }
+
+      return FALSE;
+
+    case COGL_MINI_RE_WORD_BOUNDARY:
+      {
+        gboolean after_word, before_word;
+
+        after_word = !is_beginning && g_ascii_isalnum (str[-1]);
+        before_word = *str != '\0' && g_ascii_isalnum (*str);
+
+        *tail = str;
+        return after_word != before_word;
+      }
+
+    case COGL_MINI_RE_SPACE:
+      if (g_ascii_isspace (*str))
+        {
+          *tail = str + 1;
+          return TRUE;
+        }
+
+      return FALSE;
+
+    case COGL_MINI_RE_ALNUM:
+      if (g_ascii_isalnum (*str) || *str == '_')
+        {
+          *tail = str + 1;
+          return TRUE;
+        }
+
+      return FALSE;
+
+    case COGL_MINI_RE_CHARACTERS:
+      if (g_str_has_prefix (str, node->d.characters->str))
+        {
+          *tail = str + node->d.characters->len;
+          return TRUE;
+        }
+
+      return FALSE;
+
+    case COGL_MINI_RE_REPEAT:
+      {
+        const char *repeat_tail;
+        int repeat_count = 0;
+
+        while (repeat_count < node->d.repeat.max &&
+               _cogl_mini_re_match_internal (node->d.repeat.child,
+                                             str,
+                                             is_beginning,
+                                             &repeat_tail))
+          {
+            repeat_count++;
+            if (str != repeat_tail)
+              is_beginning = FALSE;
+            str = repeat_tail;
+          }
+
+        *tail = str;
+        return repeat_count >= node->d.repeat.min;
+      }
+
+    case COGL_MINI_RE_GROUP:
+      {
+        GList *l;
+
+        for (l = node->d.children.head; l; l = l->next)
+          {
+            const char *node_tail;
+
+            if (!_cogl_mini_re_match_internal (l->data,
+                                               str,
+                                               is_beginning,
+                                               &node_tail))
+              return FALSE;
+
+            if (str != node_tail)
+              is_beginning = FALSE;
+            str = node_tail;
+          }
+
+        *tail = str;
+        return TRUE;
+      }
+
+    case COGL_MINI_RE_ALTERNATION:
+      return (_cogl_mini_re_match_internal (node->d.alternation.left,
+                                            str,
+                                            is_beginning,
+                                            tail) ||
+              _cogl_mini_re_match_internal (node->d.alternation.right,
+                                            str,
+                                            is_beginning,
+                                            tail));
+    }
+
+  g_assert_not_reached ();
+}
+
+gboolean
+_cogl_mini_re_match (const char *re,
+                     const char *str)
+{
+  CoglMiniReNode *root_node;
+  gboolean ret = FALSE;
+  const char *p;
+  const char *tail;
+
+  root_node = _cogl_mini_re_compile (re);
+
+  if (root_node == NULL)
+    return FALSE;
+
+  for (p = str; *p; p = g_utf8_next_char (p))
+    if (_cogl_mini_re_match_internal (root_node, p, p == str, &tail))
+      {
+        ret = TRUE;
+        break;
+      }
+
+  _cogl_mini_re_free (root_node);
+
+  return ret;
+}
+
+#ifndef CLUTTER_COMPILATION
+
+/* Testing code.. */
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+  int i;
+
+  if (argc < 2)
+    {
+      fprintf (stderr, "usage: %s <re> <test string>...\n", argv[0]);
+      return 1;
+    }
+
+  for (i = 2; i < argc; i++)
+    {
+      gboolean ret = _cogl_mini_re_match (argv[1], argv[i]);
+
+      printf ("\"%s\": %s\n",
+              argv[i],
+              ret ? "yes" : "no");
+    }
+
+  return 0;
+}
+
+#endif
-- 
1.7.3.16.g9464b



More information about the Cogl mailing list