[Intel-gfx] [PATCH i-g-t v3] lib/igt_core.c: Expand --run-subtest functionality.
Daniel Vetter
daniel at ffwll.ch
Mon Feb 15 16:55:10 UTC 2016
On Thu, Feb 04, 2016 at 12:06:57PM +0000, Derek Morton wrote:
> Added extended wildcard support when specifying --run-subtest.
>
> Wildcard format is as specified in rfc3977 and the uwildmat() implementation
> is taken from libinn.
> See https://tools.ietf.org/html/rfc3977#section-4 for a description of
> allowed wildcard expressions.
>
> v2: Use comma as list separator (Ville Syrjala)
> support both ^ and ! as not operators (Dave Gordon)
>
> v3: Updated to use uwildmat() (Dave Gordon)
>
> Signed-off-by: Derek Morton <derek.j.morton at intel.com>
> ---
> COPYING | 21 +++
> lib/Makefile.sources | 2 +
> lib/igt_core.c | 17 +-
> lib/uwildmat/uwildmat.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++
> lib/uwildmat/uwildmat.h | 24 +++
Not really a fan of copying other sources into ours. Don't we have
something ready-made that's generally available, or can we at least pull
it in as a build-dep?
Thanks, Daniel
> 5 files changed, 536 insertions(+), 2 deletions(-)
> create mode 100644 lib/uwildmat/uwildmat.c
> create mode 100644 lib/uwildmat/uwildmat.h
>
> diff --git a/COPYING b/COPYING
> index b8f6753..16375f2 100644
> --- a/COPYING
> +++ b/COPYING
> @@ -106,3 +106,24 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> IN THE SOFTWARE.
> +
> +Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
> + 2013, 2014 by Internet Systems Consortium, Inc. ("ISC")
> +Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
> + 2002, 2003 by The Internet Software Consortium and Rich Salz
> +
> +This code is derived from software contributed to the Internet Software
> +Consortium by Rich Salz.
> +
> +Permission to use, copy, modify, and distribute this software for any
> +purpose with or without fee is hereby granted, provided that the above
> +copyright notice and this permission notice appear in all copies.
> +
> +THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
> +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY
> +SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> +
> diff --git a/lib/Makefile.sources b/lib/Makefile.sources
> index 4999868..e33861e 100644
> --- a/lib/Makefile.sources
> +++ b/lib/Makefile.sources
> @@ -60,6 +60,8 @@ libintel_tools_la_SOURCES = \
> igt_core.h \
> igt_draw.c \
> igt_draw.h \
> + uwildmat/uwildmat.h \
> + uwildmat/uwildmat.c \
> $(NULL)
>
> .PHONY: version.h.tmp
> diff --git a/lib/igt_core.c b/lib/igt_core.c
> index 6b69bb7..8e0bd2e 100644
> --- a/lib/igt_core.c
> +++ b/lib/igt_core.c
> @@ -56,7 +56,7 @@
> #include <ctype.h>
> #include <limits.h>
> #include <locale.h>
> -#include <fnmatch.h>
> +#include <uwildmat/uwildmat.h>
>
> #include "drmtest.h"
> #include "intel_chipset.h"
> @@ -209,6 +209,19 @@
> * intel gpu to be present). Then individual subtests can be run with
> * "--run-subtest". Usage help for tests with subtests can be obtained with the
> * "--help" command line option.
> + *
> + * A wildcard expression can be given to --run-subtest to specify a subset of
> + * subtests to run. See https://tools.ietf.org/html/rfc3977#section-4 for a
> + * description of allowed wildcard expressions.
> + * Some examples of allowed wildcard expressions are:
> + *
> + * - '*basic*' match any subtest containing basic
> + * - 'basic-???' match any subtest named basic- with 3 characters after -
> + * - 'basic-[0-9]' match any subtest named basic- with a single number after -
> + * - 'basic-[^0-9]' match any subtest named basic- with a single non numerical character after -
> + * - 'basic*,advanced*' match any subtest starting with basic or advanced
> + * - '*,!basic*' match any subtest not starting with basic
> + * - 'basic*,!basic-render*' match any subtest starting with basic but not with basic-render
> */
>
> static unsigned int exit_handler_count;
> @@ -814,7 +827,7 @@ bool __igt_run_subtest(const char *subtest_name)
> }
>
> if (run_single_subtest) {
> - if (fnmatch(run_single_subtest, subtest_name, 0) != 0)
> + if (uwildmat(subtest_name, run_single_subtest) == 0)
> return false;
> else
> run_single_subtest_found = true;
> diff --git a/lib/uwildmat/uwildmat.c b/lib/uwildmat/uwildmat.c
> new file mode 100644
> index 0000000..2d34742
> --- /dev/null
> +++ b/lib/uwildmat/uwildmat.c
> @@ -0,0 +1,474 @@
> +/* uwildmat.c is reused from libinn - https://launchpad.net/ubuntu/+source/inn2/2.5.4-1
> +
> +This provides wild card matching originally used in InterNetNews and is
> +described in https://tools.ietf.org/html/rfc3977#section-4
> +
> +INN licence:
> +INN as a whole and all code contained in it not otherwise marked with
> +different licenses and/or copyrights is covered by the following copyright
> +and license:
> +
> + Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
> + 2013, 2014 by Internet Systems Consortium, Inc. ("ISC")
> + Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
> + 2002, 2003 by The Internet Software Consortium and Rich Salz
> +
> + This code is derived from software contributed to the Internet Software
> + Consortium by Rich Salz.
> +
> + Permission to use, copy, modify, and distribute this software for any
> + purpose with or without fee is hereby granted, provided that the above
> + copyright notice and this permission notice appear in all copies.
> +
> + THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
> + REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY
> + SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> +
> +*/
> +
> +/* $Id: uwildmat.c 8918 2010-01-22 23:28:28Z iulius $
> +**
> +** wildmat pattern matching with Unicode UTF-8 extensions.
> +**
> +** Do shell-style pattern matching for ?, \, [], and * characters. Might not
> +** be robust in face of malformed patterns; e.g., "foo[a-" could cause a
> +** segmentation violation. It is 8-bit clean. (Robustness hopefully fixed
> +** July 2000; all malformed patterns should now just fail to match anything.)
> +**
> +** Original by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
> +** Rich $alz is now <rsalz at osf.org>.
> +**
> +** April, 1991: Replaced mutually-recursive calls with in-line code for the
> +** star character.
> +**
> +** Special thanks to Lars Mathiesen <thorinn at diku.dk> for the ABORT code.
> +** This can greatly speed up failing wildcard patterns. For example:
> +**
> +** pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
> +** text 1: -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
> +** text 2: -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
> +**
> +** Text 1 matches with 51 calls, while text 2 fails with 54 calls. Without
> +** the ABORT code, it takes 22310 calls to fail. Ugh. The following
> +** explanation is from Lars:
> +**
> +** (The precondition that must be fulfilled is that DoMatch will consume at
> +** least one character in text. This is true if *p is neither '*' nor '\0'.)
> +** The last return has ABORT instead of false to avoid quadratic behaviour in
> +** cases like pattern "*a*b*c*d" with text "abcxxxxx". With false, each
> +** star-loop has to run to the end of the text; with ABORT only the last one
> +** does.
> +**
> +** Once the control of one instance of DoMatch enters the star-loop, that
> +** instance will return either true or ABORT, and any calling instance will
> +** therefore return immediately after (without calling recursively again).
> +** In effect, only one star-loop is ever active. It would be possible to
> +** modify the code to maintain this context explicitly, eliminating all
> +** recursive calls at the cost of some complication and loss of clarity (and
> +** the ABORT stuff seems to be unclear enough by itself). I think it would
> +** be unwise to try to get this into a released version unless you have a
> +** good test data base to try it out on.
> +**
> +** June, 1991: Robert Elz <kre at munnari.oz.au> added minus and close bracket
> +** handling for character sets.
> +**
> +** July, 2000: Largely rewritten by Russ Allbery <rra at stanford.edu> to add
> +** support for ',', '!', and optionally '@' to the core wildmat routine.
> +** Broke the character class matching into a separate function for clarity
> +** since it's infrequently used in practice, and added some simple lookahead
> +** to significantly decrease the recursive calls in the '*' matching code.
> +** Added support for UTF-8 as the default character set for any high-bit
> +** characters.
> +**
> +** For more information on UTF-8, see RFC 3629.
> +**
> +** Please note that this file is intentionally written so that conditionally
> +** executed expressions are on separate lines from the condition to
> +** facilitate analysis of the coverage of the test suite using purecov.
> +** Please preserve this. As of March 11, 2001, purecov reports that the
> +** accompanying test suite achieves 100% coverage of this file.
> +*/
> +
> +#include <string.h>
> +#include "uwildmat.h"
> +
> +#define ABORT -1
> +
> +/* Whether or not an octet looks like the start of a UTF-8 character. */
> +#define ISUTF8(c) (((c) & 0xc0) == 0xc0)
> +
> +
> +/*
> +** Determine the length of a non-ASCII character in octets (for advancing
> +** pointers when skipping over characters). Takes a pointer to the start of
> +** the character and to the last octet of the string. If end is NULL, expect
> +** the string pointed to by start to be nul-terminated. If the character is
> +** malformed UTF-8, return 1 to treat it like an eight-bit local character.
> +*/
> +static int
> +utf8_length(const unsigned char *start, const unsigned char *end)
> +{
> + unsigned char mask = 0x80;
> + const unsigned char *p;
> + int length = 0;
> + int left;
> +
> + for (; mask > 0 && (*start & mask) == mask; mask >>= 1)
> + length++;
> + if (length < 2 || length > 6)
> + return 1;
> + if (end != NULL && (end - start + 1) < length)
> + return 1;
> + left = length - 1;
> + for (p = start + 1; left > 0 && (*p & 0xc0) == 0x80; p++)
> + left--;
> + return (left == 0) ? length : 1;
> +}
> +
> +
> +/*
> +** Check whether a string contains only valid UTF-8 characters.
> +*/
> +bool
> +is_valid_utf8(const char *text)
> +{
> + unsigned char mask;
> + const unsigned char *p;
> + int length;
> + int left;
> +
> + for (p = (const unsigned char *)text; *p != '\0';) {
> + mask = 0x80;
> + length = 0;
> +
> + /* Find out the expected length of the character. */
> + for (; mask > 0 && (*p & mask) == mask; mask >>= 1)
> + length++;
> +
> + p++;
> +
> + /* Valid ASCII. */
> + if (length == 0)
> + continue;
> +
> + /* Invalid length. */
> + if (length < 2 || length > 6)
> + return false;
> +
> + /* Check that each byte looks like 10xxxxxx, except for the first. */
> + left = length - 1;
> + for (; left > 0 && (*p & 0xc0) == 0x80; p++)
> + left--;
> +
> + if (left > 0)
> + return false;
> + }
> +
> + return true;
> +}
> +
> +
> +/*
> +** Convert a UTF-8 character to UCS-4. Takes a pointer to the start of the
> +** character and to the last octet of the string, and to a uint32_t into
> +** which to put the decoded UCS-4 value. If end is NULL, expect the string
> +** pointed to by start to be nul-terminated. Returns the number of octets in
> +** the UTF-8 encoding. If the UTF-8 character is malformed, set result to
> +** the decimal value of the first octet; this is wrong, but it will generally
> +** cause the rest of the wildmat matching to do the right thing for non-UTF-8
> +** input.
> +*/
> +static int
> +utf8_decode(const unsigned char *start, const unsigned char *end,
> + uint32_t *result)
> +{
> + uint32_t value = 0;
> + int length, i;
> + const unsigned char *p = start;
> + unsigned char mask;
> +
> + length = utf8_length(start, end);
> + if (length < 2) {
> + *result = *start;
> + return 1;
> + }
> + mask = (1 << (7 - length)) - 1;
> + value = *p & mask;
> + p++;
> + for (i = length - 1; i > 0; i--) {
> + value = (value << 6) | (*p & 0x3f);
> + p++;
> + }
> + *result = value;
> + return length;
> +}
> +
> +
> +/*
> +** Match a character class against text, a UCS-4 character. start is a
> +** pointer to the first character of the character class, end a pointer to
> +** the last. Returns whether the class matches that character.
> +*/
> +static bool
> +match_class(uint32_t text, const unsigned char *start,
> + const unsigned char *end)
> +{
> + bool reversed, allowrange;
> + const unsigned char *p = start;
> + uint32_t first = 0;
> + uint32_t last;
> +
> + /* Check for an inverted character class (starting with ^). If the
> + character matches the character class, we return !reversed; that way,
> + we return true if it's a regular character class and false if it's a
> + reversed one. If the character doesn't match, we return reversed. */
> + reversed = (*p == '^');
> + if (reversed)
> + p++;
> +
> + /* Walk through the character class until we reach the end or find a
> + match, handling character ranges as we go. Only permit a range to
> + start when allowrange is true; this allows - to be treated like a
> + normal character as the first character of the class and catches
> + malformed ranges like a-e-n. We treat the character at the beginning
> + of a range as both a regular member of the class and the beginning of
> + the range; this is harmless (although it means that malformed ranges
> + like m-a will match m and nothing else). */
> + allowrange = false;
> + while (p <= end) {
> + if (allowrange && *p == '-' && p < end) {
> + p++;
> + p += utf8_decode(p, end, &last);
> + if (text >= first && text <= last)
> + return !reversed;
> + allowrange = false;
> + } else {
> + p += utf8_decode(p, end, &first);
> + if (text == first)
> + return !reversed;
> + allowrange = true;
> + }
> + }
> + return reversed;
> +}
> +
> +
> +/*
> +** Match the text against the pattern between start and end. This is a
> +** single pattern; a leading ! or @ must already be taken care of, and
> +** commas must be dealt with outside of this routine.
> +*/
> +static int
> +match_pattern(const unsigned char *text, const unsigned char *start,
> + const unsigned char *end)
> +{
> + const unsigned char *q, *endclass;
> + const unsigned char *p = start;
> + bool ismeta;
> + int matched, width;
> + uint32_t c;
> +
> + for (; p <= end; p++) {
> + if (!*text && *p != '*')
> + return ABORT;
> +
> + switch (*p) {
> + case '\\':
> + if (!*++p)
> + return ABORT;
> + /* Fall through. */
> +
> + default:
> + if (*text++ != *p)
> + return false;
> + break;
> +
> + case '?':
> + text += ISUTF8(*text) ? utf8_length(text, NULL) : 1;
> + break;
> +
> + case '*':
> + /* Consecutive stars are equivalent to one. Advance pattern to
> + the character after the star. */
> + for (++p; *p == '*'; p++)
> + ;
> +
> + /* A trailing star will match anything. */
> + if (p > end)
> + return true;
> +
> + /* Basic algorithm: Recurse at each point where the * could
> + possibly match. If the match succeeds or aborts, return
> + immediately; otherwise, try the next position.
> +
> + Optimization: If the character after the * in the pattern
> + isn't a metacharacter (the common case), then the * has to
> + consume characters at least up to the next occurrence of that
> + character in the text. Scan forward for those points rather
> + than recursing at every possible point to save the extra
> + function call overhead. */
> + ismeta = (*p == '[' || *p == '?' || *p == '\\');
> + while (*text) {
> + width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
> + if (ismeta) {
> + matched = match_pattern(text, p, end);
> + text += width;
> + } else {
> + while (*text && *text != *p) {
> + text += width;
> + width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
> + }
> + if (!*text)
> + return ABORT;
> + matched = match_pattern(++text, p + 1, end);
> + }
> + if (matched != false)
> + return matched;
> + }
> + return ABORT;
> +
> + case '[':
> + /* Find the end of the character class, making sure not to pick
> + up a close bracket at the beginning of the class. */
> + p++;
> + q = p + (*p == '^') + 1;
> + if (q > end)
> + return ABORT;
> + endclass = memchr(q, ']', (size_t) (end - q + 1));
> + if (!endclass)
> + return ABORT;
> +
> + /* Do the heavy lifting in another function for clarity, since
> + character classes are an uncommon case. */
> + text += utf8_decode(text, NULL, &c);
> + if (!match_class(c, p, endclass - 1))
> + return false;
> + p = endclass;
> + break;
> + }
> + }
> +
> + return (*text == '\0');
> +}
> +
> +
> +/*
> +** Takes text and a wildmat expression; a wildmat expression is a
> +** comma-separated list of wildmat patterns, optionally preceded by ! to
> +** invert the sense of the expression. Returns UWILDMAT_MATCH if that
> +** expression matches the text, UWILDMAT_FAIL otherwise. If allowpoison is
> +** set, allow @ to introduce a poison expression (the same as !, but if it
> +** triggers the failed match the routine returns UWILDMAT_POISON instead).
> +*/
> +static enum uwildmat
> +match_expression(const unsigned char *text, const unsigned char *start,
> + bool allowpoison)
> +{
> + const unsigned char *end, *split;
> + const unsigned char *p = start;
> + bool reverse, escaped;
> + bool match = false;
> + bool poison = false;
> + bool poisoned = false;
> +
> + /* Handle the empty expression separately, since otherwise end will be
> + set to an invalid pointer. */
> + if (!*p)
> + return !*text ? UWILDMAT_MATCH : UWILDMAT_FAIL;
> + end = start + strlen((const char *) start) - 1;
> +
> + /* Main match loop. Find each comma that separates patterns, and attempt
> + to match the text with each pattern in order. The last matching
> + pattern determines whether the whole expression matches. */
> + for (; p <= end + 1; p = split + 1) {
> + if (allowpoison)
> + poison = (*p == '@');
> + reverse = (*p == '!') || poison;
> + if (reverse)
> + p++;
> +
> + /* Find the first unescaped comma, if any. If there is none, split
> + will be one greater than end and point at the nul at the end of
> + the string. */
> + for (escaped = false, split = p; split <= end; split++) {
> + if (*split == '[') {
> + split++;
> + if (*split == ']')
> + split++;
> + while (split <= end && *split != ']')
> + split++;
> + }
> + if (*split == ',' && !escaped)
> + break;
> + escaped = (*split == '\\') ? !escaped : false;
> + }
> +
> + /* Optimization: If match == !reverse and poison == poisoned, this
> + pattern can't change the result, so don't do any work. */
> + if (match == !reverse && poison == poisoned)
> + continue;
> + if (match_pattern(text, p, split - 1) == true) {
> + poisoned = poison;
> + match = !reverse;
> + }
> + }
> + if (poisoned)
> + return UWILDMAT_POISON;
> + return match ? UWILDMAT_MATCH : UWILDMAT_FAIL;
> +}
> +
> +
> +/*
> +** User-level routine used for wildmats where @ should be treated as a
> +** regular character.
> +*/
> +bool
> +uwildmat(const char *text, const char *pat)
> +{
> + const unsigned char *utext = (const unsigned char *) text;
> + const unsigned char *upat = (const unsigned char *) pat;
> +
> + if (upat[0] == '*' && upat[1] == '\0')
> + return true;
> + else
> + return (match_expression(utext, upat, false) == UWILDMAT_MATCH);
> +}
> +
> +
> +/*
> +** User-level routine used for wildmats that support poison matches.
> +*/
> +enum uwildmat
> +uwildmat_poison(const char *text, const char *pat)
> +{
> + const unsigned char *utext = (const unsigned char *) text;
> + const unsigned char *upat = (const unsigned char *) pat;
> +
> + if (upat[0] == '*' && upat[1] == '\0')
> + return UWILDMAT_MATCH;
> + else
> + return match_expression(utext, upat, true);
> +}
> +
> +
> +/*
> +** User-level routine for simple expressions (neither , nor ! are special).
> +*/
> +bool
> +uwildmat_simple(const char *text, const char *pat)
> +{
> + const unsigned char *utext = (const unsigned char *) text;
> + const unsigned char *upat = (const unsigned char *) pat;
> + size_t length;
> +
> + if (upat[0] == '*' && upat[1] == '\0')
> + return true;
> + else {
> + length = strlen(pat);
> + return (match_pattern(utext, upat, upat + length - 1) == true);
> + }
> +}
> diff --git a/lib/uwildmat/uwildmat.h b/lib/uwildmat/uwildmat.h
> new file mode 100644
> index 0000000..2e47189
> --- /dev/null
> +++ b/lib/uwildmat/uwildmat.h
> @@ -0,0 +1,24 @@
> +
> +#ifndef UWILDMAT_H
> +#define UWILDMAT_H 1
> +
> +#include <stdio.h>
> +#include <sys/types.h>
> +#include <stdbool.h>
> +
> +/*
> +** WILDMAT MATCHING
> +*/
> +enum uwildmat {
> + UWILDMAT_FAIL = 0,
> + UWILDMAT_MATCH = 1,
> + UWILDMAT_POISON
> +};
> +
> +extern bool is_valid_utf8(const char *start);
> +extern bool uwildmat(const char *text, const char *pat);
> +extern bool uwildmat_simple(const char *text, const char *pat);
> +extern enum uwildmat uwildmat_poison(const char *text, const char *pat);
> +
> +
> +#endif /* UWILDMAT_H */
> --
> 1.9.1
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
More information about the Intel-gfx
mailing list