[igt-dev] [PATCH i-g-t v4 1/1] runner: Implement --abort-on-monitored-error

Chris Wilson chris at chris-wilson.co.uk
Tue Nov 6 16:53:02 UTC 2018


Quoting Petri Latvala (2018-11-06 14:26:29)
> Deviating a bit from the piglit command line flag, igt_runner takes an
> optional comma-separated list as an argument to
> --abort-on-monitored-error for the list of conditions to abort
> on. Without a list all possible conditions will be checked.
> 
> Two conditions implemented:
>  - "taint" checks the kernel taint level for TAINT_PAGE, TAINT_DIE and
>  TAINT_OOPS
>  - "lockdep" checks the kernel lockdep status
> 
> Checking is done after every test binary execution, and if an abort
> condition is met, the reason is printed to stderr (unless log level is
> quiet) and the runner doesn't execute any further tests. Aborting
> between subtests (when running in --multiple-mode) is not done.
> 
> A TODO item for the future is having aborting appear in the test
> results.
> 
> v2:
>  - Remember to fclose
>  - Taints are unsigned long (Chris)
>  - Use getline instead of fgets (Chris)
> v3:
>  - Fix brainfart with lockdep
> v4:
>  - Rebase
>  - Refactor the abort condition checking to pass down strings
>  - Present the abort result in results.json as a pseudo test result
>  - Unit tests for the pseudo result
> 
> Signed-off-by: Petri Latvala <petri.latvala at intel.com>
> Cc: Arkadiusz Hiler <arkadiusz.hiler at intel.com>
> Cc: Tomi Sarvela <tomi.p.sarvela at intel.com>
> Cc: Martin Peres <martin.peres at linux.intel.com>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  runner/executor.c                             | 189 ++++++++++++++++--
>  runner/executor.h                             |   1 +
>  .../aborted-after-a-test/0/dmesg.txt          |   5 +
>  .../aborted-after-a-test/0/err.txt            |   2 +
>  .../aborted-after-a-test/0/journal.txt        |   2 +
>  .../aborted-after-a-test/0/out.txt            |   3 +
>  .../aborted-after-a-test/README.txt           |   1 +
>  .../aborted-after-a-test/aborted.txt          |   3 +
>  .../aborted-after-a-test/endtime.txt          |   1 +
>  .../aborted-after-a-test/joblist.txt          |   5 +
>  .../aborted-after-a-test/metadata.txt         |  12 ++
>  .../aborted-after-a-test/reference.json       |  90 +++++++++
>  .../aborted-after-a-test/starttime.txt        |   1 +
>  .../aborted-after-a-test/uname.txt            |   1 +
>  .../aborted-on-boot/README.txt                |   1 +
>  .../aborted-on-boot/aborted.txt               |   3 +
>  .../aborted-on-boot/endtime.txt               |   1 +
>  .../aborted-on-boot/joblist.txt               |   5 +
>  .../aborted-on-boot/metadata.txt              |  12 ++
>  .../aborted-on-boot/reference.json            |  59 ++++++
>  .../aborted-on-boot/starttime.txt             |   1 +
>  .../json_tests_data/aborted-on-boot/uname.txt |   1 +
>  .../dmesg-results/metadata.txt                |   2 +-
>  .../metadata.txt                              |   2 +-
>  .../json_tests_data/normal-run/metadata.txt   |   2 +-
>  .../piglit-style-dmesg/metadata.txt           |   2 +-
>  .../warnings-with-dmesg-warns/metadata.txt    |   2 +-
>  runner/json_tests_data/warnings/metadata.txt  |   2 +-
>  runner/resultgen.c                            |  48 ++++-
>  runner/runner_json_tests.c                    |   2 +
>  runner/runner_tests.c                         |  49 ++++-
>  runner/settings.c                             |  79 +++++++-
>  runner/settings.h                             |   8 +-
>  33 files changed, 556 insertions(+), 41 deletions(-)
>  create mode 100644 runner/json_tests_data/aborted-after-a-test/0/dmesg.txt
>  create mode 100644 runner/json_tests_data/aborted-after-a-test/0/err.txt
>  create mode 100644 runner/json_tests_data/aborted-after-a-test/0/journal.txt
>  create mode 100644 runner/json_tests_data/aborted-after-a-test/0/out.txt
>  create mode 100644 runner/json_tests_data/aborted-after-a-test/README.txt
>  create mode 100644 runner/json_tests_data/aborted-after-a-test/aborted.txt
>  create mode 100644 runner/json_tests_data/aborted-after-a-test/endtime.txt
>  create mode 100644 runner/json_tests_data/aborted-after-a-test/joblist.txt
>  create mode 100644 runner/json_tests_data/aborted-after-a-test/metadata.txt
>  create mode 100644 runner/json_tests_data/aborted-after-a-test/reference.json
>  create mode 100644 runner/json_tests_data/aborted-after-a-test/starttime.txt
>  create mode 100644 runner/json_tests_data/aborted-after-a-test/uname.txt
>  create mode 100644 runner/json_tests_data/aborted-on-boot/README.txt
>  create mode 100644 runner/json_tests_data/aborted-on-boot/aborted.txt
>  create mode 100644 runner/json_tests_data/aborted-on-boot/endtime.txt
>  create mode 100644 runner/json_tests_data/aborted-on-boot/joblist.txt
>  create mode 100644 runner/json_tests_data/aborted-on-boot/metadata.txt
>  create mode 100644 runner/json_tests_data/aborted-on-boot/reference.json
>  create mode 100644 runner/json_tests_data/aborted-on-boot/starttime.txt
>  create mode 100644 runner/json_tests_data/aborted-on-boot/uname.txt
> 
> diff --git a/runner/executor.c b/runner/executor.c
> index 007b72ce..ed444a9a 100644
> --- a/runner/executor.c
> +++ b/runner/executor.c
> @@ -108,6 +108,90 @@ static void ping_watchdogs(void)
>         }
>  }
>  
> +static char *handle_lockdep(void)
> +{
> +       FILE *f = fopen("/proc/lockdep_stats", "r");
> +       const char *debug_locks_line = " debug_locks:";
> +       char *buf = NULL;
> +       size_t bufsize = 0;
> +       int val;
> +       char *reason = NULL;
> +
> +       if (!f)
> +               return reason;
> +
> +       while (getline(&buf, &bufsize, f) > 0) {
> +               if (!strncmp(buf, debug_locks_line, strlen(debug_locks_line))) {
> +                       char *p = buf + strlen(debug_locks_line);
> +                       if (sscanf(p, "%d", &val) == 1 &&
> +                           val != 1)
> +                               asprintf(&reason, "Lockdep triggered\n");
> +
> +                       break;
> +               }
> +       }
> +
> +       fclose(f);
> +       free(buf);
> +       return reason;
> +}
> +
> +static char *handle_taint(void)
> +{
> +       FILE *f = fopen("/proc/sys/kernel/tainted", "r");
> +       int s;
> +       unsigned long taint;
> +       unsigned long bad_taints =
> +               0x20  | /* TAINT_PAGE */
> +               0x80  | /* TAINT_DIE */
> +               0x200; /* TAINT_OOPS */

Please be considerate to the reader.
> +
> +       if (!f)
> +               return NULL;
> +
> +       s = fscanf(f, "%lu", &taint);
> +       fclose(f);
> +
> +       if (s != 1)
> +               return NULL;
> +
> +       if (taint & bad_taints) {
> +               char *reason;
> +
> +               asprintf(&reason, "Kernel tainted (0x%lx)\n", taint);
> +
> +               return reason;
> +       }
> +
> +       return NULL;

const unsigned long bad_taints =
	0x20  | /* TAINT_PAGE */
	0x80  | /* TAINT_DIE */
	0x200; /* TAINT_OOPS */
unsigned long taints = 0;
char *reason = NULL;
FILE *f;

f = fopen();
if (f) {
	fscanf(f, "%lu", &taints);
	fclose(f);
}

if (taints & bad_taints)
	asprintf(&reason,
		 "Kernel tainted (%#lx -- %lx)\n",
		 taints, taints & bad_taints);

return reason;

> +}
> +
> +static struct {

static const struct {

> +       int condition;
> +       char *(*handler)(void);
> +} abort_handlers[] = {
> +       { ABORT_LOCKDEP, handle_lockdep },
> +       { ABORT_TAINT, handle_taint },
> +       { 0, 0 },
> +};
> +
> +static char *need_to_abort(struct settings* settings)

const struct settings *settings

> +{
> +       typeof(*abort_handlers) *it;
> +       char *ret = NULL;
> +
> +       for (it = abort_handlers; it->condition; it++) {
> +               if (settings->abort_mask & it->condition &&
> +                   (ret = it->handler()) != NULL) {
> +                       if (settings->log_level >= LOG_LEVEL_NORMAL)
> +                               fprintf(stderr, "%s", ret);
> +               }
> +               break;

break? looks incongruous.

for (it = abort_handlers; it->condition; it++) {
	char *abort;

	if (!(settings->abort_mask & it->condition))
		continue;

	abort = it->handler();
	if (!abort)
		continue;

	if (settings->log_level >= ...)
		fprintf(stderr, "%s\n", abort);

	return abort;
}

I'd recommend not including the '\n' in the abort string itself but
append in the callers -- it should make it easier to include in any
error message.
-Chris


More information about the igt-dev mailing list