[igt-dev] [PATCH i-g-t 1/2] runner: Add support for aborting on network failure

Petri Latvala petri.latvala at intel.com
Mon Dec 10 13:41:55 UTC 2018


If the network goes down while testing, CI tends to interpret that as
the device being down, cutting its power after a while. This causes an
innocent test to be reported as incomplete, increasing noise in the results.

A new flag to --abort-on-monitored-error, "ping", uses liboping to
ping a host configured in .igtrc with one ping after each test
execution and aborts the run if there is no reply in a hardcoded
amount of time.

Signed-off-by: Petri Latvala <petri.latvala at intel.com>
Cc: Arkadiusz Hiler <arkadiusz.hiler at intel.com>
Cc: Martin Peres <martin.peres at linux.intel.com>
Cc: Tomi Sarvela <tomi.p.sarvela at intel.com>
---
 meson.build        |   1 +
 meson_options.txt  |   6 +++
 runner/executor.c  | 104 +++++++++++++++++++++++++++++++++++++++++++++
 runner/meson.build |  12 +++++-
 runner/settings.c  |   3 ++
 runner/settings.h  |   5 ++-
 6 files changed, 128 insertions(+), 3 deletions(-)

diff --git a/meson.build b/meson.build
index 469723dc..a9fc0999 100644
--- a/meson.build
+++ b/meson.build
@@ -86,6 +86,7 @@ build_tests = get_option('build_tests')
 with_libdrm = get_option('with_libdrm')
 with_libunwind = get_option('with_libunwind')
 build_runner = get_option('build_runner')
+with_oping = get_option('with_oping')
 
 _build_overlay = build_overlay != 'false'
 _overlay_required = build_overlay == 'true'
diff --git a/meson_options.txt b/meson_options.txt
index 0cd3b350..a5935704 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -64,6 +64,12 @@ option('build_runner',
        choices : ['auto', 'true', 'false'],
        description : 'Build test runner')
 
+option('with_oping',
+       type : 'combo',
+       value : 'auto',
+       choices : ['auto', 'true', 'false'],
+       description : 'Build igt_runner with liboping')
+
 option('use_rpath',
        type : 'boolean',
        value : false,
diff --git a/runner/executor.c b/runner/executor.c
index 54c530b7..bb0fc772 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -1,6 +1,10 @@
 #include <errno.h>
 #include <fcntl.h>
+#include <glib.h>
 #include <linux/watchdog.h>
+#if HAVE_OPING
+#include <oping.h>
+#endif
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -108,6 +112,104 @@ static void ping_watchdogs(void)
 	}
 }
 
+#if HAVE_OPING
+static pingobj_t *pingobj = NULL;
+#endif
+
+/*
+ * Create the global pingobj from the igt config file.
+ *
+ * Reads $IGT_CONFIG_PATH, or ~/.igtrc when that is unset, and looks up the
+ * PingHostName key in the DUT group. The host is added to a new liboping
+ * object with a 2 second timeout. pingobj stays NULL if the file, the key
+ * or the host cannot be used. Does nothing if pingobj is already set up or
+ * if built without HAVE_OPING.
+ */
+static void load_ping_config(void)
+{
+#if HAVE_OPING
+	const char *path;
+	char *default_path = NULL;
+	char *ping_hostname = NULL;
+	GKeyFile *key_file;
+	double timeout = 2.0; /* Fair dice roll */
+
+	if (pingobj)
+		return;
+
+	/* Determine igt config path */
+	path = getenv("IGT_CONFIG_PATH");
+	if (!path)
+		path = default_path = g_build_filename(g_get_home_dir(),
+						       ".igtrc", NULL);
+
+	/* Load igt config file; without one there is nothing to ping */
+	key_file = g_key_file_new();
+	if (!g_key_file_load_from_file(key_file, path, G_KEY_FILE_NONE, NULL))
+		goto out;
+
+	/* The returned string is allocated and is released at out: */
+	ping_hostname = g_key_file_get_string(key_file, "DUT",
+					      "PingHostName", NULL);
+	if (!ping_hostname)
+		goto out;
+
+	pingobj = ping_construct();
+	if (!pingobj) {
+		fprintf(stderr, "abort on ping: Cannot create ping object\n");
+		goto out;
+	}
+
+	if (ping_host_add(pingobj, ping_hostname)) {
+		fprintf(stderr,
+			"abort on ping: Cannot add configured hostname\n");
+		ping_destroy(pingobj);
+		pingobj = NULL;
+		goto out;
+	}
+
+	ping_setopt(pingobj, PING_OPT_TIMEOUT, &timeout);
+
+out:
+	/* Every exit path past the first guard ends here; g_free(NULL) is ok */
+	g_free(ping_hostname);
+	g_free(default_path);
+	g_key_file_free(key_file);
+#endif
+}
+
+/* Ping the configured host once. Returns an allocated abort reason if a
+ * host reports a negative latency (no reply), NULL in every other case. */
+static char *handle_ping(void)
+{
+#if HAVE_OPING
+	pingobj_iter_t *iter;
+
+	if (!pingobj)
+		return NULL;
+
+	ping_send(pingobj);
+
+	for (iter = ping_iterator_get(pingobj); iter;
+	     iter = ping_iterator_next(iter)) {
+		double latency;
+		size_t len = sizeof(latency);
+		char *reason;
+
+		/* latency is only valid when get_info succeeded (returned 0) */
+		if (ping_iterator_get_info(iter, PING_INFO_LATENCY,
+					   &latency, &len) || latency >= 0.0)
+			continue;
+		if (asprintf(&reason,
+			     "Ping host did not respond to ping, network down") < 0)
+			return NULL; /* allocation failed, nothing to report */
+		return reason;
+	}
+#endif
+
+	return NULL;
+}
+
 static char *handle_lockdep(void)
 {
 	const char *header = "Lockdep not active\n\n/proc/lockdep_stats contents:\n";
@@ -175,6 +277,7 @@ static const struct {
 } abort_handlers[] = {
 	{ ABORT_LOCKDEP, handle_lockdep },
 	{ ABORT_TAINT, handle_taint },
+	{ ABORT_PING, handle_ping },
 	{ 0, 0 },
 };
 
@@ -1238,6 +1341,7 @@ bool execute(struct execute_state *state,
 	}
 
 	init_watchdogs(settings);
+	load_ping_config();
 
 	if (!uname(&unamebuf)) {
 		dprintf(unamefd, "%s %s %s %s %s\n",
diff --git a/runner/meson.build b/runner/meson.build
index de6e6f1c..218b492e 100644
--- a/runner/meson.build
+++ b/runner/meson.build
@@ -1,4 +1,13 @@
 jsonc = dependency('json-c', required: _runner_required)
+runner_deps = [jsonc, glib]
+have_oping = []
+if with_oping != 'false'
+	oping = dependency('liboping', required: with_oping == 'true')
+	if oping.found()
+		runner_deps += oping
+		have_oping = '-DHAVE_OPING=1'
+	endif
+endif
 
 runnerlib_sources = [ 'settings.c',
 		      'job_list.c',
@@ -17,7 +26,8 @@ if _build_runner and jsonc.found()
 
 	runnerlib = static_library('igt_runner', runnerlib_sources,
 				   include_directories : inc,
-				   dependencies : jsonc)
+				   c_args : have_oping,
+				   dependencies : runner_deps)
 
 	runner = executable('igt_runner', runner_sources,
 			    link_with : runnerlib,
diff --git a/runner/settings.c b/runner/settings.c
index e64244e6..c531b9a4 100644
--- a/runner/settings.c
+++ b/runner/settings.c
@@ -47,6 +47,7 @@ static struct {
 } abort_conditions[] = {
 	{ ABORT_TAINT, "taint" },
 	{ ABORT_LOCKDEP, "lockdep" },
+	{ ABORT_PING, "ping" },
 	{ ABORT_ALL, "all" },
 	{ 0, 0 },
 };
@@ -135,6 +136,8 @@ static const char *usage_str =
 	"                        Possible conditions:\n"
 	"                         lockdep - abort when kernel lockdep has been angered.\n"
 	"                         taint   - abort when kernel becomes fatally tainted.\n"
+	"                         ping    - abort when a host configured in .igtrc does\n"
+	"                                   not respond to ping.\n"
 	"                         all     - abort for all of the above.\n"
 	"  -s, --sync            Sync results to disk after every test\n"
 	"  -l {quiet,verbose,dummy}, --log-level {quiet,verbose,dummy}\n"
diff --git a/runner/settings.h b/runner/settings.h
index 267d72cf..997e7370 100644
--- a/runner/settings.h
+++ b/runner/settings.h
@@ -14,9 +14,10 @@ enum {
 
 #define ABORT_TAINT   (1 << 0)
 #define ABORT_LOCKDEP (1 << 1)
-#define ABORT_ALL     (ABORT_TAINT | ABORT_LOCKDEP)
+#define ABORT_PING    (1 << 2)
+#define ABORT_ALL     (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING)
 
-_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP), "ABORT_ALL must be all conditions bitwise or'd");
+_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING), "ABORT_ALL must be all conditions bitwise or'd");
 
 struct regex_list {
 	char **regex_strings;
-- 
2.19.1



More information about the igt-dev mailing list