[igt-dev] [PATCH i-g-t v4] runner: Correctly handle abort before first test
Kamil Konieczny
kamil.konieczny at linux.intel.com
Thu Jan 5 19:25:37 UTC 2023
Hi Petri,
Just a few small nits, see below.
On 2022-12-21 at 13:42:13 +0200, Petri Latvala wrote:
> Don't leave the execution in a "please resume me" state if bootup
> causes an abort condition. Especially handle the case of abort on
> bootup when resuming correctly, so that it doesn't attempt to run a
> test on a tainted kernel if we've explicitly configured the runner to
> not execute when there's a taint.
>
> v2: Fudge the results directory instead to get the desired results:
> runner exits with nonzero, and resuming exits with "all done" instead
> of executing anything.
>
> v3: Use faccessat instead of open+close, use less magic strings,
> remember to close fds (Chris)
>
> v4: Use GRACEFUL_EXITCODE in monitor_output, remove the 'resuming'
> field (why was it a double?!). (Ryszard)
> Stop trying to execute if all tests are already run, to avoid a
> crash in environment validation.
>
> Signed-off-by: Petri Latvala <petri.latvala at intel.com>
> Cc: Arkadiusz Hiler <arek at hiler.eu>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Kamil Konieczny <kamil.konieczny at linux.intel.com>
> Cc: Ryszard Knop <ryszard.knop at intel.com>
> ---
> runner/executor.c | 57 ++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 54 insertions(+), 3 deletions(-)
>
> diff --git a/runner/executor.c b/runner/executor.c
> index d2253082..e954c23e 100644
> --- a/runner/executor.c
> +++ b/runner/executor.c
> @@ -37,6 +37,7 @@
>
> #define KMSG_HEADER "[IGT] "
> #define KMSG_WARN 4
> +#define GRACEFUL_EXITCODE -SIGHUP
>
> static struct {
> int *fds;
> @@ -1249,7 +1250,7 @@ static int monitor_output(pid_t child,
> } else {
> dprintf(outputs[_F_JOURNAL], "%s%d (%.3fs)\n",
----------------------------------------------------------------------------------- ^
Maybe just put a literal 0.0s here instead of %.3fs?
> EXECUTOR_EXIT,
> - -SIGHUP, 0.0);
> + GRACEFUL_EXITCODE, 0.0);
-------------------------------------------------------------------------- ^
Then you can drop the 0.0 argument here.
> if (settings->sync)
> fdatasync(outputs[_F_JOURNAL]);
> }
> @@ -1720,6 +1721,41 @@ out_dirfd:
> return result;
> }
>
> +static void fill_results_directory_with_notruns(struct job_list *list,
> + int resdirfd)
> +{
> + int outputs[_F_LAST];
> + char name[32];
> + int dirfd;
> + size_t i;
> +
> + for (i = 0; i < list->size; i++) {
> + snprintf(name, sizeof(name), "%zd", i);
> +
> + if (faccessat(resdirfd, name, F_OK, 0) == 0)
> + continue;
> +
> + mkdirat(resdirfd, name, 0777);
> + dirfd = openat(resdirfd, name, O_DIRECTORY | O_RDONLY);
> + if (dirfd < 0) {
> + errf("Error accessing individual test result directory\n");
> + return;
> + }
> +
> + if (!open_output_files(dirfd, outputs, true)) {
> + errf("Error opening output files\n");
> + close(dirfd);
> + return;
> + }
> +
> + dprintf(outputs[_F_OUT], "Forced notrun result because of abort condition on bootup\n");
> + dprintf(outputs[_F_JOURNAL], "%s%d (%.3fs)\n", EXECUTOR_EXIT, GRACEFUL_EXITCODE, 0.0);
--------------------------------------------------- ^ ------------------------------------------ ^
Same here: you can just put a literal 0.0s in the format string and drop the 0.0 argument.
> +
> + close_outputs(outputs);
> + close(dirfd);
> + }
> +}
> +
> static int remove_file(int dirfd, const char *name)
> {
> return unlinkat(dirfd, name, 0) && errno != ENOENT;
> @@ -1845,7 +1881,6 @@ bool initialize_execute_state_from_resume(int dirfd,
> clear_settings(settings);
> free_job_list(list);
> memset(state, 0, sizeof(*state));
> - state->resuming = true;
The 'resuming' field is no longer used anywhere, so please remove it from the header as well.
With that fixed, you can add my r-b.
Regards,
Kamil
>
> if (!read_settings_from_dir(settings, dirfd) ||
> !read_job_list(list, dirfd)) {
> @@ -2183,6 +2218,11 @@ bool execute(struct execute_state *state,
> return true;
> }
>
> + if (state->next >= job_list->size) {
> + outf("All tests already executed.\n");
> + return true;
> + }
> +
> igt_list_for_each_entry(env_var, &settings->env_vars, link) {
> setenv(env_var->key, env_var->value, 1);
> }
> @@ -2271,7 +2311,7 @@ bool execute(struct execute_state *state,
> close(unamefd);
>
> /* Check if we're already in abort-state at bootup */
> - if (!state->resuming) {
> + {
> char *reason;
>
> if ((reason = need_to_abort(settings)) != NULL) {
> @@ -2280,6 +2320,17 @@ bool execute(struct execute_state *state,
> free(reason);
> free(nexttest);
>
> + /*
> + * If an abort condition happened at bootup,
> + * assume that it happens on every boot,
> + * making this test execution impossible.
> + * Write stuff to the results directory
> + * indicating this so resuming immediately
> + * finishes instead of getting stuck in an
> + * infinite reboot loop.
> + */
> + fill_results_directory_with_notruns(job_list, resdirfd);
> +
> status = false;
>
> goto end;
> --
> 2.30.2
>
More information about the igt-dev
mailing list