[igt-dev] [PATCH i-g-t v4] runner: Correctly handle abort before first test
Petri Latvala
petri.latvala at intel.com
Mon Jan 9 09:49:46 UTC 2023
On Thu, Jan 05, 2023 at 08:25:37PM +0100, Kamil Konieczny wrote:
> Hi Petri,
>
> small nits, see below.
>
> On 2022-12-21 at 13:42:13 +0200, Petri Latvala wrote:
> > Don't leave the execution in a "please resume me" state if bootup
> > causes an abort condition. Especially handle the case of abort on
> > bootup when resuming correctly, so that it doesn't attempt to run a
> > test on a tainted kernel if we've explicitly configured the runner to
> > not execute when there's a taint.
> >
> > v2: Fudge the results directory instead to get the desired results:
> > runner exits with nonzero, and resuming exits with "all done" instead
> > of executing anything.
> >
> > v3: Use faccessat instead of open+close, use less magic strings,
> > remember to close fds (Chris)
> >
> > v4: Use GRACEFUL_EXITCODE in monitor_output, remove the 'resuming'
> > field (why was it a double?!). (Ryszard)
> > Stop trying to execute if all tests are already run, to avoid a
> > crash in environment validation.
> >
> > Signed-off-by: Petri Latvala <petri.latvala at intel.com>
> > Cc: Arkadiusz Hiler <arek at hiler.eu>
> > Cc: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Kamil Konieczny <kamil.konieczny at linux.intel.com>
> > Cc: Ryszard Knop <ryszard.knop at intel.com>
> > ---
> > runner/executor.c | 57 ++++++++++++++++++++++++++++++++++++++++++++---
> > 1 file changed, 54 insertions(+), 3 deletions(-)
> >
> > diff --git a/runner/executor.c b/runner/executor.c
> > index d2253082..e954c23e 100644
> > --- a/runner/executor.c
> > +++ b/runner/executor.c
> > @@ -37,6 +37,7 @@
> >
> > #define KMSG_HEADER "[IGT] "
> > #define KMSG_WARN 4
> > +#define GRACEFUL_EXITCODE -SIGHUP
> >
> > static struct {
> > int *fds;
> > @@ -1249,7 +1250,7 @@ static int monitor_output(pid_t child,
> > } else {
> > dprintf(outputs[_F_JOURNAL], "%s%d (%.3fs)\n",
> ----------------------------------------------------------------------------------- ^
> Maybe instead of %.3fs just put here 0.0s ?
>
> > EXECUTOR_EXIT,
> > - -SIGHUP, 0.0);
> > + GRACEFUL_EXITCODE, 0.0);
> -------------------------------------------------------------------------- ^
> Then you can drop 0.0 here.
>
> > if (settings->sync)
> > fdatasync(outputs[_F_JOURNAL]);
> > }
> > @@ -1720,6 +1721,41 @@ out_dirfd:
> > return result;
> > }
> >
> > +static void fill_results_directory_with_notruns(struct job_list *list,
> > + int resdirfd)
> > +{
> > + int outputs[_F_LAST];
> > + char name[32];
> > + int dirfd;
> > + size_t i;
> > +
> > + for (i = 0; i < list->size; i++) {
> > + snprintf(name, sizeof(name), "%zd", i);
> > +
> > + if (faccessat(resdirfd, name, F_OK, 0) == 0)
> > + continue;
> > +
> > + mkdirat(resdirfd, name, 0777);
> > + dirfd = openat(resdirfd, name, O_DIRECTORY | O_RDONLY);
> > + if (dirfd < 0) {
> > + errf("Error accessing individual test result directory\n");
> > + return;
> > + }
> > +
> > + if (!open_output_files(dirfd, outputs, true)) {
> > + errf("Error opening output files\n");
> > + close(dirfd);
> > + return;
> > + }
> > +
> > + dprintf(outputs[_F_OUT], "Forced notrun result because of abort condition on bootup\n");
> > + dprintf(outputs[_F_JOURNAL], "%s%d (%.3fs)\n", EXECUTOR_EXIT, GRACEFUL_EXITCODE, 0.0);
> --------------------------------------------------- ^ ------------------------------------------ ^
> Same here, you can just put here 0.0s instead.
>
> > +
> > + close_outputs(outputs);
> > + close(dirfd);
> > + }
> > +}
> > +
> > static int remove_file(int dirfd, const char *name)
> > {
> > return unlinkat(dirfd, name, 0) && errno != ENOENT;
> > @@ -1845,7 +1881,6 @@ bool initialize_execute_state_from_resume(int dirfd,
> > clear_settings(settings);
> > free_job_list(list);
> > memset(state, 0, sizeof(*state));
> > - state->resuming = true;
>
> It's not used anywhere so remove it from header.
I in fact did remove it! But then I forgot to git add...
--
Petri Latvala
>
> With that fixed you can add my r-b,
>
> Regards,
> Kamil
>
> >
> > if (!read_settings_from_dir(settings, dirfd) ||
> > !read_job_list(list, dirfd)) {
> > @@ -2183,6 +2218,11 @@ bool execute(struct execute_state *state,
> > return true;
> > }
> >
> > + if (state->next >= job_list->size) {
> > + outf("All tests already executed.\n");
> > + return true;
> > + }
> > +
> > igt_list_for_each_entry(env_var, &settings->env_vars, link) {
> > setenv(env_var->key, env_var->value, 1);
> > }
> > @@ -2271,7 +2311,7 @@ bool execute(struct execute_state *state,
> > close(unamefd);
> >
> > /* Check if we're already in abort-state at bootup */
> > - if (!state->resuming) {
> > + {
> > char *reason;
> >
> > if ((reason = need_to_abort(settings)) != NULL) {
> > @@ -2280,6 +2320,17 @@ bool execute(struct execute_state *state,
> > free(reason);
> > free(nexttest);
> >
> > + /*
> > + * If an abort condition happened at bootup,
> > + * assume that it happens on every boot,
> > + * making this test execution impossible.
> > + * Write stuff to the results directory
> > + * indicating this so resuming immediately
> > + * finishes instead of getting stuck in an
> > + * infinite reboot loop.
> > + */
> > + fill_results_directory_with_notruns(job_list, resdirfd);
> > +
> > status = false;
> >
> > goto end;
> > --
> > 2.30.2
> >
More information about the igt-dev
mailing list