[igt-dev] [PATCH i-g-t v4] runner: Correctly handle abort before first test

Petri Latvala petri.latvala at intel.com
Mon Jan 9 09:49:46 UTC 2023


On Thu, Jan 05, 2023 at 08:25:37PM +0100, Kamil Konieczny wrote:
> Hi Petri,
> 
> small nits, see below.
> 
> On 2022-12-21 at 13:42:13 +0200, Petri Latvala wrote:
> > Don't leave the execution in a "please resume me" state if bootup
> > causes an abort condition. Especially handle the case of abort on
> > bootup when resuming correctly, so that it doesn't attempt to run a
> > test on a tainted kernel if we've explicitly configured the runner to
> > not execute when there's a taint.
> > 
> > v2: Fudge the results directory instead to get the desired results:
> >     runner exits with nonzero, and resuming exits with "all done" instead
> >     of executing anything.
> > 
> > v3: Use faccessat instead of open+close, use less magic strings,
> >     remember to close fds (Chris)
> > 
> > v4: Use GRACEFUL_EXITCODE in monitor_output, remove the 'resuming'
> >     field (why was it a double?!). (Ryszard)
> >     Stop trying to execute if all tests are already run, to avoid a
> >     crash in environment validation.
> > 
> > Signed-off-by: Petri Latvala <petri.latvala at intel.com>
> > Cc: Arkadiusz Hiler <arek at hiler.eu>
> > Cc: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Kamil Konieczny <kamil.konieczny at linux.intel.com>
> > Cc: Ryszard Knop <ryszard.knop at intel.com>
> > ---
> >  runner/executor.c | 57 ++++++++++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 54 insertions(+), 3 deletions(-)
> > 
> > diff --git a/runner/executor.c b/runner/executor.c
> > index d2253082..e954c23e 100644
> > --- a/runner/executor.c
> > +++ b/runner/executor.c
> > @@ -37,6 +37,7 @@
> >  
> >  #define KMSG_HEADER "[IGT] "
> >  #define KMSG_WARN 4
> > +#define GRACEFUL_EXITCODE -SIGHUP
> >  
> >  static struct {
> >  	int *fds;
> > @@ -1249,7 +1250,7 @@ static int monitor_output(pid_t child,
> >  					} else {
> >  						dprintf(outputs[_F_JOURNAL], "%s%d (%.3fs)\n",
> ----------------------------------------------------------------------------------- ^
> Maybe instead of %.3fs just put here 0.0s ?
> 
> >  							EXECUTOR_EXIT,
> > -							-SIGHUP, 0.0);
> > +							GRACEFUL_EXITCODE, 0.0);
> -------------------------------------------------------------------------- ^
> Then you can drop 0.0 here.
> 
> >  						if (settings->sync)
> >  							fdatasync(outputs[_F_JOURNAL]);
> >  					}
> > @@ -1720,6 +1721,41 @@ out_dirfd:
> >  	return result;
> >  }
> >  
> > +static void fill_results_directory_with_notruns(struct job_list *list,
> > +						int resdirfd)
> > +{
> > +	int outputs[_F_LAST];
> > +	char name[32];
> > +	int dirfd;
> > +	size_t i;
> > +
> > +	for (i = 0; i < list->size; i++) {
> > +		snprintf(name, sizeof(name), "%zd", i);
> > +
> > +		if (faccessat(resdirfd, name, F_OK, 0) == 0)
> > +			continue;
> > +
> > +		mkdirat(resdirfd, name, 0777);
> > +		dirfd = openat(resdirfd, name, O_DIRECTORY | O_RDONLY);
> > +		if (dirfd < 0) {
> > +			errf("Error accessing individual test result directory\n");
> > +			return;
> > +		}
> > +
> > +		if (!open_output_files(dirfd, outputs, true)) {
> > +			errf("Error opening output files\n");
> > +			close(dirfd);
> > +			return;
> > +		}
> > +
> > +		dprintf(outputs[_F_OUT], "Forced notrun result because of abort condition on bootup\n");
> > +		dprintf(outputs[_F_JOURNAL], "%s%d (%.3fs)\n", EXECUTOR_EXIT, GRACEFUL_EXITCODE, 0.0);
> --------------------------------------------------- ^ ------------------------------------------ ^
> Same here, you can just put here 0.0s instead.
> 
> > +
> > +		close_outputs(outputs);
> > +		close(dirfd);
> > +	}
> > +}
> > +
> >  static int remove_file(int dirfd, const char *name)
> >  {
> >  	return unlinkat(dirfd, name, 0) && errno != ENOENT;
> > @@ -1845,7 +1881,6 @@ bool initialize_execute_state_from_resume(int dirfd,
> >  	clear_settings(settings);
> >  	free_job_list(list);
> >  	memset(state, 0, sizeof(*state));
> > -	state->resuming = true;
> 
> It's not used anywhere so remove it from header.

I in fact did remove it! But then I forgot to git add...


-- 
Petri Latvala


> 
> With that fixed you can add my r-b,
> 
> Regards,
> Kamil
> 
> >  
> >  	if (!read_settings_from_dir(settings, dirfd) ||
> >  	    !read_job_list(list, dirfd)) {
> > @@ -2183,6 +2218,11 @@ bool execute(struct execute_state *state,
> >  		return true;
> >  	}
> >  
> > +	if (state->next >= job_list->size) {
> > +		outf("All tests already executed.\n");
> > +		return true;
> > +	}
> > +
> >  	igt_list_for_each_entry(env_var, &settings->env_vars, link) {
> >  		setenv(env_var->key, env_var->value, 1);
> >  	}
> > @@ -2271,7 +2311,7 @@ bool execute(struct execute_state *state,
> >  	close(unamefd);
> >  
> >  	/* Check if we're already in abort-state at bootup */
> > -	if (!state->resuming) {
> > +	{
> >  		char *reason;
> >  
> >  		if ((reason = need_to_abort(settings)) != NULL) {
> > @@ -2280,6 +2320,17 @@ bool execute(struct execute_state *state,
> >  			free(reason);
> >  			free(nexttest);
> >  
> > +			/*
> > +			 * If an abort condition happened at bootup,
> > +			 * assume that it happens on every boot,
> > +			 * making this test execution impossible.
> > +			 * Write stuff to the results directory
> > +			 * indicating this so resuming immediately
> > +			 * finishes instead of getting stuck in an
> > +			 * infinite reboot loop.
> > +			 */
> > +			fill_results_directory_with_notruns(job_list, resdirfd);
> > +
> >  			status = false;
> >  
> >  			goto end;
> > -- 
> > 2.30.2
> > 


More information about the igt-dev mailing list