[igt-dev] [PATCH i-g-t v4] runner: Correctly handle abort before first test

Kamil Konieczny kamil.konieczny at linux.intel.com
Thu Jan 5 19:25:37 UTC 2023


Hi Petri,

Small nits, see below.

On 2022-12-21 at 13:42:13 +0200, Petri Latvala wrote:
> Don't leave the execution in a "please resume me" state if bootup
> causes an abort condition. Especially handle the case of abort on
> bootup when resuming correctly, so that it doesn't attempt to run a
> test on a tainted kernel if we've explicitly configured the runner to
> not execute when there's a taint.
> 
> v2: Fudge the results directory instead to get the desired results:
>     runner exits with nonzero, and resuming exits with "all done" instead
>     of executing anything.
> 
> v3: Use faccessat instead of open+close, use less magic strings,
>     remember to close fds (Chris)
> 
> v4: Use GRACEFUL_EXITCODE in monitor_output, remove the 'resuming'
>     field (why was it a double?!). (Ryszard)
>     Stop trying to execute if all tests are already run, to avoid a
>     crash in environment validation.
> 
> Signed-off-by: Petri Latvala <petri.latvala at intel.com>
> Cc: Arkadiusz Hiler <arek at hiler.eu>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Kamil Konieczny <kamil.konieczny at linux.intel.com>
> Cc: Ryszard Knop <ryszard.knop at intel.com>
> ---
>  runner/executor.c | 57 ++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 54 insertions(+), 3 deletions(-)
> 
> diff --git a/runner/executor.c b/runner/executor.c
> index d2253082..e954c23e 100644
> --- a/runner/executor.c
> +++ b/runner/executor.c
> @@ -37,6 +37,7 @@
>  
>  #define KMSG_HEADER "[IGT] "
>  #define KMSG_WARN 4
> +#define GRACEFUL_EXITCODE -SIGHUP
>  
>  static struct {
>  	int *fds;
> @@ -1249,7 +1250,7 @@ static int monitor_output(pid_t child,
>  					} else {
>  						dprintf(outputs[_F_JOURNAL], "%s%d (%.3fs)\n",
----------------------------------------------------------------------------------- ^
Maybe instead of %.3fs just put 0.0s here?

>  							EXECUTOR_EXIT,
> -							-SIGHUP, 0.0);
> +							GRACEFUL_EXITCODE, 0.0);
-------------------------------------------------------------------------- ^
Then you can drop the 0.0 argument here.

>  						if (settings->sync)
>  							fdatasync(outputs[_F_JOURNAL]);
>  					}
> @@ -1720,6 +1721,41 @@ out_dirfd:
>  	return result;
>  }
>  
> +static void fill_results_directory_with_notruns(struct job_list *list,
> +						int resdirfd)
> +{
> +	int outputs[_F_LAST];
> +	char name[32];
> +	int dirfd;
> +	size_t i;
> +
> +	for (i = 0; i < list->size; i++) {
> +		snprintf(name, sizeof(name), "%zd", i);
> +
> +		if (faccessat(resdirfd, name, F_OK, 0) == 0)
> +			continue;
> +
> +		mkdirat(resdirfd, name, 0777);
> +		dirfd = openat(resdirfd, name, O_DIRECTORY | O_RDONLY);
> +		if (dirfd < 0) {
> +			errf("Error accessing individual test result directory\n");
> +			return;
> +		}
> +
> +		if (!open_output_files(dirfd, outputs, true)) {
> +			errf("Error opening output files\n");
> +			close(dirfd);
> +			return;
> +		}
> +
> +		dprintf(outputs[_F_OUT], "Forced notrun result because of abort condition on bootup\n");
> +		dprintf(outputs[_F_JOURNAL], "%s%d (%.3fs)\n", EXECUTOR_EXIT, GRACEFUL_EXITCODE, 0.0);
--------------------------------------------------- ^ ------------------------------------------ ^
Same here: you can just put 0.0s here instead.

> +
> +		close_outputs(outputs);
> +		close(dirfd);
> +	}
> +}
> +
>  static int remove_file(int dirfd, const char *name)
>  {
>  	return unlinkat(dirfd, name, 0) && errno != ENOENT;
> @@ -1845,7 +1881,6 @@ bool initialize_execute_state_from_resume(int dirfd,
>  	clear_settings(settings);
>  	free_job_list(list);
>  	memset(state, 0, sizeof(*state));
> -	state->resuming = true;

It's not used anywhere, so remove it from the header.

With that fixed, you can add my r-b,

Regards,
Kamil

>  
>  	if (!read_settings_from_dir(settings, dirfd) ||
>  	    !read_job_list(list, dirfd)) {
> @@ -2183,6 +2218,11 @@ bool execute(struct execute_state *state,
>  		return true;
>  	}
>  
> +	if (state->next >= job_list->size) {
> +		outf("All tests already executed.\n");
> +		return true;
> +	}
> +
>  	igt_list_for_each_entry(env_var, &settings->env_vars, link) {
>  		setenv(env_var->key, env_var->value, 1);
>  	}
> @@ -2271,7 +2311,7 @@ bool execute(struct execute_state *state,
>  	close(unamefd);
>  
>  	/* Check if we're already in abort-state at bootup */
> -	if (!state->resuming) {
> +	{
>  		char *reason;
>  
>  		if ((reason = need_to_abort(settings)) != NULL) {
> @@ -2280,6 +2320,17 @@ bool execute(struct execute_state *state,
>  			free(reason);
>  			free(nexttest);
>  
> +			/*
> +			 * If an abort condition happened at bootup,
> +			 * assume that it happens on every boot,
> +			 * making this test execution impossible.
> +			 * Write stuff to the results directory
> +			 * indicating this so resuming immediately
> +			 * finishes instead of getting stuck in an
> +			 * infinite reboot loop.
> +			 */
> +			fill_results_directory_with_notruns(job_list, resdirfd);
> +
>  			status = false;
>  
>  			goto end;
> -- 
> 2.30.2
> 


More information about the igt-dev mailing list