[igt-dev] [PATCH i-g-t] runner: Clean up quickly if the kernel OOPSed during a test

Petri Latvala petri.latvala at intel.com
Tue Jan 21 13:45:06 UTC 2020


On Tue, Jan 21, 2020 at 12:59:29PM +0000, Chris Wilson wrote:
> If the kernel OOPSed during a test, that test is unlikely to ever complete.
> Furthermore, we already have the reason why it won't complete, so we do not
> need to burden ourselves with the full stacktrace of every process -- or at
> least we have a more pressing bug to fix before worrying about the
> system deadlock.
> 
> v2: Log the post-taint killing.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Petri Latvala <petri.latvala at intel.com>


Reviewed-by: Petri Latvala <petri.latvala at intel.com>



> ---
>  runner/executor.c | 45 +++++++++++++++++++++++++++++++++++----------
>  1 file changed, 35 insertions(+), 10 deletions(-)
> 
> diff --git a/runner/executor.c b/runner/executor.c
> index f36bfd3dd..71483b749 100644
> --- a/runner/executor.c
> +++ b/runner/executor.c
> @@ -314,13 +314,28 @@ static const struct {
>    {(1 << 9), "TAINT_WARN: WARN_ON has happened."},
>    {0, 0}};
>  
> +static unsigned long bad_taints(void)
> +{
> +	static unsigned long __bad_taints;
> +
> +	if (!__bad_taints) {
> +		for (typeof(*abort_taints) *taint = abort_taints;
> +		     taint->bit;
> +		     taint++)
> +			__bad_taints |= taint->bit;
> +	}
> +
> +	return __bad_taints;
> +}
> +
> +static unsigned long is_tainted(unsigned long taints)
> +{
> +	return taints & bad_taints();
> +}
> +
>  static unsigned long tainted(unsigned long *taints)
>  {
>  	FILE *f;
> -	unsigned long bad_taints = 0;
> -
> -	for (typeof(*abort_taints) *taint = abort_taints; taint->bit; taint++)
> -		bad_taints |= taint->bit;
>  
>  	*taints = 0;
>  
> @@ -330,7 +345,7 @@ static unsigned long tainted(unsigned long *taints)
>  		fclose(f);
>  	}
>  
> -	return *taints & bad_taints;
> +	return is_tainted(*taints);
>  }
>  
>  static char *handle_taint(void)
> @@ -744,17 +759,27 @@ static int monitor_output(pid_t child,
>  			return -1;
>  		}
>  
> +		if (tainted(&taints)) /* cancel children after a kernel OOPS */
> +			n = 0, intervals_left = 1;
> +
>  		if (n == 0) {
>  			if (--intervals_left)
>  				continue;
>  
>  			switch (killed) {
>  			case 0:
> -				show_kernel_task_state();
> +				if (!is_tainted(taints)) {
> +					show_kernel_task_state();
> +					if (settings->log_level >= LOG_LEVEL_NORMAL) {
> +						outf("Timeout. Killing the current test with SIGQUIT.\n");
>  
> -				if (settings->log_level >= LOG_LEVEL_NORMAL) {
> -					outf("Timeout. Killing the current test with SIGQUIT.\n");
> -					fflush(stdout);
> +						fflush(stdout);
> +					}
> +				} else {
> +					if (settings->log_level >= LOG_LEVEL_NORMAL) {
> +						outf("Killing the test because the kernel is tainted.\n");
> +						fflush(stdout);
> +					}
>  				}
>  
>  				killed = SIGQUIT;
> @@ -791,7 +816,7 @@ static int monitor_output(pid_t child,
>  				intervals_left = 1; /* Intervals handled separately for sigkill */
>  				break;
>  			case SIGKILL:
> -				if (!tainted(&taints) && --sigkill_intervals_left) {
> +				if (!is_tainted(taints) && --sigkill_intervals_left) {
>  					intervals_left = 1;
>  					break;
>  				}
> -- 
> 2.25.0
> 


More information about the igt-dev mailing list