[igt-dev] [PATCH i-g-t] runner: Clean up quickly if the kernel OOPSed during a test

Petri Latvala petri.latvala at intel.com
Tue Jan 21 12:28:11 UTC 2020


On Tue, Jan 21, 2020 at 11:17:38AM +0000, Chris Wilson wrote:
> If the kernel OOPSed during the test, it is unlikely to ever complete.
> Furthermore, we have the reason why it won't complete and so do not need
> to burden ourselves with the full stacktrace of every process -- or at
> least we have a more pressing bug to fix before worrying about the
> system deadlock.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Petri Latvala <petri.latvala at intel.com>
> ---
>  runner/executor.c | 33 ++++++++++++++++++++++++++-------
>  1 file changed, 26 insertions(+), 7 deletions(-)
> 
> diff --git a/runner/executor.c b/runner/executor.c
> index f36bfd3dd..00cd3b08a 100644
> --- a/runner/executor.c
> +++ b/runner/executor.c
> @@ -314,13 +314,28 @@ static const struct {
>    {(1 << 9), "TAINT_WARN: WARN_ON has happened."},
>    {0, 0}};
>  
> +static unsigned long bad_taints(void)
> +{
> +	static unsigned long __bad_taints;
> +
> +	if (!__bad_taints) {
> +		for (typeof(*abort_taints) *taint = abort_taints;
> +		     taint->bit;
> +		     taint++)
> +			__bad_taints |= taint->bit;
> +	}
> +
> +	return __bad_taints;
> +}
> +
> +static unsigned long is_tainted(unsigned long taints)
> +{
> +	return taints & bad_taints();
> +}
> +
>  static unsigned long tainted(unsigned long *taints)
>  {
>  	FILE *f;
> -	unsigned long bad_taints = 0;
> -
> -	for (typeof(*abort_taints) *taint = abort_taints; taint->bit; taint++)
> -		bad_taints |= taint->bit;
>  
>  	*taints = 0;
>  
> @@ -330,7 +345,7 @@ static unsigned long tainted(unsigned long *taints)
>  		fclose(f);
>  	}
>  
> -	return *taints & bad_taints;
> +	return is_tainted(*taints);
>  }
>  
>  static char *handle_taint(void)
> @@ -744,13 +759,17 @@ static int monitor_output(pid_t child,
>  			return -1;
>  		}
>  
> +		if (tainted(&taints)) /* cancel children after a kernel OOPS */
> +			n = 0, intervals_left = 1;
> +
>  		if (n == 0) {
>  			if (--intervals_left)
>  				continue;
>  
>  			switch (killed) {
>  			case 0:
> -				show_kernel_task_state();
> +				if (!is_tainted(taints))
> +					show_kernel_task_state();
>  
>  				if (settings->log_level >= LOG_LEVEL_NORMAL) {
>  					outf("Timeout. Killing the current test with SIGQUIT.\n");


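We need a different message here if we're killing the test due to a
taint. With this patch a taint forces the timeout path (n = 0,
intervals_left = 1), so the log would still say "Timeout. Killing the
current test with SIGQUIT." even though no timeout actually happened.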

Something like


  if (!is_tainted(taints)) {
    show_kernel_task_state();
    if (settings->log_level >= LOG_LEVEL_NORMAL) {
      outf("Timeout etc");
      fflush(stdout);
    }
  } else {
    if (settings->log_level >= LOG_LEVEL_NORMAL) {
      outf("Killing the test because the kernel is tainted.\n");
      fflush(stdout);
    }
  }


Note to self: Is it time for logf(LOG_LEVEL_NORMAL, "Hello world")
yet?


-- 
Petri Latvala


More information about the igt-dev mailing list