[igt-dev] [PATCH i-g-t] runner: Clean up quickly if the kernel OOPSed during a test
Petri Latvala
petri.latvala at intel.com
Tue Jan 21 12:28:11 UTC 2020
On Tue, Jan 21, 2020 at 11:17:38AM +0000, Chris Wilson wrote:
> If the kernel OOPSed during the test, it is unlikely to ever complete.
> Furthermore, we have the reason why it won't complete and so do not need
> to burden ourselves with the full stacktrace of every process -- or at
> least we have a more pressing bug to fix before worrying about the
> system deadlock.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Petri Latvala <petri.latvala at intel.com>
> ---
> runner/executor.c | 33 ++++++++++++++++++++++++++-------
> 1 file changed, 26 insertions(+), 7 deletions(-)
>
> diff --git a/runner/executor.c b/runner/executor.c
> index f36bfd3dd..00cd3b08a 100644
> --- a/runner/executor.c
> +++ b/runner/executor.c
> @@ -314,13 +314,28 @@ static const struct {
> {(1 << 9), "TAINT_WARN: WARN_ON has happened."},
> {0, 0}};
>
> +static unsigned long bad_taints(void)
> +{
> + static unsigned long __bad_taints;
> +
> + if (!__bad_taints) {
> + for (typeof(*abort_taints) *taint = abort_taints;
> + taint->bit;
> + taint++)
> + __bad_taints |= taint->bit;
> + }
> +
> + return __bad_taints;
> +}
> +
> +static unsigned long is_tainted(unsigned long taints)
> +{
> + return taints & bad_taints();
> +}
> +
> static unsigned long tainted(unsigned long *taints)
> {
> FILE *f;
> - unsigned long bad_taints = 0;
> -
> - for (typeof(*abort_taints) *taint = abort_taints; taint->bit; taint++)
> - bad_taints |= taint->bit;
>
> *taints = 0;
>
> @@ -330,7 +345,7 @@ static unsigned long tainted(unsigned long *taints)
> fclose(f);
> }
>
> - return *taints & bad_taints;
> + return is_tainted(*taints);
> }
>
> static char *handle_taint(void)
> @@ -744,13 +759,17 @@ static int monitor_output(pid_t child,
> return -1;
> }
>
> + if (tainted(&taints)) /* cancel children after a kernel OOPS */
> + n = 0, intervals_left = 1;
> +
> if (n == 0) {
> if (--intervals_left)
> continue;
>
> switch (killed) {
> case 0:
> - show_kernel_task_state();
> + if (!is_tainted(taints))
> + show_kernel_task_state();
>
> if (settings->log_level >= LOG_LEVEL_NORMAL) {
> outf("Timeout. Killing the current test with SIGQUIT.\n");
We need a different message here if we're killing the test due to a
taint rather than a timeout.
Something like:
	if (!is_tainted(taints)) {
		show_kernel_task_state();
		if (settings->log_level >= LOG_LEVEL_NORMAL) {
			outf("Timeout etc");
			fflush(stdout);
		}
	} else {
		if (settings->log_level >= LOG_LEVEL_NORMAL) {
			outf("Killing the test because the kernel is tainted.\n");
			fflush(stdout);
		}
	}
Note to self: Is it time for logf(LOG_LEVEL_NORMAL, "Hello world")
yet?
--
Petri Latvala
More information about the igt-dev mailing list