[PATCH i-g-t v2 2/4] runner/executor: check disk limit at dumping kmsg
Kamil Konieczny
kamil.konieczny at linux.intel.com
Mon Jul 15 17:31:06 UTC 2024
It was reported that kernel message dumps can grow beyond the disk size limit,
so add a check for it and report an error if that happens.
v2: return number of written bytes (Petri)
v3: monitor dmesg size from zero to catch flooding (Kamil)
Reported-by: Karol Krol <karol.krol at intel.com>
Link: https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/issues/129
Cc: Petri Latvala <adrinael at adrinael.net>
Cc: Arkadiusz Hiler <arkadiusz.hiler at intel.com>
Cc: Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>
Cc: Andrzej Hajda <andrzej.hajda at intel.com>
Signed-off-by: Kamil Konieczny <kamil.konieczny at linux.intel.com>
---
runner/executor.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/runner/executor.c b/runner/executor.c
index 4b374d223..ed2621383 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -584,7 +584,7 @@ void close_outputs(int *fds)
}
/* Returns the number of bytes written to disk, or a negative number on error */
-static long dump_dmesg(int kmsgfd, int outfd)
+static long dump_dmesg(int kmsgfd, int outfd, size_t disk_limit)
{
/*
* Write kernel messages to the log file until we reach
@@ -605,6 +605,10 @@ static long dump_dmesg(int kmsgfd, int outfd)
if (kmsgfd < 0)
return 0;
+ /*
+ * Dynamic subtests restart our size-limit checks, so bytes written
+ * before this call are not counted here.
+ */
comparefd = open("/dev/kmsg", O_RDONLY | O_NONBLOCK);
if (comparefd < 0) {
errf("Error opening another fd for /dev/kmsg\n");
@@ -656,6 +660,12 @@ static long dump_dmesg(int kmsgfd, int outfd)
write(outfd, buf, r);
written += r;
+ if (disk_limit && written > disk_limit) {
+ close(comparefd);
+ errf("Error kmsg flood: disk limit exceeded %ld, written %ld\n", disk_limit, r);
+ return -1;
+ }
+
if (comparefd < 0 && sscanf(buf, "%u,%llu,%llu,%c;",
&flags, &seq, &usec, &cont) == 4) {
/*
@@ -891,6 +901,7 @@ static int monitor_output(pid_t child,
unsigned long taints = 0;
bool aborting = false;
size_t disk_usage = 0;
+ const size_t mon_disk_limit = settings->disk_usage_limit;
bool socket_comms_used = false; /* whether the test actually uses comms */
bool results_received = false; /* whether we already have test results that might need overriding if we detect an abort condition */
@@ -1220,7 +1231,7 @@ static int monitor_output(pid_t child,
time_last_activity = time_now;
- dmesgwritten = dump_dmesg(kmsgfd, outputs[_F_DMESG]);
+ dmesgwritten = dump_dmesg(kmsgfd, outputs[_F_DMESG], mon_disk_limit);
if (settings->sync)
fdatasync(outputs[_F_DMESG]);
@@ -1458,7 +1469,7 @@ static int monitor_output(pid_t child,
asprintf(abortreason, "Child refuses to die, tainted 0x%lx.", taints);
}
- dump_dmesg(kmsgfd, outputs[_F_DMESG]);
+ dump_dmesg(kmsgfd, outputs[_F_DMESG], mon_disk_limit);
if (settings->sync)
fdatasync(outputs[_F_DMESG]);
@@ -1484,7 +1495,7 @@ static int monitor_output(pid_t child,
}
}
- dump_dmesg(kmsgfd, outputs[_F_DMESG]);
+ dump_dmesg(kmsgfd, outputs[_F_DMESG], mon_disk_limit);
if (settings->sync)
fdatasync(outputs[_F_DMESG]);
--
2.43.0
More information about the Intel-gfx-trybot
mailing list