[PATCH i-g-t v2 2/4] runner/executor: check disk limit at dumping kmsg

Kamil Konieczny kamil.konieczny at linux.intel.com
Mon Jul 15 17:31:06 UTC 2024


It was reported that kernel dumps can grow beyond the disk limit size,
so add checks for it and report an error if that happens.

v2: return number of written bytes (Petri)
v3: monitor dmesg size from zero to catch flooding (Kamil)

Reported-by: Karol Krol <karol.krol at intel.com>
Link: https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/issues/129
Cc: Petri Latvala <adrinael at adrinael.net>
Cc: Arkadiusz Hiler <arkadiusz.hiler at intel.com>
Cc: Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>
Cc: Andrzej Hajda <andrzej.hajda at intel.com>
Signed-off-by: Kamil Konieczny <kamil.konieczny at linux.intel.com>
---
 runner/executor.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/runner/executor.c b/runner/executor.c
index 4b374d223..ed2621383 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -584,7 +584,7 @@ void close_outputs(int *fds)
 }
 
 /* Returns the number of bytes written to disk, or a negative number on error */
-static long dump_dmesg(int kmsgfd, int outfd)
+static long dump_dmesg(int kmsgfd, int outfd, size_t disk_limit)
 {
 	/*
 	 * Write kernel messages to the log file until we reach
@@ -605,6 +605,10 @@ static long dump_dmesg(int kmsgfd, int outfd)
 	if (kmsgfd < 0)
 		return 0;
 
+	/*
+	 * Dynamic subtests restart our size limit checks, so bytes that
+	 * were already written are not checked here.
+	 */
 	comparefd = open("/dev/kmsg", O_RDONLY | O_NONBLOCK);
 	if (comparefd < 0) {
 		errf("Error opening another fd for /dev/kmsg\n");
@@ -656,6 +660,12 @@ static long dump_dmesg(int kmsgfd, int outfd)
 		write(outfd, buf, r);
 		written += r;
 
+		if (disk_limit && written > disk_limit) {
+			close(comparefd);
+			errf("Error kmsg flood: disk limit exceeded %ld, written %ld\n", disk_limit, r);
+			return -1;
+		}
+
 		if (comparefd < 0 && sscanf(buf, "%u,%llu,%llu,%c;",
 					    &flags, &seq, &usec, &cont) == 4) {
 			/*
@@ -891,6 +901,7 @@ static int monitor_output(pid_t child,
 	unsigned long taints = 0;
 	bool aborting = false;
 	size_t disk_usage = 0;
+	const size_t mon_disk_limit = settings->disk_usage_limit;
 	bool socket_comms_used = false; /* whether the test actually uses comms */
 	bool results_received = false; /* whether we already have test results that might need overriding if we detect an abort condition */
 
@@ -1220,7 +1231,7 @@ static int monitor_output(pid_t child,
 
 			time_last_activity = time_now;
 
-			dmesgwritten = dump_dmesg(kmsgfd, outputs[_F_DMESG]);
+			dmesgwritten = dump_dmesg(kmsgfd, outputs[_F_DMESG], mon_disk_limit);
 			if (settings->sync)
 				fdatasync(outputs[_F_DMESG]);
 
@@ -1458,7 +1469,7 @@ static int monitor_output(pid_t child,
 					asprintf(abortreason, "Child refuses to die, tainted 0x%lx.", taints);
 				}
 
-				dump_dmesg(kmsgfd, outputs[_F_DMESG]);
+				dump_dmesg(kmsgfd, outputs[_F_DMESG], mon_disk_limit);
 				if (settings->sync)
 					fdatasync(outputs[_F_DMESG]);
 
@@ -1484,7 +1495,7 @@ static int monitor_output(pid_t child,
 		}
 	}
 
-	dump_dmesg(kmsgfd, outputs[_F_DMESG]);
+	dump_dmesg(kmsgfd, outputs[_F_DMESG], mon_disk_limit);
 	if (settings->sync)
 		fdatasync(outputs[_F_DMESG]);
 
-- 
2.43.0



More information about the Intel-gfx-trybot mailing list