[PATCH i-g-t v4] runner/executor: Abort if dmesg is flooded

Kamil Konieczny kamil.konieczny at linux.intel.com
Tue Aug 26 12:41:54 UTC 2025


Hi Krzysztof,
On 2025-07-24 at 11:23:05 +0000, Krzysztof Karas wrote:
> Hi Kamil,
> 
> I posted some comments below.
> 
> [...]
> > Create a way to abort test in such scenarios with the help of
> > measureing kernel kmsg activity before first test is executed
> measureing -> measuring
> 

Fixed.

> [...]
> > +++ b/runner/executor.c
> > +static int open_kmsg_rdonly(void)
> > +{
> > +	return open("/dev/kmsg", O_RDONLY | O_CLOEXEC | O_NONBLOCK);
> > +}
> > +
> >  /* Returns the number of bytes written to disk, or a negative number on error */
> >  static long dump_dmesg(int kmsgfd, int outfd, ssize_t size)
> >  {
> > @@ -734,6 +739,95 @@ static long dump_dmesg(int kmsgfd, int outfd, ssize_t size)
> >  	}
> >  }
> >  
> > +/*
> > + * Measure how many bytes appears in dmesg.
> > + * This is the same as above dump_dmesg() but with reading only.
> > + */
> > +static unsigned long long read_dmesg2(int kmsgfd, unsigned long long size, int wrfd)
> > +{
> > +	bool underflow_once = false;
> > +	int comparefd;
> > +	char buf[2048];
> > +	unsigned int flags;
> > +	unsigned long long readed = 0;
> readed -> read, as this is irregular verb.
> 

There is read() system function so no easy name here.
I changed it into dmsg_read

> > +	unsigned long long seq, cmpseq, usec;
> > +	char cont;
> > +	ssize_t r;
> > +
> > +	if (kmsgfd < 0)
> > +		return 0;
> > +
> > +	if (wrfd < 0)
> > +		wrfd = 0;
> > +
> > +	comparefd = open_kmsg_rdonly();
> > +	if (comparefd < 0) {
> > +		errf("Error opening another fd for /dev/kmsg\n");
> > +		return 0;
> > +	}
> > +
> > +	lseek(comparefd, 0, SEEK_END);
> > +
> > +	while (1) {
> > +		if (comparefd >= 0) {
> > +			r = read(comparefd, buf, sizeof(buf) - 1);
> > +			if (r < 0) {
> > +				if (errno != EAGAIN && errno != EPIPE) {
> > +					errf("Warning: Error reading kmsg comparison record: %m\n");
> > +					close(comparefd);
> > +					return readed;
> > +				}
> > +			} else {
> > +				buf[r] = '\0';
> > +				if (sscanf(buf, "%u,%llu,%llu,%c;",
> > +					   &flags, &cmpseq, &usec, &cont) == 4) {
> > +					close(comparefd);
> > +					comparefd = -1;
> > +				}
> What would happen if the first call to
> r = read(comparefd, buf, sizeof(buf) - 1);
> failed and then
> r = read(kmsgfd, buf, sizeof(buf));
> would succeed? Would we be counting the comparefd bytes after we
> already got some bytes from kmsgfd? Or is the comparison
> operation optional?

Please note that while(1) loop is entered with comparefd >= 0
and is set to -1 only when read from it succeeded. Also,
comparefd reads are not counted here as this is only a once
time read of one dmesg line.

> > +			}
> > +		}
> > +
> > +		r = read(kmsgfd, buf, sizeof(buf));
> > +		if (r < 0) {
> > +			if (errno == EPIPE) {
> > +				if (!underflow_once) {
> > +					errf("Warning: kernel log ringbuffer underflow, some records lost.\n");
> > +					underflow_once = true;
> > +				}
> > +				continue;
> > +			} else if (errno == EINVAL) {
> > +				errf("Warning: Buffer too small for kernel log record, record lost.\n");
> > +				continue;
> > +			} else if (errno != EAGAIN) {
> > +				errf("Error reading from kmsg: %m\n");
> > +				return 0;
> > +			}
> > +
> > +			/* EAGAIN, so we're done reading */
> > +			close(comparefd);
> > +
> > +			return readed;
> > +		}
> > +
> > +		readed += r;
> > +		if (wrfd && r)
> > +			write(wrfd, buf, r);
> > +
> > +		if (comparefd < 0 && sscanf(buf, "%u,%llu,%llu,%c;",
> > +					    &flags, &seq, &usec, &cont) == 4) {
> > +			if (seq >= cmpseq)
> I wonder if seq can be a lower value than cmpseq - should we
> care if that happens? If not, then maybe move the "seq >= cmpseq"
> check into the if before, to reduce indentation a bit.
> 

Good question, imho it could happen in systems running for
very long time without reboot, then counter could overflow
and start from zero. Imho this will not happen in out CI
runs.

> > +				return readed;
> > +		}
> > +
> > +		if (size && readed >= size) {
> > +			if (comparefd >= 0)
> > +				close(comparefd);
> > +
> > +			return readed;
> > +		}
> > +	} /* while(1) */
> > +}
> > +
> >  static bool kill_child(int sig, pid_t child)
> >  {
> >  	/*
> > @@ -948,6 +1042,78 @@ static size_t calc_last_dmesg_chunk(size_t limit, size_t disk_usage)
> >  	return dt != 0 ? dt : -1;
> >  }
> >  
> > +/*
> > + * Returns:
> > + *  =>0.0 - Success, measured kmsg activity in bytes/second
> > + *  -1.0 - Failure
> > + */
> > +static double measure_dmesg_bytes_per_sec(int wrfd)
> > +{
> > +	struct timespec time_beg, time_now, nsec_sleep;
> > +	unsigned long long readed, rnow;
> > +	double time, ratio;
> > +	int kmsgfd, cnt, zero;
> > +
> > +	if ((kmsgfd = open_kmsg_rdonly()) < 0) {
> > +		errf("Warning: Cannot open /dev/kmsg\n");
> > +
> > +		return -1.0;
> > +	}
> > +
> > +	if (wrfd < 0)
> > +		wrfd = 0;
> > +
> > +	lseek(kmsgfd, 0, SEEK_END);
> > +	nsec_sleep.tv_sec = 0;
> > +	nsec_sleep.tv_nsec = 10ULL * 1000ULL * 1000ULL; /* 10^7 nanoseconds = 10^-2 sec */
> > +	runner_gettime(&time_beg);
> > +	for (cnt = 0, zero = 0, time = 0.0, readed = 0.0; cnt < 11 && time < 0.01; ++cnt) {
> > +		if (zero)
> If "zero" only serves as a flag then I think below sleep could
> be moved to the end of this for loop, because from execution
> standpoint this will never trigger during the first pass

Right, I rewritten this loop according to your suggestions,
less vars, easier flow (I hope). See v5.

> > +			nanosleep(&nsec_sleep, NULL);
> > +
> > +		rnow = read_dmesg2(kmsgfd, 128 * 1024, wrfd); /* 64KB max */
> > +		readed += rnow;
> > +		runner_gettime(&time_now);
> > +		time = igt_time_elapsed(&time_beg, &time_now);
> > +		if (time <= 0.0) {
> > +			errf("Warning: Time underflow\n");
> > +
> > +			return -1.0;
> > +		}
> > +
> > +		if (rnow == 0)
> Move the nanosleep here and perhaps remove "zero" variable
> completely. Then the code here would only contain a single if:
> 	if (rnow == 0)
> 		nanosleep(&nsec_sleep, NULL);
> 

Done.

> > +			++zero;
> > +		else
> > +			zero = 0;
> > +	}
> > +
> > +	runner_gettime(&time_now);
> > +	time = igt_time_elapsed(&time_beg, &time_now);
> > +	if (time <= 0.0) {
> > +		errf("Warning: Time underflow\n");
> > +
> You could skip this newline here.
> 

Done.

> > +		return -1.0;
> > +	}
> > +
> > +	ratio = (double)readed / time;
> > +
> > +	return ratio;
> You could return (double)readed / time; and remove ratio
> variable.
> 

Done, thank you for review.

Regards,
Kamil

> Best Regards,
> Krzysztof


More information about the igt-dev mailing list