[PATCH i-g-t v2] runner: Use 'abort' as result if a test caused an abort

Petri Latvala petri.latvala at intel.com
Fri Jan 20 06:06:43 UTC 2023


... when possible, which is when using socket comms.

v2: Handle abort conditions between dynamic subtests

Signed-off-by: Petri Latvala <petri.latvala at intel.com>
Cc: Arkadiusz Hiler <arek at hiler.eu>
Cc: Kamil Konieczny <kamil.konieczny at linux.intel.com>
---
 runner/executor.c  | 100 +++++++++++++++++++++++++++++++++++++++------
 runner/resultgen.c |   6 +++
 2 files changed, 93 insertions(+), 13 deletions(-)

diff --git a/runner/executor.c b/runner/executor.c
index 9d3623b4..100a50f0 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -858,7 +858,8 @@ static int monitor_output(pid_t child,
 			  int *outputs,
 			  double *time_spent,
 			  struct settings *settings,
-			  char **abortreason)
+			  char **abortreason,
+			  bool *abort_already_written)
 {
 	fd_set set;
 	char *buf;
@@ -878,6 +879,7 @@ static int monitor_output(pid_t child,
 	bool aborting = false;
 	size_t disk_usage = 0;
 	bool socket_comms_used = false; /* whether the test actually uses comms */
+	bool results_received = false; /* whether we already have test results that might need overriding if we detect an abort condition */
 
 	igt_gettime(&time_beg);
 	time_last_activity = time_last_subtest = time_killed = time_beg;
@@ -1101,8 +1103,6 @@ static int monitor_output(pid_t child,
 					goto socket_end;
 				}
 
-				write_packet_with_canary(outputs[_F_SOCKET], packet, settings->sync);
-
 				/*
 				 * runner sends EXEC itself before executing
 				 * the test, other types indicate the test
@@ -1115,10 +1115,44 @@ static int monitor_output(pid_t child,
 				    packet->type == PACKETTYPE_DYNAMIC_SUBTEST_START) {
 					time_last_subtest = time_now;
 					disk_usage = 0;
+
+					if (results_received && !aborting) {
+						/*
+						 * We already have
+						 * results for a
+						 * dynamic subtest or
+						 * a subtest. Before
+						 * writing to disk
+						 * that the next one
+						 * starts, check
+						 * whether it caused
+						 * an abort condition.
+						 */
+						*abortreason = need_to_abort(settings);
+						if (*abortreason) {
+							write_packet_with_canary(outputs[_F_SOCKET],
+										 runnerpacket_log(STDOUT_FILENO, "\nThis test caused an abort condition: "),
+										 false);
+							write_packet_with_canary(outputs[_F_SOCKET],
+										 runnerpacket_log(STDOUT_FILENO, *abortreason),
+										 false);
+							write_packet_with_canary(outputs[_F_SOCKET],
+										 runnerpacket_resultoverride("abort"),
+										 settings->sync);
+
+							aborting = true;
+							*abort_already_written = true;
+						}
+					}
 				}
 
+				write_packet_with_canary(outputs[_F_SOCKET], packet, settings->sync);
 				disk_usage += packet->size;
 
+				if (packet->type == PACKETTYPE_SUBTEST_RESULT ||
+				    packet->type == PACKETTYPE_DYNAMIC_SUBTEST_RESULT)
+					results_received = true;
+
 				if (settings->log_level >= LOG_LEVEL_VERBOSE) {
 					runnerpacket_read_helper helper = {};
 					const char *time;
@@ -1588,7 +1622,8 @@ static int execute_next_entry(struct execute_state *state,
 			      struct job_list_entry *entry,
 			      int testdirfd, int resdirfd,
 			      int sigfd, sigset_t *sigmask,
-			      char **abortreason)
+			      char **abortreason,
+			      bool *abort_already_written)
 {
 	int dirfd;
 	int outputs[_F_LAST];
@@ -1704,7 +1739,7 @@ static int execute_next_entry(struct execute_state *state,
 	result = monitor_output(child, outfd, errfd, socketfd,
 				kmsgfd, sigfd,
 				outputs, time_spent, settings,
-				abortreason);
+				abortreason, abort_already_written);
 
 out_kmsgfd:
 	close(kmsgfd);
@@ -2202,6 +2237,31 @@ static void code_coverage_stop(struct settings *settings, const char *job_name,
 	run_as_root(argv, sigfd, abortreason);
 }
 
+/* Open <resdirfd>/<testidx>/comms; return its fd (caller closes) only if the dump parses, else -1 */
+static int open_comms_if_valid(int resdirfd, size_t testidx)
+{
+	struct comms_visitor emptyvisitor = {};
+	char name[32];
+	int dirfd, commsfd;
+
+	snprintf(name, sizeof(name), "%zu", testidx); /* size_t takes %zu, not the signed %zd */
+	dirfd = openat(resdirfd, name, O_DIRECTORY | O_RDONLY);
+	if (dirfd < 0)
+		return -1;
+	/* The directory fd is only needed to resolve "comms"; it is closed right after. */
+	commsfd = openat(dirfd, "comms", O_RDWR);
+	close(dirfd);
+
+	if (commsfd < 0)
+		return -1;
+	/* The visitor is zero-initialized, so this call is used purely for its parse status. */
+	if (comms_read_dump(commsfd, &emptyvisitor) == COMMSPARSE_SUCCESS)
+		return commsfd;
+	/* Parse did not succeed: do not hand out the fd. */
+	close(commsfd);
+	return -1;
+}
+
 bool execute(struct execute_state *state,
 	     struct settings *settings,
 	     struct job_list *job_list)
@@ -2342,6 +2402,7 @@ bool execute(struct execute_state *state,
 		char *reason = NULL;
 		char *job_name;
 		int result;
+		bool already_written = false;
 
 		if (should_die_because_signal(sigfd)) {
 			status = false;
@@ -2355,13 +2416,13 @@ bool execute(struct execute_state *state,
 
 		if (reason == NULL) {
 			result = execute_next_entry(state,
-						job_list->size,
-						&time_spent,
-						settings,
-						&job_list->entries[state->next],
-						testdirfd, resdirfd,
-						sigfd, &sigmask,
-						&reason);
+						    job_list->size,
+						    &time_spent,
+						    settings,
+						    &job_list->entries[state->next],
+						    testdirfd, resdirfd,
+						    sigfd, &sigmask,
+						    &reason, &already_written);
 
 			if (settings->cov_results_per_test) {
 				code_coverage_stop(settings, job_name, sigfd, &reason);
@@ -2374,7 +2435,20 @@ bool execute(struct execute_state *state,
 			char *next = (state->next + 1 < job_list->size ?
 				      entry_display_name(&job_list->entries[state->next + 1]) :
 				      strdup("nothing"));
-			write_abort_file(resdirfd, reason, prev, next);
+			int commsfd;
+
+			commsfd = open_comms_if_valid(resdirfd, state->next);
+			if (commsfd >= 0 && !already_written) {
+				lseek(commsfd, 0, SEEK_END);
+				write_packet_with_canary(commsfd, runnerpacket_log(STDOUT_FILENO, "\nThis test caused an abort condition: "), false);
+				write_packet_with_canary(commsfd, runnerpacket_log(STDOUT_FILENO, reason), false);
+				write_packet_with_canary(commsfd, runnerpacket_resultoverride("abort"), settings->sync);
+
+				close(commsfd);
+			} else {
+				write_abort_file(resdirfd, reason, prev, next);
+			}
+
 			free(prev);
 			free(next);
 			free(reason);
diff --git a/runner/resultgen.c b/runner/resultgen.c
index 596de786..b00bb6ba 100644
--- a/runner/resultgen.c
+++ b/runner/resultgen.c
@@ -1540,6 +1540,9 @@ static bool comms_handle_subtest_start(const struct runnerpacket *packet,
 		/* Subtest starting message is not in logs with socket comms, inject it manually */
 		comms_inject_subtest_start_log(context, STARTING_SUBTEST, helper.subteststart.name);
 
+		free(context->subtestresult);
+		context->subtestresult = NULL;
+
 		break;
 	default:
 		assert(false); /* unreachable */
@@ -1669,6 +1672,9 @@ static bool comms_handle_dynamic_subtest_start(const struct runnerpacket *packet
 		/* Dynamic subtest starting message is not in logs with socket comms, inject it manually */
 		comms_inject_subtest_start_log(context, STARTING_DYNAMIC_SUBTEST, helper.dynamicsubteststart.name);
 
+		free(context->dynamicsubtestresult);
+		context->dynamicsubtestresult = NULL;
+
 		break;
 	default:
 		assert(false); /* unreachable */
-- 
2.30.2



More information about the Intel-gfx-trybot mailing list