[Intel-gfx] [PATCH i-g-t] tests/perf_pmu: Handle CPU hotplug failures better

Tvrtko Ursulin tursulin at ursulin.net
Fri Feb 23 11:34:53 UTC 2018


From: Chris Wilson <chris at chris-wilson.co.uk>

CPU hotplug, especially CPU0, can be flaky on commodity hardware.

To improve test reliability and response times when testing larger runs we
need to handle those cases better.

Handle failures to off-line a CPU by immediately skipping the test, and
failures to on-line a CPU by immediately rebooting the machine.

This patch includes igt_sysrq_reboot implementation from Chris Wilson.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
---
 lib/Makefile.sources |  2 ++
 lib/igt_sysrq.c      | 22 ++++++++++++++++++++++
 lib/igt_sysrq.h      | 30 ++++++++++++++++++++++++++++++
 lib/meson.build      |  2 ++
 tests/perf_pmu.c     | 42 ++++++++++++++++++++++++++++++++++--------
 5 files changed, 90 insertions(+), 8 deletions(-)
 create mode 100644 lib/igt_sysrq.c
 create mode 100644 lib/igt_sysrq.h

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 5b13ef8896c0..3d37ef1d1984 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -35,6 +35,8 @@ lib_source_list =	 	\
 	igt_stats.h		\
 	igt_sysfs.c		\
 	igt_sysfs.h		\
+	igt_sysrq.c		\
+	igt_sysrq.h		\
 	igt_x86.h		\
 	igt_x86.c		\
 	igt_vgem.c		\
diff --git a/lib/igt_sysrq.c b/lib/igt_sysrq.c
new file mode 100644
index 000000000000..fe3d2e344ff1
--- /dev/null
+++ b/lib/igt_sysrq.c
@@ -0,0 +1,22 @@
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/reboot.h>
+
+#include "igt_core.h"
+
+#include "igt_sysrq.h"
+
+void igt_sysrq_reboot(void)
+{
+	sync();
+
+	/* Try to be nice at first, and if that fails pull the trigger */
+	if (reboot(RB_AUTOBOOT)) {
+		int fd = open("/proc/sysrq-trigger", O_WRONLY);
+		igt_ignore_warn(write(fd, "b", 2));
+		close(fd);
+	}
+
+	abort();
+}
diff --git a/lib/igt_sysrq.h b/lib/igt_sysrq.h
new file mode 100644
index 000000000000..422473d2a480
--- /dev/null
+++ b/lib/igt_sysrq.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __IGT_SYSRQ_H__
+#define __IGT_SYSRQ_H__
+
+void igt_sysrq_reboot(void) __attribute__((noreturn));
+
+#endif /* __IGT_SYSRQ_H__ */
diff --git a/lib/meson.build b/lib/meson.build
index 8f14f6320ecf..63afd3ddb535 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -22,6 +22,7 @@ lib_headers = [
 	'igt_stats.h',
 	'igt_syncobj.h',
 	'igt_sysfs.h',
+	'igt_sysrq.h',
 	'igt_x86.h',
 	'igt_vgem.h',
 	'instdone.h',
@@ -69,6 +70,7 @@ lib_sources = [
 	'igt_stats.c',
 	'igt_syncobj.c',
 	'igt_sysfs.c',
+	'igt_sysrq.c',
 	'igt_vgem.c',
 	'igt_x86.c',
 	'instdone.c',
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 3bbb18d2f216..658d0976137f 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -41,6 +41,7 @@
 #include "igt_core.h"
 #include "igt_perf.h"
 #include "igt_sysfs.h"
+#include "igt_sysrq.h"
 #include "igt_pm.h"
 #include "sw_sync.h"
 
@@ -965,6 +966,7 @@ static void cpu_hotplug(int gem_fd)
 	int link[2];
 	int fd, ret;
 	int cur = 0;
+	char buf;
 
 	igt_skip_on(IS_BROXTON(intel_get_drm_devid(gem_fd)));
 	igt_require(cpu0_hotplug_support());
@@ -994,7 +996,7 @@ static void cpu_hotplug(int gem_fd)
 
 		for (;;) {
 			char name[128];
-			int cpufd;
+			int cpufd, ret;
 
 			igt_assert_lt(snprintf(name, sizeof(name),
 					       "/sys/devices/system/cpu/cpu%d/online",
@@ -1011,9 +1013,33 @@ static void cpu_hotplug(int gem_fd)
 			}
 
 			/* Offline followed by online a CPU. */
-			igt_assert_eq(write(cpufd, "0", 2), 2);
+
+			ret = write(cpufd, "0", 2);
+			if (ret < 0) {
+				/*
+				 * If we failed to offline a CPU we don't want
+				 * to proceed.
+				 */
+				igt_warn("Failed to offline cpu%u! (%d)\n",
+					 cpu, errno);
+				igt_assert_eq(write(link[1], "s", 1), 1);
+				break;
+			}
+
 			usleep(1e6);
-			igt_assert_eq(write(cpufd, "1", 2), 2);
+
+			ret = write(cpufd, "1", 2);
+			if (ret < 0) {
+				/*
+				 * Failure to bring a CPU back online is fatal
+				 * for the sanity of a test run so reboot
+				 * immediately.
+				 */
+				igt_warn("Failed to online cpu%u! (%d)\n",
+					 cpu, errno);
+				igt_sysrq_reboot();
+				igt_assert(0);
+			}
 
 			close(cpufd);
 			cpu++;
@@ -1027,15 +1053,12 @@ static void cpu_hotplug(int gem_fd)
 	 * until the CPU core shuffler finishes one loop.
 	 */
 	for (;;) {
-		char buf;
-		int ret2;
-
 		usleep(500e3);
 		end_spin(gem_fd, spin[cur], 0);
 
 		/* Check if the child is signaling completion. */
-		ret2 = read(link[0], &buf, 1);
-		if ( ret2 == 1 || (ret2 < 0 && errno != EAGAIN))
+		ret = read(link[0], &buf, 1);
+		if ( ret == 1 || (ret < 0 && errno != EAGAIN))
 			break;
 
 		igt_spin_batch_free(gem_fd, spin[cur]);
@@ -1054,6 +1077,9 @@ static void cpu_hotplug(int gem_fd)
 	close(fd);
 	close(link[0]);
 
+	/* Skip if child signals a problem with offlining a CPU. */
+	igt_skip_on(buf == 's');
+
 	assert_within_epsilon(val, ts[1] - ts[0], tolerance);
 }
 
-- 
2.14.1



More information about the Intel-gfx mailing list