[Intel-gfx] [PATCH igt] lib: Add a GPU error detector

Chris Wilson chris at chris-wilson.co.uk
Tue Mar 22 11:48:23 UTC 2016


By listening to uevents from the kernel, we can detect when the GPU hangs.
To do so, we fork a helper process that monitors the uevents and sends a
signal back to the parent whenever the device reports an error.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 benchmarks/Makefile.am   |  2 +-
 debugger/Makefile.am     |  2 +-
 demos/Makefile.am        |  2 +-
 lib/Makefile.am          | 12 +++++--
 lib/igt_aux.c            | 82 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/igt_aux.h            |  3 ++
 tests/Makefile.am        |  3 +-
 tests/gem_exec_whisper.c |  4 +++
 tools/Makefile.am        |  2 +-
 9 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
index c67f472..2c2d100 100644
--- a/benchmarks/Makefile.am
+++ b/benchmarks/Makefile.am
@@ -3,7 +3,7 @@ include Makefile.sources
 
 AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
 AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) $(LIBUNWIND_CFLAGS)
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
+LDADD = $(top_builddir)/lib/libintel_tools.la
 
 benchmarks_LTLIBRARIES = gem_exec_tracer.la
 gem_exec_tracer_la_LDFLAGS = -module -avoid-version -no-undefined
diff --git a/debugger/Makefile.am b/debugger/Makefile.am
index 5a523f5..9d231d3 100644
--- a/debugger/Makefile.am
+++ b/debugger/Makefile.am
@@ -15,4 +15,4 @@ AM_CFLAGS = 			\
 	$(LIBUNWIND_CFLAGS)	\
 	$(CWARNFLAGS)
 
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS)
+LDADD = $(top_builddir)/lib/libintel_tools.la
diff --git a/demos/Makefile.am b/demos/Makefile.am
index d18a705..e6fbb3b 100644
--- a/demos/Makefile.am
+++ b/demos/Makefile.am
@@ -4,4 +4,4 @@ bin_PROGRAMS = 				\
 
 AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
 AM_CFLAGS = $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) $(LIBUNWIND_CFLAGS)
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS)
+LDADD = $(top_builddir)/lib/libintel_tools.la
diff --git a/lib/Makefile.am b/lib/Makefile.am
index a8a1eb6..d2f2e16 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -15,12 +15,20 @@ if HAVE_VC4
 endif
 
 AM_CPPFLAGS = -I$(top_srcdir)
-AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(LIBUNWIND_CFLAGS) $(DEBUG_CFLAGS) \
+AM_CFLAGS = $(CWARNFLAGS) $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(LIBUNWIND_CFLAGS) $(DEBUG_CFLAGS) \
 	    -DIGT_SRCDIR=\""$(abs_top_srcdir)/tests"\" \
 	    -DIGT_DATADIR=\""$(pkgdatadir)"\" \
 	    -DIGT_LOG_DOMAIN=\""$(subst _,-,$*)"\" \
 	    -pthread
 
-LDADD = $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
 AM_CFLAGS += $(CAIRO_CFLAGS)
 
+libintel_tools_la_LIBADD = \
+	$(DRM_LIBS) \
+	$(PCIACCESS_LIBS) \
+	$(CAIRO_LIBS) \
+	$(LIBUDEV_LIBS) \
+	$(LIBUNWIND_LIBS) \
+	$(TIMER_LIBS) \
+	-lm
+
diff --git a/lib/igt_aux.c b/lib/igt_aux.c
index 7deaf2f..d8f72fb 100644
--- a/lib/igt_aux.c
+++ b/lib/igt_aux.c
@@ -42,6 +42,7 @@
 #include <stdlib.h>
 #include <time.h>
 #include <unistd.h>
+#include <sys/poll.h>
 #include <sys/wait.h>
 #include <sys/time.h>
 #include <sys/types.h>
@@ -360,6 +361,132 @@ void igt_stop_signal_helper(void)
 	sig_stat = 0;
 }
 
+#if HAVE_UDEV
+#include <errno.h>
+#include <libudev.h>
+
+static struct igt_helper_process hang_detector;
+
+/*
+ * Helper process body: listens for kernel uevents from the "drm" subsystem
+ * and sends SIGRTMAX to @pid whenever the device numbered @rdev reports
+ * ERROR=1. Never returns; exits when polling or receiving from the monitor
+ * fails, or when @pid can no longer be signalled.
+ *
+ * NOTE(review): the results of udev_new() and
+ * udev_monitor_new_from_netlink() are not checked here - confirm how a
+ * failed monitor setup should be reported.
+ */
+static void __attribute__((noreturn))
+hang_detector_process(pid_t pid, dev_t rdev)
+{
+	struct udev_monitor *mon =
+		udev_monitor_new_from_netlink(udev_new(), "kernel");
+	struct pollfd pfd;
+
+	udev_monitor_filter_add_match_subsystem_devtype(mon, "drm", NULL);
+	udev_monitor_enable_receiving(mon);
+
+	pfd.fd = udev_monitor_get_fd(mon);
+	pfd.events = POLLIN;
+
+	for (;;) {
+		struct udev_device *dev;
+		dev_t devnum;
+		int ret;
+
+		ret = poll(&pfd, 1, -1);
+		if (ret < 0 && errno == EINTR)
+			continue; /* A signal interrupted us, keep listening. */
+		if (ret <= 0)
+			break;
+
+		dev = udev_monitor_receive_device(mon);
+		if (dev == NULL)
+			break;
+
+		/* Only react to events from the device we were asked to watch. */
+		devnum = udev_device_get_devnum(dev);
+		if (memcmp(&rdev, &devnum, sizeof(dev_t)) == 0) {
+			const char *str;
+
+			str = udev_device_get_property_value(dev, "ERROR");
+			if (str && atoi(str) == 1)
+				kill(pid, SIGRTMAX);
+		}
+
+		udev_device_unref(dev);
+		if (kill(pid, 0)) /* Parent has died, so must we. */
+			break;
+	}
+
+	exit(0);
+}
+
+/* SIGRTMAX handler, the signal sent by the helper: trips an igt_assert. */
+static void sig_abort(int sig)
+{
+	igt_assert(!"GPU hung");
+}
+
+/**
+ * igt_fork_hang_detector:
+ * @fd: file descriptor of the device to watch
+ *
+ * Forks a helper process that listens for kernel uevents from the "drm"
+ * subsystem. Whenever the device behind @fd reports ERROR=1, the helper
+ * sends SIGRTMAX to the calling process, whose handler trips an igt_assert.
+ *
+ * Does nothing when only listing subtests.
+ */
+void igt_fork_hang_detector(int fd)
+{
+	struct stat st;
+
+	if (igt_only_list_subtests())
+		return;
+
+	igt_assert(fstat(fd, &st) == 0);
+
+	/* SIGRTMAX is what the helper sends back; route it to sig_abort. */
+	signal(SIGRTMAX, sig_abort);
+	igt_fork_helper(&hang_detector)
+		hang_detector_process(getppid(), st.st_rdev);
+}
+
+/**
+ * igt_stop_hang_detector:
+ *
+ * Stops the helper process started by igt_fork_hang_detector().
+ * Does nothing when only listing subtests.
+ */
+void igt_stop_hang_detector(void)
+{
+	if (igt_only_list_subtests())
+		return;
+
+	igt_stop_helper(&hang_detector);
+}
+#else
+/*
+ * HAVE_UDEV is not set: no hang detector is available in this build.
+ *
+ * NOTE(review): igt_skip() is called without a message here - confirm that
+ * this matches its prototype.
+ */
+void igt_fork_hang_detector(int fd)
+{
+	if (igt_only_list_subtests())
+		return;
+
+	igt_skip();
+}
+
+void igt_stop_hang_detector(void)
+{
+}
+#endif
+
 /**
  * igt_check_boolean_env_var:
  * @env_var: environment variable name
diff --git a/lib/igt_aux.h b/lib/igt_aux.h
index 9fade67..eee80ca 100644
--- a/lib/igt_aux.h
+++ b/lib/igt_aux.h
@@ -40,6 +40,9 @@ extern int num_trash_bos;
 void igt_fork_signal_helper(void);
 void igt_stop_signal_helper(void);
 
+void igt_fork_hang_detector(int fd);
+void igt_stop_hang_detector(void);
+
 struct igt_sigiter {
 	unsigned pass;
 };
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 839b37d..24d374a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -56,9 +56,8 @@ AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(DEBUG_CFLAGS)\
 	$(LIBUNWIND_CFLAGS) \
 	$(NULL)
 
-LDADD = ../lib/libintel_tools.la $(PCIACCESS_LIBS) $(DRM_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS)
+LDADD = ../lib/libintel_tools.la $(GLIB_LIBS)
 
-LDADD += $(CAIRO_LIBS) $(LIBUDEV_LIBS) $(GLIB_LIBS) -lm
 AM_CFLAGS += $(CAIRO_CFLAGS) $(LIBUDEV_CFLAGS) $(GLIB_CFLAGS)
 AM_LDFLAGS = -Wl,--as-needed
 
diff --git a/tests/gem_exec_whisper.c b/tests/gem_exec_whisper.c
index b84f1a2..1991fed 100644
--- a/tests/gem_exec_whisper.c
+++ b/tests/gem_exec_whisper.c
@@ -368,6 +368,8 @@ igt_main
 	igt_fixture
 		fd = drm_open_driver_master(DRIVER_INTEL);
 
+	igt_fork_hang_detector(fd);
+
 	for (const struct mode *m = modes; m->name; m++)
 		igt_subtest_f("%s", *m->name ? m->name : "basic")
 			whisper(fd, -1, m->flags);
@@ -382,6 +384,8 @@ igt_main
 				whisper(fd, e->exec_id | e->flags, m->flags);
 	}
 
+	igt_stop_hang_detector();
+
 	igt_fixture
 		close(fd);
 }
diff --git a/tools/Makefile.am b/tools/Makefile.am
index 74c5521..df48d94 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -4,7 +4,7 @@ SUBDIRS = null_state_gen registers
 
 AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
 AM_CFLAGS = $(DEBUG_CFLAGS) $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) $(LIBUNWIND_CFLAGS) -DPKGDATADIR=\"$(pkgdatadir)\"
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUDEV_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
+LDADD = $(top_builddir)/lib/libintel_tools.la
 AM_LDFLAGS = -Wl,--as-needed
 
 
-- 
2.8.0.rc3



More information about the Intel-gfx mailing list