[Intel-gfx] [PATCH igt] lib: Add a GPU error detector
Chris Wilson
chris at chris-wilson.co.uk
Tue Mar 22 11:48:23 UTC 2016
By listening to the uevents sent by the kernel, we can detect when the GPU
hangs. Doing so requires forking a helper process that monitors those
events and sends a signal back to the parent when a hang is reported.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
benchmarks/Makefile.am | 2 +-
debugger/Makefile.am | 2 +-
demos/Makefile.am | 2 +-
lib/Makefile.am | 12 +++++--
lib/igt_aux.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++
lib/igt_aux.h | 3 ++
tests/Makefile.am | 3 +-
tests/gem_exec_whisper.c | 4 +++
tools/Makefile.am | 2 +-
9 files changed, 104 insertions(+), 8 deletions(-)
diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
index c67f472..2c2d100 100644
--- a/benchmarks/Makefile.am
+++ b/benchmarks/Makefile.am
@@ -3,7 +3,7 @@ include Makefile.sources
AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) $(LIBUNWIND_CFLAGS)
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
+LDADD = $(top_builddir)/lib/libintel_tools.la
benchmarks_LTLIBRARIES = gem_exec_tracer.la
gem_exec_tracer_la_LDFLAGS = -module -avoid-version -no-undefined
diff --git a/debugger/Makefile.am b/debugger/Makefile.am
index 5a523f5..9d231d3 100644
--- a/debugger/Makefile.am
+++ b/debugger/Makefile.am
@@ -15,4 +15,4 @@ AM_CFLAGS = \
$(LIBUNWIND_CFLAGS) \
$(CWARNFLAGS)
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS)
+LDADD = $(top_builddir)/lib/libintel_tools.la
diff --git a/demos/Makefile.am b/demos/Makefile.am
index d18a705..e6fbb3b 100644
--- a/demos/Makefile.am
+++ b/demos/Makefile.am
@@ -4,4 +4,4 @@ bin_PROGRAMS = \
AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
AM_CFLAGS = $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) $(LIBUNWIND_CFLAGS)
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS)
+LDADD = $(top_builddir)/lib/libintel_tools.la
diff --git a/lib/Makefile.am b/lib/Makefile.am
index a8a1eb6..d2f2e16 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -15,12 +15,20 @@ if HAVE_VC4
endif
AM_CPPFLAGS = -I$(top_srcdir)
-AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(LIBUNWIND_CFLAGS) $(DEBUG_CFLAGS) \
+AM_CFLAGS = $(CWARNFLAGS) $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(LIBUNWIND_CFLAGS) $(DEBUG_CFLAGS) \
-DIGT_SRCDIR=\""$(abs_top_srcdir)/tests"\" \
-DIGT_DATADIR=\""$(pkgdatadir)"\" \
-DIGT_LOG_DOMAIN=\""$(subst _,-,$*)"\" \
-pthread
-LDADD = $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
AM_CFLAGS += $(CAIRO_CFLAGS)
+libintel_tools_la_LIBADD = \
+ $(DRM_LIBS) \
+ $(PCIACCESS_LIBS) \
+ $(CAIRO_LIBS) \
+ $(LIBUDEV_LIBS) \
+ $(LIBUNWIND_LIBS) \
+ $(TIMER_LIBS) \
+ -lm
+
diff --git a/lib/igt_aux.c b/lib/igt_aux.c
index 7deaf2f..d8f72fb 100644
--- a/lib/igt_aux.c
+++ b/lib/igt_aux.c
@@ -42,6 +42,7 @@
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
+#include <sys/poll.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/types.h>
@@ -360,6 +361,87 @@ void igt_stop_signal_helper(void)
sig_stat = 0;
}
+#if HAVE_UDEV
+#include <libudev.h>
+
+static struct igt_helper_process hang_detector;
+static void __attribute__((noreturn))
+hang_detector_process(pid_t pid, dev_t rdev)
+{
+ struct udev_monitor *mon =
+ udev_monitor_new_from_netlink(udev_new(), "kernel");
+ struct pollfd pfd;
+
+ udev_monitor_filter_add_match_subsystem_devtype(mon, "drm", NULL);
+ udev_monitor_enable_receiving(mon);
+
+ pfd.fd = udev_monitor_get_fd(mon);
+ pfd.events = POLLIN;
+
+ while (poll(&pfd, 1, -1) > 0) {
+ struct udev_device *dev = udev_monitor_receive_device(mon);
+ dev_t devnum;
+
+ if (dev == NULL)
+ break;
+
+ devnum = udev_device_get_devnum(dev);
+ if (memcmp(&rdev, &devnum, sizeof(dev_t)) == 0) {
+ const char *str;
+
+ str = udev_device_get_property_value(dev, "ERROR");
+ if (str && atoi(str) == 1)
+ kill(pid, SIGRTMAX);
+ }
+
+ udev_device_unref(dev);
+ if (kill(pid, 0)) /* Parent has died, so must we. */
+ break;
+ }
+
+ exit(0);
+}
+
+static void sig_abort(int sig)
+{
+ igt_assert(!"GPU hung");
+}
+
+void igt_fork_hang_detector(int fd)
+{
+ struct stat st;
+
+ if (igt_only_list_subtests())
+ return;
+
+ igt_assert(fstat(fd, &st) == 0);
+
+ signal(SIGRTMAX, sig_abort);
+ igt_fork_helper(&hang_detector)
+ hang_detector_process(getppid(), st.st_rdev);
+}
+
+void igt_stop_hang_detector(void)
+{
+ if (igt_only_list_subtests())
+ return;
+
+ igt_stop_helper(&hang_detector);
+}
+#else
+void igt_fork_hang_detector(int fd)
+{
+ if (igt_only_list_subtests())
+ return;
+
+ igt_skip();
+}
+
+void igt_stop_hang_detector(void)
+{
+}
+#endif
+
/**
* igt_check_boolean_env_var:
* @env_var: environment variable name
diff --git a/lib/igt_aux.h b/lib/igt_aux.h
index 9fade67..eee80ca 100644
--- a/lib/igt_aux.h
+++ b/lib/igt_aux.h
@@ -40,6 +40,9 @@ extern int num_trash_bos;
void igt_fork_signal_helper(void);
void igt_stop_signal_helper(void);
+void igt_fork_hang_detector(int fd);
+void igt_stop_hang_detector(void);
+
struct igt_sigiter {
unsigned pass;
};
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 839b37d..24d374a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -56,9 +56,8 @@ AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(DEBUG_CFLAGS)\
$(LIBUNWIND_CFLAGS) \
$(NULL)
-LDADD = ../lib/libintel_tools.la $(PCIACCESS_LIBS) $(DRM_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS)
+LDADD = ../lib/libintel_tools.la $(GLIB_LIBS)
-LDADD += $(CAIRO_LIBS) $(LIBUDEV_LIBS) $(GLIB_LIBS) -lm
AM_CFLAGS += $(CAIRO_CFLAGS) $(LIBUDEV_CFLAGS) $(GLIB_CFLAGS)
AM_LDFLAGS = -Wl,--as-needed
diff --git a/tests/gem_exec_whisper.c b/tests/gem_exec_whisper.c
index b84f1a2..1991fed 100644
--- a/tests/gem_exec_whisper.c
+++ b/tests/gem_exec_whisper.c
@@ -368,6 +368,8 @@ igt_main
igt_fixture
fd = drm_open_driver_master(DRIVER_INTEL);
+ igt_fork_hang_detector(fd);
+
for (const struct mode *m = modes; m->name; m++)
igt_subtest_f("%s", *m->name ? m->name : "basic")
whisper(fd, -1, m->flags);
@@ -382,6 +384,8 @@ igt_main
whisper(fd, e->exec_id | e->flags, m->flags);
}
+ igt_stop_hang_detector();
+
igt_fixture
close(fd);
}
diff --git a/tools/Makefile.am b/tools/Makefile.am
index 74c5521..df48d94 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -4,7 +4,7 @@ SUBDIRS = null_state_gen registers
AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
AM_CFLAGS = $(DEBUG_CFLAGS) $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) $(LIBUNWIND_CFLAGS) -DPKGDATADIR=\"$(pkgdatadir)\"
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUDEV_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
+LDADD = $(top_builddir)/lib/libintel_tools.la
AM_LDFLAGS = -Wl,--as-needed
--
2.8.0.rc3
More information about the Intel-gfx
mailing list