[Mesa-dev] [PATCH v2 2/3] i965: Add INTEL_DEBUG=hang

Chris Wilson chris at chris-wilson.co.uk
Sat Feb 16 12:41:30 UTC 2019


Introduce a new debug option to wilfully cause the GPU to hang and for
the kernel to accuse us of being neglectful.
---
 src/intel/Makefile.sources                    |   2 +
 src/intel/common/gen_debug.c                  |   1 +
 src/intel/common/gen_debug.h                  |   1 +
 src/intel/common/gen_hang.c                   | 176 ++++++++++++++++++
 src/intel/common/gen_hang.h                   |  51 +++++
 src/intel/common/meson.build                  |   2 +
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  14 ++
 7 files changed, 247 insertions(+)
 create mode 100644 src/intel/common/gen_hang.c
 create mode 100644 src/intel/common/gen_hang.h

diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index 94a28d370e8..9058633abfc 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -18,6 +18,8 @@ COMMON_FILES = \
 	common/gen_disasm.h \
 	common/gen_defines.h \
 	common/gen_gem.h \
+	common/gen_hang.c \
+	common/gen_hang.h \
 	common/gen_l3_config.c \
 	common/gen_l3_config.h \
 	common/gen_urb_config.c \
diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index a978f2f5818..a4dd3965e13 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -85,6 +85,7 @@ static const struct debug_control debug_control[] = {
    { "nohiz",       DEBUG_NO_HIZ },
    { "color",       DEBUG_COLOR },
    { "reemit",      DEBUG_REEMIT },
+   { "hang",        DEBUG_HANG },
    { NULL,    0 }
 };
 
diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h
index 72d7ca20a39..49a93b87ebc 100644
--- a/src/intel/common/gen_debug.h
+++ b/src/intel/common/gen_debug.h
@@ -83,6 +83,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_NO_HIZ              (1ull << 39)
 #define DEBUG_COLOR               (1ull << 40)
 #define DEBUG_REEMIT              (1ull << 41)
+#define DEBUG_HANG                (1ull << 42)
 
 /* These flags are not compatible with the disk shader cache */
 #define DEBUG_DISK_CACHE_DISABLE_MASK DEBUG_SHADER_TIME
diff --git a/src/intel/common/gen_hang.c b/src/intel/common/gen_hang.c
new file mode 100644
index 00000000000..5f0dd4e0640
--- /dev/null
+++ b/src/intel/common/gen_hang.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * \file gen_hang.c
+ *
+ * Support for wilfully injecting GPU hangs.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <xf86drm.h>
+
+#include "drm-uapi/drm.h"
+#include "drm-uapi/i915_drm.h"
+
+#include "gen_hang.h"
+
+/* Allocate a GEM buffer object of \p size bytes.
+ *
+ * arg.handle is zero-initialised, so 0 is returned if the ioctl fails.
+ */
+static uint32_t __gem_create(int fd, uint64_t size)
+{
+   struct drm_i915_gem_create arg = { .size = size };
+   drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &arg);
+   return arg.handle;
+}
+
+/* Set the caching mode of \p handle; returns 0 or a negative errno. */
+static int __gem_set_caching(int fd, uint32_t handle, unsigned int caching)
+{
+   struct drm_i915_gem_caching arg = { .handle = handle, .caching = caching };
+   return drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) ? -errno : 0;
+}
+
+/* Map \p size bytes of \p handle, starting at \p offset, into the CPU address
+ * space.
+ *
+ * addr_ptr is preset to -1 so that a failed ioctl yields MAP_FAILED.
+ */
+static void *
+__gem_mmap(int fd, uint32_t handle, uint64_t offset, uint64_t size)
+{
+   struct drm_i915_gem_mmap arg = {
+      .handle = handle,
+      .offset = offset,
+      .size = size,
+      .addr_ptr = -1,
+   };
+   drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg);
+   return (void *)(uintptr_t)arg.addr_ptr;
+}
+
+/* Release \p handle; the ioctl result is deliberately not checked. */
+static void __gem_close(int fd, uint32_t handle)
+{
+   /* DRM_IOCTL_GEM_CLOSE takes a struct drm_gem_close, not a bare handle. */
+   struct drm_gem_close arg = { .handle = handle };
+   drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &arg);
+}
+
+/**
+ * Submit a batch buffer that loops forever on the GPU.
+ *
+ * \param fd     DRM file descriptor used for every ioctl.
+ * \param ctx    context id the batch is submitted on (execbuffer2 rsvd1).
+ * \param flags  hardware generation in the HANG_GENMASK bits, optionally ORed
+ *               with HANG_ALLOW_PREEMPTION and/or HANG_IMMEDIATE.
+ *
+ * With HANG_IMMEDIATE the call additionally waits for the batch to start
+ * executing and then writes "-1\n" to the i915_wedged debugfs file of DRM
+ * minor 0 (hard-coded path).
+ *
+ * \return 0 on success, otherwise a negative errno.
+ */
+int
+gen_inject_hang(int fd, uint32_t ctx, unsigned int flags)
+{
+   struct drm_i915_gem_relocation_entry reloc[2] = {};
+   struct drm_i915_gem_exec_object2 obj = {
+      .handle = __gem_create(fd, 4096),
+      .relocation_count = 2,
+      .relocs_ptr = (uintptr_t)reloc,
+   };
+   struct drm_i915_gem_execbuffer2 eb = {
+      .buffers_ptr = (uintptr_t)&obj,
+      .buffer_count = 1,
+      .rsvd1 = ctx
+   };
+   const int gen = flags & HANG_GENMASK;
+   uint32_t *batch, *cs;
+   int err;
+
+   /* Report the GEM_CREATE failure itself rather than a follow-up error. */
+   if (obj.handle == 0)
+      return -errno;
+
+   /* NOTE(review): presumably so the CPU poll below sees the GPU's write. */
+   err = __gem_set_caching(fd, obj.handle, I915_CACHING_CACHED);
+   if (err)
+      goto out_close;
+
+   batch = __gem_mmap(fd, obj.handle, 0, 4096);
+   if (batch == MAP_FAILED) {
+      err = -errno;
+      goto out_close;
+   }
+
+   /* Dword 0: store 1 into the last dword of the batch (batch[1023]). */
+   reloc[0].target_handle = obj.handle;
+   reloc[0].delta = 4096 - sizeof(*cs);
+   cs = batch;
+   *cs++ = 0x20 << 23 | (gen < 6 ? 1 << 22 : 0) | 2; /* MI_STORE_DWORD_IMM */
+   if (gen >= 8) {
+      reloc[0].offset = sizeof(*cs);
+      *cs++ = reloc[0].delta;
+      *cs++ = 0;
+   } else if (gen >= 4) {
+      reloc[0].offset = 2 * sizeof(*cs);
+      *cs++ = 0;
+      *cs++ = reloc[0].delta;
+   } else {
+      reloc[0].offset = sizeof(*cs);
+      cs[-1]--; /* one dword fewer follows, so shrink the length field */
+      *cs++ = reloc[0].delta;
+   }
+   *cs++ = 1;
+
+   /* batch[16] is the loop target (byte offset 64) of the jump below. */
+   if (flags & HANG_ALLOW_PREEMPTION)
+      batch[16] = 0x5 << 23; /* MI_ARB_CHECK */
+
+   /* Dword 1000: jump back to byte offset 64, closing the loop. */
+   cs = &batch[1000];
+   reloc[1].target_handle = obj.handle;
+   reloc[1].offset = 1001 * sizeof(*cs);
+   reloc[1].read_domains = I915_GEM_DOMAIN_COMMAND;
+   reloc[1].delta = 64;
+   if (gen >= 8) {
+      *cs++ = 0x31 << 23 | 1 << 8 | 1; /* MI_BATCH_BUFFER_START */
+      *cs++ = reloc[1].delta;
+      *cs++ = 0;
+   } else if (gen >= 6) {
+      *cs++ = 0x31 << 23 | 1 << 8; /* MI_BATCH_BUFFER_START */
+      *cs++ = reloc[1].delta;
+   } else {
+      *cs++ = 0x31 << 23 | 2 << 6; /* MI_BATCH_BUFFER_START */
+      if (gen < 4)
+         reloc[1].delta |= 1;
+      *cs = reloc[1].delta;
+      cs++;
+   }
+   *cs++ = 0xa << 23; /* never reached! */
+
+   if (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &eb)) {
+      err = -errno;
+      goto out;
+   }
+
+   if (flags & HANG_IMMEDIATE) {
+      /* Distinct name: a local "fd" here would shadow the DRM fd. */
+      int wedged;
+
+      wedged = open("/sys/kernel/debug/dri/0/i915_wedged", O_WRONLY);
+      if (wedged < 0) {
+         err = -errno;
+         goto out;
+      }
+
+      /* Spin until the GPU has executed the store at the top of the batch.
+       * NOTE(review): unbounded; never returns if the batch does not run.
+       */
+      while (!*(volatile uint32_t *)&batch[1023])
+         ;
+
+      if (write(wedged, "-1\n", 3) < 0)
+         err = -errno;
+
+      close(wedged);
+   }
+
+out:
+   munmap(batch, 4096);
+out_close:
+   __gem_close(fd, obj.handle);
+   return err;
+}
diff --git a/src/intel/common/gen_hang.h b/src/intel/common/gen_hang.h
new file mode 100644
index 00000000000..9efc6e1950d
--- /dev/null
+++ b/src/intel/common/gen_hang.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef GEN_HANG_H
+#define GEN_HANG_H
+
+/**
+ * \file gen_hang.h
+ *
+ * Wilful GPU hang injection.
+ */
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* gen_inject_hang() flags; the low byte holds the hardware generation. */
+#define HANG_GENMASK 0xff
+#define HANG_ALLOW_PREEMPTION (1 << 8)
+#define HANG_IMMEDIATE (1 << 9) /* requires debugfs access (root-only!) */
+
+/**
+ * Submit a hanging batch on context \p ctx of DRM device \p fd.
+ *
+ * \p gen_flags is the hardware generation (HANG_GENMASK bits) ORed with any
+ * of the HANG_* flags above.  Returns 0 on success or a negative errno.
+ */
+int gen_inject_hang(int fd, uint32_t ctx, unsigned int gen_flags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* GEN_HANG_H */
diff --git a/src/intel/common/meson.build b/src/intel/common/meson.build
index 332e978b0ad..7fa1349e35f 100644
--- a/src/intel/common/meson.build
+++ b/src/intel/common/meson.build
@@ -30,6 +30,8 @@ files_libintel_common = files(
   'gen_disasm.c',
   'gen_disasm.h',
   'gen_gem.h',
+  'gen_hang.c',
+  'gen_hang.h',
   'gen_l3_config.c',
   'gen_l3_config.h',
   'gen_urb_config.c',
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 8097392d22b..1cdf6fd65f5 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -33,6 +33,7 @@
 #include "brw_state.h"
 #include "common/gen_decoder.h"
 #include "common/gen_gem.h"
+#include "common/gen_hang.h"
 
 #include "util/hash_table.h"
 
@@ -897,6 +898,19 @@ _intel_batchbuffer_flush_fence(struct brw_context *brw,
       brw_bo_wait_rendering(brw->batch.batch.bo);
    }
 
+   if (unlikely(INTEL_DEBUG & DEBUG_HANG)) {
+      static int delay = 100;
+      if (--delay < 0) {
+         struct intel_screen *screen = brw->screen;
+         const struct gen_device_info *devinfo = &screen->devinfo;
+         __DRIscreen *dri_screen = screen->driScrnPriv;
+         fprintf(stderr, "injecting GPU hang\n");
+         gen_inject_hang(dri_screen->fd, brw->hw_ctx,
+                         devinfo->gen | HANG_ALLOW_PREEMPTION);
+         delay = 100;
+      }
+   }
+
    /* Start a new batch buffer. */
    brw_new_batch(brw);
 
-- 
2.20.1



More information about the mesa-dev mailing list