[Mesa-dev] [PATCH 01/11] i965: Mark shader programs for capture in the error state.

Matt Turner mattst88 at gmail.com
Mon May 1 20:54:45 UTC 2017


When the GPU hangs, the kernel saves some state for us. Until now it has
not included the shader programs, which are very often the reason the
GPU hang occurred. With the programs saved in the error state, we should
be more capable of debugging hangs.

Thanks to Chris Wilson and Ben Widawsky who provided the kernel support
for this feature ("drm/i915: Copy user requested buffers into the error
state"), which will be in kernel v4.13.
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c        | 1 +
 src/mesa/drivers/dri/i965/brw_bufmgr.h        | 5 +++++
 src/mesa/drivers/dri/i965/brw_program_cache.c | 4 ++++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 2 +-
 src/mesa/drivers/dri/i965/intel_screen.c      | 8 ++++++++
 src/mesa/drivers/dri/i965/intel_screen.h      | 7 +++++++
 6 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 4b64331..2f17934 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -599,6 +599,7 @@ bo_unreference_final(struct brw_bo *bo, time_t time)
       bo->free_time = time;
 
       bo->name = NULL;
+      bo->kflags = 0;
 
       list_addtail(&bo->head, &bucket->head);
    } else {
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 1b1790a..56ec206 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -97,6 +97,11 @@ struct brw_bo {
    int refcount;
    const char *name;
 
+#ifndef EXEC_OBJECT_CAPTURE
+#define EXEC_OBJECT_CAPTURE            (1<<7)
+#endif
+   uint64_t kflags;
+
    /**
     * Kenel-assigned global name for this object
     *
diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c
index c06ee23..93eb119 100644
--- a/src/mesa/drivers/dri/i965/brw_program_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
@@ -216,6 +216,8 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
    struct brw_bo *new_bo;
 
    new_bo = brw_bo_alloc(brw->bufmgr, "program cache", new_size, 64);
+   if (can_do_exec_capture(brw->screen))
+      new_bo->kflags = EXEC_OBJECT_CAPTURE;
    if (brw->has_llc)
       brw_bo_map_unsynchronized(brw, new_bo);
 
@@ -407,6 +409,8 @@ brw_init_caches(struct brw_context *brw)
       calloc(cache->size, sizeof(struct brw_cache_item *));
 
    cache->bo = brw_bo_alloc(brw->bufmgr, "program cache",  4096, 64);
+   if (can_do_exec_capture(brw->screen))
+      cache->bo->kflags = EXEC_OBJECT_CAPTURE;
    if (brw->has_llc)
       brw_bo_map_unsynchronized(brw, cache->bo);
 }
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 154c095..496b492 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -530,7 +530,7 @@ add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
    }
    validation_entry->alignment = bo->align;
    validation_entry->offset = bo->offset64;
-   validation_entry->flags = 0;
+   validation_entry->flags = bo->kflags;
    validation_entry->rsvd1 = 0;
    validation_entry->rsvd2 = 0;
 
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 34a5f18..514c17e 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1994,6 +1994,14 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
       screen->cmd_parser_version = 0;
    }
 
+   /* Kernel 4.13 required for exec object capture */
+#ifndef I915_PARAM_HAS_EXEC_CAPTURE
+#define I915_PARAM_HAS_EXEC_CAPTURE 45
+#endif
+   if (intel_get_boolean(screen, I915_PARAM_HAS_EXEC_CAPTURE)) {
+      screen->kernel_features |= KERNEL_ALLOWS_EXEC_CAPTURE;
+   }
+
    if (!intel_detect_pipelined_so(screen)) {
       /* We can't do anything, so the effective version is 0. */
       screen->cmd_parser_version = 0;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index fe0e044..f9c1db6 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -74,6 +74,7 @@ struct intel_screen
 #define KERNEL_ALLOWS_MI_MATH_AND_LRR               (1<<2)
 #define KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3 (1<<3)
 #define KERNEL_ALLOWS_COMPUTE_DISPATCH              (1<<4)
+#define KERNEL_ALLOWS_EXEC_CAPTURE                  (1<<5)
 
    struct brw_bufmgr *bufmgr;
 
@@ -155,6 +156,12 @@ can_do_predicate_writes(const struct intel_screen *screen)
    return screen->kernel_features & KERNEL_ALLOWS_PREDICATE_WRITES;
 }
 
+static inline bool
+can_do_exec_capture(const struct intel_screen *screen)
+{
+   return screen->kernel_features & KERNEL_ALLOWS_EXEC_CAPTURE;
+}
+
 #ifdef __cplusplus
 }
 #endif
-- 
2.10.2



More information about the mesa-dev mailing list