[Intel-gfx] [PATCH 041/190] drm/i915: Allow userspace to request no-error-capture upon GPU hangs
Chris Wilson
chris at chris-wilson.co.uk
Mon Jan 11 01:16:52 PST 2016
igt likes to inject GPU hangs into its command streams. However, as we
expect these hangs, we don't actually want them recorded in the dmesg
output or stored in the i915_error_state (usually). To accomodate this
allow userspace to set a flag on the context that any hang emanating
from that context will not be recorded. We still do the error capture
(otherwise how do we find the guilty context and know its intent?) as
part of the reason for random GPU hang injection is to exercise the race
conditions between the error capture and normal execution.
v2: Split out the request->ringbuf error capture changes.
v3: Move the flag defines next to the intel_context->flags definition
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Acked-by: Daniel Vetter <daniel.vetter at ffwll.ch>
Reviewed-by: Dave Gordon <david.s.gordon at intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 7 +++++--
drivers/gpu/drm/i915/i915_gem_context.c | 13 +++++++++++++
drivers/gpu/drm/i915/i915_gpu_error.c | 14 +++++++++-----
include/uapi/drm/i915_drm.h | 1 +
4 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c3b795f1566b..57e450e25ad6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -486,6 +486,7 @@ struct drm_i915_error_state {
struct timeval time;
char error_msg[128];
+ bool simulated;
int iommu;
u32 reset_count;
u32 suspend_count;
@@ -842,7 +843,6 @@ struct i915_ctx_hang_stats {
/* This must match up with the value previously used for execbuf2.rsvd1. */
#define DEFAULT_CONTEXT_HANDLE 0
-#define CONTEXT_NO_ZEROMAP (1<<0)
/**
* struct intel_context - as the name implies, represents a context.
* @ref: reference count.
@@ -867,11 +867,14 @@ struct intel_context {
int user_handle;
uint8_t remap_slice;
struct drm_i915_private *i915;
- int flags;
struct drm_i915_file_private *file_priv;
struct i915_ctx_hang_stats hang_stats;
struct i915_hw_ppgtt *ppgtt;
+ unsigned flags;
+#define CONTEXT_NO_ZEROMAP (1<<0)
+#define CONTEXT_NO_ERROR_CAPTURE (1<<1)
+
/* Legacy ring buffer submission */
struct {
struct drm_i915_gem_object *rcs_state;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index e5e9a8918f19..0aea5ccf6d68 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -939,6 +939,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
else
args->value = to_i915(dev)->gtt.base.total;
break;
+ case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+ args->value = !!(ctx->flags & CONTEXT_NO_ERROR_CAPTURE);
+ break;
default:
ret = -EINVAL;
break;
@@ -984,6 +987,16 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
}
break;
+ case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+ if (args->size) {
+ ret = -EINVAL;
+ } else {
+ if (args->value)
+ ctx->flags |= CONTEXT_NO_ERROR_CAPTURE;
+ else
+ ctx->flags &= ~CONTEXT_NO_ERROR_CAPTURE;
+ }
+ break;
default:
ret = -EINVAL;
break;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 93da2c7581f6..4f17d6847569 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1040,6 +1040,8 @@ static void i915_gem_record_rings(struct drm_device *dev,
rcu_read_unlock();
}
+ error->simulated |= request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE;
+
rb = request->ringbuf;
error->ring[i].cpu_ring_head = rb->head;
error->ring[i].cpu_ring_tail = rb->tail;
@@ -1333,12 +1335,14 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged,
i915_error_capture_msg(dev, error, wedged, error_msg);
DRM_INFO("%s\n", error->error_msg);
- spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
- if (dev_priv->gpu_error.first_error == NULL) {
- dev_priv->gpu_error.first_error = error;
- error = NULL;
+ if (!error->simulated) {
+ spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
+ if (dev_priv->gpu_error.first_error == NULL) {
+ dev_priv->gpu_error.first_error = error;
+ error = NULL;
+ }
+ spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
}
- spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
if (error) {
i915_error_state_free(&error->ref);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index acf21026c78a..7fee4416dcc7 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1140,6 +1140,7 @@ struct drm_i915_gem_context_param {
#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1
#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2
#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
+#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4
__u64 value;
};
--
2.7.0.rc3
More information about the Intel-gfx
mailing list