[Intel-gfx] [PATCH 2/2] drm/i915: add i915_get_reset_stats_ioctl
Ian Romanick
idr at freedesktop.org
Wed Oct 30 18:37:38 CET 2013
On 10/30/2013 06:44 AM, Mika Kuoppala wrote:
> This ioctl returns reset stats for specified context.
>
> The struct returned contains context loss counters.
>
> reset_count: all resets across all contexts
> batch_active: active batches lost on resets
> batch_pending: pending batches lost on resets
>
> v2: get rid of state tracking completely and deliver only counts. Idea
> from Chris Wilson.
>
> v3: fix commit message
>
> v4: default context handled inside i915_gem_context_get_hang_stats
>
> v5: reset_count only for privileged process
>
> v6: ctx=0 needs CAP_SYS_ADMIN for batch_* counters (Chris Wilson)
>
> v7: context hang stats never returns NULL
>
> v8: rebased on top of reworked context hang stats
> DRM_RENDER_ALLOW for ioctl
>
> v9: use DEFAULT_CONTEXT_ID. Improve comments for ioctl struct members
>
> Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
> Cc: Ian Romanick <idr at freedesktop.org>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> ---
> drivers/gpu/drm/i915/i915_dma.c | 1 +
> drivers/gpu/drm/i915/i915_drv.h | 2 ++
> drivers/gpu/drm/i915/intel_uncore.c | 34 ++++++++++++++++++++++++++++++++++
> include/uapi/drm/i915_drm.h | 19 +++++++++++++++++++
> 4 files changed, 56 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 6eecce7..f2cdeb2 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1921,6 +1921,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
> DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> };
>
> int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 9fd716d..8870804 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2369,6 +2369,8 @@ extern int intel_enable_rc6(const struct drm_device *dev);
> extern bool i915_semaphore_is_enabled(struct drm_device *dev);
> int i915_reg_read_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file);
> +int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file);
>
> /* overlay */
> extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index f6fae35..21cf951 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -633,6 +633,40 @@ int i915_reg_read_ioctl(struct drm_device *dev,
> return 0;
> }
>
> +int i915_get_reset_stats_ioctl(struct drm_device *dev,
> + void *data, struct drm_file *file)
> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct drm_i915_reset_stats *args = data;
> + struct i915_ctx_hang_stats *hs;
> + int ret;
> +
> + if (args->ctx_id == DEFAULT_CONTEXT_ID && !capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + ret = mutex_lock_interruptible(&dev->struct_mutex);
> + if (ret)
> + return ret;
> +
> + hs = i915_gem_context_get_hang_stats(dev, file, args->ctx_id);
> + if (IS_ERR(hs)) {
> + mutex_unlock(&dev->struct_mutex);
> + return PTR_ERR(hs);
> + }
> +
> + if (capable(CAP_SYS_ADMIN))
> + args->reset_count = i915_reset_count(&dev_priv->gpu_error);
> + else
> + args->reset_count = 0;
We're having some additional debate about issues related to this. Eric
(added to CC so he'll notice) believes that we may encounter memory
corruption around a reset (most likely with the corruption causing the
reset, rather than the reset causing the corruption). This means that we
may need to deliver a reset notification to an otherwise unaffected GL
context after all. :(
If we decide that this is possible, we should deliver a single bit to
user mode that says "there was a reset after this context was created."
I assume that could be returned to user space in the flags field?
I don't think this provides the same potential information leak as
directly exposing the global reset count, but I could be wrong.
I don't think we need to change anything /yet/, but we may need to soon.
> +
> + args->batch_active = hs->batch_active;
> + args->batch_pending = hs->batch_pending;
> +
> + mutex_unlock(&dev->struct_mutex);
> +
> + return 0;
> +}
> +
> static int i965_reset_complete(struct drm_device *dev)
> {
> u8 gdrst;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 3a4e97b..52aed89 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -222,6 +222,7 @@ typedef struct _drm_i915_sarea {
> #define DRM_I915_GEM_SET_CACHING 0x2f
> #define DRM_I915_GEM_GET_CACHING 0x30
> #define DRM_I915_REG_READ 0x31
> +#define DRM_I915_GET_RESET_STATS 0x32
>
> #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
> #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -271,6 +272,7 @@ typedef struct _drm_i915_sarea {
> #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
> #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
> #define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
> +#define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
>
> /* Allow drivers to submit batchbuffers directly to hardware, relying
> * on the security mechanisms provided by hardware.
> @@ -1030,4 +1032,21 @@ struct drm_i915_reg_read {
> __u64 offset;
> __u64 val; /* Return value */
> };
> +
> +struct drm_i915_reset_stats {
> + __u32 ctx_id;
> + __u32 flags;
> +
> + /* All resets since boot/module reload, for all contexts */
> + __u32 reset_count;
> +
> + /* Number of batches lost when active in GPU, for this context */
> + __u32 batch_active;
> +
> + /* Number of batches lost pending for execution, for this context */
> + __u32 batch_pending;
> +
> + __u32 pad;
> +};
> +
> #endif /* _UAPI_I915_DRM_H_ */
>
More information about the Intel-gfx
mailing list