[Intel-gfx] [PATCH 2/2] drm/i915: add i915_get_reset_stats_ioctl

Ian Romanick idr at freedesktop.org
Wed Oct 30 18:37:38 CET 2013


On 10/30/2013 06:44 AM, Mika Kuoppala wrote:
> This ioctl returns reset stats for specified context.
> 
> The struct returned contains context loss counters.
> 
> reset_count:    all resets across all contexts
> batch_active:   active batches lost on resets
> batch_pending:  pending batches lost on resets
> 
> v2: get rid of state tracking completely and deliver only counts. Idea
>     from Chris Wilson.
> 
> v3: fix commit message
> 
> v4: default context handled inside i915_gem_context_get_hang_stats
> 
> v5: reset_count only for priviledged process
> 
> v6: ctx=0 needs CAP_SYS_ADMIN for batch_* counters (Chris Wilson)
> 
> v7: context hang stats never returns NULL
> 
> v8: rebased on top of reworked context hang stats
>     DRM_RENDER_ALLOW for ioctl
> 
> v9: use DEFAULT_CONTEXT_ID. Improve comments for ioctl struct members
> 
> Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
> Cc: Ian Romanick <idr at freedesktop.org>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> ---
>  drivers/gpu/drm/i915/i915_dma.c     |    1 +
>  drivers/gpu/drm/i915/i915_drv.h     |    2 ++
>  drivers/gpu/drm/i915/intel_uncore.c |   34 ++++++++++++++++++++++++++++++++++
>  include/uapi/drm/i915_drm.h         |   19 +++++++++++++++++++
>  4 files changed, 56 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 6eecce7..f2cdeb2 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1921,6 +1921,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
>  	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  };
>  
>  int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 9fd716d..8870804 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2369,6 +2369,8 @@ extern int intel_enable_rc6(const struct drm_device *dev);
>  extern bool i915_semaphore_is_enabled(struct drm_device *dev);
>  int i915_reg_read_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *file);
> +int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
> +			       struct drm_file *file);
>  
>  /* overlay */
>  extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index f6fae35..21cf951 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -633,6 +633,40 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>  	return 0;
>  }
>  
> +int i915_get_reset_stats_ioctl(struct drm_device *dev,
> +			       void *data, struct drm_file *file)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_reset_stats *args = data;
> +	struct i915_ctx_hang_stats *hs;
> +	int ret;
> +
> +	if (args->ctx_id == DEFAULT_CONTEXT_ID && !capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	ret = mutex_lock_interruptible(&dev->struct_mutex);
> +	if (ret)
> +		return ret;
> +
> +	hs = i915_gem_context_get_hang_stats(dev, file, args->ctx_id);
> +	if (IS_ERR(hs)) {
> +		mutex_unlock(&dev->struct_mutex);
> +		return PTR_ERR(hs);
> +	}
> +
> +	if (capable(CAP_SYS_ADMIN))
> +		args->reset_count = i915_reset_count(&dev_priv->gpu_error);
> +	else
> +		args->reset_count = 0;

We're having some additional debate about issues related to this.  Eric
(added to CC so he'll notice) believes that we may encounter memory
corruption around a reset (most likely causing the reset instead of the
other way around).  This means that we may need to deliver a reset
notification to an otherwise unaffected GL context after all. :(

If we decided that this is possible, we should deliver a single bit to
user mode that says "there was a reset after this context was created."
 I assume that could be returned to user space in the flags field?

I don't think this provides the same potential information leak as
directly exposing the global reset count, but I could be wrong.

I don't think we need to change anything /yet/, but we may need to soon.

> +
> +	args->batch_active = hs->batch_active;
> +	args->batch_pending = hs->batch_pending;
> +
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	return 0;
> +}
> +
>  static int i965_reset_complete(struct drm_device *dev)
>  {
>  	u8 gdrst;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 3a4e97b..52aed89 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -222,6 +222,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_GEM_SET_CACHING	0x2f
>  #define DRM_I915_GEM_GET_CACHING	0x30
>  #define DRM_I915_REG_READ		0x31
> +#define DRM_I915_GET_RESET_STATS	0x32
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -271,6 +272,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
>  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>  #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
> +#define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
>  
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -1030,4 +1032,21 @@ struct drm_i915_reg_read {
>  	__u64 offset;
>  	__u64 val; /* Return value */
>  };
> +
> +struct drm_i915_reset_stats {
> +	__u32 ctx_id;
> +	__u32 flags;
> +
> +	/* All resets since boot/module reload, for all contexts */
> +	__u32 reset_count;
> +
> +	/* Number of batches lost when active in GPU, for this context */
> +	__u32 batch_active;
> +
> +	/* Number of batches lost pending for execution, for this context */
> +	__u32 batch_pending;
> +
> +	__u32 pad;
> +};
> +
>  #endif /* _UAPI_I915_DRM_H_ */
> 




More information about the Intel-gfx mailing list