[Intel-gfx] [PATCH 08/62] drm/i915: Remove stop-rings debugfs interface

Arun Siluvery arun.siluvery at linux.intel.com
Wed Jun 8 11:50:51 UTC 2016


On 03/06/2016 22:06, Chris Wilson wrote:
> Now that we have (near) universal GPU recovery code, we can inject a
> real hang from userspace and not need any fakery. Not only does this
> mean that the testing is far more realistic, but we can simplify the
> kernel in the process.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c     | 35 --------------------------
>   drivers/gpu/drm/i915/i915_drv.c         | 17 ++-----------
>   drivers/gpu/drm/i915/i915_drv.h         | 19 --------------
>   drivers/gpu/drm/i915/i915_gem.c         | 44 ++++++++++-----------------------
>   drivers/gpu/drm/i915/intel_lrc.c        |  3 ---
>   drivers/gpu/drm/i915/intel_ringbuffer.c |  8 ------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
>   7 files changed, 15 insertions(+), 112 deletions(-)
>

Looks good to me.
Reviewed-by: Arun Siluvery <arun.siluvery at linux.intel.com>

Regards,
Arun

> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index dd6cf222e8f5..8f576b443ff6 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4821,40 +4821,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
>   			"%llu\n");
>
>   static int
> -i915_ring_stop_get(void *data, u64 *val)
> -{
> -	struct drm_device *dev = data;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -
> -	*val = dev_priv->gpu_error.stop_rings;
> -
> -	return 0;
> -}
> -
> -static int
> -i915_ring_stop_set(void *data, u64 val)
> -{
> -	struct drm_device *dev = data;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	int ret;
> -
> -	DRM_DEBUG_DRIVER("Stopping rings 0x%08llx\n", val);
> -
> -	ret = mutex_lock_interruptible(&dev->struct_mutex);
> -	if (ret)
> -		return ret;
> -
> -	dev_priv->gpu_error.stop_rings = val;
> -	mutex_unlock(&dev->struct_mutex);
> -
> -	return 0;
> -}
> -
> -DEFINE_SIMPLE_ATTRIBUTE(i915_ring_stop_fops,
> -			i915_ring_stop_get, i915_ring_stop_set,
> -			"0x%08llx\n");
> -
> -static int
>   i915_ring_missed_irq_get(void *data, u64 *val)
>   {
>   	struct drm_device *dev = data;
> @@ -5457,7 +5423,6 @@ static const struct i915_debugfs_files {
>   	{"i915_max_freq", &i915_max_freq_fops},
>   	{"i915_min_freq", &i915_min_freq_fops},
>   	{"i915_cache_sharing", &i915_cache_sharing_fops},
> -	{"i915_ring_stop", &i915_ring_stop_fops},
>   	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
>   	{"i915_ring_test_irq", &i915_ring_test_irq_fops},
>   	{"i915_gem_drop_caches", &i915_drop_caches_fops},
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 7ba040141722..f2ac0cae929b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -2125,24 +2125,11 @@ int i915_reset(struct drm_i915_private *dev_priv)
>   		goto error;
>   	}
>
> +	pr_notice("drm/i915: Resetting chip after gpu hang\n");
> +
>   	i915_gem_reset(dev);
>
>   	ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
> -
> -	/* Also reset the gpu hangman. */
> -	if (error->stop_rings != 0) {
> -		DRM_INFO("Simulated gpu hang, resetting stop_rings\n");
> -		error->stop_rings = 0;
> -		if (ret == -ENODEV) {
> -			DRM_INFO("Reset not implemented, but ignoring "
> -				 "error for simulated gpu hangs\n");
> -			ret = 0;
> -		}
> -	}
> -
> -	if (i915_stop_ring_allow_warn(dev_priv))
> -		pr_notice("drm/i915: Resetting chip after gpu hang\n");
> -
>   	if (ret) {
>   		if (ret != -ENODEV)
>   			DRM_ERROR("Failed to reset chip: %i\n", ret);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3f075adf9e84..a48c0f4e1d42 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1393,13 +1393,6 @@ struct i915_gpu_error {
>   	 */
>   	wait_queue_head_t reset_queue;
>
> -	/* Userspace knobs for gpu hang simulation;
> -	 * combines both a ring mask, and extra flags
> -	 */
> -	u32 stop_rings;
> -#define I915_STOP_RING_ALLOW_BAN       (1 << 31)
> -#define I915_STOP_RING_ALLOW_WARN      (1 << 30)
> -
>   	/* For missed irq/seqno simulation. */
>   	unsigned long test_irq_rings;
>   };
> @@ -3292,18 +3285,6 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
>   	return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2;
>   }
>
> -static inline bool i915_stop_ring_allow_ban(struct drm_i915_private *dev_priv)
> -{
> -	return dev_priv->gpu_error.stop_rings == 0 ||
> -		dev_priv->gpu_error.stop_rings & I915_STOP_RING_ALLOW_BAN;
> -}
> -
> -static inline bool i915_stop_ring_allow_warn(struct drm_i915_private *dev_priv)
> -{
> -	return dev_priv->gpu_error.stop_rings == 0 ||
> -		dev_priv->gpu_error.stop_rings & I915_STOP_RING_ALLOW_WARN;
> -}
> -
>   void i915_gem_reset(struct drm_device *dev);
>   bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
>   int __must_check i915_gem_init(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0f487e3b920c..f48f54193972 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2703,44 +2703,30 @@ void __i915_add_request(struct drm_i915_gem_request *request,
>   	i915_gem_mark_busy(dev_priv, engine);
>   }
>
> -static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
> -				   const struct i915_gem_context *ctx)
> +static bool i915_context_is_banned(const struct i915_gem_context *ctx)
>   {
>   	unsigned long elapsed;
>
> -	elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
> -
>   	if (ctx->hang_stats.banned)
>   		return true;
>
> +	elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
>   	if (ctx->hang_stats.ban_period_seconds &&
>   	    elapsed <= ctx->hang_stats.ban_period_seconds) {
> -		if (!i915_gem_context_is_default(ctx)) {
> -			DRM_DEBUG("context hanging too fast, banning!\n");
> -			return true;
> -		} else if (i915_stop_ring_allow_ban(dev_priv)) {
> -			if (i915_stop_ring_allow_warn(dev_priv))
> -				DRM_ERROR("gpu hanging too fast, banning!\n");
> -			return true;
> -		}
> +		DRM_DEBUG("context hanging too fast, banning!\n");
> +		return true;
>   	}
>
>   	return false;
>   }
>
> -static void i915_set_reset_status(struct drm_i915_private *dev_priv,
> -				  struct i915_gem_context *ctx,
> +static void i915_set_reset_status(struct i915_gem_context *ctx,
>   				  const bool guilty)
>   {
> -	struct i915_ctx_hang_stats *hs;
> -
> -	if (WARN_ON(!ctx))
> -		return;
> -
> -	hs = &ctx->hang_stats;
> +	struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
>
>   	if (guilty) {
> -		hs->banned = i915_context_is_banned(dev_priv, ctx);
> +		hs->banned = i915_context_is_banned(ctx);
>   		hs->batch_active++;
>   		hs->guilty_ts = get_seconds();
>   	} else {
> @@ -2867,27 +2853,23 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
>   	return NULL;
>   }
>
> -static void i915_gem_reset_engine_status(struct drm_i915_private *dev_priv,
> -				       struct intel_engine_cs *engine)
> +static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_gem_request *request;
>   	bool ring_hung;
>
>   	request = i915_gem_find_active_request(engine);
> -
>   	if (request == NULL)
>   		return;
>
>   	ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
>
> -	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
> -
> +	i915_set_reset_status(request->ctx, ring_hung);
>   	list_for_each_entry_continue(request, &engine->request_list, list)
> -		i915_set_reset_status(dev_priv, request->ctx, false);
> +		i915_set_reset_status(request->ctx, false);
>   }
>
> -static void i915_gem_reset_engine_cleanup(struct drm_i915_private *dev_priv,
> -					struct intel_engine_cs *engine)
> +static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
>   {
>   	struct intel_ringbuffer *buffer;
>
> @@ -2957,10 +2939,10 @@ void i915_gem_reset(struct drm_device *dev)
>   	 * their reference to the objects, the inspection must be done first.
>   	 */
>   	for_each_engine(engine, dev_priv)
> -		i915_gem_reset_engine_status(dev_priv, engine);
> +		i915_gem_reset_engine_status(engine);
>
>   	for_each_engine(engine, dev_priv)
> -		i915_gem_reset_engine_cleanup(dev_priv, engine);
> +		i915_gem_reset_engine_cleanup(engine);
>
>   	i915_gem_context_reset(dev);
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 9e19b2c5b3ae..0742a849acce 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -764,9 +764,6 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>   	intel_logical_ring_emit(ringbuf, MI_NOOP);
>   	intel_logical_ring_advance(ringbuf);
>
> -	if (intel_engine_stopped(engine))
> -		return 0;
> -
>   	/* We keep the previous context alive until we retire the following
>   	 * request. This ensures that any the context object is still pinned
>   	 * for any residual writes the HW makes into it on the context switch
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 161c0792b1bf..327ad7fdf118 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -58,18 +58,10 @@ void intel_ring_update_space(struct intel_ringbuffer *ringbuf)
>   					    ringbuf->tail, ringbuf->size);
>   }
>
> -bool intel_engine_stopped(struct intel_engine_cs *engine)
> -{
> -	struct drm_i915_private *dev_priv = engine->i915;
> -	return dev_priv->gpu_error.stop_rings & intel_engine_flag(engine);
> -}
> -
>   static void __intel_ring_advance(struct intel_engine_cs *engine)
>   {
>   	struct intel_ringbuffer *ringbuf = engine->buffer;
>   	ringbuf->tail &= ringbuf->size - 1;
> -	if (intel_engine_stopped(engine))
> -		return;
>   	engine->write_tail(engine, ringbuf->tail);
>   }
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index d0cd9a1aa80e..6017367e94fb 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -480,7 +480,6 @@ static inline void intel_ring_advance(struct intel_engine_cs *engine)
>   }
>   int __intel_ring_space(int head, int tail, int size);
>   void intel_ring_update_space(struct intel_ringbuffer *ringbuf);
> -bool intel_engine_stopped(struct intel_engine_cs *engine);
>
>   int __must_check intel_engine_idle(struct intel_engine_cs *engine);
>   void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno);
>



More information about the Intel-gfx mailing list