[Intel-gfx] [PATCH 5/6] drm/i915: Add per client max context ban limit

Chris Wilson chris at chris-wilson.co.uk
Tue Nov 15 16:27:18 UTC 2016


On Tue, Nov 15, 2016 at 04:36:35PM +0200, Mika Kuoppala wrote:
> If we have a bad client submitting unfavourably across different
> contexts, creating new ones, the per context scoring of badness
> doesn't remove the root cause, the offending client.
> To counter, keep track of per client context bans. Deny access if
> client is responsible for more than 3 context bans in
> it's lifetime.
> 
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  4 ++++
>  drivers/gpu/drm/i915/i915_gem.c            | 28 +++++++++++++++++++++-------
>  drivers/gpu/drm/i915/i915_gem_context.c    | 16 ++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  8 ++++++--
>  drivers/gpu/drm/i915/i915_gpu_error.c      | 10 ++++++----
>  5 files changed, 53 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5af1c38..92c5284 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -411,6 +411,9 @@ struct drm_i915_file_private {
>  	} rps;
>  
>  	unsigned int bsd_engine;
> +
> +#define I915_MAX_CLIENT_CONTEXT_BANS 3
> +	int context_bans;

3 feels a little too small. But possibly ok since this is at the context
level. Still webgl...

>  };
>  
>  /* Used by dp and fdi links */
> @@ -854,6 +857,7 @@ struct drm_i915_error_state {
>  
>  		pid_t pid;
>  		char comm[TASK_COMM_LEN];
> +		int context_bans;
>  	} engine[I915_NUM_ENGINES];
>  
>  	struct drm_i915_error_buffer {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 40a9e10..f56230f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2636,19 +2636,33 @@ static bool i915_context_is_banned(const struct i915_gem_context *ctx)
>  	return false;
>  }
>  
> -static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
> +static void i915_gem_request_mark_guilty(struct drm_i915_gem_request *request)
>  {
> -	struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
> +	struct i915_ctx_hang_stats *hs = &request->ctx->hang_stats;
>  
>  	hs->ban_score += 10;
>  
> -	hs->banned = i915_context_is_banned(ctx);
> +	hs->banned = i915_context_is_banned(request->ctx);
>  	hs->batch_active++;
> +
> +	DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
> +			 request->ctx->name, hs->ban_score, yesno(hs->banned));
> +
> +	if (!request->file_priv)
> +		return;
> +
> +	if (hs->banned) {
> +		request->file_priv->context_bans++;
> +
> +		DRM_DEBUG_DRIVER("client %s has has %d context banned\n",
> +				 request->ctx->name,
> +				 request->file_priv->context_bans);
> +	}
>  }
>  
> -static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
> +static void i915_gem_request_mark_innocent(struct drm_i915_gem_request *request)
>  {
> -	struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
> +	struct i915_ctx_hang_stats *hs = &request->ctx->hang_stats;
>  
>  	hs->batch_pending++;
>  }
> @@ -2719,9 +2733,9 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
>  	}
>  
>  	if (ring_hung)
> -		i915_gem_context_mark_guilty(request->ctx);
> +		i915_gem_request_mark_guilty(request);
>  	else
> -		i915_gem_context_mark_innocent(request->ctx);
> +		i915_gem_request_mark_innocent(request);

Why? Just use the file_priv on the ctx.

> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 9abaae4..4ddd044 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -347,6 +347,14 @@ __create_hw_context(struct drm_device *dev,
>  	return ERR_PTR(ret);
>  }
>  
> +static bool client_is_banned(struct drm_i915_file_private *file_priv)
> +{
> +	if (file_priv->context_bans <= I915_MAX_CLIENT_CONTEXT_BANS)
> +		return false;

	return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS;
> +
> +	return true;
> +}
> +
>  /**
>   * The default context needs to exist per ring that uses contexts. It stores the
>   * context state of the GPU for applications that don't utilize HW contexts, as
> @@ -360,6 +368,14 @@ i915_gem_create_context(struct drm_device *dev,
>  
>  	lockdep_assert_held(&dev->struct_mutex);
>  
> +	if (file_priv && client_is_banned(file_priv)) {

So this belongs to context_craate_ioctl

> +		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
> +			  current->comm,
> +			  pid_nr(get_task_pid(current, PIDTYPE_PID)));
> +
> +		return ERR_PTR(-EIO);
> +	}
> +
>  	ctx = __create_hw_context(dev, file_priv);
>  	if (IS_ERR(ctx))
>  		return ctx;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index e804cb2..21beaf0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1231,16 +1231,20 @@ static struct i915_gem_context *
>  i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
>  			  struct intel_engine_cs *engine, const u32 ctx_id)
>  {
> +	struct drm_i915_file_private *file_priv = file->driver_priv;
>  	struct i915_gem_context *ctx;
>  	struct i915_ctx_hang_stats *hs;
>  
> -	ctx = i915_gem_context_lookup(file->driver_priv, ctx_id);
> +	ctx = i915_gem_context_lookup(file_priv, ctx_id);
>  	if (IS_ERR(ctx))
>  		return ctx;
>  
>  	hs = &ctx->hang_stats;
>  	if (hs->banned) {
> -		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
> +		DRM_DEBUG("Client %s banned from submitting (%d:%d)\n",
> +			  ctx->name,
> +			  file_priv->context_bans,
> +			  hs->ban_score);

Context bans prevents creating new contexts. What is its relevance here?

(We should start using pr_debug for these user aides, at least something
more suitable than DRM_DEBUG).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


More information about the Intel-gfx mailing list