[Intel-gfx] [PATCH 5/6] drm/i915: Add per client max context ban limit
Chris Wilson
chris at chris-wilson.co.uk
Tue Nov 15 16:27:18 UTC 2016
On Tue, Nov 15, 2016 at 04:36:35PM +0200, Mika Kuoppala wrote:
> If we have a bad client submitting unfavourably across different
> contexts, creating new ones, the per context scoring of badness
> doesn't remove the root cause, the offending client.
> To counter, keep track of per client context bans. Deny access if
> client is responsible for more than 3 context bans in
> it's lifetime.
>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 4 ++++
> drivers/gpu/drm/i915/i915_gem.c | 28 +++++++++++++++++++++-------
> drivers/gpu/drm/i915/i915_gem_context.c | 16 ++++++++++++++++
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 ++++++--
> drivers/gpu/drm/i915/i915_gpu_error.c | 10 ++++++----
> 5 files changed, 53 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5af1c38..92c5284 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -411,6 +411,9 @@ struct drm_i915_file_private {
> } rps;
>
> unsigned int bsd_engine;
> +
> +#define I915_MAX_CLIENT_CONTEXT_BANS 3
> + int context_bans;
3 feels a little too small. But possibly ok since this is at the context
level. Still webgl...
> };
>
> /* Used by dp and fdi links */
> @@ -854,6 +857,7 @@ struct drm_i915_error_state {
>
> pid_t pid;
> char comm[TASK_COMM_LEN];
> + int context_bans;
> } engine[I915_NUM_ENGINES];
>
> struct drm_i915_error_buffer {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 40a9e10..f56230f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2636,19 +2636,33 @@ static bool i915_context_is_banned(const struct i915_gem_context *ctx)
> return false;
> }
>
> -static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
> +static void i915_gem_request_mark_guilty(struct drm_i915_gem_request *request)
> {
> - struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
> + struct i915_ctx_hang_stats *hs = &request->ctx->hang_stats;
>
> hs->ban_score += 10;
>
> - hs->banned = i915_context_is_banned(ctx);
> + hs->banned = i915_context_is_banned(request->ctx);
> hs->batch_active++;
> +
> + DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
> + request->ctx->name, hs->ban_score, yesno(hs->banned));
> +
> + if (!request->file_priv)
> + return;
> +
> + if (hs->banned) {
> + request->file_priv->context_bans++;
> +
> + DRM_DEBUG_DRIVER("client %s has has %d context banned\n",
> + request->ctx->name,
> + request->file_priv->context_bans);
> + }
> }
>
> -static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
> +static void i915_gem_request_mark_innocent(struct drm_i915_gem_request *request)
> {
> - struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
> + struct i915_ctx_hang_stats *hs = &request->ctx->hang_stats;
>
> hs->batch_pending++;
> }
> @@ -2719,9 +2733,9 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
> }
>
> if (ring_hung)
> - i915_gem_context_mark_guilty(request->ctx);
> + i915_gem_request_mark_guilty(request);
> else
> - i915_gem_context_mark_innocent(request->ctx);
> + i915_gem_request_mark_innocent(request);
Why? Just use the file_priv on the ctx.
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 9abaae4..4ddd044 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -347,6 +347,14 @@ __create_hw_context(struct drm_device *dev,
> return ERR_PTR(ret);
> }
>
> +static bool client_is_banned(struct drm_i915_file_private *file_priv)
> +{
> + if (file_priv->context_bans <= I915_MAX_CLIENT_CONTEXT_BANS)
> + return false;
return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS;
> +
> + return true;
> +}
> +
> /**
> * The default context needs to exist per ring that uses contexts. It stores the
> * context state of the GPU for applications that don't utilize HW contexts, as
> @@ -360,6 +368,14 @@ i915_gem_create_context(struct drm_device *dev,
>
> lockdep_assert_held(&dev->struct_mutex);
>
> + if (file_priv && client_is_banned(file_priv)) {
So this belongs to context_craate_ioctl
> + DRM_DEBUG("client %s[%d] banned from creating ctx\n",
> + current->comm,
> + pid_nr(get_task_pid(current, PIDTYPE_PID)));
> +
> + return ERR_PTR(-EIO);
> + }
> +
> ctx = __create_hw_context(dev, file_priv);
> if (IS_ERR(ctx))
> return ctx;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index e804cb2..21beaf0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1231,16 +1231,20 @@ static struct i915_gem_context *
> i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
> struct intel_engine_cs *engine, const u32 ctx_id)
> {
> + struct drm_i915_file_private *file_priv = file->driver_priv;
> struct i915_gem_context *ctx;
> struct i915_ctx_hang_stats *hs;
>
> - ctx = i915_gem_context_lookup(file->driver_priv, ctx_id);
> + ctx = i915_gem_context_lookup(file_priv, ctx_id);
> if (IS_ERR(ctx))
> return ctx;
>
> hs = &ctx->hang_stats;
> if (hs->banned) {
> - DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
> + DRM_DEBUG("Client %s banned from submitting (%d:%d)\n",
> + ctx->name,
> + file_priv->context_bans,
> + hs->ban_score);
Context bans prevents creating new contexts. What is its relevance here?
(We should start using pr_debug for these user aides, at least something
more suitable than DRM_DEBUG).
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
More information about the Intel-gfx
mailing list