[Intel-gfx] [PATCH 08/26] drm/i915: Make the semaphore saturation mask global
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Jun 19 10:45:47 UTC 2019
On 18/06/2019 08:41, Chris Wilson wrote:
> The idea behind keeping the saturation mask local to a context backfired
> spectacularly. The premise with the local mask was that we would be more
> proactive in attempting to use semaphores after each time the context
> idled, and that all new contexts would attempt to use semaphores
> ignoring the current state of the system. This turns out to be horribly
> optimistic. If the system state is still oversaturated and the existing
> workloads have all stopped using semaphores, the new workloads would
> attempt to use semaphores and be deprioritised behind real work. The
> new contexts would not switch off using semaphores until their initial
> batch of low priority work had completed. Given a sufficient backlog of
> equal-priority user work, this would completely starve the new work of any
> GPU time.
>
> To compensate, remove the local tracking in favour of keeping it as
> global state on the engine -- once the system is saturated and
> semaphores are disabled, everyone stops attempting to use semaphores
> until the system is idle again. One of the reasons for preferring local
> context tracking was that it worked with virtual engines, so for
> switching to global state we could either do a complete check of all the
> virtual siblings or simply disable semaphores for those requests. This
> takes the simpler approach of disabling semaphores on virtual engines.
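
Just to restate the new lifecycle for myself (rough sketch pieced together
from the hunks below, not the literal code paths):

	/* __i915_request_submit(): the semaphore was already signalled by
	 * the time we submitted, so the busywait was wasted -> record it
	 * engine-wide instead of per context. */
	engine->saturated |= request->sched.semaphores;

	/* already_busywaiting(): any later request on this engine now
	 * treats the poll as already-busywaiting and skips the semaphore. */
	return rq->sched.semaphores | rq->engine->saturated;

	/* __engine_park(): engine went idle -> clear the mask and give
	 * semaphores another chance. */
	engine->saturated = 0;

Which is indeed easier to reason about than the per-context mask.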
>
> The downside is that the decision that the engine is saturated is a
> local measure -- we are only checking whether or not this context was
> scheduled in a timely fashion, it may be legitimately delayed due to user
> priorities. We still have the same dilemma though, that we do not want
> to employ the semaphore poll unless it will be used.
>
> Fixes: ca6e56f654e7 ("drm/i915: Disable semaphore busywaits on saturated systems")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin at intel.com>
> Cc: Dmitry Ermilov <dmitry.ermilov at intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_context.c       | 2 --
>  drivers/gpu/drm/i915/gt/intel_context_types.h | 2 --
>  drivers/gpu/drm/i915/gt/intel_engine_pm.c     | 2 ++
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  | 2 ++
>  drivers/gpu/drm/i915/gt/intel_lrc.c           | 2 +-
>  drivers/gpu/drm/i915/i915_request.c           | 4 ++--
> 6 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 42f45744d859..2c454f227c2e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -142,7 +142,6 @@ intel_context_init(struct intel_context *ce,
> ce->engine = engine;
> ce->ops = engine->cops;
> ce->sseu = engine->sseu;
> - ce->saturated = 0;
>
> INIT_LIST_HEAD(&ce->signal_link);
> INIT_LIST_HEAD(&ce->signals);
> @@ -223,7 +222,6 @@ void intel_context_enter_engine(struct intel_context *ce)
>
> void intel_context_exit_engine(struct intel_context *ce)
> {
> - ce->saturated = 0;
> intel_engine_pm_put(ce->engine);
> }
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index b565c3ff4378..4c0e211c715d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -58,8 +58,6 @@ struct intel_context {
> atomic_t pin_count;
> struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
>
> - intel_engine_mask_t saturated; /* submitting semaphores too late? */
> -
> /**
> * active: Active tracker for the rq activity (inc. external) on this
> * intel_context object.
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index d14e352b0b17..2ce00d3dc42a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -100,6 +100,8 @@ static int __engine_park(struct intel_wakeref *wf)
> struct intel_engine_cs *engine =
> container_of(wf, typeof(*engine), wakeref);
>
> + engine->saturated = 0;
> +
> /*
> * If one and only one request is completed between pm events,
> * we know that we are inside the kernel context and it is
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 11a25f060fed..1cbe10a0fec7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -258,6 +258,8 @@ struct intel_engine_cs {
> struct intel_context *kernel_context; /* pinned */
> struct intel_context *preempt_context; /* pinned; optional */
>
> + intel_engine_mask_t saturated; /* submitting semaphores too late? */
> +
> unsigned long serial;
>
> unsigned long wakeref_serial;
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 0563fe8398c5..bbbdc63906c6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -3198,7 +3198,6 @@ static void virtual_context_exit(struct intel_context *ce)
> struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> unsigned int n;
>
> - ce->saturated = 0;
> for (n = 0; n < ve->num_siblings; n++)
> intel_engine_pm_put(ve->siblings[n]);
> }
> @@ -3397,6 +3396,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
> ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
> ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
> ve->base.flags = I915_ENGINE_IS_VIRTUAL;
> + ve->base.saturated = ALL_ENGINES;
This could use a comment.
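Something along these lines perhaps (just a suggestion, wording up to you):

	/*
	 * The physical engines are not known until execution, so we cannot
	 * probe the siblings' saturation state here. Err on the side of
	 * caution and never use semaphore busywaits on virtual engines.
	 */
	ve->base.saturated = ALL_ENGINES;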
>
> snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
>
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 98e4743b03be..27b9893fa8e3 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -418,7 +418,7 @@ void __i915_request_submit(struct i915_request *request)
> */
> if (request->sched.semaphores &&
> i915_sw_fence_signaled(&request->semaphore))
> - request->hw_context->saturated |= request->sched.semaphores;
> + engine->saturated |= request->sched.semaphores;
>
> /* We may be recursing from the signal callback of another i915 fence */
> spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
> @@ -798,7 +798,7 @@ already_busywaiting(struct i915_request *rq)
> *
> * See the are-we-too-late? check in __i915_request_submit().
> */
> - return rq->sched.semaphores | rq->hw_context->saturated;
> + return rq->sched.semaphores | rq->engine->saturated;
> }
>
> static int
>
Otherwise:
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko