[Intel-gfx] [PATCH 2/2] drm/i915: Use rcu instead of stop_machine in set_wedged
Chris Wilson
chris at chris-wilson.co.uk
Tue Oct 10 13:39:37 UTC 2017
Quoting Daniel Vetter (2017-10-09 17:44:01)
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 82a10036fb38..8d7d8d1f78db 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3057,49 +3057,71 @@ static void nop_submit_request(struct drm_i915_gem_request *request)
>
>  	spin_lock_irqsave(&request->engine->timeline->lock, flags);
>  	__i915_gem_request_submit(request);
> -	intel_engine_init_global_seqno(request->engine, request->global_seqno);
>  	spin_unlock_irqrestore(&request->engine->timeline->lock, flags);
This reduces to i915_gem_request_submit().

Hah, you can just set engine->submit_request = i915_gem_request_submit, and
keep nop_submit_request for the second phase! That may make the
diff neater?
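
Something along these lines is what I mean, purely as a sketch against the
quoted diff (the phase comments are mine, and the fence-error/-EIO handling
is left out, so don't read it as the final shape):

	/* Phase 1: accept requests, but never pass them on to the hw */
	for_each_engine(engine, i915, id)
		engine->submit_request = i915_gem_request_submit;

	synchronize_rcu();

	/* Phase 2: reuse nop_submit_request as the completing variant */
	for_each_engine(engine, i915, id) {
		engine->cancel_requests(engine);
		engine->submit_request = nop_submit_request;
	}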
> -static int __i915_gem_set_wedged_BKL(void *data)
> +void i915_gem_set_wedged(struct drm_i915_private *i915)
>  {
> -	struct drm_i915_private *i915 = data;
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
> 
> +	/* First, stop submission to hw, but do not yet complete requests by
> +	 * rolling the global seqno forward (since this would complete requests
> +	 * for which we haven't set the fence error to EIO yet).
> +	 */
Style nits:

I'm doing a quiet fixup of all my comments to follow

/*
 * The core convention, which you normally use anyway.
 */
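
e.g. the comment quoted above would then read:

/*
 * First, stop submission to hw, but do not yet complete requests by
 * rolling the global seqno forward (since this would complete requests
 * for which we haven't set the fence error to EIO yet).
 */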
>  	for_each_engine(engine, i915, id)
> -		engine_set_wedged(engine);
> +		engine->submit_request = nop_submit_request;
> 
> -	set_bit(I915_WEDGED, &i915->gpu_error.flags);
> -	wake_up_all(&i915->gpu_error.reset_queue);
> +	/* Make sure no one is running the old callback before we proceed with
> +	 * cancelling requests and resetting the completion tracking. Otherwise
> +	 * we might submit a request to the hardware which never completes.
> +	 */
> +	synchronize_rcu();
> 
> -	return 0;
> -}
> +	for_each_engine(engine, i915, id) {
> +		/* Mark all executing requests as skipped */
> +		engine->cancel_requests(engine);
> 
> -void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
> -{
> -	stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);
> +		/* Only once we've force-cancelled all in-flight requests can we
> +		 * start to complete all requests.
> +		 */
> +		engine->submit_request = nop_complete_submit_request;
> +	}
> +
> +	/* Make sure no request can slip through without getting completed by
> +	 * either this call here to intel_engine_init_global_seqno, or the one
> +	 * in nop_complete_submit_request.
> +	 */
> +	synchronize_rcu();
> +
> +	for_each_engine(engine, i915, id) {
> +		unsigned long flags;
> +
> +		/* Mark all pending requests as complete so that any concurrent
> +		 * (lockless) lookup doesn't try and wait upon the request as we
> +		 * reset it.
> +		 */
> +		spin_lock_irqsave(&engine->timeline->lock, flags);
> +		intel_engine_init_global_seqno(engine,
> +					       intel_engine_last_submit(engine));
> +		spin_unlock_irqrestore(&engine->timeline->lock, flags);
> +	}
> +
> +	set_bit(I915_WEDGED, &i915->gpu_error.flags);
> +	wake_up_all(&i915->gpu_error.reset_queue);
>  }
> 
> bool i915_gem_unset_wedged(struct drm_i915_private *i915)
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index b100b38f1dd2..ef78a85cb845 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -556,7 +556,9 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
>  	switch (state) {
>  	case FENCE_COMPLETE:
>  		trace_i915_gem_request_submit(request);
> +		rcu_read_lock();
This trick needs a comment.
/*
* We need to serialize use of the submit_request() callback with its
* hotplugging performed during an emergency i915_gem_set_wedged().
* We use the RCU mechanism to mark the critical section in order to
* force i915_gem_set_wedged() to wait until the submit_request() is
* completed before proceeding.
*/
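
To spell out the pairing for anyone following along (a sketch of the
pattern only, assuming the rcu_read_unlock() lands right after the
engine->submit_request() call below the quoted line):

	/* reader side, in submit_notify() */
	rcu_read_lock();
	request->engine->submit_request(request);
	rcu_read_unlock();

	/* updater side, in i915_gem_set_wedged() */
	engine->submit_request = nop_submit_request;
	synchronize_rcu(); /* wait out readers still running the old callback */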
-Chris