[Intel-gfx] [PATCH 1/2] drm/i915: Fix modeset handling during gpu reset, v2.

Mon May 9 10:29:37 UTC 2016

Op 06-05-16 om 15:06 schreef Ville Syrjälä:
> On Mon, May 02, 2016 at 10:57:01AM +0200, Maarten Lankhorst wrote:
>> This function would call drm_modeset_lock_all, while the suspend/resume
>> functions already have their own locking. Fix this by factoring out
>> __intel_display_resume, and calling the atomic helpers for duplicating
>> atomic state and disabling all crtc's during suspend.
>>
>> Changes since v1:
>> - Deal with -EDEADLK right after lock_all and clean up calls
>>   to hw readout.
>> - Always take all modeset locks so updates during gpu reset are blocked.
> Found this patch by accident. --in-reply-to would have helped a bit.
>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
>> Fixes: e2c8b8701e2d ("drm/i915: Use atomic helpers for suspend, v2.")
>> Cc: drm-intel-fixes at lists.freedesktop.org
>> ---
>>  drivers/gpu/drm/i915/intel_display.c | 141 ++++++++++++++++++++++-------------
>>  1 file changed, 89 insertions(+), 52 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
>> index 2f36414702fe..4fb904f2bcf9 100644
>> --- a/drivers/gpu/drm/i915/intel_display.c
>> +++ b/drivers/gpu/drm/i915/intel_display.c
>> @@ -3144,27 +3144,83 @@ static void intel_update_primary_planes(struct drm_device *dev)
>>  	}
>>  }
>>  
>> +static int
>> +__intel_display_resume(struct drm_device *dev,
>> +		       struct drm_atomic_state *state)
>> +{
>> +	struct drm_crtc_state *crtc_state;
>> +	struct drm_crtc *crtc;
>> +	int i;
>> +
>> +	intel_modeset_setup_hw_state(dev);
>> +	i915_redisable_vga(dev);
>> +
>> +	if (!state)
>> +		return 0;
>> +
>> +	for_each_crtc_in_state(state, crtc, crtc_state, i) {
>> +		/*
>> +		 * Force recalculation even if we restore
>> +		 * current state. With fast modeset this may not result
>> +		 * in a modeset when the state is compatible.
>> +		 */
>> +		crtc_state->mode_changed = true;
>> +	}
>> +
>> +	return drm_atomic_commit(state);
> The -EDEADLK warn could be here so we don't have to duplicate it in two
> places perhaps? Extracting __intel_display_reset() could also be a separate
> patch to make this stuff a bit easier to review.
>
> Oh and BTW resume is also broken on platforms that have the force pipe A
> quirk. I do have some patches lined up to nuke that quirk for good, which
> I should probably post sooner rather than later. But those have at least
> a theoretical chance of regressing something, so in the meantime I think
> we'll still need to fix this thing for the normal resume path as well.
>
>> +}
>> +
>>  void intel_prepare_reset(struct drm_device *dev)
>>  {
>> +	struct drm_atomic_state *state;
>> +	struct drm_modeset_acquire_ctx *pctx;
>> +	struct drm_i915_private *dev_priv = to_i915(dev);
>> +	int ret;
>> +
>>  	/* no reset support for gen2 */
>> -	if (IS_GEN2(dev))
>> +	if (IS_GEN2(dev_priv))
>>  		return;
>>  
>> -	/* reset doesn't touch the display */
>> -	if (INTEL_INFO(dev)->gen >= 5 || IS_G4X(dev))
>> +	drm_modeset_lock_all(dev);
> Isn't that going to clash with the locking in
> intel_update_primary_planes() ?
Oops indeed, thanks for catching.
>> +
>> +	/* reset doesn't touch the display, but flips might get nuked anyway, */
>> +	if (INTEL_INFO(dev_priv)->gen >= 5 || IS_G4X(dev_priv))
>>  		return;
>>  
>> -	drm_modeset_lock_all(dev);
>> +	pctx = dev->mode_config.acquire_ctx;
> Still looks like power context.
Will change to ctx.
>> +
>>  	/*
>>  	 * Disabling the crtcs gracefully seems nicer. Also the
>>  	 * g33 docs say we should at least disable all the planes.
>>  	 */
>> -	intel_display_suspend(dev);
>> +
>> +	state = drm_atomic_helper_duplicate_state(dev, pctx);
>> +	if (IS_ERR(state)) {
>> +		ret = PTR_ERR(state);
>> +		state = NULL;
>> +		DRM_ERROR("Duplicating state failed with %i\n", ret);
>> +		goto err;
>> +	}
>> +
>> +	ret = drm_atomic_helper_disable_all(dev, pctx);
>> +	if (ret) {
>> +		DRM_ERROR("Suspending crtc's failed with %i\n", ret);
>> +		goto err;
>> +	}
>> +
>> +	dev_priv->modeset_restore_state = state;
>> +	state->acquire_ctx = pctx;
>> +	return;
>> +
>> +err:
>> +	drm_atomic_state_free(state);
>>  }
>>  
>>  void intel_finish_reset(struct drm_device *dev)
>>  {
>>  	struct drm_i915_private *dev_priv = to_i915(dev);
>> +	struct drm_atomic_state *state = dev_priv->modeset_restore_state;
>> +	int ret;
>>  
>>  	/*
>>  	 * Flips in the rings will be nuked by the reset,
>> @@ -3177,6 +3233,8 @@ void intel_finish_reset(struct drm_device *dev)
>>  	if (IS_GEN2(dev))
>>  		return;
>>  
>> +	dev_priv->modeset_restore_state = NULL;
>> +
>>  	/* reset doesn't touch the display */
>>  	if (INTEL_INFO(dev)->gen >= 5 || IS_G4X(dev)) {
>>  		/*
>> @@ -3189,26 +3247,27 @@ void intel_finish_reset(struct drm_device *dev)
>>  		 * CS-based flips (which might get lost in gpu resets) any more.
>>  		 */
>>  		intel_update_primary_planes(dev);
>> -		return;
>> -	}
>> -
>> -	/*
>> -	 * The display has been reset as well,
>> -	 * so need a full re-initialization.
>> -	 */
>> -	intel_runtime_pm_disable_interrupts(dev_priv);
>> -	intel_runtime_pm_enable_interrupts(dev_priv);
>> +	} else {
>> +		/*
>> +		 * The display has been reset as well,
>> +		 * so need a full re-initialization.
>> +		 */
>> +		intel_runtime_pm_disable_interrupts(dev_priv);
>> +		intel_runtime_pm_enable_interrupts(dev_priv);
>>  
>> -	intel_modeset_init_hw(dev);
>> +		intel_modeset_init_hw(dev);
>>  
>> -	spin_lock_irq(&dev_priv->irq_lock);
>> -	if (dev_priv->display.hpd_irq_setup)
>> -		dev_priv->display.hpd_irq_setup(dev);
>> -	spin_unlock_irq(&dev_priv->irq_lock);
>> +		spin_lock_irq(&dev_priv->irq_lock);
>> +		if (dev_priv->display.hpd_irq_setup)
>> +			dev_priv->display.hpd_irq_setup(dev);
>> +		spin_unlock_irq(&dev_priv->irq_lock);
>>  
>> -	intel_display_resume(dev);
>> +		ret = __intel_display_resume(dev, state);
>> +		if (ret)
>> +			DRM_ERROR("Restoring old state failed with %i\n", ret);
>>  
>> -	intel_hpd_init(dev_priv);
>> +		intel_hpd_init(dev_priv);
>> +	}
>>  
>>  	drm_modeset_unlock_all(dev);
>>  }
>> @@ -15957,9 +16016,10 @@ void intel_display_resume(struct drm_device *dev)
>>  	struct drm_atomic_state *state = dev_priv->modeset_restore_state;
>>  	struct drm_modeset_acquire_ctx ctx;
>>  	int ret;
>> -	bool setup = false;
>>  
>>  	dev_priv->modeset_restore_state = NULL;
>> +	if (state)
>> +		state->acquire_ctx = &ctx;
>>  
>>  	/*
>>  	 * This is a cludge because with real atomic modeset mode_config.mutex
>> @@ -15970,40 +16030,17 @@ void intel_display_resume(struct drm_device *dev)
>>  	mutex_lock(&dev->mode_config.mutex);
>>  	drm_modeset_acquire_init(&ctx, 0);
>>  
>> -retry:
>> -	ret = drm_modeset_lock_all_ctx(dev, &ctx);
>> -
>> -	if (ret == 0 && !setup) {
>> -		setup = true;
>> -
>> -		intel_modeset_setup_hw_state(dev);
>> -		i915_redisable_vga(dev);
>> -	}
>> -
>> -	if (ret == 0 && state) {
>> -		struct drm_crtc_state *crtc_state;
>> -		struct drm_crtc *crtc;
>> -		int i;
>> -
>> -		state->acquire_ctx = &ctx;
>> -
>> -		for_each_crtc_in_state(state, crtc, crtc_state, i) {
>> -			/*
>> -			 * Force recalculation even if we restore
>> -			 * current state. With fast modeset this may not result
>> -			 * in a modeset when the state is compatible.
>> -			 */
>> -			crtc_state->mode_changed = true;
>> -		}
>> -
>> -		ret = drm_atomic_commit(state);
>> -	}
>> +	while (1) {
>> +		ret = drm_modeset_lock_all_ctx(dev, &ctx);
>> +		if (ret != -EDEADLK)
>> +			break;
>>  
>> -	if (ret == -EDEADLK) {
>>  		drm_modeset_backoff(&ctx);
>> -		goto retry;
>>  	}
>>  
>> +	ret = __intel_display_resume(dev, state);
> Shouldn't we skip this call if the lock_all failed?
I guess for paranoia I could, but the locking can only fail with EDEADLK or EALREADY. EALREADY is mapped to 0 since multiple locking calls are allowed,
so can realistically only fail with -EDEADLK, won't even be able to fail with -EINTR in the future since s/r is not interruptible.