[PATCH] drm/amdgpu: fix dead lock if any ip block resume failed in s3

Deucher, Alexander Alexander.Deucher at amd.com
Thu Apr 13 12:55:10 UTC 2017


> -----Original Message-----
> From: Huang Rui [mailto:ray.huang at amd.com]
> Sent: Thursday, April 13, 2017 4:12 AM
> To: amd-gfx at lists.freedesktop.org; Deucher, Alexander
> Cc: Koenig, Christian; Wang, Ken; Huang, Ray
> Subject: [PATCH] drm/amdgpu: fix dead lock if any ip block resume failed in
> s3
> 
> The driver must release the console lock whether or not the resume
> succeeds.  Otherwise, fb_console will wait for the lock forever and
> the system will hang.
> 
> [  244.405541] INFO: task kworker/0:0:4 blocked for more than 120 seconds.
> [  244.405543]       Tainted: G           OE   4.9.0-custom #1
> [  244.405544] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables
> this message.
> [  244.405541] INFO: task kworker/0:0:4 blocked for more than 120 seconds.
> [  244.405543]       Tainted: G           OE   4.9.0-custom #1
> [  244.405544] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables
> this message.
> [  244.405550] kworker/0:0     D    0     4      2 0x00080000
> [  244.405559] Workqueue: events console_callback
> [  244.405564]  ffff88045a2cfc00 0000000000000000 ffff880462b75940
> ffffffff81c0e500
> [  244.405568]  ffff880476419280 ffffc900018f7c90 ffffffff817dcf62
> 000000000000003c
> [  244.405572]  0000000100000000 0000000000000002 ffff880462b75940
> ffff880462b75940
> [  244.405573] Call Trace:
> [  244.405580]  [<ffffffff817dcf62>] ? __schedule+0x222/0x6a0
> [  244.405584]  [<ffffffff817dd416>] schedule+0x36/0x80
> [  244.405588]  [<ffffffff817e041c>] schedule_timeout+0x1fc/0x390
> [  244.405592]  [<ffffffff817df1b4>] __down_common+0xa5/0xf8
> [  244.405598]  [<ffffffff810b2ca8>] ? put_prev_entity+0x48/0x710
> [  244.405601]  [<ffffffff817df224>] __down+0x1d/0x1f
> [  244.405606]  [<ffffffff810c71a1>] down+0x41/0x50
> [  244.405611]  [<ffffffff810d380a>] console_lock+0x1a/0x40
> [  244.405614]  [<ffffffff814e3c03>] console_callback+0x13/0x160
> [  244.405617]  [<ffffffff817dcf6a>] ? __schedule+0x22a/0x6a0
> [  244.405623]  [<ffffffff810954e3>] process_one_work+0x153/0x3f0
> [  244.405628]  [<ffffffff81095cab>] worker_thread+0x12b/0x4b0
> [  244.405633]  [<ffffffff81095b80>] ? rescuer_thread+0x350/0x350
> [  244.405637]  [<ffffffff8109b473>] kthread+0xd3/0xf0
> [  244.405641]  [<ffffffff8109b3a0>] ? kthread_park+0x60/0x60
> [  244.405645]  [<ffffffff8109b3a0>] ? kthread_park+0x60/0x60
> [  244.405649]  [<ffffffff817e1ee5>] ret_from_fork+0x25/0x30
> 
> Signed-off-by: Huang Rui <ray.huang at amd.com>

Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 28 ++++++++++++----------------
>  1 file changed, 12 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index bd3a0d5..abb4dcc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2280,7 +2280,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
>  	struct drm_connector *connector;
>  	struct amdgpu_device *adev = dev->dev_private;
>  	struct drm_crtc *crtc;
> -	int r;
> +	int r = 0;
> 
>  	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
>  		return 0;
> @@ -2292,11 +2292,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
>  		pci_set_power_state(dev->pdev, PCI_D0);
>  		pci_restore_state(dev->pdev);
>  		r = pci_enable_device(dev->pdev);
> -		if (r) {
> -			if (fbcon)
> -				console_unlock();
> -			return r;
> -		}
> +		if (r)
> +			goto unlock;
>  	}
>  	if (adev->is_atom_fw)
>  		amdgpu_atomfirmware_scratch_regs_restore(adev);
> @@ -2313,7 +2310,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
>  	r = amdgpu_resume(adev);
>  	if (r) {
>  		DRM_ERROR("amdgpu_resume failed (%d).\n", r);
> -		return r;
> +		goto unlock;
>  	}
>  	amdgpu_fence_driver_resume(adev);
> 
> @@ -2324,11 +2321,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
>  	}
> 
>  	r = amdgpu_late_init(adev);
> -	if (r) {
> -		if (fbcon)
> -			console_unlock();
> -		return r;
> -	}
> +	if (r)
> +		goto unlock;
> 
>  	/* pin cursors */
>  	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
> @@ -2349,7 +2343,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
>  	}
>  	r = amdgpu_amdkfd_resume(adev);
>  	if (r)
> -		return r;
> +		goto unlock;
> 
>  	/* blat the mode back in */
>  	if (fbcon) {
> @@ -2396,12 +2390,14 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
>  	dev->dev->power.disable_depth--;
>  #endif
> 
> -	if (fbcon) {
> +	if (fbcon)
>  		amdgpu_fbdev_set_suspend(adev, 0);
> +
> +unlock:
> +	if (fbcon)
>  		console_unlock();
> -	}
> 
> -	return 0;
> +	return r;
>  }
> 
>  static bool amdgpu_check_soft_reset(struct amdgpu_device *adev)
> --
> 2.7.4



More information about the amd-gfx mailing list