[PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3

Christian König deathsimple at vodafone.de
Tue Aug 19 06:06:44 PDT 2014


Am 19.08.2014 um 14:22 schrieb Maarten Lankhorst:
> This is needed for the next commit, because the lockup detection
> will need the read lock to run.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at canonical.com>
> ---
> radeon_pm_compute_clocks already checks if dpm is enabled, so no need to check a second time.
>
> Because of locking and waiting stuff the radeon_pm_compute_clocks and resume_force_mode calls
> have to be done with read lock held.
>
> Seems to survive on my radeon when catting /sys/kernel/debug/dri/0/radeon_gpu_reset although
> uvd fails to reset, and that ring gets disabled as a result.

Depending on what hardware you have it's normal that UVD doesn't reset 
properly. I still haven't figured out the correct sequence in which I 
need to disable/enable the different UVD blocks on all hardware generations.

It seems to work fine on my Cayman, but doesn't for example on Turks 
(which at least theoretically should have the same UVD block). It should 
be fine as long as the engines gets properly disabled when the IB test 
fails after an reset.

Another common source of reset instability is DPM, while it now seems to 
be stable on NI and BTC I can't get a single reset to work once I use it.

Regarding the patch it looks good now, but I still want to test it a bit,
Christian.

>
>   drivers/gpu/drm/radeon/radeon.h         |  2 +-
>   drivers/gpu/drm/radeon/radeon_device.c  | 57 +++++++++++++++++++--------------
>   drivers/gpu/drm/radeon/radeon_display.c |  4 ++-
>   3 files changed, 37 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index b281886f6f51..9d97409c0443 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -2315,7 +2315,7 @@ struct radeon_device {
>   	bool				need_dma32;
>   	bool				accel_working;
>   	bool				fastfb_working; /* IGP feature*/
> -	bool				needs_reset;
> +	bool				needs_reset, in_reset;
>   	struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
>   	const struct firmware *me_fw;	/* all family ME firmware */
>   	const struct firmware *pfp_fw;	/* r6/700 PFP firmware */
> diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
> index 6a219bcee66d..124d8994e02a 100644
> --- a/drivers/gpu/drm/radeon/radeon_device.c
> +++ b/drivers/gpu/drm/radeon/radeon_device.c
> @@ -1671,6 +1671,7 @@ int radeon_gpu_reset(struct radeon_device *rdev)
>   	down_write(&rdev->exclusive_lock);
>   
>   	if (!rdev->needs_reset) {
> +		WARN_ON(rdev->in_reset);
>   		up_write(&rdev->exclusive_lock);
>   		return 0;
>   	}
> @@ -1683,17 +1684,21 @@ int radeon_gpu_reset(struct radeon_device *rdev)
>   	radeon_suspend(rdev);
>   	radeon_hpd_fini(rdev);
>   
> -	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> -		ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i],
> -						   &ring_data[i]);
> -		if (ring_sizes[i]) {
> -			saved = true;
> -			dev_info(rdev->dev, "Saved %d dwords of commands "
> -				 "on ring %d.\n", ring_sizes[i], i);
> +	if (!rdev->in_reset) {
> +		rdev->in_reset = true;
> +
> +		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> +			ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i],
> +							   &ring_data[i]);
> +			if (ring_sizes[i]) {
> +				saved = true;
> +				dev_info(rdev->dev, "Saved %d dwords of commands "
> +					 "on ring %d.\n", ring_sizes[i], i);
> +			}
>   		}
> -	}
> +	} else
> +		memset(ring_data, 0, sizeof(ring_data));
>   
> -retry:
>   	r = radeon_asic_reset(rdev);
>   	if (!r) {
>   		dev_info(rdev->dev, "GPU reset succeeded, trying to resume\n");
> @@ -1709,16 +1714,6 @@ retry:
>   			ring_sizes[i] = 0;
>   			ring_data[i] = NULL;
>   		}
> -
> -		r = radeon_ib_ring_tests(rdev);
> -		if (r) {
> -			dev_err(rdev->dev, "ib ring test failed (%d).\n", r);
> -			if (saved) {
> -				saved = false;
> -				radeon_suspend(rdev);
> -				goto retry;
> -			}
> -		}
>   	} else {
>   		radeon_fence_driver_force_completion(rdev);
>   		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> @@ -1728,8 +1723,7 @@ retry:
>   
>   	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
>   		/* do dpm late init */
> -		r = radeon_pm_late_init(rdev);
> -		if (r) {
> +		if (radeon_pm_late_init(rdev)) {
>   			rdev->pm.dpm_enabled = false;
>   			DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n");
>   		}
> @@ -1753,19 +1747,34 @@ retry:
>   	/* reset hpd state */
>   	radeon_hpd_init(rdev);
>   
> +	ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
> +	downgrade_write(&rdev->exclusive_lock);
> +
>   	drm_helper_resume_force_mode(rdev->ddev);
>   
>   	/* set the power state here in case we are a PX system or headless */
> -	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)
> +	if ((rdev->pm.pm_method == PM_METHOD_DPM))
>   		radeon_pm_compute_clocks(rdev);
>   
> -	ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
> +	if (!r) {
> +		r = radeon_ib_ring_tests(rdev);
> +		if (r) {
> +			dev_err(rdev->dev, "ib ring test failed (%d).\n", r);
> +			if (saved) {
> +				rdev->needs_reset = true;
> +				up_read(&rdev->exclusive_lock);
> +				return -EAGAIN;
> +			}
> +		}
> +	}
> +
>   	if (r) {
>   		/* bad news, how to tell it to userspace ? */
>   		dev_info(rdev->dev, "GPU reset failed\n");
>   	}
>   
> -	up_write(&rdev->exclusive_lock);
> +	rdev->in_reset = false;
> +	up_read(&rdev->exclusive_lock);
>   	return r;
>   }
>   
> diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
> index 3fdf87318069..bd0d687379ee 100644
> --- a/drivers/gpu/drm/radeon/radeon_display.c
> +++ b/drivers/gpu/drm/radeon/radeon_display.c
> @@ -405,7 +405,9 @@ static void radeon_flip_work_func(struct work_struct *__work)
>   		r = radeon_fence_wait(work->fence, false);
>   		if (r == -EDEADLK) {
>   			up_read(&rdev->exclusive_lock);
> -			r = radeon_gpu_reset(rdev);
> +			do {
> +				r = radeon_gpu_reset(rdev);
> +			} while (r == -EAGAIN);
>   			down_read(&rdev->exclusive_lock);
>   		}
>   		if (r)



More information about the dri-devel mailing list