[PATCH 01/10] drm/amdgpu: is_dislay_hung will be called by soft reset

Fri Jul 15 06:36:54 UTC 2016

On 2016年07月14日 00:15, Deucher, Alexander wrote:
>> -----Original Message-----
>> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf
>> Of Christian König
>> Sent: Wednesday, July 13, 2016 9:01 AM
>> To: Zhou, David(ChunMing); amd-gfx at lists.freedesktop.org
>> Subject: Re: [PATCH 01/10] drm/amdgpu: is_dislay_hung will be called by soft
>> reset
>>
>> The set looks good in general, but you use the same old approach we had
>> for radeon. Alex, what do you think about this? Should we stick with
>> that approach or rather start over?
>>
>> I think it would be better if we can get rid of the AMDGPU_RESET_ enum
>> and use a per IP block based function to figure out if a block is hung
>> or not.
>>
>> Then we can have a per block flag which tels us if a hung of this block
>> can be cleared by a soft reset or needs a hard reset.
>>
>> Then last but not least we reset only the hung blocks if possible.
> This should be implemented at the IP level, not as a new asic callback.  We just never flushed out the design since it wasn't working at the time.  I think we should add any new callbacks to the IP functions and then we should use either the IP idle checks or add a new check_soft_reset callback to set the flags.  Then in amdgpu_device.c we could have a core soft_reset that just walks the IP list and calls the callbacks.  Something like:
>
> +static int amdgpu_soft_reset(struct amdgpu_device *adev)
> +{
> +	int i, r, failied = 0;
> +
> +	for (i = 0; i < adev->num_ip_blocks; i++) {
> +		if (!adev->ip_block_status[i].valid)
> +			continue;
> +		if (adev->ip_blocks[i].funcs->check_soft_reset((void *)adev)) {
> +			r = adev->ip_blocks[i].funcs->soft_reset((void *)adev);
> +			if (r) {
> +				DRM_ERROR("soft_reset %d failed %d\n", i, r);
> +				failed = r;
> +			}
> +		}
> +	}
> +
> +	return failed;
> +}
>
> No need for pre/post callbacks, that call all be handled in the soft_reset callback directly.

I see your means, this way will have a problem, take gfx hang for example:
gfx block soft reset will pre--->reset--->post, then gfx will start to 
run, but this will result in other blocks busy, at this time, the loop 
could walk to other block soft reset, then trigger other block reset, 
then gpu will dead off.

So we still need to post engine after all soft reset at a time.

Regards,
David Zhou
>
> Alex
>
>> Regards,
>> Christian.
>>
>> Am 13.07.2016 um 12:32 schrieb Chunming Zhou:
>>> Change-Id: Ie81f6b1124aeb4304f59f92282fe8b18fb06a693
>>> Signed-off-by: Chunming Zhou <David1.Zhou at amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/dce_v10_0.c        | 2 +-
>>>    drivers/gpu/drm/amd/amdgpu/dce_v10_0.h        | 2 +-
>>>    drivers/gpu/drm/amd/amdgpu/dce_v11_0.c        | 2 +-
>>>    drivers/gpu/drm/amd/amdgpu/dce_v11_0.h        | 2 +-
>>>    drivers/gpu/drm/amd/amdgpu/dce_v8_0.c         | 2 +-
>>>    drivers/gpu/drm/amd/amdgpu/dce_v8_0.h         | 2 +-
>>>    drivers/gpu/drm/amd/dal/amdgpu_dm/amdgpu_dm.c | 6 +++---
>>>    7 files changed, 9 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
>>> index 7554dd7..a592029 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
>>> @@ -532,7 +532,7 @@ static u32 dce_v10_0_hpd_get_gpio_reg(struct
>> amdgpu_device *adev)
>>>    	return mmDC_GPIO_HPD_A;
>>>    }
>>>
>>> -static bool dce_v10_0_is_display_hung(struct amdgpu_device *adev)
>>> +bool dce_v10_0_is_display_hung(struct amdgpu_device *adev)
>>>    {
>>>    	u32 crtc_hung = 0;
>>>    	u32 crtc_status[6];
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h
>> b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h
>>> index 3947956..77a732c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h
>>> @@ -32,5 +32,5 @@ void dce_v10_0_stop_mc_access(struct
>> amdgpu_device *adev,
>>>    			      struct amdgpu_mode_mc_save *save);
>>>    void dce_v10_0_resume_mc_access(struct amdgpu_device *adev,
>>>    				struct amdgpu_mode_mc_save *save);
>>> -
>>> +bool dce_v10_0_is_display_hung(struct amdgpu_device *adev);
>>>    #endif
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
>> b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
>>> index d9c9b88..e18bd20 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
>>> @@ -549,7 +549,7 @@ static u32 dce_v11_0_hpd_get_gpio_reg(struct
>> amdgpu_device *adev)
>>>    	return mmDC_GPIO_HPD_A;
>>>    }
>>>
>>> -static bool dce_v11_0_is_display_hung(struct amdgpu_device *adev)
>>> +bool dce_v11_0_is_display_hung(struct amdgpu_device *adev)
>>>    {
>>>    	u32 crtc_hung = 0;
>>>    	u32 crtc_status[6];
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h
>> b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h
>>> index dc6ff04..fa1884c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h
>>> @@ -32,5 +32,5 @@ void dce_v11_0_stop_mc_access(struct
>> amdgpu_device *adev,
>>>    			      struct amdgpu_mode_mc_save *save);
>>>    void dce_v11_0_resume_mc_access(struct amdgpu_device *adev,
>>>    				struct amdgpu_mode_mc_save *save);
>>> -
>>> +bool dce_v11_0_is_display_hung(struct amdgpu_device *adev);
>>>    #endif
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
>>> index 14ac085..4863504 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
>>> @@ -478,7 +478,7 @@ static u32 dce_v8_0_hpd_get_gpio_reg(struct
>> amdgpu_device *adev)
>>>    	return mmDC_GPIO_HPD_A;
>>>    }
>>>
>>> -static bool dce_v8_0_is_display_hung(struct amdgpu_device *adev)
>>> +bool dce_v8_0_is_display_hung(struct amdgpu_device *adev)
>>>    {
>>>    	u32 crtc_hung = 0;
>>>    	u32 crtc_status[6];
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h
>> b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h
>>> index 4bb72ab..53f643c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h
>>> @@ -32,5 +32,5 @@ void dce_v8_0_stop_mc_access(struct
>> amdgpu_device *adev,
>>>    			     struct amdgpu_mode_mc_save *save);
>>>    void dce_v8_0_resume_mc_access(struct amdgpu_device *adev,
>>>    			       struct amdgpu_mode_mc_save *save);
>>> -
>>> +bool dce_v8_0_is_display_hung(struct amdgpu_device *adev);
>>>    #endif
>>> diff --git a/drivers/gpu/drm/amd/dal/amdgpu_dm/amdgpu_dm.c
>> b/drivers/gpu/drm/amd/dal/amdgpu_dm/amdgpu_dm.c
>>> index ca64158..0ea0956 100644
>>> --- a/drivers/gpu/drm/amd/dal/amdgpu_dm/amdgpu_dm.c
>>> +++ b/drivers/gpu/drm/amd/dal/amdgpu_dm/amdgpu_dm.c
>>> @@ -1301,7 +1301,7 @@ static const struct amdgpu_display_funcs
>> dm_dce_v8_0_display_funcs = {
>>>    	.bandwidth_update = dm_bandwidth_update, /* called
>> unconditionally */
>>>    	.vblank_get_counter = dm_vblank_get_counter,/* called
>> unconditionally */
>>>    	.vblank_wait = NULL,
>>> -	.is_display_hung = NULL, /* not called anywhere */
>>> +	.is_display_hung = dce_v8_0_is_display_hung,
>>>    	.backlight_set_level =
>>>    		dm_set_backlight_level,/* called unconditionally */
>>>    	.backlight_get_level =
>>> @@ -1325,7 +1325,7 @@ static const struct amdgpu_display_funcs
>> dm_dce_v10_0_display_funcs = {
>>>    	.bandwidth_update = dm_bandwidth_update, /* called
>> unconditionally */
>>>    	.vblank_get_counter = dm_vblank_get_counter,/* called
>> unconditionally */
>>>    	.vblank_wait = NULL,
>>> -	.is_display_hung = NULL, /* not called anywhere */
>>> +	.is_display_hung = dce_v10_0_is_display_hung,
>>>    	.backlight_set_level =
>>>    		dm_set_backlight_level,/* called unconditionally */
>>>    	.backlight_get_level =
>>> @@ -1349,7 +1349,7 @@ static const struct amdgpu_display_funcs
>> dm_dce_v11_0_display_funcs = {
>>>    	.bandwidth_update = dm_bandwidth_update, /* called
>> unconditionally */
>>>    	.vblank_get_counter = dm_vblank_get_counter,/* called
>> unconditionally */
>>>    	.vblank_wait = NULL,
>>> -	.is_display_hung = NULL, /* not called anywhere */
>>> +	.is_display_hung = dce_v11_0_is_display_hung,
>>>    	.backlight_set_level =
>>>    		dm_set_backlight_level,/* called unconditionally */
>>>    	.backlight_get_level =
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx