[PATCH v1 06/13] drm/amdgpu: validate suspend before function call

Lazar, Lijo lijo.lazar at amd.com
Thu Oct 10 08:45:29 UTC 2024



On 10/10/2024 2:05 PM, Khatri, Sunil wrote:
> 
> On 10/10/2024 1:42 PM, Lazar, Lijo wrote:
>>
>> On 10/10/2024 1:13 PM, Christian König wrote:
>>> Am 09.10.24 um 16:24 schrieb Sunil Khatri:
>>>> Before making a function call to suspend, validate
>>>> the function pointer like we do in sw_init.
>>>>
>>>> Signed-off-by: Sunil Khatri <sunil.khatri at amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/aldebaran.c      | 15 ++++++------
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 26
>>>> ++++++++++++---------
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c   | 12 ++++++----
>>>>    drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c | 15 ++++++------
>>>>    drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c   | 15 ++++++------
>>>>    5 files changed, 46 insertions(+), 37 deletions(-)
>>>>
>> Original patch series is somehow missing in my inbox, hence posting it
>> here.
>>
>> Below ones are SOC specific files where we know those blocks implement
>> the suspend sequence. If they are taken out, then that needs to generate
>> attention. But this check will cause a silent skip.
> 
> Hello Lijo,
> 
> I have not idea why the series has not reached you in first time.
> Second i did not get your point clearly, I am just cleaning up code
> which just return 0 and doing nothing else. I guess those suspend/resume
> functions were expected to be implemented but right
> now its empty and just an additional call in stack doing nothing and
> hence needed cleanup.
> 

In those files, the logic is to call suspend resume of SDMA/GFX blocks.
The implementation is expected for those blocks. For others, it's a
'continue' statement.

Thanks,
Lijo

> Regards
> Sunil Khatri
> 
>>
>> aldebaran.c
>> sienna_cichlid.c
>> smu_v13_0_10.c
>>
>> Thanks,
>> Lijo
>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
>>>> b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
>>>> index c1ff24335a0c..e55d680d95ce 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
>>>> @@ -85,13 +85,14 @@ static int aldebaran_mode2_suspend_ip(struct
>>>> amdgpu_device *adev)
>>>>                      AMD_IP_BLOCK_TYPE_SDMA))
>>>>                continue;
>>>>    -        r =
>>>> adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]);
>>>> -
>>>> -        if (r) {
>>>> -            dev_err(adev->dev,
>>>> -                "suspend of IP block <%s> failed %d\n",
>>>> -                adev->ip_blocks[i].version->funcs->name, r);
>>>> -            return r;
>>>> +        if (adev->ip_blocks[i].version->funcs->suspend) {
>>>> +            r =
>>>> adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]);
>>>> +            if (r) {
>>>> +                dev_err(adev->dev,
>>>> +                    "suspend of IP block <%s> failed %d\n",
>>>> +                    adev->ip_blocks[i].version->funcs->name, r);
>>>> +                return r;
>>>> +            }
>>> I think we should probably create a function for that code and error
>>> message repeated a number of times. Same for the resume function.
>>>
>>> Apart from that the whole set looks good to me.
>>>
>>> Regards,
>>> Christian.
>>>
>>>>            }
>>>>              adev->ip_blocks[i].status.hw = false;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index 340141a74c12..51607ac8adb9 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -3471,12 +3471,14 @@ static int
>>>> amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
>>>>                continue;
>>>>              /* XXX handle errors */
>>>> -        r =
>>>> adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]);
>>>> -        /* XXX handle errors */
>>>> -        if (r) {
>>>> -            DRM_ERROR("suspend of IP block <%s> failed %d\n",
>>>> -                  adev->ip_blocks[i].version->funcs->name, r);
>>>> -            return r;
>>>> +        if (adev->ip_blocks[i].version->funcs->suspend) {
>>>> +            r =
>>>> adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]);
>>>> +            /* XXX handle errors */
>>>> +            if (r) {
>>>> +                DRM_ERROR("suspend of IP block <%s> failed %d\n",
>>>> +                    adev->ip_blocks[i].version->funcs->name, r);
>>>> +                return r;
>>>> +            }
>>>>            }
>>>>              adev->ip_blocks[i].status.hw = false;
>>>> @@ -3553,11 +3555,13 @@ static int
>>>> amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
>>>>                continue;
>>>>              /* XXX handle errors */
>>>> -        r =
>>>> adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]);
>>>> -        /* XXX handle errors */
>>>> -        if (r) {
>>>> -            DRM_ERROR("suspend of IP block <%s> failed %d\n",
>>>> -                  adev->ip_blocks[i].version->funcs->name, r);
>>>> +        if (adev->ip_blocks[i].version->funcs->suspend) {
>>>> +            r =
>>>> adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]);
>>>> +            /* XXX handle errors */
>>>> +            if (r) {
>>>> +                DRM_ERROR("suspend of IP block <%s> failed %d\n",
>>>> +                    adev->ip_blocks[i].version->funcs->name, r);
>>>> +            }
>>>>            }
>>>>            adev->ip_blocks[i].status.hw = false;
>>>>            /* handle putting the SMC in the appropriate state */
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>>>> index 3e2724590dbf..6bc75aa1c3b1 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>>>> @@ -40,11 +40,13 @@ static int
>>>> amdgpu_reset_xgmi_reset_on_init_suspend(struct amdgpu_device *adev)
>>>>                continue;
>>>>              /* XXX handle errors */
>>>> -        r =
>>>> adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]);
>>>> -        /* XXX handle errors */
>>>> -        if (r) {
>>>> -            dev_err(adev->dev, "suspend of IP block <%s> failed %d",
>>>> -                adev->ip_blocks[i].version->funcs->name, r);
>>>> +        if (adev->ip_blocks[i].version->funcs->suspend) {
>>>> +            r =
>>>> adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]);
>>>> +            /* XXX handle errors */
>>>> +            if (r) {
>>>> +                dev_err(adev->dev, "suspend of IP block <%s> failed
>>>> %d",
>>>> +                    adev->ip_blocks[i].version->funcs->name, r);
>>>> +            }
>>>>            }
>>>>            adev->ip_blocks[i].status.hw = false;
>>>>        }
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
>>>> b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
>>>> index 475b7df3a908..10dece12509f 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
>>>> @@ -81,13 +81,14 @@ static int sienna_cichlid_mode2_suspend_ip(struct
>>>> amdgpu_device *adev)
>>>>                      AMD_IP_BLOCK_TYPE_SDMA))
>>>>                continue;
>>>>    -        r =
>>>> adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]);
>>>> -
>>>> -        if (r) {
>>>> -            dev_err(adev->dev,
>>>> -                "suspend of IP block <%s> failed %d\n",
>>>> -                adev->ip_blocks[i].version->funcs->name, r);
>>>> -            return r;
>>>> +        if (adev->ip_blocks[i].version->funcs->suspend) {
>>>> +            r =
>>>> adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]);
>>>> +            if (r) {
>>>> +                dev_err(adev->dev,
>>>> +                    "suspend of IP block <%s> failed %d\n",
>>>> +                    adev->ip_blocks[i].version->funcs->name, r);
>>>> +                return r;
>>>> +            }
>>>>            }
>>>>            adev->ip_blocks[i].status.hw = false;
>>>>        }
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
>>>> b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
>>>> index 5ea9090b5040..ab049f0b4d39 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
>>>> @@ -80,13 +80,14 @@ static int smu_v13_0_10_mode2_suspend_ip(struct
>>>> amdgpu_device *adev)
>>>>                      AMD_IP_BLOCK_TYPE_MES))
>>>>                continue;
>>>>    -        r =
>>>> adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]);
>>>> -
>>>> -        if (r) {
>>>> -            dev_err(adev->dev,
>>>> -                "suspend of IP block <%s> failed %d\n",
>>>> -                adev->ip_blocks[i].version->funcs->name, r);
>>>> -            return r;
>>>> +        if (adev->ip_blocks[i].version->funcs->suspend) {
>>>> +            r =
>>>> adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]);
>>>> +            if (r) {
>>>> +                dev_err(adev->dev,
>>>> +                    "suspend of IP block <%s> failed %d\n",
>>>> +                    adev->ip_blocks[i].version->funcs->name, r);
>>>> +                return r;
>>>> +            }
>>>>            }
>>>>            adev->ip_blocks[i].status.hw = false;
>>>>        }


More information about the amd-gfx mailing list