[PATCH v2 4/4] drm/amd/pm: Add sysfs attribute to get pm log

Christian König ckoenig.leichtzumerken at gmail.com
Mon Oct 9 08:41:39 UTC 2023


Am 06.10.23 um 16:24 schrieb Alex Deucher:
> On Fri, Oct 6, 2023 at 4:32 AM Christian König
> <ckoenig.leichtzumerken at gmail.com> wrote:
>> Am 06.10.23 um 07:21 schrieb Lijo Lazar:
>>> Add sysfs attribute to read power management log. A snapshot is
>>> captured to the buffer when the attribute is read.
>>>
>>> Signed-off-by: Lijo Lazar <lijo.lazar at amd.com>
>>> ---
>>>
>>> v2: Pass PAGE_SIZE as the max size of input buffer
>>>
>>>    drivers/gpu/drm/amd/pm/amdgpu_pm.c | 40 ++++++++++++++++++++++++++++++
>>>    1 file changed, 40 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
>>> index 4c65a2fac028..5a1d21c52672 100644
>>> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
>>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
>>> @@ -1794,6 +1794,44 @@ static ssize_t amdgpu_set_apu_thermal_cap(struct device *dev,
>>>        return count;
>>>    }
>>>
>>> +static int amdgpu_pmlog_attr_update(struct amdgpu_device *adev,
>>> +                                 struct amdgpu_device_attr *attr,
>>> +                                 uint32_t mask,
>>> +                                 enum amdgpu_device_attr_states *states)
>>> +{
>>> +     if (amdgpu_dpm_get_pm_log(adev, NULL, 0) == -EOPNOTSUPP)
>>> +             *states = ATTR_STATE_UNSUPPORTED;
>>> +
>>> +     return 0;
>>> +}
>>> +
>>> +static ssize_t amdgpu_get_pmlog(struct device *dev,
>>> +                             struct device_attribute *attr, char *buf)
>>> +{
>>> +     struct drm_device *ddev = dev_get_drvdata(dev);
>>> +     struct amdgpu_device *adev = drm_to_adev(ddev);
>>> +     ssize_t size = 0;
>>> +     int ret;
>>> +
>>> +     if (amdgpu_in_reset(adev))
>>> +             return -EPERM;
>> Please stop using amdgpu_in_reset() for stuff like that altogether.
>>
>> When this is reset critical it should grab the reset rw semaphore. If it
>> isn't than that check isn't necessary.
> All of the power related sysfs files have this check.  It was
> originally added because users have processes running which poll
> various hwmon files at regular intervals and since SMU also handles
> reset, we don't want to get a metrics table request while a reset is
> happening otherwise the SMU gets confused.

Then this approach is completely broken. Nothing prevents the reset from 
starting right after doing the check.

If we need exclusive access to the SMU then we should just grab a lock.

Christian.

>
> Alex
>
>> Regards,
>> Christian.
>>
>>> +     if (adev->in_suspend && !adev->in_runpm)
>>> +             return -EPERM;
>>> +
>>> +     ret = pm_runtime_get_sync(ddev->dev);
>>> +     if (ret < 0) {
>>> +             pm_runtime_put_autosuspend(ddev->dev);
>>> +             return ret;
>>> +     }
>>> +
>>> +     size = amdgpu_dpm_get_pm_log(adev, buf, PAGE_SIZE);
>>> +
>>> +     pm_runtime_mark_last_busy(ddev->dev);
>>> +     pm_runtime_put_autosuspend(ddev->dev);
>>> +
>>> +     return size;
>>> +}
>>> +
>>>    /**
>>>     * DOC: gpu_metrics
>>>     *
>>> @@ -2091,6 +2129,8 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = {
>>>        AMDGPU_DEVICE_ATTR_RW(smartshift_bias,                          ATTR_FLAG_BASIC,
>>>                              .attr_update = ss_bias_attr_update),
>>>        AMDGPU_DEVICE_ATTR_RW(xgmi_plpd_policy,                         ATTR_FLAG_BASIC),
>>> +     AMDGPU_DEVICE_ATTR_RO(pmlog,                                    ATTR_FLAG_BASIC,
>>> +                           .attr_update = amdgpu_pmlog_attr_update),
>>>    };
>>>
>>>    static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,



More information about the amd-gfx mailing list