[PATCH] drm/amdgpu/pm: bail on sysfs/debugfs queries during platform suspend

Christian König ckoenig.leichtzumerken at gmail.com
Thu Mar 25 08:10:20 UTC 2021


Am 25.03.21 um 04:29 schrieb Quan, Evan:
> [AMD Public Use]
>
> Maybe we can have an API like is_hw_access_blocked(), so that we can put all of the checks below within it.
> 	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;

Sounds like a good idea to me as well.

But my question is: how the heck have we managed to access those files
during suspend?

> Anyway, patch is reviewed-by: Evan Quan <evan.quan at amd.com>

Acked-by: Christian König <christian.koenig at amd.com>

>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Alex Deucher
> Sent: Thursday, March 25, 2021 5:18 AM
> To: amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>
> Subject: [PATCH] drm/amdgpu/pm: bail on sysfs/debugfs queries during platform suspend
>
> The GPU is in the process of being shut down.  Spurious queries during
> suspend and resume can put the SMU into a bad state.  Runtime PM is
> handled dynamically, so we check if we are in non-runtime suspend.
>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> ---
>   drivers/gpu/drm/amd/pm/amdgpu_pm.c | 98 ++++++++++++++++++++++++++++++
>   1 file changed, 98 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> index 2627870a786e..3c1b5483688b 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> @@ -129,6 +129,8 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0) {
> @@ -162,6 +164,8 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	if (strncmp("battery", buf, strlen("battery")) == 0)
>   		state = POWER_STATE_TYPE_BATTERY;
> @@ -268,6 +272,8 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0) {
> @@ -310,6 +316,8 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	if (strncmp("low", buf, strlen("low")) == 0) {
>   		level = AMD_DPM_FORCED_LEVEL_LOW;
> @@ -408,6 +416,8 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0) {
> @@ -448,6 +458,8 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0) {
> @@ -484,6 +496,8 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	if (adev->pp_force_state_enabled)
>   		return amdgpu_get_pp_cur_state(dev, attr, buf);
> @@ -504,6 +518,8 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	if (strlen(buf) == 1)
>   		adev->pp_force_state_enabled = false;
> @@ -564,6 +580,8 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0) {
> @@ -602,6 +620,8 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0) {
> @@ -764,6 +784,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	if (count > 127)
>   		return -EINVAL;
> @@ -865,6 +887,8 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0) {
> @@ -916,6 +940,8 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = kstrtou64(buf, 0, &featuremask);
>   	if (ret)
> @@ -959,6 +985,8 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0) {
> @@ -1018,6 +1046,8 @@ static ssize_t amdgpu_get_pp_dpm_clock(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0) {
> @@ -1083,6 +1113,8 @@ static ssize_t amdgpu_set_pp_dpm_clock(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = amdgpu_read_mask(buf, count, &mask);
>   	if (ret)
> @@ -1239,6 +1271,8 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0) {
> @@ -1269,6 +1303,8 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = kstrtol(buf, 0, &value);
>   
> @@ -1312,6 +1348,8 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0) {
> @@ -1342,6 +1380,8 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = kstrtol(buf, 0, &value);
>   
> @@ -1405,6 +1445,8 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0) {
> @@ -1443,6 +1485,8 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	tmp[0] = *(buf);
>   	tmp[1] = '\0';
> @@ -1506,6 +1550,8 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	r = pm_runtime_get_sync(ddev->dev);
>   	if (r < 0) {
> @@ -1544,6 +1590,8 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	r = pm_runtime_get_sync(ddev->dev);
>   	if (r < 0) {
> @@ -1587,6 +1635,8 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	if (adev->flags & AMD_IS_APU)
>   		return -ENODATA;
> @@ -1628,6 +1678,8 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	if (adev->unique_id)
>   		return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id);
> @@ -1726,6 +1778,8 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0) {
> @@ -1954,6 +2008,8 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	if (channel >= PP_TEMP_MAX)
>   		return -EINVAL;
> @@ -2090,6 +2146,8 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (ret < 0) {
> @@ -2122,6 +2180,8 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	err = kstrtoint(buf, 10, &value);
>   	if (err)
> @@ -2172,6 +2232,8 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (err < 0) {
> @@ -2220,6 +2282,8 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (err < 0) {
> @@ -2253,6 +2317,8 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (err < 0) {
> @@ -2285,6 +2351,8 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (r < 0) {
> @@ -2315,6 +2383,8 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (r < 0) {
> @@ -2344,6 +2414,8 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (err < 0) {
> @@ -2376,6 +2448,8 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (err < 0) {
> @@ -2422,6 +2496,8 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (ret < 0) {
> @@ -2455,6 +2531,8 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	err = kstrtoint(buf, 10, &value);
>   	if (err)
> @@ -2496,6 +2574,8 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (r < 0) {
> @@ -2533,6 +2613,8 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	/* only APUs have vddnb */
>   	if  (!(adev->flags & AMD_IS_APU))
> @@ -2575,6 +2657,8 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (r < 0) {
> @@ -2619,6 +2703,8 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (r < 0) {
> @@ -2656,6 +2742,8 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (r < 0) {
> @@ -2693,6 +2781,8 @@ static ssize_t amdgpu_hwmon_show_power_cap_default(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (r < 0) {
> @@ -2739,6 +2829,8 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	if (amdgpu_sriov_vf(adev))
>   		return -EINVAL;
> @@ -2780,6 +2872,8 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (r < 0) {
> @@ -2817,6 +2911,8 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   	if (r < 0) {
> @@ -3390,6 +3486,8 @@ static int amdgpu_debugfs_pm_info_show(struct seq_file *m, void *unused)
>   
>   	if (amdgpu_in_reset(adev))
>   		return -EPERM;
> +	if (adev->in_suspend && !adev->in_runpm)
> +		return -EPERM;
>   
>   	r = pm_runtime_get_sync(dev->dev);
>   	if (r < 0) {



More information about the amd-gfx mailing list