[PATCH] drm/amdgpu/pm: return an error during GPU reset or suspend

Nirmoy nirmodas at amd.com
Tue May 26 18:38:13 UTC 2020


Acked-by: Nirmoy Das <nirmoy.das at amd.com>

On 5/26/20 7:56 PM, Alex Deucher wrote:
> Return an error for sysfs and debugfs power interfaces during
> gpu reset and suspend.  Prevents access to the hw while it may
> be in an unusable state.
>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 171 +++++++++++++++++++++++++
>   1 file changed, 171 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> index 702a3a03c707..255e21d46921 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> @@ -163,6 +163,9 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
>   	enum amd_pm_state_type pm;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -196,6 +199,9 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
>   	enum amd_pm_state_type  state;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	if (strncmp("battery", buf, strlen("battery")) == 0)
>   		state = POWER_STATE_TYPE_BATTERY;
>   	else if (strncmp("balanced", buf, strlen("balanced")) == 0)
> @@ -297,6 +303,9 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
>   	enum amd_dpm_forced_level level = 0xff;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -334,6 +343,9 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>   	enum amd_dpm_forced_level current_level = 0xff;
>   	int ret = 0;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	if (strncmp("low", buf, strlen("low")) == 0) {
>   		level = AMD_DPM_FORCED_LEVEL_LOW;
>   	} else if (strncmp("high", buf, strlen("high")) == 0) {
> @@ -433,6 +445,9 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
>   	struct pp_states_info data;
>   	int i, buf_len, ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -472,6 +487,9 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
>   	enum amd_pm_state_type pm = 0;
>   	int i = 0, ret = 0;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -508,6 +526,9 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
>   	struct drm_device *ddev = dev_get_drvdata(dev);
>   	struct amdgpu_device *adev = ddev->dev_private;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	if (adev->pp_force_state_enabled)
>   		return amdgpu_get_pp_cur_state(dev, attr, buf);
>   	else
> @@ -525,6 +546,9 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
>   	unsigned long idx;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	if (strlen(buf) == 1)
>   		adev->pp_force_state_enabled = false;
>   	else if (is_support_sw_smu(adev))
> @@ -580,6 +604,9 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
>   	char *table = NULL;
>   	int size, ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -619,6 +646,9 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
>   	struct amdgpu_device *adev = ddev->dev_private;
>   	int ret = 0;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -721,6 +751,9 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>   	const char delimiter[3] = {' ', '\n', '\0'};
>   	uint32_t type;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	if (count > 127)
>   		return -EINVAL;
>   
> @@ -810,6 +843,9 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
>   	ssize_t size;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -859,6 +895,9 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
>   	uint64_t featuremask;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = kstrtou64(buf, 0, &featuremask);
>   	if (ret)
>   		return -EINVAL;
> @@ -899,6 +938,9 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
>   	ssize_t size;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -955,6 +997,9 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
>   	ssize_t size;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -1018,6 +1063,9 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
>   	int ret;
>   	uint32_t mask = 0;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = amdgpu_read_mask(buf, count, &mask);
>   	if (ret)
>   		return ret;
> @@ -1049,6 +1097,9 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
>   	ssize_t size;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -1076,6 +1127,9 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
>   	uint32_t mask = 0;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = amdgpu_read_mask(buf, count, &mask);
>   	if (ret)
>   		return ret;
> @@ -1107,6 +1161,9 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
>   	ssize_t size;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -1134,6 +1191,9 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
>   	int ret;
>   	uint32_t mask = 0;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = amdgpu_read_mask(buf, count, &mask);
>   	if (ret)
>   		return ret;
> @@ -1167,6 +1227,9 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
>   	ssize_t size;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -1194,6 +1257,9 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
>   	int ret;
>   	uint32_t mask = 0;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = amdgpu_read_mask(buf, count, &mask);
>   	if (ret)
>   		return ret;
> @@ -1227,6 +1293,9 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
>   	ssize_t size;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -1254,6 +1323,9 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
>   	int ret;
>   	uint32_t mask = 0;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = amdgpu_read_mask(buf, count, &mask);
>   	if (ret)
>   		return ret;
> @@ -1287,6 +1359,9 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
>   	ssize_t size;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -1314,6 +1389,9 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
>   	int ret;
>   	uint32_t mask = 0;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = amdgpu_read_mask(buf, count, &mask);
>   	if (ret)
>   		return ret;
> @@ -1347,6 +1425,9 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
>   	uint32_t value = 0;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -1372,6 +1453,9 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
>   	int ret;
>   	long int value;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = kstrtol(buf, 0, &value);
>   
>   	if (ret)
> @@ -1410,6 +1494,9 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
>   	uint32_t value = 0;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -1435,6 +1522,9 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
>   	int ret;
>   	long int value;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = kstrtol(buf, 0, &value);
>   
>   	if (ret)
> @@ -1493,6 +1583,9 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
>   	ssize_t size;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -1528,6 +1621,9 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
>   	long int profile_mode = 0;
>   	const char delimiter[3] = {' ', '\n', '\0'};
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	tmp[0] = *(buf);
>   	tmp[1] = '\0';
>   	ret = kstrtol(tmp, 0, &profile_mode);
> @@ -1587,6 +1683,9 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
>   	struct amdgpu_device *adev = ddev->dev_private;
>   	int r, value, size = sizeof(value);
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	r = pm_runtime_get_sync(ddev->dev);
>   	if (r < 0)
>   		return r;
> @@ -1620,6 +1719,9 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
>   	struct amdgpu_device *adev = ddev->dev_private;
>   	int r, value, size = sizeof(value);
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	r = pm_runtime_get_sync(ddev->dev);
>   	if (r < 0)
>   		return r;
> @@ -1658,6 +1760,9 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
>   	uint64_t count0 = 0, count1 = 0;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	if (adev->flags & AMD_IS_APU)
>   		return -ENODATA;
>   
> @@ -1694,6 +1799,9 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
>   	struct drm_device *ddev = dev_get_drvdata(dev);
>   	struct amdgpu_device *adev = ddev->dev_private;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	if (adev->unique_id)
>   		return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id);
>   
> @@ -1876,6 +1984,9 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
>   	int channel = to_sensor_dev_attr(attr)->index;
>   	int r, temp = 0, size = sizeof(temp);
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	if (channel >= PP_TEMP_MAX)
>   		return -EINVAL;
>   
> @@ -2007,6 +2118,9 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
>   	u32 pwm_mode = 0;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(adev->ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -2038,6 +2152,9 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
>   	int err, ret;
>   	int value;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	err = kstrtoint(buf, 10, &value);
>   	if (err)
>   		return err;
> @@ -2087,6 +2204,9 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
>   	u32 value;
>   	u32 pwm_mode;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	err = pm_runtime_get_sync(adev->ddev->dev);
>   	if (err < 0)
>   		return err;
> @@ -2136,6 +2256,9 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
>   	int err;
>   	u32 speed = 0;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	err = pm_runtime_get_sync(adev->ddev->dev);
>   	if (err < 0)
>   		return err;
> @@ -2166,6 +2289,9 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
>   	int err;
>   	u32 speed = 0;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	err = pm_runtime_get_sync(adev->ddev->dev);
>   	if (err < 0)
>   		return err;
> @@ -2195,6 +2321,9 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
>   	u32 size = sizeof(min_rpm);
>   	int r;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	r = pm_runtime_get_sync(adev->ddev->dev);
>   	if (r < 0)
>   		return r;
> @@ -2220,6 +2349,9 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
>   	u32 size = sizeof(max_rpm);
>   	int r;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	r = pm_runtime_get_sync(adev->ddev->dev);
>   	if (r < 0)
>   		return r;
> @@ -2244,6 +2376,9 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
>   	int err;
>   	u32 rpm = 0;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	err = pm_runtime_get_sync(adev->ddev->dev);
>   	if (err < 0)
>   		return err;
> @@ -2273,6 +2408,9 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
>   	u32 value;
>   	u32 pwm_mode;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	err = pm_runtime_get_sync(adev->ddev->dev);
>   	if (err < 0)
>   		return err;
> @@ -2319,6 +2457,9 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
>   	u32 pwm_mode = 0;
>   	int ret;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	ret = pm_runtime_get_sync(adev->ddev->dev);
>   	if (ret < 0)
>   		return ret;
> @@ -2351,6 +2492,9 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
>   	int value;
>   	u32 pwm_mode;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	err = kstrtoint(buf, 10, &value);
>   	if (err)
>   		return err;
> @@ -2391,6 +2535,9 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
>   	u32 vddgfx;
>   	int r, size = sizeof(vddgfx);
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	r = pm_runtime_get_sync(adev->ddev->dev);
>   	if (r < 0)
>   		return r;
> @@ -2423,6 +2570,9 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
>   	u32 vddnb;
>   	int r, size = sizeof(vddnb);
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	/* only APUs have vddnb */
>   	if  (!(adev->flags & AMD_IS_APU))
>   		return -EINVAL;
> @@ -2460,6 +2610,9 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
>   	int r, size = sizeof(u32);
>   	unsigned uw;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	r = pm_runtime_get_sync(adev->ddev->dev);
>   	if (r < 0)
>   		return r;
> @@ -2496,6 +2649,9 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
>   	ssize_t size;
>   	int r;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	r = pm_runtime_get_sync(adev->ddev->dev);
>   	if (r < 0)
>   		return r;
> @@ -2525,6 +2681,9 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
>   	ssize_t size;
>   	int r;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	r = pm_runtime_get_sync(adev->ddev->dev);
>   	if (r < 0)
>   		return r;
> @@ -2555,6 +2714,9 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
>   	int err;
>   	u32 value;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	if (amdgpu_sriov_vf(adev))
>   		return -EINVAL;
>   
> @@ -2593,6 +2755,9 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
>   	uint32_t sclk;
>   	int r, size = sizeof(sclk);
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	r = pm_runtime_get_sync(adev->ddev->dev);
>   	if (r < 0)
>   		return r;
> @@ -2625,6 +2790,9 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
>   	uint32_t mclk;
>   	int r, size = sizeof(mclk);
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	r = pm_runtime_get_sync(adev->ddev->dev);
>   	if (r < 0)
>   		return r;
> @@ -3484,6 +3652,9 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
>   	u32 flags = 0;
>   	int r;
>   
> +	if (adev->in_gpu_reset || adev->in_suspend)
> +		return -EPERM;
> +
>   	r = pm_runtime_get_sync(dev->dev);
>   	if (r < 0)
>   		return r;


More information about the amd-gfx mailing list