[PATCH 1/2] drm/amdgpu: added a sysfs interface for thermal throttling
Quan, Evan
Evan.Quan at amd.com
Tue Feb 14 09:15:40 UTC 2023
[AMD Official Use Only - General]
> -----Original Message-----
> From: kunliu13 <Kun.Liu2 at amd.com>
> Sent: Tuesday, February 14, 2023 3:54 PM
> To: Limonciello, Mario <Mario.Limonciello at amd.com>; Liang, Richard qi
> <Richardqi.Liang at amd.com>; Yuan, Perry <Perry.Yuan at amd.com>; amd-
> gfx at lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Du, Xiaojian
> <Xiaojian.Du at amd.com>; Quan, Evan <Evan.Quan at amd.com>; Liu, Kun
> <Kun.Liu2 at amd.com>
> Subject: [PATCH 1/2] drm/amdgpu: added a sysfs interface for thermal
> throttling
>
> Add a sysfs interface for thermal throttling so that userspace can get/update
> the thermal limit.
>
> Jira ID: SWDEV-354511
[Quan, Evan] Please drop this internal link. Other than this, the patch is Reviewed-by: Evan Quan <evan.quan at amd.com>
Evan
> Signed-off-by: Kun Liu <Kun.Liu2 at amd.com>
>
> Change-Id: I9948cb8966b731d2d74d7aad87cbcdc840dd34c8
> ---
> .../gpu/drm/amd/include/kgd_pp_interface.h | 2 +
> drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 28 +++++++
> drivers/gpu/drm/amd/pm/amdgpu_pm.c | 76
> +++++++++++++++++++
> drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 3 +
> drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 24 ++++++
> drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 12 +++
> 6 files changed, 145 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> index f3d64c78f..8394464ea 100644
> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> @@ -331,6 +331,8 @@ struct amd_pm_funcs {
> int (*get_mclk_od)(void *handle);
> int (*set_mclk_od)(void *handle, uint32_t value);
> int (*read_sensor)(void *handle, int idx, void *value, int *size);
> + int (*get_apu_thermal_limit)(void *handle, uint32_t *limit);
> + int (*set_apu_thermal_limit)(void *handle, uint32_t limit);
> enum amd_dpm_forced_level (*get_performance_level)(void
> *handle);
> enum amd_pm_state_type (*get_current_power_state)(void
> *handle);
> int (*get_fan_speed_rpm)(void *handle, uint32_t *rpm);
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> index 1b300c569..d9a9cf189 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> @@ -438,6 +438,34 @@ int amdgpu_dpm_read_sensor(struct
> amdgpu_device *adev, enum amd_pp_sensors senso
> return ret;
> }
>
> +int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev,
> uint32_t *limit)
> +{
> + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
> + int ret = -EINVAL;
> +
> + if (pp_funcs && pp_funcs->get_apu_thermal_limit) {
> + mutex_lock(&adev->pm.mutex);
> + ret = pp_funcs->get_apu_thermal_limit(adev-
> >powerplay.pp_handle, limit);
> + mutex_unlock(&adev->pm.mutex);
> + }
> +
> + return ret;
> +}
> +
> +int amdgpu_dpm_set_apu_thermal_limit(struct amdgpu_device *adev,
> uint32_t limit)
> +{
> + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
> + int ret = -EINVAL;
> +
> + if (pp_funcs && pp_funcs->set_apu_thermal_limit) {
> + mutex_lock(&adev->pm.mutex);
> + ret = pp_funcs->set_apu_thermal_limit(adev-
> >powerplay.pp_handle, limit);
> + mutex_unlock(&adev->pm.mutex);
> + }
> +
> + return ret;
> +}
> +
> void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
> {
> const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> index 236657eec..99b249e55 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> @@ -1685,6 +1685,81 @@ static ssize_t
> amdgpu_set_thermal_throttling_logging(struct device *dev,
> return count;
> }
>
> +/**
> + * DOC: apu_thermal_cap
> + *
> + * The amdgpu driver provides a sysfs API for retrieving/updating thermal
> + * limit temperature in millidegrees Celsius
> + *
> + * Reading back the file shows the current core limit value.
> + *
> + * Writing an integer to the file sets a new thermal limit. The value
> + * should be between 0 and 100. If the value is less than 0 or greater
> + * than 100, then the write request will be ignored.
> + */
> +static ssize_t amdgpu_get_apu_thermal_cap(struct device *dev,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + int ret, size = 0;
> + u32 limit;
> + struct drm_device *ddev = dev_get_drvdata(dev);
> + struct amdgpu_device *adev = drm_to_adev(ddev);
> +
> + ret = pm_runtime_get_sync(ddev->dev);
> + if (ret < 0) {
> + pm_runtime_put_autosuspend(ddev->dev);
> + return size;
> + }
> +
> + ret = amdgpu_dpm_get_apu_thermal_limit(adev, &limit);
> + if (!ret)
> + size = sysfs_emit(buf, "%u\n", limit);
> + else
> + size = sysfs_emit(buf, "failed to get thermal limit\n");
> +
> + pm_runtime_mark_last_busy(ddev->dev);
> + pm_runtime_put_autosuspend(ddev->dev);
> +
> + return size;
> +}
> +
> +static ssize_t amdgpu_set_apu_thermal_cap(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf,
> + size_t count)
> +{
> + int ret;
> + u32 value;
> + struct drm_device *ddev = dev_get_drvdata(dev);
> + struct amdgpu_device *adev = drm_to_adev(ddev);
> +
> + ret = kstrtou32(buf, 10, &value);
> + if (ret)
> + return ret;
> +
> + if (value < 0 || value > 100) {
> + dev_err(dev, "Invalid argument !\n");
> + return count;
> + }
> +
> + ret = pm_runtime_get_sync(ddev->dev);
> + if (ret < 0) {
> + pm_runtime_put_autosuspend(ddev->dev);
> + return ret;
> + }
> +
> + ret = amdgpu_dpm_set_apu_thermal_limit(adev, value);
> + if (ret)
> + dev_err(dev, "failed to update thermal limit\n");
> +
> + pm_runtime_mark_last_busy(ddev->dev);
> + pm_runtime_put_autosuspend(ddev->dev);
> +
> + return count;
> +}
> +
> +
> /**
> * DOC: gpu_metrics
> *
> @@ -1937,6 +2012,7 @@ static struct amdgpu_device_attr
> amdgpu_device_attrs[] = {
> AMDGPU_DEVICE_ATTR_RW(pp_features,
> ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
> AMDGPU_DEVICE_ATTR_RO(unique_id,
> ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
> AMDGPU_DEVICE_ATTR_RW(thermal_throttling_logging,
> ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
> + AMDGPU_DEVICE_ATTR_RW(apu_thermal_cap,
> ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
> AMDGPU_DEVICE_ATTR_RO(gpu_metrics,
> ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
> AMDGPU_DEVICE_ATTR_RO(smartshift_apu_power,
> ATTR_FLAG_BASIC,
> .attr_update = ss_power_attr_update),
> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> index cb5b9df78..0cc379ea1 100644
> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> @@ -369,6 +369,9 @@ struct amdgpu_pm {
> int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum
> amd_pp_sensors sensor,
> void *data, uint32_t *size);
>
> +int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev,
> uint32_t *limit);
> +int amdgpu_dpm_set_apu_thermal_limit(struct amdgpu_device *adev,
> uint32_t limit);
> +
> int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev,
> uint32_t block_type, bool gate);
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index 2fa79f892..b612fb6bd 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -2514,6 +2514,28 @@ static int smu_read_sensor(void *handle,
> return ret;
> }
>
> +static int smu_get_apu_thermal_limit(void *handle, uint32_t *limit)
> +{
> + int ret = -EINVAL;
> + struct smu_context *smu = handle;
> +
> + if (smu->ppt_funcs && smu->ppt_funcs->get_apu_thermal_limit)
> + ret = smu->ppt_funcs->get_apu_thermal_limit(smu, limit);
> +
> + return ret;
> +}
> +
> +static int smu_set_apu_thermal_limit(void *handle, uint32_t limit)
> +{
> + int ret = -EINVAL;
> + struct smu_context *smu = handle;
> +
> + if (smu->ppt_funcs && smu->ppt_funcs->set_apu_thermal_limit)
> + ret = smu->ppt_funcs->set_apu_thermal_limit(smu, limit);
> +
> + return ret;
> +}
> +
> static int smu_get_power_profile_mode(void *handle, char *buf)
> {
> struct smu_context *smu = handle;
> @@ -2998,6 +3020,8 @@ static const struct amd_pm_funcs
> swsmu_pm_funcs = {
> .emit_clock_levels = smu_emit_ppclk_levels,
> .force_performance_level = smu_force_performance_level,
> .read_sensor = smu_read_sensor,
> + .get_apu_thermal_limit = smu_get_apu_thermal_limit,
> + .set_apu_thermal_limit = smu_set_apu_thermal_limit,
> .get_performance_level = smu_get_performance_level,
> .get_current_power_state = smu_get_current_power_state,
> .get_fan_speed_rpm = smu_get_fan_speed_rpm,
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> index 3bc4128a2..378d3df4d 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> @@ -721,6 +721,18 @@ struct pptable_funcs {
> int (*read_sensor)(struct smu_context *smu, enum
> amd_pp_sensors sensor,
> void *data, uint32_t *size);
>
> + /**
> + * @get_apu_thermal_limit: get apu core limit from smu
> + * &limit: current limit temperature in millidegrees Celsius
> + */
> + int (*get_apu_thermal_limit)(struct smu_context *smu, uint32_t
> *limit);
> +
> + /**
> + * @set_apu_thermal_limit: update all controllers with new limit
> + * &limit: limit temperature to be set, in millidegrees Celsius
> + */
> + int (*set_apu_thermal_limit)(struct smu_context *smu, uint32_t
> limit);
> +
> /**
> * @pre_display_config_changed: Prepare GPU for a display
> configuration
> * change.
> --
> 2.25.1
More information about the amd-gfx
mailing list