[PATCH 4/4] drm/amdgpu: added a sysfs interface for thermal throttling related V3
Evan Quan
evan.quan at amd.com
Thu May 28 03:04:44 UTC 2020
User can check and set the enablement of throttling logging and
the interval between each logging.
V2: simplify the sysfs interface(no string parsing)
V3: add proper lock protection on updating throttling_logging_rs.interval
Change-Id: Id37710e1e7fe0aaf7cc858a554db60e583be611d
Signed-off-by: Evan Quan <evan.quan at amd.com>
Reviewed-by: Christian König <christian.koenig at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++++
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 71 ++++++++++++++++++++++
drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 10 ++-
4 files changed, 89 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 894fe58276ae..6e74ff9990e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -996,6 +996,9 @@ struct amdgpu_device {
char serial[16];
struct amdgpu_autodump autodump;
+
+ atomic_t throttling_logging_enabled;
+ struct ratelimit_state throttling_logging_rs;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2f0e8da7bacf..f1bfdba8bd6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3035,6 +3035,17 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->gfx.gfx_off_req_count = 1;
adev->pm.ac_power = power_supply_is_system_supplied() > 0;
+ atomic_set(&adev->throttling_logging_enabled, 1);
+ /*
+ * if the throttling continues, the logging will be performed every
+ * minute to avoid log flooding. "-1" applied here due to the thermal
+ * throttling interrupt comes every 1 second. So, the total logging
+ * interval is 59 seconds(retelimited printk interval) + 1(waiting
+ * for throttling interrup) = 60 seconds.
+ */
+ ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
+ ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
+
/* Registers mapping */
/* TODO: block userspace mapping of io register */
if (adev->asic_type >= CHIP_BONAIRE) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 808884aaf36d..37fc54c7f586 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -1808,6 +1808,76 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
return 0;
}
+/**
+ * DOC: thermal_throttling_logging
+ *
+ * Thermal throttling will pull down the clock frequency and thus the performance.
+ * It's an useful mechanism to protect the chip from overheat. And due to performace
+ * impact, it will be good to prompt(log) user those thermal throttling events.
+ *
+ * Reading back the file will show you the status(enabled or disabled) of the thermal
+ * throttling logging and the current interval(in seconds) between each logging.
+ *
+ * By writing the file with a new logging "interval", you can manually adjust these
+ * settings. The "interval" specifies the interval(in seconds) between each logging.
+ * With "interval > 0 && interval <= 3600", the logging interval will be updated
+ * on request. And the logging feature will be reenabled if it was disabled.
+ * With "interval <= 0", the thermal throttling logging feature will be disabled.
+ * And for other setting("interval > 3600"), it is invalid and will be ignored
+ * by driver.
+ */
+static ssize_t amdgpu_get_thermal_throttling_logging(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+
+ return snprintf(buf, PAGE_SIZE, "%s: thermal throttling logging %s, with interval %d seconds\n",
+ adev->ddev->unique,
+ atomic_read(&adev->throttling_logging_enabled) ? "enabled" : "disabled",
+ adev->throttling_logging_rs.interval / HZ + 1);
+}
+
+static ssize_t amdgpu_set_thermal_throttling_logging(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ long throttling_logging_interval;
+ unsigned long flags;
+ int ret = 0;
+
+ ret = kstrtol(buf, 0, &throttling_logging_interval);
+ if (ret)
+ return ret;
+
+ if (throttling_logging_interval > 3600)
+ return -EINVAL;
+
+ if (throttling_logging_interval > 0) {
+ raw_spin_lock_irqsave(&adev->throttling_logging_rs.lock, flags);
+ /*
+ * Reset the ratelimit timer internals.
+ * This can effectively restart the timer.
+ */
+ adev->throttling_logging_rs.interval =
+ (throttling_logging_interval - 1) * HZ;
+ adev->throttling_logging_rs.begin = 0;
+ adev->throttling_logging_rs.printed = 0;
+ adev->throttling_logging_rs.missed = 0;
+ raw_spin_unlock_irqrestore(&adev->throttling_logging_rs.lock, flags);
+
+ atomic_set(&adev->throttling_logging_enabled, 1);
+ } else {
+ atomic_set(&adev->throttling_logging_enabled, 0);
+ }
+
+ return count;
+}
+
static struct amdgpu_device_attr amdgpu_device_attrs[] = {
AMDGPU_DEVICE_ATTR_RW(power_dpm_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
@@ -1830,6 +1900,7 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = {
AMDGPU_DEVICE_ATTR_RO(pcie_bw, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(pp_features, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RO(unique_id, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RW(thermal_throttling_logging, ATTR_FLAG_BASIC),
};
static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index 7a2e855608de..edc9782743d2 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -1533,11 +1533,6 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev,
*/
uint32_t ctxid = entry->src_data[0];
uint32_t data;
- /*
- * if the throttling continues, the logging will be performed every
- * minute to avoid log flooding.
- */
- static DEFINE_RATELIMIT_STATE(ratelimit_state, 60 * HZ, 1);
if (client_id == SOC15_IH_CLIENTID_THM) {
switch (src_id) {
@@ -1582,7 +1577,10 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev,
smu_v11_0_ack_ac_dc_interrupt(&adev->smu);
break;
case 0x7:
- if (__ratelimit(&ratelimit_state))
+ if (!atomic_read(&adev->throttling_logging_enabled))
+ return 0;
+
+ if (__ratelimit(&adev->throttling_logging_rs))
smu_log_thermal_throttling(smu);
break;
--
2.26.2
More information about the amd-gfx
mailing list