[PATCH 2/8] drm/amd/powerplay: add new sysfs interface for retrieving gpu metrics(V2)
Evan Quan
evan.quan at amd.com
Thu Jul 30 03:28:33 UTC 2020
A new interface for UMD to retrieve gpu metrics data.
V2: rich the documentation
Change-Id: If7f3523915505c0ece0a56dfd476d2b8473440d4
Signed-off-by: Evan Quan <evan.quan at amd.com>
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
---
Documentation/gpu/amdgpu.rst | 6 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | 3 +
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 57 +++++++++++++++++++
.../gpu/drm/amd/include/kgd_pp_interface.h | 1 +
drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 20 +++++++
.../gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 3 +
6 files changed, 90 insertions(+)
diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
index 17112352f605..0f7679a7cf54 100644
--- a/Documentation/gpu/amdgpu.rst
+++ b/Documentation/gpu/amdgpu.rst
@@ -206,6 +206,12 @@ pp_power_profile_mode
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
:doc: mem_busy_percent
+gpu_metrics
+~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+ :doc: gpu_metrics
+
GPU Product Information
=======================
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index aa27fe65cdfa..b190c0af7db1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -369,6 +369,9 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->set_ppfeature_status(\
(adev)->powerplay.pp_handle, (ppfeatures)))
+#define amdgpu_dpm_get_gpu_metrics(adev, table) \
+ ((adev)->powerplay.pp_funcs->get_gpu_metrics((adev)->powerplay.pp_handle, table))
+
struct amdgpu_dpm {
struct amdgpu_ps *ps;
/* number of valid power states */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 6df405e6221d..0198acd320b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -2122,6 +2122,59 @@ static ssize_t amdgpu_set_thermal_throttling_logging(struct device *dev,
return count;
}
+/**
+ * DOC: gpu_metrics
+ *
+ * The amdgpu driver provides a sysfs API for retrieving current gpu
+ * metrics data. The file gpu_metrics is used for this. Reading the
+ * file will dump all the current gpu metrics data.
+ *
+ * These data include temperature, frequency, engines utilization,
+ * power consume, throttler status, fan speed and cpu core statistics(
+ * available for APU only). That's it will give a snapshot of all sensors
+ * at the same time.
+ */
+static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ void *gpu_metrics;
+ ssize_t size = 0;
+ int ret;
+
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
+ return ret;
+ }
+
+ down_read(&adev->reset_sem);
+ if (is_support_sw_smu(adev))
+ size = smu_sys_get_gpu_metrics(&adev->smu, &gpu_metrics);
+ else if (adev->powerplay.pp_funcs->get_gpu_metrics)
+ size = amdgpu_dpm_get_gpu_metrics(adev, &gpu_metrics);
+ up_read(&adev->reset_sem);
+
+ if (size <= 0)
+ goto out;
+
+ if (size >= PAGE_SIZE)
+ size = PAGE_SIZE - 1;
+
+ memcpy(buf, gpu_metrics, size);
+
+out:
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
+}
+
static struct amdgpu_device_attr amdgpu_device_attrs[] = {
AMDGPU_DEVICE_ATTR_RW(power_dpm_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC),
@@ -2145,6 +2198,7 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = {
AMDGPU_DEVICE_ATTR_RW(pp_features, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RO(unique_id, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(thermal_throttling_logging, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RO(gpu_metrics, ATTR_FLAG_BASIC),
};
static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
@@ -2194,6 +2248,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
} else if (DEVICE_ATTR_IS(pp_features)) {
if (adev->flags & AMD_IS_APU || asic_type < CHIP_VEGA10)
*states = ATTR_STATE_UNSUPPORTED;
+ } else if (DEVICE_ATTR_IS(gpu_metrics)) {
+ if (asic_type < CHIP_VEGA12)
+ *states = ATTR_STATE_UNSUPPORTED;
}
if (asic_type == CHIP_ARCTURUS) {
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 5122f8e5436a..1900cfa20601 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -322,6 +322,7 @@ struct amd_pm_funcs {
int (*asic_reset_mode_2)(void *handle);
int (*set_df_cstate)(void *handle, enum pp_df_cstate state);
int (*set_xgmi_pstate)(void *handle, uint32_t pstate);
+ ssize_t (*get_gpu_metrics)(void *handle, void **table);
};
struct metrics_table_header {
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 55463e7a11e2..cf9c5205ef08 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -2516,3 +2516,23 @@ int smu_get_dpm_clock_table(struct smu_context *smu,
return ret;
}
+
+ssize_t smu_sys_get_gpu_metrics(struct smu_context *smu,
+ void **table)
+{
+ ssize_t size;
+
+ if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+ return -EOPNOTSUPP;
+
+ if (!smu->ppt_funcs->get_gpu_metrics)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&smu->mutex);
+
+ size = smu->ppt_funcs->get_gpu_metrics(smu, table);
+
+ mutex_unlock(&smu->mutex);
+
+ return size;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index b57b10406390..a08155b83289 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -589,6 +589,7 @@ struct pptable_funcs {
void (*log_thermal_throttling_event)(struct smu_context *smu);
size_t (*get_pp_feature_mask)(struct smu_context *smu, char *buf);
int (*set_pp_feature_mask)(struct smu_context *smu, uint64_t new_mask);
+ ssize_t (*get_gpu_metrics)(struct smu_context *smu, void **table);
};
typedef enum {
@@ -791,5 +792,7 @@ int smu_get_dpm_clock_table(struct smu_context *smu,
int smu_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value);
+ssize_t smu_sys_get_gpu_metrics(struct smu_context *smu, void **table);
+
#endif
#endif
--
2.28.0
More information about the amd-gfx
mailing list