[PATCH v3 2/3] drm/amd/pm: Add gpu_metrics_v1_4
Asad Kamal
asad.kamal at amd.com
Mon Oct 9 14:53:14 UTC 2023
Add new gpu_metrics_v1_4 to acquire XGMI data transfer,
pcie bandwidth & Clock lock status
v2:
Add pcie error counter to gpu metric table v1_4
Signed-off-by: Asad Kamal <asad.kamal at amd.com>
---
.../gpu/drm/amd/include/kgd_pp_interface.h | 76 +++++++++++++++++++
drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 3 +
2 files changed, 79 insertions(+)
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index e0bb6d39f0c3..eeb547d79eac 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -313,6 +313,10 @@ enum pp_xgmi_plpd_mode {
#define XGMI_MODE_PSTATE_D0 1
#define NUM_HBM_INSTANCES 4
+#define NUM_XGMI_LINKS 8
+#define MAX_GFX_CLKS 8
+#define MAX_CLKS 4
+#define NUM_VCN 4
struct seq_file;
enum amd_pp_clock_type;
@@ -696,6 +700,78 @@ struct gpu_metrics_v1_3 {
uint64_t indep_throttle_status;
};
+struct gpu_metrics_v1_4 {
+ struct metrics_table_header common_header;
+
+ /* Temperature (Celsius) */
+ uint16_t temperature_hotspot;
+ uint16_t temperature_mem;
+ uint16_t temperature_vrsoc;
+
+ /* Power (Watts) */
+ uint16_t curr_socket_power;
+
+ /* Utilization (%) */
+ uint16_t average_gfx_activity;
+ uint16_t average_umc_activity; // memory controller
+ uint16_t vcn_activity[NUM_VCN];
+
+ /* Energy (15.259uJ (2^-16) units) */
+ uint64_t energy_accumulator;
+
+ /* Driver attached timestamp (in ns) */
+ uint64_t system_clock_counter;
+
+ /* Throttle status */
+ uint32_t throttle_status;
+
+ /* Clock Lock Status. Each bit corresponds to clock instance */
+ uint32_t gfxclk_lock_status;
+
+ /* Link width (number of lanes) and speed (in 0.1 GT/s) */
+ uint16_t pcie_link_width;
+ uint16_t pcie_link_speed;
+
+ /* XGMI bus width and bitrate (in Gbps) */
+ uint16_t xgmi_link_width;
+ uint16_t xgmi_link_speed;
+
+ /* Utilization Accumulated (%) */
+ uint32_t gfx_activity_acc;
+ uint32_t mem_activity_acc;
+
+ /*PCIE accumulated bandwidth (GB/sec) */
+ uint64_t pcie_bandwidth_acc;
+
+ /*PCIE instantaneous bandwidth (GB/sec) */
+ uint64_t pcie_bandwidth_inst;
+
+ /* PCIE L0 to recovery state transition accumulated count */
+ uint64_t pcie_l0_to_recov_count_acc;
+
+ /* PCIE replay accumulated count */
+ uint64_t pcie_replay_count_acc;
+
+ /* PCIE replay rollover accumulated count */
+ uint64_t pcie_replay_rover_count_acc;
+
+ /* XGMI accumulated data transfer size(KiloBytes) */
+ uint64_t xgmi_read_data_acc[NUM_XGMI_LINKS];
+ uint64_t xgmi_write_data_acc[NUM_XGMI_LINKS];
+
+ /* PMFW attached timestamp (10ns resolution) */
+ uint64_t firmware_timestamp;
+
+ /* Current clocks (Mhz) */
+ uint16_t current_gfxclk[MAX_GFX_CLKS];
+ uint16_t current_socclk[MAX_CLKS];
+ uint16_t current_vclk0[MAX_CLKS];
+ uint16_t current_dclk0[MAX_CLKS];
+ uint16_t current_uclk;
+
+ uint16_t padding;
+};
+
/*
* gpu_metrics_v2_0 is not recommended as it's not naturally aligned.
* Use gpu_metrics_v2_1 or later instead.
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 12618a583e97..6e57c94379a9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -986,6 +986,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev)
case METRICS_VERSION(1, 3):
structure_size = sizeof(struct gpu_metrics_v1_3);
break;
+ case METRICS_VERSION(1, 4):
+ structure_size = sizeof(struct gpu_metrics_v1_4);
+ break;
case METRICS_VERSION(2, 0):
structure_size = sizeof(struct gpu_metrics_v2_0);
break;
--
2.42.0
More information about the amd-gfx
mailing list