[PATCH v3 09/10] drm/xe/pmu: Add PMU support for per-engine-class activity
Riana Tauro
riana.tauro at intel.com
Mon Jan 6 07:55:58 UTC 2025
PMU provides two counters (engine-active-ticks, total-ticks)
to calculate engine activity. When querying engine activity, the
user must group these two counters using the perf_event
group mechanism to ensure both counters are sampled together.
To list the events
./perf list
xe_0000_03_00.0/engine-active-ticks/ [Kernel PMU event]
xe_0000_03_00.0/total-ticks/ [Kernel PMU event]
The formats to be used with the above are
engine_instance - config:12-19
engine_class - config:20-27
gt_id - config:60-63
The events can then be read using perf tool
./perf stat -e xe_0000_03_00.0/engine-active-ticks,gt_id=0,
engine_class=0,engine_instance=0/,
xe_0000_03_00.0/total-ticks,gt_id=0,
engine_class=0,engine_instance=0/ -I 1000
Engine activity can then be calculated as below
engine activity % = (engine active ticks/total ticks) * 100
Signed-off-by: Riana Tauro <riana.tauro at intel.com>
---
drivers/gpu/drm/xe/xe_guc.c | 5 ++
drivers/gpu/drm/xe/xe_pmu.c | 139 +++++++++++++++++++++++++-----
drivers/gpu/drm/xe/xe_pmu_types.h | 7 ++
drivers/gpu/drm/xe/xe_uc.c | 3 +
4 files changed, 131 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 408365dfe4ee..f229745b78b9 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -26,6 +26,7 @@
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_db_mgr.h"
+#include "xe_guc_engine_activity.h"
#include "xe_guc_hwconfig.h"
#include "xe_guc_log.h"
#include "xe_guc_pc.h"
@@ -743,6 +744,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
if (ret)
return ret;
+ ret = xe_guc_engine_activity_init(guc);
+ if (ret)
+ return ret;
+
return xe_guc_ads_init_post_hwconfig(&guc->ads);
}
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index bae8eb38fddd..5bd312b6b8f6 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -12,7 +12,9 @@
#include "xe_force_wake.h"
#include "xe_gt_clock.h"
#include "xe_gt_idle.h"
+#include "xe_guc_engine_activity.h"
#include "xe_guc_pc.h"
+#include "xe_hw_engine.h"
#include "xe_mmio.h"
#include "xe_macros.h"
#include "xe_module.h"
@@ -90,6 +92,17 @@ static unsigned int xe_pmu_target_cpu = -1;
* 1950
* 1950
* 1950
+ *
+ * Engine Activity: PMU provides two counters (engine-active-ticks, total-ticks) to calculate
+ * engine activity. While querying the engine activity the user should group these two counters
+ * using the perf_event group mechanism to ensure both counters are sampled together.
+ *
+ * To read an engine-specific event for the engine of class 1 and instance 0 on GT 0
+ *
+ * perf stat -e xe_0000_03_00.0/engine-active-ticks,gt_id=0,engine_class=1,engine_instance=0/,
+ * xe_0000_03_00.0/total-ticks,gt_id=0,engine_class=1,engine_instance=0/ -I 1000
+ *
+ * engine active % = (engine active ticks/total ticks) * 100
*/
static struct xe_pmu *event_to_pmu(struct perf_event *event)
@@ -107,6 +120,33 @@ static u64 config_counter(const u64 config)
return config & ~(~0ULL << __XE_PMU_GT_SHIFT);
}
+static u64 engine_event_sample(const u64 config)
+{
+ return config_counter(config) & 0xfff;
+}
+
+static u8 engine_event_class(const u64 config)
+{
+ return (config_counter(config) >> XE_PMU_CLASS_SHIFT) & 0xff;
+}
+
+static u8 engine_event_instance(const u64 config)
+{
+ return (config_counter(config) >> XE_PMU_INSTANCE_SHIFT) & 0xff;
+}
+
+static bool is_engine_event(struct xe_device *xe, const u64 config)
+{
+ const u64 gt_id = config >> __XE_PMU_GT_SHIFT;
+ struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+ u64 sample = engine_event_sample(config);
+
+ if (!xe_guc_engine_activity_supported(&gt->uc.guc))
+ return false;
+
+ return ((sample == XE_PMU_ENGINE_ACTIVITY_TICKS) || (sample == XE_PMU_TOTAL_TICKS));
+}
+
static unsigned int pm_bit(const u64 config)
{
unsigned int val;
@@ -192,6 +232,23 @@ config_status(struct xe_device *xe, u64 config)
return 0;
}
+static int engine_event_init(struct xe_device *xe, u64 config)
+{
+ const unsigned int gt_id = config_gt_id(config);
+ struct drm_xe_engine_class_instance eci;
+ struct xe_hw_engine *hwe;
+
+ eci.engine_class = engine_event_class(config);
+ eci.engine_instance = engine_event_instance(config);
+ eci.gt_id = gt_id;
+
+ hwe = xe_hw_engine_lookup(xe, eci);
+ if (!hwe || xe_hw_engine_is_reserved(hwe))
+ return -ENOENT;
+
+ return 0;
+}
+
static int xe_pmu_event_init(struct perf_event *event)
{
struct xe_device *xe =
@@ -221,7 +278,12 @@ static int xe_pmu_event_init(struct perf_event *event)
return -EINVAL;
event_config = event->attr.config;
- ret = config_status(xe, event_config);
+
+ if (is_engine_event(xe, event_config))
+ ret = engine_event_init(xe, event_config);
+ else
+ ret = config_status(xe, event_config);
+
if (ret)
return ret;
@@ -300,24 +362,49 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
u64 val = 0;
- switch (config_counter(config)) {
- case XE_PMU_C6_RESIDENCY:
- val = get_c6(gt);
- break;
- case XE_PMU_ACTUAL_FREQUENCY:
- val =
- div_u64(read_sample(pmu, gt_id,
- __XE_SAMPLE_FREQ_ACT),
- USEC_PER_SEC /* to MHz */);
- break;
- case XE_PMU_REQUESTED_FREQUENCY:
- val =
- div_u64(read_sample(pmu, gt_id,
- __XE_SAMPLE_FREQ_REQ),
- USEC_PER_SEC /* to MHz */);
- break;
- default:
- drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
+ if (is_engine_event(xe, config)) {
+ struct drm_xe_engine_class_instance eci;
+ struct xe_hw_engine *hwe;
+ u64 sample = engine_event_sample(config);
+
+ eci.engine_class = engine_event_class(config);
+ eci.engine_instance = engine_event_instance(config);
+ eci.gt_id = gt_id;
+
+ hwe = xe_hw_engine_lookup(xe, eci);
+ if (!hwe)
+ drm_WARN_ON_ONCE(&xe->drm, "unknown engine\n");
+
+ if (xe_pm_runtime_suspended(xe))
+ return 0;
+
+ if (sample == XE_PMU_ENGINE_ACTIVITY_TICKS)
+ val = xe_guc_engine_activity_active_ticks(hwe);
+ else if (sample == XE_PMU_TOTAL_TICKS)
+ val = xe_guc_engine_activity_total_ticks(hwe);
+ else
+ drm_warn(&xe->drm, "unknown pmu engine event\n");
+
+ } else {
+ switch (config_counter(config)) {
+ case XE_PMU_C6_RESIDENCY:
+ val = get_c6(gt);
+ break;
+ case XE_PMU_ACTUAL_FREQUENCY:
+ val =
+ div_u64(read_sample(pmu, gt_id,
+ __XE_SAMPLE_FREQ_ACT),
+ USEC_PER_SEC /* to MHz */);
+ break;
+ case XE_PMU_REQUESTED_FREQUENCY:
+ val =
+ div_u64(read_sample(pmu, gt_id,
+ __XE_SAMPLE_FREQ_REQ),
+ USEC_PER_SEC /* to MHz */);
+ break;
+ default:
+ drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
+ }
}
return val;
@@ -614,11 +701,15 @@ struct xe_str_attribute {
const char *str;
};
-PMU_FORMAT_ATTR(event, "config:0-11");
-PMU_FORMAT_ATTR(gt_id, "config:60-63");
+PMU_FORMAT_ATTR(event, "config:0-11");
+PMU_FORMAT_ATTR(engine_instance, "config:12-19");
+PMU_FORMAT_ATTR(engine_class, "config:20-27");
+PMU_FORMAT_ATTR(gt_id, "config:60-63");
static struct attribute *xe_pmu_format_attrs[] = {
&format_attr_event.attr,
+ &format_attr_engine_class.attr,
+ &format_attr_engine_instance.attr,
&format_attr_gt_id.attr,
NULL,
};
@@ -703,6 +794,8 @@ create_event_attributes(struct xe_pmu *pmu)
__event(0, "c6-residency", "ms"),
__event(1, "actual-frequency", "M"),
__event(2, "requested-frequency", "M"),
+ __event(XE_PMU_ENGINE_ACTIVITY_TICKS, "engine-active-ticks", NULL),
+ __event(XE_PMU_TOTAL_TICKS, "total-ticks", NULL),
};
struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
@@ -715,7 +808,7 @@ create_event_attributes(struct xe_pmu *pmu)
for (i = 0; i < ARRAY_SIZE(events); i++) {
u64 config = __XE_PMU_EVENT(events[i].counter);
- if (!config_status(xe, config))
+ if (!config_status(xe, config) || is_engine_event(xe, config))
count++;
}
@@ -741,7 +834,7 @@ create_event_attributes(struct xe_pmu *pmu)
u64 config = __XE_PMU_EVENT(events[i].counter);
char *str;
- if (config_status(xe, config))
+ if (config_status(xe, config) && !is_engine_event(xe, config))
continue;
str = kasprintf(GFP_KERNEL, "%s",
diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h
index 0109bb11937b..bb27d0950b72 100644
--- a/drivers/gpu/drm/xe/xe_pmu_types.h
+++ b/drivers/gpu/drm/xe/xe_pmu_types.h
@@ -19,6 +19,11 @@ enum {
#define XE_PMU_MAX_GT 2
+/* The low 12 bits of config (0-11) hold the event id; engine instance and class follow */
+#define XE_PMU_INSTANCE_SHIFT (12)
+#define XE_PMU_INSTANCE_BITS (8)
+#define XE_PMU_CLASS_SHIFT \
+ (XE_PMU_INSTANCE_SHIFT + XE_PMU_INSTANCE_BITS)
/*
* Top bits of every counter are GT id.
*/
@@ -32,6 +37,8 @@ enum {
#define XE_PMU_C6_RESIDENCY __XE_PMU_EVENT(0)
#define XE_PMU_ACTUAL_FREQUENCY __XE_PMU_EVENT(1)
#define XE_PMU_REQUESTED_FREQUENCY __XE_PMU_EVENT(2)
+#define XE_PMU_ENGINE_ACTIVITY_TICKS __XE_PMU_EVENT(3)
+#define XE_PMU_TOTAL_TICKS __XE_PMU_EVENT(4)
#define __XE_PMU_C6_RESIDENCY(gt) ___XE_PMU_EVENT(gt, 0)
#define __XE_PMU_ACTUAL_FREQUENCY(gt) ___XE_PMU_EVENT(gt, 1)
#define __XE_PMU_REQUESTED_FREQUENCY(gt) ___XE_PMU_EVENT(gt, 2)
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 0d073a9987c2..d129bdeaed57 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -13,6 +13,7 @@
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
+#include "xe_guc_engine_activity.h"
#include "xe_guc_pc.h"
#include "xe_huc.h"
#include "xe_sriov.h"
@@ -210,6 +211,8 @@ int xe_uc_init_hw(struct xe_uc *uc)
if (ret)
return ret;
+ xe_guc_engine_activity_enable_stats(&uc->guc);
+
/* We don't fail the driver load if HuC fails to auth, but let's warn */
ret = xe_huc_auth(&uc->huc, XE_HUC_AUTH_VIA_GUC);
xe_gt_assert(uc_to_gt(uc), !ret);
--
2.47.1
More information about the Intel-xe
mailing list