[PATCH v2 6/8] RFC drm/xe/pmu: Add PMU counters for engine busy ticks
Riana Tauro
riana.tauro at intel.com
Thu Dec 7 12:58:00 UTC 2023
GuC provides engine busyness as a 64-bit counter that counts
clock ticks. These counters are maintained in a
shared memory buffer and are updated internally on a continuous basis.
GuC also provides a periodically updated count of the total ticks that the
GT has been active for. These counters are exposed to the user so that
busyness can be calculated as a percentage using
busyness % = (engine active ticks / total active ticks) * 100.
Expose busy-ticks for each engine as a PMU counter.
These are listed by the perf tool as
sudo ./perf list
xe_0000_03_00.0/bcs0-busy-ticks-gt0/ [Kernel PMU event]
xe_0000_03_00.0/ccs0-busy-ticks-gt0/ [Kernel PMU event]
xe_0000_03_00.0/rcs0-busy-ticks-gt0/ [Kernel PMU event]
xe_0000_03_00.0/vcs0-busy-ticks-gt0/ [Kernel PMU event]
xe_0000_03_00.0/vecs0-busy-ticks-gt0/ [Kernel PMU event]
and read as
sudo ./perf stat -e xe_0000_03_00.0/bcs0-busy-ticks-gt0/ -I 1000
time counts unit events
1.000674178 2052 xe_0000_03_00.0/bcs0-busy-ticks-gt0/
2.006626312 2033 xe_0000_03_00.0/bcs0-busy-ticks-gt0/
3.009499300 40067 xe_0000_03_00.0/bcs0-busy-ticks-gt0/
4.010521486 8491 xe_0000_03_00.0/bcs0-busy-ticks-gt0/
v2: rebase
Signed-off-by: Riana Tauro <riana.tauro at intel.com>
---
drivers/gpu/drm/xe/xe_pmu.c | 152 ++++++++++++++++++++++++++++++++----
1 file changed, 137 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index 855cd7b3edb3..fa926e17ef6a 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -13,6 +13,8 @@
#include "xe_gt_clock.h"
#include "xe_mmio.h"
+#define XE_ENGINE_SAMPLE_COUNT (DRM_XE_PMU_SAMPLE_BUSY_TICKS + 1)
+
static cpumask_t xe_pmu_cpumask;
static unsigned int xe_pmu_target_cpu = -1;
@@ -26,6 +28,35 @@ static u64 config_counter(const u64 config)
return config & ~(~0ULL << __DRM_XE_PMU_GT_SHIFT);
}
+static u8 engine_event_sample(struct perf_event *event)
+{
+ u64 config = event->attr.config;
+
+ return (config - __DRM_XE_PMU_ENGINE_OFFSET(config_gt_id(config)))
+ & 0xf;
+}
+
+static u8 engine_event_class(struct perf_event *event)
+{
+ u64 config = event->attr.config;
+
+ return ((config - __DRM_XE_PMU_ENGINE_OFFSET(config_gt_id(config)))
+ >> __DRM_XE_PMU_CLASS_SHIFT) & 0xff;
+}
+
+static u8 engine_event_instance(struct perf_event *event)
+{
+ u64 config = event->attr.config;
+
+ return ((config - __DRM_XE_PMU_ENGINE_OFFSET(config_gt_id(config)))
+ >> __DRM_XE_PMU_SAMPLE_BITS) & 0xff;
+}
+
+static bool is_engine_event(struct perf_event *event)
+{
+ return config_counter(event->attr.config) >= __DRM_XE_PMU_ENGINE_OFFSET(0);
+}
+
static void xe_pmu_event_destroy(struct perf_event *event)
{
struct xe_device *xe =
@@ -134,6 +165,31 @@ config_status(struct xe_device *xe, u64 config)
return 0;
}
+static int engine_event_status(struct xe_hw_engine *hwe,
+ enum drm_xe_pmu_engine_sample sample)
+{
+ if (!hwe)
+ return -ENODEV;
+
+ /* Other engine events will be added, XE_ENGINE_SAMPLE_COUNT will be changed */
+ return (sample >= DRM_XE_PMU_SAMPLE_BUSY_TICKS && sample < XE_ENGINE_SAMPLE_COUNT)
+ ? 0 : -ENOENT;
+}
+
+static int engine_event_init(struct perf_event *event)
+{
+ struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
+ const u64 config = event->attr.config;
+ const unsigned int gt_id = config_gt_id(config);
+ struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+ struct xe_hw_engine *hwe;
+
+ hwe = xe_gt_hw_engine(gt, xe_hw_engine_from_user_class(engine_event_class(event)),
+ engine_event_instance(event), true);
+
+ return engine_event_status(hwe, engine_event_sample(event));
+}
+
static int xe_pmu_event_init(struct perf_event *event)
{
struct xe_device *xe =
@@ -161,7 +217,10 @@ static int xe_pmu_event_init(struct perf_event *event)
if (!cpumask_test_cpu(event->cpu, &xe_pmu_cpumask))
return -EINVAL;
- ret = config_status(xe, event->attr.config);
+ if (is_engine_event(event))
+ ret = engine_event_init(event);
+ else
+ ret = config_status(xe, event->attr.config);
if (ret)
return ret;
@@ -180,20 +239,35 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
const unsigned int gt_id = config_gt_id(event->attr.config);
const u64 config = event->attr.config;
struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
- u64 val;
-
- switch (config_counter(config)) {
- case DRM_XE_PMU_RENDER_GROUP_BUSY(0):
- case DRM_XE_PMU_COPY_GROUP_BUSY(0):
- case DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
- case DRM_XE_PMU_MEDIA_GROUP_BUSY(0):
- val = engine_group_busyness_read(gt, config);
- break;
- case DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0):
- val = xe_gt_total_active_ticks(gt);
- break;
- default:
- drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
+ u64 val = 0;
+
+ if (is_engine_event(event)) {
+ u8 sample = engine_event_sample(event);
+ struct xe_hw_engine *hwe;
+
+ hwe = xe_gt_hw_engine(gt, xe_hw_engine_from_user_class(engine_event_class(event)),
+ engine_event_instance(event), true);
+
+ if (!hwe)
+ drm_WARN_ON_ONCE(&xe->drm, "unknown engine\n");
+ else if (sample == DRM_XE_PMU_SAMPLE_BUSY_TICKS)
+ val = xe_gt_engine_busy_ticks(gt, hwe);
+ else
+ drm_warn(&xe->drm, "unknown pmu engine event\n");
+ } else {
+ switch (config_counter(config)) {
+ case DRM_XE_PMU_RENDER_GROUP_BUSY(0):
+ case DRM_XE_PMU_COPY_GROUP_BUSY(0):
+ case DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
+ case DRM_XE_PMU_MEDIA_GROUP_BUSY(0):
+ val = engine_group_busyness_read(gt, config);
+ break;
+ case DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0):
+ val = xe_gt_total_active_ticks(gt);
+ break;
+ default:
+ drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
+ }
}
return val;
@@ -324,6 +398,12 @@ static const struct attribute_group xe_pmu_cpumask_attr_group = {
.global = true, \
}
+#define __engine_event(__sample, __name) \
+{ \
+ .sample = (__sample), \
+ .name = (__name), \
+}
+
static struct xe_ext_attribute *
add_xe_attr(struct xe_ext_attribute *attr, const char *name, u64 config)
{
@@ -366,9 +446,18 @@ create_event_attributes(struct xe_pmu *pmu)
__event(4, "total-active-ticks", NULL),
};
+ static const struct {
+ enum drm_xe_pmu_engine_sample sample;
+ char *name;
+ } engine_events[] = {
+ __engine_event(DRM_XE_PMU_SAMPLE_BUSY_TICKS, "busy-ticks"),
+ };
+
struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
struct xe_ext_attribute *xe_attr = NULL, *xe_iter;
struct attribute **attr = NULL, **attr_iter;
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
unsigned int count = 0;
unsigned int i, j;
struct xe_gt *gt;
@@ -383,6 +472,15 @@ create_event_attributes(struct xe_pmu *pmu)
}
}
+ for_each_gt(gt, xe, j) {
+ for_each_hw_engine(hwe, gt, id) {
+ for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
+ if (!engine_event_status(hwe, engine_events[i].sample))
+ count++;
+ }
+ }
+ }
+
/* Allocate attribute objects and table. */
xe_attr = kcalloc(count, sizeof(*xe_attr), GFP_KERNEL);
if (!xe_attr)
@@ -437,6 +535,30 @@ create_event_attributes(struct xe_pmu *pmu)
}
}
+ /* Initialize supported engine counters */
+ for_each_gt(gt, xe, j) {
+ for_each_hw_engine(hwe, gt, id) {
+ for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
+ char *str;
+
+ if (engine_event_status(hwe, engine_events[i].sample))
+ continue;
+
+ str = kasprintf(GFP_KERNEL, "%s-%s-gt%u",
+ hwe->name, engine_events[i].name, j);
+
+ if (!str)
+ goto err;
+
+ *attr_iter++ = &xe_iter->attr.attr;
+ xe_iter = add_xe_attr(xe_iter, str,
+ __DRM_XE_PMU_ENGINE(j, xe_hw_engine_to_user_class(hwe->class),
+ hwe->logical_instance,
+ engine_events[i].sample));
+ }
+ }
+ }
+
pmu->xe_attr = xe_attr;
pmu->pmu_attr = pmu_attr;
--
2.40.0
More information about the Intel-xe
mailing list