[PATCH v2 6/8] RFC drm/xe/pmu: Add PMU counters for engine busy ticks

Riana Tauro riana.tauro at intel.com
Thu Dec 7 12:58:00 UTC 2023


GuC provides engine busyness ticks as 64-bit counters which count
clock ticks. These counters are maintained in a shared memory buffer
and are updated internally on a continuous basis.

GuC also periodically provides the total active ticks that the GT has
been active for. This counter is exposed to the user so that busyness
can be calculated as a percentage using

busyness % = (engine active ticks/total active ticks) * 100.

Expose busy-ticks for each engine as a PMU counter.

This is listed by perf tool as

  sudo ./perf list
     xe_0000_03_00.0/bcs0-busy-ticks-gt0/               [Kernel PMU event]
     xe_0000_03_00.0/ccs0-busy-ticks-gt0/               [Kernel PMU event]
     xe_0000_03_00.0/rcs0-busy-ticks-gt0/               [Kernel PMU event]
     xe_0000_03_00.0/vcs0-busy-ticks-gt0/               [Kernel PMU event]
     xe_0000_03_00.0/vecs0-busy-ticks-gt0/              [Kernel PMU event]

and read as

  sudo ./perf stat -e xe_0000_03_00.0/bcs0-busy-ticks-gt0/  -I 1000
           time       counts unit       events
       1.000674178     2052       xe_0000_03_00.0/bcs0-busy-ticks-gt0/
       2.006626312     2033       xe_0000_03_00.0/bcs0-busy-ticks-gt0/
       3.009499300    40067       xe_0000_03_00.0/bcs0-busy-ticks-gt0/
       4.010521486     8491       xe_0000_03_00.0/bcs0-busy-ticks-gt0/

v2: rebase

Signed-off-by: Riana Tauro <riana.tauro at intel.com>
---
 drivers/gpu/drm/xe/xe_pmu.c | 152 ++++++++++++++++++++++++++++++++----
 1 file changed, 137 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index 855cd7b3edb3..fa926e17ef6a 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -13,6 +13,8 @@
 #include "xe_gt_clock.h"
 #include "xe_mmio.h"
 
+#define XE_ENGINE_SAMPLE_COUNT (DRM_XE_PMU_SAMPLE_BUSY_TICKS + 1)
+
 static cpumask_t xe_pmu_cpumask;
 static unsigned int xe_pmu_target_cpu = -1;
 
@@ -26,6 +28,35 @@ static u64 config_counter(const u64 config)
 	return config & ~(~0ULL << __DRM_XE_PMU_GT_SHIFT);
 }
 
+static u8 engine_event_sample(struct perf_event *event)
+{
+	u64 config = event->attr.config;
+
+	return (config - __DRM_XE_PMU_ENGINE_OFFSET(config_gt_id(config)))
+		& 0xf;
+}
+
+static u8 engine_event_class(struct perf_event *event)
+{
+	u64 config = event->attr.config;
+
+	return ((config - __DRM_XE_PMU_ENGINE_OFFSET(config_gt_id(config)))
+		>> __DRM_XE_PMU_CLASS_SHIFT) & 0xff;
+}
+
+static u8 engine_event_instance(struct perf_event *event)
+{
+	u64 config = event->attr.config;
+
+	return ((config - __DRM_XE_PMU_ENGINE_OFFSET(config_gt_id(config)))
+		>> __DRM_XE_PMU_SAMPLE_BITS) & 0xff;
+}
+
+static bool is_engine_event(struct perf_event *event)
+{
+	return config_counter(event->attr.config) >= __DRM_XE_PMU_ENGINE_OFFSET(0);
+}
+
 static void xe_pmu_event_destroy(struct perf_event *event)
 {
 	struct xe_device *xe =
@@ -134,6 +165,31 @@ config_status(struct xe_device *xe, u64 config)
 	return 0;
 }
 
+static int engine_event_status(struct xe_hw_engine *hwe,
+			       enum drm_xe_pmu_engine_sample sample)
+{
+	if (!hwe)
+		return -ENODEV;
+
+	/* Other engine events will be added, XE_ENGINE_SAMPLE_COUNT will be changed */
+	return (sample >= DRM_XE_PMU_SAMPLE_BUSY_TICKS && sample < XE_ENGINE_SAMPLE_COUNT)
+		? 0 : -ENOENT;
+}
+
+static int engine_event_init(struct perf_event *event)
+{
+	struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
+	const u64 config = event->attr.config;
+	const unsigned int gt_id = config_gt_id(config);
+	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+	struct xe_hw_engine *hwe;
+
+	hwe = xe_gt_hw_engine(gt, xe_hw_engine_from_user_class(engine_event_class(event)),
+			      engine_event_instance(event), true);
+
+	return engine_event_status(hwe, engine_event_sample(event));
+}
+
 static int xe_pmu_event_init(struct perf_event *event)
 {
 	struct xe_device *xe =
@@ -161,7 +217,10 @@ static int xe_pmu_event_init(struct perf_event *event)
 	if (!cpumask_test_cpu(event->cpu, &xe_pmu_cpumask))
 		return -EINVAL;
 
-	ret = config_status(xe, event->attr.config);
+	if (is_engine_event(event))
+		ret = engine_event_init(event);
+	else
+		ret = config_status(xe, event->attr.config);
 	if (ret)
 		return ret;
 
@@ -180,20 +239,35 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
 	const unsigned int gt_id = config_gt_id(event->attr.config);
 	const u64 config = event->attr.config;
 	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
-	u64 val;
-
-	switch (config_counter(config)) {
-	case DRM_XE_PMU_RENDER_GROUP_BUSY(0):
-	case DRM_XE_PMU_COPY_GROUP_BUSY(0):
-	case DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
-	case DRM_XE_PMU_MEDIA_GROUP_BUSY(0):
-		val = engine_group_busyness_read(gt, config);
-		break;
-	case DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0):
-		val = xe_gt_total_active_ticks(gt);
-		break;
-	default:
-		drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
+	u64 val = 0;
+
+	if (is_engine_event(event)) {
+		u8 sample = engine_event_sample(event);
+		struct xe_hw_engine *hwe;
+
+		hwe = xe_gt_hw_engine(gt, xe_hw_engine_from_user_class(engine_event_class(event)),
+				      engine_event_instance(event), true);
+
+		if (!hwe)
+			drm_WARN_ON_ONCE(&xe->drm, "unknown engine\n");
+		else if (sample == DRM_XE_PMU_SAMPLE_BUSY_TICKS)
+			val = xe_gt_engine_busy_ticks(gt, hwe);
+		else
+			drm_warn(&xe->drm, "unknown pmu engine event\n");
+	} else {
+		switch (config_counter(config)) {
+		case DRM_XE_PMU_RENDER_GROUP_BUSY(0):
+		case DRM_XE_PMU_COPY_GROUP_BUSY(0):
+		case DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
+		case DRM_XE_PMU_MEDIA_GROUP_BUSY(0):
+			val = engine_group_busyness_read(gt, config);
+			break;
+		case DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0):
+			val = xe_gt_total_active_ticks(gt);
+			break;
+		default:
+			drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
+		}
 	}
 
 	return val;
@@ -324,6 +398,12 @@ static const struct attribute_group xe_pmu_cpumask_attr_group = {
 	.global = true, \
 }
 
+#define __engine_event(__sample, __name) \
+{ \
+	.sample = (__sample), \
+	.name = (__name), \
+}
+
 static struct xe_ext_attribute *
 add_xe_attr(struct xe_ext_attribute *attr, const char *name, u64 config)
 {
@@ -366,9 +446,18 @@ create_event_attributes(struct xe_pmu *pmu)
 		__event(4, "total-active-ticks", NULL),
 	};
 
+	static const struct {
+		enum drm_xe_pmu_engine_sample sample;
+		char *name;
+	} engine_events[] = {
+		__engine_event(DRM_XE_PMU_SAMPLE_BUSY_TICKS, "busy-ticks"),
+	};
+
 	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
 	struct xe_ext_attribute *xe_attr = NULL, *xe_iter;
 	struct attribute **attr = NULL, **attr_iter;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
 	unsigned int count = 0;
 	unsigned int i, j;
 	struct xe_gt *gt;
@@ -383,6 +472,15 @@ create_event_attributes(struct xe_pmu *pmu)
 		}
 	}
 
+	for_each_gt(gt, xe, j) {
+		for_each_hw_engine(hwe, gt, id) {
+			for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
+				if (!engine_event_status(hwe, engine_events[i].sample))
+					count++;
+			}
+		}
+	}
+
 	/* Allocate attribute objects and table. */
 	xe_attr = kcalloc(count, sizeof(*xe_attr), GFP_KERNEL);
 	if (!xe_attr)
@@ -437,6 +535,30 @@ create_event_attributes(struct xe_pmu *pmu)
 		}
 	}
 
+	/* Initialize supported engine counters */
+	for_each_gt(gt, xe, j) {
+		for_each_hw_engine(hwe, gt, id) {
+			for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
+				char *str;
+
+				if (engine_event_status(hwe, engine_events[i].sample))
+					continue;
+
+				str = kasprintf(GFP_KERNEL, "%s-%s-gt%u",
+						hwe->name, engine_events[i].name, j);
+
+				if (!str)
+					goto err;
+
+				*attr_iter++ = &xe_iter->attr.attr;
+				xe_iter = add_xe_attr(xe_iter, str,
+						      __DRM_XE_PMU_ENGINE(j, xe_hw_engine_to_user_class(hwe->class),
+									  hwe->logical_instance,
+									  engine_events[i].sample));
+			}
+		}
+	}
+
 	pmu->xe_attr = xe_attr;
 	pmu->pmu_attr = pmu_attr;
 
-- 
2.40.0



More information about the Intel-xe mailing list