[PATCH v2 4/8] RFC drm/xe/guc: Add PMU counter for total active ticks
Riana Tauro
riana.tauro at intel.com
Thu Dec 7 12:57:58 UTC 2023
GuC provides engine busyness ticks as a 64-bit counter which counts
clock ticks. These counters are maintained in a
shared memory buffer and internally updated on a continuous basis.
GuC also provides a periodically updated count of the total active ticks
that the GT has been active for. This counter is exposed to the user so
that busyness can be calculated as a percentage using
busyness % = (engine active ticks / total active ticks) * 100.
This patch provides a PMU counter for total active ticks.
It is listed by the perf tool as
sudo ./perf list
xe_0000_03_00.0/total-active-ticks-gt0/ [Kernel PMU event]
and can be read using
sudo ./perf stat -e xe_0000_03_00.0/total-active-ticks-gt0/ -I 1000
time counts unit events
1.001332764 58942964 xe_0000_03_00.0/total-active-ticks-gt0/
2.011421147 21191869 xe_0000_03_00.0/total-active-ticks-gt0/
3.013223865 19269012 xe_0000_03_00.0/total-active-ticks-gt0/
Signed-off-by: Riana Tauro <riana.tauro at intel.com>
---
drivers/gpu/drm/xe/xe_gt.c | 11 +++
drivers/gpu/drm/xe/xe_gt.h | 1 +
drivers/gpu/drm/xe/xe_guc_engine_busyness.c | 75 +++++++++++++++++----
drivers/gpu/drm/xe/xe_guc_engine_busyness.h | 1 +
drivers/gpu/drm/xe/xe_pmu.c | 7 ++
include/uapi/drm/xe_drm.h | 1 +
6 files changed, 83 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 3d735b66f60d..07a94d315715 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -785,6 +785,17 @@ struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt,
return NULL;
}
+/**
+ * xe_gt_total_active_ticks - Return total active ticks
+ * @gt: GT structure
+ *
+ * Returns total active ticks that the GT was active for.
+ */
+u64 xe_gt_total_active_ticks(struct xe_gt *gt)
+{
+ return xe_guc_engine_busyness_active_ticks(&gt->uc.guc);
+}
+
/**
* xe_gt_engine_busy_ticks - Return current accumulated engine busyness ticks
* @gt: GT structure
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 2e3cd7031287..c2d49275cd31 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -43,6 +43,7 @@ void xe_gt_reset_async(struct xe_gt *gt);
void xe_gt_sanitize(struct xe_gt *gt);
u64 xe_gt_engine_busy_ticks(struct xe_gt *gt, struct xe_hw_engine *hwe);
+u64 xe_gt_total_active_ticks(struct xe_gt *gt);
/**
* xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
index 431d1ca59d2f..b1d65ed14244 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
@@ -20,7 +20,16 @@
* timer internal to GuC. The update rate is guaranteed to be at least 2Hz (but with
* a caveat that is not real time, best effort only).
*
+ * In addition to the engine busyness ticks, there is also a total time count which
+ * is a free running GT timestamp counter.
+ *
+ * Note that counters should be used as ratios of each other for calculating a
+ * percentage.
+ *
* engine busyness ticks (ticks_engine) : clock ticks for which engine was active
+ * total active ticks (ticks_gt) : total clock ticks
+ *
+ * engine busyness % = (ticks_engine / ticks_gt) * 100
*/
/* GuC version number components are only 8-bit, so converting to a 32bit 8.8.8 */
@@ -42,48 +51,69 @@ static bool guc_engine_busyness_supported(struct xe_guc *guc)
static void guc_engine_busyness_usage_map(struct xe_guc *guc,
struct xe_hw_engine *hwe,
- struct iosys_map *engine_map)
+ struct iosys_map *engine_map,
+ struct iosys_map *global_map)
{
struct iosys_map *map;
size_t offset;
u32 instance;
u8 guc_class;
- guc_class = xe_engine_class_to_guc_class(hwe->class);
- instance = hwe->logical_instance;
+ if (hwe) {
+ guc_class = xe_engine_class_to_guc_class(hwe->class);
+ instance = hwe->logical_instance;
+ }
map = &guc->busy.bo->vmap;
- offset = offsetof(struct guc_engine_observation_data,
- engine_data[guc_class][instance]);
+ if (hwe) {
+ offset = offsetof(struct guc_engine_observation_data,
+ engine_data[guc_class][instance]);
- *engine_map = IOSYS_MAP_INIT_OFFSET(map, offset);
+ *engine_map = IOSYS_MAP_INIT_OFFSET(map, offset);
+ }
+
+ *global_map = IOSYS_MAP_INIT_OFFSET(map, 0);
}
static void guc_engine_busyness_get_usage(struct xe_guc *guc,
struct xe_hw_engine *hwe,
- u64 *_ticks_engine)
+ u64 *_ticks_engine,
+ u64 *_ticks_gt)
{
- struct iosys_map engine_map;
- u64 ticks_engine = 0;
+ struct iosys_map engine_map, global_map;
+ u64 ticks_engine = 0, ticks_gt = 0;
int i = 0;
- guc_engine_busyness_usage_map(guc, hwe, &engine_map);
+ guc_engine_busyness_usage_map(guc, hwe, &engine_map, &global_map);
#define read_engine_usage(map_, field_) \
iosys_map_rd_field(map_, 0, struct guc_engine_data, field_)
+#define read_global_field(map_, field_) \
+ iosys_map_rd_field(map_, 0, struct guc_engine_observation_data, field_)
+
do {
- ticks_engine = read_engine_usage(&engine_map, total_execution_ticks);
+ if (hwe)
+ ticks_engine = read_engine_usage(&engine_map, total_execution_ticks);
+
+ ticks_gt = read_global_field(&global_map, gt_timestamp);
- if (read_engine_usage(&engine_map, total_execution_ticks) == ticks_engine)
+ if (hwe && read_engine_usage(&engine_map, total_execution_ticks) != ticks_engine)
+ continue;
+
+ if (read_global_field(&global_map, gt_timestamp) == ticks_gt)
break;
} while (++i < 6);
#undef read_engine_usage
+#undef read_global_field
if (_ticks_engine)
*_ticks_engine = ticks_engine;
+
+ if (_ticks_gt)
+ *_ticks_gt = ticks_gt;
}
static void guc_engine_busyness_enable_stats(struct xe_guc *guc)
@@ -113,6 +143,25 @@ static void guc_engine_busyness_fini(struct drm_device *drm, void *arg)
xe_bo_unpin_map_no_vm(guc->busy.bo);
}
+/*
+ * xe_guc_engine_busyness_active_ticks - Gets the total active ticks
+ * @guc: The GuC object
+ *
+ * Returns total active ticks that the GT has been running for.
+ */
+u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc)
+{
+ u64 ticks_gt;
+
+ /* Engine busyness supported only on GuC >= 70.11.1 */
+ if (!guc_engine_busyness_supported(guc))
+ return 0;
+
+ guc_engine_busyness_get_usage(guc, NULL, NULL, &ticks_gt);
+
+ return ticks_gt;
+}
+
/*
* xe_guc_engine_busyness_ticks - Gets current accumulated
* engine busyness ticks
@@ -129,7 +178,7 @@ u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
if (!guc_engine_busyness_supported(guc))
return 0;
- guc_engine_busyness_get_usage(guc, hwe, &ticks_engine);
+ guc_engine_busyness_get_usage(guc, hwe, &ticks_engine, NULL);
return ticks_engine;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
index d70f06209896..57325910ebc4 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
@@ -12,6 +12,7 @@ struct xe_hw_engine;
struct xe_guc;
int xe_guc_engine_busyness_init(struct xe_guc *guc);
+u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc);
u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
#endif
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index 9d0b7887cfc4..855cd7b3edb3 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -9,6 +9,7 @@
#include "regs/xe_gt_regs.h"
#include "xe_device.h"
+#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_mmio.h"
@@ -124,6 +125,8 @@ config_status(struct xe_device *xe, u64 config)
if (!(gt->info.engine_mask & (BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0))))
return -ENOENT;
break;
+ case DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0):
+ break;
default:
return -ENOENT;
}
@@ -186,6 +189,9 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
case DRM_XE_PMU_MEDIA_GROUP_BUSY(0):
val = engine_group_busyness_read(gt, config);
break;
+ case DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0):
+ val = xe_gt_total_active_ticks(gt);
+ break;
default:
drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
}
@@ -357,6 +363,7 @@ create_event_attributes(struct xe_pmu *pmu)
__event(1, "copy-group-busy", "ns"),
__event(2, "media-group-busy", "ns"),
__event(3, "any-engine-group-busy", "ns"),
+ __event(4, "total-active-ticks", NULL),
};
struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 0895e4d2a981..b5e7a4f673fa 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1119,6 +1119,7 @@ struct drm_xe_wait_user_fence {
#define DRM_XE_PMU_COPY_GROUP_BUSY(gt) ___DRM_XE_PMU_OTHER(gt, 1)
#define DRM_XE_PMU_MEDIA_GROUP_BUSY(gt) ___DRM_XE_PMU_OTHER(gt, 2)
#define DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(gt) ___DRM_XE_PMU_OTHER(gt, 3)
+#define DRM_XE_PMU_TOTAL_ACTIVE_TICKS(gt) ___DRM_XE_PMU_OTHER(gt, 4)
#if defined(__cplusplus)
}
--
2.40.0
More information about the Intel-xe
mailing list