[PATCH v4 7/8] drm/xe/guc: Dynamically enable/disable engine busyness stats

Riana Tauro riana.tauro at intel.com
Fri Dec 22 07:46:01 UTC 2023


Dynamically enable/disable engine busyness stats using a GuC
action when the PMU interface is opened and closed, to avoid
the power penalty.

Co-developed-by: John Harrison <John.C.Harrison at Intel.com>
Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
Signed-off-by: Riana Tauro <riana.tauro at intel.com>
---
 drivers/gpu/drm/xe/xe_guc_engine_busyness.c | 96 ++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_guc_engine_busyness.h |  2 +
 drivers/gpu/drm/xe/xe_guc_types.h           | 14 +++
 drivers/gpu/drm/xe/xe_pmu.c                 | 32 +++++++
 4 files changed, 140 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
index 2dd06563d0ad..79ae06b71943 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
@@ -8,6 +8,7 @@
 
 #include "abi/guc_actions_abi.h"
 #include "xe_bo.h"
+#include "xe_device.h"
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
 
@@ -102,9 +103,9 @@ static void guc_engine_busyness_get_usage(struct xe_guc *guc,
 		*ticks_gt = gt_ticks;
 }
 
-static void guc_engine_busyness_enable_stats(struct xe_guc *guc)
+static void guc_engine_busyness_action_usage_stats(struct xe_guc *guc, bool enable)
 {
-	u32 ggtt_addr = xe_bo_ggtt_addr(guc->busy.bo);
+	u32 ggtt_addr = enable ? xe_bo_ggtt_addr(guc->busy.bo) : 0;
 	u32 action[] = {
 		XE_GUC_ACTION_SET_DEVICE_ENGINE_UTILIZATION,
 		ggtt_addr,
@@ -121,6 +122,45 @@ static void guc_engine_busyness_enable_stats(struct xe_guc *guc)
 		drm_err(&xe->drm, "Failed to enable usage stats %pe", ERR_PTR(ret));
 }
 
+static void guc_engine_busyness_enable_stats(struct xe_guc *guc, bool enable)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	bool skip;
+
+	spin_lock(&guc->busy.enable_lock);
+	skip = enable == guc->busy.enabled;
+	if (!skip)
+		guc->busy.enabled = enable;
+	spin_unlock(&guc->busy.enable_lock);
+
+	if (skip)
+		return;
+
+	xe_device_mem_access_get(xe);
+	guc_engine_busyness_action_usage_stats(guc, enable);
+	xe_device_mem_access_put(xe);
+}
+
+static void guc_engine_busyness_toggle_stats(struct xe_guc *guc)
+{
+	if (!guc->submission_state.enabled)
+		return;
+
+	/* Pmu_ref can increase before the worker thread runs this function */
+	if (guc->busy.pmu_ref >= 1)
+		guc_engine_busyness_enable_stats(guc, true);
+	else if (guc->busy.pmu_ref == 0)
+		guc_engine_busyness_enable_stats(guc, false);
+}
+
+static void guc_engine_buysness_worker_func(struct work_struct *w)
+{
+	struct xe_guc *guc = container_of(w, struct xe_guc,
+					  busy.enable_worker);
+
+	guc_engine_busyness_toggle_stats(guc);
+}
+
 static void guc_engine_busyness_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_guc *guc = arg;
@@ -150,6 +190,52 @@ bool xe_guc_engine_busyness_supported(struct xe_guc *guc)
 	return false;
 }
 
+/*
+ * xe_guc_engine_busyness_pin - Dynamically enables engine busyness stats
+ * @guc: The GuC object
+ * @pmu_locked: boolean to indicate pmu event is started, locked by pmu spinlock
+ *
+ * Enables the stats directly, or via a queued worker if GuC submission is
+ * not yet enabled or @pmu_locked is set (which also takes a PMU reference).
+ */
+void xe_guc_engine_busyness_pin(struct xe_guc *guc, bool pmu_locked)
+{
+	/* Engine busyness supported only on GuC >= 70.11.1 */
+	if (!xe_guc_engine_busyness_supported(guc))
+		return;
+
+	if (pmu_locked)
+		guc->busy.pmu_ref++;
+
+	if (!guc->submission_state.enabled || pmu_locked)
+		queue_work(system_unbound_wq, &guc->busy.enable_worker);
+	else
+		guc_engine_busyness_enable_stats(guc, true);
+}
+
+/*
+ * xe_guc_engine_busyness_unpin - Dynamically disables engine busyness stats
+ * @guc: The GuC object
+ * @pmu_locked: boolean to indicate pmu event is stopped, locked by pmu spinlock
+ *
+ * Re-evaluates the stats directly, or via a queued worker if GuC submission is
+ * not yet enabled or @pmu_locked is set (which also drops a PMU reference).
+ */
+void xe_guc_engine_busyness_unpin(struct xe_guc *guc, bool pmu_locked)
+{
+	/* Engine busyness supported only on GuC >= 70.11.1 */
+	if (!xe_guc_engine_busyness_supported(guc))
+		return;
+
+	if (pmu_locked)
+		guc->busy.pmu_ref--;
+
+	if (!guc->submission_state.enabled || pmu_locked)
+		queue_work(system_unbound_wq, &guc->busy.enable_worker);
+	else
+		guc_engine_busyness_toggle_stats(guc);
+}
+
 /*
  * xe_guc_engine_busyness_active_ticks - Gets the total active ticks
  * @guc: The GuC object
@@ -224,9 +310,11 @@ int xe_guc_engine_busyness_init(struct xe_guc *guc)
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
+	spin_lock_init(&guc->busy.enable_lock);
+	INIT_WORK(&guc->busy.enable_worker, guc_engine_buysness_worker_func);
 	guc->busy.bo = bo;
-
-	guc_engine_busyness_enable_stats(guc);
+	guc->busy.enabled = false;
+	guc->busy.pmu_ref = 0;
 
 	err = drmm_add_action_or_reset(&xe->drm, guc_engine_busyness_fini, guc);
 	if (err)
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
index e3c74e0236af..008af1c0838a 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
@@ -15,4 +15,6 @@ int xe_guc_engine_busyness_init(struct xe_guc *guc);
 u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc);
 u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
 bool xe_guc_engine_busyness_supported(struct xe_guc *guc);
+void xe_guc_engine_busyness_pin(struct xe_guc *guc, bool pmu_locked);
+void xe_guc_engine_busyness_unpin(struct xe_guc *guc, bool pmu_locked);
 #endif
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index a75728071f46..1d4123fec9c0 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -89,6 +89,20 @@ struct xe_guc {
 	struct {
 		/** @bo: GGTT buffer object of engine busyness that is shared with GuC */
 		struct xe_bo *bo;
+		/** @enabled: state of engine stats */
+		bool enabled;
+		/** @enable_lock: for accessing @enabled */
+		spinlock_t enable_lock;
+		/**
+		 * @enable_worker: Async worker for enabling/disabling
+		 * busyness tracking from PMU
+		 */
+		struct work_struct enable_worker;
+		/**
+		 * @pmu_ref: how many outstanding PMU counters have
+		 * been requested, locked by PMU spinlock
+		 */
+		int pmu_ref;
 	} busy;
 
 	/**
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index f91652886b67..3161ed157bd2 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -9,6 +9,9 @@
 
 #include "xe_device.h"
 #include "xe_gt.h"
+#include "xe_gt_clock.h"
+#include "xe_guc_engine_busyness.h"
+#include "xe_mmio.h"
 
 #define XE_ENGINE_SAMPLE_COUNT (DRM_XE_PMU_SAMPLE_BUSY_TICKS + 1)
 
@@ -93,6 +96,8 @@ static int engine_event_init(struct perf_event *event)
 	hwe = xe_gt_hw_engine(gt, xe_hw_engine_from_user_class(engine_event_class(event)),
 			      engine_event_instance(event), true);
 
+	xe_guc_engine_busyness_pin(&gt->uc.guc, false);
+
 	return engine_event_status(hwe, engine_event_sample(event));
 }
 
@@ -204,6 +209,19 @@ static void xe_pmu_event_read(struct perf_event *event)
 
 static void xe_pmu_enable(struct perf_event *event)
 {
+	struct xe_device *xe =
+		container_of(event->pmu, typeof(*xe), pmu.base);
+	const int gt_id = config_gt_id(event->attr.config);
+	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+	struct xe_pmu *pmu = &xe->pmu;
+	unsigned long flags;
+
+	if (is_engine_event(event) ||
+	    config_counter(event->attr.config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0)) {
+		spin_lock_irqsave(&pmu->lock, flags);
+		xe_guc_engine_busyness_pin(&gt->uc.guc, true);
+		spin_unlock_irqrestore(&pmu->lock, flags);
+	}
 	/*
 	 * Store the current counter value so we can report the correct delta
 	 * for all listeners. Even when the event was already enabled and has
@@ -227,9 +245,23 @@ static void xe_pmu_event_start(struct perf_event *event, int flags)
 
 static void xe_pmu_event_stop(struct perf_event *event, int flags)
 {
+	struct xe_device *xe =
+		container_of(event->pmu, typeof(*xe), pmu.base);
+	const int gt_id = config_gt_id(event->attr.config);
+	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+	struct xe_pmu *pmu = &xe->pmu;
+	unsigned long irqflags;
+
 	if (flags & PERF_EF_UPDATE)
 		xe_pmu_event_read(event);
 
+	if (is_engine_event(event) ||
+	    config_counter(event->attr.config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0)) {
+		spin_lock_irqsave(&pmu->lock, irqflags);
+		xe_guc_engine_busyness_unpin(&gt->uc.guc, true);
+		spin_unlock_irqrestore(&pmu->lock, irqflags);
+	}
+
 	event->hw.state = PERF_HES_STOPPED;
 }
 
-- 
2.40.0



More information about the Intel-xe mailing list