[PATCH v3 09/10] RFC drm/xe/guc: Dynamically enable/disable engine busyness stats
Riana Tauro
riana.tauro at intel.com
Thu Dec 14 11:31:43 UTC 2023
Dynamically enable/disable engine busyness stats using a GuC
action when the PMU interface is opened and closed, to avoid
a power penalty.
Co-developed-by: John Harrison <John.C.Harrison at Intel.com>
Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
Signed-off-by: Riana Tauro <riana.tauro at intel.com>
---
drivers/gpu/drm/xe/xe_guc_engine_busyness.c | 96 ++++++++++++++++++++-
drivers/gpu/drm/xe/xe_guc_engine_busyness.h | 2 +
drivers/gpu/drm/xe/xe_guc_types.h | 14 +++
drivers/gpu/drm/xe/xe_pmu.c | 32 +++++++
4 files changed, 140 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
index c40625f41ae5..56e3378d856d 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
@@ -8,6 +8,7 @@
#include "abi/guc_actions_abi.h"
#include "xe_bo.h"
+#include "xe_device.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
@@ -102,9 +103,9 @@ static void guc_engine_busyness_get_usage(struct xe_guc *guc,
*_ticks_gt = ticks_gt;
}
-static void guc_engine_busyness_enable_stats(struct xe_guc *guc)
+static void guc_engine_busyness_action_usage_stats(struct xe_guc *guc, bool enable)
{
- u32 ggtt_addr = xe_bo_ggtt_addr(guc->busy.bo);
+ u32 ggtt_addr = enable ? xe_bo_ggtt_addr(guc->busy.bo) : 0;
u32 action[] = {
XE_GUC_ACTION_SET_DEVICE_ENGINE_UTILIZATION,
ggtt_addr,
@@ -122,6 +123,45 @@ static void guc_engine_busyness_enable_stats(struct xe_guc *guc)
drm_err(&xe->drm, "Failed to enable usage stats %pe", ERR_PTR(ret));
}
+static void guc_engine_busyness_enable_stats(struct xe_guc *guc, bool enable)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ bool skip;
+ /*
+  * Move the busyness stats to the state requested by @enable. The cached
+  * busy.enabled flag is compared and updated under enable_lock; when it
+  * already matches @enable, nothing is sent to GuC.
+  */
+ spin_lock(&guc->busy.enable_lock);
+ skip = enable == guc->busy.enabled;
+ if (!skip)
+ guc->busy.enabled = enable;
+ spin_unlock(&guc->busy.enable_lock);
+
+ if (skip)
+ return;
+ /*
+  * The GuC action is issued after enable_lock has been dropped, between
+  * xe_device_mem_access_get() and xe_device_mem_access_put().
+  * NOTE(review): busy.enabled was already updated above and is not
+  * reverted here; confirm that is acceptable if the action fails.
+  */
+ xe_device_mem_access_get(xe);
+ guc_engine_busyness_action_usage_stats(guc, enable);
+ xe_device_mem_access_put(xe);
+}
+
+static void guc_engine_busyness_toggle_stats(struct xe_guc *guc)
+{
+ if (!guc->submission_state.enabled)
+ return; /* no stats change is requested while GuC submission is disabled */
+
+ /*
+  * Apply the state that matches the current busy.pmu_ref: enable for one
+  * or more references, disable for exactly zero. pmu_ref can increase
+  * before the worker thread runs this function, so the decision is made
+  * from the value seen at this point.
+  * NOTE(review): a negative pmu_ref matches neither branch and leaves the
+  * state unchanged. pmu_ref is also read here without any lock being
+  * taken in this function; confirm against the "locked by PMU spinlock"
+  * rule documented for the field.
+  */
+ if (guc->busy.pmu_ref >= 1)
+ guc_engine_busyness_enable_stats(guc, true);
+ else if (guc->busy.pmu_ref == 0)
+ guc_engine_busyness_enable_stats(guc, false);
+}
+
+static void guc_engine_buysness_worker_func(struct work_struct *w)
+{
+ struct xe_guc *guc = container_of(w, struct xe_guc,
+ busy.enable_worker);
+ /*
+  * Work handler for busy.enable_worker: recovers the owning xe_guc from
+  * the embedded work item and applies the stats state through
+  * guc_engine_busyness_toggle_stats().
+  * NOTE(review): "buysness" in this function's name looks like a typo of
+  * "busyness"; the INIT_WORK() call uses the same spelling.
+  */
+ guc_engine_busyness_toggle_stats(guc);
+}
+
static void guc_engine_busyness_fini(struct drm_device *drm, void *arg)
{
struct xe_guc *guc = arg;
@@ -151,6 +191,52 @@ bool xe_guc_engine_busyness_supported(struct xe_guc *guc)
return false;
}
+/*
+ * xe_guc_engine_busyness_pin - Dynamically enables engine busyness stats
+ * @guc: The GuC object
+ * @pmu_locked: true when called for a started pmu event with the pmu
+ *              spinlock held; busy.pmu_ref is then incremented
+ *
+ * Does nothing when engine busyness is not supported. Otherwise, if
+ * @pmu_locked is set or guc submission is not yet enabled, the enable
+ * worker is queued on system_unbound_wq and the stats state is applied
+ * later by that worker. If @pmu_locked is false and submission is enabled,
+ * the stats are enabled directly from this call and busy.pmu_ref is left
+ * untouched.
+ *
+ * NOTE(review): the worker returns without changing anything when guc
+ * submission is still disabled at the time it runs.
+ */
+void xe_guc_engine_busyness_pin(struct xe_guc *guc, bool pmu_locked)
+{
+ /* Engine busyness supported only on GuC >= 70.11.1 */
+ if (!xe_guc_engine_busyness_supported(guc))
+ return;
+
+ if (pmu_locked)
+ guc->busy.pmu_ref++;
+
+ if (!guc->submission_state.enabled || pmu_locked)
+ queue_work(system_unbound_wq, &guc->busy.enable_worker);
+ else
+ guc_engine_busyness_enable_stats(guc, true);
+}
+
+/*
+ * xe_guc_engine_busyness_unpin - Dynamically disables engine busyness stats
+ * @guc: The GuC object
+ * @pmu_locked: true when called for a stopped pmu event with the pmu
+ *              spinlock held; busy.pmu_ref is then decremented
+ *
+ * Does nothing when engine busyness is not supported. Otherwise, if
+ * @pmu_locked is set or guc submission is not yet enabled, the enable
+ * worker is queued on system_unbound_wq and the stats state is applied
+ * later by that worker. If @pmu_locked is false and submission is enabled,
+ * the state is re-evaluated directly from this call; the stats are then
+ * disabled only when busy.pmu_ref is zero and stay enabled while
+ * references remain.
+ *
+ * NOTE(review): the decrement is not guarded against dropping below zero;
+ * a negative busy.pmu_ref makes the re-evaluation a no-op.
+ */
+void xe_guc_engine_busyness_unpin(struct xe_guc *guc, bool pmu_locked)
+{
+ /* Engine busyness supported only on GuC >= 70.11.1 */
+ if (!xe_guc_engine_busyness_supported(guc))
+ return;
+
+ if (pmu_locked)
+ guc->busy.pmu_ref--;
+
+ if (!guc->submission_state.enabled || pmu_locked)
+ queue_work(system_unbound_wq, &guc->busy.enable_worker);
+ else
+ guc_engine_busyness_toggle_stats(guc);
+}
+
/*
* xe_guc_engine_busyness_active_ticks - Gets the total active ticks
* @guc: The GuC object
@@ -227,9 +313,11 @@ int xe_guc_engine_busyness_init(struct xe_guc *guc)
if (IS_ERR(bo))
return PTR_ERR(bo);
+ spin_lock_init(&guc->busy.enable_lock);
+ INIT_WORK(&guc->busy.enable_worker, guc_engine_buysness_worker_func);
guc->busy.bo = bo;
-
- guc_engine_busyness_enable_stats(guc);
+ guc->busy.enabled = false;
+ guc->busy.pmu_ref = 0;
err = drmm_add_action_or_reset(&xe->drm, guc_engine_busyness_fini, guc);
if (err)
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
index e3c74e0236af..008af1c0838a 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
@@ -15,4 +15,6 @@ int xe_guc_engine_busyness_init(struct xe_guc *guc);
u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc);
u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
bool xe_guc_engine_busyness_supported(struct xe_guc *guc);
+void xe_guc_engine_busyness_pin(struct xe_guc *guc, bool pmu_locked);
+void xe_guc_engine_busyness_unpin(struct xe_guc *guc, bool pmu_locked);
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index 4e9602301aed..cf87fe75490b 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -74,6 +74,20 @@ struct xe_guc {
struct {
/** @bo: GGTT buffer object of engine busyness that is shared with GuC */
struct xe_bo *bo;
+ /** @enabled: state of engine stats */
+ bool enabled;
+ /** @enable_lock: for accessing @enabled */
+ spinlock_t enable_lock;
+ /**
+ * @enable_worker: Async worker for enabling/disabling
+ * busyness tracking from PMU
+ */
+ struct work_struct enable_worker;
+ /**
+ * @pmu_ref: how many outstanding PMU counters have
+ * been requested, locked by PMU spinlock
+ */
+ int pmu_ref;
} busy;
/**
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index 9c8591d59b54..5eeb904acfa2 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -9,6 +9,9 @@
#include "xe_device.h"
#include "xe_gt.h"
+#include "xe_gt_clock.h"
+#include "xe_guc_engine_busyness.h"
+#include "xe_mmio.h"
#define XE_ENGINE_SAMPLE_COUNT (DRM_XE_PMU_SAMPLE_BUSY_TICKS + 1)
@@ -93,6 +96,8 @@ static int engine_event_init(struct perf_event *event)
hwe = xe_gt_hw_engine(gt, xe_hw_engine_from_user_class(engine_event_class(event)),
engine_event_instance(event), true);
+ xe_guc_engine_busyness_pin(&gt->uc.guc, false);
+
return engine_event_status(hwe, engine_event_sample(event));
}
@@ -204,6 +209,19 @@ static void xe_pmu_event_read(struct perf_event *event)
static void xe_pmu_enable(struct perf_event *event)
{
+ struct xe_device *xe =
+ container_of(event->pmu, typeof(*xe), pmu.base);
+ const int gt_id = config_gt_id(event->attr.config);
+ struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+ struct xe_pmu *pmu = &xe->pmu;
+ unsigned long flags;
+
+ if (is_engine_event(event) ||
+ config_counter(event->attr.config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0)) {
+ spin_lock_irqsave(&pmu->lock, flags);
+ xe_guc_engine_busyness_pin(&gt->uc.guc, true);
+ spin_unlock_irqrestore(&pmu->lock, flags);
+ }
/*
* Store the current counter value so we can report the correct delta
* for all listeners. Even when the event was already enabled and has
@@ -227,9 +245,23 @@ static void xe_pmu_event_start(struct perf_event *event, int flags)
static void xe_pmu_event_stop(struct perf_event *event, int flags)
{
+ struct xe_device *xe =
+ container_of(event->pmu, typeof(*xe), pmu.base);
+ const int gt_id = config_gt_id(event->attr.config);
+ struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+ struct xe_pmu *pmu = &xe->pmu;
+ unsigned long irqflags;
+
if (flags & PERF_EF_UPDATE)
xe_pmu_event_read(event);
+ if (is_engine_event(event) ||
+ config_counter(event->attr.config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0)) {
+ spin_lock_irqsave(&pmu->lock, irqflags);
+ xe_guc_engine_busyness_unpin(&gt->uc.guc, true);
+ spin_unlock_irqrestore(&pmu->lock, irqflags);
+ }
+
event->hw.state = PERF_HES_STOPPED;
}
--
2.40.0
More information about the Intel-xe
mailing list