[PATCH 66/73] RFC drm/i915: Expose union(class) utilisation
Chris Wilson
chris at chris-wilson.co.uk
Tue Jan 12 13:58:09 UTC 2021
Combine our per-engine metrics to track begin/end of the class as a
whole and present that as cumulative runtime via PMU.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
---
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 14 +++
drivers/gpu/drm/i915/gt/intel_engine_stats.h | 34 ++++++
drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 +
drivers/gpu/drm/i915/gt/intel_engine_user.c | 3 +
drivers/gpu/drm/i915/gt/intel_gt.h | 2 +
drivers/gpu/drm/i915/gt/intel_gt_types.h | 28 +++++
drivers/gpu/drm/i915/i915_pmu.c | 119 +++++++++++++++----
include/uapi/drm/i915_drm.h | 3 +
8 files changed, 181 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 8bf7bef776fa..7dbb15e17757 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -314,6 +314,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->instance = info->instance;
__sprint_engine_name(engine);
+ engine->class_stats = &gt->stats_class[info->class];
+
engine->props.dma_latency_ns =
CONFIG_DRM_I915_DMA_LATENCY;
engine->props.heartbeat_interval_ms =
@@ -1787,6 +1789,18 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
return active;
}
+/**
+ * intel_runtime_stats_get_busy_time - sample the cumulative busy time
+ * @st: runtime statistics to read
+ *
+ * Lockless read: starts from the accumulated @st->total and, while
+ * @st->active is non-zero, adds the time elapsed since @st->start.
+ *
+ * Return: the busy time as a ktime_t.
+ */
+ktime_t intel_runtime_stats_get_busy_time(const struct intel_runtime_stats *st)
+{
+	ktime_t busy = st->total;
+
+	if (!atomic_read(&st->active))
+		return busy;
+
+	/* Order the read of active before the read of start. */
+	smp_rmb();
+	return ktime_add(busy, ktime_sub(ktime_get(), st->start));
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "mock_engine.c"
#include "selftest_engine.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_stats.h b/drivers/gpu/drm/i915/gt/intel_engine_stats.h
index 1d8314257ffa..0cb314528802 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_stats.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_stats.h
@@ -12,6 +12,36 @@
#include "i915_gem.h" /* GEM_BUG_ON */
#include "intel_engine.h"
+#include "intel_gt_types.h"
+
+/*
+ * Account one more active user of @st.
+ *
+ * While the count is already non-zero it is bumped locklessly. Only the
+ * transition away from zero takes st->lock, so that the start timestamp
+ * is recorded before the count becomes visible as non-zero.
+ */
+static inline void intel_engine_class_start(struct intel_runtime_stats *st)
+{
+	/* atomic_add_unless() fails only if active == 0: the slow path */
+	if (!atomic_add_unless(&st->active, 1, 0)) {
+		spin_lock(&st->lock);
+		/* Recheck under the lock; another caller may have won */
+		if (!atomic_read(&st->active)) {
+			st->start = ktime_get();
+			smp_mb__before_atomic();
+		}
+		atomic_inc(&st->active);
+		spin_unlock(&st->lock);
+	}
+}
+
+/*
+ * Drop one active user of @st.
+ *
+ * While the count is above one it is decremented locklessly. Only the
+ * final 1 -> 0 transition takes st->lock, folding the time elapsed since
+ * st->start into st->total before the count is dropped.
+ */
+static inline void intel_engine_class_stop(struct intel_runtime_stats *st)
+{
+	/* atomic_add_unless() fails only if active == 1: the slow path */
+	if (!atomic_add_unless(&st->active, -1, 1)) {
+		spin_lock(&st->lock);
+		/* Recheck under the lock; another caller may have joined */
+		if (atomic_read(&st->active) == 1) {
+			ktime_t elapsed = ktime_sub(ktime_get(), st->start);
+
+			st->total = ktime_add(st->total, elapsed);
+			smp_mb__before_atomic();
+		}
+		atomic_dec(&st->active);
+		spin_unlock(&st->lock);
+	}
+}
static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
@@ -20,6 +50,8 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine)
return;
}
+ intel_engine_class_start(engine->class_stats);
+
/* The writer is serialised; but the pmu reader may be from hardirq */
engine->stats.start = ktime_get();
smp_wmb(); /* pairs with intel_engine_get_busy_time() */
@@ -39,6 +71,8 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine)
ktime_sub(ktime_get(), engine->stats.start));
smp_wmb(); /* pairs with intel_engine_get_busy_time() */
WRITE_ONCE(engine->stats.active, 0);
+
+ intel_engine_class_stop(engine->class_stats);
}
#endif /* __INTEL_ENGINE_STATS_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 02779ebc94f8..e4ba61214a59 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -494,6 +494,7 @@ struct intel_engine_cs {
*/
ktime_t rps;
} stats;
+ struct intel_runtime_stats *class_stats;
struct {
unsigned long heartbeat_interval_ms;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 6b5a4fdc14a0..24ac33d1b16e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -210,6 +210,9 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
GEM_BUG_ON(engine->uabi_class >= ARRAY_SIZE(uabi_instances));
engine->uabi_instance = uabi_instances[engine->uabi_class]++;
+ if (intel_engine_supports_stats(engine))
+ engine->class_stats->enabled = true;
+
/* Replace the internal name with the final user facing name */
memcpy(old, engine->name, sizeof(engine->name));
scnprintf(engine->name, sizeof(engine->name), "%s%u",
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 9157c7411f60..f73f1d1d3754 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -77,4 +77,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
void intel_gt_info_print(const struct intel_gt_info *info,
struct drm_printer *p);
+ktime_t intel_runtime_stats_get_busy_time(const struct intel_runtime_stats *st);
+
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 4205c3f022ad..744b3bed871e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -29,6 +29,32 @@ struct i915_ggtt;
struct intel_engine_cs;
struct intel_uncore;
+struct intel_runtime_stats { /* busy-time accounting; struct intel_gt keeps one per engine class */
+ atomic_t active; /* raised by intel_engine_class_start(), dropped by intel_engine_class_stop() */
+
+ /**
+ * @lock: Lock protecting the below fields.
+ *
+ * Taken by intel_engine_class_start() and intel_engine_class_stop()
+ * only around the transitions of @active from and to zero;
+ * intel_runtime_stats_get_busy_time() reads without taking it.
+ */
+ spinlock_t lock;
+
+ /**
+ * @total: Total accumulated busy time.
+ *
+ * Accumulated time not counting the most recent block in cases
+ * where the tracked unit is currently busy (active > 0).
+ */
+ ktime_t total;
+
+ /**
+ * @start: Timestamp of the last idle to active transition.
+ *
+ * Idle is defined as active == 0, active is active > 0.
+ */
+ ktime_t start;
+
+ bool enabled; /* set at engine registration if an engine supports stats; gates the class PMU event */
+};
+
struct intel_gt {
struct drm_i915_private *i915;
struct intel_uncore *uncore;
@@ -107,6 +133,8 @@ struct intel_gt {
ktime_t start;
} stats;
+ struct intel_runtime_stats stats_class[MAX_ENGINE_CLASS + 1];
+
struct intel_engine_cs *engine[I915_NUM_ENGINES];
struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
[MAX_ENGINE_INSTANCE + 1];
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 2b88c0baa1bf..026999a82231 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -9,6 +9,7 @@
#include "gt/intel_engine.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
+#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_rc6.h"
#include "gt/intel_rps.h"
@@ -506,13 +507,28 @@ static int engine_event_init(struct perf_event *event)
struct drm_i915_private *i915 =
container_of(event->pmu, typeof(*i915), pmu.base);
struct intel_engine_cs *engine;
+ u8 class = engine_event_class(event);
+ u8 instance = engine_event_instance(event);
+ u8 sample = engine_event_sample(event);
- engine = intel_engine_lookup_user(i915, engine_event_class(event),
- engine_event_instance(event));
+	if (instance == 0xff) {
+		/* Class-wide events only implement the busy sample. */
+		if (sample != I915_SAMPLE_BUSY)
+			return -ENOENT;
+
+		/* class comes from the event config: validate before use. */
+		if (class >= ARRAY_SIZE(i915->gt.stats_class))
+			return -ENODEV;
+		if (!i915->gt.stats_class[class].enabled)
+			return -ENODEV;
+		return 0;
+	}
+
+ engine = intel_engine_lookup_user(i915, class, instance);
if (!engine)
return -ENODEV;
- return engine_event_status(engine, engine_event_sample(event));
+ return engine_event_status(engine, sample);
}
static int i915_pmu_event_init(struct perf_event *event)
@@ -557,6 +573,33 @@ static int i915_pmu_event_init(struct perf_event *event)
return 0;
}
+/*
+ * Read the current value of an engine PMU event.
+ *
+ * An instance of 0xff selects the class-wide busy counter kept in
+ * i915->gt.stats_class[]; any other instance is looked up as a user
+ * visible engine. Returns the counter value, or 0 if the event does not
+ * resolve to a class or engine.
+ */
+static u64 engine_event_read(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	u8 sample = engine_event_sample(event);
+	u8 class = engine_event_class(event);
+	u8 instance = engine_event_instance(event);
+	struct intel_engine_cs *engine;
+	ktime_t unused;
+
+	if (instance == 0xff) {
+		struct intel_runtime_stats *st;
+
+		/* class is decoded from the event config; stay inside the array */
+		if (class >= ARRAY_SIZE(i915->gt.stats_class))
+			return 0;
+
+		st = &i915->gt.stats_class[class];
+
+		/* Convert explicitly, matching the per-engine busy path below */
+		return ktime_to_ns(intel_runtime_stats_get_busy_time(st));
+	}
+
+	engine = intel_engine_lookup_user(i915, class, instance);
+	if (GEM_WARN_ON(!engine))
+		return 0; /* Do nothing */
+
+	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
+		return ktime_to_ns(intel_engine_get_busy_time(engine,
+							      &unused));
+
+	return engine->pmu.sample[sample].cur;
+}
+
static u64 __i915_pmu_event_read(struct perf_event *event)
{
struct drm_i915_private *i915 =
@@ -565,24 +608,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
u64 val = 0;
if (is_engine_event(event)) {
- u8 sample = engine_event_sample(event);
- struct intel_engine_cs *engine;
-
- engine = intel_engine_lookup_user(i915,
- engine_event_class(event),
- engine_event_instance(event));
-
- if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
- /* Do nothing */
- } else if (sample == I915_SAMPLE_BUSY &&
- intel_engine_supports_stats(engine)) {
- ktime_t unused;
-
- val = ktime_to_ns(intel_engine_get_busy_time(engine,
- &unused));
- } else {
- val = engine->pmu.sample[sample].cur;
- }
+ val = engine_event_read(event);
} else {
switch (event->attr.config) {
case I915_PMU_ACTUAL_FREQUENCY:
@@ -666,7 +692,7 @@ static void i915_pmu_enable(struct perf_event *event)
* For per-engine events the bitmask and reference counting
* is stored per engine.
*/
- if (is_engine_event(event)) {
+ if (is_engine_event(event) && engine_event_instance(event) != 0xff) {
u8 sample = engine_event_sample(event);
struct intel_engine_cs *engine;
@@ -710,7 +736,7 @@ static void i915_pmu_disable(struct perf_event *event)
spin_lock_irqsave(&pmu->lock, flags);
- if (is_engine_event(event)) {
+ if (is_engine_event(event) && engine_event_instance(event) != 0xff) {
u8 sample = engine_event_sample(event);
struct intel_engine_cs *engine;
@@ -891,6 +917,18 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
return ++attr;
}
+/*
+ * Map an engine class to the name used as the prefix of its PMU event.
+ * Returns NULL for a class that has no name.
+ */
+static const char *class_repr(u8 class)
+{
+	static const char * const names[] = {
+		[RENDER_CLASS] = "render",
+		[VIDEO_DECODE_CLASS] = "video-decode",
+		[VIDEO_ENHANCEMENT_CLASS] = "video-enhance",
+		[COPY_ENGINE_CLASS] = "copy",
+	};
+
+	/* Entries not listed above are implicitly NULL */
+	if (class >= ARRAY_SIZE(names))
+		return NULL;
+
+	return names[class];
+}
+
static struct attribute **
create_event_attributes(struct i915_pmu *pmu)
{
@@ -919,6 +957,8 @@ create_event_attributes(struct i915_pmu *pmu)
struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
struct attribute **attr = NULL, **attr_iter;
struct intel_engine_cs *engine;
+ unsigned long class_enable = 0;
+ unsigned long class_disable = 0;
unsigned int i;
/* Count how many counters we will be exposing. */
@@ -928,6 +968,11 @@ create_event_attributes(struct i915_pmu *pmu)
}
for_each_uabi_engine(engine, i915) {
+ if (intel_engine_supports_stats(engine))
+ class_enable |= BIT(engine->class);
+ else
+ class_disable |= BIT(engine->class);
+
for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
if (!engine_event_status(engine,
engine_events[i].sample))
@@ -935,6 +980,9 @@ create_event_attributes(struct i915_pmu *pmu)
}
}
+ class_enable &= ~class_disable;
+ count += hweight_long(class_enable);
+
/* Allocate attribute objects and table. */
i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
if (!i915_attr)
@@ -1008,6 +1056,31 @@ create_event_attributes(struct i915_pmu *pmu)
}
}
+ for_each_set_bit(i, &class_enable, BITS_PER_LONG) {
+ const char *name;
+ char *str;
+
+ name = class_repr(i);
+ if (GEM_WARN_ON(!name))
+ continue;
+
+ str = kasprintf(GFP_KERNEL, "%s-%s", name, "busy");
+ if (!str)
+ goto err;
+
+ *attr_iter++ = &i915_iter->attr.attr;
+ i915_iter =
+ add_i915_attr(i915_iter, str,
+ __I915_PMU_ENGINE(i, 0xff, 0));
+
+ str = kasprintf(GFP_KERNEL, "%s.unit", str);
+ if (!str)
+ goto err;
+
+ *attr_iter++ = &pmu_iter->attr.attr;
+ pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
+ }
+
pmu->i915_attr = i915_attr;
pmu->pmu_attr = pmu_attr;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 1987e2ea79a3..3aacdf756e90 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -171,6 +171,9 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_ENGINE_SEMA(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+#define I915_PMU_CLASS_BUSY(class) \
+ __I915_PMU_ENGINE(class, 0xff, 0)
+
#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
--
2.20.1
More information about the Intel-gfx-trybot
mailing list