[Intel-gfx] [RFC v2 2/3] drm/i915/pmu: serve global events and support perf stat
Dmitry Rogozhkin
dmitry.v.rogozhkin at intel.com
Wed Aug 23 15:26:02 UTC 2017
This patch should probably be squashed with Tvrtko's PMU enabling
patch...
Making perf-stat workable with i915 PMU. The major point is that the
current implementation of i915 PMU exposes global counters rather
than per-task counters. Thus, the required changes are:
* Register PMU with .task_ctx_nr=perf_invalid_context
* Expose cpumask for the PMU with the single CPU in the mask
* Properly support pmu->stop(): it should call pmu->read()
* Properly support pmu->del(): it should call stop(event, PERF_EF_UPDATE)
Examples:
perf stat -e i915/rcs0-busy/ workload.sh
This should error out with "failed to read counter" because the
requested counter can't be queried in per-task mode (which is the case).
perf stat -e i915/rcs0-busy/ -a workload.sh
perf stat -e i915/rcs0-busy/ -a -C0 workload.sh
These 2 commands should give the same result with the correct counter
values. Counter value will be queried once at the end of the workload.
The example output would be:
Performance counter stats for 'system wide':
649,547,987 i915/rcs0-busy/
1.895530680 seconds time elapsed
perf stat -e i915/rcs0-busy/ -a -I 100 workload.sh
This will query the counter periodically (each 100ms) and dump output:
0.100108369 4,137,438 i915/rcs0-busy/
i915/rcs0-busy/: 37037414 100149071 100149071
0.200249024 37,037,414 i915/rcs0-busy/
i915/rcs0-busy/: 36935429 100145077 100145077
0.300391916 36,935,429 i915/rcs0-busy/
i915/rcs0-busy/: 34262017 100126136 100126136
0.400518037 34,262,017 i915/rcs0-busy/
i915/rcs0-busy/: 34539960 100126217 100126217
v1: Make pmu.busy_stats a refcounter to avoid busy stats going away
with some deleted event.
v2: Expose cpumask for i915 PMU to avoid multiple events creation of
the same type followed by counter aggregation by perf-stat.
Change-Id: I7d1abe747a4399196e72253f7b66441a6528dbee
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin at intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Peter Zijlstra <peterz at infradead.org>
---
drivers/gpu/drm/i915/i915_pmu.c | 71 +++++++++++++++++++++++++--------
drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +-
2 files changed, 55 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index bcdf2bc..c551d64 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -15,6 +15,8 @@
#define ENGINE_SAMPLE_BITS (16)
+static cpumask_t i915_pmu_cpumask = CPU_MASK_CPU0;
+
static u8 engine_config_sample(u64 config)
{
return config & I915_PMU_SAMPLE_MASK;
@@ -285,16 +287,24 @@ static int i915_pmu_event_init(struct perf_event *event)
{
struct drm_i915_private *i915 =
container_of(event->pmu, typeof(*i915), pmu.base);
+ int cpu;
int ret;
- /* XXX ideally only want pid == -1 && cpu == -1 */
-
if (event->attr.type != event->pmu->type)
return -ENOENT;
if (has_branch_stack(event))
return -EOPNOTSUPP;
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ cpu = cpumask_any_and(&i915_pmu_cpumask,
+ topology_sibling_cpumask(event->cpu));
+
+ if (cpu >= nr_cpu_ids)
+ return -ENODEV;
+
ret = 0;
if (is_engine_event(event)) {
ret = engine_event_init(event);
@@ -314,6 +324,7 @@ static int i915_pmu_event_init(struct perf_event *event)
if (ret)
return ret;
+ event->cpu = cpu;
if (!event->parent)
event->destroy = i915_pmu_event_destroy;
@@ -469,11 +480,16 @@ static void i915_pmu_enable(struct perf_event *event)
ns_to_ktime(PERIOD), 0,
HRTIMER_MODE_REL_PINNED);
i915->pmu.timer_enabled = true;
- } else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
- !engine->pmu.busy_stats) {
- engine->pmu.busy_stats = true;
- if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
- queue_work(i915->wq, &engine->pmu.enable_busy_stats);
+ } else if (is_engine_event(event) && engine_needs_busy_stats(engine)) {
+ /* Enable busy stats for the first event demanding it, next
+ * events just reference counter. So, if some events will go
+ * away, we will still have busy stats enabled till remaining
+ * events still use them.
+ */
+ if (engine->pmu.busy_stats++ == 0) {
+ if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+ queue_work(i915->wq, &engine->pmu.enable_busy_stats);
+ }
}
spin_unlock_irqrestore(&i915->pmu.lock, flags);
@@ -495,16 +511,16 @@ static void i915_pmu_disable(struct perf_event *event)
enum intel_engine_id id;
engine = intel_engine_lookup_user(i915,
- engine_event_class(event),
- engine_event_instance(event));
+ engine_event_class(event),
+ engine_event_instance(event));
GEM_BUG_ON(!engine);
engine->pmu.enable &= ~BIT(engine_event_sample(event));
- if (engine->pmu.busy_stats &&
- !engine_needs_busy_stats(engine)) {
- engine->pmu.busy_stats = false;
- queue_delayed_work(i915->wq,
- &engine->pmu.disable_busy_stats,
- round_jiffies_up_relative(2 * HZ));
+ if (!engine_needs_busy_stats(engine)) {
+ if (engine->pmu.busy_stats && --engine->pmu.busy_stats == 0) {
+ queue_delayed_work(i915->wq,
+ &engine->pmu.disable_busy_stats,
+ round_jiffies_up_relative(2 * HZ));
+ }
}
mask = 0;
for_each_engine(engine, i915, id)
@@ -529,6 +545,8 @@ static void i915_pmu_event_start(struct perf_event *event, int flags)
static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
+ if (flags & PERF_EF_UPDATE)
+ i915_pmu_event_read(event);
i915_pmu_disable(event);
}
@@ -546,7 +564,7 @@ static int i915_pmu_event_add(struct perf_event *event, int flags)
static void i915_pmu_event_del(struct perf_event *event, int flags)
{
- i915_pmu_disable(event);
+ i915_pmu_event_stop(event, PERF_EF_UPDATE);
}
static int i915_pmu_event_event_idx(struct perf_event *event)
@@ -656,9 +674,28 @@ static ssize_t i915_pmu_event_show(struct device *dev,
.attrs = i915_pmu_events_attrs,
};
+static ssize_t i915_pmu_get_attr_cpumask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, i915_pmu_get_attr_cpumask, NULL);
+
+static struct attribute *i915_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group i915_pmu_cpumask_attr_group = {
+ .attrs = i915_cpumask_attrs,
+};
+
static const struct attribute_group *i915_pmu_attr_groups[] = {
&i915_pmu_format_attr_group,
&i915_pmu_events_attr_group,
+ &i915_pmu_cpumask_attr_group,
NULL
};
@@ -687,7 +724,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
return;
i915->pmu.base.attr_groups = i915_pmu_attr_groups;
- i915->pmu.base.task_ctx_nr = perf_sw_context;
+ i915->pmu.base.task_ctx_nr = perf_invalid_context;
i915->pmu.base.event_init = i915_pmu_event_init;
i915->pmu.base.add = i915_pmu_event_add;
i915->pmu.base.del = i915_pmu_event_del;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index fd5d838..0530e4e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -248,7 +248,7 @@ struct intel_engine_cs {
struct {
u32 enable;
u64 sample[4];
- bool busy_stats;
+ unsigned int busy_stats;
struct work_struct enable_busy_stats;
struct delayed_work disable_busy_stats;
} pmu;
--
1.8.3.1
More information about the Intel-gfx
mailing list