[Intel-gfx] [RFC 2/2] drm/i915/pmu: serve global events and support perf stat
Rogozhkin, Dmitry V
dmitry.v.rogozhkin at intel.com
Fri Aug 18 00:19:55 UTC 2017
On Tue, 2017-08-15 at 10:56 -0700, Dmitry Rogozhkin wrote:
> This patch should probably be squashed with Tvrtko's PMU enabling
> patch...
>
> i915 events don't have a CPU context to work with, so the i915
> PMU should work in global mode, i.e. expose perf_invalid_context.
> This will make the following call to perf:
> perf stat workload.sh
> error out with "failed to read counter". The correct usage is:
> perf stat -a -C0 workload.sh
> which then gives the expected output:
>
> Another change required to make perf stat happy is to properly support
> pmu->del(): the comment on the del() declaration says that del() is
> required to call stop(event, PERF_EF_UPDATE), and perf stat implicitly
> gets the event count through this.
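Btw, to spell out the contract perf stat relies on here: on counter
teardown perf core only calls del(), it does not issue a separate
read(). A simplified sketch of that path (paraphrased, not the literal
kernel/events/core.c code):

	/*
	 * Simplified teardown sequence as perf core drives it: del()
	 * must stop the event with PERF_EF_UPDATE, and that stop must
	 * fold the final hardware value into event->count, which is
	 * the value perf stat ends up printing.
	 */
	event->pmu->del(event, 0);           /* -> stop(event, PERF_EF_UPDATE) */
	count = local64_read(&event->count); /* what perf stat reports */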
>
> Finally, if perf stat is run as follows:
> perf stat -a workload.sh
> i.e. without the '-C0' option, then the event will be initialized as
> many times as there are CPUs. Thus, the patch adds PMU refcounting
> support to avoid multiple init/close of internal state. The issue which
> I did not overcome is that in this case the counter values get
> multiplied by the number of CPUs. I am not sure whether it is possible
> to expose some event-enabling CPU mask, or whether I rather need to
> simply error out.
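For the open question above: uncore PMUs solve exactly this by exposing
a "cpumask" sysfs attribute, which the perf tool honours by opening the
event on one CPU only instead of on all of them. A minimal sketch of
that pattern (untested; the i915_pmu_cpumask naming is mine, not
something in the tree):

	/* hypothetical: CPUs on which perf should open i915 events */
	static cpumask_t i915_pmu_cpumask;

	static ssize_t cpumask_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
	{
		return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
	}
	static DEVICE_ATTR_RO(cpumask);

	static struct attribute *i915_pmu_cpumask_attrs[] = {
		&dev_attr_cpumask.attr,
		NULL,
	};
	static const struct attribute_group i915_pmu_cpumask_attr_group = {
		.attrs = i915_pmu_cpumask_attrs,
	};

	/* in i915_pmu_register(), before perf_pmu_register(): */
	cpumask_set_cpu(0, &i915_pmu_cpumask);

event_init() would then also have to reject events for CPUs outside the
mask, so nothing gets counted twice.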
>
> Change-Id: I7d1abe747a4399196e72253f7b66441a6528dbee
> Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Peter Zijlstra <peterz at infradead.org>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 1 +
> drivers/gpu/drm/i915/i915_pmu.c | 106 ++++++++++++++++++++++------------------
> 2 files changed, 60 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b59da2c..215d47b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2663,6 +2663,7 @@ struct drm_i915_private {
> struct {
> struct pmu base;
> spinlock_t lock;
> + unsigned int ref;
> struct hrtimer timer;
> bool timer_enabled;
> u64 enable;
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index bcdf2bc..0972203 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -451,34 +451,39 @@ static void i915_pmu_enable(struct perf_event *event)
> container_of(event->pmu, typeof(*i915), pmu.base);
> struct intel_engine_cs *engine = NULL;
> unsigned long flags;
> + bool start_timer = false;
>
> spin_lock_irqsave(&i915->pmu.lock, flags);
>
> - if (is_engine_event(event)) {
> - engine = intel_engine_lookup_user(i915,
> - engine_event_class(event),
> - engine_event_instance(event));
> - GEM_BUG_ON(!engine);
> - engine->pmu.enable |= BIT(engine_event_sample(event));
> - }
> -
> - i915->pmu.enable |= event_enabled_mask(event);
> + if (i915->pmu.ref++ == 0) {
This was stupid of me: with this I support only a single busy_stats
event. The correct way is to make busy_stats a refcount, which I did in
the updated patch I just sent. I wonder whether the timer stuff should
be updated in a similar way, but I have not yet exercised that part of
the code enough to judge.
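Roughly the idea (an illustrative sketch of the refcounting only, not
the exact code from the updated patch):

	/* engine->pmu.busy_stats becomes a counter instead of a bool */
	unsigned int busy_stats;

	/* enable: only the first user switches busy stats on */
	if (engine->pmu.busy_stats++ == 0 &&
	    !cancel_delayed_work(&engine->pmu.disable_busy_stats))
		queue_work(i915->wq, &engine->pmu.enable_busy_stats);

	/* disable: only the last user schedules the switch-off */
	if (--engine->pmu.busy_stats == 0)
		queue_delayed_work(i915->wq,
				   &engine->pmu.disable_busy_stats,
				   round_jiffies_up_relative(2 * HZ));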
> + if (is_engine_event(event)) {
> + engine = intel_engine_lookup_user(i915,
> + engine_event_class(event),
> + engine_event_instance(event));
> + GEM_BUG_ON(!engine);
> + engine->pmu.enable |= BIT(engine_event_sample(event));
> + }
>
> - if (pmu_needs_timer(i915, true) && !i915->pmu.timer_enabled) {
> - hrtimer_start_range_ns(&i915->pmu.timer,
> - ns_to_ktime(PERIOD), 0,
> - HRTIMER_MODE_REL_PINNED);
> - i915->pmu.timer_enabled = true;
> - } else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
> - !engine->pmu.busy_stats) {
> - engine->pmu.busy_stats = true;
> - if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
> - queue_work(i915->wq, &engine->pmu.enable_busy_stats);
> + i915->pmu.enable |= event_enabled_mask(event);
> +
> + if (pmu_needs_timer(i915, true) && !i915->pmu.timer_enabled) {
> + hrtimer_start_range_ns(&i915->pmu.timer,
> + ns_to_ktime(PERIOD), 0,
> + HRTIMER_MODE_REL_PINNED);
> + i915->pmu.timer_enabled = true;
> + } else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
> + !engine->pmu.busy_stats) {
> + engine->pmu.busy_stats = true;
> + if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
> + queue_work(i915->wq, &engine->pmu.enable_busy_stats);
> + }
> + start_timer = true;
> }
>
> spin_unlock_irqrestore(&i915->pmu.lock, flags);
>
> - i915_pmu_timer_start(event);
> + if (start_timer)
> + i915_pmu_timer_start(event);
> }
>
> static void i915_pmu_disable(struct perf_event *event)
> @@ -486,40 +491,45 @@ static void i915_pmu_disable(struct perf_event *event)
> struct drm_i915_private *i915 =
> container_of(event->pmu, typeof(*i915), pmu.base);
> unsigned long flags;
> + bool timer_cancel = true;
> u64 mask;
>
> spin_lock_irqsave(&i915->pmu.lock, flags);
>
> - if (is_engine_event(event)) {
> - struct intel_engine_cs *engine;
> - enum intel_engine_id id;
> -
> - engine = intel_engine_lookup_user(i915,
> - engine_event_class(event),
> - engine_event_instance(event));
> - GEM_BUG_ON(!engine);
> - engine->pmu.enable &= ~BIT(engine_event_sample(event));
> - if (engine->pmu.busy_stats &&
> - !engine_needs_busy_stats(engine)) {
> - engine->pmu.busy_stats = false;
> - queue_delayed_work(i915->wq,
> - &engine->pmu.disable_busy_stats,
> - round_jiffies_up_relative(2 * HZ));
> + if (i915->pmu.ref && --i915->pmu.ref == 0) {
> + if (is_engine_event(event)) {
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> +
> + engine = intel_engine_lookup_user(i915,
> + engine_event_class(event),
> + engine_event_instance(event));
> + GEM_BUG_ON(!engine);
> + engine->pmu.enable &= ~BIT(engine_event_sample(event));
> + if (engine->pmu.busy_stats &&
> + !engine_needs_busy_stats(engine)) {
> + engine->pmu.busy_stats = false;
> + queue_delayed_work(i915->wq,
> + &engine->pmu.disable_busy_stats,
> + round_jiffies_up_relative(2 * HZ));
> + }
> + mask = 0;
> + for_each_engine(engine, i915, id)
> + mask |= engine->pmu.enable;
> + mask = (~mask) & ENGINE_SAMPLE_MASK;
> + } else {
> + mask = event_enabled_mask(event);
> }
> - mask = 0;
> - for_each_engine(engine, i915, id)
> - mask |= engine->pmu.enable;
> - mask = (~mask) & ENGINE_SAMPLE_MASK;
> - } else {
> - mask = event_enabled_mask(event);
> - }
>
> - i915->pmu.enable &= ~mask;
> - i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
> + i915->pmu.enable &= ~mask;
> + i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
> + timer_cancel = true;
> + }
>
> spin_unlock_irqrestore(&i915->pmu.lock, flags);
>
> - i915_pmu_timer_cancel(event);
> + if (timer_cancel)
> + i915_pmu_timer_cancel(event);
> }
>
> static void i915_pmu_event_start(struct perf_event *event, int flags)
> @@ -529,6 +539,8 @@ static void i915_pmu_event_start(struct perf_event *event, int flags)
>
> static void i915_pmu_event_stop(struct perf_event *event, int flags)
> {
> + if (flags & PERF_EF_UPDATE)
> + i915_pmu_event_read(event);
> i915_pmu_disable(event);
> }
>
> @@ -546,7 +558,7 @@ static int i915_pmu_event_add(struct perf_event *event, int flags)
>
> static void i915_pmu_event_del(struct perf_event *event, int flags)
> {
> - i915_pmu_disable(event);
> + i915_pmu_event_stop(event, PERF_EF_UPDATE);
> }
>
> static int i915_pmu_event_event_idx(struct perf_event *event)
> @@ -687,7 +699,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
> return;
>
> i915->pmu.base.attr_groups = i915_pmu_attr_groups;
> - i915->pmu.base.task_ctx_nr = perf_sw_context;
> + i915->pmu.base.task_ctx_nr = perf_invalid_context;
> i915->pmu.base.event_init = i915_pmu_event_init;
> i915->pmu.base.add = i915_pmu_event_add;
> i915->pmu.base.del = i915_pmu_event_del;