[Intel-gfx] [RFC 01/14] RFC drm/i915: Expose a PMU interface for perf queries

Ben Widawsky benjamin.widawsky at intel.com
Tue Jul 25 01:09:08 UTC 2017


On 17-07-18 15:36:05, Tvrtko Ursulin wrote:
>From: Chris Wilson <chris at chris-wilson.co.uk>
>
>The first goal is to be able to measure GPU (and individual ring) busyness
>without having to poll registers from userspace. (Which not only incurs
>holding the forcewake lock indefinitely, perturbing the system, but also
>runs the risk of hanging the machine.) As an alternative we can use the
>perf event counter interface to sample the ring registers periodically
>and send those results to userspace.
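
Side note on the interface: below is roughly how I'd expect userspace to
consume one of these counters. Untested sketch; it assumes the PMU ends up
with the usual /sys/bus/event_source/devices/i915/type node, uses the
I915_PMU_COUNT_RCS_BUSY encoding (config = 1) from the new uapi header, and
that the count comes back in nanoseconds as sampled by this patch. Needs
sufficient perf privileges for a system-wide event.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr = { 0 };
	unsigned int type;
	uint64_t busy_ns;
	FILE *f;
	int fd;

	/* Dynamic PMU type id, assuming the PMU registers as "i915". */
	f = fopen("/sys/bus/event_source/devices/i915/type", "r");
	if (!f)
		return 1;
	if (fscanf(f, "%u", &type) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);

	attr.type = type;
	attr.size = sizeof(attr);
	attr.config = 1; /* I915_PMU_COUNT_RCS_BUSY */

	/* System-wide counter: pid = -1, cpu = 0 (no libc wrapper exists). */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &busy_ns, sizeof(busy_ns)) != sizeof(busy_ns))
		return 1;
	printf("rcs busy: %llu ns over ~1s\n", (unsigned long long)busy_ns);
	close(fd);
	return 0;
}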
>
>To be able to do so, we need to export the two symbols from
>kernel/events/core.c to register and unregister a PMU device.
>
>v2: Use a common timer for the ring sampling.
>
>Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>---
> drivers/gpu/drm/i915/Makefile           |   1 +
> drivers/gpu/drm/i915/i915_drv.c         |   2 +
> drivers/gpu/drm/i915/i915_drv.h         |  23 ++
> drivers/gpu/drm/i915/i915_pmu.c         | 452 ++++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/intel_ringbuffer.h |   2 +
> include/uapi/drm/i915_drm.h             |  41 +++
> kernel/events/core.c                    |   1 +
> 7 files changed, 522 insertions(+)
> create mode 100644 drivers/gpu/drm/i915/i915_pmu.c
>
>diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
>index f8227318dcaf..1c720013dc42 100644
>--- a/drivers/gpu/drm/i915/Makefile
>+++ b/drivers/gpu/drm/i915/Makefile
>@@ -26,6 +26,7 @@ i915-y := i915_drv.o \
>
> i915-$(CONFIG_COMPAT)   += i915_ioc32.o
> i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
>+i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
>
> # GEM code
> i915-y += i915_cmd_parser.o \
>diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
>index d310d8245dca..f18ce519f6a2 100644
>--- a/drivers/gpu/drm/i915/i915_drv.c
>+++ b/drivers/gpu/drm/i915/i915_drv.c
>@@ -1194,6 +1194,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
> 	struct drm_device *dev = &dev_priv->drm;
>
> 	i915_gem_shrinker_init(dev_priv);
>+	i915_pmu_register(dev_priv);
>
> 	/*
> 	 * Notify a valid surface after modesetting,
>@@ -1247,6 +1248,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
> 	intel_opregion_unregister(dev_priv);
>
> 	i915_perf_unregister(dev_priv);
>+	i915_pmu_unregister(dev_priv);
>
> 	i915_teardown_sysfs(dev_priv);
> 	i915_guc_log_unregister(dev_priv);
>diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>index 7c6fab08a2e6..de518503e033 100644
>--- a/drivers/gpu/drm/i915/i915_drv.h
>+++ b/drivers/gpu/drm/i915/i915_drv.h
>@@ -40,6 +40,7 @@
> #include <linux/hash.h>
> #include <linux/intel-iommu.h>
> #include <linux/kref.h>
>+#include <linux/perf_event.h>
> #include <linux/pm_qos.h>
> #include <linux/reservation.h>
> #include <linux/shmem_fs.h>
>@@ -2093,6 +2094,12 @@ struct intel_cdclk_state {
> 	unsigned int cdclk, vco, ref;
> };
>
>+enum {
>+	__I915_SAMPLE_FREQ_ACT = 0,
>+	__I915_SAMPLE_FREQ_REQ,
>+	__I915_NUM_PMU_SAMPLERS
>+};
>+
> struct drm_i915_private {
> 	struct drm_device drm;
>
>@@ -2591,6 +2598,13 @@ struct drm_i915_private {
> 		int	irq;
> 	} lpe_audio;
>
>+	struct {
>+		struct pmu base;
>+		struct hrtimer timer;
>+		u64 enable;
>+		u64 sample[__I915_NUM_PMU_SAMPLERS];
>+	} pmu;
>+
> 	/*
> 	 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
> 	 * will be rejected. Instead look for a better place.
>@@ -3760,6 +3774,15 @@ extern void i915_perf_fini(struct drm_i915_private *dev_priv);
> extern void i915_perf_register(struct drm_i915_private *dev_priv);
> extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
>
>+/* i915_pmu.c */
>+#ifdef CONFIG_PERF_EVENTS
>+extern void i915_pmu_register(struct drm_i915_private *i915);
>+extern void i915_pmu_unregister(struct drm_i915_private *i915);
>+#else
>+static inline void i915_pmu_register(struct drm_i915_private *i915) {}
>+static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
>+#endif
>+
> /* i915_suspend.c */
> extern int i915_save_state(struct drm_i915_private *dev_priv);
> extern int i915_restore_state(struct drm_i915_private *dev_priv);
>diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
>new file mode 100644
>index 000000000000..f03ddad44da6
>--- /dev/null
>+++ b/drivers/gpu/drm/i915/i915_pmu.c
>@@ -0,0 +1,452 @@
>+#include <linux/perf_event.h>
>+#include <linux/pm_runtime.h>
>+
>+#include "i915_drv.h"
>+#include "intel_ringbuffer.h"
>+
>+#define FREQUENCY 200
>+#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
>+
>+#define RING_MASK 0xffffffff
>+#define RING_MAX 32
>+
>+static void engines_sample(struct drm_i915_private *dev_priv)
>+{
>+	struct intel_engine_cs *engine;
>+	enum intel_engine_id id;
>+	bool fw = false;
>+
>+	if ((dev_priv->pmu.enable & RING_MASK) == 0)
>+		return;
>+
>+	if (!dev_priv->gt.awake)
>+		return;
>+
>+	if (!intel_runtime_pm_get_if_in_use(dev_priv))
>+		return;
>+
>+	for_each_engine(engine, dev_priv, id) {
>+		u32 val;
>+
>+		if ((dev_priv->pmu.enable & (0x7 << (4*id))) == 0)
>+			continue;
>+
>+		if (i915_seqno_passed(intel_engine_get_seqno(engine),
>+				      intel_engine_last_submit(engine)))
>+			continue;
>+

This seems like too clever an optimization. Why not just use MODE_IDLE to be
as accurate as possible and rely as little as possible on software tracking?
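
Something along these lines is what I have in mind (untested, just the busy
sampling, with forcewake taken up front so the hardware bit alone decides):

/* Replaces the seqno check in the loop above; hold forcewake once. */
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

for_each_engine(engine, dev_priv, id) {
	u32 val;

	if ((dev_priv->pmu.enable & (0x7 << (4 * id))) == 0)
		continue;

	val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
	if (!(val & MODE_IDLE))
		engine->pmu_sample[I915_SAMPLE_BUSY] += PERIOD;
}

intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);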

>+		if (!fw) {
>+			intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
>+			fw = true;
>+		}
>+
>+		engine->pmu_sample[I915_SAMPLE_QUEUED] += PERIOD;
>+
>+		val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
>+		if (!(val & MODE_IDLE))
>+			engine->pmu_sample[I915_SAMPLE_BUSY] += PERIOD;
>+
>+		val = I915_READ_FW(RING_CTL(engine->mmio_base));
>+		if (val & RING_WAIT)
>+			engine->pmu_sample[I915_SAMPLE_WAIT] += PERIOD;
>+		if (val & RING_WAIT_SEMAPHORE)
>+			engine->pmu_sample[I915_SAMPLE_SEMA] += PERIOD;
>+	}
>+
>+	if (fw)
>+		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>+	intel_runtime_pm_put(dev_priv);
>+}
>+
>+static void frequency_sample(struct drm_i915_private *dev_priv)
>+{
>+	if (dev_priv->pmu.enable & BIT_ULL(I915_PMU_ACTUAL_FREQUENCY)) {
>+		u64 val;
>+
>+		val = dev_priv->rps.cur_freq;
>+		if (dev_priv->gt.awake &&
>+		    intel_runtime_pm_get_if_in_use(dev_priv)) {
>+			val = I915_READ_NOTRACE(GEN6_RPSTAT1);
>+			if (INTEL_GEN(dev_priv) >= 9)
>+				val = (val & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
>+			else if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
>+				val = (val & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
>+			else
>+				val = (val & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
>+			intel_runtime_pm_put(dev_priv);
>+		}
>+		val = intel_gpu_freq(dev_priv, val);
>+		dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT] += val * PERIOD;
>+	}
>+
>+	if (dev_priv->pmu.enable & BIT_ULL(I915_PMU_REQUESTED_FREQUENCY)) {
>+		u64 val = intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq);
>+		dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ] += val * PERIOD;
>+	}
>+}
>+
>+static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
>+{
>+	struct drm_i915_private *i915 =
>+		container_of(hrtimer, struct drm_i915_private, pmu.timer);
>+
>+	if (i915->pmu.enable == 0)
>+		return HRTIMER_NORESTART;
>+
>+	engines_sample(i915);
>+	frequency_sample(i915);
>+
>+	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
>+	return HRTIMER_RESTART;
>+}
>+
>+static void i915_pmu_event_destroy(struct perf_event *event)
>+{
>+	WARN_ON(event->parent);
>+}
>+
>+static int engine_event_init(struct perf_event *event)
>+{
>+	struct drm_i915_private *i915 =
>+		container_of(event->pmu, typeof(*i915), pmu.base);
>+	int engine = event->attr.config >> 2;
>+	int sample = event->attr.config & 3;
>+
>+	switch (sample) {
>+	case I915_SAMPLE_QUEUED:
>+	case I915_SAMPLE_BUSY:
>+	case I915_SAMPLE_WAIT:
>+		break;
>+	case I915_SAMPLE_SEMA:
>+		if (INTEL_GEN(i915) < 6)
>+			return -ENODEV;
>+		break;
>+	default:
>+		return -ENOENT;
>+	}
>+
>+	if (engine >= I915_NUM_ENGINES)
>+		return -ENOENT;
>+
>+	if (!i915->engine[engine])
>+		return -ENODEV;
>+
>+	return 0;
>+}
>+
>+static enum hrtimer_restart hrtimer_sample(struct hrtimer *hrtimer)
>+{
>+	struct perf_sample_data data;
>+	struct perf_event *event;
>+	u64 period;
>+
>+	event = container_of(hrtimer, struct perf_event, hw.hrtimer);
>+	if (event->state != PERF_EVENT_STATE_ACTIVE)
>+		return HRTIMER_NORESTART;
>+
>+	event->pmu->read(event);
>+
>+	perf_sample_data_init(&data, 0, event->hw.last_period);
>+	perf_event_overflow(event, &data, NULL);
>+
>+	period = max_t(u64, 10000, event->hw.sample_period);
>+	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
>+	return HRTIMER_RESTART;
>+}
>+
>+static void init_hrtimer(struct perf_event *event)
>+{
>+	struct hw_perf_event *hwc = &event->hw;
>+
>+	if (!is_sampling_event(event))
>+		return;
>+
>+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>+	hwc->hrtimer.function = hrtimer_sample;
>+
>+	if (event->attr.freq) {
>+		long freq = event->attr.sample_freq;
>+
>+		event->attr.sample_period = NSEC_PER_SEC / freq;
>+		hwc->sample_period = event->attr.sample_period;
>+		local64_set(&hwc->period_left, hwc->sample_period);
>+		hwc->last_period = hwc->sample_period;
>+		event->attr.freq = 0;
>+	}
>+}
>+
>+static int i915_pmu_event_init(struct perf_event *event)
>+{
>+	struct drm_i915_private *i915 =
>+		container_of(event->pmu, typeof(*i915), pmu.base);
>+	int ret;
>+
>+	/* XXX ideally only want pid == -1 && cpu == -1 */
>+
>+	if (event->attr.type != event->pmu->type)
>+		return -ENOENT;
>+
>+	if (has_branch_stack(event))
>+		return -EOPNOTSUPP;
>+
>+	ret = 0;
>+	if (event->attr.config < RING_MAX) {
>+		ret = engine_event_init(event);
>+	} else switch (event->attr.config) {
>+	case I915_PMU_ACTUAL_FREQUENCY:
>+		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
>+			ret = -ENODEV; /* requires a mutex for sampling! */
>+	case I915_PMU_REQUESTED_FREQUENCY:
>+	case I915_PMU_ENERGY:
>+	case I915_PMU_RC6_RESIDENCY:
>+	case I915_PMU_RC6p_RESIDENCY:
>+	case I915_PMU_RC6pp_RESIDENCY:
>+		if (INTEL_GEN(i915) < 6)
>+			ret = -ENODEV;
>+		break;
>+	}
>+	if (ret)
>+		return ret;
>+
>+	if (!event->parent)
>+		event->destroy = i915_pmu_event_destroy;
>+
>+	init_hrtimer(event);
>+
>+	return 0;
>+}
>+
>+static void i915_pmu_timer_start(struct perf_event *event)
>+{
>+	struct hw_perf_event *hwc = &event->hw;
>+	s64 period;
>+
>+	if (!is_sampling_event(event))
>+		return;
>+
>+	period = local64_read(&hwc->period_left);
>+	if (period) {
>+		if (period < 0)
>+			period = 10000;
>+
>+		local64_set(&hwc->period_left, 0);
>+	} else {
>+		period = max_t(u64, 10000, hwc->sample_period);
>+	}
>+
>+	hrtimer_start_range_ns(&hwc->hrtimer,
>+			       ns_to_ktime(period), 0,
>+			       HRTIMER_MODE_REL_PINNED);
>+}
>+
>+static void i915_pmu_timer_cancel(struct perf_event *event)
>+{
>+	struct hw_perf_event *hwc = &event->hw;
>+
>+	if (!is_sampling_event(event))
>+		return;
>+
>+	local64_set(&hwc->period_left,
>+		    ktime_to_ns(hrtimer_get_remaining(&hwc->hrtimer)));
>+	hrtimer_cancel(&hwc->hrtimer);
>+}
>+
>+static void i915_pmu_enable(struct perf_event *event)
>+{
>+	struct drm_i915_private *i915 =
>+		container_of(event->pmu, typeof(*i915), pmu.base);
>+
>+	if (i915->pmu.enable == 0)
>+		hrtimer_start_range_ns(&i915->pmu.timer,
>+				       ns_to_ktime(PERIOD), 0,
>+				       HRTIMER_MODE_REL_PINNED);
>+
>+	i915->pmu.enable |= BIT_ULL(event->attr.config);
>+
>+	i915_pmu_timer_start(event);
>+}
>+
>+static void i915_pmu_disable(struct perf_event *event)
>+{
>+	struct drm_i915_private *i915 =
>+		container_of(event->pmu, typeof(*i915), pmu.base);
>+
>+	i915->pmu.enable &= ~BIT_ULL(event->attr.config);
>+	i915_pmu_timer_cancel(event);
>+}
>+
>+static int i915_pmu_event_add(struct perf_event *event, int flags)
>+{
>+	struct hw_perf_event *hwc = &event->hw;
>+
>+	if (flags & PERF_EF_START)
>+		i915_pmu_enable(event);
>+
>+	hwc->state = !(flags & PERF_EF_START);
>+
>+	return 0;
>+}
>+
>+static void i915_pmu_event_del(struct perf_event *event, int flags)
>+{
>+	i915_pmu_disable(event);
>+}
>+
>+static void i915_pmu_event_start(struct perf_event *event, int flags)
>+{
>+	i915_pmu_enable(event);
>+}
>+
>+static void i915_pmu_event_stop(struct perf_event *event, int flags)
>+{
>+	i915_pmu_disable(event);
>+}
>+
>+static u64 read_energy_uJ(struct drm_i915_private *dev_priv)
>+{
>+	u64 power;
>+
>+	GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
>+
>+	intel_runtime_pm_get(dev_priv);
>+
>+	rdmsrl(MSR_RAPL_POWER_UNIT, power);
>+	power = (power & 0x1f00) >> 8;
>+	power = 1000000 >> power; /* convert to uJ */
>+	power *= I915_READ_NOTRACE(MCH_SECP_NRG_STTS);
>+
>+	intel_runtime_pm_put(dev_priv);
>+
>+	return power;
>+}
>+
>+static inline u64 calc_residency(struct drm_i915_private *dev_priv,
>+				 const i915_reg_t reg)
>+{
>+	u64 val, units = 128, div = 100000;
>+
>+	GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
>+
>+	intel_runtime_pm_get(dev_priv);
>+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
>+		div = dev_priv->czclk_freq;
>+		units = 1;
>+		if (I915_READ_NOTRACE(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
>+			units <<= 8;
>+	} else if (IS_GEN9_LP(dev_priv)) {
>+		div = 1200;
>+		units = 1;
>+	}
>+	val = I915_READ_NOTRACE(reg);
>+	intel_runtime_pm_put(dev_priv);
>+
>+	val *= units;
>+	return DIV_ROUND_UP_ULL(val, div);
>+}
>+
>+static u64 count_interrupts(struct drm_i915_private *i915)
>+{
>+	/* open-coded kstat_irqs() */
>+	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
>+	u64 sum = 0;
>+	int cpu;
>+
>+	if (!desc || !desc->kstat_irqs)
>+		return 0;
>+
>+	for_each_possible_cpu(cpu)
>+		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
>+
>+	return sum;
>+}
>+
>+static void i915_pmu_event_read(struct perf_event *event)
>+{
>+	struct drm_i915_private *i915 =
>+		container_of(event->pmu, typeof(*i915), pmu.base);
>+	u64 val = 0;
>+
>+	if (event->attr.config < 32) {
>+		int engine = event->attr.config >> 2;
>+		int sample = event->attr.config & 3;
>+		val = i915->engine[engine]->pmu_sample[sample];
>+	} else switch (event->attr.config) {
>+	case I915_PMU_ACTUAL_FREQUENCY:
>+		val = i915->pmu.sample[__I915_SAMPLE_FREQ_ACT];
>+		break;
>+	case I915_PMU_REQUESTED_FREQUENCY:
>+		val = i915->pmu.sample[__I915_SAMPLE_FREQ_REQ];
>+		break;
>+	case I915_PMU_ENERGY:
>+		val = read_energy_uJ(i915);
>+		break;
>+	case I915_PMU_INTERRUPTS:
>+		val = count_interrupts(i915);
>+		break;
>+
>+	case I915_PMU_RC6_RESIDENCY:
>+		if (!i915->gt.awake)
>+			return;
>+
>+		val = calc_residency(i915, IS_VALLEYVIEW(i915) ? VLV_GT_RENDER_RC6 : GEN6_GT_GFX_RC6);
>+		break;
>+
>+	case I915_PMU_RC6p_RESIDENCY:
>+		if (!i915->gt.awake)
>+			return;
>+
>+		if (!IS_VALLEYVIEW(i915))
>+			val = calc_residency(i915, GEN6_GT_GFX_RC6p);
>+		break;
>+
>+	case I915_PMU_RC6pp_RESIDENCY:
>+		if (!i915->gt.awake)
>+			return;
>+
>+		if (!IS_VALLEYVIEW(i915))
>+			val = calc_residency(i915, GEN6_GT_GFX_RC6pp);
>+		break;
>+	}
>+
>+	local64_set(&event->count, val);
>+}
>+
>+static int i915_pmu_event_event_idx(struct perf_event *event)
>+{
>+	return 0;
>+}
>+
>+void i915_pmu_register(struct drm_i915_private *i915)
>+{
>+	if (INTEL_GEN(i915) <= 2)
>+		return;
>+
>+	i915->pmu.base.task_ctx_nr	= perf_sw_context;
>+	i915->pmu.base.event_init	= i915_pmu_event_init;
>+	i915->pmu.base.add		= i915_pmu_event_add;
>+	i915->pmu.base.del		= i915_pmu_event_del;
>+	i915->pmu.base.start		= i915_pmu_event_start;
>+	i915->pmu.base.stop		= i915_pmu_event_stop;
>+	i915->pmu.base.read		= i915_pmu_event_read;
>+	i915->pmu.base.event_idx	= i915_pmu_event_event_idx;
>+
>+	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>+	i915->pmu.timer.function = i915_sample;
>+	i915->pmu.enable = 0;
>+
>+	if (perf_pmu_register(&i915->pmu.base, "i915", -1))
>+		i915->pmu.base.event_init = NULL;
>+}
>+
>+void i915_pmu_unregister(struct drm_i915_private *i915)
>+{
>+	if (!i915->pmu.base.event_init)
>+		return;
>+
>+	i915->pmu.enable = 0;
>+
>+	perf_pmu_unregister(&i915->pmu.base);
>+	i915->pmu.base.event_init = NULL;
>+
>+	hrtimer_cancel(&i915->pmu.timer);
>+}
>diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
>index d33c93444c0d..0877b151239d 100644
>--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>@@ -245,6 +245,8 @@ struct intel_engine_cs {
> 		I915_SELFTEST_DECLARE(bool mock : 1);
> 	} breadcrumbs;
>
>+	u64 pmu_sample[4];
>+
> 	/*
> 	 * A pool of objects to use as shadow copies of client batch buffers
> 	 * when the command parser is enabled. Prevents the client from
>diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>index 7ccbd6a2bbe0..733774f19a0b 100644
>--- a/include/uapi/drm/i915_drm.h
>+++ b/include/uapi/drm/i915_drm.h
>@@ -86,6 +86,47 @@ enum i915_mocs_table_index {
> 	I915_MOCS_CACHED,
> };
>
>+/**
>+ * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
>+ *
>+ */
>+#define I915_SAMPLE_QUEUED	0
>+#define I915_SAMPLE_BUSY	1
>+#define I915_SAMPLE_WAIT	2
>+#define I915_SAMPLE_SEMA	3
>+
>+#define I915_SAMPLE_RCS		0
>+#define I915_SAMPLE_VCS		1
>+#define I915_SAMPLE_BCS		2
>+#define I915_SAMPLE_VECS	3
>+
>+#define __I915_PMU_COUNT(ring, id) ((ring) << 2 | (id))
>+
>+#define I915_PMU_COUNT_RCS_BUSY __I915_PMU_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_BUSY)
>+#define I915_PMU_COUNT_RCS_WAIT __I915_PMU_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_WAIT)
>+#define I915_PMU_COUNT_RCS_SEMA __I915_PMU_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_SEMA)
>+
>+#define I915_PMU_COUNT_VCS_BUSY __I915_PMU_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_BUSY)
>+#define I915_PMU_COUNT_VCS_WAIT __I915_PMU_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_WAIT)
>+#define I915_PMU_COUNT_VCS_SEMA __I915_PMU_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_SEMA)
>+
>+#define I915_PMU_COUNT_BCS_BUSY __I915_PMU_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_BUSY)
>+#define I915_PMU_COUNT_BCS_WAIT __I915_PMU_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_WAIT)
>+#define I915_PMU_COUNT_BCS_SEMA __I915_PMU_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_SEMA)
>+
>+#define I915_PMU_COUNT_VECS_BUSY __I915_PMU_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_BUSY)
>+#define I915_PMU_COUNT_VECS_WAIT __I915_PMU_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_WAIT)
>+#define I915_PMU_COUNT_VECS_SEMA __I915_PMU_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_SEMA)
>+
>+#define I915_PMU_ACTUAL_FREQUENCY 32
>+#define I915_PMU_REQUESTED_FREQUENCY 33

This one seems less than useful.
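(It only mirrors the software-tracked rps.cur_freq, which userspace can
already read from sysfs. Minimal sketch, assuming the existing
gt_cur_freq_mhz node:)

#include <stdio.h>

/* Returns the requested GPU frequency in MHz, or -1 on error. */
static int read_requested_freq_mhz(void)
{
	FILE *f = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r");
	int mhz = -1;

	if (f) {
		if (fscanf(f, "%d", &mhz) != 1)
			mhz = -1;
		fclose(f);
	}
	return mhz;
}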

>+#define I915_PMU_ENERGY 34
>+#define I915_PMU_INTERRUPTS 35
>+
>+#define I915_PMU_RC6_RESIDENCY		40
>+#define I915_PMU_RC6p_RESIDENCY	41
>+#define I915_PMU_RC6pp_RESIDENCY	42
>+
> /* Each region is a minimum of 16k, and there are at most 255 of them.
>  */
> #define I915_NR_TEX_REGIONS 255	/* table size 2k - maximum due to use
>diff --git a/kernel/events/core.c b/kernel/events/core.c
>index e46eba8cd1b7..7b8c6dce1078 100644
>--- a/kernel/events/core.c
>+++ b/kernel/events/core.c
>@@ -7386,6 +7386,7 @@ int perf_event_overflow(struct perf_event *event,
> {
> 	return __perf_event_overflow(event, 1, data, regs);
> }
>+EXPORT_SYMBOL_GPL(perf_event_overflow);
>
> /*
>  * Generic software event infrastructure
>-- 
>2.9.4
>

-- 
Ben Widawsky, Intel Open Source Technology Center

