[Intel-gfx] [RFC 04/10] drm/i915: Expose a PMU interface for perf queries

Tvrtko Ursulin tursulin at ursulin.net
Wed Aug 2 12:32:44 UTC 2017


From: Chris Wilson <chris at chris-wilson.co.uk>

The first goal is to be able to measure GPU (and individual ring) busyness
without having to poll registers from userspace. (Which not only incurs
holding the forcewake lock indefinitely, perturbing the system, but also
runs the risk of hanging the machine.) As an alternative we can use the
perf event counter interface to sample the ring registers periodically
and send those results to userspace.

To be able to do so, we need to export perf_event_overflow() from
kernel/events/core.c so that the driver can report samples to the perf core.

v2: Use a common timer for the ring sampling.

v3:
 * Decouple uAPI from i915 engine ids.
 * Complete uAPI defines.
 * Refactor some code to helpers for clarity.
 * Skip sampling disabled engines.
 * Expose counters in sysfs.
 * Pass in fake regs to avoid null ptr deref in perf core.
 * Convert to class/instance uAPI.
 * Use shared driver code for rc6 residency, power and frequency.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/Makefile           |   1 +
 drivers/gpu/drm/i915/i915_drv.c         |   2 +
 drivers/gpu/drm/i915/i915_drv.h         |  25 ++
 drivers/gpu/drm/i915/i915_pmu.c         | 629 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_reg.h         |   3 +
 drivers/gpu/drm/i915/intel_engine_cs.c  |  10 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |  25 ++
 drivers/gpu/drm/i915/intel_ringbuffer.h |   8 +
 include/uapi/drm/i915_drm.h             |  55 +++
 kernel/events/core.c                    |   1 +
 10 files changed, 759 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_pmu.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index f8227318dcaf..1c720013dc42 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -26,6 +26,7 @@ i915-y := i915_drv.o \
 
 i915-$(CONFIG_COMPAT)   += i915_ioc32.o
 i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
+i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
 # GEM code
 i915-y += i915_cmd_parser.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 214555e813f1..d75c2d790875 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1194,6 +1194,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
 	struct drm_device *dev = &dev_priv->drm;
 
 	i915_gem_shrinker_init(dev_priv);
+	i915_pmu_register(dev_priv);
 
 	/*
 	 * Notify a valid surface after modesetting,
@@ -1248,6 +1249,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
 	intel_opregion_unregister(dev_priv);
 
 	i915_perf_unregister(dev_priv);
+	i915_pmu_unregister(dev_priv);
 
 	i915_teardown_sysfs(dev_priv);
 	i915_guc_log_unregister(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0c8bd1cdcbbd..142826742b86 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
 #include <linux/hash.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
+#include <linux/perf_event.h>
 #include <linux/pm_qos.h>
 #include <linux/reservation.h>
 #include <linux/shmem_fs.h>
@@ -2111,6 +2112,12 @@ struct intel_cdclk_state {
 	unsigned int cdclk, vco, ref;
 };
 
+enum {
+	__I915_SAMPLE_FREQ_ACT = 0,
+	__I915_SAMPLE_FREQ_REQ,
+	__I915_NUM_PMU_SAMPLERS
+};
+
 struct drm_i915_private {
 	struct drm_device drm;
 
@@ -2158,6 +2165,7 @@ struct drm_i915_private {
 	struct pci_dev *bridge_dev;
 	struct i915_gem_context *kernel_context;
 	struct intel_engine_cs *engine[I915_NUM_ENGINES];
+	struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1][MAX_ENGINE_INSTANCE + 1];
 	struct i915_vma *semaphore;
 
 	struct drm_dma_handle *status_page_dmah;
@@ -2605,6 +2613,14 @@ struct drm_i915_private {
 		int	irq;
 	} lpe_audio;
 
+	struct {
+		struct pmu base;
+		spinlock_t lock;
+		struct hrtimer timer;
+		u64 enable;
+		u64 sample[__I915_NUM_PMU_SAMPLERS];
+	} pmu;
+
 	/*
 	 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 	 * will be rejected. Instead look for a better place.
@@ -3813,6 +3829,15 @@ extern void i915_perf_fini(struct drm_i915_private *dev_priv);
 extern void i915_perf_register(struct drm_i915_private *dev_priv);
 extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
 
+/* i915_pmu.c */
+#ifdef CONFIG_PERF_EVENTS
+extern void i915_pmu_register(struct drm_i915_private *i915);
+extern void i915_pmu_unregister(struct drm_i915_private *i915);
+#else
+static inline void i915_pmu_register(struct drm_i915_private *i915) {}
+static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
+#endif
+
 /* i915_suspend.c */
 extern int i915_save_state(struct drm_i915_private *dev_priv);
 extern int i915_restore_state(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
new file mode 100644
index 000000000000..62c527c12641
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -0,0 +1,629 @@
+#include <linux/perf_event.h>
+#include <linux/pm_runtime.h>
+
+#include "i915_drv.h"
+#include "intel_ringbuffer.h"
+
+#define FREQUENCY 200
+#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
+
+#define ENGINE_SAMPLE_MASK \
+	(BIT(I915_SAMPLE_QUEUED) | \
+	 BIT(I915_SAMPLE_BUSY) | \
+	 BIT(I915_SAMPLE_WAIT) | \
+	 BIT(I915_SAMPLE_SEMA))
+
+#define ENGINE_SAMPLE_BITS (16)
+
+/* Return the sampler field of @config, i.e. its I915_PMU_SAMPLE_MASK bits. */
+static u8 engine_config_sample(u64 config)
+{
+	return config & I915_PMU_SAMPLE_MASK;
+}
+
+/* Return the sampler field of the config attached to @event. */
+static u8 engine_event_sample(struct perf_event *event)
+{
+	return engine_config_sample(event->attr.config);
+}
+
+/* Return the 8-bit engine class stored at I915_PMU_CLASS_SHIFT in the config. */
+static u8 engine_event_class(struct perf_event *event)
+{
+	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
+}
+
+/*
+ * Return the 8-bit engine instance, which sits directly above the
+ * I915_PMU_SAMPLE_BITS wide sampler field of the config.
+ */
+static u8 engine_event_instance(struct perf_event *event)
+{
+	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
+}
+
+/* Engine events are all config values below the first __I915_PMU_OTHER(). */
+static bool is_engine_config(u64 config)
+{
+	return config < __I915_PMU_OTHER(0);
+}
+
+/*
+ * Translate an event config into its bit in the pmu.enable bitmask.
+ *
+ * Engine configs use the bit numbered by their sampler. Any other config
+ * uses its offset from __I915_PMU_OTHER(0), shifted up past the low
+ * ENGINE_SAMPLE_BITS bits.
+ */
+static u64 config_enabled_mask(u64 config)
+{
+	u64 other;
+
+	if (is_engine_config(config))
+		return BIT_ULL(engine_config_sample(config));
+
+	other = config - __I915_PMU_OTHER(0);
+
+	return BIT_ULL(other) << ENGINE_SAMPLE_BITS;
+}
+
+/* True when the config of @event lies in the engine event range. */
+static bool is_engine_event(struct perf_event *event)
+{
+	return is_engine_config(event->attr.config);
+}
+
+/* Return the pmu.enable bit that corresponds to the config of @event. */
+static u64 event_enabled_mask(struct perf_event *event)
+{
+	return config_enabled_mask(event->attr.config);
+}
+
+/*
+ * Take FORCEWAKE_ALL unless the caller already holds it.
+ *
+ * @fw: whether forcewake was already taken by an earlier call.
+ *
+ * Always returns true, so callers can write "fw = grab_forcewake(i915, fw)"
+ * and end up with a flag saying that a matching put is required.
+ */
+static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
+{
+	if (fw)
+		return true;
+
+	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
+
+	return true;
+}
+
+/*
+ * Sample every engine once, adding PERIOD to each enabled per-engine
+ * counter whose condition is observed at this moment.
+ *
+ * Nothing is sampled unless at least one engine sampler is enabled, the
+ * GT is marked awake and intel_runtime_pm_get_if_in_use() succeeds.
+ */
+static void engines_sample(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	bool fw = false;
+
+	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
+		return;
+
+	if (!dev_priv->gt.awake)
+		return;
+
+	if (!intel_runtime_pm_get_if_in_use(dev_priv))
+		return;
+
+	for_each_engine(engine, dev_priv, id) {
+		u32 enable = engine->pmu.enable;
+
+		/*
+		 * NOTE(review): presumably this means the engine has already
+		 * completed everything submitted to it, so there is nothing
+		 * to account for - confirm against i915_seqno_passed().
+		 */
+		if (i915_seqno_passed(intel_engine_get_seqno(engine),
+				      intel_engine_last_submit(engine)))
+			continue;
+
+		if (enable & BIT(I915_SAMPLE_QUEUED))
+			engine->pmu.sample[I915_SAMPLE_QUEUED] += PERIOD;
+
+		if (enable & BIT(I915_SAMPLE_BUSY)) {
+			u32 val;
+
+			/* Forcewake is only taken on the first register read. */
+			fw = grab_forcewake(dev_priv, fw);
+			val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
+			if (!(val & MODE_IDLE))
+				engine->pmu.sample[I915_SAMPLE_BUSY] += PERIOD;
+		}
+
+		/* WAIT and SEMA are both derived from one RING_CTL read. */
+		if (enable & (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA))) {
+			u32 val;
+
+			fw = grab_forcewake(dev_priv, fw);
+			val = I915_READ_FW(RING_CTL(engine->mmio_base));
+			if ((enable & BIT(I915_SAMPLE_WAIT)) &&
+			    (val & RING_WAIT))
+				engine->pmu.sample[I915_SAMPLE_WAIT] += PERIOD;
+			if ((enable & BIT(I915_SAMPLE_SEMA)) &&
+			    (val & RING_WAIT_SEMAPHORE))
+				engine->pmu.sample[I915_SAMPLE_SEMA] += PERIOD;
+		}
+	}
+
+	/* Release forcewake only if one of the reads above acquired it. */
+	if (fw)
+		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+	intel_runtime_pm_put(dev_priv);
+}
+
+/*
+ * Add (frequency * PERIOD) to the actual and requested frequency
+ * accumulators, for whichever of the two events is enabled.
+ */
+static void frequency_sample(struct drm_i915_private *dev_priv)
+{
+	if (dev_priv->pmu.enable &
+	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
+		u64 val;
+
+		/*
+		 * Start from the software rps.cur_freq and replace it with the
+		 * value decoded from GEN6_RPSTAT1 only when the GT is awake
+		 * and a runtime-pm reference could be obtained.
+		 */
+		val = dev_priv->rps.cur_freq;
+		if (dev_priv->gt.awake &&
+		    intel_runtime_pm_get_if_in_use(dev_priv)) {
+			val = intel_get_cagf(dev_priv,
+					     I915_READ_NOTRACE(GEN6_RPSTAT1));
+			intel_runtime_pm_put(dev_priv);
+		}
+		val = intel_gpu_freq(dev_priv, val);
+		dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT] += val * PERIOD;
+	}
+
+	if (dev_priv->pmu.enable &
+	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
+		u64 val = intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq);
+		dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ] += val * PERIOD;
+	}
+}
+
+/*
+ * Callback of the shared sampling timer (pmu.timer).
+ *
+ * Stops the timer once no event is enabled; otherwise samples the engines
+ * and frequencies and re-arms the timer PERIOD nanoseconds ahead.
+ */
+static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
+{
+	struct drm_i915_private *i915 =
+		container_of(hrtimer, struct drm_i915_private, pmu.timer);
+
+	if (i915->pmu.enable == 0)
+		return HRTIMER_NORESTART;
+
+	engines_sample(i915);
+	frequency_sample(i915);
+
+	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
+	return HRTIMER_RESTART;
+}
+
+/*
+ * event->destroy hook. i915_pmu_event_init() only installs it on events
+ * without a parent, so seeing a parent here is unexpected and warned about.
+ */
+static void i915_pmu_event_destroy(struct perf_event *event)
+{
+	WARN_ON(event->parent);
+}
+
+/*
+ * Validate an engine event.
+ *
+ * Returns -ENODEV when the class/instance pair does not resolve to an
+ * engine, or when the semaphore sampler is requested before gen6;
+ * -ENOENT for an unknown sampler; 0 otherwise.
+ */
+static int engine_event_init(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+
+	if (!intel_engine_lookup_user(i915, engine_event_class(event),
+				      engine_event_instance(event)))
+		return -ENODEV;
+
+	switch (engine_event_sample(event)) {
+	case I915_SAMPLE_QUEUED:
+	case I915_SAMPLE_BUSY:
+	case I915_SAMPLE_WAIT:
+		break;
+	case I915_SAMPLE_SEMA:
+		if (INTEL_GEN(i915) < 6)
+			return -ENODEV;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+static DEFINE_PER_CPU(struct pt_regs, i915_pmu_pt_regs);
+
+/*
+ * Per-event timer callback used for sampling events.
+ *
+ * Refreshes the event count through pmu->read() and reports an overflow to
+ * the perf core. The timer stops once the event is no longer active;
+ * otherwise it is re-armed with the sample period, clamped to at least
+ * 10000 ns.
+ */
+static enum hrtimer_restart hrtimer_sample(struct hrtimer *hrtimer)
+{
+	/* Placeholder registers so the perf core is never handed a NULL regs. */
+	struct pt_regs *regs = this_cpu_ptr(&i915_pmu_pt_regs);
+	struct perf_sample_data data;
+	struct perf_event *event;
+	u64 period;
+
+	event = container_of(hrtimer, struct perf_event, hw.hrtimer);
+	if (event->state != PERF_EVENT_STATE_ACTIVE)
+		return HRTIMER_NORESTART;
+
+	event->pmu->read(event);
+
+	perf_sample_data_init(&data, 0, event->hw.last_period);
+	perf_event_overflow(event, &data, regs);
+
+	period = max_t(u64, 10000, event->hw.sample_period);
+	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
+	return HRTIMER_RESTART;
+}
+
+/*
+ * Prepare the per-event sampling timer. Does nothing for events that are
+ * not sampling events.
+ *
+ * A frequency based request is converted into a fixed period of
+ * NSEC_PER_SEC / sample_freq nanoseconds and attr.freq is cleared, so the
+ * event is treated as period based from then on.
+ */
+static void init_hrtimer(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (!is_sampling_event(event))
+		return;
+
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hwc->hrtimer.function = hrtimer_sample;
+
+	if (event->attr.freq) {
+		long freq = event->attr.sample_freq;
+
+		event->attr.sample_period = NSEC_PER_SEC / freq;
+		hwc->sample_period = event->attr.sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+		hwc->last_period = hwc->sample_period;
+		event->attr.freq = 0;
+	}
+}
+
+/*
+ * perf core callback: validate a new event against this PMU and set it up.
+ *
+ * Returns -ENOENT when the event is not of this PMU's type or its config is
+ * not one the driver defines, -EOPNOTSUPP when branch stack sampling is
+ * requested, -ENODEV when the counter cannot be provided on this hardware,
+ * and 0 on success.
+ */
+static int i915_pmu_event_init(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	int ret = 0;
+
+	/* XXX ideally only want pid == -1 && cpu == -1 */
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	if (is_engine_event(event)) {
+		ret = engine_event_init(event);
+	} else switch (event->attr.config) {
+	case I915_PMU_ACTUAL_FREQUENCY:
+		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+			ret = -ENODEV; /* requires a mutex for sampling! */
+		/* fall through - the gen6+ requirement below applies too */
+	case I915_PMU_REQUESTED_FREQUENCY:
+	case I915_PMU_ENERGY:
+	case I915_PMU_RC6_RESIDENCY:
+	case I915_PMU_RC6p_RESIDENCY:
+	case I915_PMU_RC6pp_RESIDENCY:
+		if (INTEL_GEN(i915) < 6)
+			ret = -ENODEV;
+		break;
+	case I915_PMU_INTERRUPTS:
+		break;
+	default:
+		/*
+		 * Reject configs the driver does not define. Accepting them
+		 * would later hand an arbitrary shift count to
+		 * config_enabled_mask() when the event gets enabled.
+		 */
+		ret = -ENOENT;
+		break;
+	}
+	if (ret)
+		return ret;
+
+	if (!event->parent)
+		event->destroy = i915_pmu_event_destroy;
+
+	init_hrtimer(event);
+
+	return 0;
+}
+
+/*
+ * Arm the per-event sampling timer. Does nothing for non-sampling events.
+ *
+ * A non-zero period_left is used as the first expiry (and then cleared),
+ * with negative values replaced by 10000 ns. Otherwise the timer starts
+ * with the sample period, clamped to at least 10000 ns.
+ */
+static void i915_pmu_timer_start(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	s64 period;
+
+	if (!is_sampling_event(event))
+		return;
+
+	period = local64_read(&hwc->period_left);
+	if (period) {
+		if (period < 0)
+			period = 10000;
+
+		local64_set(&hwc->period_left, 0);
+	} else {
+		period = max_t(u64, 10000, hwc->sample_period);
+	}
+
+	hrtimer_start_range_ns(&hwc->hrtimer,
+			       ns_to_ktime(period), 0,
+			       HRTIMER_MODE_REL_PINNED);
+}
+
+/*
+ * Stop the per-event sampling timer, first saving the time that was left
+ * until expiry in period_left so i915_pmu_timer_start() can pick it up.
+ * Does nothing for non-sampling events.
+ */
+static void i915_pmu_timer_cancel(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (!is_sampling_event(event))
+		return;
+
+	local64_set(&hwc->period_left,
+		    ktime_to_ns(hrtimer_get_remaining(&hwc->hrtimer)));
+	hrtimer_cancel(&hwc->hrtimer);
+}
+
+/*
+ * Enable the counter backing @event.
+ *
+ * Under pmu.lock: starts the shared sampling timer if nothing was enabled
+ * before, sets the event's bit in pmu.enable and, for engine events, the
+ * sampler bit in the target engine's own enable mask. The per-event
+ * sampling timer is started after the lock is dropped.
+ */
+static void i915_pmu_enable(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	unsigned long flags;
+
+	spin_lock_irqsave(&i915->pmu.lock, flags);
+
+	/* First enabled event: kick off the shared sampling timer. */
+	if (i915->pmu.enable == 0)
+		hrtimer_start_range_ns(&i915->pmu.timer,
+				       ns_to_ktime(PERIOD), 0,
+				       HRTIMER_MODE_REL_PINNED);
+
+	i915->pmu.enable |= event_enabled_mask(event);
+
+	if (is_engine_event(event)) {
+		struct intel_engine_cs *engine;
+
+		engine = intel_engine_lookup_user(i915,
+						  engine_event_class(event),
+						  engine_event_instance(event));
+		GEM_BUG_ON(!engine);
+		engine->pmu.enable |= BIT(engine_event_sample(event));
+	}
+
+	spin_unlock_irqrestore(&i915->pmu.lock, flags);
+
+	i915_pmu_timer_start(event);
+}
+
+/*
+ * Disable the counter backing @event.
+ *
+ * Under pmu.lock: for engine events, clears the sampler bit on the target
+ * engine and then drops from pmu.enable only those engine sampler bits
+ * that no engine has enabled any more; for other events, clears the
+ * event's own bit. The per-event sampling timer is cancelled after the
+ * lock is dropped.
+ */
+static void i915_pmu_disable(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	unsigned long flags;
+	u64 mask;
+
+	spin_lock_irqsave(&i915->pmu.lock, flags);
+
+	if (is_engine_event(event)) {
+		struct intel_engine_cs *engine;
+		enum intel_engine_id id;
+
+		engine = intel_engine_lookup_user(i915,
+						  engine_event_class(event),
+						  engine_event_instance(event));
+		GEM_BUG_ON(!engine);
+		engine->pmu.enable &= ~BIT(engine_event_sample(event));
+
+		/* Collect the samplers still in use on any engine. */
+		mask = 0;
+		for_each_engine(engine, i915, id)
+			mask |= engine->pmu.enable;
+
+		/*
+		 * Clear only unused engine sampler bits. Without limiting the
+		 * inverted mask to ENGINE_SAMPLE_MASK, its upper bits would
+		 * all be set and wipe the non-engine enable bits (frequency
+		 * etc.) stored above the engine samplers in pmu.enable.
+		 */
+		mask = ~mask & ENGINE_SAMPLE_MASK;
+	} else {
+		mask = event_enabled_mask(event);
+	}
+
+	i915->pmu.enable &= ~mask;
+
+	spin_unlock_irqrestore(&i915->pmu.lock, flags);
+
+	i915_pmu_timer_cancel(event);
+}
+
+/*
+ * pmu->add callback: enable the event right away when PERF_EF_START is
+ * set. Always returns 0.
+ */
+static int i915_pmu_event_add(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (flags & PERF_EF_START)
+		i915_pmu_enable(event);
+
+	/*
+	 * 0 when started, 1 otherwise.
+	 * NOTE(review): 1 presumably stands for PERF_HES_STOPPED - confirm.
+	 */
+	hwc->state = !(flags & PERF_EF_START);
+
+	return 0;
+}
+
+/* pmu->del callback: disable the event; @flags is not used. */
+static void i915_pmu_event_del(struct perf_event *event, int flags)
+{
+	i915_pmu_disable(event);
+}
+
+/* pmu->start callback: enable the event; @flags is not used. */
+static void i915_pmu_event_start(struct perf_event *event, int flags)
+{
+	i915_pmu_enable(event);
+}
+
+/* pmu->stop callback: disable the event; @flags is not used. */
+static void i915_pmu_event_stop(struct perf_event *event, int flags)
+{
+	i915_pmu_disable(event);
+}
+
+/*
+ * Return the number of interrupts recorded for the device's PCI irq,
+ * summed over all possible CPUs. Returns 0 when the irq descriptor or its
+ * per-CPU statistics are not available.
+ */
+static u64 count_interrupts(struct drm_i915_private *i915)
+{
+	/* open-coded kstat_irqs() */
+	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
+	u64 sum = 0;
+	int cpu;
+
+	if (!desc || !desc->kstat_irqs)
+		return 0;
+
+	for_each_possible_cpu(cpu)
+		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
+
+	return sum;
+}
+
+/*
+ * pmu->read callback: store the current value of the counter backing
+ * @event in event->count.
+ *
+ * Engine and frequency events report the accumulators maintained by the
+ * sampling timer; energy, interrupts and RC6 residency are queried on
+ * demand. The RC6 cases return without touching event->count while the GT
+ * is not awake.
+ */
+static void i915_pmu_event_read(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	u64 val = 0;
+
+	if (is_engine_event(event)) {
+		u8 sample = engine_event_sample(event);
+		struct intel_engine_cs *engine;
+
+		engine = intel_engine_lookup_user(i915,
+						  engine_event_class(event),
+						  engine_event_instance(event));
+
+		/* A missing engine is warned about once and reads as 0. */
+		if (WARN_ON_ONCE(!engine)) {
+			/* Do nothing */
+		} else {
+			val = engine->pmu.sample[sample];
+		}
+	} else switch (event->attr.config) {
+	case I915_PMU_ACTUAL_FREQUENCY:
+		val = i915->pmu.sample[__I915_SAMPLE_FREQ_ACT];
+		break;
+	case I915_PMU_REQUESTED_FREQUENCY:
+		val = i915->pmu.sample[__I915_SAMPLE_FREQ_REQ];
+		break;
+	case I915_PMU_ENERGY:
+		val = intel_energy_uJ(i915);
+		break;
+	case I915_PMU_INTERRUPTS:
+		val = count_interrupts(i915);
+		break;
+
+	case I915_PMU_RC6_RESIDENCY:
+		if (!i915->gt.awake)
+			return;
+
+		val = intel_rc6_residency_ns(i915,
+					     IS_VALLEYVIEW(i915) ?
+					     VLV_GT_RENDER_RC6 :
+					     GEN6_GT_GFX_RC6);
+		break;
+
+	case I915_PMU_RC6p_RESIDENCY:
+		if (!i915->gt.awake)
+			return;
+
+		/* Left at 0 on Valleyview. */
+		if (!IS_VALLEYVIEW(i915))
+			val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+		break;
+
+	case I915_PMU_RC6pp_RESIDENCY:
+		if (!i915->gt.awake)
+			return;
+
+		/* Left at 0 on Valleyview. */
+		if (!IS_VALLEYVIEW(i915))
+			val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+		break;
+	}
+
+	local64_set(&event->count, val);
+}
+
+/* pmu->event_idx callback: always reports index 0. */
+static int i915_pmu_event_event_idx(struct perf_event *event)
+{
+	return 0;
+}
+
+/*
+ * sysfs show method for entries of the "format" group: prints the string
+ * stored in the extended attribute's var pointer, followed by a newline.
+ */
+static ssize_t i915_pmu_format_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+        struct dev_ext_attribute *eattr;
+
+        eattr = container_of(attr, struct dev_ext_attribute, attr);
+        return sprintf(buf, "%s\n", (char *) eattr->var);
+}
+
+#define I915_PMU_FORMAT_ATTR(_name, _config)           \
+        (&((struct dev_ext_attribute[]) {               \
+                { .attr = __ATTR(_name, S_IRUGO, i915_pmu_format_show, NULL), \
+                  .var = (void *) _config, }            \
+        })[0].attr.attr)
+
+/*
+ * Bit range of perf_event_attr.config that carries the event id. The
+ * largest configs in the uAPI are __I915_PMU_OTHER(x), i.e. 0x100000 + x,
+ * so bits 0-20 cover every defined event.
+ */
+static struct attribute *i915_pmu_format_attrs[] = {
+        I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
+        NULL,
+};
+
+static const struct attribute_group i915_pmu_format_attr_group = {
+        .name = "format",
+        .attrs = i915_pmu_format_attrs,
+};
+
+/*
+ * sysfs show method for entries of the "events" group: prints the config
+ * value stored in the extended attribute's var pointer as "config=0x...".
+ */
+static ssize_t i915_pmu_event_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+        struct dev_ext_attribute *eattr;
+
+        eattr = container_of(attr, struct dev_ext_attribute, attr);
+        return sprintf(buf, "config=0x%lx\n", (unsigned long) eattr->var);
+}
+
+#define I915_PMU_EVENT_ATTR(_name, _config)            \
+        (&((struct dev_ext_attribute[]) {               \
+                { .attr = __ATTR(_name, S_IRUGO, i915_pmu_event_show, NULL), \
+                  .var = (void *) _config, }            \
+         })[0].attr.attr)
+
+static struct attribute *i915_pmu_events_attrs[] = {
+	I915_PMU_EVENT_ATTR(rcs0-queued,
+			    I915_PMU_ENGINE_QUEUED(I915_ENGINE_CLASS_RENDER, 0)),
+	I915_PMU_EVENT_ATTR(rcs0-busy,
+			    I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0)),
+	I915_PMU_EVENT_ATTR(rcs0-wait,
+			    I915_PMU_ENGINE_WAIT(I915_ENGINE_CLASS_RENDER, 0)),
+	I915_PMU_EVENT_ATTR(rcs0-sema,
+			    I915_PMU_ENGINE_SEMA(I915_ENGINE_CLASS_RENDER, 0)),
+
+	I915_PMU_EVENT_ATTR(bcs0-queued,
+			    I915_PMU_ENGINE_QUEUED(I915_ENGINE_CLASS_COPY, 0)),
+	I915_PMU_EVENT_ATTR(bcs0-busy,
+			    I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0)),
+	I915_PMU_EVENT_ATTR(bcs0-wait,
+			    I915_PMU_ENGINE_WAIT(I915_ENGINE_CLASS_COPY, 0)),
+	I915_PMU_EVENT_ATTR(bcs0-sema,
+			    I915_PMU_ENGINE_SEMA(I915_ENGINE_CLASS_COPY, 0)),
+
+	I915_PMU_EVENT_ATTR(vcs0-queued,
+			    I915_PMU_ENGINE_QUEUED(I915_ENGINE_CLASS_VIDEO, 0)),
+	I915_PMU_EVENT_ATTR(vcs0-busy,
+			    I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0)),
+	I915_PMU_EVENT_ATTR(vcs0-wait,
+			    I915_PMU_ENGINE_WAIT(I915_ENGINE_CLASS_VIDEO, 0)),
+	I915_PMU_EVENT_ATTR(vcs0-sema,
+			    I915_PMU_ENGINE_SEMA(I915_ENGINE_CLASS_VIDEO, 0)),
+
+	I915_PMU_EVENT_ATTR(vcs1-queued,
+			    I915_PMU_ENGINE_QUEUED(I915_ENGINE_CLASS_VIDEO, 1)),
+	I915_PMU_EVENT_ATTR(vcs1-busy,
+			    I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1)),
+	I915_PMU_EVENT_ATTR(vcs1-wait,
+			    I915_PMU_ENGINE_WAIT(I915_ENGINE_CLASS_VIDEO, 1)),
+	I915_PMU_EVENT_ATTR(vcs1-sema,
+			    I915_PMU_ENGINE_SEMA(I915_ENGINE_CLASS_VIDEO, 1)),
+
+	I915_PMU_EVENT_ATTR(vecs0-queued,
+			    I915_PMU_ENGINE_QUEUED(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0)),
+	I915_PMU_EVENT_ATTR(vecs0-busy,
+			    I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0)),
+	I915_PMU_EVENT_ATTR(vecs0-wait,
+			    I915_PMU_ENGINE_WAIT(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0)),
+	I915_PMU_EVENT_ATTR(vecs0-sema,
+			    I915_PMU_ENGINE_SEMA(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0)),
+
+        I915_PMU_EVENT_ATTR(actual-frequency,	 I915_PMU_ACTUAL_FREQUENCY),
+        I915_PMU_EVENT_ATTR(requested-frequency, I915_PMU_REQUESTED_FREQUENCY),
+        I915_PMU_EVENT_ATTR(energy,		 I915_PMU_ENERGY),
+        I915_PMU_EVENT_ATTR(interrupts,		 I915_PMU_INTERRUPTS),
+        I915_PMU_EVENT_ATTR(rc6-residency,	 I915_PMU_RC6_RESIDENCY),
+        I915_PMU_EVENT_ATTR(rc6p-residency,	 I915_PMU_RC6p_RESIDENCY),
+        I915_PMU_EVENT_ATTR(rc6pp-residency,	 I915_PMU_RC6pp_RESIDENCY),
+
+        NULL,
+};
+
+static const struct attribute_group i915_pmu_events_attr_group = {
+        .name = "events",
+        .attrs = i915_pmu_events_attrs,
+};
+
+static const struct attribute_group *i915_pmu_attr_groups[] = {
+        &i915_pmu_format_attr_group,
+        &i915_pmu_events_attr_group,
+        NULL
+};
+
+/*
+ * Set up the PMU callbacks, lock and shared sampling timer and register
+ * the PMU with the perf core under the name "i915".
+ *
+ * Nothing is registered on gen2 and older. If registration fails,
+ * event_init is reset to NULL, which i915_pmu_unregister() treats as
+ * "never registered".
+ */
+void i915_pmu_register(struct drm_i915_private *i915)
+{
+	if (INTEL_GEN(i915) <= 2)
+		return;
+
+	i915->pmu.base.attr_groups	= i915_pmu_attr_groups;
+	i915->pmu.base.task_ctx_nr	= perf_sw_context;
+	i915->pmu.base.event_init	= i915_pmu_event_init;
+	i915->pmu.base.add		= i915_pmu_event_add;
+	i915->pmu.base.del		= i915_pmu_event_del;
+	i915->pmu.base.start		= i915_pmu_event_start;
+	i915->pmu.base.stop		= i915_pmu_event_stop;
+	i915->pmu.base.read		= i915_pmu_event_read;
+	i915->pmu.base.event_idx	= i915_pmu_event_event_idx;
+
+	spin_lock_init(&i915->pmu.lock);
+	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	i915->pmu.timer.function = i915_sample;
+	i915->pmu.enable = 0;
+
+	if (perf_pmu_register(&i915->pmu.base, "i915", -1))
+		i915->pmu.base.event_init = NULL;
+}
+
+/*
+ * Undo i915_pmu_register(). Does nothing when event_init is NULL, i.e. the
+ * PMU was never (successfully) registered.
+ */
+void i915_pmu_unregister(struct drm_i915_private *i915)
+{
+	if (!i915->pmu.base.event_init)
+		return;
+
+	/* With no bits enabled, i915_sample() will not re-arm the timer. */
+	i915->pmu.enable = 0;
+
+	perf_pmu_unregister(&i915->pmu.base);
+	i915->pmu.base.event_init = NULL;
+
+	hrtimer_cancel(&i915->pmu.timer);
+}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 1dc7e7a2a23b..26bce766ab51 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -95,6 +95,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define VIDEO_ENHANCEMENT_CLASS	2
 #define COPY_ENGINE_CLASS	3
 #define OTHER_CLASS		4
+#define MAX_ENGINE_CLASS	4
+
+#define MAX_ENGINE_INSTANCE    1
 
 /* PCI config space */
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 9ab596941372..14630612325b 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -198,6 +198,15 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 	GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
 	class_info = &intel_engine_classes[info->class];
 
+	if (GEM_WARN_ON(info->class > MAX_ENGINE_CLASS))
+		return -EINVAL;
+
+	if (GEM_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
+		return -EINVAL;
+
+	if (GEM_WARN_ON(dev_priv->engine_class[info->class][info->instance]))
+		return -EINVAL;
+
 	GEM_BUG_ON(dev_priv->engine[id]);
 	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
 	if (!engine)
@@ -225,6 +234,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 
 	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
 
+	dev_priv->engine_class[info->class][info->instance] = engine;
 	dev_priv->engine[id] = engine;
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index cdf084ef5aae..af8d85794c44 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2282,3 +2282,28 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
 
 	return intel_init_ring_buffer(engine);
 }
+
+static u8 user_class_map[I915_ENGINE_CLASS_MAX] = {
+	[I915_ENGINE_CLASS_OTHER] = OTHER_CLASS,
+	[I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
+	[I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS,
+	[I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS,
+	[I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS,
+};
+
+/*
+ * Look up an engine by its uAPI class and instance.
+ *
+ * @class is translated to the driver's internal class through
+ * user_class_map before indexing i915->engine_class. Returns NULL when
+ * @class or @instance is out of range; the returned table entry may also
+ * be NULL when no engine was registered for that slot.
+ */
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
+{
+	if (class >= ARRAY_SIZE(user_class_map))
+		return NULL;
+
+	class = user_class_map[class];
+
+	/* A mapped class beyond the table bound would be a driver bug. */
+	if (WARN_ON_ONCE(class > MAX_ENGINE_CLASS))
+		return NULL;
+
+	if (instance > MAX_ENGINE_INSTANCE)
+		return NULL;
+
+	return i915->engine_class[class][instance];
+}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index d33c93444c0d..9fdf0cdf6220 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -245,6 +245,11 @@ struct intel_engine_cs {
 		I915_SELFTEST_DECLARE(bool mock : 1);
 	} breadcrumbs;
 
+	struct {
+		u32 enable;
+		u64 sample[4];
+	} pmu;
+
 	/*
 	 * A pool of objects to use as shadow copies of client batch buffers
 	 * when the command parser is enabled. Prevents the client from
@@ -735,4 +740,7 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 void intel_engines_mark_idle(struct drm_i915_private *i915);
 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
 
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
+
 #endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7ccbd6a2bbe0..103874476a6d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -86,6 +86,61 @@ enum i915_mocs_table_index {
 	I915_MOCS_CACHED,
 };
 
+enum drm_i915_gem_engine_class {
+	I915_ENGINE_CLASS_OTHER = 0,
+	I915_ENGINE_CLASS_RENDER = 1,
+	I915_ENGINE_CLASS_COPY = 2,
+	I915_ENGINE_CLASS_VIDEO = 3,
+	I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
+	I915_ENGINE_CLASS_MAX /* non-ABI */
+};
+
+/**
+ * DOC: perf_events exposed by i915 through /sys/bus/event_source/devices/i915
+ *
+ */
+
+enum drm_i915_pmu_engine_sample {
+	I915_SAMPLE_QUEUED = 0,
+	I915_SAMPLE_BUSY = 1,
+	I915_SAMPLE_WAIT = 2,
+	I915_SAMPLE_SEMA = 3
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+	(I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+#define __I915_PMU_ENGINE(class, instance, sample) \
+	((class) << I915_PMU_CLASS_SHIFT | \
+	(instance) << I915_PMU_SAMPLE_BITS | \
+	(sample))
+
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY 	__I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY	__I915_PMU_OTHER(1)
+#define I915_PMU_ENERGY			__I915_PMU_OTHER(2)
+#define I915_PMU_INTERRUPTS		__I915_PMU_OTHER(3)
+
+#define I915_PMU_RC6_RESIDENCY		__I915_PMU_OTHER(4)
+#define I915_PMU_RC6p_RESIDENCY		__I915_PMU_OTHER(5)
+#define I915_PMU_RC6pp_RESIDENCY	__I915_PMU_OTHER(6)
+
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
 #define I915_NR_TEX_REGIONS 255	/* table size 2k - maximum due to use
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7d34bc16ca1c..3b6eb0131204 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7382,6 +7382,7 @@ int perf_event_overflow(struct perf_event *event,
 {
 	return __perf_event_overflow(event, 1, data, regs);
 }
+EXPORT_SYMBOL_GPL(perf_event_overflow);
 
 /*
  * Generic software event infrastructure
-- 
2.9.4



More information about the Intel-gfx mailing list