[Intel-gfx] [PATCH 24/24] RFC drm/i915: Expose a PMU interface for perf queries
Dmitry Rogozhkin
dmitry.v.rogozhkin at intel.com
Thu May 18 23:48:47 UTC 2017
On 5/18/2017 2:46 AM, Chris Wilson wrote:
> The first goal is to be able to measure GPU (and invidual ring) busyness
> without having to poll registers from userspace. (Which not only incurs
> holding the forcewake lock indefinitely, perturbing the system, but also
> runs the risk of hanging the machine.) As an alternative we can use the
> perf event counter interface to sample the ring registers periodically
> and send those results to userspace.
>
> To be able to do so, we need to export the two symbols from
> kernel/events/core.c to register and unregister a PMU device.
>
> v2: Use a common timer for the ring sampling.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/Makefile | 1 +
> drivers/gpu/drm/i915/i915_drv.c | 2 +
> drivers/gpu/drm/i915/i915_drv.h | 23 ++
> drivers/gpu/drm/i915/i915_pmu.c | 449 ++++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +
> include/uapi/drm/i915_drm.h | 40 +++
> kernel/events/core.c | 1 +
> 7 files changed, 518 insertions(+)
> create mode 100644 drivers/gpu/drm/i915/i915_pmu.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 7b05fb802f4c..ca88e6e67910 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -26,6 +26,7 @@ i915-y := i915_drv.o \
>
> i915-$(CONFIG_COMPAT) += i915_ioc32.o
> i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
> +i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
>
> # GEM code
> i915-y += i915_cmd_parser.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 2d2fb4327f97..e3c6d052d1c9 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1144,6 +1144,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
> struct drm_device *dev = &dev_priv->drm;
>
> i915_gem_shrinker_init(dev_priv);
> + i915_pmu_register(dev_priv);
>
> /*
> * Notify a valid surface after modesetting,
> @@ -1197,6 +1198,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
> intel_opregion_unregister(dev_priv);
>
> i915_perf_unregister(dev_priv);
> + i915_pmu_unregister(dev_priv);
>
> i915_teardown_sysfs(dev_priv);
> i915_guc_log_unregister(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1fa1e7d48f02..10beae1a13c8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -40,6 +40,7 @@
> #include <linux/hash.h>
> #include <linux/intel-iommu.h>
> #include <linux/kref.h>
> +#include <linux/perf_event.h>
> #include <linux/pm_qos.h>
> #include <linux/reservation.h>
> #include <linux/shmem_fs.h>
> @@ -2075,6 +2076,12 @@ struct intel_cdclk_state {
> unsigned int cdclk, vco, ref;
> };
>
> +enum {
> + __I915_SAMPLE_FREQ_ACT = 0,
> + __I915_SAMPLE_FREQ_REQ,
> + __I915_NUM_PMU_SAMPLERS
> +};
> +
> struct drm_i915_private {
> struct drm_device drm;
>
> @@ -2564,6 +2571,13 @@ struct drm_i915_private {
> int irq;
> } lpe_audio;
>
> + struct {
> + struct pmu base;
> + struct hrtimer timer;
> + u64 enable;
> + u64 sample[__I915_NUM_PMU_SAMPLERS];
> + } pmu;
> +
> /*
> * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
> * will be rejected. Instead look for a better place.
> @@ -3681,6 +3695,15 @@ extern void i915_perf_fini(struct drm_i915_private *dev_priv);
> extern void i915_perf_register(struct drm_i915_private *dev_priv);
> extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
>
> +/* i915_pmu.c */
> +#ifdef CONFIG_PERF_EVENTS
> +extern void i915_pmu_register(struct drm_i915_private *i915);
> +extern void i915_pmu_unregister(struct drm_i915_private *i915);
> +#else
> +static inline void i915_pmu_register(struct drm_i915_private *i915) {}
> +static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
> +#endif
> +
> /* i915_suspend.c */
> extern int i915_save_state(struct drm_i915_private *dev_priv);
> extern int i915_restore_state(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> new file mode 100644
> index 000000000000..80e1c07841ac
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -0,0 +1,449 @@
> +#include <linux/perf_event.h>
> +#include <linux/pm_runtime.h>
> +
> +#include "i915_drv.h"
> +#include "intel_ringbuffer.h"
> +
> +#define FREQUENCY 200
> +#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
> +
> +#define RING_MASK 0xffffffff
> +#define RING_MAX 32
> +
> +static void engines_sample(struct drm_i915_private *dev_priv)
> +{
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> + bool fw = false;
> +
> + if ((dev_priv->pmu.enable & RING_MASK) == 0)
> + return;
> +
> + if (!dev_priv->gt.awake)
> + return;
> +
> + if (!intel_runtime_pm_get_if_in_use(dev_priv))
> + return;
> +
> + for_each_engine(engine, dev_priv, id) {
> + u32 val;
> +
> + if ((dev_priv->pmu.enable & (0x7 << (4*id))) == 0)
> + continue;
> +
> + if (i915_seqno_passed(intel_engine_get_seqno(engine),
> + intel_engine_last_submit(engine)))
> + continue;
> +
> + if (!fw) {
> + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> + fw = true;
> + }
> +
> + val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
> + if (!(val & MODE_IDLE))
> + engine->pmu_sample[I915_SAMPLE_BUSY] += PERIOD;
Could you please confirm that I understand correctly what you are attempting
to do? Every PERIOD=10ms you check the status of the engines. If an engine is
in a busy state, you count the PERIOD time as if the engine was busy
during that period. If the engine was in some other state, you count the
time toward that state (wait or sema below, as I understand it).
If that's what you are trying to do, your solution is only really usable
for cases where tasks execute on the engines for more than 2x the
PERIOD time, i.e. longer than 20ms. That is not the case for a lot of
tasks submitted by media, whose usual execution time is just a few
milliseconds. Considering that i915 currently tracks when batches were
scheduled and when they completed, you can easily calculate a very
precise metric of busy clocks for each engine from the perspective of
i915. That will not be the precise time of how long the engine actually
computed, because it will include scheduling latency, but that is exactly
what the end-user requires: the end-user does not care whether there is
latency or not; for him the engine is busy all that time. This is
done in the alternative solution given by Tvrtko in the "drm/i915: Export
engine busy stats in debugfs" patch. Why do you not take that as a
basis? Why is that patch, which needs only a few arithmetic operations to
calculate engine busy clocks, being ignored?
> +
> + val = I915_READ_FW(RING_CTL(engine->mmio_base));
> + if (val & RING_WAIT)
> + engine->pmu_sample[I915_SAMPLE_WAIT] += PERIOD;
> + if (val & RING_WAIT_SEMAPHORE)
> + engine->pmu_sample[I915_SAMPLE_SEMA] += PERIOD;
> + }
> +
> + if (fw)
> + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> + intel_runtime_pm_put(dev_priv);
> +}
> +
> +static void frequency_sample(struct drm_i915_private *dev_priv)
> +{
> + if (dev_priv->pmu.enable & BIT_ULL(I915_PMU_ACTUAL_FREQUENCY)) {
> + u64 val;
> +
> + val = dev_priv->rps.cur_freq;
> + if (dev_priv->gt.awake &&
> + intel_runtime_pm_get_if_in_use(dev_priv)) {
> + val = I915_READ_NOTRACE(GEN6_RPSTAT1);
> + if (INTEL_GEN(dev_priv) >= 9)
> + val = (val & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
> + else if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
> + val = (val & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
> + else
> + val = (val & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
> + intel_runtime_pm_put(dev_priv);
> + }
> + val = intel_gpu_freq(dev_priv, val);
> + dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT] += val * PERIOD;
> + }
> +
> + if (dev_priv->pmu.enable & BIT_ULL(I915_PMU_REQUESTED_FREQUENCY)) {
> + u64 val = intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq);
> + dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ] += val * PERIOD;
> + }
> +}
> +
> +static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
> +{
> + struct drm_i915_private *i915 =
> + container_of(hrtimer, struct drm_i915_private, pmu.timer);
> +
> + if (i915->pmu.enable == 0)
> + return HRTIMER_NORESTART;
> +
> + engines_sample(i915);
> + frequency_sample(i915);
> +
> + hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
> + return HRTIMER_RESTART;
> +}
> +
> +static void i915_pmu_event_destroy(struct perf_event *event)
> +{
> + WARN_ON(event->parent);
> +}
> +
> +static int engine_event_init(struct perf_event *event)
> +{
> + struct drm_i915_private *i915 =
> + container_of(event->pmu, typeof(*i915), pmu.base);
> + int engine = event->attr.config >> 2;
> + int sample = event->attr.config & 3;
> +
> + switch (sample) {
> + case I915_SAMPLE_BUSY:
> + case I915_SAMPLE_WAIT:
> + break;
> + case I915_SAMPLE_SEMA:
> + if (INTEL_GEN(i915) < 6)
> + return -ENODEV;
> + break;
> + default:
> + return -ENOENT;
> + }
> +
> + if (engine >= I915_NUM_ENGINES)
> + return -ENOENT;
> +
> + if (!i915->engine[engine])
> + return -ENODEV;
> +
> + return 0;
> +}
> +
> +static enum hrtimer_restart hrtimer_sample(struct hrtimer *hrtimer)
> +{
> + struct perf_sample_data data;
> + struct perf_event *event;
> + u64 period;
> +
> + event = container_of(hrtimer, struct perf_event, hw.hrtimer);
> + if (event->state != PERF_EVENT_STATE_ACTIVE)
> + return HRTIMER_NORESTART;
> +
> + event->pmu->read(event);
> +
> + perf_sample_data_init(&data, 0, event->hw.last_period);
> + perf_event_overflow(event, &data, NULL);
> +
> + period = max_t(u64, 10000, event->hw.sample_period);
> + hrtimer_forward_now(hrtimer, ns_to_ktime(period));
> + return HRTIMER_RESTART;
> +}
> +
> +static void init_hrtimer(struct perf_event *event)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> +
> + if (!is_sampling_event(event))
> + return;
> +
> + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> + hwc->hrtimer.function = hrtimer_sample;
> +
> + if (event->attr.freq) {
> + long freq = event->attr.sample_freq;
> +
> + event->attr.sample_period = NSEC_PER_SEC / freq;
> + hwc->sample_period = event->attr.sample_period;
> + local64_set(&hwc->period_left, hwc->sample_period);
> + hwc->last_period = hwc->sample_period;
> + event->attr.freq = 0;
> + }
> +}
> +
> +static int i915_pmu_event_init(struct perf_event *event)
> +{
> + struct drm_i915_private *i915 =
> + container_of(event->pmu, typeof(*i915), pmu.base);
> + int ret;
> +
> + /* XXX ideally only want pid == -1 && cpu == -1 */
> +
> + if (event->attr.type != event->pmu->type)
> + return -ENOENT;
> +
> + if (has_branch_stack(event))
> + return -EOPNOTSUPP;
> +
> + ret = 0;
> + if (event->attr.config < RING_MAX) {
> + ret = engine_event_init(event);
> + } else switch (event->attr.config) {
> + case I915_PMU_ACTUAL_FREQUENCY:
> + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
> + ret = -ENODEV; /* requires a mutex for sampling! */
> + case I915_PMU_REQUESTED_FREQUENCY:
> + case I915_PMU_ENERGY:
> + case I915_PMU_RC6_RESIDENCY:
> + case I915_PMU_RC6p_RESIDENCY:
> + case I915_PMU_RC6pp_RESIDENCY:
> + if (INTEL_GEN(i915) < 6)
> + ret = -ENODEV;
> + break;
> + }
> + if (ret)
> + return ret;
> +
> + if (!event->parent)
> + event->destroy = i915_pmu_event_destroy;
> +
> + init_hrtimer(event);
> +
> + return 0;
> +}
> +
> +static void i915_pmu_timer_start(struct perf_event *event)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> + s64 period;
> +
> + if (!is_sampling_event(event))
> + return;
> +
> + period = local64_read(&hwc->period_left);
> + if (period) {
> + if (period < 0)
> + period = 10000;
> +
> + local64_set(&hwc->period_left, 0);
> + } else {
> + period = max_t(u64, 10000, hwc->sample_period);
> + }
> +
> + hrtimer_start_range_ns(&hwc->hrtimer,
> + ns_to_ktime(period), 0,
> + HRTIMER_MODE_REL_PINNED);
> +}
> +
> +static void i915_pmu_timer_cancel(struct perf_event *event)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> +
> + if (!is_sampling_event(event))
> + return;
> +
> + local64_set(&hwc->period_left,
> + ktime_to_ns(hrtimer_get_remaining(&hwc->hrtimer)));
> + hrtimer_cancel(&hwc->hrtimer);
> +}
> +
> +static void i915_pmu_enable(struct perf_event *event)
> +{
> + struct drm_i915_private *i915 =
> + container_of(event->pmu, typeof(*i915), pmu.base);
> +
> + if (i915->pmu.enable == 0)
> + hrtimer_start_range_ns(&i915->pmu.timer,
> + ns_to_ktime(PERIOD), 0,
> + HRTIMER_MODE_REL_PINNED);
> +
> + i915->pmu.enable |= BIT_ULL(event->attr.config);
> +
> + i915_pmu_timer_start(event);
> +}
> +
> +static void i915_pmu_disable(struct perf_event *event)
> +{
> + struct drm_i915_private *i915 =
> + container_of(event->pmu, typeof(*i915), pmu.base);
> +
> + i915->pmu.enable &= ~BIT_ULL(event->attr.config);
> + i915_pmu_timer_cancel(event);
> +}
> +
> +static int i915_pmu_event_add(struct perf_event *event, int flags)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> +
> + if (flags & PERF_EF_START)
> + i915_pmu_enable(event);
> +
> + hwc->state = !(flags & PERF_EF_START);
> +
> + return 0;
> +}
> +
> +static void i915_pmu_event_del(struct perf_event *event, int flags)
> +{
> + i915_pmu_disable(event);
> +}
> +
> +static void i915_pmu_event_start(struct perf_event *event, int flags)
> +{
> + i915_pmu_enable(event);
> +}
> +
> +static void i915_pmu_event_stop(struct perf_event *event, int flags)
> +{
> + i915_pmu_disable(event);
> +}
> +
> +static u64 read_energy_uJ(struct drm_i915_private *dev_priv)
> +{
> + u64 power;
> +
> + GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
> +
> + intel_runtime_pm_get(dev_priv);
> +
> + rdmsrl(MSR_RAPL_POWER_UNIT, power);
> + power = (power & 0x1f00) >> 8;
> + power = 1000000 >> power; /* convert to uJ */
> + power *= I915_READ_NOTRACE(MCH_SECP_NRG_STTS);
> +
> + intel_runtime_pm_put(dev_priv);
> +
> + return power;
> +}
> +
> +static inline u64 calc_residency(struct drm_i915_private *dev_priv,
> + const i915_reg_t reg)
> +{
> + u64 val, units = 128, div = 100000;
> +
> + GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
> +
> + intel_runtime_pm_get(dev_priv);
> + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
> + div = dev_priv->czclk_freq;
> + units = 1;
> + if (I915_READ_NOTRACE(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
> + units <<= 8;
> + } else if (IS_GEN9_LP(dev_priv)) {
> + div = 1200;
> + units = 1;
> + }
> + val = I915_READ_NOTRACE(reg);
> + intel_runtime_pm_put(dev_priv);
> +
> + val *= units;
> + return DIV_ROUND_UP_ULL(val, div);
> +}
> +
> +static u64 count_interrupts(struct drm_i915_private *i915)
> +{
> + /* open-coded kstat_irqs() */
> + struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
> + u64 sum = 0;
> + int cpu;
> +
> + if (!desc || !desc->kstat_irqs)
> + return 0;
> +
> + for_each_possible_cpu(cpu)
> + sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
> +
> + return sum;
> +}
> +
> +static void i915_pmu_event_read(struct perf_event *event)
> +{
> + struct drm_i915_private *i915 =
> + container_of(event->pmu, typeof(*i915), pmu.base);
> + u64 val = 0;
> +
> + if (event->attr.config < 32) {
> + int engine = event->attr.config >> 2;
> + int sample = event->attr.config & 3;
> + val = i915->engine[engine]->pmu_sample[sample];
> + } else switch (event->attr.config) {
> + case I915_PMU_ACTUAL_FREQUENCY:
> + val = i915->pmu.sample[__I915_SAMPLE_FREQ_ACT];
> + break;
> + case I915_PMU_REQUESTED_FREQUENCY:
> + val = i915->pmu.sample[__I915_SAMPLE_FREQ_REQ];
> + break;
> + case I915_PMU_ENERGY:
> + val = read_energy_uJ(i915);
> + break;
> + case I915_PMU_INTERRUPTS:
> + val = count_interrupts(i915);
> + break;
> +
> + case I915_PMU_RC6_RESIDENCY:
> + if (!i915->gt.awake)
> + return;
> +
> + val = calc_residency(i915, IS_VALLEYVIEW(i915) ? VLV_GT_RENDER_RC6 : GEN6_GT_GFX_RC6);
> + break;
> +
> + case I915_PMU_RC6p_RESIDENCY:
> + if (!i915->gt.awake)
> + return;
> +
> + if (!IS_VALLEYVIEW(i915))
> + val = calc_residency(i915, GEN6_GT_GFX_RC6p);
> + break;
> +
> + case I915_PMU_RC6pp_RESIDENCY:
> + if (!i915->gt.awake)
> + return;
> +
> + if (!IS_VALLEYVIEW(i915))
> + val = calc_residency(i915, GEN6_GT_GFX_RC6pp);
> + break;
> + }
> +
> + local64_set(&event->count, val);
> +}
> +
> +static int i915_pmu_event_event_idx(struct perf_event *event)
> +{
> + return 0;
> +}
> +
> +void i915_pmu_register(struct drm_i915_private *i915)
> +{
> + if (INTEL_GEN(i915) <= 2)
> + return;
> +
> + i915->pmu.base.task_ctx_nr = perf_sw_context;
> + i915->pmu.base.event_init = i915_pmu_event_init;
> + i915->pmu.base.add = i915_pmu_event_add;
> + i915->pmu.base.del = i915_pmu_event_del;
> + i915->pmu.base.start = i915_pmu_event_start;
> + i915->pmu.base.stop = i915_pmu_event_stop;
> + i915->pmu.base.read = i915_pmu_event_read;
> + i915->pmu.base.event_idx = i915_pmu_event_event_idx;
> +
> + hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> + i915->pmu.timer.function = i915_sample;
> + i915->pmu.enable = 0;
> +
> + if (perf_pmu_register(&i915->pmu.base, "i915", -1))
> + i915->pmu.base.event_init = NULL;
> +}
> +
> +void i915_pmu_unregister(struct drm_i915_private *i915)
> +{
> + if (!i915->pmu.base.event_init)
> + return;
> +
> + i915->pmu.enable = 0;
> +
> + perf_pmu_unregister(&i915->pmu.base);
> + i915->pmu.base.event_init = NULL;
> +
> + hrtimer_cancel(&i915->pmu.timer);
> +}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 6aa20ac8cde3..084fa7816256 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -244,6 +244,8 @@ struct intel_engine_cs {
> I915_SELFTEST_DECLARE(bool mock : 1);
> } breadcrumbs;
>
> + u64 pmu_sample[3];
> +
> /*
> * A pool of objects to use as shadow copies of client batch buffers
> * when the command parser is enabled. Prevents the client from
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 34ee011f08ac..e9375ff29371 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -86,6 +86,46 @@ enum i915_mocs_table_index {
> I915_MOCS_CACHED,
> };
>
> +/**
> + * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
> + *
> + */
> +#define I915_SAMPLE_BUSY 0
> +#define I915_SAMPLE_WAIT 1
> +#define I915_SAMPLE_SEMA 2
> +
> +#define I915_SAMPLE_RCS 0
> +#define I915_SAMPLE_VCS 1
> +#define I915_SAMPLE_BCS 2
> +#define I915_SAMPLE_VECS 3
> +
> +#define __I915_PMU_COUNT(ring, id) ((ring) << 4 | (id))
> +
> +#define I915_PMU_COUNT_RCS_BUSY __I915_PMU_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_BUSY)
> +#define I915_PMU_COUNT_RCS_WAIT __I915_PMU_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_WAIT)
> +#define I915_PMU_COUNT_RCS_SEMA __I915_PMU_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_SEMA)
> +
> +#define I915_PMU_COUNT_VCS_BUSY __I915_PMU_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_BUSY)
> +#define I915_PMU_COUNT_VCS_WAIT __I915_PMU_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_WAIT)
> +#define I915_PMU_COUNT_VCS_SEMA __I915_PMU_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_SEMA)
> +
> +#define I915_PMU_COUNT_BCS_BUSY __I915_PMU_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_BUSY)
> +#define I915_PMU_COUNT_BCS_WAIT __I915_PMU_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_WAIT)
> +#define I915_PMU_COUNT_BCS_SEMA __I915_PMU_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_SEMA)
> +
> +#define I915_PMU_COUNT_VECS_BUSY __I915_PMU_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_BUSY)
> +#define I915_PMU_COUNT_VECS_WAIT __I915_PMU_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_WAIT)
> +#define I915_PMU_COUNT_VECS_SEMA __I915_PMU_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_SEMA)
> +
> +#define I915_PMU_ACTUAL_FREQUENCY 32
> +#define I915_PMU_REQUESTED_FREQUENCY 33
> +#define I915_PMU_ENERGY 34
> +#define I915_PMU_INTERRUPTS 35
> +
> +#define I915_PMU_RC6_RESIDENCY 40
> +#define I915_PMU_RC6p_RESIDENCY 41
> +#define I915_PMU_RC6pp_RESIDENCY 42
> +
> /* Each region is a minimum of 16k, and there are at most 255 of them.
> */
> #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index f811dd20bbc1..6351ed8a2e56 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7365,6 +7365,7 @@ int perf_event_overflow(struct perf_event *event,
> {
> return __perf_event_overflow(event, 1, data, regs);
> }
> +EXPORT_SYMBOL_GPL(perf_event_overflow);
>
> /*
> * Generic software event infrastructure
More information about the Intel-gfx
mailing list