[Intel-gfx] [RFC 01/14] RFC drm/i915: Expose a PMU interface for perf queries

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Thu Jul 20 08:55:12 UTC 2017


On 19/07/2017 10:53, Kamble, Sagar A wrote:
> Can we reuse calc_residency defined in i915_sysfs.c?

Looks like it, that is intel_pm.c/intel_rc6_residency_us.
I will incorporate the change in the series or the patch. Thanks for 
spotting this!

Regards,

Tvrtko

> 
> On 7/18/2017 8:06 PM, Tvrtko Ursulin wrote:
>> From: Chris Wilson <chris at chris-wilson.co.uk>
>>
>> The first goal is to be able to measure GPU (and individual ring) busyness
>> without having to poll registers from userspace. (Which not only incurs
>> holding the forcewake lock indefinitely, perturbing the system, but also
>> runs the risk of hanging the machine.) As an alternative we can use the
>> perf event counter interface to sample the ring registers periodically
>> and send those results to userspace.
>>
>> To be able to do so, we need to export the two symbols from
>> kernel/events/core.c to register and unregister a PMU device.
>>
>> v2: Use a common timer for the ring sampling.
>>
>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>> ---
>>   drivers/gpu/drm/i915/Makefile           |   1 +
>>   drivers/gpu/drm/i915/i915_drv.c         |   2 +
>>   drivers/gpu/drm/i915/i915_drv.h         |  23 ++
>>   drivers/gpu/drm/i915/i915_pmu.c         | 452 
>> ++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/intel_ringbuffer.h |   2 +
>>   include/uapi/drm/i915_drm.h             |  41 +++
>>   kernel/events/core.c                    |   1 +
>>   7 files changed, 522 insertions(+)
>>   create mode 100644 drivers/gpu/drm/i915/i915_pmu.c
>>
>> diff --git a/drivers/gpu/drm/i915/Makefile 
>> b/drivers/gpu/drm/i915/Makefile
>> index f8227318dcaf..1c720013dc42 100644
>> --- a/drivers/gpu/drm/i915/Makefile
>> +++ b/drivers/gpu/drm/i915/Makefile
>> @@ -26,6 +26,7 @@ i915-y := i915_drv.o \
>>   i915-$(CONFIG_COMPAT)   += i915_ioc32.o
>>   i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
>> +i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
>>   # GEM code
>>   i915-y += i915_cmd_parser.o \
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index d310d8245dca..f18ce519f6a2 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -1194,6 +1194,7 @@ static void i915_driver_register(struct 
>> drm_i915_private *dev_priv)
>>       struct drm_device *dev = &dev_priv->drm;
>>       i915_gem_shrinker_init(dev_priv);
>> +    i915_pmu_register(dev_priv);
>>       /*
>>        * Notify a valid surface after modesetting,
>> @@ -1247,6 +1248,7 @@ static void i915_driver_unregister(struct 
>> drm_i915_private *dev_priv)
>>       intel_opregion_unregister(dev_priv);
>>       i915_perf_unregister(dev_priv);
>> +    i915_pmu_unregister(dev_priv);
>>       i915_teardown_sysfs(dev_priv);
>>       i915_guc_log_unregister(dev_priv);
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 7c6fab08a2e6..de518503e033 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -40,6 +40,7 @@
>>   #include <linux/hash.h>
>>   #include <linux/intel-iommu.h>
>>   #include <linux/kref.h>
>> +#include <linux/perf_event.h>
>>   #include <linux/pm_qos.h>
>>   #include <linux/reservation.h>
>>   #include <linux/shmem_fs.h>
>> @@ -2093,6 +2094,12 @@ struct intel_cdclk_state {
>>       unsigned int cdclk, vco, ref;
>>   };
>> +enum {
>> +    __I915_SAMPLE_FREQ_ACT = 0,
>> +    __I915_SAMPLE_FREQ_REQ,
>> +    __I915_NUM_PMU_SAMPLERS
>> +};
>> +
>>   struct drm_i915_private {
>>       struct drm_device drm;
>> @@ -2591,6 +2598,13 @@ struct drm_i915_private {
>>           int    irq;
>>       } lpe_audio;
>> +    struct {
>> +        struct pmu base;
>> +        struct hrtimer timer;
>> +        u64 enable;
>> +        u64 sample[__I915_NUM_PMU_SAMPLERS];
>> +    } pmu;
>> +
>>       /*
>>        * NOTE: This is the dri1/ums dungeon, don't add stuff here. 
>> Your patch
>>        * will be rejected. Instead look for a better place.
>> @@ -3760,6 +3774,15 @@ extern void i915_perf_fini(struct 
>> drm_i915_private *dev_priv);
>>   extern void i915_perf_register(struct drm_i915_private *dev_priv);
>>   extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
>> +/* i915_pmu.c */
>> +#ifdef CONFIG_PERF_EVENTS
>> +extern void i915_pmu_register(struct drm_i915_private *i915);
>> +extern void i915_pmu_unregister(struct drm_i915_private *i915);
>> +#else
>> +static inline void i915_pmu_register(struct drm_i915_private *i915) {}
>> +static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
>> +#endif
>> +
>>   /* i915_suspend.c */
>>   extern int i915_save_state(struct drm_i915_private *dev_priv);
>>   extern int i915_restore_state(struct drm_i915_private *dev_priv);
>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c 
>> b/drivers/gpu/drm/i915/i915_pmu.c
>> new file mode 100644
>> index 000000000000..f03ddad44da6
>> --- /dev/null
>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
>> @@ -0,0 +1,452 @@
>> +#include <linux/perf_event.h>
>> +#include <linux/pm_runtime.h>
>> +
>> +#include "i915_drv.h"
>> +#include "intel_ringbuffer.h"
>> +
>> +#define FREQUENCY 200
>> +#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
>> +
>> +#define RING_MASK 0xffffffff
>> +#define RING_MAX 32
>> +
>> +static void engines_sample(struct drm_i915_private *dev_priv)
>> +{
>> +    struct intel_engine_cs *engine;
>> +    enum intel_engine_id id;
>> +    bool fw = false;
>> +
>> +    if ((dev_priv->pmu.enable & RING_MASK) == 0)
>> +        return;
>> +
>> +    if (!dev_priv->gt.awake)
>> +        return;
>> +
>> +    if (!intel_runtime_pm_get_if_in_use(dev_priv))
>> +        return;
>> +
>> +    for_each_engine(engine, dev_priv, id) {
>> +        u32 val;
>> +
>> +        if ((dev_priv->pmu.enable & (0x7 << (4*id))) == 0)
>> +            continue;
>> +
>> +        if (i915_seqno_passed(intel_engine_get_seqno(engine),
>> +                      intel_engine_last_submit(engine)))
>> +            continue;
>> +
>> +        if (!fw) {
>> +            intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
>> +            fw = true;
>> +        }
>> +
>> +        engine->pmu_sample[I915_SAMPLE_QUEUED] += PERIOD;
>> +
>> +        val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
>> +        if (!(val & MODE_IDLE))
>> +            engine->pmu_sample[I915_SAMPLE_BUSY] += PERIOD;
>> +
>> +        val = I915_READ_FW(RING_CTL(engine->mmio_base));
>> +        if (val & RING_WAIT)
>> +            engine->pmu_sample[I915_SAMPLE_WAIT] += PERIOD;
>> +        if (val & RING_WAIT_SEMAPHORE)
>> +            engine->pmu_sample[I915_SAMPLE_SEMA] += PERIOD;
>> +    }
>> +
>> +    if (fw)
>> +        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>> +    intel_runtime_pm_put(dev_priv);
>> +}
>> +
>> +static void frequency_sample(struct drm_i915_private *dev_priv)
>> +{
>> +    if (dev_priv->pmu.enable & BIT_ULL(I915_PMU_ACTUAL_FREQUENCY)) {
>> +        u64 val;
>> +
>> +        val = dev_priv->rps.cur_freq;
>> +        if (dev_priv->gt.awake &&
>> +            intel_runtime_pm_get_if_in_use(dev_priv)) {
>> +            val = I915_READ_NOTRACE(GEN6_RPSTAT1);
>> +            if (INTEL_GEN(dev_priv) >= 9)
>> +                val = (val & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
>> +            else if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
>> +                val = (val & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
>> +            else
>> +                val = (val & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
>> +            intel_runtime_pm_put(dev_priv);
>> +        }
>> +        val = intel_gpu_freq(dev_priv, val);
>> +        dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT] += val * PERIOD;
>> +    }
>> +
>> +    if (dev_priv->pmu.enable & BIT_ULL(I915_PMU_REQUESTED_FREQUENCY)) {
>> +        u64 val = intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq);
>> +        dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ] += val * PERIOD;
>> +    }
>> +}
>> +
>> +static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
>> +{
>> +    struct drm_i915_private *i915 =
>> +        container_of(hrtimer, struct drm_i915_private, pmu.timer);
>> +
>> +    if (i915->pmu.enable == 0)
>> +        return HRTIMER_NORESTART;
>> +
>> +    engines_sample(i915);
>> +    frequency_sample(i915);
>> +
>> +    hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
>> +    return HRTIMER_RESTART;
>> +}
>> +
>> +static void i915_pmu_event_destroy(struct perf_event *event)
>> +{
>> +    WARN_ON(event->parent);
>> +}
>> +
>> +static int engine_event_init(struct perf_event *event)
>> +{
>> +    struct drm_i915_private *i915 =
>> +        container_of(event->pmu, typeof(*i915), pmu.base);
>> +    int engine = event->attr.config >> 2;
>> +    int sample = event->attr.config & 3;
>> +
>> +    switch (sample) {
>> +    case I915_SAMPLE_QUEUED:
>> +    case I915_SAMPLE_BUSY:
>> +    case I915_SAMPLE_WAIT:
>> +        break;
>> +    case I915_SAMPLE_SEMA:
>> +        if (INTEL_GEN(i915) < 6)
>> +            return -ENODEV;
>> +        break;
>> +    default:
>> +        return -ENOENT;
>> +    }
>> +
>> +    if (engine >= I915_NUM_ENGINES)
>> +        return -ENOENT;
>> +
>> +    if (!i915->engine[engine])
>> +        return -ENODEV;
>> +
>> +    return 0;
>> +}
>> +
>> +static enum hrtimer_restart hrtimer_sample(struct hrtimer *hrtimer)
>> +{
>> +    struct perf_sample_data data;
>> +    struct perf_event *event;
>> +    u64 period;
>> +
>> +    event = container_of(hrtimer, struct perf_event, hw.hrtimer);
>> +    if (event->state != PERF_EVENT_STATE_ACTIVE)
>> +        return HRTIMER_NORESTART;
>> +
>> +    event->pmu->read(event);
>> +
>> +    perf_sample_data_init(&data, 0, event->hw.last_period);
>> +    perf_event_overflow(event, &data, NULL);
>> +
>> +    period = max_t(u64, 10000, event->hw.sample_period);
>> +    hrtimer_forward_now(hrtimer, ns_to_ktime(period));
>> +    return HRTIMER_RESTART;
>> +}
>> +
>> +static void init_hrtimer(struct perf_event *event)
>> +{
>> +    struct hw_perf_event *hwc = &event->hw;
>> +
>> +    if (!is_sampling_event(event))
>> +        return;
>> +
>> +    hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>> +    hwc->hrtimer.function = hrtimer_sample;
>> +
>> +    if (event->attr.freq) {
>> +        long freq = event->attr.sample_freq;
>> +
>> +        event->attr.sample_period = NSEC_PER_SEC / freq;
>> +        hwc->sample_period = event->attr.sample_period;
>> +        local64_set(&hwc->period_left, hwc->sample_period);
>> +        hwc->last_period = hwc->sample_period;
>> +        event->attr.freq = 0;
>> +    }
>> +}
>> +
>> +static int i915_pmu_event_init(struct perf_event *event)
>> +{
>> +    struct drm_i915_private *i915 =
>> +        container_of(event->pmu, typeof(*i915), pmu.base);
>> +    int ret;
>> +
>> +    /* XXX ideally only want pid == -1 && cpu == -1 */
>> +
>> +    if (event->attr.type != event->pmu->type)
>> +        return -ENOENT;
>> +
>> +    if (has_branch_stack(event))
>> +        return -EOPNOTSUPP;
>> +
>> +    ret = 0;
>> +    if (event->attr.config < RING_MAX) {
>> +        ret = engine_event_init(event);
>> +    } else switch (event->attr.config) {
>> +    case I915_PMU_ACTUAL_FREQUENCY:
>> +        if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
>> +            ret = -ENODEV; /* requires a mutex for sampling! */
>> +    case I915_PMU_REQUESTED_FREQUENCY:
>> +    case I915_PMU_ENERGY:
>> +    case I915_PMU_RC6_RESIDENCY:
>> +    case I915_PMU_RC6p_RESIDENCY:
>> +    case I915_PMU_RC6pp_RESIDENCY:
>> +        if (INTEL_GEN(i915) < 6)
>> +            ret = -ENODEV;
>> +        break;
>> +    }
>> +    if (ret)
>> +        return ret;
>> +
>> +    if (!event->parent)
>> +        event->destroy = i915_pmu_event_destroy;
>> +
>> +    init_hrtimer(event);
>> +
>> +    return 0;
>> +}
>> +
>> +static void i915_pmu_timer_start(struct perf_event *event)
>> +{
>> +    struct hw_perf_event *hwc = &event->hw;
>> +    s64 period;
>> +
>> +    if (!is_sampling_event(event))
>> +        return;
>> +
>> +    period = local64_read(&hwc->period_left);
>> +    if (period) {
>> +        if (period < 0)
>> +            period = 10000;
>> +
>> +        local64_set(&hwc->period_left, 0);
>> +    } else {
>> +        period = max_t(u64, 10000, hwc->sample_period);
>> +    }
>> +
>> +    hrtimer_start_range_ns(&hwc->hrtimer,
>> +                   ns_to_ktime(period), 0,
>> +                   HRTIMER_MODE_REL_PINNED);
>> +}
>> +
>> +static void i915_pmu_timer_cancel(struct perf_event *event)
>> +{
>> +    struct hw_perf_event *hwc = &event->hw;
>> +
>> +    if (!is_sampling_event(event))
>> +        return;
>> +
>> +    local64_set(&hwc->period_left,
>> +            ktime_to_ns(hrtimer_get_remaining(&hwc->hrtimer)));
>> +    hrtimer_cancel(&hwc->hrtimer);
>> +}
>> +
>> +static void i915_pmu_enable(struct perf_event *event)
>> +{
>> +    struct drm_i915_private *i915 =
>> +        container_of(event->pmu, typeof(*i915), pmu.base);
>> +
>> +    if (i915->pmu.enable == 0)
>> +        hrtimer_start_range_ns(&i915->pmu.timer,
>> +                       ns_to_ktime(PERIOD), 0,
>> +                       HRTIMER_MODE_REL_PINNED);
>> +
>> +    i915->pmu.enable |= BIT_ULL(event->attr.config);
>> +
>> +    i915_pmu_timer_start(event);
>> +}
>> +
>> +static void i915_pmu_disable(struct perf_event *event)
>> +{
>> +    struct drm_i915_private *i915 =
>> +        container_of(event->pmu, typeof(*i915), pmu.base);
>> +
>> +    i915->pmu.enable &= ~BIT_ULL(event->attr.config);
>> +    i915_pmu_timer_cancel(event);
>> +}
>> +
>> +static int i915_pmu_event_add(struct perf_event *event, int flags)
>> +{
>> +    struct hw_perf_event *hwc = &event->hw;
>> +
>> +    if (flags & PERF_EF_START)
>> +        i915_pmu_enable(event);
>> +
>> +    hwc->state = !(flags & PERF_EF_START);
>> +
>> +    return 0;
>> +}
>> +
>> +static void i915_pmu_event_del(struct perf_event *event, int flags)
>> +{
>> +    i915_pmu_disable(event);
>> +}
>> +
>> +static void i915_pmu_event_start(struct perf_event *event, int flags)
>> +{
>> +    i915_pmu_enable(event);
>> +}
>> +
>> +static void i915_pmu_event_stop(struct perf_event *event, int flags)
>> +{
>> +    i915_pmu_disable(event);
>> +}
>> +
>> +static u64 read_energy_uJ(struct drm_i915_private *dev_priv)
>> +{
>> +    u64 power;
>> +
>> +    GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
>> +
>> +    intel_runtime_pm_get(dev_priv);
>> +
>> +    rdmsrl(MSR_RAPL_POWER_UNIT, power);
>> +    power = (power & 0x1f00) >> 8;
>> +    power = 1000000 >> power; /* convert to uJ */
>> +    power *= I915_READ_NOTRACE(MCH_SECP_NRG_STTS);
>> +
>> +    intel_runtime_pm_put(dev_priv);
>> +
>> +    return power;
>> +}
>> +
>> +static inline u64 calc_residency(struct drm_i915_private *dev_priv,
>> +                 const i915_reg_t reg)
>> +{
>> +    u64 val, units = 128, div = 100000;
>> +
>> +    GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
>> +
>> +    intel_runtime_pm_get(dev_priv);
>> +    if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
>> +        div = dev_priv->czclk_freq;
>> +        units = 1;
>> +        if (I915_READ_NOTRACE(VLV_COUNTER_CONTROL) & 
>> VLV_COUNT_RANGE_HIGH)
>> +            units <<= 8;
>> +    } else if (IS_GEN9_LP(dev_priv)) {
>> +        div = 1200;
>> +        units = 1;
>> +    }
>> +    val = I915_READ_NOTRACE(reg);
>> +    intel_runtime_pm_put(dev_priv);
>> +
>> +    val *= units;
>> +    return DIV_ROUND_UP_ULL(val, div);
>> +}
>> +
>> +static u64 count_interrupts(struct drm_i915_private *i915)
>> +{
>> +    /* open-coded kstat_irqs() */
>> +    struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
>> +    u64 sum = 0;
>> +    int cpu;
>> +
>> +    if (!desc || !desc->kstat_irqs)
>> +        return 0;
>> +
>> +    for_each_possible_cpu(cpu)
>> +        sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
>> +
>> +    return sum;
>> +}
>> +
>> +static void i915_pmu_event_read(struct perf_event *event)
>> +{
>> +    struct drm_i915_private *i915 =
>> +        container_of(event->pmu, typeof(*i915), pmu.base);
>> +    u64 val = 0;
>> +
>> +    if (event->attr.config < 32) {
>> +        int engine = event->attr.config >> 2;
>> +        int sample = event->attr.config & 3;
>> +        val = i915->engine[engine]->pmu_sample[sample];
>> +    } else switch (event->attr.config) {
>> +    case I915_PMU_ACTUAL_FREQUENCY:
>> +        val = i915->pmu.sample[__I915_SAMPLE_FREQ_ACT];
>> +        break;
>> +    case I915_PMU_REQUESTED_FREQUENCY:
>> +        val = i915->pmu.sample[__I915_SAMPLE_FREQ_REQ];
>> +        break;
>> +    case I915_PMU_ENERGY:
>> +        val = read_energy_uJ(i915);
>> +        break;
>> +    case I915_PMU_INTERRUPTS:
>> +        val = count_interrupts(i915);
>> +        break;
>> +
>> +    case I915_PMU_RC6_RESIDENCY:
>> +        if (!i915->gt.awake)
>> +            return;
>> +
>> +        val = calc_residency(i915, IS_VALLEYVIEW(i915) ? 
>> VLV_GT_RENDER_RC6 : GEN6_GT_GFX_RC6);
>> +        break;
>> +
>> +    case I915_PMU_RC6p_RESIDENCY:
>> +        if (!i915->gt.awake)
>> +            return;
>> +
>> +        if (!IS_VALLEYVIEW(i915))
>> +            val = calc_residency(i915, GEN6_GT_GFX_RC6p);
>> +        break;
>> +
>> +    case I915_PMU_RC6pp_RESIDENCY:
>> +        if (!i915->gt.awake)
>> +            return;
>> +
>> +        if (!IS_VALLEYVIEW(i915))
>> +            val = calc_residency(i915, GEN6_GT_GFX_RC6pp);
>> +        break;
>> +    }
>> +
>> +    local64_set(&event->count, val);
>> +}
>> +
>> +static int i915_pmu_event_event_idx(struct perf_event *event)
>> +{
>> +    return 0;
>> +}
>> +
>> +void i915_pmu_register(struct drm_i915_private *i915)
>> +{
>> +    if (INTEL_GEN(i915) <= 2)
>> +        return;
>> +
>> +    i915->pmu.base.task_ctx_nr    = perf_sw_context;
>> +    i915->pmu.base.event_init    = i915_pmu_event_init;
>> +    i915->pmu.base.add        = i915_pmu_event_add;
>> +    i915->pmu.base.del        = i915_pmu_event_del;
>> +    i915->pmu.base.start        = i915_pmu_event_start;
>> +    i915->pmu.base.stop        = i915_pmu_event_stop;
>> +    i915->pmu.base.read        = i915_pmu_event_read;
>> +    i915->pmu.base.event_idx    = i915_pmu_event_event_idx;
>> +
>> +    hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>> +    i915->pmu.timer.function = i915_sample;
>> +    i915->pmu.enable = 0;
>> +
>> +    if (perf_pmu_register(&i915->pmu.base, "i915", -1))
>> +        i915->pmu.base.event_init = NULL;
>> +}
>> +
>> +void i915_pmu_unregister(struct drm_i915_private *i915)
>> +{
>> +    if (!i915->pmu.base.event_init)
>> +        return;
>> +
>> +    i915->pmu.enable = 0;
>> +
>> +    perf_pmu_unregister(&i915->pmu.base);
>> +    i915->pmu.base.event_init = NULL;
>> +
>> +    hrtimer_cancel(&i915->pmu.timer);
>> +}
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
>> b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index d33c93444c0d..0877b151239d 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -245,6 +245,8 @@ struct intel_engine_cs {
>>           I915_SELFTEST_DECLARE(bool mock : 1);
>>       } breadcrumbs;
>> +    u64 pmu_sample[4];
>> +
>>       /*
>>        * A pool of objects to use as shadow copies of client batch 
>> buffers
>>        * when the command parser is enabled. Prevents the client from
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 7ccbd6a2bbe0..733774f19a0b 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -86,6 +86,47 @@ enum i915_mocs_table_index {
>>       I915_MOCS_CACHED,
>>   };
>> +/**
>> + * DOC: perf_events exposed by i915 through 
>> /sys/bus/event_source/devices/i915
>> + *
>> + */
>> +#define I915_SAMPLE_QUEUED    0
>> +#define I915_SAMPLE_BUSY    1
>> +#define I915_SAMPLE_WAIT    2
>> +#define I915_SAMPLE_SEMA    3
>> +
>> +#define I915_SAMPLE_RCS        0
>> +#define I915_SAMPLE_VCS        1
>> +#define I915_SAMPLE_BCS        2
>> +#define I915_SAMPLE_VECS    3
>> +
>> +#define __I915_PMU_COUNT(ring, id) ((ring) << 2 | (id))
>> +
>> +#define I915_PMU_COUNT_RCS_BUSY __I915_PMU_COUNT(I915_SAMPLE_RCS, 
>> I915_SAMPLE_BUSY)
>> +#define I915_PMU_COUNT_RCS_WAIT __I915_PMU_COUNT(I915_SAMPLE_RCS, 
>> I915_SAMPLE_WAIT)
>> +#define I915_PMU_COUNT_RCS_SEMA __I915_PMU_COUNT(I915_SAMPLE_RCS, 
>> I915_SAMPLE_SEMA)
>> +
>> +#define I915_PMU_COUNT_VCS_BUSY __I915_PMU_COUNT(I915_SAMPLE_VCS, 
>> I915_SAMPLE_BUSY)
>> +#define I915_PMU_COUNT_VCS_WAIT __I915_PMU_COUNT(I915_SAMPLE_VCS, 
>> I915_SAMPLE_WAIT)
>> +#define I915_PMU_COUNT_VCS_SEMA __I915_PMU_COUNT(I915_SAMPLE_VCS, 
>> I915_SAMPLE_SEMA)
>> +
>> +#define I915_PMU_COUNT_BCS_BUSY __I915_PMU_COUNT(I915_SAMPLE_BCS, 
>> I915_SAMPLE_BUSY)
>> +#define I915_PMU_COUNT_BCS_WAIT __I915_PMU_COUNT(I915_SAMPLE_BCS, 
>> I915_SAMPLE_WAIT)
>> +#define I915_PMU_COUNT_BCS_SEMA __I915_PMU_COUNT(I915_SAMPLE_BCS, 
>> I915_SAMPLE_SEMA)
>> +
>> +#define I915_PMU_COUNT_VECS_BUSY __I915_PMU_COUNT(I915_SAMPLE_VECS, 
>> I915_SAMPLE_BUSY)
>> +#define I915_PMU_COUNT_VECS_WAIT __I915_PMU_COUNT(I915_SAMPLE_VECS, 
>> I915_SAMPLE_WAIT)
>> +#define I915_PMU_COUNT_VECS_SEMA __I915_PMU_COUNT(I915_SAMPLE_VECS, 
>> I915_SAMPLE_SEMA)
>> +
>> +#define I915_PMU_ACTUAL_FREQUENCY 32
>> +#define I915_PMU_REQUESTED_FREQUENCY 33
>> +#define I915_PMU_ENERGY 34
>> +#define I915_PMU_INTERRUPTS 35
>> +
>> +#define I915_PMU_RC6_RESIDENCY        40
>> +#define I915_PMU_RC6p_RESIDENCY    41
>> +#define I915_PMU_RC6pp_RESIDENCY    42
>> +
>>   /* Each region is a minimum of 16k, and there are at most 255 of them.
>>    */
>>   #define I915_NR_TEX_REGIONS 255    /* table size 2k - maximum due to 
>> use
>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>> index e46eba8cd1b7..7b8c6dce1078 100644
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
>> @@ -7386,6 +7386,7 @@ int perf_event_overflow(struct perf_event *event,
>>   {
>>       return __perf_event_overflow(event, 1, data, regs);
>>   }
>> +EXPORT_SYMBOL_GPL(perf_event_overflow);
>>   /*
>>    * Generic software event infrastructure
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx


More information about the Intel-gfx mailing list