[PATCH] drm/xe/pmu: Add GT frequency events
Belgaumkar, Vinay
vinay.belgaumkar at intel.com
Mon Feb 3 20:23:49 UTC 2025
On 1/30/2025 9:41 PM, Lucas De Marchi wrote:
> On Thu, Jan 30, 2025 at 08:47:28PM -0800, Vinay Belgaumkar wrote:
>> Define PMU events for GT frequency (actual and requested). This is
>> a port of the i915 driver implementation, where an internal timer
>> is used to aggregate GT frequencies over a fixed interval. The
>> following PMU events are added:
>>
>> xe_0000_00_02.0/gt-actual-frequency/ [Kernel PMU event]
>> xe_0000_00_02.0/gt-requested-frequency/ [Kernel PMU event]
>>
>> Standard perf commands can be used to monitor GT frequency:
>> $ perf stat -e xe_0000_00_02.0/gt-requested-frequency,gt=0/ -I1000
>>
>> 1.001175175 700 M xe/gt-requested-frequency,gt=0/
>> 2.005891881 703 M xe/gt-requested-frequency,gt=0/
>> 3.007318169 700 M xe/gt-requested-frequency,gt=0/
>>
>> Actual frequency will be reported as 0 when the GT is suspended.
>>
>> Cc: Riana Tauro <riana.tauro at intel.com>
>> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
>> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_pmu.c | 157 +++++++++++++++++++++++++++++-
>> drivers/gpu/drm/xe/xe_pmu_types.h | 26 +++++
>> 2 files changed, 181 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
>> index 3910a82328ee..31ce575c834f 100644
>> --- a/drivers/gpu/drm/xe/xe_pmu.c
>> +++ b/drivers/gpu/drm/xe/xe_pmu.c
>> @@ -8,6 +8,7 @@
>>
>> #include "xe_device.h"
>> #include "xe_gt_idle.h"
>> +#include "xe_guc_pc.h"
>> #include "xe_pm.h"
>> #include "xe_pmu.h"
>>
>> @@ -37,6 +38,12 @@
>>
>> #define XE_PMU_EVENT_GT_MASK GENMASK_ULL(63, 60)
>> #define XE_PMU_EVENT_ID_MASK GENMASK_ULL(11, 0)
>> +#define SAMPLING_TIMER_FREQUENCY_HZ 200
>> +
>> +static struct xe_pmu *event_to_pmu(struct perf_event *event)
>> +{
>> + return container_of(event->pmu, struct xe_pmu, base);
>> +}
>>
>> static unsigned int config_to_event_id(u64 config)
>> {
>> @@ -49,6 +56,8 @@ static unsigned int config_to_gt_id(u64 config)
>> }
>>
>> #define XE_PMU_EVENT_GT_C6_RESIDENCY 0x01
>> +#define XE_PMU_EVENT_GT_ACTUAL_FREQUENCY 0x02
>> +#define XE_PMU_EVENT_GT_REQUESTED_FREQUENCY 0x04
>>
>> static struct xe_gt *event_to_gt(struct perf_event *event)
>> {
>> @@ -113,9 +122,18 @@ static int xe_pmu_event_init(struct perf_event *event)
>> return 0;
>> }
>>
>> +static u64 read_sample(struct xe_pmu *pmu, unsigned int gt_id, int sample)
>
> please reserve the word "read" for when we are reading from hw
> get_stored_sample() would be better.
ok.
>
>> +{
>> + return pmu->event_sample[gt_id][sample];
>
> this is read by perf and written by the timer; you will need a lock
> to synchronize, as a u64 access may not be atomic.
Yup, I think the locks got dropped during the rebase.
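Something like this should do it (untested sketch, assuming a raw
spinlock, say pmu->lock, is added to struct xe_pmu to protect
event_sample[][], and with the rename suggested above):

static u64 get_stored_sample(struct xe_pmu *pmu, unsigned int gt_id, int sample)
{
	unsigned long flags;
	u64 val;

	/* pmu->lock is a hypothetical raw_spinlock_t guarding event_sample[][] */
	raw_spin_lock_irqsave(&pmu->lock, flags);
	val = pmu->event_sample[gt_id][sample];
	raw_spin_unlock_irqrestore(&pmu->lock, flags);

	return val;
}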
>
>> +}
>> +
>> static u64 __xe_pmu_event_read(struct perf_event *event)
>> {
>> struct xe_gt *gt = event_to_gt(event);
>> + struct xe_device *xe =
>> + container_of(event->pmu, typeof(*xe), pmu.base);
>> + struct xe_pmu *pmu = &xe->pmu;
>>
>> if (!gt)
>> return 0;
>> @@ -123,6 +141,14 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
>> switch (config_to_event_id(event->attr.config)) {
>> case XE_PMU_EVENT_GT_C6_RESIDENCY:
>> return xe_gt_idle_residency_msec(&gt->gtidle);
>> + case XE_PMU_EVENT_GT_ACTUAL_FREQUENCY:
>> + return div_u64(read_sample(pmu, gt->info.id,
>> + __XE_SAMPLE_FREQ_ACT),
>
> if you index it by the event id, then you can pretty much group them
> here. However, I'm not sure the design of storing the value read in
> one place is correct. What happens when you have 3 clients reading the
> same event? You are only storing the hw value in one place and
> returning it in all of them. However, the old value read is kept per
> perf_event, so each will calculate a different delta... maybe it's ok,
> but you are definitely missing the lock for the hw sampling, which
> will need to be a raw_spin_lock.
Yup, a lock is needed where the samples are stored. Also, sampling will
be common across all clients, per GT.
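On the writer side, the same lock would be taken in the timer path; a
minimal sketch (no irqsave needed there, since the hrtimer callback
already runs in hardirq context with interrupts disabled):

static void
add_sample_mult(struct xe_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
{
	/* Pairs with raw_spin_lock_irqsave() on the perf read side */
	raw_spin_lock(&pmu->lock);
	pmu->event_sample[gt_id][sample] += mul_u32_u32(val, mul);
	raw_spin_unlock(&pmu->lock);
}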
>
>> + USEC_PER_SEC /* to MHz */);
>> + case XE_PMU_EVENT_GT_REQUESTED_FREQUENCY:
>> + return div_u64(read_sample(pmu, gt->info.id,
>> + __XE_SAMPLE_FREQ_REQ),
>> + USEC_PER_SEC /* to MHz */);
>> }
>>
>> return 0;
>> @@ -154,8 +180,109 @@ static void xe_pmu_event_read(struct perf_event *event)
>> xe_pmu_event_update(event);
>> }
>>
>> +#define CONFIG_EVENT_ENABLED(config, event) \
>> + (FIELD_GET(XE_PMU_EVENT_ID_MASK, config) & event)
>> +
>> +#define CONFIG_EVENT_ENABLED_GT(config, event, gt) \
>> + (CONFIG_EVENT_ENABLED(config, event) && \
>> + (FIELD_GET(XE_PMU_EVENT_GT_MASK, config) == gt))
>> +
>> +static bool pmu_needs_timer(struct xe_pmu *pmu)
>> +{
>> + return CONFIG_EVENT_ENABLED(pmu->enable,
>> + (XE_PMU_EVENT_GT_ACTUAL_FREQUENCY |
>> + XE_PMU_EVENT_GT_REQUESTED_FREQUENCY));
>> +}
>> +
>> +static void xe_pmu_start_timer(struct xe_pmu *pmu)
>> +{
>> + u64 period = max_t(u64, 10000, NSEC_PER_SEC / SAMPLING_TIMER_FREQUENCY_HZ);
>> +
>> + if (!pmu->timer_enabled && pmu_needs_timer(pmu)) {
>> + pmu->timer_enabled = true;
>> + pmu->timer_last = ktime_get();
>> + hrtimer_start_range_ns(&pmu->timer,
>> + ns_to_ktime(period), 0,
>> + HRTIMER_MODE_REL_PINNED);
>> + }
>> +}
>> +
>> +static void
>> +add_sample_mult(struct xe_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
>> +{
>> + pmu->event_sample[gt_id][sample] += mul_u32_u32(val, mul);
>> +}
>> +
>> +static void
>> +frequency_sample(struct xe_gt *gt, unsigned int period_ns)
>> +{
>> + struct xe_device *xe = gt_to_xe(gt);
>> + struct xe_pmu *pmu = &xe->pmu;
>> + int ret;
>> + u32 cur_freq;
>> +
>> + if (CONFIG_EVENT_ENABLED_GT(pmu->enable, XE_PMU_EVENT_GT_ACTUAL_FREQUENCY, gt->info.id)) {
>> + u32 val;
>> +
>> + /* Actual freq will be 0 when GT is in C6 */
>> + val = xe_guc_pc_get_act_freq(&gt->uc.guc.pc);
>> +
>> + add_sample_mult(pmu, gt->info.id, __XE_SAMPLE_FREQ_ACT,
>> + val, period_ns / 1000);
>> + }
>> +
>> + if (CONFIG_EVENT_ENABLED_GT(pmu->enable, XE_PMU_EVENT_GT_REQUESTED_FREQUENCY,
>> + gt->info.id)) {
>> + ret = xe_guc_pc_get_cur_freq(&gt->uc.guc.pc, &cur_freq);
>> + if (!ret)
>> + add_sample_mult(pmu, gt->info.id, __XE_SAMPLE_FREQ_REQ,
>> + cur_freq,
>> + period_ns / 1000);
>> + }
>> +}
>> +
>> +static enum hrtimer_restart xe_sample(struct hrtimer *hrtimer)
>> +{
>> + struct xe_pmu *pmu = container_of(hrtimer, struct xe_pmu, timer);
>> + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
>> + u64 period = max_t(u64, 10000, NSEC_PER_SEC / SAMPLING_TIMER_FREQUENCY_HZ);
>> + unsigned int period_ns;
>> + struct xe_gt *gt;
>> + ktime_t now;
>> + unsigned int i;
>> +
>> + if (!READ_ONCE(pmu->timer_enabled))
>> + return HRTIMER_NORESTART;
>> +
>> + now = ktime_get();
>> + period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
>> + pmu->timer_last = now;
>> +
>> + /*
>> + * The passed-in period includes the time needed for grabbing the forcewake.
>> + * However, the potential error from timer callback delay greatly dominates
>> + * this, so we keep it simple.
>> + */
>> +
>> + for_each_gt(gt, xe, i)
>> + frequency_sample(gt, period_ns);
>> +
>> + hrtimer_forward(hrtimer, now, ns_to_ktime(period));
>> +
>> + return HRTIMER_RESTART;
>> +}
>> +
>> static void xe_pmu_enable(struct perf_event *event)
>> {
>> + struct xe_pmu *pmu = event_to_pmu(event);
>> + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
>> +
>> + /* Save the event config */
>> + pmu->enable |= event->attr.config;
>
> same concern as above... what happens when you have 2 clients reading
> the same event? You are storing the same bits here, so when either of
> them stops, you stop updating the other as well.
Added an enable_count as per the previous implementation.
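Roughly like this (sketch modeled on the i915 PMU; enable_count[] is a
hypothetical per-event-id array added to struct xe_pmu, sized to hold
the largest event id, with the locking around it elided):

static void xe_pmu_enable(struct perf_event *event)
{
	struct xe_pmu *pmu = event_to_pmu(event);
	unsigned int id = config_to_event_id(event->attr.config);

	/* Save the event config and track the number of active users */
	pmu->enable |= event->attr.config;
	pmu->enable_count[id]++;

	/* Start a timer, if needed, to collect samples */
	xe_pmu_start_timer(pmu);
}

static void xe_pmu_disable(struct perf_event *event)
{
	struct xe_pmu *pmu = event_to_pmu(event);
	unsigned int id = config_to_event_id(event->attr.config);

	/* Only drop the config bits once the last user is gone */
	if (--pmu->enable_count[id] == 0) {
		pmu->enable &= ~event->attr.config;
		pmu->timer_enabled &= pmu_needs_timer(pmu);
	}
}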
Thanks,
Vinay.
>
> alternative is to have a linked list of active sampling events and use
> the event->active_entry to track that, like done in
> arch/x86/events/rapl.c.
>
> Lucas De Marchi
>
>> +
>> + /* Start a timer, if needed, to collect samples */
>> + xe_pmu_start_timer(pmu);
>> +
>> /*
>> * Store the current counter value so we can report the correct delta
>> * for all listeners. Even when the event was already enabled and has
>> @@ -176,15 +303,27 @@ static void xe_pmu_event_start(struct perf_event *event, int flags)
>> event->hw.state = 0;
>> }
>>
>> +static void xe_pmu_disable(struct perf_event *event)
>> +{
>> + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
>> + struct xe_pmu *pmu = &xe->pmu;
>> +
>> + pmu->enable &= ~event->attr.config;
>> + pmu->timer_enabled &= pmu_needs_timer(pmu);
>> +}
>> +
>> static void xe_pmu_event_stop(struct perf_event *event, int flags)
>> {
>> struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
>> struct xe_pmu *pmu = &xe->pmu;
>>
>> - if (pmu->registered)
>> + if (pmu->registered) {
>> if (flags & PERF_EF_UPDATE)
>> xe_pmu_event_update(event);
>>
>> + xe_pmu_disable(event);
>> + }
>> +
>> event->hw.state = PERF_HES_STOPPED;
>> }
>>
>> @@ -270,6 +409,10 @@ static ssize_t event_attr_show(struct device *dev,
>> XE_EVENT_ATTR_GROUP(v_, id_, &pmu_event_ ##v_.attr.attr)
>>
>> XE_EVENT_ATTR_SIMPLE(gt-c6-residency, gt_c6_residency,
>> XE_PMU_EVENT_GT_C6_RESIDENCY, "ms");
>> +XE_EVENT_ATTR_SIMPLE(gt-actual-frequency, gt_actual_frequency,
>> + XE_PMU_EVENT_GT_ACTUAL_FREQUENCY, "MHz");
>> +XE_EVENT_ATTR_SIMPLE(gt-requested-frequency, gt_requested_frequency,
>> + XE_PMU_EVENT_GT_REQUESTED_FREQUENCY, "MHz");
>>
>> static struct attribute *pmu_empty_event_attrs[] = {
>> /* Empty - all events are added as groups with .attr_update() */
>> @@ -283,6 +426,8 @@ static const struct attribute_group pmu_events_attr_group = {
>>
>> static const struct attribute_group *pmu_events_attr_update[] = {
>> &pmu_group_gt_c6_residency,
>> + &pmu_group_gt_actual_frequency,
>> + &pmu_group_gt_requested_frequency,
>> NULL,
>> };
>>
>> @@ -290,8 +435,11 @@ static void set_supported_events(struct xe_pmu *pmu)
>> {
>> struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
>>
>> - if (!xe->info.skip_guc_pc)
>> + if (!xe->info.skip_guc_pc) {
>> pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_C6_RESIDENCY);
>> + pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_ACTUAL_FREQUENCY);
>> + pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_REQUESTED_FREQUENCY);
>> + }
>> }
>>
>> /**
>> @@ -306,6 +454,8 @@ static void xe_pmu_unregister(void *arg)
>> if (!pmu->registered)
>> return;
>>
>> + hrtimer_cancel(&pmu->timer);
>> +
>> pmu->registered = false;
>>
>> perf_pmu_unregister(&pmu->base);
>> @@ -334,6 +484,9 @@ int xe_pmu_register(struct xe_pmu *pmu)
>> if (IS_SRIOV_VF(xe))
>> return 0;
>>
>> + hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>> + pmu->timer.function = xe_sample;
>> +
>> name = kasprintf(GFP_KERNEL, "xe_%s",
>> dev_name(xe->drm.dev));
>> if (!name)
>> diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h
>> index f5ba4d56622c..8dc235e1b6d3 100644
>> --- a/drivers/gpu/drm/xe/xe_pmu_types.h
>> +++ b/drivers/gpu/drm/xe/xe_pmu_types.h
>> @@ -9,6 +9,12 @@
>> #include <linux/perf_event.h>
>> #include <linux/spinlock_types.h>
>>
>> +enum {
>> + __XE_SAMPLE_FREQ_ACT,
>> + __XE_SAMPLE_FREQ_REQ,
>> + __XE_NUM_PMU_SAMPLERS
>> +};
>> +
>> #define XE_PMU_MAX_GT 2
>>
>> /**
>> @@ -34,6 +40,26 @@ struct xe_pmu {
>> * @supported_events: Bitmap of supported events, indexed by event id
>> */
>> u64 supported_events;
>> + /**
>> + * @timer: Timer for sampling GT frequency.
>> + */
>> + struct hrtimer timer;
>> + /**
>> + * @timer_last: Timestamp of the previous timer invocation.
>> + */
>> + ktime_t timer_last;
>> + /**
>> + * @timer_enabled: Should the internal sampling timer be running.
>> + */
>> + bool timer_enabled;
>> + /**
>> + * @event_sample: Stores frequency-related counters.
>> + */
>> + u64 event_sample[XE_PMU_MAX_GT][__XE_NUM_PMU_SAMPLERS];
>> + /**
>> + * @enable: Store requested PMU event config.
>> + */
>> + u64 enable;
>> };
>>
>> #endif
>> --
>> 2.38.1
>>