[PATCH 2/3] drm/xe/pmu: Add GT C6 events

Rodrigo Vivi rodrigo.vivi at intel.com
Fri Sep 27 18:27:37 UTC 2024


On Thu, Sep 26, 2024 at 05:23:43PM -0700, Vinay Belgaumkar wrote:
> Provide a PMU interface for GT C6 residency counters. The implementation
> is ported over from the i915 PMU code. Residency is provided in units of
> ms(similar to sysfs entry - /sys/class/drm/card0/device/tile0/gt0/gtidle).
> 
> The following PMU events are being added:
> 
> >> perf list | grep rc6
> 
>   xe_0000_00_02.0/rc6-residency-gt0/                 [Kernel PMU event]
>   xe_0000_00_02.0/rc6-residency-gt1/                 [Kernel PMU event]
> 
> v2: Checkpatch fix, move timer code to next patch
> v3: Fix kunit issue
> 
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_gt.c        |   2 +
>  drivers/gpu/drm/xe/xe_gt_idle.c   |  20 ++--
>  drivers/gpu/drm/xe/xe_gt_idle.h   |   1 +
>  drivers/gpu/drm/xe/xe_pmu.c       | 172 ++++++++++++++++++++++++++++++
>  drivers/gpu/drm/xe/xe_pmu.h       |   2 +
>  drivers/gpu/drm/xe/xe_pmu_types.h |  58 ++++++++++
>  include/uapi/drm/xe_drm.h         |   4 +
>  7 files changed, 253 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index 9b0218109647..7a190c49c573 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -872,6 +872,8 @@ int xe_gt_suspend(struct xe_gt *gt)
>  
>  	xe_gt_idle_disable_pg(gt);
>  
> +	xe_pmu_suspend(gt);
> +
>  	xe_gt_disable_host_l2_vram(gt);
>  
>  	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
> diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
> index 531924b6c0a1..e0a12ac7387c 100644
> --- a/drivers/gpu/drm/xe/xe_gt_idle.c
> +++ b/drivers/gpu/drm/xe/xe_gt_idle.c
> @@ -277,18 +277,26 @@ static ssize_t idle_status_show(struct device *dev,
>  }
>  static DEVICE_ATTR_RO(idle_status);
>  
> -static ssize_t idle_residency_ms_show(struct device *dev,
> -				      struct device_attribute *attr, char *buff)
> +u64 xe_gt_idle_residency(struct xe_gt *gt)
>  {
> -	struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
> +	struct xe_device *xe = gt_to_xe(gt);
> +	struct xe_gt_idle *gtidle = &gt->gtidle;
>  	struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
>  	u64 residency;
>  
> -	xe_pm_runtime_get(pc_to_xe(pc));
> +	xe_pm_runtime_get(xe);
>  	residency = gtidle->idle_residency(pc);
> -	xe_pm_runtime_put(pc_to_xe(pc));
> +	xe_pm_runtime_put(xe);
> +
> +	return get_residency_ms(gtidle, residency);
> +}
> +
> +static ssize_t idle_residency_ms_show(struct device *dev,
> +				      struct device_attribute *attr, char *buff)
> +{
> +	struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
>  
> -	return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency));
> +	return sysfs_emit(buff, "%llu\n", xe_gt_idle_residency(gtidle_to_gt(gtidle)));
>  }
>  static DEVICE_ATTR_RO(idle_residency_ms);
>  
> diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h
> index 4455a6501cb0..887791f653ac 100644
> --- a/drivers/gpu/drm/xe/xe_gt_idle.h
> +++ b/drivers/gpu/drm/xe/xe_gt_idle.h
> @@ -17,5 +17,6 @@ void xe_gt_idle_disable_c6(struct xe_gt *gt);
>  void xe_gt_idle_enable_pg(struct xe_gt *gt);
>  void xe_gt_idle_disable_pg(struct xe_gt *gt);
>  int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p);
> +u64 xe_gt_idle_residency(struct xe_gt *gt);
>  
>  #endif /* _XE_GT_IDLE_H_ */
> diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
> index bdaea9ca1065..b1b38d245e00 100644
> --- a/drivers/gpu/drm/xe/xe_pmu.c
> +++ b/drivers/gpu/drm/xe/xe_pmu.c
> @@ -11,10 +11,15 @@
>  #include "xe_device.h"
>  #include "xe_force_wake.h"
>  #include "xe_gt_clock.h"
> +#include "xe_gt_idle.h"
> +#include "xe_guc_pc.h"
>  #include "xe_mmio.h"
>  #include "xe_macros.h"
> +#include "xe_module.h"
>  #include "xe_pm.h"
>  
> +#define FREQUENCY 200
> +
>  static cpumask_t xe_pmu_cpumask;
>  static unsigned int xe_pmu_target_cpu = -1;
>  
> @@ -35,6 +40,11 @@ static unsigned int xe_pmu_target_cpu = -1;
>   *
>   */
>  
> +static struct xe_pmu *event_to_pmu(struct perf_event *event)
> +{
> +	return container_of(event->pmu, struct xe_pmu, base);
> +}
> +
>  static unsigned int config_gt_id(const u64 config)
>  {
>  	return config >> __XE_PMU_GT_SHIFT;
> @@ -45,6 +55,35 @@ static u64 config_counter(const u64 config)
>  	return config & ~(~0ULL << __XE_PMU_GT_SHIFT);
>  }
>  
> +static unsigned int other_bit(const u64 config)
> +{
> +	unsigned int val;
> +
> +	switch (config_counter(config)) {
> +	case XE_PMU_RC6_RESIDENCY:
> +		val = __XE_PMU_RC6_RESIDENCY_ENABLED;
> +		break;
> +	default:
> +		/*
> +		 * Events that do not require sampling, or tracking state
> +		 * transitions between enabled and disabled can be ignored.
> +		 */
> +		return -1;
> +	}
> +
> +	return config_gt_id(config) * __XE_PMU_TRACKED_EVENT_COUNT + val;
> +}
> +
> +static unsigned int config_bit(const u64 config)
> +{
> +	return other_bit(config);
> +}
> +
> +static unsigned int event_bit(struct perf_event *event)
> +{
> +	return config_bit(event->attr.config);
> +}
> +
>  static void xe_pmu_event_destroy(struct perf_event *event)
>  {
>  	struct xe_device *xe =
> @@ -64,6 +103,10 @@ config_status(struct xe_device *xe, u64 config)
>  		return -ENOENT;
>  
>  	switch (config_counter(config)) {
> +	case XE_PMU_RC6_RESIDENCY:
> +		if (xe->info.skip_guc_pc)
> +			return -ENODEV;
> +		break;
>  	default:
>  		return -ENOENT;
>  	}
> @@ -110,6 +153,63 @@ static int xe_pmu_event_init(struct perf_event *event)
>  	return 0;
>  }
>  
> +static inline s64 ktime_since_raw(const ktime_t kt)
> +{
> +	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
> +}
> +
> +static u64 read_sample(struct xe_pmu *pmu, unsigned int gt_id, int sample)
> +{
> +	return pmu->event_sample[gt_id][sample].cur;
> +}
> +
> +static void
> +store_sample(struct xe_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> +{
> +	pmu->event_sample[gt_id][sample].cur = val;
> +}
> +
> +static u64 get_rc6(struct xe_gt *gt)
> +{
> +	struct xe_device *xe = gt_to_xe(gt);
> +	const unsigned int gt_id = gt->info.id;
> +	struct xe_pmu *pmu = &xe->pmu;
> +	bool device_awake;
> +	unsigned long flags;
> +	u64 val;
> +
> +	device_awake = xe_pm_runtime_get_if_active(xe);
> +	if (device_awake) {
> +		val = xe_gt_idle_residency(gt);
> +		xe_pm_runtime_put(xe);
> +	}
> +
> +	spin_lock_irqsave(&pmu->lock, flags);
> +
> +	if (device_awake) {
> +		store_sample(pmu, gt_id, __XE_SAMPLE_RC6, val);
> +	} else {
> +		/*
> +		 * We think we are runtime suspended.
> +		 *
> +		 * Report the delta from when the device was suspended to now,
> +		 * on top of the last known real value, as the approximated RC6
> +		 * counter value.
> +		 */
> +		val = ktime_since_raw(pmu->sleep_last[gt_id]);
> +		val += read_sample(pmu, gt_id, __XE_SAMPLE_RC6);
> +	}
> +
> +	if (val < read_sample(pmu, gt_id, __XE_SAMPLE_RC6_LAST_REPORTED))
> +		val = read_sample(pmu, gt_id, __XE_SAMPLE_RC6_LAST_REPORTED);
> +	else
> +		store_sample(pmu, gt_id, __XE_SAMPLE_RC6_LAST_REPORTED, val);
> +
> +	spin_unlock_irqrestore(&pmu->lock, flags);
> +
> +	return val;
> +}
> +
>  static u64 __xe_pmu_event_read(struct perf_event *event)
>  {
>  	struct xe_device *xe =
> @@ -120,6 +220,9 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
>  	u64 val = 0;
>  
>  	switch (config_counter(config)) {
> +	case XE_PMU_RC6_RESIDENCY:
> +		val = get_rc6(gt);
> +		break;
>  	default:
>  		drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
>  	}
> @@ -151,6 +254,28 @@ static void xe_pmu_event_read(struct perf_event *event)
>  
>  static void xe_pmu_enable(struct perf_event *event)
>  {
> +	struct xe_pmu *pmu = event_to_pmu(event);
> +	const unsigned int bit = event_bit(event);
> +	unsigned long flags;
> +
> +	if (bit == -1)
> +		goto update;
> +
> +	spin_lock_irqsave(&pmu->lock, flags);

lockdep didn't like it:

[ 8969.701191] =============================
[ 8969.705238] [ BUG: Invalid wait context ]
[ 8969.709277] 6.11.0+ #44 Tainted: G     U     OE     
[ 8969.714289] -----------------------------
[ 8969.718326] perf/10438 is trying to lock:
[ 8969.722371] ffff88827ead2d50 (&pmu->lock){....}-{3:3}, at: xe_pmu_enable+0xdb/0x340 [xe]
[ 8969.731153] other info that might help us debug this:
[ 8969.736254] context-{5:5}
[ 8969.738912] 3 locks held by perf/10438:
[ 8969.742785]  #0: ffff888f73a11f98 (&cpuctx_mutex){+.+.}-{4:4}, at: perf_event_ctx_lock_nested+0x18b/0x340
[ 8969.752448]  #1: ffff888100fb6d98 (&event->child_mutex){+.+.}-{4:4}, at: perf_event_for_each_child+0x7d/0x140
[ 8969.762456]  #2: ffff888f73a11ef8 (&cpuctx_lock){....}-{2:2}, at: event_function+0x10f/0x460
[ 8969.770975] stack backtrace:
[ 8969.773882] CPU: 0 UID: 0 PID: 10438 Comm: perf Tainted: G     U     OE      6.11.0+ #44
[ 8969.782037] Tainted: [U]=USER, [O]=OOT_MODULE, [E]=UNSIGNED_MODULE
[ 8969.788260] Hardware name: iBUYPOWER INTEL/B660 DS3H AC DDR4-Y1, BIOS F5 12/17/2021
[ 8969.795976] Call Trace:
[ 8969.798453]  <TASK>
[ 8969.800581]  dump_stack_lvl+0x8f/0xe0
[ 8969.804290]  __lock_acquire+0x159a/0x6270
[ 8969.808357]  ? __pfx___lock_acquire+0x10/0x10
[ 8969.812763]  lock_acquire+0x19e/0x500
[ 8969.816469]  ? xe_pmu_enable+0xdb/0x340 [xe]


> +
> +	/*
> +	 * Update the bitmask of enabled events and increment
> +	 * the event reference counter.
> +	 */
> +	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != XE_PMU_MASK_BITS);
> +	XE_WARN_ON(bit >= ARRAY_SIZE(pmu->enable_count));
> +	XE_WARN_ON(pmu->enable_count[bit] == ~0);
> +
> +	pmu->enable |= BIT(bit);
> +	pmu->enable_count[bit]++;
> +
> +	spin_unlock_irqrestore(&pmu->lock, flags);
> +update:
>  	/*
>  	 * Store the current counter value so we can report the correct delta
>  	 * for all listeners. Even when the event was already enabled and has
> @@ -277,6 +402,7 @@ create_event_attributes(struct xe_pmu *pmu)
>  		const char *name;
>  		const char *unit;
>  	} events[] = {
> +		__event(0, "rc6-residency", "ms"),
>  	};
>  
>  	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
> @@ -465,6 +591,32 @@ static void xe_pmu_unregister_cpuhp_state(struct xe_pmu *pmu)
>  	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
>  }
>  
> +static void store_rc6_residency(struct xe_gt *gt)
> +{
> +	struct xe_device *xe = gt_to_xe(gt);
> +	struct xe_pmu *pmu = &xe->pmu;
> +
> +	store_sample(pmu, gt->info.id, __XE_SAMPLE_RC6,
> +		     xe_gt_idle_residency(gt));
> +	pmu->sleep_last[gt->info.id] = ktime_get_raw();
> +}
> +
> +/**
> + * xe_pmu_suspend() - Save residency count before suspend
> + */
> +void xe_pmu_suspend(struct xe_gt *gt)
> +{
> +	struct xe_device *xe = gt_to_xe(gt);
> +	struct xe_pmu *pmu = &xe->pmu;
> +
> +	if (!pmu->base.event_init)
> +		return;
> +
> +	spin_lock_irq(&pmu->lock);
> +	store_rc6_residency(gt);
> +	spin_unlock_irq(&pmu->lock);
> +}
> +
>  /**
>   * xe_pmu_unregister() - Remove/cleanup PMU registration
>   */
> @@ -492,6 +644,24 @@ void xe_pmu_unregister(void *arg)
>  	free_event_attributes(pmu);
>  }
>  
> +static void init_rc6(struct xe_pmu *pmu)
> +{
> +	struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
> +	struct xe_gt *gt;
> +	unsigned int j;
> +
> +	for_each_gt(gt, xe, j) {
> +		xe_pm_runtime_get(xe);
> +		u64 val = xe_gt_idle_residency(gt);
> +
> +		store_sample(pmu, j, __XE_SAMPLE_RC6, val);
> +		store_sample(pmu, j, __XE_SAMPLE_RC6_LAST_REPORTED,
> +			     val);
> +		pmu->sleep_last[j] = ktime_get_raw();
> +		xe_pm_runtime_put(xe);
> +	}
> +}
> +
>  /**
>   * xe_pmu_register() - Define basic PMU properties for Xe and add event callbacks.
>   *
> @@ -525,6 +695,8 @@ void xe_pmu_register(struct xe_pmu *pmu)
>  	if (!pmu->events_attr_group.attrs)
>  		goto err_name;
>  
> +	init_rc6(pmu);
> +
>  	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
>  					GFP_KERNEL);
>  	if (!pmu->base.attr_groups)
> diff --git a/drivers/gpu/drm/xe/xe_pmu.h b/drivers/gpu/drm/xe/xe_pmu.h
> index d07e5dfdfec0..17f5a8d7d45c 100644
> --- a/drivers/gpu/drm/xe/xe_pmu.h
> +++ b/drivers/gpu/drm/xe/xe_pmu.h
> @@ -15,11 +15,13 @@ int xe_pmu_init(void);
>  void xe_pmu_exit(void);
>  void xe_pmu_register(struct xe_pmu *pmu);
>  void xe_pmu_unregister(void *arg);
> +void xe_pmu_suspend(struct xe_gt *gt);
>  #else
>  static inline int xe_pmu_init(void) { return 0; }
>  static inline void xe_pmu_exit(void) {}
>  static inline void xe_pmu_register(struct xe_pmu *pmu) {}
>  static inline void xe_pmu_unregister(void *arg) {}
> +static inline void xe_pmu_suspend(struct xe_gt *gt) {}
>  #endif
>  
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h
> index ca0e7cbe2081..1213d2a73492 100644
> --- a/drivers/gpu/drm/xe/xe_pmu_types.h
> +++ b/drivers/gpu/drm/xe/xe_pmu_types.h
> @@ -11,11 +11,34 @@
>  #include <uapi/drm/xe_drm.h>
>  
>  enum {
> +	__XE_SAMPLE_RC6,
> +	__XE_SAMPLE_RC6_LAST_REPORTED,
>  	__XE_NUM_PMU_SAMPLERS
>  };
>  
>  #define XE_PMU_MAX_GT 2
>  
> +/*
> + * Non-engine events that we need to track enabled-disabled transition and
> + * current state.
> + */
> +enum xe_pmu_tracked_events {
> +	__XE_PMU_RC6_RESIDENCY_ENABLED,
> +	__XE_PMU_TRACKED_EVENT_COUNT, /* count marker */
> +};
> +
> +/*
> + * How many different events we track in the global PMU mask.
> + *
> + * It is also used to know the needed number of event reference counters.
> + */
> +#define XE_PMU_MASK_BITS \
> +	(XE_PMU_MAX_GT * __XE_PMU_TRACKED_EVENT_COUNT)
> +
> +struct xe_pmu_sample {
> +	u64 cur;
> +};
> +
>  struct xe_pmu {
>  	/**
>  	 * @cpuhp: Struct used for CPU hotplug handling.
> @@ -58,6 +81,41 @@ struct xe_pmu {
>  	 * @pmu_attr: Memory block holding device attributes.
>  	 */
>  	void *pmu_attr;
> +
> +	/**
> +	 * @enable: Bitmask of specific enabled events.
> +	 *
> +	 * For some events we need to track their state and do some internal
> +	 * house keeping.
> +	 *
> +	 * Each engine event sampler type and event listed in enum
> +	 * xe_pmu_tracked_events gets a bit in this field.
> +	 *
> +	 * Low bits are engine samplers and other events continue from there.
> +	 */
> +	u32 enable;
> +
> +	/**
> +	 * @enable_count: Reference counts for the enabled events.
> +	 *
> +	 * Array indices are mapped in the same way as bits in the @enable field
> +	 * and they are used to control sampling on/off when multiple clients
> +	 * are using the PMU API.
> +	 */
> +	unsigned int enable_count[XE_PMU_MASK_BITS];
> +	/**
> +	 * @sample: Current and previous (raw) counters for sampling events.
> +	 *
> +	 * These counters are updated from the xe PMU sampling timer.
> +	 *
> +	 * Only global counters are held here. (NOTE(review): the original text
> +	 * referenced per-engine counters in struct intel_engine_cs - a stale
> +	 */
> +	struct xe_pmu_sample event_sample[XE_PMU_MAX_GT][__XE_NUM_PMU_SAMPLERS];
> +	/**
> +	 * @sleep_last: Last time GT parked for RC6 estimation.
> +	 */
> +	ktime_t sleep_last[XE_PMU_MAX_GT];
>  };
>  
>  #endif
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index 2c5f258eee3a..3ef3926551dd 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -1396,6 +1396,10 @@ struct drm_xe_wait_user_fence {
>  
>  #define ___XE_PMU_OTHER(gt, x) \
>  	(((__u64)(x)) | ((__u64)(gt) << __XE_PMU_GT_SHIFT))
> +#define __XE_PMU_OTHER(x) ___XE_PMU_OTHER(0, x)
> +
> +#define XE_PMU_RC6_RESIDENCY                    __XE_PMU_OTHER(0)
> +#define __XE_PMU_RC6_RESIDENCY(gt)              ___XE_PMU_OTHER(gt, 0)
>  
>  /**
>   * enum drm_xe_observation_type - Observation stream types
> -- 
> 2.38.1
> 


More information about the Intel-xe mailing list