[Intel-xe] [PATCH v3 2/2] drm/xe/pmu: Enable PMU interface

Iddamsetty, Aravind aravind.iddamsetty at intel.com
Wed Aug 9 07:46:20 UTC 2023



On 09-08-2023 12:58, Dixit, Ashutosh wrote:

Hi Ashutosh,

> On Tue, 08 Aug 2023 04:54:36 -0700, Aravind Iddamsetty wrote:
>>
> 
> Hi Aravind,
> 
> Spotted a few remaining things. See if it's possible to fix these up and
> send another version.
> 
>> diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
>> new file mode 100644
>> index 000000000000..9637f8283641
>> --- /dev/null
>> +++ b/drivers/gpu/drm/xe/xe_pmu.c
>> @@ -0,0 +1,673 @@
>> +// SPDX-License-Identifier: MIT
>> +/*
>> + * Copyright © 2023 Intel Corporation
>> + */
>> +
>> +#include <drm/drm_drv.h>
>> +#include <drm/drm_managed.h>
>> +#include <drm/xe_drm.h>
>> +
>> +#include "regs/xe_gt_regs.h"
>> +#include "xe_device.h"
>> +#include "xe_gt_clock.h"
>> +#include "xe_mmio.h"
>> +
>> +static cpumask_t xe_pmu_cpumask;
>> +static unsigned int xe_pmu_target_cpu = -1;
>> +
>> +static unsigned int config_gt_id(const u64 config)
>> +{
>> +	return config >> __XE_PMU_GT_SHIFT;
>> +}
>> +
>> +static u64 config_counter(const u64 config)
>> +{
>> +	return config & ~(~0ULL << __XE_PMU_GT_SHIFT);
>> +}
>> +
>> +static int engine_busyness_sample_type(u64 config)
>> +{
>> +	int type = 0;
>> +
>> +	switch (config) {
>> +	case XE_PMU_RENDER_GROUP_BUSY(0):
>> +		type =  __XE_SAMPLE_RENDER_GROUP_BUSY;
> 
> One extra space here...
> 
>> +static u64 __engine_group_busyness_read(struct xe_gt *gt, int sample_type)
>> +{
>> +	u64 val = 0;
>> +
> 
> What is the forcewake domain for these registers? Don't we need to get
> forcewake before reading these. Something like:

I'll check whether it's needed; if so, the read would be wrapped roughly as in the sketch below.
> 
>         XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
> 
>> +	switch (sample_type) {
>> +	case __XE_SAMPLE_RENDER_GROUP_BUSY:
>> +		val = xe_mmio_read32(gt, XE_OAG_RENDER_BUSY_FREE);
>> +		break;
>> +	case __XE_SAMPLE_COPY_GROUP_BUSY:
>> +		val = xe_mmio_read32(gt, XE_OAG_BLT_BUSY_FREE);
>> +		break;
>> +	case __XE_SAMPLE_MEDIA_GROUP_BUSY:
>> +		val = xe_mmio_read32(gt, XE_OAG_ANY_MEDIA_FF_BUSY_FREE);
>> +		break;
>> +	case __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY:
>> +		val = xe_mmio_read32(gt, XE_OAG_RC0_ANY_ENGINE_BUSY_FREE);
>> +		break;
>> +	default:
>> +		drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
>> +	}
> 
> And similarly here:
> 
>         XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
> 
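If it does turn out to be needed, something like this sketch (assuming XE_FORCEWAKE_ALL is the right domain for the OAG registers):

	static u64 __engine_group_busyness_read(struct xe_gt *gt, int sample_type)
	{
		u64 val = 0;

		/* wake the GT before touching the OAG counters */
		XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));

		switch (sample_type) {
		case __XE_SAMPLE_RENDER_GROUP_BUSY:
			val = xe_mmio_read32(gt, XE_OAG_RENDER_BUSY_FREE);
			break;
		case __XE_SAMPLE_COPY_GROUP_BUSY:
			val = xe_mmio_read32(gt, XE_OAG_BLT_BUSY_FREE);
			break;
		case __XE_SAMPLE_MEDIA_GROUP_BUSY:
			val = xe_mmio_read32(gt, XE_OAG_ANY_MEDIA_FF_BUSY_FREE);
			break;
		case __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY:
			val = xe_mmio_read32(gt, XE_OAG_RC0_ANY_ENGINE_BUSY_FREE);
			break;
		default:
			drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
		}

		XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));

		return val;
	}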
>> +static u64 engine_group_busyness_read(struct xe_gt *gt, u64 config)
>> +{
>> +	int sample_type = engine_busyness_sample_type(config);
>> +	const unsigned int gt_id = gt->info.id;
>> +	struct xe_device *xe = gt->tile->xe;
>> +	struct xe_pmu *pmu = &xe->pmu;
>> +	unsigned long flags;
>> +	bool device_awake;
>> +	u64 val;
>> +
>> +	device_awake = xe_device_mem_access_get_if_ongoing(xe);
>> +	if (device_awake)
>> +		val = __engine_group_busyness_read(gt, sample_type);
>> +
>> +	spin_lock_irqsave(&pmu->lock, flags);
>> +
>> +	if (device_awake) {
>> +		pmu->sample[gt_id][sample_type] = val;
>> +		xe_device_mem_access_put(xe);
> 
> I think we can add the xe_device_mem_access_put right after
> __engine_group_busyness_read:

Yup, agree; the resulting read path is sketched below, after the quoted function.
> 
> 	if (device_awake) {
> 		val = __engine_group_busyness_read(gt, sample_type);
> 		xe_device_mem_access_put(xe);
> 	}
> 
> 
>> +	} else {
>> +		val = pmu->sample[gt_id][sample_type];
>> +	}
>> +
>> +	spin_unlock_irqrestore(&pmu->lock, flags);
>> +
>> +	return val;
>> +}
>> +
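With the xe_device_mem_access_put() moved as you suggest, the read path would become something like (sketch):

	device_awake = xe_device_mem_access_get_if_ongoing(xe);
	if (device_awake) {
		val = __engine_group_busyness_read(gt, sample_type);
		xe_device_mem_access_put(xe);
	}

	spin_lock_irqsave(&pmu->lock, flags);

	if (device_awake)
		pmu->sample[gt_id][sample_type] = val;	/* cache the fresh value */
	else
		val = pmu->sample[gt_id][sample_type];	/* device asleep: report the cached value */

	spin_unlock_irqrestore(&pmu->lock, flags);

	return val;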
>> +static void engine_group_busyness_store(struct xe_gt *gt)
>> +{
>> +	struct xe_pmu *pmu = &gt->tile->xe->pmu;
>> +	unsigned int gt_id = gt->info.id;
>> +	unsigned long flags;
>> +	u64 val;
>> +	int i;
>> +
>> +	spin_lock_irqsave(&pmu->lock, flags);
>> +
>> +	for (i = __XE_SAMPLE_RENDER_GROUP_BUSY; i <= __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY; i++) {
> 
> Change the termination condition to 'i < __XE_NUM_PMU_SAMPLERS'.

This loop is specific to engine group busyness; future events might not belong in this routine, hence the bounded range rather than __XE_NUM_PMU_SAMPLERS.
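That is, the bounds stay tied to the engine group sub-range of the enum, something like:

	/*
	 * Snapshot only the engine group busyness samplers; a future
	 * sampler added after __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY is
	 * intentionally not walked by this loop.
	 */
	for (i = __XE_SAMPLE_RENDER_GROUP_BUSY; i <= __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY; i++)
		pmu->sample[gt_id][i] = __engine_group_busyness_read(gt, i);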

> 
>> +		val = __engine_group_busyness_read(gt, i);
>> +		pmu->sample[gt_id][i] = val;
> 
> Let's eliminate val and just do:
> 
> 		pmu->sample[gt_id][i] = __engine_group_busyness_read(gt, i);

OK.
> 
>> +static int
>> +config_status(struct xe_device *xe, u64 config)
>> +{
>> +	unsigned int max_gt_id = xe->info.gt_count > 1 ? 1 : 0;
>> +	unsigned int gt_id = config_gt_id(config);
>> +	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
>> +
>> +	if (gt_id > max_gt_id)
>> +		return -ENOENT;
>> +
>> +	switch (config_counter(config)) {
>> +	case XE_PMU_INTERRUPTS(0):
>> +		if (gt_id)
>> +			return -ENOENT;
>> +		break;
>> +	case XE_PMU_RENDER_GROUP_BUSY(0):
>> +	case XE_PMU_COPY_GROUP_BUSY(0):
>> +	case XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
>> +		break;
>> +	case XE_PMU_MEDIA_GROUP_BUSY(0):
>> +		if (!(gt->info.engine_mask & (BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0))))
> 
> Why change this? "if (gt->info.type != XE_GT_TYPE_MEDIA)" doesn't work?

Yes, the previous check is not sufficient: media engines can be fused off, as on PVC, and the GT type check really only applies to MTL+ (where media is a standalone GT), whereas the engine mask check works on all platforms.
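For contrast, the two candidate checks side by side (sketch; the engine mask variant is what the patch uses):

	/* GT type check: only works on MTL+, where media is a standalone GT */
	if (gt->info.type != XE_GT_TYPE_MEDIA)
		return -ENOENT;

	/*
	 * Engine mask check: works on all platforms, including e.g. PVC
	 * where the media engines are fused off.
	 */
	if (!(gt->info.engine_mask &
	      (BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0))))
		return -ENOENT;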

> 
>> +void xe_pmu_register(struct xe_pmu *pmu)
>> +{
>> +	struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
>> +	const struct attribute_group *attr_groups[] = {
>> +		&pmu->events_attr_group,
>> +		&xe_pmu_cpumask_attr_group,
>> +		NULL
>> +	};
>> +
>> +	int ret = -ENOMEM;
>> +
>> +	spin_lock_init(&pmu->lock);
>> +	pmu->cpuhp.cpu = -1;
>> +
>> +	pmu->name = kasprintf(GFP_KERNEL,
>> +			      "xe_%s",
>> +			      dev_name(xe->drm.dev));
>> +	if (pmu->name)
>> +		/* tools/perf reserves colons as special. */
>> +		strreplace((char *)pmu->name, ':', '_');
>> +
>> +	if (!pmu->name)
>> +		goto err;
>> +
>> +	pmu->events_attr_group.name = "events";
>> +	pmu->events_attr_group.attrs = create_event_attributes(pmu);
>> +	if (!pmu->events_attr_group.attrs)
>> +		goto err_name;
>> +
>> +	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
>> +					GFP_KERNEL);
>> +	if (!pmu->base.attr_groups)
>> +		goto err_attr;
>> +
>> +	pmu->base.module	= THIS_MODULE;
>> +	pmu->base.task_ctx_nr	= perf_invalid_context;
>> +	pmu->base.event_init	= xe_pmu_event_init;
>> +	pmu->base.add		= xe_pmu_event_add;
>> +	pmu->base.del		= xe_pmu_event_del;
>> +	pmu->base.start		= xe_pmu_event_start;
>> +	pmu->base.stop		= xe_pmu_event_stop;
>> +	pmu->base.read		= xe_pmu_event_read;
>> +	pmu->base.event_idx	= xe_pmu_event_event_idx;
>> +
>> +	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
>> +	if (ret)
>> +		goto err_groups;
>> +
>> +	ret = xe_pmu_register_cpuhp_state(pmu);
>> +	if (ret)
>> +		goto err_unreg;
>> +
>> +	ret = drmm_add_action_or_reset(&xe->drm, xe_pmu_unregister, pmu);
>> +	if (!ret)
>> +		return;
> 
> The error unwinding is not correct here, should be something like:
> 
> 	ret = drmm_add_action_or_reset(&xe->drm, xe_pmu_unregister, pmu);
> 	if (ret)
> 		goto err_cpuhp;

Is it not the same as above? On success ret is zero, so the if is true and we return; otherwise we fall through to the teardown path.

> 
> 	return;
> 
> err_cpuhp:
> 	xe_pmu_unregister_cpuhp_state(pmu);
>> +err_unreg:
>> +	perf_pmu_unregister(&pmu->base);
>> +err_groups:
>> +	kfree(pmu->base.attr_groups);
>> +err_attr:
>> +	pmu->base.event_init = NULL;
>> +	free_event_attributes(pmu);
>> +err_name:
>> +	kfree(pmu->name);
>> +err:
>> +	drm_notice(&xe->drm, "Failed to register PMU!\n");
>> +}
> 
>> diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h
>> new file mode 100644
>> index 000000000000..a950c892e364
>> --- /dev/null
>> +++ b/drivers/gpu/drm/xe/xe_pmu_types.h
>> @@ -0,0 +1,76 @@
>> +/* SPDX-License-Identifier: MIT */
>> +/*
>> + * Copyright © 2023 Intel Corporation
>> + */
>> +
>> +#ifndef _XE_PMU_TYPES_H_
>> +#define _XE_PMU_TYPES_H_
>> +
>> +#include <linux/perf_event.h>
>> +#include <linux/spinlock_types.h>
>> +#include <uapi/drm/xe_drm.h>
>> +
>> +enum {
>> +	__XE_SAMPLE_RENDER_GROUP_BUSY,
>> +	__XE_SAMPLE_COPY_GROUP_BUSY,
>> +	__XE_SAMPLE_MEDIA_GROUP_BUSY,
>> +	__XE_SAMPLE_ANY_ENGINE_GROUP_BUSY,
>> +	__XE_NUM_PMU_SAMPLERS
>> +};
>> +
>> +#define XE_MAX_GT_PER_TILE 2
>> +
>> +struct xe_pmu {
>> +	/**
>> +	 * @cpuhp: Struct used for CPU hotplug handling.
>> +	 */
>> +	struct {
>> +		struct hlist_node node;
>> +		unsigned int cpu;
>> +	} cpuhp;
>> +	/**
>> +	 * @base: PMU base.
>> +	 */
>> +	struct pmu base;
>> +	/**
>> +	 * @closed: xe is unregistering.
>> +	 */
>> +	bool closed;
>> +	/**
>> +	 * @name: Name as registered with perf core.
>> +	 */
>> +	const char *name;
>> +	/**
>> +	 * @lock: Lock protecting enable mask and ref count handling.
>> +	 */
>> +	spinlock_t lock;
>> +	/**
>> +	 * @sample: Current and previous (raw) counters.
>> +	 *
>> +	 * These counters are updated when the device is awake.
>> +	 *
>> +	 */
>> +	u64 sample[XE_MAX_GT_PER_TILE][__XE_NUM_PMU_SAMPLERS];
> 
> s/XE_MAX_GT_PER_TILE/XE_MAX_GT/ since the PMU is for the entire device not
> per tile, as I mentioned earlier.

Right, so for the whole device this would be
sample[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE][__XE_NUM_PMU_SAMPLERS].
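Something like the below (sketch, assuming the existing XE_MAX_TILES_PER_DEVICE from xe_device_types.h and the XE_MAX_GT name you suggested):

	#define XE_MAX_GT	(XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE)

	/**
	 * @sample: Current and previous (raw) counters, sized per GT on
	 * the device rather than per tile.
	 */
	u64 sample[XE_MAX_GT][__XE_NUM_PMU_SAMPLERS];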

Thanks,
Aravind.

> 
> Thanks.
> --
> Ashutosh

