[PATCH v4 7/8] drm/xe/xe_pmu: Add pmu support for per-function engine activity stats

Lucas De Marchi lucas.demarchi at intel.com
Sat Feb 1 02:53:55 UTC 2025


On Fri, Jan 31, 2025 at 05:21:47PM -0800, Umesh Nerlige Ramappa wrote:
>On Fri, Jan 31, 2025 at 06:23:05PM -0600, Lucas De Marchi wrote:
>>On Wed, Jan 29, 2025 at 03:46:50PM +0530, Riana Tauro wrote:
>>>Add pmu support for per-function per-engine-class engine activity
>>>stats.
>>>
>>>per-function per-engine-class activity is enabled when num_vfs
>>>are set. If num_vfs is set to 2, then the applicable function ids
>>>are
>>>
>>>0 - Global per-engine-class activity
>>>1 - PF per-engine-class activity
>>>2,3 - per-VF per-engine-class activity from PF
>>
>>IMO that's super confusing. The pci function is already part of the
>>pmu name and hence event source:
>>
>>	/sys/bus/event_source/devices/xe_0000_00_02.0
>>					 function --^
>>
>>in a pf/vf scenario, what's the point of the global one?
>
>From a use case perspective, it should be possible to add up the 
>utilizations across the pf/vf to get the global one. We are exposing 
>the global one because
>
>1) We get that info from GuC anyways
>2) Backwards compatible with tools that implement Native only support
>
>I am open to dropping it though since I am not sure how tools will 
>consume this from Xe KMD.
>
>>Also is it monitoring per-vf, all vfs or what? The interface looks
>>confusing.
>
>From a PF, KMD has the capability to monitor (1) Global = PF and all 
>VFs, (2) PF only and (3) specific VF.
>
>w.r.t the interface:
>
>I forget what was decided (if at all) on how to expose the individual 
>PF and VF values. Did we plan on going ahead and creating a separate 
>PMU event source (and PMU device) for each. In that case, this should 
>be:

not for the common case, which is the driver attached to the VF is
actually running inside a VM. Remeber that there's no driver for the
global one - there's only the PF driver and the VF drivers.

If the host needs to monitor the utilization by the VF, then maybe we
can use a bitmask to specify the functions, but I'm not sure.

>
>/sys/bus/event_source/devices/xe_0000_00_02.0 (Global) (should return 
>the same counts on Native as well as SRIOV)
>
>/sys/bus/event_source/devices/xe_pf_0000_00_02.0 (PF monitoring PF)
>/sys/bus/event_source/devices/xe_pf_0000_00_02.1 (PF monitoring VF1)
>/sys/bus/event_source/devices/xe_pf_0000_00_02.2 (PF monitoring VF2)

that won't work... as per above, there's only one pmu driver, unless you
are attaching a vf driver on the same host, which is done only for test
purposes, not for a real case scenario.

>
>If in future a VF will be able to monitor it's own activity, it should 
>be
>
>/sys/bus/event_source/devices/xe_0000_00_02.1 (VF1 monitoring it's own 
>activity)
>
>If we do that, then vf index parameter can be dropped. TBH, I like the 
>current interface where vf index is being passed in config, but open 
>to other ideas.

maybe we only need to clarify the names and the global vs pf
distinction. Let's bring Michal to the discussion and clarify the
usages. I'd prefer to start these counters without the VF part, but in a
way that it can be extended for when we add it.

Lucas De Marchi

>
>Thanks,
>Umesh
>
>
>>
>>
>>>
>>>This can be read from perf tool as shown below
>>>
>>>./perf stat -e xe_<bdf>/engine-active-ticks,gt=0,engine_class=0,
>>>			 engine_instance=0,function=1/ -I 1000
>>>
>>>Signed-off-by: Riana Tauro <riana.tauro at intel.com>
>>>---
>>>drivers/gpu/drm/xe/xe_pmu.c | 45 ++++++++++++++++++++++++++++++-------
>>>1 file changed, 37 insertions(+), 8 deletions(-)
>>>
>>>diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
>>>index 15e9a57aa429..2968fc9a358c 100644
>>>--- a/drivers/gpu/drm/xe/xe_pmu.c
>>>+++ b/drivers/gpu/drm/xe/xe_pmu.c
>>>@@ -12,6 +12,7 @@
>>>#include "xe_hw_engine.h"
>>>#include "xe_pm.h"
>>>#include "xe_pmu.h"
>>>+#include "xe_sriov_pf_helpers.h"
>>>
>>>/**
>>>* DOC: Xe PMU (Performance Monitoring Unit)
>>>@@ -29,15 +30,21 @@
>>>*
>>>*        60        56        52        48        44        40        36        32
>>>* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
>>>- *   [ gt ]
>>>+ *   [ gt ]    [             function              ]
>>>*
>>>*        28        24        20        16        12         8         4         0
>>>* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
>>>*            [   engine_class  ] [ engine_instance ] [         event             ]
>>
>>I usually like ascii art, but I'm not sure this buys us anything
>>compared to the GENMASKs below, particularly because they don't apply to
>>all events and they are **not** set in stone. Userspace is supposed to
>>read from sysfs what are the bits correspond to each parameter.
>>
>>Lucas De Marchi
>>
>>>*
>>>- * engine_class and engine_instance bits will be applicable for
>>>+ * function, engine_class and engine_instance bits will be applicable for
>>>* per-engine-class activity events (engine-active-ticks, engine-total-ticks)
>>>*
>>>+ * Function id applicable for per-engine-class activity
>>>+ *
>>>+ * 0 - global per-engine-class activity
>>>+ * 1 - PF per-engine-class activity
>>>+ * 2 .. (num_vfs + 1) - per-VF per-engine-class activity from PF
>>>+ *
>>>* The standard perf tool can be used to grep for a certain event as well.
>>>* Example:
>>>*
>>>@@ -49,6 +56,7 @@
>>>*/
>>>
>>>#define XE_PMU_EVENT_GT_MASK			GENMASK_ULL(63, 60)
>>>+#define XE_PMU_EVENT_FUNCTION_MASK		GENMASK_ULL(59, 44)
>>>#define XE_PMU_EVENT_ENGINE_CLASS_MASK		GENMASK_ULL(27, 20)
>>>#define XE_PMU_EVENT_ENGINE_INSTANCE_MASK	GENMASK_ULL(19, 12)
>>>#define XE_PMU_EVENT_ID_MASK			GENMASK_ULL(11, 0)
>>>@@ -58,6 +66,11 @@ static unsigned int config_to_event_id(u64 config)
>>>	return FIELD_GET(XE_PMU_EVENT_ID_MASK, config);
>>>}
>>>
>>>+static unsigned int config_to_function_id(u64 config)
>>>+{
>>>+	return FIELD_GET(XE_PMU_EVENT_FUNCTION_MASK, config);
>>>+}
>>>+
>>>static unsigned int config_to_engine_class(u64 config)
>>>{
>>>	return FIELD_GET(XE_PMU_EVENT_ENGINE_CLASS_MASK, config);
>>>@@ -116,7 +129,7 @@ static bool event_supported(struct xe_pmu *pmu, unsigned int gt,
>>>static bool event_param_valid(struct perf_event *event)
>>>{
>>>	struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
>>>-	unsigned int engine_class, engine_instance;
>>>+	unsigned int engine_class, engine_instance, function_id;
>>>	u64 config = event->attr.config;
>>>	struct xe_gt *gt;
>>>
>>>@@ -124,18 +137,28 @@ static bool event_param_valid(struct perf_event *event)
>>>	if (!gt)
>>>		return false;
>>>
>>>+	function_id = config_to_function_id(config);
>>>+	if (function_id && !IS_SRIOV_PF(xe))
>>>+		return false;
>>>+
>>>	engine_class = config_to_engine_class(config);
>>>	engine_instance = config_to_engine_instance(config);
>>>
>>>	switch (config_to_event_id(config)) {
>>>	case XE_PMU_EVENT_GT_C6_RESIDENCY:
>>>-		if (engine_class || engine_instance)
>>>+		if (engine_class || engine_instance || function_id)
>>>			return false;
>>>		break;
>>>	case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS:
>>>	case XE_PMU_EVENT_ENGINE_TOTAL_TICKS:
>>>		if (!event_to_hwe(event))
>>>			return false;
>>>+		/*
>>>+		 * Two additional functions are required for global(0)
>>>+		 * and PF(1) when SRIOV is enabled
>>>+		 */
>>>+		if (function_id > xe_sriov_pf_get_totalvfs(xe) + 1)
>>>+			return false;
>>>		break;
>>>	}
>>>
>>>@@ -194,15 +217,19 @@ static u64 read_engine_events(struct perf_event *event)
>>>{
>>>	struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
>>>	struct xe_hw_engine *hwe;
>>>-	u64 val = 0;
>>>+	unsigned int function_id;
>>>+	u64 val = 0, config;
>>>+
>>>+	config = event->attr.config;
>>>+	function_id = config_to_function_id(config);
>>>
>>>	hwe = event_to_hwe(event);
>>>	if (!hwe)
>>>		drm_warn(&xe->drm, "unknown pmu engine\n");
>>>-	else if (config_to_event_id(event->attr.config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS)
>>>-		val = xe_guc_engine_activity_active_ticks(hwe, 0);
>>>+	else if (config_to_event_id(config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS)
>>>+		val = xe_guc_engine_activity_active_ticks(hwe, function_id);
>>>	else
>>>-		val = xe_guc_engine_activity_total_ticks(hwe, 0);
>>>+		val = xe_guc_engine_activity_total_ticks(hwe, function_id);
>>>
>>>	return val;
>>>}
>>>@@ -305,6 +332,7 @@ static void xe_pmu_event_del(struct perf_event *event, int flags)
>>>}
>>>
>>>PMU_FORMAT_ATTR(gt,			"config:60-63");
>>>+PMU_FORMAT_ATTR(function,		"config:44-59");
>>>PMU_FORMAT_ATTR(engine_class,		"config:20-27");
>>>PMU_FORMAT_ATTR(engine_instance,	"config:12-19");
>>>PMU_FORMAT_ATTR(event,			"config:0-11");
>>>@@ -313,6 +341,7 @@ static struct attribute *pmu_format_attrs[] = {
>>>	&format_attr_event.attr,
>>>	&format_attr_engine_class.attr,
>>>	&format_attr_engine_instance.attr,
>>>+	&format_attr_function.attr,
>>>	&format_attr_gt.attr,
>>>	NULL,
>>>};
>>>-- 
>>>2.47.1
>>>


More information about the Intel-xe mailing list