[PATCH v7 5/5] drm/xe/xe_pmu: Acquire forcewake on event init for engine events

Lucas De Marchi lucas.demarchi at intel.com
Fri Feb 21 06:26:56 UTC 2025


On Thu, Feb 20, 2025 at 10:45:46PM -0600, Lucas De Marchi wrote:
>On Thu, Feb 20, 2025 at 05:14:07PM -0800, Umesh Nerlige Ramappa wrote:
>>On Thu, Feb 20, 2025 at 03:46:55PM -0600, Lucas De Marchi wrote:
>>>On Fri, Feb 14, 2025 at 03:38:13PM +0530, Riana Tauro wrote:
>>>>When the engine events are created, acquire GT forcewake to read gpm
>>>>timestamp required for the events and release on event destroy. This
>>>>cannot be done during read due to the raw spinlock held by pmu.
>>>>
>>>>v2: remove forcewake counting (Umesh)
>>>>v3: remove extra space (Umesh)
>>>>
>>>>Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
>>>>Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
>>>>Signed-off-by: Riana Tauro <riana.tauro at intel.com>
>>>>Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>>>>---
>>>>drivers/gpu/drm/xe/xe_pmu.c       | 52 +++++++++++++++++++++++++++++--
>>>>drivers/gpu/drm/xe/xe_pmu_types.h |  4 +++
>>>>2 files changed, 54 insertions(+), 2 deletions(-)
>>>>
>>>>diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
>>>>index dc89fa6d0ec5..67693d642f5a 100644
>>>>--- a/drivers/gpu/drm/xe/xe_pmu.c
>>>>+++ b/drivers/gpu/drm/xe/xe_pmu.c
>>>>@@ -7,6 +7,7 @@
>>>>#include <linux/device.h>
>>>>
>>>>#include "xe_device.h"
>>>>+#include "xe_force_wake.h"
>>>>#include "xe_gt_idle.h"
>>>>#include "xe_guc_engine_activity.h"
>>>>#include "xe_hw_engine.h"
>>>>@@ -102,6 +103,37 @@ static struct xe_hw_engine *event_to_hwe(struct perf_event *event)
>>>>	return hwe;
>>>>}
>>>>
>>>>+static bool is_engine_event(u64 config)
>>>>+{
>>>>+	unsigned int event_id = config_to_event_id(config);
>>>>+
>>>>+	return (event_id == XE_PMU_EVENT_ENGINE_TOTAL_TICKS ||
>>>>+		event_id == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);
>>>>+}
>>>>+
>>>>+static bool event_gt_forcewake(struct perf_event *event)
>>>>+{
>>>>+	struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
>>>>+	u64 config = event->attr.config;
>>>>+	struct xe_pmu *pmu = &xe->pmu;
>>>>+	struct xe_gt *gt;
>>>>+	unsigned int fw_ref;
>>>>+
>>>>+	if (!is_engine_event(config))
>>>>+		return true;
>>>>+
>>>>+	gt = xe_device_get_gt(xe, config_to_gt_id(config));
>>>>+
>>>>+	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
>>>>+	if (!fw_ref)
>>>>+		return false;
>>>>+
>>>>+	if (!pmu->fw_ref)
>>>>+		pmu->fw_ref = fw_ref;
>>>
>>>how this shared fw_ref is supposed to work for multiple
>>>perf_event_open()?
>>
>>Agree, not ideal, but I don't see an issue. This forcewake is only 
>>being taken for engine-* events and the domain is always XE_FW_GT. 
>>Looking at xe_force_wake_get(), I see that it returns a mask of 
>>domains enabled. In this case, it would be the XE_FW_GT. The return 
>>value is just stored so that the corresponding event destroy can put 
>>the forcewake.
>>
>>>
>>>fd1 = perf_event_open( ... gt=0 ...);
>>>
>>>	event_gt_forcewake()
>>>		pmu->fw_ref = xe_force_wake_get()
>>>
>>>fd2 = perf_event_open( ... gt=1 ...);
>>>
>>>	event_gt_forcewake()
>>>		// get the forcewake, but don't save the ref
>>>
>>>forcewake for gt1 is never put.
>>
>>pmu->fw_ref should be identical for all events taking this forcewake.
>>
>>>
>>>
>>>Or even multiple perf_event_open() for the same gt: we will not handle
>>>the count correctly.
>>
>>The count is actually handled in domain->ref in the forcewake 
>>implementation and note that forcewake is always taken for every 
>>engine event that is being initialized and hence always being put 
>>for every event that is destroyed. This code is not refcounting 
>>that.
>
>so... we never set pmu->fw_ref back to 0 and any event destroy will try to
>put the force wake? That seems a different bug that avoids the bug I
>was thinking about.

try calling perf stat multiple times and you end up with this:

cat  /sys/kernel/debug/dri/0/info | grep "force wake"
gt0 force wake -1
gt1 force wake 0
cat  /sys/kernel/debug/dri/0/info | grep "force wake"
gt0 force wake -2
gt1 force wake 0

and

[ 1614.778054] RIP: 0010:xe_force_wake_put+0xab6/0x1430 [xe]
[ 1614.778370] Code: ff 41 53 8b 85 f8 fe ff ff 50 ff b5 f0 fe ff ff 44 8b 8d e8 fe ff ff 4c 8b 85 d8 fe ff ff 48 8b 95 d0 fe ff ff e8 4a c3 27 df <0f> 0b 4c 89 e2 48 83 c4 60 48 b8 00 00 00 00 00 fc ff df 48 c1
  ea
[ 1614.778381] RSP: 0018:ffff88812cb2f7d0 EFLAGS: 00010046
[ 1614.778395] RAX: 0000000000000000 RBX: ffff888138b000d0 RCX: 0000000000000000
[ 1614.778405] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[ 1614.778413] RBP: ffff88812cb2f968 R08: 0000000000000000 R09: 0000000000000000
[ 1614.778421] R10: 0000000000000000 R11: 0000000000000000 R12: ffff888138b000e4
[ 1614.778429] R13: 0000000000000001 R14: ffff888138b000d0 R15: 00000000ffffffff
[ 1614.778438] FS:  00007a9b757acd40(0000) GS:ffff8883f0e00000(0000) knlGS:0000000000000000
[ 1614.778447] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1614.778456] CR2: 00007fcac269d5e0 CR3: 000000012a8ee003 CR4: 0000000000f72ef0
[ 1614.778464] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1614.778472] DR3: 0000000000000000 DR6: 00000000ffff07f0 DR7: 0000000000000400
[ 1614.778480] PKRU: 55555554
[ 1614.778488] Call Trace:
[ 1614.778496]  <TASK>
[ 1614.778506]  ? show_regs+0x6c/0x80
[ 1614.778528]  ? __warn+0xd2/0x2e0
[ 1614.778545]  ? xe_force_wake_put+0xab6/0x1430 [xe]
[ 1614.778867]  ? report_bug+0x282/0x2f0
[ 1614.778892]  ? handle_bug+0x6e/0xc0
[ 1614.778906]  ? exc_invalid_op+0x18/0x50
[ 1614.778919]  ? asm_exc_invalid_op+0x1b/0x20
[ 1614.778953]  ? xe_force_wake_put+0xab6/0x1430 [xe]
[ 1614.779318]  ? __pfx_xe_force_wake_put+0x10/0x10 [xe]
[ 1614.779650]  xe_pmu_event_destroy+0x143/0x240 [xe]


>
>>
>>>
>>>In summary I think this fw ref needs to be per event... an easy way to do
>>>that is to use the event->pmu_private field, to be populated on init...
>>
>>I am not opposed to that since that makes it future proof so that we 
>>can indeed have events taking different forcewake domains, but let 
>>me know if I missed something here since I think this alone should 
>>still work.
>>
>>>
>>>>+
>>>>+	return true;
>>>>+}
>>>>+
>>>>static bool event_supported(struct xe_pmu *pmu, unsigned int gt,
>>>>			    unsigned int id)
>>>>{
>>>>@@ -144,6 +176,13 @@ static bool event_param_valid(struct perf_event *event)
>>>>static void xe_pmu_event_destroy(struct perf_event *event)
>>>>{
>>>>	struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
>>>>+	struct xe_pmu *pmu = &xe->pmu;
>>>>+	struct xe_gt *gt;
>>>>+
>>>>+	if (pmu->fw_ref) {
>>>>+		gt = xe_device_get_gt(xe, config_to_gt_id(event->attr.config));
>>>>+		xe_force_wake_put(gt_to_fw(gt), pmu->fw_ref);
>>>>+	}
>>>>
>>>>	drm_WARN_ON(&xe->drm, event->parent);
>>>>	xe_pm_runtime_put(xe);
>>>>@@ -183,18 +222,27 @@ static int xe_pmu_event_init(struct perf_event *event)
>>>>	if (!event->parent) {
>>>>		drm_dev_get(&xe->drm);
>>>>		xe_pm_runtime_get(xe);
>>>>+		if (!event_gt_forcewake(event)) {
>>>
>>>if you group an engine vs non-engine counter, this won't work I think.
>>>Can we test it?
>>
>>When you group events, init is called for each event. From the Xe 
>>PMU implementation perspective, grouping shouldn't be any different.
>
>the event->parent is a shortcut to do this only once per group, but then you
>can't look inside the event to decide if you need to take the forcewake or
>not because it may be true for one event and false for another. Try
>creating the events with the group leader being a non-engine event.
>AFAICS the forcewake will not be taken.... something like (typed here
>without testing):
>
>perf stat  -I1000 -e '{xe_0000_00_02.0/gt-c6-residency/,xe_0000_00_02.0/engine-active-ticks/}'

it seems like I mixed up here. Neither of these will have parent
set - afaics that would be the case for events inherited from one task
to another which is not the case here.

Lucas De Marchi

>
>Lucas De Marchi
>
>>
>>Regards,
>>Umesh
>>
>>>
>>>Lucas De Marchi
>>>
>>>>+			xe_pm_runtime_put(xe);
>>>>+			drm_dev_put(&xe->drm);
>>>>+			return -EINVAL;
>>>>+		}
>>>>		event->destroy = xe_pmu_event_destroy;
>>>>	}
>>>>
>>>>	return 0;
>>>>}
>>>>
>>>>-static u64 read_engine_events(struct xe_gt *gt, struct perf_event *event)
>>>>+static u64 read_engine_events(struct xe_gt *gt, struct perf_event *event, u64 prev)
>>>>{
>>>>	struct xe_device *xe = gt_to_xe(gt);
>>>>+	struct xe_pmu *pmu = &xe->pmu;
>>>>	struct xe_hw_engine *hwe;
>>>>	u64 val = 0;
>>>>
>>>>+	if (!pmu->fw_ref)
>>>>+		return prev;
>>>>+
>>>>	hwe = event_to_hwe(event);
>>>>	if (!hwe)
>>>>		drm_warn(&xe->drm, "unknown engine\n");
>>>>@@ -218,7 +266,7 @@ static u64 __xe_pmu_event_read(struct perf_event *event, u64 prev)
>>>>		return xe_gt_idle_residency_msec(&gt->gtidle);
>>>>	case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS:
>>>>	case XE_PMU_EVENT_ENGINE_TOTAL_TICKS:
>>>>-		return read_engine_events(gt, event);
>>>>+		return read_engine_events(gt, event, prev);
>>>>	}
>>>>
>>>>	return 0;
>>>>diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h
>>>>index f5ba4d56622c..07c4e592106e 100644
>>>>--- a/drivers/gpu/drm/xe/xe_pmu_types.h
>>>>+++ b/drivers/gpu/drm/xe/xe_pmu_types.h
>>>>@@ -30,6 +30,10 @@ struct xe_pmu {
>>>>	 * @name: Name as registered with perf core.
>>>>	 */
>>>>	const char *name;
>>>>+	/**
>>>>+	 * @fw_ref: force_wake ref
>>>>+	 */
>>>>+	unsigned int fw_ref;
>>>>	/**
>>>>	 * @supported_events: Bitmap of supported events, indexed by event id
>>>>	 */
>>>>-- 
>>>>2.47.1
>>>>


More information about the Intel-xe mailing list