[PATCH v7 5/5] drm/xe/xe_pmu: Acquire forcewake on event init for engine events
Riana Tauro
riana.tauro at intel.com
Fri Feb 21 06:32:02 UTC 2025
Hi Lucas
On 2/21/2025 11:56 AM, Lucas De Marchi wrote:
> On Thu, Feb 20, 2025 at 10:45:46PM -0600, Lucas De Marchi wrote:
>> On Thu, Feb 20, 2025 at 05:14:07PM -0800, Umesh Nerlige Ramappa wrote:
>>> On Thu, Feb 20, 2025 at 03:46:55PM -0600, Lucas De Marchi wrote:
>>>> On Fri, Feb 14, 2025 at 03:38:13PM +0530, Riana Tauro wrote:
>>>>> When the engine events are created, acquire GT forcewake to read gpm
>>>>> timestamp required for the events and release on event destroy. This
>>>>> cannot be done during read due to the raw spinlock held my pmu.
>>>>>
>>>>> v2: remove forcewake counting (Umesh)
>>>>> v3: remove extra space (Umesh)
>>>>>
>>>>> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
>>>>> Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
>>>>> Signed-off-by: Riana Tauro <riana.tauro at intel.com>
>>>>> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>>>>> ---
>>>>> drivers/gpu/drm/xe/xe_pmu.c | 52 +++++++++++++++++++++++++++++--
>>>>> drivers/gpu/drm/xe/xe_pmu_types.h | 4 +++
>>>>> 2 files changed, 54 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
>>>>> index dc89fa6d0ec5..67693d642f5a 100644
>>>>> --- a/drivers/gpu/drm/xe/xe_pmu.c
>>>>> +++ b/drivers/gpu/drm/xe/xe_pmu.c
>>>>> @@ -7,6 +7,7 @@
>>>>> #include <linux/device.h>
>>>>>
>>>>> #include "xe_device.h"
>>>>> +#include "xe_force_wake.h"
>>>>> #include "xe_gt_idle.h"
>>>>> #include "xe_guc_engine_activity.h"
>>>>> #include "xe_hw_engine.h"
>>>>> @@ -102,6 +103,37 @@ static struct xe_hw_engine
>>>>> *event_to_hwe(struct perf_event *event)
>>>>> return hwe;
>>>>> }
>>>>>
>>>>> +static bool is_engine_event(u64 config)
>>>>> +{
>>>>> + unsigned int event_id = config_to_event_id(config);
>>>>> +
>>>>> + return (event_id == XE_PMU_EVENT_ENGINE_TOTAL_TICKS ||
>>>>> + event_id == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);
>>>>> +}
>>>>> +
>>>>> +static bool event_gt_forcewake(struct perf_event *event)
>>>>> +{
>>>>> + struct xe_device *xe = container_of(event->pmu, typeof(*xe),
>>>>> pmu.base);
>>>>> + u64 config = event->attr.config;
>>>>> + struct xe_pmu *pmu = &xe->pmu;
>>>>> + struct xe_gt *gt;
>>>>> + unsigned int fw_ref;
>>>>> +
>>>>> + if (!is_engine_event(config))
>>>>> + return true;
>>>>> +
>>>>> + gt = xe_device_get_gt(xe, config_to_gt_id(config));
>>>>> +
>>>>> + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
>>>>> + if (!fw_ref)
>>>>> + return false;
>>>>> +
>>>>> + if (!pmu->fw_ref)
>>>>> + pmu->fw_ref = fw_ref;
>>>>
>>>> how this shared fw_ref is supposed to work for multiple
>>>> perf_event_open()?
>>>
>>> Agree, not ideal, but I don't see an issue. This forcewake is only
>>> being taken for engine-* events and the domain is always XE_FW_GT.
>>> Looking at xe_force_wake_get(), I see that it returns a mask of
>>> domains enabled. In this case, it would be the XE_FW_GT. The return
>>> value is just stored so that the corresponding event destroy can put
>>> the forcewake.
>>>
>>>>
>>>> fd1 = perf_event_open( ... gt=0 ...);
>>>>
>>>> event_get_forcewake()
>>>> pmu->fw_ref = xe_force_wake_get()
>>>>
>>>> fd2 = perf_event_open( ... gt=1 ...);
>>>>
>>>> event_get_forcewake()
>>>> // get the forcewake, but don't save the ref
>>>>
>>>> forcewake for gt1 is never put.
>>>
>>> pmu->fw_ref should be identical for all events taking this forcewake.
>>>
>>>>
>>>>
>>>> Or even multiple perf_event_open() for the same gt: we will not handle
>>>> the count correctly.
>>>
>>> The count is actually handled in domain->ref in the forcewake
>>> implementation and note that forcewake is always taken for every
>>> engine event that is being initialized and hence always being put for
>>> every event that is destroyed. This code is not refcounting that.
>>
>> so... we never set pmu->fw_ref back to 0 and any event destroy will
>> try to
>> put the force wake? That seems a different bug that avoids the bug I
>> was thinking about.
>
> try calling perf stat multiple times and you end up with this:
>
> cat /sys/kernel/debug/dri/0/info | grep "force wake"
> gt0 force wake -1
> gt1 force wake 0
> cat /sys/kernel/debug/dri/0/info | grep "force wake"
> gt0 force wake -2
> gt1 force wake 0
This would be for non-engine events and engine events with non-engine
events as leader?
>
> and
>
> [ 1614.778054] RIP: 0010:xe_force_wake_put+0xab6/0x1430 [xe]
> [ 1614.778370] Code: ff 41 53 8b 85 f8 fe ff ff 50 ff b5 f0 fe ff ff 44
> 8b 8d e8 fe ff ff 4c 8b 85 d8 fe ff ff 48 8b 95 d0 fe ff ff e8 4a c3 27
> df <0f> 0b 4c 89 e2 48 83 c4 60 48 b8 00 00 00 00 00 fc ff df 48 c1
> ea
> [ 1614.778381] RSP: 0018:ffff88812cb2f7d0 EFLAGS: 00010046
> [ 1614.778395] RAX: 0000000000000000 RBX: ffff888138b000d0 RCX:
> 0000000000000000
> [ 1614.778405] RDX: 0000000000000000 RSI: 0000000000000000 RDI:
> 0000000000000000
> [ 1614.778413] RBP: ffff88812cb2f968 R08: 0000000000000000 R09:
> 0000000000000000
> [ 1614.778421] R10: 0000000000000000 R11: 0000000000000000 R12:
> ffff888138b000e4
> [ 1614.778429] R13: 0000000000000001 R14: ffff888138b000d0 R15:
> 00000000ffffffff
> [ 1614.778438] FS: 00007a9b757acd40(0000) GS:ffff8883f0e00000(0000)
> knlGS:0000000000000000
> [ 1614.778447] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 1614.778456] CR2: 00007fcac269d5e0 CR3: 000000012a8ee003 CR4:
> 0000000000f72ef0
> [ 1614.778464] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
> 0000000000000000
> [ 1614.778472] DR3: 0000000000000000 DR6: 00000000ffff07f0 DR7:
> 0000000000000400
> [ 1614.778480] PKRU: 55555554
> [ 1614.778488] Call Trace:
> [ 1614.778496] <TASK>
> [ 1614.778506] ? show_regs+0x6c/0x80
> [ 1614.778528] ? __warn+0xd2/0x2e0
> [ 1614.778545] ? xe_force_wake_put+0xab6/0x1430 [xe]
> [ 1614.778867] ? report_bug+0x282/0x2f0
> [ 1614.778892] ? handle_bug+0x6e/0xc0
> [ 1614.778906] ? exc_invalid_op+0x18/0x50
> [ 1614.778919] ? asm_exc_invalid_op+0x1b/0x20
> [ 1614.778953] ? xe_force_wake_put+0xab6/0x1430 [xe]
> [ 1614.779318] ? __pfx_xe_force_wake_put+0x10/0x10 [xe]
> [ 1614.779650] xe_pmu_event_destroy+0x143/0x240 [xe]
>
>
>>
>>>
>>>>
>>>> In summary I think this fw ref needs to be per event... an easy way
>>>> to do
>>>> that is to use the event->pmu_private field, to be populated on init...
>>>
>>> I am not opposed to that since that makes it future proof so that we
>>> can indeed have events taking different forcewake domains, but let me
>>> know if I missed something here since I think this alone should still
>>> work.
>>>
>>>>
>>>>> +
>>>>> + return true;
>>>>> +}
>>>>> +
>>>>> static bool event_supported(struct xe_pmu *pmu, unsigned int gt,
>>>>> unsigned int id)
>>>>> {
>>>>> @@ -144,6 +176,13 @@ static bool event_param_valid(struct
>>>>> perf_event *event)
>>>>> static void xe_pmu_event_destroy(struct perf_event *event)
>>>>> {
>>>>> struct xe_device *xe = container_of(event->pmu, typeof(*xe),
>>>>> pmu.base);
>>>>> + struct xe_pmu *pmu = &xe->pmu;
>>>>> + struct xe_gt *gt;
>>>>> +
>>>>> + if (pmu->fw_ref) {
>>>>> + gt = xe_device_get_gt(xe, config_to_gt_id(event-
>>>>> >attr.config));
>>>>> + xe_force_wake_put(gt_to_fw(gt), pmu->fw_ref);
>>>>> + }
>>>>>
>>>>> drm_WARN_ON(&xe->drm, event->parent);
>>>>> xe_pm_runtime_put(xe);
>>>>> @@ -183,18 +222,27 @@ static int xe_pmu_event_init(struct
>>>>> perf_event *event)
>>>>> if (!event->parent) {
>>>>> drm_dev_get(&xe->drm);
>>>>> xe_pm_runtime_get(xe);
>>>>> + if (!event_gt_forcewake(event)) {
>>>>
>>>> if you group an engine vs non-engine counter, this won't work I think.
>>>> Can we test it?
>>>
>>> When you group events, init is called for each event. From the Xe PMU
>>> implementation perspective, grouping shouldn't be any different.
>>
>> the event->parent is a shortcut to do this only once per group, but
>> then you
>> can't look inside the event to decide if you need to take the
>> forcewake or
>> not because it may be true for one event and false for another. Try
>> creating the events with the group leader being a non-engine event.
>> AFAICS the forcewake will not be taken.... something like (typed here
>> without testing):
>>
>> perf stat -I1000 -e '{xe_0000_00_02.0/gt-c6-
>> residency/,xe_0000_00_02.0/engine-active-ticks/}'
>
> it seems like I mixed up here. Neither of these will have parent
> set - afaics that would be the case for events inherited from one task
> to another which is not the case here.
>
> Lucas De Marchi
>
>>
>> Lucas De Marchi
>>
>>>
>>> Regards,
>>> Umesh
>>>
>>>>
>>>> Lucas De Marchi
>>>>
>>>>> + xe_pm_runtime_put(xe);
>>>>> + drm_dev_put(&xe->drm);
>>>>> + return -EINVAL;
>>>>> + }
>>>>> event->destroy = xe_pmu_event_destroy;
>>>>> }
>>>>>
>>>>> return 0;
>>>>> }
>>>>>
>>>>> -static u64 read_engine_events(struct xe_gt *gt, struct perf_event
>>>>> *event)
>>>>> +static u64 read_engine_events(struct xe_gt *gt, struct perf_event
>>>>> *event, u64 prev)
>>>>> {
>>>>> struct xe_device *xe = gt_to_xe(gt);
>>>>> + struct xe_pmu *pmu = &xe->pmu;
>>>>> struct xe_hw_engine *hwe;
>>>>> u64 val = 0;
>>>>>
>>>>> + if (!pmu->fw_ref)
>>>>> + return prev;
>>>>> +
>>>>> hwe = event_to_hwe(event);
>>>>> if (!hwe)
>>>>> drm_warn(&xe->drm, "unknown engine\n");
>>>>> @@ -218,7 +266,7 @@ static u64 __xe_pmu_event_read(struct
>>>>> perf_event *event, u64 prev)
>>>>> return xe_gt_idle_residency_msec(>->gtidle);
>>>>> case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS:
>>>>> case XE_PMU_EVENT_ENGINE_TOTAL_TICKS:
>>>>> - return read_engine_events(gt, event);
>>>>> + return read_engine_events(gt, event, prev);
>>>>> }
>>>>>
>>>>> return 0;
>>>>> diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/
>>>>> xe/xe_pmu_types.h
>>>>> index f5ba4d56622c..07c4e592106e 100644
>>>>> --- a/drivers/gpu/drm/xe/xe_pmu_types.h
>>>>> +++ b/drivers/gpu/drm/xe/xe_pmu_types.h
>>>>> @@ -30,6 +30,10 @@ struct xe_pmu {
>>>>> * @name: Name as registered with perf core.
>>>>> */
>>>>> const char *name;
>>>>> + /**
>>>>> + * @fw_ref: force_wake ref
>>>>> + */
>>>>> + unsigned int fw_ref;
>>>>> /**
>>>>> * @supported_events: Bitmap of supported events, indexed by
>>>>> event id
>>>>> */
>>>>> --
>>>>> 2.47.1
>>>>>
More information about the Intel-xe
mailing list