[PATCH v5 5/8] drm/xe/xe_pmu: Acquire forcewake on event init for engine events

Ghimiray, Himal Prasad himal.prasad.ghimiray at intel.com
Fri Feb 7 06:51:24 UTC 2025



On 07-02-2025 11:48, Riana Tauro wrote:
> 
> Hi Himal
> 
> On 2/7/2025 8:39 AM, Ghimiray, Himal Prasad wrote:
>>
>>
>> On 06-02-2025 16:13, Riana Tauro wrote:
>>> When the engine events are created, acquire GT forcewake to read gpm
>>> timestamp required for the events and release on event destroy. This
>>> cannot be done during read due to the raw spinlock held my pmu.
>>>
>>> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
>>> Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
>>> Signed-off-by: Riana Tauro <riana.tauro at intel.com>
>>> ---
>>>   drivers/gpu/drm/xe/xe_pmu.c       | 47 +++++++++++++++++++++++++++++--
>>>   drivers/gpu/drm/xe/xe_pmu_types.h |  8 ++++++
>>>   2 files changed, 53 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
>>> index 06a1c72a3838..5b5fe4424aba 100644
>>> --- a/drivers/gpu/drm/xe/xe_pmu.c
>>> +++ b/drivers/gpu/drm/xe/xe_pmu.c
>>> @@ -7,6 +7,7 @@
>>>   #include <linux/device.h>
>>>   #include "xe_device.h"
>>> +#include "xe_force_wake.h"
>>>   #include "xe_gt_idle.h"
>>>   #include "xe_guc_engine_activity.h"
>>>   #include "xe_hw_engine.h"
>>> @@ -102,6 +103,36 @@ static struct xe_hw_engine *event_to_hwe(struct 
>>> perf_event *event)
>>>       return hwe;
>>>   }
>>> +static bool is_engine_event(u64 config)
>>> +{
>>> +    unsigned int event_id = config_to_event_id(config);
>>> +
>>> +    return (event_id == XE_PMU_EVENT_ENGINE_TOTAL_TICKS ||
>>> +        event_id == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);
>>> +}
>>> +
>>> +static void event_gt_forcewake(struct perf_event *event)
>>> +{
>>> +    struct xe_device *xe = container_of(event->pmu, typeof(*xe), 
>>> pmu.base);
>>> +    u64 config = event->attr.config;
>>> +    struct xe_pmu *pmu = &xe->pmu;
>>> +    struct xe_gt *gt;
>>> +    unsigned int fw_ref;
>>> +
>>> +    gt = xe_device_get_gt(xe, config_to_gt_id(config));
>>> +    if (!gt || !is_engine_event(config))
>>> +        return;
>>> +
>>> +    fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
>>> +    if (!fw_ref)
>>> +        return;
>>> +
>>> +    if (!pmu->fw_ref)
>>> +        pmu->fw_ref = fw_ref;
>>> +
>>> +    pmu->fw_count++;
>>> +}
>>> +
>>>   static bool event_supported(struct xe_pmu *pmu, unsigned int gt,
>>>                   unsigned int id)
>>>   {
>>> @@ -144,6 +175,13 @@ static bool event_param_valid(struct perf_event 
>>> *event)
>>>   static void xe_pmu_event_destroy(struct perf_event *event)
>>>   {
>>>       struct xe_device *xe = container_of(event->pmu, typeof(*xe), 
>>> pmu.base);
>>> +    struct xe_pmu *pmu = &xe->pmu;
>>> +    struct xe_gt *gt;
>>> +
>>> +    if (pmu->fw_count--) {
>>> +        gt = xe_device_get_gt(xe, config_to_gt_id(event->attr.config));
>>> +        xe_force_wake_put(gt_to_fw(gt), pmu->fw_ref);
>>> +    }
>>
>>
>> Considering that fw->lock will be acquired and released multiple times 
>> during the put operation, this might create an overhead.
>>
>> How about implementing a _put function that can take the number of 
>> refcounts to decrement as an input parameter, similar to 
>> xe_force_wake_put_many?
> Could you give more details on your suggestion? Would put_many just 
> decrement the count? But wouldn't that still require a lock? Multiple 
> event_destroys can call the function at the same time right?

I was thinking about putting all refcounts at the end of last event 
destroy in case of multiple pmu's.

> 
> 
> One thing that can be done is to take forcewake on first count and 
> release it when the last event is destroyed in cases of multiple
> pmu being used

This sounds even better.

>>
>> If the overhead has already been considered and found to be 
>> acceptable, I am fine with avoiding unnecessary modifications to this 
>> patch.
> This is the first rev for this patch. Open to suggestions
> 
> Background for this patch: force_wake is needed to read the timestamp
> register required for engine events.Cannot take it while reading the 
> register from pmu_read due to a lockdep splat (PROVE_RAW_LOCK_NESTING).
> 
> The suggestion was to take forcewake throughout the duration of event 
> being read
> 
> Thanks
> Riana
>>
>>
>>>       drm_WARN_ON(&xe->drm, event->parent);
>>>       xe_pm_runtime_put(xe);
>>> @@ -183,18 +221,23 @@ static int xe_pmu_event_init(struct perf_event 
>>> *event)
>>>       if (!event->parent) {
>>>           drm_dev_get(&xe->drm);
>>>           xe_pm_runtime_get(xe);
>>> +        event_gt_forcewake(event);
>>>           event->destroy = xe_pmu_event_destroy;
>>>       }
>>>       return 0;
>>>   }
>>> -static u64 read_engine_events(struct perf_event *event)
>>> +static u64 read_engine_events(struct perf_event *event, u64 prev)
>>>   {
>>>       struct xe_device *xe = container_of(event->pmu, typeof(*xe), 
>>> pmu.base);
>>> +    struct xe_pmu *pmu = &xe->pmu;
>>>       struct xe_hw_engine *hwe;
>>>       u64 val = 0;
>>> +    if (!pmu->fw_count)
>>> +        return prev;
>>> +
>>>       hwe = event_to_hwe(event);
>>>       if (!hwe)
>>>           drm_warn(&xe->drm, "unknown pmu engine\n");
>>> @@ -218,7 +261,7 @@ static u64 __xe_pmu_event_read(struct perf_event 
>>> *event, u64 prev)
>>>           return xe_gt_idle_residency_msec(&gt->gtidle);
>>>       case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS:
>>>       case XE_PMU_EVENT_ENGINE_TOTAL_TICKS:
>>> -        return read_engine_events(event);
>>> +        return read_engine_events(event, prev);
>>>       }
>>>       return 0;
>>> diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/ 
>>> xe_pmu_types.h
>>> index f5ba4d56622c..134b3400b19c 100644
>>> --- a/drivers/gpu/drm/xe/xe_pmu_types.h
>>> +++ b/drivers/gpu/drm/xe/xe_pmu_types.h
>>> @@ -30,6 +30,14 @@ struct xe_pmu {
>>>        * @name: Name as registered with perf core.
>>>        */
>>>       const char *name;
>>> +    /**
>>> +     * @fw_ref: force_wake ref
>>> +     */
>>> +    unsigned int fw_ref;
>>> +    /**
>>> +     * @fw_count: force_wake count
>>> +     */
>>> +    unsigned int fw_count;
>>>       /**
>>>        * @supported_events: Bitmap of supported events, indexed by 
>>> event id
>>>        */
>>
> 



More information about the Intel-xe mailing list