[PATCH v5 5/8] drm/xe/xe_pmu: Acquire forcewake on event init for engine events
Riana Tauro
riana.tauro at intel.com
Fri Feb 7 06:18:21 UTC 2025
Hi Himal
On 2/7/2025 8:39 AM, Ghimiray, Himal Prasad wrote:
>
>
> On 06-02-2025 16:13, Riana Tauro wrote:
>> When the engine events are created, acquire GT forcewake to read the gpm
>> timestamp required for the events, and release it on event destroy. This
>> cannot be done during the read due to the raw spinlock held by the PMU.
>>
>> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
>> Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
>> Signed-off-by: Riana Tauro <riana.tauro at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_pmu.c | 47 +++++++++++++++++++++++++++++--
>> drivers/gpu/drm/xe/xe_pmu_types.h | 8 ++++++
>> 2 files changed, 53 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
>> index 06a1c72a3838..5b5fe4424aba 100644
>> --- a/drivers/gpu/drm/xe/xe_pmu.c
>> +++ b/drivers/gpu/drm/xe/xe_pmu.c
>> @@ -7,6 +7,7 @@
>> #include <linux/device.h>
>> #include "xe_device.h"
>> +#include "xe_force_wake.h"
>> #include "xe_gt_idle.h"
>> #include "xe_guc_engine_activity.h"
>> #include "xe_hw_engine.h"
>> @@ -102,6 +103,36 @@ static struct xe_hw_engine *event_to_hwe(struct perf_event *event)
>> return hwe;
>> }
>> +static bool is_engine_event(u64 config)
>> +{
>> + unsigned int event_id = config_to_event_id(config);
>> +
>> + return (event_id == XE_PMU_EVENT_ENGINE_TOTAL_TICKS ||
>> + event_id == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);
>> +}
>> +
>> +static void event_gt_forcewake(struct perf_event *event)
>> +{
>> + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
>> + u64 config = event->attr.config;
>> + struct xe_pmu *pmu = &xe->pmu;
>> + struct xe_gt *gt;
>> + unsigned int fw_ref;
>> +
>> + gt = xe_device_get_gt(xe, config_to_gt_id(config));
>> + if (!gt || !is_engine_event(config))
>> + return;
>> +
>> + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
>> + if (!fw_ref)
>> + return;
>> +
>> + if (!pmu->fw_ref)
>> + pmu->fw_ref = fw_ref;
>> +
>> + pmu->fw_count++;
>> +}
>> +
>> static bool event_supported(struct xe_pmu *pmu, unsigned int gt,
>> unsigned int id)
>> {
>> @@ -144,6 +175,13 @@ static bool event_param_valid(struct perf_event *event)
>> static void xe_pmu_event_destroy(struct perf_event *event)
>> {
>> struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
>> + struct xe_pmu *pmu = &xe->pmu;
>> + struct xe_gt *gt;
>> +
>> + if (pmu->fw_count--) {
>> + gt = xe_device_get_gt(xe, config_to_gt_id(event->attr.config));
>> + xe_force_wake_put(gt_to_fw(gt), pmu->fw_ref);
>> + }
>
>
> Considering that fw->lock will be acquired and released multiple times
> during the put operation, this might add overhead.
>
> How about implementing a _put function that takes the number of
> refcounts to decrement as an input parameter, similar to
> xe_force_wake_put_many?
Could you give more details on your suggestion? Would put_many just
decrement the count? But wouldn't that still require taking the lock?
Multiple event_destroys can call the function at the same time, right?
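For reference, this is what I understood the suggestion to be (rough
sketch only; fw_domains_put_locked is a hypothetical helper and the
locking details are assumptions, not the current xe API):

void xe_force_wake_put_many(struct xe_force_wake *fw, unsigned int fw_ref,
			    unsigned int count)
{
	unsigned long flags;

	spin_lock_irqsave(&fw->lock, flags);
	/*
	 * Drop @count references from each domain in @fw_ref and put
	 * domains whose refcount reaches zero back to sleep. This still
	 * needs fw->lock, but takes it once instead of once per put.
	 */
	fw_domains_put_locked(fw, fw_ref, count); /* hypothetical */
	spin_unlock_irqrestore(&fw->lock, flags);
}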
One thing that can be done is to take forcewake when the first event is
created and release it only when the last event is destroyed, for the
case where multiple events are in use.
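Roughly like this (illustrative sketch; pmu->fw_lock is a hypothetical
mutex that is not part of this patch, and error handling is elided):

static void xe_pmu_event_fw_get(struct xe_pmu *pmu, struct xe_gt *gt)
{
	mutex_lock(&pmu->fw_lock);	/* hypothetical */
	if (pmu->fw_count++ == 0)
		pmu->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
	mutex_unlock(&pmu->fw_lock);
}

static void xe_pmu_event_fw_put(struct xe_pmu *pmu, struct xe_gt *gt)
{
	mutex_lock(&pmu->fw_lock);
	if (--pmu->fw_count == 0)
		xe_force_wake_put(gt_to_fw(gt), pmu->fw_ref);
	mutex_unlock(&pmu->fw_lock);
}

Event init and destroy run in process context, so a mutex would be fine
there; the raw-lock restriction only applies to the read path.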
>
> If the overhead has already been considered and found to be acceptable,
> I am fine with avoiding unnecessary modifications to this patch.
This is the first revision of this patch. Open to suggestions.

Background for this patch: force_wake is needed to read the timestamp
register required for the engine events. It cannot be taken while reading
the register from pmu_read due to a lockdep splat (PROVE_RAW_LOCK_NESTING):
perf invokes the read callback with raw spinlocks held, and forcewake
internally takes a regular spinlock, which must not nest under a raw lock.
The suggestion was instead to hold forcewake for the lifetime of the
event.
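Schematically, the nesting lockdep complains about looks like this
(simplified; the exact perf-side lock varies):

perf core: raw_spinlock_t held around pmu->read()
  -> xe_pmu event read
     -> xe_force_wake_get()
        -> spin_lock_irqsave(&fw->lock, ...)
           /* spinlock_t under raw_spinlock_t: PROVE_RAW_LOCK_NESTING */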
Thanks
Riana
>
>
>> drm_WARN_ON(&xe->drm, event->parent);
>> xe_pm_runtime_put(xe);
>> @@ -183,18 +221,23 @@ static int xe_pmu_event_init(struct perf_event *event)
>> if (!event->parent) {
>> drm_dev_get(&xe->drm);
>> xe_pm_runtime_get(xe);
>> + event_gt_forcewake(event);
>> event->destroy = xe_pmu_event_destroy;
>> }
>> return 0;
>> }
>> -static u64 read_engine_events(struct perf_event *event)
>> +static u64 read_engine_events(struct perf_event *event, u64 prev)
>> {
>> struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
>> + struct xe_pmu *pmu = &xe->pmu;
>> struct xe_hw_engine *hwe;
>> u64 val = 0;
>> + if (!pmu->fw_count)
>> + return prev;
>> +
>> hwe = event_to_hwe(event);
>> if (!hwe)
>> drm_warn(&xe->drm, "unknown pmu engine\n");
>> @@ -218,7 +261,7 @@ static u64 __xe_pmu_event_read(struct perf_event *event, u64 prev)
>> return xe_gt_idle_residency_msec(&gt->gtidle);
>> case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS:
>> case XE_PMU_EVENT_ENGINE_TOTAL_TICKS:
>> - return read_engine_events(event);
>> + return read_engine_events(event, prev);
>> }
>> return 0;
>> diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h
>> index f5ba4d56622c..134b3400b19c 100644
>> --- a/drivers/gpu/drm/xe/xe_pmu_types.h
>> +++ b/drivers/gpu/drm/xe/xe_pmu_types.h
>> @@ -30,6 +30,14 @@ struct xe_pmu {
>> * @name: Name as registered with perf core.
>> */
>> const char *name;
>> + /**
>> + * @fw_ref: force_wake ref
>> + */
>> + unsigned int fw_ref;
>> + /**
>> + * @fw_count: force_wake count
>> + */
>> + unsigned int fw_count;
>> /**
>> * @supported_events: Bitmap of supported events, indexed by event id
>> */
>