[Intel-gfx] [PATCH] drm/i915: Fix NPD in PMU during driver teardown

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Thu Jun 30 10:29:54 UTC 2022


On 29/06/2022 19:46, Stuart Summers wrote:
> In the driver teardown, we are unregistering the gt prior
> to unregistering the PMU. This means there is a small window
> of time in which the application can request metrics from the
> PMU, some of which are calling into the uapi engines list,
> while the engines are not available. In this case we can
> see null pointer dereferences.
> 
> Prevent this by simply checking if the engines are present
> when those PMU events come through. Print a debug message
> indicating when they aren't available.

Obvious question - can we just move PMU unregister PMU to before 
unregister GT?

Regards,

Tvrtko

> Signed-off-by: Stuart Summers <stuart.summers at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_pmu.c | 72 +++++++++++++++++++--------------
>   1 file changed, 42 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 958b37123bf1..796a1d8e36f2 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -670,21 +670,28 @@ static void i915_pmu_enable(struct perf_event *event)
>   	if (is_engine_event(event)) {
>   		u8 sample = engine_event_sample(event);
>   		struct intel_engine_cs *engine;
> -
> -		engine = intel_engine_lookup_user(i915,
> -						  engine_event_class(event),
> -						  engine_event_instance(event));
> -
> -		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
> -			     I915_ENGINE_SAMPLE_COUNT);
> -		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
> -			     I915_ENGINE_SAMPLE_COUNT);
> -		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
> -		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
> -		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
> -
> -		engine->pmu.enable |= BIT(sample);
> -		engine->pmu.enable_count[sample]++;
> +		u8 class = engine_event_class(event);
> +		u8 instance = engine_event_instance(event);
> +
> +		engine = intel_engine_lookup_user(i915, class, instance);
> +		if (engine) {
> +			BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
> +				     I915_ENGINE_SAMPLE_COUNT);
> +			BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
> +				     I915_ENGINE_SAMPLE_COUNT);
> +			GEM_BUG_ON(sample >=
> +				   ARRAY_SIZE(engine->pmu.enable_count));
> +			GEM_BUG_ON(sample >=
> +				   ARRAY_SIZE(engine->pmu.sample));
> +			GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
> +
> +			engine->pmu.enable |= BIT(sample);
> +			engine->pmu.enable_count[sample]++;
> +		} else {
> +			drm_dbg(&i915->drm,
> +				"Invalid engine event: { class:%d, inst:%d }\n",
> +				class, instance);
> +		}
>   	}
>   
>   	spin_unlock_irqrestore(&pmu->lock, flags);
> @@ -714,21 +721,26 @@ static void i915_pmu_disable(struct perf_event *event)
>   	if (is_engine_event(event)) {
>   		u8 sample = engine_event_sample(event);
>   		struct intel_engine_cs *engine;
> -
> -		engine = intel_engine_lookup_user(i915,
> -						  engine_event_class(event),
> -						  engine_event_instance(event));
> -
> -		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
> -		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
> -		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
> -
> -		/*
> -		 * Decrement the reference count and clear the enabled
> -		 * bitmask when the last listener on an event goes away.
> -		 */
> -		if (--engine->pmu.enable_count[sample] == 0)
> -			engine->pmu.enable &= ~BIT(sample);
> +		u8 class = engine_event_class(event);
> +		u8 instance = engine_event_instance(event);
> +
> +		engine = intel_engine_lookup_user(i915, class, instance);
> +		if (engine) {
> +			GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
> +			GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
> +			GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
> +
> +			/*
> +			 * Decrement the reference count and clear the enabled
> +			 * bitmask when the last listener on an event goes away.
> +			 */
> +			if (--engine->pmu.enable_count[sample] == 0)
> +				engine->pmu.enable &= ~BIT(sample);
> +		} else {
> +			drm_dbg(&i915->drm,
> +				"Invalid engine event: { class:%d, inst:%d }\n",
> +				class, instance);
> +		}
>   	}
>   
>   	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));


More information about the Intel-gfx mailing list