[Intel-xe] [PATCH v12 01/13] drm/xe: fix xe_device_mem_access_get() races

Fri Jun 30 15:22:55 UTC 2023


On 6/26/2023 4:20 PM, Matthew Auld wrote:
> It looks like there is at least one race here, given that the
> pm_runtime_suspended() check looks to return false if we are in the
> process of suspending the device (RPM_SUSPENDING vs RPM_SUSPENDED).  We
> later also do xe_pm_runtime_get_if_active(), but since the device is
> suspending or has now suspended, this doesn't do anything either.
> Following from this we can potentially return from
> xe_device_mem_access_get() with the device suspended or about to be,
> leading to broken behaviour.
> 
> Attempt to fix this by always grabbing the runtime ref when our internal
> ref transitions from 0 -> 1. The hard part is then dealing with the
> runtime_pm callbacks also calling xe_device_mem_access_get() and
> deadlocking, which the pm_runtime_suspended() check prevented.
> 
> v2:
>   - ct->lock looks to be primed with fs_reclaim, so holding that and then
>     allocating memory will cause lockdep to complain. Now that we
>     unconditionally grab the mem_access.lock around mem_access_{get,put}, we
>     need to change the ordering wrt to grabbing the ct->lock, since some of
>     the runtime_pm routines can allocate memory (or at least that's what
>     lockdep seems to suggest). Hopefully not a big deal.  It might be that
>     there were already issues with this, just that the atomics where
>     "hiding" the potential issues.
> v3:
>   - Use Thomas Hellström' idea with tracking the active task that is
>     executing in the resume or suspend callback, in order to avoid
>     recursive resume/suspend calls deadlocking on itself.
>   - Split the ct->lock change.
> v4:
>   - Add smb_mb() around accessing the pm_callback_task for extra safety.
>     (Thomas Hellström)
> v5:
>   - Clarify the kernel-doc for the mem_access.lock, given that it is quite
>     strange in what it protects (data vs code). The real motivation is to
>     aid lockdep. (Rodrigo Vivi)
> v6:
>   - Split out the lock change. We still want this as a lockdep aid but
>     only for the xe_device_mem_access_get() path. Sticking a lock on the
>     put() looks be a no-go, also the runtime_put() there is always async.
>   - Now that the lock is gone move to atomics and rely on the pm code
>     serialising multiple callers on the 0 -> 1 transition.
>   - g2h_worker_func() looks to be the next issue, given that
>     suspend-resume callbacks are using CT, so try to handle that.
> v7:
>   - Add xe_device_mem_access_get_if_ongoing(), and use it in
>     g2h_worker_func().
> 
> Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/258
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Cc: Anshuman Gupta <anshuman.gupta at intel.com>
> ---
>   drivers/gpu/drm/xe/xe_device.c       | 58 +++++++++++++++++++-----
>   drivers/gpu/drm/xe/xe_device.h       | 12 ++---
>   drivers/gpu/drm/xe/xe_device_types.h |  6 +++
>   drivers/gpu/drm/xe/xe_guc_ct.c       | 34 +++++++++++++-
>   drivers/gpu/drm/xe/xe_pm.c           | 66 +++++++++++++++++++---------
>   drivers/gpu/drm/xe/xe_pm.h           |  3 +-
>   6 files changed, 134 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index c7985af85a53..1dc552da434f 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -411,27 +411,61 @@ u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
>   		DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0;
>   }
>   
> +bool xe_device_mem_access_ongoing(struct xe_device *xe)
> +{
> +	if (xe_pm_read_callback_task(xe) != NULL)
> +		return true;
> +
> +	return atomic_read(&xe->mem_access.ref);
> +}
> +
> +void xe_device_assert_mem_access(struct xe_device *xe)
> +{
> +	XE_WARN_ON(!xe_device_mem_access_ongoing(xe));
> +}
> +
> +bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe)
> +{
> +	return atomic_inc_not_zero(&xe->mem_access.ref);
> +}
> +
>   void xe_device_mem_access_get(struct xe_device *xe)
>   {
> -	bool resumed = xe_pm_runtime_resume_if_suspended(xe);
> -	int ref = atomic_inc_return(&xe->mem_access.ref);
> +	/*
> +	 * This looks racy, but should be fine since the pm_callback_task only
> +	 * transitions from NULL -> current (and back to NULL again), during the
> +	 * runtime_resume() or runtime_suspend() callbacks, for which there can
> +	 * only be a single one running for our device. We only need to prevent
> +	 * recursively calling the runtime_get or runtime_put from those
> +	 * callbacks, as well as preventing triggering any access_ongoing
> +	 * asserts.
> +	 */
two runtime_suspend() can run in parallel for two different pci device 
those worker thread pooled by pm_wq workqueue, it is not guaranteed 
that tast_struct will be different for two worker spawned by same pm_wq ?

> +	if (xe_pm_read_callback_task(xe) == current)
> +		return;
>   
> -	if (ref == 1)
> -		xe->mem_access.hold_rpm = xe_pm_runtime_get_if_active(xe);
> +	if (!atomic_inc_not_zero(&xe->mem_access.ref)) {
> +		bool hold_rpm = xe_pm_runtime_resume_and_get(xe);
> +		int ref;
>   
> -	/* The usage counter increased if device was immediately resumed */
> -	if (resumed)
> -		xe_pm_runtime_put(xe);
> -
> -	XE_WARN_ON(ref == S32_MAX);
> +		ref = atomic_inc_return(&xe->mem_access.ref);
> +		if (ref == 1)
> +			xe->mem_access.hold_rpm = hold_rpm;
> +		else
> +			xe_pm_runtime_put(xe);
> +	} else {
> +		XE_WARN_ON(atomic_read(&xe->mem_access.ref) == S32_MAX);
> +	}
>   }
>   
>   void xe_device_mem_access_put(struct xe_device *xe)
>   {
> -	bool hold = xe->mem_access.hold_rpm;
> -	int ref = atomic_dec_return(&xe->mem_access.ref);
> +	int ref;
>   
> -	if (!ref && hold)
> +	if (xe_pm_read_callback_task(xe) == current)
> +		return;
> +
> +	ref = atomic_dec_return(&xe->mem_access.ref);
> +	if (ref == 0 && xe->mem_access.hold_rpm)
>   		xe_pm_runtime_put(xe);
>   
>   	XE_WARN_ON(ref < 0);
/snip
> +
>   int xe_pm_runtime_suspend(struct xe_device *xe)
>   {
>   	struct xe_gt *gt;
>   	u8 id;
> -	int err;
> +	int err = 0;
> +
> +	if (xe->d3cold_allowed && xe_device_mem_access_ongoing(xe))
> +		return -EBUSY;
Not related to this patch but We should return -EBUSY even for d3hot as 
well.
Br,
Anshuman Gupta
> +
> +	/* Disable access_ongoing asserts and prevent recursive pm calls */
> +	xe_pm_write_callback_task(xe, current);
>   
>   	if (xe->d3cold_allowed) {
> -		if (xe_device_mem_access_ongoing(xe))
> -			return -EBUSY;
> -
>   		err = xe_bo_evict_all(xe);
>   		if (err)
> -			return err;
> +			goto out;
>   	}
>   
>   	for_each_gt(gt, xe, id) {
>   		err = xe_gt_suspend(gt);
>   		if (err)
> -			return err;
> +			goto out;
>   	}
>   
>   	xe_irq_suspend(xe);
> -
> -	return 0;
> +out:
> +	xe_pm_write_callback_task(xe, NULL);
> +	return err;
>   }
>   
>   int xe_pm_runtime_resume(struct xe_device *xe)
>   {
>   	struct xe_gt *gt;
>   	u8 id;
> -	int err;
> +	int err = 0;
> +
> +	/* Disable access_ongoing asserts and prevent recursive pm calls */
> +	xe_pm_write_callback_task(xe, current);
>   
>   	if (xe->d3cold_allowed) {
>   		for_each_gt(gt, xe, id) {
>   			err = xe_pcode_init(gt);
>   			if (err)
> -				return err;
> +				goto out;
>   		}
>   
>   		/*
> @@ -182,7 +210,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
>   		 */
>   		err = xe_bo_restore_kernel(xe);
>   		if (err)
> -			return err;
> +			goto out;
>   	}
>   
>   	xe_irq_resume(xe);
> @@ -193,10 +221,11 @@ int xe_pm_runtime_resume(struct xe_device *xe)
>   	if (xe->d3cold_allowed) {
>   		err = xe_bo_restore_user(xe);
>   		if (err)
> -			return err;
> +			goto out;
>   	}
> -
> -	return 0;
> +out:
> +	xe_pm_write_callback_task(xe, NULL);
> +	return err;
>   }
>   
>   int xe_pm_runtime_get(struct xe_device *xe)
> @@ -210,14 +239,9 @@ int xe_pm_runtime_put(struct xe_device *xe)
>   	return pm_runtime_put_autosuspend(xe->drm.dev);
>   }
>   
> -/* Return true if resume operation happened and usage count was increased */
> -bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe)
> +bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
>   {
> -	/* In case we are suspended we need to immediately wake up */
> -	if (pm_runtime_suspended(xe->drm.dev))
> -		return !pm_runtime_resume_and_get(xe->drm.dev);
> -
> -	return false;
> +	return !pm_runtime_resume_and_get(xe->drm.dev);
>   }
>   
>   int xe_pm_runtime_get_if_active(struct xe_device *xe)
> diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
> index 6a885585f653..e92c508d44b9 100644
> --- a/drivers/gpu/drm/xe/xe_pm.h
> +++ b/drivers/gpu/drm/xe/xe_pm.h
> @@ -19,7 +19,8 @@ int xe_pm_runtime_suspend(struct xe_device *xe);
>   int xe_pm_runtime_resume(struct xe_device *xe);
>   int xe_pm_runtime_get(struct xe_device *xe);
>   int xe_pm_runtime_put(struct xe_device *xe);
> -bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe);
> +bool xe_pm_runtime_resume_and_get(struct xe_device *xe);
>   int xe_pm_runtime_get_if_active(struct xe_device *xe);
> +struct task_struct *xe_pm_read_callback_task(struct xe_device *xe);
>   
>   #endif