[PATCH] drm/xe: Use separate rpm lockdep map for non-d3cold-capable devices

Matthew Auld matthew.auld at intel.com
Fri Aug 23 17:13:02 UTC 2024


On 23/08/2024 17:58, Thomas Hellström wrote:
> For non-d3cold-capable devices we'd like to be able to wake up the
> device from reclaim. In particular, for Lunar Lake we'd like to be
> able to blit CCS metadata to system memory at shrink time; at least
> from kswapd, where it's reasonably OK to wait for rpm resume and a
> preceding rpm suspend.
> 
> Therefore use a separate lockdep map for such devices and prime it
> reclaim-tainted.
> 
> v2:
> - Rename the lockmap acquire and release functions. (Rodrigo Vivi).
> - Reinstate the old xe_pm_runtime_lockdep_prime() function and
>    rename it to xe_rpm_might_enter_cb(). (Matthew Auld).
> - Introduce a separate xe_pm_runtime_lockdep_prime function
>    called from module init for known required locking orders.
> 
> Cc: "Vivi, Rodrigo" <rodrigo.vivi at intel.com>
> Cc: "Auld, Matthew" <matthew.auld at intel.com>
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
>   drivers/gpu/drm/xe/xe_pm.c | 79 +++++++++++++++++++++++++++++++-------
>   drivers/gpu/drm/xe/xe_pm.h |  1 +
>   2 files changed, 66 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
> index 9f3c14fd9f33..747fb96453d3 100644
> --- a/drivers/gpu/drm/xe/xe_pm.c
> +++ b/drivers/gpu/drm/xe/xe_pm.c
> @@ -70,11 +70,29 @@
>    */
>   
>   #ifdef CONFIG_LOCKDEP
> -static struct lockdep_map xe_pm_runtime_lockdep_map = {
> -	.name = "xe_pm_runtime_lockdep_map"
> +static struct lockdep_map xe_pm_runtime_d3cold_map = {
> +	.name = "xe_rpm_d3cold_map"
> +};
> +
> +static struct lockdep_map xe_pm_runtime_nod3cold_map = {
> +	.name = "xe_rpm_nod3cold_map"
>   };
>   #endif
>   
> +static void xe_rpm_lockmap_acquire(const struct xe_device *xe)
> +{
> +	lock_map_acquire(xe->d3cold.capable ?
> +			 &xe_pm_runtime_d3cold_map :
> +			 &xe_pm_runtime_nod3cold_map);
> +}
> +
> +static void xe_rpm_lockmap_release(const struct xe_device *xe)
> +{
> +	lock_map_release(xe->d3cold.capable ?
> +			 &xe_pm_runtime_d3cold_map :
> +			 &xe_pm_runtime_nod3cold_map);
> +}
> +
>   /**
>    * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
>    * @xe: xe device instance
> @@ -354,7 +372,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
>   	 * annotation here and in xe_pm_runtime_get() lockdep will see
>   	 * the potential lock inversion and give us a nice splat.
>   	 */
> -	lock_map_acquire(&xe_pm_runtime_lockdep_map);
> +	xe_rpm_lockmap_acquire(xe);
>   
>   	/*
>   	 * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify
> @@ -386,7 +404,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
>   out:
>   	if (err)
>   		xe_display_pm_resume(xe, true);
> -	lock_map_release(&xe_pm_runtime_lockdep_map);
> +	xe_rpm_lockmap_release(xe);
>   	xe_pm_write_callback_task(xe, NULL);
>   	return err;
>   }
> @@ -407,7 +425,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
>   	/* Disable access_ongoing asserts and prevent recursive pm calls */
>   	xe_pm_write_callback_task(xe, current);
>   
> -	lock_map_acquire(&xe_pm_runtime_lockdep_map);
> +	xe_rpm_lockmap_acquire(xe);
>   
>   	if (xe->d3cold.allowed) {
>   		err = xe_pcode_ready(xe, true);
> @@ -437,7 +455,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
>   			goto out;
>   	}
>   out:
> -	lock_map_release(&xe_pm_runtime_lockdep_map);
> +	xe_rpm_lockmap_release(xe);
>   	xe_pm_write_callback_task(xe, NULL);
>   	return err;
>   }
> @@ -451,15 +469,37 @@ int xe_pm_runtime_resume(struct xe_device *xe)
>    * stuff that can happen inside the runtime_resume callback by acquiring
>    * a dummy lock (it doesn't protect anything and gets compiled out on
>    * non-debug builds).  Lockdep then only needs to see the
> - * xe_pm_runtime_lockdep_map -> runtime_resume callback once, and then can
> - * hopefully validate all the (callers_locks) -> xe_pm_runtime_lockdep_map.
> + * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can
> + * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map.
>    * For example if the (callers_locks) are ever grabbed in the
>    * runtime_resume callback, lockdep should give us a nice splat.
>    */
> -static void pm_runtime_lockdep_prime(void)
> +static void xe_rpm_might_enter_cb(const struct xe_device *xe)
> +{
> +	xe_rpm_lockmap_acquire(xe);
> +	xe_rpm_lockmap_release(xe);
> +}
> +
> +/*
> + * Prime the lockdep maps for known locking orders that need to
> + * be supported but that may not always occur on all systems.
> + */
> +static void xe_pm_runtime_lockdep_prime(void)
>   {
> -	lock_map_acquire(&xe_pm_runtime_lockdep_map);
> -	lock_map_release(&xe_pm_runtime_lockdep_map);
> +	struct dma_resv lockdep_resv;
> +
> +	dma_resv_init(&lockdep_resv);
> +	lock_map_acquire(&xe_pm_runtime_d3cold_map);
> +	/* D3Cold takes the dma_resv locks to evict bos */
> +	dma_resv_lock(&lockdep_resv, NULL);
> +	dma_resv_unlock(&lockdep_resv);
> +	lock_map_release(&xe_pm_runtime_d3cold_map);
> +
> +	/* Shrinkers might like to wake up the device under reclaim. */
> +	fs_reclaim_acquire(GFP_KERNEL);
> +	lock_map_acquire(&xe_pm_runtime_nod3cold_map);
> +	lock_map_release(&xe_pm_runtime_nod3cold_map);
> +	fs_reclaim_release(GFP_KERNEL);

Would it make sense to also prime the d3cold map w.r.t. reclaim, but with 
the opposite ordering? Something like the rough sketch below.
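
Completely untested and only meant to illustrate the ordering I have in 
mind, i.e. the d3cold rpm map taken first, with reclaim nesting inside it 
(a hypothetical sketch, not a suggestion for the final code):

	lock_map_acquire(&xe_pm_runtime_d3cold_map);
	/*
	 * Hypothetical: allocations (and thus reclaim) happening from
	 * within the d3cold suspend/resume paths.
	 */
	fs_reclaim_acquire(GFP_KERNEL);
	fs_reclaim_release(GFP_KERNEL);
	lock_map_release(&xe_pm_runtime_d3cold_map);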

Anyway,
Reviewed-by: Matthew Auld <matthew.auld at intel.com>

>   }
>   
>   /**
> @@ -474,7 +514,7 @@ void xe_pm_runtime_get(struct xe_device *xe)
>   	if (xe_pm_read_callback_task(xe) == current)
>   		return;
>   
> -	pm_runtime_lockdep_prime();
> +	xe_rpm_might_enter_cb(xe);
>   	pm_runtime_resume(xe->drm.dev);
>   }
>   
> @@ -506,7 +546,7 @@ int xe_pm_runtime_get_ioctl(struct xe_device *xe)
>   	if (WARN_ON(xe_pm_read_callback_task(xe) == current))
>   		return -ELOOP;
>   
> -	pm_runtime_lockdep_prime();
> +	xe_rpm_might_enter_cb(xe);
>   	return pm_runtime_get_sync(xe->drm.dev);
>   }
>   
> @@ -574,7 +614,7 @@ bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
>   		return true;
>   	}
>   
> -	pm_runtime_lockdep_prime();
> +	xe_rpm_might_enter_cb(xe);
>   	return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
>   }
>   
> @@ -666,3 +706,14 @@ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
>   	drm_dbg(&xe->drm,
>   		"d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed));
>   }
> +
> +/**
> + * xe_pm_module_init() - Perform xe_pm specific module initialization.
> + *
> + * Return: 0 on success. Currently doesn't fail.
> + */
> +int __init xe_pm_module_init(void)
> +{
> +	xe_pm_runtime_lockdep_prime();
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
> index 104a21ae6dfd..9aef673b1c8a 100644
> --- a/drivers/gpu/drm/xe/xe_pm.h
> +++ b/drivers/gpu/drm/xe/xe_pm.h
> @@ -32,5 +32,6 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe);
>   int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold);
>   void xe_pm_d3cold_allowed_toggle(struct xe_device *xe);
>   struct task_struct *xe_pm_read_callback_task(struct xe_device *xe);
> +int xe_pm_module_init(void);
>   
>   #endif

