[PATCH v2] drm/amdkfd: Fixed kfd_process cleanup on module exit.

Wed Mar 8 22:50:48 UTC 2023

On 2023-03-08 17:03, David Belanger wrote:
> Handle case when module is unloaded (kfd_exit) before a process space
> (mm_struct) is released.
>
> v2: Fixed potential race conditions by removing all kfd_process from
> the process table first, then working on releasing the resources.
>
> Signed-off-by: David Belanger <david.belanger at amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_module.c  |  4 ++
>   drivers/gpu/drm/amd/amdkfd/kfd_process.c | 80 +++++++++++++++++++++---
>   2 files changed, 77 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
> index 09b966dc3768..8ef4bd9e4f7d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
> @@ -26,6 +26,9 @@
>   #include "kfd_priv.h"
>   #include "amdgpu_amdkfd.h"
>   
> +void kfd_cleanup_processes(void);

This should be declared in a header file.

> +
> +
>   static int kfd_init(void)
>   {
>   	int err;
> @@ -77,6 +80,7 @@ static int kfd_init(void)
>   
>   static void kfd_exit(void)
>   {
> +	kfd_cleanup_processes();
>   	kfd_debugfs_fini();
>   	kfd_process_destroy_wq();
>   	kfd_procfs_shutdown();
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index ebabe92f7edb..dd396a93a68d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1167,6 +1167,19 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn)
>   	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
>   }
>   
> +
> +static void kfd_process_notifier_release_internal(struct kfd_process *p)
> +{
> +	cancel_delayed_work_sync(&p->eviction_work);
> +	cancel_delayed_work_sync(&p->restore_work);
> +
> +	/* Indicate to other users that MM is no longer valid */
> +	p->mm = NULL;
> +
> +	mmu_notifier_put(&p->mmu_notifier);
> +}
> +
> +

You seem to like double empty newlines, as you're adding them before and 
after every function in this patch. It doesn't make sense here at least, 
because kfd_process_notifier_release_internal() is so closely related to 
kfd_process_notifier_release().

>   static void kfd_process_notifier_release(struct mmu_notifier *mn,
>   					struct mm_struct *mm)
>   {
> @@ -1181,25 +1194,78 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
>   		return;
>   
>   	mutex_lock(&kfd_processes_mutex);
> +	/*
> +	 * Do early return if p is not in the table.
> +	 *
> +	 * This could potentially happen if this function is called concurrently
> +	 * by mmu_notifier and by kfd_cleanup_pocesses.
> +	 *
> +	 */
> +	if (!hash_hashed(&p->kfd_processes)) {
> +		mutex_unlock(&kfd_processes_mutex);

This won't give you the expected result when the process is still in the 
local cleanup_list in kfd_cleanup_processes, because it just tells you 
whether the process is on any list. However, if you get here holding the 
kfd_processes_mutex, kfd_cleanup_processes has either not entered its 
critical section yet, or it has completed it and the kfd_processes_table 
is empty. So you can check hash_empty(kfd_processes_table) here and exit 
early if it is empty.

> +		return;
> +	}
>   	hash_del_rcu(&p->kfd_processes);
>   	mutex_unlock(&kfd_processes_mutex);
>   	synchronize_srcu(&kfd_processes_srcu);
>   
> -	cancel_delayed_work_sync(&p->eviction_work);
> -	cancel_delayed_work_sync(&p->restore_work);
> -
> -	/* Indicate to other users that MM is no longer valid */
> -	p->mm = NULL;
> -
> -	mmu_notifier_put(&p->mmu_notifier);
> +	kfd_process_notifier_release_internal(p);
>   }
>   
> +

Extra newline.

>   static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
>   	.release = kfd_process_notifier_release,
>   	.alloc_notifier = kfd_process_alloc_notifier,
>   	.free_notifier = kfd_process_free_notifier,
>   };
>   
> +
> +void kfd_cleanup_processes(void)
> +{
> +	/*
> +	 * This code handles the case when driver is being unloaded before all
> +	 * mm_struct are released.  We need to safely free the kfd_process and
> +	 * avoid race conditions with mmu_notifier that might try to free them.
> +	 *
> +	 */
> +
> +	struct kfd_process *p;
> +	struct hlist_node *p_temp;
> +	unsigned int temp;
> +	HLIST_HEAD(cleanup_list);
> +
> +	/*
> +	 * Move all remaining kfd_process from the process table to a
> +	 * temp list for processing.   Once done, callback from mmu_notifier
> +	 * release will not see the kfd_process in the table and do early return,
> +	 * avoiding double free issues.
> +	 */
> +	mutex_lock(&kfd_processes_mutex);
> +	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {

This needs to use hash_for_each_safe to allow safe removal of elements 
in the loop. You can't use hash_for_each_rcu because you're not in an 
SRCU read-side critical section.

> +		hash_del_rcu(&p->kfd_processes);
> +		hlist_add_head(&p->kfd_processes, &cleanup_list);
> +	}
> +	mutex_unlock(&kfd_processes_mutex);
> +	synchronize_srcu(&kfd_processes_srcu);

You'll need synchronize_srcu before hlist_add_head above to make sure no 
other thread still depends on the element you're about to attach to a 
different list. Otherwise you break concurrent hash_for_each_rcu loops.

> +
> +	/*
> +	 * Release resources for all outstanding kfd_process collected.
> +	 */
> +	hlist_for_each_entry_safe(p, p_temp, &cleanup_list, kfd_processes) {

You don't need braces for a loop containing a single statement.

Regards,
   Felix

> +		kfd_process_notifier_release_internal(p);
> +	}
> +
> +	/*
> +	 * Must be called after all mmu_notifier_put are done and before
> +	 * kfd_process_wq is released.
> +	 *
> +	 * Ensures that all outstanding free_notifier get called, triggering
> +	 * the release of the kfd_process struct.
> +	 */
> +	mmu_notifier_synchronize();
> +}
> +
> +
>   static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
>   {
>   	unsigned long  offset;