[PATCH] drm/amdkfd: Fixed kfd_process cleanup on module exit.
Christian König
christian.koenig at amd.com
Tue Mar 7 07:04:33 UTC 2023
Am 06.03.23 um 22:58 schrieb David Belanger:
> Handle case when module is unloaded (kfd_exit) before a process space
> (mm_struct) is released.
Well, that should never happen in the first place. It sounds like we
are failing to grab module references.
Regards,
Christian.
>
> Signed-off-by: David Belanger <david.belanger at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_module.c | 4 ++
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 57 ++++++++++++++++++++++++
> 2 files changed, 61 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
> index 09b966dc3768..8ef4bd9e4f7d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
> @@ -26,6 +26,9 @@
> #include "kfd_priv.h"
> #include "amdgpu_amdkfd.h"
>
> +void kfd_cleanup_processes(void);
> +
> +
> static int kfd_init(void)
> {
> int err;
> @@ -77,6 +80,7 @@ static int kfd_init(void)
>
> static void kfd_exit(void)
> {
> + kfd_cleanup_processes();
> kfd_debugfs_fini();
> kfd_process_destroy_wq();
> kfd_procfs_shutdown();
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index ebabe92f7edb..b5b28a32639d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1181,6 +1181,17 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
> return;
>
> mutex_lock(&kfd_processes_mutex);
> + /*
> + * Do early return if p is not in the table.
> + *
> + * This could potentially happen if this function is called concurrently
> + * by mmu_notifier and by kfd_cleanup_processes.
> + *
> + */
> + if (!hash_hashed(&p->kfd_processes)) {
> + mutex_unlock(&kfd_processes_mutex);
> + return;
> + }
> hash_del_rcu(&p->kfd_processes);
> mutex_unlock(&kfd_processes_mutex);
> synchronize_srcu(&kfd_processes_srcu);
> @@ -1200,6 +1211,52 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
> .free_notifier = kfd_process_free_notifier,
> };
>
> +
> +void kfd_cleanup_processes(void)
> +{
> + struct kfd_process *p;
> + unsigned int temp;
> +
> + /*
> + * Iterate over remaining processes in table, calling notifier release
> + * to free up notifier and process resources.
> + *
> + * This code handles the case when driver is unloaded before all mm_struct
> + * are released.
> + */
> + int idx = srcu_read_lock(&kfd_processes_srcu);
> +
> + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
> + if (p) {
> + /*
> + * Obtain a reference on p to avoid a late mmu_notifier release
> + * call triggering freeing the process.
> + */
> +
> + kref_get(&p->ref);
> +
> + srcu_read_unlock(&kfd_processes_srcu, idx);
> +
> + kfd_process_notifier_release(&p->mmu_notifier, p->mm);
> +
> + kfd_unref_process(p);
> +
> + idx = srcu_read_lock(&kfd_processes_srcu);
> + }
> + }
> + srcu_read_unlock(&kfd_processes_srcu, idx);
> +
> + /*
> + * Must be called after all mmu_notifier_put are done and before
> + * kfd_process_wq is released.
> + *
> + * Ensures that all outstanding free_notifier gets called, triggering the release
> + * of the process.
> + */
> + mmu_notifier_synchronize();
> +}
> +
> +
> static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
> {
> unsigned long offset;
More information about the amd-gfx
mailing list