[PATCH 1/8] drm/amdgpu: Avoid reclaim fs while eviction lock

Sierra Guiza, Alejandro (Alex) Alex.Sierra at amd.com
Fri Jan 10 22:58:48 UTC 2020


[AMD Official Use Only - Internal Distribution Only]

The most recent ones start at:
[PATCH 1/8] drm/amdgpu: Avoid reclaim fs while eviction lock
Through:
[PATCH 8/8] drm/amdgpu: replace kcq enable function on gfx_v9

Regards,
Alex Sierra

-----Original Message-----
From: Christian König <ckoenig.leichtzumerken at gmail.com> 
Sent: Friday, January 10, 2020 5:29 AM
To: Sierra Guiza, Alejandro (Alex) <Alex.Sierra at amd.com>; amd-gfx at lists.freedesktop.org
Subject: Re: [PATCH 1/8] drm/amdgpu: Avoid reclaim fs while eviction lock

[CAUTION: External Email]

Looks like you sent that patch set out twice.

Which one is the most recent one?

Regards,
Christian.

Am 10.01.20 um 01:43 schrieb Alex Sierra:
> [Why]
> Avoid reclaim filesystem while eviction lock is held called from MMU 
> notifier.
>
> [How]
> Setting PF_MEMALLOC_NOFS flags while eviction mutex is locked.
> Using memalloc_nofs_save / memalloc_nofs_restore API.
>
> Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c
> Signed-off-by: Alex Sierra <alex.sierra at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 40 +++++++++++++++++++++-----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  6 +++-
>   2 files changed, 38 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b999b67ff57a..d6aba4f9df74 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -82,6 +82,32 @@ struct amdgpu_prt_cb {
>       struct dma_fence_cb cb;
>   };
>
> +/**
> + * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
> + * happens while holding this lock anywhere to prevent deadlocks when
> + * an MMU notifier runs in reclaim-FS context.
> + */
> +static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
> +{
> +     mutex_lock(&vm->eviction_lock);
> +     vm->saved_flags = memalloc_nofs_save();
> +}
> +
> +static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
> +{
> +     if (mutex_trylock(&vm->eviction_lock)) {
> +             vm->saved_flags = memalloc_nofs_save();
> +             return 1;
> +     }
> +     return 0;
> +}
> +
> +static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
> +{
> +     memalloc_nofs_restore(vm->saved_flags);
> +     mutex_unlock(&vm->eviction_lock);
> +}
> +
>   /**
>    * amdgpu_vm_level_shift - return the addr shift for each level
>    *
> @@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>               }
>       }
>
> -     mutex_lock(&vm->eviction_lock);
> +     amdgpu_vm_eviction_lock(vm);
>       vm->evicting = false;
> -     mutex_unlock(&vm->eviction_lock);
> +     amdgpu_vm_eviction_unlock(vm);
>
>       return 0;
>   }
> @@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>       if (!(flags & AMDGPU_PTE_VALID))
>               owner = AMDGPU_FENCE_OWNER_KFD;
>
> -     mutex_lock(&vm->eviction_lock);
> +     amdgpu_vm_eviction_lock(vm);
>       if (vm->evicting) {
>               r = -EBUSY;
>               goto error_unlock;
> @@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>       r = vm->update_funcs->commit(&params, fence);
>
>   error_unlock:
> -     mutex_unlock(&vm->eviction_lock);
> +     amdgpu_vm_eviction_unlock(vm);
>       return r;
>   }
>
> @@ -2537,18 +2563,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>               return false;
>
>       /* Try to block ongoing updates */
> -     if (!mutex_trylock(&bo_base->vm->eviction_lock))
> +     if (!amdgpu_vm_eviction_trylock(bo_base->vm))
>               return false;
>
>       /* Don't evict VM page tables while they are updated */
>       if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
>           !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
> -             mutex_unlock(&bo_base->vm->eviction_lock);
> +             amdgpu_vm_eviction_unlock(bo_base->vm);
>               return false;
>       }
>
>       bo_base->vm->evicting = true;
> -     mutex_unlock(&bo_base->vm->eviction_lock);
> +     amdgpu_vm_eviction_unlock(bo_base->vm);
>       return true;
>   }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 100547f094ff..c21a36bebc0c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -30,6 +30,7 @@
>   #include <drm/gpu_scheduler.h>
>   #include <drm/drm_file.h>
>   #include <drm/ttm/ttm_bo_driver.h>
> +#include <linux/sched/mm.h>
>
>   #include "amdgpu_sync.h"
>   #include "amdgpu_ring.h"
> @@ -242,9 +243,12 @@ struct amdgpu_vm {
>       /* tree of virtual addresses mapped */
>       struct rb_root_cached   va;
>
> -     /* Lock to prevent eviction while we are updating page tables */
> +     /* Lock to prevent eviction while we are updating page tables
> +      * use vm_eviction_lock/unlock(vm)
> +      */
>       struct mutex            eviction_lock;
>       bool                    evicting;
> +     unsigned int            saved_flags;
>
>       /* BOs who needs a validation */
>       struct list_head        evicted;


More information about the amd-gfx mailing list