[PATCH] drm/amdkfd: Fix mm reference in SVM eviction worker
Philip Yang
yangp at amd.com
Wed Aug 10 19:19:58 UTC 2022
On 2022-08-08 20:38, Felix Kuehling wrote:
> Use the mm reference from the fence. This allows removing the
> svm_bo->svms pointer, which was problematic because we cannot assume
> that the struct kfd_process containing the svms is still allocated
> without holding a refcount on the process.
>
> Use mmget_not_zero() to take a reference on the mm only if it is still
> alive; if it is not, drop the svm_bo reference and return early.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
Reviewed-by: Philip Yang <Philip.Yang at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 17 +++++++----------
> drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 1 -
> 2 files changed, 7 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index a67ba8879a56..11074cc8c333 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -541,7 +541,6 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
> kfree(svm_bo);
> return -ESRCH;
> }
> - svm_bo->svms = prange->svms;
> svm_bo->eviction_fence =
> amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
> mm,
> @@ -3273,7 +3272,6 @@ int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
> static void svm_range_evict_svm_bo_worker(struct work_struct *work)
> {
> struct svm_range_bo *svm_bo;
> - struct kfd_process *p;
> struct mm_struct *mm;
> int r = 0;
>
> @@ -3281,13 +3279,12 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
> if (!svm_bo_ref_unless_zero(svm_bo))
> return; /* svm_bo was freed while eviction was pending */
>
> - /* svm_range_bo_release destroys this worker thread. So during
> - * the lifetime of this thread, kfd_process and mm will be valid.
> - */
> - p = container_of(svm_bo->svms, struct kfd_process, svms);
> - mm = p->mm;
> - if (!mm)
> + if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
> + mm = svm_bo->eviction_fence->mm;
> + } else {
> + svm_range_bo_unref(svm_bo);
> return;
> + }
>
> mmap_read_lock(mm);
> spin_lock(&svm_bo->list_lock);
> @@ -3305,8 +3302,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
>
> mutex_lock(&prange->migrate_mutex);
> do {
> - r = svm_migrate_vram_to_ram(prange,
> - svm_bo->eviction_fence->mm,
> + r = svm_migrate_vram_to_ram(prange, mm,
> KFD_MIGRATE_TRIGGER_TTM_EVICTION);
> } while (!r && prange->actual_loc && --retries);
>
> @@ -3324,6 +3320,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
> }
> spin_unlock(&svm_bo->list_lock);
> mmap_read_unlock(mm);
> + mmput(mm);
>
> dma_fence_signal(&svm_bo->eviction_fence->base);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> index 9156b041ef17..cfac13ad06ef 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> @@ -46,7 +46,6 @@ struct svm_range_bo {
> spinlock_t list_lock;
> struct amdgpu_amdkfd_fence *eviction_fence;
> struct work_struct eviction_work;
> - struct svm_range_list *svms;
> uint32_t evicting;
> struct work_struct release_work;
> };
More information about the amd-gfx mailing list