[PATCH 1/6] drm/amdkfd: retry validation to recover range

Philip Yang Philip.Yang at amd.com
Tue Apr 20 20:21:18 UTC 2021


GPU VM retry fault recovery needs to retry range validation if:

1. the range is split in parallel by unmap during recovery
2. the range is migrated to system memory and is updated in system
memory during recovery

Signed-off-by: Philip Yang <Philip.Yang at amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 0e0b4ffd20ab..40ef5709d0a7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1402,11 +1402,13 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 	svm_range_lock(prange);
 	if (!prange->actual_loc) {
 		if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+			pr_debug("hmm update the range, need validate again\n");
 			r = -EAGAIN;
 			goto unlock_out;
 		}
 	}
 	if (!list_empty(&prange->child_list)) {
+		pr_debug("range split by unmap in parallel, validate again\n");
 		r = -EAGAIN;
 		goto unlock_out;
 	}
@@ -2254,6 +2256,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 out:
 	kfd_unref_process(p);
 
+	if (r == -EAGAIN) {
+		pr_debug("recover vm fault later\n");
+		r = 0;
+	}
 	return r;
 }
 
-- 
2.17.1



More information about the amd-gfx mailing list