[PATCH v2 5/7] drm/amdkfd: Check bitmap_mapped flag to skip retry fault

Philip Yang Philip.Yang at amd.com
Tue Oct 10 14:40:22 UTC 2023


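Remove prange->validate_timestamp, which is not accurate with multiple
GPUs: it is a single per-range timestamp, so it cannot tell on which
GPU the range was last validated and mapped.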

Instead, use the bitmap_mapped flag to skip duplicate retry faults from
different pages of the same granularity range when the faulting address
is already mapped on the specific GPU that raised the fault.

Signed-off-by: Philip Yang <Philip.Yang at amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24 ++++++++----------------
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  1 -
 2 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index daa996d7039d..0ee5633c8972 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -43,10 +43,6 @@
 
 #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
 
-/* Long enough to ensure no retry fault comes after svm range is restored and
- * page table is updated.
- */
-#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING	(2UL * NSEC_PER_MSEC)
 #if IS_ENABLED(CONFIG_DYNAMIC_DEBUG)
 #define dynamic_svm_range_dump(svms) \
 	_dynamic_func_call_no_desc("svm_range_dump", svm_range_debug_dump, svms)
@@ -369,7 +365,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	INIT_LIST_HEAD(&prange->deferred_list);
 	INIT_LIST_HEAD(&prange->child_list);
 	atomic_set(&prange->invalid, 0);
-	prange->validate_timestamp = 0;
 	prange->vram_pages = 0;
 	mutex_init(&prange->migrate_mutex);
 	mutex_init(&prange->lock);
@@ -1938,8 +1933,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 	}
 
 	svm_range_unreserve_bos(ctx);
-	if (!r)
-		prange->validate_timestamp = ktime_get_boottime();
 
 free_ctx:
 	kfree(ctx);
@@ -3214,15 +3207,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		goto out_unlock_range;
 	}
 
-	/* skip duplicate vm fault on different pages of same range */
-	if (ktime_before(timestamp, ktime_add_ns(prange->validate_timestamp,
-				AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING))) {
-		pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
-			 svms, prange->start, prange->last);
-		r = 0;
-		goto out_unlock_range;
-	}
-
 	/* __do_munmap removed VMA, return success as we are handling stale
 	 * retry fault.
 	 */
@@ -3248,6 +3232,14 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		goto out_unlock_range;
 	}
 
+	/* skip duplicate vm fault on different pages of same granularity range */
+	if (svm_range_partial_mapped_dev(gpuidx, prange, addr, addr)) {
+		pr_debug("svms 0x%p [0x%lx %lx] addr 0x%llx already mapped on gpu %d\n",
+			 svms, prange->start, prange->last, addr, gpuidx);
+		r = 0;
+		goto out_unlock_range;
+	}
+
 	pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n",
 		 svms, prange->start, prange->last, best_loc,
 		 prange->actual_loc);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 7e165854bc0e..7a12be42cf16 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -129,7 +129,6 @@ struct svm_range {
 	uint32_t			actual_loc;
 	uint8_t				granularity;
 	atomic_t			invalid;
-	ktime_t				validate_timestamp;
 	struct mmu_interval_notifier	notifier;
 	struct svm_work_list_item	work_item;
 	struct list_head		deferred_list;
-- 
2.35.1



More information about the amd-gfx mailing list