[PATCH 3/3] drm/amdkfd: Check bitmap_mapped flag to skip retry fault
Chen, Xiaogang
xiaogang.chen at amd.com
Mon Oct 2 17:08:13 UTC 2023
On 9/29/2023 9:11 AM, Philip Yang wrote:
> Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
>
>
> Use the bitmap_mapped flag to check whether the range is already mapped to
> the specific GPU, to skip retry faults from different pages of the same range.
>
> Remove prange validate_timestamp which is not accurate for multiple
> GPUs.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24 ++++++++----------------
> drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 1 -
> 2 files changed, 8 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index ac65bf25c685..5e063d902a46 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -43,10 +43,6 @@
>
> #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
>
> -/* Long enough to ensure no retry fault comes after svm range is restored and
> - * page table is updated.
> - */
> -#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING (2UL * NSEC_PER_MSEC)
> #if IS_ENABLED(CONFIG_DYNAMIC_DEBUG)
> #define dynamic_svm_range_dump(svms) \
> _dynamic_func_call_no_desc("svm_range_dump", svm_range_debug_dump, svms)
> @@ -365,7 +361,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
> INIT_LIST_HEAD(&prange->deferred_list);
> INIT_LIST_HEAD(&prange->child_list);
> atomic_set(&prange->invalid, 0);
> - prange->validate_timestamp = 0;
> mutex_init(&prange->migrate_mutex);
> mutex_init(&prange->lock);
>
> @@ -1876,8 +1871,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
> }
>
> svm_range_unreserve_bos(ctx);
> - if (!r)
> - prange->validate_timestamp = ktime_get_boottime();
>
> free_ctx:
> kfree(ctx);
> @@ -3162,15 +3155,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
> goto out_unlock_range;
> }
>
> - /* skip duplicate vm fault on different pages of same range */
> - if (ktime_before(timestamp, ktime_add_ns(prange->validate_timestamp,
> - AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING))) {
> - pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
> - svms, prange->start, prange->last);
> - r = 0;
> - goto out_unlock_range;
> - }
> -
> /* __do_munmap removed VMA, return success as we are handling stale
> * retry fault.
> */
> @@ -3196,6 +3180,14 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
> goto out_unlock_range;
> }
>
> + /* skip duplicate vm fault on different pages of same range */
I think the following call checks whether the prange->granularity range
that addr falls in is already mapped on gpuidx, rather than different
pages of the same range.
Regards
Xiaogang
> + if (svm_range_partial_mapped_dev(gpuidx, prange, addr, addr)) {
> + pr_debug("svms 0x%p [0x%lx %lx] already restored on gpu %d\n",
> + svms, prange->start, prange->last, gpuidx);
> + r = 0;
> + goto out_unlock_range;
> + }
> +
> pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n",
> svms, prange->start, prange->last, best_loc,
> prange->actual_loc);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> index 10c92c5e23a7..3afc33a3dd30 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> @@ -125,7 +125,6 @@ struct svm_range {
> uint32_t actual_loc;
> uint8_t granularity;
> atomic_t invalid;
> - ktime_t validate_timestamp;
> struct mmu_interval_notifier notifier;
> struct svm_work_list_item work_item;
> struct list_head deferred_list;
> --
> 2.35.1
>
More information about the amd-gfx
mailing list