[PATCH 09/10] drm/amdkfd: partially actual_loc removed
Felix Kuehling
felix.kuehling at amd.com
Mon Jun 21 21:24:13 UTC 2021
On 2021-06-21 12:04 p.m., Alex Sierra wrote:
> actual_loc should not be used anymore, as pranges
> can have mixed locations (VRAM & SYSRAM) at the
> same time.
>
> Signed-off-by: Alex Sierra <alex.sierra at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 12 +---
> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 71 ++++++++++--------------
> 2 files changed, 29 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index f71f8d7e2b72..acb9f64577a0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -501,12 +501,6 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
> struct amdgpu_device *adev;
> int r = 0;
>
> - if (prange->actual_loc == best_loc) {
> - pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
> - prange->svms, prange->start, prange->last, best_loc);
> - return 0;
> - }
> -
> adev = svm_range_get_adev_by_id(prange, best_loc);
> if (!adev) {
> pr_debug("failed to get device by id 0x%x\n", best_loc);
> @@ -791,11 +785,7 @@ int
> svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
> struct mm_struct *mm)
> {
> - if (!prange->actual_loc)
> - return svm_migrate_ram_to_vram(prange, best_loc, mm);
> - else
> - return svm_migrate_vram_to_vram(prange, best_loc, mm);
> -
> + return svm_migrate_ram_to_vram(prange, best_loc, mm);
Can you remove svm_migrate_vram_to_vram in this case? I guess we're now
relying on the svm_range_prefault call in svm_migrate_ram_to_vram to
migrate VRAM in a different XGMI hive to system memory. But eventually
we want to get rid of that pre-fault hack.
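Something like the following (untested sketch, not part of this patch, and
it still peeks at actual_loc, which this series is phasing out, just to
detect a foreign placement) would make the bounce through system memory
explicit instead of relying on the prefault:

int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
		    struct mm_struct *mm)
{
	int r;

	if (prange->actual_loc && prange->actual_loc != best_loc) {
		/* VRAM in another XGMI hive: move to system memory first */
		r = svm_migrate_vram_to_ram(prange, mm);
		if (r)
			return r;
	}

	return svm_migrate_ram_to_vram(prange, best_loc, mm);
}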
> }
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 3b05bc270732..ebc1ae7e5193 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -1421,42 +1421,38 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>
> svm_range_reserve_bos(&ctx);
>
> - if (!prange->actual_loc) {
> - p = container_of(prange->svms, struct kfd_process, svms);
> - owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap,
> - MAX_GPU_INSTANCE));
> - for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) {
> - if (kfd_svm_page_owner(p, idx) != owner) {
> - owner = NULL;
> - break;
> - }
> - }
> - r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
> - prange->start << PAGE_SHIFT,
> - prange->npages, &hmm_range,
> - false, true, owner);
> - if (r) {
> - pr_debug("failed %d to get svm range pages\n", r);
> - goto unreserve_out;
> - }
> -
> - r = svm_range_dma_map(prange, ctx.bitmap,
> - hmm_range->hmm_pfns);
> - if (r) {
> - pr_debug("failed %d to dma map range\n", r);
> - goto unreserve_out;
> + p = container_of(prange->svms, struct kfd_process, svms);
> + owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap,
> + MAX_GPU_INSTANCE));
> + for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) {
> + if (kfd_svm_page_owner(p, idx) != owner) {
> + owner = NULL;
> + break;
> }
> + }
> + r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
> + prange->start << PAGE_SHIFT,
> + prange->npages, &hmm_range,
> + false, true, owner);
> + if (r) {
> + pr_debug("failed %d to get svm range pages\n", r);
> + goto unreserve_out;
> + }
>
> - prange->validated_once = true;
> + r = svm_range_dma_map(prange, ctx.bitmap,
> + hmm_range->hmm_pfns);
> + if (r) {
> + pr_debug("failed %d to dma map range\n", r);
> + goto unreserve_out;
> }
>
> + prange->validated_once = true;
> +
> svm_range_lock(prange);
> - if (!prange->actual_loc) {
> - if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
> - pr_debug("hmm update the range, need validate again\n");
> - r = -EAGAIN;
> - goto unlock_out;
> - }
> + if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
> + pr_debug("hmm update the range, need validate again\n");
> + r = -EAGAIN;
> + goto unlock_out;
IMO, this is the most important part of this commit, and it should be
called out in the patch description. Here we use hmm_range_fault to get
VRAM addresses. This is what enables mixed mappings in the first place.
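Simplified sketch of why the owner argument matters here (loosely based on
what amdgpu_hmm_range_get_pages sets up; pfns and ret are caller-provided,
and the caller must hold mmap_read_lock(mm)):

	/*
	 * If range.dev_private_owner matches the pgmap owner of a
	 * device-private (VRAM) page, hmm_range_fault() returns the VRAM
	 * PFN as-is; otherwise it faults the page, triggering the pgmap's
	 * migrate_to_ram() callback. Passing one owner for all GPUs in
	 * ctx.bitmap is what lets a single range come back as a mix of
	 * VRAM and system-memory PFNs.
	 */
	struct hmm_range range = {
		.notifier		= &prange->notifier,
		.start			= prange->start << PAGE_SHIFT,
		.end			= (prange->last + 1) << PAGE_SHIFT,
		.hmm_pfns		= pfns,
		.default_flags		= HMM_PFN_REQ_FAULT,
		.dev_private_owner	= owner,	/* NULL forces sysmem */
	};

	range.notifier_seq = mmu_interval_read_begin(range.notifier);
	ret = hmm_range_fault(&range);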
Regards,
Felix
> }
> if (!list_empty(&prange->child_list)) {
> pr_debug("range split by unmap in parallel, validate again\n");
> @@ -2741,20 +2737,9 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
> *migrated = false;
> best_loc = svm_range_best_prefetch_location(prange);
>
> - if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
> - best_loc == prange->actual_loc)
> + if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED)
> return 0;
>
> - /*
> - * Prefetch to GPU without host access flag, set actual_loc to gpu, then
> - * validate on gpu and map to gpus will be handled afterwards.
> - */
> - if (best_loc && !prange->actual_loc &&
> - !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) {
> - prange->actual_loc = best_loc;
> - return 0;
> - }
> -
> if (!best_loc) {
> r = svm_migrate_vram_to_ram(prange, mm);
> *migrated = !r;