[PATCH v3] amd/amdkfd: Set correct svm range actual loc after splitting
Chen, Xiaogang
xiaogang.chen at amd.com
Thu Jan 11 17:37:59 UTC 2024
On 1/11/2024 10:54 AM, Felix Kuehling wrote:
>
> On 2024-01-10 17:01, Philip Yang wrote:
>> While svm range partial migrating to system memory, clear dma_addr vram
>> domain flag, otherwise the future split will get incorrect vram_pages
>> and actual loc.
>>
>> After range splitting, set new range and old range actual_loc:
>> new range actual_loc is 0 if new->vram_pages is 0.
>> old range actual_loc is 0 if old->vram_pages - new->vram_pages == 0.
>>
>> new range takes svm_bo ref only if vram_pages not equal to 0.
>>
>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 20 +++++++++++++++++++-
>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24 ++++++++++++++----------
>> 2 files changed, 33 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
>> index f856901055d3..dae05f70257b 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
>> @@ -563,18 +563,30 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
>> struct migrate_vma *migrate, struct dma_fence **mfence,
>> dma_addr_t *scratch, uint64_t npages)
>> {
>> + struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
>> struct device *dev = adev->dev;
>> + dma_addr_t *dma_addr;
>> uint64_t *src;
>> dma_addr_t *dst;
>> struct page *dpage;
>> uint64_t i = 0, j;
>> uint64_t addr;
>> + s32 gpuidx;
>> + u64 offset;
>> int r = 0;
>> pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
>> prange->last);
>> - addr = prange->start << PAGE_SHIFT;
>
> Is this another bug fix for partial migration? If so, it may be worth
> making that a separate patch.
>
It seems this is also a bug when a prange spans multiple VMAs; with partial
migration it just becomes obvious.
>
>> + gpuidx = kfd_process_gpuidx_from_gpuid(p, prange->actual_loc);
>> + if (gpuidx < 0) {
>> + pr_debug("no GPU id 0x%x found\n", prange->actual_loc);
>> + return -EINVAL;
>> + }
>> +
>> + addr = migrate->start;
>> + offset = (addr >> PAGE_SHIFT) - prange->start;
>> + dma_addr = prange->dma_addr[gpuidx];
>> src = (uint64_t *)(scratch + npages);
>> dst = scratch;
>> @@ -623,6 +635,12 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
>> goto out_oom;
>> }
>> + /* Clear VRAM flag when page is migrated to ram, to count vram
>> + * pages correctly when splitting the range.
>> + */
>> + if (dma_addr && (dma_addr[offset + i] & SVM_RANGE_VRAM_DOMAIN))
>> + dma_addr[offset + i] = 0;
>> +
>
When we get here we already know the page has been migrated to system ram,
so do we still need the check

dma_addr[offset + i] & SVM_RANGE_VRAM_DOMAIN

You want to set dma_addr[offset + i] = 0 anyway.
> I'm not a big fan of messing with the DMA arrays here, but I don't
> have a good alternative. I think what bothers me is, how the DMA
> address array and handling of vram page count is now spread out across
> so many places. It feels fragile.
>
> Maybe it would be good to add a helper in kfd_svm.c:
> svm_get_dma_addr_for_page_count(prange, offset). That way you can keep
> the choice of gpuid and offset calculation in one place in kfd_svm.c,
> close to svm_range_copy_array.
>
> Other than that, the patch looks good to me.
svm_migrate_copy_to_ram already has scratch, which saves the DMA addresses
of the destination pages. Maybe we can check it after svm_migrate_copy_to_ram
and before svm_range_dma_unmap_dev; that looks better.
Regards
Xiaogang
>
> Regards,
> Felix
>
>
>> pr_debug_ratelimited("dma mapping dst to 0x%llx, pfn 0x%lx\n",
>> dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index cc24f30f88fb..35ee9e648cca 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -362,7 +362,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
>> INIT_LIST_HEAD(&prange->child_list);
>> atomic_set(&prange->invalid, 0);
>> prange->validate_timestamp = 0;
>> - prange->vram_pages = 0;
>> mutex_init(&prange->migrate_mutex);
>> mutex_init(&prange->lock);
>> @@ -980,9 +979,14 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old,
>> if (r)
>> return r;
>> }
>> - if (old->actual_loc)
>> + if (old->actual_loc && new->vram_pages) {
>> old->vram_pages -= new->vram_pages;
>> -
>> + new->actual_loc = old->actual_loc;
>> + if (!old->vram_pages)
>> + old->actual_loc = 0;
>> + }
>> + pr_debug("new->vram_pages 0x%llx loc 0x%x old->vram_pages 0x%llx loc 0x%x\n",
>> + new->vram_pages, new->actual_loc, old->vram_pages, old->actual_loc);
>> return 0;
>> }
>> @@ -1002,13 +1006,14 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
>> new->offset = old->offset + npages;
>> }
>> - new->svm_bo = svm_range_bo_ref(old->svm_bo);
>> - new->ttm_res = old->ttm_res;
>> -
>> - spin_lock(&new->svm_bo->list_lock);
>> - list_add(&new->svm_bo_list, &new->svm_bo->range_list);
>> - spin_unlock(&new->svm_bo->list_lock);
>> + if (new->vram_pages) {
>> + new->svm_bo = svm_range_bo_ref(old->svm_bo);
>> + new->ttm_res = old->ttm_res;
>> + spin_lock(&new->svm_bo->list_lock);
>> + list_add(&new->svm_bo_list, &new->svm_bo->range_list);
>> + spin_unlock(&new->svm_bo->list_lock);
>> + }
>> return 0;
>> }
>> @@ -1058,7 +1063,6 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
>> new->flags = old->flags;
>> new->preferred_loc = old->preferred_loc;
>> new->prefetch_loc = old->prefetch_loc;
>> - new->actual_loc = old->actual_loc;
>> new->granularity = old->granularity;
>> new->mapped_to_gpu = old->mapped_to_gpu;
>> bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);