[PATCH v2] drm/amdkfd: Use partial hmm page walk during buffer validation in SVM
Chen, Xiaogang
xiaogang.chen at amd.com
Wed Dec 6 15:26:52 UTC 2023
On 12/5/2023 12:48 PM, Philip Yang wrote:
>
>
> On 2023-12-04 15:23, Xiaogang.Chen wrote:
>> From: Xiaogang Chen<xiaogang.chen at amd.com>
>>
>> v2:
>> -no need to calculate the vram page number for a newly registered svm
>> range, only for split ranges' vram pages.
>>
>> SVM uses an hmm page walk to validate buffers before mapping them to the
>> gpu vm. After a partial migration/mapping, do the validation on the same
>> vm range as the migration/mapping instead of on the whole svm range,
>> which can be very large. This change is expected to improve svm code
>> performance.
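To illustrate the idea (a minimal sketch, not the literal hunks below;
error handling omitted): the hmm walk now covers only the migrated/mapped
sub-range, while offsets stay relative to the whole prange so the per-gpu
dma address arrays are still indexed correctly:

    /* walk only [map_start, map_last] instead of the whole prange */
    start = map_start << PAGE_SHIFT;
    end = (map_last + 1) << PAGE_SHIFT;
    for (addr = start; addr < end; addr = next) {
        vma = vma_lookup(mm, addr);     /* one VMA at a time */
        next = min(vma->vm_end, end);
        npages = (next - addr) >> PAGE_SHIFT;
        /* offset is relative to prange->start, not map_start */
        offset = (addr - (prange->start << PAGE_SHIFT)) >> PAGE_SHIFT;
        r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
                              hmm_range->hmm_pfns);
    }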
>
> Seems we could calculate the old and new prange->vram_pages inside
> svm_range_split_pages, using dma_addr & SVM_RANGE_VRAM_DOMAIN to
> decide whether a page is a vram or system memory page. This would
> cover both the unmap-from-cpu and the set_attr split-range cases. Then
> the new function svm_range_vram_pages is not needed.
>
OK, I now use the dma address to calculate the split svm range's vram page
number; kfdtest passed. The new v3 patch includes this.
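For reference, a sketch of what that could look like in the split path
(assuming dma_addr points at the portion of the dma address array copied
to the new range; SVM_RANGE_VRAM_DOMAIN is the flag KFD sets in the dma
address of vram pages):

    /* count the new range's vram pages from its dma address array;
     * entries flagged with SVM_RANGE_VRAM_DOMAIN came from vram,
     * everything else is system memory
     */
    new->vram_pages = 0;
    for (i = 0; i < new->npages; i++)
        if (dma_addr[i] & SVM_RANGE_VRAM_DOMAIN)
            new->vram_pages++;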
>
> The prange->vram_pages update after migrating to vram should use mpages,
> not cpages (the total collected pages).
>
This comes from the original code, which used cpages as mpages. I think
svm_migrate_successful_pages did not give the correct migrated page count,
which is why the original code used cpages. I updated this part to use
svm_migrate_unsuccessful_pages instead, so that mpages = cpages -
svm_migrate_unsuccessful_pages(&migrate). The new v3 patch also includes
this change. Please review it.
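A sketch of the intended calculation (mpages/cpages here are the local
counters in the migration path, migrate is the struct migrate_vma):

    /* cpages is what migrate_vma_setup() collected; subtract the
     * pages that failed to migrate to get the count actually moved
     * to vram, then account those pages in the range
     */
    mpages = cpages - svm_migrate_unsuccessful_pages(&migrate);
    prange->vram_pages += mpages;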
Regards
Xiaogang
> Regards,
>
> Philip
>
>> Signed-off-by: Xiaogang Chen<Xiaogang.Chen at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 152 ++++++++++++++++++++-------
>> 1 file changed, 110 insertions(+), 42 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 2834fb351818..2f14cd1a3416 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -158,13 +158,12 @@ svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr)
>> static int
>> svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
>> unsigned long offset, unsigned long npages,
>> - unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t *vram_pages)
>> + unsigned long *hmm_pfns, uint32_t gpuidx)
>> {
>> enum dma_data_direction dir = DMA_BIDIRECTIONAL;
>> dma_addr_t *addr = prange->dma_addr[gpuidx];
>> struct device *dev = adev->dev;
>> struct page *page;
>> - uint64_t vram_pages_dev;
>> int i, r;
>>
>> if (!addr) {
>> @@ -174,7 +173,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
>> prange->dma_addr[gpuidx] = addr;
>> }
>>
>> - vram_pages_dev = 0;
>> addr += offset;
>> for (i = 0; i < npages; i++) {
>> if (svm_is_valid_dma_mapping_addr(dev, addr[i]))
>> @@ -184,7 +182,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
>> if (is_zone_device_page(page)) {
>> struct amdgpu_device *bo_adev = prange->svm_bo->node->adev;
>>
>> - vram_pages_dev++;
>> addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
>> bo_adev->vm_manager.vram_base_offset -
>> bo_adev->kfd.pgmap.range.start;
>> @@ -201,14 +198,14 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
>> pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
>> addr[i] >> PAGE_SHIFT, page_to_pfn(page));
>> }
>> - *vram_pages = vram_pages_dev;
>> +
>> return 0;
>> }
>>
>> static int
>> svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
>> unsigned long offset, unsigned long npages,
>> - unsigned long *hmm_pfns, uint64_t *vram_pages)
>> + unsigned long *hmm_pfns)
>> {
>> struct kfd_process *p;
>> uint32_t gpuidx;
>> @@ -227,7 +224,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
>> }
>>
>> r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages,
>> - hmm_pfns, gpuidx, vram_pages);
>> + hmm_pfns, gpuidx);
>> if (r)
>> break;
>> }
>> @@ -982,11 +979,6 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
>> new->svm_bo = svm_range_bo_ref(old->svm_bo);
>> new->ttm_res = old->ttm_res;
>>
>> - /* set new's vram_pages as old range's now, the acurate vram_pages
>> - * will be updated during mapping
>> - */
>> - new->vram_pages = min(old->vram_pages, new->npages);
>> -
>> spin_lock(&new->svm_bo->list_lock);
>> list_add(&new->svm_bo_list, &new->svm_bo->range_list);
>> spin_unlock(&new->svm_bo->list_lock);
>> @@ -1107,9 +1099,9 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
>>
>> static int
>> svm_range_split_tail(struct svm_range *prange, uint64_t new_last,
>> - struct list_head *insert_list, struct list_head *remap_list)
>> + struct list_head *insert_list, struct list_head *remap_list,
>> + struct svm_range **tail)
>> {
>> - struct svm_range *tail;
>> - int r = svm_range_split(prange, prange->start, new_last, &tail);
>> + int r = svm_range_split(prange, prange->start, new_last, tail);
>>
>> if (!r) {
>> @@ -1122,9 +1114,9 @@ svm_range_split_tail(struct svm_range *prange, uint64_t new_last,
>>
>> static int
>> svm_range_split_head(struct svm_range *prange, uint64_t new_start,
>> - struct list_head *insert_list, struct list_head *remap_list)
>> + struct list_head *insert_list, struct list_head *remap_list,
>> + struct svm_range **head)
>> {
>> - struct svm_range *head;
>> - int r = svm_range_split(prange, new_start, prange->last, &head);
>> + int r = svm_range_split(prange, new_start, prange->last, head);
>>
>> if (!r) {
>> @@ -1573,7 +1565,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>> struct svm_validate_context *ctx;
>> unsigned long start, end, addr;
>> struct kfd_process *p;
>> - uint64_t vram_pages;
>> void *owner;
>> int32_t idx;
>> int r = 0;
>> @@ -1642,15 +1633,13 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>> }
>> }
>>
>> - vram_pages = 0;
>> - start = prange->start << PAGE_SHIFT;
>> - end = (prange->last + 1) << PAGE_SHIFT;
>> + start = map_start << PAGE_SHIFT;
>> + end = (map_last + 1) << PAGE_SHIFT;
>> for (addr = start; !r && addr < end; ) {
>> struct hmm_range *hmm_range;
>> unsigned long map_start_vma;
>> unsigned long map_last_vma;
>> struct vm_area_struct *vma;
>> - uint64_t vram_pages_vma;
>> unsigned long next = 0;
>> unsigned long offset;
>> unsigned long npages;
>> @@ -1677,13 +1666,11 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>> }
>>
>> if (!r) {
>> - offset = (addr - start) >> PAGE_SHIFT;
>> + offset = (addr - (prange->start << PAGE_SHIFT)) >> PAGE_SHIFT;
>> r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
>> - hmm_range->hmm_pfns, &vram_pages_vma);
>> + hmm_range->hmm_pfns);
>> if (r)
>> pr_debug("failed %d to dma map range\n", r);
>> - else
>> - vram_pages += vram_pages_vma;
>> }
>>
>> svm_range_lock(prange);
>> @@ -1716,19 +1703,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>> addr = next;
>> }
>>
>> - if (addr == end) {
>> - prange->vram_pages = vram_pages;
>> -
>> - /* if prange does not include any vram page and it
>> - * has not released svm_bo drop its svm_bo reference
>> - * and set its actaul_loc to sys ram
>> - */
>> - if (!vram_pages && prange->ttm_res) {
>> - prange->actual_loc = 0;
>> - svm_range_vram_node_free(prange);
>> - }
>> - }
>> -
>> svm_range_unreserve_bos(ctx);
>> if (!r)
>> prange->validate_timestamp = ktime_get_boottime();
>> @@ -2037,6 +2011,82 @@ svm_range_split_new(struct svm_range_list *svms, uint64_t start, uint64_t last,
>> return 0;
>> }
>>
>> +static int
>> +svm_range_vram_pages(struct svm_range *prange)
>> +{
>> + unsigned long start, end, addr;
>> + struct svm_range_list *svms;
>> + struct kfd_process *p;
>> + struct mm_struct *mm;
>> + struct page *page;
>> + int32_t gpuidx;
>> + void *owner;
>> + int r = 0;
>> +
>> + prange->vram_pages = 0;
>> + svms = prange->svms;
>> + p = container_of(svms, struct kfd_process, svms);
>> + mm = get_task_mm(p->lead_thread);
>> + if (!mm) {
>> + pr_debug("svms 0x%p process mm gone\n", svms);
>> + return -ESRCH;
>> + }
>> +
>> + /* prange->actual_loc should not be 0 here */
>> + gpuidx = kfd_process_gpuidx_from_gpuid(p, prange->actual_loc);
>> + if (gpuidx < 0) {
>> + pr_debug("failed get device by id 0x%x\n", prange->actual_loc);
>> + mmput(mm);
>> + return -EINVAL;
>> + }
>> + owner = kfd_svm_page_owner(p, gpuidx);
>> +
>> + start = prange->start << PAGE_SHIFT;
>> + end = (prange->last + 1) << PAGE_SHIFT;
>> + for (addr = start; addr < end; ) {
>> + struct hmm_range *hmm_range;
>> + struct vm_area_struct *vma;
>> + unsigned long next = 0;
>> + unsigned long npages;
>> + bool readonly;
>> +
>> + vma = vma_lookup(mm, addr);
>> + if (!vma) {
>> + mmput(mm);
>> + return -EFAULT;
>> + }
>> +
>> + readonly = !(vma->vm_flags & VM_WRITE);
>> + next = min(vma->vm_end, end);
>> + npages = (next - addr) >> PAGE_SHIFT;
>> + r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
>> + readonly, owner, NULL,
>> + &hmm_range);
>> + if (r) {
>> + pr_debug("failed %d to get svm range pages\n", r);
>> + mmput(mm);
>> + return r;
>> + }
>> +
>> + for (int i = 0; i < npages; i++) {
>> + page = hmm_pfn_to_page(hmm_range->hmm_pfns[i]);
>> + if (is_zone_device_page(page))
>> + prange->vram_pages++;
>> + }
>> +
>> + if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
>> + pr_debug("hmm update the range, need validate again\n");
>> + mmput(mm);
>> + return -EAGAIN;
>> + }
>> +
>> + addr = next;
>> + }
>> +
>> + mmput(mm);
>> + return 0;
>> +}
>> +
>> /**
>> * svm_range_add - add svm range and handle overlap
>> * @p: the range add to this process svms
>> @@ -2109,6 +2158,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
>> * will change. Clone and split it, apply updates only
>> * to the overlapping part
>> */
>> + struct svm_range *head = NULL, *tail = NULL;
>> struct svm_range *old = prange;
>>
>> prange = svm_range_clone(old);
>> @@ -2123,18 +2173,35 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
>>
>> if (node->start < start) {
>> pr_debug("change old range start\n");
>> r = svm_range_split_head(prange, start,
>> - insert_list, remap_list);
>> + insert_list, remap_list, &head);
>> if (r)
>> goto out;
>> }
>> if (node->last > last) {
>> pr_debug("change old range last\n");
>> r = svm_range_split_tail(prange, last,
>> - insert_list, remap_list);
>> + insert_list, remap_list, &tail);
>> if (r)
>> goto out;
>> }
>> + /* calculate each inserted svm prange's vram_pages */
>> + if (prange->actual_loc && prange->ttm_res) {
>> +
>> + if (head) {
>> + r = svm_range_vram_pages(head);
>> + if (r)
>> + goto out;
>> + prange->vram_pages -= head->vram_pages;
>> + }
>> +
>> + if (tail) {
>> + r = svm_range_vram_pages(tail);
>> + if (r)
>> + goto out;
>> + prange->vram_pages -= tail->vram_pages;
>> + }
>> + }
>> } else {
>> /* The node is contained within start..last,
>> * just update it