[PATCH] drm/amdkfd: Change page discontinuity handling at svm_migrate_copy_to_vram

Chen, Xiaogang xiaogang.chen at amd.com
Mon Jan 20 16:30:21 UTC 2025


On 1/20/2025 10:13 AM, Philip Yang wrote:
>
>
> On 2025-01-15 16:40, Xiaogang.Chen wrote:
>> From: Xiaogang Chen<xiaogang.chen at amd.com>
>>
>> Currently svm_migrate_copy_to_vram handles discontinuity in the sys pages (src)
>> and the dst pages (vram) in different ways. When a discontinuity is found in src,
>> j pages are migrated and the ith page is not migrated; when a discontinuity is
>> found in dst, j+1 pages are migrated and the ith page is migrated. As a result,
>> the error path has to iterate over all pages to find out which pages were
>> migrated before the error happened. It also makes the code harder to read.
> The error handling path loops from 0 -> mpages or from i -> 0 to roll back 
> migrated pages; I think both ways should handle a similar number of pages.
The number of svm_migrate_put_vram_page calls is the same, but the number of 
loop iterations is not. The change is not about correctness; it makes the code 
more straightforward to read. On the error path, a more logical way is to roll 
back from the place where the error happened, instead of walking all pages.
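As a sketch of the difference (simplified from the diff below; the surrounding 
error check is elided):

	/* current: scan the whole range and count down the migrated pages */
	for (i = 0; i < npages && mpages; i++) {
		if (!dst[i])
			continue;
		svm_migrate_put_vram_page(adev, dst[i]);
		migrate->dst[i] = 0;
		mpages--;
	}

	/* with this patch: walk back only from where the error happened */
	while (i--) {
		if (migrate->dst[i]) {
			svm_migrate_put_vram_page(adev, dst[i]);
			migrate->dst[i] = 0;
		}
	}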
>> This patch handles src and dst page discontinuity in a consistent way, which
>> makes the logic and the error recovery straightforward.
>
> I feel that moving svm_migrate_copy_memory_gart to the beginning of the 
> loop is harder to understand.
>
If you feel that handling the vram page discontinuity at the beginning of the 
loop is not natural, we can put it after the sys ram discontinuity handling. 
The purpose is to handle src and dst page discontinuity in a consistent way.
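Roughly, the loop then looks like this (a simplified sketch of the patched 
code; error checks, dst[i] setup and the dma mapping are elided, see the diff 
below for the full version):

	for (i = j = 0; (i < npages) && (mpages < migrate->cpages); i++) {
		/* dst (vram) discontinuity: current cursor block used up, flush batch */
		if (j >= (cursor.size >> PAGE_SHIFT)) {
			r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j,
							 j, FROM_RAM_TO_VRAM, mfence);
			amdgpu_res_next(&cursor, j * PAGE_SIZE);
			j = 0;
		}

		/* ... pick dst[i] from the cursor, count mpages ... */

		spage = migrate_pfn_to_page(migrate->src[i]);
		/* src (sys ram) discontinuity: flush the batch the same way, skip page */
		if (!spage || is_zone_device_page(spage)) {
			if (j) {
				r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j,
								 j, FROM_RAM_TO_VRAM, mfence);
				amdgpu_res_next(&cursor, (j + 1) << PAGE_SHIFT);
				j = 0;
			} else {
				amdgpu_res_next(&cursor, PAGE_SIZE);
			}
			continue;
		}

		/* ... dma map spage into src[i], keep accumulating ... */
		j++;
	}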


> Regards,
>
> Philip
>
>> Signed-off-by: Xiaogang Chen<Xiaogang.Chen at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 68 ++++++++++++------------
>>   1 file changed, 35 insertions(+), 33 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
>> index d05d199b5e44..2ce78c77f203 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
>> @@ -299,6 +299,19 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
>>   	for (i = j = 0; (i < npages) && (mpages < migrate->cpages); i++) {
>>   		struct page *spage;
>>   
>> +		/* accumulated pages more than current cursor's block has */
>> +		if (j >= (cursor.size >> PAGE_SHIFT)) {
>> +			r = svm_migrate_copy_memory_gart(adev, src + i - j,
>> +							 dst + i - j, j,
>> +							 FROM_RAM_TO_VRAM,
>> +							 mfence);
>> +			if (r)
>> +				goto out_free_vram_pages;
>> +
>> +			amdgpu_res_next(&cursor, j * PAGE_SIZE);
>> +			j = 0;
>> +		}
>> +
>>   		if (migrate->src[i] & MIGRATE_PFN_MIGRATE) {
>>   			dst[i] = cursor.start + (j << PAGE_SHIFT);
>>   			migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
>> @@ -306,17 +319,10 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
>>   			migrate->dst[i] = migrate_pfn(migrate->dst[i]);
>>   			mpages++;
>>   		}
>> +
>>   		spage = migrate_pfn_to_page(migrate->src[i]);
>> -		if (spage && !is_zone_device_page(spage)) {
>> -			src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
>> -					      DMA_BIDIRECTIONAL);
>> -			r = dma_mapping_error(dev, src[i]);
>> -			if (r) {
>> -				dev_err(dev, "%s: fail %d dma_map_page\n",
>> -					__func__, r);
>> -				goto out_free_vram_pages;
>> -			}
>> -		} else {
>> +		if (!spage || is_zone_device_page(spage)) {
>> +			/* sdma accumulated pages before src got gap */
>>   			if (j) {
>>   				r = svm_migrate_copy_memory_gart(
>>   						adev, src + i - j,
>> @@ -325,29 +331,26 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
>>   						mfence);
>>   				if (r)
>>   					goto out_free_vram_pages;
>> -				amdgpu_res_next(&cursor, (j + 1) << PAGE_SHIFT);
>> +
>> +				amdgpu_res_next(&cursor, (j+1) << PAGE_SHIFT);
>>   				j = 0;
>> -			} else {
>> +			} else
>>   				amdgpu_res_next(&cursor, PAGE_SIZE);
>> -			}
>> +
>>   			continue;
>>   		}
>>   
>> -		pr_debug_ratelimited("dma mapping src to 0x%llx, pfn 0x%lx\n",
>> -				     src[i] >> PAGE_SHIFT, page_to_pfn(spage));
>> -
>> -		if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) {
>> -			r = svm_migrate_copy_memory_gart(adev, src + i - j,
>> -							 dst + i - j, j + 1,
>> -							 FROM_RAM_TO_VRAM,
>> -							 mfence);
>> -			if (r)
>> -				goto out_free_vram_pages;
>> -			amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE);
>> -			j = 0;
>> -		} else {
>> -			j++;
>> +		src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
>> +				DMA_BIDIRECTIONAL);
>> +		r = dma_mapping_error(dev, src[i]);
>> +		if (r) {
>> +			dev_err(dev, "%s: fail %d dma_map_page\n", __func__, r);
>> +			goto out_free_vram_pages;
>>   		}
>> +
>> +		pr_debug_ratelimited("dma mapping src to 0x%llx, pfn 0x%lx\n",
>> +							 src[i] >> PAGE_SHIFT, page_to_pfn(spage));
>> +		j++;
>>   	}
>>   
>>   	r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
>> @@ -356,12 +359,11 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
>>   out_free_vram_pages:
>>   	if (r) {
>>   		pr_debug("failed %d to copy memory to vram\n", r);
>> -		for (i = 0; i < npages && mpages; i++) {
>> -			if (!dst[i])
>> -				continue;
>> -			svm_migrate_put_vram_page(adev, dst[i]);
>> -			migrate->dst[i] = 0;
>> -			mpages--;
>> +		while (i--) {
>> +			if (migrate->dst[i]) {
>> +				svm_migrate_put_vram_page(adev, dst[i]);
>> +				migrate->dst[i] = 0;
>> +			}
>>   		}
>>   	}
>>   