[PATCH v3] drm/amdkfd: fix dma mapping leaking warning

Felix Kuehling felix.kuehling at amd.com
Fri Sep 17 19:52:34 UTC 2021


On 2021-09-17 2:04 p.m., Philip Yang wrote:
> For xnack off, restore work dma unmap previous system memory page, and
> dma map the updated system memory page to update GPU mapping, this is
> not dma mapping leaking, remove the WARN_ONCE for dma mapping leaking.
>
> prange->dma_addr store the VRAM page pfn after the range migrated to
> VRAM, should not dma unmap VRAM page when updating GPU mapping or
> remove prange. Add helper svm_is_valid_dma_mapping_addr to check VRAM
> page and error cases.
>
> Mask out SVM_RANGE_VRAM_DOMAIN flag in dma_addr before calling amdgpu vm
> update to avoid BUG_ON(*addr & 0xFFFF00000000003FULL), and set it again
> immediately after. This flag is used to know the type of page later to
> dma unmapping system memory page.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++++++++++++----
>   1 file changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 57318edc4020..e47f1219ad84 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -118,6 +118,13 @@ static void svm_range_remove_notifier(struct svm_range *prange)
>   		mmu_interval_notifier_remove(&prange->notifier);
>   }
>   
> +static bool
> +svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr)
> +{
> +	return dma_addr && !dma_mapping_error(dev, dma_addr) &&
> +	       !(dma_addr & SVM_RANGE_VRAM_DOMAIN);
> +}
> +
>   static int
>   svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
>   		      unsigned long offset, unsigned long npages,
> @@ -139,8 +146,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
>   
>   	addr += offset;
>   	for (i = 0; i < npages; i++) {
> -		if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
> -			      "leaking dma mapping\n"))
> +		if (svm_is_valid_dma_mapping_addr(dev, addr[i]))
>   			dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
>   
>   		page = hmm_pfn_to_page(hmm_pfns[i]);
> @@ -209,7 +215,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
>   		return;
>   
>   	for (i = offset; i < offset + npages; i++) {
> -		if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i]))
> +		if (!svm_is_valid_dma_mapping_addr(dev, dma_addr[i]))
>   			continue;
>   		pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT);
>   		dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
> @@ -1165,7 +1171,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	unsigned long last_start;
>   	int last_domain;
>   	int r = 0;
> -	int64_t i;
> +	int64_t i, j;
>   
>   	last_start = prange->start + offset;
>   
> @@ -1205,6 +1211,10 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   						NULL, dma_addr,
>   						&vm->last_update,
>   						&table_freed);
> +
> +		for (j = last_start; j <= prange->start + i; j++)
> +			dma_addr[j - prange->start] |= last_domain;

This would be slightly more readable like this:

	for (j = last_start - prange->start; j <= i; j++)
		dma_addr[j] |= last_domain;

Other than that, the patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


> +
>   		if (r) {
>   			pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
>   			goto out;


More information about the amd-gfx mailing list