[PATCH] drm/amdgpu: remove PT BOs when unmapping

Christian König ckoenig.leichtzumerken at gmail.com
Wed Oct 30 13:52:49 UTC 2019


Am 29.10.19 um 21:06 schrieb Huang, JinHuiEric:
> The issue is that PT BOs are not freed when unmapping a VA,
> which causes VRAM usage to accumulate heavily in some
> memory stress tests, such as the kfd big buffer stress test.
> Function amdgpu_vm_bo_update_mapping() is called by both
> amdgpu_vm_bo_update() and amdgpu_vm_clear_freed(). The
> solution is to replace the amdgpu_vm_bo_update_mapping() call
> in amdgpu_vm_clear_freed() with a function that removes the
> PT BOs, in order to reduce VRAM usage.

NAK, that is intentional behavior.

Otherwise we can run into out-of-memory situations when page tables need
to be allocated again under stress.

Regards,
Christian.

>
> Change-Id: Ic24e35bff8ca85265b418a642373f189d972a924
> Signed-off-by: Eric Huang <JinhuiEric.Huang at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 56 +++++++++++++++++++++++++++++-----
>   1 file changed, 48 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 0f4c3b2..8a480c7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1930,6 +1930,51 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   }
>   
>   /**
> + * amdgpu_vm_remove_ptes - free PT BOs
> + *
> + * @adev: amdgpu device structure
> + * @vm: amdgpu vm structure
> + * @start: start of mapped range
> + * @end: end of mapped entry
> + *
> + * Free the page table level.
> + */
> +static int amdgpu_vm_remove_ptes(struct amdgpu_device *adev,
> +		struct amdgpu_vm *vm, uint64_t start, uint64_t end)
> +{
> +	struct amdgpu_vm_pt_cursor cursor;
> +	unsigned shift, num_entries;
> +
> +	amdgpu_vm_pt_start(adev, vm, start, &cursor);
> +	while (cursor.level < AMDGPU_VM_PTB) {
> +		if (!amdgpu_vm_pt_descendant(adev, &cursor))
> +			return -ENOENT;
> +	}
> +
> +	while (cursor.pfn < end) {
> +		amdgpu_vm_free_table(cursor.entry);
> +		num_entries = amdgpu_vm_num_entries(adev, cursor.level - 1);
> +
> +		if (cursor.entry != &cursor.parent->entries[num_entries - 1]) {
> +			/* Next ptb entry */
> +			shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
> +			cursor.pfn += 1ULL << shift;
> +			cursor.pfn &= ~((1ULL << shift) - 1);
> +			cursor.entry++;
> +		} else {
> +			/* Next ptb entry in next pd0 entry */
> +			amdgpu_vm_pt_ancestor(&cursor);
> +			shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
> +			cursor.pfn += 1ULL << shift;
> +			cursor.pfn &= ~((1ULL << shift) - 1);
> +			amdgpu_vm_pt_descendant(adev, &cursor);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +/**
>    * amdgpu_vm_clear_freed - clear freed BOs in the PT
>    *
>    * @adev: amdgpu_device pointer
> @@ -1949,7 +1994,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>   			  struct dma_fence **fence)
>   {
>   	struct amdgpu_bo_va_mapping *mapping;
> -	uint64_t init_pte_value = 0;
>   	struct dma_fence *f = NULL;
>   	int r;
>   
> @@ -1958,13 +2002,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>   			struct amdgpu_bo_va_mapping, list);
>   		list_del(&mapping->list);
>   
> -		if (vm->pte_support_ats &&
> -		    mapping->start < AMDGPU_GMC_HOLE_START)
> -			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
> +		r = amdgpu_vm_remove_ptes(adev, vm,
> +				(mapping->start + 0x1ff) & (~0x1ffll),
> +				(mapping->last + 1) & (~0x1ffll));
>   
> -		r = amdgpu_vm_bo_update_mapping(adev, vm, false, NULL,
> -						mapping->start, mapping->last,
> -						init_pte_value, 0, NULL, &f);
>   		amdgpu_vm_free_mapping(adev, vm, mapping, f);
>   		if (r) {
>   			dma_fence_put(f);
> @@ -1980,7 +2021,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>   	}
>   
>   	return 0;
> -
>   }
>   
>   /**



More information about the amd-gfx mailing list