[PATCH 3/5] drm/amdgpu: handle multiple MM nodes in the VMs
Felix Kuehling
felix.kuehling at amd.com
Mon Aug 29 22:29:46 UTC 2016
I looked really hard and couldn't find anything obviously broken. It
makes me a bit nervous that there is no bounds checking on the nodes
array, though.
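For illustration only, not part of the patch: one way to guard that walk
would be to pass the node count in alongside the array (num_nodes below
is a hypothetical parameter):

	/* hypothetical: num_nodes passed in next to the nodes array */
	while (offset > (nodes->size << PAGE_SHIFT)) {
		offset -= nodes->size << PAGE_SHIFT;
		if (WARN_ON(--num_nodes == 0))
			return -EINVAL;
		++nodes;
	}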
Just one minor nitpick, inline below.
With that fixed, Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
On 16-08-29 05:20 AM, Christian König wrote:
> From: Christian König <christian.koenig at amd.com>
>
> This allows us to map scattered VRAM BOs to the VMs.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 +++++++++++++++++++---------------
> 1 file changed, 44 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index ea1bd67..df6506d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1048,8 +1048,8 @@ error_free:
> * @pages_addr: DMA addresses to use for mapping
> * @vm: requested vm
> * @mapping: mapped range and flags to use for the update
> - * @addr: addr to set the area to
> * @flags: HW flags for the mapping
> + * @nodes: array of drm_mm_nodes with the MC addresses
> * @fence: optional resulting fence
> *
> * Split the mapping into smaller chunks so that each update fits
> @@ -1062,12 +1062,11 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
> dma_addr_t *pages_addr,
> struct amdgpu_vm *vm,
> struct amdgpu_bo_va_mapping *mapping,
> - uint32_t flags, uint64_t addr,
> + uint32_t flags,
> + struct drm_mm_node *nodes,
> struct fence **fence)
> {
> - const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;
> -
> - uint64_t src = 0, start = mapping->it.start;
> + uint64_t offset, src = 0, start = mapping->it.start;
> int r;
>
> /* normally, bo_va->flags only contains READABLE and WRITEABLE bit go here
> @@ -1080,23 +1079,38 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>
> trace_amdgpu_vm_bo_update(mapping);
>
> - if (pages_addr) {
> - if (flags == gtt_flags)
> - src = adev->gart.table_addr + (addr >> 12) * 8;
> - addr = 0;
> + offset = mapping->offset;
> + if (nodes) {
> + while (offset > (nodes->size << PAGE_SHIFT)) {
> + offset -= (nodes->size << PAGE_SHIFT);
> + ++nodes;
> + }
> }
> - addr += mapping->offset;
>
> - if (!pages_addr || src)
> - return amdgpu_vm_bo_update_mapping(adev, exclusive,
> - src, pages_addr, vm,
> - start, mapping->it.last,
> - flags, addr, fence);
> + do {
> + uint64_t max_entries;
> + uint64_t addr, last;
>
> - while (start != mapping->it.last + 1) {
> - uint64_t last;
> + if (nodes) {
> + addr = nodes->start << PAGE_SHIFT;
> + max_entries = nodes->size - (offset >> PAGE_SHIFT);
[FK] I think max_entries is the number of GPU page table entries, so you
should convert from the system page size to the GPU page size:

	max_entries = ((nodes->size << PAGE_SHIFT) - offset) >>
		AMDGPU_GPU_PAGE_SHIFT;
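Concretely, with 64 KiB system pages (PAGE_SHIFT == 16) and 4 KiB GPU
pages (AMDGPU_GPU_PAGE_SHIFT == 12), a one-page node covers
(1 << 16) >> 12 = 16 PTEs; counting system pages instead would report 1
and split the update into sixteen times as many chunks as needed.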
> + } else {
> + addr = 0;
> + max_entries = S64_MAX;
> + }
> + addr += offset;
> +
> + if (pages_addr) {
> + if (flags == gtt_flags)
> + src = adev->gart.table_addr + (addr >> 12) * 8;
> + else
> + max_entries = min(max_entries, 16ull * 1024ull);
> + addr = 0;
> + } else if (flags & AMDGPU_PTE_VALID) {
> + addr += adev->vm_manager.vram_base_offset;
> + }
>
> - last = min((uint64_t)mapping->it.last, start + max_size - 1);
> + last = min((uint64_t)mapping->it.last, start + max_entries - 1);
> r = amdgpu_vm_bo_update_mapping(adev, exclusive,
> src, pages_addr, vm,
> start, last, flags, addr,
> @@ -1104,9 +1118,14 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
> if (r)
> return r;
>
> + offset += (last - start + 1) * AMDGPU_GPU_PAGE_SIZE;
> + if (nodes && nodes->size == (offset >> PAGE_SHIFT)) {
> + offset = 0;
> + ++nodes;
> + }
> start = last + 1;
> - addr += max_size * AMDGPU_GPU_PAGE_SIZE;
> - }
> +
> + } while (likely(start != mapping->it.last + 1));
>
> return 0;
> }
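Purely as a reading aid, the new loop reduces to roughly this shape
(update_mapping() is a stand-in for amdgpu_vm_bo_update_mapping(), and
the pages_addr/GTT and vram_base_offset special cases are omitted):

	/* skip whole nodes until offset falls inside the current one */
	while (offset > (nodes->size << PAGE_SHIFT)) {
		offset -= nodes->size << PAGE_SHIFT;
		++nodes;
	}

	do {
		/* map as much of the current node as the mapping needs */
		addr = (nodes->start << PAGE_SHIFT) + offset;
		max_entries = nodes->size - (offset >> PAGE_SHIFT);
		last = min(mapping->it.last, start + max_entries - 1);

		update_mapping(start, last, addr);

		/* move to the next node once this one is fully consumed */
		offset += (last - start + 1) * AMDGPU_GPU_PAGE_SIZE;
		if (nodes->size == (offset >> PAGE_SHIFT)) {
			offset = 0;
			++nodes;
		}
		start = last + 1;
	} while (start != mapping->it.last + 1);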
> @@ -1130,34 +1149,24 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
> dma_addr_t *pages_addr = NULL;
> uint32_t gtt_flags, flags;
> struct ttm_mem_reg *mem;
> + struct drm_mm_node *nodes;
> struct fence *exclusive;
> - uint64_t addr;
> int r;
>
> if (clear) {
> mem = NULL;
> - addr = 0;
> + nodes = NULL;
> exclusive = NULL;
> } else {
> struct ttm_dma_tt *ttm;
>
> mem = &bo_va->bo->tbo.mem;
> - addr = (u64)mem->start << PAGE_SHIFT;
> - switch (mem->mem_type) {
> - case TTM_PL_TT:
> + nodes = mem->mm_node;
> + if (mem->mem_type == TTM_PL_TT) {
> ttm = container_of(bo_va->bo->tbo.ttm, struct
> ttm_dma_tt, ttm);
> pages_addr = ttm->dma_address;
> - break;
> -
> - case TTM_PL_VRAM:
> - addr += adev->vm_manager.vram_base_offset;
> - break;
> -
> - default:
> - break;
> }
> -
> exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv);
> }
>
> @@ -1172,7 +1181,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
> list_for_each_entry(mapping, &bo_va->invalids, list) {
> r = amdgpu_vm_bo_split_mapping(adev, exclusive,
> gtt_flags, pages_addr, vm,
> - mapping, flags, addr,
> + mapping, flags, nodes,
> &bo_va->last_pt_update);
> if (r)
> return r;