[PATCH 2/2] drm/amdgpu: nuke amdgpu_vm_bo_split_mapping

Christian König ckoenig.leichtzumerken at gmail.com
Thu Oct 15 07:04:10 UTC 2020


On 2020-10-14 22:48, Luben Tuikov wrote:
> On 2020-10-13 13:08, Christian König wrote:
>> Merge the functionality mostly into amdgpu_vm_bo_update_mapping.
>>
>> This way we can even handle small contiguous system pages without
>> too much extra CPU overhead.
>>
>> Large contiguous allocations (1GB) still improve from 1.2 to 0.3 seconds.
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 231 +++++++++++--------------
>>   1 file changed, 103 insertions(+), 128 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 3cd949aad500..48342e54b2cb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1429,21 +1429,18 @@ static void amdgpu_vm_fragment(struct amdgpu_vm_update_params *params,
>>    * 0 for success, -EINVAL for failure.
>>    */
>>   static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>> +				 struct amdgpu_vm_pt_cursor *cursor,
>>   				 uint64_t start, uint64_t end,
>>   				 uint64_t dst, uint64_t flags)
>>   {
>>   	struct amdgpu_device *adev = params->adev;
>> -	struct amdgpu_vm_pt_cursor cursor;
>>   	uint64_t frag_start = start, frag_end;
>>   	unsigned int frag;
>>   	int r;
>>   
>>   	/* figure out the initial fragment */
>>   	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
>> -
>> -	/* walk over the address space and update the PTs */
>> -	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>> -	while (cursor.pfn < end) {
>> +	while (cursor->pfn < end) {
>>   		unsigned shift, parent_shift, mask;
>>   		uint64_t incr, entry_end, pe_start;
>>   		struct amdgpu_bo *pt;
>> @@ -1453,22 +1450,22 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>   			 * address range are actually allocated
>>   			 */
>>   			r = amdgpu_vm_alloc_pts(params->adev, params->vm,
>> -						&cursor, params->immediate);
>> +						cursor, params->immediate);
>>   			if (r)
>>   				return r;
>>   		}
>>   
>> -		shift = amdgpu_vm_level_shift(adev, cursor.level);
>> -		parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
>> +		shift = amdgpu_vm_level_shift(adev, cursor->level);
>> +		parent_shift = amdgpu_vm_level_shift(adev, cursor->level - 1);
>>   		if (params->unlocked) {
>>   			/* Unlocked updates are only allowed on the leaves */
>> -			if (amdgpu_vm_pt_descendant(adev, &cursor))
>> +			if (amdgpu_vm_pt_descendant(adev, cursor))
>>   				continue;
>>   		} else if (adev->asic_type < CHIP_VEGA10 &&
>>   			   (flags & AMDGPU_PTE_VALID)) {
>>   			/* No huge page support before GMC v9 */
>> -			if (cursor.level != AMDGPU_VM_PTB) {
>> -				if (!amdgpu_vm_pt_descendant(adev, &cursor))
>> +			if (cursor->level != AMDGPU_VM_PTB) {
>> +				if (!amdgpu_vm_pt_descendant(adev, cursor))
>>   					return -ENOENT;
>>   				continue;
>>   			}
>> @@ -1477,18 +1474,18 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>   			 * smaller than the address shift. Go to the next
>>   			 * child entry and try again.
>>   			 */
>> -			if (amdgpu_vm_pt_descendant(adev, &cursor))
>> +			if (amdgpu_vm_pt_descendant(adev, cursor))
>>   				continue;
>>   		} else if (frag >= parent_shift) {
>>   			/* If the fragment size is even larger than the parent
>>   			 * shift we should go up one level and check it again.
>>   			 */
>> -			if (!amdgpu_vm_pt_ancestor(&cursor))
>> +			if (!amdgpu_vm_pt_ancestor(cursor))
>>   				return -EINVAL;
>>   			continue;
>>   		}
>>   
>> -		pt = cursor.entry->base.bo;
>> +		pt = cursor->entry->base.bo;
>>   		if (!pt) {
>>   			/* We need all PDs and PTs for mapping something, */
>>   			if (flags & AMDGPU_PTE_VALID)
>> @@ -1497,10 +1494,10 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>   			/* but unmapping something can happen at a higher
>>   			 * level.
>>   			 */
>> -			if (!amdgpu_vm_pt_ancestor(&cursor))
>> +			if (!amdgpu_vm_pt_ancestor(cursor))
>>   				return -EINVAL;
>>   
>> -			pt = cursor.entry->base.bo;
>> +			pt = cursor->entry->base.bo;
>>   			shift = parent_shift;
>>   			frag_end = max(frag_end, ALIGN(frag_start + 1,
>>   				   1ULL << shift));
>> @@ -1508,10 +1505,10 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>   
>>   		/* Looks good so far, calculate parameters for the update */
>>   		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
>> -		mask = amdgpu_vm_entries_mask(adev, cursor.level);
>> -		pe_start = ((cursor.pfn >> shift) & mask) * 8;
>> +		mask = amdgpu_vm_entries_mask(adev, cursor->level);
>> +		pe_start = ((cursor->pfn >> shift) & mask) * 8;
>>   		entry_end = ((uint64_t)mask + 1) << shift;
>> -		entry_end += cursor.pfn & ~(entry_end - 1);
>> +		entry_end += cursor->pfn & ~(entry_end - 1);
>>   		entry_end = min(entry_end, end);
>>   
>>   		do {
>> @@ -1529,7 +1526,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>   						    nptes, dst, incr, upd_flags,
>>   						    vm->task_info.pid,
>>   						    vm->immediate.fence_context);
>> -			amdgpu_vm_update_flags(params, pt, cursor.level,
>> +			amdgpu_vm_update_flags(params, pt, cursor->level,
>>   					       pe_start, dst, nptes, incr,
>>   					       upd_flags);
>>   
>> @@ -1546,21 +1543,21 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>   			}
>>   		} while (frag_start < entry_end);
>>   
>> -		if (amdgpu_vm_pt_descendant(adev, &cursor)) {
>> +		if (amdgpu_vm_pt_descendant(adev, cursor)) {
>>   			/* Free all child entries.
>>   			 * Update the tables with the flags and addresses and free up subsequent
>>   			 * tables in the case of huge pages or freed up areas.
>>   			 * This is the maximum you can free, because all other page tables are not
>>   			 * completely covered by the range and so potentially still in use.
>>   			 */
>> -			while (cursor.pfn < frag_start) {
>> -				amdgpu_vm_free_pts(adev, params->vm, &cursor);
>> -				amdgpu_vm_pt_next(adev, &cursor);
>> +			while (cursor->pfn < frag_start) {
>> +				amdgpu_vm_free_pts(adev, params->vm, cursor);
>> +				amdgpu_vm_pt_next(adev, cursor);
>>   			}
>>   
>>   		} else if (frag >= shift) {
>>   			/* or just move on to the next on the same level. */
>> -			amdgpu_vm_pt_next(adev, &cursor);
>> +			amdgpu_vm_pt_next(adev, cursor);
>>   		}
>>   	}
>>   
>> @@ -1570,7 +1567,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>   /**
>>    * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
>>    *
>> - * @adev: amdgpu_device pointer
>> + * @adev: amdgpu_device pointer of the VM
>> + * @bo_adev: amdgpu_device pointer of the mapped BO
>>    * @vm: requested vm
>>    * @immediate: immediate submission in a page fault
>>    * @unlocked: unlocked invalidation during MM callback
>> @@ -1578,7 +1576,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>    * @start: start of mapped range
>>    * @last: last mapped entry
>>    * @flags: flags for the entries
>> - * @addr: addr to set the area to
>> + * @offset: offset into nodes and pages_addr
>> + * @nodes: array of drm_mm_nodes with the MC addresses
>>    * @pages_addr: DMA addresses to use for mapping
>>    * @fence: optional resulting fence
>>    *
>> @@ -1588,15 +1587,19 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>    * 0 for success, -EINVAL for failure.
>>    */
>>   static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>> +				       struct amdgpu_device *bo_adev,
>>   				       struct amdgpu_vm *vm, bool immediate,
>>   				       bool unlocked, struct dma_resv *resv,
>>   				       uint64_t start, uint64_t last,
>> -				       uint64_t flags, uint64_t addr,
>> +				       uint64_t flags, uint64_t offset,
>> +				       struct drm_mm_node *nodes,
>>   				       dma_addr_t *pages_addr,
>>   				       struct dma_fence **fence)
>>   {
>>   	struct amdgpu_vm_update_params params;
>> +	struct amdgpu_vm_pt_cursor cursor;
>>   	enum amdgpu_sync_mode sync_mode;
>> +	uint64_t pfn;
>>   	int r;
>>   
>>   	memset(&params, 0, sizeof(params));
>> @@ -1614,6 +1617,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>   	else
>>   		sync_mode = AMDGPU_SYNC_EXPLICIT;
>>   
>> +	pfn = offset >> PAGE_SHIFT;
>> +	if (nodes) {
>> +		while (pfn >= nodes->size) {
>> +			pfn -= nodes->size;
>> +			++nodes;
>> +		}
>> +	}
>> +
> I believe here you can just do:
>
> 	if (nodes)
> 		nodes += pfn / nodes->size;
>
> This holds as long as pfn and nodes->size are non-negative
> integers and nodes->size is non-zero, conditions which
> appear to be satisfied here.

That won't work: nodes->size is not constant, but depends on which
bits were set in the original allocation size.

In other words, if you allocate 84KiB of memory you get node sizes of
64KiB, 16KiB and 4KiB, IIRC.

The exception is an allocation larger than 2MiB in an out-of-memory
situation; in that case we cap the nodes at 2MiB. But that case is so
rare that it is probably not worth the extra handling.
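To illustrate, here is a quick stand-alone sketch (plain C, with a
hypothetical simplified stand-in for struct drm_mm_node that only keeps
the size in pages, and an 84KiB allocation split into 16 + 4 + 1 pages).
The subtraction loop from the patch lands in the right node, while a
single division by the first node's size does not, because it silently
assumes all nodes are equally large:

	#include <stdio.h>
	#include <stdint.h>

	/* Simplified, hypothetical stand-in for struct drm_mm_node;
	 * only the size (in GPU pages) matters here.
	 */
	struct fake_node {
		uint64_t size;
	};

	int main(void)
	{
		/* 84KiB split into power-of-two chunks: 64KiB + 16KiB
		 * + 4KiB, i.e. 16 + 4 + 1 pages of 4KiB each.
		 */
		struct fake_node nodes[] = { { 16 }, { 4 }, { 1 } };
		struct fake_node *node = nodes;
		uint64_t pfn = 20; /* page offset into the allocation */

		/* The walk from the patch: skip whole nodes until the
		 * offset falls inside the current one.  Works with
		 * mixed node sizes.
		 */
		while (pfn >= node->size) {
			pfn -= node->size;
			++node;
		}
		printf("loop:     node %td, offset %llu\n",
		       node - nodes, (unsigned long long)pfn);

		/* The proposed shortcut implicitly assumes every node
		 * has the size of the first one, so it stops one node
		 * too early here and never adjusts the remaining
		 * offset.
		 */
		node = nodes + 20 / nodes[0].size;
		printf("division: node %td\n", node - nodes);

		return 0;
	}

The loop prints node 2 with offset 0; the division picks node 1 and
leaves the offset untouched.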

Regards,
Christian.

>
> Regards,
> Luben
>
>
>>   	amdgpu_vm_eviction_lock(vm);
>>   	if (vm->evicting) {
>>   		r = -EBUSY;
>> @@ -1632,105 +1643,47 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>   	if (r)
>>   		goto error_unlock;
>>   
>> -	r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
>> -	if (r)
>> -		goto error_unlock;
>> -
>> -	r = vm->update_funcs->commit(&params, fence);
>> -
>> -error_unlock:
>> -	amdgpu_vm_eviction_unlock(vm);
>> -	return r;
>> -}
>> -
>> -/**
>> - * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
>> - *
>> - * @adev: amdgpu_device pointer
>> - * @resv: fences we need to sync to
>> - * @pages_addr: DMA addresses to use for mapping
>> - * @vm: requested vm
>> - * @mapping: mapped range and flags to use for the update
>> - * @flags: HW flags for the mapping
>> - * @bo_adev: amdgpu_device pointer that bo actually been allocated
>> - * @nodes: array of drm_mm_nodes with the MC addresses
>> - * @fence: optional resulting fence
>> - *
>> - * Split the mapping into smaller chunks so that each update fits
>> - * into a SDMA IB.
>> - *
>> - * Returns:
>> - * 0 for success, -EINVAL for failure.
>> - */
>> -static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>> -				      struct dma_resv *resv,
>> -				      dma_addr_t *pages_addr,
>> -				      struct amdgpu_vm *vm,
>> -				      struct amdgpu_bo_va_mapping *mapping,
>> -				      uint64_t flags,
>> -				      struct amdgpu_device *bo_adev,
>> -				      struct drm_mm_node *nodes,
>> -				      struct dma_fence **fence)
>> -{
>> -	unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
>> -	uint64_t pfn, start = mapping->start;
>> -	int r;
>> -
>> -	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
>> -	 * but in case of something, we filter the flags in first place
>> -	 */
>> -	if (!(mapping->flags & AMDGPU_PTE_READABLE))
>> -		flags &= ~AMDGPU_PTE_READABLE;
>> -	if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
>> -		flags &= ~AMDGPU_PTE_WRITEABLE;
>> -
>> -	/* Apply ASIC specific mapping flags */
>> -	amdgpu_gmc_get_vm_pte(adev, mapping, &flags);
>> -
>> -	trace_amdgpu_vm_bo_update(mapping);
>> -
>> -	pfn = mapping->offset >> PAGE_SHIFT;
>> -	if (nodes) {
>> -		while (pfn >= nodes->size) {
>> -			pfn -= nodes->size;
>> -			++nodes;
>> -		}
>> -	}
>> -
>> +	amdgpu_vm_pt_start(adev, vm, start, &cursor);
>>   	do {
>> -		dma_addr_t *dma_addr = NULL;
>> -		uint64_t max_entries;
>> -		uint64_t addr, last;
>> +		uint64_t tmp, num_entries, addr;
>>   
>> -		max_entries = mapping->last - start + 1;
>> +		num_entries = last - start + 1;
>>   		if (nodes) {
>>   			addr = nodes->start << PAGE_SHIFT;
>> -			max_entries = min((nodes->size - pfn) *
>> -				AMDGPU_GPU_PAGES_IN_CPU_PAGE, max_entries);
>> +			num_entries = min((nodes->size - pfn) *
>> +				AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries);
>>   		} else {
>>   			addr = 0;
>>   		}
>>   
>>   		if (pages_addr) {
>> -			uint64_t count;
>> +			bool contiguous = true;
>>   
>> -			for (count = 1;
>> -			     count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>> -			     ++count) {
>> -				uint64_t idx = pfn + count;
>> +			if (num_entries < AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
>> +				uint64_t count;
>>   
>> -				if (pages_addr[idx] !=
>> -				    (pages_addr[idx - 1] + PAGE_SIZE))
>> -					break;
>> +				contiguous = pages_addr[pfn + 1] ==
>> +					pages_addr[pfn] + PAGE_SIZE;
>> +
>> +				tmp = num_entries /
>> +					AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>> +				for (count = 2; count < tmp; ++count) {
>> +					uint64_t idx = pfn + count;
>> +
>> +					if (contiguous != (pages_addr[idx] ==
>> +					    pages_addr[idx - 1] + PAGE_SIZE))
>> +						break;
>> +				}
>> +				num_entries = count *
>> +					AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>>   			}
>>   
>> -			if (count < min_linear_pages) {
>> +			if (!contiguous) {
>>   				addr = pfn << PAGE_SHIFT;
>> -				dma_addr = pages_addr;
>> +				params.pages_addr = pages_addr;
>>   			} else {
>>   				addr = pages_addr[pfn];
>> -				max_entries = count *
>> -					AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>> +				params.pages_addr = NULL;
>>   			}
>>   
>>   		} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>> @@ -1738,23 +1691,26 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>>   			addr += pfn << PAGE_SHIFT;
>>   		}
>>   
>> -		last = start + max_entries - 1;
>> -		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>> -						start, last, flags, addr,
>> -						dma_addr, fence);
>> +		tmp = start + num_entries;
>> +		r = amdgpu_vm_update_ptes(&params, &cursor, start, tmp, addr,
>> +					  flags);
>>   		if (r)
>> -			return r;
>> +			goto error_unlock;
>>   
>> -		pfn += (last - start + 1) / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>> +		pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>>   		if (nodes && nodes->size == pfn) {
>>   			pfn = 0;
>>   			++nodes;
>>   		}
>> -		start = last + 1;
>> +		start = tmp;
>>   
>> -	} while (unlikely(start != mapping->last + 1));
>> +	} while (unlikely(start != last + 1));
>>   
>> -	return 0;
>> +	r = vm->update_funcs->commit(&params, fence);
>> +
>> +error_unlock:
>> +	amdgpu_vm_eviction_unlock(vm);
>> +	return r;
>>   }
>>   
>>   /**
>> @@ -1835,9 +1791,26 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>   	}
>>   
>>   	list_for_each_entry(mapping, &bo_va->invalids, list) {
>> -		r = amdgpu_vm_bo_split_mapping(adev, resv, pages_addr, vm,
>> -					       mapping, flags, bo_adev, nodes,
>> -					       last_update);
>> +		uint64_t update_flags = flags;
>> +
>> +		/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
>> +		 * but in case of something, we filter the flags in first place
>> +		 */
>> +		if (!(mapping->flags & AMDGPU_PTE_READABLE))
>> +			update_flags &= ~AMDGPU_PTE_READABLE;
>> +		if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
>> +			update_flags &= ~AMDGPU_PTE_WRITEABLE;
>> +
>> +		/* Apply ASIC specific mapping flags */
>> +		amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags);
>> +
>> +		trace_amdgpu_vm_bo_update(mapping);
>> +
>> +		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
>> +						resv, mapping->start,
>> +						mapping->last, update_flags,
>> +						mapping->offset, nodes,
>> +						pages_addr, last_update);
>>   		if (r)
>>   			return r;
>>   	}
>> @@ -2045,9 +2018,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>   		    mapping->start < AMDGPU_GMC_HOLE_START)
>>   			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>   
>> -		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>> -						mapping->start, mapping->last,
>> -						init_pte_value, 0, NULL, &f);
>> +		r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
>> +						resv, mapping->start,
>> +						mapping->last, init_pte_value,
>> +						0, NULL, NULL, &f);
>>   		amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>   		if (r) {
>>   			dma_fence_put(f);
>> @@ -3375,8 +3349,9 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
>>   		value = 0;
>>   	}
>>   
>> -	r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr,
>> -					addr + 1, flags, value, NULL, NULL);
>> +	r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
>> +					addr + 1, flags, value, NULL, NULL,
>> +					NULL);
>>   	if (r)
>>   		goto error_unlock;
>>   
>>


