[PATCH 1/3] drm/amdkfd: classify and map mixed svm range pages in GPU

Felix Kuehling felix.kuehling at amd.com
Mon May 17 14:40:01 UTC 2021


On 2021-05-17 at 9:44 a.m., Felix Kuehling wrote:
> A few more comments inline. Sorry I took so long.
>
> On 2021-05-12 at 1:34 p.m., Alex Sierra wrote:
>
>> [Why]
>> svm ranges can contain a mix of pages from device and system memory.
>> A good example: after a prange has been allocated in VRAM, a
>> copy-on-write triggered by a fork invalidates some pages inside the
>> prange, leaving it with mixed pages.
>>
>> [How]
>> Classify each page inside a prange by its type, device or system
>> memory, during the dma mapping call. If a page belongs to the VRAM
>> domain, a flag is set in its dma_addr entry for each GPU.
>> Then, during GPU page table mapping, each group of contiguous pages
>> of the same type is mapped with its proper pte flags.
>>
>> v2:
>> Instead of using ttm_res to calculate vram pfns in the svm_range, the
>> real VRAM physical address is now stored in the dma_addr array.
>> This makes VRAM management more flexible and removes the need to keep
>> a BO reference in the svm_range.
>>
>> Signed-off-by: Alex Sierra <alex.sierra at amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 71 ++++++++++++++++++----------
>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  1 +
>>  2 files changed, 46 insertions(+), 26 deletions(-)
>>
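The net effect, in a nutshell (a sketch with a hypothetical vram_phys()
helper, not the actual patch code): while building the per-GPU dma_addr
array, device pages are tagged with their real VRAM physical address
plus a domain bit, and system pages get an ordinary DMA mapping:

	if (is_zone_device_page(page))
		addr[i] = vram_phys(adev, hmm_pfns[i]) | SVM_RANGE_VRAM_DOMAIN;
	else
		addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);

The GPU mapping step later splits the array into runs by that bit.
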
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 2b4318646a75..0ab10cb24205 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -119,11 +119,12 @@ static void svm_range_remove_notifier(struct svm_range *prange)
>>  }
>>  
>>  static int
>> -svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr,
>> +svm_range_dma_map_dev(struct amdgpu_device *adev, dma_addr_t **dma_addr,
>>  		      unsigned long *hmm_pfns, uint64_t npages)
>>  {
>>  	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
>>  	dma_addr_t *addr = *dma_addr;
>> +	struct device *dev = adev->dev;
>>  	struct page *page;
>>  	int i, r;
>>  
>> @@ -141,6 +142,14 @@ svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr,
>>  			dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
>>  
>>  		page = hmm_pfn_to_page(hmm_pfns[i]);
>> +		if (is_zone_device_page(page)) {
>> +			addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
>> +				   adev->vm_manager.vram_base_offset -
>> +				   adev->kfd.dev->pgmap.range.start;
>> +			addr[i] |= SVM_RANGE_VRAM_DOMAIN;
>> +			pr_debug("vram address detected: 0x%llx\n", addr[i]);
>> +			continue;
>> +		}
>>  		addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
>>  		r = dma_mapping_error(dev, addr[i]);
>>  		if (r) {
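
For anyone following the address math here, with illustrative numbers
only (assuming PAGE_SHIFT == 12, vram_base_offset == 0 and a pgmap
range starting at 0x200000000, none of which come from the patch): a
device private page with hmm_pfn == 0x200001 resolves to

	(0x200001UL << 12) + 0 - 0x200000000 == 0x1000

i.e. the page at VRAM offset 4 KiB, before the domain bit is ORed in.
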
>> @@ -175,7 +184,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
>>  		}
>>  		adev = (struct amdgpu_device *)pdd->dev->kgd;
>>  
>> -		r = svm_range_dma_map_dev(adev->dev, &prange->dma_addr[gpuidx],
>> +		r = svm_range_dma_map_dev(adev, &prange->dma_addr[gpuidx],
>>  					  hmm_pfns, prange->npages);
>>  		if (r)
>>  			break;
>> @@ -1003,21 +1012,22 @@ svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
>>  }
>>  
>>  static uint64_t
>> -svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
>> +svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
>> +			int domain)
>>  {
>>  	struct amdgpu_device *bo_adev;
>>  	uint32_t flags = prange->flags;
>>  	uint32_t mapping_flags = 0;
>>  	uint64_t pte_flags;
>> -	bool snoop = !prange->ttm_res;
>> +	bool snoop = !!domain;
> This is a bit obscure and it assumes that SVM_RANGE_VRAM_DOMAIN is 0.
> And it's not actually correct. Make this
>
> bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
>
>
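To spell out why this matters: with SVM_RANGE_VRAM_DOMAIN defined as
bit 0, !!domain is true exactly for VRAM pages, which flips the old
behavior:

	bool snoop_old = !prange->ttm_res;                  /* true for system pages */
	bool snoop_v2  = !!domain;                          /* true for VRAM - inverted */
	bool snoop_fix = (domain != SVM_RANGE_VRAM_DOMAIN); /* true for system pages */
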
>>  	bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
>>  
>> -	if (prange->svm_bo && prange->ttm_res)
>> +	if (domain == SVM_RANGE_VRAM_DOMAIN)
>>  		bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
>>  
>>  	switch (adev->asic_type) {
>>  	case CHIP_ARCTURUS:
>> -		if (prange->svm_bo && prange->ttm_res) {
>> +		if (domain == SVM_RANGE_VRAM_DOMAIN) {
>>  			if (bo_adev == adev) {
>>  				mapping_flags |= coherent ?
>>  					AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
>> @@ -1032,7 +1042,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
>>  		}
>>  		break;
>>  	case CHIP_ALDEBARAN:
>> -		if (prange->svm_bo && prange->ttm_res) {
>> +		if (domain == SVM_RANGE_VRAM_DOMAIN) {
>>  			if (bo_adev == adev) {
>>  				mapping_flags |= coherent ?
>>  					AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
>> @@ -1061,14 +1071,14 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
>>  		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
>>  
>>  	pte_flags = AMDGPU_PTE_VALID;
>> -	pte_flags |= prange->ttm_res ? 0 : AMDGPU_PTE_SYSTEM;
>> +	pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM;
>>  	pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
>>  
>>  	pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
>>  
>>  	pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n",
>>  		 prange->svms, prange->start, prange->last,
>> -		 prange->ttm_res ? 1:0, pte_flags, mapping_flags);
>> +		 (domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, mapping_flags);
>>  
>>  	return pte_flags;
>>  }
>> @@ -1138,31 +1148,40 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>  {
>>  	struct amdgpu_bo_va bo_va;
>>  	uint64_t pte_flags;
>> +	unsigned long last_start;
>> +	int last_domain;
>>  	int r = 0;
>> +	int64_t i;
>>  
>>  	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
>>  		 prange->last);
>>  
>> -	if (prange->svm_bo && prange->ttm_res) {
>> +	if (prange->svm_bo && prange->ttm_res)
>>  		bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
>> -		prange->mapping.bo_va = &bo_va;
>> -	}
>>  
>> -	prange->mapping.start = prange->start;
>> -	prange->mapping.last = prange->last;
>> -	prange->mapping.offset = prange->offset;
> It looks like you could remove mapping from the svm_range structure now.
>
>
>> -	pte_flags = svm_range_get_pte_flags(adev, prange);
>> +	last_start = prange->start;
>> +	for (i = 0; i < prange->npages; i++) {
>> +		last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
>> +		if ((prange->start + i) < prange->last &&
>> +		    last_domain == (dma_addr[i + 1] & SVM_RANGE_VRAM_DOMAIN))
>> +			continue;
>>  
>> -	r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
>> -					prange->mapping.start,
>> -					prange->mapping.last, pte_flags,
>> -					prange->mapping.offset,
>> -					prange->ttm_res ?
>> -						prange->ttm_res->mm_node : NULL,
>> -					dma_addr, &vm->last_update);
>> -	if (r) {
>> -		pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
>> -		goto out;
>> +		pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
>> +			 last_start, prange->start + i, last_domain ? "GPU" : "CPU");
>> +		pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
>> +		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
>> +						last_start,
>> +						prange->start + i, pte_flags,
>> +						prange->offset +
>> +						((last_start - prange->start) << PAGE_SHIFT),
>> +						NULL,
> I think this still needs to be ttm_res->mm_node if last_domain == VRAM.

Please ignore this comment and the one below. As discussed in our
meeting, we're always using the address array now, regardless of the
domain. So your code was correct. That was in fact what I asked for in
my last review.

Thanks,
  Felix


>
>
>> +						dma_addr,
> And this needs to be NULL if last_domain == VRAM.
>
> Regards,
>   Felix
>
>
>> +						&vm->last_update);
>> +		if (r) {
>> +			pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
>> +			goto out;
>> +		}
>> +		last_start = prange->start + i + 1;
>>  	}
>>  
>>  	r = amdgpu_vm_update_pdes(adev, vm, false);
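
The run-splitting loop above may be easier to follow as a standalone
sketch (plain C; map_runs() and the printf are hypothetical stand-ins
for the real amdgpu_vm_bo_update_mapping() call):

	#include <stdint.h>
	#include <stdio.h>

	#define VRAM_DOMAIN 1ULL

	static void map_runs(const uint64_t *addr, unsigned long start,
			     unsigned long npages)
	{
		unsigned long run_start = start;
		unsigned long i;

		for (i = 0; i < npages; i++) {
			uint64_t dom = addr[i] & VRAM_DOMAIN;

			/* Keep extending the run while the next page has
			 * the same domain bit. */
			if (i + 1 < npages && dom == (addr[i + 1] & VRAM_DOMAIN))
				continue;

			printf("map [0x%lx 0x%lx] as %s\n", run_start,
			       start + i, dom ? "VRAM" : "system");
			run_start = start + i + 1;
		}
	}

For domains [VRAM VRAM sys sys VRAM VRAM] this emits three mappings,
which matches what the loop in the patch does with real PTE flags.
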
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> index 08542fe39303..e68aa51322df 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> @@ -35,6 +35,7 @@
>>  #include "amdgpu.h"
>>  #include "kfd_priv.h"
>>  
>> +#define SVM_RANGE_VRAM_DOMAIN (1UL << 0)
>>  #define SVM_ADEV_PGMAP_OWNER(adev)\
>>  			((adev)->hive ? (void *)(adev)->hive : (void *)(adev))
>>  
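One subtlety worth noting about the new define: stashing the tag in
bit 0 works because full-page DMA mappings and VRAM page addresses are
page-aligned, so the low bits of a dma_addr_t are free. Hypothetical
helpers (not part of the patch) showing how the tag would be tested
and stripped before the address reaches hardware:

	static inline bool svm_addr_is_vram(dma_addr_t addr)
	{
		return addr & SVM_RANGE_VRAM_DOMAIN;
	}

	static inline dma_addr_t svm_addr_untag(dma_addr_t addr)
	{
		return addr & ~(dma_addr_t)SVM_RANGE_VRAM_DOMAIN;
	}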