[PATCH v2 2/2] drm/amdkfd: Update mapping if range attributes changed

Felix Kuehling felix.kuehling at amd.com
Fri Apr 22 22:05:30 UTC 2022


On 2022-04-22 10:06, Philip Yang wrote:
> Changing SVM range mapping flags or access attributes doesn't trigger
> migration. If the range is already mapped on GPUs, we should update the
> GPU mapping and pass the flush_tlb flag as true to amdgpu vm.
>
> Changing SVM range preferred_loc or migration granularity doesn't
> require updating the GPU mapping, so skip the validate_and_map.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 46 +++++++++++++++++++---------
>   1 file changed, 32 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 8a077cd066a1..e740384df9c7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -686,7 +686,8 @@ svm_range_check_attr(struct kfd_process *p,
>   
>   static void
>   svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
> -		      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
> +		      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
> +		      bool *update_mapping)
>   {
>   	uint32_t i;
>   	int gpuidx;
> @@ -702,6 +703,7 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
>   		case KFD_IOCTL_SVM_ATTR_ACCESS:
>   		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
>   		case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
> +			*update_mapping = true;
>   			gpuidx = kfd_process_gpuidx_from_gpuid(p,
>   							       attrs[i].value);
>   			if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
> @@ -716,9 +718,11 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
>   			}
>   			break;
>   		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
> +			*update_mapping = true;
>   			prange->flags |= attrs[i].value;
>   			break;
>   		case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
> +			*update_mapping = true;
>   			prange->flags &= ~attrs[i].value;
>   			break;
>   		case KFD_IOCTL_SVM_ATTR_GRANULARITY:
> @@ -1254,7 +1258,7 @@ static int
>   svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>   		     unsigned long offset, unsigned long npages, bool readonly,
>   		     dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
> -		     struct dma_fence **fence)
> +		     struct dma_fence **fence, bool flush_tlb)
>   {
>   	struct amdgpu_device *adev = pdd->dev->adev;
>   	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
> @@ -1292,7 +1296,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>   			 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>   			 pte_flags);
>   
> -		r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
> +		r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
>   					   last_start, prange->start + i,
>   					   pte_flags,
>   					   last_start - prange->start,
> @@ -1326,7 +1330,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>   static int
>   svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
>   		      unsigned long npages, bool readonly,
> -		      unsigned long *bitmap, bool wait)
> +		      unsigned long *bitmap, bool wait, bool flush_tlb)
>   {
>   	struct kfd_process_device *pdd;
>   	struct amdgpu_device *bo_adev;
> @@ -1361,7 +1365,8 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
>   
>   		r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
>   					 prange->dma_addr[gpuidx],
> -					 bo_adev, wait ? &fence : NULL);
> +					 bo_adev, wait ? &fence : NULL,
> +					 flush_tlb);
>   		if (r)
>   			break;
>   
> @@ -1482,8 +1487,8 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
>    * 5. Release page table (and SVM BO) reservation
>    */
>   static int svm_range_validate_and_map(struct mm_struct *mm,
> -				      struct svm_range *prange,
> -				      int32_t gpuidx, bool intr, bool wait)
> +				      struct svm_range *prange, int32_t gpuidx,
> +				      bool intr, bool wait, bool flush_tlb)
>   {
>   	struct svm_validate_context ctx;
>   	unsigned long start, end, addr;
> @@ -1522,8 +1527,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>   			  prange->bitmap_aip, MAX_GPU_INSTANCE);
>   	}
>   
> -	if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
> -		return 0;
> +	if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) {
> +		if (!prange->mapped_to_gpu)
> +			return 0;
> +
> +		bitmap_copy(ctx.bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
> +	}
>   
>   	if (prange->actual_loc && !prange->ttm_res) {
>   		/* This should never happen. actual_loc gets set by
> @@ -1595,7 +1604,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>   		}
>   
>   		r = svm_range_map_to_gpus(prange, offset, npages, readonly,
> -					  ctx.bitmap, wait);
> +					  ctx.bitmap, wait, flush_tlb);
>   
>   unlock_out:
>   		svm_range_unlock(prange);
> @@ -1691,7 +1700,7 @@ static void svm_range_restore_work(struct work_struct *work)
>   		mutex_lock(&prange->migrate_mutex);
>   
>   		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
> -					       false, true);
> +					       false, true, false);
>   		if (r)
>   			pr_debug("failed %d to map 0x%lx to gpus\n", r,
>   				 prange->start);
> @@ -2847,7 +2856,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>   		}
>   	}
>   
> -	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
> +	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
>   	if (r)
>   		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
>   			 r, svms, prange->start, prange->last);
> @@ -3264,6 +3273,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>   	struct svm_range_list *svms;
>   	struct svm_range *prange;
>   	struct svm_range *next;
> +	bool update_mapping = false;
> +	bool flush_tlb;
>   	int r = 0;
>   
>   	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
> @@ -3302,7 +3313,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>   		svm_range_add_notifier_locked(mm, prange);
>   	}
>   	list_for_each_entry(prange, &update_list, update_list) {
> -		svm_range_apply_attrs(p, prange, nattr, attrs);
> +		svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping);
>   		/* TODO: unmap ranges from GPU that lost access */
>   	}
>   	list_for_each_entry_safe(prange, next, &remove_list, update_list) {
> @@ -3335,8 +3346,15 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>   			continue;
>   		}
>   
> +		if (!migrated && !update_mapping) {
> +			mutex_unlock(&prange->migrate_mutex);
> +			continue;
> +		}
> +
> +		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
> +

Please check that this doesn't break the XNACK off case. If a new range 
is created, and that does not trigger a migration or any of the 
conditions that set update_mapping, we still need to make sure the GPU 
mapping is created so that a GPU access to the new range won't fault.

Regards,
   Felix


>   		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
> -					       true, true);
> +					       true, true, flush_tlb);
>   		if (r)
>   			pr_debug("failed %d to map svm range\n", r);
>   


More information about the amd-gfx mailing list