[Patch v4 07/24] drm/amdkfd: CRIU Implement KFD resume ioctl

Felix Kuehling felix.kuehling at amd.com
Mon Jan 10 23:16:26 UTC 2022


On 2021-12-22 7:36 p.m., Rajneesh Bhardwaj wrote:
> This adds support to create userptr BOs on restore and introduces a new
> ioctl to restart memory notifiers for the restored userptr BOs.
> When doing CRIU restore MMU notifications can happen anytime after we call
> amdgpu_mn_register. Prevent MMU notifications until we reach stage-4 of the
> restore process i.e. criu_resume ioctl is received, and the process is
> ready to be resumed. This ioctl is different from other KFD CRIU ioctls
> since its called by CRIU master restore process for all the target
> processes being resumed by CRIU.
>
> Signed-off-by: David Yat Sin <david.yatsin at amd.com>
> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  6 ++-
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 51 +++++++++++++++++--
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 44 ++++++++++++++--
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  1 +
>   drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 35 +++++++++++--
>   5 files changed, 123 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index fcbc8a9c9e06..5c5fc839f701 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -131,6 +131,7 @@ struct amdkfd_process_info {
>   	atomic_t evicted_bos;
>   	struct delayed_work restore_userptr_work;
>   	struct pid *pid;
> +	bool block_mmu_notifications;
>   };
>   
>   int amdgpu_amdkfd_init(void);
> @@ -269,7 +270,7 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
>   int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>   		struct amdgpu_device *adev, uint64_t va, uint64_t size,
>   		void *drm_priv, struct kgd_mem **mem,
> -		uint64_t *offset, uint32_t flags);
> +		uint64_t *offset, uint32_t flags, bool criu_resume);
>   int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>   		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
>   		uint64_t *size);
> @@ -297,6 +298,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
>   int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
>   				struct tile_config *config);
>   void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev);
> +void amdgpu_amdkfd_block_mmu_notifications(void *p);
> +int amdgpu_amdkfd_criu_resume(void *p);
> +
>   #if IS_ENABLED(CONFIG_HSA_AMD)
>   void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
>   void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 90b985436878..5679fb75ec88 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -846,7 +846,8 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
>    *
>    * Returns 0 for success, negative errno for errors.
>    */
> -static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
> +static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
> +			   bool criu_resume)
>   {
>   	struct amdkfd_process_info *process_info = mem->process_info;
>   	struct amdgpu_bo *bo = mem->bo;
> @@ -868,6 +869,17 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
>   		goto out;
>   	}
>   
> +	if (criu_resume) {
> +		/*
> +		 * During a CRIU restore operation, the userptr buffer objects
> +		 * will be validated in the restore_userptr_work worker at a
> +		 * later stage when it is scheduled by another ioctl called by
> +		 * CRIU master process for the target pid for restore.
> +		 */
> +		atomic_inc(&mem->invalid);
> +		mutex_unlock(&process_info->lock);
> +		return 0;
> +	}
>   	ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
>   	if (ret) {
>   		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
> @@ -1240,6 +1252,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
>   		INIT_DELAYED_WORK(&info->restore_userptr_work,
>   				  amdgpu_amdkfd_restore_userptr_worker);
>   
> +		info->block_mmu_notifications = false;
>   		*process_info = info;
>   		*ef = dma_fence_get(&info->eviction_fence->base);
>   	}
> @@ -1456,10 +1469,37 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
>   	return avm->pd_phys_addr;
>   }
>   
> +void amdgpu_amdkfd_block_mmu_notifications(void *p)
> +{
> +	struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
> +
> +	pinfo->block_mmu_notifications = true;
> +}
> +
> +int amdgpu_amdkfd_criu_resume(void *p)
> +{
> +	int ret = 0;
> +	struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
> +
> +	mutex_lock(&pinfo->lock);
> +	pr_debug("scheduling work\n");
> +	atomic_inc(&pinfo->evicted_bos);
> +	if (!pinfo->block_mmu_notifications) {
> +		ret = -EINVAL;
> +		goto out_unlock;
> +	}
> +	pinfo->block_mmu_notifications = false;
> +	schedule_delayed_work(&pinfo->restore_userptr_work, 0);
> +
> +out_unlock:
> +	mutex_unlock(&pinfo->lock);
> +	return ret;
> +}
> +
>   int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>   		struct amdgpu_device *adev, uint64_t va, uint64_t size,
>   		void *drm_priv, struct kgd_mem **mem,
> -		uint64_t *offset, uint32_t flags)
> +		uint64_t *offset, uint32_t flags, bool criu_resume)
>   {
>   	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
>   	enum ttm_bo_type bo_type = ttm_bo_type_device;
> @@ -1562,7 +1602,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>   	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
>   
>   	if (user_addr) {
> -		ret = init_user_pages(*mem, user_addr);
> +		pr_debug("creating userptr BO for user_addr = %llu\n", user_addr);
> +		ret = init_user_pages(*mem, user_addr, criu_resume);
>   		if (ret)
>   			goto allocate_init_user_pages_failed;
>   	}
> @@ -2072,6 +2113,10 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
>   	int evicted_bos;
>   	int r = 0;
>   
> +	/* Do not process MMU notifications until stage-4 IOCTL is received */
> +	if (process_info->block_mmu_notifications)
> +		return 0;

This runs without holding the process_info lock that protects this 
variable. To avoid subtle race conditions and unexpected compiler 
optimizations, use READ_ONCE to read block_mmu_notifications here and 
use WRITE_ONCE wherever the variable is modified.


> +
>   	atomic_inc(&mem->invalid);
>   	evicted_bos = atomic_inc_return(&process_info->evicted_bos);
>   	if (evicted_bos == 1) {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index c93f74ad073f..87b9f019e96e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1325,7 +1325,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
>   	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>   		dev->adev, args->va_addr, args->size,
>   		pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
> -		flags);
> +		flags, false);
>   
>   	if (err)
>   		goto err_unlock;
> @@ -2107,6 +2107,7 @@ static int criu_restore_bos(struct kfd_process *p,
>   {
>   	struct kfd_criu_bo_bucket *bo_buckets;
>   	struct kfd_criu_bo_priv_data *bo_privs;
> +	const bool criu_resume = true;
>   	bool flush_tlbs = false;
>   	int ret = 0, j = 0;
>   	uint32_t i;
> @@ -2114,6 +2115,9 @@ static int criu_restore_bos(struct kfd_process *p,
>   	if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
>   		return -EINVAL;
>   
> +	/* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */
> +	amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);
> +
>   	bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
>   	if (!bo_buckets)
>   		return -ENOMEM;
> @@ -2203,6 +2207,7 @@ static int criu_restore_bos(struct kfd_process *p,
>   			offset = bo_priv->user_addr;
>   		}
>   
> +

Unnecessary whitespace change.


>   		/* Create the BO */
>   		ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(dev->adev,
>   						bo_bucket->addr,
> @@ -2210,7 +2215,8 @@ static int criu_restore_bos(struct kfd_process *p,
>   						pdd->drm_priv,
>   						(struct kgd_mem **) &mem,
>   						&offset,
> -						bo_bucket->alloc_flags);
> +						bo_bucket->alloc_flags,
> +						criu_resume);
>   		if (ret) {
>   			pr_err("Could not create the BO\n");
>   			ret = -ENOMEM;
> @@ -2228,8 +2234,8 @@ static int criu_restore_bos(struct kfd_process *p,
>   			pr_err("Could not allocate idr\n");
>   			amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev,
>   						(struct kgd_mem *)mem,
> -						pdd->drm_priv, NULL);
> -
> +						pdd->drm_priv,
> +						NULL);

Unnecessary formatting change.

Regards,
   Felix


>   			ret = -ENOMEM;
>   			goto exit;
>   		}
> @@ -2383,7 +2389,35 @@ static int criu_resume(struct file *filep,
>   			struct kfd_process *p,
>   			struct kfd_ioctl_criu_args *args)
>   {
> -	return 0;
> +	struct kfd_process *target = NULL;
> +	struct pid *pid = NULL;
> +	int ret = 0;
> +
> +	pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,
> +		 args->pid);
> +
> +	pid = find_get_pid(args->pid);
> +	if (!pid) {
> +		pr_err("Cannot find pid info for %i\n", args->pid);
> +		return -ESRCH;
> +	}
> +
> +	pr_debug("calling kfd_lookup_process_by_pid\n");
> +	target = kfd_lookup_process_by_pid(pid);
> +
> +	put_pid(pid);
> +
> +	if (!target) {
> +		pr_debug("Cannot find process info for %i\n", args->pid);
> +		return -ESRCH;
> +	}
> +
> +	mutex_lock(&target->mutex);
> +	ret =  amdgpu_amdkfd_criu_resume(target->kgd_process_info);
> +	mutex_unlock(&target->mutex);
> +
> +	kfd_unref_process(target);
> +	return ret;
>   }
>   
>   static int criu_process_info(struct file *filep,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index e611366fbc34..cd72541a8f4f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -949,6 +949,7 @@ void *kfd_process_device_translate_handle(struct kfd_process_device *p,
>   					int handle);
>   void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
>   					int handle);
> +struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);
>   
>   bool kfd_has_process_device_data(struct kfd_process *p);
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index f77d556ca0fc..d2fcdc5e581f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -64,7 +64,8 @@ static struct workqueue_struct *kfd_process_wq;
>    */
>   static struct workqueue_struct *kfd_restore_wq;
>   
> -static struct kfd_process *find_process(const struct task_struct *thread);
> +static struct kfd_process *find_process(const struct task_struct *thread,
> +					bool ref);
>   static void kfd_process_ref_release(struct kref *ref);
>   static struct kfd_process *create_process(const struct task_struct *thread);
>   static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
> @@ -715,7 +716,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
>   	int err;
>   
>   	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,
> -						 pdd->drm_priv, mem, NULL, flags);
> +						 pdd->drm_priv, mem, NULL, flags, false);
>   	if (err)
>   		goto err_alloc_mem;
>   
> @@ -816,7 +817,7 @@ struct kfd_process *kfd_create_process(struct file *filep)
>   	mutex_lock(&kfd_processes_mutex);
>   
>   	/* A prior open of /dev/kfd could have already created the process. */
> -	process = find_process(thread);
> +	process = find_process(thread, false);
>   	if (process) {
>   		pr_debug("Process already found\n");
>   	} else {
> @@ -884,7 +885,7 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread)
>   	if (thread->group_leader->mm != thread->mm)
>   		return ERR_PTR(-EINVAL);
>   
> -	process = find_process(thread);
> +	process = find_process(thread, false);
>   	if (!process)
>   		return ERR_PTR(-EINVAL);
>   
> @@ -903,13 +904,16 @@ static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
>   	return NULL;
>   }
>   
> -static struct kfd_process *find_process(const struct task_struct *thread)
> +static struct kfd_process *find_process(const struct task_struct *thread,
> +					bool ref)
>   {
>   	struct kfd_process *p;
>   	int idx;
>   
>   	idx = srcu_read_lock(&kfd_processes_srcu);
>   	p = find_process_by_mm(thread->mm);
> +	if (p && ref)
> +		kref_get(&p->ref);
>   	srcu_read_unlock(&kfd_processes_srcu, idx);
>   
>   	return p;
> @@ -1675,6 +1679,27 @@ void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
>   		idr_remove(&pdd->alloc_idr, handle);
>   }
>   
> +/* This increments the process->ref counter. */
> +struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid)
> +{
> +	struct task_struct *task = NULL;
> +	struct kfd_process *p    = NULL;
> +
> +	if (!pid) {
> +		task = current;
> +		get_task_struct(task);
> +	} else {
> +		task = get_pid_task(pid, PIDTYPE_PID);
> +	}
> +
> +	if (task) {
> +		p = find_process(task, true);
> +		put_task_struct(task);
> +	}
> +
> +	return p;
> +}
> +
>   /* This increments the process->ref counter. */
>   struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
>   {


More information about the dri-devel mailing list