[Patch v4 07/24] drm/amdkfd: CRIU Implement KFD resume ioctl
Felix Kuehling
felix.kuehling at amd.com
Mon Jan 10 23:16:26 UTC 2022
On 2021-12-22 7:36 p.m., Rajneesh Bhardwaj wrote:
> This adds support to create userptr BOs on restore and introduces a new
> ioctl to restart memory notifiers for the restored userptr BOs.
> When doing CRIU restore MMU notifications can happen anytime after we call
> amdgpu_mn_register. Prevent MMU notifications until we reach stage-4 of the
> restore process i.e. criu_resume ioctl is received, and the process is
> ready to be resumed. This ioctl is different from other KFD CRIU ioctls
> since its called by CRIU master restore process for all the target
> processes being resumed by CRIU.
>
> Signed-off-by: David Yat Sin <david.yatsin at amd.com>
> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 6 ++-
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 51 +++++++++++++++++--
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 44 ++++++++++++++--
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 35 +++++++++++--
> 5 files changed, 123 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index fcbc8a9c9e06..5c5fc839f701 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -131,6 +131,7 @@ struct amdkfd_process_info {
> atomic_t evicted_bos;
> struct delayed_work restore_userptr_work;
> struct pid *pid;
> + bool block_mmu_notifications;
> };
>
> int amdgpu_amdkfd_init(void);
> @@ -269,7 +270,7 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
> int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
> struct amdgpu_device *adev, uint64_t va, uint64_t size,
> void *drm_priv, struct kgd_mem **mem,
> - uint64_t *offset, uint32_t flags);
> + uint64_t *offset, uint32_t flags, bool criu_resume);
> int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
> uint64_t *size);
> @@ -297,6 +298,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
> int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
> struct tile_config *config);
> void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev);
> +void amdgpu_amdkfd_block_mmu_notifications(void *p);
> +int amdgpu_amdkfd_criu_resume(void *p);
> +
> #if IS_ENABLED(CONFIG_HSA_AMD)
> void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
> void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 90b985436878..5679fb75ec88 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -846,7 +846,8 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
> *
> * Returns 0 for success, negative errno for errors.
> */
> -static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
> +static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
> + bool criu_resume)
> {
> struct amdkfd_process_info *process_info = mem->process_info;
> struct amdgpu_bo *bo = mem->bo;
> @@ -868,6 +869,17 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
> goto out;
> }
>
> + if (criu_resume) {
> + /*
> + * During a CRIU restore operation, the userptr buffer objects
> + * will be validated in the restore_userptr_work worker at a
> + * later stage when it is scheduled by another ioctl called by
> + * CRIU master process for the target pid for restore.
> + */
> + atomic_inc(&mem->invalid);
> + mutex_unlock(&process_info->lock);
> + return 0;
> + }
> ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
> if (ret) {
> pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
> @@ -1240,6 +1252,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
> INIT_DELAYED_WORK(&info->restore_userptr_work,
> amdgpu_amdkfd_restore_userptr_worker);
>
> + info->block_mmu_notifications = false;
> *process_info = info;
> *ef = dma_fence_get(&info->eviction_fence->base);
> }
> @@ -1456,10 +1469,37 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
> return avm->pd_phys_addr;
> }
>
> +void amdgpu_amdkfd_block_mmu_notifications(void *p)
> +{
> + struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
> +
> + pinfo->block_mmu_notifications = true;
> +}
> +
> +int amdgpu_amdkfd_criu_resume(void *p)
> +{
> + int ret = 0;
> + struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
> +
> + mutex_lock(&pinfo->lock);
> + pr_debug("scheduling work\n");
> + atomic_inc(&pinfo->evicted_bos);
> + if (!pinfo->block_mmu_notifications) {
> + ret = -EINVAL;
> + goto out_unlock;
> + }
> + pinfo->block_mmu_notifications = false;
> + schedule_delayed_work(&pinfo->restore_userptr_work, 0);
> +
> +out_unlock:
> + mutex_unlock(&pinfo->lock);
> + return ret;
> +}
> +
> int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
> struct amdgpu_device *adev, uint64_t va, uint64_t size,
> void *drm_priv, struct kgd_mem **mem,
> - uint64_t *offset, uint32_t flags)
> + uint64_t *offset, uint32_t flags, bool criu_resume)
> {
> struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
> enum ttm_bo_type bo_type = ttm_bo_type_device;
> @@ -1562,7 +1602,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
> add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
>
> if (user_addr) {
> - ret = init_user_pages(*mem, user_addr);
> + pr_debug("creating userptr BO for user_addr = %llu\n", user_addr);
> + ret = init_user_pages(*mem, user_addr, criu_resume);
> if (ret)
> goto allocate_init_user_pages_failed;
> }
> @@ -2072,6 +2113,10 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
> int evicted_bos;
> int r = 0;
>
> + /* Do not process MMU notifications until stage-4 IOCTL is received */
> + if (process_info->block_mmu_notifications)
> + return 0;
This runs without holding the process_info lock that protects this
variable. To avoid subtle race conditions and unexpected compiler
optimizations, use READ_ONCE to read block_mmu_notifications here and
use WRITE_ONCE wherever the variable is modified.
> +
> atomic_inc(&mem->invalid);
> evicted_bos = atomic_inc_return(&process_info->evicted_bos);
> if (evicted_bos == 1) {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index c93f74ad073f..87b9f019e96e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1325,7 +1325,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
> err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
> dev->adev, args->va_addr, args->size,
> pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
> - flags);
> + flags, false);
>
> if (err)
> goto err_unlock;
> @@ -2107,6 +2107,7 @@ static int criu_restore_bos(struct kfd_process *p,
> {
> struct kfd_criu_bo_bucket *bo_buckets;
> struct kfd_criu_bo_priv_data *bo_privs;
> + const bool criu_resume = true;
> bool flush_tlbs = false;
> int ret = 0, j = 0;
> uint32_t i;
> @@ -2114,6 +2115,9 @@ static int criu_restore_bos(struct kfd_process *p,
> if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
> return -EINVAL;
>
> + /* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */
> + amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);
> +
> bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
> if (!bo_buckets)
> return -ENOMEM;
> @@ -2203,6 +2207,7 @@ static int criu_restore_bos(struct kfd_process *p,
> offset = bo_priv->user_addr;
> }
>
> +
Unnecessary whitespace change.
> /* Create the BO */
> ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(dev->adev,
> bo_bucket->addr,
> @@ -2210,7 +2215,8 @@ static int criu_restore_bos(struct kfd_process *p,
> pdd->drm_priv,
> (struct kgd_mem **) &mem,
> &offset,
> - bo_bucket->alloc_flags);
> + bo_bucket->alloc_flags,
> + criu_resume);
> if (ret) {
> pr_err("Could not create the BO\n");
> ret = -ENOMEM;
> @@ -2228,8 +2234,8 @@ static int criu_restore_bos(struct kfd_process *p,
> pr_err("Could not allocate idr\n");
> amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev,
> (struct kgd_mem *)mem,
> - pdd->drm_priv, NULL);
> -
> + pdd->drm_priv,
> + NULL);
Unnecessary formatting change.
Regards,
Felix
> ret = -ENOMEM;
> goto exit;
> }
> @@ -2383,7 +2389,35 @@ static int criu_resume(struct file *filep,
> struct kfd_process *p,
> struct kfd_ioctl_criu_args *args)
> {
> - return 0;
> + struct kfd_process *target = NULL;
> + struct pid *pid = NULL;
> + int ret = 0;
> +
> + pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,
> + args->pid);
> +
> + pid = find_get_pid(args->pid);
> + if (!pid) {
> + pr_err("Cannot find pid info for %i\n", args->pid);
> + return -ESRCH;
> + }
> +
> + pr_debug("calling kfd_lookup_process_by_pid\n");
> + target = kfd_lookup_process_by_pid(pid);
> +
> + put_pid(pid);
> +
> + if (!target) {
> + pr_debug("Cannot find process info for %i\n", args->pid);
> + return -ESRCH;
> + }
> +
> + mutex_lock(&target->mutex);
> + ret = amdgpu_amdkfd_criu_resume(target->kgd_process_info);
> + mutex_unlock(&target->mutex);
> +
> + kfd_unref_process(target);
> + return ret;
> }
>
> static int criu_process_info(struct file *filep,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index e611366fbc34..cd72541a8f4f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -949,6 +949,7 @@ void *kfd_process_device_translate_handle(struct kfd_process_device *p,
> int handle);
> void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
> int handle);
> +struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);
>
> bool kfd_has_process_device_data(struct kfd_process *p);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index f77d556ca0fc..d2fcdc5e581f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -64,7 +64,8 @@ static struct workqueue_struct *kfd_process_wq;
> */
> static struct workqueue_struct *kfd_restore_wq;
>
> -static struct kfd_process *find_process(const struct task_struct *thread);
> +static struct kfd_process *find_process(const struct task_struct *thread,
> + bool ref);
> static void kfd_process_ref_release(struct kref *ref);
> static struct kfd_process *create_process(const struct task_struct *thread);
> static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
> @@ -715,7 +716,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
> int err;
>
> err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,
> - pdd->drm_priv, mem, NULL, flags);
> + pdd->drm_priv, mem, NULL, flags, false);
> if (err)
> goto err_alloc_mem;
>
> @@ -816,7 +817,7 @@ struct kfd_process *kfd_create_process(struct file *filep)
> mutex_lock(&kfd_processes_mutex);
>
> /* A prior open of /dev/kfd could have already created the process. */
> - process = find_process(thread);
> + process = find_process(thread, false);
> if (process) {
> pr_debug("Process already found\n");
> } else {
> @@ -884,7 +885,7 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread)
> if (thread->group_leader->mm != thread->mm)
> return ERR_PTR(-EINVAL);
>
> - process = find_process(thread);
> + process = find_process(thread, false);
> if (!process)
> return ERR_PTR(-EINVAL);
>
> @@ -903,13 +904,16 @@ static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
> return NULL;
> }
>
> -static struct kfd_process *find_process(const struct task_struct *thread)
> +static struct kfd_process *find_process(const struct task_struct *thread,
> + bool ref)
> {
> struct kfd_process *p;
> int idx;
>
> idx = srcu_read_lock(&kfd_processes_srcu);
> p = find_process_by_mm(thread->mm);
> + if (p && ref)
> + kref_get(&p->ref);
> srcu_read_unlock(&kfd_processes_srcu, idx);
>
> return p;
> @@ -1675,6 +1679,27 @@ void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
> idr_remove(&pdd->alloc_idr, handle);
> }
>
> +/* This increments the process->ref counter. */
> +struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid)
> +{
> + struct task_struct *task = NULL;
> + struct kfd_process *p = NULL;
> +
> + if (!pid) {
> + task = current;
> + get_task_struct(task);
> + } else {
> + task = get_pid_task(pid, PIDTYPE_PID);
> + }
> +
> + if (task) {
> + p = find_process(task, true);
> + put_task_struct(task);
> + }
> +
> + return p;
> +}
> +
> /* This increments the process->ref counter. */
> struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
> {
More information about the dri-devel
mailing list