[PATCH 16/18] drm/amdkfd: CRIU implement gpu_id remapping

Felix Kuehling felix.kuehling at amd.com
Mon Aug 23 18:48:46 UTC 2021


Am 2021-08-19 um 9:37 a.m. schrieb David Yat Sin:
> When doing a restore on a different node, the gpu_ids on the restore
> node may be different, but the user space application will still
> use the original gpu_ids in its ioctl calls. Add code to create a
> gpu_id mapping so that kfd can determine the actual gpu_id during the
> user ioctls.
>
> Signed-off-by: David Yat Sin <david.yatsin at amd.com>
> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 400 +++++++++++++++++------
>  drivers/gpu/drm/amd/amdkfd/kfd_events.c  |   5 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  10 +
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c |  18 +
>  4 files changed, 324 insertions(+), 109 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index c8f523d8ab81..90e4d4ce4398 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -294,13 +294,14 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
>  		return err;
>  
>  	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
> -	dev = kfd_device_by_id(args->gpu_id);
> -	if (!dev) {
> +
> +	mutex_lock(&p->mutex);
> +	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
> +	if (!pdd) {
>  		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);

You need to unlock p->mutex here (i.e. jump to an appropriate error
handling label).

Regards,
  Felix


>  		return -EINVAL;
>  	}
> -
> -	mutex_lock(&p->mutex);
> +	dev = pdd->dev;
>  
>  	pdd = kfd_bind_process_to_device(dev, p);
>  	if (IS_ERR(pdd)) {
> @@ -491,7 +492,6 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
>  					struct kfd_process *p, void *data)
>  {
>  	struct kfd_ioctl_set_memory_policy_args *args = data;
> -	struct kfd_dev *dev;
>  	int err = 0;
>  	struct kfd_process_device *pdd;
>  	enum cache_policy default_policy, alternate_policy;
> @@ -506,13 +506,15 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
>  		return -EINVAL;
>  	}
>  
> -	dev = kfd_device_by_id(args->gpu_id);
> -	if (!dev)
> -		return -EINVAL;
> -
>  	mutex_lock(&p->mutex);
> +	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
> +	if (!pdd) {
> +		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
> +		err = -EINVAL;
> +		goto out;
> +	}
>  
> -	pdd = kfd_bind_process_to_device(dev, p);
> +	pdd = kfd_bind_process_to_device(pdd->dev, p);
>  	if (IS_ERR(pdd)) {
>  		err = -ESRCH;
>  		goto out;
> @@ -525,7 +527,7 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
>  		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
>  		   ? cache_policy_coherent : cache_policy_noncoherent;
>  
> -	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
> +	if (!pdd->dev->dqm->ops.set_cache_memory_policy(pdd->dev->dqm,
>  				&pdd->qpd,
>  				default_policy,
>  				alternate_policy,
> @@ -543,17 +545,18 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
>  					struct kfd_process *p, void *data)
>  {
>  	struct kfd_ioctl_set_trap_handler_args *args = data;
> -	struct kfd_dev *dev;
>  	int err = 0;
>  	struct kfd_process_device *pdd;
>  
> -	dev = kfd_device_by_id(args->gpu_id);
> -	if (!dev)
> -		return -EINVAL;
> -
>  	mutex_lock(&p->mutex);
>  
> -	pdd = kfd_bind_process_to_device(dev, p);
> +	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
> +	if (!pdd) {
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	pdd = kfd_bind_process_to_device(pdd->dev, p);
>  	if (IS_ERR(pdd)) {
>  		err = -ESRCH;
>  		goto out;
> @@ -577,16 +580,20 @@ static int kfd_ioctl_dbg_register(struct file *filep,
>  	bool create_ok;
>  	long status = 0;
>  
> -	dev = kfd_device_by_id(args->gpu_id);
> -	if (!dev)
> -		return -EINVAL;
> +	mutex_lock(&p->mutex);
> +	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
> +	if (!pdd) {
> +		status = -EINVAL;
> +		goto out_unlock_p;
> +	}
> +	dev = pdd->dev;
>  
>  	if (dev->device_info->asic_family == CHIP_CARRIZO) {
>  		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
> -		return -EINVAL;
> +		status = -EINVAL;
> +		goto out_unlock_p;
>  	}
>  
> -	mutex_lock(&p->mutex);
>  	mutex_lock(kfd_get_dbgmgr_mutex());
>  
>  	/*
> @@ -596,7 +603,7 @@ static int kfd_ioctl_dbg_register(struct file *filep,
>  	pdd = kfd_bind_process_to_device(dev, p);
>  	if (IS_ERR(pdd)) {
>  		status = PTR_ERR(pdd);
> -		goto out;
> +		goto out_unlock_dbg;
>  	}
>  
>  	if (!dev->dbgmgr) {
> @@ -614,8 +621,9 @@ static int kfd_ioctl_dbg_register(struct file *filep,
>  		status = -EINVAL;
>  	}
>  
> -out:
> +out_unlock_dbg:
>  	mutex_unlock(kfd_get_dbgmgr_mutex());
> +out_unlock_p:
>  	mutex_unlock(&p->mutex);
>  
>  	return status;
> @@ -625,12 +633,18 @@ static int kfd_ioctl_dbg_unregister(struct file *filep,
>  				struct kfd_process *p, void *data)
>  {
>  	struct kfd_ioctl_dbg_unregister_args *args = data;
> +	struct kfd_process_device *pdd;
>  	struct kfd_dev *dev;
>  	long status;
>  
> -	dev = kfd_device_by_id(args->gpu_id);
> -	if (!dev || !dev->dbgmgr)
> +	mutex_lock(&p->mutex);
> +	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
> +	if (!pdd || !pdd->dev->dbgmgr) {
> +		mutex_unlock(&p->mutex);
>  		return -EINVAL;
> +	}
> +	dev = pdd->dev;
> +	mutex_unlock(&p->mutex);
>  
>  	if (dev->device_info->asic_family == CHIP_CARRIZO) {
>  		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
> @@ -664,6 +678,7 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep,
>  {
>  	struct kfd_ioctl_dbg_address_watch_args *args = data;
>  	struct kfd_dev *dev;
> +	struct kfd_process_device *pdd;
>  	struct dbg_address_watch_info aw_info;
>  	unsigned char *args_buff;
>  	long status;
> @@ -673,9 +688,15 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep,
>  
>  	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
>  
> -	dev = kfd_device_by_id(args->gpu_id);
> -	if (!dev)
> +	mutex_lock(&p->mutex);
> +	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
> +	if (!pdd) {
> +		mutex_unlock(&p->mutex);
> +		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
>  		return -EINVAL;
> +	}
> +	dev = pdd->dev;
> +	mutex_unlock(&p->mutex);
>  
>  	if (dev->device_info->asic_family == CHIP_CARRIZO) {
>  		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
> @@ -764,6 +785,7 @@ static int kfd_ioctl_dbg_wave_control(struct file *filep,
>  {
>  	struct kfd_ioctl_dbg_wave_control_args *args = data;
>  	struct kfd_dev *dev;
> +	struct kfd_process_device *pdd;
>  	struct dbg_wave_control_info wac_info;
>  	unsigned char *args_buff;
>  	uint32_t computed_buff_size;
> @@ -781,9 +803,15 @@ static int kfd_ioctl_dbg_wave_control(struct file *filep,
>  				sizeof(wac_info.dbgWave_msg.MemoryVA) +
>  				sizeof(wac_info.trapId);
>  
> -	dev = kfd_device_by_id(args->gpu_id);
> -	if (!dev)
> +	mutex_lock(&p->mutex);
> +	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
> +	if (!pdd) {
> +		mutex_unlock(&p->mutex);
> +		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
>  		return -EINVAL;
> +	}
> +	dev = pdd->dev;
> +	mutex_unlock(&p->mutex);
>  
>  	if (dev->device_info->asic_family == CHIP_CARRIZO) {
>  		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
> @@ -847,16 +875,19 @@ static int kfd_ioctl_get_clock_counters(struct file *filep,
>  				struct kfd_process *p, void *data)
>  {
>  	struct kfd_ioctl_get_clock_counters_args *args = data;
> -	struct kfd_dev *dev;
> +	struct kfd_process_device *pdd;
>  
> -	dev = kfd_device_by_id(args->gpu_id);
> -	if (dev)
> +	mutex_lock(&p->mutex);
> +	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
> +	if (pdd)
>  		/* Reading GPU clock counter from KGD */
> -		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
> +		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(pdd->dev->kgd);
>  	else
>  		/* Node without GPU resource */
>  		args->gpu_clock_counter = 0;
>  
> +	mutex_unlock(&p->mutex);
> +
>  	/* No access to rdtsc. Using raw monotonic time */
>  	args->cpu_clock_counter = ktime_get_raw_ns();
>  	args->system_clock_counter = ktime_get_boottime_ns();
> @@ -1070,11 +1101,13 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
>  	struct kfd_dev *dev;
>  	long err;
>  
> -	dev = kfd_device_by_id(args->gpu_id);
> -	if (!dev)
> -		return -EINVAL;
> -
>  	mutex_lock(&p->mutex);
> +	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
> +	if (!pdd) {
> +		err = -EINVAL;
> +		goto bind_process_to_device_fail;
> +	}
> +	dev = pdd->dev;
>  
>  	pdd = kfd_bind_process_to_device(dev, p);
>  	if (IS_ERR(pdd)) {
> @@ -1102,15 +1135,20 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
>  		struct kfd_process *p, void *data)
>  {
>  	struct kfd_ioctl_get_tile_config_args *args = data;
> -	struct kfd_dev *dev;
> +	struct kfd_process_device *pdd;
>  	struct tile_config config;
>  	int err = 0;
>  
> -	dev = kfd_device_by_id(args->gpu_id);
> -	if (!dev)
> +	mutex_lock(&p->mutex);
> +	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
> +	if (!pdd) {
> +		mutex_unlock(&p->mutex);
>  		return -EINVAL;
> +	}
>  
> -	amdgpu_amdkfd_get_tile_config(dev->kgd, &config);
> +	amdgpu_amdkfd_get_tile_config(pdd->dev->kgd, &config);
> +
> +	mutex_unlock(&p->mutex);
>  
>  	args->gb_addr_config = config.gb_addr_config;
>  	args->num_banks = config.num_banks;
> @@ -1145,21 +1183,15 @@ static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
>  {
>  	struct kfd_ioctl_acquire_vm_args *args = data;
>  	struct kfd_process_device *pdd;
> -	struct kfd_dev *dev;
>  	struct file *drm_file;
>  	int ret;
>  
> -	dev = kfd_device_by_id(args->gpu_id);
> -	if (!dev)
> -		return -EINVAL;
> -
>  	drm_file = fget(args->drm_fd);
>  	if (!drm_file)
>  		return -EINVAL;
>  
>  	mutex_lock(&p->mutex);
> -
> -	pdd = kfd_get_process_device_data(dev, p);
> +	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
>  	if (!pdd) { 
>  		ret = -EINVAL;
>  		goto err_unlock;
> @@ -1218,19 +1250,23 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
>  	if (args->size == 0)
>  		return -EINVAL;
>  
> -	dev = kfd_device_by_id(args->gpu_id);
> -	if (!dev)
> -		return -EINVAL;
> +	mutex_lock(&p->mutex);
> +	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
> +	if (!pdd) {
> +		err = -EINVAL;
> +		goto err_unlock;
> +	}
> +
> +	dev = pdd->dev;
>  
>  	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
>  		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
>  		!kfd_dev_is_large_bar(dev)) {
>  		pr_err("Alloc host visible vram on small bar is not allowed\n");
> -		return -EINVAL;
> +		err = -EINVAL;
> +		goto err_unlock;
>  	}
>  
> -	mutex_lock(&p->mutex);
> -
>  	pdd = kfd_bind_process_to_device(dev, p);
>  	if (IS_ERR(pdd)) {
>  		err = PTR_ERR(pdd);
> @@ -1301,17 +1337,12 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
>  	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
>  	struct kfd_process_device *pdd;
>  	void *mem;
> -	struct kfd_dev *dev;
>  	int ret;
>  	uint64_t size = 0;
>  
> -	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
> -	if (!dev)
> -		return -EINVAL;
> -
>  	mutex_lock(&p->mutex);
>  
> -	pdd = kfd_get_process_device_data(dev, p);
> +	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
>  	if (!pdd) {
>  		pr_err("Process device data doesn't exist\n");
>  		ret = -EINVAL;
> @@ -1325,7 +1356,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
>  		goto err_unlock;
>  	}
>  
> -	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
> +	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd,
>  				(struct kgd_mem *)mem, pdd->drm_priv, &size);
>  
>  	/* If freeing the buffer failed, leave the handle in place for
> @@ -1348,15 +1379,11 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
>  	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
>  	struct kfd_process_device *pdd, *peer_pdd;
>  	void *mem;
> -	struct kfd_dev *dev, *peer;
> +	struct kfd_dev *dev;
>  	long err = 0;
>  	int i;
>  	uint32_t *devices_arr = NULL;
>  
> -	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
> -	if (!dev)
> -		return -EINVAL;
> -
>  	if (!args->n_devices) {
>  		pr_debug("Device IDs array empty\n");
>  		return -EINVAL;
> @@ -1380,6 +1407,12 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
>  	}
>  
>  	mutex_lock(&p->mutex);
> +	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
> +	if (!pdd) {
> +		err = -EINVAL;
> +		goto get_process_device_data_failed;
> +	}
> +	dev = pdd->dev;
>  
>  	pdd = kfd_bind_process_to_device(dev, p);
>  	if (IS_ERR(pdd)) {
> @@ -1395,21 +1428,21 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
>  	}
>  
>  	for (i = args->n_success; i < args->n_devices; i++) {
> -		peer = kfd_device_by_id(devices_arr[i]);
> -		if (!peer) {
> +		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
> +		if (!peer_pdd) {
>  			pr_debug("Getting device by id failed for 0x%x\n",
>  				 devices_arr[i]);
>  			err = -EINVAL;
>  			goto get_mem_obj_from_handle_failed;
>  		}
>  
> -		peer_pdd = kfd_bind_process_to_device(peer, p);
> +		peer_pdd = kfd_bind_process_to_device(peer_pdd->dev, p);
>  		if (IS_ERR(peer_pdd)) {
>  			err = PTR_ERR(peer_pdd);
>  			goto get_mem_obj_from_handle_failed;
>  		}
>  		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
> -			peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
> +			peer_pdd->dev->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
>  		if (err) {
>  			pr_err("Failed to map to gpu %d/%d\n",
>  			       i, args->n_devices);
> @@ -1428,12 +1461,10 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
>  
>  	/* Flush TLBs after waiting for the page table updates to complete */
>  	for (i = 0; i < args->n_devices; i++) {
> -		peer = kfd_device_by_id(devices_arr[i]);
> -		if (WARN_ON_ONCE(!peer))
> -			continue;
> -		peer_pdd = kfd_get_process_device_data(peer, p);
> +		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
>  		if (WARN_ON_ONCE(!peer_pdd))
>  			continue;
> +
>  		kfd_flush_tlb(peer_pdd);
>  	}
>  
> @@ -1441,6 +1472,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
>  
>  	return err;
>  
> +get_process_device_data_failed:
>  bind_process_to_device_failed:
>  get_mem_obj_from_handle_failed:
>  map_memory_to_gpu_failed:
> @@ -1458,14 +1490,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>  	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
>  	struct kfd_process_device *pdd, *peer_pdd;
>  	void *mem;
> -	struct kfd_dev *dev, *peer;
>  	long err = 0;
>  	uint32_t *devices_arr = NULL, i;
>  
> -	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
> -	if (!dev)
> -		return -EINVAL;
> -
>  	if (!args->n_devices) {
>  		pr_debug("Device IDs array empty\n");
>  		return -EINVAL;
> @@ -1489,8 +1516,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>  	}
>  
>  	mutex_lock(&p->mutex);
> -
> -	pdd = kfd_get_process_device_data(dev, p);
> +	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
>  	if (!pdd) {
>  		err = -EINVAL;
>  		goto bind_process_to_device_failed;
> @@ -1504,19 +1530,13 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>  	}
>  
>  	for (i = args->n_success; i < args->n_devices; i++) {
> -		peer = kfd_device_by_id(devices_arr[i]);
> -		if (!peer) {
> -			err = -EINVAL;
> -			goto get_mem_obj_from_handle_failed;
> -		}
> -
> -		peer_pdd = kfd_get_process_device_data(peer, p);
> +		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
>  		if (!peer_pdd) {
> -			err = -ENODEV;
> +			err = -EINVAL;
>  			goto get_mem_obj_from_handle_failed;
>  		}
>  		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
> -			peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
> +			peer_pdd->dev->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
>  		if (err) {
>  			pr_err("Failed to unmap from gpu %d/%d\n",
>  			       i, args->n_devices);
> @@ -1645,23 +1665,26 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
>  	void *mem;
>  	int r;
>  
> -	dev = kfd_device_by_id(args->gpu_id);
> -	if (!dev)
> -		return -EINVAL;
> +	mutex_lock(&p->mutex);
> +	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
> +	if (!pdd) {
> +		r = -EINVAL;
> +		goto err_unlock;
> +	}
>  
>  	dmabuf = dma_buf_get(args->dmabuf_fd);
> -	if (IS_ERR(dmabuf))
> -		return PTR_ERR(dmabuf);
> -
> -	mutex_lock(&p->mutex);
> +	if (IS_ERR(dmabuf)) {
> +		r = PTR_ERR(dmabuf);
> +		goto err_unlock;
> +	}
>  
> -	pdd = kfd_bind_process_to_device(dev, p);
> +	pdd = kfd_bind_process_to_device(pdd->dev, p);
>  	if (IS_ERR(pdd)) {
>  		r = PTR_ERR(pdd);
>  		goto err_unlock;
>  	}
>  
> -	r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
> +	r = amdgpu_amdkfd_gpuvm_import_dmabuf(pdd->dev->kgd, dmabuf,
>  					      args->va_addr, pdd->drm_priv,
>  					      (struct kgd_mem **)&mem, &size,
>  					      NULL);
> @@ -1695,13 +1718,19 @@ static int kfd_ioctl_smi_events(struct file *filep,
>  				struct kfd_process *p, void *data)
>  {
>  	struct kfd_ioctl_smi_events_args *args = data;
> -	struct kfd_dev *dev;
> +	struct kfd_process_device *pdd;
>  
> -	dev = kfd_device_by_id(args->gpuid);
> -	if (!dev)
> +	mutex_lock(&p->mutex);
> +
> +	pdd = kfd_process_device_data_by_id(p, args->gpuid);
> +	if (!pdd) {
> +		mutex_unlock(&p->mutex);
>  		return -EINVAL;
> +	}
>  
> -	return kfd_smi_event_open(dev, &args->anon_fd);
> +	mutex_unlock(&p->mutex);
> +
> +	return kfd_smi_event_open(pdd->dev, &args->anon_fd);
>  }
>  
>  static int kfd_ioctl_set_xnack_mode(struct file *filep,
> @@ -1800,6 +1829,57 @@ static int criu_dump_process(struct kfd_process *p, struct kfd_ioctl_criu_dumper
>  	return ret;
>  }
>  
> +static int criu_dump_devices(struct kfd_process *p, struct kfd_ioctl_criu_dumper_args *args)
> +{
> +	struct kfd_criu_device_bucket *device_buckets;
> +	int ret = 0, i;
> +
> +	if (args->num_objects != p->n_pdds) {
> +		pr_err("Mismatch with number of devices (current:%d user:%lld)\n",
> +							p->n_pdds, args->num_objects);
> +		return -EINVAL;
> +	}
> +
> +	if (args->objects_size != args->num_objects *
> +		(sizeof(*device_buckets) + sizeof(struct kfd_criu_device_priv_data))) {
> +		pr_err("Invalid objects size for devices\n");
> +		return -EINVAL;
> +	}
> +
> +	device_buckets = kvzalloc(args->objects_size, GFP_KERNEL);
> +	if (!device_buckets)
> +		return -ENOMEM;
> +
> +	/* Private data for devices it not currently used. To set private data
> +	 * struct kfd_criu_device_priv_data * device_privs = (struct kfd_criu_device_priv_data*)
> +	 *				((uint8_t*)device_buckets +
> +	 *				 (args->num_objects * (sizeof(*device_buckets))));
> +	 */
> +
> +	for (i = 0; i < args->num_objects; i++) {
> +		struct kfd_process_device *pdd = p->pdds[i];
> +
> +		device_buckets[i].user_gpu_id = pdd->user_gpu_id;
> +		device_buckets[i].actual_gpu_id = pdd->dev->id;
> +
> +		/* priv_data does not contain useful information for now and is reserved for
> +		 * future use, so we do not set its contents
> +		 */
> +		device_buckets[i].priv_data_offset = i * sizeof(struct kfd_criu_device_priv_data);
> +		device_buckets[i].priv_data_size = sizeof(struct kfd_criu_device_priv_data);
> +	}
> +
> +	ret = copy_to_user((void __user *)args->objects, device_buckets, args->objects_size);
> +
> +	if (ret) {
> +		pr_err("Failed to copy device information to user\n");
> +		ret = -EFAULT;
> +	}
> +
> +	kvfree(device_buckets);
> +	return ret;
> +}
> +
>  uint64_t get_process_num_bos(struct kfd_process *p)
>  {
>  	uint64_t num_of_bos = 0, i;
> @@ -2231,6 +2311,9 @@ static int kfd_ioctl_criu_dumper(struct file *filep,
>  	case KFD_CRIU_OBJECT_TYPE_PROCESS:
>  		ret = criu_dump_process(p, args);
>  		break;
> +	case KFD_CRIU_OBJECT_TYPE_DEVICE:
> +		ret = criu_dump_devices(p, args);
> +		break;
>  	case KFD_CRIU_OBJECT_TYPE_BO:
>  		ret = criu_dump_bos(p, args);
>  		break;
> @@ -2240,7 +2323,6 @@ static int kfd_ioctl_criu_dumper(struct file *filep,
>  	case KFD_CRIU_OBJECT_TYPE_EVENT:
>  		ret = criu_dump_events(p, args);
>  		break;
> -	case KFD_CRIU_OBJECT_TYPE_DEVICE:
>  	case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
>  	default:
>  		pr_err("Unsupported object type:%d\n", args->type);
> @@ -2301,6 +2383,102 @@ static int criu_restore_process(struct kfd_process *p, struct kfd_ioctl_criu_res
>  	return ret;
>  }
>  
> +static int criu_restore_devices(struct kfd_process *p, struct kfd_ioctl_criu_restorer_args *args)
> +{
> +	int ret = 0, i;
> +	uint8_t *objects;
> +	struct kfd_criu_device_bucket *device_buckets;
> +
> +	if (args->num_objects != p->n_pdds)
> +		return -EINVAL;
> +
> +	if (args->objects_size != args->num_objects *
> +		(sizeof(*device_buckets) + sizeof(struct kfd_criu_device_priv_data))) {
> +		pr_err("Invalid objects size for devices\n");
> +		return -EINVAL;
> +	}
> +
> +	objects = kmalloc(args->objects_size, GFP_KERNEL);
> +	if (!objects)
> +		return -ENOMEM;
> +
> +	ret = copy_from_user(objects, (void __user *)args->objects, args->objects_size);
> +	if (ret) {
> +		pr_err("Failed to copy devices information from user\n");
> +		ret = -EFAULT;
> +		goto exit;
> +	}
> +
> +	device_buckets = (struct kfd_criu_device_bucket *) objects;
> +
> +	for (i = 0; i < args->num_objects; i++) {
> +		struct kfd_dev *dev;
> +		struct kfd_process_device *pdd;
> +		struct file *drm_file;
> +
> +		/* device private data is not currently used. To access device private data:
> +		 * uint8_t *private_datas = objects +
> +		 *				(args->num_objects * sizeof(*device_buckets));
> +		 *
> +		 * struct kfd_criu_device_priv_data *device_priv =
> +		 *			(struct kfd_criu_device_priv_data*)
> +		 *			(private_datas + device_buckets[i].priv_data_offset);
> +		 */
> +
> +		dev = kfd_device_by_id(device_buckets[i].actual_gpu_id);
> +		if (!dev) {
> +			pr_err("Failed to find device with gpu_id = %x\n",
> +				device_buckets[i].actual_gpu_id);
> +			ret = -EINVAL;
> +			goto exit;
> +		}
> +
> +		pdd = kfd_get_process_device_data(dev, p);
> +		if (!pdd) {
> +			pr_err("Failed to get pdd for gpu_id = %x\n",
> +					device_buckets[i].actual_gpu_id);
> +			ret = -EINVAL;
> +			goto exit;
> +		}
> +		pdd->user_gpu_id = device_buckets[i].user_gpu_id;
> +
> +		drm_file = fget(device_buckets[i].drm_fd);
> +		if (!drm_file) {
> +			pr_err("Invalid render node file descriptor sent from plugin (%d)\n",
> +				device_buckets[i].drm_fd);
> +			ret = -EINVAL;
> +			goto exit;
> +		}
> +
> +		if (pdd->drm_file) {
> +			ret = -EINVAL;
> +			goto exit;
> +		}
> +
> +		/* create the vm using render nodes for kfd pdd */
> +		if (kfd_process_device_init_vm(pdd, drm_file)) {
> +			pr_err("could not init vm for given pdd\n");
> +			/* On success, the PDD keeps the drm_file reference */
> +			fput(drm_file);
> +			ret = -EINVAL;
> +			goto exit;
> +		}
> +		/*
> +		 * pdd now already has the vm bound to render node so below api won't create a new
> +		 * exclusive kfd mapping but use existing one with renderDXXX but is still needed
> +		 * for iommu v2 binding  and runtime pm.
> +		 */
> +		pdd = kfd_bind_process_to_device(dev, p);
> +		if (IS_ERR(pdd)) {
> +			ret = PTR_ERR(pdd);
> +			goto exit;
> +		}
> +	}
> +exit:
> +	kvfree(objects);
> +	return ret;
> +}
> +
>  static int criu_restore_bos(struct kfd_process *p, struct kfd_ioctl_criu_restorer_args *args)
>  {
>  	uint8_t *objects, *private_data;
> @@ -2719,6 +2897,9 @@ static int kfd_ioctl_criu_restorer(struct file *filep,
>  	case KFD_CRIU_OBJECT_TYPE_PROCESS:
>  		ret = criu_restore_process(p, args);
>  		break;
> +	case KFD_CRIU_OBJECT_TYPE_DEVICE:
> +		ret = criu_restore_devices(p, args);
> +		break;
>  	case KFD_CRIU_OBJECT_TYPE_BO:
>  		ret = criu_restore_bos(p, args);
>  		break;
> @@ -2728,7 +2909,6 @@ static int kfd_ioctl_criu_restorer(struct file *filep,
>  	case KFD_CRIU_OBJECT_TYPE_EVENT:
>  		ret = criu_restore_events(filep, p, args);
>  		break;
> -	case KFD_CRIU_OBJECT_TYPE_DEVICE:
>  	case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
>  	default:
>  		pr_err("Unsupported object type:%d\n", args->type);
> @@ -2819,6 +2999,11 @@ static int kfd_ioctl_criu_process_info(struct file *filep,
>  
>  	args->process_priv_data_size = sizeof(struct kfd_criu_process_priv_data);
>  
> +	args->total_devices = p->n_pdds;
> +	/* devices_priv_data_size does not contain any useful information for now */
> +	args->devices_priv_data_size = args->total_devices *
> +					sizeof(struct kfd_criu_device_priv_data);
> +
>  	args->total_bos = get_process_num_bos(p);
>  	args->bos_priv_data_size = args->total_bos * sizeof(struct kfd_criu_bo_priv_data);
>  
> @@ -2832,7 +3017,8 @@ static int kfd_ioctl_criu_process_info(struct file *filep,
>  	args->total_events = kfd_get_num_events(p);
>  	args->events_priv_data_size = args->total_events * sizeof(struct kfd_criu_event_priv_data);
>  
> -	dev_dbg(kfd_device, "Num of bos:%llu queues:%u events:%u\n",
> +	dev_dbg(kfd_device, "Num of devices:%u bos:%llu queues:%u events:%u\n",
> +				args->total_devices,
>  				args->total_bos,
>  				args->total_queues,
>  				args->total_events);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index 18362478e351..5e9067b70908 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -343,11 +343,12 @@ int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
>  		return -EINVAL;
>  	}
>  
> -	kfd = kfd_device_by_id(GET_GPU_ID(event_page_offset));
> -	if (!kfd) {
> +	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(event_page_offset));
> +	if (!pdd) {
>  		pr_err("Getting device by id failed in %s\n", __func__);
>  		return -EINVAL;
>  	}
> +	kfd = pdd->dev;
>  
>  	pdd = kfd_bind_process_to_device(kfd, p);
>  	if (IS_ERR(pdd))
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index bf10a5305ef7..1912df8d9101 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -759,6 +759,13 @@ struct kfd_process_device {
>  	 *  number of CU's a device has along with number of other competing processes
>  	 */
>  	struct attribute attr_cu_occupancy;
> +
> +	/*
> +	 * If this process has been checkpointed before, then the user
> +	 * application will use the original gpu_id on the
> +	 * checkpointed node to refer to this device.
> +	 */
> +	uint32_t user_gpu_id;
>  };
>  
>  #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
> @@ -914,6 +921,9 @@ int kfd_process_restore_queues(struct kfd_process *p);
>  void kfd_suspend_all_processes(void);
>  int kfd_resume_all_processes(void);
>  
> +struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *process,
> +				uint32_t gpu_id);
> +
>  int kfd_process_device_init_vm(struct kfd_process_device *pdd,
>  			       struct file *drm_file);
>  struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index e4cb2f778590..a23f2162eb8b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1425,6 +1425,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
>  	pdd->runtime_inuse = false;
>  	pdd->vram_usage = 0;
>  	pdd->sdma_past_activity_counter = 0;
> +	pdd->user_gpu_id = dev->id;
>  	atomic64_set(&pdd->evict_duration_counter, 0);
>  	p->pdds[p->n_pdds++] = pdd;
>  
> @@ -1898,6 +1899,23 @@ void kfd_flush_tlb(struct kfd_process_device *pdd)
>  	}
>  }
>  
> +struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, uint32_t gpu_id)
> +{
> +	int i;
> +
> +	if (gpu_id) {
> +		for (i = 0; i < p->n_pdds; i++) {
> +			struct kfd_process_device *pdd = p->pdds[i];
> +
> +			if (pdd->user_gpu_id == gpu_id)
> +				return pdd;
> +		}
> +
> +		WARN_ONCE(1, "Failed to find mapping for gpu = 0x%x\n",  gpu_id);
> +	}
> +	return NULL;
> +}
> +
>  #if defined(CONFIG_DEBUG_FS)
>  
>  int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)


More information about the amd-gfx mailing list