[Patch v4 23/24] drm/amdkfd: CRIU prepare for svm resume

Felix Kuehling felix.kuehling at amd.com
Mon Jan 10 23:58:47 UTC 2022


On 2022-01-05 9:43 a.m., philip yang wrote:
>
>
> On 2021-12-22 7:37 p.m., Rajneesh Bhardwaj wrote:
>> During CRIU restore phase, the VMAs for the virtual address ranges are
>> not at their final location yet so in this stage, only cache the data
>> required to successfully resume the svm ranges during an imminent CRIU
>> resume phase.
>>
>> Signed-off-by: Rajneesh Bhardwaj<rajneesh.bhardwaj at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  4 +-
>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  5 ++
>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 99 ++++++++++++++++++++++++
>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.h     | 12 +++
>>   4 files changed, 118 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 916b8d000317..f7aa15b18f95 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -2638,8 +2638,8 @@ static int criu_restore_objects(struct file *filep,
>>   				goto exit;
>>   			break;
>>   		case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
>> -			/* TODO: Implement SVM range */
>> -			*priv_offset += sizeof(struct kfd_criu_svm_range_priv_data);
>> +			ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data,
>> +						     priv_offset, max_priv_data_size);
>>   			if (ret)
>>   				goto exit;
>>   			break;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 87eb6739a78e..92191c541c29 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -790,6 +790,7 @@ struct svm_range_list {
>>   	struct list_head		list;
>>   	struct work_struct		deferred_list_work;
>>   	struct list_head		deferred_range_list;
>> +	struct list_head                criu_svm_metadata_list;
>>   	spinlock_t			deferred_list_lock;
>>   	atomic_t			evicted_ranges;
>>   	bool				drain_pagefaults;
>> @@ -1148,6 +1149,10 @@ int kfd_criu_restore_event(struct file *devkfd,
>>   			   uint8_t __user *user_priv_data,
>>   			   uint64_t *priv_data_offset,
>>   			   uint64_t max_priv_data_size);
>> +int kfd_criu_restore_svm(struct kfd_process *p,
>> +			 uint8_t __user *user_priv_data,
>> +			 uint64_t *priv_data_offset,
>> +			 uint64_t max_priv_data_size);
>>   /* CRIU - End */
>>   
>>   /* Queue Context Management */
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 6d59f1bedcf2..e9f6c63c2a26 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -45,6 +45,14 @@
>>    */
>>   #define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING	2000
>>   
>> +struct criu_svm_metadata {
>> +	struct list_head list;
>> +	__u64 start_addr;
>> +	__u64 size;
>> +	/* Variable length array of attributes */
>> +	struct kfd_ioctl_svm_attribute attrs[0];
>> +};
> This data structure is struct kfd_criu_svm_range_priv_data plus 
> list_head, maybe you can add list_head to struct 
> kfd_criu_svm_range_priv_data and remove this new data structure, then 
> you can remove extra kzalloc, kfree for each svm object resume and 
> function svm_criu_prepare_for_resume could be removed. 

Adding list_head to the private structure is a bad idea, because that 
structure is copied to/from user mode. Kernel mode pointers should not 
be exposed to user mode, even in an opaque structure. That's just 
begging for an exploit.

But you could define criu_svm_metadata as

struct criu_svm_metadata {
	struct list_head list;
	kfd_criu_svm_range_priv_data data;
};

Then copy_from_user directly into criu_svm_md->data in 
kfd_criu_restore_svm to avoid the double allocation.

Regards,
   Felix


>> +
>>   static void svm_range_evict_svm_bo_worker(struct work_struct *work);
>>   static bool
>>   svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
>> @@ -2753,6 +2761,7 @@ int svm_range_list_init(struct kfd_process *p)
>>   	INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
>>   	INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
>>   	INIT_LIST_HEAD(&svms->deferred_range_list);
>> +	INIT_LIST_HEAD(&svms->criu_svm_metadata_list);
>>   	spin_lock_init(&svms->deferred_list_lock);
>>   
>>   	for (i = 0; i < p->n_pdds; i++)
>> @@ -3418,6 +3427,96 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
>>   	return 0;
>>   }
>>   
>> +int svm_criu_prepare_for_resume(struct kfd_process *p,
>> +				struct kfd_criu_svm_range_priv_data *svm_priv)
>> +{
>> +	int nattr_common = 4, nattr_accessibility = 1;
>> +	struct criu_svm_metadata *criu_svm_md = NULL;
>> +	uint64_t svm_attrs_size, svm_object_md_size;
>> +	struct svm_range_list *svms = &p->svms;
>> +	int num_devices = p->n_pdds;
>> +	int i, ret = 0;
>> +
>> +	svm_attrs_size = sizeof(struct kfd_ioctl_svm_attribute) *
>> +		(nattr_common + nattr_accessibility * num_devices);
>> +	svm_object_md_size = sizeof(struct criu_svm_metadata) + svm_attrs_size;
>> +
>> +	criu_svm_md = kzalloc(svm_object_md_size, GFP_KERNEL);
>> +	if (!criu_svm_md) {
>> +		pr_err("failed to allocate memory to store svm metadata\n");
>> +		ret = -ENOMEM;
>> +		goto exit;
>> +	}
>> +
>> +	criu_svm_md->start_addr = svm_priv->start_addr;
>> +	criu_svm_md->size = svm_priv->size;
>> +	for (i = 0; i < svm_attrs_size; i++)
>
> for (i = 0; i < nattr_common + nattr_accessibility * num_devices ; i++)
>
> This function and for loop is not needed if you remove struct 
> criu_svm_metadata.
>
> Regards,
>
> Philip
>
>> +	{
>> +		criu_svm_md->attrs[i].type = svm_priv->attrs[i].type;
>> +		criu_svm_md->attrs[i].value = svm_priv->attrs[i].value;
>> +	}
>> +
>> +	list_add_tail(&criu_svm_md->list, &svms->criu_svm_metadata_list);
>> +
>> +
>> +exit:
>> +	return ret;
>> +}
>> +
>> +int kfd_criu_restore_svm(struct kfd_process *p,
>> +			 uint8_t __user *user_priv_ptr,
>> +			 uint64_t *priv_data_offset,
>> +			 uint64_t max_priv_data_size)
>> +{
>> +	uint64_t total_size, accessibility_size, common_attr_size;
>> +	struct kfd_criu_svm_range_priv_data *svm_priv = NULL;
>> +	int nattr_common = 4, naatr_accessibility = 1;
>> +	uint32_t num_devices;
>> +	int ret = 0;
>> +
>> +	num_devices = p->n_pdds;
>> +	/* Handle one SVM range object at a time, also the number of gpus are
>> +	 * assumed to be same on the restore node, checking must be done while
>> +	 * evaluating the topology earlier */
>> +	common_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
>> +		nattr_common;
>> +	accessibility_size = sizeof(struct kfd_ioctl_svm_attribute) *
>> +		naatr_accessibility * num_devices;
>> +	total_size = sizeof(struct kfd_criu_svm_range_priv_data) +
>> +		common_attr_size + accessibility_size;
>> +
>> +	svm_priv = kvzalloc(total_size, GFP_KERNEL);
>> +	if (!svm_priv)
>> +		return -ENOMEM;
>> +
>> +	if (*priv_data_offset + total_size > max_priv_data_size) {
>> +		ret = -EINVAL;
>> +		goto exit;
>> +	}
>> +
>> +	ret = copy_from_user(svm_priv, user_priv_ptr + *priv_data_offset,
>> +			     total_size);
>> +	if (ret) {
>> +		ret = -EFAULT;
>> +		goto exit;
>> +	}
>> +	*priv_data_offset += total_size;
>> +
>> +	ret = svm_criu_prepare_for_resume(p, svm_priv);
>> +	if (ret) {
>> +		ret = -EFAULT;
>> +		pr_err("svm_criu_prepare_for_resume failed\n");
>> +		goto exit;
>> +	}
>> +
>> +
>> +exit:
>> +
>> +	kvfree(svm_priv);
>> +
>> +	return ret;
>> +}
>> +
>>   int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
>>   		       uint64_t *svm_priv_data_size)
>>   {
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> index b00576db5baa..e0c0853f085c 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> @@ -191,6 +191,10 @@ int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
>>   int kfd_criu_checkpoint_svm(struct kfd_process *p,
>>   			    uint8_t __user *user_priv_data,
>>   			    uint64_t *priv_offset);
>> +int kfd_criu_restore_svm(struct kfd_process *p,
>> +			 uint8_t __user *user_priv_ptr,
>> +			 uint64_t *priv_data_offset,
>> +			 uint64_t max_priv_data_size);
>>   struct kfd_process_device *
>>   svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev);
>>   void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_struct *mm);
>> @@ -244,6 +248,14 @@ static inline int kfd_criu_checkpoint_svm(struct kfd_process *p,
>>   	return 0;
>>   }
>>   
>> +static inline int kfd_criu_restore_svm(struct kfd_process *p,
>> +				       uint8_t __user *user_priv_ptr,
>> +				       uint64_t *priv_data_offset,
>> +				       uint64_t max_priv_data_size)
>> +{
>> +	return -EINVAL;
>> +}
>> +
>>   #define KFD_IS_SVM_API_SUPPORTED(dev) false
>>   
>>   #endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */


More information about the amd-gfx mailing list