[Patch v4 23/24] drm/amdkfd: CRIU prepare for svm resume
Felix Kuehling
felix.kuehling at amd.com
Mon Jan 10 23:58:47 UTC 2022
On 2022-01-05 9:43 a.m., philip yang wrote:
>
>
> On 2021-12-22 7:37 p.m., Rajneesh Bhardwaj wrote:
>> During CRIU restore phase, the VMAs for the virtual address ranges are
>> not at their final location yet so in this stage, only cache the data
>> required to successfully resume the svm ranges during an imminent CRIU
>> resume phase.
>>
>> Signed-off-by: Rajneesh Bhardwaj<rajneesh.bhardwaj at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 +-
>> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 ++
>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 99 ++++++++++++++++++++++++
>> drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 12 +++
>> 4 files changed, 118 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 916b8d000317..f7aa15b18f95 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -2638,8 +2638,8 @@ static int criu_restore_objects(struct file *filep,
>> goto exit;
>> break;
>> case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
>> - /* TODO: Implement SVM range */
>> - *priv_offset += sizeof(struct kfd_criu_svm_range_priv_data);
>> + ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data,
>> + priv_offset, max_priv_data_size);
>> if (ret)
>> goto exit;
>> break;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 87eb6739a78e..92191c541c29 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -790,6 +790,7 @@ struct svm_range_list {
>> struct list_head list;
>> struct work_struct deferred_list_work;
>> struct list_head deferred_range_list;
>> + struct list_head criu_svm_metadata_list;
>> spinlock_t deferred_list_lock;
>> atomic_t evicted_ranges;
>> bool drain_pagefaults;
>> @@ -1148,6 +1149,10 @@ int kfd_criu_restore_event(struct file *devkfd,
>> uint8_t __user *user_priv_data,
>> uint64_t *priv_data_offset,
>> uint64_t max_priv_data_size);
>> +int kfd_criu_restore_svm(struct kfd_process *p,
>> + uint8_t __user *user_priv_data,
>> + uint64_t *priv_data_offset,
>> + uint64_t max_priv_data_size);
>> /* CRIU - End */
>>
>> /* Queue Context Management */
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 6d59f1bedcf2..e9f6c63c2a26 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -45,6 +45,14 @@
>> */
>> #define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000
>>
>> +struct criu_svm_metadata {
>> + struct list_head list;
>> + __u64 start_addr;
>> + __u64 size;
>> + /* Variable length array of attributes */
>> + struct kfd_ioctl_svm_attribute attrs[0];
>> +};
> This data structure is struct kfd_criu_svm_range_priv_data plus a
> list_head. Maybe you can add the list_head to struct
> kfd_criu_svm_range_priv_data and remove this new data structure; then
> you can remove the extra kzalloc/kfree for each svm object on resume,
> and the function svm_criu_prepare_for_resume could be removed as well.
Adding list_head to the private structure is a bad idea, because that
structure is copied to/from user mode. Kernel mode pointers should not
be exposed to user mode, even in an opaque structure. That's just
begging for an exploit.
But you could define criu_svm_metadata as

	struct criu_svm_metadata {
		struct list_head list;
		struct kfd_criu_svm_range_priv_data data;
	};
Then copy_from_user directly into criu_svm_md->data in
kfd_criu_restore_svm to avoid the double allocation.
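Untested sketch of what that could look like in kfd_criu_restore_svm,
reusing nattr_common, nattr_accessibility and num_devices from your
patch; svm_priv_size is just a local name for the size of one
private-data record including its attribute array:

	struct criu_svm_metadata *criu_svm_md;
	int nattr_common = 4, nattr_accessibility = 1;
	int num_devices = p->n_pdds;
	uint64_t svm_attrs_size, svm_priv_size;

	svm_attrs_size = sizeof(struct kfd_ioctl_svm_attribute) *
		(nattr_common + nattr_accessibility * num_devices);
	svm_priv_size = sizeof(struct kfd_criu_svm_range_priv_data) +
		svm_attrs_size;

	if (*priv_data_offset + svm_priv_size > max_priv_data_size)
		return -EINVAL;

	/* one allocation: list head + embedded priv data + attrs */
	criu_svm_md = kzalloc(sizeof(*criu_svm_md) + svm_attrs_size,
			      GFP_KERNEL);
	if (!criu_svm_md)
		return -ENOMEM;

	/* copy_from_user straight into the embedded priv data */
	if (copy_from_user(&criu_svm_md->data,
			   user_priv_ptr + *priv_data_offset,
			   svm_priv_size)) {
		kfree(criu_svm_md);
		return -EFAULT;
	}
	*priv_data_offset += svm_priv_size;

	list_add_tail(&criu_svm_md->list, &p->svms.criu_svm_metadata_list);

	return 0;

That way there is one allocation per range and the private data is
copied exactly once.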
Regards,
Felix
>> +
>> static void svm_range_evict_svm_bo_worker(struct work_struct *work);
>> static bool
>> svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
>> @@ -2753,6 +2761,7 @@ int svm_range_list_init(struct kfd_process *p)
>> INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
>> INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
>> INIT_LIST_HEAD(&svms->deferred_range_list);
>> + INIT_LIST_HEAD(&svms->criu_svm_metadata_list);
>> spin_lock_init(&svms->deferred_list_lock);
>>
>> for (i = 0; i < p->n_pdds; i++)
>> @@ -3418,6 +3427,96 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
>> return 0;
>> }
>>
>> +int svm_criu_prepare_for_resume(struct kfd_process *p,
>> + struct kfd_criu_svm_range_priv_data *svm_priv)
>> +{
>> + int nattr_common = 4, nattr_accessibility = 1;
>> + struct criu_svm_metadata *criu_svm_md = NULL;
>> + uint64_t svm_attrs_size, svm_object_md_size;
>> + struct svm_range_list *svms = &p->svms;
>> + int num_devices = p->n_pdds;
>> + int i, ret = 0;
>> +
>> + svm_attrs_size = sizeof(struct kfd_ioctl_svm_attribute) *
>> + (nattr_common + nattr_accessibility * num_devices);
>> + svm_object_md_size = sizeof(struct criu_svm_metadata) + svm_attrs_size;
>> +
>> + criu_svm_md = kzalloc(svm_object_md_size, GFP_KERNEL);
>> + if (!criu_svm_md) {
>> + pr_err("failed to allocate memory to store svm metadata\n");
>> + ret = -ENOMEM;
>> + goto exit;
>> + }
>> +
>> + criu_svm_md->start_addr = svm_priv->start_addr;
>> + criu_svm_md->size = svm_priv->size;
>> + for (i = 0; i < svm_attrs_size; i++)
>
> for (i = 0; i < nattr_common + nattr_accessibility * num_devices; i++)
>
> This function and for loop are not needed if you remove struct
> criu_svm_metadata.
>
> Regards,
>
> Philip
>
>> + {
>> + criu_svm_md->attrs[i].type = svm_priv->attrs[i].type;
>> + criu_svm_md->attrs[i].value = svm_priv->attrs[i].value;
>> + }
>> +
>> + list_add_tail(&criu_svm_md->list, &svms->criu_svm_metadata_list);
>> +
>> +
>> +exit:
>> + return ret;
>> +}
>> +
>> +int kfd_criu_restore_svm(struct kfd_process *p,
>> + uint8_t __user *user_priv_ptr,
>> + uint64_t *priv_data_offset,
>> + uint64_t max_priv_data_size)
>> +{
>> + uint64_t total_size, accessibility_size, common_attr_size;
>> + struct kfd_criu_svm_range_priv_data *svm_priv = NULL;
>> + int nattr_common = 4, naatr_accessibility = 1;
>> + uint32_t num_devices;
>> + int ret = 0;
>> +
>> + num_devices = p->n_pdds;
>> + /* Handle one SVM range object at a time, also the number of gpus are
>> + * assumed to be same on the restore node, checking must be done while
>> + * evaluating the topology earlier */
>> + common_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
>> + nattr_common;
>> + accessibility_size = sizeof(struct kfd_ioctl_svm_attribute) *
>> + naatr_accessibility * num_devices;
>> + total_size = sizeof(struct kfd_criu_svm_range_priv_data) +
>> + common_attr_size + accessibility_size;
>> +
>> + svm_priv = kvzalloc(total_size, GFP_KERNEL);
>> + if (!svm_priv)
>> + return -ENOMEM;
>> +
>> + if (*priv_data_offset + total_size > max_priv_data_size) {
>> + ret = -EINVAL;
>> + goto exit;
>> + }
>> +
>> + ret = copy_from_user(svm_priv, user_priv_ptr + *priv_data_offset,
>> + total_size);
>> + if (ret) {
>> + ret = -EFAULT;
>> + goto exit;
>> + }
>> + *priv_data_offset += total_size;
>> +
>> + ret = svm_criu_prepare_for_resume(p, svm_priv);
>> + if (ret) {
>> + ret = -EFAULT;
>> + pr_err("svm_criu_prepare_for_resume failed\n");
>> + goto exit;
>> + }
>> +
>> +
>> +exit:
>> +
>> + kvfree(svm_priv);
>> +
>> + return ret;
>> +}
>> +
>> int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
>> uint64_t *svm_priv_data_size)
>> {
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> index b00576db5baa..e0c0853f085c 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> @@ -191,6 +191,10 @@ int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
>> int kfd_criu_checkpoint_svm(struct kfd_process *p,
>> uint8_t __user *user_priv_data,
>> uint64_t *priv_offset);
>> +int kfd_criu_restore_svm(struct kfd_process *p,
>> + uint8_t __user *user_priv_ptr,
>> + uint64_t *priv_data_offset,
>> + uint64_t max_priv_data_size);
>> struct kfd_process_device *
>> svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev);
>> void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_struct *mm);
>> @@ -244,6 +248,14 @@ static inline int kfd_criu_checkpoint_svm(struct kfd_process *p,
>> return 0;
>> }
>>
>> +static inline int kfd_criu_restore_svm(struct kfd_process *p,
>> + uint8_t __user *user_priv_ptr,
>> + uint64_t *priv_data_offset,
>> + uint64_t max_priv_data_size)
>> +{
>> + return -EINVAL;
>> +}
>> +
>> #define KFD_IS_SVM_API_SUPPORTED(dev) false
>>
>> #endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */