<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<body>
<p><br>
</p>
<div class="moz-cite-prefix">On 2022-01-10 6:58 p.m., Felix Kuehling
wrote:<br>
</div>
<blockquote type="cite" cite="mid:94adb83b-35dd-3478-909e-9f8ea77e5657@amd.com">On
2022-01-05 9:43 a.m., philip yang wrote:
<br>
<blockquote type="cite">
<br>
<br>
On 2021-12-22 7:37 p.m., Rajneesh Bhardwaj wrote:
<br>
<blockquote type="cite">During CRIU restore phase, the VMAs for
the virtual address ranges are
<br>
not at their final location yet so in this stage, only cache
the data
<br>
required to successfully resume the svm ranges during an
imminent CRIU
<br>
resume phase.
<br>
<br>
Signed-off-by: Rajneesh
Bhardwaj<a class="moz-txt-link-rfc2396E" href="mailto:rajneesh.bhardwaj@amd.com"><rajneesh.bhardwaj@amd.com></a>
<br>
---
<br>
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 +-
<br>
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 ++
<br>
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 99
++++++++++++++++++++++++
<br>
drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 12 +++
<br>
4 files changed, 118 insertions(+), 2 deletions(-)
<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
<br>
index 916b8d000317..f7aa15b18f95 100644
<br>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
<br>
@@ -2638,8 +2638,8 @@ static int criu_restore_objects(struct
file *filep,
<br>
goto exit;
<br>
break;
<br>
case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
<br>
- /* TODO: Implement SVM range */
<br>
- *priv_offset += sizeof(struct
kfd_criu_svm_range_priv_data);
<br>
+ ret = kfd_criu_restore_svm(p, (uint8_t __user
*)args->priv_data,
<br>
+ priv_offset,
max_priv_data_size);
<br>
if (ret)
<br>
goto exit;
<br>
break;
<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
<br>
index 87eb6739a78e..92191c541c29 100644
<br>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
<br>
@@ -790,6 +790,7 @@ struct svm_range_list {
<br>
struct list_head list;
<br>
struct work_struct deferred_list_work;
<br>
struct list_head deferred_range_list;
<br>
+ struct list_head criu_svm_metadata_list;
<br>
spinlock_t deferred_list_lock;
<br>
atomic_t evicted_ranges;
<br>
bool drain_pagefaults;
<br>
@@ -1148,6 +1149,10 @@ int kfd_criu_restore_event(struct file
*devkfd,
<br>
uint8_t __user *user_priv_data,
<br>
uint64_t *priv_data_offset,
<br>
uint64_t max_priv_data_size);
<br>
+int kfd_criu_restore_svm(struct kfd_process *p,
<br>
+ uint8_t __user *user_priv_data,
<br>
+ uint64_t *priv_data_offset,
<br>
+ uint64_t max_priv_data_size);
<br>
/* CRIU - End */
<br>
/* Queue Context Management */
<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
<br>
index 6d59f1bedcf2..e9f6c63c2a26 100644
<br>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
<br>
@@ -45,6 +45,14 @@
<br>
*/
<br>
#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000
<br>
+struct criu_svm_metadata {
<br>
+ struct list_head list;
<br>
+ __u64 start_addr;
<br>
+ __u64 size;
<br>
+ /* Variable length array of attributes */
<br>
+ struct kfd_ioctl_svm_attribute attrs[0];
<br>
+};
<br>
</blockquote>
This data structure is struct kfd_criu_svm_range_priv_data plus a
list_head. Maybe you can add the list_head to struct
kfd_criu_svm_range_priv_data and remove this new data structure;
then you can remove the extra kzalloc/kfree for each svm object
resume, and the function svm_criu_prepare_for_resume could be
removed. </blockquote>
<br>
Adding list_head to the private structure is a bad idea, because
that structure is copied to/from user mode. Kernel mode pointers
should not be exposed to user mode, even in an opaque structure.
That's just begging for an exploit.
<br>
<br>
But you could define criu_svm_metadata as
<br>
<br>
struct criu_svm_metadata {
<br>
struct list_head list;
<br>
struct kfd_criu_svm_range_priv_data data;
<br>
};
<br>
<br>
Then copy_from_user directly into criu_svm_md->data in
kfd_criu_restore_svm to avoid the double allocation.
<br>
<br>
</blockquote>
<p>This is better: it doesn't increase the size of struct
kfd_criu_svm_range_priv_data, which is copied to/from user space,
and it removes the extra kzalloc/kfree.</p>
<p>Regards,</p>
<p>Philip<br>
</p>
<blockquote type="cite" cite="mid:94adb83b-35dd-3478-909e-9f8ea77e5657@amd.com">Regards,
<br>
Felix
<br>
<br>
<br>
<blockquote type="cite">
<blockquote type="cite">+
<br>
static void svm_range_evict_svm_bo_worker(struct work_struct
*work);
<br>
static bool
<br>
svm_range_cpu_invalidate_pagetables(struct
mmu_interval_notifier *mni,
<br>
@@ -2753,6 +2761,7 @@ int svm_range_list_init(struct
kfd_process *p)
<br>
INIT_DELAYED_WORK(&svms->restore_work,
svm_range_restore_work);
<br>
INIT_WORK(&svms->deferred_list_work,
svm_range_deferred_list_work);
<br>
INIT_LIST_HEAD(&svms->deferred_range_list);
<br>
+ INIT_LIST_HEAD(&svms->criu_svm_metadata_list);
<br>
spin_lock_init(&svms->deferred_list_lock);
<br>
for (i = 0; i < p->n_pdds; i++)
<br>
@@ -3418,6 +3427,96 @@ svm_range_get_attr(struct kfd_process
*p, struct mm_struct *mm,
<br>
return 0;
<br>
}
<br>
+int svm_criu_prepare_for_resume(struct kfd_process *p,
<br>
+ struct kfd_criu_svm_range_priv_data
*svm_priv)
<br>
+{
<br>
+ int nattr_common = 4, nattr_accessibility = 1;
<br>
+ struct criu_svm_metadata *criu_svm_md = NULL;
<br>
+ uint64_t svm_attrs_size, svm_object_md_size;
<br>
+ struct svm_range_list *svms = &p->svms;
<br>
+ int num_devices = p->n_pdds;
<br>
+ int i, ret = 0;
<br>
+
<br>
+ svm_attrs_size = sizeof(struct kfd_ioctl_svm_attribute) *
<br>
+ (nattr_common + nattr_accessibility * num_devices);
<br>
+ svm_object_md_size = sizeof(struct criu_svm_metadata) +
svm_attrs_size;
<br>
+
<br>
+ criu_svm_md = kzalloc(svm_object_md_size, GFP_KERNEL);
<br>
+ if (!criu_svm_md) {
<br>
+ pr_err("failed to allocate memory to store svm
metadata\n");
<br>
+ ret = -ENOMEM;
<br>
+ goto exit;
<br>
+ }
<br>
+
<br>
+ criu_svm_md->start_addr = svm_priv->start_addr;
<br>
+ criu_svm_md->size = svm_priv->size;
<br>
+ for (i = 0; i < svm_attrs_size; i++)
<br>
</blockquote>
<br>
for (i = 0; i < nattr_common + nattr_accessibility *
num_devices ; i++)
<br>
<br>
This function and for loop is not needed if you remove struct
criu_svm_metadata.
<br>
<br>
Regards,
<br>
<br>
Philip
<br>
<br>
<blockquote type="cite">+ {
<br>
+ criu_svm_md->attrs[i].type =
svm_priv->attrs[i].type;
<br>
+ criu_svm_md->attrs[i].value =
svm_priv->attrs[i].value;
<br>
+ }
<br>
+
<br>
+ list_add_tail(&criu_svm_md->list,
&svms->criu_svm_metadata_list);
<br>
+
<br>
+
<br>
+exit:
<br>
+ return ret;
<br>
+}
<br>
+
<br>
+int kfd_criu_restore_svm(struct kfd_process *p,
<br>
+ uint8_t __user *user_priv_ptr,
<br>
+ uint64_t *priv_data_offset,
<br>
+ uint64_t max_priv_data_size)
<br>
+{
<br>
+ uint64_t total_size, accessibility_size,
common_attr_size;
<br>
+ struct kfd_criu_svm_range_priv_data *svm_priv = NULL;
<br>
+ int nattr_common = 4, naatr_accessibility = 1;
<br>
+ uint32_t num_devices;
<br>
+ int ret = 0;
<br>
+
<br>
+ num_devices = p->n_pdds;
<br>
+ /* Handle one SVM range object at a time, also the number
of gpus are
<br>
+ * assumed to be same on the restore node, checking must
be done while
<br>
+ * evaluating the topology earlier */
<br>
+ common_attr_size = sizeof(struct kfd_ioctl_svm_attribute)
*
<br>
+ nattr_common;
<br>
+ accessibility_size = sizeof(struct
kfd_ioctl_svm_attribute) *
<br>
+ naatr_accessibility * num_devices;
<br>
+ total_size = sizeof(struct kfd_criu_svm_range_priv_data)
+
<br>
+ common_attr_size + accessibility_size;
<br>
+
<br>
+ svm_priv = kvzalloc(total_size, GFP_KERNEL);
<br>
+ if (!svm_priv)
<br>
+ return -ENOMEM;
<br>
+
<br>
+ if (*priv_data_offset + total_size >
max_priv_data_size) {
<br>
+ ret = -EINVAL;
<br>
+ goto exit;
<br>
+ }
<br>
+
<br>
+ ret = copy_from_user(svm_priv, user_priv_ptr +
*priv_data_offset,
<br>
+ total_size);
<br>
+ if (ret) {
<br>
+ ret = -EFAULT;
<br>
+ goto exit;
<br>
+ }
<br>
+ *priv_data_offset += total_size;
<br>
+
<br>
+ ret = svm_criu_prepare_for_resume(p, svm_priv);
<br>
+ if (ret) {
<br>
+ ret = -EFAULT;
<br>
+ pr_err("svm_criu_prepare_for_resume failed\n");
<br>
+ goto exit;
<br>
+ }
<br>
+
<br>
+
<br>
+exit:
<br>
+
<br>
+ kvfree(svm_priv);
<br>
+
<br>
+ return ret;
<br>
+}
<br>
+
<br>
int svm_range_get_info(struct kfd_process *p, uint32_t
*num_svm_ranges,
<br>
uint64_t *svm_priv_data_size)
<br>
{
<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
<br>
index b00576db5baa..e0c0853f085c 100644
<br>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
<br>
@@ -191,6 +191,10 @@ int svm_range_get_info(struct kfd_process
*p, uint32_t *num_svm_ranges,
<br>
int kfd_criu_checkpoint_svm(struct kfd_process *p,
<br>
uint8_t __user *user_priv_data,
<br>
uint64_t *priv_offset);
<br>
+int kfd_criu_restore_svm(struct kfd_process *p,
<br>
+ uint8_t __user *user_priv_ptr,
<br>
+ uint64_t *priv_data_offset,
<br>
+ uint64_t max_priv_data_size);
<br>
struct kfd_process_device *
<br>
svm_range_get_pdd_by_adev(struct svm_range *prange, struct
amdgpu_device *adev);
<br>
void svm_range_list_lock_and_flush_work(struct
svm_range_list *svms, struct mm_struct *mm);
<br>
@@ -244,6 +248,14 @@ static inline int
kfd_criu_checkpoint_svm(struct kfd_process *p,
<br>
return 0;
<br>
}
<br>
+static inline int kfd_criu_restore_svm(struct kfd_process
*p,
<br>
+ uint8_t __user *user_priv_ptr,
<br>
+ uint64_t *priv_data_offset,
<br>
+ uint64_t max_priv_data_size)
<br>
+{
<br>
+ return -EINVAL;
<br>
+}
<br>
+
<br>
#define KFD_IS_SVM_API_SUPPORTED(dev) false
<br>
#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
<br>
</blockquote>
</blockquote>
</blockquote>
</body>
</html>