<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
    <p><br>
    </p>
    <div class="moz-cite-prefix">On 2022-01-10 6:58 p.m., Felix Kuehling
      wrote:<br>
    </div>
    <blockquote type="cite" cite="mid:94adb83b-35dd-3478-909e-9f8ea77e5657@amd.com">On
      2022-01-05 9:43 a.m., philip yang wrote:
      <br>
      <blockquote type="cite">
        <br>
        <br>
        On 2021-12-22 7:37 p.m., Rajneesh Bhardwaj wrote:
        <br>
        <blockquote type="cite">During CRIU restore phase, the VMAs for
          the virtual address ranges are
          <br>
          not at their final location yet so in this stage, only cache
          the data
          <br>
          required to successfully resume the svm ranges during an
          imminent CRIU
          <br>
          resume phase.
          <br>
          <br>
          Signed-off-by: Rajneesh
          Bhardwaj<a class="moz-txt-link-rfc2396E" href="mailto:rajneesh.bhardwaj@amd.com"><rajneesh.bhardwaj@amd.com></a>
          <br>
          ---
          <br>
            drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  4 +-
          <br>
            drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  5 ++
          <br>
            drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 99
          ++++++++++++++++++++++++
          <br>
            drivers/gpu/drm/amd/amdkfd/kfd_svm.h     | 12 +++
          <br>
            4 files changed, 118 insertions(+), 2 deletions(-)
          <br>
          <br>
          diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
          b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
          <br>
          index 916b8d000317..f7aa15b18f95 100644
          <br>
          --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
          <br>
          +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
          <br>
          @@ -2638,8 +2638,8 @@ static int criu_restore_objects(struct
          file *filep,
          <br>
                            goto exit;
          <br>
                        break;
          <br>
                    case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
          <br>
          -            /* TODO: Implement SVM range */
          <br>
          -            *priv_offset += sizeof(struct
          kfd_criu_svm_range_priv_data);
          <br>
          +            ret = kfd_criu_restore_svm(p, (uint8_t __user
          *)args->priv_data,
          <br>
          +                             priv_offset,
          max_priv_data_size);
          <br>
                        if (ret)
          <br>
                            goto exit;
          <br>
                        break;
          <br>
          diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
          b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
          <br>
          index 87eb6739a78e..92191c541c29 100644
          <br>
          --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
          <br>
          +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
          <br>
          @@ -790,6 +790,7 @@ struct svm_range_list {
          <br>
                struct list_head        list;
          <br>
                struct work_struct        deferred_list_work;
          <br>
                struct list_head        deferred_range_list;
          <br>
          +    struct list_head                criu_svm_metadata_list;
          <br>
                spinlock_t            deferred_list_lock;
          <br>
                atomic_t            evicted_ranges;
          <br>
                bool                drain_pagefaults;
          <br>
          @@ -1148,6 +1149,10 @@ int kfd_criu_restore_event(struct file
          *devkfd,
          <br>
                           uint8_t __user *user_priv_data,
          <br>
                           uint64_t *priv_data_offset,
          <br>
                           uint64_t max_priv_data_size);
          <br>
          +int kfd_criu_restore_svm(struct kfd_process *p,
          <br>
          +             uint8_t __user *user_priv_data,
          <br>
          +             uint64_t *priv_data_offset,
          <br>
          +             uint64_t max_priv_data_size);
          <br>
            /* CRIU - End */
          <br>
              /* Queue Context Management */
          <br>
          diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
          b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
          <br>
          index 6d59f1bedcf2..e9f6c63c2a26 100644
          <br>
          --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
          <br>
          +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
          <br>
          @@ -45,6 +45,14 @@
          <br>
             */
          <br>
            #define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING    2000
          <br>
            +struct criu_svm_metadata {
          <br>
          +    struct list_head list;
          <br>
          +    __u64 start_addr;
          <br>
          +    __u64 size;
          <br>
          +    /* Variable length array of attributes */
          <br>
          +    struct kfd_ioctl_svm_attribute attrs[0];
          <br>
          +};
          <br>
        </blockquote>
        This data structure is struct kfd_criu_svm_range_priv_data plus
        a list_head. Maybe you can add the list_head to struct
        kfd_criu_svm_range_priv_data and remove this new data structure;
        then you can remove the extra kzalloc/kfree for each svm object
        resume, and the function svm_criu_prepare_for_resume could be
        removed. </blockquote>
      <br>
      Adding list_head to the private structure is a bad idea, because
      that structure is copied to/from user mode. Kernel mode pointers
      should not be exposed to user mode, even in an opaque structure.
      That's just begging for an exploit.
      <br>
      <br>
      But you could define criu_svm_metadata as
      <br>
      <br>
      struct criu_svm_metadata {
      <br>
          struct list_head list;
      <br>
          struct kfd_criu_svm_range_priv_data data;
      <br>
      };
      <br>
      <br>
      Then copy_from_user directly into criu_svm_md->data in
      kfd_criu_restore_svm to avoid the double allocation.
      <br>
      <br>
    </blockquote>
    <p>This is better: it doesn't increase the size of struct
      kfd_criu_svm_range_priv_data, which is copied to/from user
      space, and it removes the extra kzalloc/kfree.</p>
    <p>Regards,</p>
    <p>Philip<br>
    </p>
    <blockquote type="cite" cite="mid:94adb83b-35dd-3478-909e-9f8ea77e5657@amd.com">Regards,
      <br>
        Felix
      <br>
      <br>
      <br>
      <blockquote type="cite">
        <blockquote type="cite">+
          <br>
            static void svm_range_evict_svm_bo_worker(struct work_struct
          *work);
          <br>
            static bool
          <br>
            svm_range_cpu_invalidate_pagetables(struct
          mmu_interval_notifier *mni,
          <br>
          @@ -2753,6 +2761,7 @@ int svm_range_list_init(struct
          kfd_process *p)
          <br>
                INIT_DELAYED_WORK(&svms->restore_work,
          svm_range_restore_work);
          <br>
                INIT_WORK(&svms->deferred_list_work,
          svm_range_deferred_list_work);
          <br>
                INIT_LIST_HEAD(&svms->deferred_range_list);
          <br>
          +    INIT_LIST_HEAD(&svms->criu_svm_metadata_list);
          <br>
                spin_lock_init(&svms->deferred_list_lock);
          <br>
                  for (i = 0; i < p->n_pdds; i++)
          <br>
          @@ -3418,6 +3427,96 @@ svm_range_get_attr(struct kfd_process
          *p, struct mm_struct *mm,
          <br>
                return 0;
          <br>
            }
          <br>
            +int svm_criu_prepare_for_resume(struct kfd_process *p,
          <br>
          +                struct kfd_criu_svm_range_priv_data
          *svm_priv)
          <br>
          +{
          <br>
          +    int nattr_common = 4, nattr_accessibility = 1;
          <br>
          +    struct criu_svm_metadata *criu_svm_md = NULL;
          <br>
          +    uint64_t svm_attrs_size, svm_object_md_size;
          <br>
          +    struct svm_range_list *svms = &p->svms;
          <br>
          +    int num_devices = p->n_pdds;
          <br>
          +    int i, ret = 0;
          <br>
          +
          <br>
          +    svm_attrs_size = sizeof(struct kfd_ioctl_svm_attribute) *
          <br>
          +        (nattr_common + nattr_accessibility * num_devices);
          <br>
          +    svm_object_md_size = sizeof(struct criu_svm_metadata) +
          svm_attrs_size;
          <br>
          +
          <br>
          +    criu_svm_md = kzalloc(svm_object_md_size, GFP_KERNEL);
          <br>
          +    if (!criu_svm_md) {
          <br>
          +        pr_err("failed to allocate memory to store svm
          metadata\n");
          <br>
          +        ret = -ENOMEM;
          <br>
          +        goto exit;
          <br>
          +    }
          <br>
          +
          <br>
          +    criu_svm_md->start_addr = svm_priv->start_addr;
          <br>
          +    criu_svm_md->size = svm_priv->size;
          <br>
          +    for (i = 0; i < svm_attrs_size; i++)
          <br>
        </blockquote>
        <br>
        for (i = 0; i < nattr_common + nattr_accessibility *
        num_devices ; i++)
        <br>
        <br>
        This function and for loop is not needed if you remove struct
        criu_svm_metadata.
        <br>
        <br>
        Regards,
        <br>
        <br>
        Philip
        <br>
        <br>
        <blockquote type="cite">+    {
          <br>
          +        criu_svm_md->attrs[i].type =
          svm_priv->attrs[i].type;
          <br>
          +        criu_svm_md->attrs[i].value =
          svm_priv->attrs[i].value;
          <br>
          +    }
          <br>
          +
          <br>
          +    list_add_tail(&criu_svm_md->list,
          &svms->criu_svm_metadata_list);
          <br>
          +
          <br>
          +
          <br>
          +exit:
          <br>
          +    return ret;
          <br>
          +}
          <br>
          +
          <br>
          +int kfd_criu_restore_svm(struct kfd_process *p,
          <br>
          +             uint8_t __user *user_priv_ptr,
          <br>
          +             uint64_t *priv_data_offset,
          <br>
          +             uint64_t max_priv_data_size)
          <br>
          +{
          <br>
          +    uint64_t total_size, accessibility_size,
          common_attr_size;
          <br>
          +    struct kfd_criu_svm_range_priv_data *svm_priv = NULL;
          <br>
          +    int nattr_common = 4, naatr_accessibility = 1;
          <br>
          +    uint32_t num_devices;
          <br>
          +    int ret = 0;
          <br>
          +
          <br>
          +    num_devices = p->n_pdds;
          <br>
          +    /* Handle one SVM range object at a time, also the number
          of gpus are
          <br>
          +     * assumed to be same on the restore node, checking must
          be done while
          <br>
          +     * evaluating the topology earlier */
          <br>
          +    common_attr_size = sizeof(struct kfd_ioctl_svm_attribute)
          *
          <br>
          +        nattr_common;
          <br>
          +    accessibility_size = sizeof(struct
          kfd_ioctl_svm_attribute) *
          <br>
          +        naatr_accessibility * num_devices;
          <br>
          +    total_size = sizeof(struct kfd_criu_svm_range_priv_data)
          +
          <br>
          +        common_attr_size + accessibility_size;
          <br>
          +
          <br>
          +    svm_priv = kvzalloc(total_size, GFP_KERNEL);
          <br>
          +    if (!svm_priv)
          <br>
          +        return -ENOMEM;
          <br>
          +
          <br>
          +    if (*priv_data_offset + total_size >
          max_priv_data_size) {
          <br>
          +        ret = -EINVAL;
          <br>
          +        goto exit;
          <br>
          +    }
          <br>
          +
          <br>
          +    ret = copy_from_user(svm_priv, user_priv_ptr +
          *priv_data_offset,
          <br>
          +                 total_size);
          <br>
          +    if (ret) {
          <br>
          +        ret = -EFAULT;
          <br>
          +        goto exit;
          <br>
          +    }
          <br>
          +    *priv_data_offset += total_size;
          <br>
          +
          <br>
          +    ret = svm_criu_prepare_for_resume(p, svm_priv);
          <br>
          +    if (ret) {
          <br>
          +        ret = -EFAULT;
          <br>
          +        pr_err("svm_criu_prepare_for_resume failed\n");
          <br>
          +        goto exit;
          <br>
          +    }
          <br>
          +
          <br>
          +
          <br>
          +exit:
          <br>
          +
          <br>
          +    kvfree(svm_priv);
          <br>
          +
          <br>
          +    return ret;
          <br>
          +}
          <br>
          +
          <br>
            int svm_range_get_info(struct kfd_process *p, uint32_t
          *num_svm_ranges,
          <br>
                           uint64_t *svm_priv_data_size)
          <br>
            {
          <br>
          diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
          b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
          <br>
          index b00576db5baa..e0c0853f085c 100644
          <br>
          --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
          <br>
          +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
          <br>
          @@ -191,6 +191,10 @@ int svm_range_get_info(struct kfd_process
          *p, uint32_t *num_svm_ranges,
          <br>
            int kfd_criu_checkpoint_svm(struct kfd_process *p,
          <br>
                            uint8_t __user *user_priv_data,
          <br>
                            uint64_t *priv_offset);
          <br>
          +int kfd_criu_restore_svm(struct kfd_process *p,
          <br>
          +             uint8_t __user *user_priv_ptr,
          <br>
          +             uint64_t *priv_data_offset,
          <br>
          +             uint64_t max_priv_data_size);
          <br>
            struct kfd_process_device *
          <br>
            svm_range_get_pdd_by_adev(struct svm_range *prange, struct
          amdgpu_device *adev);
          <br>
            void svm_range_list_lock_and_flush_work(struct
          svm_range_list *svms, struct mm_struct *mm);
          <br>
          @@ -244,6 +248,14 @@ static inline int
          kfd_criu_checkpoint_svm(struct kfd_process *p,
          <br>
                return 0;
          <br>
            }
          <br>
            +static inline int kfd_criu_restore_svm(struct kfd_process
          *p,
          <br>
          +                       uint8_t __user *user_priv_ptr,
          <br>
          +                       uint64_t *priv_data_offset,
          <br>
          +                       uint64_t max_priv_data_size)
          <br>
          +{
          <br>
          +    return -EINVAL;
          <br>
          +}
          <br>
          +
          <br>
            #define KFD_IS_SVM_API_SUPPORTED(dev) false
          <br>
              #endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
          <br>
        </blockquote>
      </blockquote>
    </blockquote>
  </body>
</html>