[PATCH v2] drm/amdkfd: Refactor kfd CRIU into its own file
David Francis
David.Francis@amd.com
Tue May 7 13:31:00 UTC 2024
The kfd CRIU code takes up about a thousand lines of kfd_chardev.c;
move it into its own file, kfd_criu.c.
No functional change intended.
Signed-off-by: David Francis <David.Francis@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
---
drivers/gpu/drm/amd/amdkfd/Makefile | 1 +
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 974 +---------------------
drivers/gpu/drm/amd/amdkfd/kfd_criu.c | 989 +++++++++++++++++++++++
drivers/gpu/drm/amd/amdkfd/kfd_criu.h | 50 ++
4 files changed, 1046 insertions(+), 968 deletions(-)
create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_criu.c
create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_criu.h
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 0d3d8972240d..e06af4073ac5 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -32,6 +32,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_flat_memory.o \
$(AMDKFD_PATH)/kfd_process.o \
$(AMDKFD_PATH)/kfd_queue.o \
+ $(AMDKFD_PATH)/kfd_criu.o \
$(AMDKFD_PATH)/kfd_mqd_manager.o \
$(AMDKFD_PATH)/kfd_mqd_manager_cik.o \
$(AMDKFD_PATH)/kfd_mqd_manager_vi.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6b713fb0b818..d9587364130a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -36,15 +36,14 @@
#include <linux/mman.h>
#include <linux/ptrace.h>
#include <linux/dma-buf.h>
-#include <linux/fdtable.h>
#include <linux/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_svm.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
-#include "amdgpu_dma_buf.h"
#include "kfd_debug.h"
+#include "kfd_criu.h"
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@@ -1751,967 +1750,6 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
}
#endif
-static int criu_checkpoint_process(struct kfd_process *p,
- uint8_t __user *user_priv_data,
- uint64_t *priv_offset)
-{
- struct kfd_criu_process_priv_data process_priv;
- int ret;
-
- memset(&process_priv, 0, sizeof(process_priv));
-
- process_priv.version = KFD_CRIU_PRIV_VERSION;
- /* For CR, we don't consider negative xnack mode which is used for
- * querying without changing it, here 0 simply means disabled and 1
- * means enabled so retry for finding a valid PTE.
- */
- process_priv.xnack_mode = p->xnack_enabled ? 1 : 0;
-
- ret = copy_to_user(user_priv_data + *priv_offset,
- &process_priv, sizeof(process_priv));
-
- if (ret) {
- pr_err("Failed to copy process information to user\n");
- ret = -EFAULT;
- }
-
- *priv_offset += sizeof(process_priv);
- return ret;
-}
-
-static int criu_checkpoint_devices(struct kfd_process *p,
- uint32_t num_devices,
- uint8_t __user *user_addr,
- uint8_t __user *user_priv_data,
- uint64_t *priv_offset)
-{
- struct kfd_criu_device_priv_data *device_priv = NULL;
- struct kfd_criu_device_bucket *device_buckets = NULL;
- int ret = 0, i;
-
- device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), GFP_KERNEL);
- if (!device_buckets) {
- ret = -ENOMEM;
- goto exit;
- }
-
- device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL);
- if (!device_priv) {
- ret = -ENOMEM;
- goto exit;
- }
-
- for (i = 0; i < num_devices; i++) {
- struct kfd_process_device *pdd = p->pdds[i];
-
- device_buckets[i].user_gpu_id = pdd->user_gpu_id;
- device_buckets[i].actual_gpu_id = pdd->dev->id;
-
- /*
- * priv_data does not contain useful information for now and is reserved for
- * future use, so we do not set its contents.
- */
- }
-
- ret = copy_to_user(user_addr, device_buckets, num_devices * sizeof(*device_buckets));
- if (ret) {
- pr_err("Failed to copy device information to user\n");
- ret = -EFAULT;
- goto exit;
- }
-
- ret = copy_to_user(user_priv_data + *priv_offset,
- device_priv,
- num_devices * sizeof(*device_priv));
- if (ret) {
- pr_err("Failed to copy device information to user\n");
- ret = -EFAULT;
- }
- *priv_offset += num_devices * sizeof(*device_priv);
-
-exit:
- kvfree(device_buckets);
- kvfree(device_priv);
- return ret;
-}
-
-static uint32_t get_process_num_bos(struct kfd_process *p)
-{
- uint32_t num_of_bos = 0;
- int i;
-
- /* Run over all PDDs of the process */
- for (i = 0; i < p->n_pdds; i++) {
- struct kfd_process_device *pdd = p->pdds[i];
- void *mem;
- int id;
-
- idr_for_each_entry(&pdd->alloc_idr, mem, id) {
- struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
-
- if (!kgd_mem->va || kgd_mem->va > pdd->gpuvm_base)
- num_of_bos++;
- }
- }
- return num_of_bos;
-}
-
-static int criu_get_prime_handle(struct kgd_mem *mem,
- int flags, u32 *shared_fd)
-{
- struct dma_buf *dmabuf;
- int ret;
-
- ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);
- if (ret) {
- pr_err("dmabuf export failed for the BO\n");
- return ret;
- }
-
- ret = dma_buf_fd(dmabuf, flags);
- if (ret < 0) {
- pr_err("dmabuf create fd failed, ret:%d\n", ret);
- goto out_free_dmabuf;
- }
-
- *shared_fd = ret;
- return 0;
-
-out_free_dmabuf:
- dma_buf_put(dmabuf);
- return ret;
-}
-
-static int criu_checkpoint_bos(struct kfd_process *p,
- uint32_t num_bos,
- uint8_t __user *user_bos,
- uint8_t __user *user_priv_data,
- uint64_t *priv_offset)
-{
- struct kfd_criu_bo_bucket *bo_buckets;
- struct kfd_criu_bo_priv_data *bo_privs;
- int ret = 0, pdd_index, bo_index = 0, id;
- void *mem;
-
- bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);
- if (!bo_buckets)
- return -ENOMEM;
-
- bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL);
- if (!bo_privs) {
- ret = -ENOMEM;
- goto exit;
- }
-
- for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
- struct kfd_process_device *pdd = p->pdds[pdd_index];
- struct amdgpu_bo *dumper_bo;
- struct kgd_mem *kgd_mem;
-
- idr_for_each_entry(&pdd->alloc_idr, mem, id) {
- struct kfd_criu_bo_bucket *bo_bucket;
- struct kfd_criu_bo_priv_data *bo_priv;
- int i, dev_idx = 0;
-
- if (!mem) {
- ret = -ENOMEM;
- goto exit;
- }
-
- kgd_mem = (struct kgd_mem *)mem;
- dumper_bo = kgd_mem->bo;
-
- /* Skip checkpointing BOs that are used for Trap handler
- * code and state. Currently, these BOs have a VA that
- * is less GPUVM Base
- */
- if (kgd_mem->va && kgd_mem->va <= pdd->gpuvm_base)
- continue;
-
- bo_bucket = &bo_buckets[bo_index];
- bo_priv = &bo_privs[bo_index];
-
- bo_bucket->gpu_id = pdd->user_gpu_id;
- bo_bucket->addr = (uint64_t)kgd_mem->va;
- bo_bucket->size = amdgpu_bo_size(dumper_bo);
- bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags;
- bo_priv->idr_handle = id;
-
- if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
- ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo,
- &bo_priv->user_addr);
- if (ret) {
- pr_err("Failed to obtain user address for user-pointer bo\n");
- goto exit;
- }
- }
- if (bo_bucket->alloc_flags
- & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
- ret = criu_get_prime_handle(kgd_mem,
- bo_bucket->alloc_flags &
- KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
- &bo_bucket->dmabuf_fd);
- if (ret)
- goto exit;
- } else {
- bo_bucket->dmabuf_fd = KFD_INVALID_FD;
- }
-
- if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
- bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
- KFD_MMAP_GPU_ID(pdd->dev->id);
- else if (bo_bucket->alloc_flags &
- KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
- bo_bucket->offset = KFD_MMAP_TYPE_MMIO |
- KFD_MMAP_GPU_ID(pdd->dev->id);
- else
- bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo);
-
- for (i = 0; i < p->n_pdds; i++) {
- if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->dev->adev, kgd_mem))
- bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id;
- }
-
- pr_debug("bo_size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n"
- "gpu_id = 0x%x alloc_flags = 0x%x idr_handle = 0x%x",
- bo_bucket->size,
- bo_bucket->addr,
- bo_bucket->offset,
- bo_bucket->gpu_id,
- bo_bucket->alloc_flags,
- bo_priv->idr_handle);
- bo_index++;
- }
- }
-
- ret = copy_to_user(user_bos, bo_buckets, num_bos * sizeof(*bo_buckets));
- if (ret) {
- pr_err("Failed to copy BO information to user\n");
- ret = -EFAULT;
- goto exit;
- }
-
- ret = copy_to_user(user_priv_data + *priv_offset, bo_privs, num_bos * sizeof(*bo_privs));
- if (ret) {
- pr_err("Failed to copy BO priv information to user\n");
- ret = -EFAULT;
- goto exit;
- }
-
- *priv_offset += num_bos * sizeof(*bo_privs);
-
-exit:
- while (ret && bo_index--) {
- if (bo_buckets[bo_index].alloc_flags
- & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
- close_fd(bo_buckets[bo_index].dmabuf_fd);
- }
-
- kvfree(bo_buckets);
- kvfree(bo_privs);
- return ret;
-}
-
-static int criu_get_process_object_info(struct kfd_process *p,
- uint32_t *num_devices,
- uint32_t *num_bos,
- uint32_t *num_objects,
- uint64_t *objs_priv_size)
-{
- uint64_t queues_priv_data_size, svm_priv_data_size, priv_size;
- uint32_t num_queues, num_events, num_svm_ranges;
- int ret;
-
- *num_devices = p->n_pdds;
- *num_bos = get_process_num_bos(p);
-
- ret = kfd_process_get_queue_info(p, &num_queues, &queues_priv_data_size);
- if (ret)
- return ret;
-
- num_events = kfd_get_num_events(p);
-
- ret = svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
- if (ret)
- return ret;
-
- *num_objects = num_queues + num_events + num_svm_ranges;
-
- if (objs_priv_size) {
- priv_size = sizeof(struct kfd_criu_process_priv_data);
- priv_size += *num_devices * sizeof(struct kfd_criu_device_priv_data);
- priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
- priv_size += queues_priv_data_size;
- priv_size += num_events * sizeof(struct kfd_criu_event_priv_data);
- priv_size += svm_priv_data_size;
- *objs_priv_size = priv_size;
- }
- return 0;
-}
-
-static int criu_checkpoint(struct file *filep,
- struct kfd_process *p,
- struct kfd_ioctl_criu_args *args)
-{
- int ret;
- uint32_t num_devices, num_bos, num_objects;
- uint64_t priv_size, priv_offset = 0, bo_priv_offset;
-
- if (!args->devices || !args->bos || !args->priv_data)
- return -EINVAL;
-
- mutex_lock(&p->mutex);
-
- if (!p->n_pdds) {
- pr_err("No pdd for given process\n");
- ret = -ENODEV;
- goto exit_unlock;
- }
-
- /* Confirm all process queues are evicted */
- if (!p->queues_paused) {
- pr_err("Cannot dump process when queues are not in evicted state\n");
- /* CRIU plugin did not call op PROCESS_INFO before checkpointing */
- ret = -EINVAL;
- goto exit_unlock;
- }
-
- ret = criu_get_process_object_info(p, &num_devices, &num_bos, &num_objects, &priv_size);
- if (ret)
- goto exit_unlock;
-
- if (num_devices != args->num_devices ||
- num_bos != args->num_bos ||
- num_objects != args->num_objects ||
- priv_size != args->priv_data_size) {
-
- ret = -EINVAL;
- goto exit_unlock;
- }
-
- /* each function will store private data inside priv_data and adjust priv_offset */
- ret = criu_checkpoint_process(p, (uint8_t __user *)args->priv_data, &priv_offset);
- if (ret)
- goto exit_unlock;
-
- ret = criu_checkpoint_devices(p, num_devices, (uint8_t __user *)args->devices,
- (uint8_t __user *)args->priv_data, &priv_offset);
- if (ret)
- goto exit_unlock;
-
- /* Leave room for BOs in the private data. They need to be restored
- * before events, but we checkpoint them last to simplify the error
- * handling.
- */
- bo_priv_offset = priv_offset;
- priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data);
-
- if (num_objects) {
- ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,
- &priv_offset);
- if (ret)
- goto exit_unlock;
-
- ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
- &priv_offset);
- if (ret)
- goto exit_unlock;
-
- ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);
- if (ret)
- goto exit_unlock;
- }
-
- /* This must be the last thing in this function that can fail.
- * Otherwise we leak dmabuf file descriptors.
- */
- ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
- (uint8_t __user *)args->priv_data, &bo_priv_offset);
-
-exit_unlock:
- mutex_unlock(&p->mutex);
- if (ret)
- pr_err("Failed to dump CRIU ret:%d\n", ret);
- else
- pr_debug("CRIU dump ret:%d\n", ret);
-
- return ret;
-}
-
-static int criu_restore_process(struct kfd_process *p,
- struct kfd_ioctl_criu_args *args,
- uint64_t *priv_offset,
- uint64_t max_priv_data_size)
-{
- int ret = 0;
- struct kfd_criu_process_priv_data process_priv;
-
- if (*priv_offset + sizeof(process_priv) > max_priv_data_size)
- return -EINVAL;
-
- ret = copy_from_user(&process_priv,
- (void __user *)(args->priv_data + *priv_offset),
- sizeof(process_priv));
- if (ret) {
- pr_err("Failed to copy process private information from user\n");
- ret = -EFAULT;
- goto exit;
- }
- *priv_offset += sizeof(process_priv);
-
- if (process_priv.version != KFD_CRIU_PRIV_VERSION) {
- pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n",
- process_priv.version, KFD_CRIU_PRIV_VERSION);
- return -EINVAL;
- }
-
- pr_debug("Setting XNACK mode\n");
- if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) {
- pr_err("xnack mode cannot be set\n");
- ret = -EPERM;
- goto exit;
- } else {
- pr_debug("set xnack mode: %d\n", process_priv.xnack_mode);
- p->xnack_enabled = process_priv.xnack_mode;
- }
-
-exit:
- return ret;
-}
-
-static int criu_restore_devices(struct kfd_process *p,
- struct kfd_ioctl_criu_args *args,
- uint64_t *priv_offset,
- uint64_t max_priv_data_size)
-{
- struct kfd_criu_device_bucket *device_buckets;
- struct kfd_criu_device_priv_data *device_privs;
- int ret = 0;
- uint32_t i;
-
- if (args->num_devices != p->n_pdds)
- return -EINVAL;
-
- if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size)
- return -EINVAL;
-
- device_buckets = kmalloc_array(args->num_devices, sizeof(*device_buckets), GFP_KERNEL);
- if (!device_buckets)
- return -ENOMEM;
-
- ret = copy_from_user(device_buckets, (void __user *)args->devices,
- args->num_devices * sizeof(*device_buckets));
- if (ret) {
- pr_err("Failed to copy devices buckets from user\n");
- ret = -EFAULT;
- goto exit;
- }
-
- for (i = 0; i < args->num_devices; i++) {
- struct kfd_node *dev;
- struct kfd_process_device *pdd;
- struct file *drm_file;
-
- /* device private data is not currently used */
-
- if (!device_buckets[i].user_gpu_id) {
- pr_err("Invalid user gpu_id\n");
- ret = -EINVAL;
- goto exit;
- }
-
- dev = kfd_device_by_id(device_buckets[i].actual_gpu_id);
- if (!dev) {
- pr_err("Failed to find device with gpu_id = %x\n",
- device_buckets[i].actual_gpu_id);
- ret = -EINVAL;
- goto exit;
- }
-
- pdd = kfd_get_process_device_data(dev, p);
- if (!pdd) {
- pr_err("Failed to get pdd for gpu_id = %x\n",
- device_buckets[i].actual_gpu_id);
- ret = -EINVAL;
- goto exit;
- }
- pdd->user_gpu_id = device_buckets[i].user_gpu_id;
-
- drm_file = fget(device_buckets[i].drm_fd);
- if (!drm_file) {
- pr_err("Invalid render node file descriptor sent from plugin (%d)\n",
- device_buckets[i].drm_fd);
- ret = -EINVAL;
- goto exit;
- }
-
- if (pdd->drm_file) {
- ret = -EINVAL;
- goto exit;
- }
-
- /* create the vm using render nodes for kfd pdd */
- if (kfd_process_device_init_vm(pdd, drm_file)) {
- pr_err("could not init vm for given pdd\n");
- /* On success, the PDD keeps the drm_file reference */
- fput(drm_file);
- ret = -EINVAL;
- goto exit;
- }
- /*
- * pdd now already has the vm bound to render node so below api won't create a new
- * exclusive kfd mapping but use existing one with renderDXXX but is still needed
- * for iommu v2 binding and runtime pm.
- */
- pdd = kfd_bind_process_to_device(dev, p);
- if (IS_ERR(pdd)) {
- ret = PTR_ERR(pdd);
- goto exit;
- }
-
- if (!pdd->qpd.proc_doorbells) {
- ret = kfd_alloc_process_doorbells(dev->kfd, pdd);
- if (ret)
- goto exit;
- }
- }
-
- /*
- * We are not copying device private data from user as we are not using the data for now,
- * but we still adjust for its private data.
- */
- *priv_offset += args->num_devices * sizeof(*device_privs);
-
-exit:
- kfree(device_buckets);
- return ret;
-}
-
-static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
- struct kfd_criu_bo_bucket *bo_bucket,
- struct kfd_criu_bo_priv_data *bo_priv,
- struct kgd_mem **kgd_mem)
-{
- int idr_handle;
- int ret;
- const bool criu_resume = true;
- u64 offset;
-
- if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
- if (bo_bucket->size !=
- kfd_doorbell_process_slice(pdd->dev->kfd))
- return -EINVAL;
-
- offset = kfd_get_process_doorbells(pdd);
- if (!offset)
- return -ENOMEM;
- } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
- /* MMIO BOs need remapped bus address */
- if (bo_bucket->size != PAGE_SIZE) {
- pr_err("Invalid page size\n");
- return -EINVAL;
- }
- offset = pdd->dev->adev->rmmio_remap.bus_addr;
- if (!offset) {
- pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n");
- return -ENOMEM;
- }
- } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
- offset = bo_priv->user_addr;
- }
- /* Create the BO */
- ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr,
- bo_bucket->size, pdd->drm_priv, kgd_mem,
- &offset, bo_bucket->alloc_flags, criu_resume);
- if (ret) {
- pr_err("Could not create the BO\n");
- return ret;
- }
- pr_debug("New BO created: size:0x%llx addr:0x%llx offset:0x%llx\n",
- bo_bucket->size, bo_bucket->addr, offset);
-
- /* Restore previous IDR handle */
- pr_debug("Restoring old IDR handle for the BO");
- idr_handle = idr_alloc(&pdd->alloc_idr, *kgd_mem, bo_priv->idr_handle,
- bo_priv->idr_handle + 1, GFP_KERNEL);
-
- if (idr_handle < 0) {
- pr_err("Could not allocate idr\n");
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, *kgd_mem, pdd->drm_priv,
- NULL);
- return -ENOMEM;
- }
-
- if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
- bo_bucket->restored_offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id);
- if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
- bo_bucket->restored_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(pdd->dev->id);
- } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
- bo_bucket->restored_offset = offset;
- } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
- bo_bucket->restored_offset = offset;
- /* Update the VRAM usage count */
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
- }
- return 0;
-}
-
-static int criu_restore_bo(struct kfd_process *p,
- struct kfd_criu_bo_bucket *bo_bucket,
- struct kfd_criu_bo_priv_data *bo_priv)
-{
- struct kfd_process_device *pdd;
- struct kgd_mem *kgd_mem;
- int ret;
- int j;
-
- pr_debug("Restoring BO size:0x%llx addr:0x%llx gpu_id:0x%x flags:0x%x idr_handle:0x%x\n",
- bo_bucket->size, bo_bucket->addr, bo_bucket->gpu_id, bo_bucket->alloc_flags,
- bo_priv->idr_handle);
-
- pdd = kfd_process_device_data_by_id(p, bo_bucket->gpu_id);
- if (!pdd) {
- pr_err("Failed to get pdd\n");
- return -ENODEV;
- }
-
- ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem);
- if (ret)
- return ret;
-
- /* now map these BOs to GPU/s */
- for (j = 0; j < p->n_pdds; j++) {
- struct kfd_node *peer;
- struct kfd_process_device *peer_pdd;
-
- if (!bo_priv->mapped_gpuids[j])
- break;
-
- peer_pdd = kfd_process_device_data_by_id(p, bo_priv->mapped_gpuids[j]);
- if (!peer_pdd)
- return -EINVAL;
-
- peer = peer_pdd->dev;
-
- peer_pdd = kfd_bind_process_to_device(peer, p);
- if (IS_ERR(peer_pdd))
- return PTR_ERR(peer_pdd);
-
- ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem,
- peer_pdd->drm_priv);
- if (ret) {
- pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds);
- return ret;
- }
- }
-
- pr_debug("map memory was successful for the BO\n");
- /* create the dmabuf object and export the bo */
- if (bo_bucket->alloc_flags
- & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
- ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
- &bo_bucket->dmabuf_fd);
- if (ret)
- return ret;
- } else {
- bo_bucket->dmabuf_fd = KFD_INVALID_FD;
- }
-
- return 0;
-}
-
-static int criu_restore_bos(struct kfd_process *p,
- struct kfd_ioctl_criu_args *args,
- uint64_t *priv_offset,
- uint64_t max_priv_data_size)
-{
- struct kfd_criu_bo_bucket *bo_buckets = NULL;
- struct kfd_criu_bo_priv_data *bo_privs = NULL;
- int ret = 0;
- uint32_t i = 0;
-
- if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
- return -EINVAL;
-
- /* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */
- amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);
-
- bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
- if (!bo_buckets)
- return -ENOMEM;
-
- ret = copy_from_user(bo_buckets, (void __user *)args->bos,
- args->num_bos * sizeof(*bo_buckets));
- if (ret) {
- pr_err("Failed to copy BOs information from user\n");
- ret = -EFAULT;
- goto exit;
- }
-
- bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
- if (!bo_privs) {
- ret = -ENOMEM;
- goto exit;
- }
-
- ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
- args->num_bos * sizeof(*bo_privs));
- if (ret) {
- pr_err("Failed to copy BOs information from user\n");
- ret = -EFAULT;
- goto exit;
- }
- *priv_offset += args->num_bos * sizeof(*bo_privs);
-
- /* Create and map new BOs */
- for (; i < args->num_bos; i++) {
- ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
- if (ret) {
- pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
- goto exit;
- }
- } /* done */
-
- /* Copy only the buckets back so user can read bo_buckets[N].restored_offset */
- ret = copy_to_user((void __user *)args->bos,
- bo_buckets,
- (args->num_bos * sizeof(*bo_buckets)));
- if (ret)
- ret = -EFAULT;
-
-exit:
- while (ret && i--) {
- if (bo_buckets[i].alloc_flags
- & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
- close_fd(bo_buckets[i].dmabuf_fd);
- }
- kvfree(bo_buckets);
- kvfree(bo_privs);
- return ret;
-}
-
-static int criu_restore_objects(struct file *filep,
- struct kfd_process *p,
- struct kfd_ioctl_criu_args *args,
- uint64_t *priv_offset,
- uint64_t max_priv_data_size)
-{
- int ret = 0;
- uint32_t i;
-
- BUILD_BUG_ON(offsetof(struct kfd_criu_queue_priv_data, object_type));
- BUILD_BUG_ON(offsetof(struct kfd_criu_event_priv_data, object_type));
- BUILD_BUG_ON(offsetof(struct kfd_criu_svm_range_priv_data, object_type));
-
- for (i = 0; i < args->num_objects; i++) {
- uint32_t object_type;
-
- if (*priv_offset + sizeof(object_type) > max_priv_data_size) {
- pr_err("Invalid private data size\n");
- return -EINVAL;
- }
-
- ret = get_user(object_type, (uint32_t __user *)(args->priv_data + *priv_offset));
- if (ret) {
- pr_err("Failed to copy private information from user\n");
- goto exit;
- }
-
- switch (object_type) {
- case KFD_CRIU_OBJECT_TYPE_QUEUE:
- ret = kfd_criu_restore_queue(p, (uint8_t __user *)args->priv_data,
- priv_offset, max_priv_data_size);
- if (ret)
- goto exit;
- break;
- case KFD_CRIU_OBJECT_TYPE_EVENT:
- ret = kfd_criu_restore_event(filep, p, (uint8_t __user *)args->priv_data,
- priv_offset, max_priv_data_size);
- if (ret)
- goto exit;
- break;
- case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
- ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data,
- priv_offset, max_priv_data_size);
- if (ret)
- goto exit;
- break;
- default:
- pr_err("Invalid object type:%u at index:%d\n", object_type, i);
- ret = -EINVAL;
- goto exit;
- }
- }
-exit:
- return ret;
-}
-
-static int criu_restore(struct file *filep,
- struct kfd_process *p,
- struct kfd_ioctl_criu_args *args)
-{
- uint64_t priv_offset = 0;
- int ret = 0;
-
- pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
- args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
-
- if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||
- !args->num_devices || !args->num_bos)
- return -EINVAL;
-
- mutex_lock(&p->mutex);
-
- /*
- * Set the process to evicted state to avoid running any new queues before all the memory
- * mappings are ready.
- */
- ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE);
- if (ret)
- goto exit_unlock;
-
- /* Each function will adjust priv_offset based on how many bytes they consumed */
- ret = criu_restore_process(p, args, &priv_offset, args->priv_data_size);
- if (ret)
- goto exit_unlock;
-
- ret = criu_restore_devices(p, args, &priv_offset, args->priv_data_size);
- if (ret)
- goto exit_unlock;
-
- ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size);
- if (ret)
- goto exit_unlock;
-
- ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size);
- if (ret)
- goto exit_unlock;
-
- if (priv_offset != args->priv_data_size) {
- pr_err("Invalid private data size\n");
- ret = -EINVAL;
- }
-
-exit_unlock:
- mutex_unlock(&p->mutex);
- if (ret)
- pr_err("Failed to restore CRIU ret:%d\n", ret);
- else
- pr_debug("CRIU restore successful\n");
-
- return ret;
-}
-
-static int criu_unpause(struct file *filep,
- struct kfd_process *p,
- struct kfd_ioctl_criu_args *args)
-{
- int ret;
-
- mutex_lock(&p->mutex);
-
- if (!p->queues_paused) {
- mutex_unlock(&p->mutex);
- return -EINVAL;
- }
-
- ret = kfd_process_restore_queues(p);
- if (ret)
- pr_err("Failed to unpause queues ret:%d\n", ret);
- else
- p->queues_paused = false;
-
- mutex_unlock(&p->mutex);
-
- return ret;
-}
-
-static int criu_resume(struct file *filep,
- struct kfd_process *p,
- struct kfd_ioctl_criu_args *args)
-{
- struct kfd_process *target = NULL;
- struct pid *pid = NULL;
- int ret = 0;
-
- pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,
- args->pid);
-
- pid = find_get_pid(args->pid);
- if (!pid) {
- pr_err("Cannot find pid info for %i\n", args->pid);
- return -ESRCH;
- }
-
- pr_debug("calling kfd_lookup_process_by_pid\n");
- target = kfd_lookup_process_by_pid(pid);
-
- put_pid(pid);
-
- if (!target) {
- pr_debug("Cannot find process info for %i\n", args->pid);
- return -ESRCH;
- }
-
- mutex_lock(&target->mutex);
- ret = kfd_criu_resume_svm(target);
- if (ret) {
- pr_err("kfd_criu_resume_svm failed for %i\n", args->pid);
- goto exit;
- }
-
- ret = amdgpu_amdkfd_criu_resume(target->kgd_process_info);
- if (ret)
- pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid);
-
-exit:
- mutex_unlock(&target->mutex);
-
- kfd_unref_process(target);
- return ret;
-}
-
-static int criu_process_info(struct file *filep,
- struct kfd_process *p,
- struct kfd_ioctl_criu_args *args)
-{
- int ret = 0;
-
- mutex_lock(&p->mutex);
-
- if (!p->n_pdds) {
- pr_err("No pdd for given process\n");
- ret = -ENODEV;
- goto err_unlock;
- }
-
- ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT);
- if (ret)
- goto err_unlock;
-
- p->queues_paused = true;
-
- args->pid = task_pid_nr_ns(p->lead_thread,
- task_active_pid_ns(p->lead_thread));
-
- ret = criu_get_process_object_info(p, &args->num_devices, &args->num_bos,
- &args->num_objects, &args->priv_data_size);
- if (ret)
- goto err_unlock;
-
- dev_dbg(kfd_device, "Num of devices:%u bos:%u objects:%u priv_data_size:%lld\n",
- args->num_devices, args->num_bos, args->num_objects,
- args->priv_data_size);
-
-err_unlock:
- if (ret) {
- kfd_process_restore_queues(p);
- p->queues_paused = false;
- }
- mutex_unlock(&p->mutex);
- return ret;
-}
-
static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
{
struct kfd_ioctl_criu_args *args = data;
@@ -2720,19 +1758,19 @@ static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
dev_dbg(kfd_device, "CRIU operation: %d\n", args->op);
switch (args->op) {
case KFD_CRIU_OP_PROCESS_INFO:
- ret = criu_process_info(filep, p, args);
+ ret = kfd_criu_process_info(filep, p, args);
break;
case KFD_CRIU_OP_CHECKPOINT:
- ret = criu_checkpoint(filep, p, args);
+ ret = kfd_criu_checkpoint(filep, p, args);
break;
case KFD_CRIU_OP_UNPAUSE:
- ret = criu_unpause(filep, p, args);
+ ret = kfd_criu_unpause(filep, p, args);
break;
case KFD_CRIU_OP_RESTORE:
- ret = criu_restore(filep, p, args);
+ ret = kfd_criu_restore(filep, p, args);
break;
case KFD_CRIU_OP_RESUME:
- ret = criu_resume(filep, p, args);
+ ret = kfd_criu_resume(filep, p, args);
break;
default:
dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_criu.c b/drivers/gpu/drm/amd/amdkfd/kfd_criu.c
new file mode 100644
index 000000000000..72a9b358a642
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_criu.c
@@ -0,0 +1,989 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/fdtable.h>
+
+#include "kfd_criu.h"
+#include "kfd_svm.h"
+
+static int criu_checkpoint_process(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset)
+{
+ struct kfd_criu_process_priv_data process_priv;
+ int ret;
+
+ memset(&process_priv, 0, sizeof(process_priv));
+
+ process_priv.version = KFD_CRIU_PRIV_VERSION;
+ /* For CR, we don't consider negative xnack mode which is used for
+ * querying without changing it, here 0 simply means disabled and 1
+ * means enabled so retry for finding a valid PTE.
+ */
+ process_priv.xnack_mode = p->xnack_enabled ? 1 : 0;
+
+ ret = copy_to_user(user_priv_data + *priv_offset,
+ &process_priv, sizeof(process_priv));
+
+ if (ret) {
+ pr_err("Failed to copy process information to user\n");
+ ret = -EFAULT;
+ }
+
+ *priv_offset += sizeof(process_priv);
+ return ret;
+}
+
+static int criu_checkpoint_devices(struct kfd_process *p,
+ uint32_t num_devices,
+ uint8_t __user *user_addr,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset)
+{
+ struct kfd_criu_device_priv_data *device_priv = NULL;
+ struct kfd_criu_device_bucket *device_buckets = NULL;
+ int ret = 0, i;
+
+ device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), GFP_KERNEL);
+ if (!device_buckets) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL);
+ if (!device_priv) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ for (i = 0; i < num_devices; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ device_buckets[i].user_gpu_id = pdd->user_gpu_id;
+ device_buckets[i].actual_gpu_id = pdd->dev->id;
+
+ /*
+ * priv_data does not contain useful information for now and is reserved for
+ * future use, so we do not set its contents.
+ */
+ }
+
+ ret = copy_to_user(user_addr, device_buckets, num_devices * sizeof(*device_buckets));
+ if (ret) {
+ pr_err("Failed to copy device information to user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ ret = copy_to_user(user_priv_data + *priv_offset,
+ device_priv,
+ num_devices * sizeof(*device_priv));
+ if (ret) {
+ pr_err("Failed to copy device information to user\n");
+ ret = -EFAULT;
+ }
+ *priv_offset += num_devices * sizeof(*device_priv);
+
+exit:
+ kvfree(device_buckets);
+ kvfree(device_priv);
+ return ret;
+}
+
+static uint32_t get_process_num_bos(struct kfd_process *p)
+{
+ uint32_t num_of_bos = 0;
+ int i;
+
+ /* Run over all PDDs of the process */
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+ void *mem;
+ int id;
+
+ idr_for_each_entry(&pdd->alloc_idr, mem, id) {
+ struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
+
+ if (!kgd_mem->va || kgd_mem->va > pdd->gpuvm_base)
+ num_of_bos++;
+ }
+ }
+ return num_of_bos;
+}
+
+static int criu_get_prime_handle(struct kgd_mem *mem,
+ int flags, u32 *shared_fd)
+{
+ struct dma_buf *dmabuf;
+ int ret;
+
+ ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);
+ if (ret) {
+ pr_err("dmabuf export failed for the BO\n");
+ return ret;
+ }
+
+ ret = dma_buf_fd(dmabuf, flags);
+ if (ret < 0) {
+ pr_err("dmabuf create fd failed, ret:%d\n", ret);
+ goto out_free_dmabuf;
+ }
+
+ *shared_fd = ret;
+ return 0;
+
+out_free_dmabuf:
+ dma_buf_put(dmabuf);
+ return ret;
+}
+
+static int criu_checkpoint_bos(struct kfd_process *p,
+ uint32_t num_bos,
+ uint8_t __user *user_bos,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset)
+{
+ struct kfd_criu_bo_bucket *bo_buckets;
+ struct kfd_criu_bo_priv_data *bo_privs;
+ int ret = 0, pdd_index, bo_index = 0, id;
+ void *mem;
+
+ bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);
+ if (!bo_buckets)
+ return -ENOMEM;
+
+ bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL);
+ if (!bo_privs) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
+ struct kfd_process_device *pdd = p->pdds[pdd_index];
+ struct amdgpu_bo *dumper_bo;
+ struct kgd_mem *kgd_mem;
+
+ idr_for_each_entry(&pdd->alloc_idr, mem, id) {
+ struct kfd_criu_bo_bucket *bo_bucket;
+ struct kfd_criu_bo_priv_data *bo_priv;
+ int i, dev_idx = 0;
+
+ if (!mem) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ kgd_mem = (struct kgd_mem *)mem;
+ dumper_bo = kgd_mem->bo;
+
+ /* Skip checkpointing BOs that are used for Trap handler
+ * code and state. Currently, these BOs have a VA that
+ * is less than the GPUVM base.
+ */
+ if (kgd_mem->va && kgd_mem->va <= pdd->gpuvm_base)
+ continue;
+
+ bo_bucket = &bo_buckets[bo_index];
+ bo_priv = &bo_privs[bo_index];
+
+ bo_bucket->gpu_id = pdd->user_gpu_id;
+ bo_bucket->addr = (uint64_t)kgd_mem->va;
+ bo_bucket->size = amdgpu_bo_size(dumper_bo);
+ bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags;
+ bo_priv->idr_handle = id;
+
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo,
+ &bo_priv->user_addr);
+ if (ret) {
+ pr_err("Failed to obtain user address for user-pointer bo\n");
+ goto exit;
+ }
+ }
+ if (bo_bucket->alloc_flags
+ & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
+ ret = criu_get_prime_handle(kgd_mem,
+ bo_bucket->alloc_flags &
+ KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
+ &bo_bucket->dmabuf_fd);
+ if (ret)
+ goto exit;
+ } else {
+ bo_bucket->dmabuf_fd = KFD_INVALID_FD;
+ }
+
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
+ bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
+ KFD_MMAP_GPU_ID(pdd->dev->id);
+ else if (bo_bucket->alloc_flags &
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
+ bo_bucket->offset = KFD_MMAP_TYPE_MMIO |
+ KFD_MMAP_GPU_ID(pdd->dev->id);
+ else
+ bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo);
+
+ for (i = 0; i < p->n_pdds; i++) {
+ if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->dev->adev, kgd_mem))
+ bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id;
+ }
+
+ pr_debug("bo_size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n"
+ "gpu_id = 0x%x alloc_flags = 0x%x idr_handle = 0x%x",
+ bo_bucket->size,
+ bo_bucket->addr,
+ bo_bucket->offset,
+ bo_bucket->gpu_id,
+ bo_bucket->alloc_flags,
+ bo_priv->idr_handle);
+ bo_index++;
+ }
+ }
+
+ ret = copy_to_user(user_bos, bo_buckets, num_bos * sizeof(*bo_buckets));
+ if (ret) {
+ pr_err("Failed to copy BO information to user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ ret = copy_to_user(user_priv_data + *priv_offset, bo_privs, num_bos * sizeof(*bo_privs));
+ if (ret) {
+ pr_err("Failed to copy BO priv information to user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ *priv_offset += num_bos * sizeof(*bo_privs);
+
+exit:
+ while (ret && bo_index--) {
+ if (bo_buckets[bo_index].alloc_flags
+ & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
+ close_fd(bo_buckets[bo_index].dmabuf_fd);
+ }
+
+ kvfree(bo_buckets);
+ kvfree(bo_privs);
+ return ret;
+}
+
+static int criu_get_process_object_info(struct kfd_process *p,
+ uint32_t *num_devices,
+ uint32_t *num_bos,
+ uint32_t *num_objects,
+ uint64_t *objs_priv_size)
+{
+ uint64_t queues_priv_data_size, svm_priv_data_size, priv_size;
+ uint32_t num_queues, num_events, num_svm_ranges;
+ int ret;
+
+ *num_devices = p->n_pdds;
+ *num_bos = get_process_num_bos(p);
+
+ ret = kfd_process_get_queue_info(p, &num_queues, &queues_priv_data_size);
+ if (ret)
+ return ret;
+
+ num_events = kfd_get_num_events(p);
+
+ ret = svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
+ if (ret)
+ return ret;
+
+ *num_objects = num_queues + num_events + num_svm_ranges;
+
+ if (objs_priv_size) {
+ priv_size = sizeof(struct kfd_criu_process_priv_data);
+ priv_size += *num_devices * sizeof(struct kfd_criu_device_priv_data);
+ priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
+ priv_size += queues_priv_data_size;
+ priv_size += num_events * sizeof(struct kfd_criu_event_priv_data);
+ priv_size += svm_priv_data_size;
+ *objs_priv_size = priv_size;
+ }
+ return 0;
+}
+
+int kfd_criu_checkpoint(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ int ret;
+ uint32_t num_devices, num_bos, num_objects;
+ uint64_t priv_size, priv_offset = 0, bo_priv_offset;
+
+ if (!args->devices || !args->bos || !args->priv_data)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ if (!p->n_pdds) {
+ pr_err("No pdd for given process\n");
+ ret = -ENODEV;
+ goto exit_unlock;
+ }
+
+ /* Confirm all process queues are evicted */
+ if (!p->queues_paused) {
+ pr_err("Cannot dump process when queues are not in evicted state\n");
+ /* CRIU plugin did not call op PROCESS_INFO before checkpointing */
+ ret = -EINVAL;
+ goto exit_unlock;
+ }
+
+ ret = criu_get_process_object_info(p, &num_devices, &num_bos, &num_objects, &priv_size);
+ if (ret)
+ goto exit_unlock;
+
+ if (num_devices != args->num_devices ||
+ num_bos != args->num_bos ||
+ num_objects != args->num_objects ||
+ priv_size != args->priv_data_size) {
+
+ ret = -EINVAL;
+ goto exit_unlock;
+ }
+
+ /* each function will store private data inside priv_data and adjust priv_offset */
+ ret = criu_checkpoint_process(p, (uint8_t __user *)args->priv_data, &priv_offset);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_checkpoint_devices(p, num_devices, (uint8_t __user *)args->devices,
+ (uint8_t __user *)args->priv_data, &priv_offset);
+ if (ret)
+ goto exit_unlock;
+
+ /* Leave room for BOs in the private data. They need to be restored
+ * before events, but we checkpoint them last to simplify the error
+ * handling.
+ */
+ bo_priv_offset = priv_offset;
+ priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data);
+
+ if (num_objects) {
+ ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,
+ &priv_offset);
+ if (ret)
+ goto exit_unlock;
+
+ ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
+ &priv_offset);
+ if (ret)
+ goto exit_unlock;
+
+ ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);
+ if (ret)
+ goto exit_unlock;
+ }
+
+ /* This must be the last thing in this function that can fail.
+ * Otherwise we leak dmabuf file descriptors.
+ */
+ ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
+ (uint8_t __user *)args->priv_data, &bo_priv_offset);
+
+exit_unlock:
+ mutex_unlock(&p->mutex);
+ if (ret)
+ pr_err("Failed to dump CRIU ret:%d\n", ret);
+ else
+ pr_debug("CRIU dump ret:%d\n", ret);
+
+ return ret;
+}
+
+static int criu_restore_process(struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args,
+ uint64_t *priv_offset,
+ uint64_t max_priv_data_size)
+{
+ int ret = 0;
+ struct kfd_criu_process_priv_data process_priv;
+
+ if (*priv_offset + sizeof(process_priv) > max_priv_data_size)
+ return -EINVAL;
+
+ ret = copy_from_user(&process_priv,
+ (void __user *)(args->priv_data + *priv_offset),
+ sizeof(process_priv));
+ if (ret) {
+ pr_err("Failed to copy process private information from user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+ *priv_offset += sizeof(process_priv);
+
+ if (process_priv.version != KFD_CRIU_PRIV_VERSION) {
+ pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n",
+ process_priv.version, KFD_CRIU_PRIV_VERSION);
+ return -EINVAL;
+ }
+
+ pr_debug("Setting XNACK mode\n");
+ if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) {
+ pr_err("xnack mode cannot be set\n");
+ ret = -EPERM;
+ goto exit;
+ } else {
+ pr_debug("set xnack mode: %d\n", process_priv.xnack_mode);
+ p->xnack_enabled = process_priv.xnack_mode;
+ }
+
+exit:
+ return ret;
+}
+
+static int criu_restore_devices(struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args,
+ uint64_t *priv_offset,
+ uint64_t max_priv_data_size)
+{
+ struct kfd_criu_device_bucket *device_buckets;
+ struct kfd_criu_device_priv_data *device_privs;
+ int ret = 0;
+ uint32_t i;
+
+ if (args->num_devices != p->n_pdds)
+ return -EINVAL;
+
+ if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size)
+ return -EINVAL;
+
+ device_buckets = kmalloc_array(args->num_devices, sizeof(*device_buckets), GFP_KERNEL);
+ if (!device_buckets)
+ return -ENOMEM;
+
+ ret = copy_from_user(device_buckets, (void __user *)args->devices,
+ args->num_devices * sizeof(*device_buckets));
+ if (ret) {
+ pr_err("Failed to copy devices buckets from user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ for (i = 0; i < args->num_devices; i++) {
+ struct kfd_node *dev;
+ struct kfd_process_device *pdd;
+ struct file *drm_file;
+
+ /* device private data is not currently used */
+
+ if (!device_buckets[i].user_gpu_id) {
+ pr_err("Invalid user gpu_id\n");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ dev = kfd_device_by_id(device_buckets[i].actual_gpu_id);
+ if (!dev) {
+ pr_err("Failed to find device with gpu_id = %x\n",
+ device_buckets[i].actual_gpu_id);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd) {
+ pr_err("Failed to get pdd for gpu_id = %x\n",
+ device_buckets[i].actual_gpu_id);
+ ret = -EINVAL;
+ goto exit;
+ }
+ pdd->user_gpu_id = device_buckets[i].user_gpu_id;
+
+ drm_file = fget(device_buckets[i].drm_fd);
+ if (!drm_file) {
+ pr_err("Invalid render node file descriptor sent from plugin (%d)\n",
+ device_buckets[i].drm_fd);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (pdd->drm_file) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ /* create the vm using render nodes for kfd pdd */
+ if (kfd_process_device_init_vm(pdd, drm_file)) {
+ pr_err("could not init vm for given pdd\n");
+ /* On success, the PDD keeps the drm_file reference */
+ fput(drm_file);
+ ret = -EINVAL;
+ goto exit;
+ }
+ /*
+ * pdd now already has the vm bound to render node so below api won't create a new
+ * exclusive kfd mapping but use existing one with renderDXXX but is still needed
+ * for iommu v2 binding and runtime pm.
+ */
+ pdd = kfd_bind_process_to_device(dev, p);
+ if (IS_ERR(pdd)) {
+ ret = PTR_ERR(pdd);
+ goto exit;
+ }
+
+ if (!pdd->qpd.proc_doorbells) {
+ ret = kfd_alloc_process_doorbells(dev->kfd, pdd);
+ if (ret)
+ goto exit;
+ }
+ }
+
+ /*
+ * We are not copying device private data from user as we are not using the data for now,
+ * but we still adjust for its private data.
+ */
+ *priv_offset += args->num_devices * sizeof(*device_privs);
+
+exit:
+ kfree(device_buckets);
+ return ret;
+}
+
+static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
+ struct kfd_criu_bo_bucket *bo_bucket,
+ struct kfd_criu_bo_priv_data *bo_priv,
+ struct kgd_mem **kgd_mem)
+{
+ int idr_handle;
+ int ret;
+ const bool criu_resume = true;
+ u64 offset;
+
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
+ if (bo_bucket->size !=
+ kfd_doorbell_process_slice(pdd->dev->kfd))
+ return -EINVAL;
+
+ offset = kfd_get_process_doorbells(pdd);
+ if (!offset)
+ return -ENOMEM;
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+ /* MMIO BOs need remapped bus address */
+ if (bo_bucket->size != PAGE_SIZE) {
+ pr_err("Invalid page size\n");
+ return -EINVAL;
+ }
+ offset = pdd->dev->adev->rmmio_remap.bus_addr;
+ if (!offset) {
+ pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n");
+ return -ENOMEM;
+ }
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ offset = bo_priv->user_addr;
+ }
+ /* Create the BO */
+ ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr,
+ bo_bucket->size, pdd->drm_priv, kgd_mem,
+ &offset, bo_bucket->alloc_flags, criu_resume);
+ if (ret) {
+ pr_err("Could not create the BO\n");
+ return ret;
+ }
+ pr_debug("New BO created: size:0x%llx addr:0x%llx offset:0x%llx\n",
+ bo_bucket->size, bo_bucket->addr, offset);
+
+ /* Restore previous IDR handle */
+ pr_debug("Restoring old IDR handle for the BO");
+ idr_handle = idr_alloc(&pdd->alloc_idr, *kgd_mem, bo_priv->idr_handle,
+ bo_priv->idr_handle + 1, GFP_KERNEL);
+
+ if (idr_handle < 0) {
+ pr_err("Could not allocate idr\n");
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, *kgd_mem, pdd->drm_priv,
+ NULL);
+ return -ENOMEM;
+ }
+
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
+ bo_bucket->restored_offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id);
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+ bo_bucket->restored_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(pdd->dev->id);
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+ bo_bucket->restored_offset = offset;
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ bo_bucket->restored_offset = offset;
+ /* Update the VRAM usage count */
+ WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+ }
+ return 0;
+}
+
+static int criu_restore_bo(struct kfd_process *p,
+ struct kfd_criu_bo_bucket *bo_bucket,
+ struct kfd_criu_bo_priv_data *bo_priv)
+{
+ struct kfd_process_device *pdd;
+ struct kgd_mem *kgd_mem;
+ int ret;
+ int j;
+
+ pr_debug("Restoring BO size:0x%llx addr:0x%llx gpu_id:0x%x flags:0x%x idr_handle:0x%x\n",
+ bo_bucket->size, bo_bucket->addr, bo_bucket->gpu_id, bo_bucket->alloc_flags,
+ bo_priv->idr_handle);
+
+ pdd = kfd_process_device_data_by_id(p, bo_bucket->gpu_id);
+ if (!pdd) {
+ pr_err("Failed to get pdd\n");
+ return -ENODEV;
+ }
+
+ ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem);
+ if (ret)
+ return ret;
+
+ /* now map these BOs to GPU/s */
+ for (j = 0; j < p->n_pdds; j++) {
+ struct kfd_node *peer;
+ struct kfd_process_device *peer_pdd;
+
+ if (!bo_priv->mapped_gpuids[j])
+ break;
+
+ peer_pdd = kfd_process_device_data_by_id(p, bo_priv->mapped_gpuids[j]);
+ if (!peer_pdd)
+ return -EINVAL;
+
+ peer = peer_pdd->dev;
+
+ peer_pdd = kfd_bind_process_to_device(peer, p);
+ if (IS_ERR(peer_pdd))
+ return PTR_ERR(peer_pdd);
+
+ ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem,
+ peer_pdd->drm_priv);
+ if (ret) {
+ pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds);
+ return ret;
+ }
+ }
+
+ pr_debug("map memory was successful for the BO\n");
+ /* create the dmabuf object and export the bo */
+ if (bo_bucket->alloc_flags
+ & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
+ ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
+ &bo_bucket->dmabuf_fd);
+ if (ret)
+ return ret;
+ } else {
+ bo_bucket->dmabuf_fd = KFD_INVALID_FD;
+ }
+
+ return 0;
+}
+
+static int criu_restore_bos(struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args,
+ uint64_t *priv_offset,
+ uint64_t max_priv_data_size)
+{
+ struct kfd_criu_bo_bucket *bo_buckets = NULL;
+ struct kfd_criu_bo_priv_data *bo_privs = NULL;
+ int ret = 0;
+ uint32_t i = 0;
+
+ if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
+ return -EINVAL;
+
+ /* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */
+ amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);
+
+ bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
+ if (!bo_buckets)
+ return -ENOMEM;
+
+ ret = copy_from_user(bo_buckets, (void __user *)args->bos,
+ args->num_bos * sizeof(*bo_buckets));
+ if (ret) {
+ pr_err("Failed to copy BOs information from user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
+ if (!bo_privs) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
+ args->num_bos * sizeof(*bo_privs));
+ if (ret) {
+ pr_err("Failed to copy BOs information from user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+ *priv_offset += args->num_bos * sizeof(*bo_privs);
+
+ /* Create and map new BOs */
+ for (; i < args->num_bos; i++) {
+ ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
+ if (ret) {
+ pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
+ goto exit;
+ }
+ } /* done */
+
+ /* Copy only the buckets back so user can read bo_buckets[N].restored_offset */
+ ret = copy_to_user((void __user *)args->bos,
+ bo_buckets,
+ (args->num_bos * sizeof(*bo_buckets)));
+ if (ret)
+ ret = -EFAULT;
+
+exit:
+ while (ret && i--) {
+ if (bo_buckets[i].alloc_flags
+ & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
+ close_fd(bo_buckets[i].dmabuf_fd);
+ }
+ kvfree(bo_buckets);
+ kvfree(bo_privs);
+ return ret;
+}
+
+static int criu_restore_objects(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args,
+ uint64_t *priv_offset,
+ uint64_t max_priv_data_size)
+{
+ int ret = 0;
+ uint32_t i;
+
+ BUILD_BUG_ON(offsetof(struct kfd_criu_queue_priv_data, object_type));
+ BUILD_BUG_ON(offsetof(struct kfd_criu_event_priv_data, object_type));
+ BUILD_BUG_ON(offsetof(struct kfd_criu_svm_range_priv_data, object_type));
+
+ for (i = 0; i < args->num_objects; i++) {
+ uint32_t object_type;
+
+ if (*priv_offset + sizeof(object_type) > max_priv_data_size) {
+ pr_err("Invalid private data size\n");
+ return -EINVAL;
+ }
+
+ ret = get_user(object_type, (uint32_t __user *)(args->priv_data + *priv_offset));
+ if (ret) {
+ pr_err("Failed to copy private information from user\n");
+ goto exit;
+ }
+
+ switch (object_type) {
+ case KFD_CRIU_OBJECT_TYPE_QUEUE:
+ ret = kfd_criu_restore_queue(p, (uint8_t __user *)args->priv_data,
+ priv_offset, max_priv_data_size);
+ if (ret)
+ goto exit;
+ break;
+ case KFD_CRIU_OBJECT_TYPE_EVENT:
+ ret = kfd_criu_restore_event(filep, p, (uint8_t __user *)args->priv_data,
+ priv_offset, max_priv_data_size);
+ if (ret)
+ goto exit;
+ break;
+ case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
+ ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data,
+ priv_offset, max_priv_data_size);
+ if (ret)
+ goto exit;
+ break;
+ default:
+ pr_err("Invalid object type:%u at index:%d\n", object_type, i);
+ ret = -EINVAL;
+ goto exit;
+ }
+ }
+exit:
+ return ret;
+}
+
+int kfd_criu_restore(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ uint64_t priv_offset = 0;
+ int ret = 0;
+
+ pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
+ args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
+
+ if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||
+ !args->num_devices || !args->num_bos)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ /*
+ * Set the process to evicted state to avoid running any new queues before all the memory
+ * mappings are ready.
+ */
+ ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE);
+ if (ret)
+ goto exit_unlock;
+
+ /* Each function will adjust priv_offset based on how many bytes they consumed */
+ ret = criu_restore_process(p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_restore_devices(p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ if (priv_offset != args->priv_data_size) {
+ pr_err("Invalid private data size\n");
+ ret = -EINVAL;
+ }
+
+exit_unlock:
+ mutex_unlock(&p->mutex);
+ if (ret)
+ pr_err("Failed to restore CRIU ret:%d\n", ret);
+ else
+ pr_debug("CRIU restore successful\n");
+
+ return ret;
+}
+
+int kfd_criu_unpause(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ int ret;
+
+ mutex_lock(&p->mutex);
+
+ if (!p->queues_paused) {
+ mutex_unlock(&p->mutex);
+ return -EINVAL;
+ }
+
+ ret = kfd_process_restore_queues(p);
+ if (ret)
+ pr_err("Failed to unpause queues ret:%d\n", ret);
+ else
+ p->queues_paused = false;
+
+ mutex_unlock(&p->mutex);
+
+ return ret;
+}
+
+int kfd_criu_resume(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ struct kfd_process *target = NULL;
+ struct pid *pid = NULL;
+ int ret = 0;
+
+ pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,
+ args->pid);
+
+ pid = find_get_pid(args->pid);
+ if (!pid) {
+ pr_err("Cannot find pid info for %i\n", args->pid);
+ return -ESRCH;
+ }
+
+ pr_debug("calling kfd_lookup_process_by_pid\n");
+ target = kfd_lookup_process_by_pid(pid);
+
+ put_pid(pid);
+
+ if (!target) {
+ pr_debug("Cannot find process info for %i\n", args->pid);
+ return -ESRCH;
+ }
+
+ mutex_lock(&target->mutex);
+ ret = kfd_criu_resume_svm(target);
+ if (ret) {
+ pr_err("kfd_criu_resume_svm failed for %i\n", args->pid);
+ goto exit;
+ }
+
+ ret = amdgpu_amdkfd_criu_resume(target->kgd_process_info);
+ if (ret)
+ pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid);
+
+exit:
+ mutex_unlock(&target->mutex);
+
+ kfd_unref_process(target);
+ return ret;
+}
+
+int kfd_criu_process_info(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ int ret = 0;
+
+ mutex_lock(&p->mutex);
+
+ if (!p->n_pdds) {
+ pr_err("No pdd for given process\n");
+ ret = -ENODEV;
+ goto err_unlock;
+ }
+
+ ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT);
+ if (ret)
+ goto err_unlock;
+
+ p->queues_paused = true;
+
+ args->pid = task_pid_nr_ns(p->lead_thread,
+ task_active_pid_ns(p->lead_thread));
+
+ ret = criu_get_process_object_info(p, &args->num_devices, &args->num_bos,
+ &args->num_objects, &args->priv_data_size);
+ if (ret)
+ goto err_unlock;
+
+ dev_dbg(kfd_device, "Num of devices:%u bos:%u objects:%u priv_data_size:%lld\n",
+ args->num_devices, args->num_bos, args->num_objects,
+ args->priv_data_size);
+
+err_unlock:
+ if (ret) {
+ kfd_process_restore_queues(p);
+ p->queues_paused = false;
+ }
+ mutex_unlock(&p->mutex);
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_criu.h b/drivers/gpu/drm/amd/amdkfd/kfd_criu.h
new file mode 100644
index 000000000000..1a3d418a9505
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_criu.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __KFD_CRIU_H__
+#define __KFD_CRIU_H__
+
+#include <uapi/linux/kfd_ioctl.h>
+#include "kfd_priv.h"
+
+int kfd_criu_process_info(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args);
+
+int kfd_criu_checkpoint(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args);
+
+int kfd_criu_unpause(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args);
+
+int kfd_criu_restore(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args);
+
+int kfd_criu_resume(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args);
+
+#endif
--
2.34.1