[RFC PATCH 03/17] drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs

Felix Kuehling Felix.Kuehling at amd.com
Sat May 1 01:57:38 UTC 2021


From: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>

Checkpoint-Restore in userspace (CRIU) is a powerful tool that can
snapshot a running process and later restore it on the same or a remote
machine. However, for processes that have a device file (e.g. a GPU)
associated with them, it expects the driver to provide the necessary
support to assist CRIU and its extensible plugin interface. Thus, in
order to support Checkpoint-Restore of any ROCm process, the AMD Radeon
Open Compute kernel driver needs to provide a set of new APIs that
expose the necessary VRAM metadata and its contents to a userspace
component (the CRIU plugin), which can store them in the form of image
files.

This introduces some new ioctls which will be used to checkpoint and
restore any KFD-bound user process. KFD doesn't allow any arbitrary
ioctl call unless it is made by the group leader process. These ioctls
are expected to be called from a KFD CRIU plugin, which has elevated
ptrace-attached privileges and CAP_SYS_ADMIN capabilities associated
with the file descriptors, so modify KFD to allow such calls.

Signed-off-by: David Yat Sin <david.yatsin at amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
(cherry picked from commit 72f4907135aed9c037b9f442a6055b51733b518a)
(cherry picked from commit 33ff4953c5352f51d57a77ba8ae6614b7993e70d)
Change-Id: I1b25f6f65ad44b897752ac2c771a95157d0b1130
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  60 ++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  28 ++++++
 include/uapi/linux/kfd_ioctl.h           | 110 ++++++++++++++++++++++-
 3 files changed, 196 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 059c3f1ca27d..1fa2ba34a429 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -33,6 +33,7 @@
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
+#include <linux/ptrace.h>
 #include <linux/dma-buf.h>
 #include <asm/processor.h>
 #include "kfd_priv.h"
@@ -1802,6 +1803,42 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
 	return -EPERM;
 }
 #endif
+
+/*
+ * CRIU checkpoint/restore ioctl handlers. These are placeholders: each one
+ * only emits a debug message and reports success.
+ */
+static int kfd_ioctl_criu_dumper(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	pr_debug("Inside %s\n", __func__);
+
+	return 0;
+}
+
+static int kfd_ioctl_criu_restorer(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	pr_debug("Inside %s\n", __func__);
+
+	return 0;
+}
+
+static int kfd_ioctl_criu_helper(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	pr_debug("Inside %s\n", __func__);
+
+	return 0;
+}
+
+static int kfd_ioctl_criu_resume(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	pr_debug("Inside %s\n", __func__);
+
+	return 0;
+}
 
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
@@ -1906,6 +1938,18 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
 			kfd_ioctl_set_xnack_mode, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_DUMPER,
+			 kfd_ioctl_criu_dumper, KFD_IOC_FLAG_PTRACE_ATTACHED),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_RESTORER,
+			 kfd_ioctl_criu_restorer, KFD_IOC_FLAG_ROOT_ONLY),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_HELPER,
+			 kfd_ioctl_criu_helper, KFD_IOC_FLAG_PTRACE_ATTACHED),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_RESUME,
+			 kfd_ioctl_criu_resume, KFD_IOC_FLAG_ROOT_ONLY),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
@@ -1920,6 +1964,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 	char *kdata = NULL;
 	unsigned int usize, asize;
 	int retcode = -EINVAL;
+	bool ptrace_attached = false;
 
 	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
 		goto err_i1;
@@ -1945,7 +1990,15 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 	 * processes need to create their own KFD device context.
 	 */
 	process = filep->private_data;
-	if (process->lead_thread != current->group_leader) {
+
+	rcu_read_lock();
+	if ((ioctl->flags & KFD_IOC_FLAG_PTRACE_ATTACHED) &&
+	    ptrace_parent(process->lead_thread) == current)
+		ptrace_attached = true;
+	rcu_read_unlock();
+
+	if (process->lead_thread != current->group_leader
+	    && !ptrace_attached) {
 		dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
 		retcode = -EBADF;
 		goto err_i1;
@@ -1960,6 +2013,16 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 		goto err_i1;
 	}
 
+	/*
+	 * KFD_IOC_FLAG_ROOT_ONLY ioctls require CAP_SYS_ADMIN. Bail out
+	 * through err_i1, as the preceding checks do, instead of returning
+	 * directly.
+	 */
+	if (unlikely((ioctl->flags & KFD_IOC_FLAG_ROOT_ONLY) &&
+		     !capable(CAP_SYS_ADMIN))) {
+		retcode = -EACCES;
+		goto err_i1;
+	}
+
 	if (cmd & (IOC_IN | IOC_OUT)) {
 		if (asize <= sizeof(stack_kdata)) {
 			kdata = stack_kdata;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 64552f6b8ba4..a494d61543af 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -121,7 +121,35 @@
  */
 #define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512
 
+/**
+ * enum kfd_ioctl_flags - KFD ioctl flags
+ * Various flags that can be set in &amdkfd_ioctl_desc.flags to control how
+ * userspace can use a given ioctl.
+ */
+enum kfd_ioctl_flags {
+	/**
+	 * @KFD_IOC_FLAG_ROOT_ONLY:
+	 * Certain KFD ioctls such as AMDKFD_IOC_CRIU_RESTORER can potentially
+	 * perform privileged operations and load arbitrary data into MQDs and
+	 * eventually HQD registers when the queue is mapped by HWS. To guard
+	 * against misuse we perform additional security checks. In other
+	 * cases, certain ioctls such as AMDKFD_IOC_CRIU_RESUME might be called
+	 * by an external process, e.g. the CRIU restore process, for each
+	 * resuming task and thus require elevated privileges.
+	 *
+	 * Callers must hold the CAP_SYS_ADMIN capability.
+	 */
+	KFD_IOC_FLAG_ROOT_ONLY = BIT(0),
+	/**
+	 * @KFD_IOC_FLAG_PTRACE_ATTACHED:
+	 * Certain KFD ioctls such as AMDKFD_IOC_CRIU_HELPER and
+	 * AMDKFD_IOC_CRIU_DUMPER are expected to be called during a Checkpoint
+	 * operation triggered by CRIU. Since these are expected to be called
+	 * from a ptrace-attached context, we must authenticate these.
+	 */
+	KFD_IOC_FLAG_PTRACE_ATTACHED = BIT(1),
+
+};
 /*
  * Kernel module parameter to specify maximum number of supported queues per
  * device
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 3cb5b5dd9f77..9c8a77a0ce0a 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -467,6 +467,110 @@ struct kfd_ioctl_smi_events_args {
 	__u32 anon_fd;	/* from KFD */
 };
 
+/*
+ * CRIU checkpoint/restore structures. Structures containing __u64 members
+ * are explicitly padded to a multiple of 8 bytes so that their layout does
+ * not differ between 32-bit and 64-bit userspace.
+ */
+struct kfd_criu_devinfo_bucket {
+	__u32 user_gpu_id;
+	__u32 actual_gpu_id;
+	__u32 drm_fd;
+};
+
+struct kfd_criu_bo_buckets {
+	__u64 bo_addr;		/* from KFD */
+	__u64 bo_size;		/* from KFD */
+	__u64 bo_offset;	/* from KFD */
+	__u64 user_addr;	/* from KFD */
+	__u32 bo_alloc_flags;	/* from KFD */
+	__u32 gpu_id;		/* from KFD */
+	__u32 idr_handle;	/* from KFD */
+	__u32 pad;		/* explicit tail padding: 44 -> 48 bytes */
+};
+
+struct kfd_criu_q_bucket {
+	__u64 q_address;
+	__u64 q_size;
+	__u64 read_ptr_addr;
+	__u64 write_ptr_addr;
+	__u64 doorbell_off;
+	__u64 eop_ring_buffer_address;		/* Relevant only for VI */
+	__u64 ctx_save_restore_area_address;	/* Relevant only for VI */
+	__u64 queues_data_offset;
+	__u32 gpu_id;
+	__u32 type;
+	__u32 format;
+	__u32 q_id;
+	__u32 priority;
+	__u32 q_percent;
+	__u32 doorbell_id;
+	__u32 is_gws;				/* TODO Implement me */
+	__u32 sdma_id;			/* Relevant only for sdma queues*/
+	__u32 eop_ring_buffer_size;		/* Relevant only for VI */
+	__u32 ctx_save_restore_area_size;	/* Relevant only for VI */
+	__u32 ctl_stack_size;			/* Relevant only for VI */
+	__u32 cu_mask_size;
+	__u32 mqd_size;
+};
+
+struct kfd_criu_ev_bucket {
+	__u32 event_id;
+	__u32 auto_reset;
+	__u32 type;
+	__u32 signaled;
+
+	union {
+		struct kfd_hsa_memory_exception_data memory_exception_data;
+		struct kfd_hsa_hw_exception_data hw_exception_data;
+	};
+};
+
+struct kfd_ioctl_criu_dumper_args {
+	__u64 num_of_bos;
+	__u64 kfd_criu_bo_buckets_ptr;
+	__u64 kfd_criu_q_buckets_ptr;
+	__u64 kfd_criu_ev_buckets_ptr;
+	__u64 kfd_criu_devinfo_buckets_ptr;
+	__u64 queues_data_size;
+	__u64 queues_data_ptr;
+	__u64 event_page_offset;
+	__u32 num_of_queues;
+	__u32 num_of_devices;
+	__u32 num_of_events;
+	__u32 pad;	/* explicit tail padding: 76 -> 80 bytes */
+};
+
+struct kfd_ioctl_criu_restorer_args {
+	__u64 handle;   /* from KFD */
+	__u64 num_of_bos;
+	__u64 kfd_criu_bo_buckets_ptr;
+	__u64 restored_bo_array_ptr;
+	__u64 kfd_criu_q_buckets_ptr;
+	__u64 kfd_criu_ev_buckets_ptr;
+	__u64 kfd_criu_devinfo_buckets_ptr;
+	__u64 queues_data_size;
+	__u64 queues_data_ptr;
+	__u64 event_page_offset;
+	__u32 num_of_devices;
+	__u32 num_of_queues;
+	__u32 num_of_events;
+	__u32 pad;	/* explicit tail padding: 92 -> 96 bytes */
+};
+
+struct kfd_ioctl_criu_helper_args {
+	__u64 num_of_bos;	/* from KFD */
+	__u64 queues_data_size;
+	__u32 task_pid;
+	__u32 num_of_devices;
+	__u32 num_of_queues;    /* from KFD */
+	__u32 num_of_events;	/* from KFD */
+};
+
+struct kfd_ioctl_criu_resume_args {
+	__u32 pid;	/* to KFD */
+};
+
 /* Register offset inside the remapped mmio page
  */
 enum kfd_mmio_remap {
@@ -740,7 +836,19 @@ struct kfd_ioctl_set_xnack_mode_args {
 #define AMDKFD_IOC_SET_XNACK_MODE		\
 		AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args)
 
+#define AMDKFD_IOC_CRIU_DUMPER			\
+		AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_dumper_args)
+
+#define AMDKFD_IOC_CRIU_RESTORER			\
+		AMDKFD_IOWR(0x23, struct kfd_ioctl_criu_restorer_args)
+
+#define AMDKFD_IOC_CRIU_HELPER			\
+		AMDKFD_IOWR(0x24, struct kfd_ioctl_criu_helper_args)
+
+#define AMDKFD_IOC_CRIU_RESUME			\
+		AMDKFD_IOWR(0x25, struct kfd_ioctl_criu_resume_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x22
+#define AMDKFD_COMMAND_END		0x26	/* one more than the highest ioctl nr (0x25) */
 
 #endif
-- 
2.17.1



More information about the amd-gfx mailing list