[PATCH 03/18] drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs

Felix Kuehling felix.kuehling at amd.com
Mon Aug 23 18:57:36 UTC 2021


Am 2021-08-19 um 9:36 a.m. schrieb David Yat Sin:
> From: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
>
> Checkpoint-Restore in userspace (CRIU) is a powerful tool that can
> snapshot a running process and later restore it on same or a remote
> machine but expects the processes that have a device file (e.g. GPU)
> associated with them, provide necessary driver support to assist CRIU
> and its extensible plugin interface. Thus, In order to support the
> Checkpoint-Restore of any ROCm process, the AMD Radeon Open Compute
> Kernel driver, needs to provide a set of new APIs that provide
> necessary VRAM metadata and its contents to a userspace component
> (CRIU plugin) that can store it in form of image files.
>
> This introduces some new ioctls which will be used to checkpoint-Restore
> any KFD bound user process. KFD doesn't allow any arbitrary ioctl call
> unless it is called by the group leader process. Since these ioctls are
> expected to be called from a KFD criu plugin which has elevated ptrace
> attached privileges and CAP_SYS_ADMIN capabilities attached with the file
> descriptors so modify KFD to allow such calls.
>
> (API redesign suggested by Felix Kuehling and implemented by David Yat
> Sin)
>
> Signed-off-by: David Yat Sin <david.yatsin at amd.com>
> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
> (cherry picked from commit 72f4907135aed9c037b9f442a6055b51733b518a)
> (cherry picked from commit 33ff4953c5352f51d57a77ba8ae6614b7993e70d)
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  70 ++++++++++++++-
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  69 ++++++++++++++
>  include/uapi/linux/kfd_ioctl.h           | 110 ++++++++++++++++++++++-
>  3 files changed, 247 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 059c3f1ca27d..a1b60d29aae1 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -33,6 +33,7 @@
>  #include <linux/time.h>
>  #include <linux/mm.h>
>  #include <linux/mman.h>
> +#include <linux/ptrace.h>
>  #include <linux/dma-buf.h>
>  #include <asm/processor.h>
>  #include "kfd_priv.h"
> @@ -1802,6 +1803,44 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
>  	return -EPERM;
>  }
>  #endif
> +static int kfd_ioctl_criu_dumper(struct file *filep,
> +				struct kfd_process *p, void *data)
> +{
> +	pr_debug("Inside %s\n", __func__);
> +
> +	return 0;
> +}
> +
> +static int kfd_ioctl_criu_restorer(struct file *filep,
> +				struct kfd_process *p, void *data)
> +{
> +	pr_debug("Inside %s\n", __func__);
> +
> +	return 0;
> +}
> +
> +static int kfd_ioctl_criu_pause(struct file *filep, struct kfd_process *p, void *data)
> +{
> +	pr_debug("Inside %s\n", __func__);
> +
> +	return 0;
> +}
> +
> +static int kfd_ioctl_criu_resume(struct file *filep,
> +				struct kfd_process *p, void *data)
> +{
> +	pr_debug("Inside %s\n", __func__);
> +
> +	return 0;
> +}
> +
> +static int kfd_ioctl_criu_process_info(struct file *filep,
> +				struct kfd_process *p, void *data)
> +{
> +	pr_debug("Inside %s\n", __func__);
> +
> +	return 0;
> +}
>  
>  #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
>  	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
> @@ -1906,6 +1945,21 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
>  
>  	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
>  			kfd_ioctl_set_xnack_mode, 0),
> +
> +	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_DUMPER,
> +			 kfd_ioctl_criu_dumper, KFD_IOC_FLAG_PTRACE_ATTACHED),
> +
> +	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_RESTORER,
> +			 kfd_ioctl_criu_restorer, KFD_IOC_FLAG_ROOT_ONLY),
> +
> +	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_PROCESS_INFO,
> +			 kfd_ioctl_criu_process_info, KFD_IOC_FLAG_PTRACE_ATTACHED),
> +
> +	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_RESUME,
> +			 kfd_ioctl_criu_resume, KFD_IOC_FLAG_ROOT_ONLY),
> +
> +	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_PAUSE,
> +			 kfd_ioctl_criu_pause, KFD_IOC_FLAG_PTRACE_ATTACHED),
>  };
>  
>  #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
> @@ -1920,6 +1974,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
>  	char *kdata = NULL;
>  	unsigned int usize, asize;
>  	int retcode = -EINVAL;
> +	bool ptrace_attached = false;
>  
>  	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
>  		goto err_i1;
> @@ -1945,7 +2000,15 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
>  	 * processes need to create their own KFD device context.
>  	 */
>  	process = filep->private_data;
> -	if (process->lead_thread != current->group_leader) {
> +
> +	rcu_read_lock();
> +	if ((ioctl->flags & KFD_IOC_FLAG_PTRACE_ATTACHED) &&
> +	    ptrace_parent(process->lead_thread) == current)
> +		ptrace_attached = true;
> +	rcu_read_unlock();
> +
> +	if (process->lead_thread != current->group_leader
> +	    && !ptrace_attached) {
>  		dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
>  		retcode = -EBADF;
>  		goto err_i1;
> @@ -1960,6 +2023,11 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
>  		goto err_i1;
>  	}
>  
> +	/* KFD_IOC_FLAG_ROOT_ONLY is only for CAP_SYS_ADMIN */
> +	if (unlikely((ioctl->flags & KFD_IOC_FLAG_ROOT_ONLY) &&
> +		     !capable(CAP_SYS_ADMIN)))
> +		return -EACCES;
> +
>  	if (cmd & (IOC_IN | IOC_OUT)) {
>  		if (asize <= sizeof(stack_kdata)) {
>  			kdata = stack_kdata;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 64552f6b8ba4..768cc3fe95d2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -121,7 +121,35 @@
>   */
>  #define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512
>  
> +/**
> + * enum kfd_ioctl_flags - KFD ioctl flags
> + * Various flags that can be set in &amdkfd_ioctl_desc.flags to control how
> + * userspace can use a given ioctl.
> + */
> +enum kfd_ioctl_flags {
> +	/**
> +	 * @KFD_IOC_FLAG_ROOT_ONLY:
> +	 * Certain KFD ioctls such as AMDKFD_IOC_CRIU_RESTORER can potentially
> +	 * perform privileged operations and load arbitrary data into MQDs and
> +	 * eventually HQD registers when the queue is mapped by HWS. In order to
> +	 * prevent this we should perform additional security checks. In other
> +	 * cases, certain ioctls such as AMDKFD_IOC_CRIU_RESUME might be called
> +	 * by an external process e.g. CRIU restore process, for each resuming
> +	 * tasks and thus require elevated privileges.
> +	 *
> +	 * This is equivalent to callers with the SYSADMIN capability.
> +	 */
> +	KFD_IOC_FLAG_ROOT_ONLY = BIT(0),
> +	/**
> +	 * @KFD_IOC_FLAG_PTRACE_ATTACHED:
> +	 * Certain KFD ioctls such as AMDKFD_IOC_CRIU_HELPER and
> +	 * AMDKFD_IOC_CRIU_DUMPER are expected to be called during a Checkpoint
> +	 * operation triggered by CRIU. Since, these are expected to be called
> +	 * from a PTRACE attached context, we must authenticate these.
> +	 */
> +	KFD_IOC_FLAG_PTRACE_ATTACHED = BIT(1),
>  
> +};
>  /*
>   * Kernel module parameter to specify maximum number of supported queues per
>   * device
> @@ -977,6 +1005,47 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
>  				  uint64_t tba_addr,
>  				  uint64_t tma_addr);
>  
> +/* CRIU */
> +/*
> + * Need to increment KFD_CRIU_PRIV_VERSION each time a change is made to any of the CRIU private
> + * structures:
> + * kfd_criu_process_priv_data
> + * kfd_criu_device_priv_data
> + * kfd_criu_bo_priv_data
> + * kfd_criu_queue_priv_data
> + * kfd_criu_event_priv_data
> + * kfd_criu_svm_range_priv_data
> + */
> +
> +#define KFD_CRIU_PRIV_VERSION 1
> +
> +struct kfd_criu_process_priv_data {
> +	uint32_t version;
> +};
> +
> +struct kfd_criu_device_priv_data {
> +	/* For future use */
> +	uint64_t reserved;
> +};
> +
> +struct kfd_criu_bo_priv_data {
> +	uint64_t reserved;
> +};
> +
> +struct kfd_criu_svm_range_priv_data {
> +	uint64_t reserved;
> +};
> +
> +struct kfd_criu_queue_priv_data {
> +	uint64_t reserved;
> +};
> +
> +struct kfd_criu_event_priv_data {
> +	uint64_t reserved;
> +};
> +
> +/* CRIU - End */
> +
>  /* Queue Context Management */
>  int init_queue(struct queue **q, const struct queue_properties *properties);
>  void uninit_queue(struct queue *q);
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index 3cb5b5dd9f77..19489e2ca58e 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -467,6 +467,99 @@ struct kfd_ioctl_smi_events_args {
>  	__u32 anon_fd;	/* from KFD */
>  };
>  
> +struct kfd_criu_process_bucket {
> +	__u64 priv_data_offset;
> +	__u64 priv_data_size;
> +};
> +
> +struct kfd_criu_device_bucket {
> +	__u64 priv_data_offset;
> +	__u64 priv_data_size;
> +	__u32 user_gpu_id;
> +	__u32 actual_gpu_id;
> +	__u32 drm_fd;
> +	__u32 pad;
> +};
> +
> +struct kfd_criu_bo_bucket {
> +	__u64 priv_data_offset;
> +	__u64 priv_data_size;
> +	__u64 addr;
> +	__u64 size;
> +	__u64 offset;
> +	__u64 restored_offset;
> +	__u32 gpu_id;
> +	__u32 alloc_flags;
> +	__u32 dmabuf_fd;
> +	__u32 pad;
> +};
> +
> +struct kfd_criu_queue_bucket {
> +	__u64 priv_data_offset;
> +	__u64 priv_data_size;
> +	__u32 gpu_id;
> +	__u32 pad;
> +};
> +
> +struct kfd_criu_event_bucket {
> +	__u64 priv_data_offset;
> +	__u64 priv_data_size;
> +	__u32 gpu_id;
> +	__u32 pad;
> +};
> +
> +struct kfd_ioctl_criu_process_info_args {
> +	__u64 process_priv_data_size;
> +	__u64 bos_priv_data_size;
> +	__u64 devices_priv_data_size;
> +	__u64 queues_priv_data_size;
> +	__u64 events_priv_data_size;
> +	__u64 svm_ranges_priv_data_size;
> +	__u64 total_bos;
> +	__u64 total_svm_ranges;
> +	__u32 total_devices;
> +	__u32 total_queues;
> +	__u32 total_events;
> +	__u32 task_pid;
> +};
> +
> +struct kfd_ioctl_criu_pause_args {
> +	__u32 pause;
> +	__u32 pad;
> +};
> +
> +enum kfd_criu_object_type {
> +	KFD_CRIU_OBJECT_TYPE_PROCESS	= 0,
> +	KFD_CRIU_OBJECT_TYPE_DEVICE	= 1,
> +	KFD_CRIU_OBJECT_TYPE_BO		= 2,
> +	KFD_CRIU_OBJECT_TYPE_QUEUE	= 3,
> +	KFD_CRIU_OBJECT_TYPE_EVENT	= 4,
> +	KFD_CRIU_OBJECT_TYPE_SVM_RANGE	= 5,
> +};
> +

Please add comments explaining the members of the ioctl args structures.
E.g. it's not obvious that objects is a user mode pointer, or the
semantics of the objects_index_start field.

Regards,
  Felix


> +struct kfd_ioctl_criu_dumper_args {
> +	__u64 num_objects;
> +	__u64 objects;
> +	__u64 objects_size;
> +	__u64 objects_index_start;
> +	__u32 type; /* enum kfd_criu_object_type */
> +	__u32 pad;
> +};
> +
> +struct kfd_ioctl_criu_restorer_args {
> +	__u64 num_objects;
> +	__u64 objects;
> +	__u64 objects_size;
> +	__u64 objects_index_start;
> +	__u32 type; /* enum kfd_criu_object_type */
> +	__u32 pad;
> +};
> +
> +struct kfd_ioctl_criu_resume_args {
> +	__u32 pid;	/* to KFD */
> +	__u32 pad;
> +};
> +
>  /* Register offset inside the remapped mmio page
>   */
>  enum kfd_mmio_remap {
> @@ -740,7 +833,22 @@ struct kfd_ioctl_set_xnack_mode_args {
>  #define AMDKFD_IOC_SET_XNACK_MODE		\
>  		AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args)
>  
> +#define AMDKFD_IOC_CRIU_DUMPER			\
> +		AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_dumper_args)
> +
> +#define AMDKFD_IOC_CRIU_RESTORER		\
> +		AMDKFD_IOWR(0x23, struct kfd_ioctl_criu_restorer_args)
> +
> +#define AMDKFD_IOC_CRIU_PROCESS_INFO		\
> +		AMDKFD_IOWR(0x24, struct kfd_ioctl_criu_process_info_args)
> +
> +#define AMDKFD_IOC_CRIU_RESUME			\
> +		AMDKFD_IOWR(0x25, struct kfd_ioctl_criu_resume_args)
> +
> +#define AMDKFD_IOC_CRIU_PAUSE			\
> +		AMDKFD_IOWR(0x26, struct kfd_ioctl_criu_pause_args)
> +
>  #define AMDKFD_COMMAND_START		0x01
> -#define AMDKFD_COMMAND_END		0x22
> +#define AMDKFD_COMMAND_END		0x27
>  
>  #endif


More information about the amd-gfx mailing list