[PATCH 15/18] drm/amdkfd: CRIU dump and restore events
Felix Kuehling
felix.kuehling at amd.com
Mon Aug 23 18:39:40 UTC 2021
On 2021-08-19 at 9:37 a.m., David Yat Sin wrote:
> Add support to existing CRIU ioctl's to save and restore events during
> criu checkpoint and restore.
>
> Signed-off-by: David Yat Sin <david.yatsin at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 130 +++++++-----
> drivers/gpu/drm/amd/amdkfd/kfd_events.c | 253 ++++++++++++++++++++---
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 25 ++-
> 3 files changed, 329 insertions(+), 79 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 19f16e3dd769..c8f523d8ab81 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1008,51 +1008,11 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
> * through the event_page_offset field.
> */
> if (args->event_page_offset) {
> - struct kfd_dev *kfd;
> - struct kfd_process_device *pdd;
> - void *mem, *kern_addr;
> - uint64_t size;
> -
> - if (p->signal_page) {
> - pr_err("Event page is already set\n");
> - return -EINVAL;
> - }
> -
> - kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
> - if (!kfd) {
> - pr_err("Getting device by id failed in %s\n", __func__);
> - return -EINVAL;
> - }
> -
> mutex_lock(&p->mutex);
> - pdd = kfd_bind_process_to_device(kfd, p);
> - if (IS_ERR(pdd)) {
> - err = PTR_ERR(pdd);
> - goto out_unlock;
> - }
> -
> - mem = kfd_process_device_translate_handle(pdd,
> - GET_IDR_HANDLE(args->event_page_offset));
> - if (!mem) {
> - pr_err("Can't find BO, offset is 0x%llx\n",
> - args->event_page_offset);
> - err = -EINVAL;
> - goto out_unlock;
> - }
> + err = kfd_kmap_event_page(p, args->event_page_offset);
> mutex_unlock(&p->mutex);
> -
> - err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
> - mem, &kern_addr, &size);
> - if (err) {
> - pr_err("Failed to map event page to kernel\n");
> - return err;
> - }
> -
> - err = kfd_event_page_set(p, kern_addr, size);
> - if (err) {
> - pr_err("Failed to set event page\n");
> + if (err)
> return err;
> - }
> }
>
> err = kfd_event_create(filp, p, args->event_type,
> @@ -1061,10 +1021,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
> &args->event_page_offset,
> &args->event_slot_index);
>
> - return err;
> -
> -out_unlock:
> - mutex_unlock(&p->mutex);
> + pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);
> return err;
> }
>
> @@ -2208,6 +2165,41 @@ static int criu_dump_queues(struct kfd_process *p, struct kfd_ioctl_criu_dumper_
> return ret;
> }
>
> +static int criu_dump_events(struct kfd_process *p, struct kfd_ioctl_criu_dumper_args *args)
> +{
> + struct kfd_criu_event_bucket *ev_buckets;
> + uint32_t num_events;
> + int ret = 0;
> +
> + num_events = kfd_get_num_events(p);
> + if (args->num_objects != num_events) {
> + pr_err("Mismatch with number of events (current:%d user:%lld)\n",
> + num_events, args->num_objects);
> +
> + }
> +
> + if (args->objects_size != args->num_objects *
> + (sizeof(*ev_buckets) + sizeof(struct kfd_criu_event_priv_data))) {
> + pr_err("Invalid objects size for events\n");
> + return -EINVAL;
> + }
> +
> + ev_buckets = kvzalloc(args->objects_size, GFP_KERNEL);
> + if (!ev_buckets)
> + return -ENOMEM;
> +
> + ret = kfd_event_dump(p, ev_buckets, args->num_objects);
> + if (!ret) {
> + ret = copy_to_user((void __user *)args->objects, ev_buckets, args->objects_size);
> + if (ret) {
> + pr_err("Failed to copy events information to user\n");
> + ret = -EFAULT;
> + }
> + }
> + kvfree(ev_buckets);
> + return ret;
> +}
> +
> static int kfd_ioctl_criu_dumper(struct file *filep,
> struct kfd_process *p, void *data)
> {
> @@ -2246,6 +2238,8 @@ static int kfd_ioctl_criu_dumper(struct file *filep,
> ret = criu_dump_queues(p, args);
> break;
> case KFD_CRIU_OBJECT_TYPE_EVENT:
> + ret = criu_dump_events(p, args);
> + break;
> case KFD_CRIU_OBJECT_TYPE_DEVICE:
> case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
> default:
> @@ -2676,6 +2670,40 @@ static int criu_restore_queues(struct kfd_process *p,
> return ret;
> }
>
> +static int criu_restore_events(struct file *filp, struct kfd_process *p,
> + struct kfd_ioctl_criu_restorer_args *args)
> +{
> + int ret = 0, i;
> + uint8_t *objects, *private_data;
> + struct kfd_criu_event_bucket *ev_buckets;
> +
> + objects = kvzalloc(args->objects_size, GFP_KERNEL);
> + if (!objects)
> + return -ENOMEM;
> +
> + ret = copy_from_user(objects, (void __user *)args->objects, args->objects_size);
> + if (ret) {
> + pr_err("Failed to copy event information from user\n");
> + ret = -EFAULT;
> + goto exit;
> + }
> +
> + ev_buckets = (struct kfd_criu_event_bucket *) objects;
> + private_data = (void *)(ev_buckets + args->num_objects);
> +
> + for (i = 0; i < args->num_objects; i++) {
> + ret = kfd_event_restore(filp, p, &ev_buckets[i], private_data);
> + if (ret) {
> + pr_err("Failed to restore event (%d)\n", ret);
> + goto exit;
> + }
> + }
> +
> +exit:
> + kvfree(ev_buckets);
> + return ret;
> +}
> +
> static int kfd_ioctl_criu_restorer(struct file *filep,
> struct kfd_process *p, void *data)
> {
> @@ -2698,6 +2726,8 @@ static int kfd_ioctl_criu_restorer(struct file *filep,
> ret = criu_restore_queues(p, args);
> break;
> case KFD_CRIU_OBJECT_TYPE_EVENT:
> + ret = criu_restore_events(filep, p, args);
> + break;
> case KFD_CRIU_OBJECT_TYPE_DEVICE:
> case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
> default:
> @@ -2799,9 +2829,13 @@ static int kfd_ioctl_criu_process_info(struct file *filep,
> args->queues_priv_data_size = queues_extra_data_size +
> (args->total_queues * sizeof(struct kfd_criu_queue_priv_data));
>
> - dev_dbg(kfd_device, "Num of bos:%llu queues:%u\n",
> + args->total_events = kfd_get_num_events(p);
> + args->events_priv_data_size = args->total_events * sizeof(struct kfd_criu_event_priv_data);
> +
> + dev_dbg(kfd_device, "Num of bos:%llu queues:%u events:%u\n",
> args->total_bos,
> - args->total_queues);
> + args->total_queues,
> + args->total_events);
> err_unlock:
> mutex_unlock(&p->mutex);
> return ret;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index ba2c2ce0c55a..18362478e351 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -53,9 +53,9 @@ struct kfd_signal_page {
> uint64_t *kernel_address;
> uint64_t __user *user_address;
> bool need_to_free_pages;
> + uint64_t user_handle; /* Needed for CRIU dumped and restore */
> };
>
> -
> static uint64_t *page_slots(struct kfd_signal_page *page)
> {
> return page->kernel_address;
> @@ -92,7 +92,8 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
> }
>
> static int allocate_event_notification_slot(struct kfd_process *p,
> - struct kfd_event *ev)
> + struct kfd_event *ev,
> + const int *restore_id)
> {
> int id;
>
> @@ -104,14 +105,19 @@ static int allocate_event_notification_slot(struct kfd_process *p,
> p->signal_mapped_size = 256*8;
> }
>
> - /*
> - * Compatibility with old user mode: Only use signal slots
> - * user mode has mapped, may be less than
> - * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
> - * of the event limit without breaking user mode.
> - */
> - id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
> - GFP_KERNEL);
> + if (restore_id) {
> + id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
> + GFP_KERNEL);
> + } else {
> + /*
> + * Compatibility with old user mode: Only use signal slots
> + * user mode has mapped, may be less than
> + * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
> + * of the event limit without breaking user mode.
> + */
> + id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
> + GFP_KERNEL);
> + }
> if (id < 0)
> return id;
>
> @@ -178,9 +184,8 @@ static struct kfd_event *lookup_signaled_event_by_partial_id(
> return ev;
> }
>
> -static int create_signal_event(struct file *devkfd,
> - struct kfd_process *p,
> - struct kfd_event *ev)
> +static int create_signal_event(struct file *devkfd, struct kfd_process *p,
> + struct kfd_event *ev, const int *restore_id)
> {
> int ret;
>
> @@ -193,7 +198,7 @@ static int create_signal_event(struct file *devkfd,
> return -ENOSPC;
> }
>
> - ret = allocate_event_notification_slot(p, ev);
> + ret = allocate_event_notification_slot(p, ev, restore_id);
> if (ret) {
> pr_warn("Signal event wasn't created because out of kernel memory\n");
> return ret;
> @@ -209,16 +214,22 @@ static int create_signal_event(struct file *devkfd,
> return 0;
> }
>
> -static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
> +static int create_other_event(struct kfd_process *p, struct kfd_event *ev, const int *restore_id)
> {
> - /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
> - * intentional integer overflow to -1 without a compiler
> - * warning. idr_alloc treats a negative value as "maximum
> - * signed integer".
> - */
> - int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
> - (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
> - GFP_KERNEL);
> + int id;
> +
> + if (restore_id)
> + id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
> + GFP_KERNEL);
> + else
> + /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
> + * intentional integer overflow to -1 without a compiler
> + * warning. idr_alloc treats a negative value as "maximum
> + * signed integer".
> + */
> + id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
> + (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
> + GFP_KERNEL);
>
> if (id < 0)
> return id;
> @@ -295,8 +306,8 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
> return ev->type == KFD_EVENT_TYPE_SIGNAL;
> }
>
> -int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
> - uint64_t size)
> +static int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
> + uint64_t size, uint64_t user_handle)
> {
> struct kfd_signal_page *page;
>
> @@ -315,10 +326,55 @@ int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
>
> p->signal_page = page;
> p->signal_mapped_size = size;
> -
> + p->signal_page->user_handle = user_handle;
> return 0;
> }
>
> +int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
This function should be static. I also think that this function, together
with criu_dump_events and criu_restore_events, could be moved into kfd_events.c.
Regards,
Felix
> +{
> + struct kfd_dev *kfd;
> + struct kfd_process_device *pdd;
> + void *mem, *kern_addr;
> + uint64_t size;
> + int err = 0;
> +
> + if (p->signal_page) {
> + pr_err("Event page is already set\n");
> + return -EINVAL;
> + }
> +
> + kfd = kfd_device_by_id(GET_GPU_ID(event_page_offset));
> + if (!kfd) {
> + pr_err("Getting device by id failed in %s\n", __func__);
> + return -EINVAL;
> + }
> +
> + pdd = kfd_bind_process_to_device(kfd, p);
> + if (IS_ERR(pdd))
> + return PTR_ERR(pdd);
> +
> + mem = kfd_process_device_translate_handle(pdd,
> + GET_IDR_HANDLE(event_page_offset));
> + if (!mem) {
> + pr_err("Can't find BO, offset is 0x%llx\n", event_page_offset);
> + return -EINVAL;
> + }
> +
> + err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
> + mem, &kern_addr, &size);
> + if (err) {
> + pr_err("Failed to map event page to kernel\n");
> + return err;
> + }
> +
> + err = kfd_event_page_set(p, kern_addr, size, event_page_offset);
> + if (err) {
> + pr_err("Failed to set event page\n");
> + return err;
> + }
> + return err;
> +}
> +
> int kfd_event_create(struct file *devkfd, struct kfd_process *p,
> uint32_t event_type, bool auto_reset, uint32_t node_id,
> uint32_t *event_id, uint32_t *event_trigger_data,
> @@ -343,14 +399,14 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
> switch (event_type) {
> case KFD_EVENT_TYPE_SIGNAL:
> case KFD_EVENT_TYPE_DEBUG:
> - ret = create_signal_event(devkfd, p, ev);
> + ret = create_signal_event(devkfd, p, ev, NULL);
> if (!ret) {
> *event_page_offset = KFD_MMAP_TYPE_EVENTS;
> *event_slot_index = ev->event_id;
> }
> break;
> default:
> - ret = create_other_event(p, ev);
> + ret = create_other_event(p, ev, NULL);
> break;
> }
>
> @@ -366,6 +422,147 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
> return ret;
> }
>
> +int kfd_event_restore(struct file *devkfd, struct kfd_process *p,
> + struct kfd_criu_event_bucket *ev_bucket,
> + uint8_t *priv_datas)
> +{
> + int ret = 0;
> + struct kfd_criu_event_priv_data *ev_priv;
> + struct kfd_event *ev;
> +
> + ev_priv = (struct kfd_criu_event_priv_data *)(priv_datas + ev_bucket->priv_data_offset);
> +
> + if (ev_priv->user_handle) {
> + ret = kfd_kmap_event_page(p, ev_priv->user_handle);
> + if (ret)
> + return ret;
> + }
> +
> + ev = kzalloc(sizeof(*ev), GFP_KERNEL);
> + if (!ev)
> + return -ENOMEM;
> +
> + ev->type = ev_priv->type;
> + ev->auto_reset = ev_priv->auto_reset;
> + ev->signaled = ev_priv->signaled;
> +
> + init_waitqueue_head(&ev->wq);
> +
> + mutex_lock(&p->event_mutex);
> + switch (ev->type) {
> + case KFD_EVENT_TYPE_SIGNAL:
> + case KFD_EVENT_TYPE_DEBUG:
> + ret = create_signal_event(devkfd, p, ev, &ev_priv->event_id);
> + break;
> + case KFD_EVENT_TYPE_MEMORY:
> + memcpy(&ev->memory_exception_data,
> + &ev_priv->memory_exception_data,
> + sizeof(struct kfd_hsa_memory_exception_data));
> +
> + ev->memory_exception_data.gpu_id = ev_bucket->gpu_id;
> + ret = create_other_event(p, ev, &ev_priv->event_id);
> + break;
> + case KFD_EVENT_TYPE_HW_EXCEPTION:
> + memcpy(&ev->hw_exception_data,
> + &ev_priv->hw_exception_data,
> + sizeof(struct kfd_hsa_hw_exception_data));
> +
> + ev->hw_exception_data.gpu_id = ev_bucket->gpu_id;
> + ret = create_other_event(p, ev, &ev_priv->event_id);
> + break;
> + }
> +
> + if (ret)
> + kfree(ev);
> +
> + mutex_unlock(&p->event_mutex);
> +
> + return ret;
> +}
> +
> +int kfd_event_dump(struct kfd_process *p,
> + struct kfd_criu_event_bucket *ev_buckets,
> + uint32_t num_events)
> +{
> + struct kfd_event *ev;
> + struct kfd_criu_event_priv_data *ev_privs;
> + uint32_t ev_id;
> + int i = 0;
> +
> + /* Private data for first event starts after all ev_buckets */
> + ev_privs = (struct kfd_criu_event_priv_data *)((uint8_t *)ev_buckets +
> + (num_events * (sizeof(*ev_buckets))));
> +
> +
> + idr_for_each_entry(&p->event_idr, ev, ev_id) {
> + struct kfd_criu_event_bucket *ev_bucket;
> + struct kfd_criu_event_priv_data *ev_priv;
> +
> + if (i >= num_events) {
> + pr_err("Number of events exceeds number allocated\n");
> + return -ENOMEM;
> + }
> +
> + ev_bucket = &ev_buckets[i];
> +
> + /* Currently, all events have same size of private_data, but the current ioctl's
> + * and CRIU plugin supports private_data of variable sizes
> + */
> + ev_priv = &ev_privs[i];
> +
> + ev_bucket->priv_data_offset = i * sizeof(*ev_priv);
> + ev_bucket->priv_data_size = sizeof(*ev_priv);
> +
> + /* We store the user_handle with the first event */
> + if (i == 0 && p->signal_page)
> + ev_priv->user_handle = p->signal_page->user_handle;
> +
> + ev_priv->event_id = ev->event_id;
> + ev_priv->auto_reset = ev->auto_reset;
> + ev_priv->type = ev->type;
> + ev_priv->signaled = ev->signaled;
> +
> + /* We store the gpu_id in the bucket section so that the userspace CRIU plugin can
> + * modify it if needed.
> + */
> + if (ev_priv->type == KFD_EVENT_TYPE_MEMORY) {
> + memcpy(&ev_priv->memory_exception_data,
> + &ev->memory_exception_data,
> + sizeof(struct kfd_hsa_memory_exception_data));
> +
> + ev_bucket->gpu_id = ev_priv->memory_exception_data.gpu_id;
> + } else if (ev_priv->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
> + memcpy(&ev_priv->hw_exception_data,
> + &ev->hw_exception_data,
> + sizeof(struct kfd_hsa_hw_exception_data));
> +
> + ev_bucket->gpu_id = ev_priv->hw_exception_data.gpu_id;
> + } else
> + ev_bucket->gpu_id = 0;
> +
> + pr_debug("Dumped event[%d] id = 0x%08x auto_reset = %x type = %x signaled = %x\n",
> + i,
> + ev_priv->event_id,
> + ev_priv->auto_reset,
> + ev_priv->type,
> + ev_priv->signaled);
> + i++;
> + }
> + return 0;
> +}
> +
> +int kfd_get_num_events(struct kfd_process *p)
> +{
> + struct kfd_event *ev;
> + uint32_t id;
> + u32 num_events = 0;
> +
> + idr_for_each_entry(&p->event_idr, ev, id)
> + num_events++;
> +
> + return num_events++;
> +}
> +
> /* Assumes that p is current. */
> int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
> {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 7ed6f831109d..bf10a5305ef7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -1069,9 +1069,26 @@ struct kfd_criu_queue_priv_data {
> };
>
> struct kfd_criu_event_priv_data {
> - uint64_t reserved;
> + uint64_t user_handle;
> + uint32_t event_id;
> + uint32_t auto_reset;
> + uint32_t type;
> + uint32_t signaled;
> +
> + union {
> + struct kfd_hsa_memory_exception_data memory_exception_data;
> + struct kfd_hsa_hw_exception_data hw_exception_data;
> + };
> };
>
> +int kfd_event_restore(struct file *devkfd, struct kfd_process *p,
> + struct kfd_criu_event_bucket *ev_bucket,
> + uint8_t *priv_datas);
> +
> +int kfd_event_dump(struct kfd_process *p,
> + struct kfd_criu_event_bucket *ev_buckets,
> + uint32_t num_events);
> +
> /* CRIU - End */
>
> /* Queue Context Management */
> @@ -1238,12 +1255,14 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
> void kfd_signal_hw_exception_event(u32 pasid);
> int kfd_set_event(struct kfd_process *p, uint32_t event_id);
> int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
> -int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
> - uint64_t size);
> +int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset);
> +
> int kfd_event_create(struct file *devkfd, struct kfd_process *p,
> uint32_t event_type, bool auto_reset, uint32_t node_id,
> uint32_t *event_id, uint32_t *event_trigger_data,
> uint64_t *event_page_offset, uint32_t *event_slot_index);
> +
> +int kfd_get_num_events(struct kfd_process *p);
> int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
>
> void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
More information about the amd-gfx
mailing list