[PATCH 06/16] drm/amdkfd: Clean up kfd_wait_on_events

Oded Gabbay oded.gabbay at gmail.com
Wed Oct 25 08:49:27 UTC 2017


On Sat, Oct 21, 2017 at 3:23 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
> Cleaned up the code while resolving some potential bugs and
> inconsistencies in the process.
>
> Clean-ups:
> * Remove enum kfd_event_wait_result, which duplicates
>   KFD_IOC_EVENT_RESULT definitions
> * alloc_event_waiters can be called without holding p->event_mutex
> * Return an error code from copy_signaled_event_data instead of bool
> * Clean up error handling code paths to minimize duplication in
>   kfd_wait_on_events
>
> Fixes:
> * Consistently return an error code from kfd_wait_on_events and set
>   wait_result to KFD_IOC_WAIT_RESULT_FAIL in all failure cases.
> * Always call free_waiters while holding p->event_mutex
> * copy_signaled_event_data might sleep. Don't call it while the task state
>   is TASK_INTERRUPTIBLE.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  5 +--
>  drivers/gpu/drm/amd/amdkfd/kfd_events.c  | 71 ++++++++++++++------------------
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  8 +---
>  3 files changed, 32 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 0ef82b2..a25321f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -835,15 +835,12 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
>                                 void *data)
>  {
>         struct kfd_ioctl_wait_events_args *args = data;
> -       enum kfd_event_wait_result wait_result;
>         int err;
>
>         err = kfd_wait_on_events(p, args->num_events,
>                         (void __user *)args->events_ptr,
>                         (args->wait_for_all != 0),
> -                       args->timeout, &wait_result);
> -
> -       args->wait_result = wait_result;
> +                       args->timeout, &args->wait_result);
>
>         return err;
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index c6d9572..5bb88b74 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -668,7 +668,7 @@ static bool test_event_condition(bool all, uint32_t num_events,
>   * Copy event specific data, if defined.
>   * Currently only memory exception events have additional data to copy to user
>   */
> -static bool copy_signaled_event_data(uint32_t num_events,
> +static int copy_signaled_event_data(uint32_t num_events,
>                 struct kfd_event_waiter *event_waiters,
>                 struct kfd_event_data __user *data)
>  {
> @@ -686,11 +686,11 @@ static bool copy_signaled_event_data(uint32_t num_events,
>                         src = &event->memory_exception_data;
>                         if (copy_to_user(dst, src,
>                                 sizeof(struct kfd_hsa_memory_exception_data)))
> -                               return false;
> +                               return -EFAULT;
>                 }
>         }
>
> -       return true;
> +       return 0;
>
>  }
>
> @@ -727,7 +727,7 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
>  int kfd_wait_on_events(struct kfd_process *p,
>                        uint32_t num_events, void __user *data,
>                        bool all, uint32_t user_timeout_ms,
> -                      enum kfd_event_wait_result *wait_result)
> +                      uint32_t *wait_result)
>  {
>         struct kfd_event_data __user *events =
>                         (struct kfd_event_data __user *) data;
> @@ -737,18 +737,18 @@ int kfd_wait_on_events(struct kfd_process *p,
>         struct kfd_event_waiter *event_waiters = NULL;
>         long timeout = user_timeout_to_jiffies(user_timeout_ms);
>
> +       event_waiters = alloc_event_waiters(num_events);
> +       if (!event_waiters) {
> +               ret = -ENOMEM;
> +               goto out;
> +       }
> +
>         mutex_lock(&p->event_mutex);
>
>         /* Set to something unreasonable - this is really
>          * just a bool for now.
>          */
> -       *wait_result = KFD_WAIT_TIMEOUT;
> -
> -       event_waiters = alloc_event_waiters(num_events);
> -       if (!event_waiters) {
> -               ret = -ENOMEM;
> -               goto fail;
> -       }
> +       *wait_result = KFD_IOC_WAIT_RESULT_TIMEOUT;
>
>         for (i = 0; i < num_events; i++) {
>                 struct kfd_event_data event_data;
> @@ -756,23 +756,21 @@ int kfd_wait_on_events(struct kfd_process *p,
>                 if (copy_from_user(&event_data, &events[i],
>                                 sizeof(struct kfd_event_data))) {
>                         ret = -EFAULT;
> -                       goto fail;
> +                       goto out_unlock;
>                 }
>
>                 ret = init_event_waiter_get_status(p, &event_waiters[i],
>                                 event_data.event_id, i);
>                 if (ret)
> -                       goto fail;
> +                       goto out_unlock;
>         }
>
>         /* Check condition once. */
>         if (test_event_condition(all, num_events, event_waiters)) {
> -               if (copy_signaled_event_data(num_events,
> -                               event_waiters, events))
> -                       *wait_result = KFD_WAIT_COMPLETE;
> -               else
> -                       *wait_result = KFD_WAIT_ERROR;
> -               free_waiters(num_events, event_waiters);
> +               *wait_result = KFD_IOC_WAIT_RESULT_COMPLETE;
> +               ret = copy_signaled_event_data(num_events,
> +                                              event_waiters, events);
> +               goto out_unlock;
>         } else {
>                 /* Add to wait lists if we need to wait. */
>                 for (i = 0; i < num_events; i++)
> @@ -781,12 +779,6 @@ int kfd_wait_on_events(struct kfd_process *p,
>
>         mutex_unlock(&p->event_mutex);
>
> -       /* Return if all waits were already satisfied. */
> -       if (*wait_result != KFD_WAIT_TIMEOUT) {
> -               __set_current_state(TASK_RUNNING);
> -               return ret;
> -       }
> -
>         while (true) {
>                 if (fatal_signal_pending(current)) {
>                         ret = -EINTR;
> @@ -818,16 +810,12 @@ int kfd_wait_on_events(struct kfd_process *p,
>                 set_current_state(TASK_INTERRUPTIBLE);
>
>                 if (test_event_condition(all, num_events, event_waiters)) {
> -                       if (copy_signaled_event_data(num_events,
> -                                       event_waiters, events))
> -                               *wait_result = KFD_WAIT_COMPLETE;
> -                       else
> -                               *wait_result = KFD_WAIT_ERROR;
> +                       *wait_result = KFD_IOC_WAIT_RESULT_COMPLETE;
>                         break;
>                 }
>
>                 if (timeout <= 0) {
> -                       *wait_result = KFD_WAIT_TIMEOUT;
> +                       *wait_result = KFD_IOC_WAIT_RESULT_TIMEOUT;
>                         break;
>                 }
>
> @@ -835,19 +823,20 @@ int kfd_wait_on_events(struct kfd_process *p,
>         }
>         __set_current_state(TASK_RUNNING);
>
> +       /* copy_signaled_event_data may sleep. So this has to happen
> +        * after the task state is set back to RUNNING.
> +        */
> +       if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
> +               ret = copy_signaled_event_data(num_events,
> +                                              event_waiters, events);
> +
>         mutex_lock(&p->event_mutex);
> +out_unlock:
>         free_waiters(num_events, event_waiters);
>         mutex_unlock(&p->event_mutex);
> -
> -       return ret;
> -
> -fail:
> -       if (event_waiters)
> -               free_waiters(num_events, event_waiters);
> -
> -       mutex_unlock(&p->event_mutex);
> -
> -       *wait_result = KFD_WAIT_ERROR;
> +out:
> +       if (ret)
> +               *wait_result = KFD_IOC_WAIT_RESULT_FAIL;
>
>         return ret;
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 1a483a7..d3cf53a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -726,19 +726,13 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
>  extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
>  extern const struct kfd_device_global_init_class device_global_init_class_cik;
>
> -enum kfd_event_wait_result {
> -       KFD_WAIT_COMPLETE,
> -       KFD_WAIT_TIMEOUT,
> -       KFD_WAIT_ERROR
> -};
> -
>  void kfd_event_init_process(struct kfd_process *p);
>  void kfd_event_free_process(struct kfd_process *p);
>  int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
>  int kfd_wait_on_events(struct kfd_process *p,
>                        uint32_t num_events, void __user *data,
>                        bool all, uint32_t user_timeout_ms,
> -                      enum kfd_event_wait_result *wait_result);
> +                      uint32_t *wait_result);
>  void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
>                                 uint32_t valid_id_bits);
>  void kfd_signal_iommu_event(struct kfd_dev *dev,
> --
> 2.7.4
>

This patch is:
Acked-by: Oded Gabbay <oded.gabbay at gmail.com>


More information about the amd-gfx mailing list