[PATCH 10/16] drm/amdkfd: Simplify events page allocator

Oded Gabbay oded.gabbay at gmail.com
Wed Oct 25 10:40:49 UTC 2017


On Sat, Oct 21, 2017 at 3:23 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
> The first event page is always big enough to handle all events.
> Handling of multiple event pages is not supported by user mode, and
> is not necessary.
>
> Signed-off-by: Yong Zhao <yong.zhao at amd.com>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_events.c | 197 +++++++++++---------------------
>  drivers/gpu/drm/amd/amdkfd/kfd_events.h |   1 -
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |   4 +-
>  3 files changed, 70 insertions(+), 132 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index f178248..f800e48 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -41,6 +41,9 @@ struct kfd_event_waiter {
>         bool activated;          /* Becomes true when event is signaled */
>  };
>
> +#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
> +#define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE)
> +
>  /*
>   * Over-complicated pooled allocator for event notification slots.
>   *
> @@ -51,132 +54,98 @@ struct kfd_event_waiter {
>   * Individual signal events are then allocated a slot in a page.
>   */
>
> -struct signal_page {
> -       struct list_head event_pages;   /* kfd_process.signal_event_pages */
> +struct kfd_signal_page {
>         uint64_t *kernel_address;
>         uint64_t __user *user_address;
> -       uint32_t page_index;            /* Index into the mmap aperture. */
>         unsigned int free_slots;
> -       unsigned long used_slot_bitmap[0];
> +       unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS];
>  };
>
> -#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
> -#define SLOT_BITMAP_SIZE BITS_TO_LONGS(SLOTS_PER_PAGE)
> -#define BITS_PER_PAGE (ilog2(SLOTS_PER_PAGE)+1)
> -#define SIGNAL_PAGE_SIZE (sizeof(struct signal_page) + \
> -                               SLOT_BITMAP_SIZE * sizeof(long))
> -
>  /*
>   * For signal events, the event ID is used as the interrupt user data.
>   * For SQ s_sendmsg interrupts, this is limited to 8 bits.
>   */
>
>  #define INTERRUPT_DATA_BITS 12
> -#define SIGNAL_EVENT_ID_SLOT_SHIFT 0
>
> -static uint64_t *page_slots(struct signal_page *page)
> +static uint64_t *page_slots(struct kfd_signal_page *page)
>  {
>         return page->kernel_address;
>  }
>
>  static bool allocate_free_slot(struct kfd_process *process,
> -                               struct signal_page **out_page,
> -                               unsigned int *out_slot_index)
> +                              unsigned int *out_slot_index)
>  {
> -       struct signal_page *page;
> +       struct kfd_signal_page *page = process->signal_page;
> +       unsigned int slot;
>
> -       list_for_each_entry(page, &process->signal_event_pages, event_pages) {
> -               if (page->free_slots > 0) {
> -                       unsigned int slot =
> -                               find_first_zero_bit(page->used_slot_bitmap,
> -                                                       SLOTS_PER_PAGE);
> +       if (!page || page->free_slots == 0) {
> +               pr_debug("No free event signal slots were found for process %p\n",
> +                        process);
>
> -                       __set_bit(slot, page->used_slot_bitmap);
> -                       page->free_slots--;
> +               return false;
> +       }
>
> -                       page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
> +       slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE);
>
> -                       *out_page = page;
> -                       *out_slot_index = slot;
> +       __set_bit(slot, page->used_slot_bitmap);
> +       page->free_slots--;
>
> -                       pr_debug("Allocated event signal slot in page %p, slot %d\n",
> -                                       page, slot);
> +       page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
>
> -                       return true;
> -               }
> -       }
> +       *out_slot_index = slot;
>
> -       pr_debug("No free event signal slots were found for process %p\n",
> -                       process);
> +       pr_debug("Allocated event signal slot in page %p, slot %d\n",
> +                page, slot);
>
> -       return false;
> +       return true;
>  }
>
> -#define list_tail_entry(head, type, member) \
> -       list_entry((head)->prev, type, member)
> -
> -static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p)
> +static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
>  {
>         void *backing_store;
> -       struct signal_page *page;
> +       struct kfd_signal_page *page;
>
> -       page = kzalloc(SIGNAL_PAGE_SIZE, GFP_KERNEL);
> +       page = kzalloc(sizeof(*page), GFP_KERNEL);
>         if (!page)
> -               goto fail_alloc_signal_page;
> +               return NULL;
>
>         page->free_slots = SLOTS_PER_PAGE;
>
> -       backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
> +       backing_store = (void *) __get_free_pages(GFP_KERNEL,
>                                         get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
>         if (!backing_store)
>                 goto fail_alloc_signal_store;
>
> -       /* prevent user-mode info leaks */
> +       /* Initialize all events to unsignaled */
>         memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT,
> -               KFD_SIGNAL_EVENT_LIMIT * 8);
> +              KFD_SIGNAL_EVENT_LIMIT * 8);
>
>         page->kernel_address = backing_store;
> -
> -       if (list_empty(&p->signal_event_pages))
> -               page->page_index = 0;
> -       else
> -               page->page_index = list_tail_entry(&p->signal_event_pages,
> -                                                  struct signal_page,
> -                                                  event_pages)->page_index + 1;
> -
>         pr_debug("Allocated new event signal page at %p, for process %p\n",
>                         page, p);
> -       pr_debug("Page index is %d\n", page->page_index);
> -
> -       list_add(&page->event_pages, &p->signal_event_pages);
>
> -       return true;
> +       return page;
>
>  fail_alloc_signal_store:
>         kfree(page);
> -fail_alloc_signal_page:
> -       return false;
> +       return NULL;
>  }
>
> -static bool allocate_event_notification_slot(struct file *devkfd,
> -                                       struct kfd_process *p,
> -                                       struct signal_page **page,
> -                                       unsigned int *signal_slot_index)
> +static bool allocate_event_notification_slot(struct kfd_process *p,
> +                                            unsigned int *signal_slot_index)
>  {
> -       bool ret;
> -
> -       ret = allocate_free_slot(p, page, signal_slot_index);
> -       if (!ret) {
> -               ret = allocate_signal_page(devkfd, p);
> -               if (ret)
> -                       ret = allocate_free_slot(p, page, signal_slot_index);
> +       if (!p->signal_page) {
> +               p->signal_page = allocate_signal_page(p);
> +               if (!p->signal_page)
> +                       return false;
>         }
>
> -       return ret;
> +       return allocate_free_slot(p, signal_slot_index);
>  }
>
>  /* Assumes that the process's event_mutex is locked. */
> -static void release_event_notification_slot(struct signal_page *page,
> +static void release_event_notification_slot(struct kfd_signal_page *page,
>                                                 size_t slot_index)
>  {
>         __clear_bit(slot_index, page->used_slot_bitmap);
> @@ -187,22 +156,6 @@ static void release_event_notification_slot(struct signal_page *page,
>          */
>  }
>
> -static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
> -                                               unsigned int page_index)
> -{
> -       struct signal_page *page;
> -
> -       /*
> -        * This is safe because we don't delete signal pages until the
> -        * process exits.
> -        */
> -       list_for_each_entry(page, &p->signal_event_pages, event_pages)
> -               if (page->page_index == page_index)
> -                       return page;
> -
> -       return NULL;
> -}
> -
>  /*
>   * Assumes that p->event_mutex is held and of course that p is not going
>   * away (current or locked).
> @@ -218,13 +171,6 @@ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
>         return NULL;
>  }
>
> -static u32 make_signal_event_id(struct signal_page *page,
> -                                        unsigned int signal_slot_index)
> -{
> -       return page->page_index |
> -                       (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT);
> -}
> -
>  /*
>   * Produce a kfd event id for a nonsignal event.
>   * These are arbitrary numbers, so we do a sequential search through
> @@ -270,10 +216,9 @@ static u32 make_nonsignal_event_id(struct kfd_process *p)
>  }
>
>  static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p,
> -                                               struct signal_page *page,
>                                                 unsigned int signal_slot)
>  {
> -       return lookup_event_by_id(p, make_signal_event_id(page, signal_slot));
> +       return lookup_event_by_id(p, signal_slot);
>  }
>
>  static int create_signal_event(struct file *devkfd,
> @@ -288,8 +233,7 @@ static int create_signal_event(struct file *devkfd,
>                 return -ENOMEM;
>         }
>
> -       if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page,
> -                                               &ev->signal_slot_index)) {
> +       if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) {
>                 pr_warn("Signal event wasn't created because out of kernel memory\n");
>                 return -ENOMEM;
>         }
> @@ -297,10 +241,9 @@ static int create_signal_event(struct file *devkfd,
>         p->signal_event_count++;
>
>         ev->user_signal_address =
> -                       &ev->signal_page->user_address[ev->signal_slot_index];
> +                       &p->signal_page->user_address[ev->signal_slot_index];
>
> -       ev->event_id = make_signal_event_id(ev->signal_page,
> -                                               ev->signal_slot_index);
> +       ev->event_id = ev->signal_slot_index;
>
>         pr_debug("Signal event number %zu created with id %d, address %p\n",
>                         p->signal_event_count, ev->event_id,
> @@ -327,7 +270,7 @@ void kfd_event_init_process(struct kfd_process *p)
>  {
>         mutex_init(&p->event_mutex);
>         hash_init(p->events);
> -       INIT_LIST_HEAD(&p->signal_event_pages);
> +       p->signal_page = NULL;
>         p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
>         p->signal_event_count = 0;
>  }
> @@ -341,8 +284,9 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
>                 waiter->event = NULL;
>         wake_up_all(&ev->wq);
>
> -       if (ev->signal_page) {
> -               release_event_notification_slot(ev->signal_page,
> +       if ((ev->type == KFD_EVENT_TYPE_SIGNAL ||
> +            ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) {
> +               release_event_notification_slot(p->signal_page,
>                                                 ev->signal_slot_index);
>                 p->signal_event_count--;
>         }
> @@ -365,12 +309,11 @@ static void destroy_events(struct kfd_process *p)
>   * We assume that the process is being destroyed and there is no need to
>   * unmap the pages or keep bookkeeping data in order.
>   */
> -static void shutdown_signal_pages(struct kfd_process *p)
> +static void shutdown_signal_page(struct kfd_process *p)
>  {
> -       struct signal_page *page, *tmp;
> +       struct kfd_signal_page *page = p->signal_page;
>
> -       list_for_each_entry_safe(page, tmp, &p->signal_event_pages,
> -                                       event_pages) {
> +       if (page) {
>                 free_pages((unsigned long)page->kernel_address,
>                                 get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
>                 kfree(page);
> @@ -380,7 +323,7 @@ static void shutdown_signal_pages(struct kfd_process *p)
>  void kfd_event_free_process(struct kfd_process *p)
>  {
>         destroy_events(p);
> -       shutdown_signal_pages(p);
> +       shutdown_signal_page(p);
>  }
>
>  static bool event_can_be_gpu_signaled(const struct kfd_event *ev)
> @@ -420,8 +363,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
>         case KFD_EVENT_TYPE_DEBUG:
>                 ret = create_signal_event(devkfd, p, ev);
>                 if (!ret) {
> -                       *event_page_offset = (ev->signal_page->page_index |
> -                                       KFD_MMAP_EVENTS_MASK);
> +                       *event_page_offset = KFD_MMAP_EVENTS_MASK;
>                         *event_page_offset <<= PAGE_SHIFT;
>                         *event_slot_index = ev->signal_slot_index;
>                 }
> @@ -523,13 +465,17 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
>
>  static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
>  {
> -       page_slots(ev->signal_page)[ev->signal_slot_index] =
> +       page_slots(p->signal_page)[ev->signal_slot_index] =
>                                                 UNSIGNALED_EVENT_SLOT;
>  }
>
> -static bool is_slot_signaled(struct signal_page *page, unsigned int index)
> +static bool is_slot_signaled(struct kfd_process *p, unsigned int index)
>  {
> -       return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT;
> +       if (!p->signal_page)
> +               return false;
> +       else
> +               return page_slots(p->signal_page)[index] !=
> +                       UNSIGNALED_EVENT_SLOT;
>  }
>
>  static void set_event_from_interrupt(struct kfd_process *p,
> @@ -562,22 +508,19 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
>                 /* Partial ID is a full ID. */
>                 ev = lookup_event_by_id(p, partial_id);
>                 set_event_from_interrupt(p, ev);
> -       } else {
> +       } else if (p->signal_page) {
>                 /*
>                  * Partial ID is in fact partial. For now we completely
>                  * ignore it, but we could use any bits we did receive to
>                  * search faster.
>                  */
> -               struct signal_page *page;
>                 unsigned int i;
>
> -               list_for_each_entry(page, &p->signal_event_pages, event_pages)
> -                       for (i = 0; i < SLOTS_PER_PAGE; i++)
> -                               if (is_slot_signaled(page, i)) {
> -                                       ev = lookup_event_by_page_slot(p,
> -                                                               page, i);
> -                                       set_event_from_interrupt(p, ev);
> -                               }
> +               for (i = 0; i < SLOTS_PER_PAGE; i++)
> +                       if (is_slot_signaled(p, i)) {
> +                               ev = lookup_event_by_page_slot(p, i);
> +                               set_event_from_interrupt(p, ev);
> +                       }
>         }
>
>         mutex_unlock(&p->event_mutex);
> @@ -842,9 +785,8 @@ int kfd_wait_on_events(struct kfd_process *p,
>  int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
>  {
>
> -       unsigned int page_index;
>         unsigned long pfn;
> -       struct signal_page *page;
> +       struct kfd_signal_page *page;
>
>         /* check required size is logical */
>         if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) !=
> @@ -853,13 +795,10 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
>                 return -EINVAL;
>         }
>
> -       page_index = vma->vm_pgoff;
> -
> -       page = lookup_signal_page_by_index(p, page_index);
> +       page = p->signal_page;
>         if (!page) {
>                 /* Probably KFD bug, but mmap is user-accessible. */
> -               pr_debug("Signal page could not be found for page_index %u\n",
> -                               page_index);
> +               pr_debug("Signal page could not be found\n");
>                 return -EINVAL;
>         }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
> index 96f9122..f85fcee 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
> @@ -60,7 +60,6 @@ struct kfd_event {
>         wait_queue_head_t wq; /* List of event waiters. */
>
>         /* Only for signal events. */
> -       struct signal_page *signal_page;
>         unsigned int signal_slot_index;
>         uint64_t __user *user_signal_address;
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index d3cf53a..c1b3ee2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -540,8 +540,8 @@ struct kfd_process {
>         struct mutex event_mutex;
>         /* All events in process hashed by ID, linked on kfd_event.events. */
>         DECLARE_HASHTABLE(events, 4);
> -       /* struct slot_page_header.event_pages */
> -       struct list_head signal_event_pages;
> +       /* Event page */
> +       struct kfd_signal_page *signal_page;
>         u32 next_nonsignal_event_id;
>         size_t signal_event_count;
>         bool signal_event_limit_reached;
> --
> 2.7.4
>

This patch is:
Acked-by: Oded Gabbay <oded.gabbay at gmail.com>


More information about the amd-gfx mailing list