[PATCH v2] amdkfd: Drop interrupt SW ring buffer
Christian König
deathsimple at vodafone.de
Thu Jan 8 03:24:40 PST 2015
Am 08.01.2015 um 05:27 schrieb Michel Dänzer:
> From: Michel Dänzer <michel.daenzer at amd.com>
>
> The work queue couldn't reliably prevent the SW ring buffer from
> overflowing, so dmesg was spammed by
>
> kfd kfd: Interrupt ring overflow, dropping interrupt.
>
> messages when running e.g. the Atlantis Substance demo from
> https://wiki.unrealengine.com/Linux_Demos on Kaveri.
>
> Since the SW ring buffer doesn't actually do anything at this point, just
> remove it for now. When actual interrupt processing code is added to
> amdkfd, it should try to do things immediately and only defer to work
> queues when necessary.
>
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
Looks reasonable to me. Patch is Reviewed-by: Christian König
<christian.koenig at amd.com>
> ---
>
> v2: Clarify what the problem is and how it can be reproduced
>
> drivers/gpu/drm/amd/amdkfd/Makefile | 3 +-
> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 20 +---
> drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 176 -----------------------------
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 15 ---
> 4 files changed, 2 insertions(+), 212 deletions(-)
> delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
> index be6246d..307a309 100644
> --- a/drivers/gpu/drm/amd/amdkfd/Makefile
> +++ b/drivers/gpu/drm/amd/amdkfd/Makefile
> @@ -8,7 +8,6 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
> kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
> kfd_process.o kfd_queue.o kfd_mqd_manager.o \
> kfd_kernel_queue.o kfd_packet_manager.o \
> - kfd_process_queue_manager.o kfd_device_queue_manager.o \
> - kfd_interrupt.o
> + kfd_process_queue_manager.o kfd_device_queue_manager.o
>
> obj-$(CONFIG_HSA_AMD) += amdkfd.o
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 43884eb..633532a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -192,13 +192,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
> goto kfd_topology_add_device_error;
> }
>
> - if (kfd_interrupt_init(kfd)) {
> - dev_err(kfd_device,
> - "Error initializing interrupts for device (%x:%x)\n",
> - kfd->pdev->vendor, kfd->pdev->device);
> - goto kfd_interrupt_error;
> - }
> -
> if (!device_iommu_pasid_init(kfd)) {
> dev_err(kfd_device,
> "Error initializing iommuv2 for device (%x:%x)\n",
> @@ -237,8 +230,6 @@ dqm_start_error:
> device_queue_manager_error:
> amd_iommu_free_device(kfd->pdev);
> device_iommu_pasid_error:
> - kfd_interrupt_exit(kfd);
> -kfd_interrupt_error:
> kfd_topology_remove_device(kfd);
> kfd_topology_add_device_error:
> kfd2kgd->fini_sa_manager(kfd->kgd);
> @@ -254,7 +245,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
> if (kfd->init_complete) {
> device_queue_manager_uninit(kfd->dqm);
> amd_iommu_free_device(kfd->pdev);
> - kfd_interrupt_exit(kfd);
> kfd_topology_remove_device(kfd);
> }
>
> @@ -296,13 +286,5 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
> /* This is called directly from KGD at ISR. */
> void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
> {
> - if (kfd->init_complete) {
> - spin_lock(&kfd->interrupt_lock);
> -
> - if (kfd->interrupts_active
> - && enqueue_ih_ring_entry(kfd, ih_ring_entry))
> - schedule_work(&kfd->interrupt_work);
> -
> - spin_unlock(&kfd->interrupt_lock);
> - }
> + /* Process interrupts / schedule work as necessary */
> }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
> deleted file mode 100644
> index 5b99909..0000000
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
> +++ /dev/null
> @@ -1,176 +0,0 @@
> -/*
> - * Copyright 2014 Advanced Micro Devices, Inc.
> - *
> - * Permission is hereby granted, free of charge, to any person obtaining a
> - * copy of this software and associated documentation files (the "Software"),
> - * to deal in the Software without restriction, including without limitation
> - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> - * and/or sell copies of the Software, and to permit persons to whom the
> - * Software is furnished to do so, subject to the following conditions:
> - *
> - * The above copyright notice and this permission notice shall be included in
> - * all copies or substantial portions of the Software.
> - *
> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> - * OTHER DEALINGS IN THE SOFTWARE.
> - */
> -
> -/*
> - * KFD Interrupts.
> - *
> - * AMD GPUs deliver interrupts by pushing an interrupt description onto the
> - * interrupt ring and then sending an interrupt. KGD receives the interrupt
> - * in ISR and sends us a pointer to each new entry on the interrupt ring.
> - *
> - * We generally can't process interrupt-signaled events from ISR, so we call
> - * out to each interrupt client module (currently only the scheduler) to ask if
> - * each interrupt is interesting. If they return true, then it requires further
> - * processing so we copy it to an internal interrupt ring and call each
> - * interrupt client again from a work-queue.
> - *
> - * There's no acknowledgment for the interrupts we use. The hardware simply
> - * queues a new interrupt each time without waiting.
> - *
> - * The fixed-size internal queue means that it's possible for us to lose
> - * interrupts because we have no back-pressure to the hardware.
> - */
> -
> -#include <linux/slab.h>
> -#include <linux/device.h>
> -#include "kfd_priv.h"
> -
> -#define KFD_INTERRUPT_RING_SIZE 256
> -
> -static void interrupt_wq(struct work_struct *);
> -
> -int kfd_interrupt_init(struct kfd_dev *kfd)
> -{
> - void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE,
> - kfd->device_info->ih_ring_entry_size,
> - GFP_KERNEL);
> - if (!interrupt_ring)
> - return -ENOMEM;
> -
> - kfd->interrupt_ring = interrupt_ring;
> - kfd->interrupt_ring_size =
> - KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size;
> - atomic_set(&kfd->interrupt_ring_wptr, 0);
> - atomic_set(&kfd->interrupt_ring_rptr, 0);
> -
> - spin_lock_init(&kfd->interrupt_lock);
> -
> - INIT_WORK(&kfd->interrupt_work, interrupt_wq);
> -
> - kfd->interrupts_active = true;
> -
> - /*
> - * After this function returns, the interrupt will be enabled. This
> - * barrier ensures that the interrupt running on a different processor
> - * sees all the above writes.
> - */
> - smp_wmb();
> -
> - return 0;
> -}
> -
> -void kfd_interrupt_exit(struct kfd_dev *kfd)
> -{
> - /*
> - * Stop the interrupt handler from writing to the ring and scheduling
> - * workqueue items. The spinlock ensures that any interrupt running
> - * after we have unlocked sees interrupts_active = false.
> - */
> - unsigned long flags;
> -
> - spin_lock_irqsave(&kfd->interrupt_lock, flags);
> - kfd->interrupts_active = false;
> - spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
> -
> - /*
> - * Flush_scheduled_work ensures that there are no outstanding
> - * work-queue items that will access interrupt_ring. New work items
> - * can't be created because we stopped interrupt handling above.
> - */
> - flush_scheduled_work();
> -
> - kfree(kfd->interrupt_ring);
> -}
> -
> -/*
> - * This assumes that it can't be called concurrently with itself
> - * but only with dequeue_ih_ring_entry.
> - */
> -bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
> -{
> - unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
> - unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
> -
> - if ((rptr - wptr) % kfd->interrupt_ring_size ==
> - kfd->device_info->ih_ring_entry_size) {
> - /* This is very bad, the system is likely to hang. */
> - dev_err_ratelimited(kfd_chardev(),
> - "Interrupt ring overflow, dropping interrupt.\n");
> - return false;
> - }
> -
> - memcpy(kfd->interrupt_ring + wptr, ih_ring_entry,
> - kfd->device_info->ih_ring_entry_size);
> -
> - wptr = (wptr + kfd->device_info->ih_ring_entry_size) %
> - kfd->interrupt_ring_size;
> - smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */
> - atomic_set(&kfd->interrupt_ring_wptr, wptr);
> -
> - return true;
> -}
> -
> -/*
> - * This assumes that it can't be called concurrently with itself
> - * but only with enqueue_ih_ring_entry.
> - */
> -static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
> -{
> - /*
> - * Assume that wait queues have an implicit barrier, i.e. anything that
> - * happened in the ISR before it queued work is visible.
> - */
> -
> - unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
> - unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
> -
> - if (rptr == wptr)
> - return false;
> -
> - memcpy(ih_ring_entry, kfd->interrupt_ring + rptr,
> - kfd->device_info->ih_ring_entry_size);
> -
> - rptr = (rptr + kfd->device_info->ih_ring_entry_size) %
> - kfd->interrupt_ring_size;
> -
> - /*
> - * Ensure the rptr write update is not visible until
> - * memcpy has finished reading.
> - */
> - smp_mb();
> - atomic_set(&kfd->interrupt_ring_rptr, rptr);
> -
> - return true;
> -}
> -
> -static void interrupt_wq(struct work_struct *work)
> -{
> - struct kfd_dev *dev = container_of(work, struct kfd_dev,
> - interrupt_work);
> -
> - uint32_t ih_ring_entry[DIV_ROUND_UP(
> - dev->device_info->ih_ring_entry_size,
> - sizeof(uint32_t))];
> -
> - while (dequeue_ih_ring_entry(dev, ih_ring_entry))
> - ;
> -}
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index f9fb81e..3a6ac90 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -135,22 +135,10 @@ struct kfd_dev {
>
> struct kgd2kfd_shared_resources shared_resources;
>
> - void *interrupt_ring;
> - size_t interrupt_ring_size;
> - atomic_t interrupt_ring_rptr;
> - atomic_t interrupt_ring_wptr;
> - struct work_struct interrupt_work;
> - spinlock_t interrupt_lock;
> -
> /* QCM Device instance */
> struct device_queue_manager *dqm;
>
> bool init_complete;
> - /*
> - * Interrupts of interest to KFD are copied
> - * from the HW ring into a SW ring.
> - */
> - bool interrupts_active;
> };
>
> /* KGD2KFD callbacks */
> @@ -513,10 +501,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
> struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
>
> /* Interrupts */
> -int kfd_interrupt_init(struct kfd_dev *dev);
> -void kfd_interrupt_exit(struct kfd_dev *dev);
> void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
> -bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
>
> /* Power Management */
> void kgd2kfd_suspend(struct kfd_dev *kfd);
More information about the dri-devel
mailing list