[PATCH v2] amdkfd: Drop interrupt SW ring buffer

Thu Jan 8 03:24:40 PST 2015

Am 08.01.2015 um 05:27 schrieb Michel Dänzer:
> From: Michel Dänzer <michel.daenzer at amd.com>
>
> The work queue couldn't reliably prevent the SW ring buffer from
> overflowing, so dmesg was spammed by
>
>   kfd kfd: Interrupt ring overflow, dropping interrupt.
>
> messages when running e.g. the Atlantis Substance demo from
> https://wiki.unrealengine.com/Linux_Demos on Kaveri.
>
> Since the SW ring buffer doesn't actually do anything at this point, just
> remove it for now. When actual interrupt processing code is added to
> amdkfd, it should try to do things immediately and only defer to work
> queues when necessary.
>
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>

Looks reasonable to me. Patch is Reviewed-by: Christian König 
<christian.koenig at amd.com>

> ---
>
> v2: Clarify what the problem is and how it can be reproduced
>
>   drivers/gpu/drm/amd/amdkfd/Makefile        |   3 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_device.c    |  20 +---
>   drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 176 -----------------------------
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h      |  15 ---
>   4 files changed, 2 insertions(+), 212 deletions(-)
>   delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
> index be6246d..307a309 100644
> --- a/drivers/gpu/drm/amd/amdkfd/Makefile
> +++ b/drivers/gpu/drm/amd/amdkfd/Makefile
> @@ -8,7 +8,6 @@ amdkfd-y	:= kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
>   		kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
>   		kfd_process.o kfd_queue.o kfd_mqd_manager.o \
>   		kfd_kernel_queue.o kfd_packet_manager.o \
> -		kfd_process_queue_manager.o kfd_device_queue_manager.o \
> -		kfd_interrupt.o
> +		kfd_process_queue_manager.o kfd_device_queue_manager.o
>   
>   obj-$(CONFIG_HSA_AMD)	+= amdkfd.o
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 43884eb..633532a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -192,13 +192,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>   		goto kfd_topology_add_device_error;
>   	}
>   
> -	if (kfd_interrupt_init(kfd)) {
> -		dev_err(kfd_device,
> -			"Error initializing interrupts for device (%x:%x)\n",
> -			kfd->pdev->vendor, kfd->pdev->device);
> -		goto kfd_interrupt_error;
> -	}
> -
>   	if (!device_iommu_pasid_init(kfd)) {
>   		dev_err(kfd_device,
>   			"Error initializing iommuv2 for device (%x:%x)\n",
> @@ -237,8 +230,6 @@ dqm_start_error:
>   device_queue_manager_error:
>   	amd_iommu_free_device(kfd->pdev);
>   device_iommu_pasid_error:
> -	kfd_interrupt_exit(kfd);
> -kfd_interrupt_error:
>   	kfd_topology_remove_device(kfd);
>   kfd_topology_add_device_error:
>   	kfd2kgd->fini_sa_manager(kfd->kgd);
> @@ -254,7 +245,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
>   	if (kfd->init_complete) {
>   		device_queue_manager_uninit(kfd->dqm);
>   		amd_iommu_free_device(kfd->pdev);
> -		kfd_interrupt_exit(kfd);
>   		kfd_topology_remove_device(kfd);
>   	}
>   
> @@ -296,13 +286,5 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
>   /* This is called directly from KGD at ISR. */
>   void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
>   {
> -	if (kfd->init_complete) {
> -		spin_lock(&kfd->interrupt_lock);
> -
> -		if (kfd->interrupts_active
> -		    && enqueue_ih_ring_entry(kfd, ih_ring_entry))
> -			schedule_work(&kfd->interrupt_work);
> -
> -		spin_unlock(&kfd->interrupt_lock);
> -	}
> +	/* Process interrupts / schedule work as necessary */
>   }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
> deleted file mode 100644
> index 5b99909..0000000
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
> +++ /dev/null
> @@ -1,176 +0,0 @@
> -/*
> - * Copyright 2014 Advanced Micro Devices, Inc.
> - *
> - * Permission is hereby granted, free of charge, to any person obtaining a
> - * copy of this software and associated documentation files (the "Software"),
> - * to deal in the Software without restriction, including without limitation
> - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> - * and/or sell copies of the Software, and to permit persons to whom the
> - * Software is furnished to do so, subject to the following conditions:
> - *
> - * The above copyright notice and this permission notice shall be included in
> - * all copies or substantial portions of the Software.
> - *
> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> - * OTHER DEALINGS IN THE SOFTWARE.
> - */
> -
> -/*
> - * KFD Interrupts.
> - *
> - * AMD GPUs deliver interrupts by pushing an interrupt description onto the
> - * interrupt ring and then sending an interrupt. KGD receives the interrupt
> - * in ISR and sends us a pointer to each new entry on the interrupt ring.
> - *
> - * We generally can't process interrupt-signaled events from ISR, so we call
> - * out to each interrupt client module (currently only the scheduler) to ask if
> - * each interrupt is interesting. If they return true, then it requires further
> - * processing so we copy it to an internal interrupt ring and call each
> - * interrupt client again from a work-queue.
> - *
> - * There's no acknowledgment for the interrupts we use. The hardware simply
> - * queues a new interrupt each time without waiting.
> - *
> - * The fixed-size internal queue means that it's possible for us to lose
> - * interrupts because we have no back-pressure to the hardware.
> - */
> -
> -#include <linux/slab.h>
> -#include <linux/device.h>
> -#include "kfd_priv.h"
> -
> -#define KFD_INTERRUPT_RING_SIZE 256
> -
> -static void interrupt_wq(struct work_struct *);
> -
> -int kfd_interrupt_init(struct kfd_dev *kfd)
> -{
> -	void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE,
> -					kfd->device_info->ih_ring_entry_size,
> -					GFP_KERNEL);
> -	if (!interrupt_ring)
> -		return -ENOMEM;
> -
> -	kfd->interrupt_ring = interrupt_ring;
> -	kfd->interrupt_ring_size =
> -		KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size;
> -	atomic_set(&kfd->interrupt_ring_wptr, 0);
> -	atomic_set(&kfd->interrupt_ring_rptr, 0);
> -
> -	spin_lock_init(&kfd->interrupt_lock);
> -
> -	INIT_WORK(&kfd->interrupt_work, interrupt_wq);
> -
> -	kfd->interrupts_active = true;
> -
> -	/*
> -	 * After this function returns, the interrupt will be enabled. This
> -	 * barrier ensures that the interrupt running on a different processor
> -	 * sees all the above writes.
> -	 */
> -	smp_wmb();
> -
> -	return 0;
> -}
> -
> -void kfd_interrupt_exit(struct kfd_dev *kfd)
> -{
> -	/*
> -	 * Stop the interrupt handler from writing to the ring and scheduling
> -	 * workqueue items. The spinlock ensures that any interrupt running
> -	 * after we have unlocked sees interrupts_active = false.
> -	 */
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&kfd->interrupt_lock, flags);
> -	kfd->interrupts_active = false;
> -	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
> -
> -	/*
> -	 * Flush_scheduled_work ensures that there are no outstanding
> -	 * work-queue items that will access interrupt_ring. New work items
> -	 * can't be created because we stopped interrupt handling above.
> -	 */
> -	flush_scheduled_work();
> -
> -	kfree(kfd->interrupt_ring);
> -}
> -
> -/*
> - * This assumes that it can't be called concurrently with itself
> - * but only with dequeue_ih_ring_entry.
> - */
> -bool enqueue_ih_ring_entry(struct kfd_dev *kfd,	const void *ih_ring_entry)
> -{
> -	unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
> -	unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
> -
> -	if ((rptr - wptr) % kfd->interrupt_ring_size ==
> -					kfd->device_info->ih_ring_entry_size) {
> -		/* This is very bad, the system is likely to hang. */
> -		dev_err_ratelimited(kfd_chardev(),
> -			"Interrupt ring overflow, dropping interrupt.\n");
> -		return false;
> -	}
> -
> -	memcpy(kfd->interrupt_ring + wptr, ih_ring_entry,
> -			kfd->device_info->ih_ring_entry_size);
> -
> -	wptr = (wptr + kfd->device_info->ih_ring_entry_size) %
> -			kfd->interrupt_ring_size;
> -	smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */
> -	atomic_set(&kfd->interrupt_ring_wptr, wptr);
> -
> -	return true;
> -}
> -
> -/*
> - * This assumes that it can't be called concurrently with itself
> - * but only with enqueue_ih_ring_entry.
> - */
> -static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
> -{
> -	/*
> -	 * Assume that wait queues have an implicit barrier, i.e. anything that
> -	 * happened in the ISR before it queued work is visible.
> -	 */
> -
> -	unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
> -	unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
> -
> -	if (rptr == wptr)
> -		return false;
> -
> -	memcpy(ih_ring_entry, kfd->interrupt_ring + rptr,
> -			kfd->device_info->ih_ring_entry_size);
> -
> -	rptr = (rptr + kfd->device_info->ih_ring_entry_size) %
> -			kfd->interrupt_ring_size;
> -
> -	/*
> -	 * Ensure the rptr write update is not visible until
> -	 * memcpy has finished reading.
> -	 */
> -	smp_mb();
> -	atomic_set(&kfd->interrupt_ring_rptr, rptr);
> -
> -	return true;
> -}
> -
> -static void interrupt_wq(struct work_struct *work)
> -{
> -	struct kfd_dev *dev = container_of(work, struct kfd_dev,
> -						interrupt_work);
> -
> -	uint32_t ih_ring_entry[DIV_ROUND_UP(
> -				dev->device_info->ih_ring_entry_size,
> -				sizeof(uint32_t))];
> -
> -	while (dequeue_ih_ring_entry(dev, ih_ring_entry))
> -		;
> -}
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index f9fb81e..3a6ac90 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -135,22 +135,10 @@ struct kfd_dev {
>   
>   	struct kgd2kfd_shared_resources shared_resources;
>   
> -	void *interrupt_ring;
> -	size_t interrupt_ring_size;
> -	atomic_t interrupt_ring_rptr;
> -	atomic_t interrupt_ring_wptr;
> -	struct work_struct interrupt_work;
> -	spinlock_t interrupt_lock;
> -
>   	/* QCM Device instance */
>   	struct device_queue_manager *dqm;
>   
>   	bool init_complete;
> -	/*
> -	 * Interrupts of interest to KFD are copied
> -	 * from the HW ring into a SW ring.
> -	 */
> -	bool interrupts_active;
>   };
>   
>   /* KGD2KFD callbacks */
> @@ -513,10 +501,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
>   struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
>   
>   /* Interrupts */
> -int kfd_interrupt_init(struct kfd_dev *dev);
> -void kfd_interrupt_exit(struct kfd_dev *dev);
>   void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
> -bool enqueue_ih_ring_entry(struct kfd_dev *kfd,	const void *ih_ring_entry);
>   
>   /* Power Management */
>   void kgd2kfd_suspend(struct kfd_dev *kfd);