[PATCH 9/9] drm/amdgpu: Track pending retry faults in IH and VM

Christian König deathsimple at vodafone.de
Sat Aug 26 13:36:09 UTC 2017


Am 26.08.2017 um 09:19 schrieb Felix Kuehling:
> IH tracks pending retry faults in a hash table for fast lookup in
> interrupt context. Each VM has a short FIFO of pending VM faults for
> processing in a bottom half.
>
> The IH prescreening stage adds retry faults and filters out repeated
> retry interrupts to minimize the impact of interrupt storms.
>
> It's the VM's responsibility to remove pending faults once they are
> handled. For now this is only done when the VM is destroyed.
>
> Change-Id: I0cf15bfc767d06d9d5c3b13ad1ba7bc6aa520947
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>

Acked-by: Christian König <christian.koenig at amd.com>

> ---
>   drivers/gpu/drm/Kconfig                |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 76 +++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 12 ++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  7 +++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  7 +++
>   drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 78 +++++++++++++++++++++++++++++++++-
>   6 files changed, 180 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 78d7fc0..f8902dc 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -184,6 +184,7 @@ config DRM_AMDGPU
>   	select BACKLIGHT_CLASS_DEVICE
>   	select BACKLIGHT_LCD_SUPPORT
>   	select INTERVAL_TREE
> +        select CHASH
>   	help
>   	  Choose this option if you have a recent AMD Radeon graphics card.
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> index c834a40..d4d3579 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> @@ -196,3 +196,79 @@ int amdgpu_ih_process(struct amdgpu_device *adev)
>   
>   	return IRQ_HANDLED;
>   }
> +
> +/**
> + * amdgpu_ih_add_fault - Add a page fault record
> + *
> + * @adev: amdgpu device pointer
> + * @key: 64-bit encoding of PASID and address
> + *
> + * This should be called when a retry page fault interrupt is
> + * received. If this is a new page fault, it will be added to a hash
> + * table. The return value indicates whether this is a new fault, or
> + * a fault that was already known and is already being handled.
> + *
> + * If there are too many pending page faults, this will fail. Retry
> + * interrupts should be ignored in this case until there is enough
> + * free space.
> + *
> + * Returns 0 if the fault was added, 1 if the fault was already known,
> + * -ENOSPC if there are too many pending faults.
> + */
> +int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key)
> +{
> +	unsigned long flags;
> +	int r = -ENOSPC;
> +
> +	if (WARN_ON_ONCE(!adev->irq.ih.faults))
> +		/* Should be allocated in <IP>_ih_sw_init on GPUs that
> +		 * support retry faults and require retry filtering.
> +		 */
> +		return r;
> +
> +	spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
> +
> +	/* Only let the hash table fill up to 50% for best performance */
> +	if (adev->irq.ih.faults->count > (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1)))
> +		goto unlock_out;
> +
> +	r = chash_table_copy_in(&adev->irq.ih.faults->hash, key, NULL);
> +	if (!r)
> +		adev->irq.ih.faults->count++;
> +
> +	/* chash_table_copy_in should never fail unless we're losing count */
> +	WARN_ON_ONCE(r < 0);
> +
> +unlock_out:
> +	spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
> +	return r;
> +}
> +
> +/**
> + * amdgpu_ih_clear_fault - Remove a page fault record
> + *
> + * @adev: amdgpu device pointer
> + * @key: 64-bit encoding of PASID and address
> + *
> + * This should be called when a page fault has been handled. Any
> + * future interrupt with this key will be processed as a new
> + * page fault.
> + */
> +void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key)
> +{
> +	unsigned long flags;
> +	int r;
> +
> +	if (!adev->irq.ih.faults)
> +		return;
> +
> +	spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
> +
> +	r = chash_table_remove(&adev->irq.ih.faults->hash, key, NULL);
> +	if (!WARN_ON_ONCE(r < 0)) {
> +		adev->irq.ih.faults->count--;
> +		WARN_ON_ONCE(adev->irq.ih.faults->count < 0);
> +	}
> +
> +	spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> index 3de8e74..d107f1b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> @@ -24,6 +24,8 @@
>   #ifndef __AMDGPU_IH_H__
>   #define __AMDGPU_IH_H__
>   
> +#include <linux/chash.h>
> +
>   struct amdgpu_device;
>    /*
>     * vega10+ IH clients
> @@ -69,6 +71,13 @@ enum amdgpu_ih_clientid
>   
>   #define AMDGPU_IH_CLIENTID_LEGACY 0
>   
> +#define AMDGPU_PAGEFAULT_HASH_BITS 10
> +struct amdgpu_retryfault_hashtable {
> +	DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
> +	spinlock_t	lock;
> +	int		count;
> +};
> +
>   /*
>    * R6xx+ IH ring
>    */
> @@ -87,6 +96,7 @@ struct amdgpu_ih_ring {
>   	bool			use_doorbell;
>   	bool			use_bus_addr;
>   	dma_addr_t		rb_dma_addr; /* only used when use_bus_addr = true */
> +	struct amdgpu_retryfault_hashtable *faults;
>   };
>   
>   #define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4
> @@ -109,5 +119,7 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
>   			bool use_bus_addr);
>   void amdgpu_ih_ring_fini(struct amdgpu_device *adev);
>   int amdgpu_ih_process(struct amdgpu_device *adev);
> +int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key);
> +void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key);
>   
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index c635699..8bdabb3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2622,6 +2622,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		vm->pasid = pasid;
>   	}
>   
> +	INIT_KFIFO(vm->faults);
> +
>   	vm->vm_context = vm_context;
>   	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
>   		mutex_lock(&id_mgr->lock);
> @@ -2688,6 +2690,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   {
>   	struct amdgpu_bo_va_mapping *mapping, *tmp;
>   	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
> +	u64 fault;
>   	int i;
>   
>   	if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
> @@ -2710,6 +2713,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   		mutex_unlock(&id_mgr->lock);
>   	}
>   
> +	/* Clear pending page faults from IH when the VM is destroyed */
> +	while (kfifo_get(&vm->faults, &fault))
> +		amdgpu_ih_clear_fault(adev, fault);
> +
>   	if (vm->pasid) {
>   		unsigned long flags;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 692b05c..51d3e35 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -117,6 +117,10 @@ struct amdgpu_vm_pt {
>   	unsigned		last_entry_used;
>   };
>   
> +#define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
> +#define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48)
> +#define AMDGPU_VM_FAULT_ADDR(fault)  ((u64)(fault) & 0xfffffffff000ULL)
> +
>   struct amdgpu_vm {
>   	/* tree of virtual addresses mapped */
>   	struct rb_root		va;
> @@ -158,6 +162,9 @@ struct amdgpu_vm {
>   
>   	/* Whether this is a Compute or GFX Context */
>   	int			vm_context;
> +
> +	/* Up to 16 pending page faults */
> +	DECLARE_KFIFO(faults, u64, 16);
>   };
>   
>   struct amdgpu_vm_id {
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> index d14a2d5..ae2b84a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> @@ -235,8 +235,73 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev)
>    */
>   static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev)
>   {
> -	/* TODO: Filter known pending page faults */
> +	u32 ring_index = adev->irq.ih.rptr >> 2;
> +	u32 dw0, dw3, dw4, dw5;
> +	u16 pasid;
> +	u64 addr, key;
> +	struct amdgpu_vm *vm;
> +	int r;
> +
> +	dw0 = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
> +	dw3 = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
> +	dw4 = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]);
> +	dw5 = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]);
> +
> +	/* Filter retry page faults, let only the first one pass. If
> +	 * there are too many outstanding faults, ignore them until
> +	 * some faults get cleared.
> +	 */
> +	switch(dw0 & 0xff) {
> +	case AMDGPU_IH_CLIENTID_VMC:
> +	case AMDGPU_IH_CLIENTID_UTCL2:
> +		break;
> +	default:
> +		/* Not a VM fault */
> +		return true;
> +	}
> +
> +	/* Not a retry fault */
> +	if (!(dw5 & 0x80))
> +		return true;
> +
> +	pasid = dw3 & 0xffff;
> +	/* No PASID, can't identify faulting process */
> +	if (!pasid)
> +		return true;
> +
> +	addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12);
> +	key = AMDGPU_VM_FAULT(pasid, addr);
> +	r = amdgpu_ih_add_fault(adev, key);
> +
> +	/* Hash table is full or the fault is already being processed,
> +	 * ignore further page faults
> +	 */
> +	if (r != 0)
> +		goto ignore_iv;
> +
> +	/* Track retry faults in per-VM fault FIFO. */
> +	spin_lock(&adev->vm_manager.pasid_lock);
> +	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
> +	spin_unlock(&adev->vm_manager.pasid_lock);
> +	if (WARN_ON_ONCE(!vm)) {
> +		/* VM not found, process it normally */
> +		amdgpu_ih_clear_fault(adev, key);
> +		return true;
> +	}
> +	/* No locking required with single writer and single reader */
> +	r = kfifo_put(&vm->faults, key);
> +	if (!r) {
> +		/* FIFO is full. Ignore it until there is space */
> +		amdgpu_ih_clear_fault(adev, key);
> +		goto ignore_iv;
> +	}
> +
> +	/* It's the first fault for this address, process it normally */
>   	return true;
> +
> +ignore_iv:
> +	adev->irq.ih.rptr += 32;
> +	return false;
>   }
>   
>   /**
> @@ -323,6 +388,14 @@ static int vega10_ih_sw_init(void *handle)
>   	adev->irq.ih.use_doorbell = true;
>   	adev->irq.ih.doorbell_index = AMDGPU_DOORBELL64_IH << 1;
>   
> +	adev->irq.ih.faults = kmalloc(sizeof(*adev->irq.ih.faults), GFP_KERNEL);
> +	if (!adev->irq.ih.faults)
> +		return -ENOMEM;
> +	INIT_CHASH_TABLE(adev->irq.ih.faults->hash,
> +			 AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
> +	spin_lock_init(&adev->irq.ih.faults->lock);
> +	adev->irq.ih.faults->count = 0;
> +
>   	r = amdgpu_irq_init(adev);
>   
>   	return r;
> @@ -335,6 +408,9 @@ static int vega10_ih_sw_fini(void *handle)
>   	amdgpu_irq_fini(adev);
>   	amdgpu_ih_ring_fini(adev);
>   
> +	kfree(adev->irq.ih.faults);
> +	adev->irq.ih.faults = NULL;
> +
>   	return 0;
>   }
>   




More information about the amd-gfx mailing list