[PATCH 04/12] drm/amdgpu: move IV prescreening into the GMC code
Christian König
ckoenig.leichtzumerken at gmail.com
Wed Oct 10 07:08:01 UTC 2018
Yeah, exactly my thinking.
Basically the long term goal is to move most of the reporting and
handling of faults into amdgpu_gmc.c. Otherwise we would duplicate a lot
of handling for future hw generations.
On the other hand, if the approach with the second IH ring buffer works
as expected, we most likely won't need the pre-screening anymore at all.
But that needs more work to be 100% sure.
Christian.
Am 10.10.2018 um 01:46 schrieb Felix Kuehling:
> I realized that most of the code in gmc_v9_0_prescreen_iv is not
> actually hardware-specific. If it was not prescreening, but using an
> amdgpu_iv_entry that was already parsed, I think it could just be a
> generic function for processing retry faults:
>
> * looking up the VM of a fault
> * storing retry faults in a per-VM fifo
> * dropping faults that have already been seen
>
> In other words, it's just a generic top half interrupt handler for retry
> faults while the bottom half (worker thread) would use the per-VM FIFOs
> to handle those pending retry faults.
>
> Regards,
> Felix
>
>
> On 2018-09-26 09:53 AM, Christian König wrote:
>> The GMC/VM subsystem is causing the faults, so move the handling here as
>> well.
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 59 +++++++++++++++++++++++++++++
>> drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 69 ----------------------------------
>> 2 files changed, 59 insertions(+), 69 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 729a2c230f91..f8d69ab85fc3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -244,6 +244,62 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
>> return 0;
>> }
>>
>> +/**
>> + * gmc_v9_0_prescreen_iv - prescreen an interrupt vector
>> + *
>> + * @adev: amdgpu_device pointer
>> + *
>> + * Returns true if the interrupt vector should be further processed.
>> + */
>> +static bool gmc_v9_0_prescreen_iv(struct amdgpu_device *adev,
>> + struct amdgpu_iv_entry *entry,
>> + uint64_t addr)
>> +{
>> + struct amdgpu_vm *vm;
>> + u64 key;
>> + int r;
>> +
>> + /* No PASID, can't identify faulting process */
>> + if (!entry->pasid)
>> + return true;
>> +
>> + /* Not a retry fault */
>> + if (!(entry->src_data[1] & 0x80))
>> + return true;
>> +
>> + /* Track retry faults in per-VM fault FIFO. */
>> + spin_lock(&adev->vm_manager.pasid_lock);
>> + vm = idr_find(&adev->vm_manager.pasid_idr, entry->pasid);
>> + if (!vm) {
>> + /* VM not found, process it normally */
>> + spin_unlock(&adev->vm_manager.pasid_lock);
>> + return true;
>> + }
>> +
>> + key = AMDGPU_VM_FAULT(entry->pasid, addr);
>> + r = amdgpu_vm_add_fault(vm->fault_hash, key);
>> +
>> + /* Hash table is full or the fault is already being processed,
>> + * ignore further page faults
>> + */
>> + if (r != 0) {
>> + spin_unlock(&adev->vm_manager.pasid_lock);
>> + return false;
>> + }
>> + /* No locking required with single writer and single reader */
>> + r = kfifo_put(&vm->faults, key);
>> + if (!r) {
>> + /* FIFO is full. Ignore it until there is space */
>> + amdgpu_vm_clear_fault(vm->fault_hash, key);
>> + spin_unlock(&adev->vm_manager.pasid_lock);
>> + return false;
>> + }
>> +
>> + spin_unlock(&adev->vm_manager.pasid_lock);
>> + /* It's the first fault for this address, process it normally */
>> + return true;
>> +}
>> +
>> static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
>> struct amdgpu_irq_src *source,
>> struct amdgpu_iv_entry *entry)
>> @@ -255,6 +311,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
>> addr = (u64)entry->src_data[0] << 12;
>> addr |= ((u64)entry->src_data[1] & 0xf) << 44;
>>
>> + if (!gmc_v9_0_prescreen_iv(adev, entry, addr))
>> + return 1;
>> +
>> if (!amdgpu_sriov_vf(adev)) {
>> status = RREG32(hub->vm_l2_pro_fault_status);
>> WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>> index 0f50bef87163..0f68a0cd1fbf 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>> @@ -228,76 +228,7 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev)
>> */
>> static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev)
>> {
>> - u32 ring_index = adev->irq.ih.rptr >> 2;
>> - u32 dw0, dw3, dw4, dw5;
>> - u16 pasid;
>> - u64 addr, key;
>> - struct amdgpu_vm *vm;
>> - int r;
>> -
>> - dw0 = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
>> - dw3 = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
>> - dw4 = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]);
>> - dw5 = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]);
>> -
>> - /* Filter retry page faults, let only the first one pass. If
>> - * there are too many outstanding faults, ignore them until
>> - * some faults get cleared.
>> - */
>> - switch (dw0 & 0xff) {
>> - case SOC15_IH_CLIENTID_VMC:
>> - case SOC15_IH_CLIENTID_UTCL2:
>> - break;
>> - default:
>> - /* Not a VM fault */
>> - return true;
>> - }
>> -
>> - pasid = dw3 & 0xffff;
>> - /* No PASID, can't identify faulting process */
>> - if (!pasid)
>> - return true;
>> -
>> - /* Not a retry fault */
>> - if (!(dw5 & 0x80))
>> - return true;
>> -
>> - /* Track retry faults in per-VM fault FIFO. */
>> - spin_lock(&adev->vm_manager.pasid_lock);
>> - vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>> - addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12);
>> - key = AMDGPU_VM_FAULT(pasid, addr);
>> - if (!vm) {
>> - /* VM not found, process it normally */
>> - spin_unlock(&adev->vm_manager.pasid_lock);
>> - return true;
>> - } else {
>> - r = amdgpu_vm_add_fault(vm->fault_hash, key);
>> -
>> - /* Hash table is full or the fault is already being processed,
>> - * ignore further page faults
>> - */
>> - if (r != 0) {
>> - spin_unlock(&adev->vm_manager.pasid_lock);
>> - goto ignore_iv;
>> - }
>> - }
>> - /* No locking required with single writer and single reader */
>> - r = kfifo_put(&vm->faults, key);
>> - if (!r) {
>> - /* FIFO is full. Ignore it until there is space */
>> - amdgpu_vm_clear_fault(vm->fault_hash, key);
>> - spin_unlock(&adev->vm_manager.pasid_lock);
>> - goto ignore_iv;
>> - }
>> -
>> - spin_unlock(&adev->vm_manager.pasid_lock);
>> - /* It's the first fault for this address, process it normally */
>> return true;
>> -
>> -ignore_iv:
>> - adev->irq.ih.rptr += 32;
>> - return false;
>> }
>>
>> /**
More information about the amd-gfx
mailing list