[PATCH] drm/amdgpu: fix and cleanup TLB flushing of GMC9

Mon Oct 22 02:10:13 UTC 2018

>-----Original Message-----
>From: Christian König <ckoenig.leichtzumerken at gmail.com>
>Sent: Friday, October 19, 2018 4:24 PM
>To: amd-gfx at lists.freedesktop.org; Deng, Emily <Emily.Deng at amd.com>;
>Deucher, Alexander <Alexander.Deucher at amd.com>; StDenis, Tom
><Tom.StDenis at amd.com>
>Subject: Re: [PATCH] drm/amdgpu: fix and cleanup TLB flushing of GMC9
>
>Forwarding explicitly to a few people.
>
>Please comment and/or test,
>Christian.
>
>Am 19.10.18 um 09:49 schrieb Christian König:
>> Instead of simplifying the logic this was becoming more and more
>> complicated.
>>
>> Drop all that and just double check if the invalidation request was
>> dropped because of an engine reset.
Could you leave the original part with sriov flag? As it has no issues about sriov, so we don't want to change the logical.

Best wishes
Emily Deng

>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |   4 --
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |   3 +
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 100 +++++++------------------
>------
>>   3 files changed, 26 insertions(+), 81 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index c773ea147489..ae3041d7f82a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -230,10 +230,6 @@ enum amdgpu_kiq_irq {
>>   	AMDGPU_CP_KIQ_IRQ_LAST
>>   };
>>
>> -#define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */
>> -#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
>> -#define MAX_KIQ_REG_TRY 20
>> -
>>   int amdgpu_device_ip_set_clockgating_state(void *dev,
>>   					   enum amd_ip_block_type
>block_type,
>>   					   enum amd_clockgating_state state);
>diff --git
>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> index f2f358aa0597..54cadfc3b849 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> @@ -22,6 +22,9 @@
>>    */
>>
>>   #include "amdgpu.h"
>> +#define MAX_KIQ_REG_WAIT	5000 /* in usecs, 5ms */
>> +#define MAX_KIQ_REG_BAILOUT_INTERVAL	5 /* in msecs, 5ms */
>> +#define MAX_KIQ_REG_TRY 20
>>
>>   uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
>>   {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index f35d7a554ad5..3d072d26211f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -312,48 +312,6 @@ static uint32_t
>gmc_v9_0_get_invalidate_req(unsigned int vmid)
>>   	return req;
>>   }
>>
>> -static signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device
>*adev,
>> -						  uint32_t reg0, uint32_t reg1,
>> -						  uint32_t ref, uint32_t mask)
>> -{
>> -	signed long r, cnt = 0;
>> -	unsigned long flags;
>> -	uint32_t seq;
>> -	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>> -	struct amdgpu_ring *ring = &kiq->ring;
>> -
>> -	spin_lock_irqsave(&kiq->ring_lock, flags);
>> -
>> -	amdgpu_ring_alloc(ring, 32);
>> -	amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
>> -					    ref, mask);
>> -	amdgpu_fence_emit_polling(ring, &seq);
>> -	amdgpu_ring_commit(ring);
>> -	spin_unlock_irqrestore(&kiq->ring_lock, flags);
>> -
>> -	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
>> -
>> -	/* don't wait anymore for IRQ context */
>> -	if (r < 1 && in_interrupt())
>> -		goto failed_kiq;
>> -
>> -	might_sleep();
>> -
>> -	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
>> -		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
>> -		r = amdgpu_fence_wait_polling(ring, seq,
>MAX_KIQ_REG_WAIT);
>> -	}
>> -
>> -	if (cnt > MAX_KIQ_REG_TRY)
>> -		goto failed_kiq;
>> -
>> -	return 0;
>> -
>> -failed_kiq:
>> -	pr_err("failed to invalidate tlb with kiq\n");
>> -	return r;
>> -}
>> -
>>   /*
>>    * GART
>>    * VMID 0 is the physical GPU addresses as used by the kernel.
>> @@ -369,59 +327,47 @@ static signed long
>amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
>>    *
>>    * Flush the TLB for the requested page table.
>>    */
>> -static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
>> -					uint32_t vmid)
>> +static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
>> +uint32_t vmid)
>>   {
>> +	uint32_t req = gmc_v9_0_get_invalidate_req(vmid);
>> +
>>   	/* Use register 17 for GART */
>>   	const unsigned eng = 17;
>>   	unsigned i, j;
>> -	int r;
>> +
>> +	spin_lock(&adev->gmc.invalidate_lock);
>>
>>   	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
>>   		struct amdgpu_vmhub *hub = &adev->vmhub[i];
>> -		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
>> -
>> -		if (adev->gfx.kiq.ring.ready &&
>> -		    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))
>&&
>> -		    !adev->in_gpu_reset) {
>> -			r = amdgpu_kiq_reg_write_reg_wait(adev, hub-
>>vm_inv_eng0_req + eng,
>> -				hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
>> -			if (!r)
>> -				continue;
>> -		}
>>
>> -		spin_lock(&adev->gmc.invalidate_lock);
>> +		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, req);
>>
>> -		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
>> +		for (j = 0; j < adev->usec_timeout; j++) {
>> +			uint32_t tmp;
>>
>> -		/* Busy wait for ACK.*/
>> -		for (j = 0; j < 100; j++) {
>> +			/* Check if the request is completed */
>>   			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
>eng);
>> -			tmp &= 1 << vmid;
>> -			if (tmp)
>> +			tmp = REG_GET_FIELD(tmp,
>VM_INVALIDATE_ENG0_ACK,
>> +					    PER_VMID_INVALIDATE_ACK);
>> +			if (tmp & 1 << vmid)
>>   				break;
>> -			cpu_relax();
>> -		}
>> -		if (j < 100) {
>> -			spin_unlock(&adev->gmc.invalidate_lock);
>> -			continue;
>> -		}
>>
>> -		/* Wait for ACK with a delay.*/
>> -		for (j = 0; j < adev->usec_timeout; j++) {
>> -			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
>eng);
>> -			tmp &= 1 << vmid;
>> -			if (tmp)
>> +			/* Check if the request is dropped */
>> +			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_req +
>eng);
>> +			tmp = REG_GET_FIELD(tmp,
>VM_INVALIDATE_ENG0_REQ,
>> +					    PER_VMID_INVALIDATE_REQ);
>> +			if (!(tmp & 1 << vmid))
>>   				break;
>> +
>>   			udelay(1);
>>   		}
>> -		if (j < adev->usec_timeout) {
>> -			spin_unlock(&adev->gmc.invalidate_lock);
>> +		if (j < adev->usec_timeout)
>>   			continue;
>> -		}
>> -		spin_unlock(&adev->gmc.invalidate_lock);
>> -		DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>> +
>> +		DRM_ERROR("Timeout waiting for VM flush on HUB %d!\n", i);
>>   	}
>> +
>> +	spin_unlock(&adev->gmc.invalidate_lock);
>>   }
>>
>>   static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring
>> *ring,