[PATCH 4/4] drm/amdgpu: busywait KIQ register accessing

Ding, Pixel Pixel.Ding at amd.com
Fri Oct 13 09:35:47 UTC 2017


On 13/10/2017, 5:16 PM, "Christian König" <ckoenig.leichtzumerken at gmail.com> wrote:



>Am 13.10.2017 um 10:26 schrieb Pixel Ding:
>> From: pding <Pixel.Ding at amd.com>
>>
>> Register accessing is performed when IRQ is disabled. Never sleep in
>> this function.
>>
>> Known issue: dead sleep in many use cases of index/data registers.
>>
>> Signed-off-by: pding <Pixel.Ding at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  8 ++---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 55 ++++++++++++++++++------------
>>   6 files changed, 40 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index ca212ef..f9de756 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -886,6 +886,7 @@ struct amdgpu_kiq {
>>   	u64			eop_gpu_addr;
>>   	struct amdgpu_bo	*eop_obj;
>>   	struct mutex		ring_mutex;
>
>You can remove the ring_mutex if you don't use it any more.
[Pixel] KFD still use the mutex, I didn’t change it also to spin lock, should I?
>
>> +	spinlock_t              ring_lock;
>>   	struct amdgpu_ring	ring;
>>   	struct amdgpu_irq_src	irq;
>>   };
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index ab8f0d6..1197274 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
>>   {
>>   	uint32_t ret;
>>   
>> -	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
>> -		BUG_ON(in_interrupt());
>> +	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
>>   		return amdgpu_virt_kiq_rreg(adev, reg);
>> -	}
>>   
>>   	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
>>   		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
>> @@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
>>   		adev->last_mm_index = v;
>>   	}
>>   
>> -	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
>> -		BUG_ON(in_interrupt());
>> +	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
>>   		return amdgpu_virt_kiq_wreg(adev, reg, v);
>> -	}
>>   
>>   	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
>>   		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> index 2044758..c6c7bf3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> @@ -109,7 +109,7 @@ static void amdgpu_fence_write(struct amdgpu_ring *ring, u32 seq)
>>    * Reads a fence value from memory (all asics).
>>    * Returns the value of the fence read from memory.
>>    */
>> -static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
>> +u32 amdgpu_fence_read(struct amdgpu_ring *ring)
>>   {
>>   	struct amdgpu_fence_driver *drv = &ring->fence_drv;
>>   	u32 seq = 0;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index 4f6c68f..46fa88c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -186,6 +186,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
>>   	int r = 0;
>>   
>>   	mutex_init(&kiq->ring_mutex);
>> +	spin_lock_init(&kiq->ring_lock);
>>   
>>   	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
>>   	if (r)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> index af8e544..a4fa923 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> @@ -89,6 +89,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
>>   void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
>>   void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
>>   int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
>> +u32 amdgpu_fence_read(struct amdgpu_ring *ring);
>>   void amdgpu_fence_process(struct amdgpu_ring *ring);
>>   int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
>>   unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> index ab05121..9757df1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> @@ -22,7 +22,7 @@
>>    */
>>   
>>   #include "amdgpu.h"
>> -#define MAX_KIQ_REG_WAIT	100000
>> +#define MAX_KIQ_REG_WAIT	100000000 /* in usecs */
>You want to busy wait 100 seconds for an result?
>
>Forget it that is way to long you will certainly run into problems with 
>the NMI much earlier.
>[Pixel] thanks, would change it to 10s.
>
>>   
>>   int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
>>   {
>> @@ -113,28 +113,32 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
>>   
>>   uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>>   {
>> -	signed long r;
>> -	uint32_t val;
>> -	struct dma_fence *f;
>> +	signed long timeout = usecs_to_jiffies(MAX_KIQ_REG_WAIT);
>> +	unsigned long flags;
>> +	uint32_t val, seq, wait_seq;
>>   	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>   	struct amdgpu_ring *ring = &kiq->ring;
>>   
>>   	BUG_ON(!ring->funcs->emit_rreg);
>>   
>> -	mutex_lock(&kiq->ring_mutex);
>> +	spin_lock_irqsave(&kiq->ring_lock, flags);
>>   	amdgpu_ring_alloc(ring, 32);
>>   	amdgpu_ring_emit_rreg(ring, reg);
>> -	amdgpu_fence_emit(ring, &f);
>> +	wait_seq = ++ring->fence_drv.sync_seq;
>> +	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>> +			       wait_seq, AMDGPU_FENCE_FLAG_INT);
>
>Do you still need to send an interrupt when you busy wait? I don't think so.
[Pixel]I want to let the IRQ handler to update the last_seq in case of that KIQ is used by other functionality with fence. Is it necessary?
>
>>   	amdgpu_ring_commit(ring);
>> -	mutex_unlock(&kiq->ring_mutex);
>> -
>> -	r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
>> -	dma_fence_put(f);
>> -	if (r < 1) {
>> -		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>> +	spin_unlock_irqrestore(&kiq->ring_lock, flags);
>> +	do {
>> +		seq = amdgpu_fence_read(ring);
>> +		udelay(5);
>> +		timeout -= 5;
>> +	} while (seq < wait_seq && timeout > 0);
>
>You need to correctly handle sequence wrap around here.
[Pixel] yes thanks, but I didn’t see similar case handling in normal fence handling codepath, Is there? So I assumed 32bit fence seq could not overflow for years.
>
>> +
>> +	if (timeout < 1) {
>> +		DRM_ERROR("wait for kiq fence error: %ld\n", timeout);
>>   		return ~0;
>>   	}
>> -
>>   	val = adev->wb.wb[adev->virt.reg_val_offs];
>>   
>>   	return val;
>> @@ -142,24 +146,33 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>>   
>>   void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
>>   {
>> -	signed long r;
>> +	signed long timeout = usecs_to_jiffies(MAX_KIQ_REG_WAIT);
>> +	unsigned long flags;
>> +	uint32_t seq, wait_seq;
>>   	struct dma_fence *f;
>>   	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>   	struct amdgpu_ring *ring = &kiq->ring;
>>   
>>   	BUG_ON(!ring->funcs->emit_wreg);
>>   
>> -	mutex_lock(&kiq->ring_mutex);
>> +	spin_lock_irqsave(&kiq->ring_lock, flags);
>>   	amdgpu_ring_alloc(ring, 32);
>>   	amdgpu_ring_emit_wreg(ring, reg, v);
>> -	amdgpu_fence_emit(ring, &f);
>> +	/* amdgpu_fence_emit(ring, &f); */
>> +	wait_seq = ++ring->fence_drv.sync_seq;
>> +	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>> +			       wait_seq, AMDGPU_FENCE_FLAG_INT);
>>   	amdgpu_ring_commit(ring);
>> -	mutex_unlock(&kiq->ring_mutex);
>> +	spin_unlock_irqrestore(&kiq->ring_lock, flags);
>> +
>> +	do {
>> +		seq = amdgpu_fence_read(ring);
>> +		udelay(5);
>> +		timeout -= 5;
>> +	} while (seq < wait_seq && timeout > 0);
>
>Same here. It would probably be better to have a function for this in 
>amdgpu_fence.c
[Pixel] get it, will add a wrapper.
>
>Regards,
>Christian.
>
>>   
>> -	r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
>> -	if (r < 1)
>> -		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>> -	dma_fence_put(f);
>> +	if (timeout < 1)
>> +		DRM_ERROR("wait for kiq fence error: %ld\n", timeout);
>>   }
>>   
>>   /**
>
>
>>Sincerely Yours,
>Pixel
>
>
>
>


More information about the amd-gfx mailing list