[PATCH 4/4] drm/amdgpu: busywait KIQ register accessing

Liu, Monk Monk.Liu at amd.com
Mon Oct 16 10:04:55 UTC 2017


alright, go for unify scheme then, use busy wait for all user.

-----Original Message-----
From: Ding, Pixel 
Sent: 2017年10月16日 16:55
To: Liu, Monk <Monk.Liu at amd.com>
Cc: Li, Bingley <Bingley.Li at amd.com>; amd-gfx at lists.freedesktop.org
Subject: Re: [PATCH 4/4] drm/amdgpu: busywait KIQ register accessing

Hi Monk,

So far I notice that it’s almost impossible to have both methods working together.

For example, if the num_fence_mask is 2, and there’re a series of sequence number like “fence seq 1, busy wait seq 2, busy wait seq 3, fence seq 4”. Then if IRQ handler for seq 1 catch the HW seq number as 2, the fence slot for seq 4 will be wrongly waken up.

If only dma_fence slots are used, the emit function ensures that each slot only has one valid seq number by waiting for the old one. But things go complicated if there’s busy waiting which registers seq number at the same time.


— 
Sincerely Yours,
Pixel







On 16/10/2017, 9:25 AM, "Ding, Pixel" <Pixel.Ding at amd.com> wrote:

>OK, I would keep both methods working together.
>>Sincerely Yours,
>Pixel
>
>
>
>
>
>
>
>On 13/10/2017, 6:04 PM, "Liu, Monk" <Monk.Liu at amd.com> wrote:
>
>>Why there will be racing issue ?
>>
>>Polling or sleep wait only have different result for the caller, not the job scheduled to KIQ 
>>
>>The sleep waiting is synchroniz sleep, it just release CPU resource to other process/thread, so the order is guaranteed 
>>
>>BR Monk
>>
>>-----Original Message-----
>>From: Ding, Pixel 
>>Sent: 2017年10月13日 17:39
>>To: Liu, Monk <Monk.Liu at amd.com>; amd-gfx at lists.freedesktop.org
>>Cc: Li, Bingley <Bingley.Li at amd.com>
>>Subject: Re: [PATCH 4/4] drm/amdgpu: busywait KIQ register accessing
>>
>>I’m afraid there’s racing issue if polling and IRQ use cases are mixed at the same time.
>>
>>The original implementation is as your suggested. Is there any benefit to keep to sleepy version?
>>>>Sincerely Yours,
>>Pixel
>>
>>
>>
>>
>>
>>
>>
>>
>>On 13/10/2017, 5:34 PM, "Liu, Monk" <Monk.Liu at amd.com> wrote:
>>
>>>Pixel
>>>
>>>I don't think this will work well, my suggestion is you add a new function like:
>>>amdgpu_wreg_kiq_busy(),
>>>
>>>which will write registers through KIQ and use polling/busy wait, and the original amdgu_wreg_no_kiq() can be still there.
>>>
>>>When you need to disable sleep like in IRQ CONTEXT, you can call wreg_kiq_busy() or wreg_no_kiq(),
>>>
>>>But don't just change the original function 
>>>
>>>BR Monk
>>>
>>>-----Original Message-----
>>>From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf Of Pixel Ding
>>>Sent: 2017年10月13日 16:26
>>>To: amd-gfx at lists.freedesktop.org
>>>Cc: Ding, Pixel <Pixel.Ding at amd.com>; Li, Bingley <Bingley.Li at amd.com>
>>>Subject: [PATCH 4/4] drm/amdgpu: busywait KIQ register accessing
>>>
>>>From: pding <Pixel.Ding at amd.com>
>>>
>>>Register accessing is performed when IRQ is disabled. Never sleep in this function.
>>>
>>>Known issue: dead sleep in many use cases of index/data registers.
>>>
>>>Signed-off-by: pding <Pixel.Ding at amd.com>
>>>---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  8 ++---  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  |  2 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    |  1 +
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  1 +
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 55 ++++++++++++++++++------------
>>> 6 files changed, 40 insertions(+), 28 deletions(-)
>>>
>>>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>index ca212ef..f9de756 100644
>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>@@ -886,6 +886,7 @@ struct amdgpu_kiq {
>>> 	u64			eop_gpu_addr;
>>> 	struct amdgpu_bo	*eop_obj;
>>> 	struct mutex		ring_mutex;
>>>+	spinlock_t              ring_lock;
>>> 	struct amdgpu_ring	ring;
>>> 	struct amdgpu_irq_src	irq;
>>> };
>>>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>index ab8f0d6..1197274 100644
>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>@@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,  {
>>> 	uint32_t ret;
>>> 
>>>-	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
>>>-		BUG_ON(in_interrupt());
>>>+	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
>>> 		return amdgpu_virt_kiq_rreg(adev, reg);
>>>-	}
>>> 
>>> 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
>>> 		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); @@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
>>> 		adev->last_mm_index = v;
>>> 	}
>>> 
>>>-	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
>>>-		BUG_ON(in_interrupt());
>>>+	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
>>> 		return amdgpu_virt_kiq_wreg(adev, reg, v);
>>>-	}
>>> 
>>> 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
>>> 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>>>index 2044758..c6c7bf3 100644
>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>>>@@ -109,7 +109,7 @@ static void amdgpu_fence_write(struct amdgpu_ring *ring, u32 seq)
>>>  * Reads a fence value from memory (all asics).
>>>  * Returns the value of the fence read from memory.
>>>  */
>>>-static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
>>>+u32 amdgpu_fence_read(struct amdgpu_ring *ring)
>>> {
>>> 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
>>> 	u32 seq = 0;
>>>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>>index 4f6c68f..46fa88c 100644
>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>>@@ -186,6 +186,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
>>> 	int r = 0;
>>> 
>>> 	mutex_init(&kiq->ring_mutex);
>>>+	spin_lock_init(&kiq->ring_lock);
>>> 
>>> 	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
>>> 	if (r)
>>>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>index af8e544..a4fa923 100644
>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>@@ -89,6 +89,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,  void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);  void amdgpu_fence_driver_resume(struct amdgpu_device *adev);  int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
>>>+u32 amdgpu_fence_read(struct amdgpu_ring *ring);
>>> void amdgpu_fence_process(struct amdgpu_ring *ring);  int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);  unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>>index ab05121..9757df1 100644
>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>>@@ -22,7 +22,7 @@
>>>  */
>>> 
>>> #include "amdgpu.h"
>>>-#define MAX_KIQ_REG_WAIT	100000
>>>+#define MAX_KIQ_REG_WAIT	100000000 /* in usecs */
>>> 
>>> int amdgpu_allocate_static_csa(struct amdgpu_device *adev)  { @@ -113,28 +113,32 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
>>> 
>>> uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)  {
>>>-	signed long r;
>>>-	uint32_t val;
>>>-	struct dma_fence *f;
>>>+	signed long timeout = usecs_to_jiffies(MAX_KIQ_REG_WAIT);
>>>+	unsigned long flags;
>>>+	uint32_t val, seq, wait_seq;
>>> 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>> 	struct amdgpu_ring *ring = &kiq->ring;
>>> 
>>> 	BUG_ON(!ring->funcs->emit_rreg);
>>> 
>>>-	mutex_lock(&kiq->ring_mutex);
>>>+	spin_lock_irqsave(&kiq->ring_lock, flags);
>>> 	amdgpu_ring_alloc(ring, 32);
>>> 	amdgpu_ring_emit_rreg(ring, reg);
>>>-	amdgpu_fence_emit(ring, &f);
>>>+	wait_seq = ++ring->fence_drv.sync_seq;
>>>+	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>>>+			       wait_seq, AMDGPU_FENCE_FLAG_INT);
>>> 	amdgpu_ring_commit(ring);
>>>-	mutex_unlock(&kiq->ring_mutex);
>>>-
>>>-	r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
>>>-	dma_fence_put(f);
>>>-	if (r < 1) {
>>>-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>>>+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
>>>+	do {
>>>+		seq = amdgpu_fence_read(ring);
>>>+		udelay(5);
>>>+		timeout -= 5;
>>>+	} while (seq < wait_seq && timeout > 0);
>>>+
>>>+	if (timeout < 1) {
>>>+		DRM_ERROR("wait for kiq fence error: %ld\n", timeout);
>>> 		return ~0;
>>> 	}
>>>-
>>> 	val = adev->wb.wb[adev->virt.reg_val_offs];
>>> 
>>> 	return val;
>>>@@ -142,24 +146,33 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>>> 
>>> void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)  {
>>>-	signed long r;
>>>+	signed long timeout = usecs_to_jiffies(MAX_KIQ_REG_WAIT);
>>>+	unsigned long flags;
>>>+	uint32_t seq, wait_seq;
>>> 	struct dma_fence *f;
>>> 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>> 	struct amdgpu_ring *ring = &kiq->ring;
>>> 
>>> 	BUG_ON(!ring->funcs->emit_wreg);
>>> 
>>>-	mutex_lock(&kiq->ring_mutex);
>>>+	spin_lock_irqsave(&kiq->ring_lock, flags);
>>> 	amdgpu_ring_alloc(ring, 32);
>>> 	amdgpu_ring_emit_wreg(ring, reg, v);
>>>-	amdgpu_fence_emit(ring, &f);
>>>+	/* amdgpu_fence_emit(ring, &f); */
>>>+	wait_seq = ++ring->fence_drv.sync_seq;
>>>+	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>>>+			       wait_seq, AMDGPU_FENCE_FLAG_INT);
>>> 	amdgpu_ring_commit(ring);
>>>-	mutex_unlock(&kiq->ring_mutex);
>>>+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
>>>+
>>>+	do {
>>>+		seq = amdgpu_fence_read(ring);
>>>+		udelay(5);
>>>+		timeout -= 5;
>>>+	} while (seq < wait_seq && timeout > 0);
>>> 
>>>-	r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
>>>-	if (r < 1)
>>>-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>>>-	dma_fence_put(f);
>>>+	if (timeout < 1)
>>>+		DRM_ERROR("wait for kiq fence error: %ld\n", timeout);
>>> }
>>> 
>>> /**
>>>--
>>>2.9.5
>>>
>>>_______________________________________________
>>>amd-gfx mailing list
>>>amd-gfx at lists.freedesktop.org
>>>https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list