[PATCH 3/3] drm/amdgpu: busywait KIQ register accessing v2
Christian König
christian.koenig at amd.com
Tue Oct 17 08:20:14 UTC 2017
Am 17.10.2017 um 08:37 schrieb Pixel Ding:
> From: pding <Pixel.Ding at amd.com>
>
> Register accessing is performed when IRQ is disabled. Never sleep in
> this function.
>
> Known issue: dead sleep in many use cases of index/data registers.
>
> v2: wrap polling fence functions. don't trigger IRQ for polling in
> case of wrongly fence signal.
>
> Signed-off-by: pding <Pixel.Ding at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 4 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 4 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 +---
> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 53 +++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 30 ++++++-------
> 8 files changed, 78 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index ca212ef..cc06e62 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -885,7 +885,7 @@ struct amdgpu_mec {
> struct amdgpu_kiq {
> u64 eop_gpu_addr;
> struct amdgpu_bo *eop_obj;
> - struct mutex ring_mutex;
> + spinlock_t ring_lock;
> struct amdgpu_ring ring;
> struct amdgpu_irq_src irq;
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> index 9d9965d..6d83573 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> @@ -788,7 +788,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
> struct dma_fence *f;
> struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>
> - mutex_lock(&adev->gfx.kiq.ring_mutex);
> + spin_lock(&adev->gfx.kiq.ring_lock);
> amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> amdgpu_ring_write(ring,
> @@ -796,7 +796,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
> PACKET3_INVALIDATE_TLBS_PASID(pasid));
> amdgpu_fence_emit(ring, &f);
> amdgpu_ring_commit(ring);
> - mutex_unlock(&adev->gfx.kiq.ring_mutex);
> + spin_unlock(&adev->gfx.kiq.ring_lock);
>
> r = dma_fence_wait(f, false);
> if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index edbae19..c92217f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -973,7 +973,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
> struct dma_fence *f;
> struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>
> - mutex_lock(&adev->gfx.kiq.ring_mutex);
> + spin_lock(&adev->gfx.kiq.ring_lock);
> amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> amdgpu_ring_write(ring,
> @@ -983,7 +983,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
> PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(2));
> amdgpu_fence_emit(ring, &f);
> amdgpu_ring_commit(ring);
> - mutex_unlock(&adev->gfx.kiq.ring_mutex);
> + spin_unlock(&adev->gfx.kiq.ring_lock);
>
> r = dma_fence_wait(f, false);
> if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index ab8f0d6..1197274 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
> {
> uint32_t ret;
>
> - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
> - BUG_ON(in_interrupt());
> + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
> return amdgpu_virt_kiq_rreg(adev, reg);
> - }
>
> if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
> ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
> @@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
> adev->last_mm_index = v;
> }
>
> - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
> - BUG_ON(in_interrupt());
> + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
> return amdgpu_virt_kiq_wreg(adev, reg, v);
> - }
>
> if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
> writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 688740e..68a5e90 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -169,6 +169,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
> }
>
> /**
> + * amdgpu_fence_emit_polling - emit a fence on the requeste ring
> + *
> + * @ring: ring the fence is associated with
> + * @s: resulting sequence number
> + *
> + * Emits a fence command on the requested ring (all asics).
> + * Used For polling fence.
> + * Returns 0 on success, -ENOMEM on failure.
> + */
> +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
> +{
> + uint32_t seq;
> +
> + if (!s)
> + return -EINVAL;
> +
> + seq = ++ring->fence_drv.sync_seq;
> + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
> + seq, AMDGPU_FENCE_FLAG_INT);
> +
> + *s = seq;
> +
> + return 0;
> +}
> +
> +/**
> * amdgpu_fence_schedule_fallback - schedule fallback check
> *
> * @ring: pointer to struct amdgpu_ring
> @@ -281,6 +307,33 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
> return r;
> }
>
Please add some documentation here that timeout is in usec.
> +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
> + uint32_t wait_seq,
> + signed long timeout)
> +{
> + uint32_t seq, last_seq;
> + struct amdgpu_fence_driver *drv = &ring->fence_drv;
> +
> + last_seq = atomic_read(&ring->fence_drv.last_seq);
> +
> + do {
> + seq = amdgpu_fence_read(ring);
> +
> + if (unlikely(seq == last_seq))
> + break;
That doesn't look correct to me, it will abort the busy wait as soon as
we see the last value we have seen.
> + if (seq >= wait_seq && wait_seq >= last_seq)
> + break;
> + if (seq <= last_seq &&
> + (wait_seq <= seq || wait_seq >= last_seq))
> + break;
Why not just "(int32_t)(wait_seq - seq) > 0" ? IIRC that should be
sufficient for a wrap around test.
Regards,
Christian.
> + udelay(5);
> + timeout -= 5;
> + } while (timeout > 0);
> +
> + atomic_cmpxchg(&drv->last_seq, last_seq, wait_seq);
> +
> + return timeout;
> +}
> /**
> * amdgpu_fence_count_emitted - get the count of emitted fences
> *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 4f6c68f..e5a9077 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -185,7 +185,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
> struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> int r = 0;
>
> - mutex_init(&kiq->ring_mutex);
> + spin_lock_init(&kiq->ring_lock);
>
> r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
> if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index af8e544..9de89ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -89,8 +89,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
> void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
> void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
> int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
> +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
> void amdgpu_fence_process(struct amdgpu_ring *ring);
> int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
> +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
> + uint32_t wait_seq,
> + signed long timeout);
> unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
>
> /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index ab05121..177fe10 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -22,7 +22,7 @@
> */
>
> #include "amdgpu.h"
> -#define MAX_KIQ_REG_WAIT 100000
> +#define MAX_KIQ_REG_WAIT 100000000 /* in usecs */
>
> int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
> {
> @@ -114,27 +114,24 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
> uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
> {
> signed long r;
> - uint32_t val;
> - struct dma_fence *f;
> + uint32_t val, seq;
> struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> struct amdgpu_ring *ring = &kiq->ring;
>
> BUG_ON(!ring->funcs->emit_rreg);
>
> - mutex_lock(&kiq->ring_mutex);
> + spin_lock(&kiq->ring_lock);
> amdgpu_ring_alloc(ring, 32);
> amdgpu_ring_emit_rreg(ring, reg);
> - amdgpu_fence_emit(ring, &f);
> + amdgpu_fence_emit_polling(ring, &seq);
> amdgpu_ring_commit(ring);
> - mutex_unlock(&kiq->ring_mutex);
> + spin_unlock(&kiq->ring_lock);
>
> - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
> - dma_fence_put(f);
> + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
> if (r < 1) {
> - DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> + DRM_ERROR("wait for kiq fence error: %ld\n", r);
> return ~0;
> }
> -
> val = adev->wb.wb[adev->virt.reg_val_offs];
>
> return val;
> @@ -143,23 +140,22 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
> void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
> {
> signed long r;
> - struct dma_fence *f;
> + uint32_t seq;
> struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> struct amdgpu_ring *ring = &kiq->ring;
>
> BUG_ON(!ring->funcs->emit_wreg);
>
> - mutex_lock(&kiq->ring_mutex);
> + spin_lock(&kiq->ring_lock);
> amdgpu_ring_alloc(ring, 32);
> amdgpu_ring_emit_wreg(ring, reg, v);
> - amdgpu_fence_emit(ring, &f);
> + amdgpu_fence_emit_polling(ring, &seq);
> amdgpu_ring_commit(ring);
> - mutex_unlock(&kiq->ring_mutex);
> + spin_unlock(&kiq->ring_lock);
>
> - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
> + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
> if (r < 1)
> - DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> - dma_fence_put(f);
> + DRM_ERROR("wait for kiq fence error: %ld\n", r);
> }
>
> /**
More information about the amd-gfx
mailing list