[PATCH] drm/amdgpu: refine kiq read register
Christian König
ckoenig.leichtzumerken at gmail.com
Sun Apr 19 17:03:17 UTC 2020
Am 17.04.20 um 17:39 schrieb Felix Kuehling:
> Am 2020-04-17 um 2:53 a.m. schrieb Yintian Tao:
>> According to the current kiq read register method,
>> there will be race condition when using KIQ to read
>> register if multiple clients want to read at same time
>> just like the expample below:
>> 1. client-A start to read REG-0 throguh KIQ
>> 2. client-A poll the seqno-0
>> 3. client-B start to read REG-1 through KIQ
>> 4. client-B poll the seqno-1
>> 5. the kiq complete these two read operation
>> 6. client-A to read the register at the wb buffer and
>> get REG-1 value
>>
>> Therefore, directly make kiq write the register value at
>> the ring buffer then there will be no race condition for
>> the wb buffer.
>>
>> v2: supply the read_clock and move the reg_val_offs back
>>
>> Signed-off-by: Yintian Tao <yttao at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 11 ++++------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 -
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++--
>> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 14 +++++-------
>> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 14 +++++-------
>> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 28 ++++++++++++------------
>> 6 files changed, 33 insertions(+), 40 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index ea576b4260a4..4e1c0239e561 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
>>
>> spin_lock_init(&kiq->ring_lock);
>>
>> - r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);
>> - if (r)
>> - return r;
>> -
>> ring->adev = NULL;
>> ring->ring_obj = NULL;
>> ring->use_doorbell = true;
>> @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
>>
>> void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
>> {
>> - amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs);
>> amdgpu_ring_fini(ring);
>> }
>>
>> @@ -675,12 +670,14 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>> uint32_t seq;
>> struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>> struct amdgpu_ring *ring = &kiq->ring;
>> + uint64_t reg_val_offs = 0;
>>
>> BUG_ON(!ring->funcs->emit_rreg);
>>
>> spin_lock_irqsave(&kiq->ring_lock, flags);
>> amdgpu_ring_alloc(ring, 32);
>> - amdgpu_ring_emit_rreg(ring, reg);
>> + reg_val_offs = (ring->wptr & ring->buf_mask) + 30;
> I think that should be (ring->wptr + 30) & ring->buf_mask. Otherwise the
> reg_val_offset can be past the end of the ring.
>
> But that still leaves a problem if another command is submitted to the
> KIQ before you read the returned reg_val from the ring. Your reg_val can
> be overwritten by the new command and you get the wrong result. Or the
> command can be overwritten with the reg_val, which will most likely hang
> the CP.
>
> You could allocate space on the KIQ ring with a NOP command to prevent
> that space from being overwritten by other commands.
Well I was under the assumption that this is actually what is done here.
If that is not the case the patch is a rather clear NAK.
Regards,
Christian.
>
> Regards,
> Felix
>
>
>> + amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
>> amdgpu_fence_emit_polling(ring, &seq);
>> amdgpu_ring_commit(ring);
>> spin_unlock_irqrestore(&kiq->ring_lock, flags);
>> @@ -707,7 +704,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>> if (cnt > MAX_KIQ_REG_TRY)
>> goto failed_kiq_read;
>>
>> - return adev->wb.wb[kiq->reg_val_offs];
>> + return ring->ring[reg_val_offs];
>>
>> failed_kiq_read:
>> pr_err("failed to read reg:%x\n", reg);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> index 634746829024..ee698f0246d8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> @@ -103,7 +103,6 @@ struct amdgpu_kiq {
>> struct amdgpu_ring ring;
>> struct amdgpu_irq_src irq;
>> const struct kiq_pm4_funcs *pmf;
>> - uint32_t reg_val_offs;
>> };
>>
>> /*
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> index f61664ee4940..a3d88f2aa9f4 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> @@ -181,7 +181,8 @@ struct amdgpu_ring_funcs {
>> void (*end_use)(struct amdgpu_ring *ring);
>> void (*emit_switch_buffer) (struct amdgpu_ring *ring);
>> void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
>> - void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
>> + void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
>> + uint64_t reg_val_offs);
>> void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
>> void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
>> uint32_t val, uint32_t mask);
>> @@ -265,7 +266,7 @@ struct amdgpu_ring {
>> #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
>> #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
>> #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
>> -#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
>> +#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o))
>> #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
>> #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
>> #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index 0a03e2ad5d95..7c9a5e440509 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -7594,21 +7594,19 @@ static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
>> amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>> }
>>
>> -static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
>> +static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
>> + uint64_t reg_val_offs)
>> {
>> - struct amdgpu_device *adev = ring->adev;
>> - struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>> -
>> amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
>> amdgpu_ring_write(ring, 0 | /* src: register*/
>> (5 << 8) | /* dst: memory */
>> (1 << 20)); /* write confirm */
>> amdgpu_ring_write(ring, reg);
>> amdgpu_ring_write(ring, 0);
>> - amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
>> - kiq->reg_val_offs * 4));
>> - amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
>> - kiq->reg_val_offs * 4));
>> + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr +
>> + reg_val_offs * 4));
>> + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr +
>> + reg_val_offs * 4));
>> }
>>
>> static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> index fc6c2f2bc76c..8e7eee7838e0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> @@ -6383,21 +6383,19 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
>> ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
>> }
>>
>> -static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
>> +static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
>> + uint64_t reg_val_offs)
>> {
>> - struct amdgpu_device *adev = ring->adev;
>> - struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>> -
>> amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
>> amdgpu_ring_write(ring, 0 | /* src: register*/
>> (5 << 8) | /* dst: memory */
>> (1 << 20)); /* write confirm */
>> amdgpu_ring_write(ring, reg);
>> amdgpu_ring_write(ring, 0);
>> - amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
>> - kiq->reg_val_offs * 4));
>> - amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
>> - kiq->reg_val_offs * 4));
>> + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr +
>> + reg_val_offs * 4));
>> + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr +
>> + reg_val_offs * 4));
>> }
>>
>> static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index 84fcf842316d..ff279b1f5c24 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -4046,11 +4046,13 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
>> uint32_t seq;
>> struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>> struct amdgpu_ring *ring = &kiq->ring;
>> + uint64_t reg_val_offs = 0;
>>
>> BUG_ON(!ring->funcs->emit_rreg);
>>
>> spin_lock_irqsave(&kiq->ring_lock, flags);
>> amdgpu_ring_alloc(ring, 32);
>> + reg_val_offs = (ring->wptr & ring->buf_mask) + 30;
>> amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
>> amdgpu_ring_write(ring, 9 | /* src: register*/
>> (5 << 8) | /* dst: memory */
>> @@ -4058,10 +4060,10 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
>> (1 << 20)); /* write confirm */
>> amdgpu_ring_write(ring, 0);
>> amdgpu_ring_write(ring, 0);
>> - amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
>> - kiq->reg_val_offs * 4));
>> - amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
>> - kiq->reg_val_offs * 4));
>> + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr +
>> + reg_val_offs * 4));
>> + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr +
>> + reg_val_offs * 4));
>> amdgpu_fence_emit_polling(ring, &seq);
>> amdgpu_ring_commit(ring);
>> spin_unlock_irqrestore(&kiq->ring_lock, flags);
>> @@ -4088,8 +4090,8 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
>> if (cnt > MAX_KIQ_REG_TRY)
>> goto failed_kiq_read;
>>
>> - return (uint64_t)adev->wb.wb[kiq->reg_val_offs] |
>> - (uint64_t)adev->wb.wb[kiq->reg_val_offs + 1 ] << 32ULL;
>> + return (uint64_t)ring->ring[reg_val_offs] |
>> + (uint64_t)ring->ring[reg_val_offs + 1 ] << 32ULL;
>>
>> failed_kiq_read:
>> pr_err("failed to read gpu clock\n");
>> @@ -5482,21 +5484,19 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
>> ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
>> }
>>
>> -static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
>> +static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
>> + uint64_t reg_val_offs)
>> {
>> - struct amdgpu_device *adev = ring->adev;
>> - struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>> -
>> amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
>> amdgpu_ring_write(ring, 0 | /* src: register*/
>> (5 << 8) | /* dst: memory */
>> (1 << 20)); /* write confirm */
>> amdgpu_ring_write(ring, reg);
>> amdgpu_ring_write(ring, 0);
>> - amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
>> - kiq->reg_val_offs * 4));
>> - amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
>> - kiq->reg_val_offs * 4));
>> + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr +
>> + reg_val_offs * 4));
>> + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr +
>> + reg_val_offs * 4));
>> }
>>
>> static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list