[PATCH] drm/amdgpu: refine kiq read register

Tao, Yintian Yintian.Tao at amd.com
Mon Apr 20 04:16:56 UTC 2020


Hi Felix

Many thanks for your review. I have modified it according to your comments and suggestion.

Best Regards
Yintian Tao

-----Original Message-----
From: Kuehling, Felix <Felix.Kuehling at amd.com> 
Sent: 2020年4月17日 23:39
To: Tao, Yintian <Yintian.Tao at amd.com>; Liu, Monk <Monk.Liu at amd.com>
Cc: amd-gfx at lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq read register

Am 2020-04-17 um 2:53 a.m. schrieb Yintian Tao:
> According to the current kiq read register method, there will be race 
> condition when using KIQ to read register if multiple clients want to 
> read at same time just like the expample below:
> 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the 
> seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll 
> the seqno-1 5. the kiq complete these two read operation 6. client-A 
> to read the register at the wb buffer and
>    get REG-1 value
>
> Therefore, directly make kiq write the register value at the ring 
> buffer then there will be no race condition for the wb buffer.
>
> v2: supply the read_clock and move the reg_val_offs back
>
> Signed-off-by: Yintian Tao <yttao at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 11 ++++------  
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  1 -  
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 14 +++++-------
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c    | 14 +++++-------
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 28 ++++++++++++------------
>  6 files changed, 33 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index ea576b4260a4..4e1c0239e561 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device 
> *adev,
>  
>  	spin_lock_init(&kiq->ring_lock);
>  
> -	r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);
> -	if (r)
> -		return r;
> -
>  	ring->adev = NULL;
>  	ring->ring_obj = NULL;
>  	ring->use_doorbell = true;
> @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device 
> *adev,
>  
>  void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)  {
> -	amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs);
>  	amdgpu_ring_fini(ring);
>  }
>  
> @@ -675,12 +670,14 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>  	uint32_t seq;
>  	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>  	struct amdgpu_ring *ring = &kiq->ring;
> +	uint64_t reg_val_offs = 0;
>  
>  	BUG_ON(!ring->funcs->emit_rreg);
>  
>  	spin_lock_irqsave(&kiq->ring_lock, flags);
>  	amdgpu_ring_alloc(ring, 32);
> -	amdgpu_ring_emit_rreg(ring, reg);
> +	reg_val_offs = (ring->wptr & ring->buf_mask) + 30;

I think that should be (ring->wptr + 30) & ring->buf_mask. Otherwise the reg_val_offset can be past the end of the ring.

But that still leaves a problem if another command is submitted to the KIQ before you read the returned reg_val from the ring. Your reg_val can be overwritten by the new command and you get the wrong result. Or the command can be overwritten with the reg_val, which will most likely hang the CP.

You could allocate space on the KIQ ring with a NOP command to prevent that space from being overwritten by other commands.

Regards,
  Felix


> +	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
>  	amdgpu_fence_emit_polling(ring, &seq);
>  	amdgpu_ring_commit(ring);
>  	spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -707,7 +704,7 @@ 
> uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>  	if (cnt > MAX_KIQ_REG_TRY)
>  		goto failed_kiq_read;
>  
> -	return adev->wb.wb[kiq->reg_val_offs];
> +	return ring->ring[reg_val_offs];
>  
>  failed_kiq_read:
>  	pr_err("failed to read reg:%x\n", reg); diff --git 
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 634746829024..ee698f0246d8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -103,7 +103,6 @@ struct amdgpu_kiq {
>  	struct amdgpu_ring	ring;
>  	struct amdgpu_irq_src	irq;
>  	const struct kiq_pm4_funcs *pmf;
> -	uint32_t			reg_val_offs;
>  };
>  
>  /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index f61664ee4940..a3d88f2aa9f4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -181,7 +181,8 @@ struct amdgpu_ring_funcs {
>  	void (*end_use)(struct amdgpu_ring *ring);
>  	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
>  	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
> -	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
> +	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
> +			  uint64_t reg_val_offs);
>  	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
>  	void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
>  			      uint32_t val, uint32_t mask); @@ -265,7 +266,7 @@ struct 
> amdgpu_ring {  #define amdgpu_ring_emit_hdp_flush(r) 
> (r)->funcs->emit_hdp_flush((r))  #define 
> amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
>  #define amdgpu_ring_emit_cntxcntl(r, d) 
> (r)->funcs->emit_cntxcntl((r), (d)) -#define amdgpu_ring_emit_rreg(r, 
> d) (r)->funcs->emit_rreg((r), (d))
> +#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), 
> +(d), (o))
>  #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), 
> (d), (v))  #define amdgpu_ring_emit_reg_wait(r, d, v, m) 
> (r)->funcs->emit_reg_wait((r), (d), (v), (m))  #define 
> amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) 
> (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) diff 
> --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 0a03e2ad5d95..7c9a5e440509 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -7594,21 +7594,19 @@ static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
>  	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));  }
>  
> -static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, 
> uint32_t reg)
> +static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
> +				     uint64_t reg_val_offs)
>  {
> -	struct amdgpu_device *adev = ring->adev;
> -	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> -
>  	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
>  	amdgpu_ring_write(ring, 0 |	/* src: register*/
>  				(5 << 8) |	/* dst: memory */
>  				(1 << 20));	/* write confirm */
>  	amdgpu_ring_write(ring, reg);
>  	amdgpu_ring_write(ring, 0);
> -	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
> -				kiq->reg_val_offs * 4));
> -	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
> -				kiq->reg_val_offs * 4));
> +	amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr +
> +					      reg_val_offs * 4));
> +	amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr +
> +					      reg_val_offs * 4));
>  }
>  
>  static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, 
> uint32_t reg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index fc6c2f2bc76c..8e7eee7838e0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6383,21 +6383,19 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
>  		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;  }
>  
> -static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, 
> uint32_t reg)
> +static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
> +				    uint64_t reg_val_offs)
>  {
> -	struct amdgpu_device *adev = ring->adev;
> -	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> -
>  	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
>  	amdgpu_ring_write(ring, 0 |	/* src: register*/
>  				(5 << 8) |	/* dst: memory */
>  				(1 << 20));	/* write confirm */
>  	amdgpu_ring_write(ring, reg);
>  	amdgpu_ring_write(ring, 0);
> -	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
> -				kiq->reg_val_offs * 4));
> -	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
> -				kiq->reg_val_offs * 4));
> +	amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr +
> +					      reg_val_offs * 4));
> +	amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr +
> +					      reg_val_offs * 4));
>  }
>  
>  static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, 
> uint32_t reg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 84fcf842316d..ff279b1f5c24 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -4046,11 +4046,13 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
>  	uint32_t seq;
>  	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>  	struct amdgpu_ring *ring = &kiq->ring;
> +	uint64_t reg_val_offs = 0;
>  
>  	BUG_ON(!ring->funcs->emit_rreg);
>  
>  	spin_lock_irqsave(&kiq->ring_lock, flags);
>  	amdgpu_ring_alloc(ring, 32);
> +	reg_val_offs = (ring->wptr & ring->buf_mask) + 30;
>  	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
>  	amdgpu_ring_write(ring, 9 |	/* src: register*/
>  				(5 << 8) |	/* dst: memory */
> @@ -4058,10 +4060,10 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
>  				(1 << 20));	/* write confirm */
>  	amdgpu_ring_write(ring, 0);
>  	amdgpu_ring_write(ring, 0);
> -	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
> -				kiq->reg_val_offs * 4));
> -	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
> -				kiq->reg_val_offs * 4));
> +	amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr +
> +					      reg_val_offs * 4));
> +	amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr +
> +					      reg_val_offs * 4));
>  	amdgpu_fence_emit_polling(ring, &seq);
>  	amdgpu_ring_commit(ring);
>  	spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -4088,8 +4090,8 
> @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
>  	if (cnt > MAX_KIQ_REG_TRY)
>  		goto failed_kiq_read;
>  
> -	return (uint64_t)adev->wb.wb[kiq->reg_val_offs] |
> -		(uint64_t)adev->wb.wb[kiq->reg_val_offs + 1 ] << 32ULL;
> +	return (uint64_t)ring->ring[reg_val_offs] |
> +		(uint64_t)ring->ring[reg_val_offs + 1 ] << 32ULL;
>  
>  failed_kiq_read:
>  	pr_err("failed to read gpu clock\n"); @@ -5482,21 +5484,19 @@ static 
> void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
>  		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;  }
>  
> -static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, 
> uint32_t reg)
> +static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
> +				    uint64_t reg_val_offs)
>  {
> -	struct amdgpu_device *adev = ring->adev;
> -	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> -
>  	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
>  	amdgpu_ring_write(ring, 0 |	/* src: register*/
>  				(5 << 8) |	/* dst: memory */
>  				(1 << 20));	/* write confirm */
>  	amdgpu_ring_write(ring, reg);
>  	amdgpu_ring_write(ring, 0);
> -	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
> -				kiq->reg_val_offs * 4));
> -	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
> -				kiq->reg_val_offs * 4));
> +	amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr +
> +					      reg_val_offs * 4));
> +	amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr +
> +					      reg_val_offs * 4));
>  }
>  
>  static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, 
> uint32_t reg,


More information about the amd-gfx mailing list