[PATCH] drm/amdgpu: fix a kcq hang issue for SRIOV

Deng, Emily Emily.Deng at amd.com
Tue Mar 27 08:23:31 UTC 2018


Hi Christian,
    I think the root cause has been found. It is a hardware restriction: a world switch must not happen between the
invalidate request and the ack. We cannot control world switches from the guest driver, but we can replace the original two
commands with a single command so that no world switch can land in between.

    The multiple-writes observation has the same root cause: the flush only fails when a world switch happens between the invalidate request and the ack.
Writing the request several times simply gives SRIOV more chances to complete an invalidate without a world switch landing
between the request and the ack.
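To make that reasoning concrete, here is a tiny user-space model of the failure mode (purely illustrative, not driver code; the vmhub_model struct, the register names and the "a world switch drops the pending ack" behaviour are simplifying assumptions). When the request write and the ack poll are emitted as two separate packets, a switch can land between them and the poll never sees the ack; when they execute back to back as one packet, the switch cannot split them:

#include <stdbool.h>
#include <stdio.h>

struct vmhub_model {
	unsigned int inv_req;	/* models the invalidate request register */
	unsigned int inv_ack;	/* models the invalidate ack register */
};

/* Model: the ack becomes visible once the request is written. */
static void write_req(struct vmhub_model *hub, unsigned int req)
{
	hub->inv_req = req;
	hub->inv_ack = req;
}

/* Model assumption: a world switch drops the pending ack. */
static void world_switch(struct vmhub_model *hub)
{
	hub->inv_ack = 0;
}

static bool wait_ack(const struct vmhub_model *hub, unsigned int mask)
{
	return (hub->inv_ack & mask) != 0;	/* single poll, for brevity */
}

int main(void)
{
	struct vmhub_model hub = { 0, 0 };
	const unsigned int vmid_mask = 1u << 3;

	/* Two separate packets: a world switch can land in between. */
	write_req(&hub, vmid_mask);
	world_switch(&hub);
	printf("split packets  : %s\n",
	       wait_ack(&hub, vmid_mask) ? "ack seen" : "hang (ack lost)");

	/* One combined packet: write and poll execute back to back. */
	write_req(&hub, vmid_mask);
	printf("combined packet: %s\n",
	       wait_ack(&hub, vmid_mask) ? "ack seen" : "hang (ack lost)");
	return 0;
}

The patch below achieves the combined case by emitting a single wait_reg_mem packet (gfx_v9_0_wait_reg_mem with the write-then-wait operation) that writes the invalidate request register and then polls the ack register, so no world switch can be scheduled in between.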

Best Wishes,
Emily Deng

> -----Original Message-----
> From: Christian König [mailto:ckoenig.leichtzumerken at gmail.com]
> Sent: Tuesday, March 27, 2018 3:48 PM
> To: Deng, Emily <Emily.Deng at amd.com>; amd-gfx at lists.freedesktop.org
> Cc: Liu, Monk <Monk.Liu at amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix a kcq hang issue for SRIOV
> 
> Am 27.03.2018 um 07:58 schrieb Emily Deng:
> > issue:
> > the vm flush on the KCQ can be preempted (unlike the GFX ring, which
> > does not allow preemption inside the ring buffer), and this leads to a
> > vm flush failure when a world switch happens during the vm flush
> > procedure (between writing the invalidate request and querying the
> > invalidate ack)
> >
> > fix:
> > separate the vm flush paths for the gfx and compute rings, and use the
> > new command format in the compute vm flush, which uses only one packet
> > so no preemption is allowed in between
> 
> NAK, as already discussed multiple times: that only circumvents the
> problem, but doesn't really fix it.
> 
> Just executing the "amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req +
> eng, req);" multiple times has the same effect and we need to figure out why.
> 
> Regards,
> Christian.
> 
> >
> > Signed-off-by: Monk Liu <Monk.Liu at amd.com>
> > Signed-off-by: Emily Deng <Emily.Deng at amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 ++
> >   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 10 +++++++++-
> >   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 18 +++++++++++++-----
> >   4 files changed, 25 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index a7e2229..986659f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -1790,6 +1790,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
> >   #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
> >   #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
> >   #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
> > +#define amdgpu_ring_emit_reg_wait1(r, d0, d1, v, m) (r)->funcs->emit_reg_wait1((r), (d0), (d1), (v), (m))
> >   #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
> >   #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
> >   #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> > index 1d0d250..d85df5d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> > @@ -152,6 +152,8 @@ struct amdgpu_ring_funcs {
> >   	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
> >   	void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
> >   			      uint32_t val, uint32_t mask);
> > +	void (*emit_reg_wait1)(struct amdgpu_ring *ring, uint32_t reg0,
> > +				uint32_t reg1, uint32_t val, uint32_t mask);
> >   	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
> >   	/* priority functions */
> >   	void (*set_priority) (struct amdgpu_ring *ring,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > index 1ae3de1..509c9d2 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > @@ -4078,6 +4078,13 @@ static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
> >   	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
> >   }
> >
> > +static void gfx_v9_0_ring_emit_reg_wait_compute(struct amdgpu_ring *ring,
> > +					uint32_t reg0, uint32_t reg1,
> > +					uint32_t val, uint32_t mask)
> > +{
> > +	gfx_v9_0_wait_reg_mem(ring, 0, 0, 1, reg0, reg1, val, mask, 0x20);
> > +}
> > +
> >   static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
> >   						 enum amdgpu_interrupt_state state)
> >   {
> > @@ -4415,7 +4422,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
> >   		7 + /* gfx_v9_0_ring_emit_hdp_flush */
> >   		5 + /* hdp invalidate */
> >   		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> > -		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> > +		(SOC15_FLUSH_GPU_TLB_NUM_WREG - 1) * 5 +
> >   		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> >   		2 + /* gfx_v9_0_ring_emit_vm_flush */
> >   		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
> > @@ -4433,6 +4440,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
> >   	.set_priority = gfx_v9_0_ring_set_priority_compute,
> >   	.emit_wreg = gfx_v9_0_ring_emit_wreg,
> >   	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
> > +	.emit_reg_wait1 = gfx_v9_0_ring_emit_reg_wait_compute,
> >   };
> >
> >   static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > index e687363..968447d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > @@ -385,11 +385,19 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
> >   	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
> >   			      upper_32_bits(pd_addr));
> >
> > -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> > -
> > -	/* wait for the invalidate to complete */
> > -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> > -				  1 << vmid, 1 << vmid);
> > +	/* A world switch must not occur between the invalidate
> > +	 * request and its ack. To work around this hardware
> > +	 * restriction, replace the original two commands with a
> > +	 * single command on the compute ring. */
> > +	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && amdgpu_sriov_vf(adev)) {
> > +		amdgpu_ring_emit_reg_wait1(ring, hub->vm_inv_eng0_req + eng,
> > +				   hub->vm_inv_eng0_ack + eng, req, 1 << vmid);
> > +	} else {
> > +		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> > +		/* wait for the invalidate to complete */
> > +		amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> > +			   1 << vmid, 1 << vmid);
> > +	}
> >
> >   	return pd_addr;
> >   }


