[PATCH v2] drm/amdgpu: fix the gfx hang while use per-ib secure flag (v2)

Thu Feb 27 12:06:16 UTC 2020

On Thu, Feb 27, 2020 at 07:47:17PM +0800, Koenig, Christian wrote:
> Am 27.02.20 um 12:38 schrieb Huang Rui:
> > Since 6643ba1 frame control packets are only issued in presence of secure IB(s).
> > This causes hangs on some hardware (eg: Raven1). This patch restores the
> > unconditionnal frame control packets issuing, that's to keep the per-IB logic
> > regarding the secure flag.
> >
> > Fixes: 6643ba1 drm/amdgpu: Move to a per-IB secure flag (TMZ)
> >
> > v1 -> v2:
> > As suggested by Luben, and accept part of implemetation from this patch:
> > - Put "secure" closed to the loop and use optimization
> > - Change "secure" to bool again, and move "secure == -1" out of loop.
> >
> > Reported-and-Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
> > Signed-off-by: Huang Rui <ray.huang at amd.com>
> > Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 29 +++++++++++++++--------------
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++++++++-------
> >   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 +++++++------
> >   4 files changed, 33 insertions(+), 29 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > index 4b2342d..0f4909a 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > @@ -216,7 +216,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
> >   		amdgpu_ring_emit_cntxcntl(ring, status);
> >   	}
> >   
> > +	/* Setup initial TMZiness and send it off.
> > +	 */
> >   	secure = false;
> > +	if (job && ring->funcs->emit_frame_cntl) {
> 
> Does anybody remember why we check the job here in the first place?

Align with previous logic, we only issue this packet while it's user mode
submission. Looks ib tests needn't this.

> 
> Independent of that I think that the check if 
> ring->funcs->emit_frame_cntl should be moved into the 
> amdgpu_ring_emit_frame_cntl() function so that we don't need to repeat 
> that over and over again.
> 
> If amdgpu_ring_emit_frame_cntl() is still a macro then that is probably 
> also a good opportunity to change that.

Agree, this looks better.

> 
> > +		secure = ib->flags & AMDGPU_IB_FLAGS_SECURE ? true : false;
> > +		amdgpu_ring_emit_frame_cntl(ring, true, secure);
> > +	}
> > +
> >   	for (i = 0; i < num_ibs; ++i) {
> >   		ib = &ibs[i];
> >   
> > @@ -228,27 +235,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
> >   		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> >   			continue;
> >   
> > -		/* If this IB is TMZ, add frame TMZ start packet,
> > -		 * else, turn off TMZ.
> > -		 */
> > -		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
> > -			if (!secure) {
> > -				secure = true;
> > -				amdgpu_ring_emit_tmz(ring, true);
> > +		if (job && ring->funcs->emit_frame_cntl) {
> > +			if (!!secure ^ !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
> 
> Maybe write this as (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)). 
> That is probably easier to understand.

Actually, I spend quit a few minutes to understand previous checking. :-)
I am fine to change if Lunben has no objection.

Thanks,
Ray

> 
> Regards,
> Christian.
> 
> > +				amdgpu_ring_emit_frame_cntl(ring, false, secure);
> > +				secure = !secure;
> > +				amdgpu_ring_emit_frame_cntl(ring, true, secure);
> >   			}
> > -		} else if (secure) {
> > -			secure = false;
> > -			amdgpu_ring_emit_tmz(ring, false);
> >   		}
> >   
> >   		amdgpu_ring_emit_ib(ring, job, ib, status);
> >   		status &= ~AMDGPU_HAVE_CTX_SWITCH;
> >   	}
> >   
> > -	if (secure) {
> > -		secure = false;
> > -		amdgpu_ring_emit_tmz(ring, false);
> > -	}
> > +	if (job && ring->funcs->emit_frame_cntl)
> > +		amdgpu_ring_emit_frame_cntl(ring, false,
> > +					    secure ? true : false);
> >   
> >   #ifdef CONFIG_X86_64
> >   	if (!(adev->flags & AMD_IS_APU))
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> > index 24caff0..4d019d6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> > @@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
> >   	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
> >   					uint32_t reg0, uint32_t reg1,
> >   					uint32_t ref, uint32_t mask);
> > -	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
> > +	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
> > +				bool secure);
> >   	/* priority functions */
> >   	void (*set_priority) (struct amdgpu_ring *ring,
> >   			      enum drm_sched_priority priority);
> > @@ -247,7 +248,7 @@ struct amdgpu_ring {
> >   #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
> >   #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
> >   #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
> > -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
> > +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
> >   #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
> >   #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
> >   #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index 7b61583..748ac35 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
> >   static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
> >   static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
> >   static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
> > -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
> > +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
> >   
> >   static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
> >   {
> > @@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
> >   					   sizeof(de_payload) >> 2);
> >   }
> >   
> > -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
> > +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
> > +				    bool secure)
> >   {
> > -	if (amdgpu_is_tmz(ring->adev)) {
> > -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> > -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
> > -	}
> > +	uint32_t v = secure ? FRAME_TMZ : 0;
> > +
> > +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> > +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
> >   }
> >   
> >   static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
> > @@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
> >   	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
> >   	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
> >   	.preempt_ib = gfx_v10_0_ring_preempt_ib,
> > -	.emit_tmz = gfx_v10_0_ring_emit_tmz,
> > +	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
> >   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
> >   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> >   	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > index 1c7a16b..fbde712 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > @@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
> >   	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
> >   }
> >   
> > -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
> > +static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
> > +				   bool secure)
> >   {
> > -	if (amdgpu_is_tmz(ring->adev)) {
> > -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> > -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
> > -	}
> > +	uint32_t v = secure ? FRAME_TMZ : 0;
> > +
> > +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> > +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
> >   }
> >   
> >   static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
> > @@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
> >   	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
> >   	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
> >   	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
> > -	.emit_tmz = gfx_v9_0_ring_emit_tmz,
> > +	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
> >   	.emit_wreg = gfx_v9_0_ring_emit_wreg,
> >   	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
> >   	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
>