[PATCH v2] drm/amdgpu: fix the gfx hang while using the per-IB secure flag (v2)
Luben Tuikov
luben.tuikov at amd.com
Fri Feb 28 00:40:08 UTC 2020
On 2020-02-27 5:10 p.m., Luben Tuikov wrote:
> On 2020-02-27 6:47 a.m., Christian König wrote:
>> Am 27.02.20 um 12:38 schrieb Huang Rui:
>>> Since 6643ba1, frame control packets are only issued in the presence of
>>> secure IB(s). This causes hangs on some hardware (eg: Raven1). This patch
>>> restores the unconditional issuing of frame control packets, while keeping
>>> the per-IB logic regarding the secure flag.
>>>
>>> Fixes: 6643ba1 ("drm/amdgpu: Move to a per-IB secure flag (TMZ)")
>>>
>>> v1 -> v2:
>>> As suggested by Luben, and accepting part of the implementation from his patch:
>>> - Put "secure" close to the loop and use the optimization.
>>> - Change "secure" to bool again, and move "secure == -1" out of the loop.
>>>
>>> Reported-and-Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
>>> Signed-off-by: Huang Rui <ray.huang at amd.com>
>>> Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 29 +++++++++++++++--------------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++--
>>> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 15 ++++++++-------
>>> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 13 +++++++------
>>> 4 files changed, 33 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> index 4b2342d..0f4909a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> @@ -216,7 +216,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>> amdgpu_ring_emit_cntxcntl(ring, status);
>>> }
>>>
>>> + /* Setup initial TMZiness and send it off.
>>> + */
>>> secure = false;
>>> + if (job && ring->funcs->emit_frame_cntl) {
>>
>> Does anybody remember why we check the job here in the first place?
>
> Ring tests do not use a job.
>
> Having said this, I would've written a separate
> submit for tests and a different one for jobs.
>
>>
>> Independent of that I think that the check if
>> ring->funcs->emit_frame_cntl should be moved into the
>> amdgpu_ring_emit_frame_cntl() function so that we don't need to repeat
>> that over and over again.
>
> What if that function is called from a loop? Keeping the check at the
> call site makes it a loop-invariant conditional that can be evaluated
> once, early on. It's a classic optimization pattern.
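To make that concrete, here is roughly the shape I have in mind (just a
sketch reusing the names from the patch; "has_frame_cntl" is a made-up
local, and the preamble/cntxcntl handling is elided):

    /* Evaluate the loop-invariant capability check once, up front,
     * instead of re-checking ring->funcs->emit_frame_cntl inside the
     * callee on every iteration of the IB loop.
     */
    bool has_frame_cntl = job && ring->funcs->emit_frame_cntl;

    if (has_frame_cntl)
            amdgpu_ring_emit_frame_cntl(ring, true, secure);

    for (i = 0; i < num_ibs; ++i) {
            ib = &ibs[i];

            /* Only emit end/start packets when the TMZ state flips. */
            if (has_frame_cntl &&
                secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
                    amdgpu_ring_emit_frame_cntl(ring, false, secure);
                    secure = !secure;
                    amdgpu_ring_emit_frame_cntl(ring, true, secure);
            }

            amdgpu_ring_emit_ib(ring, job, ib, status);
    }

    if (has_frame_cntl)
            amdgpu_ring_emit_frame_cntl(ring, false, secure);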
>
>>
>> If amdgpu_ring_emit_frame_cntl() is still a macro then that is probably
>> also a good opportunity to change that.
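If I read that right, the shape would be something like the following
(just a sketch, untested), replacing the macro in amdgpu_ring.h with a
static inline and dropping the "ring->funcs->emit_frame_cntl" half of the
check at the call sites:

    static inline void amdgpu_ring_emit_frame_cntl(struct amdgpu_ring *ring,
                                                   bool start, bool secure)
    {
            /* Fold the capability check into the helper so callers
             * don't have to repeat it.
             */
            if (ring->funcs->emit_frame_cntl)
                    ring->funcs->emit_frame_cntl(ring, start, secure);
    }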
>>
>>> + secure = ib->flags & AMDGPU_IB_FLAGS_SECURE ? true : false;
Tim says no need for the ternary here.
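Since "secure" is a bool, the conversion is implicit anyway, i.e. simply:

    secure = ib->flags & AMDGPU_IB_FLAGS_SECURE;

(or with a "!!" if we want to make the narrowing explicit).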
>>> + amdgpu_ring_emit_frame_cntl(ring, true, secure);
>>> + }
>>> +
>>> for (i = 0; i < num_ibs; ++i) {
>>> ib = &ibs[i];
>>>
>>> @@ -228,27 +235,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>> !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
>>> continue;
>>>
>>> - /* If this IB is TMZ, add frame TMZ start packet,
>>> - * else, turn off TMZ.
>>> - */
>>> - if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
>>> - if (!secure) {
>>> - secure = true;
>>> - amdgpu_ring_emit_tmz(ring, true);
>>> + if (job && ring->funcs->emit_frame_cntl) {
>>> + if (!!secure ^ !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
>>
>> Maybe write this as (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)).
>> That is probably easier to understand.
>
> Thanks! This is indeed better!
>
> I'll resubmit with your suggestions and another good one from Tim.
Sorry, I thought this was a comment on my patch as the expression was taken
right out of it.
>>
>>> + amdgpu_ring_emit_frame_cntl(ring, false, secure);
>>> + secure = !secure;
>>> + amdgpu_ring_emit_frame_cntl(ring, true, secure);
>>> }
>>> - } else if (secure) {
>>> - secure = false;
>>> - amdgpu_ring_emit_tmz(ring, false);
>>> }
>>>
>>> amdgpu_ring_emit_ib(ring, job, ib, status);
>>> status &= ~AMDGPU_HAVE_CTX_SWITCH;
>>> }
>>>
>>> - if (secure) {
>>> - secure = false;
>>> - amdgpu_ring_emit_tmz(ring, false);
>>> - }
>>> + if (job && ring->funcs->emit_frame_cntl)
>>> + amdgpu_ring_emit_frame_cntl(ring, false,
>>> + secure ? true : false);
"secure" is already a boolean.
Regards,
Luben
>>>
>>> #ifdef CONFIG_X86_64
>>> if (!(adev->flags & AMD_IS_APU))
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> index 24caff0..4d019d6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> @@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
>>> void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
>>> uint32_t reg0, uint32_t reg1,
>>> uint32_t ref, uint32_t mask);
>>> - void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
>>> + void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
>>> + bool secure);
>>> /* priority functions */
>>> void (*set_priority) (struct amdgpu_ring *ring,
>>> enum drm_sched_priority priority);
>>> @@ -247,7 +248,7 @@ struct amdgpu_ring {
>>> #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
>>> #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
>>> #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
>>> -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
>>> +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
>>> #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>>> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>>> #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> index 7b61583..748ac35 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> @@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
>>> static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
>>> static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
>>> static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
>>> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
>>> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
>>>
>>> static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
>>> {
>>> @@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
>>> sizeof(de_payload) >> 2);
>>> }
>>>
>>> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
>>> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
>>> + bool secure)
>>> {
>>> - if (amdgpu_is_tmz(ring->adev)) {
>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>>> - amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
>>> - }
>>> + uint32_t v = secure ? FRAME_TMZ : 0;
>>> +
>>> + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>>> + amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>>> }
>>>
>>> static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
>>> @@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>>> .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
>>> .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
>>> .preempt_ib = gfx_v10_0_ring_preempt_ib,
>>> - .emit_tmz = gfx_v10_0_ring_emit_tmz,
>>> + .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
>>> .emit_wreg = gfx_v10_0_ring_emit_wreg,
>>> .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>>> .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> index 1c7a16b..fbde712 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> @@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
>>> amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
>>> }
>>>
>>> -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
>>> +static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
>>> + bool secure)
>>> {
>>> - if (amdgpu_is_tmz(ring->adev)) {
>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>>> - amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
>>> - }
>>> + uint32_t v = secure ? FRAME_TMZ : 0;
>>> +
>>> + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>>> + amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>>> }
>>>
>>> static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
>>> @@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>>> .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
>>> .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
>>> .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
>>> - .emit_tmz = gfx_v9_0_ring_emit_tmz,
>>> + .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
>>> .emit_wreg = gfx_v9_0_ring_emit_wreg,
>>> .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
>>> .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
>>
>