[PATCH 1/1] drm/amdgpu: Fix per-IB secure flag GFX hang

Thu Feb 27 11:56:48 UTC 2020

On Thu, Feb 27, 2020 at 06:39:03AM +0800, Tuikov, Luben wrote:
> Since commit "Move to a per-IB secure flag (TMZ)",
> we've been seeing hangs in GFX. Ray H. pointed out
> by sending a patch that we need to send FRAME
> CONTROL stop/start back-to-back, every time we
> flip the TMZ flag as per each IB we submit. That
> is, when we transition from TMZ to non-TMZ we have
> to send a stop with TMZ followed by a start with
> non-TMZ, and similarly for transitioning from
> non-TMZ into TMZ.
> 
> This patch implements this, thus fixing the GFX
> hang.
> 
> Signed-off-by: Luben Tuikov <luben.tuikov at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 87 +++++++++++++++++-------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +-
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 ++--
>  4 files changed, 79 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 4b2342d11520..16d6df3304d3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -216,40 +216,75 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>  		amdgpu_ring_emit_cntxcntl(ring, status);
>  	}
>  
> -	secure = false;
> +	/* Find the first non-preamble IB.
> +	 */
>  	for (i = 0; i < num_ibs; ++i) {
>  		ib = &ibs[i];
>  
>  		/* drop preamble IBs if we don't have a context switch */
> -		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
> -		    skip_preamble &&
> -		    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
> -		    !amdgpu_mcbp &&
> -		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> -			continue;
> -
> -		/* If this IB is TMZ, add frame TMZ start packet,
> -		 * else, turn off TMZ.
> -		 */
> -		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
> -			if (!secure) {
> -				secure = true;
> -				amdgpu_ring_emit_tmz(ring, true);
> -			}
> -		} else if (secure) {
> +		if (!(ib->flags & AMDGPU_IB_FLAG_PREAMBLE) ||
> +		    !skip_preamble ||
> +		    (status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) ||
> +		    amdgpu_mcbp ||
> +		    amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> +			break;
> +	}
> +	if (i >= num_ibs)
> +		goto Done;
> +	/* Setup initial TMZiness and send it off.
> +	 */
> +	secure = false;
> +	if (job && ring->funcs->emit_frame_cntl) {
> +		if (ib->flags & AMDGPU_IB_FLAGS_SECURE)
> +			secure = true;
> +		else
>  			secure = false;
> -			amdgpu_ring_emit_tmz(ring, false);
> -		}
> -
> -		amdgpu_ring_emit_ib(ring, job, ib, status);
> -		status &= ~AMDGPU_HAVE_CTX_SWITCH;
> +		amdgpu_ring_emit_frame_cntl(ring, true, secure);
>  	}
> +	amdgpu_ring_emit_ib(ring, job, ib, status);
> +	status &= ~AMDGPU_HAVE_CTX_SWITCH;
> +	i += 1;
> +	/* Send the rest of the IBs.
> +	 */
> +	if (job && ring->funcs->emit_frame_cntl) {
> +		for ( ; i < num_ibs; ++i) {
> +			ib = &ibs[i];
> +
> +			/* drop preamble IBs if we don't have a context switch */
> +			if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
> +			    skip_preamble &&
> +			    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
> +			    !amdgpu_mcbp &&
> +			    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> +				continue;

Snip.

> +
> +			if (!!secure ^ !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
> +				amdgpu_ring_emit_frame_cntl(ring, false, secure);
> +				secure = !secure;
> +				amdgpu_ring_emit_frame_cntl(ring, true, secure);
> +			}

That's pretty good optimization! I spend quit a few time to understand this.

>  
> -	if (secure) {
> -		secure = false;
> -		amdgpu_ring_emit_tmz(ring, false);
> +			amdgpu_ring_emit_ib(ring, job, ib, status);
> +			status &= ~AMDGPU_HAVE_CTX_SWITCH;
> +		}
> +		amdgpu_ring_emit_frame_cntl(ring, false, secure);
> +	} else {
> +		for ( ; i < num_ibs; ++i) {
> +			ib = &ibs[i];
> +
> +			/* drop preamble IBs if we don't have a context switch */
> +			if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
> +			    skip_preamble &&
> +			    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
> +			    !amdgpu_mcbp &&
> +			    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> +				continue;
> +
> +			amdgpu_ring_emit_ib(ring, job, ib, status);
> +			status &= ~AMDGPU_HAVE_CTX_SWITCH;

To pull the checking "job && ring->funcs->emit_frame_cntl" out of the loop,
that make the implemenation more duplicated like below, we have to write
the same codes multiple times. I hope the implementation is more simple and
readable. Please see my V2 patch that I just send out. We can try to use
minimum changes to fix the issue.

		for ( ; i < num_ibs; ++i) {
			ib = &ibs[i];

			/* drop preamble IBs if we don't have a context switch */
			if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
			    skip_preamble &&
			    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
			    !amdgpu_mcbp &&
			    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
				continue;

                        ...
			amdgpu_ring_emit_ib(ring, job, ib, status);

Thanks,
Ray