[PATCH] drm/amdgpu: Disable gfx IB preemption with gang submit

Christian König christian.koenig at amd.com
Wed Aug 27 09:21:55 UTC 2025


On 27.08.25 06:34, Yifan Zha wrote:
> [Why]
> Gang submit conflicts with IB preemption, which
> breaks the synchronization between engines.
> Specifically, MCBP is enabled by default under SRIOV,
> which causes the gang submit use case to fail.

Gang submit is broken by design on SRIOV.

As long as nobody comes up with an idea to fix this generally, I will systematically reject all patches trying to mitigate the situation.

What we could do is to return an error on gang submit under SRIOV.

> 
> [How]
> Disable gfx IB preemption for jobs that are part of a gang submit.
> 
> Signed-off-by: Yifan Zha <Yifan.Zha at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 +++-
>  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 +++++---
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 4 +++-
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 3 ++-
>  4 files changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 264183ab24ec..c4506be4d723 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -8653,7 +8653,9 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
>  
>  	control |= ib->length_dw | (vmid << 24);
>  
> -	if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
> +	if (ring->adev->gfx.mcbp &&
> +		(ib->flags & AMDGPU_IB_FLAG_PREEMPT) &&
> +		(job->gang_submit != &job->base.s_fence->scheduled)) {

That needs to be handled at the higher level.

>  		control |= INDIRECT_BUFFER_PRE_ENB(1);
>  
>  		if (flags & AMDGPU_IB_PREEMPTED)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 456ba758fa94..725b0e4d9bf8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -2424,7 +2424,7 @@ static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
>  			if (version_minor == 3)
>  				gfx_v11_0_load_rlcp_rlcv_microcode(adev);
>  		}
> -		
> +

Unrelated whitespace change.

>  		return 0;
>  	}
>  
> @@ -3872,7 +3872,7 @@ static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
>  	}
>  
>  	memcpy(fw, fw_data, fw_size);
> -	
> +

Ditto.

Regards,
Christian.

>  	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
>  	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
>  
> @@ -5855,7 +5855,9 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
>  
>  	control |= ib->length_dw | (vmid << 24);
>  
> -	if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
> +	if (ring->adev->gfx.mcbp &&
> +		(ib->flags & AMDGPU_IB_FLAG_PREEMPT) &&
> +		(job->gang_submit != &job->base.s_fence->scheduled)) {
>  		control |= INDIRECT_BUFFER_PRE_ENB(1);
>  
>  		if (flags & AMDGPU_IB_PREEMPTED)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 367449d8061b..843761947b73 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6051,7 +6051,9 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
>  
>  	control |= ib->length_dw | (vmid << 24);
>  
> -	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
> +	if (ring->adev->gfx.mcbp &&
> +		(ib->flags & AMDGPU_IB_FLAG_PREEMPT) &&
> +		(job->gang_submit != &job->base.s_fence->scheduled)) {
>  		control |= INDIRECT_BUFFER_PRE_ENB(1);
>  
>  		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index a6ff9a137a83..47b10528e07b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -5420,7 +5420,8 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
>  
>  	control |= ib->length_dw | (vmid << 24);
>  
> -	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
> +	if ((ib->flags & AMDGPU_IB_FLAG_PREEMPT) &&
> +		(job->gang_submit != &job->base.s_fence->scheduled)) {
>  		control |= INDIRECT_BUFFER_PRE_ENB(1);
>  
>  		if (flags & AMDGPU_IB_PREEMPTED)



More information about the amd-gfx mailing list