[PATCH] drm/amdgpu: Disable gfx IB preemption with gang submit
Christian König
christian.koenig at amd.com
Wed Aug 27 09:21:55 UTC 2025
On 27.08.25 06:34, Yifan Zha wrote:
> [Why]
> Gang submit conflicts with IB preemption, which will
> break the synchronization between engines.
> Specifically, MCBP is enabled by default under SRIOV.
> It will cause the gang submit use case to fail.
Gang submit is broken by design on SRIOV.
As long as nobody comes up with an idea to fix this generally, I will systematically reject all patches trying to mitigate the situation.
What we could do is to return an error on gang submit under SRIOV.
>
> [How]
> Disable gfx IB preemption when the job is part of a gang submit.
>
> Signed-off-by: Yifan Zha <Yifan.Zha at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 +++-
> drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 +++++---
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 +++-
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 ++-
> 4 files changed, 13 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 264183ab24ec..c4506be4d723 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -8653,7 +8653,9 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
>
> control |= ib->length_dw | (vmid << 24);
>
> - if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
> + if (ring->adev->gfx.mcbp &&
> + (ib->flags & AMDGPU_IB_FLAG_PREEMPT) &&
> + (job->gang_submit != &job->base.s_fence->scheduled)) {
That needs to be handled at a higher level.
> control |= INDIRECT_BUFFER_PRE_ENB(1);
>
> if (flags & AMDGPU_IB_PREEMPTED)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 456ba758fa94..725b0e4d9bf8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -2424,7 +2424,7 @@ static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
> if (version_minor == 3)
> gfx_v11_0_load_rlcp_rlcv_microcode(adev);
> }
> -
> +
Unrelated whitespace change.
> return 0;
> }
>
> @@ -3872,7 +3872,7 @@ static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
> }
>
> memcpy(fw, fw_data, fw_size);
> -
> +
Ditto.
Regards,
Christian.
> amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
> amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
>
> @@ -5855,7 +5855,9 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
>
> control |= ib->length_dw | (vmid << 24);
>
> - if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
> + if (ring->adev->gfx.mcbp &&
> + (ib->flags & AMDGPU_IB_FLAG_PREEMPT) &&
> + (job->gang_submit != &job->base.s_fence->scheduled)) {
> control |= INDIRECT_BUFFER_PRE_ENB(1);
>
> if (flags & AMDGPU_IB_PREEMPTED)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 367449d8061b..843761947b73 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6051,7 +6051,9 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
>
> control |= ib->length_dw | (vmid << 24);
>
> - if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
> + if (ring->adev->gfx.mcbp &&
> + (ib->flags & AMDGPU_IB_FLAG_PREEMPT) &&
> + (job->gang_submit != &job->base.s_fence->scheduled)) {
> control |= INDIRECT_BUFFER_PRE_ENB(1);
>
> if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index a6ff9a137a83..47b10528e07b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -5420,7 +5420,8 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
>
> control |= ib->length_dw | (vmid << 24);
>
> - if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
> + if ((ib->flags & AMDGPU_IB_FLAG_PREEMPT) &&
> + (job->gang_submit != &job->base.s_fence->scheduled)) {
> control |= INDIRECT_BUFFER_PRE_ENB(1);
>
> if (flags & AMDGPU_IB_PREEMPTED)
More information about the amd-gfx
mailing list