[PATCH 4/8] drm/amdgpu: rework how isolation is enforced v2
Christian König
ckoenig.leichtzumerken at gmail.com
Fri Mar 14 07:07:17 UTC 2025
Hi Siqueira,
As discussed on the call: once you wrap your head around how the amdgpu_device_enforce_isolation() function works, it should be trivial to write a new function (or extend that one) to insert a CPU bubble whenever the ownership of one of the compute rings changes.
IIRC we already do load balancing between the 8 available compute rings anyway, so the only missing part is the CPU bubble which ensures that a queue reset only affects a single application.
Regards,
Christian.
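To make the idea a bit more concrete, here is a rough and untested sketch of the kind of change I have in mind. It builds on the comment in the patch below which notes that passing NULL instead of the ring to amdgpu_sync_peek_fence() enforces a CPU round trip; the compute-ring special case is only my suggestion, not something the patch already does:

	/*
	 * Untested sketch: inside amdgpu_device_enforce_isolation() only the
	 * peek needs to change. With NULL instead of the ring, fences from
	 * the same ring are not filtered out, so the scheduler has to wait
	 * on the CPU instead of letting the ring pipeline the client switch.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
		/* full CPU bubble between clients on compute rings */
		dep = amdgpu_sync_peek_fence(&isolation->prev, NULL);
	else
		/* keep pipelining submissions on GFX */
		dep = amdgpu_sync_peek_fence(&isolation->prev, ring);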
On 07.03.25 at 14:48, Christian König wrote:
> Limiting the number of available VMIDs to enforce isolation causes some
> issues with gang submit and with applying certain HW workarounds which
> require multiple VMIDs to work correctly.
>
> So instead, track all submissions to the relevant engines in a
> per-partition data structure and use the dma_fences of the submissions
> to enforce isolation, similar to what a VMID limit does.
>
> v2: use ~0l for jobs without isolation to distinguish them from kernel
> submissions, which use NULL for the owner. Add a warning when we
> run out of memory.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 13 ++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 98 +++++++++++++++++++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 43 ++++------
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 16 +++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 19 +++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 1 +
> 6 files changed, 155 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 87062c1adcdf..f68a348dcec9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1211,9 +1211,15 @@ struct amdgpu_device {
> bool debug_exp_resets;
> bool debug_disable_gpu_ring_reset;
>
> - bool enforce_isolation[MAX_XCP];
> - /* Added this mutex for cleaner shader isolation between GFX and compute processes */
> + /* Protection for the following isolation structure */
> struct mutex enforce_isolation_mutex;
> + bool enforce_isolation[MAX_XCP];
> + struct amdgpu_isolation {
> + void *owner;
> + struct dma_fence *spearhead;
> + struct amdgpu_sync active;
> + struct amdgpu_sync prev;
> + } isolation[MAX_XCP];
>
> struct amdgpu_init_level *init_lvl;
>
> @@ -1499,6 +1505,9 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
> struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
> struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
> struct dma_fence *gang);
> +struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
> + struct amdgpu_ring *ring,
> + struct amdgpu_job *job);
> bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
> ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
> ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 337543ec615c..3fa7788b4e12 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -4272,6 +4272,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> mutex_init(&adev->gfx.reset_sem_mutex);
> /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
> mutex_init(&adev->enforce_isolation_mutex);
> + for (i = 0; i < MAX_XCP; ++i) {
> + adev->isolation[i].spearhead = dma_fence_get_stub();
> + amdgpu_sync_create(&adev->isolation[i].active);
> + amdgpu_sync_create(&adev->isolation[i].prev);
> + }
> mutex_init(&adev->gfx.kfd_sch_mutex);
>
> amdgpu_device_init_apu_flags(adev);
> @@ -4770,7 +4775,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
>
> void amdgpu_device_fini_sw(struct amdgpu_device *adev)
> {
> - int idx;
> + int i, idx;
> bool px;
>
> amdgpu_device_ip_fini(adev);
> @@ -4778,6 +4783,11 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
> amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
> adev->accel_working = false;
> dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
> + for (i = 0; i < MAX_XCP; ++i) {
> + dma_fence_put(adev->isolation[i].spearhead);
> + amdgpu_sync_free(&adev->isolation[i].active);
> + amdgpu_sync_free(&adev->isolation[i].prev);
> + }
>
> amdgpu_reset_fini(adev);
>
> @@ -6913,6 +6923,92 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
> return NULL;
> }
>
> +/**
> + * amdgpu_device_enforce_isolation - enforce HW isolation
> + * @adev: the amdgpu device pointer
> + * @ring: the HW ring the job is supposed to run on
> + * @job: the job which is about to be pushed to the HW ring
> + *
> + * Makes sure that only one client at a time can use the GFX block.
> + * Returns: The dependency to wait on before the job can be pushed to the HW.
> + * The function is called multiple times until NULL is returned.
> + */
> +struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
> + struct amdgpu_ring *ring,
> + struct amdgpu_job *job)
> +{
> + struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
> + struct drm_sched_fence *f = job->base.s_fence;
> + struct dma_fence *dep;
> + void *owner;
> + int r;
> +
> + /*
> + * For now enforce isolation only for the GFX block since we only need
> + * the cleaner shader on those rings.
> + */
> + if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
> + ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
> + return NULL;
> +
> + /*
> + * All submissions where enforce isolation is false are handled as if
> + * they come from a single client. Use ~0l as the owner to distinguish
> + * them from kernel submissions where the owner is NULL.
> + */
> + owner = job->enforce_isolation ? f->owner : (void*)~0l;
> +
> + mutex_lock(&adev->enforce_isolation_mutex);
> +
> + /*
> + * The "spearhead" submission is the first one which changes the
> + * ownership to its client. We always need to wait for it to be
> + * pushed to the HW before proceeding with anything.
> + */
> + if (&f->scheduled != isolation->spearhead &&
> + !dma_fence_is_signaled(isolation->spearhead)) {
> + dep = isolation->spearhead;
> + goto out_grab_ref;
> + }
> +
> + if (isolation->owner != owner) {
> +
> + /*
> + * Wait for any gang to be assembled before switching to a
> + * different owner or otherwise we could deadlock the
> + * submissions.
> + */
> + if (!job->gang_submit) {
> + dep = amdgpu_device_get_gang(adev);
> + if (!dma_fence_is_signaled(dep))
> + goto out_return_dep;
> + dma_fence_put(dep);
> + }
> +
> + dma_fence_put(isolation->spearhead);
> + isolation->spearhead = dma_fence_get(&f->scheduled);
> + amdgpu_sync_move(&isolation->active, &isolation->prev);
> + isolation->owner = owner;
> + }
> +
> + /*
> + * Specifying the ring here helps to pipeline submissions even when
> + * isolation is enabled. If that is not desired for testing NULL can be
> + * used instead of the ring to enforce a CPU round trip while switching
> + * between clients.
> + */
> + dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
> + r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
> + if (r)
> + DRM_WARN("OOM tracking isolation\n");
> +
> +out_grab_ref:
> + dma_fence_get(dep);
> +out_return_dep:
> + mutex_unlock(&adev->enforce_isolation_mutex);
> + return dep;
> +}
> +
> bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
> {
> switch (adev->asic_type) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> index 56d27cea052e..92ab821afc06 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> @@ -287,40 +287,27 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
> (*id)->flushed_updates < updates ||
> !(*id)->last_flush ||
> ((*id)->last_flush->context != fence_context &&
> - !dma_fence_is_signaled((*id)->last_flush))) {
> + !dma_fence_is_signaled((*id)->last_flush)))
> + needs_flush = true;
> +
> + if ((*id)->owner != vm->immediate.fence_context ||
> + (!adev->vm_manager.concurrent_flush && needs_flush)) {
> struct dma_fence *tmp;
>
> - /* Wait for the gang to be assembled before using a
> - * reserved VMID or otherwise the gang could deadlock.
> + /* Don't use per engine and per process VMID at the
> + * same time
> */
> - tmp = amdgpu_device_get_gang(adev);
> - if (!dma_fence_is_signaled(tmp) && tmp != job->gang_submit) {
> + if (adev->vm_manager.concurrent_flush)
> + ring = NULL;
> +
> + /* to prevent one context starved by another context */
> + (*id)->pd_gpu_addr = 0;
> + tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
> + if (tmp) {
> *id = NULL;
> - *fence = tmp;
> + *fence = dma_fence_get(tmp);
> return 0;
> }
> - dma_fence_put(tmp);
> -
> - /* Make sure the id is owned by the gang before proceeding */
> - if (!job->gang_submit ||
> - (*id)->owner != vm->immediate.fence_context) {
> -
> - /* Don't use per engine and per process VMID at the
> - * same time
> - */
> - if (adev->vm_manager.concurrent_flush)
> - ring = NULL;
> -
> - /* to prevent one context starved by another context */
> - (*id)->pd_gpu_addr = 0;
> - tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
> - if (tmp) {
> - *id = NULL;
> - *fence = dma_fence_get(tmp);
> - return 0;
> - }
> - }
> - needs_flush = true;
> }
>
> /* Good we can use this VMID. Remember this submission as
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 5537c8bfd227..1a6cfef4c071 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -360,17 +360,24 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
> {
> struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
> struct amdgpu_job *job = to_amdgpu_job(sched_job);
> - struct dma_fence *fence = NULL;
> + struct dma_fence *fence;
> int r;
>
> r = drm_sched_entity_error(s_entity);
> if (r)
> goto error;
>
> - if (job->gang_submit)
> + if (job->gang_submit) {
> fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
> + if (fence)
> + return fence;
> + }
> +
> + fence = amdgpu_device_enforce_isolation(ring->adev, ring, job);
> + if (fence)
> + return fence;
>
> - if (!fence && job->vm && !job->vmid) {
> + if (job->vm && !job->vmid) {
> r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
> if (r) {
> dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
> @@ -383,9 +390,10 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
> */
> if (!fence)
> job->vm = NULL;
> + return fence;
> }
>
> - return fence;
> + return NULL;
>
> error:
> dma_fence_set_error(&job->base.s_fence->finished, r);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> index bfe12164d27d..c5f9db6b32a4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> @@ -405,6 +405,25 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
> return 0;
> }
>
> +/**
> + * amdgpu_sync_move - move all fences from src to dst
> + *
> + * @src: source of the fences, empty after function
> + * @dst: destination for the fences
> + *
> + * Moves all fences from source to destination. All fences in destination are
> + * freed and source is empty after the function call.
> + */
> +void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst)
> +{
> + unsigned int i;
> +
> + amdgpu_sync_free(dst);
> +
> + for (i = 0; i < HASH_SIZE(src->fences); ++i)
> + hlist_move_list(&src->fences[i], &dst->fences[i]);
> +}
> +
> /**
> * amdgpu_sync_push_to_job - push fences into job
> * @sync: sync object to get the fences from
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> index 1504f5e7fc46..51eb4382c91e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> @@ -57,6 +57,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
> struct amdgpu_ring *ring);
> struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
> int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
> +void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst);
> int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job);
> int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
> void amdgpu_sync_free(struct amdgpu_sync *sync);
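As a side note for reviewers, here is roughly how the bookkeeping above plays out when two clients ping-pong on the same partition. This is only an illustration of the control flow in amdgpu_device_enforce_isolation(); the job names A1/A2/B1 are made up:

	/*
	 * Rough illustration, not part of the patch:
	 *
	 * Client A, job A1: the owner changes to A, so A1's scheduled fence
	 * becomes the new spearhead, the active set is moved over to prev
	 * and A1 has to wait until the relevant fences of the previous owner
	 * in prev have signaled (prepare_job calls back in until NULL is
	 * returned).
	 *
	 * Client A, job A2: same owner, so A2 mainly waits for the spearhead
	 * A1 to be pushed to the HW; its finished fence is added to active.
	 *
	 * Client B, job B1: the owner changes again. B1 first waits for any
	 * gang which is currently being assembled, then becomes the new
	 * spearhead and waits for A's finished fences which just moved from
	 * active into prev.
	 */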