[PATCH 07/28] drm/amdgpu: track ring state associated with a job

Alex Deucher alexdeucher at gmail.com
Tue Jun 3 15:18:00 UTC 2025


On Tue, Jun 3, 2025 at 11:01 AM Christian König
<christian.koenig at amd.com> wrote:
>
> On 6/3/25 16:27, Alex Deucher wrote:
> >>> I'm not quite sure I understand what you are proposing.  Is the idea
> >>> to track all of the jobs associated with a particular process and then,
> >>> when we reset a queue, skip all of the ring contents associated with
> >>> those jobs and set the error on and signal all of their job fences?
> >>
> >> More or less yes, I think that is what is needed here.
> >>
> >> A simple example: Unigine Heaven in windowed mode on an X server. Each frame usually results in 3 job submissions from Unigine, plus one submission from X to copy the result into the displayed frame.
> >>
> >> When we now assume that we can schedule 4 jobs at a time on the ring, we get: U1, U2, U3, X1 | U4, U5, U6, X2 | U7, U8, U9, X3....
> >>
> >> Let's assume U4 hangs and we initiate a queue reset; in this case we definitely need to skip U5 and U6 as well because they belong to the same context and depend on each other. Only skipping U4 would certainly crash the GPU again.
> >
> > In that case, we'd just reset the queue again, but I agree it would be
> > a nicer experience to skip all of the jobs for that app.
>
> Well, that is a must-have, I think. Otherwise problems only get worse and worse after a while, and we can't let submissions run into timeouts over and over again.
>
> >>
> >> X2 also depended on U6, but that submission is from X and is totally innocent; rendering garbage for the window content is probably ok considering that the application just crashed.
> >>
> >>> What about cross ring dependencies?
> >>
> >> For gang submission we would need to do a queue reset for both the gfx and compute queues to get out of this again. But that is probably ok since each queue can time out on its own.
> >>
> >> We also don't need to track the jobs per process; just checking whether job->base.s_fence->finished.context changes should be sufficient.
> >
> > Something like this?
> >
> >         for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
> >                 ptr = &ring->fence_drv.fences[i];
> >                 old = rcu_dereference_protected(*ptr, 1);
> >                 if (old && old->ops == &amdgpu_job_fence_ops) {
> >                         struct amdgpu_job *other_job =
> >                                 container_of(old, struct amdgpu_job,
> >                                              hw_fence.base);
> >
> >                         if (other_job->base.s_fence->finished.context ==
> >                             job->base.s_fence->finished.context) {
> >                                 struct amdgpu_fence *am_fence =
> >                                         &other_job->hw_fence;
> >
> >                                 /* skip the ring contents associated
> >                                  * with this context
> >                                  */
> >                         }
> >                 }
> >         }
>
> I would copy job->base.s_fence->finished.context into the HW fence when it is created, but apart from that, yes, that is exactly what I had in mind.

Where and when does finished.context get set?
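
For the copy itself, I guess it would be something like this when the
hw fence is emitted (untested sketch; assumes we add a new u64
"context" member to struct amdgpu_fence):

        /* sketch: stash the scheduler fence context in the hw fence so
         * the reset path can match submissions from the same app;
         * "context" is a hypothetical new member of struct amdgpu_fence
         */
        am_fence->context = job->base.s_fence->finished.context;

Then the loop above could just compare the two hw fences' context
values directly.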

Alex

>
> Christian.
>
>
> >
> > Thanks,
> >
> > Alex
> >
> >
> >>
> >> Regards,
> >> Christian.
> >>
> >>>
> >>> Alex
> >>>
> >>>>
> >>>> Regards,
> >>>> Christian.
> >>>>
> >>>>
> >>>>>       return 0;
> >>>>>  }
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> >>>>> index a0fab947143b5..f0f752284b925 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> >>>>> @@ -91,6 +91,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
> >>>>>       struct amdgpu_job *job = to_amdgpu_job(s_job);
> >>>>>       struct amdgpu_task_info *ti;
> >>>>>       struct amdgpu_device *adev = ring->adev;
> >>>>> +     struct dma_fence *fence = &job->hw_fence;
> >>>>>       int idx;
> >>>>>       int r;
> >>>>>
> >>>>> @@ -154,8 +155,10 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
> >>>>>               else
> >>>>>                       is_guilty = true;
> >>>>>
> >>>>> -             if (is_guilty)
> >>>>> +             if (is_guilty) {
> >>>>> +                     amdgpu_ring_backup_unprocessed_jobs(ring, job->ring_wptr, fence->seqno);
> >>>>>                       dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
> >>>>> +             }
> >>>>>
> >>>>>               r = amdgpu_ring_reset(ring, job->vmid);
> >>>>>               if (!r) {
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> >>>>> index f2c049129661f..c2ed0edb5179d 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> >>>>> @@ -79,6 +79,8 @@ struct amdgpu_job {
> >>>>>       /* enforce isolation */
> >>>>>       bool                    enforce_isolation;
> >>>>>       bool                    run_cleaner_shader;
> >>>>> +     /* wptr for the job for resets */
> >>>>> +     uint32_t                ring_wptr;
> >>>>>
> >>>>>       uint32_t                num_ibs;
> >>>>>       struct amdgpu_ib        ibs[];
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> >>>>> index 426834806fbf2..909b121d432cb 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> >>>>> @@ -333,6 +333,12 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
> >>>>>       /*  Initialize cached_rptr to 0 */
> >>>>>       ring->cached_rptr = 0;
> >>>>>
> >>>>> +     if (!ring->ring_backup) {
> >>>>> +             ring->ring_backup = kvzalloc(ring->ring_size, GFP_KERNEL);
> >>>>> +             if (!ring->ring_backup)
> >>>>> +                     return -ENOMEM;
> >>>>> +     }
> >>>>> +
> >>>>>       /* Allocate ring buffer */
> >>>>>       if (ring->ring_obj == NULL) {
> >>>>>               r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
> >>>>> @@ -342,6 +348,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
> >>>>>                                           (void **)&ring->ring);
> >>>>>               if (r) {
> >>>>>                       dev_err(adev->dev, "(%d) ring create failed\n", r);
> >>>>> +                     kvfree(ring->ring_backup);
> >>>>>                       return r;
> >>>>>               }
> >>>>>               amdgpu_ring_clear_ring(ring);
> >>>>> @@ -385,6 +392,8 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
> >>>>>       amdgpu_bo_free_kernel(&ring->ring_obj,
> >>>>>                             &ring->gpu_addr,
> >>>>>                             (void **)&ring->ring);
> >>>>> +     kvfree(ring->ring_backup);
> >>>>> +     ring->ring_backup = NULL;
> >>>>>
> >>>>>       dma_fence_put(ring->vmid_wait);
> >>>>>       ring->vmid_wait = NULL;
> >>>>> @@ -753,3 +762,40 @@ bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
> >>>>>
> >>>>>       return true;
> >>>>>  }
> >>>>> +
> >>>>> +void amdgpu_ring_backup_unprocessed_jobs(struct amdgpu_ring *ring,
> >>>>> +                                      u64 bad_wptr, u32 bad_seq)
> >>>>> +{
> >>>>> +     unsigned int entries_to_copy = ring->wptr - bad_wptr;
> >>>>> +     unsigned int idx, i;
> >>>>> +
> >>>>> +     for (i = 0; i < entries_to_copy; i++) {
> >>>>> +             idx = (bad_wptr + i) & ring->buf_mask;
> >>>>> +             ring->ring_backup[i] = ring->ring[idx];
> >>>>> +     }
> >>>>> +     ring->ring_backup_entries_to_copy = entries_to_copy;
> >>>>> +     ring->ring_backup_seq = bad_seq;
> >>>>> +}
> >>>>> +
> >>>>> +int amdgpu_ring_reemit_unprocessed_jobs(struct amdgpu_ring *ring)
> >>>>> +{
> >>>>> +     unsigned int i;
> >>>>> +     int r;
> >>>>> +
> >>>>> +     /* signal the fence of the bad job */
> >>>>> +     amdgpu_fence_driver_seq_force_completion(ring, ring->ring_backup_seq);
> >>>>> +     /* verify that the ring is functional */
> >>>>> +     r = amdgpu_ring_test_ring(ring);
> >>>>> +     if (r)
> >>>>> +             return r;
> >>>>> +     /* re-emit the unprocessed ring contents */
> >>>>> +     if (ring->ring_backup_entries_to_copy) {
> >>>>> +             if (amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy))
> >>>>> +                     return -ENOMEM;
> >>>>> +             for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
> >>>>> +                     amdgpu_ring_write(ring, ring->ring_backup[i]);
> >>>>> +             amdgpu_ring_commit(ring);
> >>>>> +     }
> >>>>> +
> >>>>> +     return r;
> >>>>> +}
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> >>>>> index b95b471107692..fd08449eee33f 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> >>>>> @@ -132,6 +132,8 @@ extern const struct drm_sched_backend_ops amdgpu_sched_ops;
> >>>>>  void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
> >>>>>  void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
> >>>>>  void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
> >>>>> +void amdgpu_fence_driver_seq_force_completion(struct amdgpu_ring *ring,
> >>>>> +                                           u32 seq);
> >>>>>
> >>>>>  int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
> >>>>>  int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
> >>>>> @@ -268,6 +270,9 @@ struct amdgpu_ring {
> >>>>>
> >>>>>       struct amdgpu_bo        *ring_obj;
> >>>>>       uint32_t                *ring;
> >>>>> +     uint32_t                *ring_backup;
> >>>>> +     uint32_t                ring_backup_seq;
> >>>>> +     unsigned int            ring_backup_entries_to_copy;
> >>>>>       unsigned                rptr_offs;
> >>>>>       u64                     rptr_gpu_addr;
> >>>>>       volatile u32            *rptr_cpu_addr;
> >>>>> @@ -534,4 +539,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev);
> >>>>>  void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
> >>>>>  int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
> >>>>>  bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring);
> >>>>> +void amdgpu_ring_backup_unprocessed_jobs(struct amdgpu_ring *ring,
> >>>>> +                                      u64 bad_wptr, u32 bad_seq);
> >>>>> +int amdgpu_ring_reemit_unprocessed_jobs(struct amdgpu_ring *ring);
> >>>>>  #endif
> >>>>
> >>
>

