[PATCH 2/6] drm/sched/tests: Port to cancel_job()
Tvrtko Ursulin
tvrtko.ursulin at igalia.com
Wed Jul 2 11:25:48 UTC 2025
On 02/07/2025 11:56, Philipp Stanner wrote:
> On Wed, 2025-07-02 at 11:36 +0100, Tvrtko Ursulin wrote:
>>
>> On 01/07/2025 14:21, Philipp Stanner wrote:
>>> The GPU Scheduler now supports a new callback, cancel_job(), which
>>> lets the scheduler cancel all jobs that might not yet be freed when
>>> drm_sched_fini() runs. Using this callback significantly simplifies
>>> the mock scheduler teardown code.
>>>
>>> Implement the cancel_job() callback and adjust the code where
>>> necessary.
>>
>> Cross referencing against my version I think you missed this hunk:
>>
>> --- a/drivers/gpu/drm/scheduler/tests/sched_tests.h
>> +++ b/drivers/gpu/drm/scheduler/tests/sched_tests.h
>> @@ -49,7 +49,6 @@ struct drm_mock_scheduler {
>>
>>  	spinlock_t		lock;
>>  	struct list_head	job_list;
>> -	struct list_head	done_list;
>>
>>  	struct {
>>  		u64		context;
>>
>
> Right, overlooked that one.
>
>>
>> I also had this:
>>
>> @@ -97,7 +96,8 @@ struct drm_mock_sched_job {
>>  	struct completion	done;
>>
>>  #define DRM_MOCK_SCHED_JOB_DONE		0x1
>> -#define DRM_MOCK_SCHED_JOB_TIMEDOUT	0x2
>> +#define DRM_MOCK_SCHED_JOB_CANCELED	0x2
>> +#define DRM_MOCK_SCHED_JOB_TIMEDOUT	0x4
>>
>> And was setting it in the callback. And since we should add a test
>> that explicitly covers the new callback, and just the callback, that
>> would make it very easy to do.
>
> What do you imagine that would look like? The scheduler only invokes
> the callback on teardown.
>
> We also don't have tests that only test free_job() and the like, do
> we?
>
> You cannot test a callback for the scheduler, because the callback is
> implemented in the driver.
>
> Callbacks are tested by using the scheduler. In this case, it's tested
> the intended way: by the unit tests invoking drm_sched_fini().
Something like (untested):
static void drm_sched_test_cleanup(struct kunit *test)
{
	struct drm_mock_sched_entity *entity;
	struct drm_mock_scheduler *sched;
	struct drm_mock_sched_job *job;
	bool done;

	/*
	 * Check that the job cancel callback gets invoked by the scheduler.
	 */

	sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
	entity = drm_mock_sched_entity_new(test,
					   DRM_SCHED_PRIORITY_NORMAL,
					   sched);
	job = drm_mock_sched_job_new(test, entity);

	drm_mock_sched_job_submit(job);

	done = drm_mock_sched_job_wait_scheduled(job, HZ);
	KUNIT_ASSERT_TRUE(test, done);

	drm_mock_sched_entity_free(entity);
	drm_mock_sched_fini(sched);

	KUNIT_ASSERT_TRUE(test, job->flags & DRM_MOCK_SCHED_JOB_CANCELED);
}
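For the flags assertion to pass, mock_sched_cancel_job() would of
course also have to set the new flag, roughly like this (equally
untested, just your hunk from below plus one line):

	spin_lock_irqsave(&sched->lock, flags);
	if (!dma_fence_is_signaled_locked(&job->hw_fence)) {
		list_del(&job->link);
		dma_fence_set_error(&job->hw_fence, -ECANCELED);
		dma_fence_signal_locked(&job->hw_fence);
		/* Record the cancellation so the test can assert on it. */
		job->flags |= DRM_MOCK_SCHED_JOB_CANCELED;
	}
	spin_unlock_irqrestore(&sched->lock, flags);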
Or via the hw fence status.
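That would not even need the new flag, since the callback already marks
the hw fence with -ECANCELED, e.g.:

	KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);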
Regards,
Tvrtko
>>> Signed-off-by: Philipp Stanner <phasta at kernel.org>
>>> ---
>>>   .../gpu/drm/scheduler/tests/mock_scheduler.c | 66 +++++++------------
>>> 1 file changed, 23 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
>>> index 49d067fecd67..2d3169d95200 100644
>>> --- a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
>>> +++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
>>> @@ -63,7 +63,7 @@ static void drm_mock_sched_job_complete(struct
>>> drm_mock_sched_job *job)
>>> lockdep_assert_held(&sched->lock);
>>>
>>> job->flags |= DRM_MOCK_SCHED_JOB_DONE;
>>> - list_move_tail(&job->link, &sched->done_list);
>>> + list_del(&job->link);
>>> dma_fence_signal_locked(&job->hw_fence);
>>> complete(&job->done);
>>> }
>>> @@ -236,26 +236,39 @@ mock_sched_timedout_job(struct drm_sched_job
>>> *sched_job)
>>>
>>> static void mock_sched_free_job(struct drm_sched_job *sched_job)
>>> {
>>> - struct drm_mock_scheduler *sched =
>>> - drm_sched_to_mock_sched(sched_job->sched);
>>> struct drm_mock_sched_job *job =
>>> drm_sched_job_to_mock_job(sched_job);
>>> - unsigned long flags;
>>>
>>> - /* Remove from the scheduler done list. */
>>> - spin_lock_irqsave(&sched->lock, flags);
>>> - list_del(&job->link);
>>> - spin_unlock_irqrestore(&sched->lock, flags);
>>> dma_fence_put(&job->hw_fence);
>>> -
>>> drm_sched_job_cleanup(sched_job);
>>>
>>> /* Mock job itself is freed by the kunit framework. */
>>> }
>>>
>>> +static void mock_sched_cancel_job(struct drm_sched_job *sched_job)
>>> +{
>>> +	struct drm_mock_scheduler *sched = drm_sched_to_mock_sched(sched_job->sched);
>>> +	struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job);
>>> +	unsigned long flags;
>>> +
>>> +	hrtimer_cancel(&job->timer);
>>> +
>>> +	spin_lock_irqsave(&sched->lock, flags);
>>> +	if (!dma_fence_is_signaled_locked(&job->hw_fence)) {
>>> +		list_del(&job->link);
>>> +		dma_fence_set_error(&job->hw_fence, -ECANCELED);
>>> +		dma_fence_signal_locked(&job->hw_fence);
>>> +	}
>>> +	spin_unlock_irqrestore(&sched->lock, flags);
>>> +
>>> +	/* The GPU Scheduler will call drm_sched_backend_ops.free_job(), still.
>>> +	 * Mock job itself is freed by the kunit framework. */
>>
>> /*
>>  * Multiline comment style to stay consistent, at least in this file.
>>  */
>>
>> The rest looks good, but I need to revisit the timeout/free handling
>> since it has been a while and you changed it recently.
>>
>> Regards,
>>
>> Tvrtko
>>
>>> +}
>>> +
>>>  static const struct drm_sched_backend_ops drm_mock_scheduler_ops = {
>>>  	.run_job = mock_sched_run_job,
>>>  	.timedout_job = mock_sched_timedout_job,
>>> -	.free_job = mock_sched_free_job
>>> +	.free_job = mock_sched_free_job,
>>> +	.cancel_job = mock_sched_cancel_job,
>>>  };
>>>
>>> /**
>>> @@ -289,7 +302,6 @@ struct drm_mock_scheduler
>>> *drm_mock_sched_new(struct kunit *test, long timeout)
>>> sched->hw_timeline.context = dma_fence_context_alloc(1);
>>> atomic_set(&sched->hw_timeline.next_seqno, 0);
>>> INIT_LIST_HEAD(&sched->job_list);
>>> - INIT_LIST_HEAD(&sched->done_list);
>>> spin_lock_init(&sched->lock);
>>>
>>> return sched;
>>> @@ -304,38 +316,6 @@ struct drm_mock_scheduler
>>> *drm_mock_sched_new(struct kunit *test, long timeout)
>>> */
>>> void drm_mock_sched_fini(struct drm_mock_scheduler *sched)
>>> {
>>> - struct drm_mock_sched_job *job, *next;
>>> - unsigned long flags;
>>> - LIST_HEAD(list);
>>> -
>>> - drm_sched_wqueue_stop(&sched->base);
>>> -
>>> - /* Force complete all unfinished jobs. */
>>> - spin_lock_irqsave(&sched->lock, flags);
>>> - list_for_each_entry_safe(job, next, &sched->job_list,
>>> link)
>>> - list_move_tail(&job->link, &list);
>>> - spin_unlock_irqrestore(&sched->lock, flags);
>>> -
>>> - list_for_each_entry(job, &list, link)
>>> - hrtimer_cancel(&job->timer);
>>> -
>>> - spin_lock_irqsave(&sched->lock, flags);
>>> - list_for_each_entry_safe(job, next, &list, link)
>>> - drm_mock_sched_job_complete(job);
>>> - spin_unlock_irqrestore(&sched->lock, flags);
>>> -
>>> - /*
>>> - * Free completed jobs and jobs not yet processed by the
>>> DRM scheduler
>>> - * free worker.
>>> - */
>>> - spin_lock_irqsave(&sched->lock, flags);
>>> - list_for_each_entry_safe(job, next, &sched->done_list,
>>> link)
>>> - list_move_tail(&job->link, &list);
>>> - spin_unlock_irqrestore(&sched->lock, flags);
>>> -
>>> - list_for_each_entry_safe(job, next, &list, link)
>>> - mock_sched_free_job(&job->base);
>>> -
>>> drm_sched_fini(&sched->base);
>>> }
>>>
>>
>