[RFC 3/5] drm/scheduler: Add a simple TDR test
Tvrtko Ursulin
tvrtko.ursulin at igalia.com
Wed Feb 5 10:01:24 UTC 2025
On 04/02/2025 16:21, Christian König wrote:
> Am 03.02.25 um 16:30 schrieb Tvrtko Ursulin:
>> Add a very simple TDR test which submits a single job and verifies that
>> the TDR handling will run if the backend failed to complete the job in
>> time.
>
> I think I said it before but I strongly suggest to not use TDR as name
> in the scheduler at all.
>
> What the scheduler provides is a simple timeout while waiting for the HW
> fence to signal.
>
> That is fundamentally different from the TDR functionality Windows provides
> and we have already had people confusing the two.
I did a s/tdr/timeout/ locally.
> Apart from that "yes, please". Those tests are desperately needed.
Cool. Let's see what other people will say and whether someone can actually
review it.
Regards,
Tvrtko
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>> Cc: Christian König <christian.koenig at amd.com>
>> Cc: Danilo Krummrich <dakr at kernel.org>
>> Cc: Matthew Brost <matthew.brost at intel.com>
>> Cc: Philipp Stanner <phasta at kernel.org>
>> ---
>> .../drm/scheduler/tests/drm_mock_scheduler.c | 12 +++-
>> .../gpu/drm/scheduler/tests/drm_sched_tests.h | 6 +-
>> .../scheduler/tests/drm_sched_tests_basic.c | 64 ++++++++++++++++++-
>> 3 files changed, 76 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c
>> b/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c
>> index f1985900a6ba..79b6193ce920 100644
>> --- a/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c
>> +++ b/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c
>> @@ -160,7 +160,11 @@ static struct dma_fence
>> *mock_sched_run_job(struct drm_sched_job *sched_job)
>> static enum drm_gpu_sched_stat
>> mock_sched_timedout_job(struct drm_sched_job *sched_job)
>> {
>> - return DRM_GPU_SCHED_STAT_ENODEV;
>> + struct drm_mock_sched_job *job =
>> drm_sched_job_to_mock_job(sched_job);
>> +
>> + job->flags |= DRM_MOCK_SCHED_JOB_TIMEDOUT;
>> +
>> + return DRM_GPU_SCHED_STAT_NOMINAL;
>> }
>> static void mock_sched_free_job(struct drm_sched_job *sched_job)
>> @@ -174,7 +178,9 @@ static const struct drm_sched_backend_ops
>> drm_mock_scheduler_ops = {
>> .free_job = mock_sched_free_job
>> };
>> -struct drm_mock_scheduler *drm_mock_new_scheduler(struct kunit *test)
>> +struct drm_mock_scheduler *
>> +drm_mock_new_scheduler(struct kunit *test,
>> + long timeout)
>> {
>> struct drm_mock_scheduler *sched;
>> int ret;
>> @@ -188,7 +194,7 @@ struct drm_mock_scheduler
>> *drm_mock_new_scheduler(struct kunit *test)
>> DRM_SCHED_PRIORITY_COUNT,
>> U32_MAX, /* max credits */
>> UINT_MAX, /* hang limit */
>> - MAX_SCHEDULE_TIMEOUT, /* timeout */
>> + timeout,
>> NULL, /* timeout wq */
>> NULL, /* score */
>> "drm-mock-scheduler",
>> diff --git a/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h
>> b/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h
>> index 421ee2712985..20695f55e453 100644
>> --- a/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h
>> +++ b/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h
>> @@ -35,6 +35,9 @@ struct drm_mock_sched_entity {
>> struct drm_mock_sched_job {
>> struct drm_sched_job base;
>> +#define DRM_MOCK_SCHED_JOB_TIMEDOUT 0x1
>> + unsigned long flags;
>> +
>> struct list_head link;
>> struct hrtimer timer;
>> @@ -65,7 +68,8 @@ drm_sched_job_to_mock_job(struct drm_sched_job
>> *sched_job)
>> return container_of(sched_job, struct drm_mock_sched_job, base);
>> };
>> -struct drm_mock_scheduler *drm_mock_new_scheduler(struct kunit *test);
>> +struct drm_mock_scheduler *drm_mock_new_scheduler(struct kunit *test,
>> + long timeout);
>> void drm_mock_scheduler_fini(struct drm_mock_scheduler *sched);
>> unsigned int drm_mock_sched_advance(struct drm_mock_scheduler *sched,
>> unsigned int num);
>> diff --git a/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c
>> b/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c
>> index 6fd39bea95b1..eb0d54d00f21 100644
>> --- a/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c
>> +++ b/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c
>> @@ -3,7 +3,7 @@
>> static int drm_sched_basic_init(struct kunit *test)
>> {
>> - test->priv = drm_mock_new_scheduler(test);
>> + test->priv = drm_mock_new_scheduler(test, MAX_SCHEDULE_TIMEOUT);
>> return 0;
>> }
>> @@ -15,6 +15,13 @@ static void drm_sched_basic_exit(struct kunit *test)
>> drm_mock_scheduler_fini(sched);
>> }
>> +static int drm_sched_tdr_init(struct kunit *test)
>> +{
>> + test->priv = drm_mock_new_scheduler(test, HZ);
>> +
>> + return 0;
>> +}
>> +
>> static void drm_sched_basic_submit(struct kunit *test)
>> {
>> struct drm_mock_scheduler *sched = test->priv;
>> @@ -244,4 +251,57 @@ static struct kunit_suite drm_sched_basic = {
>> .test_cases = drm_sched_basic_tests,
>> };
>> -kunit_test_suite(drm_sched_basic);
>> +static void drm_sched_basic_tdr(struct kunit *test)
>> +{
>> + struct drm_mock_scheduler *sched = test->priv;
>> + struct drm_mock_sched_entity *entity;
>> + struct drm_mock_sched_job *job;
>> + bool done;
>> +
>> + /*
>> + * Submit a single job against a scheduler with the timeout
>> configured
>> + * and verify that the timeout handling will run if the backend
>> fails
>> + * to complete it in time.
>> + */
>> +
>> + entity = drm_mock_new_sched_entity(test,
>> + DRM_SCHED_PRIORITY_NORMAL,
>> + sched);
>> + job = drm_mock_new_sched_job(test, entity);
>> +
>> + drm_mock_sched_job_submit(job);
>> +
>> + done = drm_mock_sched_job_wait_scheduled(job, HZ);
>> + KUNIT_ASSERT_EQ(test, done, true);
>> +
>> + done = drm_mock_sched_job_wait_finished(job, HZ / 2);
>> + KUNIT_ASSERT_EQ(test, done, false);
>> +
>> + KUNIT_ASSERT_EQ(test,
>> + job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT,
>> + 0);
>> +
>> + done = drm_mock_sched_job_wait_finished(job, HZ);
>> + KUNIT_ASSERT_EQ(test, done, false);
>> +
>> + KUNIT_ASSERT_EQ(test,
>> + job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT,
>> + DRM_MOCK_SCHED_JOB_TIMEDOUT);
>> +
>> + drm_mock_sched_entity_free(entity);
>> +}
>> +
>> +static struct kunit_case drm_sched_tdr_tests[] = {
>> + KUNIT_CASE(drm_sched_basic_tdr),
>> + {}
>> +};
>> +
>> +static struct kunit_suite drm_sched_tdr = {
>> + .name = "drm_sched_basic_tdr_tests",
>> + .init = drm_sched_tdr_init,
>> + .exit = drm_sched_basic_exit,
>> + .test_cases = drm_sched_tdr_tests,
>> +};
>> +
>> +kunit_test_suites(&drm_sched_basic,
>> + &drm_sched_tdr);
>
More information about the dri-devel
mailing list