[PATCH v2 4/8] drm/sched: Add new test for DRM_GPU_SCHED_STAT_NO_HANG

Tvrtko Ursulin tvrtko.ursulin at igalia.com
Mon Jun 2 09:34:18 UTC 2025


On 30/05/2025 15:01, Maíra Canal wrote:
> Add a test to submit a single job against a scheduler with the timeout
> configured and verify that if the job is still running, the timeout
> handler will skip the reset and allow the job to complete.
> 
> Signed-off-by: Maíra Canal <mcanal at igalia.com>
> ---
>   drivers/gpu/drm/scheduler/tests/mock_scheduler.c |  5 +++
>   drivers/gpu/drm/scheduler/tests/sched_tests.h    |  1 +
>   drivers/gpu/drm/scheduler/tests/tests_basic.c    | 43 ++++++++++++++++++++++++
>   3 files changed, 49 insertions(+)
> 
> diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
> index fdf5f34b39e02c8a8648d8bea566a27fd3251516..39429f5cd19ee3c23816f257d566b47d3daa4baa 100644
> --- a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
> +++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
> @@ -208,6 +208,11 @@ mock_sched_timedout_job(struct drm_sched_job *sched_job)
>   
>   	job->flags |= DRM_MOCK_SCHED_JOB_TIMEDOUT;
>   
> +	if (job->flags & DRM_MOCK_SCHED_JOB_DONT_RESET) {
> +		job->flags &= ~DRM_MOCK_SCHED_JOB_DONT_RESET;

If it isn't important to clear the flag I would consider omitting it.

> +		return DRM_GPU_SCHED_STAT_NO_HANG;
> +	}
> +
>   	return DRM_GPU_SCHED_STAT_RESET;
>   }
>   
> diff --git a/drivers/gpu/drm/scheduler/tests/sched_tests.h b/drivers/gpu/drm/scheduler/tests/sched_tests.h
> index 27caf8285fb74b9f3c9ce2daa1c44d4a0c967e92..5259f181e55387c41efbcd3f6addc9465331d787 100644
> --- a/drivers/gpu/drm/scheduler/tests/sched_tests.h
> +++ b/drivers/gpu/drm/scheduler/tests/sched_tests.h
> @@ -98,6 +98,7 @@ struct drm_mock_sched_job {
>   
>   #define DRM_MOCK_SCHED_JOB_DONE		0x1
>   #define DRM_MOCK_SCHED_JOB_TIMEDOUT	0x2
> +#define DRM_MOCK_SCHED_JOB_DONT_RESET	0x4
>   	unsigned long		flags;
>   
>   	struct list_head	link;
> diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> index 41c648782f4548e202bd8711b45d28eead9bd0b2..2ba2d1b0c3cad9626ab9d89cfae05244c670a826 100644
> --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
> +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> @@ -246,8 +246,51 @@ static void drm_sched_basic_timeout(struct kunit *test)
>   	drm_mock_sched_entity_free(entity);
>   }
>   
> +static void drm_sched_skip_reset(struct kunit *test)
> +{
> +	struct drm_mock_scheduler *sched = test->priv;
> +	struct drm_mock_sched_entity *entity;
> +	struct drm_mock_sched_job *job;
> +	bool done;
> +
> +	/*
> +	 * Submit a single job against a scheduler with the timeout configured
> +	 * and verify that if the job is still running, the timeout handler
> +	 * will skip the reset and allow the job to complete.
> +	 */
> +
> +	entity = drm_mock_sched_entity_new(test,
> +					   DRM_SCHED_PRIORITY_NORMAL,
> +					   sched);
> +	job = drm_mock_sched_job_new(test, entity);
> +
> +	job->flags = DRM_MOCK_SCHED_JOB_DONT_RESET;
> +
> +	drm_mock_sched_job_set_duration_us(job, jiffies_to_usecs(2 * MOCK_TIMEOUT));

It might be easier not to set the duration and instead advance the job
manually after the timeout assert. That is one fewer time-based interaction.

> +	drm_mock_sched_job_submit(job);
> +
> +	done = drm_mock_sched_job_wait_finished(job, MOCK_TIMEOUT);
> +	KUNIT_ASSERT_FALSE(test, done);
> +
> +	KUNIT_ASSERT_EQ(test,
> +			job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT,
> +			DRM_MOCK_SCHED_JOB_TIMEDOUT);
> +
> +	KUNIT_ASSERT_EQ(test,
> +			job->flags & DRM_MOCK_SCHED_JOB_DONT_RESET,
> +			0);


The wait_finished timeout here (200ms) is equal to the configured job
timeout, so could this be a bit racy? I think it is safer to wait for
2 * MOCK_TIMEOUT.

(I also wonder whether I should have made the flags bit operations 
atomic so the visibility between CPU cores running different threads is 
guaranteed. I might follow up with that tweak.)

> +
> +	KUNIT_ASSERT_FALSE(test, list_empty(&sched->job_list));

Going back to my earlier comment about the job duration - if you remove the
set_duration call and, instead of this assert, do
KUNIT_ASSERT_EQ(drm_mock_sched_advance(), 1), I think that should be good
enough and simpler.

Regards,

Tvrtko

> +
> +	done = drm_mock_sched_job_wait_finished(job, MOCK_TIMEOUT);
> +	KUNIT_ASSERT_TRUE(test, done);
> +
> +	drm_mock_sched_entity_free(entity);
> +}
> +
>   static struct kunit_case drm_sched_timeout_tests[] = {
>   	KUNIT_CASE(drm_sched_basic_timeout),
> +	KUNIT_CASE(drm_sched_skip_reset),
>   	{}
>   };
>   
> 



More information about the Intel-xe mailing list