[PATCH 1/2] drm/scheduler: Scheduler priority fixes (v2)

Mon Aug 17 20:26:21 UTC 2020

On 2020-08-17 9:53 a.m., Christian König wrote:
> Am 15.08.20 um 04:48 schrieb Luben Tuikov:
>> Remove DRM_SCHED_PRIORITY_LOW, as it was used
>> in only one place.
>>
>> Rename and separate by a line
>> DRM_SCHED_PRIORITY_MAX to DRM_SCHED_PRIORITY_COUNT
>> as it represents a (total) count of said
>> priorities and it is used as such in loops
>> throughout the code. (0-based indexing is the
>> the count number.)
>>
>> Remove redundant word HIGH in priority names,
>> and rename *KERNEL* to *HIGH*, as it really
>> means that, high.
>>
>> v2: Add back KERNEL and remove SW and HW,
>>      in lieu of a single HIGH between NORMAL and KERNEL.
>>
>> Signed-off-by: Luben Tuikov <luben.tuikov at amd.com>
> 
> I can't really judge the difference between MAX and COUNT, but the we 
> rename the values and get rid of the invalid one sounds like a good idea 
> to me.

Thanks Christian.

As to "max" vs. "count", I alluded to the difference
in the patch cover letter text:

> For instance, renaming MAX to COUNT, as usually a maximum value
> is a value which is part of the set of values, (e.g. a maxima of
> a function), and thus assignable, whereby a count is the size of
> a set (the enumeration in this case). It also makes it clearer
> when used to define size of arrays.

A "maximum" value is value which *can be attained.* For instance,
some would say, "The maximum temperature we expect today is 35 degC."
While a "count" is just the (usually integer) number of objects in a set.
The set could be composed of various objects, not necessarily integers.
It is possible that the maximum number in a set of integers to also
be the size of that set, e.g. A = { 1, 2, 3 }, max(A) = 3, sizeof(A) = 3,
but as you can see this is a special case; consider A = { Red, Green, Blue },
or A = { 2, 3, 5 }, or A = { 3 }.

To me it is confusing to read "MAX", as this is usually used
as a "watermark", say in temperature of a unit or something like that,
which we monitor and perform certain actions depending on whether
the maximum temperature is/has been attained. Usually, there'd
be one above it, called "CRITICAL".

And I've seen bugs where people would assume that MAX is an attainable
value, e.g. MAX_PRIORITY, "This is the maximum priority a task could
run at."

I'll add your RB to the patch! Thanks for your review.

Regards,
Luben

> 
> Reviewed-by: Christian König <christian.koenig at amd.com> for the series.
> 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   |  4 ++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c |  6 +++---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  2 +-
>>   drivers/gpu/drm/scheduler/sched_main.c    |  4 ++--
>>   include/drm/gpu_scheduler.h               | 12 +++++++-----
>>   8 files changed, 18 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index d85d13f7a043..68eaa4f687a6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -46,7 +46,7 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
>>   static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>>   				      enum drm_sched_priority priority)
>>   {
>> -	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>> +	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_COUNT)
>>   		return -EINVAL;
>>   
>>   	/* NORMAL and below are accessible by everyone */
>> @@ -65,7 +65,7 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>>   static enum gfx_pipe_priority amdgpu_ctx_sched_prio_to_compute_prio(enum drm_sched_priority prio)
>>   {
>>   	switch (prio) {
>> -	case DRM_SCHED_PRIORITY_HIGH_HW:
>> +	case DRM_SCHED_PRIORITY_HIGH:
>>   	case DRM_SCHED_PRIORITY_KERNEL:
>>   		return AMDGPU_GFX_PIPE_PRIO_HIGH;
>>   	default:
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> index 75d37dfb51aa..bb9e5481ff3c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> @@ -251,7 +251,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
>>   	int i;
>>   
>>   	/* Signal all jobs not yet scheduled */
>> -	for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
>> +	for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
>>   		struct drm_sched_rq *rq = &sched->sched_rq[i];
>>   
>>   		if (!rq)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
>> index 13ea8ebc421c..6d4fc79bf84a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
>> @@ -267,7 +267,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>>   			&ring->sched;
>>   	}
>>   
>> -	for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i)
>> +	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; ++i)
>>   		atomic_set(&ring->num_jobs[i], 0);
>>   
>>   	return 0;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> index da871d84b742..7112137689db 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> @@ -243,7 +243,7 @@ struct amdgpu_ring {
>>   	bool			has_compute_vm_bug;
>>   	bool			no_scheduler;
>>   
>> -	atomic_t		num_jobs[DRM_SCHED_PRIORITY_MAX];
>> +	atomic_t		num_jobs[DRM_SCHED_PRIORITY_COUNT];
>>   	struct mutex		priority_mutex;
>>   	/* protected by priority_mutex */
>>   	int			priority;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
>> index c799691dfa84..17661ede9488 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
>> @@ -36,14 +36,14 @@ enum drm_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
>>   {
>>   	switch (amdgpu_priority) {
>>   	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
>> -		return DRM_SCHED_PRIORITY_HIGH_HW;
>> +		return DRM_SCHED_PRIORITY_HIGH;
>>   	case AMDGPU_CTX_PRIORITY_HIGH:
>> -		return DRM_SCHED_PRIORITY_HIGH_SW;
>> +		return DRM_SCHED_PRIORITY_HIGH;
>>   	case AMDGPU_CTX_PRIORITY_NORMAL:
>>   		return DRM_SCHED_PRIORITY_NORMAL;
>>   	case AMDGPU_CTX_PRIORITY_LOW:
>>   	case AMDGPU_CTX_PRIORITY_VERY_LOW:
>> -		return DRM_SCHED_PRIORITY_LOW;
>> +		return DRM_SCHED_PRIORITY_MIN;
>>   	case AMDGPU_CTX_PRIORITY_UNSET:
>>   		return DRM_SCHED_PRIORITY_UNSET;
>>   	default:
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 20fa0497aaa4..60e2d3267ae5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -2114,7 +2114,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
>>   		ring = adev->mman.buffer_funcs_ring;
>>   		sched = &ring->sched;
>>   		r = drm_sched_entity_init(&adev->mman.entity,
>> -				          DRM_SCHED_PRIORITY_KERNEL, &sched,
>> +					  DRM_SCHED_PRIORITY_KERNEL, &sched,
>>   					  1, NULL);
>>   		if (r) {
>>   			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>> index 2f319102ae9f..19f381e5e661 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -623,7 +623,7 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
>>   		return NULL;
>>   
>>   	/* Kernel run queue has higher priority than normal run queue*/
>> -	for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
>> +	for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
>>   		entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
>>   		if (entity)
>>   			break;
>> @@ -851,7 +851,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>>   	sched->name = name;
>>   	sched->timeout = timeout;
>>   	sched->hang_limit = hang_limit;
>> -	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
>> +	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++)
>>   		drm_sched_rq_init(sched, &sched->sched_rq[i]);
>>   
>>   	init_waitqueue_head(&sched->wake_up_worker);
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index 26b04ff62676..ed998464ded4 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -33,14 +33,16 @@
>>   struct drm_gpu_scheduler;
>>   struct drm_sched_rq;
>>   
>> +/* These are often used as an (initial) index
>> + * to an array, and as such should start at 0.
>> + */
>>   enum drm_sched_priority {
>>   	DRM_SCHED_PRIORITY_MIN,
>> -	DRM_SCHED_PRIORITY_LOW = DRM_SCHED_PRIORITY_MIN,
>>   	DRM_SCHED_PRIORITY_NORMAL,
>> -	DRM_SCHED_PRIORITY_HIGH_SW,
>> -	DRM_SCHED_PRIORITY_HIGH_HW,
>> +	DRM_SCHED_PRIORITY_HIGH,
>>   	DRM_SCHED_PRIORITY_KERNEL,
>> -	DRM_SCHED_PRIORITY_MAX,
>> +
>> +	DRM_SCHED_PRIORITY_COUNT,
>>   	DRM_SCHED_PRIORITY_INVALID = -1,
>>   	DRM_SCHED_PRIORITY_UNSET = -2
>>   };
>> @@ -273,7 +275,7 @@ struct drm_gpu_scheduler {
>>   	uint32_t			hw_submission_limit;
>>   	long				timeout;
>>   	const char			*name;
>> -	struct drm_sched_rq		sched_rq[DRM_SCHED_PRIORITY_MAX];
>> +	struct drm_sched_rq		sched_rq[DRM_SCHED_PRIORITY_COUNT];
>>   	wait_queue_head_t		wake_up_worker;
>>   	wait_queue_head_t		job_scheduled;
>>   	atomic_t			hw_rq_count;
>