[PATCH 1/2] drm/scheduler: Scheduler priority fixes
Luben Tuikov
luben.tuikov at amd.com
Fri Aug 14 21:32:32 UTC 2020
On 2020-08-14 4:58 p.m., Alex Deucher wrote:
> On Fri, Aug 14, 2020 at 3:33 PM Alex Deucher <alexdeucher at gmail.com> wrote:
>>
>> + dri-devel
>>
>> On Fri, Aug 14, 2020 at 3:14 PM Luben Tuikov <luben.tuikov at amd.com> wrote:
>>>
>>> Remove DRM_SCHED_PRIORITY_LOW, as it was used
>>> in only one place.
>>>
>>> Rename and separate by a line
>>> DRM_SCHED_PRIORITY_MAX to DRM_SCHED_PRIORITY_COUNT
>>> as it represents a (total) count of said
>>> priorities and it is used as such in loops
>>> throughout the code. (With 0-based indexing, the
>>> value one past the last index is the total count.)
>>>
>>> Remove the redundant word HIGH from priority names,
>>> and rename *KERNEL* to *HIGH*, since that is what it
>>> actually denotes: a high priority.
>>>
>>> Signed-off-by: Luben Tuikov <luben.tuikov at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 6 +++---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 6 +++---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
>>> drivers/gpu/drm/scheduler/sched_main.c | 8 ++++----
>>> include/drm/gpu_scheduler.h | 15 +++++++++------
>>> 8 files changed, 23 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> index d85d13f7a043..fd97ac34277b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> @@ -46,7 +46,7 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
>>> static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>>> enum drm_sched_priority priority)
>>> {
>>> - if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>>> + if (priority < 0 || priority >= DRM_SCHED_PRIORITY_COUNT)
>>> return -EINVAL;
>>>
>>> /* NORMAL and below are accessible by everyone */
>>> @@ -65,8 +65,8 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>>> static enum gfx_pipe_priority amdgpu_ctx_sched_prio_to_compute_prio(enum drm_sched_priority prio)
>>> {
>>> switch (prio) {
>>> - case DRM_SCHED_PRIORITY_HIGH_HW:
>>> - case DRM_SCHED_PRIORITY_KERNEL:
>>> + case DRM_SCHED_PRIORITY_HW:
>>> + case DRM_SCHED_PRIORITY_HIGH:
>>> return AMDGPU_GFX_PIPE_PRIO_HIGH;
>>> default:
>>> return AMDGPU_GFX_PIPE_PRIO_NORMAL;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>> index 75d37dfb51aa..bb9e5481ff3c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>> @@ -251,7 +251,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
>>> int i;
>>>
>>> /* Signal all jobs not yet scheduled */
>>> - for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
>>> + for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
>>> struct drm_sched_rq *rq = &sched->sched_rq[i];
>>>
>>> if (!rq)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
>>> index 13ea8ebc421c..6d4fc79bf84a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
>>> @@ -267,7 +267,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>>> &ring->sched;
>>> }
>>>
>>> - for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i)
>>> + for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; ++i)
>>> atomic_set(&ring->num_jobs[i], 0);
>>>
>>> return 0;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> index da871d84b742..7112137689db 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> @@ -243,7 +243,7 @@ struct amdgpu_ring {
>>> bool has_compute_vm_bug;
>>> bool no_scheduler;
>>>
>>> - atomic_t num_jobs[DRM_SCHED_PRIORITY_MAX];
>>> + atomic_t num_jobs[DRM_SCHED_PRIORITY_COUNT];
>>> struct mutex priority_mutex;
>>> /* protected by priority_mutex */
>>> int priority;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
>>> index c799691dfa84..e05bc22a0a49 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
>>> @@ -36,14 +36,14 @@ enum drm_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
>>> {
>>> switch (amdgpu_priority) {
>>> case AMDGPU_CTX_PRIORITY_VERY_HIGH:
>>> - return DRM_SCHED_PRIORITY_HIGH_HW;
>>> + return DRM_SCHED_PRIORITY_HW;
>>> case AMDGPU_CTX_PRIORITY_HIGH:
>>> - return DRM_SCHED_PRIORITY_HIGH_SW;
>>> + return DRM_SCHED_PRIORITY_SW;
>>> case AMDGPU_CTX_PRIORITY_NORMAL:
>>> return DRM_SCHED_PRIORITY_NORMAL;
>>> case AMDGPU_CTX_PRIORITY_LOW:
>>> case AMDGPU_CTX_PRIORITY_VERY_LOW:
>>> - return DRM_SCHED_PRIORITY_LOW;
>>> + return DRM_SCHED_PRIORITY_MIN;
>>> case AMDGPU_CTX_PRIORITY_UNSET:
>>> return DRM_SCHED_PRIORITY_UNSET;
>>> default:
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index 20fa0497aaa4..bc4bdb90d8f7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -2114,7 +2114,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
>>> ring = adev->mman.buffer_funcs_ring;
>>> sched = &ring->sched;
>>> r = drm_sched_entity_init(&adev->mman.entity,
>>> - DRM_SCHED_PRIORITY_KERNEL, &sched,
>>> + DRM_SCHED_PRIORITY_HIGH, &sched,
>>> 1, NULL);
>>> if (r) {
>>> DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>>> index 2f319102ae9f..c2947e73d1b6 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -335,9 +335,9 @@ void drm_sched_increase_karma(struct drm_sched_job *bad)
>>> * because sometimes GPU hang would cause kernel jobs (like VM updating jobs)
>>> * corrupt but keep in mind that kernel jobs always considered good.
>>> */
>>> - if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
>>> + if (bad->s_priority != DRM_SCHED_PRIORITY_HIGH) {
>>> atomic_inc(&bad->karma);
>>> - for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
>>> + for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_HIGH;
>>> i++) {
>>> struct drm_sched_rq *rq = &sched->sched_rq[i];
>>>
>>> @@ -623,7 +623,7 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
>>> return NULL;
>>>
>>> /* Kernel run queue has higher priority than normal run queue*/
>>> - for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
>>> + for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
>>> entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
>>> if (entity)
>>> break;
>>> @@ -851,7 +851,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>>> sched->name = name;
>>> sched->timeout = timeout;
>>> sched->hang_limit = hang_limit;
>>> - for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
>>> + for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++)
>>> drm_sched_rq_init(sched, &sched->sched_rq[i]);
>>>
>>> init_waitqueue_head(&sched->wake_up_worker);
>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>> index 26b04ff62676..8ae9b8f73bf9 100644
>>> --- a/include/drm/gpu_scheduler.h
>>> +++ b/include/drm/gpu_scheduler.h
>>> @@ -33,14 +33,17 @@
>>> struct drm_gpu_scheduler;
>>> struct drm_sched_rq;
>>>
>>> +/* These are often used as an (initial) index
>>> + * to an array, and as such should start at 0.
>>> + */
>>> enum drm_sched_priority {
>>> DRM_SCHED_PRIORITY_MIN,
>>> - DRM_SCHED_PRIORITY_LOW = DRM_SCHED_PRIORITY_MIN,
>>> DRM_SCHED_PRIORITY_NORMAL,
>>> - DRM_SCHED_PRIORITY_HIGH_SW,
>>> - DRM_SCHED_PRIORITY_HIGH_HW,
>>
>> We originally added HIGH_SW and HIGH_HW to differentiate between two
>> different high priority modes; e.g., HIGH_SW meant that the gpu
>> scheduler would schedule with high priority, HIGH_HW meant that the
>> hardware would schedule with high priority. You can set different
>> queue priorities in the hardware and work from the high priority queue
>> will get scheduled on the hw sooner than lower priority queues. Not
>> all engines support HW queue priorities however.
>
> Thinking about this more, I think we can probably just get rid of the
> SW and HW settings and just have a HIGH setting. We can use HIGH
> whether or not we have additional hw priority features or not. We
> want to keep KERNEL however since that is the highest priority and it
> makes it obvious what it is for. Submissions from the kernel need to
> be the highest priority (e.g., memory management or page table
> updates).
>
"MIN, NORMAL, HIGH, KERNEL", sure we can do that. That's better
and gets rid of the ambiguous "SW, HW". I'll do that.
Regards,
Luben
More information about the amd-gfx
mailing list