[PATCH 1/1] drm/amdgpu: rework context priority handling

Das, Nirmoy nirmoy.das at amd.com
Wed Aug 25 13:35:41 UTC 2021


On 8/25/2021 2:29 PM, Christian König wrote:
> Am 25.08.21 um 14:20 schrieb Lazar, Lijo:
>> On 8/25/2021 4:52 PM, Nirmoy Das wrote:
>>> To get a hardware queue priority for a context, we are currently
>>> mapping AMDGPU_CTX_PRIORITY_* to DRM_SCHED_PRIORITY_* and then
>>> to hardware queue priority, which is not the right way to do that
>>> as DRM_SCHED_PRIORITY_* is the software scheduler's priority and it is
>>> independent of hardware queue priority.
>>>
>>> Use userspace provided context priority, AMDGPU_CTX_PRIORITY_* to
>>> map a context to proper hardware queue priority.
>>>
>>> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   | 127 
>>> ++++++++++++++++------
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h   |   8 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c |  44 ++------
>>>   3 files changed, 105 insertions(+), 74 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> index e7a010b7ca1f..c88c5c6c54a2 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> @@ -43,14 +43,61 @@ const unsigned int 
>>> amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
>>>       [AMDGPU_HW_IP_VCN_JPEG]    =    1,
>>>   };
>>>   +bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
>>> +{
>>> +    switch (ctx_prio) {
>>> +    case AMDGPU_CTX_PRIORITY_UNSET:
>>> +    case AMDGPU_CTX_PRIORITY_VERY_LOW:
>>> +    case AMDGPU_CTX_PRIORITY_LOW:
>>> +    case AMDGPU_CTX_PRIORITY_NORMAL:
>>> +    case AMDGPU_CTX_PRIORITY_HIGH:
>>> +    case AMDGPU_CTX_PRIORITY_VERY_HIGH:
>>> +        return true;
>>> +    default:
>>> +        return false;
>>> +    }
>>> +}
>>> +
>>> +static enum drm_sched_priority
>>> +amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
>>> +{
>>> +    switch (ctx_prio) {
>>> +    case AMDGPU_CTX_PRIORITY_UNSET:
>>> +        return DRM_SCHED_PRIORITY_UNSET;
>>> +
>>> +    case AMDGPU_CTX_PRIORITY_VERY_LOW:
>>> +        return DRM_SCHED_PRIORITY_MIN;
>>> +
>>> +    case AMDGPU_CTX_PRIORITY_LOW:
>>> +        return DRM_SCHED_PRIORITY_MIN;
>>> +
>>> +    case AMDGPU_CTX_PRIORITY_NORMAL:
>>> +        return DRM_SCHED_PRIORITY_NORMAL;
>>> +
>>> +    case AMDGPU_CTX_PRIORITY_HIGH:
>>> +        return DRM_SCHED_PRIORITY_HIGH;
>>> +
>>> +    case AMDGPU_CTX_PRIORITY_VERY_HIGH:
>>> +        return DRM_SCHED_PRIORITY_HIGH;
>>> +
>>> +    /* This should not happen as we sanitized userspace provided 
>>> priority
>>> +     * already, WARN if this happens.
>>> +     */
>>> +    default:
>>> +        WARN(1, "Invalid context priority %d\n", ctx_prio);
>>> +        return DRM_SCHED_PRIORITY_NORMAL;
>>> +    }
>>> +
>>> +}
>>> +
>>>   static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>>> -                      enum drm_sched_priority priority)
>>> +                      int32_t priority)
>>>   {
>>> -    if (priority < 0 || priority >= DRM_SCHED_PRIORITY_COUNT)
>>> +    if (!amdgpu_ctx_priority_is_valid(priority))
>>>           return -EINVAL;
>>>         /* NORMAL and below are accessible by everyone */
>>> -    if (priority <= DRM_SCHED_PRIORITY_NORMAL)
>>> +    if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
>>>           return 0;
>>>         if (capable(CAP_SYS_NICE))
>>> @@ -62,26 +109,35 @@ static int amdgpu_ctx_priority_permit(struct 
>>> drm_file *filp,
>>>       return -EACCES;
>>>   }
>>>   -static enum gfx_pipe_priority 
>>> amdgpu_ctx_sched_prio_to_compute_prio(enum drm_sched_priority prio)
>>> +static enum gfx_pipe_priority 
>>> amdgpu_ctx_prio_to_compute_prio(int32_t prio)
>>>   {
>>>       switch (prio) {
>>> -    case DRM_SCHED_PRIORITY_HIGH:
>>> -    case DRM_SCHED_PRIORITY_KERNEL:
>>> +    case AMDGPU_CTX_PRIORITY_HIGH:
>>> +    case AMDGPU_CTX_PRIORITY_VERY_HIGH:
>>>           return AMDGPU_GFX_PIPE_PRIO_HIGH;
>>>       default:
>>>           return AMDGPU_GFX_PIPE_PRIO_NORMAL;
>>>       }
>>>   }
>>>   -static unsigned int amdgpu_ctx_prio_sched_to_hw(struct 
>>> amdgpu_device *adev,
>>> -                         enum drm_sched_priority prio,
>>> -                         u32 hw_ip)
>>> +static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, 
>>> u32 hw_ip)
>>>   {
>>> +    struct amdgpu_device *adev = ctx->adev;
>>> +    int32_t ctx_prio;
>>>       unsigned int hw_prio;
>>>   -    hw_prio = (hw_ip == AMDGPU_HW_IP_COMPUTE) ?
>>> -            amdgpu_ctx_sched_prio_to_compute_prio(prio) :
>>> -            AMDGPU_RING_PRIO_DEFAULT;
>>> +    ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
>>> +            ctx->init_priority : ctx->override_priority;
>>> +
>>> +    switch (hw_ip) {
>>> +    case AMDGPU_HW_IP_COMPUTE:
>>> +        hw_prio = amdgpu_ctx_prio_to_compute_prio(ctx_prio);
>>> +        break;
>>> +    default:
>>> +        hw_prio = AMDGPU_RING_PRIO_DEFAULT;
>>> +        break;
>>> +    }
>>> +
>>>       hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
>>>       if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
>>>           hw_prio = AMDGPU_RING_PRIO_DEFAULT;
>>> @@ -89,15 +145,17 @@ static unsigned int 
>>> amdgpu_ctx_prio_sched_to_hw(struct amdgpu_device *adev,
>>>       return hw_prio;
>>>   }
>>>   +
>>>   static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
>>> -                   const u32 ring)
>>> +                  const u32 ring)
>>>   {
>>>       struct amdgpu_device *adev = ctx->adev;
>>>       struct amdgpu_ctx_entity *entity;
>>>       struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
>>>       unsigned num_scheds = 0;
>>> +    int32_t ctx_prio;
>>>       unsigned int hw_prio;
>>> -    enum drm_sched_priority priority;
>>> +    enum drm_sched_priority drm_prio;
>>>       int r;
>>>         entity = kzalloc(struct_size(entity, fences, 
>>> amdgpu_sched_jobs),
>>> @@ -105,10 +163,11 @@ static int amdgpu_ctx_init_entity(struct 
>>> amdgpu_ctx *ctx, u32 hw_ip,
>>>       if (!entity)
>>>           return  -ENOMEM;
>>>   +    ctx_prio = (ctx->override_priority == 
>>> AMDGPU_CTX_PRIORITY_UNSET) ?
>>> +            ctx->init_priority : ctx->override_priority;
>>>       entity->sequence = 1;
>>> -    priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
>>> -                ctx->init_priority : ctx->override_priority;
>>> -    hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority, hw_ip);
>>> +    hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
>>> +    drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
>>>         hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
>>>       scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
>>> @@ -124,7 +183,7 @@ static int amdgpu_ctx_init_entity(struct 
>>> amdgpu_ctx *ctx, u32 hw_ip,
>>>           num_scheds = 1;
>>>       }
>>>   -    r = drm_sched_entity_init(&entity->entity, priority, scheds, 
>>> num_scheds,
>>> +    r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, 
>>> num_scheds,
>>>                     &ctx->guilty);
>>>       if (r)
>>>           goto error_free_entity;
>>> @@ -139,7 +198,7 @@ static int amdgpu_ctx_init_entity(struct 
>>> amdgpu_ctx *ctx, u32 hw_ip,
>>>   }
>>>     static int amdgpu_ctx_init(struct amdgpu_device *adev,
>>> -               enum drm_sched_priority priority,
>>> +               int32_t priority,
>>>                  struct drm_file *filp,
>>>                  struct amdgpu_ctx *ctx)
>>>   {
>>> @@ -161,7 +220,7 @@ static int amdgpu_ctx_init(struct amdgpu_device 
>>> *adev,
>>>       ctx->reset_counter_query = ctx->reset_counter;
>>>       ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
>>>       ctx->init_priority = priority;
>>> -    ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>>> +    ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
>>>         return 0;
>>>   }
>>> @@ -234,7 +293,7 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx 
>>> *ctx, u32 hw_ip, u32 instance,
>>>   static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
>>>                   struct amdgpu_fpriv *fpriv,
>>>                   struct drm_file *filp,
>>> -                enum drm_sched_priority priority,
>>> +                int32_t priority,
>>>                   uint32_t *id)
>>>   {
>>>       struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
>>> @@ -397,19 +456,19 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, 
>>> void *data,
>>>   {
>>>       int r;
>>>       uint32_t id;
>>> -    enum drm_sched_priority priority;
>>> +    int32_t priority;
>>>         union drm_amdgpu_ctx *args = data;
>>>       struct amdgpu_device *adev = drm_to_adev(dev);
>>>       struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>         id = args->in.ctx_id;
>>> -    r = amdgpu_to_sched_priority(args->in.priority, &priority);
>>> +    priority = args->in.priority;
>>>         /* For backwards compatibility reasons, we need to accept
>>>        * ioctls with garbage in the priority field */
>>> -    if (r == -EINVAL)
>>> -        priority = DRM_SCHED_PRIORITY_NORMAL;
>>> +    if (!amdgpu_ctx_priority_is_valid(priority))
>>> +        priority = AMDGPU_CTX_PRIORITY_NORMAL;
>>>         switch (args->in.op) {
>>>       case AMDGPU_CTX_OP_ALLOC_CTX:
>>> @@ -515,9 +574,9 @@ struct dma_fence *amdgpu_ctx_get_fence(struct 
>>> amdgpu_ctx *ctx,
>>>   }
>>>     static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
>>> -                        struct amdgpu_ctx_entity *aentity,
>>> -                        int hw_ip,
>>> -                        enum drm_sched_priority priority)
>>> +                       struct amdgpu_ctx_entity *aentity,
>>> +                       int hw_ip,
>>> +                       int32_t priority)
>>>   {
>>>       struct amdgpu_device *adev = ctx->adev;
>>>       unsigned int hw_prio;
>>> @@ -525,12 +584,12 @@ static void 
>>> amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
>>>       unsigned num_scheds;
>>>         /* set sw priority */
>>> -    drm_sched_entity_set_priority(&aentity->entity, priority);
>>> +    drm_sched_entity_set_priority(&aentity->entity,
>>> + amdgpu_ctx_to_drm_sched_prio(priority));
>>>         /* set hw priority */
>>>       if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
>>> -        hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority,
>>> -                              AMDGPU_HW_IP_COMPUTE);
>>> +        hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
>>>           hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
>>
>> Not related to this patch, but this kind of logic may break some day. 
>> There is a HWIP-specific priority, and there is another RING_PRIO 
>> which is not mapped to a HWIP priority. Ex: when a new priority is added 
>> for VCN which is higher than any of the existing priorities.
>
> Yes, that's something I've noticed as well.
>
> Either we should leave the exact mapping to the engine or have a 
> global definition of possible hw priorities (the later is preferable I 
> think).


Will this be sufficient:

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index d43fe2ed8116..937320293029 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -43,9 +43,9 @@
  #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES

  enum gfx_pipe_priority {
-       AMDGPU_GFX_PIPE_PRIO_NORMAL = 1,
-       AMDGPU_GFX_PIPE_PRIO_HIGH,
-       AMDGPU_GFX_PIPE_PRIO_MAX
+       AMDGPU_GFX_PIPE_PRIO_NORMAL = AMDGPU_RING_PRIO_1,
+       AMDGPU_GFX_PIPE_PRIO_HIGH = AMDGPU_RING_PRIO_2,
+       AMDGPU_GFX_PIPE_PRIO_MAX = AMDGPU_RING_PRIO_3
  };

  /* Argument for PPSMC_MSG_GpuChangeState */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index e713d31619fe..c54539faf209 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -37,7 +37,14 @@
  #define AMDGPU_MAX_UVD_ENC_RINGS       2

  #define AMDGPU_RING_PRIO_DEFAULT       1
-#define AMDGPU_RING_PRIO_MAX           AMDGPU_GFX_PIPE_PRIO_MAX
+
+enum amdgpu_ring_priority_level {
+       AMDGPU_RING_PRIO_0,
+       AMDGPU_RING_PRIO_1,
+       AMDGPU_RING_PRIO_2,
+       AMDGPU_RING_PRIO_3,
+       AMDGPU_RING_PRIO_MAX
+};


Regards,

Nirmoy

>
> Christian.
>
>>
>> Thanks,
>> Lijo
>>
>>>           scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
>>>           num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
>>> @@ -540,14 +599,14 @@ static void 
>>> amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
>>>   }
>>>     void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>>> -                  enum drm_sched_priority priority)
>>> +                  int32_t priority)
>>>   {
>>> -    enum drm_sched_priority ctx_prio;
>>> +    int32_t ctx_prio;
>>>       unsigned i, j;
>>>         ctx->override_priority = priority;
>>>   -    ctx_prio = (ctx->override_priority == 
>>> DRM_SCHED_PRIORITY_UNSET) ?
>>> +    ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
>>>               ctx->init_priority : ctx->override_priority;
>>>       for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>>>           for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> index 14db16bc3322..a44b8b8ed39c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> @@ -47,8 +47,8 @@ struct amdgpu_ctx {
>>>       spinlock_t            ring_lock;
>>>       struct amdgpu_ctx_entity 
>>> *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
>>>       bool                preamble_presented;
>>> -    enum drm_sched_priority        init_priority;
>>> -    enum drm_sched_priority        override_priority;
>>> +    int32_t                init_priority;
>>> +    int32_t                override_priority;
>>>       struct mutex            lock;
>>>       atomic_t            guilty;
>>>       unsigned long            ras_counter_ce;
>>> @@ -75,8 +75,8 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
>>>   struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
>>>                          struct drm_sched_entity *entity,
>>>                          uint64_t seq);
>>> -void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>>> -                  enum drm_sched_priority priority);
>>> +bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio);
>>> +void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, int32_t 
>>> ctx_prio);
>>>     int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
>>>                struct drm_file *filp);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
>>> index b7d861ed5284..e9b45089a28a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
>>> @@ -32,37 +32,9 @@
>>>   #include "amdgpu_sched.h"
>>>   #include "amdgpu_vm.h"
>>>   -int amdgpu_to_sched_priority(int amdgpu_priority,
>>> -                 enum drm_sched_priority *prio)
>>> -{
>>> -    switch (amdgpu_priority) {
>>> -    case AMDGPU_CTX_PRIORITY_VERY_HIGH:
>>> -        *prio = DRM_SCHED_PRIORITY_HIGH;
>>> -        break;
>>> -    case AMDGPU_CTX_PRIORITY_HIGH:
>>> -        *prio = DRM_SCHED_PRIORITY_HIGH;
>>> -        break;
>>> -    case AMDGPU_CTX_PRIORITY_NORMAL:
>>> -        *prio = DRM_SCHED_PRIORITY_NORMAL;
>>> -        break;
>>> -    case AMDGPU_CTX_PRIORITY_LOW:
>>> -    case AMDGPU_CTX_PRIORITY_VERY_LOW:
>>> -        *prio = DRM_SCHED_PRIORITY_MIN;
>>> -        break;
>>> -    case AMDGPU_CTX_PRIORITY_UNSET:
>>> -        *prio = DRM_SCHED_PRIORITY_UNSET;
>>> -        break;
>>> -    default:
>>> -        WARN(1, "Invalid context priority %d\n", amdgpu_priority);
>>> -        return -EINVAL;
>>> -    }
>>> -
>>> -    return 0;
>>> -}
>>> -
>>>   static int amdgpu_sched_process_priority_override(struct 
>>> amdgpu_device *adev,
>>>                             int fd,
>>> -                          enum drm_sched_priority priority)
>>> +                          int32_t priority)
>>>   {
>>>       struct fd f = fdget(fd);
>>>       struct amdgpu_fpriv *fpriv;
>>> @@ -89,7 +61,7 @@ static int 
>>> amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
>>>   static int amdgpu_sched_context_priority_override(struct 
>>> amdgpu_device *adev,
>>>                             int fd,
>>>                             unsigned ctx_id,
>>> -                          enum drm_sched_priority priority)
>>> +                          int32_t priority)
>>>   {
>>>       struct fd f = fdget(fd);
>>>       struct amdgpu_fpriv *fpriv;
>>> @@ -124,7 +96,6 @@ int amdgpu_sched_ioctl(struct drm_device *dev, 
>>> void *data,
>>>   {
>>>       union drm_amdgpu_sched *args = data;
>>>       struct amdgpu_device *adev = drm_to_adev(dev);
>>> -    enum drm_sched_priority priority;
>>>       int r;
>>>         /* First check the op, then the op's argument.
>>> @@ -138,21 +109,22 @@ int amdgpu_sched_ioctl(struct drm_device *dev, 
>>> void *data,
>>>           return -EINVAL;
>>>       }
>>>   -    r = amdgpu_to_sched_priority(args->in.priority, &priority);
>>> -    if (r)
>>> -        return r;
>>> +    if (!amdgpu_ctx_priority_is_valid(args->in.priority)) {
>>> +        WARN(1, "Invalid context priority %d\n", args->in.priority);
>>> +        return -EINVAL;
>>> +    }
>>>         switch (args->in.op) {
>>>       case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE:
>>>           r = amdgpu_sched_process_priority_override(adev,
>>>                                  args->in.fd,
>>> -                               priority);
>>> +                               args->in.priority);
>>>           break;
>>>       case AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE:
>>>           r = amdgpu_sched_context_priority_override(adev,
>>>                                  args->in.fd,
>>>                                  args->in.ctx_id,
>>> -                               priority);
>>> +                               args->in.priority);
>>>           break;
>>>       default:
>>>           /* Impossible.
>>>
>


More information about the amd-gfx mailing list