[RFC PATCH] drm/scheduler: rework entity creation
Nirmoy
nirmodas at amd.com
Thu Dec 5 11:04:44 UTC 2019
Hi Christian,
I am not exactly sure about drm_sched_entity_set_priority() I wonder if
just changing
entity->priority to ctx->override_priority should work. With this
change drm_sched_entity_select_rq()
will chose a rq based on entity->priority which seems to me correct. But
is this enough to fix the old bug you were
talking about which mess up already scheduled job on priority change?
okay I just realized I need a lock to make sure
drm_sched_entity_set_priority() and drm_sched_entity_select_rq()
shouldn't happen at the same time.
Regards,
Nirmoy
On 12/5/19 11:52 AM, Nirmoy Das wrote:
> Entity currently keeps a copy of run_queue list and modify it in
> drm_sched_entity_set_priority(). Entities shouldn't modify run_queue
> list. Use drm_gpu_scheduler list instead of drm_sched_rq list
> in drm_sched_entity struct. In this way we can select a runqueue based
> on entity/ctx's priority for a drm scheduler.
>
> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 7 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 7 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 7 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 14 +++--
> drivers/gpu/drm/etnaviv/etnaviv_drv.c | 8 +--
> drivers/gpu/drm/lima/lima_sched.c | 5 +-
> drivers/gpu/drm/panfrost/panfrost_job.c | 7 +--
> drivers/gpu/drm/scheduler/sched_entity.c | 65 +++++++++---------------
> drivers/gpu/drm/v3d/v3d_drv.c | 7 +--
> include/drm/gpu_scheduler.h | 9 ++--
> 11 files changed, 69 insertions(+), 74 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index a0d3d7b756eb..e8f46c13d073 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -122,7 +122,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>
> for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
> - struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
> + struct drm_gpu_scheduler *sched_list[AMDGPU_MAX_RINGS];
> unsigned num_rings = 0;
> unsigned num_rqs = 0;
>
> @@ -181,12 +181,13 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> if (!rings[j]->adev)
> continue;
>
> - rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
> + sched_list[num_rqs++] = &rings[j]->sched;
> }
>
> for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> r = drm_sched_entity_init(&ctx->entities[i][j].entity,
> - rqs, num_rqs, &ctx->guilty);
> + sched_list, num_rqs,
> + &ctx->guilty, priority);
> if (r)
> goto error_cleanup_entities;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 19ffe00d9072..a960dd7c0711 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1957,11 +1957,12 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
>
> if (enable) {
> struct amdgpu_ring *ring;
> - struct drm_sched_rq *rq;
> + struct drm_gpu_scheduler *sched;
>
> ring = adev->mman.buffer_funcs_ring;
> - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
> - r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL);
> + sched = &ring->sched;
> + r = drm_sched_entity_init(&adev->mman.entity, &sched,
> + 1, NULL, DRM_SCHED_PRIORITY_KERNEL);
> if (r) {
> DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
> r);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index e324bfe6c58f..b803a8882864 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -330,12 +330,13 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
> int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
> {
> struct amdgpu_ring *ring;
> - struct drm_sched_rq *rq;
> + struct drm_gpu_scheduler *sched;
> int r;
>
> ring = &adev->uvd.inst[0].ring;
> - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
> - r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL);
> + sched = &ring->sched;
> + r = drm_sched_entity_init(&adev->uvd.entity, &sched,
> + 1, NULL, DRM_SCHED_PRIORITY_NORMAL);
> if (r) {
> DRM_ERROR("Failed setting up UVD kernel entity.\n");
> return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> index 46b590af2fd2..b44f28d44fb4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> @@ -240,12 +240,13 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
> int amdgpu_vce_entity_init(struct amdgpu_device *adev)
> {
> struct amdgpu_ring *ring;
> - struct drm_sched_rq *rq;
> + struct drm_gpu_scheduler *sched;
> int r;
>
> ring = &adev->vce.ring[0];
> - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
> - r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL);
> + sched = &ring->sched;
> + r = drm_sched_entity_init(&adev->vce.entity, &sched,
> + 1, NULL, DRM_SCHED_PRIORITY_NORMAL);
> if (r != 0) {
> DRM_ERROR("Failed setting up VCE run queue.\n");
> return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index a94c4faa5af1..ec6141773a92 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2687,6 +2687,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> {
> struct amdgpu_bo_param bp;
> struct amdgpu_bo *root;
> + struct drm_gpu_scheduler *sched_list[AMDGPU_MAX_RINGS];
> int r, i;
>
> vm->va = RB_ROOT_CACHED;
> @@ -2700,14 +2701,19 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> spin_lock_init(&vm->invalidated_lock);
> INIT_LIST_HEAD(&vm->freed);
>
> + for (i = 0; i < adev->vm_manager.vm_pte_num_rqs; i++)
> + sched_list[i] = adev->vm_manager.vm_pte_rqs[i]->sched;
> +
> /* create scheduler entities for page table updates */
> - r = drm_sched_entity_init(&vm->direct, adev->vm_manager.vm_pte_rqs,
> - adev->vm_manager.vm_pte_num_rqs, NULL);
> + r = drm_sched_entity_init(&vm->direct, sched_list,
> + adev->vm_manager.vm_pte_num_rqs,
> + NULL, DRM_SCHED_PRIORITY_KERNEL);
> if (r)
> return r;
>
> - r = drm_sched_entity_init(&vm->delayed, adev->vm_manager.vm_pte_rqs,
> - adev->vm_manager.vm_pte_num_rqs, NULL);
> + r = drm_sched_entity_init(&vm->delayed, sched_list,
> + adev->vm_manager.vm_pte_num_rqs,
> + NULL, DRM_SCHED_PRIORITY_KERNEL);
> if (r)
> goto error_free_direct;
>
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
> index 1f9c01be40d7..a65c1e115e35 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
> @@ -65,12 +65,12 @@ static int etnaviv_open(struct drm_device *dev, struct drm_file *file)
>
> for (i = 0; i < ETNA_MAX_PIPES; i++) {
> struct etnaviv_gpu *gpu = priv->gpu[i];
> - struct drm_sched_rq *rq;
> + struct drm_gpu_scheduler *sched;
>
> if (gpu) {
> - rq = &gpu->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
> - drm_sched_entity_init(&ctx->sched_entity[i],
> - &rq, 1, NULL);
> + sched = &gpu->sched;
> + drm_sched_entity_init(&ctx->sched_entity[i], &sched,
> + 1, NULL, DRM_SCHED_PRIORITY_NORMAL);
> }
> }
>
> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
> index f522c5f99729..a7e53878d841 100644
> --- a/drivers/gpu/drm/lima/lima_sched.c
> +++ b/drivers/gpu/drm/lima/lima_sched.c
> @@ -159,9 +159,10 @@ int lima_sched_context_init(struct lima_sched_pipe *pipe,
> struct lima_sched_context *context,
> atomic_t *guilty)
> {
> - struct drm_sched_rq *rq = pipe->base.sched_rq + DRM_SCHED_PRIORITY_NORMAL;
> + struct drm_gpu_scheduler *sched = &pipe->base;
>
> - return drm_sched_entity_init(&context->base, &rq, 1, guilty);
> + return drm_sched_entity_init(&context->base, &sched,
> + 1, guilty, DRM_SCHED_PRIORITY_NORMAL);
> }
>
> void lima_sched_context_fini(struct lima_sched_pipe *pipe,
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index d411eb6c8eb9..84178bcf35c9 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -542,12 +542,13 @@ int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
> {
> struct panfrost_device *pfdev = panfrost_priv->pfdev;
> struct panfrost_job_slot *js = pfdev->js;
> - struct drm_sched_rq *rq;
> + struct drm_gpu_scheduler *sched;
> int ret, i;
>
> for (i = 0; i < NUM_JOB_SLOTS; i++) {
> - rq = &js->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
> - ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i], &rq, 1, NULL);
> + sched = &js->queue[i].sched;
> + ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
> + &sched, 1, NULL, DRM_SCHED_PRIORITY_NORMAL);
> if (WARN_ON(ret))
> return ret;
> }
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
> index 461a7a8129f4..e10d37266836 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -38,9 +38,9 @@
> * submit to HW ring.
> *
> * @entity: scheduler entity to init
> - * @rq_list: the list of run queue on which jobs from this
> + * @sched_list: the list of drm scheds on which jobs from this
> * entity can be submitted
> - * @num_rq_list: number of run queue in rq_list
> + * @num_sched_list: number of drm sched in sched_list
> * @guilty: atomic_t set to 1 when a job on this queue
> * is found to be guilty causing a timeout
> *
> @@ -50,32 +50,34 @@
> * Returns 0 on success or a negative error code on failure.
> */
> int drm_sched_entity_init(struct drm_sched_entity *entity,
> - struct drm_sched_rq **rq_list,
> - unsigned int num_rq_list,
> - atomic_t *guilty)
> + struct drm_gpu_scheduler **sched_list,
> + unsigned int num_sched_list,
> + atomic_t *guilty, enum drm_sched_priority priority)
> {
> int i;
>
> - if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0])))
> + if (!(entity && sched_list && (num_sched_list == 0 || sched_list[0])))
> return -EINVAL;
>
> memset(entity, 0, sizeof(struct drm_sched_entity));
> INIT_LIST_HEAD(&entity->list);
> entity->rq = NULL;
> entity->guilty = guilty;
> - entity->num_rq_list = num_rq_list;
> - entity->rq_list = kcalloc(num_rq_list, sizeof(struct drm_sched_rq *),
> - GFP_KERNEL);
> - if (!entity->rq_list)
> + entity->num_sched_list = num_sched_list;
> + entity->priority = priority;
> + entity->sched_list = kcalloc(num_sched_list,
> + sizeof(struct drm_gpu_scheduler *), GFP_KERNEL);
> +
> + if(!entity->sched_list)
> return -ENOMEM;
>
> init_completion(&entity->entity_idle);
>
> - for (i = 0; i < num_rq_list; ++i)
> - entity->rq_list[i] = rq_list[i];
> + for (i = 0; i < num_sched_list; i++)
> + entity->sched_list[i] = sched_list[i];
>
> - if (num_rq_list)
> - entity->rq = rq_list[0];
> + if (num_sched_list)
> + entity->rq = &entity->sched_list[0]->sched_rq[entity->priority];
>
> entity->last_scheduled = NULL;
>
> @@ -139,10 +141,10 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
> unsigned int min_jobs = UINT_MAX, num_jobs;
> int i;
>
> - for (i = 0; i < entity->num_rq_list; ++i) {
> - struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
> + for (i = 0; i < entity->num_sched_list; ++i) {
> + struct drm_gpu_scheduler *sched = entity->sched_list[i];
>
> - if (!entity->rq_list[i]->sched->ready) {
> + if (!entity->sched_list[i]->ready) {
> DRM_WARN("sched%s is not ready, skipping", sched->name);
> continue;
> }
> @@ -150,7 +152,7 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
> num_jobs = atomic_read(&sched->num_jobs);
> if (num_jobs < min_jobs) {
> min_jobs = num_jobs;
> - rq = entity->rq_list[i];
> + rq = &entity->sched_list[i]->sched_rq[entity->priority];
> }
> }
>
> @@ -308,7 +310,7 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity)
>
> dma_fence_put(entity->last_scheduled);
> entity->last_scheduled = NULL;
> - kfree(entity->rq_list);
> + kfree(entity->sched_list);
> }
> EXPORT_SYMBOL(drm_sched_entity_fini);
>
> @@ -353,15 +355,6 @@ static void drm_sched_entity_wakeup(struct dma_fence *f,
> drm_sched_wakeup(entity->rq->sched);
> }
>
> -/**
> - * drm_sched_entity_set_rq_priority - helper for drm_sched_entity_set_priority
> - */
> -static void drm_sched_entity_set_rq_priority(struct drm_sched_rq **rq,
> - enum drm_sched_priority priority)
> -{
> - *rq = &(*rq)->sched->sched_rq[priority];
> -}
> -
> /**
> * drm_sched_entity_set_priority - Sets priority of the entity
> *
> @@ -373,20 +366,8 @@ static void drm_sched_entity_set_rq_priority(struct drm_sched_rq **rq,
> void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
> enum drm_sched_priority priority)
> {
> - unsigned int i;
> -
> - spin_lock(&entity->rq_lock);
>
> - for (i = 0; i < entity->num_rq_list; ++i)
> - drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);
> -
> - if (entity->rq) {
> - drm_sched_rq_remove_entity(entity->rq, entity);
> - drm_sched_entity_set_rq_priority(&entity->rq, priority);
> - drm_sched_rq_add_entity(entity->rq, entity);
> - }
> -
> - spin_unlock(&entity->rq_lock);
> + entity->priority = priority;
> }
> EXPORT_SYMBOL(drm_sched_entity_set_priority);
>
> @@ -490,7 +471,7 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
> struct dma_fence *fence;
> struct drm_sched_rq *rq;
>
> - if (spsc_queue_count(&entity->job_queue) || entity->num_rq_list <= 1)
> + if (spsc_queue_count(&entity->job_queue) || entity->num_sched_list <= 1)
> return;
>
> fence = READ_ONCE(entity->last_scheduled);
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
> index 1a07462b4528..c6aff1aedd27 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.c
> +++ b/drivers/gpu/drm/v3d/v3d_drv.c
> @@ -140,7 +140,7 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
> {
> struct v3d_dev *v3d = to_v3d_dev(dev);
> struct v3d_file_priv *v3d_priv;
> - struct drm_sched_rq *rq;
> + struct drm_gpu_scheduler *sched;
> int i;
>
> v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL);
> @@ -150,8 +150,9 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
> v3d_priv->v3d = v3d;
>
> for (i = 0; i < V3D_MAX_QUEUES; i++) {
> - rq = &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
> - drm_sched_entity_init(&v3d_priv->sched_entity[i], &rq, 1, NULL);
> + sched = &v3d->queue[i].sched;
> + drm_sched_entity_init(&v3d_priv->sched_entity[i], &sched,
> + 1, NULL, DRM_SCHED_PRIORITY_NORMAL);
> }
>
> file->driver_priv = v3d_priv;
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 684692a8ed76..9df322dfac30 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -81,8 +81,9 @@ enum drm_sched_priority {
> struct drm_sched_entity {
> struct list_head list;
> struct drm_sched_rq *rq;
> - struct drm_sched_rq **rq_list;
> - unsigned int num_rq_list;
> + unsigned int num_sched_list;
> + struct drm_gpu_scheduler **sched_list;
> + enum drm_sched_priority priority;
> spinlock_t rq_lock;
>
> struct spsc_queue job_queue;
> @@ -312,9 +313,9 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
> struct drm_sched_entity *entity);
>
> int drm_sched_entity_init(struct drm_sched_entity *entity,
> - struct drm_sched_rq **rq_list,
> + struct drm_gpu_scheduler **sched_list,
> unsigned int num_rq_list,
> - atomic_t *guilty);
> + atomic_t *guilty, enum drm_sched_priority priority);
> long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout);
> void drm_sched_entity_fini(struct drm_sched_entity *entity);
> void drm_sched_entity_destroy(struct drm_sched_entity *entity);
More information about the amd-gfx
mailing list