[PATCH 2/2] drm/scheduler: stop setting rq to NULL
Nayan Deshmukh
nayan26deshmukh at gmail.com
Thu Aug 2 06:43:21 UTC 2018
Hi Andrey,
Good catch. I guess since both Christian and I were working on it
in parallel we didn't catch this problem.
If it is only causing a problem in push_job then we can handle it with
something like this:
----------------------------------------------------------------------------------------------------------------------------
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 05dc6ecd4003..f344ce32f128 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -563,6 +563,11 @@ void drm_sched_entity_push_job(struct drm_sched_job
*sched_job,
first = spsc_queue_count(&entity->job_queue) == 0;
reschedule = idle && first && (entity->num_rq_list > 1);
+ if (first && list_empty(&entity->list)) {
+ DRM_ERROR("Trying to push to a killed entity\n");
+ return;
+ }
+
if (reschedule) {
rq = drm_sched_entity_get_free_sched(entity);
spin_lock(&entity->rq_lock);
@@ -580,11 +585,6 @@ void drm_sched_entity_push_job(struct drm_sched_job
*sched_job,
if (first) {
/* Add the entity to the run queue */
spin_lock(&entity->rq_lock);
- if (!entity->rq) {
- DRM_ERROR("Trying to push to a killed entity\n");
- spin_unlock(&entity->rq_lock);
- return;
- }
drm_sched_rq_add_entity(entity->rq, entity);
spin_unlock(&entity->rq_lock);
drm_sched_wakeup(entity->rq->sched);
-----------------------------------------------------------------------------------------------------------------------------
On Thu, Aug 2, 2018 at 3:55 AM Andrey Grodzovsky <Andrey.Grodzovsky at amd.com>
wrote:
> Thinking again about this change and 53d5f1e drm/scheduler: move idle
> entities to scheduler with less load v2
>
> Looks to me like use case for which fc9a539 drm/scheduler: add NULL
> pointer check for run queue (v2) was done
>
> will not work anymore.
>
> First of all in drm_sched_entity_push_job, 'if (!entity->rq)' will never
> be true any more since we stopped setting entity->rq to NULL in
>
> drm_sched_entity_flush.
>
> Second of all, even after we removed the entity from rq in
> drm_sched_entity_flush to terminate any subsequent submissions
>
> to the queue the other thread doing push job can just acquire again a
> run queue
>
> from drm_sched_entity_get_free_sched and continue submission so you
> can't substitute 'if (!entity->rq)' with 'if (list_empty(&entity->list))'.
>
> What about adding a 'stopped' flag to drm_sched_entity to be set in
> drm_sched_entity_flush and in
>
> But it might be worth adding a stopped flag field if it is required at
other places as well.
Thanks,
Nayan
> drm_sched_entity_push_job check for 'if (!entity->stopped)' instead of
> ' if (!entity->rq)' ?
>
> Andrey
>
>
> On 07/30/2018 07:03 AM, Christian König wrote:
> > We removed the redundancy of having an extra scheduler field, so we
> > can't set the rq to NULL any more or otherwise won't know which
> > scheduler to use for the cleanup.
> >
> > Just remove the entity from the scheduling list instead.
> >
> > Signed-off-by: Christian König <christian.koenig at amd.com>
> > ---
> > drivers/gpu/drm/scheduler/gpu_scheduler.c | 35
> +++++++------------------------
> > 1 file changed, 8 insertions(+), 27 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> > index f563e4fbb4b6..1b733229201e 100644
> > --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> > +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> > @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct drm_sched_entity
> *entity,
> > }
> > EXPORT_SYMBOL(drm_sched_entity_init);
> >
> > -/**
> > - * drm_sched_entity_is_initialized - Query if entity is initialized
> > - *
> > - * @sched: Pointer to scheduler instance
> > - * @entity: The pointer to a valid scheduler entity
> > - *
> > - * return true if entity is initialized, false otherwise
> > -*/
> > -static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler
> *sched,
> > - struct drm_sched_entity
> *entity)
> > -{
> > - return entity->rq != NULL &&
> > - entity->rq->sched == sched;
> > -}
> > -
> > /**
> > * drm_sched_entity_is_idle - Check if entity is idle
> > *
> > @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct
> drm_sched_entity *entity)
> > {
> > rmb();
> >
> > - if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL)
> > + if (list_empty(&entity->list) ||
> > + spsc_queue_peek(&entity->job_queue) == NULL)
> > return true;
> >
> > return false;
> > @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct drm_sched_entity
> *entity, long timeout)
> > long ret = timeout;
> >
> > sched = entity->rq->sched;
> > - if (!drm_sched_entity_is_initialized(sched, entity))
> > - return ret;
> > /**
> > * The client will not queue more IBs during this fini, consume
> existing
> > * queued IBs or discard them on SIGKILL
> > @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct drm_sched_entity
> *entity, long timeout)
> > last_user = cmpxchg(&entity->last_user, current->group_leader,
> NULL);
> > if ((!last_user || last_user == current->group_leader) &&
> > (current->flags & PF_EXITING) && (current->exit_code ==
> SIGKILL))
> > - drm_sched_entity_set_rq(entity, NULL);
> > + drm_sched_rq_remove_entity(entity->rq, entity);
> >
> > return ret;
> > }
> > @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct drm_sched_entity
> *entity)
> > struct drm_gpu_scheduler *sched;
> >
> > sched = entity->rq->sched;
> > - drm_sched_entity_set_rq(entity, NULL);
> > + drm_sched_rq_remove_entity(entity->rq, entity);
> >
> > /* Consumption of existing IBs wasn't completed. Forcefully
> > * remove them here.
> > @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct
> drm_sched_entity *entity,
> > if (entity->rq == rq)
> > return;
> >
> > - spin_lock(&entity->rq_lock);
> > -
> > - if (entity->rq)
> > - drm_sched_rq_remove_entity(entity->rq, entity);
> > + BUG_ON(!rq);
> >
> > + spin_lock(&entity->rq_lock);
> > + drm_sched_rq_remove_entity(entity->rq, entity);
> > entity->rq = rq;
> > - if (rq)
> > - drm_sched_rq_add_entity(rq, entity);
> > -
> > + drm_sched_rq_add_entity(rq, entity);
> > spin_unlock(&entity->rq_lock);
> > }
> > EXPORT_SYMBOL(drm_sched_entity_set_rq);
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/dri-devel/attachments/20180802/e4e171f6/attachment.html>
More information about the dri-devel
mailing list