<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body text="#000000" bgcolor="#FFFFFF">
    <p><br>
    </p>
    <br>
    <div class="moz-cite-prefix">On 08/02/2018 02:43 AM, Nayan Deshmukh
      wrote:<br>
    </div>
    <blockquote type="cite"
cite="mid:CAFd4ddxVj14Jtkw4jSqst2FTvkLBkDbe0Lj51pQrw5PFbm3GRg@mail.gmail.com">
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
      <div dir="ltr">
        <div>
          <div>
            <div>Hi Andrey,<br>
              <br>
            </div>
            Good catch. I guess since both Christian and I were working
            on it parallelly we didn't catch this problem.<br>
            <br>
          </div>
          If it is only causing a problem in push_job then we can handle
          it something like this:<br>
          <br>
----------------------------------------------------------------------------------------------------------------------------<br>
          diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
          b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
          index 05dc6ecd4003..f344ce32f128 100644<br>
          --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
          +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
          @@ -563,6 +563,11 @@ void drm_sched_entity_push_job(struct
          drm_sched_job *sched_job,<br>
                  first = spsc_queue_count(&entity->job_queue) ==
          0;<br>
                  reschedule = idle && first &&
          (entity->num_rq_list > 1);<br>
           <br>
          +       if (first && list_empty(&entity->list))
          {<br>
        </div>
      </div>
    </blockquote>
    <br>
    first might be false if the other process interrupted by SIGKILL  in
    middle of  wait_event_killable in drm_sched_entity_flush when there
    were still item in queue.<br>
    I don't see a good way besides adding a 'stopped' flag to
    drm_sched_enitity.<br>
    <br>
    Andrey<br>
    <br>
    <blockquote type="cite"
cite="mid:CAFd4ddxVj14Jtkw4jSqst2FTvkLBkDbe0Lj51pQrw5PFbm3GRg@mail.gmail.com">
      <div dir="ltr">
        <div>+               DRM_ERROR("Trying to push to a killed
          entity\n");<br>
          +               return;<br>
          +       }<br>
          +<br>
                  if (reschedule) {<br>
                          rq = drm_sched_entity_get_free_sched(entity);<br>
                          spin_lock(&entity->rq_lock);<br>
          @@ -580,11 +585,6 @@ void drm_sched_entity_push_job(struct
          drm_sched_job *sched_job,<br>
                  if (first) {<br>
                          /* Add the entity to the run queue */<br>
                          spin_lock(&entity->rq_lock);<br>
          -               if (!entity->rq) {<br>
          -                       DRM_ERROR("Trying to push to a killed
          entity\n");<br>
          -                       spin_unlock(&entity->rq_lock);<br>
          -                       return;<br>
          -               }<br>
                          drm_sched_rq_add_entity(entity->rq,
          entity);<br>
                          spin_unlock(&entity->rq_lock);<br>
                          drm_sched_wakeup(entity->rq->sched);<br>
-----------------------------------------------------------------------------------------------------------------------------<br>
        </div>
        <br>
        <br>
        <div class="gmail_quote">
          <div dir="ltr">On Thu, Aug 2, 2018 at 3:55 AM Andrey
            Grodzovsky <<a href="mailto:Andrey.Grodzovsky@amd.com"
              target="_blank" moz-do-not-send="true">Andrey.Grodzovsky@amd.com</a>>
            wrote:<br>
          </div>
          <blockquote class="gmail_quote" style="margin:0 0 0
            .8ex;border-left:1px #ccc solid;padding-left:1ex">Thinking
            again about this change and 53d5f1e drm/scheduler: move idle
            <br>
            entities to scheduler with less load v2<br>
            <br>
            Looks to me like use case for which fc9a539 drm/scheduler:
            add NULL <br>
            pointer check for run queue (v2) was done<br>
            <br>
            will not work anymore.<br>
            <br>
            First of all in drm_sched_entity_push_job, 'if
            (!entity->rq)' will never <br>
            be true any more since we stopped entity->rq to NULL in<br>
            <br>
            drm_sched_entity_flush.<br>
            <br>
            Second of all, even after we removed the entity from rq in <br>
            drm_sched_entity_flush to terminate any subsequent
            submissions<br>
            <br>
            to the queue the other thread doing push job can just
            acquire again a <br>
            run queue<br>
            <br>
            from drm_sched_entity_get_free_sched and continue submission
            so you <br>
            can't substitute ' if (!entity->rq)' to 'if
            (list_empty(&entity->list))'.<br>
            <br>
            What about adding a 'stopped' flag to drm_sched_entity to be
            set in <br>
            drm_sched_entity_flush and in<br>
            <br>
          </blockquote>
          <div>But it might be worth adding a stopped flag field if it
            is required at other places as well.  <br>
            <br>
          </div>
          <div>Thanks,<br>
          </div>
          <div>Nayan<br>
          </div>
          <blockquote class="gmail_quote" style="margin:0 0 0
            .8ex;border-left:1px #ccc solid;padding-left:1ex">
            drm_sched_entity_push_job check for  'if
            (!entity->stopped)' instead of  <br>
            ' if (!entity->rq)' ?<br>
            <br>
            Andrey<br>
            <br>
            <br>
            On 07/30/2018 07:03 AM, Christian König wrote:<br>
            > We removed the redundancy of having an extra scheduler
            field, so we<br>
            > can't set the rq to NULL any more or otherwise won't
            know which<br>
            > scheduler to use for the cleanup.<br>
            ><br>
            > Just remove the entity from the scheduling list
            instead.<br>
            ><br>
            > Signed-off-by: Christian König <<a
              href="mailto:christian.koenig@amd.com" target="_blank"
              moz-do-not-send="true">christian.koenig@amd.com</a>><br>
            > ---<br>
            >   drivers/gpu/drm/scheduler/gpu_scheduler.c | 35
            +++++++------------------------<br>
            >   1 file changed, 8 insertions(+), 27 deletions(-)<br>
            ><br>
            > diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
            b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
            > index f563e4fbb4b6..1b733229201e 100644<br>
            > --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
            > +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
            > @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct
            drm_sched_entity *entity,<br>
            >   }<br>
            >   EXPORT_SYMBOL(drm_sched_entity_init);<br>
            >   <br>
            > -/**<br>
            > - * drm_sched_entity_is_initialized - Query if entity
            is initialized<br>
            > - *<br>
            > - * @sched: Pointer to scheduler instance<br>
            > - * @entity: The pointer to a valid scheduler entity<br>
            > - *<br>
            > - * return true if entity is initialized, false
            otherwise<br>
            > -*/<br>
            > -static bool drm_sched_entity_is_initialized(struct
            drm_gpu_scheduler *sched,<br>
            > -                                         struct
            drm_sched_entity *entity)<br>
            > -{<br>
            > -     return entity->rq != NULL &&<br>
            > -             entity->rq->sched == sched;<br>
            > -}<br>
            > -<br>
            >   /**<br>
            >    * drm_sched_entity_is_idle - Check if entity is idle<br>
            >    *<br>
            > @@ -224,7 +209,8 @@ static bool
            drm_sched_entity_is_idle(struct drm_sched_entity *entity)<br>
            >   {<br>
            >       rmb();<br>
            >   <br>
            > -     if (!entity->rq ||
            spsc_queue_peek(&entity->job_queue) == NULL)<br>
            > +     if (list_empty(&entity->list) ||<br>
            > +         spsc_queue_peek(&entity->job_queue) ==
            NULL)<br>
            >               return true;<br>
            >   <br>
            >       return false;<br>
            > @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct
            drm_sched_entity *entity, long timeout)<br>
            >       long ret = timeout;<br>
            >   <br>
            >       sched = entity->rq->sched;<br>
            > -     if (!drm_sched_entity_is_initialized(sched,
            entity))<br>
            > -             return ret;<br>
            >       /**<br>
            >        * The client will not queue more IBs during this
            fini, consume existing<br>
            >        * queued IBs or discard them on SIGKILL<br>
            > @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct
            drm_sched_entity *entity, long timeout)<br>
            >       last_user = cmpxchg(&entity->last_user,
            current->group_leader, NULL);<br>
            >       if ((!last_user || last_user ==
            current->group_leader) &&<br>
            >           (current->flags & PF_EXITING)
            && (current->exit_code == SIGKILL))<br>
            > -             drm_sched_entity_set_rq(entity, NULL);<br>
            > +             drm_sched_rq_remove_entity(entity->rq,
            entity);<br>
            >   <br>
            >       return ret;<br>
            >   }<br>
            > @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct
            drm_sched_entity *entity)<br>
            >       struct drm_gpu_scheduler *sched;<br>
            >   <br>
            >       sched = entity->rq->sched;<br>
            > -     drm_sched_entity_set_rq(entity, NULL);<br>
            > +     drm_sched_rq_remove_entity(entity->rq,
            entity);<br>
            >   <br>
            >       /* Consumption of existing IBs wasn't completed.
            Forcefully<br>
            >        * remove them here.<br>
            > @@ -416,15 +400,12 @@ void
            drm_sched_entity_set_rq(struct drm_sched_entity *entity,<br>
            >       if (entity->rq == rq)<br>
            >               return;<br>
            >   <br>
            > -     spin_lock(&entity->rq_lock);<br>
            > -<br>
            > -     if (entity->rq)<br>
            > -             drm_sched_rq_remove_entity(entity->rq,
            entity);<br>
            > +     BUG_ON(!rq);<br>
            >   <br>
            > +     spin_lock(&entity->rq_lock);<br>
            > +     drm_sched_rq_remove_entity(entity->rq,
            entity);<br>
            >       entity->rq = rq;<br>
            > -     if (rq)<br>
            > -             drm_sched_rq_add_entity(rq, entity);<br>
            > -<br>
            > +     drm_sched_rq_add_entity(rq, entity);<br>
            >       spin_unlock(&entity->rq_lock);<br>
            >   }<br>
            >   EXPORT_SYMBOL(drm_sched_entity_set_rq);<br>
            <br>
          </blockquote>
        </div>
      </div>
    </blockquote>
    <br>
  </body>
</html>