<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body text="#000000" bgcolor="#FFFFFF">
    <p><br>
    </p>
    <br>
    <div class="moz-cite-prefix">On 08/03/2018 08:12 AM, Nayan Deshmukh
      wrote:<br>
    </div>
    <blockquote type="cite"
cite="mid:CAFd4ddyk2QPgK3LgFBgxAAcxkr9qZ0yiN4bYcQXzkMyFX=EZHQ@mail.gmail.com">
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
      <div dir="auto">
        <div><br>
          <br>
          <div class="gmail_quote">
            <div dir="ltr">On Thu, Aug 2, 2018, 8:09 PM Andrey
              Grodzovsky <<a href="mailto:Andrey.Grodzovsky@amd.com"
                moz-do-not-send="true">Andrey.Grodzovsky@amd.com</a>>
              wrote:<br>
            </div>
            <blockquote class="gmail_quote" style="margin:0 0 0
              .8ex;border-left:1px #ccc solid;padding-left:1ex">
              <div text="#000000" bgcolor="#FFFFFF">
                <p><br>
                </p>
                <br>
                <div class="m_-2527259700652160276moz-cite-prefix">On
                  08/02/2018 02:43 AM, Nayan Deshmukh wrote:<br>
                </div>
                <blockquote type="cite">
                  <div dir="ltr">
                    <div>
                      <div>
                        <div>Hi Andrey,<br>
                          <br>
                        </div>
                        Good catch. I guess since both Christian and I
                        were working on it parallelly we didn't catch
                        this problem.<br>
                        <br>
                      </div>
                      If it is only causing a problem in push_job then
                      we can handle it something like this:<br>
                      <br>
----------------------------------------------------------------------------------------------------------------------------<br>
                      diff --git
                      a/drivers/gpu/drm/scheduler/gpu_scheduler.c
                      b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
                      index 05dc6ecd4003..f344ce32f128 100644<br>
                      --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
                      +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
                      @@ -563,6 +563,11 @@ void
                      drm_sched_entity_push_job(struct drm_sched_job
                      *sched_job,<br>
                              first =
                      spsc_queue_count(&entity->job_queue) == 0;<br>
                              reschedule = idle && first
                      && (entity->num_rq_list > 1);<br>
                       <br>
                      +       if (first &&
                      list_empty(&entity->list)) {<br>
                    </div>
                  </div>
                </blockquote>
                <br>
                first might be false if the other process interrupted by
                SIGKILL  in middle of  wait_event_killable in
                drm_sched_entity_flush when there were still item in
                queue.<br>
                I don't see a good way besides adding a 'stopped' flag
                to drm_sched_enitity.<br>
              </div>
            </blockquote>
          </div>
        </div>
        <div dir="auto">Sorry I missed this mail. This case might happen
          but this was also not being handled previously. </div>
        <div dir="auto"><br>
        </div>
        <div dir="auto">Nayan <br>
        </div>
      </div>
    </blockquote>
    <br>
    The original code before  'drm/scheduler: stop setting rq to NULL'
    was , because you didn't use the queue's emptiness as a criteria for
    aborting your next push to queue but rather<br>
    checking for entity->rq != NULL.<br>
    <br>
    Andrey<br>
    <br>
    <blockquote type="cite"
cite="mid:CAFd4ddyk2QPgK3LgFBgxAAcxkr9qZ0yiN4bYcQXzkMyFX=EZHQ@mail.gmail.com">
      <div dir="auto">
        <div dir="auto">
          <div class="gmail_quote">
            <blockquote class="gmail_quote" style="margin:0 0 0
              .8ex;border-left:1px #ccc solid;padding-left:1ex">
              <div text="#000000" bgcolor="#FFFFFF"> <br>
                Andrey<br>
                <br>
                <blockquote type="cite">
                  <div dir="ltr">
                    <div>+               DRM_ERROR("Trying to push to a
                      killed entity\n");<br>
                      +               return;<br>
                      +       }<br>
                      +<br>
                              if (reschedule) {<br>
                                      rq =
                      drm_sched_entity_get_free_sched(entity);<br>
                                     
                      spin_lock(&entity->rq_lock);<br>
                      @@ -580,11 +585,6 @@ void
                      drm_sched_entity_push_job(struct drm_sched_job
                      *sched_job,<br>
                              if (first) {<br>
                                      /* Add the entity to the run queue
                      */<br>
                                     
                      spin_lock(&entity->rq_lock);<br>
                      -               if (!entity->rq) {<br>
                      -                       DRM_ERROR("Trying to push
                      to a killed entity\n");<br>
                      -                      
                      spin_unlock(&entity->rq_lock);<br>
                      -                       return;<br>
                      -               }<br>
                                     
                      drm_sched_rq_add_entity(entity->rq, entity);<br>
                                     
                      spin_unlock(&entity->rq_lock);<br>
                                     
                      drm_sched_wakeup(entity->rq->sched);<br>
-----------------------------------------------------------------------------------------------------------------------------<br>
                    </div>
                    <br>
                    <br>
                    <div class="gmail_quote">
                      <div dir="ltr">On Thu, Aug 2, 2018 at 3:55 AM
                        Andrey Grodzovsky <<a
                          href="mailto:Andrey.Grodzovsky@amd.com"
                          target="_blank" rel="noreferrer"
                          moz-do-not-send="true">Andrey.Grodzovsky@amd.com</a>>
                        wrote:<br>
                      </div>
                      <blockquote class="gmail_quote" style="margin:0 0
                        0 .8ex;border-left:1px #ccc
                        solid;padding-left:1ex">Thinking again about
                        this change and 53d5f1e drm/scheduler: move idle
                        <br>
                        entities to scheduler with less load v2<br>
                        <br>
                        Looks to me like use case for which fc9a539
                        drm/scheduler: add NULL <br>
                        pointer check for run queue (v2) was done<br>
                        <br>
                        will not work anymore.<br>
                        <br>
                        First of all in drm_sched_entity_push_job, 'if
                        (!entity->rq)' will never <br>
                        be true any more since we stopped entity->rq
                        to NULL in<br>
                        <br>
                        drm_sched_entity_flush.<br>
                        <br>
                        Second of all, even after we removed the entity
                        from rq in <br>
                        drm_sched_entity_flush to terminate any
                        subsequent submissions<br>
                        <br>
                        to the queue the other thread doing push job can
                        just acquire again a <br>
                        run queue<br>
                        <br>
                        from drm_sched_entity_get_free_sched and
                        continue submission so you <br>
                        can't substitute ' if (!entity->rq)' to 'if
                        (list_empty(&entity->list))'.<br>
                        <br>
                        What about adding a 'stopped' flag to
                        drm_sched_entity to be set in <br>
                        drm_sched_entity_flush and in<br>
                        <br>
                      </blockquote>
                      <div>But it might be worth adding a stopped flag
                        field if it is required at other places as
                        well.  <br>
                        <br>
                      </div>
                      <div>Thanks,<br>
                      </div>
                      <div>Nayan<br>
                      </div>
                      <blockquote class="gmail_quote" style="margin:0 0
                        0 .8ex;border-left:1px #ccc
                        solid;padding-left:1ex">
                        drm_sched_entity_push_job check for  'if
                        (!entity->stopped)' instead of  <br>
                        ' if (!entity->rq)' ?<br>
                        <br>
                        Andrey<br>
                        <br>
                        <br>
                        On 07/30/2018 07:03 AM, Christian König wrote:<br>
                        > We removed the redundancy of having an
                        extra scheduler field, so we<br>
                        > can't set the rq to NULL any more or
                        otherwise won't know which<br>
                        > scheduler to use for the cleanup.<br>
                        ><br>
                        > Just remove the entity from the scheduling
                        list instead.<br>
                        ><br>
                        > Signed-off-by: Christian König <<a
                          href="mailto:christian.koenig@amd.com"
                          target="_blank" rel="noreferrer"
                          moz-do-not-send="true">christian.koenig@amd.com</a>><br>
                        > ---<br>
                        >   drivers/gpu/drm/scheduler/gpu_scheduler.c
                        | 35 +++++++------------------------<br>
                        >   1 file changed, 8 insertions(+), 27
                        deletions(-)<br>
                        ><br>
                        > diff --git
                        a/drivers/gpu/drm/scheduler/gpu_scheduler.c
                        b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
                        > index f563e4fbb4b6..1b733229201e 100644<br>
                        > ---
                        a/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
                        > +++
                        b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
                        > @@ -198,21 +198,6 @@ int
                        drm_sched_entity_init(struct drm_sched_entity
                        *entity,<br>
                        >   }<br>
                        >   EXPORT_SYMBOL(drm_sched_entity_init);<br>
                        >   <br>
                        > -/**<br>
                        > - * drm_sched_entity_is_initialized - Query
                        if entity is initialized<br>
                        > - *<br>
                        > - * @sched: Pointer to scheduler instance<br>
                        > - * @entity: The pointer to a valid
                        scheduler entity<br>
                        > - *<br>
                        > - * return true if entity is initialized,
                        false otherwise<br>
                        > -*/<br>
                        > -static bool
                        drm_sched_entity_is_initialized(struct
                        drm_gpu_scheduler *sched,<br>
                        > -                                       
                         struct drm_sched_entity *entity)<br>
                        > -{<br>
                        > -     return entity->rq != NULL
                        &&<br>
                        > -             entity->rq->sched ==
                        sched;<br>
                        > -}<br>
                        > -<br>
                        >   /**<br>
                        >    * drm_sched_entity_is_idle - Check if
                        entity is idle<br>
                        >    *<br>
                        > @@ -224,7 +209,8 @@ static bool
                        drm_sched_entity_is_idle(struct drm_sched_entity
                        *entity)<br>
                        >   {<br>
                        >       rmb();<br>
                        >   <br>
                        > -     if (!entity->rq ||
                        spsc_queue_peek(&entity->job_queue) ==
                        NULL)<br>
                        > +     if (list_empty(&entity->list)
                        ||<br>
                        > +       
                         spsc_queue_peek(&entity->job_queue) ==
                        NULL)<br>
                        >               return true;<br>
                        >   <br>
                        >       return false;<br>
                        > @@ -279,8 +265,6 @@ long
                        drm_sched_entity_flush(struct drm_sched_entity
                        *entity, long timeout)<br>
                        >       long ret = timeout;<br>
                        >   <br>
                        >       sched = entity->rq->sched;<br>
                        > -     if
                        (!drm_sched_entity_is_initialized(sched,
                        entity))<br>
                        > -             return ret;<br>
                        >       /**<br>
                        >        * The client will not queue more IBs
                        during this fini, consume existing<br>
                        >        * queued IBs or discard them on
                        SIGKILL<br>
                        > @@ -299,7 +283,7 @@ long
                        drm_sched_entity_flush(struct drm_sched_entity
                        *entity, long timeout)<br>
                        >       last_user =
                        cmpxchg(&entity->last_user,
                        current->group_leader, NULL);<br>
                        >       if ((!last_user || last_user ==
                        current->group_leader) &&<br>
                        >           (current->flags &
                        PF_EXITING) && (current->exit_code ==
                        SIGKILL))<br>
                        > -           
                         drm_sched_entity_set_rq(entity, NULL);<br>
                        > +           
                         drm_sched_rq_remove_entity(entity->rq,
                        entity);<br>
                        >   <br>
                        >       return ret;<br>
                        >   }<br>
                        > @@ -320,7 +304,7 @@ void
                        drm_sched_entity_fini(struct drm_sched_entity
                        *entity)<br>
                        >       struct drm_gpu_scheduler *sched;<br>
                        >   <br>
                        >       sched = entity->rq->sched;<br>
                        > -     drm_sched_entity_set_rq(entity,
                        NULL);<br>
                        > +   
                         drm_sched_rq_remove_entity(entity->rq,
                        entity);<br>
                        >   <br>
                        >       /* Consumption of existing IBs wasn't
                        completed. Forcefully<br>
                        >        * remove them here.<br>
                        > @@ -416,15 +400,12 @@ void
                        drm_sched_entity_set_rq(struct drm_sched_entity
                        *entity,<br>
                        >       if (entity->rq == rq)<br>
                        >               return;<br>
                        >   <br>
                        > -     spin_lock(&entity->rq_lock);<br>
                        > -<br>
                        > -     if (entity->rq)<br>
                        > -           
                         drm_sched_rq_remove_entity(entity->rq,
                        entity);<br>
                        > +     BUG_ON(!rq);<br>
                        >   <br>
                        > +     spin_lock(&entity->rq_lock);<br>
                        > +   
                         drm_sched_rq_remove_entity(entity->rq,
                        entity);<br>
                        >       entity->rq = rq;<br>
                        > -     if (rq)<br>
                        > -             drm_sched_rq_add_entity(rq,
                        entity);<br>
                        > -<br>
                        > +     drm_sched_rq_add_entity(rq, entity);<br>
                        >       spin_unlock(&entity->rq_lock);<br>
                        >   }<br>
                        >   EXPORT_SYMBOL(drm_sched_entity_set_rq);<br>
                        <br>
                      </blockquote>
                    </div>
                  </div>
                </blockquote>
                <br>
              </div>
            </blockquote>
          </div>
        </div>
      </div>
    </blockquote>
    <br>
  </body>
</html>