<div dir="ltr"><br><br><div class="gmail_quote"><div dir="ltr">On Fri, Aug 3, 2018 at 7:17 PM Andrey Grodzovsky <<a href="mailto:Andrey.Grodzovsky@amd.com" target="_blank">Andrey.Grodzovsky@amd.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
  
    
  
  <div text="#000000" bgcolor="#FFFFFF">
    <p><br>
    </p>
    <br>
    <div class="m_-6223222008664542963m_3550633764703809527moz-cite-prefix">On 08/03/2018 08:12 AM, Nayan Deshmukh
      wrote:<br>
    </div>
    <blockquote type="cite">
      
      <div dir="auto">
        <div><br>
          <br>
          <div class="gmail_quote">
            <div dir="ltr">On Thu, Aug 2, 2018, 8:09 PM Andrey
              Grodzovsky <<a href="mailto:Andrey.Grodzovsky@amd.com" target="_blank">Andrey.Grodzovsky@amd.com</a>>
              wrote:<br>
            </div>
            <blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
              <div text="#000000" bgcolor="#FFFFFF">
                <p><br>
                </p>
                <br>
                <div class="m_-6223222008664542963m_3550633764703809527m_-2527259700652160276moz-cite-prefix">On
                  08/02/2018 02:43 AM, Nayan Deshmukh wrote:<br>
                </div>
                <blockquote type="cite">
                  <div dir="ltr">
                    <div>
                      <div>
                        <div>Hi Andrey,<br>
                          <br>
                        </div>
                        Good catch. I guess since both Christian and I
                        were working on it parallelly we didn't catch
                        this problem.<br>
                        <br>
                      </div>
                      If it is only causing a problem in push_job then
                      we can handle it something like this:<br>
                      <br>
----------------------------------------------------------------------------------------------------------------------------<br>
                      diff --git
                      a/drivers/gpu/drm/scheduler/gpu_scheduler.c
                      b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
                      index 05dc6ecd4003..f344ce32f128 100644<br>
                      --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
                      +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
                      @@ -563,6 +563,11 @@ void
                      drm_sched_entity_push_job(struct drm_sched_job
                      *sched_job,<br>
                              first =
                      spsc_queue_count(&entity->job_queue) == 0;<br>
                              reschedule = idle && first
                      && (entity->num_rq_list > 1);<br>
                       <br>
                      +       if (first &&
                      list_empty(&entity->list)) {<br>
                    </div>
                  </div>
                </blockquote>
                <br>
                first might be false if the other process interrupted by
                SIGKILL  in middle of  wait_event_killable in
                drm_sched_entity_flush when there were still item in
                queue.<br>
                I don't see a good way besides adding a 'stopped' flag
                to drm_sched_enitity.<br>
              </div>
            </blockquote>
          </div>
        </div>
        <div dir="auto">Sorry I missed this mail. This case might happen
          but this was also not being handled previously. </div>
        <div dir="auto"><br>
        </div>
        <div dir="auto">Nayan <br>
        </div>
      </div>
    </blockquote>
    <br>
    The original code before  'drm/scheduler: stop setting rq to NULL'
    was , because you didn't use the queue's emptiness as a criteria for
    aborting your next push to queue but rather<br>
    checking for entity->rq != NULL.<br></div></blockquote><div>But (entity->rq != NULL) check was inside the body of if (first) so it was also dependent on the queue's emptiness earlier. Well in any case it doesn't matter now as with Christian's patch we can remove the if condition altogether.   <br><br></div><div>Nayan<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div text="#000000" bgcolor="#FFFFFF">
    <br>
    Andrey<br>
    <br>
    <blockquote type="cite">
      <div dir="auto">
        <div dir="auto">
          <div class="gmail_quote">
            <blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
              <div text="#000000" bgcolor="#FFFFFF"> <br>
                Andrey<br>
                <br>
                <blockquote type="cite">
                  <div dir="ltr">
                    <div>+               DRM_ERROR("Trying to push to a
                      killed entity\n");<br>
                      +               return;<br>
                      +       }<br>
                      +<br>
                              if (reschedule) {<br>
                                      rq =
                      drm_sched_entity_get_free_sched(entity);<br>
                                     
                      spin_lock(&entity->rq_lock);<br>
                      @@ -580,11 +585,6 @@ void
                      drm_sched_entity_push_job(struct drm_sched_job
                      *sched_job,<br>
                              if (first) {<br>
                                      /* Add the entity to the run queue
                      */<br>
                                     
                      spin_lock(&entity->rq_lock);<br>
                      -               if (!entity->rq) {<br>
                      -                       DRM_ERROR("Trying to push
                      to a killed entity\n");<br>
                      -                      
                      spin_unlock(&entity->rq_lock);<br>
                      -                       return;<br>
                      -               }<br>
                                     
                      drm_sched_rq_add_entity(entity->rq, entity);<br>
                                     
                      spin_unlock(&entity->rq_lock);<br>
                                     
                      drm_sched_wakeup(entity->rq->sched);<br>
-----------------------------------------------------------------------------------------------------------------------------<br>
                    </div>
                    <br>
                    <br>
                    <div class="gmail_quote">
                      <div dir="ltr">On Thu, Aug 2, 2018 at 3:55 AM
                        Andrey Grodzovsky <<a href="mailto:Andrey.Grodzovsky@amd.com" rel="noreferrer" target="_blank">Andrey.Grodzovsky@amd.com</a>>
                        wrote:<br>
                      </div>
                      <blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Thinking again about
                        this change and 53d5f1e drm/scheduler: move idle
                        <br>
                        entities to scheduler with less load v2<br>
                        <br>
                        Looks to me like use case for which fc9a539
                        drm/scheduler: add NULL <br>
                        pointer check for run queue (v2) was done<br>
                        <br>
                        will not work anymore.<br>
                        <br>
                        First of all in drm_sched_entity_push_job, 'if
                        (!entity->rq)' will never <br>
                        be true any more since we stopped entity->rq
                        to NULL in<br>
                        <br>
                        drm_sched_entity_flush.<br>
                        <br>
                        Second of all, even after we removed the entity
                        from rq in <br>
                        drm_sched_entity_flush to terminate any
                        subsequent submissions<br>
                        <br>
                        to the queue the other thread doing push job can
                        just acquire again a <br>
                        run queue<br>
                        <br>
                        from drm_sched_entity_get_free_sched and
                        continue submission so you <br>
                        can't substitute ' if (!entity->rq)' to 'if
                        (list_empty(&entity->list))'.<br>
                        <br>
                        What about adding a 'stopped' flag to
                        drm_sched_entity to be set in <br>
                        drm_sched_entity_flush and in<br>
                        <br>
                      </blockquote>
                      <div>But it might be worth adding a stopped flag
                        field if it is required at other places as
                        well.  <br>
                        <br>
                      </div>
                      <div>Thanks,<br>
                      </div>
                      <div>Nayan<br>
                      </div>
                      <blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
                        drm_sched_entity_push_job check for  'if
                        (!entity->stopped)' instead of  <br>
                        ' if (!entity->rq)' ?<br>
                        <br>
                        Andrey<br>
                        <br>
                        <br>
                        On 07/30/2018 07:03 AM, Christian König wrote:<br>
                        > We removed the redundancy of having an
                        extra scheduler field, so we<br>
                        > can't set the rq to NULL any more or
                        otherwise won't know which<br>
                        > scheduler to use for the cleanup.<br>
                        ><br>
                        > Just remove the entity from the scheduling
                        list instead.<br>
                        ><br>
                        > Signed-off-by: Christian König <<a href="mailto:christian.koenig@amd.com" rel="noreferrer" target="_blank">christian.koenig@amd.com</a>><br>
                        > ---<br>
                        >   drivers/gpu/drm/scheduler/gpu_scheduler.c
                        | 35 +++++++------------------------<br>
                        >   1 file changed, 8 insertions(+), 27
                        deletions(-)<br>
                        ><br>
                        > diff --git
                        a/drivers/gpu/drm/scheduler/gpu_scheduler.c
                        b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
                        > index f563e4fbb4b6..1b733229201e 100644<br>
                        > ---
                        a/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
                        > +++
                        b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
                        > @@ -198,21 +198,6 @@ int
                        drm_sched_entity_init(struct drm_sched_entity
                        *entity,<br>
                        >   }<br>
                        >   EXPORT_SYMBOL(drm_sched_entity_init);<br>
                        >   <br>
                        > -/**<br>
                        > - * drm_sched_entity_is_initialized - Query
                        if entity is initialized<br>
                        > - *<br>
                        > - * @sched: Pointer to scheduler instance<br>
                        > - * @entity: The pointer to a valid
                        scheduler entity<br>
                        > - *<br>
                        > - * return true if entity is initialized,
                        false otherwise<br>
                        > -*/<br>
                        > -static bool
                        drm_sched_entity_is_initialized(struct
                        drm_gpu_scheduler *sched,<br>
                        > -                                       
                         struct drm_sched_entity *entity)<br>
                        > -{<br>
                        > -     return entity->rq != NULL
                        &&<br>
                        > -             entity->rq->sched ==
                        sched;<br>
                        > -}<br>
                        > -<br>
                        >   /**<br>
                        >    * drm_sched_entity_is_idle - Check if
                        entity is idle<br>
                        >    *<br>
                        > @@ -224,7 +209,8 @@ static bool
                        drm_sched_entity_is_idle(struct drm_sched_entity
                        *entity)<br>
                        >   {<br>
                        >       rmb();<br>
                        >   <br>
                        > -     if (!entity->rq ||
                        spsc_queue_peek(&entity->job_queue) ==
                        NULL)<br>
                        > +     if (list_empty(&entity->list)
                        ||<br>
                        > +       
                         spsc_queue_peek(&entity->job_queue) ==
                        NULL)<br>
                        >               return true;<br>
                        >   <br>
                        >       return false;<br>
                        > @@ -279,8 +265,6 @@ long
                        drm_sched_entity_flush(struct drm_sched_entity
                        *entity, long timeout)<br>
                        >       long ret = timeout;<br>
                        >   <br>
                        >       sched = entity->rq->sched;<br>
                        > -     if
                        (!drm_sched_entity_is_initialized(sched,
                        entity))<br>
                        > -             return ret;<br>
                        >       /**<br>
                        >        * The client will not queue more IBs
                        during this fini, consume existing<br>
                        >        * queued IBs or discard them on
                        SIGKILL<br>
                        > @@ -299,7 +283,7 @@ long
                        drm_sched_entity_flush(struct drm_sched_entity
                        *entity, long timeout)<br>
                        >       last_user =
                        cmpxchg(&entity->last_user,
                        current->group_leader, NULL);<br>
                        >       if ((!last_user || last_user ==
                        current->group_leader) &&<br>
                        >           (current->flags &
                        PF_EXITING) && (current->exit_code ==
                        SIGKILL))<br>
                        > -           
                         drm_sched_entity_set_rq(entity, NULL);<br>
                        > +           
                         drm_sched_rq_remove_entity(entity->rq,
                        entity);<br>
                        >   <br>
                        >       return ret;<br>
                        >   }<br>
                        > @@ -320,7 +304,7 @@ void
                        drm_sched_entity_fini(struct drm_sched_entity
                        *entity)<br>
                        >       struct drm_gpu_scheduler *sched;<br>
                        >   <br>
                        >       sched = entity->rq->sched;<br>
                        > -     drm_sched_entity_set_rq(entity,
                        NULL);<br>
                        > +   
                         drm_sched_rq_remove_entity(entity->rq,
                        entity);<br>
                        >   <br>
                        >       /* Consumption of existing IBs wasn't
                        completed. Forcefully<br>
                        >        * remove them here.<br>
                        > @@ -416,15 +400,12 @@ void
                        drm_sched_entity_set_rq(struct drm_sched_entity
                        *entity,<br>
                        >       if (entity->rq == rq)<br>
                        >               return;<br>
                        >   <br>
                        > -     spin_lock(&entity->rq_lock);<br>
                        > -<br>
                        > -     if (entity->rq)<br>
                        > -           
                         drm_sched_rq_remove_entity(entity->rq,
                        entity);<br>
                        > +     BUG_ON(!rq);<br>
                        >   <br>
                        > +     spin_lock(&entity->rq_lock);<br>
                        > +   
                         drm_sched_rq_remove_entity(entity->rq,
                        entity);<br>
                        >       entity->rq = rq;<br>
                        > -     if (rq)<br>
                        > -             drm_sched_rq_add_entity(rq,
                        entity);<br>
                        > -<br>
                        > +     drm_sched_rq_add_entity(rq, entity);<br>
                        >       spin_unlock(&entity->rq_lock);<br>
                        >   }<br>
                        >   EXPORT_SYMBOL(drm_sched_entity_set_rq);<br>
                        <br>
                      </blockquote>
                    </div>
                  </div>
                </blockquote>
                <br>
              </div>
            </blockquote>
          </div>
        </div>
      </div>
    </blockquote>
    <br>
  </div>

</blockquote></div></div>