<div class="moz-cite-prefix">On 08/03/2018 08:12 AM, Nayan Deshmukh
wrote:<br>
</div>
<blockquote type="cite"
cite="mid:CAFd4ddyk2QPgK3LgFBgxAAcxkr9qZ0yiN4bYcQXzkMyFX=EZHQ@mail.gmail.com">
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<div dir="auto">
<div><br>
<br>
<div class="gmail_quote">
<div dir="ltr">On Thu, Aug 2, 2018, 8:09 PM Andrey
Grodzovsky <<a href="mailto:Andrey.Grodzovsky@amd.com"
moz-do-not-send="true">Andrey.Grodzovsky@amd.com</a>>
wrote:<br>
</div>
<blockquote class="gmail_quote" style="margin:0 0 0
.8ex;border-left:1px #ccc solid;padding-left:1ex">
<div text="#000000" bgcolor="#FFFFFF">
<p><br>
</p>
<br>
<div class="m_-2527259700652160276moz-cite-prefix">On
08/02/2018 02:43 AM, Nayan Deshmukh wrote:<br>
</div>
<blockquote type="cite">
<div dir="ltr">
<div>
<div>
<div>Hi Andrey,<br>
<br>
</div>
Good catch. I guess since both Christian and I
were working on it parallelly we didn't catch
this problem.<br>
<br>
</div>
If it is only causing a problem in push_job then
we can handle it something like this:<br>
<br>
----------------------------------------------------------------------------------------------------------------------------<br>
>>> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>> index 05dc6ecd4003..f344ce32f128 100644
>>> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>> @@ -563,6 +563,11 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
>>>  	first = spsc_queue_count(&entity->job_queue) == 0;
>>>  	reschedule = idle && first && (entity->num_rq_list > 1);
>>>
>>> +	if (first && list_empty(&entity->list)) {
>>
>> 'first' might be false if the other process was interrupted by SIGKILL
>> in the middle of wait_event_killable in drm_sched_entity_flush while
>> there were still items in the queue.
>> I don't see a good way besides adding a 'stopped' flag to
>> drm_sched_entity.
<div dir="auto">Sorry I missed this mail. This case might happen
but this was also not being handled previously. </div>
<div dir="auto"><br>
</div>
<div dir="auto">Nayan <br>
</div>
</div>
</blockquote>
<br>
The original code before 'drm/scheduler: stop setting rq to NULL' was
handling this case, because you didn't use the queue's emptiness as the
criterion for aborting the next push to the queue, but rather checked for
entity->rq != NULL.
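
So to keep that semantic without a NULL rq, the 'stopped' flag I mentioned
above would look something like the sketch below. This is untested and only
meant to illustrate the idea; the exact field placement and locking would
still need review:

/* new field in struct drm_sched_entity (include/drm/gpu_scheduler.h) */
	bool				stopped;	/* entity no longer accepts new jobs */

/* drm_sched_entity_flush(): mark the entity dead while removing it */
	spin_lock(&entity->rq_lock);
	entity->stopped = true;
	drm_sched_rq_remove_entity(entity->rq, entity);
	spin_unlock(&entity->rq_lock);

/* drm_sched_entity_push_job(): bail out on the flag instead of a NULL rq */
	spin_lock(&entity->rq_lock);
	if (entity->stopped) {
		spin_unlock(&entity->rq_lock);
		DRM_ERROR("Trying to push to a killed entity\n");
		return;
	}
	drm_sched_rq_add_entity(entity->rq, entity);
	spin_unlock(&entity->rq_lock);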

Andrey

<blockquote type="cite"
cite="mid:CAFd4ddyk2QPgK3LgFBgxAAcxkr9qZ0yiN4bYcQXzkMyFX=EZHQ@mail.gmail.com">
<div dir="auto">
<div dir="auto">
<div class="gmail_quote">
<blockquote class="gmail_quote" style="margin:0 0 0
.8ex;border-left:1px #ccc solid;padding-left:1ex">
<div text="#000000" bgcolor="#FFFFFF"> <br>
Andrey<br>
<br>
<blockquote type="cite">
<div dir="ltr">
<div>+ DRM_ERROR("Trying to push to a
killed entity\n");<br>
+ return;<br>
+ }<br>
+<br>
if (reschedule) {<br>
rq =
drm_sched_entity_get_free_sched(entity);<br>
spin_lock(&entity->rq_lock);<br>
@@ -580,11 +585,6 @@ void
drm_sched_entity_push_job(struct drm_sched_job
*sched_job,<br>
if (first) {<br>
/* Add the entity to the run queue
*/<br>
spin_lock(&entity->rq_lock);<br>
- if (!entity->rq) {<br>
- DRM_ERROR("Trying to push
to a killed entity\n");<br>
-
spin_unlock(&entity->rq_lock);<br>
- return;<br>
- }<br>
drm_sched_rq_add_entity(entity->rq, entity);<br>
spin_unlock(&entity->rq_lock);<br>
drm_sched_wakeup(entity->rq->sched);<br>
-----------------------------------------------------------------------------------------------------------------------------<br>
</div>
<br>
<br>
<div class="gmail_quote">
<div dir="ltr">On Thu, Aug 2, 2018 at 3:55 AM
Andrey Grodzovsky <<a
href="mailto:Andrey.Grodzovsky@amd.com"
target="_blank" rel="noreferrer"
moz-do-not-send="true">Andrey.Grodzovsky@amd.com</a>>
wrote:<br>
</div>
<blockquote class="gmail_quote" style="margin:0 0
0 .8ex;border-left:1px #ccc
solid;padding-left:1ex">Thinking again about
this change and 53d5f1e drm/scheduler: move idle
<br>
entities to scheduler with less load v2<br>
<br>
Looks to me like use case for which fc9a539
drm/scheduler: add NULL <br>
pointer check for run queue (v2) was done<br>
<br>
will not work anymore.<br>
<br>
First of all in drm_sched_entity_push_job, 'if
(!entity->rq)' will never <br>
be true any more since we stopped entity->rq
to NULL in<br>
<br>
drm_sched_entity_flush.<br>
<br>
Second of all, even after we removed the entity
from rq in <br>
drm_sched_entity_flush to terminate any
subsequent submissions<br>
<br>
to the queue the other thread doing push job can
just acquire again a <br>
run queue<br>
<br>
from drm_sched_entity_get_free_sched and
continue submission so you <br>
can't substitute ' if (!entity->rq)' to 'if
(list_empty(&entity->list))'.<br>
<br>
What about adding a 'stopped' flag to
drm_sched_entity to be set in <br>
drm_sched_entity_flush and in<br>
<br>
</blockquote>
>>>
>>> But it might be worth adding a stopped flag field if it is required at
>>> other places as well.
>>>
>>> Thanks,
>>> Nayan
>>>
<blockquote class="gmail_quote" style="margin:0 0
0 .8ex;border-left:1px #ccc
solid;padding-left:1ex">
drm_sched_entity_push_job check for 'if
(!entity->stopped)' instead of <br>
' if (!entity->rq)' ?<br>
<br>
Andrey<br>
<br>
<br>
>>>> On 07/30/2018 07:03 AM, Christian König wrote:
>>>>> We removed the redundancy of having an extra scheduler field, so we
>>>>> can't set the rq to NULL any more or otherwise won't know which
>>>>> scheduler to use for the cleanup.
>>>>>
>>>>> Just remove the entity from the scheduling list instead.
>>>>>
>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/scheduler/gpu_scheduler.c | 35 +++++++------------------------
>>>>>   1 file changed, 8 insertions(+), 27 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>>>> index f563e4fbb4b6..1b733229201e 100644
>>>>> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>>>> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>>>> @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
>>>>>   }
>>>>>   EXPORT_SYMBOL(drm_sched_entity_init);
>>>>>
>>>>> -/**
>>>>> - * drm_sched_entity_is_initialized - Query if entity is initialized
>>>>> - *
>>>>> - * @sched: Pointer to scheduler instance
>>>>> - * @entity: The pointer to a valid scheduler entity
>>>>> - *
>>>>> - * return true if entity is initialized, false otherwise
>>>>> -*/
>>>>> -static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler *sched,
>>>>> -					    struct drm_sched_entity *entity)
>>>>> -{
>>>>> -	return entity->rq != NULL &&
>>>>> -		entity->rq->sched == sched;
>>>>> -}
>>>>> -
>>>>>   /**
>>>>>    * drm_sched_entity_is_idle - Check if entity is idle
>>>>>    *
>>>>> @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
>>>>>   {
>>>>>   	rmb();
>>>>>
>>>>> -	if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL)
>>>>> +	if (list_empty(&entity->list) ||
>>>>> +	    spsc_queue_peek(&entity->job_queue) == NULL)
>>>>>   		return true;
>>>>>
>>>>>   	return false;
>>>>> @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
>>>>>   	long ret = timeout;
>>>>>
>>>>>   	sched = entity->rq->sched;
>>>>> -	if (!drm_sched_entity_is_initialized(sched, entity))
>>>>> -		return ret;
>>>>>   	/**
>>>>>   	 * The client will not queue more IBs during this fini, consume existing
>>>>>   	 * queued IBs or discard them on SIGKILL
>>>>> @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
>>>>>   	last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
>>>>>   	if ((!last_user || last_user == current->group_leader) &&
>>>>>   	    (current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
>>>>> -		drm_sched_entity_set_rq(entity, NULL);
>>>>> +		drm_sched_rq_remove_entity(entity->rq, entity);
>>>>>
>>>>>   	return ret;
>>>>>   }
>>>>> @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity)
>>>>>   	struct drm_gpu_scheduler *sched;
>>>>>
>>>>>   	sched = entity->rq->sched;
>>>>> -	drm_sched_entity_set_rq(entity, NULL);
>>>>> +	drm_sched_rq_remove_entity(entity->rq, entity);
>>>>>
>>>>>   	/* Consumption of existing IBs wasn't completed. Forcefully
>>>>>   	 * remove them here.
>>>>> @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct drm_sched_entity *entity,
>>>>>   	if (entity->rq == rq)
>>>>>   		return;
>>>>>
>>>>> -	spin_lock(&entity->rq_lock);
>>>>> -
>>>>> -	if (entity->rq)
>>>>> -		drm_sched_rq_remove_entity(entity->rq, entity);
>>>>> +	BUG_ON(!rq);
>>>>>
>>>>> +	spin_lock(&entity->rq_lock);
>>>>> +	drm_sched_rq_remove_entity(entity->rq, entity);
>>>>>   	entity->rq = rq;
>>>>> -	if (rq)
>>>>> -		drm_sched_rq_add_entity(rq, entity);
>>>>> -
>>>>> +	drm_sched_rq_add_entity(rq, entity);
>>>>>   	spin_unlock(&entity->rq_lock);
>>>>>   }
>>>>>   EXPORT_SYMBOL(drm_sched_entity_set_rq);
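
P.S. To spell out the race from my Aug 2 mail that makes a bare
list_empty(&entity->list) check insufficient, the interleaving looks
roughly like this (a simplified illustration, assuming two processes
sharing one entity):

// Process A (killed by SIGKILL)        Process B (same entity)
//
// drm_sched_entity_flush()
//   wait_event_killable() interrupted,
//   jobs still in the queue
//   drm_sched_rq_remove_entity()
//     -> list_empty(&entity->list)
//        is now true
//                                      drm_sched_entity_push_job()
//                                        picks a new rq via
//                                        drm_sched_entity_get_free_sched()
//                                        drm_sched_rq_add_entity()
//                                          -> entity is back on a run list,
//                                             list_empty() is false again
//                                             and submissions keep going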