[PATCH] drm/panthor: Make the timeout per-queue instead of per-job

Boris Brezillon boris.brezillon at collabora.com
Tue Apr 1 12:10:41 UTC 2025


On Fri,  7 Mar 2025 15:55:52 +0000
Ashley Smith <ashley.smith at collabora.com> wrote:

> +static void
> +queue_suspend_timeout(struct panthor_queue *queue)
> +{
> +	unsigned long qtimeout, now;
> +	struct panthor_group *group;
> +	struct panthor_job *job;
> +	bool timer_was_active;
> +
> +	spin_lock(&queue->fence_ctx.lock);
> +
> +	/* Already suspended, nothing to do. */
> +	if (queue->timeout.remaining != MAX_SCHEDULE_TIMEOUT)
> +		goto out_unlock;
> +
> +	job = list_first_entry_or_null(&queue->fence_ctx.in_flight_jobs,
> +				       struct panthor_job, node);
> +	group = job ? job->group : NULL;
> +
> +	/* If the queue is blocked and the group is idle, we want the timer to
> +	 * keep running because the group can't be unblocked by other queues,
> +	 * so it has to come from an external source, and we want to timebox
> +	 * this external signalling.
> +	 */
> +	if (group && (group->blocked_queues & BIT(job->queue_idx)) &&
> +	    group_is_idle(group))
> +		goto out_unlock;
> +
> +	now = jiffies;
> +	qtimeout = queue->timeout.work.timer.expires;
> +
> +	/* Cancel the timer. */
> +	timer_was_active = cancel_delayed_work(&queue->timeout.work);

Looks like queue_suspend_timeout() is only called on a state update,
and this won't happen if the group suspension/termination fails (FW
hang), which will leave this delayed work behind, possibly leading
to a UAF or a spurious queue_timeout_work() call when we don't expect
one.

diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index e96179ed74e6..1106967af0ac 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -2784,8 +2784,18 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
                         * automatically terminate all active groups, so let's
                         * force the state to halted here.
                         */
-                       if (csg_slot->group->state != PANTHOR_CS_GROUP_TERMINATED)
+                       if (csg_slot->group->state != PANTHOR_CS_GROUP_TERMINATED) {
                                csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED;
+
+                               /* Reset the queue slots manually if the termination
+                                * request failed.
+                                */
+                               for (i = 0; i < group->queue_count; i++) {
+                                       if (group->queues[i])
+                                               cs_slot_reset_locked(ptdev, csg_id, i);
+                               }
+                       }
+
                        slot_mask &= ~BIT(csg_id);
                }
        }


More information about the dri-devel mailing list