[PATCH] drm/xe: fix UAF around queue destruction
Matthew Auld
matthew.auld at intel.com
Mon Sep 23 08:51:23 UTC 2024
On 20/09/2024 20:27, Matthew Brost wrote:
> On Fri, Sep 20, 2024 at 06:26:00PM +0100, Matthew Auld wrote:
>> We currently do stuff like queuing the final destruction step on a
>> random system wq, which will outlive the driver instance. With bad
>
> I understand that job destruction is async but I thought our ref
> counting made this safe. I suppose we don't ref count the device which
> is likely a problem.
Yeah, there is no refcounting on the drm device. Adding that looked
like the simplest option, however it would mean module unload would
sometimes randomly fail even though all clients are closed, so I
figured we need some kind of flush instead.
>
>> timing we can tear down the driver with one or more workqueue items
>> still in flight, leading to various UAF splats. Add a fini step to ensure
>> user queues are properly torn down. At this point GuC should already be
>> nuked so the queue itself should no longer be referenced from the HW pov.
>>
>> Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2317
>> Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
>> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
>> Cc: Matthew Brost <matthew.brost at intel.com>
>> Cc: <stable at vger.kernel.org> # v6.8+
>> ---
>> drivers/gpu/drm/xe/xe_device.c | 6 +++++-
>> drivers/gpu/drm/xe/xe_device_types.h | 3 +++
>> drivers/gpu/drm/xe/xe_guc_submit.c | 32 +++++++++++++++++++++++++++-
>> 3 files changed, 39 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
>> index cb5a9fd820cf..90b3478ed7cd 100644
>> --- a/drivers/gpu/drm/xe/xe_device.c
>> +++ b/drivers/gpu/drm/xe/xe_device.c
>> @@ -297,6 +297,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
>> if (xe->unordered_wq)
>> destroy_workqueue(xe->unordered_wq);
>>
>> + if (xe->destroy_wq)
>> + destroy_workqueue(xe->destroy_wq);
>> +
>> ttm_device_fini(&xe->ttm);
>> }
>>
>> @@ -360,8 +363,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
>> xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
>> xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
>> xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
>> + xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
>> if (!xe->ordered_wq || !xe->unordered_wq ||
>> - !xe->preempt_fence_wq) {
>> + !xe->preempt_fence_wq || !xe->destroy_wq) {
>> /*
>> * Cleanup done in xe_device_destroy via
>> * drmm_add_action_or_reset register above
>> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
>> index 5ad96d283a71..515385b916cc 100644
>> --- a/drivers/gpu/drm/xe/xe_device_types.h
>> +++ b/drivers/gpu/drm/xe/xe_device_types.h
>> @@ -422,6 +422,9 @@ struct xe_device {
>> /** @unordered_wq: used to serialize unordered work, mostly display */
>> struct workqueue_struct *unordered_wq;
>>
>> + /** @destroy_wq: used to serialize user destroy work, like queue */
>> + struct workqueue_struct *destroy_wq;
>> +
>> /** @tiles: device tiles */
>> struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE];
>>
>> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
>> index fbbe6a487bbb..66441efa0bcd 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
>> @@ -276,10 +276,37 @@ static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
>> }
>> #endif
>>
>> +static void guc_exec_queue_fini_async(struct xe_exec_queue *q);
>> +
>> +static void xe_guc_submit_fini(struct xe_guc *guc)
>> +{
>> + struct xe_device *xe = guc_to_xe(guc);
>> + struct xe_exec_queue *q;
>> + unsigned long index;
>> +
>> + mutex_lock(&guc->submission_state.lock);
>> + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
>> + struct xe_gpu_scheduler *sched = &q->guc->sched;
>> +
>> + xe_assert(xe, !kref_read(&q->refcount));
>> +
>> + xe_sched_submission_stop(sched);
>> +
>> + if (exec_queue_registered(q) && !exec_queue_wedged(q))
>> + guc_exec_queue_fini_async(q);
>
> I don't think this is safe. Jobs ref count the 'q', and if those are
> flushing out in the scheduler it seems like it could be a problem if the
> free of the queue happens while jobs are still around. At this point all
> queues should have 'kill' called on them and are naturally cleaning
> themselves up.
>
> Can we just wait for 'xa_empty(&guc->submission_state.exec_queue_lookup)'
> and then call 'drain_workqueue(xe->destroy_wq)'? The wait could be
> implemented via a simple wait queue. Would that work? Seems safer.
Ok, and I guess with some kind of timeout. Let me try that. Thanks for
taking a look.
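
Roughly something like the below untested sketch (fini_wake here is a
hypothetical wait_queue_head_t added to submission_state, woken from the
queue release path once the entry has been erased from
exec_queue_lookup):

static void xe_guc_submit_fini(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);
	int ret;

	/*
	 * Wait for the last user queue to drop out of the lookup xarray;
	 * fini_wake is assumed to be woken whenever an entry is erased.
	 * Time out so a stuck queue can't hang driver unload forever.
	 */
	ret = wait_event_timeout(guc->submission_state.fini_wake,
				 xa_empty(&guc->submission_state.exec_queue_lookup),
				 HZ * 5);
	xe_assert(xe, ret);

	/* Then flush any destroy work already queued on our own wq. */
	drain_workqueue(xe->destroy_wq);
}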
>
> Matt
>
>> + }
>> + mutex_unlock(&guc->submission_state.lock);
>> +
>> + drain_workqueue(xe->destroy_wq);
>> +
>> + xe_assert(xe, xa_empty(&guc->submission_state.exec_queue_lookup));
>> +}
>> +
>> static void guc_submit_fini(struct drm_device *drm, void *arg)
>> {
>> struct xe_guc *guc = arg;
>>
>> + xe_guc_submit_fini(guc);
>> xa_destroy(&guc->submission_state.exec_queue_lookup);
>> free_submit_wq(guc);
>> }
>> @@ -1268,13 +1295,16 @@ static void __guc_exec_queue_fini_async(struct work_struct *w)
>>
>> static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
>> {
>> + struct xe_guc *guc = exec_queue_to_guc(q);
>> + struct xe_device *xe = guc_to_xe(guc);
>> +
>> INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
>>
>> /* We must block on kernel engines so slabs are empty on driver unload */
>> if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
>> __guc_exec_queue_fini_async(&q->guc->fini_async);
>> else
>> - queue_work(system_wq, &q->guc->fini_async);
>> + queue_work(xe->destroy_wq, &q->guc->fini_async);
>> }
>>
>> static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
>> --
>> 2.46.0
>>