[RFC PATCH] drm/sched: Fix a UAF on drm_sched_fence::sched
Christian König
christian.koenig at amd.com
Fri Aug 30 08:14:18 UTC 2024
On 29.08.24 at 19:12, Boris Brezillon wrote:
> dma_fence objects created by an entity might outlive the
> drm_gpu_scheduler this entity was bound to if those fences are retained
> by other objects, like a dma_buf resv. This means that
> drm_sched_fence::sched might be invalid when the resv is walked, which
> in turn leads to a UAF when dma_fence_ops::get_timeline_name() is called.
>
> This probably went unnoticed so far, because the drm_gpu_scheduler had
> the lifetime of the drm_device, so, unless you were removing the device,
> there were no reasons for the scheduler to be gone before its fences.
Nope, that is intentional design. get_timeline_name() is not safe to call
after the fence has signaled, because that would cause circular dependency
problems.
E.g. with hardware fences it can happen that a fence references a driver
module (for the function printing the name) while the module in turn keeps
fences around.
So you can easily end up with a module you can never unload.
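To make this concrete: a hardware driver's get_timeline_name() callback
usually returns a string owned by the driver module. Hypothetical example
(driver name and string made up, assuming the usual <linux/dma-fence.h>
declarations):

static const char *foo_fence_get_timeline_name(struct dma_fence *f)
{
	/*
	 * The string lives in the foo module (here a literal in .rodata,
	 * in practice often a ring name stored in a driver structure), so
	 * every caller implicitly relies on the module staying loaded.
	 */
	return "foo-ring-0";
}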
> With the introduction of a new model where each entity has its own
> drm_gpu_scheduler instance, this situation is likely to happen every time
> a GPU context is destroyed and some of its fences remain attached to
> dma_buf objects still owned by other drivers/processes.
>
> In order to make drm_sched_fence_get_timeline_name() safe, we need to
> copy the scheduler name into our own refcounted object that's only
> destroyed when both the scheduler and all its fences are gone.
>
> The fact drm_sched_fence might have a reference to the drm_gpu_scheduler
> even after it's been released is worrisome though, but I'd rather
> discuss that with everyone than come up with a solution that's likely
> to end up being rejected.
>
> Note that the bug was found while repeatedly reading dma_buf's debugfs
> file, which, at some point, calls dma_resv_describe() on a resv that
> contains signalled fences coming from a destroyed GPU context.
> AFAIK, there's nothing invalid there.
Yeah, but reading debugfs is not guaranteed to not crash the kernel.
On the other hand, the approach with a kref'ed string looks rather sane to
me. One comment on this below.
>
> Cc: Luben Tuikov <ltuikov89 at gmail.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Cc: "Christian König" <christian.koenig at amd.com>
> Cc: Danilo Krummrich <dakr at redhat.com>
> Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>
> ---
> drivers/gpu/drm/scheduler/sched_fence.c | 8 +++-
> drivers/gpu/drm/scheduler/sched_main.c | 18 ++++++++-
> include/drm/gpu_scheduler.h | 52 +++++++++++++++++++++++++
> 3 files changed, 75 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> index 0f35f009b9d3..efa2a71d98eb 100644
> --- a/drivers/gpu/drm/scheduler/sched_fence.c
> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> @@ -90,7 +90,7 @@ static const char *drm_sched_fence_get_driver_name(struct dma_fence *fence)
> static const char *drm_sched_fence_get_timeline_name(struct dma_fence *f)
> {
> struct drm_sched_fence *fence = to_drm_sched_fence(f);
> - return (const char *)fence->sched->name;
> + return (const char *)fence->timeline->name;
> }
>
> static void drm_sched_fence_free_rcu(struct rcu_head *rcu)
> @@ -112,8 +112,10 @@ static void drm_sched_fence_free_rcu(struct rcu_head *rcu)
> */
> void drm_sched_fence_free(struct drm_sched_fence *fence)
> {
> + drm_sched_fence_timeline_put(fence->timeline);
> +
> /* This function should not be called if the fence has been initialized. */
> - if (!WARN_ON_ONCE(fence->sched))
> + if (!WARN_ON_ONCE(fence->timeline))
> kmem_cache_free(sched_fence_slab, fence);
> }
>
> @@ -224,6 +226,8 @@ void drm_sched_fence_init(struct drm_sched_fence *fence,
> unsigned seq;
>
> fence->sched = entity->rq->sched;
> + fence->timeline = fence->sched->fence_timeline;
> + drm_sched_fence_timeline_get(fence->timeline);
> seq = atomic_inc_return(&entity->fence_seq);
> dma_fence_init(&fence->scheduled, &drm_sched_fence_ops_scheduled,
> &fence->lock, entity->fence_context, seq);
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 7e90c9f95611..1e31a9c8ce15 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -1288,10 +1288,21 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
> sched->own_submit_wq = true;
> }
>
> + sched->fence_timeline = kzalloc(sizeof(*sched->fence_timeline), GFP_KERNEL);
> + if (!sched->fence_timeline)
> + goto Out_check_own;
> +
> + sched->fence_timeline->name = kasprintf(GFP_KERNEL, "%s", sched->name);
> + if (!sched->fence_timeline->name)
> + goto Out_free_fence_timeline;
> +
> + kref_init(&sched->fence_timeline->kref);
> +
> sched->sched_rq = kmalloc_array(num_rqs, sizeof(*sched->sched_rq),
> GFP_KERNEL | __GFP_ZERO);
> if (!sched->sched_rq)
> - goto Out_check_own;
> + goto Out_free_fence_timeline;
> +
> sched->num_rqs = num_rqs;
> for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
> sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL);
> @@ -1319,6 +1330,10 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>
> kfree(sched->sched_rq);
> sched->sched_rq = NULL;
> +Out_free_fence_timeline:
> + if (sched->fence_timeline)
> + kfree(sched->fence_timeline->name);
> + kfree(sched->fence_timeline);
> Out_check_own:
> if (sched->own_submit_wq)
> destroy_workqueue(sched->submit_wq);
> @@ -1367,6 +1382,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
> sched->ready = false;
> kfree(sched->sched_rq);
> sched->sched_rq = NULL;
> + drm_sched_fence_timeline_put(sched->fence_timeline);
> }
> EXPORT_SYMBOL(drm_sched_fini);
>
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 5acc64954a88..615ca89f77dc 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -261,6 +261,52 @@ struct drm_sched_rq {
> struct rb_root_cached rb_tree_root;
> };
>
> +/**
> + * struct drm_sched_fence_timeline - Wrapper around the timeline name
> + *
> + * This is needed to cope with the fact dma_fence objects created by
> + * an entity might outlive the drm_gpu_scheduler this entity was bound
> + * to, making drm_sched_fence::sched invalid and leading to a UAF when
> + * dma_fence_ops::get_timeline_name() is called.
> + */
> +struct drm_sched_fence_timeline {
> + /** @kref: Reference count of this timeline object. */
> + struct kref kref;
> +
> + /**
> + * @name: Name of the timeline.
> + *
> + * This is currently a copy of drm_gpu_scheduler::name.
> + */
> + const char *name;
Make that a char name[] and embed the name into the structure. The macro
struct_size() can be used to calculate the size.
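Something along these lines (completely untested sketch, just to illustrate
the idea; the Out_check_own label is the one already used in your patch):

struct drm_sched_fence_timeline {
	/** @kref: Reference count of this timeline object. */
	struct kref kref;

	/** @name: Name of the timeline, copied from drm_gpu_scheduler::name. */
	char name[];
};

and in drm_sched_init():

	size_t name_len = strlen(sched->name) + 1;

	sched->fence_timeline = kzalloc(struct_size(sched->fence_timeline,
						    name, name_len),
					GFP_KERNEL);
	if (!sched->fence_timeline)
		goto Out_check_own;

	memcpy(sched->fence_timeline->name, sched->name, name_len);
	kref_init(&sched->fence_timeline->kref);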
> +};
> +
> +static inline void
> +drm_sched_fence_timeline_release(struct kref *kref)
> +{
> + struct drm_sched_fence_timeline *tl =
> + container_of(kref, struct drm_sched_fence_timeline, kref);
> +
> + kfree(tl->name);
> + kfree(tl);
This avoids having two allocations for the timeline name.
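With the name embedded, the release callback then boils down to a single
kfree(); roughly (again untested):

static inline void
drm_sched_fence_timeline_release(struct kref *kref)
{
	struct drm_sched_fence_timeline *tl =
		container_of(kref, struct drm_sched_fence_timeline, kref);

	/* name[] is part of the same allocation, so one kfree() frees both. */
	kfree(tl);
}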
Regards,
Christian.
> +}
> +
> +static inline void
> +drm_sched_fence_timeline_put(struct drm_sched_fence_timeline *tl)
> +{
> + if (tl)
> + kref_put(&tl->kref, drm_sched_fence_timeline_release);
> +}
> +
> +static inline struct drm_sched_fence_timeline *
> +drm_sched_fence_timeline_get(struct drm_sched_fence_timeline *tl)
> +{
> + if (tl)
> + kref_get(&tl->kref);
> +
> + return tl;
> +}
> +
> /**
> * struct drm_sched_fence - fences corresponding to the scheduling of a job.
> */
> @@ -289,6 +335,9 @@ struct drm_sched_fence {
> */
> ktime_t deadline;
>
> + /** @timeline: the timeline this fence belongs to. */
> + struct drm_sched_fence_timeline *timeline;
> +
> /**
> * @parent: the fence returned by &drm_sched_backend_ops.run_job
> * when scheduling the job on hardware. We signal the
> @@ -488,6 +537,8 @@ struct drm_sched_backend_ops {
> * @credit_count: the current credit count of this scheduler
> * @timeout: the time after which a job is removed from the scheduler.
> * @name: name of the ring for which this scheduler is being used.
> + * @fence_timeline: fence timeline that will be passed to fences created by
> + * an entity that's bound to this scheduler.
> * @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT,
> * as there's usually one run-queue per priority, but could be less.
> * @sched_rq: An allocated array of run-queues of size @num_rqs;
> @@ -521,6 +572,7 @@ struct drm_gpu_scheduler {
> atomic_t credit_count;
> long timeout;
> const char *name;
> + struct drm_sched_fence_timeline *fence_timeline;
> u32 num_rqs;
> struct drm_sched_rq **sched_rq;
> wait_queue_head_t job_scheduled;