[RFC PATCH] drm/sched: Make sure drm_sched_fence_ops don't vanish

Matthew Brost matthew.brost at intel.com
Fri Aug 30 22:28:19 UTC 2024


On Fri, Aug 30, 2024 at 12:40:57PM +0200, Boris Brezillon wrote:
> dma_fence objects created by drm_sched might hit other subsystems, which
> means the fence object might potentially outlive the drm_sched module
> lifetime, and at this point the dma_fence::ops points to a memory region
> that no longer exists.
> 
> In order to fix that, let's make sure the drm_sched_fence code is always
> statically linked, just like dma-fence{-chain}.c does.
> 
> Cc: Luben Tuikov <ltuikov89 at gmail.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Cc: "Christian König" <christian.koenig at amd.com>
> Cc: Danilo Krummrich <dakr at redhat.com>
> Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>
> ---
> Just like for the other UAF fix, this is an RFC, as I'm not sure that's
> how we want it fixed. The other option we have is to retain a module ref
> for each initialized fence and release it when the fence is freed.

So this is different than your other patch. From Xe PoV the other patch
is referencing an object which is tied to an IOCTL rather than a module
whereas this referencing a module. If a module can disappear when fence
tied to the module, well that is a bit scary and Xe might have some
issues there - I'd have audit our of exporting internally created
fences.

Based on Christian's feedback we really shouldn't be allowing this but
also don't really love the idea of a fence holding a module ref either.

Seems like we need a well defined + documented rule - exported fences
need to be completely decoupled from the module upon signaling or
exported fences must retain a module ref. I'd probably lean towards the
former. One reason to support the former is fences can be released in
IRQ contexts and dropping a module ref in IRQ context seems a bit
problematic. Also because some oher part of kernel holds on to fence ref
lazily block a module unload just seems wrong.

Sorry if above we have well defined rule and I'm just not aware.

Matt 

> ---
>  drivers/gpu/drm/Makefile                |  2 +-
>  drivers/gpu/drm/scheduler/Makefile      |  7 ++++++-
>  drivers/gpu/drm/scheduler/sched_fence.c | 21 +++++++++------------
>  drivers/gpu/drm/scheduler/sched_main.c  |  3 +++
>  4 files changed, 19 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> index 68cc9258ffc4..b97127da58b7 100644
> --- a/drivers/gpu/drm/Makefile
> +++ b/drivers/gpu/drm/Makefile
> @@ -158,7 +158,7 @@ obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
>  obj-y			+= arm/
>  obj-y			+= display/
>  obj-$(CONFIG_DRM_TTM)	+= ttm/
> -obj-$(CONFIG_DRM_SCHED)	+= scheduler/
> +obj-y			+= scheduler/
>  obj-$(CONFIG_DRM_RADEON)+= radeon/
>  obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
>  obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/
> diff --git a/drivers/gpu/drm/scheduler/Makefile b/drivers/gpu/drm/scheduler/Makefile
> index 53863621829f..bc18d26f27a1 100644
> --- a/drivers/gpu/drm/scheduler/Makefile
> +++ b/drivers/gpu/drm/scheduler/Makefile
> @@ -20,6 +20,11 @@
>  # OTHER DEALINGS IN THE SOFTWARE.
>  #
>  #
> -gpu-sched-y := sched_main.o sched_fence.o sched_entity.o
> +
> +# sched_fence.c contains dma_fence_ops that can't go away, so make sure it's
> +# statically linked when CONFIG_DRM_SCHED=m
> +obj-y += $(if $(CONFIG_DRM_SCHED),sched_fence.o,)
> +
> +gpu-sched-y := sched_main.o sched_entity.o
>  
>  obj-$(CONFIG_DRM_SCHED) += gpu-sched.o
> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> index efa2a71d98eb..ac65589476dd 100644
> --- a/drivers/gpu/drm/scheduler/sched_fence.c
> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> @@ -39,12 +39,7 @@ static int __init drm_sched_fence_slab_init(void)
>  
>  	return 0;
>  }
> -
> -static void __exit drm_sched_fence_slab_fini(void)
> -{
> -	rcu_barrier();
> -	kmem_cache_destroy(sched_fence_slab);
> -}
> +subsys_initcall(drm_sched_fence_slab_init);
>  
>  static void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
>  				       struct dma_fence *fence)
> @@ -74,6 +69,7 @@ void drm_sched_fence_scheduled(struct drm_sched_fence *fence,
>  
>  	dma_fence_signal(&fence->scheduled);
>  }
> +EXPORT_SYMBOL(drm_sched_fence_scheduled);
>  
>  void drm_sched_fence_finished(struct drm_sched_fence *fence, int result)
>  {
> @@ -81,6 +77,7 @@ void drm_sched_fence_finished(struct drm_sched_fence *fence, int result)
>  		dma_fence_set_error(&fence->finished, result);
>  	dma_fence_signal(&fence->finished);
>  }
> +EXPORT_SYMBOL(drm_sched_fence_finished);
>  
>  static const char *drm_sched_fence_get_driver_name(struct dma_fence *fence)
>  {
> @@ -118,6 +115,7 @@ void drm_sched_fence_free(struct drm_sched_fence *fence)
>  	if (!WARN_ON_ONCE(fence->timeline))
>  		kmem_cache_free(sched_fence_slab, fence);
>  }
> +EXPORT_SYMBOL(drm_sched_fence_free);
>  
>  /**
>   * drm_sched_fence_release_scheduled - callback that fence can be freed
> @@ -210,6 +208,9 @@ struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
>  {
>  	struct drm_sched_fence *fence = NULL;
>  
> +	if (unlikely(!sched_fence_slab))
> +		return NULL;
> +
>  	fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL);
>  	if (fence == NULL)
>  		return NULL;
> @@ -219,6 +220,7 @@ struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
>  
>  	return fence;
>  }
> +EXPORT_SYMBOL(drm_sched_fence_alloc);
>  
>  void drm_sched_fence_init(struct drm_sched_fence *fence,
>  			  struct drm_sched_entity *entity)
> @@ -234,9 +236,4 @@ void drm_sched_fence_init(struct drm_sched_fence *fence,
>  	dma_fence_init(&fence->finished, &drm_sched_fence_ops_finished,
>  		       &fence->lock, entity->fence_context + 1, seq);
>  }
> -
> -module_init(drm_sched_fence_slab_init);
> -module_exit(drm_sched_fence_slab_fini);
> -
> -MODULE_DESCRIPTION("DRM GPU scheduler");
> -MODULE_LICENSE("GPL and additional rights");
> +EXPORT_SYMBOL(drm_sched_fence_init);
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 1e31a9c8ce15..eaaf086eab30 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -1467,3 +1467,6 @@ void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched)
>  	queue_work(sched->submit_wq, &sched->work_free_job);
>  }
>  EXPORT_SYMBOL(drm_sched_wqueue_start);
> +
> +MODULE_DESCRIPTION("DRM GPU scheduler");
> +MODULE_LICENSE("GPL and additional rights");
> -- 
> 2.46.0
> 


More information about the dri-devel mailing list