[PATCH] drm/xe: Fix missing runtime outer protection for ggtt_remove_node

Matthew Auld matthew.auld at intel.com
Fri Jun 14 14:29:06 UTC 2024


On 13/06/2024 22:53, Rodrigo Vivi wrote:
> Defer the ggtt node removal to a thread if runtime_pm is not active.
> 
> The ggtt node removal can be called from multiple places, including
> places where we cannot protect with outer callers and places we are
> within other locks. So, try to grab the runtime reference if the
> device is already active, otherwise defer the removal to a separate
> thread from where we are sure we can wake the device up.
> 
> v2: - use xe wq instead of system wq (Matt and CI)
>      - Avoid GFP_KERNEL to be future proof since this removal can
>      be called from outside our drivers and we don't want to block
>      if atomic is needed. (Brost)
> v3: amend forgot chunk declaring xe_device.
> v4: Use a xe_ggtt_region to encapsulate the node and remova info,
>      wihtout the need for any memory allocation at runtime.
> v5: Actually fill the delayed_removal.invalidate (Brost)
> v6: - Ensure that ggtt_region is not freed before work finishes (Auld)
>      - Own wq to ensures that the queued works are flushed before
>        ggtt_fini (Brost)
> 
> Cc: Matthew Auld <matthew.auld at intel.com>
> Cc: Paulo Zanoni <paulo.r.zanoni at intel.com>
> Cc: Francois Dugast <francois.dugast at intel.com>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
> ---
>   drivers/gpu/drm/xe/display/xe_fb_pin.c |  10 +-
>   drivers/gpu/drm/xe/xe_bo.c             |   2 +-
>   drivers/gpu/drm/xe/xe_bo.h             |   6 +-
>   drivers/gpu/drm/xe/xe_bo_types.h       |   6 +-
>   drivers/gpu/drm/xe/xe_ggtt.c           | 129 ++++++++++++++++++-------
>   drivers/gpu/drm/xe/xe_ggtt.h           |   1 -
>   drivers/gpu/drm/xe/xe_ggtt_types.h     |  23 +++++
>   7 files changed, 130 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> index a2f417209124..b84139b27970 100644
> --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
> +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> @@ -153,7 +153,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
>   	}
>   
>   	vma->dpt = dpt;
> -	vma->node = dpt->ggtt_node;
> +	vma->node = dpt->ggtt_region->node;
>   	return 0;
>   }
>   
> @@ -203,8 +203,8 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
>   	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
>   		align = max_t(u32, align, SZ_64K);
>   
> -	if (bo->ggtt_node.size && view->type == I915_GTT_VIEW_NORMAL) {
> -		vma->node = bo->ggtt_node;
> +	if (bo->ggtt_region->node.size && view->type == I915_GTT_VIEW_NORMAL) {
> +		vma->node = bo->ggtt_region->node;
>   	} else if (view->type == I915_GTT_VIEW_NORMAL) {
>   		u32 x, size = bo->ttm.base.size;
>   
> @@ -322,8 +322,8 @@ static void __xe_unpin_fb_vma(struct i915_vma *vma)
>   
>   	if (vma->dpt)
>   		xe_bo_unpin_map_no_vm(vma->dpt);
> -	else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) ||
> -		 vma->bo->ggtt_node.start != vma->node.start)
> +	else if (!drm_mm_node_allocated(&vma->bo->ggtt_region->node) ||
> +		 vma->bo->ggtt_region->node.start != vma->node.start)
>   		xe_ggtt_remove_node(ggtt, &vma->node, false);
>   
>   	ttm_bo_reserve(&vma->bo->ttm, false, false, NULL);
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 74294f1b05bc..0bfd6d1eb0be 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -1072,7 +1072,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
>   
>   	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
>   
> -	if (bo->ggtt_node.size)
> +	if (bo->ggtt_region->node.size)
>   		xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
>   
>   #ifdef CONFIG_PROC_FS
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index 6de894c728f5..aeb1ebc0d3fc 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -194,9 +194,9 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size)
>   static inline u32
>   xe_bo_ggtt_addr(struct xe_bo *bo)
>   {
> -	XE_WARN_ON(bo->ggtt_node.size > bo->size);
> -	XE_WARN_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32));
> -	return bo->ggtt_node.start;
> +	XE_WARN_ON(bo->ggtt_region->node.size > bo->size);
> +	XE_WARN_ON(bo->ggtt_region->node.start + bo->ggtt_region->node.size > (1ull << 32));
> +	return bo->ggtt_region->node.start;
>   }
>   
>   int xe_bo_vmap(struct xe_bo *bo);
> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
> index 86422e113d39..f45b1c4bbb6d 100644
> --- a/drivers/gpu/drm/xe/xe_bo_types.h
> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> @@ -14,6 +14,8 @@
>   #include <drm/ttm/ttm_execbuf_util.h>
>   #include <drm/ttm/ttm_placement.h>
>   
> +#include "xe_ggtt_types.h"
> +
>   struct xe_device;
>   struct xe_vm;
>   
> @@ -38,8 +40,8 @@ struct xe_bo {
>   	struct ttm_place placements[XE_BO_MAX_PLACEMENTS];
>   	/** @placement: current placement for this BO */
>   	struct ttm_placement placement;
> -	/** @ggtt_node: GGTT node if this BO is mapped in the GGTT */
> -	struct drm_mm_node ggtt_node;
> +	/** @ggtt_region: GGTT region node if this BO is mapped in the GGTT */
> +	struct xe_ggtt_region *ggtt_region;
>   	/** @vmap: iosys map of this buffer */
>   	struct iosys_map vmap;
>   	/** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */
> diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
> index 8ff91fd1b7c8..d0284f2c7cbc 100644
> --- a/drivers/gpu/drm/xe/xe_ggtt.c
> +++ b/drivers/gpu/drm/xe/xe_ggtt.c
> @@ -101,6 +101,7 @@ static void ggtt_fini_early(struct drm_device *drm, void *arg)
>   {
>   	struct xe_ggtt *ggtt = arg;
>   
> +	destroy_workqueue(ggtt->wq);
>   	mutex_destroy(&ggtt->lock);
>   	drm_mm_takedown(&ggtt->mm);
>   }
> @@ -191,6 +192,8 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
>   	else
>   		ggtt->pt_ops = &xelp_pt_ops;
>   
> +	ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0);

Check for error?

> +
>   	drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
>   		    ggtt->size - xe_wopcm_size(xe));
>   	mutex_init(&ggtt->lock);
> @@ -389,7 +392,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
>   {
>   	u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
>   	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
> -	u64 start = bo->ggtt_node.start;
> +	u64 start = bo->ggtt_region->node.start;
>   	u64 offset, pte;
>   
>   	for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
> @@ -398,18 +401,97 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
>   	}
>   }
>   
> +static void ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
> +			     bool invalidate)
> +{
> +	struct xe_device *xe = tile_to_xe(ggtt->tile);
> +	struct xe_ggtt_region *ggtt_region = container_of(node,
> +							  typeof(*ggtt_region),
> +							  node);
> +	bool bound;
> +	int idx;
> +
> +	bound = drm_dev_enter(&xe->drm, &idx);
> +
> +	mutex_lock(&ggtt->lock);
> +	if (bound)
> +		xe_ggtt_clear(ggtt, node->start, node->size);
> +	drm_mm_remove_node(node);
> +	node->size = 0;
> +	mutex_unlock(&ggtt->lock);
> +
> +	if (!bound)
> +		return;

Missing the kfree here?

> +
> +	if (invalidate)
> +		xe_ggtt_invalidate(ggtt);
> +
> +	drm_dev_exit(idx);
> +	kfree(ggtt_region);
> +}
> +
> +static void ggtt_remove_node_work_func(struct work_struct *work)
> +{
> +	struct xe_ggtt_region *ggtt_region = container_of(work,
> +							  typeof(*ggtt_region),
> +							  delayed_removal.work);
> +	struct xe_device *xe = tile_to_xe(ggtt_region->ggtt->tile);
> +
> +	xe_pm_runtime_get(xe);
> +	ggtt_remove_node(ggtt_region->ggtt, &ggtt_region->node,
> +			 ggtt_region->delayed_removal.invalidate);
> +	xe_pm_runtime_put(xe);
> +}
> +
> +static void ggtt_queue_remove_node(struct drm_mm_node *node, bool invalidate)
> +{
> +	struct xe_ggtt_region *ggtt_region = container_of(node,
> +							  typeof(*ggtt_region),
> +							  node);
> +
> +	queue_work(ggtt_region->ggtt->wq, &ggtt_region->delayed_removal.work);
> +}
> +
> +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
> +			 bool invalidate)
> +{
> +	struct xe_device *xe = tile_to_xe(ggtt->tile);
> +
> +	if (xe_pm_runtime_get_if_active(xe)) {
> +		ggtt_remove_node(ggtt, node, invalidate);
> +		xe_pm_runtime_put(xe);
> +	} else {
> +		ggtt_queue_remove_node(node, invalidate);
> +	}
> +}
> +
> +static struct xe_ggtt_region *ggtt_region_init(struct xe_ggtt *ggtt)
> +{
> +	struct xe_ggtt_region *ggtt_region = kzalloc(sizeof(*ggtt_region),
> +						     GFP_KERNEL);
> +
> +	if (!ggtt_region)
> +		return ERR_PTR(-ENOMEM);
> +
> +	INIT_WORK(&ggtt_region->delayed_removal.work, ggtt_remove_node_work_func);
> +	ggtt_region->ggtt = ggtt;
> +
> +	return ggtt_region;
> +}
> +
>   static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
>   				  u64 start, u64 end)
>   {
>   	int err;
>   	u64 alignment = XE_PAGE_SIZE;
> +	struct xe_ggtt_region *ggtt_region;
>   
>   	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
>   		alignment = SZ_64K;
>   
> -	if (XE_WARN_ON(bo->ggtt_node.size)) {
> +	if (XE_WARN_ON(bo->ggtt_region->node.size)) {
>   		/* Someone's already inserted this BO in the GGTT */
> -		xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
> +		xe_tile_assert(ggtt->tile, bo->ggtt_region->node.size == bo->size);
>   		return 0;
>   	}
>   
> @@ -417,9 +499,14 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
>   	if (err)
>   		return err;
>   
> +	ggtt_region = ggtt_region_init(ggtt);
> +	if (IS_ERR(ggtt_region))
> +		return PTR_ERR(ggtt_region);
> +	bo->ggtt_region = ggtt_region;
> +
>   	xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
>   	mutex_lock(&ggtt->lock);
> -	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size,
> +	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_region->node, bo->size,
>   					  alignment, 0, start, end, 0);
>   	if (!err)
>   		xe_ggtt_map_bo(ggtt, bo);
> @@ -443,43 +530,15 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
>   	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
>   }
>   
> -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
> -			 bool invalidate)
> -{
> -	struct xe_device *xe = tile_to_xe(ggtt->tile);
> -	bool bound;
> -	int idx;
> -
> -	bound = drm_dev_enter(&xe->drm, &idx);
> -	if (bound)
> -		xe_pm_runtime_get_noresume(xe);
> -
> -	mutex_lock(&ggtt->lock);
> -	if (bound)
> -		xe_ggtt_clear(ggtt, node->start, node->size);
> -	drm_mm_remove_node(node);
> -	node->size = 0;
> -	mutex_unlock(&ggtt->lock);
> -
> -	if (!bound)
> -		return;
> -
> -	if (invalidate)
> -		xe_ggtt_invalidate(ggtt);
> -
> -	xe_pm_runtime_put(xe);
> -	drm_dev_exit(idx);
> -}
> -
>   void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
>   {
> -	if (XE_WARN_ON(!bo->ggtt_node.size))
> +	if (XE_WARN_ON(!bo->ggtt_region->node.size))
>   		return;
>   
>   	/* This BO is not currently in the GGTT */
> -	xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
> +	xe_tile_assert(ggtt->tile, bo->ggtt_region->node.size == bo->size);
>   
> -	xe_ggtt_remove_node(ggtt, &bo->ggtt_node,
> +	xe_ggtt_remove_node(ggtt, &bo->ggtt_region->node,
>   			    bo->flags & XE_BO_FLAG_GGTT_INVALIDATE);
>   }
>   
> diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
> index 4a41a1762358..75511b5f43eb 100644
> --- a/drivers/gpu/drm/xe/xe_ggtt.h
> +++ b/drivers/gpu/drm/xe/xe_ggtt.h
> @@ -30,7 +30,6 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
>   int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
>   			 u64 start, u64 end);
>   void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
> -
>   int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p);
>   
>   #ifdef CONFIG_PCI_IOV
> diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
> index d8c584d9a8c3..202d7f3085ba 100644
> --- a/drivers/gpu/drm/xe/xe_ggtt_types.h
> +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
> @@ -34,6 +34,29 @@ struct xe_ggtt {
>   	const struct xe_ggtt_pt_ops *pt_ops;
>   
>   	struct drm_mm mm;
> +	/** @wq: Dedicated unordered work queue to process node removals */
> +	struct workqueue_struct *wq;
> +};
> +
> +/**
> + * struct xe_ggtt_region - GGTT region node
> + * It needs to be initialized before the drm_mm_node insertion in the GGTT.
> + */
> +struct xe_ggtt_region {
> +	/** @ggtt: Back pointer to xe_ggtt where this region will be inserted at */
> +	struct xe_ggtt *ggtt;
> +	/** @node: The drm_mm_node itself */
> +	struct drm_mm_node node;
> +	/**
> +	 * @delayed_removal: Information for removal through work thread when
> +	 * device runtime_pm is suspended
> +	 */
> +	struct {
> +		/** @delayed_removal.work: The work struct for the delayed removal */
> +		struct work_struct work;
> +		/** @delayed_removal.invalidate: If it needs invalidation upon removal */
> +		bool invalidate;
> +	} delayed_removal;
>   };
>   
>   #endif


More information about the Intel-xe mailing list