[PATCH] drm/xe: Fix missing runtime outer protection for ggtt_remove_node
Matthew Auld
matthew.auld at intel.com
Fri Jun 14 14:29:06 UTC 2024
On 13/06/2024 22:53, Rodrigo Vivi wrote:
> Defer the ggtt node removal to a thread if runtime_pm is not active.
>
> The ggtt node removal can be called from multiple places, including
> places where we cannot protect with outer callers and places we are
> within other locks. So, try to grab the runtime reference if the
> device is already active, otherwise defer the removal to a separate
> thread from where we are sure we can wake the device up.
>
> v2: - use xe wq instead of system wq (Matt and CI)
> - Avoid GFP_KERNEL to be future proof since this removal can
> be called from outside our drivers and we don't want to block
> if atomic is needed. (Brost)
> v3: amend forgot chunk declaring xe_device.
> v4: Use a xe_ggtt_region to encapsulate the node and removal info,
> without the need for any memory allocation at runtime.
> v5: Actually fill the delayed_removal.invalidate (Brost)
> v6: - Ensure that ggtt_region is not freed before work finishes (Auld)
> - Own wq to ensure that the queued works are flushed before
> ggtt_fini (Brost)
>
> Cc: Matthew Auld <matthew.auld at intel.com>
> Cc: Paulo Zanoni <paulo.r.zanoni at intel.com>
> Cc: Francois Dugast <francois.dugast at intel.com>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
> ---
> drivers/gpu/drm/xe/display/xe_fb_pin.c | 10 +-
> drivers/gpu/drm/xe/xe_bo.c | 2 +-
> drivers/gpu/drm/xe/xe_bo.h | 6 +-
> drivers/gpu/drm/xe/xe_bo_types.h | 6 +-
> drivers/gpu/drm/xe/xe_ggtt.c | 129 ++++++++++++++++++-------
> drivers/gpu/drm/xe/xe_ggtt.h | 1 -
> drivers/gpu/drm/xe/xe_ggtt_types.h | 23 +++++
> 7 files changed, 130 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> index a2f417209124..b84139b27970 100644
> --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
> +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> @@ -153,7 +153,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
> }
>
> vma->dpt = dpt;
> - vma->node = dpt->ggtt_node;
> + vma->node = dpt->ggtt_region->node;
> return 0;
> }
>
> @@ -203,8 +203,8 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
> if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
> align = max_t(u32, align, SZ_64K);
>
> - if (bo->ggtt_node.size && view->type == I915_GTT_VIEW_NORMAL) {
> - vma->node = bo->ggtt_node;
> + if (bo->ggtt_region->node.size && view->type == I915_GTT_VIEW_NORMAL) {
> + vma->node = bo->ggtt_region->node;
> } else if (view->type == I915_GTT_VIEW_NORMAL) {
> u32 x, size = bo->ttm.base.size;
>
> @@ -322,8 +322,8 @@ static void __xe_unpin_fb_vma(struct i915_vma *vma)
>
> if (vma->dpt)
> xe_bo_unpin_map_no_vm(vma->dpt);
> - else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) ||
> - vma->bo->ggtt_node.start != vma->node.start)
> + else if (!drm_mm_node_allocated(&vma->bo->ggtt_region->node) ||
> + vma->bo->ggtt_region->node.start != vma->node.start)
> xe_ggtt_remove_node(ggtt, &vma->node, false);
>
> ttm_bo_reserve(&vma->bo->ttm, false, false, NULL);
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 74294f1b05bc..0bfd6d1eb0be 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -1072,7 +1072,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
>
> xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
>
> - if (bo->ggtt_node.size)
> + if (bo->ggtt_region->node.size)
> xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
>
> #ifdef CONFIG_PROC_FS
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index 6de894c728f5..aeb1ebc0d3fc 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -194,9 +194,9 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size)
> static inline u32
> xe_bo_ggtt_addr(struct xe_bo *bo)
> {
> - XE_WARN_ON(bo->ggtt_node.size > bo->size);
> - XE_WARN_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32));
> - return bo->ggtt_node.start;
> + XE_WARN_ON(bo->ggtt_region->node.size > bo->size);
> + XE_WARN_ON(bo->ggtt_region->node.start + bo->ggtt_region->node.size > (1ull << 32));
> + return bo->ggtt_region->node.start;
> }
>
> int xe_bo_vmap(struct xe_bo *bo);
> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
> index 86422e113d39..f45b1c4bbb6d 100644
> --- a/drivers/gpu/drm/xe/xe_bo_types.h
> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> @@ -14,6 +14,8 @@
> #include <drm/ttm/ttm_execbuf_util.h>
> #include <drm/ttm/ttm_placement.h>
>
> +#include "xe_ggtt_types.h"
> +
> struct xe_device;
> struct xe_vm;
>
> @@ -38,8 +40,8 @@ struct xe_bo {
> struct ttm_place placements[XE_BO_MAX_PLACEMENTS];
> /** @placement: current placement for this BO */
> struct ttm_placement placement;
> - /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */
> - struct drm_mm_node ggtt_node;
> + /** @ggtt_region: GGTT region node if this BO is mapped in the GGTT */
> + struct xe_ggtt_region *ggtt_region;
> /** @vmap: iosys map of this buffer */
> struct iosys_map vmap;
> /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */
> diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
> index 8ff91fd1b7c8..d0284f2c7cbc 100644
> --- a/drivers/gpu/drm/xe/xe_ggtt.c
> +++ b/drivers/gpu/drm/xe/xe_ggtt.c
> @@ -101,6 +101,7 @@ static void ggtt_fini_early(struct drm_device *drm, void *arg)
> {
> struct xe_ggtt *ggtt = arg;
>
> + destroy_workqueue(ggtt->wq);
> mutex_destroy(&ggtt->lock);
> drm_mm_takedown(&ggtt->mm);
> }
> @@ -191,6 +192,8 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
> else
> ggtt->pt_ops = &xelp_pt_ops;
>
> + ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0);
Check for error? alloc_workqueue() can fail and return NULL here.
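Something like this, perhaps (untested sketch; assumes nothing earlier
in init_early needs unwinding on this path):

	ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0);
	if (!ggtt->wq)
		return -ENOMEM;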
> +
> drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
> ggtt->size - xe_wopcm_size(xe));
> mutex_init(&ggtt->lock);
> @@ -389,7 +392,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
> {
> u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
> u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
> - u64 start = bo->ggtt_node.start;
> + u64 start = bo->ggtt_region->node.start;
> u64 offset, pte;
>
> for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
> @@ -398,18 +401,97 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
> }
> }
>
> +static void ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
> + bool invalidate)
> +{
> + struct xe_device *xe = tile_to_xe(ggtt->tile);
> + struct xe_ggtt_region *ggtt_region = container_of(node,
> + typeof(*ggtt_region),
> + node);
> + bool bound;
> + int idx;
> +
> + bound = drm_dev_enter(&xe->drm, &idx);
> +
> + mutex_lock(&ggtt->lock);
> + if (bound)
> + xe_ggtt_clear(ggtt, node->start, node->size);
> + drm_mm_remove_node(node);
> + node->size = 0;
> + mutex_unlock(&ggtt->lock);
> +
> + if (!bound)
> + return;
Missing the kfree here? If drm_dev_enter() fails we return early
without ever freeing ggtt_region, so it looks like it is leaked.
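Something like the below, maybe (untested; assumes nothing else still
holds a pointer to ggtt_region by this point):

	if (!bound) {
		kfree(ggtt_region);
		return;
	}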
> +
> + if (invalidate)
> + xe_ggtt_invalidate(ggtt);
> +
> + drm_dev_exit(idx);
> + kfree(ggtt_region);
> +}
> +
> +static void ggtt_remove_node_work_func(struct work_struct *work)
> +{
> + struct xe_ggtt_region *ggtt_region = container_of(work,
> + typeof(*ggtt_region),
> + delayed_removal.work);
> + struct xe_device *xe = tile_to_xe(ggtt_region->ggtt->tile);
> +
> + xe_pm_runtime_get(xe);
> + ggtt_remove_node(ggtt_region->ggtt, &ggtt_region->node,
> + ggtt_region->delayed_removal.invalidate);
> + xe_pm_runtime_put(xe);
> +}
> +
> +static void ggtt_queue_remove_node(struct drm_mm_node *node, bool invalidate)
> +{
> + struct xe_ggtt_region *ggtt_region = container_of(node,
> + typeof(*ggtt_region),
> + node);
> +
> + queue_work(ggtt_region->ggtt->wq, &ggtt_region->delayed_removal.work);
> +}
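Also, unless I'm missing something, nothing here actually fills in
delayed_removal.invalidate before queuing, even though the work func
reads it and v5 claims to fill it. With kzalloc() it will always be
false, so the invalidation would be silently skipped. Presumably we
want something like this before the queue_work() (untested):

	ggtt_region->delayed_removal.invalidate = invalidate;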
> +
> +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
> + bool invalidate)
> +{
> + struct xe_device *xe = tile_to_xe(ggtt->tile);
> +
> + if (xe_pm_runtime_get_if_active(xe)) {
> + ggtt_remove_node(ggtt, node, invalidate);
> + xe_pm_runtime_put(xe);
> + } else {
> + ggtt_queue_remove_node(node, invalidate);
> + }
> +}
> +
> +static struct xe_ggtt_region *ggtt_region_init(struct xe_ggtt *ggtt)
> +{
> + struct xe_ggtt_region *ggtt_region = kzalloc(sizeof(*ggtt_region),
> + GFP_KERNEL);
> +
> + if (!ggtt_region)
> + return ERR_PTR(-ENOMEM);
> +
> + INIT_WORK(&ggtt_region->delayed_removal.work, ggtt_remove_node_work_func);
> + ggtt_region->ggtt = ggtt;
> +
> + return ggtt_region;
> +}
> +
> static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
> u64 start, u64 end)
> {
> int err;
> u64 alignment = XE_PAGE_SIZE;
> + struct xe_ggtt_region *ggtt_region;
>
> if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
> alignment = SZ_64K;
>
> - if (XE_WARN_ON(bo->ggtt_node.size)) {
> + if (XE_WARN_ON(bo->ggtt_region->node.size)) {
> /* Someone's already inserted this BO in the GGTT */
> - xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
> + xe_tile_assert(ggtt->tile, bo->ggtt_region->node.size == bo->size);
> return 0;
> }
>
> @@ -417,9 +499,14 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
> if (err)
> return err;
>
> + ggtt_region = ggtt_region_init(ggtt);
> + if (IS_ERR(ggtt_region))
> + return PTR_ERR(ggtt_region);
> + bo->ggtt_region = ggtt_region;
> +
> xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
> mutex_lock(&ggtt->lock);
> - err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size,
> + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_region->node, bo->size,
> alignment, 0, start, end, 0);
> if (!err)
> xe_ggtt_map_bo(ggtt, bo);
> @@ -443,43 +530,15 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
> return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
> }
>
> -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
> - bool invalidate)
> -{
> - struct xe_device *xe = tile_to_xe(ggtt->tile);
> - bool bound;
> - int idx;
> -
> - bound = drm_dev_enter(&xe->drm, &idx);
> - if (bound)
> - xe_pm_runtime_get_noresume(xe);
> -
> - mutex_lock(&ggtt->lock);
> - if (bound)
> - xe_ggtt_clear(ggtt, node->start, node->size);
> - drm_mm_remove_node(node);
> - node->size = 0;
> - mutex_unlock(&ggtt->lock);
> -
> - if (!bound)
> - return;
> -
> - if (invalidate)
> - xe_ggtt_invalidate(ggtt);
> -
> - xe_pm_runtime_put(xe);
> - drm_dev_exit(idx);
> -}
> -
> void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
> {
> - if (XE_WARN_ON(!bo->ggtt_node.size))
> + if (XE_WARN_ON(!bo->ggtt_region->node.size))
> return;
>
> /* This BO is not currently in the GGTT */
> - xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
> + xe_tile_assert(ggtt->tile, bo->ggtt_region->node.size == bo->size);
>
> - xe_ggtt_remove_node(ggtt, &bo->ggtt_node,
> + xe_ggtt_remove_node(ggtt, &bo->ggtt_region->node,
> bo->flags & XE_BO_FLAG_GGTT_INVALIDATE);
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
> index 4a41a1762358..75511b5f43eb 100644
> --- a/drivers/gpu/drm/xe/xe_ggtt.h
> +++ b/drivers/gpu/drm/xe/xe_ggtt.h
> @@ -30,7 +30,6 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
> int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
> u64 start, u64 end);
> void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
> -
> int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p);
>
> #ifdef CONFIG_PCI_IOV
> diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
> index d8c584d9a8c3..202d7f3085ba 100644
> --- a/drivers/gpu/drm/xe/xe_ggtt_types.h
> +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
> @@ -34,6 +34,29 @@ struct xe_ggtt {
> const struct xe_ggtt_pt_ops *pt_ops;
>
> struct drm_mm mm;
> + /** @wq: Dedicated unordered work queue to process node removals */
> + struct workqueue_struct *wq;
> +};
> +
> +/**
> + * struct xe_ggtt_region - GGTT region node
> + * It needs to be initialized before the drm_mm_node insertion in the GGTT.
> + */
> +struct xe_ggtt_region {
> + /** @ggtt: Back pointer to xe_ggtt where this region will be inserted at */
> + struct xe_ggtt *ggtt;
> + /** @node: The drm_mm_node itself */
> + struct drm_mm_node node;
> + /**
> + * @delayed_removal: Information for removal through work thread when
> + * device runtime_pm is suspended
> + */
> + struct {
> + /** @delayed_removal.work: The work struct for the delayed removal */
> + struct work_struct work;
> + /** @delayed_removal.invalidate: If it needs invalidation upon removal */
> + bool invalidate;
> + } delayed_removal;
> };
>
> #endif