[PATCH 12/12] drm/xe: Fix missing runtime outer protection for ggtt_remove_node
Rodrigo Vivi
rodrigo.vivi at intel.com
Thu Aug 15 22:07:32 UTC 2024
Defer the ggtt node removal to a thread if runtime_pm is not active.
The ggtt node removal can be called from multiple places, including
places where we cannot protect with outer callers and places we are
within other locks. So, try to grab the runtime reference if the
device is already active, otherwise defer the removal to a separate
thread from where we are sure we can wake the device up.
v2: - use xe wq instead of system wq (Matt and CI)
- Avoid GFP_KERNEL to be future proof since this removal can
be called from outside our drivers and we don't want to block
if atomic is needed. (Brost)
v3: amend forgot chunk declaring xe_device.
v4: Use a xe_ggtt_region to encapsulate the node and remova info,
wihtout the need for any memory allocation at runtime.
v5: Actually fill the delayed_removal.invalidate (Brost)
v6: - Ensure that ggtt_region is not freed before work finishes (Auld)
- Own wq to ensures that the queued works are flushed before
ggtt_fini (Brost)
v7: also free ggtt_region on early !bound return (Auld)
v8: Address the null deref (CI)
v9: Based on the new xe_ggtt_node for the proper care of the lifetime
of the object.
v10: Redo the lost v5 change. (Brost)
Cc: Matthew Auld <matthew.auld at intel.com>
Cc: Paulo Zanoni <paulo.r.zanoni at intel.com>
Cc: Francois Dugast <francois.dugast at intel.com>
Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Cc: Matthew Brost <matthew.brost at intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
---
drivers/gpu/drm/xe/xe_ggtt.c | 107 ++++++++++++++++++-----------
drivers/gpu/drm/xe/xe_ggtt_types.h | 12 ++++
2 files changed, 79 insertions(+), 40 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 5c04c1bc8417..110acf828974 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -161,6 +161,7 @@ static void ggtt_fini_early(struct drm_device *drm, void *arg)
{
struct xe_ggtt *ggtt = arg;
+ destroy_workqueue(ggtt->wq);
mutex_destroy(&ggtt->lock);
drm_mm_takedown(&ggtt->mm);
}
@@ -242,6 +243,8 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
else
ggtt->pt_ops = &xelp_pt_ops;
+ ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0);
+
drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
ggtt->size - xe_wopcm_size(xe));
mutex_init(&ggtt->lock);
@@ -276,6 +279,68 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
mutex_unlock(&ggtt->lock);
}
+static void ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node,
+ bool invalidate)
+{
+ struct xe_device *xe = tile_to_xe(ggtt->tile);
+ bool bound;
+ int idx;
+
+ if (!node || !node->ggtt)
+ return;
+
+ bound = drm_dev_enter(&xe->drm, &idx);
+
+ mutex_lock(&ggtt->lock);
+ if (bound)
+ xe_ggtt_clear(ggtt, node->base.start, node->base.size);
+ drm_mm_remove_node(&node->base);
+ node->base.size = 0;
+ mutex_unlock(&ggtt->lock);
+
+ if (!bound)
+ goto free_node;
+
+ if (invalidate)
+ xe_ggtt_invalidate(ggtt);
+
+ drm_dev_exit(idx);
+
+free_node:
+ xe_ggtt_node_fini(node);
+}
+
+static void ggtt_node_remove_work_func(struct work_struct *work)
+{
+ struct xe_ggtt_node *node = container_of(work, typeof(*node),
+ delayed_removal.work);
+ struct xe_device *xe = tile_to_xe(node->ggtt->tile);
+
+ xe_pm_runtime_get(xe);
+ ggtt_node_remove(node->ggtt, node, node->delayed_removal.invalidate);
+ xe_pm_runtime_put(xe);
+}
+
+/**
+ * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT
+ * @ggtt: the &xe_ggtt where node will be removed
+ * @node: the &xe_ggtt_node to be removed
+ * @invalidate: if node needs invalidation upon removal
+ */
+void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node,
+ bool invalidate)
+{
+ struct xe_device *xe = tile_to_xe(ggtt->tile);
+
+ if (xe_pm_runtime_get_if_active(xe)) {
+ ggtt_node_remove(ggtt, node, invalidate);
+ xe_pm_runtime_put(xe);
+ } else {
+ node->delayed_removal.invalidate = invalidate;
+ queue_work(ggtt->wq, &node->delayed_removal.work);
+ }
+}
+
/**
* xe_ggtt_init - Regular non-early GGTT initialization
* @ggtt: the &xe_ggtt to be initialized
@@ -482,7 +547,9 @@ struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt)
if (!node)
return ERR_PTR(-ENOMEM);
+ INIT_WORK(&node->delayed_removal.work, ggtt_node_remove_work_func);
node->ggtt = ggtt;
+
return node;
}
@@ -499,46 +566,6 @@ void xe_ggtt_node_fini(struct xe_ggtt_node *node)
kfree(node);
}
-/**
- * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT
- * @ggtt: the &xe_ggtt where node will be removed
- * @node: the &xe_ggtt_node to be removed
- * @invalidate: if node needs invalidation upon removal
- */
-void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node,
- bool invalidate)
-{
- struct xe_device *xe = tile_to_xe(ggtt->tile);
- bool bound;
- int idx;
-
- if (!node || !node->ggtt)
- return;
-
- bound = drm_dev_enter(&xe->drm, &idx);
- if (bound)
- xe_pm_runtime_get_noresume(xe);
-
- mutex_lock(&ggtt->lock);
- if (bound)
- xe_ggtt_clear(ggtt, node->base.start, node->base.size);
- drm_mm_remove_node(&node->base);
- node->base.size = 0;
- mutex_unlock(&ggtt->lock);
-
- if (!bound)
- goto free_node;
-
- if (invalidate)
- xe_ggtt_invalidate(ggtt);
-
- xe_pm_runtime_put(xe);
- drm_dev_exit(idx);
-
-free_node:
- xe_ggtt_node_fini(node);
-}
-
/**
* xe_ggtt_node_allocated - Check if node is allocated in GGTT
* @node: the &xe_ggtt_node to be inspected
diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
index 0e8822ae13fc..8b83610c6ee6 100644
--- a/drivers/gpu/drm/xe/xe_ggtt_types.h
+++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
@@ -47,6 +47,8 @@ struct xe_ggtt {
struct drm_mm mm;
/** @access_count: counts GGTT writes */
unsigned int access_count;
+ /** @wq: Dedicated unordered work queue to process node removals */
+ struct workqueue_struct *wq;
};
/**
@@ -61,6 +63,16 @@ struct xe_ggtt_node {
struct xe_ggtt *ggtt;
/** @base: A drm_mm_node */
struct drm_mm_node base;
+ /**
+ * @delayed_removal: Information for removal through work thread when
+ * device runtime_pm is suspended
+ */
+ struct {
+ /** @delayed_removal.work: The work struct for the delayed removal */
+ struct work_struct work;
+ /** @delayed_removal.invalidate: If it needs invalidation upon removal */
+ bool invalidate;
+ } delayed_removal;
};
/**
--
2.46.0
More information about the Intel-xe
mailing list