[PATCH] drm/xe: Fix missing runtime outer protection for ggtt_remove_node

Rodrigo Vivi rodrigo.vivi at intel.com
Fri May 31 19:53:15 UTC 2024


Defer the ggtt node removal to a thread if runtime_pm is not active.

The ggtt node removal can be called from multiple places, including
places where an outer caller cannot hold the runtime PM reference for
us and places where we are already holding other locks. So, try to grab
a runtime PM reference only if the device is already active; otherwise
defer the removal to a separate thread, from which we are sure we can
wake the device up.

v2: - Use the xe wq instead of the system wq (Matt and CI)
    - Avoid GFP_KERNEL to be future-proof: this removal can be called
    from outside our driver, and we must not block if the caller
    needs an atomic allocation. (Matt)

Cc: Paulo Zanoni <paulo.r.zanoni at intel.com>
Cc: Francois Dugast <francois.dugast at intel.com>
Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Cc: Matthew Brost <matthew.brost at intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
---
 drivers/gpu/drm/xe/xe_ggtt.c | 56 ++++++++++++++++++++++++++++++++----
 1 file changed, 51 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index b01a670fecb8..da8a65bdb9a3 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -443,16 +443,14 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
 }
 
-void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
-			 bool invalidate)
+static void ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+			     bool invalidate)
 {
 	struct xe_device *xe = tile_to_xe(ggtt->tile);
 	bool bound;
 	int idx;
 
 	bound = drm_dev_enter(&xe->drm, &idx);
-	if (bound)
-		xe_pm_runtime_get_noresume(xe);
 
 	mutex_lock(&ggtt->lock);
 	if (bound)
@@ -467,10 +465,58 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
 	if (invalidate)
 		xe_ggtt_invalidate(ggtt);
 
-	xe_pm_runtime_put(xe);
 	drm_dev_exit(idx);
 }
 
+struct remove_node_work {
+	struct work_struct work;	/* work item run by ggtt_remove_node_work_func() */
+	struct xe_ggtt *ggtt;		/* GGTT passed on to ggtt_remove_node() */
+	struct drm_mm_node *node;	/* node passed on to ggtt_remove_node() */
+	bool invalidate;		/* invalidate flag passed on to ggtt_remove_node() */
+};
+
+static void ggtt_remove_node_work_func(struct work_struct *work)
+{
+	struct remove_node_work *remove_node = container_of(work, struct remove_node_work, work);
+	struct xe_device *xe = tile_to_xe(remove_node->ggtt->tile);
+
+	xe_pm_runtime_get(xe);	/* held across the removal, dropped right after */
+	ggtt_remove_node(remove_node->ggtt, remove_node->node, remove_node->invalidate);
+	xe_pm_runtime_put(xe);
+
+	kfree(remove_node);	/* allocated in ggtt_queue_remove_node() */
+}
+
+static void ggtt_queue_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+				   bool invalidate)
+{
+	struct xe_device *xe = tile_to_xe(ggtt->tile);
+	struct remove_node_work *remove_node;
+
+	remove_node = kmalloc(sizeof(*remove_node), GFP_ATOMIC);
+	if (!remove_node)
+		return;	/* NOTE(review): the removal is silently dropped here */
+
+	INIT_WORK(&remove_node->work, ggtt_remove_node_work_func);
+	remove_node->ggtt = ggtt;
+	remove_node->node = node;
+	remove_node->invalidate = invalidate;
+	queue_work(xe->unordered_wq, &remove_node->work); /* work func frees remove_node */
+}
+
+void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+			 bool invalidate)
+{
+	struct xe_device *xe = tile_to_xe(ggtt->tile);
+
+	if (xe_pm_runtime_get_if_active(xe)) {	/* got a reference: remove inline */
+		ggtt_remove_node(ggtt, node, invalidate);
+		xe_pm_runtime_put(xe);
+	} else {	/* no reference: hand the removal to the work item */
+		ggtt_queue_remove_node(ggtt, node, invalidate);
+	}
+}
+
 void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 {
 	if (XE_WARN_ON(!bo->ggtt_node.size))
-- 
2.45.1



More information about the Intel-xe mailing list