[PATCH 4/9] drm/xe: Move lockdep protection from mem_access to xe_pm_runtime
Rodrigo Vivi
rodrigo.vivi at intel.com
Wed Apr 10 01:58:20 UTC 2024
The mem_access itself is not holding any lock, but attempting
to train lockdep with possible scarring locks happening during
runtime pm. We are going soon to kill the mem_access get and put
helpers in favor of direct xe_pm_runtime calls, so let's just
move this lock around to where it now belongs.
v2: s/lockdep_training/lockdep_prime (Matt Auld)
Reviewed-by: Matthew Auld <matthew.auld at intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
---
drivers/gpu/drm/xe/xe_device.c | 23 -----------------
drivers/gpu/drm/xe/xe_device.h | 4 ---
drivers/gpu/drm/xe/xe_pm.c | 45 ++++++++++++++++++++++++++++------
3 files changed, 37 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 9083f5e02dd9..7a3dfee5473e 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -45,12 +45,6 @@
#include "xe_vm.h"
#include "xe_wait_user_fence.h"
-#ifdef CONFIG_LOCKDEP
-struct lockdep_map xe_device_mem_access_lockdep_map = {
- .name = "xe_device_mem_access_lockdep_map"
-};
-#endif
-
static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
@@ -713,23 +707,6 @@ void xe_device_mem_access_get(struct xe_device *xe)
if (xe_pm_read_callback_task(xe) == current)
return;
- /*
- * Since the resume here is synchronous it can be quite easy to deadlock
- * if we are not careful. Also in practice it might be quite timing
- * sensitive to ever see the 0 -> 1 transition with the callers locks
- * held, so deadlocks might exist but are hard for lockdep to ever see.
- * With this in mind, help lockdep learn about the potentially scary
- * stuff that can happen inside the runtime_resume callback by acquiring
- * a dummy lock (it doesn't protect anything and gets compiled out on
- * non-debug builds). Lockdep then only needs to see the
- * mem_access_lockdep_map -> runtime_resume callback once, and then can
- * hopefully validate all the (callers_locks) -> mem_access_lockdep_map.
- * For example if the (callers_locks) are ever grabbed in the
- * runtime_resume callback, lockdep should give us a nice splat.
- */
- lock_map_acquire(&xe_device_mem_access_lockdep_map);
- lock_map_release(&xe_device_mem_access_lockdep_map);
-
xe_pm_runtime_get(xe);
ref = atomic_inc_return(&xe->mem_access.ref);
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index d413bc2c6be5..02eda6610972 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -16,10 +16,6 @@ struct xe_file;
#include "xe_force_wake.h"
#include "xe_macros.h"
-#ifdef CONFIG_LOCKDEP
-extern struct lockdep_map xe_device_mem_access_lockdep_map;
-#endif
-
static inline struct xe_device *to_xe_device(const struct drm_device *dev)
{
return container_of(dev, struct xe_device, drm);
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 1692cf08e950..9d80a2d8c402 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -68,6 +68,12 @@
* management (RPS).
*/
+#ifdef CONFIG_LOCKDEP
+struct lockdep_map xe_pm_runtime_lockdep_map = {
+ .name = "xe_pm_runtime_lockdep_map"
+};
+#endif
+
/**
* xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
* @xe: xe device instance
@@ -309,11 +315,11 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
xe_pm_write_callback_task(xe, current);
/*
- * The actual xe_device_mem_access_put() is always async underneath, so
+ * The actual xe_pm_runtime_put() is always async underneath, so
* exactly where that is called should makes no difference to us. However
* we still need to be very careful with the locks that this callback
* acquires and the locks that are acquired and held by any callers of
- * xe_device_mem_access_get(). We already have the matching annotation
+ * xe_runtime_pm_get(). We already have the matching annotation
* on that side, but we also need it here. For example lockdep should be
* able to tell us if the following scenario is in theory possible:
*
@@ -321,15 +327,15 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
* lock(A) |
* | xe_pm_runtime_suspend()
* | lock(A)
- * xe_device_mem_access_get() |
+ * xe_pm_runtime_get() |
*
* This will clearly deadlock since rpm core needs to wait for
* xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
* on CPU0 which prevents CPU1 making forward progress. With the
- * annotation here and in xe_device_mem_access_get() lockdep will see
+ * annotation here and in xe_pm_runtime_get() lockdep will see
* the potential lock inversion and give us a nice splat.
*/
- lock_map_acquire(&xe_device_mem_access_lockdep_map);
+ lock_map_acquire(&xe_pm_runtime_lockdep_map);
/*
* Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify
@@ -355,7 +361,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
xe_irq_suspend(xe);
out:
- lock_map_release(&xe_device_mem_access_lockdep_map);
+ lock_map_release(&xe_pm_runtime_lockdep_map);
xe_pm_write_callback_task(xe, NULL);
return err;
}
@@ -375,7 +381,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
/* Disable access_ongoing asserts and prevent recursive pm calls */
xe_pm_write_callback_task(xe, current);
- lock_map_acquire(&xe_device_mem_access_lockdep_map);
+ lock_map_acquire(&xe_pm_runtime_lockdep_map);
/*
* It can be possible that xe has allowed d3cold but other pcie devices
@@ -412,11 +418,31 @@ int xe_pm_runtime_resume(struct xe_device *xe)
goto out;
}
out:
- lock_map_release(&xe_device_mem_access_lockdep_map);
+ lock_map_release(&xe_pm_runtime_lockdep_map);
xe_pm_write_callback_task(xe, NULL);
return err;
}
+/*
+ * For places where resume is synchronous it can be quite easy to deadlock
+ * if we are not careful. Also in practice it might be quite timing
+ * sensitive to ever see the 0 -> 1 transition with the callers locks
+ * held, so deadlocks might exist but are hard for lockdep to ever see.
+ * With this in mind, help lockdep learn about the potentially scary
+ * stuff that can happen inside the runtime_resume callback by acquiring
+ * a dummy lock (it doesn't protect anything and gets compiled out on
+ * non-debug builds). Lockdep then only needs to see the
+ * xe_pm_runtime_lockdep_map -> runtime_resume callback once, and then can
+ * hopefully validate all the (callers_locks) -> xe_pm_runtime_lockdep_map.
+ * For example if the (callers_locks) are ever grabbed in the
+ * runtime_resume callback, lockdep should give us a nice splat.
+ */
+static void pm_runtime_lockdep_prime(void)
+{
+ lock_map_acquire(&xe_pm_runtime_lockdep_map);
+ lock_map_release(&xe_pm_runtime_lockdep_map);
+}
+
/**
* xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
* @xe: xe device instance
@@ -428,6 +454,7 @@ void xe_pm_runtime_get(struct xe_device *xe)
if (xe_pm_read_callback_task(xe) == current)
return;
+ pm_runtime_lockdep_prime();
pm_runtime_resume(xe->drm.dev);
}
@@ -457,6 +484,7 @@ int xe_pm_runtime_get_ioctl(struct xe_device *xe)
if (WARN_ON(xe_pm_read_callback_task(xe) == current))
return -ELOOP;
+ pm_runtime_lockdep_prime();
return pm_runtime_get_sync(xe->drm.dev);
}
@@ -523,6 +551,7 @@ bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
return true;
}
+ pm_runtime_lockdep_prime();
return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
}
--
2.44.0
More information about the Intel-xe
mailing list