[RFC PATCH 2/2] drm/amdgpu: protect ring from concurrency access

xinhui pan xinhui.pan at amd.com
Mon Sep 13 05:55:21 UTC 2021


Park the ring scheduler thread before accessing the ring.
And unpark it only when we finish accessing the ring.

The right sequence should be like below.
lock ring
park ring thread
direct access ring
[unlock ring, do something, lock ring]
unpark ring thread
unlock ring

Signed-off-by: xinhui pan <xinhui.pan at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 30 ++++++---------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c      |  6 +++++
 2 files changed, 14 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 19323b4cce7b..5138d5b52c9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1349,7 +1349,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
 	struct drm_device *dev = adev_to_drm(adev);
-	int r = 0, i;
+	int r = 0;
 
 	r = pm_runtime_get_sync(dev->dev);
 	if (r < 0) {
@@ -1362,15 +1362,6 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
 	if (r)
 		return r;
 
-	/* hold on the scheduler */
-	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
-		struct amdgpu_ring *ring = adev->rings[i];
-
-		if (!ring || !ring->sched.thread)
-			continue;
-		kthread_park(ring->sched.thread);
-	}
-
 	seq_printf(m, "run ib test:\n");
 	r = amdgpu_ib_ring_tests(adev);
 	if (r)
@@ -1378,15 +1369,6 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
 	else
 		seq_printf(m, "ib ring tests passed.\n");
 
-	/* go on the scheduler */
-	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
-		struct amdgpu_ring *ring = adev->rings[i];
-
-		if (!ring || !ring->sched.thread)
-			continue;
-		kthread_unpark(ring->sched.thread);
-	}
-
 	up_read(&adev->reset_sem);
 
 	pm_runtime_mark_last_busy(dev->dev);
@@ -1650,13 +1632,14 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
 	if (r)
 		goto pro_end;
 
-	/* stop the scheduler */
-	kthread_park(ring->sched.thread);
-
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
 
 	/* preempt the IB */
+	mutex_lock(&ring->lock);
+	/* stop the scheduler */
+	kthread_park(ring->sched.thread);
 	r = amdgpu_ring_preempt_ib(ring);
+	mutex_unlock(&ring->lock);
 	if (r) {
 		DRM_WARN("failed to preempt ring %d\n", ring->idx);
 		goto failure;
@@ -1686,8 +1669,11 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
 	}
 
 failure:
+	/* make sure thread is not unparked accidently. */
+	mutex_lock(&ring->lock);
 	/* restart the scheduler */
 	kthread_unpark(ring->sched.thread);
+	mutex_unlock(&ring->lock);
 
 	up_read(&adev->reset_sem);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 9274f32c3661..ea83b3aef8fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -402,7 +402,13 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 		else
 			tmo = tmo_gfx;
 
+		mutex_lock(&ring->lock);
+		if (ring->sched.thread)
+			kthread_park(ring->sched.thread);
 		r = amdgpu_ring_test_ib(ring, tmo);
+		if (ring->sched.thread)
+			kthread_unpark(ring->sched.thread);
+		mutex_unlock(&ring->lock);
 		if (!r) {
 			DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n",
 				      ring->name);
-- 
2.25.1



More information about the amd-gfx mailing list