[PATCH 2/2] drm/amdgpu: schedule fault recover work to another CPU

Philip Yang Philip.Yang at amd.com
Mon May 30 14:57:01 UTC 2022


A flood of GPU VM fault interrupts may keep the CPU busy and delay the
scheduled recovery work that handles the VM faults. Schedule the recovery
work on another CPU of the same NUMA node instead.

Signed-off-by: Philip Yang <Philip.Yang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 13 +++++++++++--
 drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 13 +++++++++++--
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index cdd599a08125..3a47107737a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -435,12 +435,21 @@ static int vega10_ih_self_irq(struct amdgpu_device *adev,
 			      struct amdgpu_irq_src *source,
 			      struct amdgpu_iv_entry *entry)
 {
+	int cpu, new_cpu;
+
+	cpu = new_cpu = smp_processor_id();
+	do {
+		new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
+		if (cpu_to_node(new_cpu) == numa_node_id())
+			break;
+	} while (cpu != new_cpu);
+
 	switch (entry->ring_id) {
 	case 1:
-		schedule_work(&adev->irq.ih1_work);
+		schedule_work_on(new_cpu, &adev->irq.ih1_work);
 		break;
 	case 2:
-		schedule_work(&adev->irq.ih2_work);
+		schedule_work_on(new_cpu, &adev->irq.ih2_work);
 		break;
 	default: break;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index 3b4eb8285943..a9465f0d8fbe 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -487,12 +487,21 @@ static int vega20_ih_self_irq(struct amdgpu_device *adev,
 			      struct amdgpu_irq_src *source,
 			      struct amdgpu_iv_entry *entry)
 {
+	int cpu, new_cpu;
+
+	cpu = new_cpu = smp_processor_id();
+	do {
+		new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
+		if (cpu_to_node(new_cpu) == numa_node_id())
+			break;
+	} while (cpu != new_cpu);
+
 	switch (entry->ring_id) {
 	case 1:
-		schedule_work(&adev->irq.ih1_work);
+		schedule_work_on(new_cpu, &adev->irq.ih1_work);
 		break;
 	case 2:
-		schedule_work(&adev->irq.ih2_work);
+		schedule_work_on(new_cpu, &adev->irq.ih2_work);
 		break;
 	default: break;
 	}
-- 
2.35.1



More information about the amd-gfx mailing list