[PATCH 3/5] drm/amdgpu: clear all messages reset flags in fifo before gpu reset

YiPeng Chai YiPeng.Chai at amd.com
Thu Jun 13 02:25:02 UTC 2024


To avoid resetting the GPU repeatedly, clear the reset
flags of all non-UMC messages queued in the poison fifo
before the first GPU reset.

Signed-off-by: YiPeng Chai <YiPeng.Chai at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 59 ++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1e6e06009577..7dfb2e548d70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2768,6 +2768,49 @@ static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev,
 
 	return kfifo_get(&con->poison_fifo, poison_msg);
 }
+
+static void amdgpu_ras_clear_poison_fifo_msg_reset_flag(struct amdgpu_device *adev,
+			uint32_t *cached_reset)	/* out: OR of the cleared reset flags */
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_poison_msg msg;
+	uint32_t cached_msg_count;
+	uint32_t reset = 0;
+	int i, ret;
+
+	cached_msg_count = kfifo_len(&con->poison_fifo);
+	/* Rotate the fifo once: pop each message queued at entry and push it back. */
+	for (i = 0; i < cached_msg_count; i++) {
+		ret = kfifo_get(&con->poison_fifo, &msg);
+		if (!ret)
+			continue;	/* nothing was popped, so nothing to re-queue */
+
+		if (msg.block != AMDGPU_RAS_BLOCK__UMC) {	/* UMC messages keep their flag */
+			reset |= msg.reset;
+
+			/* Clear reset flag; the caller folds it into the upcoming reset */
+			msg.reset = 0;
+		}
+
+		/* Add the (possibly modified) message back to the fifo */
+		ret = kfifo_put(&con->poison_fifo, msg);
+		if (!ret)
+			dev_info(adev->dev, "Poison fifo drop message!\n");
+	}
+	*cached_reset = reset;	/* 0 when no non-UMC message carried a reset flag */
+}
+
+static void amdgpu_ras_clear_poison_fifo(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_poison_msg msg;
+	int ret;
+
+	do {
+		ret = kfifo_get(&con->poison_fifo, &msg);	/* popped message is discarded */
+	} while (ret);
+	/* kfifo_get() returned 0: no message was left to pop from the poison fifo */
+}
 #endif
 
 #ifdef HAVE_RADIX_TREE_ITER_DELETE
@@ -2886,9 +2929,23 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
 		poison_msg->pasid_fn(adev, pasid, poison_msg->data);
 
 	if (reset) {
+		uint32_t fifo_cached_reset = 0;
+
 		flush_delayed_work(&con->page_retirement_dwork);
 
-		con->gpu_reset_flags |= reset;
+		amdgpu_ras_clear_poison_fifo_msg_reset_flag(adev, &fifo_cached_reset);
+
+		reset |= fifo_cached_reset;
+
+		if (reset & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
+			con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+			amdgpu_ras_clear_poison_fifo(adev);
+		} else if (reset & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
+			con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+		} else {
+			con->gpu_reset_flags |= reset;
+		}
+
 		amdgpu_ras_reset_gpu(adev);
 	}
 
-- 
2.34.1



More information about the amd-gfx mailing list