[PATCH 3/5] drm/amdgpu: clear all message reset flags in fifo before gpu reset
YiPeng Chai
YiPeng.Chai at amd.com
Thu Jun 13 02:25:02 UTC 2024
To avoid resetting the GPU repeatedly, clear the reset
flags of all messages queued in the poison fifo before
the first GPU reset.
Signed-off-by: YiPeng Chai <YiPeng.Chai at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 59 ++++++++++++++++++++++++-
1 file changed, 58 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1e6e06009577..7dfb2e548d70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2768,6 +2768,49 @@ static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev,
return kfifo_get(&con->poison_fifo, poison_msg);
}
+
+/*
+ * Pop every message currently in con->poison_fifo and push it back. Non-UMC
+ * messages have their reset flag cleared first, and the OR of the cleared
+ * flags is returned through @cached_reset; UMC messages go back unmodified.
+ */
+static void amdgpu_ras_clear_poison_fifo_msg_reset_flag(struct amdgpu_device *adev,
+			uint32_t *cached_reset)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	uint32_t pending = kfifo_len(&con->poison_fifo);
+	struct ras_poison_msg entry;
+	uint32_t merged = 0;
+
+	/* Visit only as many messages as were queued when we entered. */
+	while (pending--) {
+		if (!kfifo_get(&con->poison_fifo, &entry))
+			continue;
+
+		if (entry.block != AMDGPU_RAS_BLOCK__UMC) {
+			merged |= entry.reset;
+			entry.reset = 0;
+		}
+
+		/* Re-queue the message; if the put fails it is lost. */
+		if (!kfifo_put(&con->poison_fifo, entry))
+			dev_info(adev->dev, "Poison fifo drop message!\n");
+	}
+
+	*cached_reset = merged;
+}
+
+/*
+ * Drain con->poison_fifo: pop and discard messages until kfifo_get() fails.
+ */
+static void amdgpu_ras_clear_poison_fifo(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_poison_msg discarded;
+
+	while (kfifo_get(&con->poison_fifo, &discarded))
+		continue;
+}
#endif
#ifdef HAVE_RADIX_TREE_ITER_DELETE
@@ -2886,9 +2929,23 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
poison_msg->pasid_fn(adev, pasid, poison_msg->data);
if (reset) {
+ uint32_t fifo_cached_reset = 0;
+
flush_delayed_work(&con->page_retirement_dwork);
- con->gpu_reset_flags |= reset;
+ amdgpu_ras_clear_poison_fifo_msg_reset_flag(adev, &fifo_cached_reset);
+
+ reset |= fifo_cached_reset;
+
+ if (reset & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
+ con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ amdgpu_ras_clear_poison_fifo(adev);
+ } else if (reset & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
+ con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else {
+ con->gpu_reset_flags |= reset;
+ }
+
amdgpu_ras_reset_gpu(adev);
}
--
2.34.1
More information about the amd-gfx
mailing list