[PATCH 2/5] drm/amdgpu: add threshold to interrupt waiting for DE data to be ready
YiPeng Chai
YiPeng.Chai at amd.com
Thu Jun 13 02:25:01 UTC 2024
If the number of messages pending in the poison fifo exceeds the
threshold (one quarter of the fifo size), stop waiting for the
DE (deferred error) data to be ready.
Signed-off-by: YiPeng Chai <YiPeng.Chai at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 13 +++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 4 +++-
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index ed260966363f..1e6e06009577 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -124,6 +124,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block)
#define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100 //ms
+#define RAS_POISON_FIFO_MSG_PENDING_THRESHOLD (AMDGPU_RAS_POISON_FIFO_SIZE/4)
+
enum amdgpu_ras_retire_page_reservation {
AMDGPU_RAS_RETIRE_PAGE_RESERVED,
AMDGPU_RAS_RETIRE_PAGE_PENDING,
@@ -2832,6 +2834,7 @@ static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
struct ras_query_if info;
uint32_t timeout = timeout_ms;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ bool trigger_threshold = false;
memset(&info, 0, sizeof(info));
info.head.block = AMDGPU_RAS_BLOCK__UMC;
@@ -2845,6 +2848,12 @@ static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
return;
}
+ if (atomic_read(&ras->page_retirement_req_cnt) >
+ RAS_POISON_FIFO_MSG_PENDING_THRESHOLD) {
+ trigger_threshold = true;
+ break;
+ }
+
if (timeout && !ecc_log->de_updated) {
msleep(1);
timeout--;
@@ -2856,6 +2865,10 @@ static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
return;
}
+ if (trigger_threshold)
+ dev_dbg(adev->dev, "Waiting for deferred data %d ms, pending msg:%d\n",
+ timeout_ms - timeout, atomic_read(&ras->page_retirement_req_cnt));
+
if (!ret)
schedule_delayed_work(&ras->page_retirement_dwork, 0);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index e70c45712ddb..103436bb650e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -36,6 +36,8 @@
struct amdgpu_iv_entry;
+#define AMDGPU_RAS_POISON_FIFO_SIZE 128
+
#define AMDGPU_RAS_GPU_ERR_MEM_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 0, 0)
#define AMDGPU_RAS_GPU_ERR_FW_LOAD(x) AMDGPU_GET_REG_FIELD(x, 1, 1)
#define AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 2, 2)
@@ -532,7 +534,7 @@ struct amdgpu_ras {
struct mutex page_retirement_lock;
atomic_t page_retirement_req_cnt;
struct mutex page_rsv_lock;
- DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128);
+ DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, AMDGPU_RAS_POISON_FIFO_SIZE);
struct ras_ecc_log_info umc_ecc_log;
struct delayed_work page_retirement_dwork;
--
2.34.1
More information about the amd-gfx
mailing list