[PATCH 2/4] drm/amdgpu: add ras POISON_CREATION event id support
Zhou1, Tao
Tao.Zhou1 at amd.com
Wed Jul 3 08:22:29 UTC 2024
[AMD Official Use Only - AMD Internal Distribution Only]
> -----Original Message-----
> From: Wang, Yang(Kevin) <KevinYang.Wang at amd.com>
> Sent: Wednesday, July 3, 2024 1:52 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Zhang, Hawking <Hawking.Zhang at amd.com>; Zhou1, Tao
> <Tao.Zhou1 at amd.com>
> Subject: [PATCH 2/4] drm/amdgpu: add ras POISON_CREATION event id support
>
> Add amdgpu RAS POISON_CREATION event ID support.
>
> Signed-off-by: Yang Wang <kevinyang.wang at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 17 ++++++++++++++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 +
> 2 files changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 45ac82a34d49..8a98611d2353 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2116,8 +2116,17 @@ static void
> amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
> static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager
> *obj,
> struct amdgpu_iv_entry *entry)
> {
> - dev_info(obj->adev->dev,
> - "Poison is created\n");
> + struct amdgpu_device *adev = obj->adev;
> + enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION;
> + u64 event_id;
> + int ret;
> +
> + ret = amdgpu_ras_mark_ras_event(adev, type);
> + if (ret)
[Tao] Do we need to add a warning message here?
> + return;
> +
> + event_id = amdgpu_ras_acquire_event_id(adev, type);
> + RAS_EVENT_LOG(adev, event_id, "Poison is created\n");
>
> if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0,
> 0)) {
> struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev);
> @@ -2889,6 +2898,7 @@ static int amdgpu_ras_poison_creation_handler(struct
> amdgpu_device *adev,
> uint32_t new_detect_count, total_detect_count;
> uint32_t need_query_count = poison_creation_count;
> bool query_data_timeout = false;
> + enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION;
>
> memset(&info, 0, sizeof(info));
> info.head.block = AMDGPU_RAS_BLOCK__UMC; @@ -2896,7 +2906,7
> @@ static int amdgpu_ras_poison_creation_handler(struct amdgpu_device
> *adev,
> ecc_log = &ras->umc_ecc_log;
> total_detect_count = 0;
> do {
> - ret = amdgpu_ras_query_error_status(adev, &info);
> + ret = amdgpu_ras_query_error_status_with_event(adev, &info,
> type);
> if (ret)
> return ret;
>
> @@ -3964,6 +3974,7 @@ u64 amdgpu_ras_acquire_event_id(struct
> amdgpu_device *adev, enum ras_event_type
>
> switch (type) {
> case RAS_EVENT_TYPE_ISR:
> + case RAS_EVENT_TYPE_POISON_CREATION:
> event_mgr = __get_ras_event_mgr(adev);
> if (!event_mgr)
> return RAS_EVENT_INVALID_ID;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> index 88df4be5d122..1343cfbc913b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> @@ -432,6 +432,7 @@ struct umc_ecc_info { enum ras_event_type {
> RAS_EVENT_TYPE_INVALID = 0,
> RAS_EVENT_TYPE_ISR,
> + RAS_EVENT_TYPE_POISON_CREATION,
> RAS_EVENT_TYPE_COUNT,
> };
>
> --
> 2.34.1
More information about the amd-gfx
mailing list