[PATCH 2/4] drm/amdgpu: add ras POSION_CREATION event id support

Zhou1, Tao Tao.Zhou1 at amd.com
Wed Jul 3 08:22:29 UTC 2024


[AMD Official Use Only - AMD Internal Distribution Only]

> -----Original Message-----
> From: Wang, Yang(Kevin) <KevinYang.Wang at amd.com>
> Sent: Wednesday, July 3, 2024 1:52 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Zhang, Hawking <Hawking.Zhang at amd.com>; Zhou1, Tao
> <Tao.Zhou1 at amd.com>
> Subject: [PATCH 2/4] drm/amdgpu: add ras POSION_CREATION event id support
>
> add amdgpu ras POSION_CREATION event id support.
>
> Signed-off-by: Yang Wang <kevinyang.wang at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 17 ++++++++++++++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  1 +
>  2 files changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 45ac82a34d49..8a98611d2353 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2116,8 +2116,17 @@ static void
> amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
> static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager
> *obj,
>                               struct amdgpu_iv_entry *entry)
>  {
> -     dev_info(obj->adev->dev,
> -             "Poison is created\n");
> +     struct amdgpu_device *adev = obj->adev;
> +     enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION;
> +     u64 event_id;
> +     int ret;
> +
> +     ret = amdgpu_ras_mark_ras_event(adev, type);
> +     if (ret)

[Tao] Do we need to add a warning message here?

> +             return;
> +
> +     event_id = amdgpu_ras_acquire_event_id(adev, type);
> +     RAS_EVENT_LOG(adev, event_id, "Poison is created\n");
>
>       if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0,
> 0)) {
>               struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev);
> @@ -2889,6 +2898,7 @@ static int amdgpu_ras_poison_creation_handler(struct
> amdgpu_device *adev,
>       uint32_t new_detect_count, total_detect_count;
>       uint32_t need_query_count = poison_creation_count;
>       bool query_data_timeout = false;
> +     enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION;
>
>       memset(&info, 0, sizeof(info));
>       info.head.block = AMDGPU_RAS_BLOCK__UMC;
> @@ -2896,7 +2906,7 @@ static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
>       ecc_log = &ras->umc_ecc_log;
>       total_detect_count = 0;
>       do {
> -             ret = amdgpu_ras_query_error_status(adev, &info);
> +             ret = amdgpu_ras_query_error_status_with_event(adev, &info,
> type);
>               if (ret)
>                       return ret;
>
> @@ -3964,6 +3974,7 @@ u64 amdgpu_ras_acquire_event_id(struct
> amdgpu_device *adev, enum ras_event_type
>
>       switch (type) {
>       case RAS_EVENT_TYPE_ISR:
> +     case RAS_EVENT_TYPE_POISON_CREATION:
>               event_mgr = __get_ras_event_mgr(adev);
>               if (!event_mgr)
>                       return RAS_EVENT_INVALID_ID;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> index 88df4be5d122..1343cfbc913b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> @@ -432,6 +432,7 @@ struct umc_ecc_info {
>  enum ras_event_type {
>       RAS_EVENT_TYPE_INVALID = 0,
>       RAS_EVENT_TYPE_ISR,
> +     RAS_EVENT_TYPE_POISON_CREATION,
>       RAS_EVENT_TYPE_COUNT,
>  };
>
> --
> 2.34.1



More information about the amd-gfx mailing list