[PATCH 09/11] drm/amdgpu: Rework xgmi_wafl_pcs ras sw_init
Yang, Stanley
Stanley.Yang at amd.com
Mon Mar 6 07:28:20 UTC 2023
> -----Original Message-----
> From: Zhang, Hawking <Hawking.Zhang at amd.com>
> Sent: Monday, March 6, 2023 10:32 AM
> To: amd-gfx at lists.freedesktop.org; Zhou1, Tao <Tao.Zhou1 at amd.com>;
> Yang, Stanley <Stanley.Yang at amd.com>; Li, Candice <Candice.Li at amd.com>;
> Chai, Thomas <YiPeng.Chai at amd.com>
> Cc: Zhang, Hawking <Hawking.Zhang at amd.com>
> Subject: [PATCH 09/11] drm/amdgpu: Rework xgmi_wafl_pcs ras sw_init
>
> To align with other IP blocks.
>
> Signed-off-by: Hawking Zhang <Hawking.Zhang at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 9 +++++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 28
> +++++++++++++++++++-----
> drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 1 +
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 ++++++
> 4 files changed, 37 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 524e2c9b3012..d4685d22be60 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -500,9 +500,12 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device
> *adev)
>
> /* xgmi ras block */
> if (amdgpu_ras_is_supported(adev,
> AMDGPU_RAS_BLOCK__XGMI_WAFL)) {
> - adev->gmc.xgmi.ras = &xgmi_ras;
> - amdgpu_ras_register_ras_block(adev, &adev-
> >gmc.xgmi.ras->ras_block);
> - adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras-
> >ras_block.ras_comm;
> + r = amdgpu_xgmi_ras_sw_init(adev);
> + if (r) {
> + dev_err(adev->dev, "Failed to initialize
> xgmi_wafl_pcs ras block!\n");
> + return r;
> + }
> +
> }
>
> return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index fef1575cd0cf..3fe24348d199 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -1048,12 +1048,30 @@ struct amdgpu_ras_block_hw_ops
> xgmi_ras_hw_ops = {
>
> struct amdgpu_xgmi_ras xgmi_ras = {
> .ras_block = {
> - .ras_comm = {
> - .name = "xgmi_wafl",
> - .block = AMDGPU_RAS_BLOCK__XGMI_WAFL,
> - .type =
> AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
> - },
> .hw_ops = &xgmi_ras_hw_ops,
> .ras_late_init = amdgpu_xgmi_ras_late_init,
> },
> };
> +
> +int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev) {
> + int err;
> + struct amdgpu_xgmi_ras *ras;
> +
> + if (!adev->gmc.xgmi.ras)
> + return 0;
> +
> + ras = adev->gmc.xgmi.ras;
> + err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
> + if (err) {
> + dev_err(adev->dev, "Failed to register xgmi_wafl_pcs ras
> block!\n");
> + return err;
> + }
> +
> + strcpy(ras->ras_block.ras_comm.name, "xgmi_wafl_pcs");
> + ras->ras_block.ras_comm.block =
> AMDGPU_RAS_BLOCK__XGMI_WAFL;
> + ras->ras_block.ras_comm.type =
> AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
> + adev->gmc.xgmi.ras_if = &ras->ras_block.ras_comm;
> +
> + return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> index 30dcc1681b4e..86fbf56938f4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> @@ -73,5 +73,6 @@ static inline bool amdgpu_xgmi_same_hive(struct
> amdgpu_device *adev,
> adev->gmc.xgmi.hive_id &&
> adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id); }
> +int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev);
>
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 67c2a5186b8a..2a8dc9b52c2d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -1381,6 +1381,12 @@ static void gmc_v9_0_set_mca_ras_funcs(struct
> amdgpu_device *adev)
> }
> }
>
> +static void gmc_v9_0_set_xgmi_ras_funcs(struct amdgpu_device *adev) {
> + if (!adev->gmc.xgmi.connected_to_cpu)
[Stanley]: Can we use if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL)) instead of if (!adev->gmc.xgmi.connected_to_cpu)
to keep the IP RAS support check uniform across blocks?
Regards,
Stanley
> + adev->gmc.xgmi.ras = &xgmi_ras;
> +}
> +
> static int gmc_v9_0_early_init(void *handle) {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -1404,6 +1410,7 @@ static int gmc_v9_0_early_init(void *handle)
> gmc_v9_0_set_gfxhub_funcs(adev);
> gmc_v9_0_set_hdp_ras_funcs(adev);
> gmc_v9_0_set_mca_ras_funcs(adev);
> + gmc_v9_0_set_xgmi_ras_funcs(adev);
>
> adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
> adev->gmc.shared_aperture_end =
> --
> 2.17.1
More information about the amd-gfx
mailing list