[PATCH V2 11/11] drm/amdgpu: Move error inject function from amdgpu_ras.c to each block
Zhou1, Tao
Tao.Zhou1 at amd.com
Mon Dec 6 07:33:44 UTC 2021
[AMD Official Use Only]
The error injection logic is identical across RAS blocks, with the exception of GFX and XGMI.
I agree with moving the XGMI error injection into amdgpu_xgmi.c, but I don't think it's necessary to implement a separate, block-specific error injection function for every other RAS block — they all just call psp_ras_trigger_error() in the same way.
Regards,
Tao
> -----Original Message-----
> From: Chai, Thomas <YiPeng.Chai at amd.com>
> Sent: Wednesday, December 1, 2021 6:53 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Chai, Thomas <YiPeng.Chai at amd.com>; Zhang, Hawking
> <Hawking.Zhang at amd.com>; Zhou1, Tao <Tao.Zhou1 at amd.com>; Chai,
> Thomas <YiPeng.Chai at amd.com>
> Subject: [PATCH V2 11/11] drm/amdgpu: Move error inject function from
> amdgpu_ras.c to each block
>
> Move each block error inject function from amdgpu_ras.c to each block.
>
> Signed-off-by: yipechai <YiPeng.Chai at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 62 +++++-------------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 28 +++++++++++
> drivers/gpu/drm/amd/amdgpu/mca_v3_0.c | 18 +++++++
> drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 16 ++++++
> drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c | 16 ++++++
> drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 16 ++++++
> drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 16 ++++++
> drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 16 ++++++
> drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c | 16 ++++++
> drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 16 ++++++
> drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 16 ++++++
> drivers/gpu/drm/amd/amdgpu/umc_v8_7.c | 16 ++++++
> 12 files changed, 201 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 2e38bd3d3d45..87b625d305c9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1032,31 +1032,7 @@ int amdgpu_ras_reset_error_status(struct
> amdgpu_device *adev,
> return 0;
> }
>
> -/* Trigger XGMI/WAFL error */
> -static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
> - struct ta_ras_trigger_error_input *block_info)
> -{
> - int ret;
> -
> - if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
> - dev_warn(adev->dev, "Failed to disallow df cstate");
>
> - if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
> - dev_warn(adev->dev, "Failed to disallow XGMI power down");
> -
> - ret = psp_ras_trigger_error(&adev->psp, block_info);
> -
> - if (amdgpu_ras_intr_triggered())
> - return ret;
> -
> - if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
> - dev_warn(adev->dev, "Failed to allow XGMI power down");
> -
> - if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
> - dev_warn(adev->dev, "Failed to allow df cstate");
> -
> - return ret;
> -}
>
> /* wrapper of psp_ras_trigger_error */
> int amdgpu_ras_error_inject(struct amdgpu_device *adev, @@ -1076,41
> +1052,25 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
> if (!obj)
> return -EINVAL;
>
> + if (!block_obj || !block_obj->ops) {
> + dev_info(adev->dev, "%s don't config ras function \n",
> get_ras_block_str(&info->head));
> + return -EINVAL;
> + }
> +
> /* Calculate XGMI relative offset */
> if (adev->gmc.xgmi.num_physical_nodes > 1) {
> - block_info.address =
> - amdgpu_xgmi_get_relative_phy_addr(adev,
> - block_info.address);
> + block_info.address =
> amdgpu_xgmi_get_relative_phy_addr(adev,
> +block_info.address);
> }
>
> - switch (info->head.block) {
> - case AMDGPU_RAS_BLOCK__GFX:
> - if (!block_obj || !block_obj->ops) {
> - dev_info(adev->dev, "%s don't config ras function \n",
> get_ras_block_str(&info->head));
> - return -EINVAL;
> - }
> - if (block_obj->ops->ras_error_inject)
> + if (block_obj->ops->ras_error_inject) {
> + if(info->head.block == AMDGPU_RAS_BLOCK__GFX)
> ret = block_obj->ops->ras_error_inject(adev, info);
> - break;
> - case AMDGPU_RAS_BLOCK__UMC:
> - case AMDGPU_RAS_BLOCK__SDMA:
> - case AMDGPU_RAS_BLOCK__MMHUB:
> - case AMDGPU_RAS_BLOCK__PCIE_BIF:
> - case AMDGPU_RAS_BLOCK__MCA:
> - ret = psp_ras_trigger_error(&adev->psp, &block_info);
> - break;
> - case AMDGPU_RAS_BLOCK__XGMI_WAFL:
> - ret = amdgpu_ras_error_inject_xgmi(adev, &block_info);
> - break;
> - default:
> - dev_info(adev->dev, "%s error injection is not supported yet\n",
> - get_ras_block_str(&info->head));
> - ret = -EINVAL;
> + else
> + ret = block_obj->ops->ras_error_inject(adev,
> &block_info);
> }
>
> if (ret)
> - dev_err(adev->dev, "ras inject %s failed %d\n",
> - get_ras_block_str(&info->head), ret);
> + dev_err(adev->dev, "ras inject %s failed %d\n",
> +get_ras_block_str(&info->head), ret);
>
> return ret;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index da541c7b1ec2..298742afba99 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -940,6 +940,33 @@ static void
> amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
> err_data->ce_count += ce_cnt;
> }
>
> +/* Trigger XGMI/WAFL error */
> +static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
> + void *inject_if) {
> + int ret = 0;;
> + struct ta_ras_trigger_error_input *block_info = (struct
> +ta_ras_trigger_error_input *)inject_if;
> +
> + if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
> + dev_warn(adev->dev, "Failed to disallow df cstate");
> +
> + if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
> + dev_warn(adev->dev, "Failed to disallow XGMI power
> + down");
> +
> + ret = psp_ras_trigger_error(&adev->psp, block_info);
> +
> + if (amdgpu_ras_intr_triggered())
> + return ret;
> +
> + if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
> + dev_warn(adev->dev, "Failed to allow XGMI power down");
> +
> + if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
> + dev_warn(adev->dev, "Failed to allow df cstate");
> +
> + return ret;
> +}
> +
> static int amdgpu_xgmi_ras_block_match(struct amdgpu_ras_block_object*
> block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index) {
> if(!block_obj)
> @@ -958,6 +985,7 @@ struct amdgpu_ras_block_ops xgmi_ras_ops = {
> .ras_fini = amdgpu_xgmi_ras_fini,
> .query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
> .reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
> + .ras_error_inject = amdgpu_ras_error_inject_xgmi,
> };
>
> struct amdgpu_xgmi_ras xgmi_ras = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
> b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
> index 99edc75ed4ec..ce6841967b05 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
> @@ -60,12 +60,28 @@ static int mca_v3_0_ras_block_match(struct
> amdgpu_ras_block_object* block_obj, e
> return -EINVAL;
> }
>
> +static int mca_v3_0_ras_error_inject(struct amdgpu_device *adev, void
> +*inject_if) {
> + int ret = 0;
> + if (!adev || !inject_if) {
> + dev_err(adev->dev, "%s invaild parameters \n", __func__);
> + return -EINVAL;
> + }
> +
> + mutex_lock(&adev->grbm_idx_mutex);
> + ret = psp_ras_trigger_error(&adev->psp, (struct
> ta_ras_trigger_error_input *)inject_if);
> + mutex_unlock(&adev->grbm_idx_mutex);
> +
> + return ret;
> +}
> +
> const struct amdgpu_ras_block_ops mca_v3_0_mp0_ops = {
> .ras_block_match = mca_v3_0_ras_block_match,
> .ras_late_init = mca_v3_0_mp0_ras_late_init,
> .ras_fini = mca_v3_0_mp0_ras_fini,
> .query_ras_error_count = mca_v3_0_mp0_query_ras_error_count,
> .query_ras_error_address = NULL,
> + .ras_error_inject = mca_v3_0_ras_error_inject,
> };
>
> struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = { @@ -101,6 +117,7 @@
> const struct amdgpu_ras_block_ops mca_v3_0_mp1_ops = {
> .ras_fini = mca_v3_0_mp1_ras_fini,
> .query_ras_error_count = mca_v3_0_mp1_query_ras_error_count,
> .query_ras_error_address = NULL,
> + .ras_error_inject = mca_v3_0_ras_error_inject,
> };
>
> struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = { @@ -136,6 +153,7 @@
> const struct amdgpu_ras_block_ops mca_v3_0_mpio_ops = {
> .ras_fini = mca_v3_0_mpio_ras_fini,
> .query_ras_error_count = mca_v3_0_mpio_query_ras_error_count,
> .query_ras_error_address = NULL,
> + .ras_error_inject = mca_v3_0_ras_error_inject,
> };
>
> struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = { diff --git
> a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> index da505314802a..7cca86c504e6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> @@ -786,12 +786,28 @@ static int mmhub_v1_0_ras_block_match(struct
> amdgpu_ras_block_object* block_obj,
> return -EINVAL;
> }
>
> +static int mmhub_v1_0_ras_error_inject(struct amdgpu_device *adev, void
> +*inject_if) {
> + int ret = 0;
> + if (!adev || !inject_if) {
> + dev_err(adev->dev, "%s invaild parameters \n", __func__);
> + return -EINVAL;
> + }
> +
> + mutex_lock(&adev->grbm_idx_mutex);
> + ret = psp_ras_trigger_error(&adev->psp, (struct
> ta_ras_trigger_error_input *)inject_if);
> + mutex_unlock(&adev->grbm_idx_mutex);
> +
> + return ret;
> +}
> +
> struct amdgpu_ras_block_ops mmhub_v1_0_ras_ops = {
> .ras_block_match = mmhub_v1_0_ras_block_match,
> .ras_late_init = amdgpu_mmhub_ras_late_init,
> .ras_fini = amdgpu_mmhub_ras_fini,
> .query_ras_error_count = mmhub_v1_0_query_ras_error_count,
> .reset_ras_error_count = mmhub_v1_0_reset_ras_error_count,
> + .ras_error_inject = mmhub_v1_0_ras_error_inject,
> };
>
> struct amdgpu_mmhub_ras mmhub_v1_0_ras = { diff --git
> a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
> index 829d14ee87d3..79a9995caef1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
> @@ -1333,6 +1333,21 @@ static int mmhub_v1_7_ras_block_match(struct
> amdgpu_ras_block_object* block_obj,
> return -EINVAL;
> }
>
> +static int mmhub_v1_7_ras_error_inject(struct amdgpu_device *adev, void
> +*inject_if) {
> + int ret = 0;
> + if (!adev || !inject_if) {
> + dev_err(adev->dev, "%s invaild parameters \n", __func__);
> + return -EINVAL;
> + }
> +
> + mutex_lock(&adev->grbm_idx_mutex);
> + ret = psp_ras_trigger_error(&adev->psp, (struct
> ta_ras_trigger_error_input *)inject_if);
> + mutex_unlock(&adev->grbm_idx_mutex);
> +
> + return ret;
> +}
> +
> struct amdgpu_ras_block_ops mmhub_v1_7_ras_ops = {
> .ras_block_match = mmhub_v1_7_ras_block_match,
> .ras_late_init = amdgpu_mmhub_ras_late_init, @@ -1341,6 +1356,7
> @@ struct amdgpu_ras_block_ops mmhub_v1_7_ras_ops = {
> .reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
> .query_ras_error_status = mmhub_v1_7_query_ras_error_status,
> .reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
> + .ras_error_inject = mmhub_v1_7_ras_error_inject,
> };
>
> struct amdgpu_mmhub_ras mmhub_v1_7_ras = { diff --git
> a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
> b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
> index 1edc98e5bcbb..eaed556b9551 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
> @@ -1667,6 +1667,21 @@ static int mmhub_v9_4_ras_block_match(struct
> amdgpu_ras_block_object* block_obj,
> return -EINVAL;
> }
>
> +static int mmhub_v9_4_ras_error_inject(struct amdgpu_device *adev, void
> +*inject_if) {
> + int ret = 0;
> + if (!adev || !inject_if) {
> + dev_err(adev->dev, "%s invaild parameters \n", __func__);
> + return -EINVAL;
> + }
> +
> + mutex_lock(&adev->grbm_idx_mutex);
> + ret = psp_ras_trigger_error(&adev->psp, (struct
> ta_ras_trigger_error_input *)inject_if);
> + mutex_unlock(&adev->grbm_idx_mutex);
> +
> + return ret;
> +}
> +
> const struct amdgpu_ras_block_ops mmhub_v9_4_ras_ops = {
> .ras_block_match = mmhub_v9_4_ras_block_match,
> .ras_late_init = amdgpu_mmhub_ras_late_init, @@ -1674,6 +1689,7
> @@ const struct amdgpu_ras_block_ops mmhub_v9_4_ras_ops = {
> .query_ras_error_count = mmhub_v9_4_query_ras_error_count,
> .reset_ras_error_count = mmhub_v9_4_reset_ras_error_count,
> .query_ras_error_status = mmhub_v9_4_query_ras_error_status,
> + .ras_error_inject = mmhub_v9_4_ras_error_inject,
> };
>
> struct amdgpu_mmhub_ras mmhub_v9_4_ras = { diff --git
> a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> index 14f7265d954e..8e62e2ffabe5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> @@ -650,11 +650,27 @@ static int nbio_v7_4_ras_block_match(struct
> amdgpu_ras_block_object* block_obj,
> return -EINVAL;
> }
>
> +static int nbio_v7_4_ras_error_inject(struct amdgpu_device *adev, void
> +*inject_if) {
> + int ret = 0;
> + if (!adev || !inject_if) {
> + dev_err(adev->dev, "%s invaild parameters \n", __func__);
> + return -EINVAL;
> + }
> +
> + mutex_lock(&adev->grbm_idx_mutex);
> + ret = psp_ras_trigger_error(&adev->psp, (struct
> ta_ras_trigger_error_input *)inject_if);
> + mutex_unlock(&adev->grbm_idx_mutex);
> +
> + return ret;
> +}
> +
> const struct amdgpu_ras_block_ops nbio_v7_4_ras_ops = {
> .ras_block_match = nbio_v7_4_ras_block_match,
> .query_ras_error_count = nbio_v7_4_query_ras_error_count,
> .ras_late_init = amdgpu_nbio_ras_late_init,
> .ras_fini = amdgpu_nbio_ras_fini,
> + .ras_error_inject = nbio_v7_4_ras_error_inject,
> };
>
> struct amdgpu_nbio_ras nbio_v7_4_ras = { diff --git
> a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 30a651613776..578ee40cc0d1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -2803,11 +2803,27 @@ static int sdma_v4_0_ras_block_match(struct
> amdgpu_ras_block_object* block_obj,
> return -EINVAL;
> }
>
> +static int sdma_v4_0_ras_error_inject(struct amdgpu_device *adev, void
> +*inject_if) {
> + int ret = 0;
> + if (!adev || !inject_if) {
> + dev_err(adev->dev, "%s invaild parameters \n", __func__);
> + return -EINVAL;
> + }
> +
> + mutex_lock(&adev->grbm_idx_mutex);
> + ret = psp_ras_trigger_error(&adev->psp, (struct
> ta_ras_trigger_error_input *)inject_if);
> + mutex_unlock(&adev->grbm_idx_mutex);
> +
> + return ret;
> +}
> +
> const struct amdgpu_ras_block_ops sdma_v4_0_ras_ops = {
> .ras_block_match = sdma_v4_0_ras_block_match,
> .ras_fini = amdgpu_sdma_ras_fini,
> .query_ras_error_count = sdma_v4_0_query_ras_error_count,
> .reset_ras_error_count = sdma_v4_0_reset_ras_error_count,
> + .ras_error_inject = sdma_v4_0_ras_error_inject,
> };
>
> static struct amdgpu_sdma_ras sdma_v4_0_ras = { diff --git
> a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
> index 8c165bcb0ffa..0656c6a7a2c1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
> @@ -270,11 +270,27 @@ static int sdma_v4_4_ras_block_match(struct
> amdgpu_ras_block_object* block_obj,
> return -EINVAL;
> }
>
> +static int sdma_v4_4_ras_error_inject(struct amdgpu_device *adev, void
> +*inject_if) {
> + int ret = 0;
> + if (!adev || !inject_if) {
> + dev_err(adev->dev, "%s invaild parameters \n", __func__);
> + return -EINVAL;
> + }
> +
> + mutex_lock(&adev->grbm_idx_mutex);
> + ret = psp_ras_trigger_error(&adev->psp, (struct
> ta_ras_trigger_error_input *)inject_if);
> + mutex_unlock(&adev->grbm_idx_mutex);
> +
> + return ret;
> +}
> +
> const struct amdgpu_ras_block_ops sdma_v4_4_ras_ops = {
> .ras_block_match = sdma_v4_4_ras_block_match,
> .ras_fini = amdgpu_sdma_ras_fini,
> .query_ras_error_count = sdma_v4_4_query_ras_error_count,
> .reset_ras_error_count = sdma_v4_4_reset_ras_error_count,
> + .ras_error_inject = sdma_v4_4_ras_error_inject,
> };
>
> struct amdgpu_sdma_ras sdma_v4_4_ras = { diff --git
> a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
> index ed480c2081a6..2058439b02cd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
> @@ -477,12 +477,28 @@ static int umc_v6_1_ras_block_match(struct
> amdgpu_ras_block_object* block_obj, e
> return -EINVAL;
> }
>
> +static int umc_v6_1_ras_error_inject(struct amdgpu_device *adev, void
> +*inject_if) {
> + int ret = 0;
> + if (!adev || !inject_if) {
> + dev_err(adev->dev, "%s invaild parameters \n", __func__);
> + return -EINVAL;
> + }
> +
> + mutex_lock(&adev->grbm_idx_mutex);
> + ret = psp_ras_trigger_error(&adev->psp, (struct
> ta_ras_trigger_error_input *)inject_if);
> + mutex_unlock(&adev->grbm_idx_mutex);
> +
> + return ret;
> +}
> +
> const struct amdgpu_ras_block_ops umc_v6_1_ras_ops = {
> .ras_block_match = umc_v6_1_ras_block_match,
> .ras_late_init = amdgpu_umc_ras_late_init,
> .ras_fini = amdgpu_umc_ras_fini,
> .query_ras_error_count = umc_v6_1_query_ras_error_count,
> .query_ras_error_address = umc_v6_1_query_ras_error_address,
> + .ras_error_inject = umc_v6_1_ras_error_inject,
> };
>
> struct amdgpu_umc_ras umc_v6_1_ras = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> index e26728dbc6e9..2e87e7de4a55 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> @@ -333,6 +333,21 @@ static int umc_v6_7_ras_block_match(struct
> amdgpu_ras_block_object* block_obj, e
> return -EINVAL;
> }
>
> +static int umc_v6_7_ras_error_inject(struct amdgpu_device *adev, void
> +*inject_if) {
> + int ret = 0;
> + if (!adev || !inject_if) {
> + dev_err(adev->dev, "%s invaild parameters \n", __func__);
> + return -EINVAL;
> + }
> +
> + mutex_lock(&adev->grbm_idx_mutex);
> + ret = psp_ras_trigger_error(&adev->psp, (struct
> ta_ras_trigger_error_input *)inject_if);
> + mutex_unlock(&adev->grbm_idx_mutex);
> +
> + return ret;
> +}
> +
> const struct amdgpu_ras_block_ops umc_v6_7_ras_pos = {
> .ras_block_match = umc_v6_7_ras_block_match,
> .ras_late_init = amdgpu_umc_ras_late_init, @@ -340,6 +355,7 @@
> const struct amdgpu_ras_block_ops umc_v6_7_ras_pos = {
> .query_ras_error_count = umc_v6_7_query_ras_error_count,
> .query_ras_error_address = umc_v6_7_query_ras_error_address,
> .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
> + .ras_error_inject = umc_v6_7_ras_error_inject,
> };
>
> struct amdgpu_umc_ras umc_v6_7_ras = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> index 037791e90c24..f7fb653434b9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> @@ -336,12 +336,28 @@ static int umc_v8_7_ras_block_match(struct
> amdgpu_ras_block_object* block_obj, e
> return -EINVAL;
> }
>
> +static int umc_v8_7_ras_error_inject(struct amdgpu_device *adev, void
> +*inject_if) {
> + int ret = 0;
> + if (!adev || !inject_if) {
> + dev_err(adev->dev, "%s invaild parameters \n", __func__);
> + return -EINVAL;
> + }
> +
> + mutex_lock(&adev->grbm_idx_mutex);
> + ret = psp_ras_trigger_error(&adev->psp, (struct
> ta_ras_trigger_error_input *)inject_if);
> + mutex_unlock(&adev->grbm_idx_mutex);
> +
> + return ret;
> +}
> +
> const struct amdgpu_ras_block_ops umc_v8_7_ras_ops = {
> .ras_block_match = umc_v8_7_ras_block_match,
> .ras_late_init = amdgpu_umc_ras_late_init,
> .ras_fini = amdgpu_umc_ras_fini,
> .query_ras_error_count = umc_v8_7_query_ras_error_count,
> .query_ras_error_address = umc_v8_7_query_ras_error_address,
> + .ras_error_inject = umc_v8_7_ras_error_inject,
> };
>
> struct amdgpu_umc_ras umc_v8_7_ras = {
> --
> 2.25.1
More information about the amd-gfx
mailing list