[PATCH V2 11/11] drm/amdgpu: Move error inject function from amdgpu_ras.c to each block

Zhou1, Tao Tao.Zhou1 at amd.com
Mon Dec 6 07:33:44 UTC 2021



Error injection works the same way for every RAS block except GFX and XGMI.
I agree with moving the XGMI error injection into amdgpu_xgmi.c, but I don't think it's necessary to implement a dedicated error injection function for each of the other RAS blocks.
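
To illustrate (just a rough sketch reusing the variables already in this patch, not tested): amdgpu_ras_error_inject() could keep a generic psp_ras_trigger_error() fallback for blocks that don't register their own callback, e.g.:

	if (block_obj->ops->ras_error_inject) {
		/* GFX (and XGMI) keep their dedicated handlers */
		if (info->head.block == AMDGPU_RAS_BLOCK__GFX)
			ret = block_obj->ops->ras_error_inject(adev, info);
		else
			ret = block_obj->ops->ras_error_inject(adev, &block_info);
	} else {
		/* default path, no per-IP wrapper needed */
		ret = psp_ras_trigger_error(&adev->psp, &block_info);
	}

With that, only GFX and XGMI need block-specific injection code and the duplicated wrappers in the other IP files could be dropped.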

Regards,
Tao

> -----Original Message-----
> From: Chai, Thomas <YiPeng.Chai at amd.com>
> Sent: Wednesday, December 1, 2021 6:53 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Chai, Thomas <YiPeng.Chai at amd.com>; Zhang, Hawking
> <Hawking.Zhang at amd.com>; Zhou1, Tao <Tao.Zhou1 at amd.com>; Chai,
> Thomas <YiPeng.Chai at amd.com>
> Subject: [PATCH V2 11/11] drm/amdgpu: Move error inject function from
> amdgpu_ras.c to each block
> 
> Move each block error inject function from amdgpu_ras.c to each block.
> 
> Signed-off-by: yipechai <YiPeng.Chai at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  | 62 +++++-------------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 28 +++++++++++
>  drivers/gpu/drm/amd/amdgpu/mca_v3_0.c    | 18 +++++++
>  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 16 ++++++
> drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c  | 16 ++++++
> drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c  | 16 ++++++
>  drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   | 16 ++++++
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 16 ++++++
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c   | 16 ++++++
>  drivers/gpu/drm/amd/amdgpu/umc_v6_1.c    | 16 ++++++
>  drivers/gpu/drm/amd/amdgpu/umc_v6_7.c    | 16 ++++++
>  drivers/gpu/drm/amd/amdgpu/umc_v8_7.c    | 16 ++++++
>  12 files changed, 201 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 2e38bd3d3d45..87b625d305c9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1032,31 +1032,7 @@ int amdgpu_ras_reset_error_status(struct
> amdgpu_device *adev,
>  	return 0;
>  }
> 
> -/* Trigger XGMI/WAFL error */
> -static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
> -				 struct ta_ras_trigger_error_input *block_info)
> -{
> -	int ret;
> -
> -	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
> -		dev_warn(adev->dev, "Failed to disallow df cstate");
> 
> -	if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
> -		dev_warn(adev->dev, "Failed to disallow XGMI power down");
> -
> -	ret = psp_ras_trigger_error(&adev->psp, block_info);
> -
> -	if (amdgpu_ras_intr_triggered())
> -		return ret;
> -
> -	if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
> -		dev_warn(adev->dev, "Failed to allow XGMI power down");
> -
> -	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
> -		dev_warn(adev->dev, "Failed to allow df cstate");
> -
> -	return ret;
> -}
> 
>  /* wrapper of psp_ras_trigger_error */
>  int amdgpu_ras_error_inject(struct amdgpu_device *adev,
> @@ -1076,41 +1052,25 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
>  	if (!obj)
>  		return -EINVAL;
> 
> +	if (!block_obj || !block_obj->ops)	{
> +		dev_info(adev->dev, "%s don't config ras function \n", get_ras_block_str(&info->head));
> +		return -EINVAL;
> +	}
> +
>  	/* Calculate XGMI relative offset */
>  	if (adev->gmc.xgmi.num_physical_nodes > 1) {
> -		block_info.address =
> -			amdgpu_xgmi_get_relative_phy_addr(adev,
> -							  block_info.address);
> +		block_info.address = amdgpu_xgmi_get_relative_phy_addr(adev,
> +							block_info.address);
>  	}
> 
> -	switch (info->head.block) {
> -	case AMDGPU_RAS_BLOCK__GFX:
> -		if (!block_obj || !block_obj->ops)	{
> -			dev_info(adev->dev, "%s don't config ras function \n", get_ras_block_str(&info->head));
> -			return -EINVAL;
> -		}
> -		if (block_obj->ops->ras_error_inject)
> +	if (block_obj->ops->ras_error_inject) {
> +		if(info->head.block == AMDGPU_RAS_BLOCK__GFX)
>  			ret = block_obj->ops->ras_error_inject(adev, info);
> -		break;
> -	case AMDGPU_RAS_BLOCK__UMC:
> -	case AMDGPU_RAS_BLOCK__SDMA:
> -	case AMDGPU_RAS_BLOCK__MMHUB:
> -	case AMDGPU_RAS_BLOCK__PCIE_BIF:
> -	case AMDGPU_RAS_BLOCK__MCA:
> -		ret = psp_ras_trigger_error(&adev->psp, &block_info);
> -		break;
> -	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
> -		ret = amdgpu_ras_error_inject_xgmi(adev, &block_info);
> -		break;
> -	default:
> -		dev_info(adev->dev, "%s error injection is not supported yet\n",
> -			 get_ras_block_str(&info->head));
> -		ret = -EINVAL;
> +		else
> +			ret = block_obj->ops->ras_error_inject(adev, &block_info);
>  	}
> 
>  	if (ret)
> -		dev_err(adev->dev, "ras inject %s failed %d\n",
> -			get_ras_block_str(&info->head), ret);
> +		dev_err(adev->dev, "ras inject %s failed %d\n", get_ras_block_str(&info->head), ret);
> 
>  	return ret;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index da541c7b1ec2..298742afba99 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -940,6 +940,33 @@ static void
> amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
>  	err_data->ce_count += ce_cnt;
>  }
> 
> +/* Trigger XGMI/WAFL error */
> +static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
> +                                void *inject_if)
> +{
> +       int ret = 0;
> +       struct ta_ras_trigger_error_input *block_info = (struct ta_ras_trigger_error_input *)inject_if;
> +
> +       if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
> +               dev_warn(adev->dev, "Failed to disallow df cstate");
> +
> +       if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
> +               dev_warn(adev->dev, "Failed to disallow XGMI power down");
> +
> +       ret = psp_ras_trigger_error(&adev->psp, block_info);
> +
> +       if (amdgpu_ras_intr_triggered())
> +               return ret;
> +
> +       if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
> +               dev_warn(adev->dev, "Failed to allow XGMI power down");
> +
> +       if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
> +               dev_warn(adev->dev, "Failed to allow df cstate");
> +
> +       return ret;
> +}
> +
>  static int amdgpu_xgmi_ras_block_match(struct amdgpu_ras_block_object*
> block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index)  {
>  	if(!block_obj)
> @@ -958,6 +985,7 @@ struct amdgpu_ras_block_ops  xgmi_ras_ops = {
>  	.ras_fini = amdgpu_xgmi_ras_fini,
>  	.query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
>  	.reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
> +	.ras_error_inject = amdgpu_ras_error_inject_xgmi,
>  };
> 
>  struct amdgpu_xgmi_ras xgmi_ras = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
> b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
> index 99edc75ed4ec..ce6841967b05 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
> @@ -60,12 +60,28 @@ static int mca_v3_0_ras_block_match(struct
> amdgpu_ras_block_object* block_obj, e
>  	return -EINVAL;
>  }
> 
> +static int mca_v3_0_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
> +{
> +	int ret = 0;
> +	if (!adev || !inject_if) {
> +		dev_err(adev->dev, "%s invaild parameters \n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	mutex_lock(&adev->grbm_idx_mutex);
> +	ret = psp_ras_trigger_error(&adev->psp, (struct ta_ras_trigger_error_input *)inject_if);
> +	mutex_unlock(&adev->grbm_idx_mutex);
> +
> +	return ret;
> +}
> +
>  const struct amdgpu_ras_block_ops mca_v3_0_mp0_ops = {
>  	.ras_block_match = mca_v3_0_ras_block_match,
>  	.ras_late_init = mca_v3_0_mp0_ras_late_init,
>  	.ras_fini = mca_v3_0_mp0_ras_fini,
>  	.query_ras_error_count = mca_v3_0_mp0_query_ras_error_count,
>  	.query_ras_error_address = NULL,
> +	.ras_error_inject = mca_v3_0_ras_error_inject,
>  };
> 
>  struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = {
> @@ -101,6 +117,7 @@ const struct amdgpu_ras_block_ops mca_v3_0_mp1_ops = {
>  	.ras_fini = mca_v3_0_mp1_ras_fini,
>  	.query_ras_error_count = mca_v3_0_mp1_query_ras_error_count,
>  	.query_ras_error_address = NULL,
> +	.ras_error_inject = mca_v3_0_ras_error_inject,
>  };
> 
>  struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = {
> @@ -136,6 +153,7 @@ const struct amdgpu_ras_block_ops mca_v3_0_mpio_ops = {
>  	.ras_fini = mca_v3_0_mpio_ras_fini,
>  	.query_ras_error_count = mca_v3_0_mpio_query_ras_error_count,
>  	.query_ras_error_address = NULL,
> +	.ras_error_inject = mca_v3_0_ras_error_inject,
>  };
> 
>  struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> index da505314802a..7cca86c504e6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> @@ -786,12 +786,28 @@ static int mmhub_v1_0_ras_block_match(struct
> amdgpu_ras_block_object* block_obj,
>  	return -EINVAL;
>  }
> 
> +static int mmhub_v1_0_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
> +{
> +	int ret = 0;
> +	if (!adev || !inject_if) {
> +		dev_err(adev->dev, "%s invaild parameters \n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	mutex_lock(&adev->grbm_idx_mutex);
> +	ret = psp_ras_trigger_error(&adev->psp, (struct ta_ras_trigger_error_input *)inject_if);
> +	mutex_unlock(&adev->grbm_idx_mutex);
> +
> +	return ret;
> +}
> +
>  struct amdgpu_ras_block_ops mmhub_v1_0_ras_ops = {
>  	.ras_block_match = mmhub_v1_0_ras_block_match,
>  	.ras_late_init = amdgpu_mmhub_ras_late_init,
>  	.ras_fini = amdgpu_mmhub_ras_fini,
>  	.query_ras_error_count = mmhub_v1_0_query_ras_error_count,
>  	.reset_ras_error_count = mmhub_v1_0_reset_ras_error_count,
> +	.ras_error_inject = mmhub_v1_0_ras_error_inject,
>  };
> 
>  struct amdgpu_mmhub_ras mmhub_v1_0_ras = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
> index 829d14ee87d3..79a9995caef1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
> @@ -1333,6 +1333,21 @@ static int mmhub_v1_7_ras_block_match(struct
> amdgpu_ras_block_object* block_obj,
>  	return -EINVAL;
>  }
> 
> +static int mmhub_v1_7_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
> +{
> +	int ret = 0;
> +	if (!adev || !inject_if) {
> +		dev_err(adev->dev, "%s invaild parameters \n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	mutex_lock(&adev->grbm_idx_mutex);
> +	ret = psp_ras_trigger_error(&adev->psp, (struct ta_ras_trigger_error_input *)inject_if);
> +	mutex_unlock(&adev->grbm_idx_mutex);
> +
> +	return ret;
> +}
> +
>  struct amdgpu_ras_block_ops mmhub_v1_7_ras_ops = {
>  	.ras_block_match = mmhub_v1_7_ras_block_match,
>  	.ras_late_init = amdgpu_mmhub_ras_late_init,
> @@ -1341,6 +1356,7 @@ struct amdgpu_ras_block_ops mmhub_v1_7_ras_ops = {
>  	.reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
>  	.query_ras_error_status = mmhub_v1_7_query_ras_error_status,
>  	.reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
> +	.ras_error_inject = mmhub_v1_7_ras_error_inject,
>  };
> 
>  struct amdgpu_mmhub_ras mmhub_v1_7_ras = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
> b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
> index 1edc98e5bcbb..eaed556b9551 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
> @@ -1667,6 +1667,21 @@ static int mmhub_v9_4_ras_block_match(struct
> amdgpu_ras_block_object* block_obj,
>  	return -EINVAL;
>  }
> 
> +static int mmhub_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
> +{
> +	int ret = 0;
> +	if (!adev || !inject_if) {
> +		dev_err(adev->dev, "%s invaild parameters \n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	mutex_lock(&adev->grbm_idx_mutex);
> +	ret = psp_ras_trigger_error(&adev->psp, (struct ta_ras_trigger_error_input *)inject_if);
> +	mutex_unlock(&adev->grbm_idx_mutex);
> +
> +	return ret;
> +}
> +
>  const struct amdgpu_ras_block_ops mmhub_v9_4_ras_ops = {
>  	.ras_block_match = mmhub_v9_4_ras_block_match,
>  	.ras_late_init = amdgpu_mmhub_ras_late_init,
> @@ -1674,6 +1689,7 @@ const struct amdgpu_ras_block_ops mmhub_v9_4_ras_ops = {
>  	.query_ras_error_count = mmhub_v9_4_query_ras_error_count,
>  	.reset_ras_error_count = mmhub_v9_4_reset_ras_error_count,
>  	.query_ras_error_status = mmhub_v9_4_query_ras_error_status,
> +	.ras_error_inject = mmhub_v9_4_ras_error_inject,
>  };
> 
>  struct amdgpu_mmhub_ras mmhub_v9_4_ras = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> index 14f7265d954e..8e62e2ffabe5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> @@ -650,11 +650,27 @@ static int nbio_v7_4_ras_block_match(struct
> amdgpu_ras_block_object* block_obj,
>  	return -EINVAL;
>  }
> 
> +static int nbio_v7_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
> +{
> +	int ret = 0;
> +	if (!adev || !inject_if) {
> +		dev_err(adev->dev, "%s invaild parameters \n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	mutex_lock(&adev->grbm_idx_mutex);
> +	ret = psp_ras_trigger_error(&adev->psp, (struct ta_ras_trigger_error_input *)inject_if);
> +	mutex_unlock(&adev->grbm_idx_mutex);
> +
> +	return ret;
> +}
> +
>  const struct amdgpu_ras_block_ops nbio_v7_4_ras_ops = {
>  	.ras_block_match = nbio_v7_4_ras_block_match,
>  	.query_ras_error_count = nbio_v7_4_query_ras_error_count,
>  	.ras_late_init = amdgpu_nbio_ras_late_init,
>  	.ras_fini = amdgpu_nbio_ras_fini,
> +	.ras_error_inject = nbio_v7_4_ras_error_inject,
>  };
> 
>  struct amdgpu_nbio_ras nbio_v7_4_ras = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 30a651613776..578ee40cc0d1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -2803,11 +2803,27 @@ static int sdma_v4_0_ras_block_match(struct
> amdgpu_ras_block_object* block_obj,
>  	return -EINVAL;
>  }
> 
> +static int sdma_v4_0_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
> +{
> +	int ret = 0;
> +	if (!adev || !inject_if) {
> +		dev_err(adev->dev, "%s invaild parameters \n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	mutex_lock(&adev->grbm_idx_mutex);
> +	ret = psp_ras_trigger_error(&adev->psp, (struct ta_ras_trigger_error_input *)inject_if);
> +	mutex_unlock(&adev->grbm_idx_mutex);
> +
> +	return ret;
> +}
> +
>  const struct amdgpu_ras_block_ops sdma_v4_0_ras_ops = {
>  	.ras_block_match = sdma_v4_0_ras_block_match,
>  	.ras_fini = amdgpu_sdma_ras_fini,
>  	.query_ras_error_count = sdma_v4_0_query_ras_error_count,
>  	.reset_ras_error_count = sdma_v4_0_reset_ras_error_count,
> +	.ras_error_inject = sdma_v4_0_ras_error_inject,
>  };
> 
>  static struct amdgpu_sdma_ras sdma_v4_0_ras = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
> index 8c165bcb0ffa..0656c6a7a2c1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
> @@ -270,11 +270,27 @@ static int sdma_v4_4_ras_block_match(struct
> amdgpu_ras_block_object* block_obj,
>  	return -EINVAL;
>  }
> 
> +static int sdma_v4_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
> +{
> +	int ret = 0;
> +	if (!adev || !inject_if) {
> +		dev_err(adev->dev, "%s invaild parameters \n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	mutex_lock(&adev->grbm_idx_mutex);
> +	ret = psp_ras_trigger_error(&adev->psp, (struct ta_ras_trigger_error_input *)inject_if);
> +	mutex_unlock(&adev->grbm_idx_mutex);
> +
> +	return ret;
> +}
> +
>  const struct amdgpu_ras_block_ops sdma_v4_4_ras_ops = {
>  	.ras_block_match = sdma_v4_4_ras_block_match,
>  	.ras_fini = amdgpu_sdma_ras_fini,
>  	.query_ras_error_count = sdma_v4_4_query_ras_error_count,
>  	.reset_ras_error_count = sdma_v4_4_reset_ras_error_count,
> +	.ras_error_inject = sdma_v4_4_ras_error_inject,
>  };
> 
>  struct amdgpu_sdma_ras sdma_v4_4_ras = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
> index ed480c2081a6..2058439b02cd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
> @@ -477,12 +477,28 @@ static int umc_v6_1_ras_block_match(struct
> amdgpu_ras_block_object* block_obj, e
>  	return -EINVAL;
>  }
> 
> +static int umc_v6_1_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
> +{
> +	int ret = 0;
> +	if (!adev || !inject_if) {
> +		dev_err(adev->dev, "%s invaild parameters \n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	mutex_lock(&adev->grbm_idx_mutex);
> +	ret = psp_ras_trigger_error(&adev->psp, (struct ta_ras_trigger_error_input *)inject_if);
> +	mutex_unlock(&adev->grbm_idx_mutex);
> +
> +	return ret;
> +}
> +
>  const struct amdgpu_ras_block_ops umc_v6_1_ras_ops = {
>  	.ras_block_match = umc_v6_1_ras_block_match,
>  	.ras_late_init = amdgpu_umc_ras_late_init,
>  	.ras_fini = amdgpu_umc_ras_fini,
>  	.query_ras_error_count = umc_v6_1_query_ras_error_count,
>  	.query_ras_error_address = umc_v6_1_query_ras_error_address,
> +	.ras_error_inject = umc_v6_1_ras_error_inject,
>  };
> 
>  struct amdgpu_umc_ras umc_v6_1_ras = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> index e26728dbc6e9..2e87e7de4a55 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> @@ -333,6 +333,21 @@ static int umc_v6_7_ras_block_match(struct
> amdgpu_ras_block_object* block_obj, e
>  	return -EINVAL;
>  }
> 
> +static int umc_v6_7_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
> +{
> +	int ret = 0;
> +	if (!adev || !inject_if) {
> +		dev_err(adev->dev, "%s invaild parameters \n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	mutex_lock(&adev->grbm_idx_mutex);
> +	ret = psp_ras_trigger_error(&adev->psp, (struct ta_ras_trigger_error_input *)inject_if);
> +	mutex_unlock(&adev->grbm_idx_mutex);
> +
> +	return ret;
> +}
> +
>  const struct amdgpu_ras_block_ops umc_v6_7_ras_pos = {
>  	.ras_block_match = umc_v6_7_ras_block_match,
>  	.ras_late_init = amdgpu_umc_ras_late_init,
> @@ -340,6 +355,7 @@ const struct amdgpu_ras_block_ops umc_v6_7_ras_pos = {
>  	.query_ras_error_count = umc_v6_7_query_ras_error_count,
>  	.query_ras_error_address = umc_v6_7_query_ras_error_address,
>  	.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
> +	.ras_error_inject = umc_v6_7_ras_error_inject,
>  };
> 
>  struct amdgpu_umc_ras umc_v6_7_ras = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> index 037791e90c24..f7fb653434b9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> @@ -336,12 +336,28 @@ static int umc_v8_7_ras_block_match(struct
> amdgpu_ras_block_object* block_obj, e
>  	return -EINVAL;
>  }
> 
> +static int umc_v8_7_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
> +{
> +	int ret = 0;
> +	if (!adev || !inject_if) {
> +		dev_err(adev->dev, "%s invaild parameters \n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	mutex_lock(&adev->grbm_idx_mutex);
> +	ret = psp_ras_trigger_error(&adev->psp, (struct ta_ras_trigger_error_input *)inject_if);
> +	mutex_unlock(&adev->grbm_idx_mutex);
> +
> +	return ret;
> +}
> +
>  const struct amdgpu_ras_block_ops umc_v8_7_ras_ops = {
>  	.ras_block_match = umc_v8_7_ras_block_match,
>  	.ras_late_init = amdgpu_umc_ras_late_init,
>  	.ras_fini = amdgpu_umc_ras_fini,
>  	.query_ras_error_count = umc_v8_7_query_ras_error_count,
>  	.query_ras_error_address = umc_v8_7_query_ras_error_address,
> +	.ras_error_inject = umc_v8_7_ras_error_inject,
>  };
> 
>  struct amdgpu_umc_ras umc_v8_7_ras = {
> --
> 2.25.1

