[PATCH 2/2] drm/amdgpu: add new ras workflow control flags

Quan, Evan Evan.Quan at amd.com
Tue Mar 12 10:22:48 UTC 2019


Reviewed-by: Evan Quan <evan.quan at amd.com>

> -----Original Message-----
> From: Pan, Xinhui
> Sent: 2019年3月12日 18:14
> To: amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Quan, Evan
> <Evan.Quan at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>
> Subject: [PATCH 2/2] drm/amdgpu: add new ras workflow control flags
> 
> Add a ras post init function.
> It does some initialization after all IPs have finished their late init.
> 
> Add a new member, flags, which will control the ras workflow.
> For now, the vbios enables ras for us on boot. That might change in the future,
> so there should be a flag from the vbios to tell us whether ras is enabled on boot.
> It looks like there is no such info now.
> 
> Other bits of the flags are reserved to control other parts of ras.
> 
> Signed-off-by: xinhui pan <xinhui.pan at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    | 34
> +++++++++++++++++++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h    |  3 ++
>  3 files changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 29c44a2eabcf..95cd3b7886ff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2707,6 +2707,9 @@ int amdgpu_device_init(struct amdgpu_device
> *adev,
>  		goto failed;
>  	}
> 
> +	/* must succeed. */
> +	amdgpu_ras_post_init(adev);
> +
>  	return 0;
> 
>  failed:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 10ce40d2c040..238b46c304cc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -118,6 +118,11 @@ const char *ras_block_string[] = {  #define
> ras_err_str(i) (ras_error_string[ffs(i)])  #define ras_block_str(i)
> (ras_block_string[i])
> 
> +enum amdgpu_ras_flags {
> +	AMDGPU_RAS_FLAG_INIT_BY_VBIOS = 1,
> +};
> +#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
> +
>  static void amdgpu_ras_self_test(struct amdgpu_device *adev)  {
>  	/* TODO */
> @@ -1387,13 +1392,16 @@ int amdgpu_ras_init(struct amdgpu_device
> *adev)
>  			&con->supported);
>  	con->features = 0;
>  	INIT_LIST_HEAD(&con->head);
> +	/* Might need get this flag from vbios. */
> +	con->flags = RAS_DEFAULT_FLAGS;
> 
>  	if (amdgpu_ras_recovery_init(adev))
>  		goto recovery_out;
> 
>  	amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK;
> 
> -	amdgpu_ras_enable_all_features(adev, 1);
> +	if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
> +		amdgpu_ras_enable_all_features(adev, 1);
> 
>  	if (amdgpu_ras_fs_init(adev))
>  		goto fs_out;
> @@ -1413,6 +1421,30 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
>  	return -EINVAL;
>  }
> 
> +/* do some init work after IP late init as dependence */ void
> +amdgpu_ras_post_init(struct amdgpu_device *adev) {
> +	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
> +	struct ras_manager *obj, *tmp;
> +
> +	if (!con)
> +		return;
> +
> +	/* We enable ras on all hw_supported block, but as boot parameter
> might
> +	 * disable some of them and one or more IP has not implemented
> yet.
> +	 * So we disable them on behalf.
> +	 */
> +	if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
> +		list_for_each_entry_safe(obj, tmp, &con->head, node) {
> +			if (!amdgpu_ras_is_supported(adev, obj-
> >head.block)) {
> +				amdgpu_ras_feature_enable(adev, &obj-
> >head, 0);
> +				/* there should be no any reference. */
> +				WARN_ON(alive_obj(obj));
> +			}
> +		};
> +	}
> +}
> +
>  /* do some fini work before IP fini as dependence */  int
> amdgpu_ras_pre_fini(struct amdgpu_device *adev)  { diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> index 2b6077762b91..7a35316baab0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> @@ -103,6 +103,8 @@ struct amdgpu_ras {
>  	/* error handler data */
>  	struct ras_err_handler_data *eh_data;
>  	struct mutex recovery_lock;
> +
> +	uint32_t flags;
>  };
> 
>  /* interfaces for IP */
> @@ -197,6 +199,7 @@ static inline int amdgpu_ras_reset_gpu(struct
> amdgpu_device *adev,
> 
>  /* called in ip_init and ip_fini */
>  int amdgpu_ras_init(struct amdgpu_device *adev);
> +void amdgpu_ras_post_init(struct amdgpu_device *adev);
>  int amdgpu_ras_fini(struct amdgpu_device *adev);  int
> amdgpu_ras_pre_fini(struct amdgpu_device *adev);
> 
> --
> 2.17.1



More information about the amd-gfx mailing list