[PATCH 2/2] drm/amdgpu: add new ras workflow control flags
Deucher, Alexander
Alexander.Deucher at amd.com
Tue Mar 12 13:48:54 UTC 2019
> -----Original Message-----
> From: Pan, Xinhui <Xinhui.Pan at amd.com>
> Sent: Tuesday, March 12, 2019 6:14 AM
> To: amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Quan, Evan
> <Evan.Quan at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>
> Subject: [PATCH 2/2] drm/amdgpu: add new ras workflow control flags
>
> Add a ras post init function.
> It does some initialization after all IPs have finished their late init.
>
> Add a new member, flags, which will control the ras workflow.
> For now, vbios enables ras for us on boot. That might change in the future,
> so there should be a flag from vbios to tell us whether ras is enabled on boot.
> It looks like there is no such info at the moment.
>
> Other bits of the flags are reserved to control other parts of ras.
>
> Signed-off-by: xinhui pan <xinhui.pan at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 34
> +++++++++++++++++++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 ++
> 3 files changed, 39 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 29c44a2eabcf..95cd3b7886ff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2707,6 +2707,9 @@ int amdgpu_device_init(struct amdgpu_device
> *adev,
> goto failed;
> }
>
> + /* must succeed. */
> + amdgpu_ras_post_init(adev);
> +
> return 0;
>
> failed:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 10ce40d2c040..238b46c304cc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -118,6 +118,11 @@ const char *ras_block_string[] = { #define
> ras_err_str(i) (ras_error_string[ffs(i)]) #define ras_block_str(i)
> (ras_block_string[i])
>
> +enum amdgpu_ras_flags {
> + AMDGPU_RAS_FLAG_INIT_BY_VBIOS = 1,
> +};
> +#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
> +
Should this enum be moved to amdgpu_ras.h? Although, if you are going to use these values as bit flags, #defines might be a better fit than an enum.
> static void amdgpu_ras_self_test(struct amdgpu_device *adev) {
> /* TODO */
> @@ -1387,13 +1392,16 @@ int amdgpu_ras_init(struct amdgpu_device
> *adev)
> &con->supported);
> con->features = 0;
> INIT_LIST_HEAD(&con->head);
> + /* Might need get this flag from vbios. */
> + con->flags = RAS_DEFAULT_FLAGS;
>
> if (amdgpu_ras_recovery_init(adev))
> goto recovery_out;
>
> amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK;
>
> - amdgpu_ras_enable_all_features(adev, 1);
> + if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
> + amdgpu_ras_enable_all_features(adev, 1);
>
> if (amdgpu_ras_fs_init(adev))
> goto fs_out;
> @@ -1413,6 +1421,30 @@ int amdgpu_ras_init(struct amdgpu_device
> *adev)
> return -EINVAL;
> }
>
> +/* do some init work after IP late init as dependence */ void
> +amdgpu_ras_post_init(struct amdgpu_device *adev) {
> + struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
> + struct ras_manager *obj, *tmp;
> +
> + if (!con)
> + return;
> +
> + /* We enable ras on all hw_supported block, but as boot parameter
> might
> + * disable some of them and one or more IP has not implemented
> yet.
> + * So we disable them on behalf.
> + */
> + if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
> + list_for_each_entry_safe(obj, tmp, &con->head, node) {
> + if (!amdgpu_ras_is_supported(adev, obj-
> >head.block)) {
> + amdgpu_ras_feature_enable(adev, &obj-
> >head, 0);
> + /* there should be no any reference. */
> + WARN_ON(alive_obj(obj));
> + }
> + };
> + }
> +}
> +
> /* do some fini work before IP fini as dependence */ int
> amdgpu_ras_pre_fini(struct amdgpu_device *adev) { diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> index 2b6077762b91..7a35316baab0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> @@ -103,6 +103,8 @@ struct amdgpu_ras {
> /* error handler data */
> struct ras_err_handler_data *eh_data;
> struct mutex recovery_lock;
> +
> + uint32_t flags;
> };
>
> /* interfaces for IP */
> @@ -197,6 +199,7 @@ static inline int amdgpu_ras_reset_gpu(struct
> amdgpu_device *adev,
>
> /* called in ip_init and ip_fini */
> int amdgpu_ras_init(struct amdgpu_device *adev);
> +void amdgpu_ras_post_init(struct amdgpu_device *adev);
> int amdgpu_ras_fini(struct amdgpu_device *adev); int
> amdgpu_ras_pre_fini(struct amdgpu_device *adev);
>
> --
> 2.17.1
More information about the amd-gfx
mailing list