[PATCH] drm/amdgpu: correct ras error count type
Zhou1, Tao
Tao.Zhou1 at amd.com
Tue Aug 20 03:18:42 UTC 2019
Since amdgpu_ras_error_query() returning < 0 and !con are both almost impossible, the patch is:
Reviewed-by: Tao Zhou <tao.zhou1 at amd.com>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of
> Guchun Chen
> Sent: 2019年8月20日 10:25
> To: amd-gfx at lists.freedesktop.org; Zhang, Hawking
> <Hawking.Zhang at amd.com>; Li, Dennis <Dennis.Li at amd.com>; Pan, Xinhui
> <Xinhui.Pan at amd.com>; Zhou1, Tao <Tao.Zhou1 at amd.com>
> Cc: Chen, Guchun <Guchun.Chen at amd.com>
> Subject: [PATCH] drm/amdgpu: correct ras error count type
>
> Use the unsigned long type consistently for the same ras count variables.
> This avoids overflow on 64-bit systems.
>
> Change-Id: I011406d81bad69a65433b63960e1691c4959bbc5
> Signed-off-by: Guchun Chen <guchun.chen at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 4 ++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 +++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +-
> 4 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 89c26bb0f05c..da337a2bab65 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -352,7 +352,7 @@ static int amdgpu_ctx_query2(struct amdgpu_device
> *adev, {
> struct amdgpu_ctx *ctx;
> struct amdgpu_ctx_mgr *mgr;
> - uint32_t ras_counter;
> + unsigned long ras_counter;
>
> if (!fpriv)
> return -EINVAL;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index 5f1b54c9bcdb..da808633732b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -49,8 +49,8 @@ struct amdgpu_ctx {
> enum drm_sched_priority override_priority;
> struct mutex lock;
> atomic_t guilty;
> - uint32_t ras_counter_ce;
> - uint32_t ras_counter_ue;
> + unsigned long ras_counter_ce;
> + unsigned long ras_counter_ue;
> };
>
> struct amdgpu_ctx_mgr {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 50c13b02d234..df4b9ae39c5e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -686,7 +686,7 @@ int amdgpu_ras_error_cure(struct amdgpu_device
> *adev, }
>
> /* get the total error counts on all IPs */ -int
> amdgpu_ras_query_error_count(struct amdgpu_device *adev,
> +unsigned long amdgpu_ras_query_error_count(struct amdgpu_device
> *adev,
> bool is_ce)
> {
> struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -
> 694,7 +694,7 @@ int amdgpu_ras_query_error_count(struct amdgpu_device
> *adev,
> struct ras_err_data data = {0, 0};
>
> if (!con)
> - return -EINVAL;
> + return 0;
>
> list_for_each_entry(obj, &con->head, node) {
> struct ras_query_if info = {
> @@ -702,7 +702,7 @@ int amdgpu_ras_query_error_count(struct
> amdgpu_device *adev,
> };
>
> if (amdgpu_ras_error_query(adev, &info))
> - return -EINVAL;
> + return 0;
>
> data.ce_count += info.ce_count;
> data.ue_count += info.ue_count;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> index 2765f2dbb1e6..02a51e3dfa14 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> @@ -484,7 +484,7 @@ int amdgpu_ras_request_reset_on_boot(struct
> amdgpu_device *adev, void amdgpu_ras_resume(struct amdgpu_device
> *adev); void amdgpu_ras_suspend(struct amdgpu_device *adev);
>
> -int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
> +unsigned long amdgpu_ras_query_error_count(struct amdgpu_device
> *adev,
> bool is_ce);
>
> /* error handling functions */
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list