[PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for the unified ras block data and ops
Zhou1, Tao
Tao.Zhou1 at amd.com
Mon Dec 6 06:58:00 UTC 2021
[AMD Official Use Only]
Please see my comments inline.
> -----Original Message-----
> From: Chai, Thomas <YiPeng.Chai at amd.com>
> Sent: Wednesday, December 1, 2021 6:53 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Chai, Thomas <YiPeng.Chai at amd.com>; Zhang, Hawking
> <Hawking.Zhang at amd.com>; Zhou1, Tao <Tao.Zhou1 at amd.com>; Chai,
> Thomas <YiPeng.Chai at amd.com>
> Subject: [PATCH V2 03/11] drm/amdgpu: Modify gfx block to fit for the unified
> ras block data and ops
>
> 1.Modify gfx block to fit for the unified ras block data and ops
> 2.Implement .ras_block_match function pointer for gfx block to identify itself.
> 3.Change amdgpu_gfx_ras_funcs to amdgpu_gfx_ras, and the corresponding
> variable name remove _funcs suffix.
> 4.Remove the const flag of gfx ras variable so that gfx ras block can be able to
> be insertted into amdgpu device ras block link list.
> 5.Invoke amdgpu_ras_register_ras_block function to register gfx ras block into
> amdgpu device ras block link list.
> 6.Remove the redundant code about gfx in amdgpu_ras.c after using the unified
> ras block.
>
> Signed-off-by: yipechai <YiPeng.Chai at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 6 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 15 ++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 80 ++++++++++++++++++-------
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 73 +++++++++++++++-------
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c | 39 ++++++++----
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h | 2 +-
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 42 +++++++++----
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h | 2 +-
> 8 files changed, 178 insertions(+), 81 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 1795d448c700..da8691259ac1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -696,9 +696,9 @@ int amdgpu_gfx_process_ras_data_cb(struct
> amdgpu_device *adev,
> */
> if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
> kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
> - if (adev->gfx.ras_funcs &&
> - adev->gfx.ras_funcs->query_ras_error_count)
> - adev->gfx.ras_funcs->query_ras_error_count(adev,
> err_data);
> + if (adev->gfx.ras && adev->gfx.ras->ras_block.ops &&
> + adev->gfx.ras->ras_block.ops->query_ras_error_count)
> + adev->gfx.ras->ras_block.ops-
> >query_ras_error_count(adev, err_data);
> amdgpu_ras_reset_gpu(adev);
> }
> return AMDGPU_RAS_SUCCESS;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 6b78b4a0e182..ff4a8428a84b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -31,6 +31,7 @@
> #include "amdgpu_ring.h"
> #include "amdgpu_rlc.h"
> #include "soc15.h"
> +#include "amdgpu_ras.h"
>
> /* GFX current status */
> #define AMDGPU_GFX_NORMAL_MODE 0x00000000L
> @@ -213,16 +214,8 @@ struct amdgpu_cu_info {
> uint32_t bitmap[4][4];
> };
>
> -struct amdgpu_gfx_ras_funcs {
> - int (*ras_late_init)(struct amdgpu_device *adev);
> - void (*ras_fini)(struct amdgpu_device *adev);
> - int (*ras_error_inject)(struct amdgpu_device *adev,
> - void *inject_if);
> - int (*query_ras_error_count)(struct amdgpu_device *adev,
> - void *ras_error_status);
> - void (*reset_ras_error_count)(struct amdgpu_device *adev);
> - void (*query_ras_error_status)(struct amdgpu_device *adev);
> - void (*reset_ras_error_status)(struct amdgpu_device *adev);
> +struct amdgpu_gfx_ras {
> + struct amdgpu_ras_block_object ras_block;
> void (*enable_watchdog_timer)(struct amdgpu_device *adev); };
[Tao] Can we add the "enable_watchdog_timer" function to the amdgpu_ras_block_ops structure?
And I think using ras_block directly is simpler than the amdgpu_gfx_ras gfx_v9_0_ras structure.
>
> @@ -348,7 +341,7 @@ struct amdgpu_gfx {
>
> /*ras */
> struct ras_common_if *ras_if;
> - const struct amdgpu_gfx_ras_funcs *ras_funcs;
> + struct amdgpu_gfx_ras *ras;
> };
>
> #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs-
> >get_gpu_clock_counter((adev))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 1cf1f6331db1..190a4a4e9d7a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -862,6 +862,27 @@ static int amdgpu_ras_enable_all_features(struct
> amdgpu_device *adev, }
> /* feature ctl end */
>
> +static struct amdgpu_ras_block_object* amdgpu_ras_get_ras_block(struct
> amdgpu_device *adev,
> + enum amdgpu_ras_block block,
> uint32_t sub_block_index) {
> + struct amdgpu_ras_block_object *obj, *tmp;
> +
> + if (block >= AMDGPU_RAS_BLOCK__LAST) {
> + return NULL;
> + }
[Tao] The "{}" can be dropped since there is only one line under the if.
> +
> + list_for_each_entry_safe(obj, tmp, &adev->ras_list, node) {
> + if( !obj->ops || !obj->ops->ras_block_match) {
[Tao] Need a space after "if" and the space before "!obj" can be removed.
> + dev_info(adev->dev, "%s don't config ops or
> ras_block_match\n", obj->name);
> + continue;
> + }
> + if (!obj->ops->ras_block_match(obj, block, sub_block_index)) {
> + return obj;
> + }
[Tao] The "{}" can be removed.
> + }
> +
> + return NULL;
> +}
[Tao] This is a generic ras function, not gfx specific, so the code can be moved to patch #1.
>
> void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev,
> struct ras_common_if *ras_block, @@ -
> 892,6 +913,7 @@ void amdgpu_ras_mca_query_error_status(struct
> amdgpu_device *adev, int amdgpu_ras_query_error_status(struct
> amdgpu_device *adev,
> struct ras_query_if *info)
> {
> + struct amdgpu_ras_block_object* block_obj = NULL;
> struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
> struct ras_err_data err_data = {0, 0, 0, NULL};
> int i;
> @@ -899,6 +921,8 @@ int amdgpu_ras_query_error_status(struct
> amdgpu_device *adev,
> if (!obj)
> return -EINVAL;
>
> + block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
> +
> switch (info->head.block) {
> case AMDGPU_RAS_BLOCK__UMC:
> if (adev->umc.ras_funcs &&
> @@ -919,13 +943,17 @@ int amdgpu_ras_query_error_status(struct
> amdgpu_device *adev,
> }
> break;
> case AMDGPU_RAS_BLOCK__GFX:
> - if (adev->gfx.ras_funcs &&
> - adev->gfx.ras_funcs->query_ras_error_count)
> - adev->gfx.ras_funcs->query_ras_error_count(adev,
> &err_data);
> + if (!block_obj || !block_obj->ops) {
> + dev_info(adev->dev, "%s don't config ras function \n",
> + get_ras_block_str(&info->head));
> + return -EINVAL;
> + }
[Tao] Can we put the check right after "block_obj = amdgpu_ras_get_ras_block"? The same suggestion applies to all similar code.
> +
> + if (block_obj->ops->query_ras_error_count)
> + block_obj->ops->query_ras_error_count(adev,
> &err_data);
>
> - if (adev->gfx.ras_funcs &&
> - adev->gfx.ras_funcs->query_ras_error_status)
> - adev->gfx.ras_funcs->query_ras_error_status(adev);
> + if (block_obj->ops->query_ras_error_status)
> + block_obj->ops->query_ras_error_status(adev);
> break;
> case AMDGPU_RAS_BLOCK__MMHUB:
> if (adev->mmhub.ras_funcs &&
> @@ -1012,18 +1040,21 @@ int amdgpu_ras_query_error_status(struct
> amdgpu_device *adev, int amdgpu_ras_reset_error_status(struct
> amdgpu_device *adev,
> enum amdgpu_ras_block block)
> {
> + struct amdgpu_ras_block_object* block_obj =
> +amdgpu_ras_get_ras_block(adev, block, 0);
> if (!amdgpu_ras_is_supported(adev, block))
> return -EINVAL;
>
> switch (block) {
> case AMDGPU_RAS_BLOCK__GFX:
> - if (adev->gfx.ras_funcs &&
> - adev->gfx.ras_funcs->reset_ras_error_count)
> - adev->gfx.ras_funcs->reset_ras_error_count(adev);
> + if (!block_obj || !block_obj->ops) {
> + dev_info(adev->dev, "%s don't config ras function \n",
> ras_block_str(block));
> + return -EINVAL;
> + }
> + if (block_obj->ops->reset_ras_error_count)
> + block_obj->ops->reset_ras_error_count(adev);
>
> - if (adev->gfx.ras_funcs &&
> - adev->gfx.ras_funcs->reset_ras_error_status)
> - adev->gfx.ras_funcs->reset_ras_error_status(adev);
> + if (block_obj->ops->reset_ras_error_status)
> + block_obj->ops->reset_ras_error_status(adev);
> break;
> case AMDGPU_RAS_BLOCK__MMHUB:
> if (adev->mmhub.ras_funcs &&
> @@ -1088,7 +1119,8 @@ int amdgpu_ras_error_inject(struct amdgpu_device
> *adev,
> .address = info->address,
> .value = info->value,
> };
> - int ret = 0;
> + int ret = -EINVAL;
> + struct amdgpu_ras_block_object* block_obj =
> +amdgpu_ras_get_ras_block(adev, info->head.block,
> +info->head.sub_block_index);
>
> if (!obj)
> return -EINVAL;
> @@ -1102,11 +1134,12 @@ int amdgpu_ras_error_inject(struct amdgpu_device
> *adev,
>
> switch (info->head.block) {
> case AMDGPU_RAS_BLOCK__GFX:
> - if (adev->gfx.ras_funcs &&
> - adev->gfx.ras_funcs->ras_error_inject)
> - ret = adev->gfx.ras_funcs->ras_error_inject(adev, info);
> - else
> - ret = -EINVAL;
> + if (!block_obj || !block_obj->ops) {
> + dev_info(adev->dev, "%s don't config ras function \n",
> get_ras_block_str(&info->head));
> + return -EINVAL;
> + }
> + if (block_obj->ops->ras_error_inject)
> + ret = block_obj->ops->ras_error_inject(adev, info);
> break;
> case AMDGPU_RAS_BLOCK__UMC:
> case AMDGPU_RAS_BLOCK__SDMA:
> @@ -1727,15 +1760,20 @@ static void amdgpu_ras_log_on_err_counter(struct
> amdgpu_device *adev) static void amdgpu_ras_error_status_query(struct
> amdgpu_device *adev,
> struct ras_query_if *info)
> {
> + struct amdgpu_ras_block_object* block_obj =
> +amdgpu_ras_get_ras_block(adev, info->head.block,
> +info->head.sub_block_index);
> /*
> * Only two block need to query read/write
> * RspStatus at current state
> */
> switch (info->head.block) {
> case AMDGPU_RAS_BLOCK__GFX:
> - if (adev->gfx.ras_funcs &&
> - adev->gfx.ras_funcs->query_ras_error_status)
> - adev->gfx.ras_funcs->query_ras_error_status(adev);
> + if (!block_obj || !block_obj->ops) {
> + dev_info(adev->dev, "%s don't config ras function \n",
> get_ras_block_str(&info->head));
> + return ;
> + }
> +
> + if (block_obj->ops->query_ras_error_status)
> + block_obj->ops->query_ras_error_status(adev);
> break;
> case AMDGPU_RAS_BLOCK__MMHUB:
> if (adev->mmhub.ras_funcs &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 08e91e7245df..2ffde223c4f5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -817,7 +817,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device
> *adev, static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device
> *adev); static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
> static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); -static
> int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
> +static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
> void *ras_error_status);
> static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
> void *inject_if);
> @@ -2118,6 +2118,18 @@ static void gfx_v9_0_select_me_pipe_q(struct
> amdgpu_device *adev,
> soc15_grbm_select(adev, me, pipe, q, vm); }
>
> +static int gfx_v9_0_ras_block_match(struct amdgpu_ras_block_object*
> +block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index) {
> + if(!block_obj)
> + return -EINVAL;
> +
> + if(block_obj->block == block) {
> + return 0;
> + }
> +
> + return -EINVAL;
[Tao] The return type can be changed to bool, returning true on a match and false otherwise, instead of 0 and -EINVAL.
> +}
[Tao] It's better to implement a general ras block match function in amdgpu_ras.c
> +
> static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
> .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
> .select_se_sh = &gfx_v9_0_select_se_sh, @@ -2127,12 +2139,21 @@
> static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
> .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, };
>
> -static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = {
> - .ras_late_init = amdgpu_gfx_ras_late_init,
> - .ras_fini = amdgpu_gfx_ras_fini,
> - .ras_error_inject = &gfx_v9_0_ras_error_inject,
> - .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
> - .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
> +const struct amdgpu_ras_block_ops gfx_v9_0_ras_ops = {
[Tao] static const?
> + .ras_block_match = gfx_v9_0_ras_block_match,
> + .ras_late_init = amdgpu_gfx_ras_late_init,
> + .ras_fini = amdgpu_gfx_ras_fini,
> + .ras_error_inject = &gfx_v9_0_ras_error_inject,
> + .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
> + .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, };
> +
> +static struct amdgpu_gfx_ras gfx_v9_0_ras = {
> + .ras_block = {
> + .name = "gfx",
> + .block = AMDGPU_RAS_BLOCK__GFX,
> + .ops = &gfx_v9_0_ras_ops,
> + },
> };
>
> static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) @@ -2161,7
> +2182,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
> DRM_INFO("fix gfx.config for vega12\n");
> break;
> case CHIP_VEGA20:
> - adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs;
> + adev->gfx.ras = &gfx_v9_0_ras;
> adev->gfx.config.max_hw_contexts = 8;
> adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
> adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -
> 2187,7 +2208,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device
> *adev)
> gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
> break;
> case CHIP_ARCTURUS:
> - adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs;
> + adev->gfx.ras = &gfx_v9_4_ras;
> adev->gfx.config.max_hw_contexts = 8;
> adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
> adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -
> 2208,7 +2229,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device
> *adev)
> gb_addr_config |= 0x22010042;
> break;
> case CHIP_ALDEBARAN:
> - adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs;
> + adev->gfx.ras = &gfx_v9_4_2_ras;
> adev->gfx.config.max_hw_contexts = 8;
> adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
> adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -
> 2227,6 +2248,14 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device
> *adev)
> break;
> }
>
> + if (adev->gfx.ras) {
> + err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras-
> >ras_block);
> + if (err) {
> + DRM_ERROR("Failed to register gfx ras block!\n");
> + return err;
> + }
> + }
> +
> adev->gfx.config.gb_addr_config = gb_addr_config;
>
> adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << @@ -2448,9
> +2477,9 @@ static int gfx_v9_0_sw_fini(void *handle)
> int i;
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> - if (adev->gfx.ras_funcs &&
> - adev->gfx.ras_funcs->ras_fini)
> - adev->gfx.ras_funcs->ras_fini(adev);
> + if (adev->gfx.ras && adev->gfx.ras->ras_block.ops &&
> + adev->gfx.ras->ras_block.ops->ras_fini)
> + adev->gfx.ras->ras_block.ops->ras_fini(adev);
>
> for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
> @@ -4888,16 +4917,16 @@ static int gfx_v9_0_ecc_late_init(void *handle)
> if (r)
> return r;
>
> - if (adev->gfx.ras_funcs &&
> - adev->gfx.ras_funcs->ras_late_init) {
> - r = adev->gfx.ras_funcs->ras_late_init(adev);
> + if (adev->gfx.ras && adev->gfx.ras->ras_block.ops &&
> + adev->gfx.ras->ras_block.ops->ras_late_init) {
> + r = adev->gfx.ras->ras_block.ops->ras_late_init(adev);
> if (r)
> return r;
> }
>
> - if (adev->gfx.ras_funcs &&
> - adev->gfx.ras_funcs->enable_watchdog_timer)
> - adev->gfx.ras_funcs->enable_watchdog_timer(adev);
> + if (adev->gfx.ras &&
> + adev->gfx.ras->enable_watchdog_timer)
> + adev->gfx.ras->enable_watchdog_timer(adev);
>
> return 0;
> }
> @@ -6841,7 +6870,7 @@ static void gfx_v9_0_reset_ras_error_count(struct
> amdgpu_device *adev)
> WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); }
>
> -static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
> +static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
> void *ras_error_status)
> {
> struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> @@ -6850,7 +6879,7 @@ static int gfx_v9_0_query_ras_error_count(struct
> amdgpu_device *adev,
> uint32_t reg_value;
>
> if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
> - return -EINVAL;
> + return;
>
> err_data->ue_count = 0;
> err_data->ce_count = 0;
> @@ -6879,8 +6908,6 @@ static int gfx_v9_0_query_ras_error_count(struct
> amdgpu_device *adev,
> mutex_unlock(&adev->grbm_idx_mutex);
>
> gfx_v9_0_query_utc_edc_status(adev, err_data);
> -
> - return 0;
> }
>
> static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring) diff --git
> a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
> index b4789dfc2bb9..2d816addbd4d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
> @@ -863,7 +863,7 @@ static int gfx_v9_4_ras_error_count(struct
> amdgpu_device *adev,
> return 0;
> }
>
> -static int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
> +static void gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
> void *ras_error_status)
> {
> struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> @@ -872,7 +872,7 @@ static int gfx_v9_4_query_ras_error_count(struct
> amdgpu_device *adev,
> uint32_t reg_value;
>
> if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
> - return -EINVAL;
> + return;
>
> err_data->ue_count = 0;
> err_data->ce_count = 0;
> @@ -903,7 +903,6 @@ static int gfx_v9_4_query_ras_error_count(struct
> amdgpu_device *adev,
>
> gfx_v9_4_query_utc_edc_status(adev, err_data);
>
> - return 0;
> }
>
> static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev) @@
> -1029,11 +1028,31 @@ static void gfx_v9_4_query_ras_error_status(struct
> amdgpu_device *adev)
> mutex_unlock(&adev->grbm_idx_mutex);
> }
>
> -const struct amdgpu_gfx_ras_funcs gfx_v9_4_ras_funcs = {
> - .ras_late_init = amdgpu_gfx_ras_late_init,
> - .ras_fini = amdgpu_gfx_ras_fini,
> - .ras_error_inject = &gfx_v9_4_ras_error_inject,
> - .query_ras_error_count = &gfx_v9_4_query_ras_error_count,
> - .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
> - .query_ras_error_status = &gfx_v9_4_query_ras_error_status,
> +static int gfx_v9_4_ras_block_match(struct amdgpu_ras_block_object*
> +block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index) {
> + if(!block_obj)
> + return -EINVAL;
> +
> + if(block_obj->block == block) {
> + return 0;
> + }
> + return -EINVAL;
> +}
> +
> +const struct amdgpu_ras_block_ops gfx_v9_4_ras_ops = {
> + .ras_block_match = gfx_v9_4_ras_block_match,
> + .ras_late_init = amdgpu_gfx_ras_late_init,
> + .ras_fini = amdgpu_gfx_ras_fini,
> + .ras_error_inject = &gfx_v9_4_ras_error_inject,
> + .query_ras_error_count = &gfx_v9_4_query_ras_error_count,
> + .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
> + .query_ras_error_status = &gfx_v9_4_query_ras_error_status, };
> +
> +struct amdgpu_gfx_ras gfx_v9_4_ras = {
> + .ras_block = {
> + .name = "gfx",
> + .block = AMDGPU_RAS_BLOCK__GFX,
> + .ops = &gfx_v9_4_ras_ops,
> + },
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
> index bdd16b568021..ca520a767267 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
> @@ -24,6 +24,6 @@
> #ifndef __GFX_V9_4_H__
> #define __GFX_V9_4_H__
>
> -extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_ras_funcs;
> +extern struct amdgpu_gfx_ras gfx_v9_4_ras;
>
> #endif /* __GFX_V9_4_H__ */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
> index 54306fd45ff1..2744709fa09d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
> @@ -1644,14 +1644,14 @@ static int gfx_v9_4_2_query_utc_edc_count(struct
> amdgpu_device *adev,
> return 0;
> }
>
> -static int gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev,
> +static void gfx_v9_4_2_query_ras_error_count(struct amdgpu_device
> +*adev,
> void *ras_error_status)
> {
> struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> uint32_t sec_count = 0, ded_count = 0;
>
> if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
> - return -EINVAL;
> + return;
>
> err_data->ue_count = 0;
> err_data->ce_count = 0;
> @@ -1664,7 +1664,6 @@ static int gfx_v9_4_2_query_ras_error_count(struct
> amdgpu_device *adev,
> err_data->ce_count += sec_count;
> err_data->ue_count += ded_count;
>
> - return 0;
> }
>
> static void gfx_v9_4_2_reset_utc_err_status(struct amdgpu_device *adev) @@
> -1934,13 +1933,34 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct
> amdgpu_device *adev)
> mutex_unlock(&adev->grbm_idx_mutex);
> }
>
> -const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs = {
> - .ras_late_init = amdgpu_gfx_ras_late_init,
> - .ras_fini = amdgpu_gfx_ras_fini,
> - .ras_error_inject = &gfx_v9_4_2_ras_error_inject,
> - .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
> - .reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count,
> - .query_ras_error_status = &gfx_v9_4_2_query_ras_error_status,
> - .reset_ras_error_status = &gfx_v9_4_2_reset_ras_error_status,
> +static int gfx_v9_4_2_ras_block_match(struct amdgpu_ras_block_object*
> +block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index) {
> + if(!block_obj)
> + return -EINVAL;
> +
> + if(block_obj->block == block) {
> + return 0;
> + }
> +
> + return -EINVAL;
> +}
> +
> +struct amdgpu_ras_block_ops gfx_v9_4_2_ras_ops ={
> + .ras_block_match = gfx_v9_4_2_ras_block_match,
> + .ras_late_init = amdgpu_gfx_ras_late_init,
> + .ras_fini = amdgpu_gfx_ras_fini,
> + .ras_error_inject = &gfx_v9_4_2_ras_error_inject,
> + .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
> + .reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count,
> + .query_ras_error_status =
> &gfx_v9_4_2_query_ras_error_status,
> + .reset_ras_error_status = &gfx_v9_4_2_reset_ras_error_status,
> +};
> +
> +struct amdgpu_gfx_ras gfx_v9_4_2_ras = {
> + .ras_block = {
> + .name = "gfx",
> + .block = AMDGPU_RAS_BLOCK__GFX,
> + .ops = &gfx_v9_4_2_ras_ops,
> + },
> .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
> index 6db1f88509af..7584624b641c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
> @@ -31,6 +31,6 @@ void gfx_v9_4_2_init_golden_registers(struct
> amdgpu_device *adev, void gfx_v9_4_2_set_power_brake_sequence(struct
> amdgpu_device *adev); int gfx_v9_4_2_do_edc_gpr_workarounds(struct
> amdgpu_device *adev);
>
> -extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs;
> +extern struct amdgpu_gfx_ras gfx_v9_4_2_ras;
>
> #endif /* __GFX_V9_4_2_H__ */
> --
> 2.25.1
More information about the amd-gfx
mailing list