[PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10
Zhou1, Tao
Tao.Zhou1 at amd.com
Mon Apr 3 03:44:52 UTC 2023
[AMD Official Use Only - General]
> -----Original Message-----
> From: Chai, Thomas <YiPeng.Chai at amd.com>
> Sent: Monday, April 3, 2023 9:59 AM
> To: amd-gfx at lists.freedesktop.org
> Cc: Chai, Thomas <YiPeng.Chai at amd.com>; Zhang, Hawking
> <Hawking.Zhang at amd.com>; Zhou1, Tao <Tao.Zhou1 at amd.com>; Li, Candice
> <Candice.Li at amd.com>; Yang, Stanley <Stanley.Yang at amd.com>; Chai,
> Thomas <YiPeng.Chai at amd.com>
> Subject: [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10
>
> Optimize redundant code in umc_v8_10
>
> Signed-off-by: YiPeng Chai <YiPeng.Chai at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 31 ++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 7 +
> drivers/gpu/drm/amd/amdgpu/umc_v8_10.c | 197 +++++++++---------------
> 3 files changed, 115 insertions(+), 120 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> index 9e2e97207e53..734442315cf6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> @@ -302,3 +302,34 @@ void amdgpu_umc_fill_error_record(struct
> ras_err_data *err_data,
>
> err_data->err_addr_cnt++;
> }
> +
> +int amdgpu_umc_scan_all_umc_channels(struct amdgpu_device *adev,
> + umc_func func, void *data)
> +{
> + uint32_t node_inst = 0;
> + uint32_t umc_inst = 0;
> + uint32_t ch_inst = 0;
> + int ret = 0;
> +
> + if (adev->umc.node_inst_num) {
> + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst,
> ch_inst) {
> + ret = func(adev, node_inst, umc_inst, ch_inst, data);
> + if (ret) {
> + dev_err(adev->dev, "Node %d umc %d ch %d
> func returns %d\n",
> + node_inst, umc_inst, ch_inst, ret);
> + return ret;
> + }
> + }
> + } else {
> + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
[Tao] for ASIC which doesn't support node, can we set its node_inst_num to 1 and retire the macro LOOP_UMC_INST_AND_CH?
> + ret = func(adev, 0, umc_inst, ch_inst, data);
> + if (ret) {
> + dev_err(adev->dev, "Umc %d ch %d func
> returns %d\n",
> + umc_inst, ch_inst, ret);
> + return ret;
> + }
> + }
> + }
> +
> + return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> index d7f1229ff11f..f279c8057f96 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> @@ -47,6 +47,10 @@
> #define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> \
> LOOP_UMC_NODE_INST((node_inst))
> LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
>
> +
> +typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
> + uint32_t umc_inst, uint32_t ch_inst, void *data);
> +
> struct amdgpu_umc_ras {
> struct amdgpu_ras_block_object ras_block;
> void (*err_cnt_init)(struct amdgpu_device *adev); @@ -104,4 +108,7
> @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
> struct amdgpu_iv_entry *entry);
> int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
> uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst);
> +
> +int amdgpu_umc_scan_all_umc_channels(struct amdgpu_device *adev,
> + umc_func func, void *data);
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> index fb55e8cb9967..6dff313ac04c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> @@ -76,10 +76,13 @@ static inline uint32_t get_umc_v8_10_reg_offset(struct
> amdgpu_device *adev,
> UMC_8_NODE_DIST * node_inst;
> }
>
> -static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device
> *adev,
> - uint32_t umc_reg_offset)
> +static int umc_v8_10_clear_error_count_per_channel(struct amdgpu_device
> *adev,
> + uint32_t node_inst, uint32_t umc_inst,
> + uint32_t ch_inst, void *data)
> {
> uint32_t ecc_err_cnt_addr;
> + uint32_t umc_reg_offset =
> + get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
>
> ecc_err_cnt_addr =
> SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); @@
> -87,24 +90,14 @@ static void
> umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev,
> /* clear error count */
> WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
> UMC_V8_10_CE_CNT_INIT);
> +
> + return 0;
> }
>
> static void umc_v8_10_clear_error_count(struct amdgpu_device *adev) {
> - uint32_t node_inst = 0;
> - uint32_t umc_inst = 0;
> - uint32_t ch_inst = 0;
> - uint32_t umc_reg_offset = 0;
> -
> - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> - umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> - node_inst,
> - umc_inst,
> - ch_inst);
> -
> - umc_v8_10_clear_error_count_per_channel(adev,
> - umc_reg_offset);
> - }
> + amdgpu_umc_scan_all_umc_channels(adev,
> + umc_v8_10_clear_error_count_per_channel, NULL);
> }
>
> static void umc_v8_10_query_correctable_error_count(struct amdgpu_device
> *adev, @@ -147,29 +140,29 @@ static void
> umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev
> *error_count += 1;
> }
>
> +static int umc_v8_10_query_ecc_error_count(struct amdgpu_device *adev,
> + uint32_t node_inst, uint32_t umc_inst,
> + uint32_t ch_inst, void *data)
> +{
> + struct ras_err_data *err_data = (struct ras_err_data *)data;
> + uint32_t umc_reg_offset =
> + get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> +
> + umc_v8_10_query_correctable_error_count(adev,
> + umc_reg_offset,
> + &(err_data->ce_count));
> + umc_v8_10_query_uncorrectable_error_count(adev,
> + umc_reg_offset,
> + &(err_data->ue_count));
> +
> + return 0;
> +}
> +
> static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev,
> void *ras_error_status)
> {
> - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> -
> - uint32_t node_inst = 0;
> - uint32_t umc_inst = 0;
> - uint32_t ch_inst = 0;
> - uint32_t umc_reg_offset = 0;
> -
> - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> - umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> - node_inst,
> - umc_inst,
> - ch_inst);
> -
> - umc_v8_10_query_correctable_error_count(adev,
> - umc_reg_offset,
> - &(err_data->ce_count));
> - umc_v8_10_query_uncorrectable_error_count(adev,
> - umc_reg_offset,
> - &(err_data->ue_count));
> - }
> + amdgpu_umc_scan_all_umc_channels(adev,
> + umc_v8_10_query_ecc_error_count, ras_error_status);
>
> umc_v8_10_clear_error_count(adev);
> }
> @@ -248,28 +241,28 @@ static void umc_v8_10_convert_error_address(struct
> amdgpu_device *adev,
> }
> }
>
> -static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
> - struct ras_err_data *err_data,
> - uint32_t umc_reg_offset,
> - uint32_t node_inst,
> - uint32_t ch_inst,
> - uint32_t umc_inst)
> +static int umc_v8_10_query_error_address(struct amdgpu_device *adev,
> + uint32_t node_inst, uint32_t umc_inst,
> + uint32_t ch_inst, void *data)
> {
> uint64_t mc_umc_status_addr;
> uint64_t mc_umc_status, err_addr;
> uint64_t mc_umc_addrt0;
> + struct ras_err_data *err_data = (struct ras_err_data *)data;
> + uint32_t umc_reg_offset =
> + get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
>
> mc_umc_status_addr =
> SOC15_REG_OFFSET(UMC, 0,
> regMCA_UMC_UMC0_MCUMC_STATUST0);
> mc_umc_status = RREG64_PCIE((mc_umc_status_addr +
> umc_reg_offset) * 4);
>
> if (mc_umc_status == 0)
> - return;
> + return 0;
>
> if (!err_data->err_addr) {
> /* clear umc status */
> WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4,
> 0x0ULL);
> - return;
> + return 0;
> }
>
> /* calculate error address if ue error is detected */ @@ -287,37 +280,25
> @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
>
> /* clear umc status */
> WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
> +
> + return 0;
> }
>
> static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev,
> void *ras_error_status)
> {
> - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> - uint32_t node_inst = 0;
> - uint32_t umc_inst = 0;
> - uint32_t ch_inst = 0;
> - uint32_t umc_reg_offset = 0;
> -
> - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> - umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> - node_inst,
> - umc_inst,
> - ch_inst);
> -
> - umc_v8_10_query_error_address(adev,
> - err_data,
> - umc_reg_offset,
> - node_inst,
> - ch_inst,
> - umc_inst);
> - }
> + amdgpu_umc_scan_all_umc_channels(adev,
> + umc_v8_10_query_error_address, ras_error_status);
> }
>
> -static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
> - uint32_t umc_reg_offset)
> +static int umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
> + uint32_t node_inst, uint32_t umc_inst,
> + uint32_t ch_inst, void *data)
> {
> uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
> uint32_t ecc_err_cnt_addr;
> + uint32_t umc_reg_offset =
> + get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
>
> ecc_err_cnt_sel_addr =
> SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel);
> @@ -332,23 +313,14 @@ static void
> umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
> WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
> ecc_err_cnt_sel);
> /* set error count to initial value */
> WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
> UMC_V8_10_CE_CNT_INIT);
> +
> + return 0;
> }
>
> static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev) {
> - uint32_t node_inst = 0;
> - uint32_t umc_inst = 0;
> - uint32_t ch_inst = 0;
> - uint32_t umc_reg_offset = 0;
> -
> - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> - umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> - node_inst,
> - umc_inst,
> - ch_inst);
> -
> - umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset);
> - }
> + amdgpu_umc_scan_all_umc_channels(adev,
> + umc_v8_10_err_cnt_init_per_channel, NULL);
> }
>
> static bool umc_v8_10_query_ras_poison_mode(struct amdgpu_device *adev)
> @@ -406,37 +378,35 @@ static void
> umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_dev
> }
> }
>
> +static int umc_v8_10_ecc_info_query_ecc_error_count(struct amdgpu_device
> *adev,
> + uint32_t node_inst, uint32_t umc_inst,
> + uint32_t ch_inst, void *data)
> +{
> + struct ras_err_data *err_data = (struct ras_err_data *)data;
> +
> + umc_v8_10_ecc_info_query_correctable_error_count(adev,
> + node_inst, umc_inst, ch_inst,
> + &(err_data->ce_count));
> + umc_v8_10_ecc_info_query_uncorrectable_error_count(adev,
> + node_inst, umc_inst, ch_inst,
> + &(err_data->ue_count));
> + return 0;
> +}
> +
> static void umc_v8_10_ecc_info_query_ras_error_count(struct amdgpu_device
> *adev,
> void *ras_error_status)
> {
> - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> -
> - uint32_t node_inst = 0;
> - uint32_t umc_inst = 0;
> - uint32_t ch_inst = 0;
> -
> - /* TODO: driver needs to toggle DF Cstate to ensure
> - * safe access of UMC registers. Will add the protection
> - */
> - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> - umc_v8_10_ecc_info_query_correctable_error_count(adev,
> - node_inst, umc_inst,
> ch_inst,
> - &(err_data-
> >ce_count));
> - umc_v8_10_ecc_info_query_uncorrectable_error_count(adev,
> - node_inst, umc_inst,
> ch_inst,
> - &(err_data-
> >ue_count));
> - }
> + amdgpu_umc_scan_all_umc_channels(adev,
> + umc_v8_10_ecc_info_query_ecc_error_count,
> ras_error_status);
> }
>
> -static void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device
> *adev,
> - struct ras_err_data *err_data,
> - uint32_t ch_inst,
> - uint32_t umc_inst,
> - uint32_t node_inst)
> +static int umc_v8_10_ecc_info_query_error_address(struct amdgpu_device
> *adev,
> + uint32_t node_inst, uint32_t umc_inst,
> + uint32_t ch_inst, void *data)
> {
> uint32_t eccinfo_table_idx;
> uint64_t mc_umc_status, err_addr;
> -
> + struct ras_err_data *err_data = (struct ras_err_data *)data;
> struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
>
> eccinfo_table_idx = node_inst * adev->umc.umc_inst_num * @@ -
> 447,10 +417,10 @@ static void
> umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev,
> mc_umc_status = ras-
> >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
>
> if (mc_umc_status == 0)
> - return;
> + return 0;
>
> if (!err_data->err_addr)
> - return;
> + return 0;
>
> /* calculate error address if ue error is detected */
> if (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && @@ -463,28 +433,15 @@
> static void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device
> *adev,
> umc_v8_10_convert_error_address(adev, err_data, err_addr,
> ch_inst, umc_inst, node_inst,
> mc_umc_status);
> }
> +
> + return 0;
> }
>
> static void umc_v8_10_ecc_info_query_ras_error_address(struct
> amdgpu_device *adev,
> void *ras_error_status)
> {
> - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> -
> - uint32_t node_inst = 0;
> - uint32_t umc_inst = 0;
> - uint32_t ch_inst = 0;
> -
> - /* TODO: driver needs to toggle DF Cstate to ensure
> - * safe access of UMC resgisters. Will add the protection
> - * when firmware interface is ready
> - */
> - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> - umc_v8_10_ecc_info_query_error_address(adev,
> - err_data,
> - ch_inst,
> - umc_inst,
> - node_inst);
> - }
> + amdgpu_umc_scan_all_umc_channels(adev,
> + umc_v8_10_ecc_info_query_error_address, ras_error_status);
> }
>
> const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
> --
> 2.34.1
More information about the amd-gfx
mailing list