[PATCH] drm/amdgpu: Save nps to eeprom and refine code
Zhou1, Tao
Tao.Zhou1 at amd.com
Thu Feb 20 07:48:44 UTC 2025
[AMD Official Use Only - AMD Internal Distribution Only]
Please split the patch into at least three parts: one for the bad page/record number calculation based on NPS, one for saving the NPS info, and a third for the code refactoring of the bad page add logic.
Please check my inline comments for other suggestions.
> -----Original Message-----
> From: Xie, Patrick <Gangliang.Xie at amd.com>
> Sent: Thursday, February 20, 2025 2:17 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Zhang, Hawking <Hawking.Zhang at amd.com>; Zhou1, Tao
> <Tao.Zhou1 at amd.com>; Xie, Patrick <Gangliang.Xie at amd.com>
> Subject: [PATCH] drm/amdgpu: Save nps to eeprom and refine code
>
> add nps info into eeprom records, and refine adding bad page logic based on nps
> info.
>
> Signed-off-by: ganglxie <ganglxie at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 244 +++++++++---------
> .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 25 +-
> .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h | 20 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 7 +
> 4 files changed, 153 insertions(+), 143 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 5420e2d6d244..3945dda54b3f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2801,20 +2801,101 @@ static int amdgpu_ras_mca2pa(struct amdgpu_device
> *adev,
> return -EINVAL;
> }
>
> +static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
> + struct eeprom_table_record *bps, int count) {
> + int j;
> + struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
> + struct ras_err_handler_data *data = con->eh_data;
> +
> + for (j = 0; j < count; j++) {
> + if (amdgpu_ras_check_bad_page_unlock(con,
> + bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT))
> + continue;
> +
> + if (!data->space_left &&
> + amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
> + return -ENOMEM;
> + }
> +
> + amdgpu_ras_reserve_page(adev, bps[j].retired_page);
> +
> + memcpy(&data->bps[data->count], &(bps[j]),
> + sizeof(struct eeprom_table_record));
> + data->count++;
> + data->space_left--;
> + }
> +
> + return 0;
> +}
> +
> +static int __amdgpu_ras_convert_unit_rec_from_rom(struct amdgpu_device *adev,
[Tao] for the name of unit_rec, how about rec_array?
> + struct eeprom_table_record *bps, struct ras_err_data
> *err_data,
> + enum amdgpu_memory_partition nps)
> +{
> + int i = 0;
> + int ret = 0;
> + enum amdgpu_memory_partition save_nps;
> +
> + save_nps = (bps[0].retired_page >> UMC_NPS_SHIFT) &
> UMC_NPS_MASK;
> +
> + for (i = 0; i < adev->umc.retire_unit; i++)
> + bps[i].retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
> +
> + if (save_nps) {
> + if (save_nps == nps) {
> + if (amdgpu_umc_pages_in_a_row(adev, err_data,
> + bps[0].retired_page <<
> AMDGPU_GPU_PAGE_SHIFT))
> + return -EINVAL;
> + } else {
> + if (amdgpu_ras_mca2pa_by_idx(adev, &bps[0], err_data))
> + return -EINVAL;
> + }
> + } else {
> + if (amdgpu_ras_mca2pa(adev, &bps[0], err_data)) {
> + if (nps == AMDGPU_NPS1_PARTITION_MODE)
> + memcpy(err_data->err_addr, bps,
> + sizeof(struct eeprom_table_record) * adev-
> >umc.retire_unit);
> + else
> + return -EOPNOTSUPP;
> + }
> + }
> +
> + return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr,
> +adev->umc.retire_unit); }
> +
> +static int __amdgpu_ras_convert_single_rec_from_rom(struct amdgpu_device
[Tao] we can name it __amdgpu_ras_convert_rec_from_rom for short
> *adev,
> + struct eeprom_table_record *bps, struct ras_err_data
> *err_data,
> + enum amdgpu_memory_partition nps)
> +{
> + enum amdgpu_memory_partition save_nps;
> +
> + save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
> + bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
> +
> + if (save_nps == nps) {
> + if (amdgpu_umc_pages_in_a_row(adev, err_data,
> + bps->retired_page <<
> AMDGPU_GPU_PAGE_SHIFT))
> + return -EINVAL;
> + } else {
> + if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data))
> + return -EINVAL;
> + }
> + return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr,
> + adev-
> >umc.retire_unit);
> +}
> +
> /* it deal with vram only. */
> int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
> struct eeprom_table_record *bps, int pages, bool from_rom) {
> struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
> - struct ras_err_handler_data *data;
> struct ras_err_data err_data;
> - struct eeprom_table_record *err_rec;
> struct amdgpu_ras_eeprom_control *control =
> &adev->psp.ras_context.ras->eeprom_control;
> enum amdgpu_memory_partition nps =
> AMDGPU_NPS1_PARTITION_MODE;
> int ret = 0;
> - uint32_t i, j, loop_cnt = 1;
> - bool find_pages_per_pa = false;
> + uint32_t i;
>
> if (!con || !con->eh_data || !bps || pages <= 0)
> return 0;
> @@ -2825,108 +2906,41 @@ int amdgpu_ras_add_bad_pages(struct
> amdgpu_device *adev,
> sizeof(struct eeprom_table_record), GFP_KERNEL);
> if (!err_data.err_addr) {
> dev_warn(adev->dev, "Failed to alloc UMC error address
> record in mca2pa conversion!\n");
> - ret = -ENOMEM;
> - goto out;
> + return -ENOMEM;
> }
>
> - err_rec = err_data.err_addr;
> - loop_cnt = adev->umc.retire_unit;
> if (adev->gmc.gmc_funcs->query_mem_partition_mode)
> nps = adev->gmc.gmc_funcs-
> >query_mem_partition_mode(adev);
> }
>
> mutex_lock(&con->recovery_lock);
> - data = con->eh_data;
> - if (!data) {
> - /* Returning 0 as the absence of eh_data is acceptable */
> - goto free;
> - }
> -
> - for (i = 0; i < pages; i++) {
> - if (from_rom &&
> - control->rec_type == AMDGPU_RAS_EEPROM_REC_MCA) {
> - if (!find_pages_per_pa) {
> - if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i],
> &err_data)) {
> - if (!i && nps ==
> AMDGPU_NPS1_PARTITION_MODE) {
> - /* may use old RAS TA, use PA to find
> pages in
> - * one row
> - */
> - if
> (amdgpu_umc_pages_in_a_row(adev, &err_data,
> -
> bps[i].retired_page <<
> -
> AMDGPU_GPU_PAGE_SHIFT)) {
> - ret = -EINVAL;
> - goto free;
> - } else {
> - find_pages_per_pa = true;
> - }
> - } else {
> - /* unsupported cases */
> - ret = -EOPNOTSUPP;
> - goto free;
> - }
> - }
> - } else {
> - if (amdgpu_umc_pages_in_a_row(adev, &err_data,
> - bps[i].retired_page <<
> AMDGPU_GPU_PAGE_SHIFT)) {
> - ret = -EINVAL;
> - goto free;
> - }
> - }
> - } else {
> - if (from_rom && !find_pages_per_pa) {
> - if (bps[i].retired_page & UMC_CHANNEL_IDX_V2) {
> - /* bad page in any NPS mode in eeprom */
> - if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i],
> &err_data)) {
> - ret = -EINVAL;
> +
> + if (from_rom) {
> + for (i = 0; i < pages; i++) {
> + if (control->ras_num_recs - i >= adev->umc.retire_unit) {
> + if ((bps[i].address == bps[i + 1].address) &&
> + (bps[i].mem_channel == bps[i +
[Tao] a space should be used here instead of a tab
> 1].mem_channel)) {
> + //deal retire_unit records a time
[Tao] deal with
> + ret =
> __amdgpu_ras_convert_unit_rec_from_rom(adev,
> + &bps[i],
> &err_data, nps);
> + if (ret)
> goto free;
> - }
> + i += (adev->umc.retire_unit - 1);
> } else {
> - /* legacy bad page in eeprom, generated only
> in
> - * NPS1 mode
> - */
> - if (amdgpu_ras_mca2pa(adev, &bps[i],
> &err_data)) {
> - /* old RAS TA or ASICs which don't
> support to
> - * convert addrss via mca address
> - */
> - if (!i && nps ==
> AMDGPU_NPS1_PARTITION_MODE) {
> - find_pages_per_pa = true;
> - err_rec = &bps[i];
> - loop_cnt = 1;
> - } else {
> - /* non-nps1 mode, old RAS TA
> - * can't support it
> - */
> - ret = -EOPNOTSUPP;
> - goto free;
> - }
> - }
> + break;
> }
> -
> - if (!find_pages_per_pa)
> - i += (adev->umc.retire_unit - 1);
> } else {
> - err_rec = &bps[i];
> + break;
> }
> }
> -
> - for (j = 0; j < loop_cnt; j++) {
> - if (amdgpu_ras_check_bad_page_unlock(con,
> - err_rec[j].retired_page <<
> AMDGPU_GPU_PAGE_SHIFT))
> - continue;
> -
> - if (!data->space_left &&
> - amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
> - ret = -ENOMEM;
> + for (; i < pages; i++) {
> + ret = __amdgpu_ras_convert_single_rec_from_rom(adev,
> + &bps[i], &err_data, nps);
> + if (ret)
> goto free;
> - }
> -
> - amdgpu_ras_reserve_page(adev, err_rec[j].retired_page);
> -
> - memcpy(&data->bps[data->count], &(err_rec[j]),
> - sizeof(struct eeprom_table_record));
> - data->count++;
> - data->space_left--;
> }
> + } else {
> + ret = __amdgpu_ras_restore_bad_pages(adev, bps, pages);
> }
>
> free:
> @@ -2971,24 +2985,14 @@ int amdgpu_ras_save_bad_pages(struct
> amdgpu_device *adev,
>
> /* only new entries are saved */
> if (save_count > 0) {
> - if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) {
> + for (i = 0; i < unit_num; i++) {
> if (amdgpu_ras_eeprom_append(control,
> - &data->bps[control->ras_num_recs],
> - save_count)) {
> + &data->bps[bad_page_num + i * adev-
> >umc.retire_unit],
> + 1)) {
> dev_err(adev->dev, "Failed to save EEPROM table
> data!");
> return -EIO;
> }
> - } else {
> - for (i = 0; i < unit_num; i++) {
> - if (amdgpu_ras_eeprom_append(control,
> - &data->bps[bad_page_num + i * adev-
> >umc.retire_unit],
> - 1)) {
> - dev_err(adev->dev, "Failed to save EEPROM
> table data!");
> - return -EIO;
> - }
> - }
> }
> -
> dev_info(adev->dev, "Saved %d pages to EEPROM table.\n",
> save_count);
> }
>
> @@ -3005,6 +3009,7 @@ static int amdgpu_ras_load_bad_pages(struct
> amdgpu_device *adev)
> &adev->psp.ras_context.ras->eeprom_control;
> struct eeprom_table_record *bps;
> int ret;
> + int i = 0;
[Tao] I prefer to "int ret, i = 0;"
>
> /* no bad page record, skip eeprom access */
> if (control->ras_num_recs == 0 || amdgpu_bad_page_threshold == 0) @@ -
> 3018,13 +3023,23 @@ static int amdgpu_ras_load_bad_pages(struct
> amdgpu_device *adev)
> if (ret) {
> dev_err(adev->dev, "Failed to load EEPROM table records!");
> } else {
> - if (control->ras_num_recs > 1 &&
> - adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
> - if ((bps[0].address == bps[1].address) &&
> - (bps[0].mem_channel == bps[1].mem_channel))
> - control->rec_type =
> AMDGPU_RAS_EEPROM_REC_PA;
> - else
> - control->rec_type =
> AMDGPU_RAS_EEPROM_REC_MCA;
> + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
> + for (i = 0; i < control->ras_num_recs; i++) {
> + if ((control->ras_num_recs - i) >= adev-
> >umc.retire_unit) {
> + if ((bps[i].address == bps[i + 1].address) &&
> + (bps[i].mem_channel == bps[i +
> 1].mem_channel)) {
> + control->ras_num_pa_recs += adev-
> >umc.retire_unit;
> + i += (adev->umc.retire_unit - 1);
> + } else {
> + control->ras_num_mca_recs +=
> + (control-
> >ras_num_recs - i);
> + break;
> + }
> + } else {
> + control->ras_num_mca_recs += (control-
> >ras_num_recs - i);
> + break;
> + }
> + }
> }
>
> ret = amdgpu_ras_eeprom_check(control); @@ -3438,12 +3453,7
> @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev)
> return ret;
>
> if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr)
> - control->rec_type = AMDGPU_RAS_EEPROM_REC_PA;
> -
> - /* default status is MCA storage */
> - if (control->ras_num_recs <= 1 &&
> - adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
> - control->rec_type = AMDGPU_RAS_EEPROM_REC_MCA;
> + control->ras_num_pa_recs = control->ras_num_recs;
>
> if (control->ras_num_recs) {
> ret = amdgpu_ras_load_bad_pages(adev); diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> index 83b54efcaa87..ab27cecb5519 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> @@ -727,11 +727,9 @@ amdgpu_ras_eeprom_append_table(struct
> amdgpu_ras_eeprom_control *control,
> - control->ras_fri)
> % control->ras_max_record_count;
>
> - if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA)
> - control->ras_num_bad_pages = control->ras_num_recs;
> - else
> - control->ras_num_bad_pages =
> - control->ras_num_recs * adev->umc.retire_unit;
> + control->ras_num_mca_recs += num;
> + control->ras_num_bad_pages += num * adev->umc.retire_unit;
> +
> Out:
> kfree(buf);
> return res;
> @@ -852,6 +850,7 @@ int amdgpu_ras_eeprom_append(struct
> amdgpu_ras_eeprom_control *control, {
> struct amdgpu_device *adev = to_amdgpu_device(control);
> int res, i;
> + uint64_t nps = AMDGPU_NPS1_PARTITION_MODE;
>
> if (!__is_ras_eeprom_supported(adev))
> return 0;
> @@ -865,9 +864,12 @@ int amdgpu_ras_eeprom_append(struct
> amdgpu_ras_eeprom_control *control,
> return -EINVAL;
> }
>
> + if (adev->gmc.gmc_funcs->query_mem_partition_mode)
> + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
> +
> /* set the new channel index flag */
> for (i = 0; i < num; i++)
> - record[i].retired_page |= UMC_CHANNEL_IDX_V2;
> + record[i].retired_page |= (nps << UMC_NPS_SHIFT);
>
> mutex_lock(&control->ras_tbl_mutex);
>
> @@ -881,7 +883,7 @@ int amdgpu_ras_eeprom_append(struct
> amdgpu_ras_eeprom_control *control,
>
> /* clear channel index flag, the flag is only saved on eeprom */
> for (i = 0; i < num; i++)
> - record[i].retired_page &= ~UMC_CHANNEL_IDX_V2;
> + record[i].retired_page &= ~(nps << UMC_NPS_SHIFT);
>
> return res;
> }
> @@ -1392,6 +1394,8 @@ int amdgpu_ras_eeprom_init(struct
> amdgpu_ras_eeprom_control *control)
> }
> control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
>
> + control->ras_num_mca_recs = 0;
> + control->ras_num_pa_recs = 0;
> return 0;
> }
>
> @@ -1412,11 +1416,8 @@ int amdgpu_ras_eeprom_check(struct
> amdgpu_ras_eeprom_control *control)
> if (!__get_eeprom_i2c_addr(adev, control))
> return -EINVAL;
>
> - if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA)
> - control->ras_num_bad_pages = control->ras_num_recs;
> - else
> - control->ras_num_bad_pages =
> - control->ras_num_recs * adev->umc.retire_unit;
> + control->ras_num_bad_pages = control->ras_num_pa_recs +
> + control->ras_num_mca_recs * adev->umc.retire_unit;
>
> if (hdr->header == RAS_TABLE_HDR_VAL) {
> DRM_DEBUG_DRIVER("Found existing EEPROM table with %d
> records", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
> index 81d55cb7b397..13f7eda9a696 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
> @@ -43,19 +43,6 @@ enum amdgpu_ras_eeprom_err_type {
> AMDGPU_RAS_EEPROM_ERR_COUNT,
> };
>
> -/*
> - * one UMC MCA address could map to multiply physical address (PA),
> - * such as 1:16, we use eeprom_table_record.address to store MCA
> - * address and use eeprom_table_record.retired_page to save PA.
> - *
> - * AMDGPU_RAS_EEPROM_REC_PA: one record store one PA
> - * AMDGPU_RAS_EEPROM_REC_MCA: one record store one MCA address
> - */
> -enum amdgpu_ras_eeprom_rec_type {
> - AMDGPU_RAS_EEPROM_REC_PA,
> - AMDGPU_RAS_EEPROM_REC_MCA,
> -};
> -
> struct amdgpu_ras_eeprom_table_header {
> uint32_t header;
> uint32_t version;
> @@ -100,6 +87,12 @@ struct amdgpu_ras_eeprom_control {
> */
> u32 ras_num_bad_pages;
>
> + /* Number of records store mca address */
> + u32 ras_num_mca_recs;
> +
> + /* Number of records store physical address */
> + u32 ras_num_pa_recs;
> +
> /* First record index to read, 0-based.
> * Range is [0, num_recs-1]. This is
> * an absolute index, starting right after @@ -120,7 +113,6 @@ struct
> amdgpu_ras_eeprom_control {
> /* Record channel info which occurred bad pages
> */
> u32 bad_channel_bitmap;
> - enum amdgpu_ras_eeprom_rec_type rec_type;
> };
>
> /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> index a4a7e61817aa..857693bcd8d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> @@ -71,6 +71,13 @@
> */
> #define UMC_CHANNEL_IDX_V2 BIT_ULL(47)
>
> +/*
> + * save nps value to eeprom_table_record.retired_page[47:40],
> + * the channel index flag above will be retired.
> + */
> +#define UMC_NPS_SHIFT 40
> +#define UMC_NPS_MASK 0xffULL
> +
> typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
> uint32_t umc_inst, uint32_t ch_inst, void *data);
>
> --
> 2.34.1
More information about the amd-gfx
mailing list