[PATCH 35/40] drm/amdgpu: Simplify RAS EEPROM checksum calculations

Alex Deucher alexdeucher at gmail.com
Fri Jun 11 17:07:18 UTC 2021


On Tue, Jun 8, 2021 at 5:41 PM Luben Tuikov <luben.tuikov at amd.com> wrote:
>
> Rename update_table_header() to
> write_table_header() as this function is actually
> writing it to EEPROM.
>
> Use kernel types; use u8 to carry around the
> checksum, in order to take advantage of arithmetic
> modulo 8-bits (256).
>
> Tidy up to 80 columns.
>
> When updating the checksum, just recalculate the
> whole thing.
>
> Cc: Alexander Deucher <Alexander.Deucher at amd.com>
> Cc: Andrey Grodzovsky <Andrey.Grodzovsky at amd.com>
> Signed-off-by: Luben Tuikov <luben.tuikov at amd.com>

Acked-by: Alex Deucher <alexander.deucher at amd.com>

> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c    | 98 +++++++++----------
>  .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h    |  2 +-
>  2 files changed, 50 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> index 7d0f9e1e62dc4f..54ef31594accd9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> @@ -141,8 +141,8 @@ static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_heade
>         hdr->checksum         = le32_to_cpu(pp[4]);
>  }
>
> -static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
> -                                unsigned char *buff)
> +static int __write_table_header(struct amdgpu_ras_eeprom_control *control,
> +                               unsigned char *buff)
>  {
>         int ret = 0;
>         struct amdgpu_device *adev = to_amdgpu_device(control);
> @@ -162,69 +162,74 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
>         return ret;
>  }
>
> -static uint32_t  __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control)
> +static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control)
>  {
>         int i;
> -       uint32_t tbl_sum = 0;
> +       u8 hdr_sum = 0;
> +       u8  *p;
> +       size_t sz;
>
>         /* Header checksum, skip checksum field in the calculation */
> -       for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++)
> -               tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i);
> +       sz = sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum);
> +       p = (u8 *) &control->tbl_hdr;
> +       for (i = 0; i < sz; i++, p++)
> +               hdr_sum += *p;
>
> -       return tbl_sum;
> +       return hdr_sum;
>  }
>
> -static uint32_t  __calc_recs_byte_sum(struct eeprom_table_record *records,
> -                                     int num)
> +static u8 __calc_recs_byte_sum(const struct eeprom_table_record *record,
> +                              const int num)
>  {
>         int i, j;
> -       uint32_t tbl_sum = 0;
> +       u8  tbl_sum = 0;
> +
> +       if (!record)
> +               return 0;
>
>         /* Records checksum */
>         for (i = 0; i < num; i++) {
> -               struct eeprom_table_record *record = &records[i];
> +               u8 *p = (u8 *) &record[i];
>
> -               for (j = 0; j < sizeof(*record); j++) {
> -                       tbl_sum += *(((unsigned char *)record) + j);
> -               }
> +               for (j = 0; j < sizeof(*record); j++, p++)
> +                       tbl_sum += *p;
>         }
>
>         return tbl_sum;
>  }
>
> -static inline uint32_t  __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control,
> -                                 struct eeprom_table_record *records, int num)
> +static inline u8
> +__calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control,
> +                   struct eeprom_table_record *records, int num)
>  {
> -       return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num);
> +       return __calc_hdr_byte_sum(control) +
> +               __calc_recs_byte_sum(records, num);
>  }
>
> -/* Checksum = 256 -((sum of all table entries) mod 256) */
>  static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
> -                                 struct eeprom_table_record *records, int num,
> -                                 uint32_t old_hdr_byte_sum)
> +                                 struct eeprom_table_record *records, int num)
>  {
> -       /*
> -        * This will update the table sum with new records.
> -        *
> -        * TODO: What happens when the EEPROM table is to be wrapped around
> -        * and old records from start will get overridden.
> -        */
> -
> -       /* need to recalculate updated header byte sum */
> -       control->tbl_byte_sum -= old_hdr_byte_sum;
> -       control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num);
> +       u8 v;
>
> -       control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256);
> +       control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num);
> +       /* Avoid 32-bit sign extension. */
> +       v = -control->tbl_byte_sum;
> +       control->tbl_hdr.checksum = v;
>  }
>
> -/* table sum mod 256 + checksum must equals 256 */
> -static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
> -                           struct eeprom_table_record *records, int num)
> +static bool __verify_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
> +                                 struct eeprom_table_record *records,
> +                                 int num)
>  {
> +       u8 result;
> +
>         control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num);
>
> -       if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) {
> -               DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum);
> +       result = (u8)control->tbl_hdr.checksum + control->tbl_byte_sum;
> +       if (result) {
> +               DRM_WARN("RAS table checksum mismatch: stored:0x%02X wants:0x%02hhX",
> +                        control->tbl_hdr.checksum,
> +                        -control->tbl_byte_sum);
>                 return false;
>         }
>
> @@ -232,8 +237,8 @@ static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
>  }
>
>  static int amdgpu_ras_eeprom_correct_header_tag(
> -                               struct amdgpu_ras_eeprom_control *control,
> -                               uint32_t header)
> +       struct amdgpu_ras_eeprom_control *control,
> +       uint32_t header)
>  {
>         unsigned char buff[RAS_TABLE_HEADER_SIZE];
>         struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
> @@ -243,7 +248,7 @@ static int amdgpu_ras_eeprom_correct_header_tag(
>
>         mutex_lock(&control->tbl_mutex);
>         hdr->header = header;
> -       ret = __update_table_header(control, buff);
> +       ret = __write_table_header(control, buff);
>         mutex_unlock(&control->tbl_mutex);
>
>         return ret;
> @@ -262,11 +267,9 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
>         hdr->first_rec_offset = RAS_RECORD_START;
>         hdr->tbl_size = RAS_TABLE_HEADER_SIZE;
>
> -       control->tbl_byte_sum = 0;
> -       __update_tbl_checksum(control, NULL, 0, 0);
> +       __update_tbl_checksum(control, NULL, 0);
>         control->next_addr = RAS_RECORD_START;
> -
> -       ret = __update_table_header(control, buff);
> +       ret = __write_table_header(control, buff);
>
>         mutex_unlock(&control->tbl_mutex);
>
> @@ -521,8 +524,6 @@ static int amdgpu_ras_eeprom_xfer(struct amdgpu_ras_eeprom_control *control,
>         }
>
>         if (write) {
> -               uint32_t old_hdr_byte_sum = __calc_hdr_byte_sum(control);
> -
>                 /*
>                  * Update table header with size and CRC and account for table
>                  * wrap around where the assumption is that we treat it as empty
> @@ -537,10 +538,9 @@ static int amdgpu_ras_eeprom_xfer(struct amdgpu_ras_eeprom_control *control,
>                         control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
>                         control->num_recs * RAS_TABLE_RECORD_SIZE;
>
> -               __update_tbl_checksum(control, records, num, old_hdr_byte_sum);
> -
> -               __update_table_header(control, buffs);
> -       } else if (!__validate_tbl_checksum(control, records, num)) {
> +               __update_tbl_checksum(control, records, num);
> +               __write_table_header(control, buffs);
> +       } else if (!__verify_tbl_checksum(control, records, num)) {
>                 DRM_WARN("EEPROM Table checksum mismatch!");
>                 /* TODO Uncomment when EEPROM read/write is relliable */
>                 /* ret = -EIO; */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
> index fa9c509a8e2f2b..4906ed9fb8cdd3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
> @@ -48,7 +48,7 @@ struct amdgpu_ras_eeprom_control {
>         uint32_t next_addr;
>         unsigned int num_recs;
>         struct mutex tbl_mutex;
> -       uint32_t tbl_byte_sum;
> +       u8 tbl_byte_sum;
>  };
>
>  /*
> --
> 2.32.0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list