[PATCH 3/4] drm/amdgpu: Read IPMI data for product information

Luben Tuikov luben.tuikov at amd.com
Wed Nov 16 23:42:30 UTC 2022


It is actually reading the IPMI data as it should. Previously to this, it was
just assuming where the data would be, it's size, and so on. In talking
to some engineers internally, we concluded that it shouldn't do that, and it
should instead follow the IPMI spec to read the data--in the same way
that data was written there--folloing the IPMI spec.

Regards,
Luben

On 2022-11-16 15:58, Alex Deucher wrote:
> On Wed, Nov 16, 2022 at 2:49 PM Luben Tuikov <luben.tuikov at amd.com> wrote:
>>
>> Read and interpret IPMI data to get the product name, product model, and
>> product serial number.
> 
> Patches 1,2,4 are:
> Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
> for patch 3:
> It's not clear from the commit message what this change is doing.  Is
> this just a rewrite/cleanup of the existing FRU parsing or is there a
> bug fix in here related to the rest of this series?
> 
> Alex
> 
>>
>> Cc: Alex Deucher <Alexander.Deucher at amd.com>
>> Cc: Kent Russell <kent.russell at amd.com>
>> Signed-off-by: Luben Tuikov <luben.tuikov at amd.com>
>> Tested-by: Kent Russell <kent.russell at amd.com>
>> ---
>>  .../gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c    | 183 ++++++++----------
>>  1 file changed, 85 insertions(+), 98 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
>> index 9b2ff386b7c4f8..2c38ac7bc643d5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
>> @@ -94,39 +94,12 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
>>         }
>>  }
>>
>> -static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr,
>> -                                 unsigned char *buf, size_t buf_size)
>> -{
>> -       int ret;
>> -       u8 size;
>> -
>> -       ret = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addrptr, buf, 1);
>> -       if (ret < 1) {
>> -               DRM_WARN("FRU: Failed to get size field");
>> -               return ret;
>> -       }
>> -
>> -       /* The size returned by the i2c requires subtraction of 0xC0 since the
>> -        * size apparently always reports as 0xC0+actual size.
>> -        */
>> -       size = buf[0] & 0x3F;
>> -       size = min_t(size_t, size, buf_size);
>> -
>> -       ret = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addrptr + 1,
>> -                                buf, size);
>> -       if (ret < 1) {
>> -               DRM_WARN("FRU: Failed to get data field");
>> -               return ret;
>> -       }
>> -
>> -       return size;
>> -}
>> -
>>  int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
>>  {
>> -       unsigned char buf[AMDGPU_PRODUCT_NAME_LEN];
>> -       u32 addrptr, fru_addr;
>> +       unsigned char buf[8], *pia;
>> +       u32 addr, fru_addr;
>>         int size, len;
>> +       u8 csum;
>>
>>         if (!is_fru_eeprom_supported(adev, &fru_addr))
>>                 return 0;
>> @@ -137,88 +110,102 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
>>                 return -ENODEV;
>>         }
>>
>> -       /* There's a lot of repetition here. This is due to the FRU having
>> -        * variable-length fields. To get the information, we have to find the
>> -        * size of each field, and then keep reading along and reading along
>> -        * until we get all of the data that we want. We use addrptr to track
>> -        * the address as we go
>> -        */
>> -
>> -       /* The first fields are all of size 1-byte, from 0-7 are offsets that
>> -        * contain information that isn't useful to us.
>> -        * Bytes 8-a are all 1-byte and refer to the size of the entire struct,
>> -        * and the language field, so just start from 0xb, manufacturer size
>> -        */
>> -       addrptr = fru_addr + 0xb;
>> -       size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf));
>> -       if (size < 1) {
>> -               DRM_ERROR("Failed to read FRU Manufacturer, ret:%d", size);
>> -               return -EINVAL;
>> +       /* Read the IPMI Common header */
>> +       len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, fru_addr, buf,
>> +                                sizeof(buf));
>> +       if (len != 8) {
>> +               DRM_ERROR("Couldn't read the IPMI Common Header: %d", len);
>> +               return len < 0 ? len : -EIO;
>>         }
>>
>> -       /* Increment the addrptr by the size of the field, and 1 due to the
>> -        * size field being 1 byte. This pattern continues below.
>> -        */
>> -       addrptr += size + 1;
>> -       size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf));
>> -       if (size < 1) {
>> -               DRM_ERROR("Failed to read FRU product name, ret:%d", size);
>> -               return -EINVAL;
>> +       if (buf[0] != 1) {
>> +               DRM_ERROR("Bad IPMI Common Header version: 0x%02x", buf[0]);
>> +               return -EIO;
>>         }
>>
>> -       len = size;
>> -       if (len >= AMDGPU_PRODUCT_NAME_LEN) {
>> -               DRM_WARN("FRU Product Name is larger than %d characters. This is likely a mistake",
>> -                               AMDGPU_PRODUCT_NAME_LEN);
>> -               len = AMDGPU_PRODUCT_NAME_LEN - 1;
>> -       }
>> -       memcpy(adev->product_name, buf, len);
>> -       adev->product_name[len] = '\0';
>> -
>> -       addrptr += size + 1;
>> -       size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf));
>> -       if (size < 1) {
>> -               DRM_ERROR("Failed to read FRU product number, ret:%d", size);
>> -               return -EINVAL;
>> +       for (csum = 0; len > 0; len--)
>> +               csum += buf[len - 1];
>> +       if (csum) {
>> +               DRM_ERROR("Bad IPMI Common Header checksum: 0x%02x", csum);
>> +               return -EIO;
>>         }
>>
>> -       len = size;
>> -       /* Product number should only be 16 characters. Any more,
>> -        * and something could be wrong. Cap it at 16 to be safe
>> -        */
>> -       if (len >= sizeof(adev->product_number)) {
>> -               DRM_WARN("FRU Product Number is larger than 16 characters. This is likely a mistake");
>> -               len = sizeof(adev->product_number) - 1;
>> -       }
>> -       memcpy(adev->product_number, buf, len);
>> -       adev->product_number[len] = '\0';
>> +       /* Get the offset to the Product Info Area (PIA). */
>> +       addr = buf[4] * 8;
>> +       if (!addr)
>> +               return 0;
>>
>> -       addrptr += size + 1;
>> -       size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf));
>> +       /* Get the absolute address to the PIA. */
>> +       addr += fru_addr;
>>
>> -       if (size < 1) {
>> -               DRM_ERROR("Failed to read FRU product version, ret:%d", size);
>> -               return -EINVAL;
>> +       /* Read the header of the PIA. */
>> +       len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, buf, 3);
>> +       if (len != 3) {
>> +               DRM_ERROR("Couldn't read the Product Info Area header: %d", len);
>> +               return len < 0 ? len : -EIO;
>>         }
>>
>> -       addrptr += size + 1;
>> -       size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf));
>> +       if (buf[0] != 1) {
>> +               DRM_ERROR("Bad IPMI Product Info Area version: 0x%02x", buf[0]);
>> +               return -EIO;
>> +       }
>>
>> -       if (size < 1) {
>> -               DRM_ERROR("Failed to read FRU serial number, ret:%d", size);
>> -               return -EINVAL;
>> +       size = buf[1] * 8;
>> +       pia = kzalloc(size, GFP_KERNEL);
>> +       if (!pia)
>> +               return -ENOMEM;
>> +
>> +       /* Read the whole PIA. */
>> +       len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, pia, size);
>> +       if (len != size) {
>> +               kfree(pia);
>> +               DRM_ERROR("Couldn't read the Product Info Area: %d", len);
>> +               return len < 0 ? len : -EIO;
>>         }
>>
>> -       len = size;
>> -       /* Serial number should only be 16 characters. Any more,
>> -        * and something could be wrong. Cap it at 16 to be safe
>> -        */
>> -       if (len >= sizeof(adev->serial)) {
>> -               DRM_WARN("FRU Serial Number is larger than 16 characters. This is likely a mistake");
>> -               len = sizeof(adev->serial) - 1;
>> +       for (csum = 0; size > 0; size--)
>> +               csum += pia[size - 1];
>> +       if (csum) {
>> +               DRM_ERROR("Bad Product Info Area checksum: 0x%02x", csum);
>> +               return -EIO;
>>         }
>> -       memcpy(adev->serial, buf, len);
>> -       adev->serial[len] = '\0';
>>
>> +       /* Now extract useful information from the PIA.
>> +        *
>> +        * Skip the Manufacturer Name at [3] and go directly to
>> +        * the Product Name field.
>> +        */
>> +       addr = 3 + 1 + (pia[3] & 0x3F);
>> +       if (addr + 1 >= len)
>> +               goto Out;
>> +       memcpy(adev->product_name, pia + addr + 1,
>> +              min_t(size_t,
>> +                    sizeof(adev->product_name),
>> +                    pia[addr] & 0x3F));
>> +       adev->product_name[sizeof(adev->product_name) - 1] = '\0';
>> +
>> +       /* Go to the Product Part/Model Number field. */
>> +       addr += 1 + (pia[addr] & 0x3F);
>> +       if (addr + 1 >= len)
>> +               goto Out;
>> +       memcpy(adev->product_number, pia + addr + 1,
>> +              min_t(size_t,
>> +                    sizeof(adev->product_number),
>> +                    pia[addr] & 0x3F));
>> +       adev->product_number[sizeof(adev->product_number) - 1] = '\0';
>> +
>> +       /* Go to the Product Version field. */
>> +       addr += 1 + (pia[addr] & 0x3F);
>> +
>> +       /* Go to the Product Serial Number field. */
>> +       addr += 1 + (pia[addr] & 0x3F);
>> +       if (addr + 1 >= len)
>> +               goto Out;
>> +       memcpy(adev->serial, pia + addr + 1, min_t(size_t,
>> +                                                  sizeof(adev->serial),
>> +                                                  pia[addr] & 0x3F));
>> +       adev->serial[sizeof(adev->serial) - 1] = '\0';
>> +Out:
>> +       kfree(pia);
>>         return 0;
>>  }
>> --
>> 2.38.1
>>


More information about the amd-gfx mailing list