[PATCH 1/1] drm/amdgpu: Show IP discovery in sysfs
Luben Tuikov
luben.tuikov at amd.com
Wed Feb 9 19:30:31 UTC 2022
On 2022-02-09 14:21, Luben Tuikov wrote:
>
>
> On 2022-02-09 13:54, Alex Deucher wrote:
>> On Wed, Feb 9, 2022 at 11:30 AM Luben Tuikov <luben.tuikov at amd.com> wrote:
>>>
>>> Add IP discovery data in sysfs. The format is:
>>> /sys/class/drm/cardX/device/ip_discovery/die/D/B/I/<attrs>
>>> where,
>>> X is the card ID, an integer,
>>> D is the die ID, an integer,
>>> B is the IP HW ID, an integer, aka block type,
>>> I is the IP HW ID instance, an integer.
>>> <attrs> are the attributes of the block instance. At the moment these
>>> include HW ID, instance number, major, minor, revision, number of base
>>> addresses, and the base addresses themselves.
>>>
>>> A symbolic link of the acronym HW ID is also created, under D/, if you
>>> prefer to browse by something humanly accessible.
>>>
>>> Cc: Alex Deucher <Alexander.Deucher at amd.com>
>>> Cc: Tom StDenis <tom.stdenis at amd.com>
>>> Signed-off-by: Luben Tuikov <luben.tuikov at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 423 +++++++++++++++++-
>>> 2 files changed, 426 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index e4eb812ade2fa4..3a126dce8a2fe9 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -772,6 +772,8 @@ struct amd_powerplay {
>>> const struct amd_pm_funcs *pp_funcs;
>>> };
>>>
>>> +struct ip_discovery_top;
>>> +
>>> /* polaris10 kickers */
>>> #define ASICID_IS_P20(did, rid) (((did == 0x67DF) && \
>>> ((rid == 0xE3) || \
>>> @@ -1097,6 +1099,8 @@ struct amdgpu_device {
>>> bool ram_is_direct_mapped;
>>>
>>> struct list_head ras_list;
>>> +
>>> + struct ip_discovery_top *ip_top;
>>> };
>>>
>>> static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
>>> index 07623634fdc2f1..427400ccc90662 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
>>> @@ -382,6 +382,425 @@ static int amdgpu_discovery_validate_ip(const struct ip *ip)
>>> return 0;
>>> }
>>>
>>> +/* ================================================== */
>>> +
>>> +struct ip_hw_instance {
>>> + struct kobject kobj; /* ip_discovery/die/#die/#hw_id/#instance/<attrs...> */
>>> +
>>> + int hw_id;
>>> + u8 num_instance;
>>> + u8 major, minor, revision;
>>> +
>>> + int num_base_addresses;
>>> + u32 base_addr[0];
>>> +};
>>> +
>>> +struct ip_hw_id {
>>> + struct kset hw_id_kset; /* ip_discovery/die/#die/#hw_id/ */
>>> + int hw_id;
>>> +};
>>> +
>>> +struct ip_die_entry {
>>> + struct kset ip_kset; /* ip_discovery/die/#die/ */
>>> + u16 num_ips;
>>> +};
>>> +
>>> +/* -------------------------------------------------- */
>>> +
>>> +struct ip_hw_instance_attr {
>>> + struct attribute attr;
>>> + ssize_t (*show)(struct ip_hw_instance *ip_hw_instance, char *buf);
>>> +};
>>> +
>>> +static ssize_t hw_id_show(struct ip_hw_instance *ip_hw_instance, char *buf)
>>> +{
>>> + return sprintf(buf, "%d\n", ip_hw_instance->hw_id);
>>> +}
>>> +
>>> +static ssize_t num_instance_show(struct ip_hw_instance *ip_hw_instance, char *buf)
>>> +{
>>> + return sprintf(buf, "%d\n", ip_hw_instance->num_instance);
>>> +}
>>> +
>>> +static ssize_t major_show(struct ip_hw_instance *ip_hw_instance, char *buf)
>>> +{
>>> + return sprintf(buf, "%d\n", ip_hw_instance->major);
>>> +}
>>> +
>>> +static ssize_t minor_show(struct ip_hw_instance *ip_hw_instance, char *buf)
>>> +{
>>> + return sprintf(buf, "%d\n", ip_hw_instance->minor);
>>> +}
>>> +
>>> +static ssize_t revision_show(struct ip_hw_instance *ip_hw_instance, char *buf)
>>> +{
>>> + return sprintf(buf, "%d\n", ip_hw_instance->revision);
>>> +}
>>> +
>>> +static ssize_t num_base_addresses_show(struct ip_hw_instance *ip_hw_instance, char *buf)
>>> +{
>>> + return sprintf(buf, "%d\n", ip_hw_instance->num_base_addresses);
>>> +}
>>> +
>>> +static ssize_t base_addr_show(struct ip_hw_instance *ip_hw_instance, char *buf)
>>> +{
>>> + ssize_t res = 0;
>>> + int ii;
>>> +
>>> + for (ii = 0; ii < ip_hw_instance->num_base_addresses; ii++) {
>>> + if (res + 12 >= PAGE_SIZE)
>>> + break;
>>> + res += sprintf(buf + res, "0x%08X\n", ip_hw_instance->base_addr[ii]);
>>> + }
>>> +
>>> + return res;
>>> +}
>>> +
>>> +static struct ip_hw_instance_attr ip_hw_attr[] = {
>>> + __ATTR_RO(hw_id),
>>> + __ATTR_RO(num_instance),
>>> + __ATTR_RO(major),
>>> + __ATTR_RO(minor),
>>> + __ATTR_RO(revision),
>>> + __ATTR_RO(num_base_addresses),
>>> + __ATTR_RO(base_addr),
>>> +};
>>> +
>>> +static struct attribute *ip_hw_instance_attrs[] = {
>>> + &ip_hw_attr[0].attr,
>>> + &ip_hw_attr[1].attr,
>>> + &ip_hw_attr[2].attr,
>>> + &ip_hw_attr[3].attr,
>>> + &ip_hw_attr[4].attr,
>>> + &ip_hw_attr[5].attr,
>>> + &ip_hw_attr[6].attr,
>>> + NULL,
>>> +};
>>> +ATTRIBUTE_GROUPS(ip_hw_instance);
>>> +
>>> +#define to_ip_hw_instance(x) container_of(x, struct ip_hw_instance, kobj)
>>> +#define to_ip_hw_instance_attr(x) container_of(x, struct ip_hw_instance_attr, attr)
>>> +
>>> +static ssize_t ip_hw_instance_attr_show(struct kobject *kobj,
>>> + struct attribute *attr,
>>> + char *buf)
>>> +{
>>> + struct ip_hw_instance *ip_hw_instance = to_ip_hw_instance(kobj);
>>> + struct ip_hw_instance_attr *ip_hw_attr = to_ip_hw_instance_attr(attr);
>>> +
>>> + if (!ip_hw_attr->show)
>>> + return -EIO;
>>> +
>>> + return ip_hw_attr->show(ip_hw_instance, buf);
>>> +}
>>> +
>>> +static const struct sysfs_ops ip_hw_instance_sysfs_ops = {
>>> + .show = ip_hw_instance_attr_show,
>>> +};
>>> +
>>> +static void ip_hw_instance_release(struct kobject *kobj)
>>> +{
>>> + struct ip_hw_instance *ip_hw_instance = to_ip_hw_instance(kobj);
>>> +
>>> + kfree(ip_hw_instance);
>>> +}
>>> +
>>> +static struct kobj_type ip_hw_instance_ktype = {
>>> + .release = ip_hw_instance_release,
>>> + .sysfs_ops = &ip_hw_instance_sysfs_ops,
>>> + .default_groups = ip_hw_instance_groups,
>>> +};
>>> +
>>> +/* -------------------------------------------------- */
>>> +
>>> +#define to_ip_hw_id(x) container_of(to_kset(x), struct ip_hw_id, hw_id_kset)
>>> +
>>> +static void ip_hw_id_release(struct kobject *kobj)
>>> +{
>>> + struct ip_hw_id *ip_hw_id = to_ip_hw_id(kobj);
>>> +
>>> + /* TODO: Check that the kset is empty. */
>>> + kfree(ip_hw_id);
>>> +}
>>> +
>>> +static struct kobj_type ip_hw_id_ktype = {
>>> + .release = ip_hw_id_release,
>>> + .sysfs_ops = &kobj_sysfs_ops,
>>> +};
>>> +
>>> +/* -------------------------------------------------- */
>>> +
>>> +static void die_kobj_release(struct kobject *kobj);
>>> +static void ip_disc_release(struct kobject *kobj);
>>> +
>>> +struct ip_die_entry_attribute {
>>> + struct attribute attr;
>>> + ssize_t (*show)(struct ip_die_entry *ip_die_entry, char *buf);
>>> +};
>>> +
>>> +#define to_ip_die_entry_attr(x) container_of(x, struct ip_die_entry_attribute, attr)
>>> +
>>> +static ssize_t num_ips_show(struct ip_die_entry *ip_die_entry, char *buf)
>>> +{
>>> + return sprintf(buf, "%d\n", ip_die_entry->num_ips);
>>> +}
>>> +
>>> +/* If there are more ip_die_entry attrs, other than the number of IPs,
>>> + * we can make this intro an array of attrs, and then initialize
>>> + * ip_die_entry_attrs in a loop.
>>> + */
>>> +static struct ip_die_entry_attribute num_ips_attr =
>>> + __ATTR_RO(num_ips);
>>> +
>>> +static struct attribute *ip_die_entry_attrs[] = {
>>> + &num_ips_attr.attr,
>>> + NULL,
>>> +};
>>> +ATTRIBUTE_GROUPS(ip_die_entry); /* ip_die_entry_groups */
>>> +
>>> +#define to_ip_die_entry(x) container_of(to_kset(x), struct ip_die_entry, ip_kset)
>>> +
>>> +static ssize_t ip_die_entry_attr_show(struct kobject *kobj,
>>> + struct attribute *attr,
>>> + char *buf)
>>> +{
>>> + struct ip_die_entry_attribute *ip_die_entry_attr = to_ip_die_entry_attr(attr);
>>> + struct ip_die_entry *ip_die_entry = to_ip_die_entry(kobj);
>>> +
>>> + if (!ip_die_entry_attr->show)
>>> + return -EIO;
>>> +
>>> + return ip_die_entry_attr->show(ip_die_entry, buf);
>>> +}
>>> +
>>> +static void ip_die_entry_release(struct kobject *kobj)
>>> +{
>>> + struct ip_die_entry *ip_die_entry = to_ip_die_entry(kobj);
>>> +
>>> + /* TODO: Check that the kset is empty. */
>>> + kfree(ip_die_entry);
>>> +}
>>> +
>>> +static const struct sysfs_ops ip_die_entry_sysfs_ops = {
>>> + .show = ip_die_entry_attr_show,
>>> +};
>>> +
>>> +static struct kobj_type ip_die_entry_ktype = {
>>> + .release = ip_die_entry_release,
>>> + .sysfs_ops = &ip_die_entry_sysfs_ops,
>>> + .default_groups = ip_die_entry_groups,
>>> +};
>>> +
>>> +static struct kobj_type die_kobj_ktype = {
>>> + .release = die_kobj_release,
>>> + .sysfs_ops = &kobj_sysfs_ops,
>>> +};
>>> +
>>> +static struct kobj_type ip_discovery_ktype = {
>>> + .release = ip_disc_release,
>>> + .sysfs_ops = &kobj_sysfs_ops,
>>> +};
>>> +
>>> +struct ip_discovery_top {
>>> + struct kobject kobj; /* ip_discovery/ */
>>> + struct kset die_kset; /* ip_discovery/die/ */
>>> + struct amdgpu_device *adev;
>>> +};
>>> +
>>> +static void die_kobj_release(struct kobject *kobj)
>>> +{
>>> + ;
>>> +}
>>> +
>>> +static void ip_disc_release(struct kobject *kobj)
>>> +{
>>> + struct ip_discovery_top *ip_top = container_of(kobj, struct ip_discovery_top,
>>> + kobj);
>>> + struct amdgpu_device *adev = ip_top->adev;
>>> +
>>> + /* TODO: Check that the kset is empty. */
>>> + adev->ip_top = NULL;
>>> + kfree(ip_top);
>>> +}
>>> +
>>> +static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
>>> + struct ip_die_entry *ip_die_entry,
>>> + const size_t _ip_offset, const int num_ips)
>>> +{
>>> + int ii, jj, kk, res;
>>> +
>>> + DRM_DEBUG("num_ips:%d", num_ips);
>>> +
>>> + /* Find all IPs of a given HW ID, and add their instance to
>>> + * #die/#hw_id/#instance/<attributes>
>>> + */
>>> + for (ii = 0; ii < HW_ID_MAX; ii++) {
>>> + struct ip_hw_id *ip_hw_id = NULL;
>>> + size_t ip_offset = _ip_offset;
>>> +
>>> + for (jj = 0; jj < num_ips; jj++) {
>>> + struct ip *ip;
>>> + struct ip_hw_instance *ip_hw_instance;
>>> +
>>> + ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
>>> + if (amdgpu_discovery_validate_ip(ip) ||
>>> + le16_to_cpu(ip->hw_id) != ii)
>>> + goto next_ip;
>>> +
>>> + DRM_DEBUG("match:%d @ ip_offset:%ld", ii, ip_offset);
>>> +
>>> + /* We have a hw_id match; register the hw
>>> + * block if not yet registered.
>>> + */
>>> + if (!ip_hw_id) {
>>> + ip_hw_id = kzalloc(sizeof(*ip_hw_id), GFP_KERNEL);
>>> + if (!ip_hw_id)
>>> + return -ENOMEM;
>>> + ip_hw_id->hw_id = ii;
>>> +
>>> + kobject_set_name(&ip_hw_id->hw_id_kset.kobj, "%d", ii);
>>> + ip_hw_id->hw_id_kset.kobj.kset = &ip_die_entry->ip_kset;
>>> + ip_hw_id->hw_id_kset.kobj.ktype = &ip_hw_id_ktype;
>>> + res = kset_register(&ip_hw_id->hw_id_kset);
>>> + if (res) {
>>> + DRM_ERROR("Couldn't register ip_hw_id kset");
>>> + kfree(ip_hw_id);
>>> + return res;
>>> + }
>>> + if (hw_id_names[ii]) {
>>> + res = sysfs_create_link(&ip_die_entry->ip_kset.kobj,
>>> + &ip_hw_id->hw_id_kset.kobj,
>>> + hw_id_names[ii]);
>>> + if (res) {
>>> + DRM_ERROR("Couldn't create IP link %s in IP Die:%s\n",
>>> + hw_id_names[ii],
>>> + kobject_name(&ip_die_entry->ip_kset.kobj));
>>> + }
>>> + }
>>> + }
>>> +
>>> + /* Now register its instance.
>>> + */
>>> + ip_hw_instance = kzalloc(sizeof(*ip_hw_instance) +
>>> + sizeof(u32) * ip->num_base_address,
>>> + GFP_KERNEL);
>>> + if (!ip_hw_instance) {
>>> + DRM_ERROR("no memory for ip_hw_instance");
>>> + return -ENOMEM;
>>> + }
>>> + ip_hw_instance->hw_id = le16_to_cpu(ip->hw_id); /* == ii */
>>
>> Not sure we need the hw_id since that is already part of the directory
>> structure.
>
> It's part of the attribute set, so I left it there for completeness.
>
>>
>>> + ip_hw_instance->num_instance = ip->number_instance;
>>> + ip_hw_instance->major = ip->major;
>>> + ip_hw_instance->minor = ip->minor;
>>> + ip_hw_instance->revision = ip->revision;
>>
>> Bikeshed, but maybe just combine these into a version leaf instead. I
>> don't know that we need to keep them separate.
>
> I don't want to interpret this for user space and users. If you insist,
> then please let me know the format of the format string.
The other point is that these are attributes and presented verbatim in sysfs.
Someone only familiar with the binary data layout, would be surprised to see
this interpreted and randomly formatted, rather then the attributes shown vebatim.
Moreover, we lose their nomenclature if we scrub them and reinterpret them.
I say, we just provide the attributes and leave it to user space to pick and choose
what they are interested in.
For instance, a user might be interested ONLY in the major version, and so they
have a way of finding components with such and such major version, as opposed to
having to DECOMPOSE a formatted "version"--something which the kernel composed which
had been stored in separate attributes already.
I say we leave this interpretation to userspace.
Regards,
Luben
>
>>
>>> + ip_hw_instance->num_base_addresses = ip->num_base_address;
>>> +
>>> + for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++)
>>> + ip_hw_instance->base_addr[kk] = ip->base_address[kk];
>>> +
>>> + kobject_init(&ip_hw_instance->kobj, &ip_hw_instance_ktype);
>>> + ip_hw_instance->kobj.kset = &ip_hw_id->hw_id_kset;
>>> + res = kobject_add(&ip_hw_instance->kobj, NULL,
>>> + "%d", ip_hw_instance->num_instance);
>>> +next_ip:
>>> + ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
>>> + }
>>> + }
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
>>> +{
>>> + struct binary_header *bhdr;
>>> + struct ip_discovery_header *ihdr;
>>> + struct die_header *dhdr;
>>> + struct kset *die_kset = &adev->ip_top->die_kset;
>>> + u16 num_dies, die_offset, num_ips;
>>> + size_t ip_offset;
>>> + int ii, res;
>>> +
>>> + bhdr = (struct binary_header *)adev->mman.discovery_bin;
>>> + ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
>>> + le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
>>> + num_dies = le16_to_cpu(ihdr->num_dies);
>>> +
>>> + DRM_DEBUG("number of dies: %d\n", num_dies);
>>> +
>>> + for (ii = 0; ii < num_dies; ii++) {
>>> + struct ip_die_entry *ip_die_entry;
>>> +
>>> + die_offset = le16_to_cpu(ihdr->die_info[ii].die_offset);
>>> + dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
>>> + num_ips = le16_to_cpu(dhdr->num_ips);
>>> + ip_offset = die_offset + sizeof(*dhdr);
>>> +
>>> + /* Add the die to the kset.
>>> + *
>>> + * dhdr->die_id == ii, which was checked in
>>> + * amdgpu_discovery_reg_base_init().
>>> + */
>>> +
>>> + ip_die_entry = kzalloc(sizeof(*ip_die_entry), GFP_KERNEL);
>>> + if (!ip_die_entry)
>>> + return -ENOMEM;
>>> +
>>> + ip_die_entry->num_ips = num_ips;
>>> +
>>> + kobject_set_name(&ip_die_entry->ip_kset.kobj, "%d", le16_to_cpu(dhdr->die_id));
>>> + ip_die_entry->ip_kset.kobj.kset = die_kset;
>>> + ip_die_entry->ip_kset.kobj.ktype = &ip_die_entry_ktype;
>>> + res = kset_register(&ip_die_entry->ip_kset);
>>> + if (res) {
>>> + DRM_ERROR("Couldn't register ip_die_entry kset");
>>> + kfree(ip_die_entry);
>>> + return res;
>>> + }
>>> +
>>> + amdgpu_discovery_sysfs_ips(adev, ip_die_entry, ip_offset, num_ips);
>>> + }
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int amdgpu_discovery_sysfs(struct amdgpu_device *adev)
>>> +{
>>> + struct kset *die_kset;
>>> + int res;
>>> +
>>> + adev->ip_top = kzalloc(sizeof(*adev->ip_top), GFP_KERNEL);
>>> + if (!adev->ip_top)
>>> + return -ENOMEM;
>>> +
>>> + adev->ip_top->adev = adev;
>>> +
>>> + res = kobject_init_and_add(&adev->ip_top->kobj, &ip_discovery_ktype,
>>> + &adev->dev->kobj, "ip_discovery");
>>> + if (res) {
>>> + DRM_ERROR("Couldn't init and add ip_discovery/");
>>> + goto Err;
>>> + }
>>> +
>>> + die_kset = &adev->ip_top->die_kset;
>>> + kobject_set_name(&die_kset->kobj, "%s", "die");
>>> + die_kset->kobj.parent = &adev->ip_top->kobj;
>>> + die_kset->kobj.ktype = &die_kobj_ktype;
>>> + res = kset_register(&adev->ip_top->die_kset);
>>> + if (res) {
>>> + DRM_ERROR("Couldn't register die_kset");
>>> + goto Err;
>>> + }
>>> +
>>> + res = amdgpu_discovery_sysfs_recurse(adev);
>>> +
>>> + return res;
>>> +Err:
>>> + kobject_put(&adev->ip_top->kobj);
>>> + return res;
>>> +}
>>> +
>>> +/* ================================================== */
>>> +
>>> int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
>>> {
>>> struct binary_header *bhdr;
>>> @@ -405,7 +824,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
>>>
>>> bhdr = (struct binary_header *)adev->mman.discovery_bin;
>>> ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
>>> - le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
>>> + le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
>>> num_dies = le16_to_cpu(ihdr->num_dies);
>>>
>>> DRM_DEBUG("number of dies: %d\n", num_dies);
>>> @@ -492,6 +911,8 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
>>> }
>>> }
>>>
>>> + amdgpu_discovery_sysfs(adev);
>>> +
>>
>> We should probably also tear this down in amdgpu_discovery_fini() or
>> is that already handled some other way via sysfs?
>
> No, it is not. I'll add this as well. This is why I left comment crumbs.
>
> Regards,
> Luben
>
>>
>> Alex
>>
>>> return 0;
>>> }
>>>
>>> --
>>> 2.35.0.3.gb23dac905b
>>>
>
> Regards,
Regards,
--
Luben
More information about the amd-gfx
mailing list