[PATCH 22/37] drm/amdkfd: Add perf counters to topology
Christian König
christian.koenig at amd.com
Mon Dec 11 15:53:10 UTC 2017
Am 11.12.2017 um 16:23 schrieb Oded Gabbay:
> On Sat, Dec 9, 2017 at 6:09 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
>> From: Amber Lin <Amber.Lin at amd.com>
>>
>> For hardware blocks whose performance counters are accessed via MMIO
>> registers, KFD provides the support for those privileged blocks. IOMMU is
>> one of those privileged blocks. Most performance counter properties
>> required by Thunk are available at /sys/bus/event_source/devices/amd_iommu.
>> This patch adds properties to topology in KFD sysfs for information not
>> available in /sys/bus/event_source/devices/amd_iommu. They are shown at
>> /sys/devices/virtual/kfd/kfd/topology/nodes/0/perf/iommu/ formatted as
>> /sys/devices/virtual/kfd/kfd/topology/nodes/0/perf/<block>/<property>, i.e.
>> /sys/devices/virtual/kfd/kfd/topology/nodes/0/perf/iommu/max_concurrent.
>> For dGPUs, who don't have IOMMU, nothing appears under
>> /sys/devices/virtual/kfd/kfd/topology/nodes/0/perf.
> I don't feel comfortable with this patch. It seems to me you didn't
> have anywhere to put these counters so you just stuck them in a place
> the thunk already reads because it was "convenient" for you to do it.
> But, as you point out in a comment later, these counters have nothing
> to do with topology.
> So this just feels wrong and I would like to:
>
> a. get additional opinions on it. Christian ? Alex ? What do you think
> ?
I agree that this looks odd.
But that any device specific informations show up outside of
/sys/devices/pci0000:00... is strange to start with.
Please don't tell me that we have build up a secondary topology parallel
to the linux device tree here?
In other word for my Vega10 the real subdirectory for any device
specific config is:
./devices/pci0000:00/0000:00:02.1/0000:01:00.0/0000:02:00.0/0000:03:00.0
With the following files as symlink to it:
./bus/pci/devices/0000:03:00.0
./bus/pci/drivers/amdgpu/0000:03:00.0
> How the GPU's GFX counters are exposed ?
We discussed that internally but haven't decided on anything AFAIK.
> b. Ask why not use IOCTL to get the counters ?
Per process counters are directly readable inside the command submission
affecting them.
Only the timer tick is exposed as IOCTL as well IIRC.
Regards,
Christian.
>
> btw, I tried to search for other drivers that do this (expose perf
> counters in sysfs) and didn't find any (it wasn't an exhaustive search
> so I may have missed).
>
> Thanks,
> Oded
>
>
>
>
>> Signed-off-by: Amber Lin <Amber.Lin at amd.com>
>> Signed-off-by: Kent Russell <kent.russell at amd.com>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 116 +++++++++++++++++++++++++++++-
>> drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 13 ++++
>> 2 files changed, 127 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>> index 7fe7ee0..52d20f5 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>> @@ -104,6 +104,7 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
>> struct kfd_mem_properties *mem;
>> struct kfd_cache_properties *cache;
>> struct kfd_iolink_properties *iolink;
>> + struct kfd_perf_properties *perf;
>>
>> list_del(&dev->list);
>>
>> @@ -128,6 +129,13 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
>> kfree(iolink);
>> }
>>
>> + while (dev->perf_props.next != &dev->perf_props) {
>> + perf = container_of(dev->perf_props.next,
>> + struct kfd_perf_properties, list);
>> + list_del(&perf->list);
>> + kfree(perf);
>> + }
>> +
>> kfree(dev);
>> }
>>
>> @@ -162,6 +170,7 @@ struct kfd_topology_device *kfd_create_topology_device(
>> INIT_LIST_HEAD(&dev->mem_props);
>> INIT_LIST_HEAD(&dev->cache_props);
>> INIT_LIST_HEAD(&dev->io_link_props);
>> + INIT_LIST_HEAD(&dev->perf_props);
>>
>> list_add_tail(&dev->list, device_list);
>>
>> @@ -328,6 +337,39 @@ static struct kobj_type cache_type = {
>> .sysfs_ops = &cache_ops,
>> };
>>
>> +/****** Sysfs of Performance Counters ******/
>> +
>> +struct kfd_perf_attr {
>> + struct kobj_attribute attr;
>> + uint32_t data;
>> +};
>> +
>> +static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
>> + char *buf)
>> +{
>> + struct kfd_perf_attr *attr;
>> +
>> + buf[0] = 0;
>> + attr = container_of(attrs, struct kfd_perf_attr, attr);
>> + if (!attr->data) /* invalid data for PMC */
>> + return 0;
>> + else
>> + return sysfs_show_32bit_val(buf, attr->data);
>> +}
>> +
>> +#define KFD_PERF_DESC(_name, _data) \
>> +{ \
>> + .attr = __ATTR(_name, 0444, perf_show, NULL), \
>> + .data = _data, \
>> +}
>> +
>> +static struct kfd_perf_attr perf_attr_iommu[] = {
>> + KFD_PERF_DESC(max_concurrent, 0),
>> + KFD_PERF_DESC(num_counters, 0),
>> + KFD_PERF_DESC(counter_ids, 0),
>> +};
>> +/****************************************/
>> +
>> static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
>> char *buffer)
>> {
>> @@ -452,6 +494,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
>> struct kfd_iolink_properties *iolink;
>> struct kfd_cache_properties *cache;
>> struct kfd_mem_properties *mem;
>> + struct kfd_perf_properties *perf;
>>
>> if (dev->kobj_iolink) {
>> list_for_each_entry(iolink, &dev->io_link_props, list)
>> @@ -488,6 +531,16 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
>> dev->kobj_mem = NULL;
>> }
>>
>> + if (dev->kobj_perf) {
>> + list_for_each_entry(perf, &dev->perf_props, list) {
>> + kfree(perf->attr_group);
>> + perf->attr_group = NULL;
>> + }
>> + kobject_del(dev->kobj_perf);
>> + kobject_put(dev->kobj_perf);
>> + dev->kobj_perf = NULL;
>> + }
>> +
>> if (dev->kobj_node) {
>> sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid);
>> sysfs_remove_file(dev->kobj_node, &dev->attr_name);
>> @@ -504,8 +557,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
>> struct kfd_iolink_properties *iolink;
>> struct kfd_cache_properties *cache;
>> struct kfd_mem_properties *mem;
>> + struct kfd_perf_properties *perf;
>> int ret;
>> - uint32_t i;
>> + uint32_t i, num_attrs;
>> + struct attribute **attrs;
>>
>> if (WARN_ON(dev->kobj_node))
>> return -EEXIST;
>> @@ -534,6 +589,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
>> if (!dev->kobj_iolink)
>> return -ENOMEM;
>>
>> + dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
>> + if (!dev->kobj_perf)
>> + return -ENOMEM;
>> +
>> /*
>> * Creating sysfs files for node properties
>> */
>> @@ -611,7 +670,33 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
>> if (ret < 0)
>> return ret;
>> i++;
>> -}
>> + }
>> +
>> + /* All hardware blocks have the same number of attributes. */
>> + num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr);
>> + list_for_each_entry(perf, &dev->perf_props, list) {
>> + perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
>> + * num_attrs + sizeof(struct attribute_group),
>> + GFP_KERNEL);
>> + if (!perf->attr_group)
>> + return -ENOMEM;
>> +
>> + attrs = (struct attribute **)(perf->attr_group + 1);
>> + if (!strcmp(perf->block_name, "iommu")) {
>> + /* Information of IOMMU's num_counters and counter_ids is shown
>> + * under /sys/bus/event_source/devices/amd_iommu. We don't
>> + * duplicate here.
>> + */
>> + perf_attr_iommu[0].data = perf->max_concurrent;
>> + for (i = 0; i < num_attrs; i++)
>> + attrs[i] = &perf_attr_iommu[i].attr.attr;
>> + }
>> + perf->attr_group->name = perf->block_name;
>> + perf->attr_group->attrs = attrs;
>> + ret = sysfs_create_group(dev->kobj_perf, perf->attr_group);
>> + if (ret < 0)
>> + return ret;
>> + }
>>
>> return 0;
>> }
>> @@ -778,6 +863,29 @@ static void find_system_memory(const struct dmi_header *dm,
>> }
>> }
>> }
>> +
>> +/*
>> + * Performance counters information is not part of CRAT but we would like to
>> + * put them in the sysfs under topology directory for Thunk to get the data.
>> + * This function is called before updating the sysfs.
>> + */
>> +static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
>> +{
>> + struct kfd_perf_properties *props;
>> +
>> + if (amd_iommu_pc_supported()) {
>> + props = kfd_alloc_struct(props);
>> + if (!props)
>> + return -ENOMEM;
>> + strcpy(props->block_name, "iommu");
>> + props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
>> + amd_iommu_pc_get_max_counters(0); /* assume one iommu */
>> + list_add_tail(&props->list, &kdev->perf_props);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> /* kfd_add_non_crat_information - Add information that is not currently
>> * defined in CRAT but is necessary for KFD topology
>> * @dev - topology device to which addition info is added
>> @@ -860,6 +968,10 @@ int kfd_topology_init(void)
>> }
>> }
>>
>> + kdev = list_first_entry(&temp_topology_device_list,
>> + struct kfd_topology_device, list);
>> + kfd_add_perf_to_topology(kdev);
>> +
>> down_write(&topology_lock);
>> kfd_topology_update_device_list(&temp_topology_device_list,
>> &topology_device_list);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
>> index 55de56f..b9f3142 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
>> @@ -134,6 +134,13 @@ struct kfd_iolink_properties {
>> struct attribute attr;
>> };
>>
>> +struct kfd_perf_properties {
>> + struct list_head list;
>> + char block_name[16];
>> + uint32_t max_concurrent;
>> + struct attribute_group *attr_group;
>> +};
>> +
>> struct kfd_topology_device {
>> struct list_head list;
>> uint32_t gpu_id;
>> @@ -144,11 +151,13 @@ struct kfd_topology_device {
>> struct list_head cache_props;
>> uint32_t io_link_count;
>> struct list_head io_link_props;
>> + struct list_head perf_props;
>> struct kfd_dev *gpu;
>> struct kobject *kobj_node;
>> struct kobject *kobj_mem;
>> struct kobject *kobj_cache;
>> struct kobject *kobj_iolink;
>> + struct kobject *kobj_perf;
>> struct attribute attr_gpuid;
>> struct attribute attr_name;
>> struct attribute attr_props;
>> @@ -173,4 +182,8 @@ struct kfd_topology_device *kfd_create_topology_device(
>> struct list_head *device_list);
>> void kfd_release_topology_device_list(struct list_head *device_list);
>>
>> +extern bool amd_iommu_pc_supported(void);
>> +extern u8 amd_iommu_pc_get_max_banks(u16 devid);
>> +extern u8 amd_iommu_pc_get_max_counters(u16 devid);
>> +
>> #endif /* __KFD_TOPOLOGY_H__ */
>> --
>> 2.7.4
>>
More information about the amd-gfx
mailing list