[PATCH] drm/amdgpu: Add sysfs entries for xgmi hive.

Koenig, Christian Christian.Koenig at amd.com
Tue Mar 5 18:55:26 UTC 2019


Am 05.03.19 um 19:36 schrieb Grodzovsky, Andrey:
> On 3/5/19 1:26 PM, Koenig, Christian wrote:
>> Am 05.03.19 um 19:24 schrieb Grodzovsky, Andrey:
>>> On 3/5/19 1:18 PM, Koenig, Christian wrote:
>>>> Am 05.03.19 um 18:47 schrieb Andrey Grodzovsky:
>>>>> For each device a file xgmi_device_id is created.
>>>>> On the first device a subdirectory named xgmi_hive_info is created,
>>>>> It contains  a file named hive_id and symlinks named node 1-4 linking
>>>>> to each device in the hive.
>>>>>
>>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>>>>> ---
>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 145 ++++++++++++++++++++++++++++++-
>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h |   6 +-
>>>>>       2 files changed, 146 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>>>>> index 407dd16c..394be10 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>>>>> @@ -34,12 +34,132 @@ static DEFINE_MUTEX(xgmi_mutex);
>>>>>       static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
>>>>>       static unsigned hive_count = 0;
>>>>>       
>>>>> -
>>>>>       void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)
>>>>>       {
>>>>>       	return &hive->device_list;
>>>>>       }
>>>>>       
>>>>> +static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev,
>>>>> +		struct device_attribute *attr, char *buf)
>>>>> +{
>>>>> +	struct amdgpu_hive_info *hive =
>>>>> +			container_of(attr, struct amdgpu_hive_info, dev_attr);
>>>>> +
>>>>> +	return snprintf(buf, PAGE_SIZE, "%llu\n", hive->hive_id);
>>>>> +}
>>>>> +
>>>>> +static int amdgpu_xgmi_sysfs_create(struct amdgpu_device *adev,
>>>>> +				    struct amdgpu_hive_info *hive)
>>>>> +{
>>>>> +	int ret = 0;
>>>>> +
>>>>> +	if (WARN_ON(hive->kobj))
>>>>> +		return -1;
>>>> Please return proper error codes here.
>>>>
>>>>> +
>>>>> +	hive->kobj = kobject_create_and_add("xgmi_hive_info", &adev->dev->kobj);
>>>>> +	if (!hive->kobj) {
>>>>> +		dev_err(adev->dev, "XGMI: Failed to allocate sysfs entry!\n");
>>>>> +		return -1;
>>>>> +	}
>>>>> +
>>>>> +	hive->dev_attr = (struct device_attribute) {
>>>>> +		.attr = {
>>>>> +			.name = "xgmi_hive_id",
>>>>> +			.mode = S_IRUGO,
>>>>> +
>>>>> +		},
>>>>> +		.show = amdgpu_xgmi_show_hive_id,
>>>>> +	};
>>>> Why do you put the attribute into the hive? Usually those are statically
>>>> declared somewhere.
>>> To access the hive from amdgpu_xgmi_show_hive_id using container_of
>> Ok that is rather unusual.
>>
>> Can't we use device_create_file() and a proper device attribute here?
>>
>> See amdgpu_get_dpm_forced_performance_level for an example how to use it.
> device_create_file will attach the file to the device's kobj which will place it in sys/class/drm/cardX/device while we want it to reside inside the xgmi_hive_info folder, for this i added
> kobj to the hive and use sysfs_create_file directly (device_create_file is just a wrapper around this) to specify i want to create the file (device property) for hive's kobj.

Gotcha, now I understand. Not sure if that is the right approach, but it 
will certainly work.

Alternative would be to embed the kobject for the hive directory in the 
hive structure, that's how it is done with the devices as well.

Anyway feel free to go ahead with the current approach.

Christian.

>
> Andrey
>
>> Christian.
>>
>>> Andrey
>>>
>>>
>>>> Apart from that looks good to me,
>>>> Christian.
>>>>
>>>>> +
>>>>> +	ret = sysfs_create_file(hive->kobj, &hive->dev_attr.attr);
>>>>> +	if (ret) {
>>>>> +		dev_err(adev->dev, "XGMI: Failed to create device file xgmi_hive_id\n");
>>>>> +		kobject_del(hive->kobj);
>>>>> +		kobject_put(hive->kobj);
>>>>> +		hive->kobj = NULL;
>>>>> +	}
>>>>> +
>>>>> +	return ret;
>>>>> +}
>>>>> +
>>>>> +static void amdgpu_xgmi_sysfs_destroy(struct amdgpu_device *adev,
>>>>> +				    struct amdgpu_hive_info *hive)
>>>>> +{
>>>>> +	sysfs_remove_file(hive->kobj, &hive->dev_attr.attr);
>>>>> +	kobject_del(hive->kobj);
>>>>> +	kobject_put(hive->kobj);
>>>>> +	hive->kobj = NULL;
>>>>> +}
>>>>> +
>>>>> +static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
>>>>> +				     struct device_attribute *attr,
>>>>> +				     char *buf)
>>>>> +{
>>>>> +	struct drm_device *ddev = dev_get_drvdata(dev);
>>>>> +	struct amdgpu_device *adev = ddev->dev_private;
>>>>> +
>>>>> +	return snprintf(buf, PAGE_SIZE, "%llu\n", adev->gmc.xgmi.node_id);
>>>>> +
>>>>> +}
>>>>> +
>>>>> +
>>>>> +static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL);
>>>>> +
>>>>> +
>>>>> +static int amdgpu_xgmi_sysf_add_dev_info(struct amdgpu_device *adev,
>>>>> +					 struct amdgpu_hive_info *hive)
>>>>> +{
>>>>> +	int ret = 0;
>>>>> +	char node[10] = { 0 };
>>>>> +
>>>>> +	/* Create xgmi device id file */
>>>>> +	ret = device_create_file(adev->dev, &dev_attr_xgmi_device_id);
>>>>> +	if (ret) {
>>>>> +		dev_err(adev->dev, "XGMI: Failed to create device file xgmi_device_id\n");
>>>>> +		return ret;
>>>>> +	}
>>>>> +
>>>>> +	/* Create sysfs link to hive info folder on the first device */
>>>>> +	if (adev != hive->adev) {
>>>>> +		ret = sysfs_create_link(&adev->dev->kobj, hive->kobj,
>>>>> +					"xgmi_hive_info");
>>>>> +		if (ret) {
>>>>> +			dev_err(adev->dev, "XGMI: Failed to create link to hive info");
>>>>> +			goto remove_file;
>>>>> +		}
>>>>> +	}
>>>>> +
>>>>> +	sprintf(node, "node%d", hive->number_devices);
>>>>> +	/* Create sysfs link form the hive folder to yorself */
>>>>> +	ret = sysfs_create_link(hive->kobj, &adev->dev->kobj, node);
>>>>> +	if (ret) {
>>>>> +		dev_err(adev->dev, "XGMI: Failed to create link from hive info");
>>>>> +		goto remove_link;
>>>>> +	}
>>>>> +
>>>>> +	goto success;
>>>>> +
>>>>> +
>>>>> +remove_link:
>>>>> +	sysfs_remove_link(&adev->dev->kobj, adev->ddev->unique);
>>>>> +
>>>>> +remove_file:
>>>>> +	device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
>>>>> +
>>>>> +success:
>>>>> +	return ret;
>>>>> +}
>>>>> +
>>>>> +static void amdgpu_xgmi_sysf_rem_dev_info(struct amdgpu_device *adev,
>>>>> +					  struct amdgpu_hive_info *hive)
>>>>> +{
>>>>> +	device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
>>>>> +	sysfs_remove_link(&adev->dev->kobj, adev->ddev->unique);
>>>>> +	sysfs_remove_link(hive->kobj, adev->ddev->unique);
>>>>> +}
>>>>> +
>>>>> +
>>>>> +
>>>>>       struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock)
>>>>>       {
>>>>>       	int i;
>>>>> @@ -66,10 +186,18 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
>>>>>       
>>>>>       	/* initialize new hive if not exist */
>>>>>       	tmp = &xgmi_hives[hive_count++];
>>>>> +
>>>>> +	if (amdgpu_xgmi_sysfs_create(adev, tmp)) {
>>>>> +		mutex_unlock(&xgmi_mutex);
>>>>> +		return NULL;
>>>>> +	}
>>>>> +
>>>>> +	tmp->adev = adev;
>>>>>       	tmp->hive_id = adev->gmc.xgmi.hive_id;
>>>>>       	INIT_LIST_HEAD(&tmp->device_list);
>>>>>       	mutex_init(&tmp->hive_lock);
>>>>>       	mutex_init(&tmp->reset_lock);
>>>>> +
>>>>>       	if (lock)
>>>>>       		mutex_lock(&tmp->hive_lock);
>>>>>       
>>>>> @@ -156,8 +284,17 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>>>>>       			break;
>>>>>       	}
>>>>>       
>>>>> -	dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",
>>>>> -		 adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
>>>>> +	if (!ret)
>>>>> +		ret = amdgpu_xgmi_sysf_add_dev_info(adev, hive);
>>>>> +
>>>>> +	if (!ret)
>>>>> +		dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",
>>>>> +			 adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
>>>>> +	else
>>>>> +		dev_err(adev->dev, "XGMI: Failed to add node %d, hive 0x%llx ret: %d\n",
>>>>> +			adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id,
>>>>> +			ret);
>>>>> +
>>>>>       
>>>>>       	mutex_unlock(&hive->hive_lock);
>>>>>       exit:
>>>>> @@ -176,9 +313,11 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
>>>>>       		return;
>>>>>       
>>>>>       	if (!(hive->number_devices--)) {
>>>>> +		amdgpu_xgmi_sysfs_destroy(adev, hive);
>>>>>       		mutex_destroy(&hive->hive_lock);
>>>>>       		mutex_destroy(&hive->reset_lock);
>>>>>       	} else {
>>>>> +		amdgpu_xgmi_sysf_rem_dev_info(adev, hive);
>>>>>       		mutex_unlock(&hive->hive_lock);
>>>>>       	}
>>>>>       }
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>>>>> index 14bc606..24a3b03 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>>>>> @@ -29,8 +29,10 @@ struct amdgpu_hive_info {
>>>>>       	struct list_head	device_list;
>>>>>       	struct psp_xgmi_topology_info	topology_info;
>>>>>       	int number_devices;
>>>>> -	struct mutex hive_lock,
>>>>> -		     reset_lock;
>>>>> +	struct mutex hive_lock, reset_lock;
>>>>> +	struct kobject *kobj;
>>>>> +	struct device_attribute dev_attr;
>>>>> +	struct amdgpu_device *adev;
>>>>>       };
>>>>>       
>>>>>       struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock);



More information about the amd-gfx mailing list