[igt-dev] [PATCH i-g-t] lib/igt_device_scan: Retry when parent pci device lacks properties
Petri Latvala
petri.latvala at intel.com
Mon Aug 29 10:20:48 UTC 2022
On Wed, Aug 24, 2022 at 06:34:51AM +0200, Zbigniew Kempczyński wrote:
> When driver is loaded or binded and device scanning is performed immediate
> after that we observed sometimes parent pci device lacks properties.
> Device selection is built on top of this so we need to solve this
> situation somehow.
>
> Unfortunately reproduction of this bug is extremely hard and likely
> is located in libudev or libsystemd libraries. Situation complicates
> fact udev_device_get_parent() internally scans parent device once so
> consecutive calls returns stale (and maybe properties missing) parent
> device.
>
> We know syspath of parent device so we can enforce scanning of such
> device calling udev_device_new_from_syspath() which returns freshly
> scanned device. To increase probability we get properly scanned device
> retry loop tries to do this several times.
>
> Previous code which dumps properties, attributes and uevent was removed.
> We don't need it anymore as it was temporary code to get some picture
> what's wrong.
>
> Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> Cc: Petri Latvala <petri.latvala at intel.com>
Reviewed-by: Petri Latvala <petri.latvala at intel.com>
> ---
> lib/igt_device_scan.c | 93 +++++++++++++++++++------------------------
> 1 file changed, 41 insertions(+), 52 deletions(-)
>
> diff --git a/lib/igt_device_scan.c b/lib/igt_device_scan.c
> index d61d79276c..eb6b45b876 100644
> --- a/lib/igt_device_scan.c
> +++ b/lib/igt_device_scan.c
> @@ -241,6 +241,8 @@ static struct {
> bool devs_scanned;
> } igt_devs;
>
> +static void igt_device_free(struct igt_device *dev);
> +
> typedef char *(*devname_fn)(uint16_t, uint16_t);
> typedef enum dev_type (*devtype_fn)(uint16_t, uint16_t, const char *);
>
> @@ -568,28 +570,6 @@ static void dump_props_and_attrs(const struct igt_device *dev)
> printf("\n");
> }
>
> -static void dump_uevent_file(struct igt_device *dev)
> -{
> - char filename[FILENAME_MAX];
> - const char *devpath = get_prop(dev, "DEVPATH");
> - char *line = NULL;
> - FILE *in;
> - size_t n;
> -
> - igt_assert_f(devpath, "DEVPATH property doesn't exist\n");
> - snprintf(filename, FILENAME_MAX, "/sys%s/uevent", devpath);
> -
> - in = fopen(filename, "r");
> - igt_assert(in);
> -
> - printf("[uevent: %s]\n", filename);
> - while (getline(&line, &n, in) >= 0)
> - printf("%s", line);
> -
> - free(line);
> - fclose(in);
> -}
> -
> /*
> * Get PCI_SLOT_NAME property, it should be in format of
> * xxxx:yy:zz.z
> @@ -597,22 +577,9 @@ static void dump_uevent_file(struct igt_device *dev)
> static bool set_pci_slot_name(struct igt_device *dev)
> {
> const char *pci_slot_name = get_prop(dev, "PCI_SLOT_NAME");
> - int len;
>
> - if (!pci_slot_name) {
> - dump_props_and_attrs(dev);
> - igt_warn("PCI_SLOT_NAME property == NULL\n");
> - dump_uevent_file(dev);
> + if (!pci_slot_name || strlen(pci_slot_name) != PCI_SLOT_NAME_SIZE)
> return false;
> - }
> -
> - len = strlen(pci_slot_name);
> - if (len != PCI_SLOT_NAME_SIZE) {
> - dump_props_and_attrs(dev);
> - igt_warn("PCI_SLOT_NAME length != %d [%s]\n", len, pci_slot_name);
> - dump_uevent_file(dev);
> - return false;
> - }
>
> dev->pci_slot_name = strdup(pci_slot_name);
> return true;
> @@ -623,14 +590,17 @@ static bool set_pci_slot_name(struct igt_device *dev)
> * xxxx to dev->vendor and yyyy to dev->device for
> * faster access.
> */
> -static void set_vendor_device(struct igt_device *dev)
> +static bool set_vendor_device(struct igt_device *dev)
> {
> const char *pci_id = get_prop(dev, "PCI_ID");
>
> if (!pci_id || strlen(pci_id) != 9)
> - return;
> + return false;
> +
> dev->vendor = strndup(pci_id, 4);
> dev->device = strndup(pci_id + 5, 4);
> +
> + return true;
> }
>
> /* Initialize lists for keeping scanned devices */
> @@ -674,16 +644,9 @@ static struct igt_device *igt_device_new_from_udev(struct udev_device *dev)
> if (is_pci_subsystem(idev)) {
> uint16_t vendor, device;
>
> - set_vendor_device(idev);
> -
> - /*
> - * Very rare we observe there's no PCI_SLOT_NAME property.
> - * We depend on it so retry acquiring properties from udev.
> - */
> - if (!set_pci_slot_name(idev)) {
> - g_hash_table_remove_all(idev->props_ht);
> - get_props(dev, idev);
> - igt_assert(set_pci_slot_name(idev));
> + if (!set_vendor_device(idev) || !set_pci_slot_name(idev)) {
> + igt_device_free(idev);
> + return NULL;
> }
> get_pci_vendor_device(idev, &vendor, &device);
> idev->codename = __pci_codename(vendor, device);
> @@ -847,17 +810,24 @@ static struct igt_device *igt_device_from_syspath(const char *syspath)
> return NULL;
> }
>
> +#define RETRIES_GET_PARENT 5
> /* For each drm igt_device add or update its parent igt_device to the array.
> * As card/render drm devices mostly have same parent (vkms is an exception)
> * link to it and update corresponding drm_card / drm_render fields.
> */
> -static void update_or_add_parent(struct udev_device *dev,
> +static void update_or_add_parent(struct udev *udev,
> + struct udev_device *dev,
> struct igt_device *idev)
> {
> struct udev_device *parent_dev;
> struct igt_device *parent_idev;
> const char *subsystem, *syspath, *devname;
> + int retries = RETRIES_GET_PARENT;
>
> + /*
> + * Get parent for drm node. It caches parent in udev device
> + * and will be destroyed along with the node.
> + */
> parent_dev = udev_device_get_parent(dev);
> igt_assert(parent_dev);
>
> @@ -865,10 +835,29 @@ static void update_or_add_parent(struct udev_device *dev,
> syspath = udev_device_get_syspath(parent_dev);
>
> parent_idev = igt_device_find(subsystem, syspath);
> - if (!parent_idev) {
> + while (!parent_idev && retries--) {
> + /*
> + * Don't care about previous parent_dev, it is tracked
> + * by the child node. There's very rare race when driver module
> + * is loaded or binded - in this moment getting parent device
> + * may finish with incomplete properties. Unfortunately even
> + * if we notice this (missing PCI_ID or PCI_SLOT_NAME)
> + * consecutive calling udev_device_get_parent() will return
> + * stale (cached parent) device. We don't want this so
> + * only udev_device_new*() will scan sys directory and
> + * return fresh udev device.
> + */
> + parent_dev = udev_device_new_from_syspath(udev, syspath);
> parent_idev = igt_device_new_from_udev(parent_dev);
> - igt_list_add_tail(&parent_idev->link, &igt_devs.all);
> + udev_device_unref(parent_dev);
> +
> + if (parent_idev)
> + igt_list_add_tail(&parent_idev->link, &igt_devs.all);
> + else
> + usleep(100000); /* arbitrary, 100ms should be enough */
> }
> + igt_assert(parent_idev);
> +
> devname = udev_device_get_devnode(dev);
> if (devname != NULL && strstr(devname, "/dev/dri/card"))
> parent_idev->drm_card = strdup(devname);
> @@ -997,7 +986,7 @@ static void scan_drm_devices(void)
> udev_dev = udev_device_new_from_syspath(udev, path);
> idev = igt_device_new_from_udev(udev_dev);
> igt_list_add_tail(&idev->link, &igt_devs.all);
> - update_or_add_parent(udev_dev, idev);
> + update_or_add_parent(udev, udev_dev, idev);
>
> udev_device_unref(udev_dev);
> }
> --
> 2.34.1
>
More information about the igt-dev
mailing list