[PATCH i-g-t v7 3/8] lib/drmtest: allow opening cards out of order
Kamil Konieczny
kamil.konieczny at linux.intel.com
Fri Feb 9 17:00:32 UTC 2024
Hi Janusz,
On 2024-02-06 at 15:27:55 +0100, Janusz Krzysztofik wrote:
> Hi Kamil,
>
> On Friday, 2 February 2024 16:35:12 CET Kamil Konieczny wrote:
> > Current __drm_open_driver_another() implementation prevents
> > opening cards out of order, for example card 3 without previous
>
> How about "in random order" instead of "out of order", here and in commit
> message (Subject: field)?
>
Right, this is better description, I will fix it.
> > opens of 0..2. This can be seen with three GPU cards system and
> > gem_open_basic test:
> >
> > sudo IGT_DEVICE=pci:vendor=Intel,card=all build/tests/gem_basic --r
> > multigpu-create-close
> >
> > IGT-Version: 1.28-g2548a539e (x86_64) (Linux: 6.7.0-rc5 x86_64)
> > Using IGT_SRANDOM=1706630533 for randomisation
> > Opened device: /dev/dri/card0
> > Starting subtest: multigpu-create-close
> > <g:0> Testing creating and closing an object.
> > <g:1> Opened device: /dev/dri/card1
> > <g:1> Testing creating and closing an object.
> > gem_basic: ../lib/drmtest.c:341: _is_already_opened: Assertion `as_idx <=
> > _opened_fds_count' failed.
> > Received signal SIGABRT.
> >
> > Relax that condition
> ..., in both _is_already_opened() and _set_opened_fd() which is called next,
> ...
> > and
> ... add ...
> > some more debug prints.
>
> Is that what you meant?
>
Yes, I will update this.
> >
> > Changes in __search_and_open() was suggested by Zbigniew.
>
> Either "Change ... was ..." or "Changes ... were ...". However, ...
>
Will fix.
> >
> > v5: removed caching for names of opened cards
> > relaxed opening of filtered devices
> > v6: fixed assingment in _is_already_open() (Janusz)
> > simplify use of this function
> > v7: restore check for already opened filtered card (Janusz)
> >
> > Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> > Cc: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
> > Signed-off-by: Kamil Konieczny <kamil.konieczny at linux.intel.com>
> > ---
> > lib/drmtest.c | 81 +++++++++++++++++++++++++++++++++++++++------------
> > 1 file changed, 62 insertions(+), 19 deletions(-)
> >
> > diff --git a/lib/drmtest.c b/lib/drmtest.c
> > index 73d9159af..9101425bd 100644
> > --- a/lib/drmtest.c
> > +++ b/lib/drmtest.c
> > @@ -322,60 +322,97 @@ static int _opened_fds_count;
> > static void _set_opened_fd(int idx, int fd)
> > {
> > assert(idx < ARRAY_SIZE(_opened_fds));
> > - assert(idx <= _opened_fds_count);
> > + assert(idx >= 0);
> >
> > _opened_fds[idx].fd = fd;
> >
> > assert(fstat(fd, &_opened_fds[idx].stat) == 0);
> >
> > - _opened_fds_count = idx+1;
> > + for (int n = _opened_fds_count; n < idx; n++)
> > + _opened_fds[n].fd = -1;
> > +
> > + if (idx >= _opened_fds_count)
> > + _opened_fds_count = idx + 1;
> > }
> >
> > -static bool _is_already_opened(const char *path, int as_idx)
> > +/* in *err returns -errno or index to _opened_fds[] */
> > +static bool _is_already_opened(const char *path, int as_idx, int *err)
> > {
> > struct stat new;
> > + bool found = false;
> >
> > assert(as_idx < ARRAY_SIZE(_opened_fds));
> > - assert(as_idx <= _opened_fds_count);
> > + assert(as_idx >= 0);
> >
> > /*
> > * we cannot even stat the device, so it's of no use - let's claim
> it's
> > * already opened
> > */
> > - if (igt_debug_on(stat(path, &new) != 0))
> > + if (igt_debug_on(stat(path, &new) != 0)) {
> > + if (err)
> > + *err = -errno;
> > +
> > return true;
> > + }
> > +
> > + if (err)
> > + *err = 0;
> > +
> > + for (int i = 0; i < _opened_fds_count; ++i) {
> > + if (_opened_fds[i].fd == -1)
> > + continue;
> >
> > - for (int i = 0; i < as_idx; ++i) {
> > /* did we cross filesystem boundary? */
> > assert(_opened_fds[i].stat.st_dev == new.st_dev);
> >
> > - if (_opened_fds[i].stat.st_ino == new.st_ino)
> > - return true;
> > + if (_opened_fds[i].stat.st_ino == new.st_ino) {
> > + found = true;
> > + if (err)
> > + *err = i;
> > +
> > + if (i != as_idx)
> > + return true;
> > + }
> > }
> >
> > - return false;
> > + return found;
> > }
> >
> > static int __search_and_open(const char *base, int offset, unsigned int
> chipset, int as_idx)
> > {
> > const char *forced;
> > + int err;
> >
> > forced = forced_driver();
> > if (forced)
> > igt_debug("Force option used: Using driver %s\n", forced);
> >
> > - for (int i = 0; i < 16; i++) {
> > + for (int i = 0, idx = -1; i < 16 && idx < as_idx; i++) {
> > char name[80];
> > int fd;
> >
> > sprintf(name, "%s%u", base, i + offset);
> >
> > - if (_is_already_opened(name, as_idx))
> > - continue;
> > + if (_is_already_opened(name, as_idx, &err)) {
> > + if (err < 0)
> > + continue;
> > +
> > + if (idx + 1 < as_idx) {
> > + ++idx;
> > + continue;
> > + }
> > + }
> >
> > fd = __drm_open_device(name, chipset);
> > - if (fd != -1)
> > + if (fd != -1) {
> > + ++idx;
> > + if (idx < as_idx) {
> > + close(fd);
> > + continue;
> > + }
> > +
> > return fd;
> > + }
> > }
> >
> > return -1;
> > @@ -486,7 +523,7 @@ static bool __get_card_for_nth_filter(int idx, struct
> igt_device_card *card)
> > * * idx-th filter (starting with 0, filters are semicolon separated) is
> used
> > * * if there is no idx-th filter, goto 2
> > * * first device maching the filter is selected
> > - * * if it's already opened (for indexes = 0..idx-1) we fail with -1
> > + * * if it's already opened (for indexes != idx) we fail with -1
> > * * otherwise open the device and return the fd
> > *
> > * 2. compatibility mode - open the first DRM device we can find that is
> not
> > @@ -531,6 +568,7 @@ int __drm_open_driver_another(int idx, int chipset)
> > if (chipset != DRIVER_VGEM && igt_device_filter_count() > idx) {
> > struct igt_device_card card;
> > bool found;
> > + int err;
> >
> > found = __get_card_for_nth_filter(idx, &card);
> >
> > @@ -539,14 +577,19 @@ int __drm_open_driver_another(int idx, int chipset)
> > found = __get_card_for_nth_filter(idx, &card);
> > }
> >
> > - if (!found || !strlen(card.card))
> > + if (!found || !strlen(card.card)) {
> > igt_warn("No card matches the filter! [%s]\n",
> > igt_device_filter_get(idx));
> > - else if (_is_already_opened(card.card, idx))
> > - igt_warn("card maching filter %d is already
> > opened\n", idx);
> > - else
> > + } else if (_is_already_opened(card.card, idx, &err)) {
> > + if (err < 0)
> > + igt_warn("cannot stat card maching
> > filter %d errno=%d card=%s\n", idx, err, card.card);
> > + else if (idx != err)
> > + igt_warn("card maching filter %d is
> > already opened as %d, card=%s\n", idx, err, card.card);
> > + else
> > + fd = __open_driver_exact(card.card,
> > chipset);
> > + } else {
> > fd = __open_driver_exact(card.card, chipset);
> > -
> > + }
> > } else {
> > /* no filter for device idx, let's open whatever is
> > available */
> > fd = __open_driver("/dev/dri/card", 0, chipset, idx);
> >
>
> I'm still not sure if all these changes are really needed for resolution of
> the issue you called in your commit description. Doesn't a significant part
> of them belong to a future patch that is going to allow reopening cards
> already opened? For me, the following subset of changes effectively kills the
> failure that may now pop up when opening cards in random order.
As for changes in __drm_open_driver_another() I was tempted
to actually use error which is returned from __search_and_open()
and differentiate error from stat() and actual already opened
file and print more specific error there.
Please see below a comment to your proposed changes.
>
> Thanks,
> Janusz
>
> ---
> @@ -320,34 +320,41 @@ static struct {
> static int _opened_fds_count;
>
> static void _set_opened_fd(int idx, int fd)
> {
> assert(idx < ARRAY_SIZE(_opened_fds));
> - assert(idx <= _opened_fds_count);
> + assert(idx >= 0);
>
> _opened_fds[idx].fd = fd;
>
> assert(fstat(fd, &_opened_fds[idx].stat) == 0);
>
> - _opened_fds_count = idx+1;
> + for (int n = _opened_fds_count; n < idx; n++)
> + _opened_fds[n].fd = -1;
> +
> + if (idx >= _opened_fds_count)
> + _opened_fds_count = idx + 1;
> }
>
> static bool _is_already_opened(const char *path, int as_idx)
> {
> struct stat new;
>
> assert(as_idx < ARRAY_SIZE(_opened_fds));
> - assert(as_idx <= _opened_fds_count);
> + assert(as_idx >= 0);
>
> /*
> * we cannot even stat the device, so it's of no use - let's claim
> it's
> * already opened
> */
> if (igt_debug_on(stat(path, &new) != 0))
> return true;
>
> - for (int i = 0; i < as_idx; ++i) {
> + for (int i = 0; i < _opened_fds_count; ++i) {
This is tricky part, it would work if 'for' loop is:
for (int i = 0, end = min(as_idx, _opened_fds_count); i < end; ++i) {
the reason being _is_already_opened() is called before opening
and setting new, bigger _opened_fds_count.
Regards,
Kamil
> + if (_opened_fds[i].fd == -1)
> + continue;
> +
> /* did we cross filesystem boundary? */
> assert(_opened_fds[i].stat.st_dev == new.st_dev);
>
> if (_opened_fds[i].stat.st_ino == new.st_ino)
> return true;
> @@ -484,11 +491,11 @@ static bool __get_card_for_nth_filter(int idx, struct
> igt_device_card *card)
> * 1. when --device arguments are present:
> * * device scanning is executed,
> * * idx-th filter (starting with 0, filters are semicolon separated) is
> used
> * * if there is no idx-th filter, goto 2
> * * first device maching the filter is selected
> - * * if it's already opened (for indexes = 0..idx-1) we fail with -1
> + * * if it's already opened we fail with -1
> * * otherwise open the device and return the fd
> *
> * 2. compatibility mode - open the first DRM device we can find that is not
> * already opened for indexes 0..idx-1, searching up to 16 device nodes
> *
>
>
>
>
>
More information about the igt-dev
mailing list