[PATCH i-g-t v7 3/8] lib/drmtest: allow opening cards out of order

Kamil Konieczny kamil.konieczny at linux.intel.com
Fri Feb 9 17:00:32 UTC 2024


Hi Janusz,
On 2024-02-06 at 15:27:55 +0100, Janusz Krzysztofik wrote:
> Hi Kamil,
> 
> On Friday, 2 February 2024 16:35:12 CET Kamil Konieczny wrote:
> > Current __drm_open_driver_another() implementation prevents
> > opening cards out of order, for example card 3 without previous
> 
> How about "in random order" instead of "out of order", here and in commit 
> message (Subject: field)?
> 

Right, this is better description, I will fix it.

> > opens of 0..2. This can be seen with three GPU cards system and
> > gem_open_basic test:
> > 
> > sudo IGT_DEVICE=pci:vendor=Intel,card=all build/tests/gem_basic --r 
> > multigpu-create-close
> > 
> > IGT-Version: 1.28-g2548a539e (x86_64) (Linux: 6.7.0-rc5 x86_64)
> > Using IGT_SRANDOM=1706630533 for randomisation
> > Opened device: /dev/dri/card0
> > Starting subtest: multigpu-create-close
> > <g:0> Testing creating and closing an object.
> > <g:1> Opened device: /dev/dri/card1
> > <g:1> Testing creating and closing an object.
> > gem_basic: ../lib/drmtest.c:341: _is_already_opened: Assertion `as_idx <= 
> > _opened_fds_count' failed.
> > Received signal SIGABRT.
> > 
> > Relax that condition
> ..., in both _is_already_opened() and _set_opened_fd() which is called next, 
> ...
> > and 
> ... add ...
> > some more debug prints.
> 
> Is that what you meant?
> 

Yes, I will update this.

> > 
> > Changes in __search_and_open() was suggested by Zbigniew.
> 
> Either "Change ... was ..." or "Changes ... were ...".  However, ...
>

Will fix.

> > 
> > v5: removed caching for names of opened cards
> >  relaxed opening of filtered devices
> > v6: fixed assingment in _is_already_open() (Janusz)
> >  simplify use of this function
> > v7: restore check for already opened filtered card (Janusz)
> > 
> > Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> > Cc: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
> > Signed-off-by: Kamil Konieczny <kamil.konieczny at linux.intel.com>
> > ---
> >  lib/drmtest.c | 81 +++++++++++++++++++++++++++++++++++++++------------
> >  1 file changed, 62 insertions(+), 19 deletions(-)
> > 
> > diff --git a/lib/drmtest.c b/lib/drmtest.c
> > index 73d9159af..9101425bd 100644
> > --- a/lib/drmtest.c
> > +++ b/lib/drmtest.c
> > @@ -322,60 +322,97 @@ static int _opened_fds_count;
> >  static void _set_opened_fd(int idx, int fd)
> >  {
> >  	assert(idx < ARRAY_SIZE(_opened_fds));
> > -	assert(idx <= _opened_fds_count);
> > +	assert(idx >= 0);
> >  
> >  	_opened_fds[idx].fd = fd;
> >  
> >  	assert(fstat(fd, &_opened_fds[idx].stat) == 0);
> >  
> > -	_opened_fds_count = idx+1;
> > +	for (int n = _opened_fds_count; n < idx; n++)
> > +		_opened_fds[n].fd = -1;
> > +
> > +	if (idx >= _opened_fds_count)
> > +		_opened_fds_count = idx + 1;
> >  }
> >  
> > -static bool _is_already_opened(const char *path, int as_idx)
> > +/* in *err returns -errno or index to _opened_fds[] */
> > +static bool _is_already_opened(const char *path, int as_idx, int *err)
> >  {
> >  	struct stat new;
> > +	bool found = false;
> >  
> >  	assert(as_idx < ARRAY_SIZE(_opened_fds));
> > -	assert(as_idx <= _opened_fds_count);
> > +	assert(as_idx >= 0);
> >  
> >  	/*
> >  	 * we cannot even stat the device, so it's of no use - let's claim 
> it's
> >  	 * already opened
> >  	 */
> > -	if (igt_debug_on(stat(path, &new) != 0))
> > +	if (igt_debug_on(stat(path, &new) != 0)) {
> > +		if (err)
> > +			*err = -errno;
> > +
> >  		return true;
> > +	}
> > +
> > +	if (err)
> > +		*err = 0;
> > +
> > +	for (int i = 0; i < _opened_fds_count; ++i) {
> > +		if (_opened_fds[i].fd == -1)
> > +			continue;
> >  
> > -	for (int i = 0; i < as_idx; ++i) {
> >  		/* did we cross filesystem boundary? */
> >  		assert(_opened_fds[i].stat.st_dev == new.st_dev);
> >  
> > -		if (_opened_fds[i].stat.st_ino == new.st_ino)
> > -			return true;
> > +		if (_opened_fds[i].stat.st_ino == new.st_ino) {
> > +			found = true;
> > +			if (err)
> > +				*err = i;
> > +
> > +			if (i != as_idx)
> > +				return true;
> > +		}
> >  	}
> >  
> > -	return false;
> > +	return found;
> >  }
> >  
> >  static int __search_and_open(const char *base, int offset, unsigned int 
> chipset, int as_idx)
> >  {
> >  	const char *forced;
> > +	int err;
> >  
> >  	forced = forced_driver();
> >  	if (forced)
> >  		igt_debug("Force option used: Using driver %s\n", forced);
> >  
> > -	for (int i = 0; i < 16; i++) {
> > +	for (int i = 0, idx = -1; i < 16 && idx < as_idx; i++) {
> >  		char name[80];
> >  		int fd;
> >  
> >  		sprintf(name, "%s%u", base, i + offset);
> >  
> > -		if (_is_already_opened(name, as_idx))
> > -			continue;
> > +		if (_is_already_opened(name, as_idx, &err)) {
> > +			if (err < 0)
> > +				continue;
> > +
> > +			if (idx + 1 < as_idx) {
> > +				++idx;
> > +				continue;
> > +			}
> > +		}
> >  
> >  		fd = __drm_open_device(name, chipset);
> > -		if (fd != -1)
> > +		if (fd != -1) {
> > +			++idx;
> > +			if (idx < as_idx) {
> > +				close(fd);
> > +				continue;
> > +			}
> > +
> >  			return fd;
> > +		}
> >  	}
> >  
> >  	return -1;
> > @@ -486,7 +523,7 @@ static bool __get_card_for_nth_filter(int idx, struct 
> igt_device_card *card)
> >   *   * idx-th filter (starting with 0, filters are semicolon separated) is 
> used
> >   *   * if there is no idx-th filter, goto 2
> >   *   * first device maching the filter is selected
> > - *   * if it's already opened (for indexes = 0..idx-1) we fail with -1
> > + *   * if it's already opened (for indexes != idx) we fail with -1
> >   *   * otherwise open the device and return the fd
> >   *
> >   * 2. compatibility mode - open the first DRM device we can find that is 
> not
> > @@ -531,6 +568,7 @@ int __drm_open_driver_another(int idx, int chipset)
> >  	if (chipset != DRIVER_VGEM && igt_device_filter_count() > idx) {
> >  		struct igt_device_card card;
> >  		bool found;
> > +		int err;
> >  
> >  		found = __get_card_for_nth_filter(idx, &card);
> >  
> > @@ -539,14 +577,19 @@ int __drm_open_driver_another(int idx, int chipset)
> >  			found = __get_card_for_nth_filter(idx, &card);
> >  		}
> >  
> > -		if (!found || !strlen(card.card))
> > +		if (!found || !strlen(card.card)) {
> >  			igt_warn("No card matches the filter! [%s]\n",
> >  				 igt_device_filter_get(idx));
> > -		else if (_is_already_opened(card.card, idx))
> > -			igt_warn("card maching filter %d is already 
> > opened\n", idx);
> > -		else
> > +		} else if (_is_already_opened(card.card, idx, &err)) {
> > +			if (err < 0)
> > +				igt_warn("cannot stat card maching 
> > filter %d errno=%d card=%s\n", idx, err, card.card);
> > +			else if (idx != err)
> > +				igt_warn("card maching filter %d is 
> > already opened as %d, card=%s\n", idx, err, card.card);
> > +			else
> > +				fd = __open_driver_exact(card.card, 
> > chipset);
> > +		} else {
> >  			fd = __open_driver_exact(card.card, chipset);
> > -
> > +		}
> >  	} else {
> >  		/* no filter for device idx, let's open whatever is 
> > available */
> >  		fd = __open_driver("/dev/dri/card", 0, chipset, idx);
> > 
> 
> I'm still not sure if all these changes are really needed for resolution of 
> the issue you called in your commit description.  Doesn't a significant part 
> of them belong to a future patch that is going to allow reopening cards 
> already opened?  For me, the following subset of changes effectively kills the 
> failure that may now pop up when opening cards in random order.

As for changes in  __drm_open_driver_another() I was tempted
to actually use error which is returned from __search_and_open()
and differentiate error from stat() and actual already opened
file and print more specific error there.

Please see below a comment to your proposed changes.

> 
> Thanks,
> Janusz
> 
> ---
> @@ -320,34 +320,41 @@ static struct {
>  static int _opened_fds_count;
>  
>  static void _set_opened_fd(int idx, int fd)
>  {
>  	assert(idx < ARRAY_SIZE(_opened_fds));
> -	assert(idx <= _opened_fds_count);
> +	assert(idx >= 0);
>  
>  	_opened_fds[idx].fd = fd;
>  
>  	assert(fstat(fd, &_opened_fds[idx].stat) == 0);
>  
> -	_opened_fds_count = idx+1;
> +	for (int n = _opened_fds_count; n < idx; n++)
> +		_opened_fds[n].fd = -1;
> +
> +	if (idx >= _opened_fds_count)
> +		_opened_fds_count = idx + 1;
>  }
>  
>  static bool _is_already_opened(const char *path, int as_idx)
>  {
>  	struct stat new;
>  
>  	assert(as_idx < ARRAY_SIZE(_opened_fds));
> -	assert(as_idx <= _opened_fds_count);
> +	assert(as_idx >= 0);
>  
>  	/*
>  	 * we cannot even stat the device, so it's of no use - let's claim 
> it's
>  	 * already opened
>  	 */
>  	if (igt_debug_on(stat(path, &new) != 0))
>  		return true;
>  
> -	for (int i = 0; i < as_idx; ++i) {
> +	for (int i = 0; i < _opened_fds_count; ++i) {

This is tricky part, it would work if 'for' loop is:

for (int i = 0, end = min(as_idx, _opened_fds_count); i < end; ++i) {

the reason being _is_already_opened() is called before opening
and setting new, bigger _opened_fds_count.

Regards,
Kamil

> +		if (_opened_fds[i].fd == -1)
> +			continue;
> +
>  		/* did we cross filesystem boundary? */
>  		assert(_opened_fds[i].stat.st_dev == new.st_dev);
>  
>  		if (_opened_fds[i].stat.st_ino == new.st_ino)
>  			return true;
> @@ -484,11 +491,11 @@ static bool __get_card_for_nth_filter(int idx, struct 
> igt_device_card *card)
>   * 1. when --device arguments are present:
>   *   * device scanning is executed,
>   *   * idx-th filter (starting with 0, filters are semicolon separated) is 
> used
>   *   * if there is no idx-th filter, goto 2
>   *   * first device maching the filter is selected
> - *   * if it's already opened (for indexes = 0..idx-1) we fail with -1
> + *   * if it's already opened we fail with -1
>   *   * otherwise open the device and return the fd
>   *
>   * 2. compatibility mode - open the first DRM device we can find that is not
>   *    already opened for indexes 0..idx-1, searching up to 16 device nodes
>   *
> 
> 
> 
> 
> 


More information about the igt-dev mailing list