[PATCH i-g-t v7 3/8] lib/drmtest: allow opening cards out of order

Janusz Krzysztofik janusz.krzysztofik at linux.intel.com
Tue Feb 6 14:27:55 UTC 2024


Hi Kamil,

On Friday, 2 February 2024 16:35:12 CET Kamil Konieczny wrote:
> Current __drm_open_driver_another() implementation prevents
> opening cards out of order, for example card 3 without previous

How about "in random order" instead of "out of order", here and in commit 
message (Subject: field)?

> opens of 0..2. This can be seen with three GPU cards system and
> gem_open_basic test:
> 
> sudo IGT_DEVICE=pci:vendor=Intel,card=all build/tests/gem_basic --r 
multigpu-create-close
> 
> IGT-Version: 1.28-g2548a539e (x86_64) (Linux: 6.7.0-rc5 x86_64)
> Using IGT_SRANDOM=1706630533 for randomisation
> Opened device: /dev/dri/card0
> Starting subtest: multigpu-create-close
> <g:0> Testing creating and closing an object.
> <g:1> Opened device: /dev/dri/card1
> <g:1> Testing creating and closing an object.
> gem_basic: ../lib/drmtest.c:341: _is_already_opened: Assertion `as_idx <= 
_opened_fds_count' failed.
> Received signal SIGABRT.
> 
> Relax that condition
..., in both _is_already_opened() and _set_opened_fd() which is called next, 
...
> and 
... add ...
> some more debug prints.

Is that what you meant?

> 
> Changes in __search_and_open() was suggested by Zbigniew.

Either "Change ... was ..." or "Changes ... were ...".  However, ...

> 
> v5: removed caching for names of opened cards
>  relaxed opening of filtered devices
> v6: fixed assingment in _is_already_open() (Janusz)
>  simplify use of this function
> v7: restore check for already opened filtered card (Janusz)
> 
> Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> Cc: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
> Signed-off-by: Kamil Konieczny <kamil.konieczny at linux.intel.com>
> ---
>  lib/drmtest.c | 81 +++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 62 insertions(+), 19 deletions(-)
> 
> diff --git a/lib/drmtest.c b/lib/drmtest.c
> index 73d9159af..9101425bd 100644
> --- a/lib/drmtest.c
> +++ b/lib/drmtest.c
> @@ -322,60 +322,97 @@ static int _opened_fds_count;
>  static void _set_opened_fd(int idx, int fd)
>  {
>  	assert(idx < ARRAY_SIZE(_opened_fds));
> -	assert(idx <= _opened_fds_count);
> +	assert(idx >= 0);
>  
>  	_opened_fds[idx].fd = fd;
>  
>  	assert(fstat(fd, &_opened_fds[idx].stat) == 0);
>  
> -	_opened_fds_count = idx+1;
> +	for (int n = _opened_fds_count; n < idx; n++)
> +		_opened_fds[n].fd = -1;
> +
> +	if (idx >= _opened_fds_count)
> +		_opened_fds_count = idx + 1;
>  }
>  
> -static bool _is_already_opened(const char *path, int as_idx)
> +/* in *err returns -errno or index to _opened_fds[] */
> +static bool _is_already_opened(const char *path, int as_idx, int *err)
>  {
>  	struct stat new;
> +	bool found = false;
>  
>  	assert(as_idx < ARRAY_SIZE(_opened_fds));
> -	assert(as_idx <= _opened_fds_count);
> +	assert(as_idx >= 0);
>  
>  	/*
>  	 * we cannot even stat the device, so it's of no use - let's claim 
it's
>  	 * already opened
>  	 */
> -	if (igt_debug_on(stat(path, &new) != 0))
> +	if (igt_debug_on(stat(path, &new) != 0)) {
> +		if (err)
> +			*err = -errno;
> +
>  		return true;
> +	}
> +
> +	if (err)
> +		*err = 0;
> +
> +	for (int i = 0; i < _opened_fds_count; ++i) {
> +		if (_opened_fds[i].fd == -1)
> +			continue;
>  
> -	for (int i = 0; i < as_idx; ++i) {
>  		/* did we cross filesystem boundary? */
>  		assert(_opened_fds[i].stat.st_dev == new.st_dev);
>  
> -		if (_opened_fds[i].stat.st_ino == new.st_ino)
> -			return true;
> +		if (_opened_fds[i].stat.st_ino == new.st_ino) {
> +			found = true;
> +			if (err)
> +				*err = i;
> +
> +			if (i != as_idx)
> +				return true;
> +		}
>  	}
>  
> -	return false;
> +	return found;
>  }
>  
>  static int __search_and_open(const char *base, int offset, unsigned int 
chipset, int as_idx)
>  {
>  	const char *forced;
> +	int err;
>  
>  	forced = forced_driver();
>  	if (forced)
>  		igt_debug("Force option used: Using driver %s\n", forced);
>  
> -	for (int i = 0; i < 16; i++) {
> +	for (int i = 0, idx = -1; i < 16 && idx < as_idx; i++) {
>  		char name[80];
>  		int fd;
>  
>  		sprintf(name, "%s%u", base, i + offset);
>  
> -		if (_is_already_opened(name, as_idx))
> -			continue;
> +		if (_is_already_opened(name, as_idx, &err)) {
> +			if (err < 0)
> +				continue;
> +
> +			if (idx + 1 < as_idx) {
> +				++idx;
> +				continue;
> +			}
> +		}
>  
>  		fd = __drm_open_device(name, chipset);
> -		if (fd != -1)
> +		if (fd != -1) {
> +			++idx;
> +			if (idx < as_idx) {
> +				close(fd);
> +				continue;
> +			}
> +
>  			return fd;
> +		}
>  	}
>  
>  	return -1;
> @@ -486,7 +523,7 @@ static bool __get_card_for_nth_filter(int idx, struct 
igt_device_card *card)
>   *   * idx-th filter (starting with 0, filters are semicolon separated) is 
used
>   *   * if there is no idx-th filter, goto 2
>   *   * first device maching the filter is selected
> - *   * if it's already opened (for indexes = 0..idx-1) we fail with -1
> + *   * if it's already opened (for indexes != idx) we fail with -1
>   *   * otherwise open the device and return the fd
>   *
>   * 2. compatibility mode - open the first DRM device we can find that is 
not
> @@ -531,6 +568,7 @@ int __drm_open_driver_another(int idx, int chipset)
>  	if (chipset != DRIVER_VGEM && igt_device_filter_count() > idx) {
>  		struct igt_device_card card;
>  		bool found;
> +		int err;
>  
>  		found = __get_card_for_nth_filter(idx, &card);
>  
> @@ -539,14 +577,19 @@ int __drm_open_driver_another(int idx, int chipset)
>  			found = __get_card_for_nth_filter(idx, &card);
>  		}
>  
> -		if (!found || !strlen(card.card))
> +		if (!found || !strlen(card.card)) {
>  			igt_warn("No card matches the filter! [%s]\n",
>  				 igt_device_filter_get(idx));
> -		else if (_is_already_opened(card.card, idx))
> -			igt_warn("card maching filter %d is already 
opened\n", idx);
> -		else
> +		} else if (_is_already_opened(card.card, idx, &err)) {
> +			if (err < 0)
> +				igt_warn("cannot stat card maching 
filter %d errno=%d card=%s\n", idx, err, card.card);
> +			else if (idx != err)
> +				igt_warn("card maching filter %d is 
already opened as %d, card=%s\n", idx, err, card.card);
> +			else
> +				fd = __open_driver_exact(card.card, 
chipset);
> +		} else {
>  			fd = __open_driver_exact(card.card, chipset);
> -
> +		}
>  	} else {
>  		/* no filter for device idx, let's open whatever is 
available */
>  		fd = __open_driver("/dev/dri/card", 0, chipset, idx);
> 

I'm still not sure if all these changes are really needed for resolution of 
the issue you called in your commit description.  Doesn't a significant part 
of them belong to a future patch that is going to allow reopening cards 
already opened?  For me, the following subset of changes effectively kills the 
failure that may now pop up when opening cards in random order.

Thanks,
Janusz

---
@@ -320,34 +320,41 @@ static struct {
 static int _opened_fds_count;
 
 static void _set_opened_fd(int idx, int fd)
 {
 	assert(idx < ARRAY_SIZE(_opened_fds));
-	assert(idx <= _opened_fds_count);
+	assert(idx >= 0);
 
 	_opened_fds[idx].fd = fd;
 
 	assert(fstat(fd, &_opened_fds[idx].stat) == 0);
 
-	_opened_fds_count = idx+1;
+	for (int n = _opened_fds_count; n < idx; n++)
+		_opened_fds[n].fd = -1;
+
+	if (idx >= _opened_fds_count)
+		_opened_fds_count = idx + 1;
 }
 
 static bool _is_already_opened(const char *path, int as_idx)
 {
 	struct stat new;
 
 	assert(as_idx < ARRAY_SIZE(_opened_fds));
-	assert(as_idx <= _opened_fds_count);
+	assert(as_idx >= 0);
 
 	/*
 	 * we cannot even stat the device, so it's of no use - let's claim 
it's
 	 * already opened
 	 */
 	if (igt_debug_on(stat(path, &new) != 0))
 		return true;
 
-	for (int i = 0; i < as_idx; ++i) {
+	for (int i = 0; i < _opened_fds_count; ++i) {
+		if (_opened_fds[i].fd == -1)
+			continue;
+
 		/* did we cross filesystem boundary? */
 		assert(_opened_fds[i].stat.st_dev == new.st_dev);
 
 		if (_opened_fds[i].stat.st_ino == new.st_ino)
 			return true;
@@ -484,11 +491,11 @@ static bool __get_card_for_nth_filter(int idx, struct 
igt_device_card *card)
  * 1. when --device arguments are present:
  *   * device scanning is executed,
  *   * idx-th filter (starting with 0, filters are semicolon separated) is 
used
  *   * if there is no idx-th filter, goto 2
  *   * first device maching the filter is selected
- *   * if it's already opened (for indexes = 0..idx-1) we fail with -1
+ *   * if it's already opened we fail with -1
  *   * otherwise open the device and return the fd
  *
  * 2. compatibility mode - open the first DRM device we can find that is not
  *    already opened for indexes 0..idx-1, searching up to 16 device nodes
  *







More information about the igt-dev mailing list