[igt-dev] [PATCH i-g-t] tests/device_reset: Test device sysfs reset

Vivi, Rodrigo rodrigo.vivi at intel.com
Wed Jul 8 12:21:35 UTC 2020



> On Jul 8, 2020, at 2:30 AM, Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com> wrote:
> 
> On Wed, 2020-07-08 at 08:57 +0000, Bernatowicz, Marcin wrote:
>> On Tue, 2020-07-07 at 14:14 -0700, Rodrigo Vivi wrote:
>>> On Tue, Jul 07, 2020 at 05:08:07PM +0200, Janusz Krzysztofik wrote:
>>>> On Tue, 2020-07-07 at 16:58 +0200, Marcin Bernatowicz wrote:
>>>>> Device reset is initiated by writing "1" to reset sysfs file,
>>>>> which should initiate PCI device Function Level Reset
>>>>> if supported by device.
>>>>> 
>>>>> Test scenarios:
>>>>> 1. unbind driver from device, initiate sysfs reset, rebind driver
>>>>> to
>>>>> device
>>>>> 2. device reset with bound driver
>>>>> 
>>>>> v2: removed unbind-rebind (duplicates
>>>>> core_hotunplug@unbind-rebind)
>>>>>    added healthcheck to each test (Janusz)
>>>>> v3: after review corrections (renamed sysfs_fds to device_fds,
>>>>>    corrected not closed file descriptor, removed variable length
>>>>> array)
>>>>> v4: updated description (Martin), snprintf corrections (Janusz),
>>>>>    reset-bound added to blacklist as it hangs several gens of
>>>>> hosts (Tomi)
>>>>> 
>>>>> Signed-off-by: Marcin Bernatowicz <marcin.bernatowicz at intel.com>
>>>>> ---
>>>>> tests/device_reset.c         | 281
>>>>> +++++++++++++++++++++++++++++++++++
>>>>> tests/intel-ci/blacklist.txt |   3 +
>>>>> tests/meson.build            |   1 +
>>>>> 3 files changed, 285 insertions(+)
>>>>> create mode 100644 tests/device_reset.c
>>>>> 
>>>>> diff --git a/tests/device_reset.c b/tests/device_reset.c
>>>>> new file mode 100644
>>>>> index 000000000..b1181b3be
>>>>> --- /dev/null
>>>>> +++ b/tests/device_reset.c
>>>>> @@ -0,0 +1,281 @@
>>>>> +// SPDX-License-Identifier: MIT
>>>>> +/*
>>>>> + * Copyright(c) 2020 Intel Corporation. All rights reserved.
>>>>> + */
>>>>> +#include <fcntl.h>
>>>>> +#include <sys/ioctl.h>
>>>>> +#include <sys/stat.h>
>>>>> +
>>>>> +#include "i915/gem.h"
>>>>> +#include "igt.h"
>>>>> +#include "igt_device_scan.h"
>>>>> +#include "igt_sysfs.h"
>>>>> +
>>>>> +IGT_TEST_DESCRIPTION("Examine behavior of a driver on device
>>>>> sysfs reset");
>>>>> +
>>>>> +
>>>>> +#define DEV_PATH_LEN 80
>>>>> +#define DEV_BUS_ADDR_LEN 13 /* addr has form 0000:00:00.0 */
>>>>> +
>>>>> +/**
>>>>> + * Helper structure containing file descriptors
>>>>> + * and bus address related to tested device
>>>>> + */
>>>>> +struct device_fds {
>>>>> +	struct {
>>>>> +		int dev;
>>>>> +		int dev_dir;
>>>>> +		int drv_dir;
>>>>> +	} fds;
>>>>> +	char dev_bus_addr[DEV_BUS_ADDR_LEN];
>>>>> +};
>>>>> +
>>>>> +static int __open_sysfs_dir(int fd, const char* path)
>>>>> +{
>>>>> +	int sysfs;
>>>>> +
>>>>> +	sysfs = igt_sysfs_open(fd);
>>>>> +	if (sysfs < 0) {
>>>>> +		return -1;
>>>>> +	}
>>>>> +
>>>>> +	fd = openat(sysfs, path, O_DIRECTORY);
>>>>> +	close(sysfs);
>>>>> +	return fd;
>>>>> +}
>>>>> +
>>>>> +static int open_device_sysfs_dir(int fd)
>>>>> +{
>>>>> +	return __open_sysfs_dir(fd, "device");
>>>>> +}
>>>>> +
>>>>> +static int open_driver_sysfs_dir(int fd)
>>>>> +{
>>>>> +	return __open_sysfs_dir(fd, "device/driver");
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * device_sysfs_path:
>>>>> + * @fd: opened device file descriptor
>>>>> + * @path: buffer to store sysfs path to device directory
>>>>> + *
>>>>> + * Returns:
>>>>> + * On successful path resolution, sysfs path to device
>>>>> directory,
>>>>> + * NULL otherwise
>>>>> + */
>>>>> +static char *device_sysfs_path(int fd, char *path)
>>>>> +{
>>>>> +	char sysfs[DEV_PATH_LEN];
>>>>> +
>>>>> +	if (!igt_sysfs_path(fd, sysfs, sizeof(sysfs)))
>>>>> +		return NULL;
>>>>> +
>>>>> +	if (DEV_PATH_LEN <= (strlen(sysfs) + strlen("/device")))
>>>>> +		return NULL;
>>>>> +
>>>>> +	strcat(sysfs, "/device");
>>>>> +
>>>>> +	return realpath(sysfs, path);
>>>>> +}
>>>>> +
>>>>> +static void init_device_fds(struct device_fds *dev)
>>>>> +{
>>>>> +	char dev_path[PATH_MAX];
>>>>> +	char *addr_pos;
>>>>> +
>>>>> +	igt_debug("open device\n");
>>>>> +	/**
>>>>> +	 * As subtests must be able to close examined devices
>>>>> +	 * completely, don't use drm_open_driver() as it keeps
>>>>> +	 * a device file descriptor open for exit handler use.
>>>>> +	 */
>>>>> +	dev->fds.dev = __drm_open_driver(DRIVER_ANY);
>>>>> +	igt_assert_fd(dev->fds.dev);
>>>>> +	if (is_i915_device(dev->fds.dev))
>>>>> +		igt_require_gem(dev->fds.dev);
>>>>> +
>>>>> +	igt_assert(device_sysfs_path(dev->fds.dev, dev_path));
>>>>> +	addr_pos = strrchr(dev_path, '/');
>>>>> +	igt_assert(addr_pos);
>>>>> +	igt_assert_eq(sizeof(dev->dev_bus_addr) - 1,
>>>>> +		      snprintf(dev->dev_bus_addr, sizeof(dev-
>>>>>> dev_bus_addr),
>>>>> +			       "%s", addr_pos + 1));
>>>>> +
>>>>> +	dev->fds.dev_dir = open_device_sysfs_dir(dev->fds.dev);
>>>>> +	igt_assert_fd(dev->fds.dev_dir);
>>>>> +
>>>>> +	dev->fds.drv_dir = open_driver_sysfs_dir(dev->fds.dev);
>>>>> +	igt_assert_fd(dev->fds.drv_dir);
>>>>> +}
>>>>> +
>>>>> +static int close_if_opened(int *fd)
>>>>> +{
>>>>> +	int rc = 0;
>>>>> +
>>>>> +	if (fd && *fd != -1) {
>>>>> +		rc = close(*fd);
>>>>> +		*fd = -1;
>>>>> +	}
>>>>> +	return rc;
>>>>> +}
>>>>> +
>>>>> +static void cleanup_device_fds(struct device_fds *dev)
>>>>> +{
>>>>> +	igt_ignore_warn(close_if_opened(&dev->fds.dev));
>>>>> +	igt_ignore_warn(close_if_opened(&dev->fds.dev_dir));
>>>>> +	igt_ignore_warn(close_if_opened(&dev->fds.drv_dir));
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * is_sysfs_reset_supported:
>>>>> + * @fd: opened device file descriptor
>>>>> + *
>>>>> + * Check if device supports reset based on sysfs file presence.
>>>>> + *
>>>>> + * Returns:
>>>>> + * True if device supports reset, false otherwise.
>>>>> + */
>>>>> +static bool is_sysfs_reset_supported(int fd)
>>>>> +{
>>>>> +	struct stat st;
>>>>> +	int rc;
>>>>> +	int sysfs;
>>>>> +	int reset_fd = -1;
>>>>> +
>>>>> +	sysfs = igt_sysfs_open(fd);
>>>>> +
>>>>> +	if (sysfs >= 0) {
>>>>> +		reset_fd = openat(sysfs, "device/reset", O_WRONLY);
>>>>> +		close(sysfs);
>>>>> +	}
>>>>> +
>>>>> +	if (reset_fd < 0)
>>>>> +		return false;
>>>>> +
>>>>> +	rc = fstat(reset_fd, &st);
>>>>> +	close(reset_fd);
>>>>> +
>>>>> +	if (rc || !S_ISREG(st.st_mode))
>>>>> +		return false;
>>>>> +
>>>>> +	return true;
>>>>> +}
>>>>> +
>>>>> +/* Unbind the driver from the device */
>>>>> +static void driver_unbind(struct device_fds *dev)
>>>>> +{
>>>>> +	igt_debug("unbind the driver from the device\n");
>>>>> +	igt_assert(igt_sysfs_set(dev->fds.drv_dir, "unbind",
>>>>> +		   dev->dev_bus_addr));
>>>>> +}
>>>>> +
>>>>> +/* Re-bind the driver to the device */
>>>>> +static void driver_bind(struct device_fds *dev)
>>>>> +{
>>>>> +	igt_debug("rebind the driver to the device\n");
>>>>> +	igt_abort_on_f(!igt_sysfs_set(dev->fds.drv_dir, "bind",
>>>>> +		       dev->dev_bus_addr), "driver rebind failed");
>>>>> +}
>>>>> +
>>>>> +/* Initiate device reset */
>>>>> +static void initiate_device_reset(struct device_fds *dev)
>>>>> +{
>>>>> +	igt_debug("reset device\n");
>>>>> +	igt_assert(igt_sysfs_set(dev->fds.dev_dir, "reset", "1"));
>>>>> +}
>>>>> +
>>>>> +static bool is_i915_wedged(int i915)
>>>>> +{
>>>>> +	int err = 0;
>>>>> +
>>>>> +	if (ioctl(i915, DRM_IOCTL_I915_GEM_THROTTLE))
>>>>> +		err = -errno;
>>>>> +	return err == -EIO;
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * healthcheck:
>>>>> + * @dev: structure with device descriptor, if descriptor equals
>>>>> -1
>>>>> + * 	 the device is reopened
>>>>> + */
>>>>> +static void healthcheck(struct device_fds *dev)
>>>>> +{
>>>>> +	if (dev->fds.dev == -1) {
>>>>> +		/* refresh device list */
>>>>> +		igt_devices_scan(true);
>>>>> +		igt_debug("reopen the device\n");
>>>>> +		dev->fds.dev = __drm_open_driver(DRIVER_ANY);
>>>>> +	}
>>>>> +	igt_assert_fd(dev->fds.dev);
>>>>> +
>>>>> +	if (is_i915_device(dev->fds.dev))
>>>>> +		igt_assert(!is_i915_wedged(dev->fds.dev));
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * set_device_filter:
>>>>> + *
>>>>> + * Sets device filter to ensure subtests always reopen the same
>>>>> device
>>>>> + *
>>>>> + * @dev_path: path to device under tests
>>>>> + */
>>>>> +static void set_device_filter(const char* dev_path)
>>>>> +{
>>>>> +#define FILTER_PREFIX_LEN 4
>>>>> +	char filter[PATH_MAX + FILTER_PREFIX_LEN];
>>>>> +
>>>>> +	igt_assert_lt(FILTER_PREFIX_LEN, snprintf(filter,
>>>>> sizeof(filter),
>>>>> +						  "sys:%s", dev_path));
>>>>> +	igt_device_filter_free_all();
>>>>> +	igt_assert_eq(igt_device_filter_add(filter), 1);
>>>>> +}
>>>>> +
>>>>> +static void unbind_reset_rebind(struct device_fds *dev)
>>>>> +{
>>>>> +	igt_debug("close the device\n");
>>>>> +	close_if_opened(&dev->fds.dev);
>>>>> +
>>>>> +	driver_unbind(dev);
>>>>> +
>>>>> +	initiate_device_reset(dev);
>>>>> +
>>>>> +	driver_bind(dev);
>>>>> +}
>>>>> +
>>>>> +igt_main
>>>>> +{
>>>>> +	struct device_fds dev = { .fds = {-1, -1, -1}, .dev_bus_addr =
>>>>> {0}};
>>>>> +
>>>>> +	igt_fixture {
>>>>> +		char dev_path[PATH_MAX];
>>>>> +
>>>>> +		igt_debug("opening device\n");
>>>>> +		init_device_fds(&dev);
>>>>> +
>>>>> +		/* Make sure subtests always reopen the same device */
>>>>> +		igt_assert(device_sysfs_path(dev.fds.dev, dev_path));
>>>>> +		set_device_filter(dev_path);
>>>>> +
>>>>> +		igt_skip_on(!is_sysfs_reset_supported(dev.fds.dev));
>>>>> +
>>>>> +		igt_set_timeout(60, "device reset tests timed out after
>>>>> 60s");
>>>>> +	}
>>>>> +
>>>>> +	igt_describe("Unbinds driver from device, initiates reset"
>>>>> +		     " then rebinds driver to device");
>>>>> +	igt_subtest("unbind-reset-rebind") {
>>>>> +		unbind_reset_rebind(&dev);
>>>>> +		healthcheck(&dev);
>>>>> +	}
>>>>> +
>>>>> +	igt_describe("Resets device with bound driver");
>>>>> +	igt_subtest("reset-bound") {
>>>>> +		initiate_device_reset(&dev);
>>>>> +		healthcheck(&dev);
>>>>> +	}
>>>>> +
>>>>> +	igt_fixture {
>>>>> +		igt_reset_timeout();
>>>>> +		cleanup_device_fds(&dev);
>>>>> +	}
>>>>> +}
>>>>> diff --git a/tests/intel-ci/blacklist.txt b/tests/intel-
>>>>> ci/blacklist.txt
>>>>> index ecbec5080..f9a57cb54 100644
>>>>> --- a/tests/intel-ci/blacklist.txt
>>>>> +++ b/tests/intel-ci/blacklist.txt
>>>>> @@ -121,3 +121,6 @@ igt@perf_pmu@cpu-hotplug
>>>>> # Currently fails and leaves the machine in a very bad state,
>>>>> and
>>>>> # causes coverage loss for other tests.
>>>>> igt@core_hotunplug@.*
>>>>> +
>>>>> +# hangs several gens of hosts, and has no immediate fix
>>>>> +igt@device_reset@reset-bound
>>> 
>>> I wonder if we should filter by platform?
>>> if gen >= 12? or if dgfx?
>>> 
>>> or is it really random?
>> 
>> I can restrict the tests on intel platforms with
>> 
>> if (is_i915_device(fd))
>>    igt_skip_on(intel_gen(intel_get_drm_devid(fd)) < 12);
> 
> That seems pointless to me unless igt@device_reset@reset-bound works on
> gen12+ while it doesn't work on platforms we don't care about.

I see your point....

But I was thinking that with this check we might not need to add this test
to the blacklist, and we would have at least some CI coverage on the platforms
where we know it currently works.

If it is on the blacklist, we will never have CI coverage for it.

> 
> Thanks,
> Janusz
> 
> 
>> 
>>>>> \ No newline at end of file
>>>> 
>>>> The only thing I'm not sure about is whether that missing newline
>>>> will break CI scripts;
>>>> other than that, my R-b still applies.
>>>> 
>>>> Reviewed-by: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
>>> 
>>> anyway it looks good to me
>>> 
>>> Acked-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
>>> 
>>> 
>>> 
>>>> Thanks,
>>>> Janusz
>>>> 
>>>> 
>>>>> diff --git a/tests/meson.build b/tests/meson.build
>>>>> index 172d18e59..d9253f5f9 100644
>>>>> --- a/tests/meson.build
>>>>> +++ b/tests/meson.build
>>>>> @@ -8,6 +8,7 @@ test_progs = [
>>>>> 	'core_setmaster_vs_auth',
>>>>> 	'debugfs_test',
>>>>> 	'dmabuf',
>>>>> +	'device_reset',
>>>>> 	'drm_import_export',
>>>>> 	'drm_mm',
>>>>> 	'drm_read',



More information about the igt-dev mailing list