[i-g-t] tests/intel/xe_wedged: Add new test csc-wedged
Riana Tauro
riana.tauro at intel.com
Thu Jul 10 07:23:15 UTC 2025
Hi Anirban
On 7/8/2025 2:53 PM, Sk Anirban wrote:
> Inject a CSC error through uevent to cause the Xe device to enter a wedged
Add details about survivability mode. What is the expectation of the test?
Add a link to the kernel patches.
> state. To return the device to a normal state, reload the driver, as
> the wedged state can only be resolved by rebinding/reprobing the driver.
>
> Signed-off-by: Sk Anirban <sk.anirban at intel.com>
> ---
> tests/intel/xe_wedged.c | 85 +++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 85 insertions(+)
>
> diff --git a/tests/intel/xe_wedged.c b/tests/intel/xe_wedged.c
> index 7fc7ca9eb..b29e9bcb5 100644
> --- a/tests/intel/xe_wedged.c
> +++ b/tests/intel/xe_wedged.c
> @@ -14,6 +14,7 @@
>
> #include <limits.h>
> #include <dirent.h>
> +#include <libudev.h>
>
> #include "igt.h"
> #include "igt_device.h"
> @@ -46,6 +47,46 @@ static void force_wedged(int fd)
> sleep(1);
> }
>
> +static void force_wedged_csc_error(int fd)
> +{
> + igt_debugfs_write(fd, "inject_csc_hw_error/probability", "100");
> + igt_debugfs_write(fd, "inject_csc_hw_error/times", "1");
> +
> + xe_force_gt_reset_sync(fd, 0);
> + sleep(1);
> +}
> +
> +static char bus_addr[NAME_MAX];
> +
> +static int check_survivability_mode(int fd)
> +{
> + struct pci_device *pci_dev;
> + char path[PATH_MAX];
> + int dirfd;
> +
> + pci_dev = igt_device_get_pci_device(fd);
> + snprintf(bus_addr, sizeof(bus_addr), "%04x:%02x:%02x.%01x",
> + pci_dev->domain, pci_dev->bus, pci_dev->dev, pci_dev->func);
> + snprintf(path, PATH_MAX, "/sys/bus/pci/devices/%s/survivability_mode", bus_addr);
> + dirfd = open(path, O_RDONLY);
> +
> + return dirfd;
> +}
> +
> +static void intercept_udev_events(struct udev_device *device)
> +{
> + const char *dev_path = udev_device_get_property_value(device, "DEVPATH");
> + const char *wedged = udev_device_get_property_value(device, "WEDGED");
> +
> + igt_assert_f(wedged && !strcmp(wedged, "vendor-specific"),
> + "Expected WEDGED property to be 'vendor-specific', got '%s'",
> + wedged);
> +
> + igt_assert_f(dev_path && strstr(dev_path, bus_addr),
> + "Expected bus address '%s' to be part of DEVPATH '%s'",
> + bus_addr, dev_path);
> +}
> +
> static int simple_ioctl(int fd)
> {
> int ret;
> @@ -208,6 +249,11 @@ simple_hang(int fd, struct drm_xe_sync *sync)
> * SUBTEST: basic-wedged-read
> * Description: Read wedged_mode debugfs
> */
> +/**
> + * SUBTEST: csc-wedged
> + * Description: Force Xe device wedged after injecting a failure in CSC
> + */
> +
> igt_main
> {
> struct drm_xe_engine_class_instance *hwe;
> @@ -300,12 +346,51 @@ igt_main
> igt_assert_f(str[0] != '\0', "Failed to read wedged_mode from debugfs!\n");
> }
>
> + igt_subtest("csc-wedged") {
> + struct udev *udev = udev_new();
> + struct udev_monitor *monitor;
We can use this instead:
struct udev_monitor *mon = igt_watch_uevents();
> + struct udev_device *device;
> +
> + igt_require(igt_debugfs_exists(fd, "inject_csc_hw_error/probability",
> + O_RDWR));
> +
> + igt_assert_f(check_survivability_mode(fd) < 0,
> + "survivability_mode sysfs available");
Why is this assertion needed?
> +
> + igt_debugfs_write(fd, "inject_csc_hw_error/verbose", "1");
> + igt_assert_eq(simple_ioctl(fd), 0);
Is this required?
> + ignore_wedged_in_dmesg();
Ignoring the interrupt message and runtime survivability might also be
needed. This can be checked once the kernel patches are merged.
> +
> + monitor = udev_monitor_new_from_netlink(udev, "kernel");
> + udev_monitor_enable_receiving(monitor);
> +
> + force_wedged_csc_error(fd);
> +
> + device = udev_monitor_receive_device(monitor);
> + intercept_udev_events(device);
> +
> + igt_assert_f(check_survivability_mode(fd) >= 0,
> + "survivability_mode sysfs not available");
You can add both of these in a single function
(check_runtime_survivability_mode).
Thanks
Riana
> +
> + drm_close_driver(fd);
> + igt_kmod_rebind("xe", pci_slot);
> + fd = drm_open_driver(DRIVER_XE);
> + igt_assert_eq(simple_ioctl(fd), 0);
> + xe_for_each_engine(fd, hwe)
> + simple_exec(fd, hwe);
> + }
> +
> igt_fixture {
> if (igt_debugfs_exists(fd, "fail_gt_reset/probability", O_RDWR)) {
> igt_debugfs_write(fd, "fail_gt_reset/probability", "0");
> igt_debugfs_write(fd, "fail_gt_reset/times", "1");
> }
>
> + if (igt_debugfs_exists(fd, "inject_csc_hw_error/probability", O_RDWR)) {
> + igt_debugfs_write(fd, "inject_csc_hw_error/probability", "0");
> + igt_debugfs_write(fd, "inject_csc_hw_error/times", "1");
> + }
> +
> /* Tests might have failed, force a rebind before exiting */
> drm_close_driver(fd);
> igt_kmod_rebind("xe", pci_slot);
More information about the igt-dev
mailing list