[i-g-t] tests/intel/xe_wedged: Add new test csc-wedged
Anirban, Sk
sk.anirban at intel.com
Mon Jul 14 05:11:43 UTC 2025
Hi,
On 10-07-2025 12:53, Riana Tauro wrote:
> Hi Anirban
>
> On 7/8/2025 2:53 PM, Sk Anirban wrote:
>> Inject a CSC error through uevent to cause the Xe device to enter a
>> wedged
>
> Add details about survivability mode. What is the expectation of the test?
>
> Add a link to kernel patches
Sure, I will add this.
>
>> state. To return the device to a normal state, reload the driver, as
>> the wedged state can only be resolved by rebinding/reprobing the driver.
>>
>> Signed-off-by: Sk Anirban <sk.anirban at intel.com>
>> ---
>> tests/intel/xe_wedged.c | 85 +++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 85 insertions(+)
>>
>> diff --git a/tests/intel/xe_wedged.c b/tests/intel/xe_wedged.c
>> index 7fc7ca9eb..b29e9bcb5 100644
>> --- a/tests/intel/xe_wedged.c
>> +++ b/tests/intel/xe_wedged.c
>> @@ -14,6 +14,7 @@
>> #include <limits.h>
>> #include <dirent.h>
>> +#include <libudev.h>
>> #include "igt.h"
>> #include "igt_device.h"
>> @@ -46,6 +47,46 @@ static void force_wedged(int fd)
>> sleep(1);
>> }
>> +static void force_wedged_csc_error(int fd)
>> +{
>> + igt_debugfs_write(fd, "inject_csc_hw_error/probability", "100");
>> + igt_debugfs_write(fd, "inject_csc_hw_error/times", "1");
>> +
>> + xe_force_gt_reset_sync(fd, 0);
>> + sleep(1);
>> +}
>> +
>> +static char bus_addr[NAME_MAX];
>> +
>> +static int check_survivability_mode(int fd)
>> +{
>> + struct pci_device *pci_dev;
>> + char path[PATH_MAX];
>> + int dirfd;
>> +
>> + pci_dev = igt_device_get_pci_device(fd);
>> + snprintf(bus_addr, sizeof(bus_addr), "%04x:%02x:%02x.%01x",
>> + pci_dev->domain, pci_dev->bus, pci_dev->dev, pci_dev->func);
>> + snprintf(path, PATH_MAX,
>> "/sys/bus/pci/devices/%s/survivability_mode", bus_addr);
>> + dirfd = open(path, O_RDONLY);
>> +
>> + return dirfd;
>> +}
>> +
>> +static void intercept_udev_events(struct udev_device *device)
>> +{
>> + const char *dev_path = udev_device_get_property_value(device,
>> "DEVPATH");
>> + const char *wedged = udev_device_get_property_value(device,
>> "WEDGED");
>> +
>> + igt_assert_f(wedged && !strcmp(wedged, "vendor-specific"),
>> + "Expected WEDGED property to be 'vendor-specific', got
>> '%s'",
>> + wedged);
>> +
>> + igt_assert_f(dev_path && strstr(dev_path, bus_addr),
>> + "Expected bus address '%s' to be part of DEVPATH '%s'",
>> + bus_addr, dev_path);
>> +}
>> +
>> static int simple_ioctl(int fd)
>> {
>> int ret;
>> @@ -208,6 +249,11 @@ simple_hang(int fd, struct drm_xe_sync *sync)
>> * SUBTEST: basic-wedged-read
>> * Description: Read wedged_mode debugfs
>> */
>> +/**
>> + * SUBTEST: csc-wedged
>> + * Description: Force Xe device wedged after injecting a failure in CSC
>> + */
>> +
>> igt_main
>> {
>> struct drm_xe_engine_class_instance *hwe;
>> @@ -300,12 +346,51 @@ igt_main
>> igt_assert_f(str[0] != '\0', "Failed to read wedged_mode
>> from debugfs!\n");
>> }
>> + igt_subtest("csc-wedged") {
>> + struct udev *udev = udev_new();
>> + struct udev_monitor *monitor;
>
> We can use this instead
>
> struct udev_monitor *mon = igt_watch_uevents();
Sure, I will check this.
>
>
>> + struct udev_device *device;
>> +
>> + igt_require(igt_debugfs_exists(fd,
>> "inject_csc_hw_error/probability",
>> + O_RDWR));
>> +
>> + igt_assert_f(check_survivability_mode(fd) < 0,
>> + "survivability_mode sysfs available");
>
> why?
Just to check that the node is not available before the CSC error wedges the device.
>> +
>> + igt_debugfs_write(fd, "inject_csc_hw_error/verbose", "1");
>> + igt_assert_eq(simple_ioctl(fd), 0);
> Is this required?
As discussed offline, I will remove this.
>> + ignore_wedged_in_dmesg();
>
> Ignoring the interrupt message and runtime survivability also might be
> needed. Can check once kernel patches are merged
Sure, I'll align this with the kernel patches once they are merged.
>> +
>> + monitor = udev_monitor_new_from_netlink(udev, "kernel");
>> + udev_monitor_enable_receiving(monitor);
>> +
>> + force_wedged_csc_error(fd);
>> +
>> + device = udev_monitor_receive_device(monitor);
>
>
>> + intercept_udev_events(device);
>> +
>> + igt_assert_f(check_survivability_mode(fd) >= 0,
>> + "survivability_mode sysfs not available");
>
> you can add both of these in a single function
> (check_runtime_survivability_mode)
Sure, I will modify this.
>
> Thanks
> Riana
Thanks,
Anirban
>> +
>> + drm_close_driver(fd);
>> + igt_kmod_rebind("xe", pci_slot);
>> + fd = drm_open_driver(DRIVER_XE);
>> + igt_assert_eq(simple_ioctl(fd), 0);
>> + xe_for_each_engine(fd, hwe)
>> + simple_exec(fd, hwe);
>> + }
>> +
>> igt_fixture {
>> if (igt_debugfs_exists(fd, "fail_gt_reset/probability",
>> O_RDWR)) {
>> igt_debugfs_write(fd, "fail_gt_reset/probability", "0");
>> igt_debugfs_write(fd, "fail_gt_reset/times", "1");
>> }
>> + if (igt_debugfs_exists(fd,
>> "inject_csc_hw_error/probability", O_RDWR)) {
>> + igt_debugfs_write(fd, "inject_csc_hw_error/probability",
>> "0");
>> + igt_debugfs_write(fd, "inject_csc_hw_error/times", "1");
>> + }
>> +
>> /* Tests might have failed, force a rebind before exiting */
>> drm_close_driver(fd);
>> igt_kmod_rebind("xe", pci_slot);
>
More information about the igt-dev
mailing list