[PATCH i-g-t] tests/intel/xe_fault_injection: Add new test for fault injection
Francois Dugast
francois.dugast at intel.com
Tue Sep 24 09:16:05 UTC 2024
On Mon, Sep 23, 2024 at 02:09:54PM -0400, Rodrigo Vivi wrote:
> On Fri, Sep 20, 2024 at 03:19:13PM +0200, Francois Dugast wrote:
> > Use the kernel fault injection infrastructure to test error handling
> > of xe at probe time.
> >
> > Add the following test:
> > * "function-fault-injection"
> >
> > Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> > Cc: Lucas De Marchi <lucas.demarchi at intel.com>
> > Cc: Matthew Brost <matthew.brost at intel.com>
> > Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> > Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> > ---
> > tests/intel/xe_fault_injection.c | 229 +++++++++++++++++++++++++++++++
> > tests/meson.build | 1 +
> > 2 files changed, 230 insertions(+)
> > create mode 100644 tests/intel/xe_fault_injection.c
> >
> > diff --git a/tests/intel/xe_fault_injection.c b/tests/intel/xe_fault_injection.c
> > new file mode 100644
> > index 000000000..86fb6029d
> > --- /dev/null
> > +++ b/tests/intel/xe_fault_injection.c
>
> perhaps we should name this xe_probe_faults.c ?
The intention for this file is to exercise the newly introduced use of
fault-inject.h in the driver. For now the scope is indeed limited to
probe time (hence the name of the only test in this file), but there is
no reason to keep it limited to probe time in the future.
>
> > @@ -0,0 +1,229 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2024 Intel Corporation
> > + */
> > +
> > +/**
> > + * TEST: Check fault injection
> > + * Category: Core
> > + * Mega feature: General Core features
> > + * Sub-category: driver
> > + * Test category: fault injection
> > + */
> > +
> > +#include <regex.h>
> > +
> > +#include "igt.h"
> > +#include "igt_device.h"
> > +#include "igt_kmod.h"
> > +#include "igt_sysfs.h"
> > +
> > +#define MAX_LINE_SIZE 1024
> > +#define PATH_FUNCTIONS_INJECTABLE "/sys/kernel/debug/fail_function/injectable"
> > +#define PATH_FUNCTIONS_INJECT "/sys/kernel/debug/fail_function/inject"
> > +#define PATH_FUNCTIONS_RETVAL "/sys/kernel/debug/fail_function/%s/retval"
> > +#define REGEX_XE_FUNCTIONS "^(.+)\\[xe\\]"
> > +#define INJECT_ERRNO -ENOMEM
> > +
> > +enum sysfs_action {
> > + SYSFS_BIND,
> > + SYSFS_UNBIND,
> > +};
> > +
> > +enum injection_list_action {
> > + INJECTION_LIST_ADD,
> > + INJECTION_LIST_REMOVE,
> > +};
> > +
> > +/*
> > + * The injectable file requires CONFIG_FUNCTION_ERROR_INJECTION in kernel.
> > + */
> > +static bool function_error_injection_enabled(void)
> > +{
> > + FILE *file = fopen(PATH_FUNCTIONS_INJECTABLE, "rw");
> > +
> > + if (file) {
> > + fclose(file);
> > + return true;
> > + }
> > +
> > + return false;
> > +}
> > +
> > +static int sysfs_do(enum sysfs_action action, const char pci_slot[])
> > +{
> > + int sysfs;
> > + int ret;
> > +
> > + sysfs = open("/sys/bus/pci/drivers/xe", O_DIRECTORY);
> > + igt_assert(sysfs);
> > +
> > + switch(action) {
> > + case SYSFS_BIND:
> > + ret = igt_sysfs_set(sysfs, "bind", pci_slot);
> > + break;
> > + case SYSFS_UNBIND:
> > + ret = igt_sysfs_set(sysfs, "unbind", pci_slot);
> > + break;
>
> cool and clean fn, but we should probably add it to a library place.
> And then reused in common places like xe_wedge...
Sure, will do.
>
> > + default:
> > + igt_assert(!"missing");
> > + }
> > +
> > + close(sysfs);
> > +
> > + return ret;
> > +}
> > +
> > +static void injection_list_do(enum injection_list_action action, char function_name[])
> > +{
> > + FILE *file_inject;
> > +
> > + file_inject = fopen(PATH_FUNCTIONS_INJECT, "w");
> > + igt_assert(file_inject);
> > +
> > + switch(action) {
> > + case INJECTION_LIST_ADD:
> > + fprintf(file_inject, "%s", function_name);
> > + break;
> > + case INJECTION_LIST_REMOVE:
> > + fprintf(file_inject, "!%s", function_name);
> > + break;
> > + default:
> > + igt_assert(!"missing");
> > + }
> > +
> > + fclose(file_inject);
> > +}
> > +
> > +/*
> > + * See https://docs.kernel.org/fault-injection/fault-injection.html#application-examples
> > + */
> > +static void setup_injection_fault(void)
> > +{
> > + FILE *file;
> > +
> > + file = fopen("/sys/kernel/debug/fail_function/task-filter", "w");
> > + igt_assert(file);
> > + fprintf(file, "N");
> > + fclose(file);
> > +
> > + file = fopen("/sys/kernel/debug/fail_function/probability", "w");
> > + igt_assert(file);
> > + fprintf(file, "100");
> > + fclose(file);
> > +
> > + file = fopen("/sys/kernel/debug/fail_function/interval", "w");
> > + igt_assert(file);
> > + fprintf(file, "0");
> > + fclose(file);
> > +
> > + file = fopen("/sys/kernel/debug/fail_function/times", "w");
> > + igt_assert(file);
> > + fprintf(file, "-1");
> > + fclose(file);
> > +
> > + file = fopen("/sys/kernel/debug/fail_function/space", "w");
> > + igt_assert(file);
> > + fprintf(file, "0");
> > + fclose(file);
> > +
> > + file = fopen("/sys/kernel/debug/fail_function/verbose", "w");
> > + igt_assert(file);
> > + fprintf(file, "1");
> > + fclose(file);
> > +}
> > +
> > +static void cleanup_injection_fault(void)
> > +{
> > + FILE *file;
> > +
> > + file = fopen(PATH_FUNCTIONS_INJECT, "w");
> > + igt_assert(file);
> > + fprintf(file, "\n");
> > + fclose(file);
> > +}
> > +
> > +static void set_retval(char function_name[], long long retval)
> > +{
> > + FILE *file_retval;
> > + char file_path[MAX_LINE_SIZE];
> > +
> > + sprintf(file_path, PATH_FUNCTIONS_RETVAL, function_name);
> > +
> > + file_retval = fopen(file_path, "w");
> > + igt_assert(file_retval);
> > +
> > + fprintf(file_retval, "%#016llx", retval);
> > + fclose(file_retval);
> > +}
> > +
> > +static void inject_fault_try_bind(char pci_slot[], char function_name[])
> > +{
> > + igt_info("Injecting error \"%s\" (%d) in function \"%s\"\n",
> > + strerror(-INJECT_ERRNO), INJECT_ERRNO, function_name);
> > +
> > + injection_list_do(INJECTION_LIST_ADD, function_name);
> > + set_retval(function_name, INJECT_ERRNO);
> > + sysfs_do(SYSFS_BIND, pci_slot);
> > + igt_assert_eq(-errno, INJECT_ERRNO);
> > + injection_list_do(INJECTION_LIST_REMOVE, function_name);
> > +}
> > +
> > +/**
> > + * SUBTEST: function-fault-injection-during-probe
> > + * Description: inject an error in each injectable function then reprobe driver
> > + */
> > +static void
> > +function_fault_injection_during_probe(void)
> > +{
> > + FILE *file_injectable;
> > + char line[MAX_LINE_SIZE];
> > + char function_name[MAX_LINE_SIZE];
> > + regex_t regex;
> > + regmatch_t pmatch[2];
> > + char pci_slot[MAX_LINE_SIZE];
> > + int fd;
> > +
> > + fd = drm_open_driver(DRIVER_XE);
> > + igt_device_get_pci_slot_name(fd, pci_slot);
> > + drm_close_driver(fd);
> > +
> > + igt_assert_eq(regcomp(&regex, REGEX_XE_FUNCTIONS, REG_EXTENDED), 0);
> > +
> > + file_injectable = fopen(PATH_FUNCTIONS_INJECTABLE, "r");
> > + igt_assert(file_injectable);
> > +
> > + sysfs_do(SYSFS_UNBIND, pci_slot);
> > +
> > + /*
> > + * Iterate over each error injectable function of the xe module
> > + */
> > + while ((fgets(line, MAX_LINE_SIZE, file_injectable)) != NULL) {
> > + if (regexec(&regex, line, 2, pmatch, 0) == 0) {
> > + strcpy(function_name, line);
> > + function_name[pmatch[1].rm_eo - 1] = '\0';
> > + inject_fault_try_bind(pci_slot, function_name);
> > + }
>
> hmmm... will this really work 100% of the cases? what about reset and
> wedged cases? will they get listed and attempt the rebind?
>
> But well, the good part of this is that any kernel addition doesn't
> require a change in igt... then perhaps the name of this test case
> file is right indeed...
This test covers 100% of the cases introduced by [1], where fault-inject.h
is used instead of the i915 macro to detect issues at probe time. That
is, it covers only functions marked as error injectable with the
ALLOW_ERROR_INJECTION() macro [2], not other uses of fault-inject.h such
as the reset and wedged cases.
Yes, it is dynamic: no change is required in this test after adding a
new ALLOW_ERROR_INJECTION() case in the driver, as long as the function is
meant to fail at probe time with error INJECT_ERRNO.
With the current proposal [1], execution of this test looks like this:
Starting subtest: function-fault-injection-during-probe
Opened device: /dev/dri/card0
Injecting error "Cannot allocate memory" (-12) in function "wait_for_lmem_ready"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_device_create"
Injecting error "Cannot allocate memory" (-12) in function "xe_ggtt_init_early"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_guc_ads_init"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_guc_ct_init"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_guc_log_init"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_pm_init_early"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_tile_init_early"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_uc_fw_init"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_wa_init"
Injecting error "Cannot allocate memory" (-12) in function "xe_wopcm_init"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_guc_relay_init"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_sriov_init"
Subtest function-fault-injection-during-probe: SUCCESS (2.532s)
[1] https://patchwork.freedesktop.org/series/138654/
[2] https://docs.kernel.org/fault-injection/fault-injection.html#error-injectable-functions
Francois
>
> > + }
> > +
> > + fclose(file_injectable);
> > + regfree(&regex);
> > +}
> > +
> > +igt_main
> > +{
> > + igt_fixture {
> > + igt_require(function_error_injection_enabled());
> > + setup_injection_fault();
> > + }
> > +
> > + igt_subtest("function-fault-injection") {
> > + function_fault_injection_during_probe();
> > + }
> > +
> > + igt_fixture {
> > + cleanup_injection_fault();
> > + igt_xe_driver_unload();
> > + igt_xe_driver_load(NULL);
> > + }
> > +}
> > diff --git a/tests/meson.build b/tests/meson.build
> > index 00556c9d6..cf0d4486e 100644
> > --- a/tests/meson.build
> > +++ b/tests/meson.build
> > @@ -293,6 +293,7 @@ intel_xe_progs = [
> > 'xe_exec_store',
> > 'xe_exec_threads',
> > 'xe_exercise_blt',
> > + 'xe_fault_injection',
> > 'xe_gpgpu_fill',
> > 'xe_gt_freq',
> > 'xe_huc_copy',
> > --
> > 2.43.0
> >
More information about the igt-dev
mailing list