[v3, i-g-t, 1/2] tests/intel/xe_sriov_flr: Add parallel FLR subtest for SR-IOV VFs
Laguna, Lukasz
lukasz.laguna at intel.com
Wed Jan 29 11:13:51 UTC 2025
On 1/16/2025 20:29, Marcin Bernatowicz wrote:
> Introduce a new subtest flr-vfs-parallel to validate parallel FLR
> execution on all VFs. This subtest ensures correct behavior during
> simultaneous resets.
>
> Refactor verify_flr to accept an execution strategy function pointer,
> allowing for both sequential and parallel FLR strategies.
>
> Update clear_tests to use the new execution strategy approach and
> modify existing subtests to utilize the sequential FLR strategy.
>
> v2: Reintroduce condition to reinitialize test data only if more VFs
> remain to be tested (omitted when extracting execute_sequential_flr).
> v3: Introduce threaded FLR initiation to achieve better parallelism
> by mitigating 100ms reset delays.(Lukasz)
>
> Signed-off-by: Marcin Bernatowicz <marcin.bernatowicz at linux.intel.com>
> Cc: Adam Miszczak <adam.miszczak at linux.intel.com>
> Cc: Jakub Kolakowski <jakub1.kolakowski at intel.com>
> Cc: Marcin Bernatowicz <marcin.bernatowicz at linux.intel.com>
> Cc: Michał Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Michał Winiarski <michal.winiarski at intel.com>
> Cc: Narasimha C V <narasimha.c.v at intel.com>
> Cc: Piotr Piórkowski <piotr.piorkowski at intel.com>
> Cc: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
> Cc: Tomasz Lis <tomasz.lis at intel.com>
> ---
> tests/intel/xe_sriov_flr.c | 210 ++++++++++++++++++++++++++++++++-----
> 1 file changed, 186 insertions(+), 24 deletions(-)
>
> diff --git a/tests/intel/xe_sriov_flr.c b/tests/intel/xe_sriov_flr.c
> index 550d58bb9..a8d35be31 100644
> --- a/tests/intel/xe_sriov_flr.c
> +++ b/tests/intel/xe_sriov_flr.c
> @@ -4,6 +4,7 @@
> */
>
> #include <fcntl.h>
> +#include <pthread.h>
> #include <sys/stat.h>
> #include "drmtest.h"
> #include "igt_core.h"
> @@ -35,6 +36,11 @@
> * Description:
> * Sequentially performs FLR on each VF to verify isolation and
> * clearing of LMEM, GGTT, and SCRATCH_REGS on the reset VF only.
> + *
> + * SUBTEST: flr-vfs-parallel
> + * Run type: FULL
> + * Description:
> + * Executes FLR on all VFs simultaneously to validate correct behavior during parallel resets.
> */
>
> IGT_TEST_DESCRIPTION("Xe tests for SR-IOV VF FLR (Functional Level Reset)");
> @@ -210,6 +216,26 @@ static void subchecks_report_results(struct subcheck *checks, int num_checks)
> igt_skip_on(skips == num_checks);
> }
>
> +/**
> + * flr_exec_strategy - Function pointer for FLR execution strategy
> + * @pf_fd: File descriptor for the Physical Function (PF).
> + * @num_vfs: Total number of Virtual Functions (VFs) to test.
> + * @checks: Array of subchecks.
> + * @num_checks: Number of subchecks.
> + * @wait_flr_ms: Time to wait (in milliseconds) for FLR to complete
> + *
> + * Defines a strategy for executing FLRs (Functional Level Resets)
> + * across multiple VFs. The strategy determines the order and
> + * manner (e.g., sequential or parallel) in which FLRs are performed.
> + * It is expected to initiate FLRs and handle related operations,
> + * such as verifying and preparing subchecks.
> + *
> + * Return: The ID of the last VF for which FLR was successfully initiated.
> + */
> +typedef int (*flr_exec_strategy)(int pf_fd, int num_vfs,
> + struct subcheck *checks, int num_checks,
> + const int wait_flr_ms);
> +
> /**
> * verify_flr - Orchestrates the verification of Function Level Reset (FLR)
> * across multiple Virtual Functions (VFs).
> @@ -222,18 +248,20 @@ static void subchecks_report_results(struct subcheck *checks, int num_checks)
> * @num_vfs: Total number of Virtual Functions (VFs) to test.
> * @checks: Array of subchecks.
> * @num_checks: Number of subchecks.
> + * @flr_exec_strategy: Execution strategy for FLR (e.g., sequential or parallel).
> *
> * Detailed Workflow:
> * - Initializes and prepares VFs for testing.
> - * - Iterates through each VF, performing FLR, and verifies that only
> - * the reset VF is affected while others remain unchanged.
> - * - Reinitializes test data for the FLRed VF if there are more VFs to test.
> - * - Continues the process until all VFs are tested.
> - * - Handles any test failures or early exits, cleans up, and reports results.
> + * - Executes the FLR operation using the provided execution strategy
> + * (e.g., sequential or parallel) and validates that the reset VF behaves
> + * as expected.
> + * - Cleans up resources and reports results after all VFs have been tested
> + * or in the case of an early exit.
> *
> * A timeout is used to wait for FLR operations to complete.
> */
> -static void verify_flr(int pf_fd, int num_vfs, struct subcheck *checks, int num_checks)
> +static void verify_flr(int pf_fd, int num_vfs, struct subcheck *checks,
> + int num_checks, flr_exec_strategy exec_strategy)
> {
> const int wait_flr_ms = 200;
> int i, vf_id, flr_vf_id = -1;
> @@ -242,6 +270,7 @@ static void verify_flr(int pf_fd, int num_vfs, struct subcheck *checks, int num_
> igt_sriov_enable_vfs(pf_fd, num_vfs);
> if (igt_warn_on(!igt_sriov_device_reset_exists(pf_fd, 1)))
> goto disable_vfs;
> +
> /* Refresh PCI state */
> if (igt_warn_on(igt_pci_system_reinit()))
> goto disable_vfs;
> @@ -257,14 +286,34 @@ static void verify_flr(int pf_fd, int num_vfs, struct subcheck *checks, int num_
> if (no_subchecks_can_proceed(checks, num_checks))
> goto cleanup;
>
> - flr_vf_id = 1;
> + /* Execute the chosen FLR strategy */
> + flr_vf_id = exec_strategy(pf_fd, num_vfs, checks, num_checks, wait_flr_ms);
> +
> +cleanup:
> + for (i = 0; i < num_checks; ++i)
> + checks[i].cleanup(checks[i].data);
> +
> +disable_vfs:
> + igt_sriov_disable_vfs(pf_fd);
> +
> + if (flr_vf_id > 0 || no_subchecks_can_proceed(checks, num_checks))
> + subchecks_report_results(checks, num_checks);
> + else
> + igt_skip("No checks executed\n");
> +}
> +
> +static int execute_sequential_flr(int pf_fd, int num_vfs,
> + struct subcheck *checks, int num_checks,
> + const int wait_flr_ms)
> +{
> + int i, vf_id, flr_vf_id = 1;
>
> do {
> if (igt_warn_on_f(!igt_sriov_device_reset(pf_fd, flr_vf_id),
> "Initiating VF%u FLR failed\n", flr_vf_id))
> - goto cleanup;
> + break;
>
> - /* assume FLR is finished after wait_flr_ms */
> + /* Assume FLR is finished after wait_flr_ms */
> usleep(wait_flr_ms * 1000);
>
> for (vf_id = 1; vf_id <= num_vfs; ++vf_id)
> @@ -272,28 +321,132 @@ static void verify_flr(int pf_fd, int num_vfs, struct subcheck *checks, int num_
> if (subcheck_can_proceed(&checks[i]))
> checks[i].verify_vf(vf_id, flr_vf_id, checks[i].data);
>
> - /* reinitialize test data for FLRed VF */
> + /* Reinitialize test data for the FLRed VF */
> if (flr_vf_id < num_vfs)
> for (i = 0; i < num_checks; ++i)
> if (subcheck_can_proceed(&checks[i]))
> checks[i].prepare_vf(flr_vf_id, checks[i].data);
>
> if (no_subchecks_can_proceed(checks, num_checks))
> - goto cleanup;
> + break;
>
> } while (++flr_vf_id <= num_vfs);
>
> -cleanup:
> - for (i = 0; i < num_checks; ++i)
> - checks[i].cleanup(checks[i].data);
> + return flr_vf_id - 1;
> +}
>
> -disable_vfs:
> - igt_sriov_disable_vfs(pf_fd);
> +pthread_mutex_t signal_mutex = PTHREAD_MUTEX_INITIALIZER;
> +pthread_cond_t signal_cond = PTHREAD_COND_INITIALIZER;
>
> - if (flr_vf_id > 1 || no_subchecks_can_proceed(checks, num_checks))
> - subchecks_report_results(checks, num_checks);
> - else
> - igt_skip("No checks executed\n");
> +enum thread_signal {
> + SIGNAL_WAIT,
> + SIGNAL_START,
> + SIGNAL_SKIP
> +} thread_signal = SIGNAL_WAIT;
> +
> +struct flr_thread_data {
> + int pf_fd;
> + int vf_id;
> + int flr_instance;
> + int result;
> +};
> +
> +static void *flr_thread(void *arg)
> +{
> + struct flr_thread_data *data = (struct flr_thread_data *)arg;
> +
> + pthread_mutex_lock(&signal_mutex);
> + while (thread_signal == SIGNAL_WAIT)
> + pthread_cond_wait(&signal_cond, &signal_mutex);
> + pthread_mutex_unlock(&signal_mutex);
> +
> + if (thread_signal == SIGNAL_START &&
> + igt_warn_on_f(!igt_sriov_device_reset(data->pf_fd, data->vf_id),
> + "Initiating VF%u FLR failed (flr_instance=%u)\n",
> + data->vf_id, data->flr_instance))
> + data->result = -1;
> +
> + return NULL;
> +}
> +
> +static int execute_parallel_flr_(int pf_fd, int num_vfs,
> + struct subcheck *checks,
> + int num_checks, const int wait_flr_ms,
> + unsigned int num_flrs_per_vf)
> +{
> + pthread_t threads[num_vfs * num_flrs_per_vf];
> + struct flr_thread_data thread_data[num_vfs * num_flrs_per_vf];
nit: you can store num_vfs * num_flrs_per_vf in variable
> + int vf_id = 0, last_vf_id = 0;
> + int i, j, k, created_threads = 0;
> +
> + igt_assert(num_flrs_per_vf > 0);
> +
> + for (i = 0; i < num_vfs; ++i) {
> + for (j = 0; j < num_flrs_per_vf; ++j) {
> + thread_data[created_threads].pf_fd = pf_fd;
> + thread_data[created_threads].vf_id = i + 1; // VF IDs are 1-based
> + thread_data[created_threads].flr_instance = j;
> + thread_data[created_threads].result = 0;
> +
> + if (pthread_create(&threads[created_threads], NULL,
> + flr_thread,
> + &thread_data[created_threads])) {
> + last_vf_id = i + 1;
> +
> + goto cleanup_threads;
> + } else {
> + created_threads++;
> + }
> + }
> + }
> +
> +cleanup_threads:
> + pthread_mutex_lock(&signal_mutex);
> + thread_signal = (created_threads == num_vfs * num_flrs_per_vf) ?
> + SIGNAL_START :
> + SIGNAL_SKIP;
> + pthread_cond_broadcast(&signal_cond);
> + pthread_mutex_unlock(&signal_mutex);
> +
> + for (i = 0; i < created_threads; ++i)
> + pthread_join(threads[i], NULL);
> +
> + if (last_vf_id) {
> + for (k = 0; k < num_checks; ++k)
> + set_skip_reason(checks[k].data,
> + "Thread creation failed for VF%u\n", last_vf_id);
> + return 0;
> + }
> +
> + /* Assume FLRs finished after wait_flr_ms */
> + usleep(wait_flr_ms * 1000);
> +
> + /* Verify results */
> + for (i = 0; i < created_threads; ++i) {
> + vf_id = thread_data[i].vf_id;
> +
> + /* Skip already checked VF or if the FLR initiation failed */
> + if (vf_id == last_vf_id || thread_data[i].result != 0)
> + continue;
> +
> + for (k = 0; k < num_checks; ++k)
> + if (subcheck_can_proceed(&checks[k]))
> + checks[k].verify_vf(vf_id, vf_id, checks[k].data);
> +
> + if (no_subchecks_can_proceed(checks, num_checks))
> + break;
> +
> + last_vf_id = vf_id;
> + }
> +
> + return last_vf_id;
> +}
> +
> +static int execute_parallel_flr(int pf_fd, int num_vfs, struct subcheck *checks,
> + int num_checks, const int wait_flr_ms)
> +{
> + return execute_parallel_flr_(pf_fd, num_vfs, checks, num_checks,
> + wait_flr_ms, 1);
> }
>
> #define GEN12_VF_CAP_REG 0x1901f8
> @@ -817,7 +970,7 @@ static void regs_subcheck_cleanup(struct subcheck_data *data)
> intel_register_access_fini(&rdata->mmio[i]);
> }
>
> -static void clear_tests(int pf_fd, int num_vfs)
> +static void clear_tests(int pf_fd, int num_vfs, flr_exec_strategy exec_strategy)
> {
> struct xe_mmio xemmio = { };
> const unsigned int num_gts = xe_number_gt(pf_fd);
> @@ -882,7 +1035,7 @@ static void clear_tests(int pf_fd, int num_vfs)
> };
> igt_assert_eq(i, num_checks);
>
> - verify_flr(pf_fd, num_vfs, checks, num_checks);
> + verify_flr(pf_fd, num_vfs, checks, num_checks, exec_strategy);
> }
>
> igt_main
> @@ -899,7 +1052,7 @@ igt_main
>
> igt_describe("Verify LMEM, GGTT, and SCRATCH_REGS are properly cleared after VF1 FLR");
> igt_subtest("flr-vf1-clear") {
> - clear_tests(pf_fd, 1);
> + clear_tests(pf_fd, 1, execute_sequential_flr);
> }
>
> igt_describe("Perform sequential FLR on each VF, verifying that LMEM, GGTT, and SCRATCH_REGS are cleared only on the reset VF.");
> @@ -908,7 +1061,16 @@ igt_main
>
> igt_require(total_vfs > 1);
>
> - clear_tests(pf_fd, total_vfs > 3 ? 3 : total_vfs);
> + clear_tests(pf_fd, total_vfs > 3 ? 3 : total_vfs, execute_sequential_flr);
> + }
> +
> + igt_describe("Perform FLR on all VFs in parallel, ensuring correct behavior during simultaneous resets.");
> + igt_subtest("flr-vfs-parallel") {
> + unsigned int total_vfs = igt_sriov_get_total_vfs(pf_fd);
> +
> + igt_require(total_vfs > 1);
> +
> + clear_tests(pf_fd, total_vfs, execute_parallel_flr);
> }
>
> igt_fixture {
LGTM,
Reviewed-by: Lukasz Laguna <lukasz.laguna at intel.com>
More information about the igt-dev
mailing list