[v3, i-g-t, 1/2] tests/intel/xe_sriov_flr: Add parallel FLR subtest for SR-IOV VFs

Laguna, Lukasz lukasz.laguna at intel.com
Wed Jan 29 11:13:51 UTC 2025


On 1/16/2025 20:29, Marcin Bernatowicz wrote:
> Introduce a new subtest flr-vfs-parallel to validate parallel FLR
> execution on all VFs. This subtest ensures correct behavior during
> simultaneous resets.
>
> Refactor verify_flr to accept an execution strategy function pointer,
> allowing for both sequential and parallel FLR strategies.
>
> Update clear_tests to use the new execution strategy approach and
> modify existing subtests to utilize the sequential FLR strategy.
>
> v2: Reintroduce condition to reinitialize test data only if more VFs
>      remain to be tested (omitted when extracting execute_sequential_flr).
> v3: Introduce threaded FLR initiation to achieve better parallelism
>      by mitigating 100ms reset delays.(Lukasz)
>
> Signed-off-by: Marcin Bernatowicz <marcin.bernatowicz at linux.intel.com>
> Cc: Adam Miszczak <adam.miszczak at linux.intel.com>
> Cc: Jakub Kolakowski <jakub1.kolakowski at intel.com>
> Cc: Marcin Bernatowicz <marcin.bernatowicz at linux.intel.com>
> Cc: Michał Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Michał Winiarski <michal.winiarski at intel.com>
> Cc: Narasimha C V <narasimha.c.v at intel.com>
> Cc: Piotr Piórkowski <piotr.piorkowski at intel.com>
> Cc: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
> Cc: Tomasz Lis <tomasz.lis at intel.com>
> ---
>   tests/intel/xe_sriov_flr.c | 210 ++++++++++++++++++++++++++++++++-----
>   1 file changed, 186 insertions(+), 24 deletions(-)
>
> diff --git a/tests/intel/xe_sriov_flr.c b/tests/intel/xe_sriov_flr.c
> index 550d58bb9..a8d35be31 100644
> --- a/tests/intel/xe_sriov_flr.c
> +++ b/tests/intel/xe_sriov_flr.c
> @@ -4,6 +4,7 @@
>    */
>   
>   #include <fcntl.h>
> +#include <pthread.h>
>   #include <sys/stat.h>
>   #include "drmtest.h"
>   #include "igt_core.h"
> @@ -35,6 +36,11 @@
>    * Description:
>    *   Sequentially performs FLR on each VF to verify isolation and
>    *   clearing of LMEM, GGTT, and SCRATCH_REGS on the reset VF only.
> + *
> + * SUBTEST: flr-vfs-parallel
> + * Run type: FULL
> + * Description:
> + *   Executes FLR on all VFs simultaneously to validate correct behavior during parallel resets.
>    */
>   
>   IGT_TEST_DESCRIPTION("Xe tests for SR-IOV VF FLR (Functional Level Reset)");
> @@ -210,6 +216,26 @@ static void subchecks_report_results(struct subcheck *checks, int num_checks)
>   	igt_skip_on(skips == num_checks);
>   }
>   
> +/**
> + * flr_exec_strategy - Function pointer for FLR execution strategy
> + * @pf_fd: File descriptor for the Physical Function (PF).
> + * @num_vfs: Total number of Virtual Functions (VFs) to test.
> + * @checks: Array of subchecks.
> + * @num_checks: Number of subchecks.
> + * @wait_flr_ms: Time to wait (in milliseconds) for FLR to complete
> + *
> + * Defines a strategy for executing FLRs (Functional Level Resets)
> + * across multiple VFs. The strategy determines the order and
> + * manner (e.g., sequential or parallel) in which FLRs are performed.
> + * It is expected to initiate FLRs and handle related operations,
> + * such as verifying and preparing subchecks.
> + *
> + * Return: The ID of the last VF for which FLR was successfully initiated.
> + */
> +typedef int (*flr_exec_strategy)(int pf_fd, int num_vfs,
> +				 struct subcheck *checks, int num_checks,
> +				 const int wait_flr_ms);
> +
>   /**
>    * verify_flr - Orchestrates the verification of Function Level Reset (FLR)
>    *              across multiple Virtual Functions (VFs).
> @@ -222,18 +248,20 @@ static void subchecks_report_results(struct subcheck *checks, int num_checks)
>    * @num_vfs: Total number of Virtual Functions (VFs) to test.
>    * @checks: Array of subchecks.
>    * @num_checks: Number of subchecks.
> + * @flr_exec_strategy: Execution strategy for FLR (e.g., sequential or parallel).
>    *
>    * Detailed Workflow:
>    * - Initializes and prepares VFs for testing.
> - * - Iterates through each VF, performing FLR, and verifies that only
> - *   the reset VF is affected while others remain unchanged.
> - * - Reinitializes test data for the FLRed VF if there are more VFs to test.
> - * - Continues the process until all VFs are tested.
> - * - Handles any test failures or early exits, cleans up, and reports results.
> + * - Executes the FLR operation using the provided execution strategy
> + *   (e.g., sequential or parallel) and validates that the reset VF behaves
> + *   as expected.
> + * - Cleans up resources and reports results after all VFs have been tested
> + *   or in the case of an early exit.
>    *
>    * A timeout is used to wait for FLR operations to complete.
>    */
> -static void verify_flr(int pf_fd, int num_vfs, struct subcheck *checks, int num_checks)
> +static void verify_flr(int pf_fd, int num_vfs, struct subcheck *checks,
> +		       int num_checks, flr_exec_strategy exec_strategy)
>   {
>   	const int wait_flr_ms = 200;
>   	int i, vf_id, flr_vf_id = -1;
> @@ -242,6 +270,7 @@ static void verify_flr(int pf_fd, int num_vfs, struct subcheck *checks, int num_
>   	igt_sriov_enable_vfs(pf_fd, num_vfs);
>   	if (igt_warn_on(!igt_sriov_device_reset_exists(pf_fd, 1)))
>   		goto disable_vfs;
> +
>   	/* Refresh PCI state */
>   	if (igt_warn_on(igt_pci_system_reinit()))
>   		goto disable_vfs;
> @@ -257,14 +286,34 @@ static void verify_flr(int pf_fd, int num_vfs, struct subcheck *checks, int num_
>   	if (no_subchecks_can_proceed(checks, num_checks))
>   		goto cleanup;
>   
> -	flr_vf_id = 1;
> +	/* Execute the chosen FLR strategy */
> +	flr_vf_id = exec_strategy(pf_fd, num_vfs, checks, num_checks, wait_flr_ms);
> +
> +cleanup:
> +	for (i = 0; i < num_checks; ++i)
> +		checks[i].cleanup(checks[i].data);
> +
> +disable_vfs:
> +	igt_sriov_disable_vfs(pf_fd);
> +
> +	if (flr_vf_id > 0 || no_subchecks_can_proceed(checks, num_checks))
> +		subchecks_report_results(checks, num_checks);
> +	else
> +		igt_skip("No checks executed\n");
> +}
> +
> +static int execute_sequential_flr(int pf_fd, int num_vfs,
> +				  struct subcheck *checks, int num_checks,
> +				  const int wait_flr_ms)
> +{
> +	int i, vf_id, flr_vf_id = 1;
>   
>   	do {
>   		if (igt_warn_on_f(!igt_sriov_device_reset(pf_fd, flr_vf_id),
>   				  "Initiating VF%u FLR failed\n", flr_vf_id))
> -			goto cleanup;
> +			break;
>   
> -		/* assume FLR is finished after wait_flr_ms */
> +		/* Assume FLR is finished after wait_flr_ms */
>   		usleep(wait_flr_ms * 1000);
>   
>   		for (vf_id = 1; vf_id <= num_vfs; ++vf_id)
> @@ -272,28 +321,132 @@ static void verify_flr(int pf_fd, int num_vfs, struct subcheck *checks, int num_
>   				if (subcheck_can_proceed(&checks[i]))
>   					checks[i].verify_vf(vf_id, flr_vf_id, checks[i].data);
>   
> -		/* reinitialize test data for FLRed VF */
> +		/* Reinitialize test data for the FLRed VF */
>   		if (flr_vf_id < num_vfs)
>   			for (i = 0; i < num_checks; ++i)
>   				if (subcheck_can_proceed(&checks[i]))
>   					checks[i].prepare_vf(flr_vf_id, checks[i].data);
>   
>   		if (no_subchecks_can_proceed(checks, num_checks))
> -			goto cleanup;
> +			break;
>   
>   	} while (++flr_vf_id <= num_vfs);
>   
> -cleanup:
> -	for (i = 0; i < num_checks; ++i)
> -		checks[i].cleanup(checks[i].data);
> +	return flr_vf_id - 1;
> +}
>   
> -disable_vfs:
> -	igt_sriov_disable_vfs(pf_fd);
> +pthread_mutex_t signal_mutex = PTHREAD_MUTEX_INITIALIZER;
> +pthread_cond_t signal_cond = PTHREAD_COND_INITIALIZER;
>   
> -	if (flr_vf_id > 1 || no_subchecks_can_proceed(checks, num_checks))
> -		subchecks_report_results(checks, num_checks);
> -	else
> -		igt_skip("No checks executed\n");
> +enum thread_signal {
> +	SIGNAL_WAIT,
> +	SIGNAL_START,
> +	SIGNAL_SKIP
> +} thread_signal = SIGNAL_WAIT;
> +
> +struct flr_thread_data {
> +	int pf_fd;
> +	int vf_id;
> +	int flr_instance;
> +	int result;
> +};
> +
> +static void *flr_thread(void *arg)
> +{
> +	struct flr_thread_data *data = (struct flr_thread_data *)arg;
> +
> +	pthread_mutex_lock(&signal_mutex);
> +	while (thread_signal == SIGNAL_WAIT)
> +		pthread_cond_wait(&signal_cond, &signal_mutex);
> +	pthread_mutex_unlock(&signal_mutex);
> +
> +	if (thread_signal == SIGNAL_START &&
> +	    igt_warn_on_f(!igt_sriov_device_reset(data->pf_fd, data->vf_id),
> +			  "Initiating VF%u FLR failed (flr_instance=%u)\n",
> +			  data->vf_id, data->flr_instance))
> +		data->result = -1;
> +
> +	return NULL;
> +}
> +
> +static int execute_parallel_flr_(int pf_fd, int num_vfs,
> +				 struct subcheck *checks,
> +				 int num_checks, const int wait_flr_ms,
> +				 unsigned int num_flrs_per_vf)
> +{
> +	pthread_t threads[num_vfs * num_flrs_per_vf];
> +	struct flr_thread_data thread_data[num_vfs * num_flrs_per_vf];

nit: you can store num_vfs * num_flrs_per_vf in variable

> +	int vf_id = 0, last_vf_id = 0;
> +	int i, j, k, created_threads = 0;
> +
> +	igt_assert(num_flrs_per_vf > 0);
> +
> +	for (i = 0; i < num_vfs; ++i) {
> +		for (j = 0; j < num_flrs_per_vf; ++j) {
> +			thread_data[created_threads].pf_fd = pf_fd;
> +			thread_data[created_threads].vf_id = i + 1; // VF IDs are 1-based
> +			thread_data[created_threads].flr_instance = j;
> +			thread_data[created_threads].result = 0;
> +
> +			if (pthread_create(&threads[created_threads], NULL,
> +					   flr_thread,
> +					   &thread_data[created_threads])) {
> +				last_vf_id = i + 1;
> +
> +				goto cleanup_threads;
> +			} else {
> +				created_threads++;
> +			}
> +		}
> +	}
> +
> +cleanup_threads:
> +	pthread_mutex_lock(&signal_mutex);
> +	thread_signal = (created_threads == num_vfs * num_flrs_per_vf) ?
> +				SIGNAL_START :
> +				SIGNAL_SKIP;
> +	pthread_cond_broadcast(&signal_cond);
> +	pthread_mutex_unlock(&signal_mutex);
> +
> +	for (i = 0; i < created_threads; ++i)
> +		pthread_join(threads[i], NULL);
> +
> +	if (last_vf_id) {
> +		for (k = 0; k < num_checks; ++k)
> +			set_skip_reason(checks[k].data,
> +					"Thread creation failed for VF%u\n", last_vf_id);
> +		return 0;
> +	}
> +
> +	/* Assume FLRs finished after wait_flr_ms */
> +	usleep(wait_flr_ms * 1000);
> +
> +	/* Verify results */
> +	for (i = 0; i < created_threads; ++i) {
> +		vf_id = thread_data[i].vf_id;
> +
> +		/* Skip already checked VF or if the FLR initiation failed */
> +		if (vf_id == last_vf_id || thread_data[i].result != 0)
> +			continue;
> +
> +		for (k = 0; k < num_checks; ++k)
> +			if (subcheck_can_proceed(&checks[k]))
> +				checks[k].verify_vf(vf_id, vf_id, checks[k].data);
> +
> +		if (no_subchecks_can_proceed(checks, num_checks))
> +			break;
> +
> +		last_vf_id = vf_id;
> +	}
> +
> +	return last_vf_id;
> +}
> +
> +static int execute_parallel_flr(int pf_fd, int num_vfs, struct subcheck *checks,
> +				int num_checks, const int wait_flr_ms)
> +{
> +	return execute_parallel_flr_(pf_fd, num_vfs, checks, num_checks,
> +				     wait_flr_ms, 1);
>   }
>   
>   #define GEN12_VF_CAP_REG			0x1901f8
> @@ -817,7 +970,7 @@ static void regs_subcheck_cleanup(struct subcheck_data *data)
>   				intel_register_access_fini(&rdata->mmio[i]);
>   }
>   
> -static void clear_tests(int pf_fd, int num_vfs)
> +static void clear_tests(int pf_fd, int num_vfs, flr_exec_strategy exec_strategy)
>   {
>   	struct xe_mmio xemmio = { };
>   	const unsigned int num_gts = xe_number_gt(pf_fd);
> @@ -882,7 +1035,7 @@ static void clear_tests(int pf_fd, int num_vfs)
>   	};
>   	igt_assert_eq(i, num_checks);
>   
> -	verify_flr(pf_fd, num_vfs, checks, num_checks);
> +	verify_flr(pf_fd, num_vfs, checks, num_checks, exec_strategy);
>   }
>   
>   igt_main
> @@ -899,7 +1052,7 @@ igt_main
>   
>   	igt_describe("Verify LMEM, GGTT, and SCRATCH_REGS are properly cleared after VF1 FLR");
>   	igt_subtest("flr-vf1-clear") {
> -		clear_tests(pf_fd, 1);
> +		clear_tests(pf_fd, 1, execute_sequential_flr);
>   	}
>   
>   	igt_describe("Perform sequential FLR on each VF, verifying that LMEM, GGTT, and SCRATCH_REGS are cleared only on the reset VF.");
> @@ -908,7 +1061,16 @@ igt_main
>   
>   		igt_require(total_vfs > 1);
>   
> -		clear_tests(pf_fd, total_vfs > 3 ? 3 : total_vfs);
> +		clear_tests(pf_fd, total_vfs > 3 ? 3 : total_vfs, execute_sequential_flr);
> +	}
> +
> +	igt_describe("Perform FLR on all VFs in parallel, ensuring correct behavior during simultaneous resets.");
> +	igt_subtest("flr-vfs-parallel") {
> +		unsigned int total_vfs = igt_sriov_get_total_vfs(pf_fd);
> +
> +		igt_require(total_vfs > 1);
> +
> +		clear_tests(pf_fd, total_vfs, execute_parallel_flr);
>   	}
>   
>   	igt_fixture {

LGTM,
Reviewed-by: Lukasz Laguna <lukasz.laguna at intel.com>



More information about the igt-dev mailing list