[PATCH v2] drm/xe/vf: Make multi-GT migration less error prone
Michał Winiarski
michal.winiarski at intel.com
Fri Jun 27 20:22:02 UTC 2025
On Tue, Jun 24, 2025 at 11:09:57PM +0200, Tomasz Lis wrote:
> There is a remote chance that after migration, some GTs will not
> send the MIGRATED interrupt, or that, due to the current VF KMD
> state, the interrupt will not lead to marking the GT for recovery.
>
> Requiring IRQs from all GTs before starting the recovery introduces
> the possibility that the whole process will get stalled due to one
> GuC.
>
> One could argue that waiting for all the IRQs is also a waste of
> time, but we should get all of them as soon as the VGPU starts, so
> that's not really an impactful argument.
>
> Still, not waiting for all GTs makes it easier to handle situations:
> * where one GuC IRQ is missing
> * where the state before probe is unclean - getting a MIGRATED IRQ
> as soon as interrupts are enabled
> * where multiple migrations happen close to each other
>
> To help with these cases, this patch alters the post-migration
> recovery so that the recovery task is started as soon as one GuC
> IRQ is handled, and the other GTs are included in the recovery
> later, as the subsequent IRQs are serviced.
>
> The post-migration recovery can now be called for any selection of
> GTs, and it will perform recovery on all GTs for which IRQs have
> arrived, even multiple times if necessary.
>
> v2: Typos and style fixes
>
> Signed-off-by: Tomasz Lis <tomasz.lis at intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Michal Winiarski <michal.winiarski at intel.com>
> Cc: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
> Acked-by: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
> ---
> drivers/gpu/drm/xe/xe_sriov_vf.c | 167 ++++++++++++++-----------------
> 1 file changed, 76 insertions(+), 91 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
> index 6526fe450e55..57b36ea74a45 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
> @@ -147,127 +147,111 @@ void xe_sriov_vf_init_early(struct xe_device *xe)
> xe_sriov_info(xe, "migration not supported by this module version\n");
> }
>
> -/**
> - * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning.
> - * @xe: the &xe_device struct instance
> - *
> - * After migration, we need to re-query all VF configuration to make sure
> - * they match previous provisioning. Note that most of VF provisioning
> - * shall be the same, except GGTT range, since GGTT is not virtualized per-VF.
> - *
> - * Returns: 0 if the operation completed successfully, or a negative error
> - * code otherwise.
> +static bool gt_vf_post_migration_needed(struct xe_gt *gt)
> +{
> + return test_bit(gt->info.id, &gt_to_xe(gt)->sriov.vf.migration.gt_flags);
> +}
> +
> +/*
> + * Notify the GuCs marked in flags that applying resource fixups has finished.
> */
> -static int vf_post_migration_requery_guc(struct xe_device *xe)
> +static int vf_post_migration_notify_resfix_done(struct xe_device *xe, unsigned long *gt_flags)
Why is gt_flags passed as a pointer? It doesn't look like the caller is
doing anything with this variable after passing it to this function.
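E.g. a by-value variant would make that explicit - untested sketch, just to
illustrate the point (gt_flags becomes a local copy, so the non-atomic
__clear_bit() is enough):

	static int vf_post_migration_notify_resfix_done(struct xe_device *xe,
							unsigned long gt_flags)
	{
		struct xe_gt *gt;
		unsigned int id;
		int err = 0;

		for_each_gt(gt, xe, id) {
			if (!test_bit(id, &gt_flags))
				continue;
			/* skip asking GuC for RESFIX exit if new recovery request arrived */
			if (gt_vf_post_migration_needed(gt))
				continue;
			err = xe_gt_sriov_vf_notify_resfix_done(gt);
			if (err)
				break;
			/* clearing our own copy, purely for the summary check below */
			__clear_bit(id, &gt_flags);
		}

		if (gt_flags && !err)
			drm_dbg(&xe->drm, "another recovery imminent, skipped some notifications\n");
		return err;
	}

The caller would then pass fixed_gts by value and drop the address-of.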
> {
> struct xe_gt *gt;
> unsigned int id;
> - int err, ret = 0;
> + int err = 0;
>
> for_each_gt(gt, xe, id) {
> - err = xe_gt_sriov_vf_query_config(gt);
> - ret = ret ?: err;
> + if (!test_bit(id, gt_flags))
> + continue;
> + /* skip asking GuC for RESFIX exit if new recovery request arrived */
> + if (gt_vf_post_migration_needed(gt))
> + continue;
If the IRQ arrives here, we'll still send the resfix_done, which may have
some unexpected consequences.
I think we may need to tweak the protocol with GuC, but that's something
outside the scope of this patch.
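For the archives, the window looks roughly like this:

	worker					IRQ handler
	------					-----------
	gt_vf_post_migration_needed(gt)
	  -> false, so we don't skip
						set_bit(id, &xe->sriov.vf.migration.gt_flags)
	xe_gt_sriov_vf_notify_resfix_done(gt)
	  -> GuC leaves RESFIX state even
	     though a new recovery is pending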
Reviewed-by: Michał Winiarski <michal.winiarski at intel.com>
Thanks,
-Michał
> + err = xe_gt_sriov_vf_notify_resfix_done(gt);
> + if (err)
> + break;
> + clear_bit(id, gt_flags);
> }
>
> - return ret;
> + if (*gt_flags && !err)
> + drm_dbg(&xe->drm, "another recovery imminent, skipped some notifications\n");
> + return err;
> }
>
> -static void vf_post_migration_fixup_ctb(struct xe_device *xe)
> +static int vf_get_next_migrated_gt_id(struct xe_device *xe)
> {
> struct xe_gt *gt;
> unsigned int id;
>
> - xe_assert(xe, IS_SRIOV_VF(xe));
> -
> for_each_gt(gt, xe, id) {
> - s32 shift = xe_gt_sriov_vf_ggtt_shift(gt);
> -
> - xe_guc_ct_fixup_messages_with_ggtt(&gt->uc.guc.ct, shift);
> + if (test_and_clear_bit(id, &xe->sriov.vf.migration.gt_flags))
> + return id;
> }
> + return -1;
> }
>
> -/*
> - * vf_post_migration_imminent - Check if post-restore recovery is coming.
> - * @xe: the &xe_device struct instance
> +/**
> + * gt_vf_post_migration_fixups - Perform post-migration fixups on a single GT.
> + * @gt: the &xe_gt struct instance
> *
> - * Return: True if migration recovery worker will soon be running. Any worker currently
> - * executing does not affect the result.
> + * After migration, GuC needs to be re-queried for the VF configuration to
> + * check if it matches the previous provisioning. Most of the VF provisioning
> + * should be the same, except the GGTT range, since GGTT is not virtualized
> + * per-VF. If the GGTT range has changed, we have to perform fixups - shift
> + * all GGTT references used anywhere within the driver. After the fixups in
> + * this function succeed, it is allowed to ask the GuC bound to this GT to
> + * continue normal operation.
> + *
> + * Returns: 0 if the operation completed successfully, or a negative error
> + * code otherwise.
> */
> -static bool vf_post_migration_imminent(struct xe_device *xe)
> +static int gt_vf_post_migration_fixups(struct xe_gt *gt)
> {
> - return xe->sriov.vf.migration.gt_flags != 0 ||
> - work_pending(&xe->sriov.vf.migration.worker);
> -}
> -
> -static bool vf_post_migration_fixup_ggtt_nodes(struct xe_device *xe)
> -{
> - bool need_fixups = false;
> - struct xe_tile *tile;
> - unsigned int id;
> -
> - for_each_tile(tile, xe, id) {
> - struct xe_gt *gt = tile->primary_gt;
> - s64 shift;
> -
> - shift = xe_gt_sriov_vf_ggtt_shift(gt);
> - if (shift) {
> - need_fixups = true;
> - xe_tile_sriov_vf_fixup_ggtt_nodes(tile, shift);
> - }
> - }
> - return need_fixups;
> -}
> + s64 shift;
> + int err;
>
> -/*
> - * Notify all GuCs about resource fixups apply finished.
> - */
> -static void vf_post_migration_notify_resfix_done(struct xe_device *xe)
> -{
> - struct xe_gt *gt;
> - unsigned int id;
> + err = xe_gt_sriov_vf_query_config(gt);
> + if (err)
> + return err;
>
> - for_each_gt(gt, xe, id) {
> - if (vf_post_migration_imminent(xe))
> - goto skip;
> - xe_gt_sriov_vf_notify_resfix_done(gt);
> + shift = xe_gt_sriov_vf_ggtt_shift(gt);
> + if (shift) {
> + xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift);
> + /* FIXME: add the recovery steps */
> + xe_guc_ct_fixup_messages_with_ggtt(&gt->uc.guc.ct, shift);
> }
> - return;
> -
> -skip:
> - drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n");
> + return 0;
> }
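Side note for the archives, in case the "shift" is not obvious to readers: it
is just the delta between the VF's GGTT placement after and before the
migration, applied to every stored GGTT reference. Conceptually (illustrative
only, this is not the driver's actual helper):

	s64 shift = ggtt_base_after - ggtt_base_before;	/* may be negative */

	node_start += shift;	/* for each GGTT node / reference held by this VF */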
>
> static void vf_post_migration_recovery(struct xe_device *xe)
> {
> - bool need_fixups;
> - int err;
> + unsigned long fixed_gts = 0;
> + int id, err;
>
> drm_dbg(&xe->drm, "migration recovery in progress\n");
> xe_pm_runtime_get(xe);
> - err = vf_post_migration_requery_guc(xe);
> - if (vf_post_migration_imminent(xe))
> - goto defer;
> - if (unlikely(err))
> - goto fail;
> +
> if (!vf_migration_supported(xe)) {
> xe_sriov_err(xe, "migration not supported by this module version\n");
> err = -ENOTRECOVERABLE;
> goto fail;
> }
>
> - need_fixups = vf_post_migration_fixup_ggtt_nodes(xe);
> - /* FIXME: add the recovery steps */
> - if (need_fixups)
> - vf_post_migration_fixup_ctb(xe);
> + while (id = vf_get_next_migrated_gt_id(xe), id >= 0) {
> + struct xe_gt *gt = xe_device_get_gt(xe, id);
> +
> + err = gt_vf_post_migration_fixups(gt);
> + if (err)
> + goto fail;
> +
> + set_bit(id, &fixed_gts);
> + }
> +
> + err = vf_post_migration_notify_resfix_done(xe, &fixed_gts);
> + if (err)
> + goto fail;
>
> - vf_post_migration_notify_resfix_done(xe);
> xe_pm_runtime_put(xe);
> drm_notice(&xe->drm, "migration recovery ended\n");
> return;
> -defer:
> - xe_pm_runtime_put(xe);
> - drm_dbg(&xe->drm, "migration recovery deferred\n");
> - return;
> fail:
> xe_pm_runtime_put(xe);
> drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err));
> @@ -282,18 +266,23 @@ static void migration_worker_func(struct work_struct *w)
> vf_post_migration_recovery(xe);
> }
>
> -static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe)
> +/*
> + * Check if post-restore recovery is coming on any of the GTs.
> + * @xe: the &xe_device struct instance
> + *
> + * Return: True if migration recovery worker will soon be running. Any worker currently
> + * executing does not affect the result.
> + */
> +static bool vf_ready_to_recovery_on_any_gts(struct xe_device *xe)
> {
> struct xe_gt *gt;
> unsigned int id;
>
> for_each_gt(gt, xe, id) {
> - if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) {
> - xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n");
> - return false;
> - }
> + if (test_bit(id, &xe->sriov.vf.migration.gt_flags))
> + return true;
> }
> - return true;
> + return false;
> }
>
> /**
> @@ -308,13 +297,9 @@ void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
>
> xe_assert(xe, IS_SRIOV_VF(xe));
>
> - if (!vf_ready_to_recovery_on_all_gts(xe))
> + if (!vf_ready_to_recovery_on_any_gts(xe))
> return;
>
> - WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0);
> - /* Ensure other threads see that no flags are set now. */
> - smp_mb();
> -
> started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker);
> drm_info(&xe->drm, "VF migration recovery %s\n", started ?
> "scheduled" : "already in progress");
> --
> 2.25.1
>