[PATCH] drm/xe/pf: Move VFs reprovisioning to worker
Summers, Stuart
stuart.summers at intel.com
Mon Jan 27 17:07:14 UTC 2025
On Sat, 2025-01-25 at 22:55 +0100, Michal Wajdeczko wrote:
> Since the GuC is reset during GT reset, we need to re-send the
> entire SR-IOV provisioning configuration to the GuC. But since
> this whole configuration is protected by the PF master mutex and
> we can't avoid making allocations under this mutex (like during
> LMEM provisioning), we can't do this reprovisioning from gt-reset
> path if we want to be reclaim-safe. Move VFs reprovisioning to an
> async worker that we will start from the gt-reset path.
Admittedly I don't fully understand the PF restart flow here from
userspace. Is there some race condition we need to check for whether
GuC completes base configuration before the PF config comes through? Is
it possible we can get into either some deadlock between the native
init and the PF init or start running content on some engines in native
mode before PF completes?
Thanks,
Stuart
>
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 53 ++++++++++++++++++++-
> --
> drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h | 10 +++++
> 2 files changed, 56 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
> b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
> index 6f906c8e8108..d66478deab98 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
> @@ -15,7 +15,11 @@
> #include "xe_gt_sriov_pf_helpers.h"
> #include "xe_gt_sriov_pf_migration.h"
> #include "xe_gt_sriov_pf_service.h"
> +#include "xe_gt_sriov_printk.h"
> #include "xe_mmio.h"
> +#include "xe_pm.h"
> +
> +static void pf_worker_restart_func(struct work_struct *w);
>
> /*
> * VF's metadata is maintained in the flexible array where:
> @@ -41,6 +45,11 @@ static int pf_alloc_metadata(struct xe_gt *gt)
> return 0;
> }
>
> +static void pf_init_workers(struct xe_gt *gt)
> +{
> + INIT_WORK(&gt->sriov.pf.workers.restart,
> pf_worker_restart_func);
> +}
> +
> /**
> * xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on
> PF.
> * @gt: the &xe_gt to initialize
> @@ -65,6 +74,8 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
> if (err)
> return err;
>
> + pf_init_workers(gt);
> +
> return 0;
> }
>
> @@ -155,14 +166,42 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt
> *gt, unsigned int vfid)
> pf_clear_vf_scratch_regs(gt, vfid);
> }
>
> -/**
> - * xe_gt_sriov_pf_restart - Restart SR-IOV support after a GT reset.
> - * @gt: the &xe_gt
> - *
> - * This function can only be called on PF.
> - */
> -void xe_gt_sriov_pf_restart(struct xe_gt *gt)
> +static void pf_restart(struct xe_gt *gt)
> {
> + struct xe_device *xe = gt_to_xe(gt);
> +
> + xe_pm_runtime_get(xe);
> xe_gt_sriov_pf_config_restart(gt);
> xe_gt_sriov_pf_control_restart(gt);
> + xe_pm_runtime_put(xe);
> +
> + xe_gt_sriov_dbg(gt, "restart completed\n");
> +}
> +
> +static void pf_worker_restart_func(struct work_struct *w)
> +{
> + struct xe_gt *gt = container_of(w, typeof(*gt),
> sriov.pf.workers.restart);
> +
> + pf_restart(gt);
> +}
> +
> +static void pf_queue_restart(struct xe_gt *gt)
> +{
> + struct xe_device *xe = gt_to_xe(gt);
> +
> + xe_gt_assert(gt, IS_SRIOV_PF(xe));
> +
> + if (!queue_work(xe->sriov.wq, &gt->sriov.pf.workers.restart))
> + xe_gt_sriov_dbg(gt, "restart already in queue!\n");
> +}
> +
> +/**
> + * xe_gt_sriov_pf_restart - Restart SR-IOV support after a GT reset.
> + * @gt: the &xe_gt
> + *
> + * This function can only be called on PF.
> + */
> +void xe_gt_sriov_pf_restart(struct xe_gt *gt)
> +{
> + pf_queue_restart(gt);
> }
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
> b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
> index 0426b1a77069..a64a6835ad65 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
> @@ -35,8 +35,17 @@ struct xe_gt_sriov_metadata {
> struct xe_gt_sriov_state_snapshot snapshot;
> };
>
> +/**
> + * struct xe_gt_sriov_pf_workers - GT level workers used by the PF.
> + */
> +struct xe_gt_sriov_pf_workers {
> + /** @restart: worker that executes actions post GT reset */
> + struct work_struct restart;
> +};
> +
> /**
> * struct xe_gt_sriov_pf - GT level PF virtualization data.
> + * @workers: workers data.
> * @service: service data.
> * @control: control data.
> * @policy: policy data.
> @@ -45,6 +54,7 @@ struct xe_gt_sriov_metadata {
> * @vfs: metadata for all VFs.
> */
> struct xe_gt_sriov_pf {
> + struct xe_gt_sriov_pf_workers workers;
> struct xe_gt_sriov_pf_service service;
> struct xe_gt_sriov_pf_control control;
> struct xe_gt_sriov_pf_policy policy;
More information about the Intel-xe
mailing list