[PATCH v6 2/3] drm/xe: Enable Boot Survivability mode

K V P, Satyanarayana satyanarayana.k.v.p at intel.com
Thu Jan 30 06:35:08 UTC 2025


Hi
> -----Original Message-----
> From: Tauro, Riana <riana.tauro at intel.com>
> Sent: Thursday, January 30, 2025 12:03 PM
> To: K V P, Satyanarayana <satyanarayana.k.v.p at intel.com>; intel-
> xe at lists.freedesktop.org
> Cc: Gupta, Anshuman <anshuman.gupta at intel.com>; Vivi, Rodrigo
> <rodrigo.vivi at intel.com>
> Subject: Re: [PATCH v6 2/3] drm/xe: Enable Boot Survivability mode
> 
> Hi Satyanarayana
> 
> Thanks for the review comments
> 
> On 1/29/2025 12:57 PM, K V P, Satyanarayana wrote:
> >> From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of Riana
> >> Tauro
> >> Sent: Tuesday, January 28, 2025 3:27 PM
> >> To: intel-xe at lists.freedesktop.org
> >> Cc: Tauro, Riana <riana.tauro at intel.com>; Gupta, Anshuman
> >> <anshuman.gupta at intel.com>; Vivi, Rodrigo <rodrigo.vivi at intel.com>
> >> Subject: [PATCH v6 2/3] drm/xe: Enable Boot Survivability mode
> >>
> >> Enable boot survivability mode if pcode initialization fails and
> >> if boot status indicates a failure. In this mode, drm card is not
> >> exposed and driver probe returns success after loading the bare minimum
> >> to allow firmware to be flashed via mei.
> >>
> >> v2: abstract survivability mode variable
> >>      add BMG check inside function (Jani, Rodrigo)
> >>
> >> v3: return -EBUSY during system suspend (Anshuman)
> >>      check survivability mode in pci probe only
> >>      on error
> >>
> >> Signed-off-by: Riana Tauro <riana.tauro at intel.com>
> >> Reviewed-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
> >> ---
> >>   drivers/gpu/drm/xe/xe_device.c             |  7 ++++++-
> >>   drivers/gpu/drm/xe/xe_pci.c                | 23 ++++++++++++++++++++--
> >>   drivers/gpu/drm/xe/xe_survivability_mode.c | 16 +++++++++++++++
> >>   drivers/gpu/drm/xe/xe_survivability_mode.h |  1 +
> >>   4 files changed, 44 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/xe/xe_device.c
> b/drivers/gpu/drm/xe/xe_device.c
> >> index f3f754beb812..8fedc72e9db4 100644
> >> --- a/drivers/gpu/drm/xe/xe_device.c
> >> +++ b/drivers/gpu/drm/xe/xe_device.c
> >> @@ -52,6 +52,7 @@
> >>   #include "xe_pmu.h"
> >>   #include "xe_query.h"
> >>   #include "xe_sriov.h"
> >> +#include "xe_survivability_mode.h"
> >>   #include "xe_tile.h"
> >>   #include "xe_ttm_stolen_mgr.h"
> >>   #include "xe_ttm_sys_mgr.h"
> >> @@ -693,8 +694,12 @@ int xe_device_probe_early(struct xe_device *xe)
> >>   	update_device_info(xe);
> >>
> >>   	err = xe_pcode_probe_early(xe);
> >> -	if (err)
> >> +	if (err) {
> >> +		if (xe_survivability_mode_required(xe))
> >> +			xe_survivability_mode_init(xe);
> >> +
> >>   		return err;
> >> +	}
> >>
> >>   	err = wait_for_lmem_ready(xe);
> >>   	if (err)
> >> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
> >> index e6ad189294c1..b483c5c90132 100644
> >> --- a/drivers/gpu/drm/xe/xe_pci.c
> >> +++ b/drivers/gpu/drm/xe/xe_pci.c
> >> @@ -30,6 +30,7 @@
> >>   #include "xe_pm.h"
> >>   #include "xe_sriov.h"
> >>   #include "xe_step.h"
> >> +#include "xe_survivability_mode.h"
> >>   #include "xe_tile.h"
> >>
> >>   enum toggle_d3cold {
> >> @@ -766,6 +767,9 @@ static void xe_pci_remove(struct pci_dev *pdev)
> >>   	if (IS_SRIOV_PF(xe))
> >>   		xe_pci_sriov_configure(pdev, 0);
> >>
> >> +	if (xe_survivability_mode_enabled(xe))
> >> +		return xe_survivability_mode_remove(xe);
> >> +
> >>   	xe_device_remove(xe);
> >>   	xe_pm_runtime_fini(xe);
> >>   	pci_set_drvdata(pdev, NULL);
> >> @@ -838,8 +842,19 @@ static int xe_pci_probe(struct pci_dev *pdev,
> const
> >> struct pci_device_id *ent)
> >>   		return err;
> >>
> >>   	err = xe_device_probe_early(xe);
> >> -	if (err)
> >> +
> >> +	/*
> >> +	 * In Boot Survivability mode, no drm card is exposed
> >> +	 * and driver is loaded with bare minimum to allow
> >> +	 * for firmware to be flashed through mei. Return
> >> +	 * success if survivability mode is enabled.
> >> +	 */
> >> +	if (err) {
> >> +		if (xe_survivability_mode_enabled(xe))
> >> +			return 0;
> >> +
> >>   		return err;
> >> +	}
> >>
> >>   	err = xe_info_init(xe, desc->graphics, desc->media);
> >>   	if (err)
> >> @@ -926,9 +941,13 @@ static void d3cold_toggle(struct pci_dev *pdev,
> >> enum toggle_d3cold toggle)
> >>   static int xe_pci_suspend(struct device *dev)
> >>   {
> >>   	struct pci_dev *pdev = to_pci_dev(dev);
> >> +	struct xe_device *xe = pdev_to_xe_device(pdev);
> >>   	int err;
> >>
> >> -	err = xe_pm_suspend(pdev_to_xe_device(pdev));
> >> +	if (xe_survivability_mode_enabled(xe))
> >> +		return -EBUSY;
> >> +
> >> +	err = xe_pm_suspend(xe);
> >>   	if (err)
> >>   		return err;
> >>
> >> diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c
> >> b/drivers/gpu/drm/xe/xe_survivability_mode.c
> >> index 9911e9f6b99b..633f5effa349 100644
> >> --- a/drivers/gpu/drm/xe/xe_survivability_mode.c
> >> +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c
> >> @@ -144,6 +144,19 @@ static void enable_survivability_mode(struct
> pci_dev
> >> *pdev)
> >>   	}
> >>   }
> >>
> >> +/**
> >> + * xe_survivability_mode_enabled - check if survivability mode is enabled
> >> + * @xe: xe device instance
> >> + *
> >> + * Returns true if in survivability mode, false otherwise
> >> + */
> >> +bool xe_survivability_mode_enabled(struct xe_device *xe)
> >> +{
> >> +	struct xe_survivability *survivability = &xe->survivability;
> >> +
> >> +	return survivability->mode;
> >> +}
> >> +
> >>   /**
> >>    * xe_survivability_mode_required - checks if survivability mode is required
> >>    * @xe: xe device instance
> >> @@ -158,6 +171,9 @@ bool xe_survivability_mode_required(struct
> >> xe_device *xe)
> >>   	struct xe_mmio *mmio = xe_root_tile_mmio(xe);
> >>   	u32 data;
> >>
> >> +	if (!IS_DGFX(xe) || xe->info.platform < XE_BATTLEMAGE)
> >> +		return false;
> >> +
> >
> > Survivability mode is not needed in the VF case. Make sure to return false in
> case of a VF as well.
> > if (!IS_DGFX(xe) || xe->info.platform < XE_BATTLEMAGE ||
> IS_SRIOV_VF(xe))
> > 	return false;
> 
> As discussed offline, survivability mode is set only when pcode
> initialization fails. So if it fails for the PF, the driver will not
> continue with sriov_init.
> 
> Thanks
> Riana
> >
LGTM.
Reviewed-by: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
> >>   	data = xe_mmio_read32(mmio, PCODE_SCRATCH(0));
> >>   	survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data);
> >>
> >> diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h
> >> b/drivers/gpu/drm/xe/xe_survivability_mode.h
> >> index 410e3ee5f5d1..f530507a22c6 100644
> >> --- a/drivers/gpu/drm/xe/xe_survivability_mode.h
> >> +++ b/drivers/gpu/drm/xe/xe_survivability_mode.h
> >> @@ -12,6 +12,7 @@ struct xe_device;
> >>
> >>   void xe_survivability_mode_init(struct xe_device *xe);
> >>   void xe_survivability_mode_remove(struct xe_device *xe);
> >> +bool xe_survivability_mode_enabled(struct xe_device *xe);
> >>   bool xe_survivability_mode_required(struct xe_device *xe);
> >>
> >>   #endif /* _XE_SURVIVABILITY_MODE_H_ */
> >> --
> >> 2.47.1
> >



More information about the Intel-xe mailing list