[Intel-xe] [PATCH] drm/xe: implement driver initiated functional reset

Ofir Bitton obitton at habana.ai
Sun Oct 1 08:50:27 UTC 2023


On 27/09/2023 10:46, Andrzej Hajda wrote:
> Driver initiated functional reset (FLR) is the highest level of reset
> that we can trigger from within the driver. In contrast to PCI FLR it
> doesn't require re-enumeration of PCI BAR. It can be useful in case
> GT fails to reset. It is also the only way to trigger GSC reset from
> the driver and can be used in future addition of GSC support.
> 
> Signed-off-by: Andrzej Hajda <andrzej.hajda at intel.com>
> ---
> Hi all,
> 
> This is initial attempt to implement Driver-FLR in xe. Implementation
> is copied from i915. I've skipped comments, but I can re-add them if
> requested.
> In i915 Driver-FLR is performed on driver unload if GSC was used.
> In xe GSC is not yet supported, but I have added trigger to perform FLR
> in case GT fails to reset, I guess this could be proper use-case and
> of course this is prerequisite to GSC support.
> 
> Since this is my 1st patch to xe please be merciful in comments :)
> 
> Regards
> Andrzej
> ---
>   drivers/gpu/drm/xe/regs/xe_gt_regs.h |  6 +++++
>   drivers/gpu/drm/xe/xe_gt.c           |  2 ++
>   drivers/gpu/drm/xe/xe_gt_types.h     |  3 +++
>   drivers/gpu/drm/xe/xe_mmio.c         | 38 ++++++++++++++++++++++++++++
>   4 files changed, 49 insertions(+)
> 
> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> index e13fbbdf6929ea..769a8fd005931a 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> @@ -359,6 +359,12 @@
>   #define RCU_MODE				XE_REG(0x14800, XE_REG_OPTION_MASKED)
>   #define   RCU_MODE_CCS_ENABLE			REG_BIT(0)
>   
> +#define GU_CNTL					XE_REG(0x101010)
> +#define   LMEM_INIT				REG_BIT(7)
> +#define   DRIVERFLR				REG_BIT(31)
> +#define GU_DEBUG				XE_REG(0x101018)
> +#define   DRIVERFLR_STATUS			REG_BIT(31)
> +
>   #define FORCEWAKE_ACK_GT			XE_REG(0x130044)
>   #define   FORCEWAKE_KERNEL			BIT(0)
>   #define   FORCEWAKE_USER			BIT(1)
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index 1aa44d4f9ac1c8..1e0b76000ed46b 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -604,6 +604,8 @@ static int gt_reset(struct xe_gt *gt)
>   	xe_uevent_gt_reset_failure(to_pci_dev(gt_to_xe(gt)->drm.dev),
>   				   gt_to_tile(gt)->id, gt->info.id);
>   
> +	gt->needs_flr_on_fini = true;
> +
>   	return err;
>   }

I think this should be platform dependent and not set to true by default.

>   
> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> index d4310be3e1e7c8..dc194e9656abd5 100644
> --- a/drivers/gpu/drm/xe/xe_gt_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> @@ -347,6 +347,9 @@ struct xe_gt {
>   		/** @oob: bitmap with active OOB workaroudns */
>   		unsigned long *oob;
>   	} wa_active;
> +
> +	/** @needs_flr_on_fini: requests functional reset on fini */
> +	bool needs_flr_on_fini;
>   };
>   
>   #endif
> diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
> index 3ccc0af4430be4..593ecc98cdfe8a 100644
> --- a/drivers/gpu/drm/xe/xe_mmio.c
> +++ b/drivers/gpu/drm/xe/xe_mmio.c
> @@ -4,6 +4,7 @@
>    */
>   
>   #include <linux/minmax.h>
> +#include <linux/units.h>
>   
>   #include "xe_mmio.h"
>   
> @@ -364,9 +365,46 @@ static void xe_mmio_probe_tiles(struct xe_device *xe)
>   	}
>   }
>   
> +static void driver_initiated_flr(struct xe_gt *gt)
> +{
> +	const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
> +	struct xe_device *xe = gt->tile->xe;
> +	int ret;
> +
> +	drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
> +
> +	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
> +	if (ret) {
> +		drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
> +		return;
> +	}
> +	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
> +	xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);
> +	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
> +	if (ret) {
> +		drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
> +		return;
> +	}
> +	ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
> +			     flr_timeout, NULL, false);
> +	if (ret) {
> +		drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
> +		return;
> +	}
> +
> +	/* Clear sticky completion status */
> +	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
> +}
> +
>   static void mmio_fini(struct drm_device *drm, void *arg)
>   {
>   	struct xe_device *xe = arg;
> +	struct xe_gt *gt;
> +	u8 id;
> +
> +	for_each_gt(gt, xe, id)
> +		if (gt->needs_flr_on_fini)
> +			driver_initiated_flr(gt);
>   
>   	pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
>   	if (xe->mem.vram.mapping)

Not sure mmio_fini is the best place to put this, as this is not mmio 
related.

--
Ofir




More information about the Intel-xe mailing list