[CI 2/2] drm/xe: re-order lmem init check and wait for initialization to complete

Rodrigo Vivi rodrigo.vivi at intel.com
Mon Apr 8 21:45:32 UTC 2024


On Thu, Mar 28, 2024 at 11:32:57AM +0530, Riana Tauro wrote:
> Lmem init check should be done only after pcode initialization
> status is complete. Move lmem init check after pcode status
> check. Also wait for a short while after pcode status check
> to allow completion of the task.
> 
> Failing to do so, can lead to aborting the module load
> leaving the system unusable. Wait until the lmem initialization
> is complete within a timeout (60s) or till the user aborts.
> 
> v2: use bool as return type
>     re-order the code comment (Rodrigo)
>     add comment for deferring probe (Himal)

unfortunately this doesn't apply cleanly anymore.
Could you please send another version? And we'll try to merge it quickly next time.

Sorry,
Rodrigo.

> 
> Signed-off-by: Riana Tauro <riana.tauro at intel.com>
> Acked-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Reviewed-by: Himal Prasad Ghimiray<himal.prasad.ghimiray at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_device.c | 62 +++++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/xe/xe_mmio.c   | 29 ----------------
>  2 files changed, 61 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 69d05e610f03..be60fa5c05fc 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -413,12 +413,68 @@ static int xe_set_dma_info(struct xe_device *xe)
>  	return err;
>  }
>  
> +static bool verify_lmem_ready(struct xe_gt *gt)
> +{
> +	u32 val = xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT;
> +
> +	return !!val;
> +}
> +
> +static int wait_for_lmem_ready(struct xe_device *xe)
> +{
> +	struct xe_gt *gt = xe_root_mmio_gt(xe);
> +	unsigned long timeout, start;
> +
> +	if (!IS_DGFX(xe))
> +		return 0;
> +
> +	if (IS_SRIOV_VF(xe))
> +		return 0;
> +
> +	if (verify_lmem_ready(gt))
> +		return 0;
> +
> +	drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
> +
> +	start = jiffies;
> +	timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */
> +
> +	do {
> +		if (signal_pending(current))
> +			return -EINTR;
> +
> +		/*
> +		 * The boot firmware initializes local memory and
> +		 * assesses its health. If memory training fails,
> +		 * the punit will have been instructed to keep the GT powered
> +		 * down; we won't be able to communicate with it
> +		 *
> +		 * If the status check is done before punit updates the register,
> +		 * it can lead to the system being unusable.
> +		 * use a timeout and defer the probe to prevent this.
> +		 */
> +		if (time_after(jiffies, timeout)) {
> +			drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
> +			return -EPROBE_DEFER;
> +		}
> +
> +		msleep(20);
> +
> +	} while (!verify_lmem_ready(gt));
> +
> +	drm_dbg(&xe->drm, "lmem ready after %ums",
> +		jiffies_to_msecs(jiffies - start));
> +
> +	return 0;
> +}
> +
>  /**
>   * xe_device_probe_early: Device early probe
>   * @xe: xe device instance
>   *
>   * Initialize MMIO resources that don't require any
> - * knowledge about tile count. Also initialize pcode
> + * knowledge about tile count. Also initialize pcode and
> + * check vram initialization on root tile.
>   *
>   * Return: 0 on success, error code on failure
>   */
> @@ -438,6 +494,10 @@ int xe_device_probe_early(struct xe_device *xe)
>  	if (err)
>  		return err;
>  
> +	err = wait_for_lmem_ready(xe);
> +	if (err)
> +		return err;
> +
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
> index 1de9de4f94b6..06d507998d3c 100644
> --- a/drivers/gpu/drm/xe/xe_mmio.c
> +++ b/drivers/gpu/drm/xe/xe_mmio.c
> @@ -360,30 +360,6 @@ static void mmio_fini(struct drm_device *drm, void *arg)
>  		iounmap(xe->mem.vram.mapping);
>  }
>  
> -static int xe_verify_lmem_ready(struct xe_device *xe)
> -{
> -	struct xe_gt *gt = xe_root_mmio_gt(xe);
> -
> -	if (!IS_DGFX(xe))
> -		return 0;
> -
> -	if (IS_SRIOV_VF(xe))
> -		return 0;
> -
> -	/*
> -	 * The boot firmware initializes local memory and assesses its health.
> -	 * If memory training fails, the punit will have been instructed to
> -	 * keep the GT powered down; we won't be able to communicate with it
> -	 * and we should not continue with driver initialization.
> -	 */
> -	if (!(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) {
> -		drm_err(&xe->drm, "VRAM not initialized by firmware\n");
> -		return -ENODEV;
> -	}
> -
> -	return 0;
> -}
> -
>  int xe_mmio_init(struct xe_device *xe)
>  {
>  	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
> @@ -407,16 +383,11 @@ int xe_mmio_init(struct xe_device *xe)
>  int xe_mmio_root_tile_init(struct xe_device *xe)
>  {
>  	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
> -	int err;
>  
>  	/* Setup first tile; other tiles (if present) will be setup later. */
>  	root_tile->mmio.size = SZ_16M;
>  	root_tile->mmio.regs = xe->mmio.regs;
>  
> -	err = xe_verify_lmem_ready(xe);
> -	if (err)
> -		return err;
> -
>  	return 0;
>  }
>  
> -- 
> 2.40.0
> 


More information about the Intel-xe mailing list