[CI 2/2] drm/xe: re-order lmem init check and wait for initialization to complete

Riana Tauro riana.tauro at intel.com
Wed Apr 10 12:08:19 UTC 2024



On 4/9/2024 3:15 AM, Rodrigo Vivi wrote:
> On Thu, Mar 28, 2024 at 11:32:57AM +0530, Riana Tauro wrote:
>> Lmem init check should be done only after pcode initialization
>> status is complete. Move lmem init check after pcode status
>> check. Also wait for a short while after pcode status check
>> to allow completion of the task.
>>
>> Failing to do so can lead to aborting the module load,
>> leaving the system unusable. Wait until the lmem initialization
>> is complete, within a timeout (60s) or until the user aborts.
>>
>> v2: use bool as return type
>>      re-order the code comment (Rodrigo)
>>      add comment for deferring probe (Himal)
> 
> unfortunately this doesn't apply cleanly anymore.
> Could you please send another version? And let's try to merge it quickly next time.
Hi Rodrigo

Sorry for the delay in asking for the merge. The function name changed.
I have resent the patch series after rebasing.

Thanks
Riana
> 
> Sorry,
> Rodrigo.
> 
>>
>> Signed-off-by: Riana Tauro <riana.tauro at intel.com>
>> Acked-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
>> Reviewed-by: Himal Prasad Ghimiray<himal.prasad.ghimiray at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_device.c | 62 +++++++++++++++++++++++++++++++++-
>>   drivers/gpu/drm/xe/xe_mmio.c   | 29 ----------------
>>   2 files changed, 61 insertions(+), 30 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
>> index 69d05e610f03..be60fa5c05fc 100644
>> --- a/drivers/gpu/drm/xe/xe_device.c
>> +++ b/drivers/gpu/drm/xe/xe_device.c
>> @@ -413,12 +413,68 @@ static int xe_set_dma_info(struct xe_device *xe)
>>   	return err;
>>   }
>>   
>> +static bool verify_lmem_ready(struct xe_gt *gt)
>> +{
>> +	u32 val = xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT;
>> +
>> +	return !!val;
>> +}
>> +
>> +static int wait_for_lmem_ready(struct xe_device *xe)
>> +{
>> +	struct xe_gt *gt = xe_root_mmio_gt(xe);
>> +	unsigned long timeout, start;
>> +
>> +	if (!IS_DGFX(xe))
>> +		return 0;
>> +
>> +	if (IS_SRIOV_VF(xe))
>> +		return 0;
>> +
>> +	if (verify_lmem_ready(gt))
>> +		return 0;
>> +
>> +	drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
>> +
>> +	start = jiffies;
>> +	timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */
>> +
>> +	do {
>> +		if (signal_pending(current))
>> +			return -EINTR;
>> +
>> +		/*
>> +		 * The boot firmware initializes local memory and
>> +		 * assesses its health. If memory training fails,
>> +		 * the punit will have been instructed to keep the GT powered
>> +		 * down.we won't be able to communicate with it
>> +		 *
>> +		 * If the status check is done before punit updates the register,
>> +		 * it can lead to the system being unusable.
>> +		 * use a timeout and defer the probe to prevent this.
>> +		 */
>> +		if (time_after(jiffies, timeout)) {
>> +			drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
>> +			return -EPROBE_DEFER;
>> +		}
>> +
>> +		msleep(20);
>> +
>> +	} while (!verify_lmem_ready(gt));
>> +
>> +	drm_dbg(&xe->drm, "lmem ready after %ums",
>> +		jiffies_to_msecs(jiffies - start));
>> +
>> +	return 0;
>> +}
>> +
>>   /**
>>    * xe_device_probe_early: Device early probe
>>    * @xe: xe device instance
>>    *
>>    * Initialize MMIO resources that don't require any
>> - * knowledge about tile count. Also initialize pcode
>> + * knowledge about tile count. Also initialize pcode and
>> + * check vram initialization on root tile.
>>    *
>>    * Return: 0 on success, error code on failure
>>    */
>> @@ -438,6 +494,10 @@ int xe_device_probe_early(struct xe_device *xe)
>>   	if (err)
>>   		return err;
>>   
>> +	err = wait_for_lmem_ready(xe);
>> +	if (err)
>> +		return err;
>> +
>>   	return 0;
>>   }
>>   
>> diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
>> index 1de9de4f94b6..06d507998d3c 100644
>> --- a/drivers/gpu/drm/xe/xe_mmio.c
>> +++ b/drivers/gpu/drm/xe/xe_mmio.c
>> @@ -360,30 +360,6 @@ static void mmio_fini(struct drm_device *drm, void *arg)
>>   		iounmap(xe->mem.vram.mapping);
>>   }
>>   
>> -static int xe_verify_lmem_ready(struct xe_device *xe)
>> -{
>> -	struct xe_gt *gt = xe_root_mmio_gt(xe);
>> -
>> -	if (!IS_DGFX(xe))
>> -		return 0;
>> -
>> -	if (IS_SRIOV_VF(xe))
>> -		return 0;
>> -
>> -	/*
>> -	 * The boot firmware initializes local memory and assesses its health.
>> -	 * If memory training fails, the punit will have been instructed to
>> -	 * keep the GT powered down; we won't be able to communicate with it
>> -	 * and we should not continue with driver initialization.
>> -	 */
>> -	if (!(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) {
>> -		drm_err(&xe->drm, "VRAM not initialized by firmware\n");
>> -		return -ENODEV;
>> -	}
>> -
>> -	return 0;
>> -}
>> -
>>   int xe_mmio_init(struct xe_device *xe)
>>   {
>>   	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
>> @@ -407,16 +383,11 @@ int xe_mmio_init(struct xe_device *xe)
>>   int xe_mmio_root_tile_init(struct xe_device *xe)
>>   {
>>   	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
>> -	int err;
>>   
>>   	/* Setup first tile; other tiles (if present) will be setup later. */
>>   	root_tile->mmio.size = SZ_16M;
>>   	root_tile->mmio.regs = xe->mmio.regs;
>>   
>> -	err = xe_verify_lmem_ready(xe);
>> -	if (err)
>> -		return err;
>> -
>>   	return 0;
>>   }
>>   
>> -- 
>> 2.40.0
>>


More information about the Intel-xe mailing list