[Intel-gfx] [PATCH v3] drm/i915/guc: Propagate the fw xfer timeout

Thu Oct 18 20:48:30 UTC 2018

On 18/10/18 12:55, Chris Wilson wrote:
> Propagate the timeout on transferring the fw back to the caller where it
> may act upon it, usually by restarting the xfer before failing.
> 
> v2: Simplify the wait to only wait upon the guc signaling completion,
> with an assertion that the fw xfer must have completed for it to be
> ready!
> 
> Testcase: igt/drv_selftest/live_hangcheck
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> ---
>   drivers/gpu/drm/i915/intel_guc_fw.c | 106 +++++++++++++---------------
>   1 file changed, 50 insertions(+), 56 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c
> index a9e6fcce467c..51b292001edd 100644
> --- a/drivers/gpu/drm/i915/intel_guc_fw.c
> +++ b/drivers/gpu/drm/i915/intel_guc_fw.c
> @@ -125,66 +125,26 @@ static void guc_prepare_xfer(struct intel_guc *guc)
>   }
>   
>   /* Copy RSA signature from the fw image to HW for verification */
> -static int guc_xfer_rsa(struct intel_guc *guc, struct i915_vma *vma)
> +static void guc_xfer_rsa(struct intel_guc *guc, struct i915_vma *vma)
>   {
>   	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> -	struct intel_uc_fw *guc_fw = &guc->fw;
> -	struct sg_table *sg = vma->pages;
>   	u32 rsa[UOS_RSA_SCRATCH_COUNT];
>   	int i;
>   
> -	if (sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, sizeof(rsa),
> -			       guc_fw->rsa_offset) != sizeof(rsa))
> -		return -EINVAL;
> +	sg_pcopy_to_buffer(vma->pages->sgl, vma->pages->nents,
> +			   rsa, sizeof(rsa), guc->fw.rsa_offset);
>   
>   	for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
>   		I915_WRITE(UOS_RSA_SCRATCH(i), rsa[i]);
> -
> -	return 0;
>   }
>   
> -/*
> - * Transfer the firmware image to RAM for execution by the microcontroller.
> - *
> - * Architecturally, the DMA engine is bidirectional, and can potentially even
> - * transfer between GTT locations. This functionality is left out of the API
> - * for now as there is no need for it.
> - */
> -static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma)
> +static bool guc_xfer_completed(struct intel_guc *guc, u32 *status)
>   {
>   	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> -	struct intel_uc_fw *guc_fw = &guc->fw;
> -	unsigned long offset;
> -	u32 status;
> -	int ret;
> -
> -	/*
> -	 * The header plus uCode will be copied to WOPCM via DMA, excluding any
> -	 * other components
> -	 */
> -	I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size);
> -
> -	/* Set the source address for the new blob */
> -	offset = intel_guc_ggtt_offset(guc, vma) + guc_fw->header_offset;
> -	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
> -	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
>   
> -	/*
> -	 * Set the DMA destination. Current uCode expects the code to be
> -	 * loaded at 8k; locations below this are used for the stack.
> -	 */
> -	I915_WRITE(DMA_ADDR_1_LOW, 0x2000);
> -	I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
> -
> -	/* Finally start the DMA */
> -	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA));
> -
> -	/* Wait for DMA to finish */
> -	ret = __intel_wait_for_register_fw(dev_priv, DMA_CTRL, START_DMA, 0,
> -					   2, 100, &status);
> -	DRM_DEBUG_DRIVER("GuC DMA status %#x\n", status);
> -
> -	return ret;
> +	/* Did we complete the xfer? */
> +	*status = I915_READ(DMA_CTRL);
> +	return !(*status & START_DMA);
>   }
>   
>   /*
> @@ -228,9 +188,51 @@ static int guc_wait_ucode(struct intel_guc *guc)
>   		ret = -ENOEXEC;
>   	}
>   
> +	if (ret == 0 && !guc_xfer_completed(guc, &status)) {
> +		DRM_ERROR("GuC is ready, but the xfer %08x is incomplete\n",
> +			  status);
> +		ret = -ENXIO;

Do we want to allow reset & retry in this scenario? This feels like 
something weird happening in HW, so I'd say it'd be worth to allow a retry.

Thanks,
Daniele

> +	}
> +
>   	return ret;
>   }
>   
> +/*
> + * Transfer the firmware image to RAM for execution by the microcontroller.
> + *
> + * Architecturally, the DMA engine is bidirectional, and can potentially even
> + * transfer between GTT locations. This functionality is left out of the API
> + * for now as there is no need for it.
> + */
> +static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma)
> +{
> +	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +	struct intel_uc_fw *guc_fw = &guc->fw;
> +	unsigned long offset;
> +
> +	/*
> +	 * The header plus uCode will be copied to WOPCM via DMA, excluding any
> +	 * other components
> +	 */
> +	I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size);
> +
> +	/* Set the source address for the new blob */
> +	offset = intel_guc_ggtt_offset(guc, vma) + guc_fw->header_offset;
> +	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
> +	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
> +
> +	/*
> +	 * Set the DMA destination. Current uCode expects the code to be
> +	 * loaded at 8k; locations below this are used for the stack.
> +	 */
> +	I915_WRITE(DMA_ADDR_1_LOW, 0x2000);
> +	I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
> +
> +	/* Finally start the DMA */
> +	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA));
> +
> +	return guc_wait_ucode(guc);
> +}
>   /*
>    * Load the GuC firmware blob into the MinuteIA.
>    */
> @@ -251,17 +253,9 @@ static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma)
>   	 * by the DMA engine in one operation, whereas the RSA signature is
>   	 * loaded via MMIO.
>   	 */
> -	ret = guc_xfer_rsa(guc, vma);
> -	if (ret)
> -		DRM_WARN("GuC firmware signature xfer error %d\n", ret);
> +	guc_xfer_rsa(guc, vma);
>   
>   	ret = guc_xfer_ucode(guc, vma);
> -	if (ret)
> -		DRM_WARN("GuC firmware code xfer error %d\n", ret);
> -
> -	ret = guc_wait_ucode(guc);
> -	if (ret)
> -		DRM_ERROR("GuC firmware xfer error %d\n", ret);
>   
>   	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>   
>