[Intel-gfx] [PATCH v2 1/2] drm/i915: Impletments dynamic WOPCM partitioning.

Michal Wajdeczko michal.wajdeczko at intel.com
Thu Nov 16 21:47:46 UTC 2017


On Thu, 16 Nov 2017 08:34:01 +0100, Sagar Arun Kamble  
<sagar.a.kamble at intel.com> wrote:

> Typo in the subject.
> GLK showing failure to load GuC with this approach on CI.
>
> On 11/15/2017 10:47 PM, Jackie Li wrote:
>> Static WOPCM partitioning will lead to GuC loading failure
>> if the GuC/HuC firmware size exceeded current static 512KB
>> partition boundary.
>>
>> This patch enables the dynamical calculation of the WOPCM aperture
>> used by GuC/HuC firmware. GuC WOPCM offset is  set to
>> HuC size + reserved WOPCM size. GuC WOPCM size is set to
>> total WOPCM size - GuC WOPCM offset - RC6CTX size. In this case,
>> GuC WOPCM offset will be updated based on the size of HuC firmware
>> while GuC WOPCM size will be set to use all the remaining WOPCM space.
>>
>> v2:
>>   - Removed intel_wopcm_init (Ville/Sagar/Joonas)
>>   - Renamed and Moved the intel_wopcm_partition into intel_guc (Sagar)
>>   - Removed unnecessary function calls (Joonas)
>>   - Init GuC WOPCM partition as soon as firmware fetching is completed
>>
>> Signed-off-by: Jackie Li <yaodong.li at intel.com>
>> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
>> Cc: Sagar Arun Kamble <sagar.a.kamble at intel.com>
>> Cc: Sujaritha Sundaresan <sujaritha.sundaresan at intel.com>
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> Cc: John Spotswood <john.a.spotswood at intel.com>
>> Cc: Oscar Mateo <oscar.mateo at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem_context.c |   4 +-
>>   drivers/gpu/drm/i915/i915_guc_reg.h     |  18 +++--
>>   drivers/gpu/drm/i915/intel_guc.c        |  25 ++++---
>>   drivers/gpu/drm/i915/intel_guc.h        |  25 +++----
>>   drivers/gpu/drm/i915/intel_huc.c        |   2 +-
>>   drivers/gpu/drm/i915/intel_uc.c         | 116  
>> +++++++++++++++++++++++++++++++-
>>   drivers/gpu/drm/i915/intel_uc_fw.c      |  11 ++-
>>   7 files changed, 163 insertions(+), 38 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c  
>> b/drivers/gpu/drm/i915/i915_gem_context.c
>> index 2db0406..285c310 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -312,12 +312,12 @@ __create_hw_context(struct drm_i915_private  
>> *dev_priv,
>>   	ctx->desc_template =
>>   		default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt);
>>   -	/* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC  
>> is not
>> +	/* GuC requires the ring to be placed above GuC WOPCM top. if GuC is  
>> not
>>   	 * present or not in use we still need a small bias as ring  
>> wraparound
>>   	 * at offset 0 sometimes hangs. No idea why.
>>   	 */
>>   	if (HAS_GUC(dev_priv) && i915_modparams.enable_guc_loading)
>> -		ctx->ggtt_offset_bias = GUC_WOPCM_TOP;
>> +		ctx->ggtt_offset_bias = dev_priv->guc.wopcm.top;
>>   	else
>>   		ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE;
>>   diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h  
>> b/drivers/gpu/drm/i915/i915_guc_reg.h
>> index bc1ae7d..567df12 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_guc_reg.h
>> @@ -67,17 +67,27 @@
>>   #define DMA_GUC_WOPCM_OFFSET		_MMIO(0xc340)
>>   #define   HUC_LOADING_AGENT_VCR		  (0<<1)
>>   #define   HUC_LOADING_AGENT_GUC		  (1<<1)
>> -#define   GUC_WOPCM_OFFSET_VALUE	  0x80000	/* 512KB */
>>   #define GUC_MAX_IDLE_COUNT		_MMIO(0xC3E4)
>>     #define HUC_STATUS2             _MMIO(0xD3B0)
>>   #define   HUC_FW_VERIFIED       (1<<7)
>>     /* Defines WOPCM space available to GuC firmware */
>> +/* Default WOPCM size 1MB */
>> +#define WOPCM_DEFAULT_SIZE		(0x1 << 20)
> possible to get this size from register GEN6_STOLEN_RESERVED  
> (ggtt->stolen_reserved_size)
>> +/* Reserved WOPCM size 16KB */
>> +#define WOPCM_RESERVED_SIZE		(0x4000)
>> +/* GUC WOPCM Offset need to be 16KB aligned */
>> +#define WOPCM_OFFSET_ALIGNMENT		(0x4000)
> prefix this with GUC_ as it is specific to GuC in WOPCM
>> +/* 8KB stack reserved for GuC FW*/
>> +#define GUC_WOPCM_STACK_RESERVED	(0x2000)
>> +/* 24KB WOPCM reserved for RC6 CTX on BXT */
>> +#define BXT_WOPCM_RC6_RESERVED		(0x6000)
>> +
>> +#define GEN9_GUC_WOPCM_DELTA		4
>> +#define GEN9_GUC_WOPCM_OFFSET		(0x24000)

I'm not sure that definitions unrelated to register bits shall
be defined here

>> +
>>   #define GUC_WOPCM_SIZE			_MMIO(0xc050)
>> -/* GuC addresses below GUC_WOPCM_TOP don't map through the GTT */
>> -#define   GUC_WOPCM_TOP			  (0x80 << 12)	/* 512KB */
>> -#define   BXT_GUC_WOPCM_RC6_RESERVED	  (0x10 << 12)	/* 64KB  */
>>     /* GuC addresses above GUC_GGTT_TOP also don't map through the GTT  
>> */
>>   #define GUC_GGTT_TOP			0xFEE00000
>> diff --git a/drivers/gpu/drm/i915/intel_guc.c  
>> b/drivers/gpu/drm/i915/intel_guc.c
>> index 9678630..0c6bd63 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.c
>> +++ b/drivers/gpu/drm/i915/intel_guc.c
>> @@ -337,7 +337,7 @@ int intel_guc_resume(struct drm_i915_private  
>> *dev_priv)
>>    * This is a wrapper to create an object for use with the GuC. In  
>> order to
>>    * use it inside the GuC, an object needs to be pinned lifetime, so  
>> we allocate
>>    * both some backing storage and a range inside the Global GTT. We  
>> must pin
>> - * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because  
>> that
>> + * it in the GGTT somewhere other than than [0, GuC WOPCM top) because  
>> that
>>    * range is reserved inside GuC.
>>    *
>>    * Return:	A i915_vma if successful, otherwise an ERR_PTR.
>> @@ -358,7 +358,8 @@ struct i915_vma *intel_guc_allocate_vma(struct  
>> intel_guc *guc, u32 size)
>>   		goto err;
>>     	ret = i915_vma_pin(vma, 0, PAGE_SIZE,
>> -			   PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
>> +			   PIN_GLOBAL | PIN_OFFSET_BIAS |
>> +			   dev_priv->guc.wopcm.top);
>>   	if (ret) {
>>   		vma = ERR_PTR(ret);
>>   		goto err;
>> @@ -371,13 +372,19 @@ struct i915_vma *intel_guc_allocate_vma(struct  
>> intel_guc *guc, u32 size)
>>   	return vma;
>>   }
>>   -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv)
>> +/*
>> + * GuC does not allow any gfx GGTT address that falls into range
>> + * [0, GuC WOPCM top), which is reserved for Boot ROM, SRAM and WOPCM.
>> + * All gfx objects used by GuC is pinned with PIN_OFFSET_BIAS along  
>> with
>> + * top of WOPCM.
>> + */
>> +u32 guc_ggtt_offset(struct i915_vma *vma)
>>   {
>> -	u32 wopcm_size = GUC_WOPCM_TOP;
>> -
>> -	/* On BXT, the top of WOPCM is reserved for RC6 context */
>> -	if (IS_GEN9_LP(dev_priv))
>> -		wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED;
>> +	u32 guc_wopcm_top = vma->vm->i915->guc.wopcm.top;
>> +	u32 offset = i915_ggtt_offset(vma);
>>   -	return wopcm_size;
>> +	GEM_BUG_ON(!guc_wopcm_top);
>> +	GEM_BUG_ON(offset < guc_wopcm_top);
>> +	GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP));
>> +	return offset;
>>   }
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h  
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index 607e025..77f619b 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -39,6 +39,12 @@ struct guc_preempt_work {
>>   	struct intel_engine_cs *engine;
>>   };
>>   +struct guc_wopcm_partition {

s/guc_wopcm_partition/intel_guc_wopcm ?

>> +	u32 offset;
>> +	u32 size;
>> +	u32 top;
>> +};
>> +
>>   /*
>>    * Top level structure of GuC. It handles firmware loading and  
>> manages client
>>    * pool and doorbells. intel_guc owns a i915_guc_client to replace  
>> the legacy
>> @@ -46,6 +52,7 @@ struct guc_preempt_work {
>>    */
>>   struct intel_guc {
>>   	struct intel_uc_fw fw;
>> +	struct guc_wopcm_partition wopcm;

hmm, or if we call it struct intel_wopcm then we could place it in
drm_i915_private near guc/huc.

>>   	struct intel_guc_log log;
>>   	struct intel_guc_ct ct;
>>   @@ -100,22 +107,6 @@ static inline void intel_guc_notify(struct  
>> intel_guc *guc)
>>   	guc->notify(guc);
>>   }
>>   -/*
>> - * GuC does not allow any gfx GGTT address that falls into range [0,  
>> WOPCM_TOP),
>> - * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top  
>> address is
>> - * 512K. In order to exclude 0-512K address space from GGTT, all gfx  
>> objects
>> - * used by GuC is pinned with PIN_OFFSET_BIAS along with size of WOPCM.
>> - */
>> -static inline u32 guc_ggtt_offset(struct i915_vma *vma)
>> -{
>> -	u32 offset = i915_ggtt_offset(vma);
>> -
>> -	GEM_BUG_ON(offset < GUC_WOPCM_TOP);
>> -	GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP));
>> -
>> -	return offset;
>> -}
>> -
>>   void intel_guc_init_early(struct intel_guc *guc);
>>   void intel_guc_init_send_regs(struct intel_guc *guc);
>>   void intel_guc_init_params(struct intel_guc *guc);
>> @@ -126,6 +117,6 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32  
>> rsa_offset);
>>   int intel_guc_suspend(struct drm_i915_private *dev_priv);
>>   int intel_guc_resume(struct drm_i915_private *dev_priv);
>>   struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32  
>> size);
>> -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
>> +u32 guc_ggtt_offset(struct i915_vma *vma);
> Why are we exporting this function. If we have to export then name  
> should be intel_guc_ggtt_offset also
> pass intel_guc struct as param.
> Any function we export for GuC should have name prefixed "intel_guc" and  
> intel_guc struct as param.
> suspend/resume functions above are to be updated in upcoming series.
>>     #endif
>> diff --git a/drivers/gpu/drm/i915/intel_huc.c  
>> b/drivers/gpu/drm/i915/intel_huc.c
>> index 98d1725..0baedc4 100644
>> --- a/drivers/gpu/drm/i915/intel_huc.c
>> +++ b/drivers/gpu/drm/i915/intel_huc.c
>> @@ -202,7 +202,7 @@ void intel_huc_auth(struct intel_huc *huc)
>>   		return;
>>     	vma = i915_gem_object_ggtt_pin(huc->fw.obj, NULL, 0, 0,
>> -				PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
>> +				PIN_OFFSET_BIAS | guc->wopcm.top);
>>   	if (IS_ERR(vma)) {
>>   		DRM_ERROR("failed to pin huc fw object %d\n",
>>   				(int)PTR_ERR(vma));
>> diff --git a/drivers/gpu/drm/i915/intel_uc.c  
>> b/drivers/gpu/drm/i915/intel_uc.c
>> index aec2954..4f6fa67 100644
>> --- a/drivers/gpu/drm/i915/intel_uc.c
>> +++ b/drivers/gpu/drm/i915/intel_uc.c
>> @@ -86,10 +86,122 @@ void intel_uc_init_early(struct drm_i915_private  
>> *dev_priv)
>>   	intel_guc_init_early(&dev_priv->guc);
>>   }
>>   +static inline u32 reserved_wopcm_size(struct drm_i915_private *i915)
> Can we make this wopcm_top_reserved_size.
> there is reserved at the beginning (WOPCM_RESERVED_SIZE). Can name it  
> WOPCM_BEGIN_RESERVED_SIZE.
>> +{
>> +	/* On BXT, the top of WOPCM is reserved for RC6 context */
>> +	if (IS_GEN9_LP(i915))
>> +		return BXT_WOPCM_RC6_RESERVED;
>> +
>> +	return 0;
>> +}
>> +
>> +static int do_wopcm_partition(struct drm_i915_private *i915, u32  
>> offset,
>> +			       u32 reserved_size)
> This can be named guc_wopcm_partition_init as Chris suggested.
> and name caller as intel_guc_init_wopcm_partition.
> These functions are GuC specific hence they should be defined in  
> intel_guc.c.

Hmm, as during wopcm partition we need to know HuC fw size, I'm not 100%
sure that these functions shall be placed in intel_guc.c ... unless we
try to pass fw sizes as params, or

Other option would be to create intel_[guc|uc]_wopcm.c|h

>> +{
>> +	struct guc_wopcm_partition *wp = &i915->guc.wopcm;
>> +	u32 aligned_offset = ALIGN(offset, WOPCM_OFFSET_ALIGN	MENT);
>> +
>> +	if (offset >= WOPCM_DEFAULT_SIZE)
>> +		return -E2BIG;
>> +
>> +	if (reserved_size >= WOPCM_DEFAULT_SIZE)
>> +		return -E2BIG;
>> +
>> +	if ((aligned_offset + reserved_size) >= WOPCM_DEFAULT_SIZE)
>> +		return -E2BIG;
>> +
>> +	wp->offset = aligned_offset;
>> +	wp->top = WOPCM_DEFAULT_SIZE - wp->offset;
>> +	wp->size = wp->top - reserved_size;
>> +
>> +	return 0;
>> +}
>> +
>> +static void guc_init_wopcm_partition(struct drm_i915_private *i915)
>> +{
>> +	struct intel_uc_fw *guc_fw = &i915->guc.fw;
>> +	struct intel_uc_fw *huc_fw = &i915->huc.fw;
>> +	struct guc_wopcm_partition *wp = &i915->guc.wopcm;
>> +	size_t huc_size, guc_size;
>> +	u32 offset;
>> +	u32 reserved;
>> +	u32 wopcm_base;
>> +	u32 delta;
>> +	int err;
>> +
>> +	if (!i915_modparams.enable_guc_loading)
>> +		return;
>> +
>> +	/* No need to do partition if failed to fetch both GuC and HuC FW */
>> +	if (guc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS &&
>> +		huc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS)
>> +		goto fail;
>> +
>> +	huc_size = 0;
>> +	guc_size = 0;
>> +	offset = WOPCM_RESERVED_SIZE;
>> +	reserved = reserved_wopcm_size(i915);
>> +
>> +	if (huc_fw->fetch_status == INTEL_UC_FIRMWARE_SUCCESS)
>> +		huc_size = huc_fw->header_size + huc_fw->ucode_size;

maybe to reduce code duplication you can create inline in intel_uc_fw.h

static inline u32 intel_uc_fw_get_size(struct intel_uc_fw *uc_fw)
{
	if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS)
		return 0;
	return uc_fw->header_size + uc_fw->ucode_size;
}

>> +
>> +	err = do_wopcm_partition(i915, offset + huc_size, reserved);
>> +	if (err) {
>> +		if (!huc_size)
>> +			goto fail;
>> +
>> +		/* Partition without loading HuC FW. */
>> +		err = do_wopcm_partition(i915, offset, reserved);
>> +		if (err)
>> +			goto fail;
>> +	}
>> +
>> +	/*
>> +	 * Check hardware restriction on Gen9
>> +	 * GuC WOPCM size is at least 4 bytes larger than GuC WOPCM base due
>> +	 * to hardware limitation on Gen9.
>> +	 */
>> +	if (IS_GEN9(i915)) {
>> +		wopcm_base = wp->offset + GEN9_GUC_WOPCM_OFFSET;
>> +		if (unlikely(wopcm_base > wp->size))
>> +			goto fail;
>> +
>> +		delta = wp->size - wopcm_base;
>> +		if (unlikely(delta < GEN9_GUC_WOPCM_DELTA))
>> +			goto fail;
>> +	}
>> +
>> +	if (guc_fw->fetch_status == INTEL_UC_FIRMWARE_SUCCESS) {
>> +		guc_size = guc_fw->header_size + guc_fw->ucode_size;
>> +		/* Need 8K stack for GuC */
>> +		guc_size += GUC_WOPCM_STACK_RESERVED;
>> +	}
>> +
>> +	if (guc_size > wp->size)
>> +		goto fail;
>> +
>> +	DRM_DEBUG_DRIVER("GuC WOPCM offset %dKB, size %dKB, top %dKB\n",
>> +		wp->offset >> 10, wp->size >> 10, wp->top >> 10);
>> +
>> +	return;
>> +
>> +fail:
>> +	DRM_ERROR("WOPCM partitioning failed. Falling back to execlist  
>> mode\n");
>> +
>> +	intel_uc_fini_fw(i915);
>> +
>> +	/* GuC submission won't work without valid GuC WOPCM partition */
>> +	if (i915_modparams.enable_guc_submission)
>> +		i915_modparams.enable_guc_submission = 0;
>> +
>> +	i915_modparams.enable_guc_loading = 0;
>> +}
>> +
>>   void intel_uc_init_fw(struct drm_i915_private *dev_priv)
>>   {
>>   	intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw);
>>   	intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw);
>> +	guc_init_wopcm_partition(dev_priv);
> Create intel_uc_init_wopcm_partition(dev_priv) and call  
> intel_guc_init_wopcm_partition(guc) from there.

hmm, I'm not sure what benefit you expect from this call chain ?

>>   }
>>     void intel_uc_fini_fw(struct drm_i915_private *dev_priv)
>> @@ -174,9 +286,9 @@ int intel_uc_init_hw(struct drm_i915_private  
>> *dev_priv)
>>   	}
>>     	/* init WOPCM */
>> -	I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
>> +	I915_WRITE(GUC_WOPCM_SIZE, guc->wopcm.size);
>>   	I915_WRITE(DMA_GUC_WOPCM_OFFSET,
>> -		   GUC_WOPCM_OFFSET_VALUE | HUC_LOADING_AGENT_GUC);
>> +		   guc->wopcm.offset | HUC_LOADING_AGENT_GUC);
>>     	/* WaEnableuKernelHeaderValidFix:skl */
>>   	/* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
>> diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c  
>> b/drivers/gpu/drm/i915/intel_uc_fw.c
>> index 4bc82d3..34db2b1 100644
>> --- a/drivers/gpu/drm/i915/intel_uc_fw.c
>> +++ b/drivers/gpu/drm/i915/intel_uc_fw.c
>> @@ -95,9 +95,13 @@ void intel_uc_fw_fetch(struct drm_i915_private  
>> *dev_priv,
>>   	uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size;
>>   	uc_fw->ucode_size = (css->size_dw - css->header_size_dw) *  
>> sizeof(u32);
>>   -	/* Header and uCode will be loaded to WOPCM */
>> +	/*
>> +	 * Header and uCode will be loaded to WOPCM
>> +	 * Only check the size against the overall available WOPCM here. Will
>> +	 * continue to check the size during WOPCM partition calculation.
>> +	 */
>>   	size = uc_fw->header_size + uc_fw->ucode_size;
>> -	if (size > intel_guc_wopcm_size(dev_priv)) {
>> +	if (size > WOPCM_DEFAULT_SIZE) {
>>   		DRM_WARN("%s: Firmware is too large to fit in WOPCM\n",
>>   			 intel_uc_fw_type_repr(uc_fw->type));
>>   		err = -E2BIG;
>> @@ -207,6 +211,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
>>   		       int (*xfer)(struct intel_uc_fw *uc_fw,
>>   				   struct i915_vma *vma))
>>   {
>> +	struct drm_i915_private *i915 = to_i915(uc_fw->obj->base.dev);
>>   	struct i915_vma *vma;
>>   	int err;
>>   @@ -230,7 +235,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
>>   	}
>>     	vma = i915_gem_object_ggtt_pin(uc_fw->obj, NULL, 0, 0,
>> -				       PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
>> +				       PIN_OFFSET_BIAS | i915->guc.wopcm.top);
>>   	if (IS_ERR(vma)) {
>>   		err = PTR_ERR(vma);
>>   		DRM_DEBUG_DRIVER("%s fw ggtt-pin err=%d\n",


More information about the Intel-gfx mailing list