[Intel-gfx] [PATCH v2 1/2] drm/i915: Impletments dynamic WOPCM partitioning.
Yaodong Li
yaodong.li at intel.com
Wed Nov 29 00:27:16 UTC 2017
On 11/16/2017 01:47 PM, Michal Wajdeczko wrote:
> On Thu, 16 Nov 2017 08:34:01 +0100, Sagar Arun Kamble
> <sagar.a.kamble at intel.com> wrote:
>
>> Typo in the subject.
>> GLK showing failure to load GuC with this approach on CI.
>>
>> On 11/15/2017 10:47 PM, Jackie Li wrote:
>>> Static WOPCM partitioning will lead to GuC loading failure
>>> if the GuC/HuC firmware size exceeded current static 512KB
>>> partition boundary.
>>>
>>> This patch enables the dynamical calculation of the WOPCM aperture
>>> used by GuC/HuC firmware. GuC WOPCM offset is set to
>>> HuC size + reserved WOPCM size. GuC WOPCM size is set to
>>> total WOPCM size - GuC WOPCM offset - RC6CTX size. In this case,
>>> GuC WOPCM offset will be updated based on the size of HuC firmware
>>> while GuC WOPCM size will be set to use all the remaining WOPCM space.
>>>
>>> v2:
>>> - Removed intel_wopcm_init (Ville/Sagar/Joonas)
>>> - Renamed and Moved the intel_wopcm_partition into intel_guc (Sagar)
>>> - Removed unnecessary function calls (Joonas)
>>> - Init GuC WOPCM partition as soon as firmware fetching is completed
>>>
>>> Signed-off-by: Jackie Li <yaodong.li at intel.com>
>>> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
>>> Cc: Sagar Arun Kamble <sagar.a.kamble at intel.com>
>>> Cc: Sujaritha Sundaresan <sujaritha.sundaresan at intel.com>
>>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>>> Cc: John Spotswood <john.a.spotswood at intel.com>
>>> Cc: Oscar Mateo <oscar.mateo at intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_gem_context.c | 4 +-
>>> drivers/gpu/drm/i915/i915_guc_reg.h | 18 +++--
>>> drivers/gpu/drm/i915/intel_guc.c | 25 ++++---
>>> drivers/gpu/drm/i915/intel_guc.h | 25 +++----
>>> drivers/gpu/drm/i915/intel_huc.c | 2 +-
>>> drivers/gpu/drm/i915/intel_uc.c | 116
>>> +++++++++++++++++++++++++++++++-
>>> drivers/gpu/drm/i915/intel_uc_fw.c | 11 ++-
>>> 7 files changed, 163 insertions(+), 38 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
>>> b/drivers/gpu/drm/i915/i915_gem_context.c
>>> index 2db0406..285c310 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>> @@ -312,12 +312,12 @@ __create_hw_context(struct drm_i915_private
>>> *dev_priv,
>>> ctx->desc_template =
>>> default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt);
>>> - /* GuC requires the ring to be placed above GUC_WOPCM_TOP. If
>>> GuC is not
>>> + /* GuC requires the ring to be placed above GuC WOPCM top. if
>>> GuC is not
>>> * present or not in use we still need a small bias as ring
>>> wraparound
>>> * at offset 0 sometimes hangs. No idea why.
>>> */
>>> if (HAS_GUC(dev_priv) && i915_modparams.enable_guc_loading)
>>> - ctx->ggtt_offset_bias = GUC_WOPCM_TOP;
>>> + ctx->ggtt_offset_bias = dev_priv->guc.wopcm.top;
>>> else
>>> ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE;
>>> diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h
>>> b/drivers/gpu/drm/i915/i915_guc_reg.h
>>> index bc1ae7d..567df12 100644
>>> --- a/drivers/gpu/drm/i915/i915_guc_reg.h
>>> +++ b/drivers/gpu/drm/i915/i915_guc_reg.h
>>> @@ -67,17 +67,27 @@
>>> #define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340)
>>> #define HUC_LOADING_AGENT_VCR (0<<1)
>>> #define HUC_LOADING_AGENT_GUC (1<<1)
>>> -#define GUC_WOPCM_OFFSET_VALUE 0x80000 /* 512KB */
>>> #define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4)
>>> #define HUC_STATUS2 _MMIO(0xD3B0)
>>> #define HUC_FW_VERIFIED (1<<7)
>>> /* Defines WOPCM space available to GuC firmware */
>>> +/* Default WOPCM size 1MB */
>>> +#define WOPCM_DEFAULT_SIZE (0x1 << 20)
>> possible to get this size from register GEN6_STOLEN_RESERVED
>> (ggtt->stolen_reserved_size)
>>> +/* Reserved WOPCM size 16KB */
>>> +#define WOPCM_RESERVED_SIZE (0x4000)
>>> +/* GUC WOPCM Offset need to be 16KB aligned */
>>> +#define WOPCM_OFFSET_ALIGNMENT (0x4000)
>> prefix this with GUC_ as it is specific to GuC in WOPCM
>>> +/* 8KB stack reserved for GuC FW*/
>>> +#define GUC_WOPCM_STACK_RESERVED (0x2000)
>>> +/* 24KB WOPCM reserved for RC6 CTX on BXT */
>>> +#define BXT_WOPCM_RC6_RESERVED (0x6000)
>>> +
>>> +#define GEN9_GUC_WOPCM_DELTA 4
>>> +#define GEN9_GUC_WOPCM_OFFSET (0x24000)
>
> I'm not sure that definitions unrelated to register bits shall
> be defined here
>
I was trying to align with the current implementation. since we had
put definitions such as GUC_WOPCM _TOP, BXT_GUC_WOPCM_RC6_RESERVED here.
It's better to create a header file for wopcm related definitions if we
want to keep it absolutely
clean. I will think about it. Thanks for comments.
>>> +
>>> #define GUC_WOPCM_SIZE _MMIO(0xc050)
>>> -/* GuC addresses below GUC_WOPCM_TOP don't map through the GTT */
>>> -#define GUC_WOPCM_TOP (0x80 << 12) /* 512KB */
>>> -#define BXT_GUC_WOPCM_RC6_RESERVED (0x10 << 12) /* 64KB */
>>> /* GuC addresses above GUC_GGTT_TOP also don't map through the
>>> GTT */
>>> #define GUC_GGTT_TOP 0xFEE00000
>>> diff --git a/drivers/gpu/drm/i915/intel_guc.c
>>> b/drivers/gpu/drm/i915/intel_guc.c
>>> index 9678630..0c6bd63 100644
>>> --- a/drivers/gpu/drm/i915/intel_guc.c
>>> +++ b/drivers/gpu/drm/i915/intel_guc.c
>>> @@ -337,7 +337,7 @@ int intel_guc_resume(struct drm_i915_private
>>> *dev_priv)
>>> * This is a wrapper to create an object for use with the GuC. In
>>> order to
>>> * use it inside the GuC, an object needs to be pinned lifetime,
>>> so we allocate
>>> * both some backing storage and a range inside the Global GTT. We
>>> must pin
>>> - * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP)
>>> because that
>>> + * it in the GGTT somewhere other than than [0, GuC WOPCM top)
>>> because that
>>> * range is reserved inside GuC.
>>> *
>>> * Return: A i915_vma if successful, otherwise an ERR_PTR.
>>> @@ -358,7 +358,8 @@ struct i915_vma *intel_guc_allocate_vma(struct
>>> intel_guc *guc, u32 size)
>>> goto err;
>>> ret = i915_vma_pin(vma, 0, PAGE_SIZE,
>>> - PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
>>> + PIN_GLOBAL | PIN_OFFSET_BIAS |
>>> + dev_priv->guc.wopcm.top);
>>> if (ret) {
>>> vma = ERR_PTR(ret);
>>> goto err;
>>> @@ -371,13 +372,19 @@ struct i915_vma *intel_guc_allocate_vma(struct
>>> intel_guc *guc, u32 size)
>>> return vma;
>>> }
>>> -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv)
>>> +/*
>>> + * GuC does not allow any gfx GGTT address that falls into range
>>> + * [0, GuC WOPCM top), which is reserved for Boot ROM, SRAM and WOPCM.
>>> + * All gfx objects used by GuC is pinned with PIN_OFFSET_BIAS along
>>> with
>>> + * top of WOPCM.
>>> + */
>>> +u32 guc_ggtt_offset(struct i915_vma *vma)
>>> {
>>> - u32 wopcm_size = GUC_WOPCM_TOP;
>>> -
>>> - /* On BXT, the top of WOPCM is reserved for RC6 context */
>>> - if (IS_GEN9_LP(dev_priv))
>>> - wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED;
>>> + u32 guc_wopcm_top = vma->vm->i915->guc.wopcm.top;
>>> + u32 offset = i915_ggtt_offset(vma);
>>> - return wopcm_size;
>>> + GEM_BUG_ON(!guc_wopcm_top);
>>> + GEM_BUG_ON(offset < guc_wopcm_top);
>>> + GEM_BUG_ON(range_overflows_t(u64, offset, vma->size,
>>> GUC_GGTT_TOP));
>>> + return offset;
>>> }
>>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>>> b/drivers/gpu/drm/i915/intel_guc.h
>>> index 607e025..77f619b 100644
>>> --- a/drivers/gpu/drm/i915/intel_guc.h
>>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>>> @@ -39,6 +39,12 @@ struct guc_preempt_work {
>>> struct intel_engine_cs *engine;
>>> };
>>> +struct guc_wopcm_partition {
>
> s/guc_wopcm_partition/intel_guc_wopcm ?
>
>>> + u32 offset;
>>> + u32 size;
>>> + u32 top;
>>> +};
>>> +
>>> /*
>>> * Top level structure of GuC. It handles firmware loading and
>>> manages client
>>> * pool and doorbells. intel_guc owns a i915_guc_client to replace
>>> the legacy
>>> @@ -46,6 +52,7 @@ struct guc_preempt_work {
>>> */
>>> struct intel_guc {
>>> struct intel_uc_fw fw;
>>> + struct guc_wopcm_partition wopcm;
>
> hmm, or if we call it struct intel_wopcm then we could place it in
> drm_i915_private near guc/huc.
>
I had a struct called intel_wopcm originally which contains the overall
wopcm info
such as wopcm size, etc. but I think wopcm partition should be guc
specific structure
since we only need it when guc is active.
>>> struct intel_guc_log log;
>>> struct intel_guc_ct ct;
>>> @@ -100,22 +107,6 @@ static inline void intel_guc_notify(struct
>>> intel_guc *guc)
>>> guc->notify(guc);
>>> }
>>> -/*
>>> - * GuC does not allow any gfx GGTT address that falls into range
>>> [0, WOPCM_TOP),
>>> - * which is reserved for Boot ROM, SRAM and WOPCM. Currently this
>>> top address is
>>> - * 512K. In order to exclude 0-512K address space from GGTT, all
>>> gfx objects
>>> - * used by GuC is pinned with PIN_OFFSET_BIAS along with size of
>>> WOPCM.
>>> - */
>>> -static inline u32 guc_ggtt_offset(struct i915_vma *vma)
>>> -{
>>> - u32 offset = i915_ggtt_offset(vma);
>>> -
>>> - GEM_BUG_ON(offset < GUC_WOPCM_TOP);
>>> - GEM_BUG_ON(range_overflows_t(u64, offset, vma->size,
>>> GUC_GGTT_TOP));
>>> -
>>> - return offset;
>>> -}
>>> -
>>> void intel_guc_init_early(struct intel_guc *guc);
>>> void intel_guc_init_send_regs(struct intel_guc *guc);
>>> void intel_guc_init_params(struct intel_guc *guc);
>>> @@ -126,6 +117,6 @@ int intel_guc_auth_huc(struct intel_guc *guc,
>>> u32 rsa_offset);
>>> int intel_guc_suspend(struct drm_i915_private *dev_priv);
>>> int intel_guc_resume(struct drm_i915_private *dev_priv);
>>> struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32
>>> size);
>>> -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
>>> +u32 guc_ggtt_offset(struct i915_vma *vma);
>> Why are we exporting this function. If we have to export then name
>> should be intel_guc_ggtt_offset also
>> pass intel_guc struct as param.
>> Any function we export for GuC should have name prefixed "intel_guc"
>> and intel_guc struct as param.
>> suspend/resume functions above are to be updated in upcoming series.
>>> #endif
>>> diff --git a/drivers/gpu/drm/i915/intel_huc.c
>>> b/drivers/gpu/drm/i915/intel_huc.c
>>> index 98d1725..0baedc4 100644
>>> --- a/drivers/gpu/drm/i915/intel_huc.c
>>> +++ b/drivers/gpu/drm/i915/intel_huc.c
>>> @@ -202,7 +202,7 @@ void intel_huc_auth(struct intel_huc *huc)
>>> return;
>>> vma = i915_gem_object_ggtt_pin(huc->fw.obj, NULL, 0, 0,
>>> - PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
>>> + PIN_OFFSET_BIAS | guc->wopcm.top);
>>> if (IS_ERR(vma)) {
>>> DRM_ERROR("failed to pin huc fw object %d\n",
>>> (int)PTR_ERR(vma));
>>> diff --git a/drivers/gpu/drm/i915/intel_uc.c
>>> b/drivers/gpu/drm/i915/intel_uc.c
>>> index aec2954..4f6fa67 100644
>>> --- a/drivers/gpu/drm/i915/intel_uc.c
>>> +++ b/drivers/gpu/drm/i915/intel_uc.c
>>> @@ -86,10 +86,122 @@ void intel_uc_init_early(struct
>>> drm_i915_private *dev_priv)
>>> intel_guc_init_early(&dev_priv->guc);
>>> }
>>> +static inline u32 reserved_wopcm_size(struct drm_i915_private *i915)
>> Can we make this wopcm_top_reserved_size.
>> there is reserved at the beginning (WOPCM_RESERVED_SIZE). Can name it
>> WOPCM_BEGIN_RESERVED_SIZE.
>>> +{
>>> + /* On BXT, the top of WOPCM is reserved for RC6 context */
>>> + if (IS_GEN9_LP(i915))
>>> + return BXT_WOPCM_RC6_RESERVED;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int do_wopcm_partition(struct drm_i915_private *i915, u32
>>> offset,
>>> + u32 reserved_size)
>> This can be named guc_wopcm_partition_init as Chris suggested.
>> and name caller as intel_guc_init_wopcm_partition.
>> These functions are GuC specific hence they should be defined in
>> intel_guc.c.
>
> Hmm, as during wopcm partition we need to know HuC fw size, I'm not 100%
> sure that these functions shall be placed in intel_guc.c ... unless we
> try to pass fw sizes as params, or
>
> Other option would be to create intel_[guc|uc]_wopcm.c|h
Yes, that was reason I left them in intel_uc.c.
>
>>> +{
>>> + struct guc_wopcm_partition *wp = &i915->guc.wopcm;
>>> + u32 aligned_offset = ALIGN(offset, WOPCM_OFFSET_ALIGN MENT);
>>> +
>>> + if (offset >= WOPCM_DEFAULT_SIZE)
>>> + return -E2BIG;
>>> +
>>> + if (reserved_size >= WOPCM_DEFAULT_SIZE)
>>> + return -E2BIG;
>>> +
>>> + if ((aligned_offset + reserved_size) >= WOPCM_DEFAULT_SIZE)
>>> + return -E2BIG;
>>> +
>>> + wp->offset = aligned_offset;
>>> + wp->top = WOPCM_DEFAULT_SIZE - wp->offset;
>>> + wp->size = wp->top - reserved_size;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static void guc_init_wopcm_partition(struct drm_i915_private *i915)
>>> +{
>>> + struct intel_uc_fw *guc_fw = &i915->guc.fw;
>>> + struct intel_uc_fw *huc_fw = &i915->huc.fw;
>>> + struct guc_wopcm_partition *wp = &i915->guc.wopcm;
>>> + size_t huc_size, guc_size;
>>> + u32 offset;
>>> + u32 reserved;
>>> + u32 wopcm_base;
>>> + u32 delta;
>>> + int err;
>>> +
>>> + if (!i915_modparams.enable_guc_loading)
>>> + return;
>>> +
>>> + /* No need to do partition if failed to fetch both GuC and HuC
>>> FW */
>>> + if (guc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS &&
>>> + huc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS)
>>> + goto fail;
>>> +
>>> + huc_size = 0;
>>> + guc_size = 0;
>>> + offset = WOPCM_RESERVED_SIZE;
>>> + reserved = reserved_wopcm_size(i915);
>>> +
>>> + if (huc_fw->fetch_status == INTEL_UC_FIRMWARE_SUCCESS)
>>> + huc_size = huc_fw->header_size + huc_fw->ucode_size;
>
> maybe to reduce code duplication you can create inline in intel_uc_fw.h
>
> static inline u32 intel_uc_fw_get_size(struct intel_uc_fw *uc_fw)
> {
> if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS)
> return 0;
> return uc_fw->header_size + uc_fw->ucode_size;
> }
>
Will change it.
>>> +
>>> + err = do_wopcm_partition(i915, offset + huc_size, reserved);
>>> + if (err) {
>>> + if (!huc_size)
>>> + goto fail;
>>> +
>>> + /* Partition without loading HuC FW. */
>>> + err = do_wopcm_partition(i915, offset, reserved);
>>> + if (err)
>>> + goto fail;
>>> + }
>>> +
>>> + /*
>>> + * Check hardware restriction on Gen9
>>> + * GuC WOPCM size is at least 4 bytes larger than GuC WOPCM
>>> base due
>>> + * to hardware limitation on Gen9.
>>> + */
>>> + if (IS_GEN9(i915)) {
>>> + wopcm_base = wp->offset + GEN9_GUC_WOPCM_OFFSET;
>>> + if (unlikely(wopcm_base > wp->size))
>>> + goto fail;
>>> +
>>> + delta = wp->size - wopcm_base;
>>> + if (unlikely(delta < GEN9_GUC_WOPCM_DELTA))
>>> + goto fail;
>>> + }
>>> +
>>> + if (guc_fw->fetch_status == INTEL_UC_FIRMWARE_SUCCESS) {
>>> + guc_size = guc_fw->header_size + guc_fw->ucode_size;
>>> + /* Need 8K stack for GuC */
>>> + guc_size += GUC_WOPCM_STACK_RESERVED;
>>> + }
>>> +
>>> + if (guc_size > wp->size)
>>> + goto fail;
>>> +
>>> + DRM_DEBUG_DRIVER("GuC WOPCM offset %dKB, size %dKB, top %dKB\n",
>>> + wp->offset >> 10, wp->size >> 10, wp->top >> 10);
>>> +
>>> + return;
>>> +
>>> +fail:
>>> + DRM_ERROR("WOPCM partitioning failed. Falling back to execlist
>>> mode\n");
Do you have any suggestions about what should be printed here on the
partitioning failure?
should it be a error/warning/debug message?
>>> +
>>> + intel_uc_fini_fw(i915);
>>> +
>>> + /* GuC submission won't work without valid GuC WOPCM partition */
>>> + if (i915_modparams.enable_guc_submission)
>>> + i915_modparams.enable_guc_submission = 0;
>>> +
>>> + i915_modparams.enable_guc_loading = 0;
>>> +}
>>> +
>>> void intel_uc_init_fw(struct drm_i915_private *dev_priv)
>>> {
>>> intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw);
>>> intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw);
>>> + guc_init_wopcm_partition(dev_priv);
>> Create intel_uc_init_wopcm_partition(dev_priv) and call
>> intel_guc_init_wopcm_partition(guc) from there.
>
> hmm, I'm not sure what benefit you expect from this call chain ?
>
probably, I should rename it back to intel_uc_init_wopcm_partition since
it relies on both huc and guc fw size.
>>> }
>>> void intel_uc_fini_fw(struct drm_i915_private *dev_priv)
>>> @@ -174,9 +286,9 @@ int intel_uc_init_hw(struct drm_i915_private
>>> *dev_priv)
>>> }
>>> /* init WOPCM */
>>> - I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
>>> + I915_WRITE(GUC_WOPCM_SIZE, guc->wopcm.size);
>>> I915_WRITE(DMA_GUC_WOPCM_OFFSET,
>>> - GUC_WOPCM_OFFSET_VALUE | HUC_LOADING_AGENT_GUC);
>>> + guc->wopcm.offset | HUC_LOADING_AGENT_GUC);
>>> /* WaEnableuKernelHeaderValidFix:skl */
>>> /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
>>> diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c
>>> b/drivers/gpu/drm/i915/intel_uc_fw.c
>>> index 4bc82d3..34db2b1 100644
>>> --- a/drivers/gpu/drm/i915/intel_uc_fw.c
>>> +++ b/drivers/gpu/drm/i915/intel_uc_fw.c
>>> @@ -95,9 +95,13 @@ void intel_uc_fw_fetch(struct drm_i915_private
>>> *dev_priv,
>>> uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size;
>>> uc_fw->ucode_size = (css->size_dw - css->header_size_dw) *
>>> sizeof(u32);
>>> - /* Header and uCode will be loaded to WOPCM */
>>> + /*
>>> + * Header and uCode will be loaded to WOPCM
>>> + * Only check the size against the overall available WOPCM
>>> here. Will
>>> + * continue to check the size during WOPCM partition calculation.
>>> + */
>>> size = uc_fw->header_size + uc_fw->ucode_size;
>>> - if (size > intel_guc_wopcm_size(dev_priv)) {
>>> + if (size > WOPCM_DEFAULT_SIZE) {
>>> DRM_WARN("%s: Firmware is too large to fit in WOPCM\n",
>>> intel_uc_fw_type_repr(uc_fw->type));
>>> err = -E2BIG;
>>> @@ -207,6 +211,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
>>> int (*xfer)(struct intel_uc_fw *uc_fw,
>>> struct i915_vma *vma))
>>> {
>>> + struct drm_i915_private *i915 = to_i915(uc_fw->obj->base.dev);
>>> struct i915_vma *vma;
>>> int err;
>>> @@ -230,7 +235,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
>>> }
>>> vma = i915_gem_object_ggtt_pin(uc_fw->obj, NULL, 0, 0,
>>> - PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
>>> + PIN_OFFSET_BIAS | i915->guc.wopcm.top);
>>> if (IS_ERR(vma)) {
>>> err = PTR_ERR(vma);
>>> DRM_DEBUG_DRIVER("%s fw ggtt-pin err=%d\n",
More information about the Intel-gfx
mailing list