[Intel-gfx] [PATCH v2 1/2] drm/i915: Impletments dynamic WOPCM partitioning.
Michal Wajdeczko
michal.wajdeczko at intel.com
Thu Nov 16 21:47:46 UTC 2017
On Thu, 16 Nov 2017 08:34:01 +0100, Sagar Arun Kamble
<sagar.a.kamble at intel.com> wrote:
> Typo in the subject.
> GLK showing failure to load GuC with this approach on CI.
>
> On 11/15/2017 10:47 PM, Jackie Li wrote:
>> Static WOPCM partitioning will lead to GuC loading failure
>> if the GuC/HuC firmware size exceeded current static 512KB
>> partition boundary.
>>
>> This patch enables the dynamical calculation of the WOPCM aperture
>> used by GuC/HuC firmware. GuC WOPCM offset is set to
>> HuC size + reserved WOPCM size. GuC WOPCM size is set to
>> total WOPCM size - GuC WOPCM offset - RC6CTX size. In this case,
>> GuC WOPCM offset will be updated based on the size of HuC firmware
>> while GuC WOPCM size will be set to use all the remaining WOPCM space.
>>
>> v2:
>> - Removed intel_wopcm_init (Ville/Sagar/Joonas)
>> - Renamed and Moved the intel_wopcm_partition into intel_guc (Sagar)
>> - Removed unnecessary function calls (Joonas)
>> - Init GuC WOPCM partition as soon as firmware fetching is completed
>>
>> Signed-off-by: Jackie Li <yaodong.li at intel.com>
>> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
>> Cc: Sagar Arun Kamble <sagar.a.kamble at intel.com>
>> Cc: Sujaritha Sundaresan <sujaritha.sundaresan at intel.com>
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> Cc: John Spotswood <john.a.spotswood at intel.com>
>> Cc: Oscar Mateo <oscar.mateo at intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_gem_context.c | 4 +-
>> drivers/gpu/drm/i915/i915_guc_reg.h | 18 +++--
>> drivers/gpu/drm/i915/intel_guc.c | 25 ++++---
>> drivers/gpu/drm/i915/intel_guc.h | 25 +++----
>> drivers/gpu/drm/i915/intel_huc.c | 2 +-
>> drivers/gpu/drm/i915/intel_uc.c | 116
>> +++++++++++++++++++++++++++++++-
>> drivers/gpu/drm/i915/intel_uc_fw.c | 11 ++-
>> 7 files changed, 163 insertions(+), 38 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
>> b/drivers/gpu/drm/i915/i915_gem_context.c
>> index 2db0406..285c310 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -312,12 +312,12 @@ __create_hw_context(struct drm_i915_private
>> *dev_priv,
>> ctx->desc_template =
>> default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt);
>> - /* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC
>> is not
>> + /* GuC requires the ring to be placed above GuC WOPCM top. if GuC is
>> not
>> * present or not in use we still need a small bias as ring
>> wraparound
>> * at offset 0 sometimes hangs. No idea why.
>> */
>> if (HAS_GUC(dev_priv) && i915_modparams.enable_guc_loading)
>> - ctx->ggtt_offset_bias = GUC_WOPCM_TOP;
>> + ctx->ggtt_offset_bias = dev_priv->guc.wopcm.top;
>> else
>> ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE;
>> diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h
>> b/drivers/gpu/drm/i915/i915_guc_reg.h
>> index bc1ae7d..567df12 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_guc_reg.h
>> @@ -67,17 +67,27 @@
>> #define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340)
>> #define HUC_LOADING_AGENT_VCR (0<<1)
>> #define HUC_LOADING_AGENT_GUC (1<<1)
>> -#define GUC_WOPCM_OFFSET_VALUE 0x80000 /* 512KB */
>> #define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4)
>> #define HUC_STATUS2 _MMIO(0xD3B0)
>> #define HUC_FW_VERIFIED (1<<7)
>> /* Defines WOPCM space available to GuC firmware */
>> +/* Default WOPCM size 1MB */
>> +#define WOPCM_DEFAULT_SIZE (0x1 << 20)
> possible to get this size from register GEN6_STOLEN_RESERVED
> (ggtt->stolen_reserved_size)
>> +/* Reserved WOPCM size 16KB */
>> +#define WOPCM_RESERVED_SIZE (0x4000)
>> +/* GUC WOPCM Offset need to be 16KB aligned */
>> +#define WOPCM_OFFSET_ALIGNMENT (0x4000)
> prefix this with GUC_ as it is specific to GuC in WOPCM
>> +/* 8KB stack reserved for GuC FW*/
>> +#define GUC_WOPCM_STACK_RESERVED (0x2000)
>> +/* 24KB WOPCM reserved for RC6 CTX on BXT */
>> +#define BXT_WOPCM_RC6_RESERVED (0x6000)
>> +
>> +#define GEN9_GUC_WOPCM_DELTA 4
>> +#define GEN9_GUC_WOPCM_OFFSET (0x24000)
I'm not sure that definitions unrelated to register bits shall
be defined here
>> +
>> #define GUC_WOPCM_SIZE _MMIO(0xc050)
>> -/* GuC addresses below GUC_WOPCM_TOP don't map through the GTT */
>> -#define GUC_WOPCM_TOP (0x80 << 12) /* 512KB */
>> -#define BXT_GUC_WOPCM_RC6_RESERVED (0x10 << 12) /* 64KB */
>> /* GuC addresses above GUC_GGTT_TOP also don't map through the GTT
>> */
>> #define GUC_GGTT_TOP 0xFEE00000
>> diff --git a/drivers/gpu/drm/i915/intel_guc.c
>> b/drivers/gpu/drm/i915/intel_guc.c
>> index 9678630..0c6bd63 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.c
>> +++ b/drivers/gpu/drm/i915/intel_guc.c
>> @@ -337,7 +337,7 @@ int intel_guc_resume(struct drm_i915_private
>> *dev_priv)
>> * This is a wrapper to create an object for use with the GuC. In
>> order to
>> * use it inside the GuC, an object needs to be pinned lifetime, so
>> we allocate
>> * both some backing storage and a range inside the Global GTT. We
>> must pin
>> - * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because
>> that
>> + * it in the GGTT somewhere other than than [0, GuC WOPCM top) because
>> that
>> * range is reserved inside GuC.
>> *
>> * Return: A i915_vma if successful, otherwise an ERR_PTR.
>> @@ -358,7 +358,8 @@ struct i915_vma *intel_guc_allocate_vma(struct
>> intel_guc *guc, u32 size)
>> goto err;
>> ret = i915_vma_pin(vma, 0, PAGE_SIZE,
>> - PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
>> + PIN_GLOBAL | PIN_OFFSET_BIAS |
>> + dev_priv->guc.wopcm.top);
>> if (ret) {
>> vma = ERR_PTR(ret);
>> goto err;
>> @@ -371,13 +372,19 @@ struct i915_vma *intel_guc_allocate_vma(struct
>> intel_guc *guc, u32 size)
>> return vma;
>> }
>> -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv)
>> +/*
>> + * GuC does not allow any gfx GGTT address that falls into range
>> + * [0, GuC WOPCM top), which is reserved for Boot ROM, SRAM and WOPCM.
>> + * All gfx objects used by GuC is pinned with PIN_OFFSET_BIAS along
>> with
>> + * top of WOPCM.
>> + */
>> +u32 guc_ggtt_offset(struct i915_vma *vma)
>> {
>> - u32 wopcm_size = GUC_WOPCM_TOP;
>> -
>> - /* On BXT, the top of WOPCM is reserved for RC6 context */
>> - if (IS_GEN9_LP(dev_priv))
>> - wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED;
>> + u32 guc_wopcm_top = vma->vm->i915->guc.wopcm.top;
>> + u32 offset = i915_ggtt_offset(vma);
>> - return wopcm_size;
>> + GEM_BUG_ON(!guc_wopcm_top);
>> + GEM_BUG_ON(offset < guc_wopcm_top);
>> + GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP));
>> + return offset;
>> }
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index 607e025..77f619b 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -39,6 +39,12 @@ struct guc_preempt_work {
>> struct intel_engine_cs *engine;
>> };
>> +struct guc_wopcm_partition {
s/guc_wopcm_partition/intel_guc_wopcm ?
>> + u32 offset;
>> + u32 size;
>> + u32 top;
>> +};
>> +
>> /*
>> * Top level structure of GuC. It handles firmware loading and
>> manages client
>> * pool and doorbells. intel_guc owns a i915_guc_client to replace
>> the legacy
>> @@ -46,6 +52,7 @@ struct guc_preempt_work {
>> */
>> struct intel_guc {
>> struct intel_uc_fw fw;
>> + struct guc_wopcm_partition wopcm;
hmm, or if we call it struct intel_wopcm then we could place it in
drm_i915_private near guc/huc.
>> struct intel_guc_log log;
>> struct intel_guc_ct ct;
>> @@ -100,22 +107,6 @@ static inline void intel_guc_notify(struct
>> intel_guc *guc)
>> guc->notify(guc);
>> }
>> -/*
>> - * GuC does not allow any gfx GGTT address that falls into range [0,
>> WOPCM_TOP),
>> - * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top
>> address is
>> - * 512K. In order to exclude 0-512K address space from GGTT, all gfx
>> objects
>> - * used by GuC is pinned with PIN_OFFSET_BIAS along with size of WOPCM.
>> - */
>> -static inline u32 guc_ggtt_offset(struct i915_vma *vma)
>> -{
>> - u32 offset = i915_ggtt_offset(vma);
>> -
>> - GEM_BUG_ON(offset < GUC_WOPCM_TOP);
>> - GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP));
>> -
>> - return offset;
>> -}
>> -
>> void intel_guc_init_early(struct intel_guc *guc);
>> void intel_guc_init_send_regs(struct intel_guc *guc);
>> void intel_guc_init_params(struct intel_guc *guc);
>> @@ -126,6 +117,6 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32
>> rsa_offset);
>> int intel_guc_suspend(struct drm_i915_private *dev_priv);
>> int intel_guc_resume(struct drm_i915_private *dev_priv);
>> struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32
>> size);
>> -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
>> +u32 guc_ggtt_offset(struct i915_vma *vma);
> Why are we exporting this function. If we have to export then name
> should be intel_guc_ggtt_offset also
> pass intel_guc struct as param.
> Any function we export for GuC should have name prefixed "intel_guc" and
> intel_guc struct as param.
> suspend/resume functions above are to be updated in upcoming series.
>> #endif
>> diff --git a/drivers/gpu/drm/i915/intel_huc.c
>> b/drivers/gpu/drm/i915/intel_huc.c
>> index 98d1725..0baedc4 100644
>> --- a/drivers/gpu/drm/i915/intel_huc.c
>> +++ b/drivers/gpu/drm/i915/intel_huc.c
>> @@ -202,7 +202,7 @@ void intel_huc_auth(struct intel_huc *huc)
>> return;
>> vma = i915_gem_object_ggtt_pin(huc->fw.obj, NULL, 0, 0,
>> - PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
>> + PIN_OFFSET_BIAS | guc->wopcm.top);
>> if (IS_ERR(vma)) {
>> DRM_ERROR("failed to pin huc fw object %d\n",
>> (int)PTR_ERR(vma));
>> diff --git a/drivers/gpu/drm/i915/intel_uc.c
>> b/drivers/gpu/drm/i915/intel_uc.c
>> index aec2954..4f6fa67 100644
>> --- a/drivers/gpu/drm/i915/intel_uc.c
>> +++ b/drivers/gpu/drm/i915/intel_uc.c
>> @@ -86,10 +86,122 @@ void intel_uc_init_early(struct drm_i915_private
>> *dev_priv)
>> intel_guc_init_early(&dev_priv->guc);
>> }
>> +static inline u32 reserved_wopcm_size(struct drm_i915_private *i915)
> Can we make this wopcm_top_reserved_size.
> there is reserved at the beginning (WOPCM_RESERVED_SIZE). Can name it
> WOPCM_BEGIN_RESERVED_SIZE.
>> +{
>> + /* On BXT, the top of WOPCM is reserved for RC6 context */
>> + if (IS_GEN9_LP(i915))
>> + return BXT_WOPCM_RC6_RESERVED;
>> +
>> + return 0;
>> +}
>> +
>> +static int do_wopcm_partition(struct drm_i915_private *i915, u32
>> offset,
>> + u32 reserved_size)
> This can be named guc_wopcm_partition_init as Chris suggested.
> and name caller as intel_guc_init_wopcm_partition.
> These functions are GuC specific hence they should be defined in
> intel_guc.c.
Hmm, as during wopcm partition we need to know HuC fw size, I'm not 100%
sure that these functions shall be placed in intel_guc.c ... unless we
try to pass fw sizes as params, or
Other option would be to create intel_[guc|uc]_wopcm.c|h
>> +{
>> + struct guc_wopcm_partition *wp = &i915->guc.wopcm;
>> + u32 aligned_offset = ALIGN(offset, WOPCM_OFFSET_ALIGN MENT);
>> +
>> + if (offset >= WOPCM_DEFAULT_SIZE)
>> + return -E2BIG;
>> +
>> + if (reserved_size >= WOPCM_DEFAULT_SIZE)
>> + return -E2BIG;
>> +
>> + if ((aligned_offset + reserved_size) >= WOPCM_DEFAULT_SIZE)
>> + return -E2BIG;
>> +
>> + wp->offset = aligned_offset;
>> + wp->top = WOPCM_DEFAULT_SIZE - wp->offset;
>> + wp->size = wp->top - reserved_size;
>> +
>> + return 0;
>> +}
>> +
>> +static void guc_init_wopcm_partition(struct drm_i915_private *i915)
>> +{
>> + struct intel_uc_fw *guc_fw = &i915->guc.fw;
>> + struct intel_uc_fw *huc_fw = &i915->huc.fw;
>> + struct guc_wopcm_partition *wp = &i915->guc.wopcm;
>> + size_t huc_size, guc_size;
>> + u32 offset;
>> + u32 reserved;
>> + u32 wopcm_base;
>> + u32 delta;
>> + int err;
>> +
>> + if (!i915_modparams.enable_guc_loading)
>> + return;
>> +
>> + /* No need to do partition if failed to fetch both GuC and HuC FW */
>> + if (guc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS &&
>> + huc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS)
>> + goto fail;
>> +
>> + huc_size = 0;
>> + guc_size = 0;
>> + offset = WOPCM_RESERVED_SIZE;
>> + reserved = reserved_wopcm_size(i915);
>> +
>> + if (huc_fw->fetch_status == INTEL_UC_FIRMWARE_SUCCESS)
>> + huc_size = huc_fw->header_size + huc_fw->ucode_size;
maybe to reduce code duplication you can create inline in intel_uc_fw.h
static inline u32 intel_uc_fw_get_size(struct intel_uc_fw *uc_fw)
{
if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS)
return 0;
return uc_fw->header_size + uc_fw->ucode_size;
}
>> +
>> + err = do_wopcm_partition(i915, offset + huc_size, reserved);
>> + if (err) {
>> + if (!huc_size)
>> + goto fail;
>> +
>> + /* Partition without loading HuC FW. */
>> + err = do_wopcm_partition(i915, offset, reserved);
>> + if (err)
>> + goto fail;
>> + }
>> +
>> + /*
>> + * Check hardware restriction on Gen9
>> + * GuC WOPCM size is at least 4 bytes larger than GuC WOPCM base due
>> + * to hardware limitation on Gen9.
>> + */
>> + if (IS_GEN9(i915)) {
>> + wopcm_base = wp->offset + GEN9_GUC_WOPCM_OFFSET;
>> + if (unlikely(wopcm_base > wp->size))
>> + goto fail;
>> +
>> + delta = wp->size - wopcm_base;
>> + if (unlikely(delta < GEN9_GUC_WOPCM_DELTA))
>> + goto fail;
>> + }
>> +
>> + if (guc_fw->fetch_status == INTEL_UC_FIRMWARE_SUCCESS) {
>> + guc_size = guc_fw->header_size + guc_fw->ucode_size;
>> + /* Need 8K stack for GuC */
>> + guc_size += GUC_WOPCM_STACK_RESERVED;
>> + }
>> +
>> + if (guc_size > wp->size)
>> + goto fail;
>> +
>> + DRM_DEBUG_DRIVER("GuC WOPCM offset %dKB, size %dKB, top %dKB\n",
>> + wp->offset >> 10, wp->size >> 10, wp->top >> 10);
>> +
>> + return;
>> +
>> +fail:
>> + DRM_ERROR("WOPCM partitioning failed. Falling back to execlist
>> mode\n");
>> +
>> + intel_uc_fini_fw(i915);
>> +
>> + /* GuC submission won't work without valid GuC WOPCM partition */
>> + if (i915_modparams.enable_guc_submission)
>> + i915_modparams.enable_guc_submission = 0;
>> +
>> + i915_modparams.enable_guc_loading = 0;
>> +}
>> +
>> void intel_uc_init_fw(struct drm_i915_private *dev_priv)
>> {
>> intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw);
>> intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw);
>> + guc_init_wopcm_partition(dev_priv);
> Create intel_uc_init_wopcm_partition(dev_priv) and call
> intel_guc_init_wopcm_partition(guc) from there.
hmm, I'm not sure what benefit you expect from this call chain ?
>> }
>> void intel_uc_fini_fw(struct drm_i915_private *dev_priv)
>> @@ -174,9 +286,9 @@ int intel_uc_init_hw(struct drm_i915_private
>> *dev_priv)
>> }
>> /* init WOPCM */
>> - I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
>> + I915_WRITE(GUC_WOPCM_SIZE, guc->wopcm.size);
>> I915_WRITE(DMA_GUC_WOPCM_OFFSET,
>> - GUC_WOPCM_OFFSET_VALUE | HUC_LOADING_AGENT_GUC);
>> + guc->wopcm.offset | HUC_LOADING_AGENT_GUC);
>> /* WaEnableuKernelHeaderValidFix:skl */
>> /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
>> diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c
>> b/drivers/gpu/drm/i915/intel_uc_fw.c
>> index 4bc82d3..34db2b1 100644
>> --- a/drivers/gpu/drm/i915/intel_uc_fw.c
>> +++ b/drivers/gpu/drm/i915/intel_uc_fw.c
>> @@ -95,9 +95,13 @@ void intel_uc_fw_fetch(struct drm_i915_private
>> *dev_priv,
>> uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size;
>> uc_fw->ucode_size = (css->size_dw - css->header_size_dw) *
>> sizeof(u32);
>> - /* Header and uCode will be loaded to WOPCM */
>> + /*
>> + * Header and uCode will be loaded to WOPCM
>> + * Only check the size against the overall available WOPCM here. Will
>> + * continue to check the size during WOPCM partition calculation.
>> + */
>> size = uc_fw->header_size + uc_fw->ucode_size;
>> - if (size > intel_guc_wopcm_size(dev_priv)) {
>> + if (size > WOPCM_DEFAULT_SIZE) {
>> DRM_WARN("%s: Firmware is too large to fit in WOPCM\n",
>> intel_uc_fw_type_repr(uc_fw->type));
>> err = -E2BIG;
>> @@ -207,6 +211,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
>> int (*xfer)(struct intel_uc_fw *uc_fw,
>> struct i915_vma *vma))
>> {
>> + struct drm_i915_private *i915 = to_i915(uc_fw->obj->base.dev);
>> struct i915_vma *vma;
>> int err;
>> @@ -230,7 +235,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
>> }
>> vma = i915_gem_object_ggtt_pin(uc_fw->obj, NULL, 0, 0,
>> - PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
>> + PIN_OFFSET_BIAS | i915->guc.wopcm.top);
>> if (IS_ERR(vma)) {
>> err = PTR_ERR(vma);
>> DRM_DEBUG_DRIVER("%s fw ggtt-pin err=%d\n",
More information about the Intel-gfx
mailing list