[PATCH v3 1/7] drm/i915/uc: perma-pin firmwares
John Harrison
john.c.harrison at intel.com
Tue May 30 22:52:52 UTC 2023
On 5/26/2023 17:52, Daniele Ceraolo Spurio wrote:
> Now that each FW has its own reserved area, we can keep them always
> pinned and skip the pin/unpin dance on reset. This will make things
> easier for the 2-step HuC authentication, which requires the FW to be
> pinned in GGTT after the xfer is completed.
> Since the vma is now valid for a long time and not just for the quick
> pin-load-unpin dance, the name "dummy" is no longer appropriare and has
> been replaced with vma_res. All the functions have also been updated to
> operate on vma_res for consistency.
> Given that we pin the vma behind the allocator's back (which is ok
> because we do the pinning in an area that was previously reserved for
> thus purpose), we do need to explicitly re-pin on resume because the
> automated helper won't cover us.
>
> v2: better comments and commit message, s/dummy/vma_res/
>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Alan Previn <alan.previn.teres.alexis at intel.com>
> Cc: John Harrison <John.C.Harrison at Intel.com>
Reviewed-by: John Harrison <John.C.Harrison at Intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_ggtt.c | 3 ++
> drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c | 7 +++-
> drivers/gpu/drm/i915/gt/uc/intel_guc.c | 2 +-
> drivers/gpu/drm/i915/gt/uc/intel_huc.c | 2 +-
> drivers/gpu/drm/i915/gt/uc/intel_uc.c | 8 ++++
> drivers/gpu/drm/i915/gt/uc/intel_uc.h | 2 +
> drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 50 ++++++++++++++++-------
> drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 22 ++++++----
> 8 files changed, 71 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 2a7942fac798..f4e8aa8246e8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -1326,6 +1326,9 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
> ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
> ggtt->error_capture.size);
>
> + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
> + intel_uc_resume_mappings(>->uc);
> +
> ggtt->invalidate(ggtt);
>
> if (flush)
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
> index fb0984f875f9..b26f493f86fa 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
> @@ -90,7 +90,12 @@ void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc)
> {
> struct intel_gt *gt = gsc_uc_to_gt(gsc);
>
> - intel_uc_fw_init_early(&gsc->fw, INTEL_UC_FW_TYPE_GSC);
> + /*
> + * GSC FW needs to be copied to a dedicated memory allocations for
> + * loading (see gsc->local), so we don't need to GGTT map the FW image
> + * itself into GGTT.
> + */
> + intel_uc_fw_init_early(&gsc->fw, INTEL_UC_FW_TYPE_GSC, false);
> INIT_WORK(&gsc->work, gsc_work);
>
> /* we can arrive here from i915_driver_early_probe for primary
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> index c9f20385f6a0..2eb891b270ae 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> @@ -164,7 +164,7 @@ void intel_guc_init_early(struct intel_guc *guc)
> struct intel_gt *gt = guc_to_gt(guc);
> struct drm_i915_private *i915 = gt->i915;
>
> - intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC);
> + intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC, true);
> intel_guc_ct_init_early(&guc->ct);
> intel_guc_log_init_early(&guc->log);
> intel_guc_submission_init_early(guc);
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> index 04724ff56ded..268e036f8f28 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> @@ -276,7 +276,7 @@ void intel_huc_init_early(struct intel_huc *huc)
> struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
> struct intel_gt *gt = huc_to_gt(huc);
>
> - intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC);
> + intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC, true);
>
> /*
> * we always init the fence as already completed, even if HuC is not
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index c8b9cbb7ba3a..1e7f5cc9d550 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -700,6 +700,12 @@ void intel_uc_suspend(struct intel_uc *uc)
> }
> }
>
> +static void __uc_resume_mappings(struct intel_uc *uc)
> +{
> + intel_uc_fw_resume_mapping(&uc->guc.fw);
> + intel_uc_fw_resume_mapping(&uc->huc.fw);
> +}
> +
> static int __uc_resume(struct intel_uc *uc, bool enable_communication)
> {
> struct intel_guc *guc = &uc->guc;
> @@ -767,4 +773,6 @@ static const struct intel_uc_ops uc_ops_on = {
>
> .init_hw = __uc_init_hw,
> .fini_hw = __uc_fini_hw,
> +
> + .resume_mappings = __uc_resume_mappings,
> };
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
> index d585524d94de..014bb7d83689 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
> @@ -24,6 +24,7 @@ struct intel_uc_ops {
> void (*fini)(struct intel_uc *uc);
> int (*init_hw)(struct intel_uc *uc);
> void (*fini_hw)(struct intel_uc *uc);
> + void (*resume_mappings)(struct intel_uc *uc);
> };
>
> struct intel_uc {
> @@ -114,6 +115,7 @@ intel_uc_ops_function(init, init, int, 0);
> intel_uc_ops_function(fini, fini, void, );
> intel_uc_ops_function(init_hw, init_hw, int, 0);
> intel_uc_ops_function(fini_hw, fini_hw, void, );
> +intel_uc_ops_function(resume_mappings, resume_mappings, void, );
> #undef intel_uc_ops_function
>
> #endif
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> index dc5c96c503a9..31776c279f32 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> @@ -471,12 +471,14 @@ static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc
> * intel_uc_fw_init_early - initialize the uC object and select the firmware
> * @uc_fw: uC firmware
> * @type: type of uC
> + * @needs_ggtt_mapping: whether the FW needs to be GGTT mapped for loading
> *
> * Initialize the state of our uC object and relevant tracking and select the
> * firmware to fetch and load.
> */
> void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
> - enum intel_uc_fw_type type)
> + enum intel_uc_fw_type type,
> + bool needs_ggtt_mapping)
> {
> struct intel_gt *gt = ____uc_fw_to_gt(uc_fw, type);
> struct drm_i915_private *i915 = gt->i915;
> @@ -490,6 +492,7 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
> GEM_BUG_ON(uc_fw->file_selected.path);
>
> uc_fw->type = type;
> + uc_fw->needs_ggtt_mapping = needs_ggtt_mapping;
>
> if (HAS_GT_UC(i915)) {
> if (!validate_fw_table_type(i915, type)) {
> @@ -755,7 +758,7 @@ static int try_firmware_load(struct intel_uc_fw *uc_fw, const struct firmware **
> if (err)
> return err;
>
> - if ((*fw)->size > INTEL_UC_RSVD_GGTT_PER_FW) {
> + if (uc_fw->needs_ggtt_mapping && (*fw)->size > INTEL_UC_RSVD_GGTT_PER_FW) {
> gt_err(gt, "%s firmware %s: size (%zuKB) exceeds max supported size (%uKB)\n",
> intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
> (*fw)->size / SZ_1K, INTEL_UC_RSVD_GGTT_PER_FW / SZ_1K);
> @@ -940,29 +943,32 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
> {
> struct drm_i915_gem_object *obj = uc_fw->obj;
> struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
> - struct i915_vma_resource *dummy = &uc_fw->dummy;
> + struct i915_vma_resource *vma_res = &uc_fw->vma_res;
> u32 pte_flags = 0;
>
> - dummy->start = uc_fw_ggtt_offset(uc_fw);
> - dummy->node_size = obj->base.size;
> - dummy->bi.pages = obj->mm.pages;
> + if (!uc_fw->needs_ggtt_mapping)
> + return;
> +
> + vma_res->start = uc_fw_ggtt_offset(uc_fw);
> + vma_res->node_size = obj->base.size;
> + vma_res->bi.pages = obj->mm.pages;
>
> GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
>
> /* uc_fw->obj cache domains were not controlled across suspend */
> if (i915_gem_object_has_struct_page(obj))
> - drm_clflush_sg(dummy->bi.pages);
> + drm_clflush_sg(vma_res->bi.pages);
>
> if (i915_gem_object_is_lmem(obj))
> pte_flags |= PTE_LM;
>
> if (ggtt->vm.raw_insert_entries)
> - ggtt->vm.raw_insert_entries(&ggtt->vm, dummy,
> + ggtt->vm.raw_insert_entries(&ggtt->vm, vma_res,
> i915_gem_get_pat_index(ggtt->vm.i915,
> I915_CACHE_NONE),
> pte_flags);
> else
> - ggtt->vm.insert_entries(&ggtt->vm, dummy,
> + ggtt->vm.insert_entries(&ggtt->vm, vma_res,
> i915_gem_get_pat_index(ggtt->vm.i915,
> I915_CACHE_NONE),
> pte_flags);
> @@ -970,11 +976,13 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
>
> static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw)
> {
> - struct drm_i915_gem_object *obj = uc_fw->obj;
> struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
> - u64 start = uc_fw_ggtt_offset(uc_fw);
> + struct i915_vma_resource *vma_res = &uc_fw->vma_res;
> +
> + if (!vma_res->node_size)
> + return;
>
> - ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size);
> + ggtt->vm.clear_range(&ggtt->vm, vma_res->start, vma_res->node_size);
> }
>
> static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
> @@ -991,7 +999,7 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
> intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
>
> /* Set the source address for the uCode */
> - offset = uc_fw_ggtt_offset(uc_fw);
> + offset = uc_fw->vma_res.start;
> GEM_BUG_ON(upper_32_bits(offset) & 0xFFFF0000);
> intel_uncore_write_fw(uncore, DMA_ADDR_0_LOW, lower_32_bits(offset));
> intel_uncore_write_fw(uncore, DMA_ADDR_0_HIGH, upper_32_bits(offset));
> @@ -1065,9 +1073,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
> return -ENOEXEC;
>
> /* Call custom loader */
> - uc_fw_bind_ggtt(uc_fw);
> err = uc_fw_xfer(uc_fw, dst_offset, dma_flags);
> - uc_fw_unbind_ggtt(uc_fw);
> if (err)
> goto fail;
>
> @@ -1171,6 +1177,8 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
> goto out_unpin;
> }
>
> + uc_fw_bind_ggtt(uc_fw);
> +
> return 0;
>
> out_unpin:
> @@ -1181,6 +1189,7 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
>
> void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
> {
> + uc_fw_unbind_ggtt(uc_fw);
> uc_fw_rsa_data_destroy(uc_fw);
>
> if (i915_gem_object_has_pinned_pages(uc_fw->obj))
> @@ -1189,6 +1198,17 @@ void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
> intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE);
> }
>
> +void intel_uc_fw_resume_mapping(struct intel_uc_fw *uc_fw)
> +{
> + if (!intel_uc_fw_is_available(uc_fw))
> + return;
> +
> + if (!i915_gem_object_has_pinned_pages(uc_fw->obj))
> + return;
> +
> + uc_fw_bind_ggtt(uc_fw);
> +}
> +
> /**
> * intel_uc_fw_cleanup_fetch - cleanup uC firmware
> * @uc_fw: uC firmware
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
> index 6ba00e6b3975..2be9470eb712 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
> @@ -99,13 +99,19 @@ struct intel_uc_fw {
> struct drm_i915_gem_object *obj;
>
> /**
> - * @dummy: A vma used in binding the uc fw to ggtt. We can't define this
> - * vma on the stack as it can lead to a stack overflow, so we define it
> - * here. Safe to have 1 copy per uc fw because the binding is single
> - * threaded as it done during driver load (inherently single threaded)
> - * or during a GT reset (mutex guarantees single threaded).
> + * @needs_ggtt_mapping: indicates whether the fw object needs to be
> + * pinned to ggtt. If true, the fw is pinned at init time and unpinned
> + * during driver unload.
> */
> - struct i915_vma_resource dummy;
> + bool needs_ggtt_mapping;
> +
> + /**
> + * @vma_res: A vma resource used in binding the uc fw to ggtt. The fw is
> + * pinned in a reserved area of the ggtt (above the maximum address
> + * usable by GuC); therefore, we can't use the normal vma functions to
> + * do the pinning and we instead use this resource to do so.
> + */
> + struct i915_vma_resource vma_res;
> struct i915_vma *rsa_data;
>
> u32 rsa_size;
> @@ -282,12 +288,14 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
> }
>
> void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
> - enum intel_uc_fw_type type);
> + enum intel_uc_fw_type type,
> + bool needs_ggtt_mapping);
> int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw);
> void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw);
> int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags);
> int intel_uc_fw_init(struct intel_uc_fw *uc_fw);
> void intel_uc_fw_fini(struct intel_uc_fw *uc_fw);
> +void intel_uc_fw_resume_mapping(struct intel_uc_fw *uc_fw);
> size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len);
> int intel_uc_fw_mark_load_failed(struct intel_uc_fw *uc_fw, int err);
> void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p);
More information about the dri-devel
mailing list