[Intel-gfx] [PATCH 1/2] drm/i915/uc: Support for version reduced and multiple firmware files

Ceraolo Spurio, Daniele daniele.ceraolospurio at intel.com
Thu Aug 25 04:49:56 UTC 2022



On 8/16/2022 1:28 PM, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
>
> There was a misunderstanding in how firmware file compatibility should
> be managed within i915. This has been clarified as:
>    i915 must support all existing firmware releases forever
>    new minor firmware releases should replace prior versions
>    only backwards compatibility breaking releases should be a new file
>
> Hence this patch cleans up the single fallback file support that was
> added as a quick fix emergency effort. That is now removed in
> preference to supporting arbitrary numbers of firmware files per
> platform as normal.
>
> The patch also adds support for having GuC firmwrae files that are
> named by major version only (because the major version indicates
> backwards breaking changes that affect the KMD) and for having HuC
> firmware files with no version number at all (because the KMD has no
> interface requirements with the HuC).
>
> For GuC, the driver will report via dmesg if the found file is older than
> expected. For HuC, the KMD will no longer require updating for any new
> HuC release so will not be able to report what the latest expected
> version is.
>
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> ---
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  10 +-
>   drivers/gpu/drm/i915/gt/uc/intel_uc.c         |   4 +-
>   drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c      | 396 +++++++++++-------
>   drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h      |  33 +-
>   drivers/gpu/drm/i915/i915_gpu_error.c         |  16 +-
>   5 files changed, 275 insertions(+), 184 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 0d17da77e7872..d1715971fdd79 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -1868,7 +1868,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
>   	if (guc->submission_initialized)
>   		return 0;
>   
> -	if (guc->fw.major_ver_found < 70) {
> +	if (guc->fw.file_found.major_ver < 70) {
>   		ret = guc_lrc_desc_pool_create_v69(guc);
>   		if (ret)
>   			return ret;
> @@ -2303,7 +2303,7 @@ static int register_context(struct intel_context *ce, bool loop)
>   	GEM_BUG_ON(intel_context_is_child(ce));
>   	trace_intel_context_register(ce);
>   
> -	if (guc->fw.major_ver_found >= 70)
> +	if (guc->fw.file_found.major_ver >= 70)
>   		ret = register_context_v70(guc, ce, loop);
>   	else
>   		ret = register_context_v69(guc, ce, loop);
> @@ -2315,7 +2315,7 @@ static int register_context(struct intel_context *ce, bool loop)
>   		set_context_registered(ce);
>   		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
>   
> -		if (guc->fw.major_ver_found >= 70)
> +		if (guc->fw.file_found.major_ver >= 70)
>   			guc_context_policy_init_v70(ce, loop);
>   	}
>   
> @@ -2921,7 +2921,7 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
>   						 u16 guc_id,
>   						 u32 preemption_timeout)
>   {
> -	if (guc->fw.major_ver_found >= 70) {
> +	if (guc->fw.file_found.major_ver >= 70) {
>   		struct context_policy policy;
>   
>   		__guc_context_policy_start_klv(&policy, guc_id);
> @@ -3186,7 +3186,7 @@ static int guc_context_alloc(struct intel_context *ce)
>   static void __guc_context_set_prio(struct intel_guc *guc,
>   				   struct intel_context *ce)
>   {
> -	if (guc->fw.major_ver_found >= 70) {
> +	if (guc->fw.file_found.major_ver >= 70) {
>   		struct context_policy policy;
>   
>   		__guc_context_policy_start_klv(&policy, ce->guc_id.id);
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index f2e7c82985efd..0697128cc3362 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -436,8 +436,8 @@ static void print_fw_ver(struct intel_uc *uc, struct intel_uc_fw *fw)
>   	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
>   
>   	drm_info(&i915->drm, "%s firmware %s version %u.%u\n",
> -		 intel_uc_fw_type_repr(fw->type), fw->path,
> -		 fw->major_ver_found, fw->minor_ver_found);
> +		 intel_uc_fw_type_repr(fw->type), fw->file_found.path,
> +		 fw->file_found.major_ver, fw->file_found.minor_ver);
>   }
>   
>   static int __uc_init_hw(struct intel_uc *uc)
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> index 58547292efa0a..eb3a15f0fa479 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> @@ -41,7 +41,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
>   		"%s firmware -> %s\n",
>   		intel_uc_fw_type_repr(uc_fw->type),
>   		status == INTEL_UC_FIRMWARE_SELECTED ?
> -		uc_fw->path : intel_uc_fw_status_repr(status));
> +		uc_fw->file_found.path : intel_uc_fw_status_repr(status));
>   }
>   #endif
>   
> @@ -52,83 +52,113 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
>    * Note that RKL and ADL-S have the same GuC/HuC device ID's and use the same
>    * firmware as TGL.
>    */
> -#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
> -	fw_def(DG2,          0, guc_def(dg2,  70, 4, 1)) \
> -	fw_def(ALDERLAKE_P,  0, guc_def(adlp, 70, 1, 1)) \
> -	fw_def(ALDERLAKE_S,  0, guc_def(tgl,  70, 1, 1)) \
> -	fw_def(DG1,          0, guc_def(dg1,  70, 1, 1)) \
> -	fw_def(ROCKETLAKE,   0, guc_def(tgl,  70, 1, 1)) \
> -	fw_def(TIGERLAKE,    0, guc_def(tgl,  70, 1, 1)) \
> -	fw_def(JASPERLAKE,   0, guc_def(ehl,  70, 1, 1)) \
> -	fw_def(ELKHARTLAKE,  0, guc_def(ehl,  70, 1, 1)) \
> -	fw_def(ICELAKE,      0, guc_def(icl,  70, 1, 1)) \
> -	fw_def(COMETLAKE,    5, guc_def(cml,  70, 1, 1)) \
> -	fw_def(COMETLAKE,    0, guc_def(kbl,  70, 1, 1)) \
> -	fw_def(COFFEELAKE,   0, guc_def(kbl,  70, 1, 1)) \
> -	fw_def(GEMINILAKE,   0, guc_def(glk,  70, 1, 1)) \
> -	fw_def(KABYLAKE,     0, guc_def(kbl,  70, 1, 1)) \
> -	fw_def(BROXTON,      0, guc_def(bxt,  70, 1, 1)) \
> -	fw_def(SKYLAKE,      0, guc_def(skl,  70, 1, 1))
> -
> -#define INTEL_GUC_FIRMWARE_DEFS_FALLBACK(fw_def, guc_def) \
> -	fw_def(ALDERLAKE_P,  0, guc_def(adlp, 69, 0, 3)) \
> -	fw_def(ALDERLAKE_S,  0, guc_def(tgl,  69, 0, 3))
> -
> -#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
> -	fw_def(ALDERLAKE_P,  0, huc_def(tgl,  7, 9, 3)) \
> -	fw_def(ALDERLAKE_S,  0, huc_def(tgl,  7, 9, 3)) \
> -	fw_def(DG1,          0, huc_def(dg1,  7, 9, 3)) \
> -	fw_def(ROCKETLAKE,   0, huc_def(tgl,  7, 9, 3)) \
> -	fw_def(TIGERLAKE,    0, huc_def(tgl,  7, 9, 3)) \
> -	fw_def(JASPERLAKE,   0, huc_def(ehl,  9, 0, 0)) \
> -	fw_def(ELKHARTLAKE,  0, huc_def(ehl,  9, 0, 0)) \
> -	fw_def(ICELAKE,      0, huc_def(icl,  9, 0, 0)) \
> -	fw_def(COMETLAKE,    5, huc_def(cml,  4, 0, 0)) \
> -	fw_def(COMETLAKE,    0, huc_def(kbl,  4, 0, 0)) \
> -	fw_def(COFFEELAKE,   0, huc_def(kbl,  4, 0, 0)) \
> -	fw_def(GEMINILAKE,   0, huc_def(glk,  4, 0, 0)) \
> -	fw_def(KABYLAKE,     0, huc_def(kbl,  4, 0, 0)) \
> -	fw_def(BROXTON,      0, huc_def(bxt,  2, 0, 0)) \
> -	fw_def(SKYLAKE,      0, huc_def(skl,  2, 0, 0))
> -
> -#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
> +#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_maj, guc_mmp) \
> +	fw_def(ALDERLAKE_P,  0, guc_mmp(adlp, 70, 1, 1)) \
> +	fw_def(ALDERLAKE_P,  0, guc_mmp(adlp, 69, 0, 3)) \
> +	fw_def(ALDERLAKE_S,  0, guc_mmp(tgl,  70, 1, 1)) \
> +	fw_def(DG1,          0, guc_mmp(dg1,  70, 1, 1)) \
> +	fw_def(ROCKETLAKE,   0, guc_mmp(tgl,  70, 1, 1)) \
> +	fw_def(TIGERLAKE,    0, guc_mmp(tgl,  70, 1, 1)) \
> +	fw_def(JASPERLAKE,   0, guc_mmp(ehl,  70, 1, 1)) \
> +	fw_def(ELKHARTLAKE,  0, guc_mmp(ehl,  70, 1, 1)) \
> +	fw_def(ICELAKE,      0, guc_mmp(icl,  70, 1, 1)) \
> +	fw_def(COMETLAKE,    5, guc_mmp(cml,  70, 1, 1)) \
> +	fw_def(COMETLAKE,    0, guc_mmp(kbl,  70, 1, 1)) \
> +	fw_def(COFFEELAKE,   0, guc_mmp(kbl,  70, 1, 1)) \
> +	fw_def(GEMINILAKE,   0, guc_mmp(glk,  70, 1, 1)) \
> +	fw_def(KABYLAKE,     0, guc_mmp(kbl,  70, 1, 1)) \
> +	fw_def(BROXTON,      0, guc_mmp(bxt,  70, 1, 1)) \
> +	fw_def(SKYLAKE,      0, guc_mmp(skl,  70, 1, 1))
> +
> +#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp) \
> +	fw_def(ALDERLAKE_P,  0, huc_mmp(tgl,  7, 9, 3)) \
> +	fw_def(ALDERLAKE_S,  0, huc_mmp(tgl,  7, 9, 3)) \
> +	fw_def(DG1,          0, huc_mmp(dg1,  7, 9, 3)) \
> +	fw_def(ROCKETLAKE,   0, huc_mmp(tgl,  7, 9, 3)) \
> +	fw_def(TIGERLAKE,    0, huc_mmp(tgl,  7, 9, 3)) \
> +	fw_def(JASPERLAKE,   0, huc_mmp(ehl,  9, 0, 0)) \
> +	fw_def(ELKHARTLAKE,  0, huc_mmp(ehl,  9, 0, 0)) \
> +	fw_def(ICELAKE,      0, huc_mmp(icl,  9, 0, 0)) \
> +	fw_def(COMETLAKE,    5, huc_mmp(cml,  4, 0, 0)) \
> +	fw_def(COMETLAKE,    0, huc_mmp(kbl,  4, 0, 0)) \
> +	fw_def(COFFEELAKE,   0, huc_mmp(kbl,  4, 0, 0)) \
> +	fw_def(GEMINILAKE,   0, huc_mmp(glk,  4, 0, 0)) \
> +	fw_def(KABYLAKE,     0, huc_mmp(kbl,  4, 0, 0)) \
> +	fw_def(BROXTON,      0, huc_mmp(bxt,  2, 0, 0)) \
> +	fw_def(SKYLAKE,      0, huc_mmp(skl,  2, 0, 0))
> +
> +#define __MAKE_UC_FW_PATH_BLANK(prefix_, name_) \
> +	"i915/" \
> +	__stringify(prefix_) name_ ".bin"
> +
> +#define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \
> +	"i915/" \
> +	__stringify(prefix_) name_ \
> +	__stringify(major_) ".bin"
> +
> +#define __MAKE_UC_FW_PATH_MMP(prefix_, name_, major_, minor_, patch_) \
>   	"i915/" \
>   	__stringify(prefix_) name_ \
>   	__stringify(major_) "." \
>   	__stringify(minor_) "." \
>   	__stringify(patch_) ".bin"

This needs a comment to explain all these different path types, 
something like: "Following the Linux Firmware Guidelines (see relevant 
documentation for details), we expect GuC binaries to be identified only 
by platform+major version, while HuC binaries don't have an interface 
with i915 so they're only identified by platform. However, for backward 
compatibility reasons, we also need to keep supporting older firmwares 
using the full platform_major.minor.patch nomenclature." .

>   
> -#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \
> -	__MAKE_UC_FW_PATH(prefix_, "_guc_", major_, minor_, patch_)
> +#define MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_) \
> +	__MAKE_UC_FW_PATH_MAJOR(prefix_, "_guc_", major_)
> +
> +#define MAKE_GUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \
> +	__MAKE_UC_FW_PATH_MMP(prefix_, "_guc_", major_, minor_, patch_)
>   
> -#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \
> -	__MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_)
> +#define MAKE_HUC_FW_PATH_BLANK(prefix_) \
> +	__MAKE_UC_FW_PATH_BLANK(prefix_, "_huc")
> +
> +#define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \
> +	__MAKE_UC_FW_PATH_MMP(prefix_, "_huc_", major_, minor_, patch_)
>   
>   /* All blobs need to be declared via MODULE_FIRMWARE() */
>   #define INTEL_UC_MODULE_FW(platform_, revid_, uc_) \
>   	MODULE_FIRMWARE(uc_);
>   
> -INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
> -INTEL_GUC_FIRMWARE_DEFS_FALLBACK(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
> -INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH)
> +INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP)
> +INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP)
>   
>   /* The below structs and macros are used to iterate across the list of blobs */
>   struct __packed uc_fw_blob {
> +	const char *path;
> +	bool legacy;
>   	u8 major;
>   	u8 minor;
> -	const char *path;
> +	u8 patch;
>   };
>   
> -#define UC_FW_BLOB(major_, minor_, path_) \
> -	{ .major = major_, .minor = minor_, .path = path_ }
> +#define UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
> +	.major = major_, \
> +	.minor = minor_, \
> +	.patch = patch_, \
> +	.path = path_,
> +
> +#define UC_FW_BLOB_NEW(major_, minor_, patch_, path_) \
> +	{ UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
> +	  .legacy = false }
> +
> +#define UC_FW_BLOB_OLD(major_, minor_, patch_, path_) \
> +	{ UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
> +	  .legacy = true }
>   
> -#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \
> -	UC_FW_BLOB(major_, minor_, \
> -		   MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_))
> +#define GUC_FW_BLOB(prefix_, major_, minor_) \
> +	UC_FW_BLOB_NEW(major_, minor_, 0, \
> +		       MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_))

All these macros are a bit confusing, but AFAIU you want to record the 
expected minor on the i915 side but not encode it in the fetch path and 
only use it for logging purposes. Maybe add a comment to explain this? 
otherwise it is just confusing that we say we only want major on one 
side and we include the minor on the other.

>   
> -#define HUC_FW_BLOB(prefix_, major_, minor_, bld_num_) \
> -	UC_FW_BLOB(major_, minor_, \
> -		   MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_))
> +#define GUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \
> +	UC_FW_BLOB_OLD(major_, minor_, patch_, \
> +		       MAKE_GUC_FW_PATH_MMP(prefix_, major_, minor_, patch_))
> +
> +#define HUC_FW_BLOB(prefix_) \
> +	UC_FW_BLOB_NEW(0, 0, 0, MAKE_HUC_FW_PATH_BLANK(prefix_))
> +
> +#define HUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \
> +	UC_FW_BLOB_OLD(major_, minor_, patch_, \
> +		       MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_))
>   
>   struct __packed uc_fw_platform_requirement {
>   	enum intel_platform p;
> @@ -152,13 +182,10 @@ static void
>   __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
>   {
>   	static const struct uc_fw_platform_requirement blobs_guc[] = {
> -		INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
> -	};
> -	static const struct uc_fw_platform_requirement blobs_guc_fallback[] = {
> -		INTEL_GUC_FIRMWARE_DEFS_FALLBACK(MAKE_FW_LIST, GUC_FW_BLOB)
> +		INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, GUC_FW_BLOB_MMP)
>   	};
>   	static const struct uc_fw_platform_requirement blobs_huc[] = {
> -		INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB)
> +		INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP)
>   	};
>   	static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = {
>   		[INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
> @@ -184,49 +211,89 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
>   	fw_count = blobs_all[uc_fw->type].count;
>   
>   	for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) {
> -		if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) {
> -			const struct uc_fw_blob *blob = &fw_blobs[i].blob;
> -			uc_fw->path = blob->path;
> -			uc_fw->wanted_path = blob->path;
> -			uc_fw->major_ver_wanted = blob->major;
> -			uc_fw->minor_ver_wanted = blob->minor;
> -			break;
> -		}
> -	}
> +		const struct uc_fw_blob *blob = &fw_blobs[i].blob;
>   
> -	if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) {
> -		const struct uc_fw_platform_requirement *blobs = blobs_guc_fallback;
> -		u32 count = ARRAY_SIZE(blobs_guc_fallback);
> +		if (p != fw_blobs[i].p)
> +			continue;
>   
> -		for (i = 0; i < count && p <= blobs[i].p; i++) {
> -			if (p == blobs[i].p && rev >= blobs[i].rev) {
> -				const struct uc_fw_blob *blob = &blobs[i].blob;
> +		if (rev < fw_blobs[i].rev)
> +			continue;
>   
> -				uc_fw->fallback.path = blob->path;
> -				uc_fw->fallback.major_ver = blob->major;
> -				uc_fw->fallback.minor_ver = blob->minor;
> -				break;
> -			}
> -		}
> +		if (uc_fw->file_wanted.path &&
> +		    uc_fw->file_wanted.major_ver == blob->major)
> +			continue;

This needs a comment to explain that this function can be called 
multiple times if the first fw we look for is not found on disk. Also, 
how does this work with new vs legacy blob with the same version? e.g. 
if new style dg2_guc_70.bin is not on disk, but old style dg2_guc_70.4.1 
is, the major is 70 in both cases. Or am I missing something?

> +
> +		uc_fw->file_found.path = blob->path;
> +		uc_fw->file_wanted.path = blob->path;
> +		uc_fw->file_wanted.major_ver = blob->major;
> +		uc_fw->file_wanted.minor_ver = blob->minor;

The naming of "wanted" vs "found" here is a bit misleading, because 
found is assigned here before we even attempt the fetch, so  we haven't 
actually found it. Looks like you're before the fetch you're using this 
mainly for printing, so can't you use file_wanted instead and only 
assign file_found after the fetch?

> +		break;
>   	}
>   
>   	/* make sure the list is ordered as expected */
>   	if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) {
>   		for (i = 1; i < fw_count; i++) {
> +			/* Next platform is good: */
>   			if (fw_blobs[i].p < fw_blobs[i - 1].p)
>   				continue;
>   
> +			/* Next platform revision is good: */
>   			if (fw_blobs[i].p == fw_blobs[i - 1].p &&
>   			    fw_blobs[i].rev < fw_blobs[i - 1].rev)
>   				continue;
>   
> -			drm_err(&i915->drm, "Invalid FW blob order: %s r%u comes before %s r%u\n",
> -				intel_platform_name(fw_blobs[i - 1].p),
> -				fw_blobs[i - 1].rev,
> -				intel_platform_name(fw_blobs[i].p),
> -				fw_blobs[i].rev);
> +			/* Platform/revision must be in order: */
> +			if (fw_blobs[i].p != fw_blobs[i - 1].p ||
> +			    fw_blobs[i].rev != fw_blobs[i - 1].rev)
> +				goto bad;
>   
> -			uc_fw->path = NULL;
> +			/* Next major version is good: */
> +			if (fw_blobs[i].blob.major < fw_blobs[i - 1].blob.major)
> +				continue;
> +
> +			/* New must be before leggacy: */

typo legacy

> +			if (!fw_blobs[i].blob.legacy && fw_blobs[i - 1].blob.legacy)
> +				goto bad;
> +
> +			/* New to legacy also means 0.0 to X.Y, or X.0 to X.Y */
> +			if (fw_blobs[i].blob.legacy && !fw_blobs[i - 1].blob.legacy) {
> +				if (!fw_blobs[i - 1].blob.major)

I'd add a comment here to remind that we don't track the versions with HuC

> +					continue;
> +
> +				if (fw_blobs[i].blob.major == fw_blobs[i - 1].blob.major)
> +					continue;
> +			}
> +
> +			/* Major versions must be in order: */
> +			if (fw_blobs[i].blob.major != fw_blobs[i - 1].blob.major)
> +				goto bad;
> +
> +			/* Next minor version is good: */
> +			if (fw_blobs[i].blob.minor < fw_blobs[i - 1].blob.minor)
> +				continue;
> +
> +			/* Minor versions must be in order: */
> +			if (fw_blobs[i].blob.minor != fw_blobs[i - 1].blob.minor)
> +				goto bad;
> +
> +			/* Patch versions must be in order: */
> +			if (fw_blobs[i].blob.patch <= fw_blobs[i - 1].blob.patch)
> +				continue;
> +
> +bad:
> +			drm_err(&i915->drm, "\x1B[35;1mInvalid FW blob order: %s r%u %s%d.%d.%d comes before %s r%u %s%d.%d.%d\n",
> +				intel_platform_name(fw_blobs[i - 1].p), fw_blobs[i - 1].rev,
> +				fw_blobs[i - 1].blob.legacy ? "L" : "v",
> +				fw_blobs[i - 1].blob.major,
> +				fw_blobs[i - 1].blob.minor,
> +				fw_blobs[i - 1].blob.patch,
> +				intel_platform_name(fw_blobs[i].p), fw_blobs[i].rev,
> +				fw_blobs[i].blob.legacy ? "L" : "v",
> +				fw_blobs[i].blob.major,
> +				fw_blobs[i].blob.minor,
> +				fw_blobs[i].blob.patch);
> +
> +			uc_fw->file_found.path = NULL;
>   		}

This has grown big enough that it should probably be moved to a mock 
selftest. Can be done as a follow up.

>   	}
>   }
> @@ -259,7 +326,7 @@ static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc
>   	}
>   
>   	if (unlikely(path)) {
> -		uc_fw->path = path;
> +		uc_fw->file_found.path = path;
>   		uc_fw->user_overridden = true;
>   	}
>   }
> @@ -283,7 +350,7 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
>   	 */
>   	BUILD_BUG_ON(INTEL_UC_FIRMWARE_UNINITIALIZED);
>   	GEM_BUG_ON(uc_fw->status);
> -	GEM_BUG_ON(uc_fw->path);
> +	GEM_BUG_ON(uc_fw->file_found.path);
>   
>   	uc_fw->type = type;
>   
> @@ -292,7 +359,7 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
>   		__uc_fw_user_override(i915, uc_fw);
>   	}
>   
> -	intel_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ?
> +	intel_uc_fw_change_status(uc_fw, uc_fw->file_found.path ? *uc_fw->file_found.path ?
>   				  INTEL_UC_FIRMWARE_SELECTED :
>   				  INTEL_UC_FIRMWARE_DISABLED :
>   				  INTEL_UC_FIRMWARE_NOT_SUPPORTED);
> @@ -305,32 +372,32 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e)
>   
>   	if (i915_inject_probe_error(i915, e)) {
>   		/* non-existing blob */
> -		uc_fw->path = "<invalid>";
> +		uc_fw->file_found.path = "<invalid>";
>   		uc_fw->user_overridden = user;
>   	} else if (i915_inject_probe_error(i915, e)) {
>   		/* require next major version */
> -		uc_fw->major_ver_wanted += 1;
> -		uc_fw->minor_ver_wanted = 0;
> +		uc_fw->file_wanted.major_ver += 1;
> +		uc_fw->file_wanted.minor_ver = 0;
>   		uc_fw->user_overridden = user;
>   	} else if (i915_inject_probe_error(i915, e)) {
>   		/* require next minor version */
> -		uc_fw->minor_ver_wanted += 1;
> +		uc_fw->file_wanted.minor_ver += 1;
>   		uc_fw->user_overridden = user;
> -	} else if (uc_fw->major_ver_wanted &&
> +	} else if (uc_fw->file_wanted.major_ver &&
>   		   i915_inject_probe_error(i915, e)) {
>   		/* require prev major version */
> -		uc_fw->major_ver_wanted -= 1;
> -		uc_fw->minor_ver_wanted = 0;
> +		uc_fw->file_wanted.major_ver -= 1;
> +		uc_fw->file_wanted.minor_ver = 0;
>   		uc_fw->user_overridden = user;
> -	} else if (uc_fw->minor_ver_wanted &&
> +	} else if (uc_fw->file_wanted.minor_ver &&
>   		   i915_inject_probe_error(i915, e)) {
>   		/* require prev minor version - hey, this should work! */
> -		uc_fw->minor_ver_wanted -= 1;
> +		uc_fw->file_wanted.minor_ver -= 1;
>   		uc_fw->user_overridden = user;
>   	} else if (user && i915_inject_probe_error(i915, e)) {
>   		/* officially unsupported platform */
> -		uc_fw->major_ver_wanted = 0;
> -		uc_fw->minor_ver_wanted = 0;
> +		uc_fw->file_wanted.major_ver = 0;
> +		uc_fw->file_wanted.minor_ver = 0;
>   		uc_fw->user_overridden = true;
>   	}
>   }
> @@ -341,8 +408,8 @@ static int check_gsc_manifest(const struct firmware *fw,
>   	u32 *dw = (u32 *)fw->data;
>   	u32 version = dw[HUC_GSC_VERSION_DW];
>   
> -	uc_fw->major_ver_found = FIELD_GET(HUC_GSC_MAJOR_VER_MASK, version);
> -	uc_fw->minor_ver_found = FIELD_GET(HUC_GSC_MINOR_VER_MASK, version);
> +	uc_fw->file_found.major_ver = FIELD_GET(HUC_GSC_MAJOR_VER_MASK, version);
> +	uc_fw->file_found.minor_ver = FIELD_GET(HUC_GSC_MINOR_VER_MASK, version);
>   
>   	return 0;
>   }
> @@ -357,7 +424,7 @@ static int check_ccs_header(struct drm_i915_private *i915,
>   	/* Check the size of the blob before examining buffer contents */
>   	if (unlikely(fw->size < sizeof(struct uc_css_header))) {
>   		drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n",
> -			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
> +			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_found.path,
>   			 fw->size, sizeof(struct uc_css_header));
>   		return -ENODATA;
>   	}
> @@ -370,7 +437,7 @@ static int check_ccs_header(struct drm_i915_private *i915,
>   	if (unlikely(size != sizeof(struct uc_css_header))) {
>   		drm_warn(&i915->drm,
>   			 "%s firmware %s: unexpected header size: %zu != %zu\n",
> -			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
> +			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_found.path,
>   			 fw->size, sizeof(struct uc_css_header));
>   		return -EPROTO;
>   	}
> @@ -385,7 +452,7 @@ static int check_ccs_header(struct drm_i915_private *i915,
>   	size = sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->rsa_size;
>   	if (unlikely(fw->size < size)) {
>   		drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n",
> -			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
> +			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_found.path,
>   			 fw->size, size);
>   		return -ENOEXEC;
>   	}
> @@ -394,16 +461,16 @@ static int check_ccs_header(struct drm_i915_private *i915,
>   	size = __intel_uc_fw_get_upload_size(uc_fw);
>   	if (unlikely(size >= i915->wopcm.size)) {
>   		drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu > %zu\n",
> -			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
> +			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_found.path,
>   			 size, (size_t)i915->wopcm.size);
>   		return -E2BIG;
>   	}
>   
>   	/* Get version numbers from the CSS header */
> -	uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
> -					   css->sw_version);
> -	uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
> -					   css->sw_version);
> +	uc_fw->file_found.major_ver = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
> +						css->sw_version);
> +	uc_fw->file_found.minor_ver = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
> +						css->sw_version);
>   
>   	if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
>   		uc_fw->private_data_size = css->private_data_size;
> @@ -422,9 +489,11 @@ static int check_ccs_header(struct drm_i915_private *i915,
>   int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
>   {
>   	struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915;
> +	struct intel_uc_fw_file file_ideal;
>   	struct device *dev = i915->drm.dev;
>   	struct drm_i915_gem_object *obj;
>   	const struct firmware *fw = NULL;
> +	bool old_ver = false;
>   	int err;
>   
>   	GEM_BUG_ON(!i915->wopcm.size);
> @@ -437,27 +506,30 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
>   	__force_fw_fetch_failures(uc_fw, -EINVAL);
>   	__force_fw_fetch_failures(uc_fw, -ESTALE);
>   
> -	err = firmware_request_nowarn(&fw, uc_fw->path, dev);
> -	if (err && !intel_uc_fw_is_overridden(uc_fw) && uc_fw->fallback.path) {
> -		err = firmware_request_nowarn(&fw, uc_fw->fallback.path, dev);
> -		if (!err) {
> -			drm_notice(&i915->drm,
> -				   "%s firmware %s is recommended, but only %s was found\n",
> -				   intel_uc_fw_type_repr(uc_fw->type),
> -				   uc_fw->wanted_path,
> -				   uc_fw->fallback.path);
> -			drm_info(&i915->drm,
> -				 "Consider updating your linux-firmware pkg or downloading from %s\n",
> -				 INTEL_UC_FIRMWARE_URL);
> -
> -			uc_fw->path = uc_fw->fallback.path;
> -			uc_fw->major_ver_wanted = uc_fw->fallback.major_ver;
> -			uc_fw->minor_ver_wanted = uc_fw->fallback.minor_ver;
> +	err = firmware_request_nowarn(&fw, uc_fw->file_found.path, dev);
> +	memcpy(&file_ideal, &uc_fw->file_wanted, sizeof(file_ideal));
> +	if (!err || intel_uc_fw_is_overridden(uc_fw))
> +		goto done;
> +
> +	while (err == -ENOENT) {
> +		__uc_fw_auto_select(i915, uc_fw);
> +		if (!uc_fw->file_found.path) {
> +			/*
> +			 * No more options! But set path back to something
> +			 * valid just in case it gets dereferenced.
> +			 */
> +			uc_fw->file_found.path = file_ideal.path;
> +			break;
>   		}
> +
> +		err = firmware_request_nowarn(&fw, uc_fw->file_found.path, dev);
>   	}
> +
>   	if (err)
>   		goto fail;
>   
> +	old_ver = true;
> +done:
>   	if (uc_fw->loaded_via_gsc)
>   		err = check_gsc_manifest(fw, uc_fw);
>   	else
> @@ -465,18 +537,41 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
>   	if (err)
>   		goto fail;
>   
> -	if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
> -	    uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
> -		drm_notice(&i915->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
> -			   intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
> -			   uc_fw->major_ver_found, uc_fw->minor_ver_found,
> -			   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
> -		if (!intel_uc_fw_is_overridden(uc_fw)) {
> -			err = -ENOEXEC;
> -			goto fail;
> +	if (uc_fw->file_wanted.major_ver) {
> +		/* Check the file's major version was as it claimed */
> +		if (uc_fw->file_found.major_ver != uc_fw->file_wanted.major_ver) {
> +			drm_notice(&i915->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
> +				   intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_found.path,
> +				   uc_fw->file_found.major_ver, uc_fw->file_found.minor_ver,
> +				   uc_fw->file_wanted.major_ver, uc_fw->file_wanted.minor_ver);
> +			if (!intel_uc_fw_is_overridden(uc_fw)) {
> +				err = -ENOEXEC;
> +				goto fail;
> +			}
> +		} else {
> +			if (uc_fw->file_found.minor_ver < uc_fw->file_wanted.minor_ver)
> +				old_ver = true;
>   		}
>   	}
>   
> +	if (old_ver) {
> +		/* Preserve the version that was really wanted */
> +		memcpy(&uc_fw->file_wanted, &file_ideal, sizeof(uc_fw->file_wanted));
> +
> +		drm_notice(&i915->drm,
> +			   "%s firmware %s (%d.%d) is recommended, but only %s (%d.%d) was found\n",

This is going to be a weird message if we were looking for new style 
guc_70.0 and found old style guc_70.4 instead. Can we limit this log to 
when the found version is smaller than the wanted one?

> +			   intel_uc_fw_type_repr(uc_fw->type),
> +			   uc_fw->file_wanted.path,
> +			   uc_fw->file_wanted.major_ver,
> +			   uc_fw->file_wanted.minor_ver,
> +			   uc_fw->file_found.path,
> +			   uc_fw->file_found.major_ver,
> +			   uc_fw->file_found.minor_ver);
> +		drm_info(&i915->drm,
> +			 "Consider updating your linux-firmware pkg or downloading from %s\n",
> +			 INTEL_UC_FIRMWARE_URL);
> +	}
> +
>   	if (HAS_LMEM(i915)) {
>   		obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size);
>   		if (!IS_ERR(obj))
> @@ -503,7 +598,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
>   				  INTEL_UC_FIRMWARE_ERROR);
>   
>   	i915_probe_error(i915, "%s firmware %s: fetch failed with error %d\n",
> -			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
> +			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_found.path, err);
>   	drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n",
>   		 intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL);
>   
> @@ -645,7 +740,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
>   
>   fail:
>   	i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n",
> -			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
> +			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_found.path,
>   			 err);
>   	intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
>   	return err;
> @@ -864,18 +959,19 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
>   void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p)
>   {
>   	drm_printf(p, "%s firmware: %s\n",
> -		   intel_uc_fw_type_repr(uc_fw->type), uc_fw->wanted_path);
> -	if (uc_fw->fallback.path) {
> -		drm_printf(p, "%s firmware fallback: %s\n",
> -			   intel_uc_fw_type_repr(uc_fw->type), uc_fw->fallback.path);
> -		drm_printf(p, "fallback selected: %s\n",
> -			   str_yes_no(uc_fw->path == uc_fw->fallback.path));
> -	}
> +		   intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_found.path);
> +	if (uc_fw->file_found.path == uc_fw->file_wanted.path)

Was this meant to be a != check? otherwise it's the same print twice

Daniele

> +		drm_printf(p, "%s firmware wanted: %s\n",
> +			   intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_wanted.path);
>   	drm_printf(p, "\tstatus: %s\n",
>   		   intel_uc_fw_status_repr(uc_fw->status));
> -	drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
> -		   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted,
> -		   uc_fw->major_ver_found, uc_fw->minor_ver_found);
> +	if (uc_fw->file_wanted.major_ver)
> +		drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
> +			   uc_fw->file_wanted.major_ver, uc_fw->file_wanted.minor_ver,
> +			   uc_fw->file_found.major_ver, uc_fw->file_found.minor_ver);
> +	else
> +		drm_printf(p, "\tversion: found %u.%u\n",
> +			   uc_fw->file_found.major_ver, uc_fw->file_found.minor_ver);
>   	drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
>   	drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
>   }
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
> index 7aa2644400b98..5c1751773c756 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
> @@ -64,6 +64,17 @@ enum intel_uc_fw_type {
>   };
>   #define INTEL_UC_FW_NUM_TYPES 2
>   
> +/*
> + * The firmware build process will generate a version header file with major and
> + * minor version defined. The versions are built into CSS header of firmware.
> + * i915 kernel driver set the minimal firmware version required per platform.
> + */
> +struct intel_uc_fw_file {
> +	const char *path;
> +	u16 major_ver;
> +	u16 minor_ver;
> +};
> +
>   /*
>    * This structure encapsulates all the data needed during the process
>    * of fetching, caching, and loading the firmware image into the uC.
> @@ -74,11 +85,12 @@ struct intel_uc_fw {
>   		const enum intel_uc_fw_status status;
>   		enum intel_uc_fw_status __status; /* no accidental overwrites */
>   	};
> -	const char *wanted_path;
> -	const char *path;
> +	struct intel_uc_fw_file file_wanted;
> +	struct intel_uc_fw_file file_found;
>   	bool user_overridden;
>   	size_t size;
>   	struct drm_i915_gem_object *obj;
> +
>   	/**
>   	 * @dummy: A vma used in binding the uc fw to ggtt. We can't define this
>   	 * vma on the stack as it can lead to a stack overflow, so we define it
> @@ -89,25 +101,8 @@ struct intel_uc_fw {
>   	struct i915_vma_resource dummy;
>   	struct i915_vma *rsa_data;
>   
> -	/*
> -	 * The firmware build process will generate a version header file with major and
> -	 * minor version defined. The versions are built into CSS header of firmware.
> -	 * i915 kernel driver set the minimal firmware version required per platform.
> -	 */
> -	u16 major_ver_wanted;
> -	u16 minor_ver_wanted;
> -	u16 major_ver_found;
> -	u16 minor_ver_found;
> -
> -	struct {
> -		const char *path;
> -		u16 major_ver;
> -		u16 minor_ver;
> -	} fallback;
> -
>   	u32 rsa_size;
>   	u32 ucode_size;
> -
>   	u32 private_data_size;
>   
>   	bool loaded_via_gsc;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 32e92651ef7c2..83cbb3589c9be 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1004,8 +1004,10 @@ static void cleanup_params(struct i915_gpu_coredump *error)
>   
>   static void cleanup_uc(struct intel_uc_coredump *uc)
>   {
> -	kfree(uc->guc_fw.path);
> -	kfree(uc->huc_fw.path);
> +	kfree(uc->guc_fw.file_found.path);
> +	kfree(uc->huc_fw.file_found.path);
> +	kfree(uc->guc_fw.file_wanted.path);
> +	kfree(uc->huc_fw.file_wanted.path);
>   	i915_vma_coredump_free(uc->guc_log);
>   
>   	kfree(uc);
> @@ -1669,12 +1671,10 @@ gt_record_uc(struct intel_gt_coredump *gt,
>   	memcpy(&error_uc->guc_fw, &uc->guc.fw, sizeof(uc->guc.fw));
>   	memcpy(&error_uc->huc_fw, &uc->huc.fw, sizeof(uc->huc.fw));
>   
> -	/* Non-default firmware paths will be specified by the modparam.
> -	 * As modparams are generally accesible from the userspace make
> -	 * explicit copies of the firmware paths.
> -	 */
> -	error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL);
> -	error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL);
> +	error_uc->guc_fw.file_found.path = kstrdup(uc->guc.fw.file_found.path, ALLOW_FAIL);
> +	error_uc->huc_fw.file_found.path = kstrdup(uc->huc.fw.file_found.path, ALLOW_FAIL);
> +	error_uc->guc_fw.file_wanted.path = kstrdup(uc->guc.fw.file_wanted.path, ALLOW_FAIL);
> +	error_uc->huc_fw.file_wanted.path = kstrdup(uc->huc.fw.file_wanted.path, ALLOW_FAIL);
>   	error_uc->guc_log = create_vma_coredump(gt->_gt, uc->guc.log.vma,
>   						"GuC log buffer", compress);
>   



More information about the Intel-gfx mailing list