[Intel-xe] [RFC] drm/xe/uc: rework uC version tracking

John Harrison john.c.harrison at intel.com
Thu Aug 24 18:00:08 UTC 2023


On 8/16/2023 14:55, Daniele Ceraolo Spurio wrote:
> The GSC firmware, support for which is coming soon for Xe, has both a
> release version (updated on every release) and a compatibility version
> (update only on interface changes). The GuC has something similar, with
> a global release version and a submission version (which is also known
> as the VF compatibility version). The main difference is that for the
> GuC we still want to check the driver requirement against the release
> version, while for the GSC we'll need to check against the compatibility
> version.
> Instead of special casing the GSC, this patch proposes a rework of the
> FW logic so that we store both versions at the uc_fw level for all
> binaries and we allow checking against either of the versions. Initially,
> we'll use it to support GSC, but the logic could be re-used to allow VFs
> to check against their expected GuC compatibility version.
>
> RFC note: I've added a couple of the GSC defines to show how this is
> supposed to be different for the GSC, but those are not intended to be
> part of this patch in its final version. I'd like to get some early
> feedback on this before I build the full GSC version checking on top of
> it.
Seems plausible to me.

Some comments below, mostly not actually related to this patch...

> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Cc: John Harrison <John.C.Harrison at Intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Michał Winiarski <michal.winiarski at intel.com>
> Cc: Zhanjun Dong <zhanjun.dong at intel.com>
> ---
>   drivers/gpu/drm/xe/xe_guc_types.h   |   9 --
>   drivers/gpu/drm/xe/xe_uc_fw.c       | 131 +++++++++++++++++-----------
>   drivers/gpu/drm/xe/xe_uc_fw.h       |   2 +
>   drivers/gpu/drm/xe/xe_uc_fw_types.h |  36 ++++++--
>   4 files changed, 108 insertions(+), 70 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
> index a5e58917a499..343cc39ce5bc 100644
> --- a/drivers/gpu/drm/xe/xe_guc_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_types.h
> @@ -52,15 +52,6 @@ struct xe_guc {
>   			/** @seqno: suspend fences seqno */
>   			u32 seqno;
>   		} suspend;
> -		/** @version: submission version */
> -		struct {
> -			/** @major: major version of GuC submission */
> -			u32 major;
> -			/** @minor: minor version of GuC submission */
> -			u32 minor;
> -			/** @patch: patch version of GuC submission */
> -			u32 patch;
> -		} version;
>   		/** @enabled: submission is enabled */
>   		bool enabled;
>   	} submission_state;
> diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
> index 2e70dd4880f6..e22afd65e5d6 100644
> --- a/drivers/gpu/drm/xe/xe_uc_fw.c
> +++ b/drivers/gpu/drm/xe/xe_uc_fw.c
> @@ -201,9 +201,15 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
>   	for (i = 0; i < count && p <= entries[i].platform; i++) {
>   		if (p == entries[i].platform) {
>   			uc_fw->path = entries[i].path;
> -			uc_fw->major_ver_wanted = entries[i].major;
> -			uc_fw->minor_ver_wanted = entries[i].minor;
> +			uc_fw->versions.wanted.major = entries[i].major;
> +			uc_fw->versions.wanted.minor = entries[i].minor;
>   			uc_fw->full_ver_required = entries[i].full_ver_required;
> +
> +			if (uc_fw->type == XE_UC_FW_TYPE_GSC)
> +				uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY;
> +			else
> +				uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE;
> +
>   			break;
>   		}
>   	}
> @@ -245,33 +251,30 @@ static void uc_fw_fini(struct drm_device *drm, void *arg)
>   
>   static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
>   {
> -	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
> -	struct xe_guc *guc = &gt->uc.guc;
> +	struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
> +	struct xe_uc_fw_version *compatibility = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
>   
>   	XE_WARN_ON(uc_fw->type != XE_UC_FW_TYPE_GUC);
> -	XE_WARN_ON(uc_fw->major_ver_found < 70);
> +	XE_WARN_ON(release->major < 70);
Is this supposed to be a WARN_ON? There is no need for kernel stack 
traces and such just because the user has supplied a dodgy firmware 
file. Shouldn't it just be a gt_warn() same as the other firmware load 
errors?

>   
> -	if (uc_fw->minor_ver_found >= 6) {
> +	if (release->major > 70 || release->minor >= 6) {
>   		/* v70.6.0 adds CSS header support */
> -		guc->submission_state.version.major =
> -			FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
> -				  css->submission_version);
> -		guc->submission_state.version.minor =
> -			FIELD_GET(CSS_SW_VERSION_UC_MINOR,
> -				  css->submission_version);
> -		guc->submission_state.version.patch =
> -			FIELD_GET(CSS_SW_VERSION_UC_PATCH,
> -				  css->submission_version);
> -	} else if (uc_fw->minor_ver_found >= 3) {
> +		compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
> +						 css->submission_version);
> +		compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
> +						 css->submission_version);
> +		compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH,
> +						 css->submission_version);
> +	} else if (release->minor >= 3) {
>   		/* v70.3.0 introduced v1.1.0 */
> -		guc->submission_state.version.major = 1;
> -		guc->submission_state.version.minor = 1;
> -		guc->submission_state.version.patch = 0;
> +		compatibility->major = 1;
> +		compatibility->minor = 1;
> +		compatibility->patch = 0;
>   	} else {
>   		/* v70.0.0 introduced v1.0.0 */
> -		guc->submission_state.version.major = 1;
> -		guc->submission_state.version.minor = 0;
> -		guc->submission_state.version.patch = 0;
> +		compatibility->major = 1;
> +		compatibility->minor = 0;
> +		compatibility->patch = 0;
Do we need any of this in Xe? There is no need for Xe to support GuC 
versions that were obsoleted while Xe was/is still in pre-release state.

>   	}
>   
>   	uc_fw->private_data_size = css->private_data_size;
> @@ -280,30 +283,31 @@ static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
>   static int uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw)
>   {
>   	struct xe_device *xe = uc_fw_to_xe(uc_fw);
> +	struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted;
> +	struct xe_uc_fw_version *found = &uc_fw->versions.found[uc_fw->versions.wanted_type];
Need some kind of range check BUG_ON thing here?

>   
>   	/* Driver has no requirement on any version, any is good. */
> -	if (!uc_fw->major_ver_wanted)
> +	if (!wanted->major)
>   		return 0;
>   
>   	/*
>   	 * If full version is required, both major and minor should match.
>   	 * Otherwise, at least the major version.
>   	 */
> -	if (uc_fw->major_ver_wanted != uc_fw->major_ver_found ||
> -	    (uc_fw->full_ver_required &&
> -	     uc_fw->minor_ver_wanted != uc_fw->minor_ver_found)) {
> +	if (wanted->major != found->major ||
> +	    (uc_fw->full_ver_required && wanted->minor != found->minor)) {
>   		drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
Does Xe not have gt_notice and friends?

>   			   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
> -			   uc_fw->major_ver_found, uc_fw->minor_ver_found,
> -			   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
> +			   found->major, found->minor,
> +			   wanted->major, wanted->minor);
>   		goto fail;
>   	}
>   
> -	if (uc_fw->minor_ver_wanted > uc_fw->minor_ver_found) {
> +	if (wanted->minor > found->minor) {
>   		drm_notice(&xe->drm, "%s firmware (%u.%u) is recommended, but only (%u.%u) was found in %s\n",
>   			   xe_uc_fw_type_repr(uc_fw->type),
> -			   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted,
> -			   uc_fw->major_ver_found, uc_fw->minor_ver_found,
> +			   wanted->major, wanted->minor,
> +			   found->major, found->minor,
>   			   uc_fw->path);
>   		drm_info(&xe->drm, "Consider updating your linux-firmware pkg or downloading from %s\n",
>   			 XE_UC_FIRMWARE_URL);
> @@ -325,6 +329,7 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
>   	struct xe_tile *tile = gt_to_tile(gt);
>   	struct device *dev = xe->drm.dev;
>   	const struct firmware *fw = NULL;
> +	struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
>   	struct uc_css_header *css;
>   	struct xe_bo *obj;
>   	size_t size;
> @@ -395,15 +400,13 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
>   	}
>   
>   	/* Get version numbers from the CSS header */
> -	uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
> -					   css->sw_version);
> -	uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
> -					   css->sw_version);
> +	release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->sw_version);
> +	release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version);
> +	release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version);
>   
>   	drm_info(&xe->drm, "Using %s firmware (%u.%u) from %s\n",
>   		 xe_uc_fw_type_repr(uc_fw->type),
> -		 uc_fw->major_ver_found, uc_fw->minor_ver_found,
> -		 uc_fw->path);
> +		 release->major, release->minor, uc_fw->path);
>   
>   	err = uc_fw_check_version_requirements(uc_fw);
>   	if (err)
> @@ -523,26 +526,50 @@ int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
>   	return err;
>   }
>   
> +static const char *version_wanted_type_repr(enum xe_uc_fw_version_types type)
> +{
> +	switch (type) {
> +	case XE_UC_FW_VER_RELEASE:
> +		return "release version wanted";
> +	case XE_UC_FW_VER_COMPATIBILITY:
> +		return "compatibility version wanted";
> +	default:
> +		return "Unknown version type wanted";
> +	}
> +}
> +
> +static void print_uc_version(struct drm_printer *p, const char *prefix, struct xe_uc_fw_version *version)
> +{
> +	if (version->build)
> +		drm_printf(p, "\t%s %u.%u.%u.%u\n",
> +			   prefix, version->major, version->minor,
> +			   version->patch, version->build);
> +	else
> +		drm_printf(p, "\t%s %u.%u.%u\n",
> +			   prefix, version->major, version->minor,
> +			   version->patch);
> +}
>   
>   void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p)
>   {
> +	struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted;
> +	struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
> +	struct xe_uc_fw_version *compatibility = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
> +
>   	drm_printf(p, "%s firmware: %s\n",
>   		   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
>   	drm_printf(p, "\tstatus: %s\n",
>   		   xe_uc_fw_status_repr(uc_fw->status));
> -	drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
> -		   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted,
> -		   uc_fw->major_ver_found, uc_fw->minor_ver_found);
> -	drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
> -	drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
> -
> -	if (uc_fw->type == XE_UC_FW_TYPE_GUC) {
> -		struct xe_gt *gt = uc_fw_to_gt(uc_fw);
> -		struct xe_guc *guc = &gt->uc.guc;
> -
> -		drm_printf(p, "\tSubmit version: %u.%u.%u\n",
> -			   guc->submission_state.version.major,
> -			   guc->submission_state.version.minor,
> -			   guc->submission_state.version.patch);
> -	}
> +	print_uc_version(p, version_wanted_type_repr(uc_fw->versions.wanted_type), wanted);
> +
> +	if (release->major)
> +		print_uc_version(p, "release version found", release);
> +
> +	if (compatibility->major)
> +		print_uc_version(p, "compatibility version found", compatibility);
These two prints could be done with a for loop.

> +
> +	if (uc_fw->ucode_size)
> +		drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
> +	if (uc_fw->rsa_size)
> +		drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
>   }
> diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
> index a519c77d4962..1b5bbbb97aaf 100644
> --- a/drivers/gpu/drm/xe/xe_uc_fw.h
> +++ b/drivers/gpu/drm/xe/xe_uc_fw.h
> @@ -96,6 +96,8 @@ static inline const char *xe_uc_fw_type_repr(enum xe_uc_fw_type type)
>   		return "GuC";
>   	case XE_UC_FW_TYPE_HUC:
>   		return "HuC";
> +	case XE_UC_FW_TYPE_GSC:
> +		return "GSC";
>   	}
>   	return "uC";
>   }
> diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
> index 837f49a2347e..69d3f2446781 100644
> --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
> +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
> @@ -55,10 +55,27 @@ enum xe_uc_fw_status {
>   
>   enum xe_uc_fw_type {
>   	XE_UC_FW_TYPE_GUC = 0,
> -	XE_UC_FW_TYPE_HUC
> +	XE_UC_FW_TYPE_HUC,
> +	XE_UC_FW_TYPE_GSC
>   };
>   #define XE_UC_FW_NUM_TYPES 2
>   
> +/**
> + * struct xe_uc_fw_version - Version for XE micro controller firmware
> + */
> +struct xe_uc_fw_version {
> +	u16 major;
> +	u16 minor;
> +	u16 patch;
> +	u16 build;
Should these be 32bit? The submission version structure that has been 
removed from above was all 32bit fields as is the corresponding i915 
structure.

John.

> +};
> +
> +enum xe_uc_fw_version_types {
> +	XE_UC_FW_VER_RELEASE,
> +	XE_UC_FW_VER_COMPATIBILITY,
> +	XE_UC_FW_VER_TYPE_COUNT
> +};
> +
>   /**
>    * struct xe_uc_fw - XE micro controller firmware
>    */
> @@ -98,14 +115,15 @@ struct xe_uc_fw {
>   	 * version required per platform.
>   	 */
>   
> -	/** @major_ver_wanted: major firmware version wanted by platform */
> -	u16 major_ver_wanted;
> -	/** @minor_ver_wanted: minor firmware version wanted by platform */
> -	u16 minor_ver_wanted;
> -	/** @major_ver_found: major version found in firmware blob */
> -	u16 major_ver_found;
> -	/** @minor_ver_found: major version found in firmware blob */
> -	u16 minor_ver_found;
> +	/** @versions: FW versions wanted and found */
> +	struct {
> +		/** @wanted: firmware version wanted by platform */
> +		struct xe_uc_fw_version wanted;
> +		/** @wanted_type: type of firmware version wanted (release vs compatibility) */
> +		enum xe_uc_fw_version_types wanted_type;
> +		/** @found: fw versions found in firmware blob */
> +		struct xe_uc_fw_version found[XE_UC_FW_VER_TYPE_COUNT];
> +	} versions;
>   
>   	/** @rsa_size: RSA size */
>   	u32 rsa_size;



More information about the Intel-xe mailing list