[Intel-xe] [RFC] drm/xe/uc: rework uC version tracking
John Harrison
john.c.harrison at intel.com
Thu Aug 24 18:00:08 UTC 2023
On 8/16/2023 14:55, Daniele Ceraolo Spurio wrote:
> The GSC firmware, support for which is coming soon for Xe, has both a
> release version (updated on every release) and a compatibility version
> (update only on interface changes). The GuC has something similar, with
> a global release version and a submission version (which is also known
> as the VF compatibility version). The main difference is that for the
> GuC we still want to check the driver requirement against the release
> version, while for the GSC we'll need to check against the compatibility
> version.
> Instead of special casing the GSC, this patch proposes a rework of the
> FW logic so that we store both versions at the uc_fw level for all
> binaries and we allow checking against either of the versions. Initially,
> we'll use it to support GSC, but the logic could be re-used to allow VFs
> to check against their expected GuC compatibility version.
>
> RFC note: I've added a couple of the GSC defines to show how this is
> supposed to be different for the GSC, but those are not intended to be
> part of this patch in its final version. I'd like to get some early
> feedback on this before I build the full GSC version checking on top of
> it.
Seems plausible to me.
Some comments below, mostly not actually related to this patch...
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Cc: John Harrison <John.C.Harrison at Intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Michał Winiarski <michal.winiarski at intel.com>
> Cc: Zhanjun Dong <zhanjun.dong at intel.com>
> ---
> drivers/gpu/drm/xe/xe_guc_types.h | 9 --
> drivers/gpu/drm/xe/xe_uc_fw.c | 131 +++++++++++++++++-----------
> drivers/gpu/drm/xe/xe_uc_fw.h | 2 +
> drivers/gpu/drm/xe/xe_uc_fw_types.h | 36 ++++++--
> 4 files changed, 108 insertions(+), 70 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
> index a5e58917a499..343cc39ce5bc 100644
> --- a/drivers/gpu/drm/xe/xe_guc_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_types.h
> @@ -52,15 +52,6 @@ struct xe_guc {
> /** @seqno: suspend fences seqno */
> u32 seqno;
> } suspend;
> - /** @version: submission version */
> - struct {
> - /** @major: major version of GuC submission */
> - u32 major;
> - /** @minor: minor version of GuC submission */
> - u32 minor;
> - /** @patch: patch version of GuC submission */
> - u32 patch;
> - } version;
> /** @enabled: submission is enabled */
> bool enabled;
> } submission_state;
> diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
> index 2e70dd4880f6..e22afd65e5d6 100644
> --- a/drivers/gpu/drm/xe/xe_uc_fw.c
> +++ b/drivers/gpu/drm/xe/xe_uc_fw.c
> @@ -201,9 +201,15 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
> for (i = 0; i < count && p <= entries[i].platform; i++) {
> if (p == entries[i].platform) {
> uc_fw->path = entries[i].path;
> - uc_fw->major_ver_wanted = entries[i].major;
> - uc_fw->minor_ver_wanted = entries[i].minor;
> + uc_fw->versions.wanted.major = entries[i].major;
> + uc_fw->versions.wanted.minor = entries[i].minor;
> uc_fw->full_ver_required = entries[i].full_ver_required;
> +
> + if (uc_fw->type == XE_UC_FW_TYPE_GSC)
> + uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY;
> + else
> + uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE;
> +
> break;
> }
> }
> @@ -245,33 +251,30 @@ static void uc_fw_fini(struct drm_device *drm, void *arg)
>
> static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
> {
> - struct xe_gt *gt = uc_fw_to_gt(uc_fw);
> - struct xe_guc *guc = >->uc.guc;
> + struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
> + struct xe_uc_fw_version *compatibility = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
>
> XE_WARN_ON(uc_fw->type != XE_UC_FW_TYPE_GUC);
> - XE_WARN_ON(uc_fw->major_ver_found < 70);
> + XE_WARN_ON(release->major < 70);
Is this supposed to be a WARN_ON? There is no need for kernel stack
traces and such just because the user has supplied a dodgy firmware
file. Shouldn't it just be a gt_warn() same as the other firmware load
errors?
>
> - if (uc_fw->minor_ver_found >= 6) {
> + if (release->major > 70 || release->minor >= 6) {
> /* v70.6.0 adds CSS header support */
> - guc->submission_state.version.major =
> - FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
> - css->submission_version);
> - guc->submission_state.version.minor =
> - FIELD_GET(CSS_SW_VERSION_UC_MINOR,
> - css->submission_version);
> - guc->submission_state.version.patch =
> - FIELD_GET(CSS_SW_VERSION_UC_PATCH,
> - css->submission_version);
> - } else if (uc_fw->minor_ver_found >= 3) {
> + compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
> + css->submission_version);
> + compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
> + css->submission_version);
> + compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH,
> + css->submission_version);
> + } else if (release->minor >= 3) {
> /* v70.3.0 introduced v1.1.0 */
> - guc->submission_state.version.major = 1;
> - guc->submission_state.version.minor = 1;
> - guc->submission_state.version.patch = 0;
> + compatibility->major = 1;
> + compatibility->minor = 1;
> + compatibility->patch = 0;
> } else {
> /* v70.0.0 introduced v1.0.0 */
> - guc->submission_state.version.major = 1;
> - guc->submission_state.version.minor = 0;
> - guc->submission_state.version.patch = 0;
> + compatibility->major = 1;
> + compatibility->minor = 0;
> + compatibility->patch = 0;
Do we need any of this in Xe? There is no need for Xe to support GuC
versions that were obsoleted while Xe was/is still in pre-release state.
> }
>
> uc_fw->private_data_size = css->private_data_size;
> @@ -280,30 +283,31 @@ static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
> static int uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw)
> {
> struct xe_device *xe = uc_fw_to_xe(uc_fw);
> + struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted;
> + struct xe_uc_fw_version *found = &uc_fw->versions.found[uc_fw->versions.wanted_type];
Need some kind of range check BUG_ON thing here?
>
> /* Driver has no requirement on any version, any is good. */
> - if (!uc_fw->major_ver_wanted)
> + if (!wanted->major)
> return 0;
>
> /*
> * If full version is required, both major and minor should match.
> * Otherwise, at least the major version.
> */
> - if (uc_fw->major_ver_wanted != uc_fw->major_ver_found ||
> - (uc_fw->full_ver_required &&
> - uc_fw->minor_ver_wanted != uc_fw->minor_ver_found)) {
> + if (wanted->major != found->major ||
> + (uc_fw->full_ver_required && wanted->minor != found->minor)) {
> drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
Does Xe not have gt_notice and friends?
> xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
> - uc_fw->major_ver_found, uc_fw->minor_ver_found,
> - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
> + found->major, found->minor,
> + wanted->major, wanted->minor);
> goto fail;
> }
>
> - if (uc_fw->minor_ver_wanted > uc_fw->minor_ver_found) {
> + if (wanted->minor > found->minor) {
> drm_notice(&xe->drm, "%s firmware (%u.%u) is recommended, but only (%u.%u) was found in %s\n",
> xe_uc_fw_type_repr(uc_fw->type),
> - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted,
> - uc_fw->major_ver_found, uc_fw->minor_ver_found,
> + wanted->major, wanted->minor,
> + found->major, found->minor,
> uc_fw->path);
> drm_info(&xe->drm, "Consider updating your linux-firmware pkg or downloading from %s\n",
> XE_UC_FIRMWARE_URL);
> @@ -325,6 +329,7 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
> struct xe_tile *tile = gt_to_tile(gt);
> struct device *dev = xe->drm.dev;
> const struct firmware *fw = NULL;
> + struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
> struct uc_css_header *css;
> struct xe_bo *obj;
> size_t size;
> @@ -395,15 +400,13 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
> }
>
> /* Get version numbers from the CSS header */
> - uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
> - css->sw_version);
> - uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
> - css->sw_version);
> + release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->sw_version);
> + release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version);
> + release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version);
>
> drm_info(&xe->drm, "Using %s firmware (%u.%u) from %s\n",
> xe_uc_fw_type_repr(uc_fw->type),
> - uc_fw->major_ver_found, uc_fw->minor_ver_found,
> - uc_fw->path);
> + release->major, release->minor, uc_fw->path);
>
> err = uc_fw_check_version_requirements(uc_fw);
> if (err)
> @@ -523,26 +526,50 @@ int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
> return err;
> }
>
> +static const char *version_wanted_type_repr(enum xe_uc_fw_version_types type)
> +{
> + switch (type) {
> + case XE_UC_FW_VER_RELEASE:
> + return "release version wanted";
> + case XE_UC_FW_VER_COMPATIBILITY:
> + return "compatibility version wanted";
> + default:
> + return "Unknown version type wanted";
> + }
> +}
> +
> +static void print_uc_version(struct drm_printer *p, const char *prefix, struct xe_uc_fw_version *version)
> +{
> + if (version->build)
> + drm_printf(p, "\t%s %u.%u.%u.%u\n",
> + prefix, version->major, version->minor,
> + version->patch, version->build);
> + else
> + drm_printf(p, "\t%s %u.%u.%u\n",
> + prefix, version->major, version->minor,
> + version->patch);
> +}
>
> void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p)
> {
> + struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted;
> + struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
> + struct xe_uc_fw_version *compatibility = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
> +
> drm_printf(p, "%s firmware: %s\n",
> xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
> drm_printf(p, "\tstatus: %s\n",
> xe_uc_fw_status_repr(uc_fw->status));
> - drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
> - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted,
> - uc_fw->major_ver_found, uc_fw->minor_ver_found);
> - drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
> - drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
> -
> - if (uc_fw->type == XE_UC_FW_TYPE_GUC) {
> - struct xe_gt *gt = uc_fw_to_gt(uc_fw);
> - struct xe_guc *guc = >->uc.guc;
> -
> - drm_printf(p, "\tSubmit version: %u.%u.%u\n",
> - guc->submission_state.version.major,
> - guc->submission_state.version.minor,
> - guc->submission_state.version.patch);
> - }
> + print_uc_version(p, version_wanted_type_repr(uc_fw->versions.wanted_type), wanted);
> +
> + if (release->major)
> + print_uc_version(p, "release version found", release);
> +
> + if (compatibility->major)
> + print_uc_version(p, "compatibility version found", compatibility);
These two prints could be done with a for loop.
> +
> + if (uc_fw->ucode_size)
> + drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
> + if (uc_fw->rsa_size)
> + drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
> }
> diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
> index a519c77d4962..1b5bbbb97aaf 100644
> --- a/drivers/gpu/drm/xe/xe_uc_fw.h
> +++ b/drivers/gpu/drm/xe/xe_uc_fw.h
> @@ -96,6 +96,8 @@ static inline const char *xe_uc_fw_type_repr(enum xe_uc_fw_type type)
> return "GuC";
> case XE_UC_FW_TYPE_HUC:
> return "HuC";
> + case XE_UC_FW_TYPE_GSC:
> + return "GSC";
> }
> return "uC";
> }
> diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
> index 837f49a2347e..69d3f2446781 100644
> --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
> +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
> @@ -55,10 +55,27 @@ enum xe_uc_fw_status {
>
> enum xe_uc_fw_type {
> XE_UC_FW_TYPE_GUC = 0,
> - XE_UC_FW_TYPE_HUC
> + XE_UC_FW_TYPE_HUC,
> + XE_UC_FW_TYPE_GSC
> };
> #define XE_UC_FW_NUM_TYPES 2
>
> +/**
> + * struct xe_uc_fw_version - Version for XE micro controller firmware
> + */
> +struct xe_uc_fw_version {
> + u16 major;
> + u16 minor;
> + u16 patch;
> + u16 build;
Should these be 32bit? The submission version structure that has been
removed from above was all 32bit fields as is the corresponding i915
structure.
John.
> +};
> +
> +enum xe_uc_fw_version_types {
> + XE_UC_FW_VER_RELEASE,
> + XE_UC_FW_VER_COMPATIBILITY,
> + XE_UC_FW_VER_TYPE_COUNT
> +};
> +
> /**
> * struct xe_uc_fw - XE micro controller firmware
> */
> @@ -98,14 +115,15 @@ struct xe_uc_fw {
> * version required per platform.
> */
>
> - /** @major_ver_wanted: major firmware version wanted by platform */
> - u16 major_ver_wanted;
> - /** @minor_ver_wanted: minor firmware version wanted by platform */
> - u16 minor_ver_wanted;
> - /** @major_ver_found: major version found in firmware blob */
> - u16 major_ver_found;
> - /** @minor_ver_found: major version found in firmware blob */
> - u16 minor_ver_found;
> + /** @versions: FW versions wanted and found */
> + struct {
> + /** @wanted: firmware version wanted by platform */
> + struct xe_uc_fw_version wanted;
> + /** @wanted_type: type of firmware version wanted (release vs compatibility) */
> + enum xe_uc_fw_version_types wanted_type;
> + /** @found: fw versions found in firmware blob */
> + struct xe_uc_fw_version found[XE_UC_FW_VER_TYPE_COUNT];
> + } versions;
>
> /** @rsa_size: RSA size */
> u32 rsa_size;
More information about the Intel-xe
mailing list