[Intel-xe] [RFC] drm/xe/uc: rework uC version tracking

Ceraolo Spurio, Daniele daniele.ceraolospurio at intel.com
Thu Aug 24 18:12:49 UTC 2023



On 8/24/2023 11:00 AM, John Harrison wrote:
> On 8/16/2023 14:55, Daniele Ceraolo Spurio wrote:
>> The GSC firmware, support for which is coming soon for Xe, has both a
>> release version (updated on every release) and a compatibility version
>> (update only on interface changes). The GuC has something similar, with
>> a global release version and a submission version (which is also known
>> as the VF compatibility version). The main difference is that for the
>> GuC we still want to check the driver requirement against the release
>> version, while for the GSC we'll need to check against the compatibility
>> version.
>> Instead of special casing the GSC, this patch proposes a rework of the
>> FW logic so that we store both versions at the uc_fw level for all
>> binaries and we allow checking against either of the versions. 
>> Initially,
>> we'll use it to support GSC, but the logic could be re-used to allow VFs
>> to check against their expected GuC compatibility version.
>>
>> RFC note: I've added a couple of the GSC defines to show how this is
>> supposed to be different for the GSC, but those are not intended to be
>> part of this patch in its final version. I'd like to get some early
>> feedback on this before I build the full GSC version checking on top of
>> it.
> Seems plausible to me.
>
> Some comments below, mostly not actually related to this patch...
>
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> Cc: Matthew Brost <matthew.brost at intel.com>
>> Cc: John Harrison <John.C.Harrison at Intel.com>
>> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
>> Cc: Michał Winiarski <michal.winiarski at intel.com>
>> Cc: Zhanjun Dong <zhanjun.dong at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_guc_types.h   |   9 --
>>   drivers/gpu/drm/xe/xe_uc_fw.c       | 131 +++++++++++++++++-----------
>>   drivers/gpu/drm/xe/xe_uc_fw.h       |   2 +
>>   drivers/gpu/drm/xe/xe_uc_fw_types.h |  36 ++++++--
>>   4 files changed, 108 insertions(+), 70 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_guc_types.h 
>> b/drivers/gpu/drm/xe/xe_guc_types.h
>> index a5e58917a499..343cc39ce5bc 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_types.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_types.h
>> @@ -52,15 +52,6 @@ struct xe_guc {
>>               /** @seqno: suspend fences seqno */
>>               u32 seqno;
>>           } suspend;
>> -        /** @version: submission version */
>> -        struct {
>> -            /** @major: major version of GuC submission */
>> -            u32 major;
>> -            /** @minor: minor version of GuC submission */
>> -            u32 minor;
>> -            /** @patch: patch version of GuC submission */
>> -            u32 patch;
>> -        } version;
>>           /** @enabled: submission is enabled */
>>           bool enabled;
>>       } submission_state;
>> diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c 
>> b/drivers/gpu/drm/xe/xe_uc_fw.c
>> index 2e70dd4880f6..e22afd65e5d6 100644
>> --- a/drivers/gpu/drm/xe/xe_uc_fw.c
>> +++ b/drivers/gpu/drm/xe/xe_uc_fw.c
>> @@ -201,9 +201,15 @@ uc_fw_auto_select(struct xe_device *xe, struct 
>> xe_uc_fw *uc_fw)
>>       for (i = 0; i < count && p <= entries[i].platform; i++) {
>>           if (p == entries[i].platform) {
>>               uc_fw->path = entries[i].path;
>> -            uc_fw->major_ver_wanted = entries[i].major;
>> -            uc_fw->minor_ver_wanted = entries[i].minor;
>> +            uc_fw->versions.wanted.major = entries[i].major;
>> +            uc_fw->versions.wanted.minor = entries[i].minor;
>>               uc_fw->full_ver_required = entries[i].full_ver_required;
>> +
>> +            if (uc_fw->type == XE_UC_FW_TYPE_GSC)
>> +                uc_fw->versions.wanted_type = 
>> XE_UC_FW_VER_COMPATIBILITY;
>> +            else
>> +                uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE;
>> +
>>               break;
>>           }
>>       }
>> @@ -245,33 +251,30 @@ static void uc_fw_fini(struct drm_device *drm, 
>> void *arg)
>>     static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct 
>> uc_css_header *css)
>>   {
>> -    struct xe_gt *gt = uc_fw_to_gt(uc_fw);
>> -    struct xe_guc *guc = &gt->uc.guc;
>> +    struct xe_uc_fw_version *release = 
>> &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
>> +    struct xe_uc_fw_version *compatibility = 
>> &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
>>         XE_WARN_ON(uc_fw->type != XE_UC_FW_TYPE_GUC);
>> -    XE_WARN_ON(uc_fw->major_ver_found < 70);
>> +    XE_WARN_ON(release->major < 70);
> Is this supposed to be a WARN_ON? There is no need for kernel stack 
> traces and such just because the user has supplied a dodgy firmware 
> file. Shouldn't it just be a gt_warn() same as the other firmware load 
> errors?

I think a bunch of those used to be XE_BUG_ON and got mass converted to 
XE_WARN_ON when XE_BUG_ON was removed from the driver. I believe the 
plan is to re-evaluate them and check which ones need to be WARN and 
which ones don't.

>
>>   -    if (uc_fw->minor_ver_found >= 6) {
>> +    if (release->major > 70 || release->minor >= 6) {
>>           /* v70.6.0 adds CSS header support */
>> -        guc->submission_state.version.major =
>> -            FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
>> -                  css->submission_version);
>> -        guc->submission_state.version.minor =
>> -            FIELD_GET(CSS_SW_VERSION_UC_MINOR,
>> -                  css->submission_version);
>> -        guc->submission_state.version.patch =
>> -            FIELD_GET(CSS_SW_VERSION_UC_PATCH,
>> -                  css->submission_version);
>> -    } else if (uc_fw->minor_ver_found >= 3) {
>> +        compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
>> +                         css->submission_version);
>> +        compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
>> +                         css->submission_version);
>> +        compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH,
>> +                         css->submission_version);
>> +    } else if (release->minor >= 3) {
>>           /* v70.3.0 introduced v1.1.0 */
>> -        guc->submission_state.version.major = 1;
>> -        guc->submission_state.version.minor = 1;
>> -        guc->submission_state.version.patch = 0;
>> +        compatibility->major = 1;
>> +        compatibility->minor = 1;
>> +        compatibility->patch = 0;
>>       } else {
>>           /* v70.0.0 introduced v1.0.0 */
>> -        guc->submission_state.version.major = 1;
>> -        guc->submission_state.version.minor = 0;
>> -        guc->submission_state.version.patch = 0;
>> +        compatibility->major = 1;
>> +        compatibility->minor = 0;
>> +        compatibility->patch = 0;
> Do we need any of this in Xe? There is no need for Xe to support GuC 
> versions that were obsoleted while Xe was/is still in pre-release state.

We definitely need the 70.3 check, since we're still using 70.5 for most 
platforms.

>
>>       }
>>         uc_fw->private_data_size = css->private_data_size;
>> @@ -280,30 +283,31 @@ static void guc_read_css_info(struct xe_uc_fw 
>> *uc_fw, struct uc_css_header *css)
>>   static int uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw)
>>   {
>>       struct xe_device *xe = uc_fw_to_xe(uc_fw);
>> +    struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted;
>> +    struct xe_uc_fw_version *found = 
>> &uc_fw->versions.found[uc_fw->versions.wanted_type];
> Need some kind of range check BUG_ON thing here?

For the array? It should be impossible to get wrong, since the 
wanted_type is set at select time.

>
>>         /* Driver has no requirement on any version, any is good. */
>> -    if (!uc_fw->major_ver_wanted)
>> +    if (!wanted->major)
>>           return 0;
>>         /*
>>        * If full version is required, both major and minor should match.
>>        * Otherwise, at least the major version.
>>        */
>> -    if (uc_fw->major_ver_wanted != uc_fw->major_ver_found ||
>> -        (uc_fw->full_ver_required &&
>> -         uc_fw->minor_ver_wanted != uc_fw->minor_ver_found)) {
>> +    if (wanted->major != found->major ||
>> +        (uc_fw->full_ver_required && wanted->minor != found->minor)) {
>>           drm_notice(&xe->drm, "%s firmware %s: unexpected version: 
>> %u.%u != %u.%u\n",
> Does Xe not have gt_notice and friends?

it does, but not all files have been converted over yet.

>
>> xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
>> -               uc_fw->major_ver_found, uc_fw->minor_ver_found,
>> -               uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
>> +               found->major, found->minor,
>> +               wanted->major, wanted->minor);
>>           goto fail;
>>       }
>>   -    if (uc_fw->minor_ver_wanted > uc_fw->minor_ver_found) {
>> +    if (wanted->minor > found->minor) {
>>           drm_notice(&xe->drm, "%s firmware (%u.%u) is recommended, 
>> but only (%u.%u) was found in %s\n",
>>                  xe_uc_fw_type_repr(uc_fw->type),
>> -               uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted,
>> -               uc_fw->major_ver_found, uc_fw->minor_ver_found,
>> +               wanted->major, wanted->minor,
>> +               found->major, found->minor,
>>                  uc_fw->path);
>>           drm_info(&xe->drm, "Consider updating your linux-firmware 
>> pkg or downloading from %s\n",
>>                XE_UC_FIRMWARE_URL);
>> @@ -325,6 +329,7 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
>>       struct xe_tile *tile = gt_to_tile(gt);
>>       struct device *dev = xe->drm.dev;
>>       const struct firmware *fw = NULL;
>> +    struct xe_uc_fw_version *release = 
>> &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
>>       struct uc_css_header *css;
>>       struct xe_bo *obj;
>>       size_t size;
>> @@ -395,15 +400,13 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
>>       }
>>         /* Get version numbers from the CSS header */
>> -    uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
>> -                       css->sw_version);
>> -    uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
>> -                       css->sw_version);
>> +    release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, 
>> css->sw_version);
>> +    release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, 
>> css->sw_version);
>> +    release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, 
>> css->sw_version);
>>         drm_info(&xe->drm, "Using %s firmware (%u.%u) from %s\n",
>>            xe_uc_fw_type_repr(uc_fw->type),
>> -         uc_fw->major_ver_found, uc_fw->minor_ver_found,
>> -         uc_fw->path);
>> +         release->major, release->minor, uc_fw->path);
>>         err = uc_fw_check_version_requirements(uc_fw);
>>       if (err)
>> @@ -523,26 +526,50 @@ int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 
>> offset, u32 dma_flags)
>>       return err;
>>   }
>>   +static const char *version_wanted_type_repr(enum 
>> xe_uc_fw_version_types type)
>> +{
>> +    switch (type) {
>> +    case XE_UC_FW_VER_RELEASE:
>> +        return "release version wanted";
>> +    case XE_UC_FW_VER_COMPATIBILITY:
>> +        return "compatibility version wanted";
>> +    default:
>> +        return "Unknown version type wanted";
>> +    }
>> +}
>> +
>> +static void print_uc_version(struct drm_printer *p, const char 
>> *prefix, struct xe_uc_fw_version *version)
>> +{
>> +    if (version->build)
>> +        drm_printf(p, "\t%s %u.%u.%u.%u\n",
>> +               prefix, version->major, version->minor,
>> +               version->patch, version->build);
>> +    else
>> +        drm_printf(p, "\t%s %u.%u.%u\n",
>> +               prefix, version->major, version->minor,
>> +               version->patch);
>> +}
>>     void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p)
>>   {
>> +    struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted;
>> +    struct xe_uc_fw_version *release = 
>> &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
>> +    struct xe_uc_fw_version *compatibility = 
>> &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
>> +
>>       drm_printf(p, "%s firmware: %s\n",
>>              xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
>>       drm_printf(p, "\tstatus: %s\n",
>>              xe_uc_fw_status_repr(uc_fw->status));
>> -    drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
>> -           uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted,
>> -           uc_fw->major_ver_found, uc_fw->minor_ver_found);
>> -    drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
>> -    drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
>> -
>> -    if (uc_fw->type == XE_UC_FW_TYPE_GUC) {
>> -        struct xe_gt *gt = uc_fw_to_gt(uc_fw);
>> -        struct xe_guc *guc = &gt->uc.guc;
>> -
>> -        drm_printf(p, "\tSubmit version: %u.%u.%u\n",
>> -               guc->submission_state.version.major,
>> -               guc->submission_state.version.minor,
>> -               guc->submission_state.version.patch);
>> -    }
>> +    print_uc_version(p, 
>> version_wanted_type_repr(uc_fw->versions.wanted_type), wanted);
>> +
>> +    if (release->major)
>> +        print_uc_version(p, "release version found", release);
>> +
>> +    if (compatibility->major)
>> +        print_uc_version(p, "compatibility version found", 
>> compatibility);
> These two prints could be done with a for loop.
>
>> +
>> +    if (uc_fw->ucode_size)
>> +        drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
>> +    if (uc_fw->rsa_size)
>> +        drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
>>   }
>> diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h 
>> b/drivers/gpu/drm/xe/xe_uc_fw.h
>> index a519c77d4962..1b5bbbb97aaf 100644
>> --- a/drivers/gpu/drm/xe/xe_uc_fw.h
>> +++ b/drivers/gpu/drm/xe/xe_uc_fw.h
>> @@ -96,6 +96,8 @@ static inline const char *xe_uc_fw_type_repr(enum 
>> xe_uc_fw_type type)
>>           return "GuC";
>>       case XE_UC_FW_TYPE_HUC:
>>           return "HuC";
>> +    case XE_UC_FW_TYPE_GSC:
>> +        return "GSC";
>>       }
>>       return "uC";
>>   }
>> diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h 
>> b/drivers/gpu/drm/xe/xe_uc_fw_types.h
>> index 837f49a2347e..69d3f2446781 100644
>> --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
>> +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
>> @@ -55,10 +55,27 @@ enum xe_uc_fw_status {
>>     enum xe_uc_fw_type {
>>       XE_UC_FW_TYPE_GUC = 0,
>> -    XE_UC_FW_TYPE_HUC
>> +    XE_UC_FW_TYPE_HUC,
>> +    XE_UC_FW_TYPE_GSC
>>   };
>>   #define XE_UC_FW_NUM_TYPES 2
>>   +/**
>> + * struct xe_uc_fw_version - Version for XE micro controller firmware
>> + */
>> +struct xe_uc_fw_version {
>> +    u16 major;
>> +    u16 minor;
>> +    u16 patch;
>> +    u16 build;
> Should these be 32bit? The submission version structure that has been 
> removed from above was all 32bit fields as is the corresponding i915 
> structure.

The fields are either 8 or 16 bits wide in the FW headers, so having 16 
here is functionally correct. We could switch it to u32 to be future 
proof, but it's not strictly required right now.

Daniele

>
> John.
>
>> +};
>> +
>> +enum xe_uc_fw_version_types {
>> +    XE_UC_FW_VER_RELEASE,
>> +    XE_UC_FW_VER_COMPATIBILITY,
>> +    XE_UC_FW_VER_TYPE_COUNT
>> +};
>> +
>>   /**
>>    * struct xe_uc_fw - XE micro controller firmware
>>    */
>> @@ -98,14 +115,15 @@ struct xe_uc_fw {
>>        * version required per platform.
>>        */
>>   -    /** @major_ver_wanted: major firmware version wanted by 
>> platform */
>> -    u16 major_ver_wanted;
>> -    /** @minor_ver_wanted: minor firmware version wanted by platform */
>> -    u16 minor_ver_wanted;
>> -    /** @major_ver_found: major version found in firmware blob */
>> -    u16 major_ver_found;
>> -    /** @minor_ver_found: major version found in firmware blob */
>> -    u16 minor_ver_found;
>> +    /** @versions: FW versions wanted and found */
>> +    struct {
>> +        /** @wanted: firmware version wanted by platform */
>> +        struct xe_uc_fw_version wanted;
>> +        /** @wanted_type: type of firmware version wanted (release 
>> vs compatibility) */
>> +        enum xe_uc_fw_version_types wanted_type;
>> +        /** @found: fw versions found in firmware blob */
>> +        struct xe_uc_fw_version found[XE_UC_FW_VER_TYPE_COUNT];
>> +    } versions;
>>         /** @rsa_size: RSA size */
>>       u32 rsa_size;
>



More information about the Intel-xe mailing list