[PATCH v2 04/10] drm/xe/xe_late_bind_fw: Initialize late binding firmware

Nilawar, Badal badal.nilawar at intel.com
Thu Jun 12 10:35:04 UTC 2025


On 11-06-2025 05:36, Daniele Ceraolo Spurio wrote:
>
>
> On 6/6/2025 10:57 AM, Badal Nilawar wrote:
>> Search for late binding firmware binaries and populate the meta data of
>> firmware structures.
>>
>> v2:
>>   - drm_err if firmware size is more than max pay load size (Daniele)
>>   - s/request_firmware/firmware_request_nowarn/ as firmware will
>>     not be available for all possible cards (Daniele)
>>
>> Signed-off-by: Badal Nilawar <badal.nilawar at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_device.c             |  2 +
>>   drivers/gpu/drm/xe/xe_late_bind_fw.c       | 86 +++++++++++++++++++++-
>>   drivers/gpu/drm/xe/xe_late_bind_fw.h       |  1 +
>>   drivers/gpu/drm/xe/xe_late_bind_fw_types.h | 37 ++++++++++
>>   4 files changed, 124 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_device.c 
>> b/drivers/gpu/drm/xe/xe_device.c
>> index e062ddaa83bb..df4b0e038a6d 100644
>> --- a/drivers/gpu/drm/xe/xe_device.c
>> +++ b/drivers/gpu/drm/xe/xe_device.c
>> @@ -891,6 +891,8 @@ int xe_device_probe(struct xe_device *xe)
>>         xe_late_bind_init(&xe->late_bind);
>>   +    xe_late_bind_fw_init(&xe->late_bind);
>
> I still think this should be called from xe_late_bind_init(). You also 
> need to check the return code.
Sure, I will move this to xe_late_bind_init.
>
>> +
>>       err = xe_oa_init(xe);
>>       if (err)
>>           return err;
>> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c 
>> b/drivers/gpu/drm/xe/xe_late_bind_fw.c
>> index 22eb9b51b4ee..0231f3dcfc18 100644
>> --- a/drivers/gpu/drm/xe/xe_late_bind_fw.c
>> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c
>> @@ -5,6 +5,7 @@
>>     #include <linux/component.h>
>>   #include <linux/delay.h>
>> +#include <linux/firmware.h>
>>     #include <drm/drm_managed.h>
>>   #include <drm/intel/i915_component.h>
>> @@ -13,13 +14,94 @@
>>     #include "xe_device.h"
>>   #include "xe_late_bind_fw.h"
>> +#include "xe_pcode.h"
>> +#include "xe_pcode_api.h"
>>   -static struct xe_device *
>> -late_bind_to_xe(struct xe_late_bind *late_bind)
>
> nit: might be worth modifying the previous patch to have this 
> introduced in 1 line instead of modifying it here, to keep the diff 
> cleaner.
Ok,
>
>> +static const char * const fw_type_to_name[] = {
>> +        [CSC_LATE_BINDING_TYPE_FAN_CONTROL] = "fan_control",
>> +    };
>> +
>> +static struct xe_device *late_bind_to_xe(struct xe_late_bind 
>> *late_bind)
>>   {
>>       return container_of(late_bind, struct xe_device, late_bind);
>>   }
>>   +static int late_bind_fw_num_fans(struct xe_late_bind *late_bind)
>> +{
>> +    struct xe_device *xe = late_bind_to_xe(late_bind);
>> +    struct xe_tile *root_tile = xe_device_get_root_tile(xe);
>> +    u32 uval;
>> +
>> +    if (!xe_pcode_read(root_tile,
>> +               PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), 
>> &uval, NULL))
>> +        return uval;
>> +    else
>> +        return 0;
>> +}
>> +
>> +static int late_bind_fw_init(struct xe_late_bind *late_bind, u32 type)
>> +{
>> +    struct xe_device *xe = late_bind_to_xe(late_bind);
>> +    struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
>> +    struct xe_late_bind_fw *lb_fw;
>> +    const struct firmware *fw;
>> +    u32 num_fans;
>> +    int ret;
>> +
>> +    if (!late_bind->component_added)
>> +        return 0;
>> +
>> +    lb_fw = &late_bind->late_bind_fw;
>> +
>> +    lb_fw->type = type;
>
> Should we validate that "type" is sane?
You mean if type is not max supported type. Sure, I will add.
>
>> +    lb_fw->valid = false;
>> +
>> +    if (lb_fw->type == CSC_LATE_BINDING_TYPE_FAN_CONTROL) {
>> +        num_fans = late_bind_fw_num_fans(late_bind);
>> +        drm_dbg(&xe->drm, "Number of Fans: %d\n", num_fans);
>> +        if (!num_fans)
>> +            return 0;
>> +    }
>> +
>> +    lb_fw->flags = CSC_LATE_BINDING_FLAGS_IS_PERSISTENT;
>> +
>> +    snprintf(lb_fw->blob_path, sizeof(lb_fw->blob_path), 
>> "xe/%s_8086_%04x_%04x_%04x.bin",
>> +         fw_type_to_name[type], pdev->device,
>> +         pdev->subsystem_vendor, pdev->subsystem_device);
>> +
>> +    drm_dbg(&xe->drm, "Request late binding firmware %s\n", 
>> lb_fw->blob_path);
>> +    ret = firmware_request_nowarn(&fw, lb_fw->blob_path, xe->drm.dev);
>> +    if (ret) {
>> +        drm_dbg(&xe->drm, "Failed to request %s\n", lb_fw->blob_path);
>
> This log still seems like an error when it's actually an ok outcome. 
> Maybe change it to something like:
>
> drm_dbg("%s late binding fw not available for current device", 
> fw_type_to_name[type])
Ok,
>
>> +        return 0;
>> +    }
>> +
>> +    if (fw->size > MAX_PAYLOAD_SIZE) {
>> +        lb_fw->payload_size = MAX_PAYLOAD_SIZE;
>
> Why are we even setting payload_size here?
>
>> +        drm_err(&xe->drm, "Firmware %s size %zu is larger than max 
>> pay load size %u\n",
>> +            lb_fw->blob_path, fw->size, MAX_PAYLOAD_SIZE);
>> +        return 0;
>
> Maybe add a comment to explain why we don't return an error here (if 
> this was indeed on purpose).
> Also, you're not calling release_firmware() when exiting from here.

I will fix this. In fact I will return error here, as firmware is found 
but is of incorrect size.

>
>> +    }
>> +
>> +    lb_fw->payload_size = fw->size;
>> +
>> +    memcpy(lb_fw->payload, fw->data, lb_fw->payload_size);
>> +    release_firmware(fw);
>> +    lb_fw->valid = true;
>> +
>> +    return 0;
>
> This function seems to return 0 from all return calls. Is that 
> intentional? if so, just switch to void.
>> +}
>> +
>> +/**
>> + * xe_mei_late_bind_fw_init() - Initialize late bind firmware
>> + *
>> + * Return: 0 if the initialization was successful, a negative errno 
>> otherwise.
>> + */
>> +int xe_late_bind_fw_init(struct xe_late_bind *late_bind)
>> +{
>> +    return late_bind_fw_init(late_bind, 
>> CSC_LATE_BINDING_TYPE_FAN_CONTROL);
>> +}
>> +
>>   static int xe_late_bind_component_bind(struct device *xe_kdev,
>>                          struct device *mei_kdev, void *data)
>>   {
>> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.h 
>> b/drivers/gpu/drm/xe/xe_late_bind_fw.h
>> index 4c73571c3e62..a8b47523b203 100644
>> --- a/drivers/gpu/drm/xe/xe_late_bind_fw.h
>> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.h
>> @@ -11,5 +11,6 @@
>>   struct xe_late_bind;
>>     int xe_late_bind_init(struct xe_late_bind *late_bind);
>> +int xe_late_bind_fw_init(struct xe_late_bind *late_bind);
>>     #endif
>> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h 
>> b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
>> index afa1917b5f51..c427e141c685 100644
>> --- a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
>> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
>> @@ -10,6 +10,41 @@
>>   #include <linux/mutex.h>
>>   #include <linux/types.h>
>>   +#define MAX_PAYLOAD_SIZE (1024 * 4)
>> +
>> +/**
>> + * xe_late_bind_fw_type - enum to determine late binding fw type
>> + */
>> +enum xe_late_bind_type {
>> +    CSC_LATE_BINDING_TYPE_FAN_CONTROL = 1,
>> +};
>> +
>> +/**
>> + * Late Binding flags
>> + */
>> +enum csc_late_binding_flags {
>> +    /** Persistent across warm reset */
>> +    CSC_LATE_BINDING_FLAGS_IS_PERSISTENT = 0x1
>
> Should this be a define, since it is part of a bitmask of flags?
>> +};
>
> AFAIU those 2 enums are not internal to Xe and are defined as part of 
> CSC interface instead, but that's not clear here. IMO better to either 
> move them to a file in the abi folder, or at least make it extremely 
> clear that those values are CSC-defined. Moving then to 
> late_bind_mei_interface.h could also be an option since they're values 
> for that interface.
Fine I will move this to late_bind_mei_interface.h
>
>> +
>> +/**
>> + * struct xe_late_bind_fw
>> + */
>> +struct xe_late_bind_fw {
>> +    /** @late_bind_fw.valid */
>> +    bool valid;
>> +    /** @late_bind_fw.blob_path: late binding fw blob path */
>> +    char blob_path[PATH_MAX];
>> +    /** @late_bind_fw.type */
>> +    u32  type;
>> +    /** @late_bind_fw.flags */
>
> Missing descriptions for these 3 vars
I will fix this.
>
>> +    u32  flags;
>> +    /** @late_bind_fw.payload: to store the late binding blob */
>> +    u8  payload[MAX_PAYLOAD_SIZE];
>> +    /** @late_bind_fw.payload_size: late binding blob payload_size */
>
> if you only set payload_size when the fw init is successful you can 
> use it being non-zero as a valid check instead of having a dedicated 
> variable for that. not a blocker.
>
>> +    size_t payload_size;
>> +};
>> +
>>   /**
>>    * struct xe_late_bind_component - Late Binding services component
>>    * @mei_dev: device that provide Late Binding service.
>> @@ -34,6 +69,8 @@ struct xe_late_bind {
>>       bool component_added;
>>       /** @late_bind.mutex: protects the component binding and usage */
>>       struct mutex mutex;
>> +    /** @late_bind.late_bind_fw: late binding firmware */
>> +    struct xe_late_bind_fw late_bind_fw;
>
> The code seems to be designed from multiple type of late binding FW in 
> mind (hence the type), but here there is only one. Maybe switch this 
> to an array, even if it only has one entry for now. That way if we 
> need to extend it later we just increase the array size.

Yes, it is designed to support multiple types. In fact in previous 
implementation this was array only.  If ok I will move back to array 
based implementation and just Populate FAN type.

Regards,
Badal

>
> Daniele
>
>>   };
>>     #endif
>


More information about the dri-devel mailing list