[PATCH v2 04/10] drm/xe/xe_late_bind_fw: Initialize late binding firmware

Daniele Ceraolo Spurio daniele.ceraolospurio at intel.com
Wed Jun 11 00:06:39 UTC 2025



On 6/6/2025 10:57 AM, Badal Nilawar wrote:
> Search for late binding firmware binaries and populate the meta data of
> firmware structures.
>
> v2:
>   - drm_err if firmware size is more than max pay load size (Daniele)
>   - s/request_firmware/firmware_request_nowarn/ as firmware will
>     not be available for all possible cards (Daniele)
>
> Signed-off-by: Badal Nilawar <badal.nilawar at intel.com>
> ---
>   drivers/gpu/drm/xe/xe_device.c             |  2 +
>   drivers/gpu/drm/xe/xe_late_bind_fw.c       | 86 +++++++++++++++++++++-
>   drivers/gpu/drm/xe/xe_late_bind_fw.h       |  1 +
>   drivers/gpu/drm/xe/xe_late_bind_fw_types.h | 37 ++++++++++
>   4 files changed, 124 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index e062ddaa83bb..df4b0e038a6d 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -891,6 +891,8 @@ int xe_device_probe(struct xe_device *xe)
>   
>   	xe_late_bind_init(&xe->late_bind);
>   
> +	xe_late_bind_fw_init(&xe->late_bind);

I still think this should be called from xe_late_bind_init(). You also 
need to check the return code.

> +
>   	err = xe_oa_init(xe);
>   	if (err)
>   		return err;
> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c b/drivers/gpu/drm/xe/xe_late_bind_fw.c
> index 22eb9b51b4ee..0231f3dcfc18 100644
> --- a/drivers/gpu/drm/xe/xe_late_bind_fw.c
> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c
> @@ -5,6 +5,7 @@
>   
>   #include <linux/component.h>
>   #include <linux/delay.h>
> +#include <linux/firmware.h>
>   
>   #include <drm/drm_managed.h>
>   #include <drm/intel/i915_component.h>
> @@ -13,13 +14,94 @@
>   
>   #include "xe_device.h"
>   #include "xe_late_bind_fw.h"
> +#include "xe_pcode.h"
> +#include "xe_pcode_api.h"
>   
> -static struct xe_device *
> -late_bind_to_xe(struct xe_late_bind *late_bind)

nit: might be worth modifying the previous patch to have this introduced 
in 1 line instead of modifying it here, to keep the diff cleaner.

> +static const char * const fw_type_to_name[] = {
> +		[CSC_LATE_BINDING_TYPE_FAN_CONTROL] = "fan_control",
> +	};
> +
> +static struct xe_device *late_bind_to_xe(struct xe_late_bind *late_bind)
>   {
>   	return container_of(late_bind, struct xe_device, late_bind);
>   }
>   
> +static int late_bind_fw_num_fans(struct xe_late_bind *late_bind)
> +{
> +	struct xe_device *xe = late_bind_to_xe(late_bind);
> +	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
> +	u32 uval;
> +
> +	if (!xe_pcode_read(root_tile,
> +			   PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), &uval, NULL))
> +		return uval;
> +	else
> +		return 0;
> +}
> +
> +static int late_bind_fw_init(struct xe_late_bind *late_bind, u32 type)
> +{
> +	struct xe_device *xe = late_bind_to_xe(late_bind);
> +	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
> +	struct xe_late_bind_fw *lb_fw;
> +	const struct firmware *fw;
> +	u32 num_fans;
> +	int ret;
> +
> +	if (!late_bind->component_added)
> +		return 0;
> +
> +	lb_fw = &late_bind->late_bind_fw;
> +
> +	lb_fw->type = type;

Should we validate that "type" is sane?

> +	lb_fw->valid = false;
> +
> +	if (lb_fw->type == CSC_LATE_BINDING_TYPE_FAN_CONTROL) {
> +		num_fans = late_bind_fw_num_fans(late_bind);
> +		drm_dbg(&xe->drm, "Number of Fans: %d\n", num_fans);
> +		if (!num_fans)
> +			return 0;
> +	}
> +
> +	lb_fw->flags = CSC_LATE_BINDING_FLAGS_IS_PERSISTENT;
> +
> +	snprintf(lb_fw->blob_path, sizeof(lb_fw->blob_path), "xe/%s_8086_%04x_%04x_%04x.bin",
> +		 fw_type_to_name[type], pdev->device,
> +		 pdev->subsystem_vendor, pdev->subsystem_device);
> +
> +	drm_dbg(&xe->drm, "Request late binding firmware %s\n", lb_fw->blob_path);
> +	ret = firmware_request_nowarn(&fw, lb_fw->blob_path, xe->drm.dev);
> +	if (ret) {
> +		drm_dbg(&xe->drm, "Failed to request %s\n", lb_fw->blob_path);

This log still seems like an error when it's actually an ok outcome. 
Maybe change it to something like:

drm_dbg("%s late binding fw not available for current device", 
fw_type_to_name[type])

> +		return 0;
> +	}
> +
> +	if (fw->size > MAX_PAYLOAD_SIZE) {
> +		lb_fw->payload_size = MAX_PAYLOAD_SIZE;

Why are we even setting payload_size here?

> +		drm_err(&xe->drm, "Firmware %s size %zu is larger than max pay load size %u\n",
> +			lb_fw->blob_path, fw->size, MAX_PAYLOAD_SIZE);
> +		return 0;

Maybe add a comment to explain why we don't return an error here (if 
this was indeed on purpose).
Also, you're not calling release_firmware() when exiting from here.

> +	}
> +
> +	lb_fw->payload_size = fw->size;
> +
> +	memcpy(lb_fw->payload, fw->data, lb_fw->payload_size);
> +	release_firmware(fw);
> +	lb_fw->valid = true;
> +
> +	return 0;

This function seems to return 0 from all return calls. Is that 
intentional? if so, just switch to void.

> +}
> +
> +/**
> + * xe_mei_late_bind_fw_init() - Initialize late bind firmware
> + *
> + * Return: 0 if the initialization was successful, a negative errno otherwise.
> + */
> +int xe_late_bind_fw_init(struct xe_late_bind *late_bind)
> +{
> +	return late_bind_fw_init(late_bind, CSC_LATE_BINDING_TYPE_FAN_CONTROL);
> +}
> +
>   static int xe_late_bind_component_bind(struct device *xe_kdev,
>   				       struct device *mei_kdev, void *data)
>   {
> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.h b/drivers/gpu/drm/xe/xe_late_bind_fw.h
> index 4c73571c3e62..a8b47523b203 100644
> --- a/drivers/gpu/drm/xe/xe_late_bind_fw.h
> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.h
> @@ -11,5 +11,6 @@
>   struct xe_late_bind;
>   
>   int xe_late_bind_init(struct xe_late_bind *late_bind);
> +int xe_late_bind_fw_init(struct xe_late_bind *late_bind);
>   
>   #endif
> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
> index afa1917b5f51..c427e141c685 100644
> --- a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
> @@ -10,6 +10,41 @@
>   #include <linux/mutex.h>
>   #include <linux/types.h>
>   
> +#define MAX_PAYLOAD_SIZE (1024 * 4)
> +
> +/**
> + * xe_late_bind_fw_type - enum to determine late binding fw type
> + */
> +enum xe_late_bind_type {
> +	CSC_LATE_BINDING_TYPE_FAN_CONTROL = 1,
> +};
> +
> +/**
> + * Late Binding flags
> + */
> +enum csc_late_binding_flags {
> +	/** Persistent across warm reset */
> +	CSC_LATE_BINDING_FLAGS_IS_PERSISTENT = 0x1

Should this be a define, since it is part of a bitmask of flags?

> +};

AFAIU those 2 enums are not internal to Xe and are defined as part of 
CSC interface instead, but that's not clear here. IMO better to either 
move them to a file in the abi folder, or at least make it extremely 
clear that those values are CSC-defined. Moving then to 
late_bind_mei_interface.h could also be an option since they're values 
for that interface.

> +
> +/**
> + * struct xe_late_bind_fw
> + */
> +struct xe_late_bind_fw {
> +	/** @late_bind_fw.valid */
> +	bool valid;
> +	/** @late_bind_fw.blob_path: late binding fw blob path */
> +	char blob_path[PATH_MAX];
> +	/** @late_bind_fw.type */
> +	u32  type;
> +	/** @late_bind_fw.flags */

Missing descriptions for these 3 vars

> +	u32  flags;
> +	/** @late_bind_fw.payload: to store the late binding blob */
> +	u8  payload[MAX_PAYLOAD_SIZE];
> +	/** @late_bind_fw.payload_size: late binding blob payload_size */

if you only set payload_size when the fw init is successful you can use 
it being non-zero as a valid check instead of having a dedicated 
variable for that. not a blocker.

> +	size_t payload_size;
> +};
> +
>   /**
>    * struct xe_late_bind_component - Late Binding services component
>    * @mei_dev: device that provide Late Binding service.
> @@ -34,6 +69,8 @@ struct xe_late_bind {
>   	bool component_added;
>   	/** @late_bind.mutex: protects the component binding and usage */
>   	struct mutex mutex;
> +	/** @late_bind.late_bind_fw: late binding firmware */
> +	struct xe_late_bind_fw late_bind_fw;

The code seems to be designed from multiple type of late binding FW in 
mind (hence the type), but here there is only one. Maybe switch this to 
an array, even if it only has one entry for now. That way if we need to 
extend it later we just increase the array size.

Daniele

>   };
>   
>   #endif



More information about the dri-devel mailing list