[PATCH v6 05/10] drm/xe/xe_late_bind_fw: Load late binding firmware
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Thu Jul 3 21:38:17 UTC 2025
On 7/3/2025 12:31 PM, Badal Nilawar wrote:
> Load late binding firmware
>
> v2:
> - s/EAGAIN/EBUSY/
> - Flush worker in suspend and driver unload (Daniele)
> v3:
> - Use retry interval of 6s, in steps of 200ms, to allow
> other OS components release MEI CL handle (Sasha)
> v4:
> - return -ENODEV if component not added (Daniele)
> - parse and print status returned by csc
> v5:
> - Use payload to check firmware valid (Daniele)
> - Obtain the RPM reference before scheduling the worker to
> ensure the device remains awake until the worker completes
> firmware loading (Rodrigo)
> v6:
> - In case of error donot re-attempt fw download (Daniele)
>
> Signed-off-by: Badal Nilawar <badal.nilawar at intel.com>
> ---
> drivers/gpu/drm/xe/xe_late_bind_fw.c | 155 ++++++++++++++++++++-
> drivers/gpu/drm/xe/xe_late_bind_fw.h | 1 +
> drivers/gpu/drm/xe/xe_late_bind_fw_types.h | 7 +
> 3 files changed, 162 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c b/drivers/gpu/drm/xe/xe_late_bind_fw.c
> index ab83ab06aee7..4e8a2256802d 100644
> --- a/drivers/gpu/drm/xe/xe_late_bind_fw.c
> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c
> @@ -16,6 +16,20 @@
> #include "xe_late_bind_fw.h"
> #include "xe_pcode.h"
> #include "xe_pcode_api.h"
> +#include "xe_pm.h"
> +
> +/*
> + * The component should load quite quickly in most cases, but it could take
> + * a bit. Using a very big timeout just to cover the worst case scenario
> + */
> +#define LB_INIT_TIMEOUT_MS 20000
> +
> +/*
> + * Retry interval set to 6 seconds, in steps of 200 ms, to allow time for
> + * other OS components to release the MEI CL handle
> + */
> +#define LB_FW_LOAD_RETRY_MAXCOUNT 30
> +#define LB_FW_LOAD_RETRY_PAUSE_MS 200
>
> static const u32 fw_id_to_type[] = {
> [XE_LB_FW_FAN_CONTROL] = CSC_LATE_BINDING_TYPE_FAN_CONTROL,
> @@ -31,6 +45,30 @@ late_bind_to_xe(struct xe_late_bind *late_bind)
> return container_of(late_bind, struct xe_device, late_bind);
> }
>
> +static const char *xe_late_bind_parse_status(uint32_t status)
> +{
> + switch (status) {
> + case CSC_LATE_BINDING_STATUS_SUCCESS:
> + return "success";
> + case CSC_LATE_BINDING_STATUS_4ID_MISMATCH:
> + return "4Id Mismatch";
> + case CSC_LATE_BINDING_STATUS_ARB_FAILURE:
> + return "ARB Failure";
> + case CSC_LATE_BINDING_STATUS_GENERAL_ERROR:
> + return "General Error";
> + case CSC_LATE_BINDING_STATUS_INVALID_PARAMS:
> + return "Invalid Params";
> + case CSC_LATE_BINDING_STATUS_INVALID_SIGNATURE:
> + return "Invalid Signature";
> + case CSC_LATE_BINDING_STATUS_INVALID_PAYLOAD:
> + return "Invalid Payload";
> + case CSC_LATE_BINDING_STATUS_TIMEOUT:
> + return "Timeout";
> + default:
> + return "Unknown error";
> + }
> +}
> +
> static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind)
> {
> struct xe_device *xe = late_bind_to_xe(late_bind);
> @@ -44,6 +82,99 @@ static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind)
> return 0;
> }
>
> +static void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind)
> +{
> + struct xe_device *xe = late_bind_to_xe(late_bind);
> + struct xe_late_bind_fw *lbfw;
> + int fw_id;
> +
> + for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) {
> + lbfw = &late_bind->late_bind_fw[fw_id];
> + if (lbfw->payload && late_bind->wq) {
> + drm_dbg(&xe->drm, "Flush work: load %s firmware\n",
> + fw_id_to_name[lbfw->id]);
> + flush_work(&lbfw->work);
> + }
> + }
> +}
> +
> +static void xe_late_bind_work(struct work_struct *work)
> +{
> + struct xe_late_bind_fw *lbfw = container_of(work, struct xe_late_bind_fw, work);
> + struct xe_late_bind *late_bind = container_of(lbfw, struct xe_late_bind,
> + late_bind_fw[lbfw->id]);
> + struct xe_device *xe = late_bind_to_xe(late_bind);
> + int retry = LB_FW_LOAD_RETRY_MAXCOUNT;
> + int ret;
> + int slept;
> +
> + xe_device_assert_mem_access(xe);
> +
> + /* we can queue this before the component is bound */
> + for (slept = 0; slept < LB_INIT_TIMEOUT_MS; slept += 100) {
> + if (late_bind->component.ops)
> + break;
> + msleep(100);
> + }
> +
> + if (!late_bind->component.ops) {
> + drm_err(&xe->drm, "Late bind component not bound\n");
> + /* Do not re-attempt fw load */
> + drmm_kfree(&xe->drm, (void *)lbfw->payload);
> + lbfw->payload = NULL;
> + goto out;
> + }
> +
> + drm_dbg(&xe->drm, "Load %s firmware\n", fw_id_to_name[lbfw->id]);
> +
> + do {
> + ret = late_bind->component.ops->push_config(late_bind->component.mei_dev,
> + lbfw->type, lbfw->flags,
> + lbfw->payload, lbfw->payload_size);
> + if (!ret)
> + break;
> + msleep(LB_FW_LOAD_RETRY_PAUSE_MS);
> + } while (--retry && ret == -EBUSY);
> +
> + if (!ret) {
> + drm_dbg(&xe->drm, "Load %s firmware successful\n",
> + fw_id_to_name[lbfw->id]);
> + goto out;
> + }
> +
> + if (ret > 0)
> + drm_err(&xe->drm, "Load %s firmware failed with err %d, %s\n",
> + fw_id_to_name[lbfw->id], ret, xe_late_bind_parse_status(ret));
> + else
> + drm_err(&xe->drm, "Load %s firmware failed with err %d",
> + fw_id_to_name[lbfw->id], ret);
> + /* Do not re-attempt fw load */
> + drmm_kfree(&xe->drm, (void *)lbfw->payload);
> + lbfw->payload = NULL;
> +
> +out:
> + xe_pm_runtime_put(xe);
> +}
> +
> +int xe_late_bind_fw_load(struct xe_late_bind *late_bind)
> +{
> + struct xe_device *xe = late_bind_to_xe(late_bind);
> + struct xe_late_bind_fw *lbfw;
> + int fw_id;
> +
> + if (!late_bind->component_added)
> + return -ENODEV;
> +
> + for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) {
> + lbfw = &late_bind->late_bind_fw[fw_id];
> + if (lbfw->payload) {
> + xe_pm_runtime_get_noresume(xe);
> + queue_work(late_bind->wq, &lbfw->work);
> + }
> + }
> + return 0;
> +}
> +
> static int __xe_late_bind_fw_init(struct xe_late_bind *late_bind, u32 fw_id)
> {
> struct xe_device *xe = late_bind_to_xe(late_bind);
> @@ -97,6 +228,7 @@ static int __xe_late_bind_fw_init(struct xe_late_bind *late_bind, u32 fw_id)
>
> memcpy((void *)lb_fw->payload, fw->data, lb_fw->payload_size);
> release_firmware(fw);
> + INIT_WORK(&lb_fw->work, xe_late_bind_work);
>
> return 0;
> }
> @@ -106,11 +238,16 @@ static int xe_late_bind_fw_init(struct xe_late_bind *late_bind)
> int ret;
> int fw_id;
>
> + late_bind->wq = alloc_ordered_workqueue("late-bind-ordered-wq", 0);
> + if (!late_bind->wq)
> + return -ENOMEM;
> +
> for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) {
> ret = __xe_late_bind_fw_init(late_bind, fw_id);
> if (ret)
> return ret;
> }
> +
> return 0;
> }
>
> @@ -132,6 +269,8 @@ static void xe_late_bind_component_unbind(struct device *xe_kdev,
> struct xe_device *xe = kdev_to_xe_device(xe_kdev);
> struct xe_late_bind *late_bind = &xe->late_bind;
>
> + xe_late_bind_wait_for_worker_completion(late_bind);
> +
> late_bind->component.ops = NULL;
> }
>
> @@ -145,7 +284,15 @@ static void xe_late_bind_remove(void *arg)
> struct xe_late_bind *late_bind = arg;
> struct xe_device *xe = late_bind_to_xe(late_bind);
>
> + xe_late_bind_wait_for_worker_completion(late_bind);
> +
> + late_bind->component_added = false;
> +
> component_del(xe->drm.dev, &xe_late_bind_component_ops);
> + if (late_bind->wq) {
> + destroy_workqueue(late_bind->wq);
> + late_bind->wq = NULL;
> + }
> }
>
> /**
> @@ -173,9 +320,15 @@ int xe_late_bind_init(struct xe_late_bind *late_bind)
> return err;
> }
>
> + late_bind->component_added = true;
> +
> err = devm_add_action_or_reset(xe->drm.dev, xe_late_bind_remove, late_bind);
> if (err)
> return err;
>
> - return xe_late_bind_fw_init(late_bind);
> + err = xe_late_bind_fw_init(late_bind);
> + if (err)
> + return err;
> +
> + return xe_late_bind_fw_load(late_bind);
> }
> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.h b/drivers/gpu/drm/xe/xe_late_bind_fw.h
> index 4c73571c3e62..28d56ed2bfdc 100644
> --- a/drivers/gpu/drm/xe/xe_late_bind_fw.h
> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.h
> @@ -11,5 +11,6 @@
> struct xe_late_bind;
>
> int xe_late_bind_init(struct xe_late_bind *late_bind);
> +int xe_late_bind_fw_load(struct xe_late_bind *late_bind);
>
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
> index cd3143256a7c..f650cb8641b3 100644
> --- a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
> @@ -9,6 +9,7 @@
> #include <linux/iosys-map.h>
> #include <linux/mutex.h>
> #include <linux/types.h>
> +#include <linux/workqueue.h>
>
> #define XE_LB_MAX_PAYLOAD_SIZE SZ_4K
>
> @@ -36,6 +37,8 @@ struct xe_late_bind_fw {
> const u8 *payload;
> /** @late_bind_fw.payload_size: late binding blob payload_size */
> size_t payload_size;
> + /** @late_bind_fw.work: worker to upload latebind blob */
> + struct work_struct work;
> };
>
> /**
> @@ -60,6 +63,10 @@ struct xe_late_bind {
> struct xe_late_bind_component component;
> /** @late_bind.late_bind_fw: late binding firmware array */
> struct xe_late_bind_fw late_bind_fw[XE_LB_FW_MAX_ID];
> + /** @late_bind.wq: workqueue to submit request to download late bind blob */
> + struct workqueue_struct *wq;
> + /** @late_bind.component_added: whether the component has been added */
> + bool component_added;
The hooks run by CI spotted issues with the docs here. With those addressed:
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
Daniele
> };
>
> #endif
More information about the Intel-xe
mailing list