[PATCH v2 05/10] drm/xe/xe_late_bind_fw: Load late binding firmware

Nilawar, Badal badal.nilawar at intel.com
Thu Jun 12 13:31:35 UTC 2025


On 11-06-2025 05:47, Daniele Ceraolo Spurio wrote:
>
>
> On 6/6/2025 10:57 AM, Badal Nilawar wrote:
>> Load late binding firmware
>>
>> v2:
>>   - s/EAGAIN/EBUSY/
>>   - Flush worker in suspend and driver unload (Daniele)
>>
>> Signed-off-by: Badal Nilawar <badal.nilawar at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_late_bind_fw.c       | 121 ++++++++++++++++++++-
>>   drivers/gpu/drm/xe/xe_late_bind_fw.h       |   1 +
>>   drivers/gpu/drm/xe/xe_late_bind_fw_types.h |   5 +
>>   3 files changed, 126 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c 
>> b/drivers/gpu/drm/xe/xe_late_bind_fw.c
>> index 0231f3dcfc18..7fe304c54374 100644
>> --- a/drivers/gpu/drm/xe/xe_late_bind_fw.c
>> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c
>> @@ -16,6 +16,16 @@
>>   #include "xe_late_bind_fw.h"
>>   #include "xe_pcode.h"
>>   #include "xe_pcode_api.h"
>> +#include "xe_pm.h"
>> +
>> +/*
>> + * The component should load quite quickly in most cases, but it 
>> could take
>> + * a bit. Using a very big timeout just to cover the worst case 
>> scenario
>> + */
>> +#define LB_INIT_TIMEOUT_MS 20000
>> +
>> +#define LB_FW_LOAD_RETRY_MAXCOUNT 40
>> +#define LB_FW_LOAD_RETRY_PAUSE_MS 50
>
> Are those retry values spec'd anywhere? For GSC we use those because 
> the GSC specs say to retry in 50ms intervals for up to 2 secs to give 
> time for the GSC to do proxy handling. Does it make sense to do the 
> same in this case, given that there is no proxy involved?
>
>>     static const char * const fw_type_to_name[] = {
>>           [CSC_LATE_BINDING_TYPE_FAN_CONTROL] = "fan_control",
>> @@ -39,6 +49,95 @@ static int late_bind_fw_num_fans(struct 
>> xe_late_bind *late_bind)
>>           return 0;
>>   }
>>   +static void xe_late_bind_wait_for_worker_completion(struct 
>> xe_late_bind *late_bind)
>> +{
>> +    struct xe_device *xe = late_bind_to_xe(late_bind);
>> +    struct xe_late_bind_fw *lbfw;
>> +
>> +    lbfw = &late_bind->late_bind_fw;
>> +    if (lbfw->valid && late_bind->wq) {
>> +        drm_dbg(&xe->drm, "Flush work: load %s firmware\n",
>> +            fw_type_to_name[lbfw->type]);
>> +            flush_work(&lbfw->work);
>> +    }
>> +}
>> +
>> +static void late_bind_work(struct work_struct *work)
>> +{
>> +    struct xe_late_bind_fw *lbfw = container_of(work, struct 
>> xe_late_bind_fw, work);
>> +    struct xe_late_bind *late_bind = container_of(lbfw, struct 
>> xe_late_bind,
>> +                              late_bind_fw);
>> +    struct xe_device *xe = late_bind_to_xe(late_bind);
>> +    int retry = LB_FW_LOAD_RETRY_MAXCOUNT;
>> +    int ret;
>> +    int slept;
>> +
>> +    if (!late_bind->component_added)
>> +        return;
>> +
>> +    if (!lbfw->valid)
>> +        return;
>
> The first check is redundant because lbfw->valid can't be true if 
> late_bind->component_added is false with the current code.
I will remove this change, while scheduling work already lbfw->valid is 
being checked. Shall I even remove check for late_bind->component_added) 
as this is also being checked while before scheduling work.
>
>> +
>> +    /* we can queue this before the component is bound */
>> +    for (slept = 0; slept < LB_INIT_TIMEOUT_MS; slept += 100) {
>> +        if (late_bind->component.ops)
>> +            break;
>> +        msleep(100);
>> +    }
>> +
>> +    xe_pm_runtime_get(xe);
>> +    mutex_lock(&late_bind->mutex);
>> +
>> +    if (!late_bind->component.ops) {
>> +        drm_err(&xe->drm, "Late bind component not bound\n");
>> +        goto out;
>> +    }
>> +
>> +    drm_dbg(&xe->drm, "Load %s firmware\n", 
>> fw_type_to_name[lbfw->type]);
>> +
>> +    do {
>> +        ret = 
>> late_bind->component.ops->push_config(late_bind->component.mei_dev,
>> +                                lbfw->type, lbfw->flags,
>> +                                lbfw->payload, lbfw->payload_size);
>> +        if (!ret)
>> +            break;
>> +        msleep(LB_FW_LOAD_RETRY_PAUSE_MS);
>> +    } while (--retry && ret == -EBUSY);
>> +
>> +    if (ret)
>> +        drm_err(&xe->drm, "Load %s firmware failed with err %d\n",
>> +            fw_type_to_name[lbfw->type], ret);
>> +    else
>> +        drm_dbg(&xe->drm, "Load %s firmware successful\n",
>> +            fw_type_to_name[lbfw->type]);
>> +out:
>> +    mutex_unlock(&late_bind->mutex);
>> +    xe_pm_runtime_put(xe);
>> +}
>> +
>> +int xe_late_bind_fw_load(struct xe_late_bind *late_bind)
>> +{
>> +    struct xe_device *xe = late_bind_to_xe(late_bind);
>> +    struct xe_late_bind_fw *lbfw;
>> +
>> +    if (!late_bind->component_added)
>> +        return -EINVAL;
>> +
>> +    lbfw = &late_bind->late_bind_fw;
>> +    if (lbfw->valid) {
>> +        drm_dbg(&xe->drm, "Queue work: to load %s firmware\n",
>> +            fw_type_to_name[lbfw->type]);
>
> This log seems a bit too specific, also given that you also have logs 
> inside the work

Will remove this log.

Thanks,
Badal

>
> Daniele
>
>> +            queue_work(late_bind->wq, &lbfw->work);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +/**
>> + * late_bind_fw_init() - initialize late bind firmware
>> + *
>> + * Return: 0 if the initialization was successful, a negative errno 
>> otherwise.
>> + */
>>   static int late_bind_fw_init(struct xe_late_bind *late_bind, u32 type)
>>   {
>>       struct xe_device *xe = late_bind_to_xe(late_bind);
>> @@ -87,6 +186,7 @@ static int late_bind_fw_init(struct xe_late_bind 
>> *late_bind, u32 type)
>>         memcpy(lb_fw->payload, fw->data, lb_fw->payload_size);
>>       release_firmware(fw);
>> +    INIT_WORK(&lb_fw->work, late_bind_work);
>>       lb_fw->valid = true;
>>         return 0;
>> @@ -99,7 +199,17 @@ static int late_bind_fw_init(struct xe_late_bind 
>> *late_bind, u32 type)
>>    */
>>   int xe_late_bind_fw_init(struct xe_late_bind *late_bind)
>>   {
>> -    return late_bind_fw_init(late_bind, 
>> CSC_LATE_BINDING_TYPE_FAN_CONTROL);
>> +    int ret;
>> +
>> +    late_bind->wq = alloc_ordered_workqueue("late-bind-ordered-wq", 0);
>> +    if (!late_bind->wq)
>> +        return -ENOMEM;
>> +
>> +    ret = late_bind_fw_init(late_bind, 
>> CSC_LATE_BINDING_TYPE_FAN_CONTROL);
>> +    if (ret)
>> +        return ret;
>> +
>> +    return xe_late_bind_fw_load(late_bind);
>>   }
>>     static int xe_late_bind_component_bind(struct device *xe_kdev,
>> @@ -122,6 +232,8 @@ static void xe_late_bind_component_unbind(struct 
>> device *xe_kdev,
>>       struct xe_device *xe = kdev_to_xe_device(xe_kdev);
>>       struct xe_late_bind *late_bind = &xe->late_bind;
>>   +    xe_late_bind_wait_for_worker_completion(late_bind);
>> +
>>       mutex_lock(&late_bind->mutex);
>>       late_bind->component.ops = NULL;
>>       mutex_unlock(&late_bind->mutex);
>> @@ -140,9 +252,16 @@ static void xe_late_bind_remove(void *arg)
>>       if (!late_bind->component_added)
>>           return;
>>   +    xe_late_bind_wait_for_worker_completion(late_bind);
>> +
>>       component_del(xe->drm.dev, &xe_late_bind_component_ops);
>>       late_bind->component_added = false;
>> +    if (late_bind->wq) {
>> +        destroy_workqueue(late_bind->wq);
>> +        late_bind->wq = NULL;
>> +    }
>>       mutex_destroy(&late_bind->mutex);
>> +
>>   }
>>     /**
>> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.h 
>> b/drivers/gpu/drm/xe/xe_late_bind_fw.h
>> index a8b47523b203..166957e84c2f 100644
>> --- a/drivers/gpu/drm/xe/xe_late_bind_fw.h
>> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.h
>> @@ -12,5 +12,6 @@ struct xe_late_bind;
>>     int xe_late_bind_init(struct xe_late_bind *late_bind);
>>   int xe_late_bind_fw_init(struct xe_late_bind *late_bind);
>> +int xe_late_bind_fw_load(struct xe_late_bind *late_bind);
>>     #endif
>> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h 
>> b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
>> index c427e141c685..690680e8ff22 100644
>> --- a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
>> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
>> @@ -9,6 +9,7 @@
>>   #include <linux/iosys-map.h>
>>   #include <linux/mutex.h>
>>   #include <linux/types.h>
>> +#include <linux/workqueue.h>
>>     #define MAX_PAYLOAD_SIZE (1024 * 4)
>>   @@ -43,6 +44,8 @@ struct xe_late_bind_fw {
>>       u8  payload[MAX_PAYLOAD_SIZE];
>>       /** @late_bind_fw.payload_size: late binding blob payload_size */
>>       size_t payload_size;
>> +    /** @late_bind_fw.work: worker to upload latebind blob */
>> +    struct work_struct work;
>>   };
>>     /**
>> @@ -71,6 +74,8 @@ struct xe_late_bind {
>>       struct mutex mutex;
>>       /** @late_bind.late_bind_fw: late binding firmware */
>>       struct xe_late_bind_fw late_bind_fw;
>> +    /** @late_bind.wq: workqueue to submit request to download late 
>> bind blob */
>> +    struct workqueue_struct *wq;
>>   };
>>     #endif
>


More information about the dri-devel mailing list