[Intel-xe] [PATCH 1/2] drm/xe/uc: Fix uC status tracking

John Harrison john.c.harrison at intel.com
Tue Sep 12 00:33:55 UTC 2023


On 9/11/2023 17:12, John Harrison wrote:
> On 9/11/2023 17:03, Daniele Ceraolo Spurio wrote:
>> On 9/11/2023 4:54 PM, John Harrison wrote:
>>> On 8/2/2023 14:26, Daniele Ceraolo Spurio wrote:
>>>> The current uC status tracking has a few issues:
>>>>
>>>> 1) the HuC is moved to "disabled" instead of "not supported"
>>>>
>>>> 2) the status is left uninitialized instead of "disabled" when the
>>>>     modparam is used to disable support
>>>>
>>>> 3) due to #1, a number of checks are done against "disabled" 
>>>> instead of
>>>>     the appropriate status.
>>>>
>>>> Address all of those by making sure to follow the appropriate state
>>>> transition and checking against the required state.
>>>>
>>>> Signed-off-by: Daniele Ceraolo Spurio 
>>>> <daniele.ceraolospurio at intel.com>
>>>> Cc: John Harrison <John.C.Harrison at Intel.com>
>>>> Cc: Matthew Brost <matthew.brost at intel.com>
>>>> ---
>>>>   drivers/gpu/drm/xe/xe_guc.c   |  3 +++
>>>>   drivers/gpu/drm/xe/xe_huc.c   | 15 +++++++--------
>>>>   drivers/gpu/drm/xe/xe_uc.c    | 10 +++++++---
>>>>   drivers/gpu/drm/xe/xe_uc_fw.c | 14 ++++++++------
>>>>   drivers/gpu/drm/xe/xe_wopcm.c |  3 +--
>>>>   5 files changed, 26 insertions(+), 19 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
>>>> index 2493c5859948..7cef5dcf571e 100644
>>>> --- a/drivers/gpu/drm/xe/xe_guc.c
>>>> +++ b/drivers/gpu/drm/xe/xe_guc.c
>>>> @@ -228,6 +228,9 @@ int xe_guc_init(struct xe_guc *guc)
>>>>       if (ret)
>>>>           goto out;
>>>>   +    if (!xe_uc_fw_is_enabled(&guc->fw))
>>>> +        return 0;
>>>> +
>>>>       ret = xe_guc_log_init(&guc->log);
>>>>       if (ret)
>>>>           goto out;
>>>> diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
>>>> index 177cda14864e..35e2fdd07f69 100644
>>>> --- a/drivers/gpu/drm/xe/xe_huc.c
>>>> +++ b/drivers/gpu/drm/xe/xe_huc.c
>>>> @@ -42,22 +42,21 @@ int xe_huc_init(struct xe_huc *huc)
>>>>       if (ret)
>>>>           goto out;
>>>>   +    if (!xe_uc_fw_is_enabled(&huc->fw))
>>>> +        return 0;
>>>> +
>>>>       xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
>>>>         return 0;
>>>>     out:
>>>> -    if (xe_uc_fw_is_disabled(&huc->fw)) {
>>>> -        drm_info(&xe->drm, "HuC disabled\n");
>>>> -        return 0;
>>>> -    }
>>>>       drm_err(&xe->drm, "HuC init failed with %d", ret);
>>>>       return ret;
>>>>   }
>>>>     int xe_huc_upload(struct xe_huc *huc)
>>>>   {
>>>> -    if (xe_uc_fw_is_disabled(&huc->fw))
>>>> +    if (!xe_uc_fw_is_loadable(&huc->fw))
>>>>           return 0;
>>>>       return xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL);
>>>>   }
>>>> @@ -69,7 +68,7 @@ int xe_huc_auth(struct xe_huc *huc)
>>>>       struct xe_guc *guc = huc_to_guc(huc);
>>>>       int ret;
>>>>   -    if (xe_uc_fw_is_disabled(&huc->fw))
>>>> +    if (!xe_uc_fw_is_loadable(&huc->fw))
>>>>           return 0;
>>>>         XE_WARN_ON(xe_uc_fw_is_running(&huc->fw));
>>>> @@ -106,7 +105,7 @@ int xe_huc_auth(struct xe_huc *huc)
>>>>     void xe_huc_sanitize(struct xe_huc *huc)
>>>>   {
>>>> -    if (xe_uc_fw_is_disabled(&huc->fw))
>>>> +    if (!xe_uc_fw_is_loadable(&huc->fw))
>>>>           return;
>>>>       xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
>>>>   }
>>>> @@ -118,7 +117,7 @@ void xe_huc_print_info(struct xe_huc *huc, 
>>>> struct drm_printer *p)
>>>>         xe_uc_fw_print(&huc->fw, p);
>>>>   -    if (xe_uc_fw_is_disabled(&huc->fw))
>>>> +    if (!xe_uc_fw_is_enabled(&huc->fw))
>>>>           return;
>>>>         err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
>>>> diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
>>>> index addd6f2681b9..368e4feb57db 100644
>>>> --- a/drivers/gpu/drm/xe/xe_uc.c
>>>> +++ b/drivers/gpu/drm/xe/xe_uc.c
>>>> @@ -31,9 +31,10 @@ int xe_uc_init(struct xe_uc *uc)
>>>>   {
>>>>       int ret;
>>>>   -    /* GuC submission not enabled, nothing to do */
>>>> -    if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
>>>> -        return 0;
>>>> +    /*
>>>> +     * We call the GuC/HuC init functions even if GuC submission 
>>>> is off to
>>>> +     * correctly move our tracking of the FW state to "disabled".
>>>> +     */
>>>>         ret = xe_guc_init(&uc->guc);
>>>>       if (ret)
>>>> @@ -43,6 +44,9 @@ int xe_uc_init(struct xe_uc *uc)
>>>>       if (ret)
>>>>           goto err;
>>>>   +    if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
>>>> +        return 0;
>>>> +
>>>>       ret = xe_wopcm_init(&uc->wopcm);
>>>>       if (ret)
>>>>           goto err;
>>>> diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c 
>>>> b/drivers/gpu/drm/xe/xe_uc_fw.c
>>>> index 2e70dd4880f6..fd53ef9e5c99 100644
>>>> --- a/drivers/gpu/drm/xe/xe_uc_fw.c
>>>> +++ b/drivers/gpu/drm/xe/xe_uc_fw.c
>>>> @@ -339,17 +339,19 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
>>>>       XE_WARN_ON(uc_fw->path);
>>>>         uc_fw_auto_select(xe, uc_fw);
>>>> -    xe_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ?
>>>> +    xe_uc_fw_change_status(uc_fw, uc_fw->path ?
>>>>                      XE_UC_FIRMWARE_SELECTED :
>>>> -                   XE_UC_FIRMWARE_DISABLED :
>>>>                      XE_UC_FIRMWARE_NOT_SUPPORTED);
>>>>   -    /* Transform no huc in the list into firmware disabled */
>>>> -    if (uc_fw->type == XE_UC_FW_TYPE_HUC && 
>>>> !xe_uc_fw_is_supported(uc_fw)) {
>>>> +    if (!xe_uc_fw_is_supported(uc_fw))
>>>> +        return 0;
>>>> +
>>>> +    if (!xe_device_guc_submission_enabled(xe)) {
>>> This is generic code for all firmware types. So why specifically 
>>> check for GuC submission? Can't we still load the HuC even with GuC 
>>> submission disabled?
>>
>> This is an unfortunately named function that is not actually GuC (or 
>> even submission) specific, but it is actually used to track usage of 
>> all FWs and related features (see e.g. xe_uc_init). I agree that it 
>> could be renamed, but that's out of scope for this patch.
> I would argue that correct naming of functions is a critical 
> requirement. Otherwise it makes it impossible to understand what the 
> code is doing! Either when developing or when reviewing patches.
>
> John.

PS: Agreed that such renaming is not part of this patch. So with a 
suitable bug logged to track it (or a patch posted)...
Reviewed-by: John Harrison <John.C.Harrison at Intel.com>

>
>> Note that there is no way in Xe to load HuC without GuC submission, 
>> nor there is a plan to add it AFAIK, and even this existing switch is 
>> only there to turn off the FWs for debug.
>>
>> Daniele
>>
>>>
>>> John.
>>>
>>>> xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED);
>>>> -        err = -ENOPKG;
>>>> -        return err;
>>>> +        drm_dbg(&xe->drm, "%s disabled", 
>>>> xe_uc_fw_type_repr(uc_fw->type));
>>>> +        return 0;
>>>>       }
>>>> +
>>>>       err = request_firmware(&fw, uc_fw->path, dev);
>>>>       if (err)
>>>>           goto fail;
>>>> diff --git a/drivers/gpu/drm/xe/xe_wopcm.c 
>>>> b/drivers/gpu/drm/xe/xe_wopcm.c
>>>> index 9a85bcc18830..bf85d4fa56cc 100644
>>>> --- a/drivers/gpu/drm/xe/xe_wopcm.c
>>>> +++ b/drivers/gpu/drm/xe/xe_wopcm.c
>>>> @@ -139,8 +139,7 @@ static int __wopcm_init_regs(struct xe_device 
>>>> *xe, struct xe_gt *gt,
>>>>   {
>>>>       u32 base = wopcm->guc.base;
>>>>       u32 size = wopcm->guc.size;
>>>> -    u32 huc_agent = xe_uc_fw_is_disabled(&gt->uc.huc.fw) ? 0 :
>>>> -        HUC_LOADING_AGENT_GUC;
>>>> +    u32 huc_agent = xe_uc_fw_is_available(&gt->uc.huc.fw) ? 
>>>> HUC_LOADING_AGENT_GUC : 0;
>>>>       u32 mask;
>>>>       int err;
>>>
>>
>



More information about the Intel-xe mailing list