[v1,2/3] drm/amd/pm: send the SMT-enable message to pmfw

Fri Mar 24 02:29:20 UTC 2023

On 3/23/2023 11:36 PM, Limonciello, Mario wrote:
> On 3/23/2023 12:41, Limonciello, Mario wrote:
>> On 3/22/2023 00:48, Wenyou Yang wrote:
>>> When the CPU SMT status change in the fly, sent the SMT-enable
>>> message to pmfw to notify it that the SMT status changed.
>>>
>>> Signed-off-by: Wenyou Yang <WenYou.Yang at amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     | 41 +++++++++++++++++++
>>>   drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  5 +++
>>>   2 files changed, 46 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
>>> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> index b5d64749990e..5cd85a9d149d 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> @@ -22,6 +22,7 @@
>>>   #define SWSMU_CODE_LAYER_L1
>>> +#include <linux/cpu.h>
>>>   #include <linux/firmware.h>
>>>   #include <linux/pci.h>
>>> @@ -69,6 +70,14 @@ static int smu_set_fan_speed_rpm(void *handle, 
>>> uint32_t speed);
>>>   static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled);
>>>   static int smu_set_mp1_state(void *handle, enum pp_mp1_state 
>>> mp1_state);
>>> +static int smt_notifier_callback(struct notifier_block *nb, unsigned 
>>> long action, void *data);
>>> +
>>> +extern struct raw_notifier_head smt_notifier_head;
>>> +
>>> +static struct notifier_block smt_notifier = {
>>> +    .notifier_call = smt_notifier_callback,
>>> +};
>>> +
>>>   static int smu_sys_get_pp_feature_mask(void *handle,
>>>                          char *buf)
>>>   {
>>> @@ -625,6 +634,8 @@ static int smu_set_funcs(struct amdgpu_device *adev)
>>>       return 0;
>>>   }
>>> +static struct smu_context *current_smu;
>>> +
>>>   static int smu_early_init(void *handle)
>>>   {
>>>       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>> @@ -645,6 +656,7 @@ static int smu_early_init(void *handle)
>>>       mutex_init(&smu->message_lock);
>>>       adev->powerplay.pp_handle = smu;
>>> +    current_smu = smu;
> 
> Although this series is intended for the Van Gogh case right now, I 
> dont't think this would scale well for multiple GPUs in a system.
> 
> I think that instead you may want to move the notifier callback to be a 
> level "higher" in amdgpu.  Perhaps amdgpu_device.c?  Then when that 
> notifier call is received you'll want to walk through the PCI device 
> space to find any GPUs that are bound with AMDGPU a series of 
> wrappers/calls that end up calling smu_set_cpu_smt_enable with the 
> approriate arguments.
> 

This is not required when the notifier is registered only within Vangogh 
ppt function. Then follow Evan's suggestion of keeping the notifier 
block inside smu. From the notifier block, it can find the smu block and 
then call cpu_smt_enable/disable. That way notifier callback comes only 
once even with multiple dGPUs + Vangogh and processed for the 
corresponding smu.

This notifier doesn't need to be registered for platforms only with 
dGPUs or APUs which don't need this.

Thanks,
Lijo

> 
>>>       adev->powerplay.pp_funcs = &swsmu_pm_funcs;
>>>       r = smu_set_funcs(adev);
>>> @@ -1105,6 +1117,8 @@ static int smu_sw_init(void *handle)
>>>       if (!smu->ppt_funcs->get_fan_control_mode)
>>>           smu->adev->pm.no_fan = true;
>>> +    raw_notifier_chain_register(&smt_notifier_head, &smt_notifier);
>>> +
>>>       return 0;
>>>   }
>>> @@ -1122,6 +1136,8 @@ static int smu_sw_fini(void *handle)
>>>       smu_fini_microcode(smu);
>>> +    raw_notifier_chain_unregister(&smt_notifier_head, &smt_notifier);
>>> +
>>>       return 0;
>>>   }
>>> @@ -3241,3 +3257,28 @@ int smu_send_hbm_bad_channel_flag(struct 
>>> smu_context *smu, uint32_t size)
>>>       return ret;
>>>   }
>>> +
>>> +static int smu_set_cpu_smt_enable(struct smu_context *smu, bool enable)
>>> +{
>>> +    int ret = -EINVAL;
>>> +
>>> +    if (smu->ppt_funcs && smu->ppt_funcs->set_cpu_smt_enable)
>>> +        ret = smu->ppt_funcs->set_cpu_smt_enable(smu, enable);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +static int smt_notifier_callback(struct notifier_block *nb,
>>> +                 unsigned long action, void *data)
>>> +{
>>> +    struct smu_context *smu = current_smu;
>>> +    int ret = NOTIFY_OK;
>>
>> This initialization is pointless, it's clobbered in the next line.
>>
>>> +
>>> +    ret = (action == SMT_ENABLED) ?
>>> +                smu_set_cpu_smt_enable(smu, true) :
>>> +                smu_set_cpu_smt_enable(smu, false);
>>
>> How about this instead, it should be more readable:
>>
>>      ret = smu_set_cpu_smt_enable(smu, action == SMT_ENABLED);
>>
>>> +    if (ret)
>>> +        ret = NOTIFY_BAD;
>>> +
>>> +    return ret;
>>
>> How about instead:
>>
>>      dev_dbg(adev->dev, "failed to %sable SMT: %d\n", action == 
>> SMT_ENABLED ? "en" : "dis", ret);
>>
>>      return ret ? NOTIFY_BAD : NOTIFY_OK;
>>
>>> +}
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
>>> b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>>> index 09469c750a96..7c6594bba796 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>>> @@ -1354,6 +1354,11 @@ struct pptable_funcs {
>>>        * @init_pptable_microcode: Prepare the pptable microcode to 
>>> upload via PSP
>>>        */
>>>       int (*init_pptable_microcode)(struct smu_context *smu);
>>> +
>>> +    /**
>>> +     * @set_cpu_smt_enable: Set the CPU SMT status
>>> +     */
>>> +    int (*set_cpu_smt_enable)(struct smu_context *smu, bool enable);
>>>   };
>>>   typedef enum {
>>
>