[PATCH v3 2/5] drm/amdgpu: add new functions to set GPU power profile
Sharma, Shashank
shashank.sharma at amd.com
Tue Sep 27 13:47:35 UTC 2022
On 9/27/2022 3:29 PM, Lazar, Lijo wrote:
>
>
> On 9/27/2022 6:23 PM, Sharma, Shashank wrote:
>>
>>
>> On 9/27/2022 2:39 PM, Lazar, Lijo wrote:
>>>
>>>
>>> On 9/27/2022 5:53 PM, Sharma, Shashank wrote:
>>>>
>>>>
>>>> On 9/27/2022 2:10 PM, Lazar, Lijo wrote:
>>>>>
>>>>>
>>>>> On 9/27/2022 5:11 PM, Sharma, Shashank wrote:
>>>>>>
>>>>>>
>>>>>> On 9/27/2022 11:58 AM, Lazar, Lijo wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 9/27/2022 3:10 AM, Shashank Sharma wrote:
>>>>>>>> This patch adds new functions which will allow a user to
>>>>>>>> change the GPU power profile based on a GPU workload hint
>>>>>>>> flag.
>>>>>>>>
>>>>>>>> Cc: Alex Deucher <alexander.deucher at amd.com>
>>>>>>>> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
>>>>>>>> ---
>>>>>>>> drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
>>>>>>>> .../gpu/drm/amd/amdgpu/amdgpu_ctx_workload.c | 97
>>>>>>>> +++++++++++++++++++
>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
>>>>>>>> .../gpu/drm/amd/include/amdgpu_ctx_workload.h | 54 +++++++++++
>>>>>>>> drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 5 +
>>>>>>>> 5 files changed, 158 insertions(+), 1 deletion(-)
>>>>>>>> create mode 100644
>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx_workload.c
>>>>>>>> create mode 100644
>>>>>>>> drivers/gpu/drm/amd/include/amdgpu_ctx_workload.h
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>>>> index 5a283d12f8e1..34679c657ecc 100644
>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>>>> @@ -50,7 +50,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>>>>>>> atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
>>>>>>>> atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
>>>>>>>> amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o
>>>>>>>> amdgpu_pll.o \
>>>>>>>> - amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
>>>>>>>> + amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o
>>>>>>>> amdgpu_ctx_workload.o amdgpu_sync.o \
>>>>>>>> amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o
>>>>>>>> amdgpu_virt.o \
>>>>>>>> amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
>>>>>>>> amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o \
>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx_workload.c
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx_workload.c
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..a11cf29bc388
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx_workload.c
>>>>>>>> @@ -0,0 +1,97 @@
>>>>>>>> +/*
>>>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>>>> + *
>>>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>>>> obtaining a
>>>>>>>> + * copy of this software and associated documentation files
>>>>>>>> (the "Software"),
>>>>>>>> + * to deal in the Software without restriction, including
>>>>>>>> without limitation
>>>>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>>>>> sublicense,
>>>>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>>>>> whom the
>>>>>>>> + * Software is furnished to do so, subject to the following
>>>>>>>> conditions:
>>>>>>>> + *
>>>>>>>> + * The above copyright notice and this permission notice shall
>>>>>>>> be included in
>>>>>>>> + * all copies or substantial portions of the Software.
>>>>>>>> + *
>>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>>>> KIND, EXPRESS OR
>>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>>>> MERCHANTABILITY,
>>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>>>>> EVENT SHALL
>>>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY
>>>>>>>> CLAIM, DAMAGES OR
>>>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>>>>> OTHERWISE,
>>>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
>>>>>>>> THE USE OR
>>>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>>>> + *
>>>>>>>> + */
>>>>>>>> +#include <drm/drm.h>
>>>>>>>> +#include "kgd_pp_interface.h"
>>>>>>>> +#include "amdgpu_ctx_workload.h"
>>>>>>>> +
>>>>>>>> +static enum PP_SMC_POWER_PROFILE
>>>>>>>> +amdgpu_workload_to_power_profile(uint32_t hint)
>>>>>>>> +{
>>>>>>>> + switch (hint) {
>>>>>>>> + case AMDGPU_CTX_WORKLOAD_HINT_NONE:
>>>>>>>> + default:
>>>>>>>> + return PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
>>>>>>>> +
>>>>>>>> + case AMDGPU_CTX_WORKLOAD_HINT_3D:
>>>>>>>> + return PP_SMC_POWER_PROFILE_FULLSCREEN3D;
>>>>>>>> + case AMDGPU_CTX_WORKLOAD_HINT_VIDEO:
>>>>>>>> + return PP_SMC_POWER_PROFILE_VIDEO;
>>>>>>>> + case AMDGPU_CTX_WORKLOAD_HINT_VR:
>>>>>>>> + return PP_SMC_POWER_PROFILE_VR;
>>>>>>>> + case AMDGPU_CTX_WORKLOAD_HINT_COMPUTE:
>>>>>>>> + return PP_SMC_POWER_PROFILE_COMPUTE;
>>>>>>>> + }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +int amdgpu_set_workload_profile(struct amdgpu_device *adev,
>>>>>>>> + uint32_t hint)
>>>>>>>> +{
>>>>>>>> + int ret = 0;
>>>>>>>> + enum PP_SMC_POWER_PROFILE profile =
>>>>>>>> + amdgpu_workload_to_power_profile(hint);
>>>>>>>> +
>>>>>>>> + if (adev->pm.workload_mode == hint)
>>>>>>>> + return 0;
>>>>>>>> +
>>>>>>>> + mutex_lock(&adev->pm.smu_workload_lock);
>>>>>>>
>>>>>>> If it's all about pm subsystem variable accesses, this API should
>>>>>>> rather be inside amd/pm subsystem. No need to expose the variable
>>>>>>> outside pm subsystem. Also currently all amdgpu_dpm* calls are
>>>>>>> protected under one mutex. Then this extra lock won't be needed.
>>>>>>>
>>>>>>
>>>>>> This is tricky, this is not all about PM subsystem. Note that the
>>>>>> job management and scheduling is handled into amdgpu_ctx, so the
>>>>>> workload hint is set in context_management API. The API is
>>>>>> consumed when the job is actually run from amdgpu_run() layer. So
>>>>>> its a joint interface between context and PM.
>>>>>>
>>>>>
>>>>> If you take out amdgpu_workload_to_power_profile() line, everything
>>>>> else looks to touch only pm variables/functions.
>>>>
>>>> That's not a line, that function converts an AMDGPU_CTX hint to a PP
>>>> profile. And going by that logic, this whole code was kept in the
>>>> amdgpu_ctx.c file as well, because this code is consuming the PM API.
>>>> So to avoid these conflicts, having a new file is a better idea.
>>>>
>>>> You could still keep a
>>>>> wrapper though. Also dpm_* functions are protected, so the extra
>>>>> mutex can be avoided as well.
>>>>>
>>>> The lock also protects pm.workload_mode writes.
>>>>
>>>>>>>> +
>>>>>>>> + if (adev->pm.workload_mode == hint)
>>>>>>>> + goto unlock;
>>>>>>>> +
>>>>>>>> + ret = amdgpu_dpm_switch_power_profile(adev, profile, 1);
>>>>>>>> + if (!ret)
>>>>>>>> + adev->pm.workload_mode = hint;
>>>>>>>> + atomic_inc(&adev->pm.workload_switch_ref);
>>>>>>>
>>>>>>> Why is this reference kept? The swtiching happens inside a lock
>>>>>>> and there is already a check not to switch if the hint matches
>>>>>>> with current workload.
>>>>>>>
>>>>>>
>>>>>> This reference is kept so that we would not reset the PM mode to
>>>>>> DEFAULT when some other context has switched the PP mode. If you
>>>>>> see the 4th patch, the PM mode will be changed when the job in
>>>>>> that context is run, and a pm_reset function will be scheduled
>>>>>> when the job is done. But in between if another job from another
>>>>>> context has changed the PM mode, the reference count will prevent
>>>>>> us from resetting the PM mode.
>>>>>>
>>>>>
>>>>> This helps only if multiple jobs request the same mode. If they
>>>>> request different modes, then this is not helping much.
>>>>
>>>> No that's certainly not the case. It's a counter, whose aim is to
>>>> allow a PP reset only when the counter is 0. Do note that the
>>>> reset() happens only in the job_free_cb(), which gets schedule
>>>> later. If this counter is not zero, which means another work has
>>>> changed the profile in between, and we should not reset it.
>>>>
>>>>>
>>>>> It could be useful to profile some apps assuming it has exclusive
>>>>> access.
>>>>>
>>>>> However, in general, the API is not reliable from a user point as
>>>>> the mode requested can be overridden by some other job. Then a
>>>>> better thing to do is to document that and avoid the extra stuff
>>>>> around it.
>>>>>
>>>> As I mentioned before, like any PM feature, the benefits can be seen
>>>> only while running consistent workloads for a long time. I can still
>>>> add a doc note in the UAPI page.
>>>>
>>>
>>>
>>> a) What is the goal of the API? Is it guaranteeing the job to run
>>> under a workprofile mode or something else?
>>
>> No, it does not guarantee anything. If you see the cover letter, it
>> just provides an interface to an app to submit workload under a power
>> profile which can be more suitable for its workload type. As I
>> mentioned, it could be very useful for many scenarios like fullscreen
>> 3D / fullscreen MM scenarios. It could also allow a system-gfx-manager
>> to shift load balance towards one type of workload. There are many
>> applications, once the UAPI is in place.
>>
>>>
>>> b) If it's to guarantee work profile mode, does it really guarantee
>>> that - the answer is NO when some other job is running. It may or may
>>> not work is the answer.
>>>
>>> c) What is the difference between one job resetting the profile mode
>>> to NONE vs another job change the mode to say VIDEO when the original
>>> request is for COMPUTE? While that is the case, what is the use of
>>> any sort of 'pseudo-protection' other than running some code to do
>>> extra lock/unlock stuff.
>>>
>>
>> Your understanding of protection is wrong here. There is intentionally
>> no protection for a job changing another job's set workload profile,
>> because in that way we will end up serializing/bottlenecking workload
>> submission until PM profile is ready to be changed, which takes away
>> benefit of having multiple queues of parallel submission.
>>
>> The protection provided by the ref counter is to avoid the clearing of
>> the profile (to NONE), while another workload is in execution. The
>> difference between NONE and VIDEO is still that NONE is the default
>> profile without any fine tuning, and VIDEO is still fine tuned for
>> VIDEO type of workloads.
>>
>
> Protection 1 is - mutex_lock(&adev->pm.smu_workload_lock);
>
> The line that follows is amdgpu_dpm_switch_power_profile() - this one
> will allow only single client use- two jobs won't be able to switch at
> the same time. All *dpm* APIs are protected like that.
>
this also protects the pm.workload_mode variable which is being set
after the amdgpu_dpm_switch_power_profile call is successful here:
adev->pm.workload_mode = hint;
> Protection 2 is - ref counter.
>
> It helps only in this kind of scenario when two jobs requested the same
> mode successively -
> Job 1 requested compute
> Job 2 requested compute
> Job 1 ends (doesn't reset)
>
> Scenario - 2
> Job 1 requested compute
> Job 2 requested compute
> Job 3 requested 3D
> Job 1 ends (doesn't reset, it continues in 3D)
>
> In this mixed scenario case, I would say NONE is much more optimized as
> it's under FW control. Actually, it does much more fine tuning because
> of its background data collection.
>
It helps in mixed scenarios as well, consider this scenario:
Job 1 requests: 3D
Job 2 requests: Media
Job 1 finishes, but job 2 is ongoing
Job 1 calls reset(), but checks the counter is non-zero and doesn't reset
So the media workload continues in Media mode, not None.
- Shashank
>> In the end, *again* the actual benefit comes when a consistent workload
>> is submitted for a long time, like fullscreen 3D game playback,
>> fullscreen Video movie playback, and so on.
>>
>
> "only under consistent", doesn't justify any software protection logic.
> Again, if the workload is consistent most likely PMFW could be managing
> it better.
>
> Thanks,
> Lijo
>
>> - Shashank
>>
>>> Thanks,
>>> Lijo
>>>
>>>> - Shashank
>>>>
>>>>> Thanks,
>>>>> Lijo
>>>>>
>>>>>> - Shashank
>>>>>>
>>>>>>> Thanks,
>>>>>>> Lijo
>>>>>>>
>>>>>>>> +
>>>>>>>> +unlock:
>>>>>>>> + mutex_unlock(&adev->pm.smu_workload_lock);
>>>>>>>> + return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +int amdgpu_clear_workload_profile(struct amdgpu_device *adev,
>>>>>>>> + uint32_t hint)
>>>>>>>> +{
>>>>>>>> + int ret = 0;
>>>>>>>> + enum PP_SMC_POWER_PROFILE profile =
>>>>>>>> + amdgpu_workload_to_power_profile(hint);
>>>>>>>> +
>>>>>>>> + if (hint == AMDGPU_CTX_WORKLOAD_HINT_NONE)
>>>>>>>> + return 0;
>>>>>>>> +
>>>>>>>> + /* Do not reset GPU power profile if another reset is
>>>>>>>> coming */
>>>>>>>> + if (atomic_dec_return(&adev->pm.workload_switch_ref) > 0)
>>>>>>>> + return 0;
>>>>>>>> +
>>>>>>>> + mutex_lock(&adev->pm.smu_workload_lock);
>>>>>>>> +
>>>>>>>> + if (adev->pm.workload_mode != hint)
>>>>>>>> + goto unlock;
>>>>>>>> +
>>>>>>>> + ret = amdgpu_dpm_switch_power_profile(adev, profile, 0);
>>>>>>>> + if (!ret)
>>>>>>>> + adev->pm.workload_mode = AMDGPU_CTX_WORKLOAD_HINT_NONE;
>>>>>>>> +
>>>>>>>> +unlock:
>>>>>>>> + mutex_unlock(&adev->pm.smu_workload_lock);
>>>>>>>> + return ret;
>>>>>>>> +}
>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>>> index be7aff2d4a57..1f0f64662c04 100644
>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>>> @@ -3554,6 +3554,7 @@ int amdgpu_device_init(struct
>>>>>>>> amdgpu_device *adev,
>>>>>>>> mutex_init(&adev->psp.mutex);
>>>>>>>> mutex_init(&adev->notifier_lock);
>>>>>>>> mutex_init(&adev->pm.stable_pstate_ctx_lock);
>>>>>>>> + mutex_init(&adev->pm.smu_workload_lock);
>>>>>>>> mutex_init(&adev->benchmark_mutex);
>>>>>>>> amdgpu_device_init_apu_flags(adev);
>>>>>>>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_ctx_workload.h
>>>>>>>> b/drivers/gpu/drm/amd/include/amdgpu_ctx_workload.h
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..6060fc53c3b0
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/drivers/gpu/drm/amd/include/amdgpu_ctx_workload.h
>>>>>>>> @@ -0,0 +1,54 @@
>>>>>>>> +/*
>>>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>>>> + *
>>>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>>>> obtaining a
>>>>>>>> + * copy of this software and associated documentation files
>>>>>>>> (the "Software"),
>>>>>>>> + * to deal in the Software without restriction, including
>>>>>>>> without limitation
>>>>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>>>>> sublicense,
>>>>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>>>>> whom the
>>>>>>>> + * Software is furnished to do so, subject to the following
>>>>>>>> conditions:
>>>>>>>> + *
>>>>>>>> + * The above copyright notice and this permission notice shall
>>>>>>>> be included in
>>>>>>>> + * all copies or substantial portions of the Software.
>>>>>>>> + *
>>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>>>> KIND, EXPRESS OR
>>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>>>> MERCHANTABILITY,
>>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>>>>> EVENT SHALL
>>>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY
>>>>>>>> CLAIM, DAMAGES OR
>>>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>>>>> OTHERWISE,
>>>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
>>>>>>>> THE USE OR
>>>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>>>> + *
>>>>>>>> + */
>>>>>>>> +#ifndef _AMDGPU_CTX_WL_H_
>>>>>>>> +#define _AMDGPU_CTX_WL_H_
>>>>>>>> +#include <drm/amdgpu_drm.h>
>>>>>>>> +#include "amdgpu.h"
>>>>>>>> +
>>>>>>>> +/* Workload mode names */
>>>>>>>> +static const char * const amdgpu_workload_mode_name[] = {
>>>>>>>> + "None",
>>>>>>>> + "3D",
>>>>>>>> + "Video",
>>>>>>>> + "VR",
>>>>>>>> + "Compute",
>>>>>>>> + "Unknown",
>>>>>>>> +};
>>>>>>>> +
>>>>>>>> +static inline const
>>>>>>>> +char *amdgpu_workload_profile_name(uint32_t profile)
>>>>>>>> +{
>>>>>>>> + if (profile >= AMDGPU_CTX_WORKLOAD_HINT_NONE &&
>>>>>>>> + profile < AMDGPU_CTX_WORKLOAD_HINT_MAX)
>>>>>>>> + return
>>>>>>>> amdgpu_workload_mode_name[AMDGPU_CTX_WORKLOAD_INDEX(profile)];
>>>>>>>> +
>>>>>>>> + return
>>>>>>>> amdgpu_workload_mode_name[AMDGPU_CTX_WORKLOAD_HINT_MAX];
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +int amdgpu_clear_workload_profile(struct amdgpu_device *adev,
>>>>>>>> + uint32_t hint);
>>>>>>>> +
>>>>>>>> +int amdgpu_set_workload_profile(struct amdgpu_device *adev,
>>>>>>>> + uint32_t hint);
>>>>>>>> +
>>>>>>>> +#endif
>>>>>>>> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>>>>>>> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>>>>>>> index 65624d091ed2..565131f789d0 100644
>>>>>>>> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>>>>>>> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>>>>>>> @@ -361,6 +361,11 @@ struct amdgpu_pm {
>>>>>>>> struct mutex stable_pstate_ctx_lock;
>>>>>>>> struct amdgpu_ctx *stable_pstate_ctx;
>>>>>>>> + /* SMU workload mode */
>>>>>>>> + struct mutex smu_workload_lock;
>>>>>>>> + uint32_t workload_mode;
>>>>>>>> + atomic_t workload_switch_ref;
>>>>>>>> +
>>>>>>>> struct config_table_setting config_table;
>>>>>>>> /* runtime mode */
>>>>>>>> enum amdgpu_runpm_mode rpm_mode;
>>>>>>>>
More information about the amd-gfx
mailing list