[PATCH] drm/amdgpu: Add GFXOFF auto-tunning algorithm
Alex Deucher
alexdeucher at gmail.com
Mon Mar 24 14:13:50 UTC 2025
On Mon, Mar 24, 2025 at 5:06 AM Sergey Kovalenko
<seryoga.engineering at gmail.com> wrote:
>
> Predict an optimal delay to enable GFXOFF for the next interval
> based on the request count:
> - less than 15 requests per second - zero delay
> - less than 25 requests per second - default delay
> - 25 and more requests per second - don't enable GFXOFF
>
> The algorithm allows maintaining low power consumption in idle,
> as well as using the full GPU power under load by eliminating
> hundreds of extra GFXOFF ON/OFF switches.
I still don't understand what problem this is solving. This already
happens with the way the code works now. If there are a lot of
requests to toggle gfxoff, the worker thread to allow it again gets
cancelled and scheduled again, extending the time it's disallowed.
Alex
Alex
>
> Test configuration:
> - Ryzen 5 2500U
> - Ryzen 5 3400G
> - Chromium 134.0.6998.88 Arch Linux
> - Mesa 1:24.3.4-1
> - KDE Plasma 6.3.2
>
> GFXOFF enable requests per second:
> | Test | min | max |
> |-----------------------------------------------------|-----|-----|
> | System idle | 0 | 64 |
> | Web browsing | 0 | 127 |
> | WebGL aquarium | 10 | 236 |
> | Heavy load: glxgears + vkcube + resizing + flipping | 39 | 677 |
>
> Test results, Ryzen 5 2500U:
> | Test | patched-6.13.7.arch1 | 6.13.7.arch1-1 |
> |-----------------------------|----------------------|------------------|
> | System idle (PkgWatt) | ~0.74W | ~1.25W |
> | glxgears (vblank_mode=0) | ~7300 fps, ~7.3W | ~7300 fps, ~7.3W |
> | WebGL aquarium 15.000 fish | 56-60 fps, ~9.8W | 55-60 fps, ~9.8W |
>
> Test results, Ryzen 5 3400G:
> | Test | patched-6.13.7.arch1 | 6.13.7.arch1-1 |
> |-----------------------------|----------------------|------------------|
> | System idle (PkgWatt) | ~3.8W | ~4.3W |
> | glxgears (vblank_mode=0) | ~7200 fps | ~7200 fps |
> | WebGL aquarium 30.000 fish | 37 fps, 47W | 38 fps, 47W |
>
> Signed-off-by: Sergey Kovalenko <seryoga.engineering at gmail.com>
> Tested-by: Liam Fleming <fleming.squared at proton.me>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 94 +++++++++++++++++--------
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 +
> 2 files changed, 67 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index c1f35ded684e..5e23b956e0bf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -781,55 +781,89 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device
> *adev, int xcc_id)
> * 3. other client can cancel their request of disable gfx off feature
> * 4. other client should not send request to enable gfx off feature
> before disable gfx off feature.
> */
> -
> void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
> {
> - unsigned long delay = GFX_OFF_DELAY_ENABLE;
> -
> if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
> return;
>
> mutex_lock(&adev->gfx.gfx_off_mutex);
>
> if (enable) {
> - /* If the count is already 0, it means there's an imbalance bug
> somewhere.
> - * Note that the bug may be in a different caller than the one which
> triggers the
> - * WARN_ON_ONCE.
> - */
> - if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
> + /* This case covers multiple calls from parallel threads */
> + if (!adev->gfx.gfx_off_req_count)
> goto unlock;
>
> - adev->gfx.gfx_off_req_count--;
> + /* Process only if req_count == 0 and GFXOFF is disabled */
> + if (--adev->gfx.gfx_off_req_count || adev->gfx.gfx_off_state)
> + goto unlock;
> +
> + /* If going to s2idle, no need to wait */
> + if (adev->in_s0ix) {
> + if (!amdgpu_dpm_set_powergating_by_smu(
> + adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
> + adev->gfx.gfx_off_state = true;
> +
> + /* Reset delay flag */
> + adev->gfx.gfx_off_use_delay = 0;
> + goto unlock;
> + }
>
> - if (adev->gfx.gfx_off_req_count == 0 &&
> - !adev->gfx.gfx_off_state) {
> - /* If going to s2idle, no need to wait */
> - if (adev->in_s0ix) {
> - if (!amdgpu_dpm_set_powergating_by_smu(adev,
> - AMD_IP_BLOCK_TYPE_GFX, true, 0))
> - adev->gfx.gfx_off_state = true;
> + ++adev->gfx.gfx_off_counter;
> +
> + uint64_t now = get_jiffies_64();
> + uint64_t delta =
> + jiffies_to_msecs(now - adev->gfx.gfx_off_timestamp);
> +
> + if (delta >= 1000u) {
> + /*
> + * Predict the optimal delay for the next interval
> + * based on the current number of requests:
> + * <15 - idle, no delay
> + * <25 - light/medium load, default delay
> + * 25 and more - high load, GFXOFF disabled
> + */
> + if (adev->gfx.gfx_off_counter < 15u) {
> + adev->gfx.gfx_off_use_delay = 0;
> + } else if (adev->gfx.gfx_off_counter < 25u) {
> + adev->gfx.gfx_off_use_delay = 1;
> } else {
> - schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
> - delay);
> + adev->gfx.gfx_off_use_delay = 2;
> }
> +
> + adev->gfx.gfx_off_counter = 0;
> + adev->gfx.gfx_off_timestamp = now;
> }
> +
> + /* Don't schedule gfxoff under heavy load */
> + if (adev->gfx.gfx_off_use_delay == 2)
> + goto unlock;
> +
> + schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
> + adev->gfx.gfx_off_use_delay ?
> + GFX_OFF_DELAY_ENABLE :
> + GFX_OFF_NO_DELAY);
> } else {
> - if (adev->gfx.gfx_off_req_count == 0) {
> - cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
> + /* GFXOFF was enabled when req_count == 0 */
> + if (++adev->gfx.gfx_off_req_count != 1)
> + goto unlock;
>
> - if (adev->gfx.gfx_off_state &&
> - !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX,
> false, 0)) {
> - adev->gfx.gfx_off_state = false;
> + /* Nothing to do if the work wasn't scheduled */
> + if (adev->gfx.gfx_off_use_delay == 2)
> + goto unlock;
>
> - if (adev->gfx.funcs->init_spm_golden) {
> - dev_dbg(adev->dev,
> - "GFXOFF is disabled, re-init SPM golden settings\n");
> - amdgpu_gfx_init_spm_golden(adev);
> - }
> + cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
> +
> + if (adev->gfx.gfx_off_state &&
> + !amdgpu_dpm_set_powergating_by_smu(
> + adev, AMD_IP_BLOCK_TYPE_GFX, false, 0)) {
> + adev->gfx.gfx_off_state = false;
> +
> + if (adev->gfx.funcs->init_spm_golden) {
> + dev_dbg(adev->dev,
> + "GFXOFF is disabled, re-init SPM golden settings\n");
> + amdgpu_gfx_init_spm_golden(adev);
> }
> }
> -
> - adev->gfx.gfx_off_req_count++;
> }
>
> unlock:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 8b5bd63b5773..38fd445a353b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -430,7 +430,10 @@ struct amdgpu_gfx {
> /* gfx off */
> bool gfx_off_state; /* true: enabled,
> false: disabled */
> struct mutex gfx_off_mutex; /* mutex to
> change gfxoff state */
> + uint64_t gfx_off_timestamp; /* gfxoff enable call timestamp */
> + uint32_t gfx_off_use_delay; /* flag to choose the delay range */
> uint32_t gfx_off_req_count; /* default 1,
> enable gfx off: dec 1, disable gfx off: add 1 */
> + uint32_t gfx_off_counter; /* count of gfxoff enable calls */
> struct delayed_work gfx_off_delay_work; /* async work to
> set gfx block off */
> uint32_t gfx_off_residency; /* last logged
> residency */
> uint64_t gfx_off_entrycount; /* count of times
> GPU has get into GFXOFF state */
> --
> 2.49.0
More information about the amd-gfx
mailing list