[PATCH] drm/amdgpu/vcn: Fix video_profile switch race condition

Alex Deucher alexdeucher at gmail.com
Tue Aug 12 16:08:50 UTC 2025


On Tue, Aug 12, 2025 at 10:56 AM Sathishkumar S
<sathishkumar.sundararaju at amd.com> wrote:
>
> There is a race condition which leads to dpm video power
> profile switch (disable and enable) during active video
> decode on multi-instance VCN hardware.
>
> This patch aims to fix/skip step 3 in the below sequence:
>
>  - inst_1       power_on
>  - inst_0(idle) power_off
>  - inst_0(idle) video_power_profile OFF (step 3)
>  - inst_1       video_power_profile ON during next begin_use
>
> Add flags to track ON/OFF vcn instances and check if all
> instances are off before disabling video power profile.

I think you could also just look at the outstanding fences on the
other instances.  Something like the attached patch.  Either way works
for me.

Alex

>
> Protect workload_profile_active also within pg_lock and ON it
> during first use and OFF it when last VCN instance is powered
> OFF. VCN workload_profile_mutex can be removed after similar
> clean up is done for vcn2_5.
>
> Signed-off-by: Sathishkumar S <sathishkumar.sundararaju at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 24 +++++++++---------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  4 ++++
>  2 files changed, 13 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> index 9a76e11d1c18..da372dd7b761 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> @@ -445,16 +445,16 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
>         if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
>                 mutex_lock(&vcn_inst->vcn_pg_lock);
>                 vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
> -               mutex_unlock(&vcn_inst->vcn_pg_lock);
> -               mutex_lock(&adev->vcn.workload_profile_mutex);
> -               if (adev->vcn.workload_profile_active) {
> +               adev->vcn.flags &= AMDGPU_VCN_FLAG_VINST_OFF(vcn_inst->inst);
> +               if (adev->vcn.workload_profile_active &&
> +                   !(adev->vcn.flags & AMDGPU_VCN_FLAG_VINST_MASK(adev->vcn.num_vcn_inst))) {
>                         r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
>                                                             false);
>                         if (r)
>                                 dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
>                         adev->vcn.workload_profile_active = false;
>                 }
> -               mutex_unlock(&adev->vcn.workload_profile_mutex);
> +               mutex_unlock(&vcn_inst->vcn_pg_lock);
>         } else {
>                 schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
>         }
> @@ -470,14 +470,8 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
>
>         cancel_delayed_work_sync(&vcn_inst->idle_work);
>
> -       /* We can safely return early here because we've cancelled the
> -        * the delayed work so there is no one else to set it to false
> -        * and we don't care if someone else sets it to true.
> -        */
> -       if (adev->vcn.workload_profile_active)
> -               goto pg_lock;
> +       mutex_lock(&vcn_inst->vcn_pg_lock);
>
> -       mutex_lock(&adev->vcn.workload_profile_mutex);
>         if (!adev->vcn.workload_profile_active) {
>                 r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
>                                                     true);
> @@ -485,11 +479,11 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
>                         dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
>                 adev->vcn.workload_profile_active = true;
>         }
> -       mutex_unlock(&adev->vcn.workload_profile_mutex);
>
> -pg_lock:
> -       mutex_lock(&vcn_inst->vcn_pg_lock);
> -       vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
> +       if (!(adev->vcn.flags & AMDGPU_VCN_FLAG_VINST_ON(vcn_inst->inst))) {
> +               vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
> +               adev->vcn.flags |= AMDGPU_VCN_FLAG_VINST_ON(vcn_inst->inst);
> +       }
>
>         /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
>         if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> index b3fb1d0e43fc..a876a182ff88 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> @@ -366,6 +366,10 @@ struct amdgpu_vcn {
>         struct mutex            workload_profile_mutex;
>         u32 reg_count;
>         const struct amdgpu_hwip_reg_entry *reg_list;
> +#define AMDGPU_VCN_FLAG_VINST_MASK(n)  (BIT(n+1) - 1)
> +#define AMDGPU_VCN_FLAG_VINST_ON(n)    (BIT(n))
> +#define AMDGPU_VCN_FLAG_VINST_OFF(n)   (~BIT(n))
> +       u32 flags;
>  };
>
>  struct amdgpu_fw_shared_rb_ptrs_struct {
> --
> 2.48.1
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-drm-amdgpu-vcn-fix-video-profile-race-condition.patch
Type: text/x-patch
Size: 3153 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20250812/55491b45/attachment-0001.bin>


More information about the amd-gfx mailing list