[PATCH] drm/amdgpu/vcn: Fix video_profile switch race condition

Sundararaju, Sathishkumar sathishkumar.sundararaju at amd.com
Tue Aug 12 20:03:25 UTC 2025


Hi Alex,


On 8/12/2025 9:38 PM, Alex Deucher wrote:
> On Tue, Aug 12, 2025 at 10:56 AM Sathishkumar S
> <sathishkumar.sundararaju at amd.com> wrote:
>> There is a race condition which leads to dpm video power
>> profile switch (disable and enable) during active video
>> decode on multi-instance VCN hardware.
>>
>> This patch aims to fix/skip step 3 in the below sequence:
>>
>>   - inst_1       power_on
>>   - inst_0(idle) power_off
>>   - inst_0(idle) video_power_profile OFF (step 3)
>>   - inst_1       video_power_profile ON during next begin_use
>>
>> Add flags to track ON/OFF vcn instances and check if all
>> instances are off before disabling video power profile.
> I think you could also just look at the outstanding fences on the
> other instances.  Something like the attached patch.  Either way works
> for me.

Yes, checking other instance fences is simpler, your patch is :-

Reviewed-by: Sathishkumar S <sathishkumar.sundararaju at amd.com>

Regards,
Sathish
>
> Alex
>
>> Protect workload_profile_active also within pg_lock and ON it
>> during first use and OFF it when last VCN instance is powered
>> OFF. VCN workload_profile_mutex can be removed after similar
>> clean up is done for vcn2_5.
>>
>> Signed-off-by: Sathishkumar S <sathishkumar.sundararaju at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 24 +++++++++---------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  4 ++++
>>   2 files changed, 13 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
>> index 9a76e11d1c18..da372dd7b761 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
>> @@ -445,16 +445,16 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
>>          if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
>>                  mutex_lock(&vcn_inst->vcn_pg_lock);
>>                  vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
>> -               mutex_unlock(&vcn_inst->vcn_pg_lock);
>> -               mutex_lock(&adev->vcn.workload_profile_mutex);
>> -               if (adev->vcn.workload_profile_active) {
>> +               adev->vcn.flags &= AMDGPU_VCN_FLAG_VINST_OFF(vcn_inst->inst);
>> +               if (adev->vcn.workload_profile_active &&
>> +                   !(adev->vcn.flags & AMDGPU_VCN_FLAG_VINST_MASK(adev->vcn.num_vcn_inst))) {
>>                          r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
>>                                                              false);
>>                          if (r)
>>                                  dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
>>                          adev->vcn.workload_profile_active = false;
>>                  }
>> -               mutex_unlock(&adev->vcn.workload_profile_mutex);
>> +               mutex_unlock(&vcn_inst->vcn_pg_lock);
>>          } else {
>>                  schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
>>          }
>> @@ -470,14 +470,8 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
>>
>>          cancel_delayed_work_sync(&vcn_inst->idle_work);
>>
>> -       /* We can safely return early here because we've cancelled the
>> -        * the delayed work so there is no one else to set it to false
>> -        * and we don't care if someone else sets it to true.
>> -        */
>> -       if (adev->vcn.workload_profile_active)
>> -               goto pg_lock;
>> +       mutex_lock(&vcn_inst->vcn_pg_lock);
>>
>> -       mutex_lock(&adev->vcn.workload_profile_mutex);
>>          if (!adev->vcn.workload_profile_active) {
>>                  r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
>>                                                      true);
>> @@ -485,11 +479,11 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
>>                          dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
>>                  adev->vcn.workload_profile_active = true;
>>          }
>> -       mutex_unlock(&adev->vcn.workload_profile_mutex);
>>
>> -pg_lock:
>> -       mutex_lock(&vcn_inst->vcn_pg_lock);
>> -       vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
>> +       if (!(adev->vcn.flags & AMDGPU_VCN_FLAG_VINST_ON(vcn_inst->inst))) {
>> +               vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
>> +               adev->vcn.flags |= AMDGPU_VCN_FLAG_VINST_ON(vcn_inst->inst);
>> +       }
>>
>>          /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
>>          if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
>> index b3fb1d0e43fc..a876a182ff88 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
>> @@ -366,6 +366,10 @@ struct amdgpu_vcn {
>>          struct mutex            workload_profile_mutex;
>>          u32 reg_count;
>>          const struct amdgpu_hwip_reg_entry *reg_list;
>> +#define AMDGPU_VCN_FLAG_VINST_MASK(n)  (BIT(n+1) - 1)
>> +#define AMDGPU_VCN_FLAG_VINST_ON(n)    (BIT(n))
>> +#define AMDGPU_VCN_FLAG_VINST_OFF(n)   (~BIT(n))
>> +       u32 flags;
>>   };
>>
>>   struct amdgpu_fw_shared_rb_ptrs_struct {
>> --
>> 2.48.1
>>


More information about the amd-gfx mailing list