[PATCH] drm/amdgpu/vcn: Fix video_profile switch race condition
Sundararaju, Sathishkumar
sathishkumar.sundararaju at amd.com
Tue Aug 12 20:03:25 UTC 2025
Hi Alex,
On 8/12/2025 9:38 PM, Alex Deucher wrote:
> On Tue, Aug 12, 2025 at 10:56 AM Sathishkumar S
> <sathishkumar.sundararaju at amd.com> wrote:
>> There is a race condition which leads to dpm video power
>> profile switch (disable and enable) during active video
>> decode on multi-instance VCN hardware.
>>
>> This patch aims to fix/skip step 3 in the below sequence:
>>
>> - inst_1 power_on
>> - inst_0(idle) power_off
>> - inst_0(idle) video_power_profile OFF (step 3)
>> - inst_1 video_power_profile ON during next begin_use
>>
>> Add flags to track ON/OFF vcn instances and check if all
>> instances are off before disabling video power profile.
> I think you could also just look at the outstanding fences on the
> other instances. Something like the attached patch. Either way works
> for me.
Yes, checking other instance fences is simpler, your patch is :-
Reviewed-by: Sathishkumar S <sathishkumar.sundararaju at amd.com>
Regards,
Sathish
>
> Alex
>
>> Protect workload_profile_active also within pg_lock and ON it
>> during first use and OFF it when last VCN instance is powered
>> OFF. VCN workload_profile_mutex can be removed after similar
>> clean up is done for vcn2_5.
>>
>> Signed-off-by: Sathishkumar S <sathishkumar.sundararaju at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 24 +++++++++---------------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 4 ++++
>> 2 files changed, 13 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
>> index 9a76e11d1c18..da372dd7b761 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
>> @@ -445,16 +445,16 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
>> if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
>> mutex_lock(&vcn_inst->vcn_pg_lock);
>> vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
>> - mutex_unlock(&vcn_inst->vcn_pg_lock);
>> - mutex_lock(&adev->vcn.workload_profile_mutex);
>> - if (adev->vcn.workload_profile_active) {
>> + adev->vcn.flags &= AMDGPU_VCN_FLAG_VINST_OFF(vcn_inst->inst);
>> + if (adev->vcn.workload_profile_active &&
>> + !(adev->vcn.flags & AMDGPU_VCN_FLAG_VINST_MASK(adev->vcn.num_vcn_inst))) {
>> r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
>> false);
>> if (r)
>> dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
>> adev->vcn.workload_profile_active = false;
>> }
>> - mutex_unlock(&adev->vcn.workload_profile_mutex);
>> + mutex_unlock(&vcn_inst->vcn_pg_lock);
>> } else {
>> schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
>> }
>> @@ -470,14 +470,8 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
>>
>> cancel_delayed_work_sync(&vcn_inst->idle_work);
>>
>> - /* We can safely return early here because we've cancelled the
>> - * the delayed work so there is no one else to set it to false
>> - * and we don't care if someone else sets it to true.
>> - */
>> - if (adev->vcn.workload_profile_active)
>> - goto pg_lock;
>> + mutex_lock(&vcn_inst->vcn_pg_lock);
>>
>> - mutex_lock(&adev->vcn.workload_profile_mutex);
>> if (!adev->vcn.workload_profile_active) {
>> r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
>> true);
>> @@ -485,11 +479,11 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
>> dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
>> adev->vcn.workload_profile_active = true;
>> }
>> - mutex_unlock(&adev->vcn.workload_profile_mutex);
>>
>> -pg_lock:
>> - mutex_lock(&vcn_inst->vcn_pg_lock);
>> - vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
>> + if (!(adev->vcn.flags & AMDGPU_VCN_FLAG_VINST_ON(vcn_inst->inst))) {
>> + vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
>> + adev->vcn.flags |= AMDGPU_VCN_FLAG_VINST_ON(vcn_inst->inst);
>> + }
>>
>> /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
>> if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
>> index b3fb1d0e43fc..a876a182ff88 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
>> @@ -366,6 +366,10 @@ struct amdgpu_vcn {
>> struct mutex workload_profile_mutex;
>> u32 reg_count;
>> const struct amdgpu_hwip_reg_entry *reg_list;
>> +#define AMDGPU_VCN_FLAG_VINST_MASK(n) (BIT(n+1) - 1)
>> +#define AMDGPU_VCN_FLAG_VINST_ON(n) (BIT(n))
>> +#define AMDGPU_VCN_FLAG_VINST_OFF(n) (~BIT(n))
>> + u32 flags;
>> };
>>
>> struct amdgpu_fw_shared_rb_ptrs_struct {
>> --
>> 2.48.1
>>
More information about the amd-gfx
mailing list