[PATCH] drm/amdgpu/vcn: Fix video_profile switch race condition

Sundararaju, Sathishkumar sathishkumar.sundararaju at amd.com
Thu Aug 14 09:20:03 UTC 2025


Please ignore this.

Regards,

Sathish

On 8/14/2025 2:40 PM, Sathishkumar S wrote:
> There is a race condition which leads to dpm video power
> profile switch (disable and enable) during active video
> decode on multi-instance VCN hardware.
>
> This patch aims to fix/skip step 3 in the below sequence:
>
>   - inst_1       power_on
>   - inst_0(idle) power_off
>   - inst_0(idle) video_power_profile OFF (step 3)
>   - inst_1       video_power_profile ON during next begin_use
>
> Add flags to track ON/OFF vcn instances and check if all
> instances are off before disabling video power profile.
>
> v2: (David Wu)
>   - pg_lock is per instance it doesn't help solve the issue.
>   - protect flags also with global workload_profile_mutex.
>
> Signed-off-by: Sathishkumar S <sathishkumar.sundararaju at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 24 +++++++++++++++---------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  4 ++++
>   2 files changed, 19 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> index 9a76e11d1c18..b677b287dd49 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> @@ -447,7 +447,14 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
>   		vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
>   		mutex_unlock(&vcn_inst->vcn_pg_lock);
>   		mutex_lock(&adev->vcn.workload_profile_mutex);
> -		if (adev->vcn.workload_profile_active) {
> +		if (!adev->vcn.workload_profile_active &&
> +		    !(adev->vcn.flags & AMDGPU_VCN_FLAG_VINST_MASK(adev->vcn.num_vcn_inst))) {
> +			/* video profile is active , we are holding global workload_profile_mutex.
> +			 * it is safe to check if flags are 0 here and be assured that all instances
> +			 * are off, since no other begin_use paths can be holding this lock now.
> +			 * so off video_power_profile and update workload_profile_active = false
> +			 * since all vcn instances are inactive here.
> +			 */
>   			r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
>   							    false);
>   			if (r)
> @@ -470,24 +477,23 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
>   
>   	cancel_delayed_work_sync(&vcn_inst->idle_work);
>   
> -	/* We can safely return early here because we've cancelled the
> -	 * the delayed work so there is no one else to set it to false
> -	 * and we don't care if someone else sets it to true.
> -	 */
> -	if (adev->vcn.workload_profile_active)
> -		goto pg_lock;
> -
>   	mutex_lock(&adev->vcn.workload_profile_mutex);
>   	if (!adev->vcn.workload_profile_active) {
> +	/* If inactive proceed to ON video_power_profile and update workload_profile_active */
>   		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
>   						    true);
>   		if (r)
>   			dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
>   		adev->vcn.workload_profile_active = true;
>   	}
> +	/* Holding global workload_profile_mutex, so none of the idle handlers can access flags.
> +	 * and cannot OFF video_power_profile at this point. Can safely update vcn.flags to
> +	 * indicate active vcn instances, which is visible to any idle handlers who later grab
> +	 * this lock and check flags for any active vcn instances.
> +	 */
> +	adev->vcn.flags |= AMDGPU_VCN_FLAG_VINST_ON(vcn_inst->inst);
>   	mutex_unlock(&adev->vcn.workload_profile_mutex);
>   
> -pg_lock:
>   	mutex_lock(&vcn_inst->vcn_pg_lock);
>   	vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> index b3fb1d0e43fc..a876a182ff88 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> @@ -366,6 +366,10 @@ struct amdgpu_vcn {
>   	struct mutex            workload_profile_mutex;
>   	u32 reg_count;
>   	const struct amdgpu_hwip_reg_entry *reg_list;
> +#define AMDGPU_VCN_FLAG_VINST_MASK(n)  (BIT(n+1) - 1)
> +#define AMDGPU_VCN_FLAG_VINST_ON(n)    (BIT(n))
> +#define AMDGPU_VCN_FLAG_VINST_OFF(n)   (~BIT(n))
> +	u32 flags;
>   };
>   
>   struct amdgpu_fw_shared_rb_ptrs_struct {


More information about the amd-gfx mailing list