[PATCH] drm/amdgpu/vcn: Fix race condition during dpm off

Sathishkumar S sathishkumar.sundararaju at amd.com
Wed Aug 13 20:59:23 UTC 2025


There is a race condition which leads to dpm video power profile
OFF during active video decode on multi-instance VCN hardware.
Add flags to track ON/OFF vcn instances and check if all
instances are off before turning OFF video power profile.

v2:
using per instance lock is wrong, use a global lock (David Wu)

Fixes: 3b669df92c85 ("drm/amdgpu/vcn: adjust workload profile handling")
Signed-off-by: Sathishkumar S <sathishkumar.sundararaju at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c    | 31 +++++++++-------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h    |  7 +++--
 drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c      | 18 +++----------
 4 files changed, 21 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f7d7e4748197..69a889b3222a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4444,7 +4444,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	}
 	mutex_init(&adev->gfx.userq_sch_mutex);
 	mutex_init(&adev->gfx.workload_profile_mutex);
-	mutex_init(&adev->vcn.workload_profile_mutex);
 	mutex_init(&adev->userq_mutex);
 
 	amdgpu_device_init_apu_flags(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 9a76e11d1c18..e034baa77977 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -102,6 +102,7 @@ int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i)
 	adev->vcn.inst[i].adev = adev;
 	adev->vcn.inst[i].inst = i;
 	amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix));
+	mutex_init(&adev->vcn.lock);
 
 	if (i != 0 && adev->vcn.per_inst_fw) {
 		r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw,
@@ -134,7 +135,6 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i)
 	int r;
 
 	mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
-	mutex_init(&adev->vcn.inst[i].vcn_pg_lock);
 	mutex_init(&adev->vcn.inst[i].engine_reset_mutex);
 	atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0);
 	INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler);
@@ -290,7 +290,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i)
 	if (adev->vcn.reg_list)
 		amdgpu_vcn_reg_dump_fini(adev);
 
-	mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock);
 	mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
 
 	return 0;
@@ -443,18 +442,18 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
 	fences += fence[i];
 
 	if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
-		mutex_lock(&vcn_inst->vcn_pg_lock);
+		mutex_lock(&adev->vcn.lock);
 		vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
-		mutex_unlock(&vcn_inst->vcn_pg_lock);
-		mutex_lock(&adev->vcn.workload_profile_mutex);
-		if (adev->vcn.workload_profile_active) {
+		adev->vcn.flags &= AMDGPU_VCN_FLAG_VINST_OFF(vcn_inst->inst);
+		if (adev->vcn.workload_profile_active &&
+		    !(adev->vcn.flags & AMDGPU_VCN_FLAG_VINST_MASK(adev->vcn.num_vcn_inst))) {
 			r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
 							    false);
 			if (r)
 				dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
 			adev->vcn.workload_profile_active = false;
 		}
-		mutex_unlock(&adev->vcn.workload_profile_mutex);
+		mutex_unlock(&adev->vcn.lock);
 	} else {
 		schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
 	}
@@ -470,14 +469,8 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 
 	cancel_delayed_work_sync(&vcn_inst->idle_work);
 
-	/* We can safely return early here because we've cancelled the
-	 * the delayed work so there is no one else to set it to false
-	 * and we don't care if someone else sets it to true.
-	 */
-	if (adev->vcn.workload_profile_active)
-		goto pg_lock;
+	mutex_lock(&adev->vcn.lock);
 
-	mutex_lock(&adev->vcn.workload_profile_mutex);
 	if (!adev->vcn.workload_profile_active) {
 		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
 						    true);
@@ -485,11 +478,11 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 			dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
 		adev->vcn.workload_profile_active = true;
 	}
-	mutex_unlock(&adev->vcn.workload_profile_mutex);
 
-pg_lock:
-	mutex_lock(&vcn_inst->vcn_pg_lock);
-	vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
+	if (!(adev->vcn.flags & AMDGPU_VCN_FLAG_VINST_ON(vcn_inst->inst))) {
+		vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
+		adev->vcn.flags |= AMDGPU_VCN_FLAG_VINST_ON(vcn_inst->inst);
+	}
 
 	/* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
@@ -514,7 +507,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 
 		vcn_inst->pause_dpg_mode(vcn_inst, &new_state);
 	}
-	mutex_unlock(&vcn_inst->vcn_pg_lock);
+	mutex_unlock(&adev->vcn.lock);
 }
 
 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index b3fb1d0e43fc..4457dcc5f9dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -320,7 +320,6 @@ struct amdgpu_vcn_inst {
 	uint8_t			vcn_config;
 	uint32_t		vcn_codec_disable_mask;
 	atomic_t		total_submission_cnt;
-	struct mutex		vcn_pg_lock;
 	enum amd_powergating_state cur_state;
 	struct delayed_work	idle_work;
 	unsigned		fw_version;
@@ -363,9 +362,13 @@ struct amdgpu_vcn {
 	unsigned		fw_version;
 
 	bool			workload_profile_active;
-	struct mutex            workload_profile_mutex;
+	struct mutex		lock;
 	u32 reg_count;
 	const struct amdgpu_hwip_reg_entry *reg_list;
+#define AMDGPU_VCN_FLAG_VINST_MASK(n)  (BIT(n+1) - 1)
+#define AMDGPU_VCN_FLAG_VINST_ON(n)    (BIT(n))
+#define AMDGPU_VCN_FLAG_VINST_OFF(n)   (~BIT(n))
+	u32 flags;
 };
 
 struct amdgpu_fw_shared_rb_ptrs_struct {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 3a7c137a83ef..344ab5c4cb18 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -147,9 +147,9 @@ static void vcn_v2_5_idle_work_handler(struct work_struct *work)
 	}
 
 	if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) {
+		mutex_lock(&adev->vcn.lock);
 		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
 						       AMD_PG_STATE_GATE);
-		mutex_lock(&adev->vcn.workload_profile_mutex);
 		if (adev->vcn.workload_profile_active) {
 			r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
 							    false);
@@ -157,7 +157,7 @@ static void vcn_v2_5_idle_work_handler(struct work_struct *work)
 				dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
 			adev->vcn.workload_profile_active = false;
 		}
-		mutex_unlock(&adev->vcn.workload_profile_mutex);
+		mutex_unlock(&adev->vcn.lock);
 	} else {
 		schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
 	}
@@ -173,14 +173,7 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring)
 
 	cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
 
-	/* We can safely return early here because we've cancelled the
-	 * the delayed work so there is no one else to set it to false
-	 * and we don't care if someone else sets it to true.
-	 */
-	if (adev->vcn.workload_profile_active)
-		goto pg_lock;
-
-	mutex_lock(&adev->vcn.workload_profile_mutex);
+	mutex_lock(&adev->vcn.lock);
 	if (!adev->vcn.workload_profile_active) {
 		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
 						    true);
@@ -188,10 +181,7 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring)
 			dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
 		adev->vcn.workload_profile_active = true;
 	}
-	mutex_unlock(&adev->vcn.workload_profile_mutex);
 
-pg_lock:
-	mutex_lock(&adev->vcn.inst[0].vcn_pg_lock);
 	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
 					       AMD_PG_STATE_UNGATE);
 
@@ -217,7 +207,7 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring)
 		}
 		v->pause_dpg_mode(v, &new_state);
 	}
-	mutex_unlock(&adev->vcn.inst[0].vcn_pg_lock);
+	mutex_unlock(&adev->vcn.lock);
 }
 
 static void vcn_v2_5_ring_end_use(struct amdgpu_ring *ring)
-- 
2.48.1



More information about the amd-gfx mailing list