[PATCH] drm/amdgpu/vcn: fix idle work handler for VCN 2.5

Boyuan Zhang <Boyuan.Zhang at amd.com>
Wed Mar 5 14:20:01 UTC 2025


On 2025-03-04 18:01, Alex Deucher wrote:
> VCN 2.5 uses the PG callback to enable VCN DPM which is
> a global state.  As such, we need to make sure all instances
> are in the same state.
>
> v2: switch to a ref count (Lijo)
> v3: switch to its own idle work handler
>
> Fixes: 4ce4fe27205c ("drm/amdgpu/vcn: use per instance callbacks for idle work handler")
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>


Reviewed-by: Boyuan Zhang <Boyuan.Zhang at amd.com>

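For readers of the archive: the constraint here is that on VCN 2.5 the
powergating callback toggles VCN DPM for the whole IP block, so the
per-instance idle handlers introduced by 4ce4fe27205c could gate the
block while another instance still had work in flight. The shape of the
fix, as a rough sketch (illustrative names, not the actual driver code):

static atomic_t shared_submission_cnt;

static void any_ring_begin_use(void)
{
	atomic_inc(&shared_submission_cnt);
}

static void any_ring_end_use(void)
{
	atomic_dec(&shared_submission_cnt);
}

static bool may_gate(unsigned int total_fences)
{
	/* total_fences: fences emitted but not yet signalled, summed
	 * over every ring of every instance */
	return !total_fences && !atomic_read(&shared_submission_cnt);
}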

> ---
>   drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 126 +++++++++++++++++++++++++-
>   1 file changed, 122 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
> index dff1a88590363..fa66521b940de 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
> @@ -107,6 +107,121 @@ static int amdgpu_ih_clientid_vcns[] = {
>   	SOC15_IH_CLIENTID_VCN1
>   };
>   
> +static void vcn_v2_5_idle_work_handler(struct work_struct *work)
> +{
> +	struct amdgpu_vcn_inst *vcn_inst =
> +		container_of(work, struct amdgpu_vcn_inst, idle_work.work);
> +	struct amdgpu_device *adev = vcn_inst->adev;
> +	unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
> +	unsigned int i, j;
> +	int r = 0;
> +
> +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> +		struct amdgpu_vcn_inst *v = &adev->vcn.inst[i];
> +
> +		if (adev->vcn.harvest_config & (1 << i))
> +			continue;
> +
> +		for (j = 0; j < v->num_enc_rings; ++j)
> +			fence[i] += amdgpu_fence_count_emitted(&v->ring_enc[j]);
> +
> +		/* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
> +		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
> +		    !v->using_unified_queue) {
> +			struct dpg_pause_state new_state;
> +
> +			if (fence[i] ||
> +			    unlikely(atomic_read(&v->dpg_enc_submission_cnt)))
> +				new_state.fw_based = VCN_DPG_STATE__PAUSE;
> +			else
> +				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
> +
> +			v->pause_dpg_mode(v, &new_state);
> +		}
> +
> +		fence[i] += amdgpu_fence_count_emitted(&v->ring_dec);
> +		fences += fence[i];
> +
> +	}
> +
> +	if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) {
> +		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
> +						       AMD_PG_STATE_GATE);
> +		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
> +						    false);
> +		if (r)
> +			dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
> +	} else {
> +		schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
> +	}
> +}
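
One detail worth calling out in the handler above: the ordering of the
fence accounting. fence[i] holds only the enc-ring fences at the point
where the DPG pause decision is taken; the dec-ring fences are added
afterwards and only feed the global gate decision. If I read it right,
that mirrors the generic amdgpu handler this was derived from.
Schematically (helper names are shorthand, not real functions):

	fence[i] += enc_fences(v);	/* enc fences only...            */
	decide_dpg_pause(v, fence[i]);	/* ...drive the DPG pause state  */
	fence[i] += dec_fences(v);	/* dec fences affect gating only */
	fences   += fence[i];		/* global sum gates the IP block */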
> +
> +static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	int r = 0, i;
> +
> +	atomic_inc(&adev->vcn.inst[0].total_submission_cnt);
> +
> +	if (!cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work)) {
> +		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
> +						    true);
> +		if (r)
> +			dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
> +	}
> +
> +	mutex_lock(&adev->vcn.inst[0].vcn_pg_lock);
> +	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
> +					       AMD_PG_STATE_UNGATE);
> +
> +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> +		struct amdgpu_vcn_inst *v = &adev->vcn.inst[i];
> +
> +		if (adev->vcn.harvest_config & (1 << i))
> +			continue;
> +		/* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
> +		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
> +		    !v->using_unified_queue) {
> +			struct dpg_pause_state new_state;
> +
> +			if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
> +				atomic_inc(&v->dpg_enc_submission_cnt);
> +				new_state.fw_based = VCN_DPG_STATE__PAUSE;
> +			} else {
> +				unsigned int fences = 0;
> +				unsigned int i;
> +
> +				for (i = 0; i < v->num_enc_rings; ++i)
> +					fences += amdgpu_fence_count_emitted(&v->ring_enc[i]);
> +
> +				if (fences || atomic_read(&v->dpg_enc_submission_cnt))
> +					new_state.fw_based = VCN_DPG_STATE__PAUSE;
> +				else
> +					new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
> +			}
> +
> +			v->pause_dpg_mode(v, &new_state);
> +		}
> +	}
> +	mutex_unlock(&adev->vcn.inst[0].vcn_pg_lock);
> +}
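
The cancel_delayed_work_sync() idiom at the top of begin_use is the
usual one: a true return means the idle work was still pending and got
cancelled before it could drop the video power profile, so the profile
is known to be on; a false return means the work either already ran or
was never queued, so the profile may be off and is (re-)enabled.
Roughly:

	/* sketch of the idiom, mirroring the code above */
	if (!cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work)) {
		/* idle work ran (or never armed): profile may be off */
		amdgpu_dpm_switch_power_profile(adev,
						PP_SMC_POWER_PROFILE_VIDEO,
						true);
	}

Note also that inst[0].vcn_pg_lock now serializes the global ungate
together with the per-instance DPG pause updates.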
> +
> +static void vcn_v2_5_ring_end_use(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +
> +	/* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
> +	if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
> +	    ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC &&
> +	    !adev->vcn.inst[ring->me].using_unified_queue)
> +		atomic_dec(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
> +
> +	atomic_dec(&adev->vcn.inst[0].total_submission_cnt);
> +
> +	schedule_delayed_work(&adev->vcn.inst[0].idle_work,
> +			      VCN_IDLE_TIMEOUT);
> +}
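
And for symmetry: dpg_enc_submission_cnt stays per instance (indexed by
ring->me), while total_submission_cnt and idle_work are always taken
from instance 0. inst[0] simply hosts the shared state, so every ring
of every instance bumps the same counter and re-arms the same delayed
work.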
> +
>   /**
>    * vcn_v2_5_early_init - set function pointers and load microcode
>    *
> @@ -201,6 +316,9 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block)
>   		if (r)
>   			return r;
>   
> +		/* Override the work func */
> +		adev->vcn.inst[j].idle_work.work.func = vcn_v2_5_idle_work_handler;
> +
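
Small note on the override above: this assumes the common
amdgpu_vcn_sw_init() installed the generic handler via
INIT_DELAYED_WORK(), e.g. something like

	/* assumed generic init; the handler name may differ */
	INIT_DELAYED_WORK(&adev->vcn.inst[j].idle_work,
			  amdgpu_vcn_idle_work_handler);

so rewriting work.func afterwards retargets just the callback. That is
safe here because nothing can have queued the work this early in
sw_init.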
>   		amdgpu_vcn_setup_ucode(adev, j);
>   
>   		r = amdgpu_vcn_resume(adev, j);
> @@ -1661,8 +1779,8 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {
>   	.insert_start = vcn_v2_0_dec_ring_insert_start,
>   	.insert_end = vcn_v2_0_dec_ring_insert_end,
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
> -	.begin_use = amdgpu_vcn_ring_begin_use,
> -	.end_use = amdgpu_vcn_ring_end_use,
> +	.begin_use = vcn_v2_5_ring_begin_use,
> +	.end_use = vcn_v2_5_ring_end_use,
>   	.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
>   	.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
>   	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
> @@ -1759,8 +1877,8 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
>   	.insert_nop = amdgpu_ring_insert_nop,
>   	.insert_end = vcn_v2_0_enc_ring_insert_end,
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
> -	.begin_use = amdgpu_vcn_ring_begin_use,
> -	.end_use = amdgpu_vcn_ring_end_use,
> +	.begin_use = vcn_v2_5_ring_begin_use,
> +	.end_use = vcn_v2_5_ring_end_use,
>   	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
>   	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
>   	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,